3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from errno import EACCES
10 from io import BytesIO
11 import os, sys, stat, time, math
13 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
14 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
15 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
16 hostname, istty2, log, parse_date_or_fatal, parse_num,
17 path_components, progress, qprogress, resolve_parent,
18 saved_errors, stripped_path_components,
19 userfullname, username, valid_save_name)
23 bup save [-tc] [-n name] <filenames...>
25 r,remote= hostname:/path/to/repo of remote repository
26 t,tree output a tree id
27 c,commit output a commit id
28 n,name= name of backup set to update (if any)
29 d,date= date for the commit (seconds since the epoch)
30 v,verbose increase log output (can be used more than once)
31 q,quiet don't show progress meter
32 smaller= only back up files smaller than n bytes
33 bwlimit= maximum bytes/sec to transmit to server
34 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
35 strip strips the path to every filename given
36 strip-path= path-prefix to be stripped when saving
37 graft= a graft point *old_path*=*new_path* (can be used more than once)
38 #,compress= set compression level to # (0-9, 9 is highest) [1]
40 o = options.Options(optspec)
41 (opt, flags, extra) = o.parse(sys.argv[1:])
43 git.check_repo_or_die()
44 if not (opt.tree or opt.commit or opt.name):
45 o.fatal("use one or more of -t, -c, -n")
47 o.fatal("no filenames given")
49 opt.progress = (istty2 and not opt.quiet)
50 opt.smaller = parse_num(opt.smaller or 0)
52 client.bwlimit = parse_num(opt.bwlimit)
55 date = parse_date_or_fatal(opt.date, o.fatal)
59 if opt.strip and opt.strip_path:
60 o.fatal("--strip is incompatible with --strip-path")
65 o.fatal("--strip is incompatible with --graft")
68 o.fatal("--strip-path is incompatible with --graft")
70 for (option, parameter) in flags:
71 if option == "--graft":
72 splitted_parameter = parameter.split('=')
73 if len(splitted_parameter) != 2:
74 o.fatal("a graft point must be of the form old_path=new_path")
75 old_path, new_path = splitted_parameter
76 if not (old_path and new_path):
77 o.fatal("a graft point cannot be empty")
78 graft_points.append((resolve_parent(old_path),
79 resolve_parent(new_path)))
81 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
82 if is_reverse and opt.remote:
83 o.fatal("don't use -r in reverse mode; it's automatic")
85 if opt.name and not valid_save_name(opt.name):
86 o.fatal("'%s' is not a valid branch name" % opt.name)
87 refname = opt.name and 'refs/heads/%s' % opt.name or None
88 if opt.remote or is_reverse:
90 cli = client.Client(opt.remote)
91 except client.ClientError as e:
94 oldref = refname and cli.read_ref(refname) or None
95 w = cli.new_packwriter(compression_level=opt.compress)
98 oldref = refname and git.read_ref(refname) or None
99 w = git.PackWriter(compression_level=opt.compress)
105 if dir.endswith('/'):
111 # Metadata is stored in a file named .bupm in each directory. The
112 # first metadata entry will be the metadata for the current directory.
113 # The remaining entries will be for each of the other directory
114 # elements, in the order they're listed in the index.
116 # Since the git tree elements are sorted according to
117 # git.shalist_item_sort_key, the metalist items are accumulated as
118 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
119 # created. The sort_key must be computed using the element's real
120 # name and mode rather than the git mode and (possibly mangled) name.
122 # Maintain a stack of information representing the current location in
123 # the archive being constructed. The current path is recorded in
124 # parts, which will be something like ['', 'home', 'someuser'], and
125 # the accumulated content and metadata for each of the dirs in parts is
126 # stored in parallel stacks in shalists and metalists.
128 parts = [] # Current archive position (stack of dir names).
129 shalists = [] # Hashes for each dir in parts.
130 metalists = [] # Metadata for each dir in parts.
133 def _push(part, metadata):
134 # Enter a new archive directory -- make it the current directory.
137 metalists.append([('', metadata)]) # This dir's metadata (no name).
140 def _pop(force_tree, dir_metadata=None):
141 # Leave the current archive directory and add its tree to its parent.
142 assert(len(parts) >= 1)
144 shalist = shalists.pop()
145 metalist = metalists.pop()
146 if metalist and not force_tree:
147 if dir_metadata: # Override the original metadata pushed for this dir.
148 metalist = [('', dir_metadata)] + metalist[1:]
149 sorted_metalist = sorted(metalist, key = lambda x : x[0])
150 metadata = ''.join([m[1].encode() for m in sorted_metalist])
151 metadata_f = BytesIO(metadata)
152 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
154 keep_boundaries=False)
155 shalist.append((mode, '.bupm', id))
156 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
164 if name in names_seen:
165 parent_path = '/'.join(parts) + '/'
166 add_error('error: ignoring duplicate path %r in %r'
167 % (name, parent_path))
171 tree = w.new_tree(clean_list)
173 shalists[-1].append((GIT_MODE_TREE,
174 git.mangle_name(part,
175 GIT_MODE_TREE, GIT_MODE_TREE),
181 def progress_report(n):
182 global count, subcount, lastremain
184 cc = count + subcount
185 pct = total and (cc*100.0/total) or 0
187 elapsed = now - tstart
188 kps = elapsed and int(cc/1024./elapsed)
189 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
190 kps = int(kps/kps_frac)*kps_frac
192 remain = elapsed*1.0/cc * (total-cc)
195 if (lastremain and (remain > lastremain)
196 and ((remain - lastremain)/lastremain < 0.05)):
200 hours = int(remain/60/60)
201 mins = int(remain/60 - hours*60)
202 secs = int(remain - hours*60*60 - mins*60)
207 kpsstr = '%dk/s' % kps
209 remainstr = '%dh%dm' % (hours, mins)
211 remainstr = '%dm%d' % (mins, secs)
213 remainstr = '%ds' % secs
214 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
215 % (pct, cc/1024, total/1024, fcount, ftotal,
219 indexfile = opt.indexfile or git.repo('bupindex')
220 r = index.Reader(indexfile)
222 msr = index.MetaStoreReader(indexfile + '.meta')
223 except IOError as ex:
224 if ex.errno != EACCES:
226 log('error: cannot access %r; have you run bup index?' % indexfile)
228 hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink')
def already_saved(ent):
    """Return ent.sha if the index entry is valid and its sha is already stored.

    The checks short-circuit: when ent.is_valid() is falsy that result is
    returned; otherwise when w.exists(ent.sha) is falsy that result is
    returned; only when both are truthy is ent.sha itself returned.
    """
    valid = ent.is_valid()
    if not valid:
        return valid
    # Ask the pack writer whether it already has this object.
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    """Recursion predicate for the index-reading pass.

    Returns True exactly when already_saved(ent) is falsy, i.e. the entry
    still needs to be visited.
    """
    saved = already_saved(ent)
    return not saved
def wantrecurse_during(ent):
    """Recursion predicate for the saving pass.

    Returns True when already_saved(ent) is falsy; otherwise returns
    whatever ent.sha_missing() reports for the entry.
    """
    if not already_saved(ent):
        return True
    return ent.sha_missing()
239 def find_hardlink_target(hlink_db, ent):
240 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
241 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
247 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
248 if not (ftotal % 10024):
249 qprogress('Reading index: %d\r' % ftotal)
250 exists = ent.exists()
251 hashvalid = already_saved(ent)
252 ent.set_sha_missing(not hashvalid)
253 if not opt.smaller or ent.size < opt.smaller:
254 if exists and not hashvalid:
257 progress('Reading index: %d, done.\n' % ftotal)
258 hashsplit.progress_callback = progress_report
260 # Root collisions occur when strip or graft options map more than one
261 # path to the same directory (paths which originally had separate
262 # parents). When that situation is detected, use empty metadata for
263 # the parent. Otherwise, use the metadata for the common parent.
264 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
266 # FIXME: Add collision tests, or handle collisions some other way.
268 # FIXME: Detect/handle strip/graft name collisions (other than root),
269 # i.e. if '/foo/bar' and '/bar' both map to '/'.
272 root_collision = None
274 count = subcount = fcount = 0
277 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
278 (dir, file) = os.path.split(ent.name)
279 exists = (ent.flags & index.IX_EXISTS)
280 hashvalid = already_saved(ent)
281 wasmissing = ent.sha_missing()
287 if ent.sha == index.EMPTY_SHA:
294 log('%s %-70s\n' % (status, ent.name))
295 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
296 if not lastdir.startswith(dir):
297 log('%s %-70s\n' % (status, os.path.join(dir, '')))
306 if opt.smaller and ent.size >= opt.smaller:
307 if exists and not hashvalid:
309 log('skipping large file "%s"\n' % ent.name)
310 lastskip_name = ent.name
313 assert(dir.startswith('/'))
315 dirp = stripped_path_components(dir, extra)
317 dirp = stripped_path_components(dir, [opt.strip_path])
319 dirp = grafted_path_components(graft_points, dir)
321 dirp = path_components(dir)
323 # At this point, dirp contains a representation of the archive
324 # path that looks like [(archive_dir_name, real_fs_path), ...].
325 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
326 # might look like this at some point:
327 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
329 # This dual representation supports stripping/grafting, where the
330 # archive path may not have a direct correspondence with the
331 # filesystem. The root directory is represented by an initial
332 # component named '', and any component that doesn't have a
333 # corresponding filesystem directory (due to grafting, for
334 # example) will have a real_fs_path of None, i.e. [('', None),
337 if first_root == None:
339 elif first_root != dirp[0]:
340 root_collision = True
342 # If switching to a new sub-tree, finish the current sub-tree.
343 while parts > [x[0] for x in dirp]:
344 _pop(force_tree = None)
346 # If switching to a new sub-tree, start a new sub-tree.
347 for path_component in dirp[len(parts):]:
348 dir_name, fs_path = path_component
349 # Not indexed, so just grab the FS metadata or use empty metadata.
351 meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata()
352 except (OSError, IOError) as e:
354 lastskip_name = dir_name
355 meta = metadata.Metadata()
356 _push(dir_name, meta)
360 continue # We're at the top level -- keep the current root dir
361 # Since there's no filename, this is a subdir -- finish it.
362 oldtree = already_saved(ent) # may be None
363 newtree = _pop(force_tree = oldtree)
365 if lastskip_name and lastskip_name.startswith(ent.name):
368 ent.validate(GIT_MODE_TREE, newtree)
370 if exists and wasmissing:
374 # it's not a directory
378 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
379 git_info = (ent.gitmode, git_name, id)
380 shalists[-1].append(git_info)
381 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
382 meta = msr.metadata_at(ent.meta_ofs)
383 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
384 # Restore the times that were cleared to 0 in the metastore.
385 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
386 metalists[-1].append((sort_key, meta))
388 if stat.S_ISREG(ent.mode):
390 f = hashsplit.open_noatime(ent.name)
391 except (IOError, OSError) as e:
393 lastskip_name = ent.name
396 (mode, id) = hashsplit.split_to_blob_or_tree(
397 w.new_blob, w.new_tree, [f],
398 keep_boundaries=False)
399 except (IOError, OSError) as e:
400 add_error('%s: %s' % (ent.name, e))
401 lastskip_name = ent.name
403 if stat.S_ISDIR(ent.mode):
404 assert(0) # handled above
405 elif stat.S_ISLNK(ent.mode):
407 rl = os.readlink(ent.name)
408 except (OSError, IOError) as e:
410 lastskip_name = ent.name
412 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
414 # Everything else should be fully described by its
415 # metadata, so just record an empty blob, so the paths
416 # in the tree and .bupm will match up.
417 (mode, id) = (GIT_MODE_FILE, w.new_blob(""))
420 ent.validate(mode, id)
422 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
423 git_info = (mode, git_name, id)
424 shalists[-1].append(git_info)
425 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
426 hlink = find_hardlink_target(hlink_db, ent)
428 meta = metadata.from_path(ent.name, hardlink_target=hlink)
429 except (OSError, IOError) as e:
431 lastskip_name = ent.name
433 metalists[-1].append((sort_key, meta))
435 if exists and wasmissing:
441 pct = total and count*100.0/total or 100
442 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
443 % (pct, count/1024, total/1024, fcount, ftotal))
445 while len(parts) > 1: # _pop() all the parts above the root
446 _pop(force_tree = None)
447 assert(len(shalists) == 1)
448 assert(len(metalists) == 1)
450 # Finish the root directory.
451 tree = _pop(force_tree = None,
452 # When there's a collision, use empty metadata for the root.
453 dir_metadata = metadata.Metadata() if root_collision else None)
456 print(tree.encode('hex'))
457 if opt.commit or opt.name:
458 msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv
459 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
460 commit = w.new_commit(tree, oldref, userline, date, None,
461 userline, date, None, msg)
463 print(commit.encode('hex'))
466 w.close() # must close before we can update the ref
470 cli.update_ref(refname, commit, oldref)
472 git.update_ref(refname, commit, oldref)
478 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))