3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4 export "BUP_ARGV_0"="$0"
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
12 bup_python="$(dirname "$0")/../../../config/bin/python" || exit $?
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, print_function
19 # Intentionally replace the dirname "$0" that python prepends
21 sys.path[0] = os.path.dirname(os.path.realpath(__file__)) + '/../..'
23 from binascii import hexlify
24 from errno import EACCES
25 from io import BytesIO
26 import math, stat, time
28 from bup import compat, hashsplit, git, options, index, client, metadata
29 from bup import hlinkdb
30 from bup.compat import argv_bytes, environ
31 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
32 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
33 hostname, istty2, log, parse_date_or_fatal, parse_num,
34 path_components, progress, qprogress, resolve_parent,
35 saved_errors, stripped_path_components,
37 from bup.io import byte_stream, path_msg
38 from bup.pwdgrp import userfullname, username
42 bup save [-tc] [-n name] <filenames...>
44 r,remote= hostname:/path/to/repo of remote repository
45 t,tree output a tree id
46 c,commit output a commit id
47 n,name= name of backup set to update (if any)
48 d,date= date for the commit (seconds since the epoch)
49 v,verbose increase log output (can be used more than once)
50 q,quiet don't show progress meter
51 smaller= only back up files smaller than n bytes
52 bwlimit= maximum bytes/sec to transmit to server
53 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
54 strip strips the path to every filename given
55 strip-path= path-prefix to be stripped when saving
56 graft= a graft point *old_path*=*new_path* (can be used more than once)
57 #,compress= set compression level to # (0-9, 9 is highest) [1]
59 o = options.Options(optspec)
60 opt, flags, extra = o.parse(compat.argv[1:])
63 opt.indexfile = argv_bytes(opt.indexfile)
65 opt.name = argv_bytes(opt.name)
67 opt.remote = argv_bytes(opt.remote)
69 opt.strip_path = argv_bytes(opt.strip_path)
71 git.check_repo_or_die()
72 if not (opt.tree or opt.commit or opt.name):
73 o.fatal("use one or more of -t, -c, -n")
75 o.fatal("no filenames given")
77 extra = [argv_bytes(x) for x in extra]
79 opt.progress = (istty2 and not opt.quiet)
80 opt.smaller = parse_num(opt.smaller or 0)
82 client.bwlimit = parse_num(opt.bwlimit)
85 date = parse_date_or_fatal(opt.date, o.fatal)
89 if opt.strip and opt.strip_path:
90 o.fatal("--strip is incompatible with --strip-path")
95 o.fatal("--strip is incompatible with --graft")
98 o.fatal("--strip-path is incompatible with --graft")
# Turn every --graft OLD=NEW flag into a pair of resolved paths and
# record it in graft_points; anything else in flags is ignored here.
for option, parameter in flags:
    if option != "--graft":
        continue
    parameter = argv_bytes(parameter)
    splitted_parameter = parameter.split(b'=')
    # Exactly one '=' is required to separate the two paths.
    if len(splitted_parameter) != 2:
        o.fatal("a graft point must be of the form old_path=new_path")
    old_path, new_path = splitted_parameter
    if not old_path or not new_path:
        o.fatal("a graft point cannot be empty")
    graft_point = (resolve_parent(old_path), resolve_parent(new_path))
    graft_points.append(graft_point)
112 is_reverse = environ.get(b'BUP_SERVER_REVERSE')
113 if is_reverse and opt.remote:
114 o.fatal("don't use -r in reverse mode; it's automatic")
117 if name and not valid_save_name(name):
118 o.fatal("'%s' is not a valid branch name" % path_msg(name))
119 refname = name and b'refs/heads/%s' % name or None
120 if opt.remote or is_reverse:
122 cli = client.Client(opt.remote)
123 except client.ClientError as e:
126 oldref = refname and cli.read_ref(refname) or None
127 w = cli.new_packwriter(compression_level=opt.compress)
130 oldref = refname and git.read_ref(refname) or None
131 w = git.PackWriter(compression_level=opt.compress)
136 # Metadata is stored in a file named .bupm in each directory. The
137 # first metadata entry will be the metadata for the current directory.
138 # The remaining entries will be for each of the other directory
139 # elements, in the order they're listed in the index.
141 # Since the git tree elements are sorted according to
142 # git.shalist_item_sort_key, the metalist items are accumulated as
143 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
144 # created. The sort_key should have been computed using the element's
145 # mangled name and git mode (after hashsplitting), but the code isn't
146 # actually doing that but rather uses the element's real name and mode.
147 # This makes things a bit more difficult when reading it back, see
148 # vfs.ordered_tree_entries().
150 # Maintain a stack of information representing the current location in
151 # the archive being constructed. The current path is recorded in
152 # parts, which will be something like ['', 'home', 'someuser'], and
153 # the accumulated content and metadata for each of the dirs in parts is
154 # stored in parallel stacks in shalists and metalists.
156 parts = [] # Current archive position (stack of dir names).
157 shalists = [] # Tree entries collected for each dir in parts (parallel stack).
158 metalists = [] # (sort_key, metadata) entries for each dir in parts (parallel stack).
161 def _push(part, metadata):
162 # Enter a new archive directory -- make it the current directory.
165 metalists.append([(b'', metadata)]) # This dir's metadata (no name).
168 def _pop(force_tree, dir_metadata=None):
169 # Leave the current archive directory and add its tree to its parent.
170 assert(len(parts) >= 1)
172 shalist = shalists.pop()
173 metalist = metalists.pop()
174 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
180 metaidx = 1 # entry at 0 is for the dir
183 if name in names_seen:
184 parent_path = b'/'.join(parts) + b'/'
185 add_error('error: ignoring duplicate path %s in %s'
186 % (path_msg(name), path_msg(parent_path)))
187 if not stat.S_ISDIR(x[0]):
188 del metalist[metaidx]
192 if not stat.S_ISDIR(x[0]):
195 if dir_metadata: # Override the original metadata pushed for this dir.
196 metalist = [(b'', dir_metadata)] + metalist[1:]
197 sorted_metalist = sorted(metalist, key = lambda x : x[0])
198 metadata = b''.join([m[1].encode() for m in sorted_metalist])
199 metadata_f = BytesIO(metadata)
200 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
202 keep_boundaries=False)
203 clean_list.append((mode, b'.bupm', id))
205 tree = w.new_tree(clean_list)
207 shalists[-1].append((GIT_MODE_TREE,
208 git.mangle_name(part,
209 GIT_MODE_TREE, GIT_MODE_TREE),
215 def progress_report(n):
216 global count, subcount, lastremain
218 cc = count + subcount
219 pct = total and (cc*100.0/total) or 0
221 elapsed = now - tstart
222 kps = elapsed and int(cc/1024./elapsed)
223 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
224 kps = int(kps/kps_frac)*kps_frac
226 remain = elapsed*1.0/cc * (total-cc)
229 if (lastremain and (remain > lastremain)
230 and ((remain - lastremain)/lastremain < 0.05)):
234 hours = int(remain/60/60)
235 mins = int(remain/60 - hours*60)
236 secs = int(remain - hours*60*60 - mins*60)
241 kpsstr = '%dk/s' % kps
243 remainstr = '%dh%dm' % (hours, mins)
245 remainstr = '%dm%d' % (mins, secs)
247 remainstr = '%ds' % secs
248 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
249 % (pct, cc/1024, total/1024, fcount, ftotal,
253 indexfile = opt.indexfile or git.repo(b'bupindex')
254 r = index.Reader(indexfile)
256 msr = index.MetaStoreReader(indexfile + b'.meta')
257 except IOError as ex:
258 if ex.errno != EACCES:
260 log('error: cannot access %r; have you run bup index?'
261 % path_msg(indexfile))
263 hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
def already_saved(ent):
    """Return ent.sha if the entry is valid and the pack writer has it.

    When either check fails, the falsy result of that check is returned
    instead, so the value works both as a flag and as the saved sha.
    """
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    """Recursion filter for the index-reading pass.

    Returns True only for entries that are not already saved.
    """
    if already_saved(ent):
        return False
    return True
def wantrecurse_during(ent):
    """Recursion filter for the save pass.

    Recurse into an entry that is not already saved; for a saved entry,
    fall back to whatever its sha_missing() flag reports.
    """
    if not already_saved(ent):
        return True
    return ent.sha_missing()
274 def find_hardlink_target(hlink_db, ent):
275 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
276 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
282 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
283 if not (ftotal % 10024):
284 qprogress('Reading index: %d\r' % ftotal)
285 exists = ent.exists()
286 hashvalid = already_saved(ent)
287 ent.set_sha_missing(not hashvalid)
288 if not opt.smaller or ent.size < opt.smaller:
289 if exists and not hashvalid:
292 progress('Reading index: %d, done.\n' % ftotal)
293 hashsplit.progress_callback = progress_report
295 # Root collisions occur when strip or graft options map more than one
296 # path to the same directory (paths which originally had separate
297 # parents). When that situation is detected, use empty metadata for
298 # the parent. Otherwise, use the metadata for the common parent.
299 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
301 # FIXME: Add collision tests, or handle collisions some other way.
303 # FIXME: Detect/handle strip/graft name collisions (other than root),
304 # i.e. if '/foo/bar' and '/bar' both map to '/'.
307 root_collision = None
309 count = subcount = fcount = 0
312 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
313 (dir, file) = os.path.split(ent.name)
314 exists = (ent.flags & index.IX_EXISTS)
315 hashvalid = already_saved(ent)
316 wasmissing = ent.sha_missing()
322 if ent.sha == index.EMPTY_SHA:
329 log('%s %-70s\n' % (status, path_msg(ent.name)))
330 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
331 if not lastdir.startswith(dir):
332 log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
341 if opt.smaller and ent.size >= opt.smaller:
342 if exists and not hashvalid:
344 log('skipping large file "%s"\n' % path_msg(ent.name))
345 lastskip_name = ent.name
348 assert(dir.startswith(b'/'))
350 dirp = stripped_path_components(dir, extra)
352 dirp = stripped_path_components(dir, [opt.strip_path])
354 dirp = grafted_path_components(graft_points, dir)
356 dirp = path_components(dir)
358 # At this point, dirp contains a representation of the archive
359 # path that looks like [(archive_dir_name, real_fs_path), ...].
360 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
361 # might look like this at some point:
362 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
364 # This dual representation supports stripping/grafting, where the
365 # archive path may not have a direct correspondence with the
366 # filesystem. The root directory is represented by an initial
367 # component named '', and any component that doesn't have a
368 # corresponding filesystem directory (due to grafting, for
369 # example) will have a real_fs_path of None, i.e. [('', None),
372 if first_root == None:
374 elif first_root != dirp[0]:
375 root_collision = True
377 # If switching to a new sub-tree, finish the current sub-tree.
378 while parts > [x[0] for x in dirp]:
379 _pop(force_tree = None)
381 # If switching to a new sub-tree, start a new sub-tree.
382 for path_component in dirp[len(parts):]:
383 dir_name, fs_path = path_component
384 # Not indexed, so just grab the FS metadata or use empty metadata.
386 meta = metadata.from_path(fs_path, normalized=True) \
387 if fs_path else metadata.Metadata()
388 except (OSError, IOError) as e:
390 lastskip_name = dir_name
391 meta = metadata.Metadata()
392 _push(dir_name, meta)
396 continue # We're at the top level -- keep the current root dir
397 # Since there's no filename, this is a subdir -- finish it.
398 oldtree = already_saved(ent) # may be None
399 newtree = _pop(force_tree = oldtree)
401 if lastskip_name and lastskip_name.startswith(ent.name):
404 ent.validate(GIT_MODE_TREE, newtree)
406 if exists and wasmissing:
410 # it's not a directory
413 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
414 git_info = (ent.gitmode, git_name, id)
415 shalists[-1].append(git_info)
416 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
417 meta = msr.metadata_at(ent.meta_ofs)
418 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
419 # Restore the times that were cleared to 0 in the metastore.
420 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
421 metalists[-1].append((sort_key, meta))
424 if stat.S_ISREG(ent.mode):
426 with hashsplit.open_noatime(ent.name) as f:
427 (mode, id) = hashsplit.split_to_blob_or_tree(
428 w.new_blob, w.new_tree, [f],
429 keep_boundaries=False)
430 except (IOError, OSError) as e:
431 add_error('%s: %s' % (ent.name, e))
432 lastskip_name = ent.name
433 elif stat.S_ISDIR(ent.mode):
434 assert(0) # handled above
435 elif stat.S_ISLNK(ent.mode):
437 rl = os.readlink(ent.name)
438 except (OSError, IOError) as e:
440 lastskip_name = ent.name
442 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
444 # Everything else should be fully described by its
445 # metadata, so just record an empty blob, so the paths
446 # in the tree and .bupm will match up.
447 (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))
450 ent.validate(mode, id)
452 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
453 git_info = (mode, git_name, id)
454 shalists[-1].append(git_info)
455 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
456 hlink = find_hardlink_target(hlink_db, ent)
458 meta = metadata.from_path(ent.name, hardlink_target=hlink,
460 except (OSError, IOError) as e:
462 lastskip_name = ent.name
463 meta = metadata.Metadata()
464 metalists[-1].append((sort_key, meta))
466 if exists and wasmissing:
472 pct = total and count*100.0/total or 100
473 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
474 % (pct, count/1024, total/1024, fcount, ftotal))
# Unwind the directory stack until only the root entry is left.
while len(parts) > 1:
    _pop(force_tree=None)
assert len(shalists) == 1
assert len(metalists) == 1

# Finish the root directory.  When there's a collision, use empty
# metadata for the root; otherwise keep the metadata pushed for it.
root_metadata = metadata.Metadata() if root_collision else None
tree = _pop(force_tree=None, dir_metadata=root_metadata)
487 out = byte_stream(sys.stdout)
490 out.write(hexlify(tree))
492 if opt.commit or name:
493 if compat.py_maj > 2:
494 # Strip b prefix from python 3 bytes reprs to preserve previous format
495 msgcmd = b'[%s]' % b', '.join([repr(argv_bytes(x))[1:].encode('ascii')
496 for x in compat.argv])
498 msgcmd = repr(compat.argv)
499 msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
500 userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
501 commit = w.new_commit(tree, oldref, userline, date, None,
502 userline, date, None, msg)
504 out.write(hexlify(commit))
508 w.close() # must close before we can update the ref
512 cli.update_ref(refname, commit, oldref)
514 git.update_ref(refname, commit, oldref)
520 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))