3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4 export "BUP_ARGV_0"="$0"
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
12 bup_python="$(dirname "$0")/bup-python" || exit $?
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, print_function
18 from binascii import hexlify
19 from errno import EACCES
20 from io import BytesIO
21 import os, sys, stat, time, math
23 from bup import compat, hashsplit, git, options, index, client, metadata
24 from bup import hlinkdb
25 from bup.compat import argv_bytes, environ
26 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
27 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
28 hostname, istty2, log, parse_date_or_fatal, parse_num,
29 path_components, progress, qprogress, resolve_parent,
30 saved_errors, stripped_path_components,
32 from bup.io import byte_stream, path_msg
33 from bup.pwdgrp import userfullname, username
37 bup save [-tc] [-n name] <filenames...>
39 r,remote= hostname:/path/to/repo of remote repository
40 t,tree output a tree id
41 c,commit output a commit id
42 n,name= name of backup set to update (if any)
43 d,date= date for the commit (seconds since the epoch)
44 v,verbose increase log output (can be used more than once)
45 q,quiet don't show progress meter
46 smaller= only back up files smaller than n bytes
47 bwlimit= maximum bytes/sec to transmit to server
48 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
49 strip strips the path to every filename given
50 strip-path= path-prefix to be stripped when saving
51 graft= a graft point *old_path*=*new_path* (can be used more than once)
52 #,compress= set compression level to # (0-9, 9 is highest) [1]
54 o = options.Options(optspec)
55 opt, flags, extra = o.parse(compat.argv[1:])
58 opt.indexfile = argv_bytes(opt.indexfile)
60 opt.name = argv_bytes(opt.name)
62 opt.remote = argv_bytes(opt.remote)
64 opt.strip_path = argv_bytes(opt.strip_path)
66 git.check_repo_or_die()
67 if not (opt.tree or opt.commit or opt.name):
68 o.fatal("use one or more of -t, -c, -n")
70 o.fatal("no filenames given")
72 extra = [argv_bytes(x) for x in extra]
74 opt.progress = (istty2 and not opt.quiet)
75 opt.smaller = parse_num(opt.smaller or 0)
77 client.bwlimit = parse_num(opt.bwlimit)
80 date = parse_date_or_fatal(opt.date, o.fatal)
84 if opt.strip and opt.strip_path:
85 o.fatal("--strip is incompatible with --strip-path")
90 o.fatal("--strip is incompatible with --graft")
93 o.fatal("--strip-path is incompatible with --graft")
95 for (option, parameter) in flags:
96 if option == "--graft":
97 parameter = argv_bytes(parameter)
98 splitted_parameter = parameter.split(b'=')
99 if len(splitted_parameter) != 2:
100 o.fatal("a graft point must be of the form old_path=new_path")
101 old_path, new_path = splitted_parameter
102 if not (old_path and new_path):
103 o.fatal("a graft point cannot be empty")
104 graft_points.append((resolve_parent(old_path),
105 resolve_parent(new_path)))
107 is_reverse = environ.get(b'BUP_SERVER_REVERSE')
108 if is_reverse and opt.remote:
109 o.fatal("don't use -r in reverse mode; it's automatic")
112 if name and not valid_save_name(name):
113 o.fatal("'%s' is not a valid branch name" % path_msg(name))
114 refname = name and b'refs/heads/%s' % name or None
115 if opt.remote or is_reverse:
117 cli = client.Client(opt.remote)
118 except client.ClientError as e:
121 oldref = refname and cli.read_ref(refname) or None
122 w = cli.new_packwriter(compression_level=opt.compress)
125 oldref = refname and git.read_ref(refname) or None
126 w = git.PackWriter(compression_level=opt.compress)
131 # Metadata is stored in a file named .bupm in each directory. The
132 # first metadata entry will be the metadata for the current directory.
133 # The remaining entries will be for each of the other directory
134 # elements, in the order they're listed in the index.
136 # Since the git tree elements are sorted according to
137 # git.shalist_item_sort_key, the metalist items are accumulated as
138 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
139 # created. The sort_key should have been computed using the element's
140 # mangled name and git mode (after hashsplitting), but the code doesn't
141 # actually do that; it uses the element's real name and mode instead.
142 # This makes things a bit more difficult when reading it back, see
143 # vfs.ordered_tree_entries().
145 # Maintain a stack of information representing the current location in
146 # the archive being constructed. The current path is recorded in
147 # parts, which will be something like ['', 'home', 'someuser'], and
148 # the accumulated content and metadata for each of the dirs in parts is
149 # stored in parallel stacks in shalists and metalists.
151 parts = [] # Current archive position (stack of dir names).
152 shalists = [] # Hashes for each dir in parts.
153 metalists = [] # Metadata for each dir in parts.
156 def _push(part, metadata):
157 # Enter a new archive directory -- make it the current directory.
160 metalists.append([(b'', metadata)]) # This dir's metadata (no name).
163 def _pop(force_tree, dir_metadata=None):
164 # Leave the current archive directory and add its tree to its parent.
165 assert(len(parts) >= 1)
167 shalist = shalists.pop()
168 metalist = metalists.pop()
169 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
175 metaidx = 1 # entry at 0 is for the dir
178 if name in names_seen:
179 parent_path = b'/'.join(parts) + b'/'
180 add_error('error: ignoring duplicate path %s in %s'
181 % (path_msg(name), path_msg(parent_path)))
182 if not stat.S_ISDIR(x[0]):
183 del metalist[metaidx]
187 if not stat.S_ISDIR(x[0]):
190 if dir_metadata: # Override the original metadata pushed for this dir.
191 metalist = [(b'', dir_metadata)] + metalist[1:]
192 sorted_metalist = sorted(metalist, key = lambda x : x[0])
193 metadata = b''.join([m[1].encode() for m in sorted_metalist])
194 metadata_f = BytesIO(metadata)
195 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
197 keep_boundaries=False)
198 clean_list.append((mode, b'.bupm', id))
200 tree = w.new_tree(clean_list)
202 shalists[-1].append((GIT_MODE_TREE,
203 git.mangle_name(part,
204 GIT_MODE_TREE, GIT_MODE_TREE),
210 def progress_report(n):
211 global count, subcount, lastremain
213 cc = count + subcount
214 pct = total and (cc*100.0/total) or 0
216 elapsed = now - tstart
217 kps = elapsed and int(cc/1024./elapsed)
218 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
219 kps = int(kps/kps_frac)*kps_frac
221 remain = elapsed*1.0/cc * (total-cc)
224 if (lastremain and (remain > lastremain)
225 and ((remain - lastremain)/lastremain < 0.05)):
229 hours = int(remain/60/60)
230 mins = int(remain/60 - hours*60)
231 secs = int(remain - hours*60*60 - mins*60)
236 kpsstr = '%dk/s' % kps
238 remainstr = '%dh%dm' % (hours, mins)
240 remainstr = '%dm%d' % (mins, secs)
242 remainstr = '%ds' % secs
243 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
244 % (pct, cc/1024, total/1024, fcount, ftotal,
248 indexfile = opt.indexfile or git.repo(b'bupindex')
249 r = index.Reader(indexfile)
251 msr = index.MetaStoreReader(indexfile + b'.meta')
252 except IOError as ex:
253 if ex.errno != EACCES:
255 log('error: cannot access %r; have you run bup index?'
256 % path_msg(indexfile))
258 hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
def already_saved(ent):
    """Return ent.sha if the entry's content is already stored, else a falsy value.

    An entry counts as saved only when the index entry reports itself
    valid and the pack writer ``w`` already has an object for
    ``ent.sha``.  The first falsy intermediate result is returned
    unchanged, so callers should only rely on the truthiness of the
    result unless it is truthy (in which case it is the sha itself).
    """
    valid = ent.is_valid()
    if not valid:
        return valid
    # Only consult the writer once the index entry is known to be valid.
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    """Tell the index filter whether to descend into ent (counting pass).

    Used as the ``wantrecurse`` callback for the first walk over the
    index: an entry is only worth recursing into when already_saved()
    says it has not been stored yet.  Always returns a bool.
    """
    if already_saved(ent):
        return False
    return True
def wantrecurse_during(ent):
    """Tell the index filter whether to descend into ent (saving pass).

    Used as the ``wantrecurse`` callback for the second walk over the
    index.  Entries that are not already saved are always recursed
    into; for saved entries the answer is whatever ent.sha_missing()
    reports, returned unchanged.
    """
    if already_saved(ent):
        # Saved entries are only revisited when flagged as sha-missing.
        return ent.sha_missing()
    return True
269 def find_hardlink_target(hlink_db, ent):
270 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
271 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
277 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
278 if not (ftotal % 10024):
279 qprogress('Reading index: %d\r' % ftotal)
280 exists = ent.exists()
281 hashvalid = already_saved(ent)
282 ent.set_sha_missing(not hashvalid)
283 if not opt.smaller or ent.size < opt.smaller:
284 if exists and not hashvalid:
287 progress('Reading index: %d, done.\n' % ftotal)
288 hashsplit.progress_callback = progress_report
290 # Root collisions occur when strip or graft options map more than one
291 # path to the same directory (paths which originally had separate
292 # parents). When that situation is detected, use empty metadata for
293 # the parent. Otherwise, use the metadata for the common parent.
294 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
296 # FIXME: Add collision tests, or handle collisions some other way.
298 # FIXME: Detect/handle strip/graft name collisions (other than root),
299 # i.e. if '/foo/bar' and '/bar' both map to '/'.
302 root_collision = None
304 count = subcount = fcount = 0
307 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
308 (dir, file) = os.path.split(ent.name)
309 exists = (ent.flags & index.IX_EXISTS)
310 hashvalid = already_saved(ent)
311 wasmissing = ent.sha_missing()
317 if ent.sha == index.EMPTY_SHA:
324 log('%s %-70s\n' % (status, path_msg(ent.name)))
325 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
326 if not lastdir.startswith(dir):
327 log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
336 if opt.smaller and ent.size >= opt.smaller:
337 if exists and not hashvalid:
339 log('skipping large file "%s"\n' % path_msg(ent.name))
340 lastskip_name = ent.name
343 assert(dir.startswith(b'/'))
345 dirp = stripped_path_components(dir, extra)
347 dirp = stripped_path_components(dir, [opt.strip_path])
349 dirp = grafted_path_components(graft_points, dir)
351 dirp = path_components(dir)
353 # At this point, dirp contains a representation of the archive
354 # path that looks like [(archive_dir_name, real_fs_path), ...].
355 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
356 # might look like this at some point:
357 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
359 # This dual representation supports stripping/grafting, where the
360 # archive path may not have a direct correspondence with the
361 # filesystem. The root directory is represented by an initial
362 # component named '', and any component that doesn't have a
363 # corresponding filesystem directory (due to grafting, for
364 # example) will have a real_fs_path of None, i.e. [('', None),
367 if first_root == None:
369 elif first_root != dirp[0]:
370 root_collision = True
372 # If switching to a new sub-tree, finish the current sub-tree.
373 while parts > [x[0] for x in dirp]:
374 _pop(force_tree = None)
376 # If switching to a new sub-tree, start a new sub-tree.
377 for path_component in dirp[len(parts):]:
378 dir_name, fs_path = path_component
379 # Not indexed, so just grab the FS metadata or use empty metadata.
381 meta = metadata.from_path(fs_path, normalized=True) \
382 if fs_path else metadata.Metadata()
383 except (OSError, IOError) as e:
385 lastskip_name = dir_name
386 meta = metadata.Metadata()
387 _push(dir_name, meta)
391 continue # We're at the top level -- keep the current root dir
392 # Since there's no filename, this is a subdir -- finish it.
393 oldtree = already_saved(ent) # may be None
394 newtree = _pop(force_tree = oldtree)
396 if lastskip_name and lastskip_name.startswith(ent.name):
399 ent.validate(GIT_MODE_TREE, newtree)
401 if exists and wasmissing:
405 # it's not a directory
408 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
409 git_info = (ent.gitmode, git_name, id)
410 shalists[-1].append(git_info)
411 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
412 meta = msr.metadata_at(ent.meta_ofs)
413 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
414 # Restore the times that were cleared to 0 in the metastore.
415 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
416 metalists[-1].append((sort_key, meta))
419 if stat.S_ISREG(ent.mode):
421 with hashsplit.open_noatime(ent.name) as f:
422 (mode, id) = hashsplit.split_to_blob_or_tree(
423 w.new_blob, w.new_tree, [f],
424 keep_boundaries=False)
425 except (IOError, OSError) as e:
426 add_error('%s: %s' % (ent.name, e))
427 lastskip_name = ent.name
428 elif stat.S_ISDIR(ent.mode):
429 assert(0) # handled above
430 elif stat.S_ISLNK(ent.mode):
432 rl = os.readlink(ent.name)
433 except (OSError, IOError) as e:
435 lastskip_name = ent.name
437 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
439 # Everything else should be fully described by its
440 # metadata, so just record an empty blob, so the paths
441 # in the tree and .bupm will match up.
442 (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))
445 ent.validate(mode, id)
447 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
448 git_info = (mode, git_name, id)
449 shalists[-1].append(git_info)
450 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
451 hlink = find_hardlink_target(hlink_db, ent)
453 meta = metadata.from_path(ent.name, hardlink_target=hlink,
455 except (OSError, IOError) as e:
457 lastskip_name = ent.name
458 meta = metadata.Metadata()
459 metalists[-1].append((sort_key, meta))
461 if exists and wasmissing:
467 pct = total and count*100.0/total or 100
468 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
469 % (pct, count/1024, total/1024, fcount, ftotal))
471 while len(parts) > 1: # _pop() all the parts above the root
472 _pop(force_tree = None)
473 assert(len(shalists) == 1)
474 assert(len(metalists) == 1)
476 # Finish the root directory.
477 tree = _pop(force_tree = None,
478 # When there's a collision, use empty metadata for the root.
479 dir_metadata = metadata.Metadata() if root_collision else None)
482 out = byte_stream(sys.stdout)
485 out.write(hexlify(tree))
487 if opt.commit or name:
488 msg = (b'bup save\n\nGenerated by command:\n%r\n'
489 % [argv_bytes(x) for x in sys.argv])
490 userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
491 commit = w.new_commit(tree, oldref, userline, date, None,
492 userline, date, None, msg)
494 out.write(hexlify(commit))
498 w.close() # must close before we can update the ref
502 cli.update_ref(refname, commit, oldref)
504 git.update_ref(refname, commit, oldref)
510 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))