3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from binascii import hexlify
10 from errno import EACCES
11 from io import BytesIO
12 import os, sys, stat, time, math
14 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
15 from bup.compat import argv_bytes, environ
16 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
17 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
18 hostname, istty2, log, parse_date_or_fatal, parse_num,
19 path_components, progress, qprogress, resolve_parent,
20 saved_errors, stripped_path_components,
22 from bup.io import byte_stream, path_msg
23 from bup.pwdgrp import userfullname, username
27 bup save [-tc] [-n name] <filenames...>
29 r,remote= hostname:/path/to/repo of remote repository
30 t,tree output a tree id
31 c,commit output a commit id
32 n,name= name of backup set to update (if any)
33 d,date= date for the commit (seconds since the epoch)
34 v,verbose increase log output (can be used more than once)
35 q,quiet don't show progress meter
36 smaller= only back up files smaller than n bytes
37 bwlimit= maximum bytes/sec to transmit to server
38 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
39 strip strips the path to every filename given
40 strip-path= path-prefix to be stripped when saving
41 graft= a graft point *old_path*=*new_path* (can be used more than once)
42 #,compress= set compression level to # (0-9, 9 is highest) [1]
44 o = options.Options(optspec)
45 (opt, flags, extra) = o.parse(sys.argv[1:])
48 opt.indexfile = argv_bytes(opt.indexfile)
50 opt.name = argv_bytes(opt.name)
52 opt.remote = argv_bytes(opt.remote)
54 opt.strip_path = argv_bytes(opt.strip_path)
56 git.check_repo_or_die()
57 if not (opt.tree or opt.commit or opt.name):
58 o.fatal("use one or more of -t, -c, -n")
60 o.fatal("no filenames given")
62 extra = [argv_bytes(x) for x in extra]
64 opt.progress = (istty2 and not opt.quiet)
65 opt.smaller = parse_num(opt.smaller or 0)
67 client.bwlimit = parse_num(opt.bwlimit)
70 date = parse_date_or_fatal(opt.date, o.fatal)
74 if opt.strip and opt.strip_path:
75 o.fatal("--strip is incompatible with --strip-path")
80 o.fatal("--strip is incompatible with --graft")
83 o.fatal("--strip-path is incompatible with --graft")
85 for (option, parameter) in flags:
86 if option == "--graft":
87 parameter = argv_bytes(parameter)
88 splitted_parameter = parameter.split(b'=')
89 if len(splitted_parameter) != 2:
90 o.fatal("a graft point must be of the form old_path=new_path")
91 old_path, new_path = splitted_parameter
92 if not (old_path and new_path):
93 o.fatal("a graft point cannot be empty")
94 graft_points.append((resolve_parent(old_path),
95 resolve_parent(new_path)))
97 is_reverse = environ.get(b'BUP_SERVER_REVERSE')
98 if is_reverse and opt.remote:
99 o.fatal("don't use -r in reverse mode; it's automatic")
102 if name and not valid_save_name(name):
103 o.fatal("'%s' is not a valid branch name" % path_msg(name))
104 refname = name and b'refs/heads/%s' % name or None
105 if opt.remote or is_reverse:
107 cli = client.Client(opt.remote)
108 except client.ClientError as e:
111 oldref = refname and cli.read_ref(refname) or None
112 w = cli.new_packwriter(compression_level=opt.compress)
115 oldref = refname and git.read_ref(refname) or None
116 w = git.PackWriter(compression_level=opt.compress)
121 # Metadata is stored in a file named .bupm in each directory. The
122 # first metadata entry will be the metadata for the current directory.
123 # The remaining entries will be for each of the other directory
124 # elements, in the order they're listed in the index.
126 # Since the git tree elements are sorted according to
127 # git.shalist_item_sort_key, the metalist items are accumulated as
128 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
129 # created. The sort_key must be computed using the element's real
130 # name and mode rather than the git mode and (possibly mangled) name.
132 # Maintain a stack of information representing the current location in
133 # the archive being constructed. The current path is recorded in
134 # parts, which will be something like ['', 'home', 'someuser'], and
135 # the accumulated content and metadata for each of the dirs in parts is
136 # stored in parallel stacks in shalists and metalists.
# Three parallel stacks, one entry per directory currently open in the
# archive being built; index i in each list refers to the same directory.
138 parts = [] # Current archive position (stack of dir names).
139 shalists = [] # Hashes for each dir in parts.
140 metalists = [] # Metadata for each dir in parts.
143 def _push(part, metadata):
144 # Enter a new archive directory -- make it the current directory.
147 metalists.append([(b'', metadata)]) # This dir's metadata (no name).
150 def _pop(force_tree, dir_metadata=None):
151 # Leave the current archive directory and add its tree to its parent.
152 assert(len(parts) >= 1)
154 shalist = shalists.pop()
155 metalist = metalists.pop()
156 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
162 metaidx = 1 # entry at 0 is for the dir
165 if name in names_seen:
166 parent_path = b'/'.join(parts) + b'/'
167 add_error('error: ignoring duplicate path %s in %s'
168 % (path_msg(name), path_msg(parent_path)))
169 if not stat.S_ISDIR(x[0]):
170 del metalist[metaidx]
174 if not stat.S_ISDIR(x[0]):
178 if dir_metadata: # Override the original metadata pushed for this dir.
179 metalist = [(b'', dir_metadata)] + metalist[1:]
180 sorted_metalist = sorted(metalist, key = lambda x : x[0])
181 metadata = b''.join([m[1].encode() for m in sorted_metalist])
182 metadata_f = BytesIO(metadata)
183 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
185 keep_boundaries=False)
186 clean_list.append((mode, b'.bupm', id))
187 tree = w.new_tree(clean_list)
189 shalists[-1].append((GIT_MODE_TREE,
190 git.mangle_name(part,
191 GIT_MODE_TREE, GIT_MODE_TREE),
197 def progress_report(n):
198 global count, subcount, lastremain
200 cc = count + subcount
201 pct = total and (cc*100.0/total) or 0
203 elapsed = now - tstart
204 kps = elapsed and int(cc/1024./elapsed)
205 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
206 kps = int(kps/kps_frac)*kps_frac
208 remain = elapsed*1.0/cc * (total-cc)
211 if (lastremain and (remain > lastremain)
212 and ((remain - lastremain)/lastremain < 0.05)):
216 hours = int(remain/60/60)
217 mins = int(remain/60 - hours*60)
218 secs = int(remain - hours*60*60 - mins*60)
223 kpsstr = '%dk/s' % kps
225 remainstr = '%dh%dm' % (hours, mins)
227 remainstr = '%dm%d' % (mins, secs)
229 remainstr = '%ds' % secs
230 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
231 % (pct, cc/1024, total/1024, fcount, ftotal,
235 indexfile = opt.indexfile or git.repo(b'bupindex')
236 r = index.Reader(indexfile)
238 msr = index.MetaStoreReader(indexfile + b'.meta')
239 except IOError as ex:
240 if ex.errno != EACCES:
242 log('error: cannot access %r; have you run bup index?'
243 % path_msg(indexfile))
245 hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
def already_saved(ent):
    # Report whether this index entry's object is already in the
    # repository: yields ent.sha when the entry is valid and the pack
    # writer knows the object, otherwise the first falsy check result.
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    # Recursion predicate for the initial index-reading pass: descend
    # only into entries that have not already been saved.
    saved = already_saved(ent)
    return not saved
def wantrecurse_during(ent):
    # Recursion predicate for the saving pass: descend when the entry is
    # not already saved, or when its sha has been flagged as missing.
    if not already_saved(ent):
        return True
    return ent.sha_missing()
256 def find_hardlink_target(hlink_db, ent):
257 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
258 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
264 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
265 if not (ftotal % 10024):
266 qprogress('Reading index: %d\r' % ftotal)
267 exists = ent.exists()
268 hashvalid = already_saved(ent)
269 ent.set_sha_missing(not hashvalid)
270 if not opt.smaller or ent.size < opt.smaller:
271 if exists and not hashvalid:
274 progress('Reading index: %d, done.\n' % ftotal)
275 hashsplit.progress_callback = progress_report
277 # Root collisions occur when strip or graft options map more than one
278 # path to the same directory (paths which originally had separate
279 # parents). When that situation is detected, use empty metadata for
280 # the parent. Otherwise, use the metadata for the common parent.
281 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
283 # FIXME: Add collision tests, or handle collisions some other way.
285 # FIXME: Detect/handle strip/graft name collisions (other than root),
286 # i.e. if '/foo/bar' and '/bar' both map to '/'.
289 root_collision = None
291 count = subcount = fcount = 0
294 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
295 (dir, file) = os.path.split(ent.name)
296 exists = (ent.flags & index.IX_EXISTS)
297 hashvalid = already_saved(ent)
298 wasmissing = ent.sha_missing()
304 if ent.sha == index.EMPTY_SHA:
311 log('%s %-70s\n' % (status, path_msg(ent.name)))
312 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
313 if not lastdir.startswith(dir):
314 log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
323 if opt.smaller and ent.size >= opt.smaller:
324 if exists and not hashvalid:
326 log('skipping large file "%s"\n' % path_msg(ent.name))
327 lastskip_name = ent.name
330 assert(dir.startswith(b'/'))
332 dirp = stripped_path_components(dir, extra)
334 dirp = stripped_path_components(dir, [opt.strip_path])
336 dirp = grafted_path_components(graft_points, dir)
338 dirp = path_components(dir)
340 # At this point, dirp contains a representation of the archive
341 # path that looks like [(archive_dir_name, real_fs_path), ...].
342 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
343 # might look like this at some point:
344 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
346 # This dual representation supports stripping/grafting, where the
347 # archive path may not have a direct correspondence with the
348 # filesystem. The root directory is represented by an initial
349 # component named '', and any component that doesn't have a
350 # corresponding filesystem directory (due to grafting, for
351 # example) will have a real_fs_path of None, i.e. [('', None),
354 if first_root == None:
356 elif first_root != dirp[0]:
357 root_collision = True
359 # If switching to a new sub-tree, finish the current sub-tree.
360 while parts > [x[0] for x in dirp]:
361 _pop(force_tree = None)
363 # If switching to a new sub-tree, start a new sub-tree.
364 for path_component in dirp[len(parts):]:
365 dir_name, fs_path = path_component
366 # Not indexed, so just grab the FS metadata or use empty metadata.
368 meta = metadata.from_path(fs_path, normalized=True) \
369 if fs_path else metadata.Metadata()
370 except (OSError, IOError) as e:
372 lastskip_name = dir_name
373 meta = metadata.Metadata()
374 _push(dir_name, meta)
378 continue # We're at the top level -- keep the current root dir
379 # Since there's no filename, this is a subdir -- finish it.
380 oldtree = already_saved(ent) # may be None
381 newtree = _pop(force_tree = oldtree)
383 if lastskip_name and lastskip_name.startswith(ent.name):
386 ent.validate(GIT_MODE_TREE, newtree)
388 if exists and wasmissing:
392 # it's not a directory
395 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
396 git_info = (ent.gitmode, git_name, id)
397 shalists[-1].append(git_info)
398 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
399 meta = msr.metadata_at(ent.meta_ofs)
400 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
401 # Restore the times that were cleared to 0 in the metastore.
402 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
403 metalists[-1].append((sort_key, meta))
406 if stat.S_ISREG(ent.mode):
408 f = hashsplit.open_noatime(ent.name)
409 except (IOError, OSError) as e:
411 lastskip_name = ent.name
414 (mode, id) = hashsplit.split_to_blob_or_tree(
415 w.new_blob, w.new_tree, [f],
416 keep_boundaries=False)
417 except (IOError, OSError) as e:
418 add_error('%s: %s' % (ent.name, e))
419 lastskip_name = ent.name
420 elif stat.S_ISDIR(ent.mode):
421 assert(0) # handled above
422 elif stat.S_ISLNK(ent.mode):
424 rl = os.readlink(ent.name)
425 except (OSError, IOError) as e:
427 lastskip_name = ent.name
429 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
431 # Everything else should be fully described by its
432 # metadata, so just record an empty blob, so the paths
433 # in the tree and .bupm will match up.
434 (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))
437 ent.validate(mode, id)
439 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
440 git_info = (mode, git_name, id)
441 shalists[-1].append(git_info)
442 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
443 hlink = find_hardlink_target(hlink_db, ent)
445 meta = metadata.from_path(ent.name, hardlink_target=hlink,
447 except (OSError, IOError) as e:
449 lastskip_name = ent.name
450 meta = metadata.Metadata()
451 metalists[-1].append((sort_key, meta))
453 if exists and wasmissing:
459 pct = total and count*100.0/total or 100
460 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
461 % (pct, count/1024, total/1024, fcount, ftotal))
463 while len(parts) > 1: # _pop() all the parts above the root
464 _pop(force_tree = None)
465 assert(len(shalists) == 1)
466 assert(len(metalists) == 1)
468 # Finish the root directory.
469 tree = _pop(force_tree = None,
470 # When there's a collision, use empty metadata for the root.
471 dir_metadata = metadata.Metadata() if root_collision else None)
474 out = byte_stream(sys.stdout)
477 out.write(hexlify(tree))
479 if opt.commit or name:
480 msg = (b'bup save\n\nGenerated by command:\n%r\n'
481 % [argv_bytes(x) for x in sys.argv])
482 userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
483 commit = w.new_commit(tree, oldref, userline, date, None,
484 userline, date, None, msg)
486 out.write(hexlify(commit))
490 w.close() # must close before we can update the ref
494 cli.update_ref(refname, commit, oldref)
496 git.update_ref(refname, commit, oldref)
502 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))