3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from binascii import hexlify
10 from errno import EACCES
11 from io import BytesIO
12 import os, sys, stat, time, math
14 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
15 from bup.compat import argv_bytes, environ
16 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
17 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
18 hostname, istty2, log, parse_date_or_fatal, parse_num,
19 path_components, progress, qprogress, resolve_parent,
20 saved_errors, stripped_path_components,
22 from bup.io import byte_stream, path_msg
23 from bup.pwdgrp import userfullname, username
27 bup save [-tc] [-n name] <filenames...>
29 r,remote= hostname:/path/to/repo of remote repository
30 t,tree output a tree id
31 c,commit output a commit id
32 n,name= name of backup set to update (if any)
33 d,date= date for the commit (seconds since the epoch)
34 v,verbose increase log output (can be used more than once)
35 q,quiet don't show progress meter
36 smaller= only back up files smaller than n bytes
37 bwlimit= maximum bytes/sec to transmit to server
38 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
39 strip strips the path to every filename given
40 strip-path= path-prefix to be stripped when saving
41 graft= a graft point *old_path*=*new_path* (can be used more than once)
42 #,compress= set compression level to # (0-9, 9 is highest) [1]
44 o = options.Options(optspec)
45 (opt, flags, extra) = o.parse(sys.argv[1:])
48 opt.indexfile = argv_bytes(opt.indexfile)
50 opt.name = argv_bytes(opt.name)
52 opt.remote = argv_bytes(opt.remote)
54 opt.strip_path = argv_bytes(opt.strip_path)
56 git.check_repo_or_die()
57 if not (opt.tree or opt.commit or opt.name):
58 o.fatal("use one or more of -t, -c, -n")
60 o.fatal("no filenames given")
62 extra = [argv_bytes(x) for x in extra]
64 opt.progress = (istty2 and not opt.quiet)
65 opt.smaller = parse_num(opt.smaller or 0)
67 client.bwlimit = parse_num(opt.bwlimit)
70 date = parse_date_or_fatal(opt.date, o.fatal)
74 if opt.strip and opt.strip_path:
75 o.fatal("--strip is incompatible with --strip-path")
80 o.fatal("--strip is incompatible with --graft")
83 o.fatal("--strip-path is incompatible with --graft")
# Turn every --graft flag into a (resolved old_path, resolved new_path) pair
# appended to graft_points.  Malformed values are reported through o.fatal().
for (option, parameter) in flags:
    if option != "--graft":
        continue
    parameter = argv_bytes(parameter)
    # Exactly one b'=' must separate the old path from the new path.
    splitted_parameter = parameter.split(b'=')
    if len(splitted_parameter) != 2:
        o.fatal("a graft point must be of the form old_path=new_path")
    old_path, new_path = splitted_parameter
    if not old_path or not new_path:
        o.fatal("a graft point cannot be empty")
    graft_points.append((resolve_parent(old_path), resolve_parent(new_path)))
# BUP_SERVER_REVERSE in the environment selects reverse mode; an explicit
# -r/--remote is rejected there.
is_reverse = environ.get(b'BUP_SERVER_REVERSE')
if is_reverse:
    if opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")
102 if name and not valid_save_name(name):
103 o.fatal("'%s' is not a valid branch name" % path_msg(name))
104 refname = name and b'refs/heads/%s' % name or None
105 if opt.remote or is_reverse:
107 cli = client.Client(opt.remote)
108 except client.ClientError as e:
111 oldref = refname and cli.read_ref(refname) or None
112 w = cli.new_packwriter(compression_level=opt.compress)
115 oldref = refname and git.read_ref(refname) or None
116 w = git.PackWriter(compression_level=opt.compress)
121 # Metadata is stored in a file named .bupm in each directory. The
122 # first metadata entry will be the metadata for the current directory.
123 # The remaining entries will be for each of the other directory
124 # elements, in the order they're listed in the index.
126 # Since the git tree elements are sorted according to
127 # git.shalist_item_sort_key, the metalist items are accumulated as
128 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
129 # created. The sort_key should have been computed using the element's
130 # mangled name and git mode (after hashsplitting), but the code isn't
131 # actually doing that but rather uses the element's real name and mode.
132 # This makes things a bit more difficult when reading it back, see
133 # vfs.ordered_tree_entries().
135 # Maintain a stack of information representing the current location in
136 # the archive being constructed. The current path is recorded in
137 # parts, which will be something like ['', 'home', 'someuser'], and
138 # the accumulated content and metadata for each of the dirs in parts is
139 # stored in parallel stacks in shalists and metalists.
141 parts = [] # Current archive position (stack of dir names).
142 shalists = [] # Hashes for each dir in parts.
143 metalists = [] # Metadata for each dir in parts.
146 def _push(part, metadata):
147 # Enter a new archive directory -- make it the current directory.
150 metalists.append([(b'', metadata)]) # This dir's metadata (no name).
153 def _pop(force_tree, dir_metadata=None):
154 # Leave the current archive directory and add its tree to its parent.
155 assert(len(parts) >= 1)
157 shalist = shalists.pop()
158 metalist = metalists.pop()
159 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
165 metaidx = 1 # entry at 0 is for the dir
168 if name in names_seen:
169 parent_path = b'/'.join(parts) + b'/'
170 add_error('error: ignoring duplicate path %s in %s'
171 % (path_msg(name), path_msg(parent_path)))
172 if not stat.S_ISDIR(x[0]):
173 del metalist[metaidx]
177 if not stat.S_ISDIR(x[0]):
181 if dir_metadata: # Override the original metadata pushed for this dir.
182 metalist = [(b'', dir_metadata)] + metalist[1:]
183 sorted_metalist = sorted(metalist, key = lambda x : x[0])
184 metadata = b''.join([m[1].encode() for m in sorted_metalist])
185 metadata_f = BytesIO(metadata)
186 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
188 keep_boundaries=False)
189 clean_list.append((mode, b'.bupm', id))
190 tree = w.new_tree(clean_list)
192 shalists[-1].append((GIT_MODE_TREE,
193 git.mangle_name(part,
194 GIT_MODE_TREE, GIT_MODE_TREE),
200 def progress_report(n):
201 global count, subcount, lastremain
203 cc = count + subcount
204 pct = total and (cc*100.0/total) or 0
206 elapsed = now - tstart
207 kps = elapsed and int(cc/1024./elapsed)
208 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
209 kps = int(kps/kps_frac)*kps_frac
211 remain = elapsed*1.0/cc * (total-cc)
214 if (lastremain and (remain > lastremain)
215 and ((remain - lastremain)/lastremain < 0.05)):
219 hours = int(remain/60/60)
220 mins = int(remain/60 - hours*60)
221 secs = int(remain - hours*60*60 - mins*60)
226 kpsstr = '%dk/s' % kps
228 remainstr = '%dh%dm' % (hours, mins)
230 remainstr = '%dm%d' % (mins, secs)
232 remainstr = '%ds' % secs
233 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
234 % (pct, cc/1024, total/1024, fcount, ftotal,
238 indexfile = opt.indexfile or git.repo(b'bupindex')
239 r = index.Reader(indexfile)
241 msr = index.MetaStoreReader(indexfile + b'.meta')
242 except IOError as ex:
243 if ex.errno != EACCES:
245 log('error: cannot access %r; have you run bup index?'
246 % path_msg(indexfile))
248 hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
def already_saved(ent):
    # Return ent.sha when the index entry is valid and the pack writer `w`
    # already has that object.  Otherwise return the falsy result of
    # whichever check failed first, so callers can use the value either as
    # a boolean or as the saved object id.
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    # Recursion predicate for the first (counting) index pass: descend only
    # into entries that are not already saved.
    saved = already_saved(ent)
    return not saved
def wantrecurse_during(ent):
    # Recursion predicate for the second (saving) index pass: descend when
    # the entry is not already saved, or when it was flagged as missing its
    # sha.
    if not already_saved(ent):
        return True
    return ent.sha_missing()
259 def find_hardlink_target(hlink_db, ent):
260 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
261 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
267 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
268 if not (ftotal % 10024):
269 qprogress('Reading index: %d\r' % ftotal)
270 exists = ent.exists()
271 hashvalid = already_saved(ent)
272 ent.set_sha_missing(not hashvalid)
273 if not opt.smaller or ent.size < opt.smaller:
274 if exists and not hashvalid:
277 progress('Reading index: %d, done.\n' % ftotal)
278 hashsplit.progress_callback = progress_report
280 # Root collisions occur when strip or graft options map more than one
281 # path to the same directory (paths which originally had separate
282 # parents). When that situation is detected, use empty metadata for
283 # the parent. Otherwise, use the metadata for the common parent.
284 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
286 # FIXME: Add collision tests, or handle collisions some other way.
288 # FIXME: Detect/handle strip/graft name collisions (other than root),
289 # i.e. if '/foo/bar' and '/bar' both map to '/'.
292 root_collision = None
294 count = subcount = fcount = 0
297 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
298 (dir, file) = os.path.split(ent.name)
299 exists = (ent.flags & index.IX_EXISTS)
300 hashvalid = already_saved(ent)
301 wasmissing = ent.sha_missing()
307 if ent.sha == index.EMPTY_SHA:
314 log('%s %-70s\n' % (status, path_msg(ent.name)))
315 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
316 if not lastdir.startswith(dir):
317 log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
326 if opt.smaller and ent.size >= opt.smaller:
327 if exists and not hashvalid:
329 log('skipping large file "%s"\n' % path_msg(ent.name))
330 lastskip_name = ent.name
333 assert(dir.startswith(b'/'))
335 dirp = stripped_path_components(dir, extra)
337 dirp = stripped_path_components(dir, [opt.strip_path])
339 dirp = grafted_path_components(graft_points, dir)
341 dirp = path_components(dir)
343 # At this point, dirp contains a representation of the archive
344 # path that looks like [(archive_dir_name, real_fs_path), ...].
345 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
346 # might look like this at some point:
347 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
349 # This dual representation supports stripping/grafting, where the
350 # archive path may not have a direct correspondence with the
351 # filesystem. The root directory is represented by an initial
352 # component named '', and any component that doesn't have a
353 # corresponding filesystem directory (due to grafting, for
354 # example) will have a real_fs_path of None, i.e. [('', None),
357 if first_root == None:
359 elif first_root != dirp[0]:
360 root_collision = True
362 # If switching to a new sub-tree, finish the current sub-tree.
363 while parts > [x[0] for x in dirp]:
364 _pop(force_tree = None)
366 # If switching to a new sub-tree, start a new sub-tree.
367 for path_component in dirp[len(parts):]:
368 dir_name, fs_path = path_component
369 # Not indexed, so just grab the FS metadata or use empty metadata.
371 meta = metadata.from_path(fs_path, normalized=True) \
372 if fs_path else metadata.Metadata()
373 except (OSError, IOError) as e:
375 lastskip_name = dir_name
376 meta = metadata.Metadata()
377 _push(dir_name, meta)
381 continue # We're at the top level -- keep the current root dir
382 # Since there's no filename, this is a subdir -- finish it.
383 oldtree = already_saved(ent) # may be None
384 newtree = _pop(force_tree = oldtree)
386 if lastskip_name and lastskip_name.startswith(ent.name):
389 ent.validate(GIT_MODE_TREE, newtree)
391 if exists and wasmissing:
395 # it's not a directory
398 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
399 git_info = (ent.gitmode, git_name, id)
400 shalists[-1].append(git_info)
401 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
402 meta = msr.metadata_at(ent.meta_ofs)
403 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
404 # Restore the times that were cleared to 0 in the metastore.
405 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
406 metalists[-1].append((sort_key, meta))
409 if stat.S_ISREG(ent.mode):
411 f = hashsplit.open_noatime(ent.name)
412 except (IOError, OSError) as e:
414 lastskip_name = ent.name
417 (mode, id) = hashsplit.split_to_blob_or_tree(
418 w.new_blob, w.new_tree, [f],
419 keep_boundaries=False)
420 except (IOError, OSError) as e:
421 add_error('%s: %s' % (ent.name, e))
422 lastskip_name = ent.name
423 elif stat.S_ISDIR(ent.mode):
424 assert(0) # handled above
425 elif stat.S_ISLNK(ent.mode):
427 rl = os.readlink(ent.name)
428 except (OSError, IOError) as e:
430 lastskip_name = ent.name
432 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
434 # Everything else should be fully described by its
435 # metadata, so just record an empty blob, so the paths
436 # in the tree and .bupm will match up.
437 (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))
440 ent.validate(mode, id)
442 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
443 git_info = (mode, git_name, id)
444 shalists[-1].append(git_info)
445 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
446 hlink = find_hardlink_target(hlink_db, ent)
448 meta = metadata.from_path(ent.name, hardlink_target=hlink,
450 except (OSError, IOError) as e:
452 lastskip_name = ent.name
453 meta = metadata.Metadata()
454 metalists[-1].append((sort_key, meta))
456 if exists and wasmissing:
462 pct = total and count*100.0/total or 100
463 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
464 % (pct, count/1024, total/1024, fcount, ftotal))
# Close every directory still open beneath the root; each _pop() adds the
# finished tree to its parent, leaving only the root on the stacks.
while len(parts) > 1:
    _pop(force_tree=None)
assert len(shalists) == 1
assert len(metalists) == 1

# Finish the root directory.  When there's a collision, use empty metadata
# for the root; otherwise the metadata originally pushed for it is kept.
if root_collision:
    tree = _pop(force_tree=None, dir_metadata=metadata.Metadata())
else:
    tree = _pop(force_tree=None, dir_metadata=None)
477 out = byte_stream(sys.stdout)
480 out.write(hexlify(tree))
482 if opt.commit or name:
483 msg = (b'bup save\n\nGenerated by command:\n%r\n'
484 % [argv_bytes(x) for x in sys.argv])
485 userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
486 commit = w.new_commit(tree, oldref, userline, date, None,
487 userline, date, None, msg)
489 out.write(hexlify(commit))
493 w.close() # must close before we can update the ref
497 cli.update_ref(refname, commit, oldref)
499 git.update_ref(refname, commit, oldref)
505 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))