3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from binascii import hexlify
10 from errno import EACCES
11 from io import BytesIO
12 import os, sys, stat, time, math
14 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
15 from bup.compat import argv_bytes, environ
16 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
17 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
18 hostname, istty2, log, parse_date_or_fatal, parse_num,
19 path_components, progress, qprogress, resolve_parent,
20 saved_errors, stripped_path_components,
22 from bup.io import byte_stream, path_msg
23 from bup.pwdgrp import userfullname, username
27 bup save [-tc] [-n name] <filenames...>
29 r,remote= hostname:/path/to/repo of remote repository
30 t,tree output a tree id
31 c,commit output a commit id
32 n,name= name of backup set to update (if any)
33 d,date= date for the commit (seconds since the epoch)
34 v,verbose increase log output (can be used more than once)
35 q,quiet don't show progress meter
36 smaller= only back up files smaller than n bytes
37 bwlimit= maximum bytes/sec to transmit to server
38 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
39 strip strips the path to every filename given
40 strip-path= path-prefix to be stripped when saving
41 graft= a graft point *old_path*=*new_path* (can be used more than once)
42 #,compress= set compression level to # (0-9, 9 is highest) [1]
44 o = options.Options(optspec)
45 (opt, flags, extra) = o.parse(sys.argv[1:])
48 opt.indexfile = argv_bytes(opt.indexfile)
50 opt.name = argv_bytes(opt.name)
52 opt.remote = argv_bytes(opt.remote)
54 opt.strip_path = argv_bytes(opt.strip_path)
56 git.check_repo_or_die()
57 if not (opt.tree or opt.commit or opt.name):
58 o.fatal("use one or more of -t, -c, -n")
60 o.fatal("no filenames given")
62 extra = [argv_bytes(x) for x in extra]
64 opt.progress = (istty2 and not opt.quiet)
65 opt.smaller = parse_num(opt.smaller or 0)
67 client.bwlimit = parse_num(opt.bwlimit)
70 date = parse_date_or_fatal(opt.date, o.fatal)
74 if opt.strip and opt.strip_path:
75 o.fatal("--strip is incompatible with --strip-path")
80 o.fatal("--strip is incompatible with --graft")
83 o.fatal("--strip-path is incompatible with --graft")
85 for (option, parameter) in flags:
86 if option == "--graft":
87 parameter = argv_bytes(parameter)
88 splitted_parameter = parameter.split(b'=')
89 if len(splitted_parameter) != 2:
90 o.fatal("a graft point must be of the form old_path=new_path")
91 old_path, new_path = splitted_parameter
92 if not (old_path and new_path):
93 o.fatal("a graft point cannot be empty")
94 graft_points.append((resolve_parent(old_path),
95 resolve_parent(new_path)))
97 is_reverse = environ.get(b'BUP_SERVER_REVERSE')
98 if is_reverse and opt.remote:
99 o.fatal("don't use -r in reverse mode; it's automatic")
102 if name and not valid_save_name(name):
103 o.fatal("'%s' is not a valid branch name" % path_msg(name))
104 refname = name and b'refs/heads/%s' % name or None
105 if opt.remote or is_reverse:
107 cli = client.Client(opt.remote)
108 except client.ClientError as e:
111 oldref = refname and cli.read_ref(refname) or None
112 w = cli.new_packwriter(compression_level=opt.compress)
115 oldref = refname and git.read_ref(refname) or None
116 w = git.PackWriter(compression_level=opt.compress)
121 # Metadata is stored in a file named .bupm in each directory. The
122 # first metadata entry will be the metadata for the current directory.
123 # The remaining entries will be for each of the other directory
124 # elements, in the order they're listed in the index.
126 # Since the git tree elements are sorted according to
127 # git.shalist_item_sort_key, the metalist items are accumulated as
128 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
129 # created. The sort_key must be computed using the element's real
130 # name and mode rather than the git mode and (possibly mangled) name.
132 # Maintain a stack of information representing the current location in
133 # the archive being constructed. The current path is recorded in
134 # parts, which will be something like ['', 'home', 'someuser'], and
135 # the accumulated content and metadata for each of the dirs in parts is
136 # stored in parallel stacks in shalists and metalists.
138 parts = [] # Current archive position (stack of dir names).
139 shalists = [] # Hashes for each dir in parts.
140 metalists = [] # Metadata for each dir in parts.
143 def _push(part, metadata):
144 # Enter a new archive directory -- make it the current directory.
147 metalists.append([(b'', metadata)]) # This dir's metadata (no name).
150 def _pop(force_tree, dir_metadata=None):
151 # Leave the current archive directory and add its tree to its parent.
152 assert(len(parts) >= 1)
154 shalist = shalists.pop()
155 metalist = metalists.pop()
156 if metalist and not force_tree:
157 if dir_metadata: # Override the original metadata pushed for this dir.
158 metalist = [(b'', dir_metadata)] + metalist[1:]
159 sorted_metalist = sorted(metalist, key = lambda x : x[0])
160 metadata = b''.join([m[1].encode() for m in sorted_metalist])
161 metadata_f = BytesIO(metadata)
162 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
164 keep_boundaries=False)
165 shalist.append((mode, b'.bupm', id))
166 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
174 if name in names_seen:
175 parent_path = b'/'.join(parts) + b'/'
176 add_error('error: ignoring duplicate path %s in %s'
177 % (path_msg(name), path_msg(parent_path)))
181 tree = w.new_tree(clean_list)
183 shalists[-1].append((GIT_MODE_TREE,
184 git.mangle_name(part,
185 GIT_MODE_TREE, GIT_MODE_TREE),
191 def progress_report(n):
192 global count, subcount, lastremain
194 cc = count + subcount
195 pct = total and (cc*100.0/total) or 0
197 elapsed = now - tstart
198 kps = elapsed and int(cc/1024./elapsed)
199 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
200 kps = int(kps/kps_frac)*kps_frac
202 remain = elapsed*1.0/cc * (total-cc)
205 if (lastremain and (remain > lastremain)
206 and ((remain - lastremain)/lastremain < 0.05)):
210 hours = int(remain/60/60)
211 mins = int(remain/60 - hours*60)
212 secs = int(remain - hours*60*60 - mins*60)
217 kpsstr = '%dk/s' % kps
219 remainstr = '%dh%dm' % (hours, mins)
221 remainstr = '%dm%d' % (mins, secs)
223 remainstr = '%ds' % secs
224 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
225 % (pct, cc/1024, total/1024, fcount, ftotal,
229 indexfile = opt.indexfile or git.repo(b'bupindex')
230 r = index.Reader(indexfile)
232 msr = index.MetaStoreReader(indexfile + b'.meta')
233 except IOError as ex:
234 if ex.errno != EACCES:
236 log('error: cannot access %r; have you run bup index?'
237 % path_msg(indexfile))
239 hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
def already_saved(ent):
    # Yield ent.sha when the index entry reports itself valid and the
    # pack writer (w) already holds an object with that sha; otherwise
    # hand back whichever false value stopped the check.
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    # Predicate handed to r.filter() as wantrecurse= for the
    # index-reading pass: true unless ent is already saved.
    saved = already_saved(ent)
    return not saved
def wantrecurse_during(ent):
    # Predicate handed to r.filter() as wantrecurse= for the saving
    # pass: true when ent is not already saved; otherwise defer to
    # whatever ent.sha_missing() reports.
    if not already_saved(ent):
        return True
    return ent.sha_missing()
250 def find_hardlink_target(hlink_db, ent):
251 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
252 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
258 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
259 if not (ftotal % 10024):
260 qprogress('Reading index: %d\r' % ftotal)
261 exists = ent.exists()
262 hashvalid = already_saved(ent)
263 ent.set_sha_missing(not hashvalid)
264 if not opt.smaller or ent.size < opt.smaller:
265 if exists and not hashvalid:
268 progress('Reading index: %d, done.\n' % ftotal)
269 hashsplit.progress_callback = progress_report
271 # Root collisions occur when strip or graft options map more than one
272 # path to the same directory (paths which originally had separate
273 # parents). When that situation is detected, use empty metadata for
274 # the parent. Otherwise, use the metadata for the common parent.
275 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
277 # FIXME: Add collision tests, or handle collisions some other way.
279 # FIXME: Detect/handle strip/graft name collisions (other than root),
280 # i.e. if '/foo/bar' and '/bar' both map to '/'.
283 root_collision = None
285 count = subcount = fcount = 0
288 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
289 (dir, file) = os.path.split(ent.name)
290 exists = (ent.flags & index.IX_EXISTS)
291 hashvalid = already_saved(ent)
292 wasmissing = ent.sha_missing()
298 if ent.sha == index.EMPTY_SHA:
305 log('%s %-70s\n' % (status, path_msg(ent.name)))
306 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
307 if not lastdir.startswith(dir):
308 log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
317 if opt.smaller and ent.size >= opt.smaller:
318 if exists and not hashvalid:
320 log('skipping large file "%s"\n' % path_msg(ent.name))
321 lastskip_name = ent.name
324 assert(dir.startswith(b'/'))
326 dirp = stripped_path_components(dir, extra)
328 dirp = stripped_path_components(dir, [opt.strip_path])
330 dirp = grafted_path_components(graft_points, dir)
332 dirp = path_components(dir)
334 # At this point, dirp contains a representation of the archive
335 # path that looks like [(archive_dir_name, real_fs_path), ...].
336 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
337 # might look like this at some point:
338 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
340 # This dual representation supports stripping/grafting, where the
341 # archive path may not have a direct correspondence with the
342 # filesystem. The root directory is represented by an initial
343 # component named '', and any component that doesn't have a
344 # corresponding filesystem directory (due to grafting, for
345 # example) will have a real_fs_path of None, i.e. [('', None),
348 if first_root == None:
350 elif first_root != dirp[0]:
351 root_collision = True
353 # If switching to a new sub-tree, finish the current sub-tree.
354 while parts > [x[0] for x in dirp]:
355 _pop(force_tree = None)
357 # If switching to a new sub-tree, start a new sub-tree.
358 for path_component in dirp[len(parts):]:
359 dir_name, fs_path = path_component
360 # Not indexed, so just grab the FS metadata or use empty metadata.
362 meta = metadata.from_path(fs_path, normalized=True) \
363 if fs_path else metadata.Metadata()
364 except (OSError, IOError) as e:
366 lastskip_name = dir_name
367 meta = metadata.Metadata()
368 _push(dir_name, meta)
372 continue # We're at the top level -- keep the current root dir
373 # Since there's no filename, this is a subdir -- finish it.
374 oldtree = already_saved(ent) # may be None
375 newtree = _pop(force_tree = oldtree)
377 if lastskip_name and lastskip_name.startswith(ent.name):
380 ent.validate(GIT_MODE_TREE, newtree)
382 if exists and wasmissing:
386 # it's not a directory
390 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
391 git_info = (ent.gitmode, git_name, id)
392 shalists[-1].append(git_info)
393 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
394 meta = msr.metadata_at(ent.meta_ofs)
395 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
396 # Restore the times that were cleared to 0 in the metastore.
397 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
398 metalists[-1].append((sort_key, meta))
400 if stat.S_ISREG(ent.mode):
402 f = hashsplit.open_noatime(ent.name)
403 except (IOError, OSError) as e:
405 lastskip_name = ent.name
408 (mode, id) = hashsplit.split_to_blob_or_tree(
409 w.new_blob, w.new_tree, [f],
410 keep_boundaries=False)
411 except (IOError, OSError) as e:
412 add_error('%s: %s' % (ent.name, e))
413 lastskip_name = ent.name
415 if stat.S_ISDIR(ent.mode):
416 assert(0) # handled above
417 elif stat.S_ISLNK(ent.mode):
419 rl = os.readlink(ent.name)
420 except (OSError, IOError) as e:
422 lastskip_name = ent.name
424 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
426 # Everything else should be fully described by its
427 # metadata, so just record an empty blob, so the paths
428 # in the tree and .bupm will match up.
429 (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))
432 ent.validate(mode, id)
434 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
435 git_info = (mode, git_name, id)
436 shalists[-1].append(git_info)
437 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
438 hlink = find_hardlink_target(hlink_db, ent)
440 meta = metadata.from_path(ent.name, hardlink_target=hlink,
442 except (OSError, IOError) as e:
444 lastskip_name = ent.name
446 metalists[-1].append((sort_key, meta))
448 if exists and wasmissing:
454 pct = total and count*100.0/total or 100
455 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
456 % (pct, count/1024, total/1024, fcount, ftotal))
458 while len(parts) > 1: # _pop() all the parts above the root
459 _pop(force_tree = None)
460 assert(len(shalists) == 1)
461 assert(len(metalists) == 1)
463 # Finish the root directory.
464 tree = _pop(force_tree = None,
465 # When there's a collision, use empty metadata for the root.
466 dir_metadata = metadata.Metadata() if root_collision else None)
469 out = byte_stream(sys.stdout)
472 out.write(hexlify(tree))
474 if opt.commit or name:
475 msg = (b'bup save\n\nGenerated by command:\n%r\n'
476 % [argv_bytes(x) for x in sys.argv])
477 userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
478 commit = w.new_commit(tree, oldref, userline, date, None,
479 userline, date, None, msg)
481 out.write(hexlify(commit))
485 w.close() # must close before we can update the ref
489 cli.update_ref(refname, commit, oldref)
491 git.update_ref(refname, commit, oldref)
497 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))