3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from errno import EACCES
10 from io import BytesIO
11 import os, sys, stat, time, math
13 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
14 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
15 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
16 hostname, istty2, log, parse_date_or_fatal, parse_num,
17 path_components, progress, qprogress, resolve_parent,
18 saved_errors, stripped_path_components,
19 userfullname, username, valid_save_name)
23 bup save [-tc] [-n name] <filenames...>
25 r,remote= hostname:/path/to/repo of remote repository
26 t,tree output a tree id
27 c,commit output a commit id
28 n,name= name of backup set to update (if any)
29 d,date= date for the commit (seconds since the epoch)
30 v,verbose increase log output (can be used more than once)
31 q,quiet don't show progress meter
32 smaller= only back up files smaller than n bytes
33 bwlimit= maximum bytes/sec to transmit to server
34 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
35 strip strips the path to every filename given
36 strip-path= path-prefix to be stripped when saving
37 graft= a graft point *old_path*=*new_path* (can be used more than once)
38 #,compress= set compression level to # (0-9, 9 is highest) [1]
40 o = options.Options(optspec)
41 (opt, flags, extra) = o.parse(sys.argv[1:])
43 git.check_repo_or_die()
44 if not (opt.tree or opt.commit or opt.name):
45 o.fatal("use one or more of -t, -c, -n")
47 o.fatal("no filenames given")
49 opt.progress = (istty2 and not opt.quiet)
50 opt.smaller = parse_num(opt.smaller or 0)
52 client.bwlimit = parse_num(opt.bwlimit)
55 date = parse_date_or_fatal(opt.date, o.fatal)
59 if opt.strip and opt.strip_path:
60 o.fatal("--strip is incompatible with --strip-path")
65 o.fatal("--strip is incompatible with --graft")
68 o.fatal("--strip-path is incompatible with --graft")
70 for (option, parameter) in flags:
71 if option == "--graft":
72 splitted_parameter = parameter.split('=')
73 if len(splitted_parameter) != 2:
74 o.fatal("a graft point must be of the form old_path=new_path")
75 old_path, new_path = splitted_parameter
76 if not (old_path and new_path):
77 o.fatal("a graft point cannot be empty")
78 graft_points.append((resolve_parent(old_path),
79 resolve_parent(new_path)))
81 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
82 if is_reverse and opt.remote:
83 o.fatal("don't use -r in reverse mode; it's automatic")
85 if opt.name and not valid_save_name(opt.name):
86 o.fatal("'%s' is not a valid branch name" % opt.name)
87 refname = opt.name and 'refs/heads/%s' % opt.name or None
88 if opt.remote or is_reverse:
90 cli = client.Client(opt.remote)
91 except client.ClientError as e:
94 oldref = refname and cli.read_ref(refname) or None
95 w = cli.new_packwriter(compression_level=opt.compress)
98 oldref = refname and git.read_ref(refname) or None
99 w = git.PackWriter(compression_level=opt.compress)
105 if dir.endswith('/'):
110 # Metadata is stored in a file named .bupm in each directory. The
111 # first metadata entry will be the metadata for the current directory.
112 # The remaining entries will be for each of the other directory
113 # elements, in the order they're listed in the index.
115 # Since the git tree elements are sorted according to
116 # git.shalist_item_sort_key, the metalist items are accumulated as
117 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
118 # created. The sort_key should have been computed using the element's
119 # mangled name and git mode (after hashsplitting), but the code doesn't
120 # actually do that; it uses the element's real name and mode instead.
121 # This makes things a bit more difficult when reading it back, see
122 # vfs.ordered_tree_entries().
124 # Maintain a stack of information representing the current location in
125 # the archive being constructed. The current path is recorded in
126 # parts, which will be something like ['', 'home', 'someuser'], and
127 # the accumulated content and metadata for each of the dirs in parts is
128 # stored in parallel stacks in shalists and metalists.
130 parts = [] # Current archive position (stack of dir names).
131 shalists = [] # Hashes for each dir in paths.
132 metalists = [] # Metadata for each dir in paths.
135 def _push(part, metadata):
136 # Enter a new archive directory -- make it the current directory.
139 metalists.append([('', metadata)]) # This dir's metadata (no name).
142 def _pop(force_tree, dir_metadata=None):
143 # Leave the current archive directory and add its tree to its parent.
144 assert(len(parts) >= 1)
146 shalist = shalists.pop()
147 metalist = metalists.pop()
148 if metalist and not force_tree:
149 if dir_metadata: # Override the original metadata pushed for this dir.
150 metalist = [('', dir_metadata)] + metalist[1:]
151 sorted_metalist = sorted(metalist, key = lambda x : x[0])
152 metadata = ''.join([m[1].encode() for m in sorted_metalist])
153 metadata_f = BytesIO(metadata)
154 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
156 keep_boundaries=False)
157 shalist.append((mode, '.bupm', id))
158 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
164 metaidx = 1 # entry at 0 is for the dir
167 if name in names_seen:
168 parent_path = '/'.join(parts) + '/'
169 add_error('error: ignoring duplicate path %r in %r'
170 % (name, parent_path))
171 if not stat.S_ISDIR(x[0]):
172 del metalist[metaidx]
176 if not stat.S_ISDIR(x[0]):
178 tree = w.new_tree(clean_list)
180 shalists[-1].append((GIT_MODE_TREE,
181 git.mangle_name(part,
182 GIT_MODE_TREE, GIT_MODE_TREE),
188 def progress_report(n):
189 global count, subcount, lastremain
191 cc = count + subcount
192 pct = total and (cc*100.0/total) or 0
194 elapsed = now - tstart
195 kps = elapsed and int(cc/1024./elapsed)
196 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
197 kps = int(kps/kps_frac)*kps_frac
199 remain = elapsed*1.0/cc * (total-cc)
202 if (lastremain and (remain > lastremain)
203 and ((remain - lastremain)/lastremain < 0.05)):
207 hours = int(remain/60/60)
208 mins = int(remain/60 - hours*60)
209 secs = int(remain - hours*60*60 - mins*60)
214 kpsstr = '%dk/s' % kps
216 remainstr = '%dh%dm' % (hours, mins)
218 remainstr = '%dm%d' % (mins, secs)
220 remainstr = '%ds' % secs
221 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
222 % (pct, cc/1024, total/1024, fcount, ftotal,
226 indexfile = opt.indexfile or git.repo('bupindex')
227 r = index.Reader(indexfile)
229 msr = index.MetaStoreReader(indexfile + '.meta')
230 except IOError as ex:
231 if ex.errno != EACCES:
233 log('error: cannot access %r; have you run bup index?' % indexfile)
235 hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink')
def already_saved(ent):
    """Return ent.sha if the entry is already stored, else a falsy value.

    An entry counts as saved when ent.is_valid() is truthy and the pack
    writer `w` reports that ent.sha exists.  When either check fails, the
    falsy result of that check is returned unchanged.
    """
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    """Recursion predicate for the index-reading pass.

    Passed as `wantrecurse` to r.filter(); true exactly when
    already_saved(ent) is falsy.
    """
    saved = already_saved(ent)
    return not saved
def wantrecurse_during(ent):
    """Recursion predicate for the saving pass.

    Passed as `wantrecurse` to r.filter().  Returns True when
    already_saved(ent) is falsy; otherwise returns ent.sha_missing().
    """
    if not already_saved(ent):
        return True
    return ent.sha_missing()
246 def find_hardlink_target(hlink_db, ent):
247 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
248 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
254 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
255 if not (ftotal % 10024):
256 qprogress('Reading index: %d\r' % ftotal)
257 exists = ent.exists()
258 hashvalid = already_saved(ent)
259 ent.set_sha_missing(not hashvalid)
260 if not opt.smaller or ent.size < opt.smaller:
261 if exists and not hashvalid:
264 progress('Reading index: %d, done.\n' % ftotal)
265 hashsplit.progress_callback = progress_report
267 # Root collisions occur when strip or graft options map more than one
268 # path to the same directory (paths which originally had separate
269 # parents). When that situation is detected, use empty metadata for
270 # the parent. Otherwise, use the metadata for the common parent.
271 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
273 # FIXME: Add collision tests, or handle collisions some other way.
275 # FIXME: Detect/handle strip/graft name collisions (other than root),
276 # i.e. if '/foo/bar' and '/bar' both map to '/'.
279 root_collision = None
281 count = subcount = fcount = 0
284 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
285 (dir, file) = os.path.split(ent.name)
286 exists = (ent.flags & index.IX_EXISTS)
287 hashvalid = already_saved(ent)
288 wasmissing = ent.sha_missing()
294 if ent.sha == index.EMPTY_SHA:
301 log('%s %-70s\n' % (status, ent.name))
302 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
303 if not lastdir.startswith(dir):
304 log('%s %-70s\n' % (status, os.path.join(dir, '')))
313 if opt.smaller and ent.size >= opt.smaller:
314 if exists and not hashvalid:
316 log('skipping large file "%s"\n' % ent.name)
317 lastskip_name = ent.name
320 assert(dir.startswith('/'))
322 dirp = stripped_path_components(dir, extra)
324 dirp = stripped_path_components(dir, [opt.strip_path])
326 dirp = grafted_path_components(graft_points, dir)
328 dirp = path_components(dir)
330 # At this point, dirp contains a representation of the archive
331 # path that looks like [(archive_dir_name, real_fs_path), ...].
332 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
333 # might look like this at some point:
334 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
336 # This dual representation supports stripping/grafting, where the
337 # archive path may not have a direct correspondence with the
338 # filesystem. The root directory is represented by an initial
339 # component named '', and any component that doesn't have a
340 # corresponding filesystem directory (due to grafting, for
341 # example) will have a real_fs_path of None, i.e. [('', None),
344 if first_root == None:
346 elif first_root != dirp[0]:
347 root_collision = True
349 # If switching to a new sub-tree, finish the current sub-tree.
350 while parts > [x[0] for x in dirp]:
351 _pop(force_tree = None)
353 # If switching to a new sub-tree, start a new sub-tree.
354 for path_component in dirp[len(parts):]:
355 dir_name, fs_path = path_component
356 # Not indexed, so just grab the FS metadata or use empty metadata.
358 meta = metadata.from_path(fs_path, normalized=True) \
359 if fs_path else metadata.Metadata()
360 except (OSError, IOError) as e:
362 lastskip_name = dir_name
363 meta = metadata.Metadata()
364 _push(dir_name, meta)
368 continue # We're at the top level -- keep the current root dir
369 # Since there's no filename, this is a subdir -- finish it.
370 oldtree = already_saved(ent) # may be None
371 newtree = _pop(force_tree = oldtree)
373 if lastskip_name and lastskip_name.startswith(ent.name):
376 ent.validate(GIT_MODE_TREE, newtree)
378 if exists and wasmissing:
382 # it's not a directory
386 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
387 git_info = (ent.gitmode, git_name, id)
388 shalists[-1].append(git_info)
389 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
390 meta = msr.metadata_at(ent.meta_ofs)
391 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
392 # Restore the times that were cleared to 0 in the metastore.
393 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
394 metalists[-1].append((sort_key, meta))
396 if stat.S_ISREG(ent.mode):
398 with hashsplit.open_noatime(ent.name) as f:
399 (mode, id) = hashsplit.split_to_blob_or_tree(
400 w.new_blob, w.new_tree, [f],
401 keep_boundaries=False)
402 except (IOError, OSError) as e:
403 add_error('%s: %s' % (ent.name, e))
404 lastskip_name = ent.name
406 if stat.S_ISDIR(ent.mode):
407 assert(0) # handled above
408 elif stat.S_ISLNK(ent.mode):
410 rl = os.readlink(ent.name)
411 except (OSError, IOError) as e:
413 lastskip_name = ent.name
415 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
417 # Everything else should be fully described by its
418 # metadata, so just record an empty blob, so the paths
419 # in the tree and .bupm will match up.
420 (mode, id) = (GIT_MODE_FILE, w.new_blob(""))
423 ent.validate(mode, id)
425 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
426 git_info = (mode, git_name, id)
427 shalists[-1].append(git_info)
428 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
429 hlink = find_hardlink_target(hlink_db, ent)
431 meta = metadata.from_path(ent.name, hardlink_target=hlink,
433 except (OSError, IOError) as e:
435 lastskip_name = ent.name
436 meta = metadata.Metadata()
437 metalists[-1].append((sort_key, meta))
439 if exists and wasmissing:
445 pct = total and count*100.0/total or 100
446 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
447 % (pct, count/1024, total/1024, fcount, ftotal))
449 while len(parts) > 1: # _pop() all the parts above the root
450 _pop(force_tree = None)
451 assert(len(shalists) == 1)
452 assert(len(metalists) == 1)
454 # Finish the root directory.
455 tree = _pop(force_tree = None,
456 # When there's a collision, use empty metadata for the root.
457 dir_metadata = metadata.Metadata() if root_collision else None)
460 print(tree.encode('hex'))
461 if opt.commit or opt.name:
462 msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv
463 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
464 commit = w.new_commit(tree, oldref, userline, date, None,
465 userline, date, None, msg)
467 print(commit.encode('hex'))
470 w.close() # must close before we can update the ref
474 cli.update_ref(refname, commit, oldref)
476 git.update_ref(refname, commit, oldref)
482 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))