3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from errno import EACCES
10 from io import BytesIO
11 import os, sys, stat, time, math
13 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
14 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
15 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
16 hostname, istty2, log, parse_date_or_fatal, parse_num,
17 path_components, progress, qprogress, resolve_parent,
18 saved_errors, stripped_path_components,
20 from bup.pwdgrp import userfullname, username
24 bup save [-tc] [-n name] <filenames...>
26 r,remote= hostname:/path/to/repo of remote repository
27 t,tree output a tree id
28 c,commit output a commit id
29 n,name= name of backup set to update (if any)
30 d,date= date for the commit (seconds since the epoch)
31 v,verbose increase log output (can be used more than once)
32 q,quiet don't show progress meter
33 smaller= only back up files smaller than n bytes
34 bwlimit= maximum bytes/sec to transmit to server
35 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
36 strip strips the path to every filename given
37 strip-path= path-prefix to be stripped when saving
38 graft= a graft point *old_path*=*new_path* (can be used more than once)
39 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line against optspec.
# NOTE(review): `extra` is presumably the list of filenames to save (it is
# what r.filter() is given later on) -- confirm.
41 o = options.Options(optspec)
42 (opt, flags, extra) = o.parse(sys.argv[1:])
44 git.check_repo_or_die()
# At least one kind of output has to be requested: a tree id (-t), a
# commit id (-c), or a named backup set to update (-n).
45 if not (opt.tree or opt.commit or opt.name):
46 o.fatal("use one or more of -t, -c, -n")
48 o.fatal("no filenames given")
# Progress output is wanted only when istty2 is truthy and -q was not given.
50 opt.progress = (istty2 and not opt.quiet)
# A missing --smaller is handed to parse_num() as 0; later tests treat a
# falsy opt.smaller as "no size limit".
51 opt.smaller = parse_num(opt.smaller or 0)
53 client.bwlimit = parse_num(opt.bwlimit)
56 date = parse_date_or_fatal(opt.date, o.fatal)
# --strip, --strip-path and --graft each rewrite the saved paths; no two of
# them may be combined.
60 if opt.strip and opt.strip_path:
61 o.fatal("--strip is incompatible with --strip-path")
66 o.fatal("--strip is incompatible with --graft")
69 o.fatal("--strip-path is incompatible with --graft")
# Collect every --graft old_path=new_path argument as a pair of paths, each
# passed through resolve_parent().
71 for (option, parameter) in flags:
72 if option == "--graft":
73 splitted_parameter = parameter.split('=')
74 if len(splitted_parameter) != 2:
75 o.fatal("a graft point must be of the form old_path=new_path")
76 old_path, new_path = splitted_parameter
77 if not (old_path and new_path):
78 o.fatal("a graft point cannot be empty")
79 graft_points.append((resolve_parent(old_path),
80 resolve_parent(new_path)))
# NOTE(review): BUP_SERVER_REVERSE appears to mark "reverse mode", in which
# the remote is selected automatically (see the message below) -- confirm.
82 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
83 if is_reverse and opt.remote:
84 o.fatal("don't use -r in reverse mode; it's automatic")
86 if opt.name and not valid_save_name(opt.name):
87 o.fatal("'%s' is not a valid branch name" % opt.name)
# Full ref to update, or None when no -n name was given.
88 refname = opt.name and 'refs/heads/%s' % opt.name or None
# With -r or in reverse mode, objects are written through a client
# connection; the git.* calls further down are the local counterpart.
# oldref is the value the ref has before this save (None when there is no
# refname or the ref reads as falsy); w is the pack writer used for every
# object created below.
89 if opt.remote or is_reverse:
91 cli = client.Client(opt.remote)
92 except client.ClientError as e:
95 oldref = refname and cli.read_ref(refname) or None
96 w = cli.new_packwriter(compression_level=opt.compress)
99 oldref = refname and git.read_ref(refname) or None
100 w = git.PackWriter(compression_level=opt.compress)
106 if dir.endswith('/'):
111 # Metadata is stored in a file named .bupm in each directory. The
112 # first metadata entry will be the metadata for the current directory.
113 # The remaining entries will be for each of the other directory
114 # elements, in the order they're listed in the index.
116 # Since the git tree elements are sorted according to
117 # git.shalist_item_sort_key, the metalist items are accumulated as
118 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
119 # created. The sort_key must be computed using the element's real
120 # name and mode rather than the git mode and (possibly mangled) name.
122 # Maintain a stack of information representing the current location in
123 # the archive being constructed. The current path is recorded in
124 # parts, which will be something like ['', 'home', 'someuser'], and
125 # the accumulated content and metadata for each of the dirs in parts is
126 # stored in parallel stacks in shalists and metalists.
128 parts = [] # Current archive position (stack of dir names).
129 shalists = [] # Tree entries (mode, name, id) for each dir in parts.
130 metalists = [] # (sort_key, metadata) entries for each dir in parts.
# _push(part, metadata): begin a new directory level on the archive stacks.
#   part     -- name of the directory being entered.
#   metadata -- metadata object recorded for the directory itself.
# NOTE(review): only part of this definition is present in this extract;
# the statements that update parts and shalists are not shown here.
133 def _push(part, metadata):
134 # Enter a new archive directory -- make it the current directory.
# The new metalist starts with the directory's own entry, keyed by the
# empty string instead of a name-derived sort key.
137 metalists.append([('', metadata)]) # This dir's metadata (no name).
# _pop(force_tree, dir_metadata=None): finish the directory on top of the
# archive stacks and return its tree (callers assign the result).
#   force_tree   -- when truthy, no '.bupm' metadata item is generated for
#                   this directory (see the guard below).  Callers pass
#                   either None or the value of already_saved(ent).
#   dir_metadata -- when truthy, replaces the directory's own (first)
#                   metalist entry before the metadata is written.
# NOTE(review): several lines of this definition are not present in this
# extract (among them the construction of clean_list / names_seen and the
# return statement); the comments below describe only what is visible.
140 def _pop(force_tree, dir_metadata=None):
141 # Leave the current archive directory and add its tree to its parent.
142 assert(len(parts) >= 1)
144 shalist = shalists.pop()
145 metalist = metalists.pop()
146 if metalist and not force_tree:
147 if dir_metadata: # Override the original metadata pushed for this dir.
148 metalist = [('', dir_metadata)] + metalist[1:]
# Order the entries by sort key, then serialize them back-to-back; the
# result is hash-split and stored in this directory as the '.bupm' item.
149 sorted_metalist = sorted(metalist, key = lambda x : x[0])
150 metadata = ''.join([m[1].encode() for m in sorted_metalist])
151 metadata_f = BytesIO(metadata)
152 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
154 keep_boundaries=False)
155 shalist.append((mode, '.bupm', id))
156 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
# A name that has already been seen in this directory is reported through
# add_error() as an ignored duplicate.
164 if name in names_seen:
165 parent_path = '/'.join(parts) + '/'
166 add_error('error: ignoring duplicate path %r in %r'
167 % (name, parent_path))
# Write the tree object for this directory from the de-duplicated entries.
171 tree = w.new_tree(clean_list)
# Register the finished tree, under its mangled name, in the parent
# directory's shalist.
173 shalists[-1].append((GIT_MODE_TREE,
174 git.mangle_name(part,
175 GIT_MODE_TREE, GIT_MODE_TREE),
# progress_report(n): progress callback (installed further down as
# hashsplit.progress_callback) that prints a one-line 'Saving:' status with
# percentage done, throughput and estimated time remaining.
# NOTE(review): several lines of this definition are not present in this
# extract, including whatever is done with n and the branch headers that
# choose between the remainstr formats.
181 def progress_report(n):
182 global count, subcount, lastremain
# cc is the amount processed so far; it is shown divided by 1024 with a 'k'
# suffix.
184 cc = count + subcount
185 pct = total and (cc*100.0/total) or 0
187 elapsed = now - tstart
188 kps = elapsed and int(cc/1024./elapsed)
# Truncate the rate to roughly its two leading decimal digits
# (e.g. 1234 -> 1200); rates below 10 are left unchanged.
189 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
190 kps = int(kps/kps_frac)*kps_frac
# Estimated time remaining, assuming the average rate so far continues.
192 remain = elapsed*1.0/cc * (total-cc)
# True when the estimate grew by less than 5% since the previous report.
# NOTE(review): the action taken in that case is on a line not shown.
195 if (lastremain and (remain > lastremain)
196 and ((remain - lastremain)/lastremain < 0.05)):
200 hours = int(remain/60/60)
201 mins = int(remain/60 - hours*60)
202 secs = int(remain - hours*60*60 - mins*60)
207 kpsstr = '%dk/s' % kps
209 remainstr = '%dh%dm' % (hours, mins)
# NOTE(review): unlike the other two formats, this one has no unit after
# the seconds value.
211 remainstr = '%dm%d' % (mins, secs)
213 remainstr = '%ds' % secs
214 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
215 % (pct, cc/1024, total/1024, fcount, ftotal,
# Use the -f/--indexfile path when given, otherwise 'bupindex' inside the
# repository.
219 indexfile = opt.indexfile or git.repo('bupindex')
220 r = index.Reader(indexfile)
# The metadata store and the hardlink database are opened from files named
# after the index: <indexfile>.meta and <indexfile>.hlink.
222 msr = index.MetaStoreReader(indexfile + '.meta')
223 except IOError as ex:
# Only a permission error (EACCES) leads to the hint below.
# NOTE(review): what happens for other errno values is on a line not shown
# here -- presumably the exception is re-raised; confirm.
224 if ex.errno != EACCES:
226 log('error: cannot access %r; have you run bup index?' % indexfile)
228 hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink')
def already_saved(ent):
    """Return ent.sha when ent is valid and the pack writer has that sha.

    When either check fails, the falsy result of that check is returned
    unchanged, so the value can be used directly as a truth test.
    """
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    """Recursion predicate for the first index pass.

    True exactly when already_saved(ent) is falsy.
    """
    if already_saved(ent):
        return False
    return True
def wantrecurse_during(ent):
    """Recursion predicate for the saving pass.

    True when already_saved(ent) is falsy; otherwise the result of
    ent.sha_missing() is returned as-is.
    """
    if already_saved(ent):
        return ent.sha_missing()
    return True
# find_hardlink_target(hlink_db, ent): its result is stored by callers as an
# entry's hardlink target.
#   hlink_db -- hardlink database, queried with node_paths(dev, ino).
#   ent      -- index entry; its mode, nlink, dev and ino are read.
# NOTE(review): the rest of this definition (how a target is picked from
# link_paths, and the return value) is not present in this extract.
239 def find_hardlink_target(hlink_db, ent):
# Only non-directories with more than one link are looked up.
240 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
241 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
# First pass over the index: record on every entry whether its content is
# already available (set_sha_missing), refreshing the 'Reading index' line
# whenever ftotal is a multiple of 10024.
# NOTE(review): the statements that update ftotal and the work totals are
# on lines not present in this extract.
247 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
248 if not (ftotal % 10024):
249 qprogress('Reading index: %d\r' % ftotal)
250 exists = ent.exists()
251 hashvalid = already_saved(ent)
252 ent.set_sha_missing(not hashvalid)
# Only entries under the --smaller limit (or all entries when no limit is
# set) that exist and are not saved yet reach the innermost branch.
253 if not opt.smaller or ent.size < opt.smaller:
254 if exists and not hashvalid:
257 progress('Reading index: %d, done.\n' % ftotal)
# From here on hashsplit reports progress through progress_report().
258 hashsplit.progress_callback = progress_report
260 # Root collisions occur when strip or graft options map more than one
261 # path to the same directory (paths which originally had separate
262 # parents). When that situation is detected, use empty metadata for
263 # the parent. Otherwise, use the metadata for the common parent.
264 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
266 # FIXME: Add collision tests, or handle collisions some other way.
268 # FIXME: Detect/handle strip/graft name collisions (other than root),
269 # i.e. if '/foo/bar' and '/bar' both map to '/'.
# Second pass over the index: build the archive tree directory by directory
# with _push()/_pop(), storing every entry that is not already saved.
# NOTE(review): many lines of this loop are not present in this extract
# (try:/else:/if headers and several bodies); added comments describe only
# what the visible lines show.
272 root_collision = None
274 count = subcount = fcount = 0
277 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
278 (dir, file) = os.path.split(ent.name)
279 exists = (ent.flags & index.IX_EXISTS)
280 hashvalid = already_saved(ent)
281 wasmissing = ent.sha_missing()
287 if ent.sha == index.EMPTY_SHA:
294 log('%s %-70s\n' % (status, ent.name))
295 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
296 if not lastdir.startswith(dir):
297 log('%s %-70s\n' % (status, os.path.join(dir, '')))
# Honor --smaller: an entry at or above the limit is logged as skipped and
# remembered in lastskip_name.
306 if opt.smaller and ent.size >= opt.smaller:
307 if exists and not hashvalid:
309 log('skipping large file "%s"\n' % ent.name)
310 lastskip_name = ent.name
313 assert(dir.startswith('/'))
# Translate the real directory into archive path components; the four
# variants correspond to --strip, --strip-path, --graft and no rewriting
# (the selecting conditions are on lines not shown).
315 dirp = stripped_path_components(dir, extra)
317 dirp = stripped_path_components(dir, [opt.strip_path])
319 dirp = grafted_path_components(graft_points, dir)
321 dirp = path_components(dir)
323 # At this point, dirp contains a representation of the archive
324 # path that looks like [(archive_dir_name, real_fs_path), ...].
325 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
326 # might look like this at some point:
327 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
329 # This dual representation supports stripping/grafting, where the
330 # archive path may not have a direct correspondence with the
331 # filesystem. The root directory is represented by an initial
332 # component named '', and any component that doesn't have a
333 # corresponding filesystem directory (due to grafting, for
334 # example) will have a real_fs_path of None, i.e. [('', None),
# A root component that differs from the first one seen is a root collision.
337 if first_root == None:
339 elif first_root != dirp[0]:
340 root_collision = True
342 # If switching to a new sub-tree, finish the current sub-tree.
343 while parts > [x[0] for x in dirp]:
344 _pop(force_tree = None)
346 # If switching to a new sub-tree, start a new sub-tree.
347 for path_component in dirp[len(parts):]:
348 dir_name, fs_path = path_component
349 # Not indexed, so just grab the FS metadata or use empty metadata.
351 meta = metadata.from_path(fs_path, normalized=True) \
352 if fs_path else metadata.Metadata()
353 except (OSError, IOError) as e:
355 lastskip_name = dir_name
356 meta = metadata.Metadata()
357 _push(dir_name, meta)
361 continue # We're at the top level -- keep the current root dir
362 # Since there's no filename, this is a subdir -- finish it.
363 oldtree = already_saved(ent) # may be None
364 newtree = _pop(force_tree = oldtree)
# When the most recently skipped name starts with this directory's name, the
# directory is treated differently (body not shown); the visible validate()
# call records the new tree id on the index entry.
366 if lastskip_name and lastskip_name.startswith(ent.name):
369 ent.validate(GIT_MODE_TREE, newtree)
371 if exists and wasmissing:
375 # it's not a directory
# NOTE(review): the next lines appear to be the "already saved" case: the
# entry is added using the index's own gitmode and its metadata is read
# from the metastore (msr) instead of the filesystem; the selecting
# condition and the assignment of id are on lines not shown.
379 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
380 git_info = (ent.gitmode, git_name, id)
381 shalists[-1].append(git_info)
382 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
383 meta = msr.metadata_at(ent.meta_ofs)
384 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
385 # Restore the times that were cleared to 0 in the metastore.
386 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
387 metalists[-1].append((sort_key, meta))
# Regular files are opened and hash-split into a blob or tree; a failure to
# open or read is recorded in lastskip_name (and, where visible, through
# add_error()).
389 if stat.S_ISREG(ent.mode):
391 f = hashsplit.open_noatime(ent.name)
392 except (IOError, OSError) as e:
394 lastskip_name = ent.name
397 (mode, id) = hashsplit.split_to_blob_or_tree(
398 w.new_blob, w.new_tree, [f],
399 keep_boundaries=False)
400 except (IOError, OSError) as e:
401 add_error('%s: %s' % (ent.name, e))
402 lastskip_name = ent.name
404 if stat.S_ISDIR(ent.mode):
405 assert(0) # handled above
# A symlink is stored as a blob holding the link target.
406 elif stat.S_ISLNK(ent.mode):
408 rl = os.readlink(ent.name)
409 except (OSError, IOError) as e:
411 lastskip_name = ent.name
413 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
415 # Everything else should be fully described by its
416 # metadata, so just record an empty blob, so the paths
417 # in the tree and .bupm will match up.
418 (mode, id) = (GIT_MODE_FILE, w.new_blob(""))
# Record the freshly computed mode and id on the index entry, then add the
# item and its filesystem metadata to the current directory.
421 ent.validate(mode, id)
423 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
424 git_info = (mode, git_name, id)
425 shalists[-1].append(git_info)
426 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
427 hlink = find_hardlink_target(hlink_db, ent)
429 meta = metadata.from_path(ent.name, hardlink_target=hlink,
431 except (OSError, IOError) as e:
433 lastskip_name = ent.name
435 metalists[-1].append((sort_key, meta))
437 if exists and wasmissing:
# Final progress line; the percentage falls back to 100 when total is falsy
# (or when the computed value is itself falsy).
443 pct = total and count*100.0/total or 100
444 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
445 % (pct, count/1024, total/1024, fcount, ftotal))
447 while len(parts) > 1: # _pop() all the parts above the root
448 _pop(force_tree = None)
449 assert(len(shalists) == 1)
450 assert(len(metalists) == 1)
452 # Finish the root directory.
453 tree = _pop(force_tree = None,
454 # When there's a collision, use empty metadata for the root.
455 dir_metadata = metadata.Metadata() if root_collision else None)
# NOTE(review): the guards around the two print() calls are on lines not
# shown -- presumably -t and -c respectively.  str.encode('hex') exists only
# on Python 2.
458 print(tree.encode('hex'))
459 if opt.commit or opt.name:
460 msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv
461 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
# The same identity line and date are passed twice -- presumably once as
# author and once as committer; confirm against w.new_commit().  oldref is
# passed as well (presumably the parent commit).
462 commit = w.new_commit(tree, oldref, userline, date, None,
463 userline, date, None, msg)
465 print(commit.encode('hex'))
468 w.close() # must close before we can update the ref
# The ref is moved from oldref to the new commit, through the client when
# one is in use and locally otherwise (selecting condition not shown).
472 cli.update_ref(refname, commit, oldref)
474 git.update_ref(refname, commit, oldref)
480 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))