3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from errno import EACCES
10 from io import BytesIO
11 import os, sys, stat, time, math
13 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
14 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
15 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
16 hostname, istty2, log, parse_date_or_fatal, parse_num,
17 path_components, progress, qprogress, resolve_parent,
18 saved_errors, stripped_path_components,
19 userfullname, username, valid_save_name)
23 bup save [-tc] [-n name] <filenames...>
25 r,remote= hostname:/path/to/repo of remote repository
26 t,tree output a tree id
27 c,commit output a commit id
28 n,name= name of backup set to update (if any)
29 d,date= date for the commit (seconds since the epoch)
30 v,verbose increase log output (can be used more than once)
31 q,quiet don't show progress meter
32 smaller= only back up files smaller than n bytes
33 bwlimit= maximum bytes/sec to transmit to server
34 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
35 strip strips the path to every filename given
36 strip-path= path-prefix to be stripped when saving
37 graft= a graft point *old_path*=*new_path* (can be used more than once)
38 #,compress= set compression level to # (0-9, 9 is highest) [1]
40 o = options.Options(optspec)
41 (opt, flags, extra) = o.parse(sys.argv[1:])
43 git.check_repo_or_die()
44 if not (opt.tree or opt.commit or opt.name):
45 o.fatal("use one or more of -t, -c, -n")
47 o.fatal("no filenames given")
49 opt.progress = (istty2 and not opt.quiet)
50 opt.smaller = parse_num(opt.smaller or 0)
52 client.bwlimit = parse_num(opt.bwlimit)
55 date = parse_date_or_fatal(opt.date, o.fatal)
59 if opt.strip and opt.strip_path:
60 o.fatal("--strip is incompatible with --strip-path")
65 o.fatal("--strip is incompatible with --graft")
68 o.fatal("--strip-path is incompatible with --graft")
# Collect every --graft flag as a (resolved_old_path, resolved_new_path)
# pair in graft_points.
for (option, parameter) in flags:
    if option != "--graft":
        continue
    splitted_parameter = parameter.split('=')
    if len(splitted_parameter) != 2:
        # NOTE(review): presumably o.fatal() does not return; the
        # unpacking below relies on that -- confirm against bup.options.
        o.fatal("a graft point must be of the form old_path=new_path")
    old_path, new_path = splitted_parameter
    if not old_path or not new_path:
        o.fatal("a graft point cannot be empty")
    resolved_pair = (resolve_parent(old_path), resolve_parent(new_path))
    graft_points.append(resolved_pair)
81 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
82 if is_reverse and opt.remote:
83 o.fatal("don't use -r in reverse mode; it's automatic")
85 if opt.name and not valid_save_name(opt.name):
86 o.fatal("'%s' is not a valid branch name" % opt.name)
87 refname = opt.name and 'refs/heads/%s' % opt.name or None
88 if opt.remote or is_reverse:
90 cli = client.Client(opt.remote)
91 except client.ClientError as e:
94 oldref = refname and cli.read_ref(refname) or None
95 w = cli.new_packwriter(compression_level=opt.compress)
98 oldref = refname and git.read_ref(refname) or None
99 w = git.PackWriter(compression_level=opt.compress)
105 if dir.endswith('/'):
110 # Metadata is stored in a file named .bupm in each directory. The
111 # first metadata entry will be the metadata for the current directory.
112 # The remaining entries will be for each of the other directory
113 # elements, in the order they're listed in the index.
115 # Since the git tree elements are sorted according to
116 # git.shalist_item_sort_key, the metalist items are accumulated as
117 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
118 # created. The sort_key must be computed using the element's real
119 # name and mode rather than the git mode and (possibly mangled) name.
121 # Maintain a stack of information representing the current location in
122 # the archive being constructed. The current path is recorded in
123 # parts, which will be something like ['', 'home', 'someuser'], and
124 # the accumulated content and metadata for each of the dirs in parts is
125 # stored in parallel stacks in shalists and metalists.
127 parts = [] # Current archive position (stack of dir names).
128 shalists = [] # Hashes for each dir in paths.
129 metalists = [] # Metadata for each dir in paths.
132 def _push(part, metadata):
133 # Enter a new archive directory -- make it the current directory.
136 metalists.append([('', metadata)]) # This dir's metadata (no name).
139 def _pop(force_tree, dir_metadata=None):
140 # Leave the current archive directory and add its tree to its parent.
141 assert(len(parts) >= 1)
143 shalist = shalists.pop()
144 metalist = metalists.pop()
145 if metalist and not force_tree:
146 if dir_metadata: # Override the original metadata pushed for this dir.
147 metalist = [('', dir_metadata)] + metalist[1:]
148 sorted_metalist = sorted(metalist, key = lambda x : x[0])
149 metadata = ''.join([m[1].encode() for m in sorted_metalist])
150 metadata_f = BytesIO(metadata)
151 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
153 keep_boundaries=False)
154 shalist.append((mode, '.bupm', id))
155 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
161 metaidx = 1 # entry at 0 is for the dir
164 if name in names_seen:
165 parent_path = '/'.join(parts) + '/'
166 add_error('error: ignoring duplicate path %r in %r'
167 % (name, parent_path))
168 if not stat.S_ISDIR(x[0]):
169 del metalist[metaidx]
173 if not stat.S_ISDIR(x[0]):
175 tree = w.new_tree(clean_list)
177 shalists[-1].append((GIT_MODE_TREE,
178 git.mangle_name(part,
179 GIT_MODE_TREE, GIT_MODE_TREE),
185 def progress_report(n):
186 global count, subcount, lastremain
188 cc = count + subcount
189 pct = total and (cc*100.0/total) or 0
191 elapsed = now - tstart
192 kps = elapsed and int(cc/1024./elapsed)
193 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
194 kps = int(kps/kps_frac)*kps_frac
196 remain = elapsed*1.0/cc * (total-cc)
199 if (lastremain and (remain > lastremain)
200 and ((remain - lastremain)/lastremain < 0.05)):
204 hours = int(remain/60/60)
205 mins = int(remain/60 - hours*60)
206 secs = int(remain - hours*60*60 - mins*60)
211 kpsstr = '%dk/s' % kps
213 remainstr = '%dh%dm' % (hours, mins)
215 remainstr = '%dm%d' % (mins, secs)
217 remainstr = '%ds' % secs
218 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
219 % (pct, cc/1024, total/1024, fcount, ftotal,
223 indexfile = opt.indexfile or git.repo('bupindex')
224 r = index.Reader(indexfile)
226 msr = index.MetaStoreReader(indexfile + '.meta')
227 except IOError as ex:
228 if ex.errno != EACCES:
230 log('error: cannot access %r; have you run bup index?' % indexfile)
232 hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink')
def already_saved(ent):
    """Return ent.sha when the entry is valid and the pack writer has it.

    The checks short-circuit: if ent.is_valid() is falsy that value is
    returned, otherwise if w.exists(ent.sha) is falsy that value is
    returned, and only when both are truthy is ent.sha itself returned.
    """
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    """Return True when ent is not already saved.

    Used as the wantrecurse predicate for the index-reading pass.
    """
    saved = already_saved(ent)
    return not saved
def wantrecurse_during(ent):
    """Return True when ent is not already saved, else ent.sha_missing().

    Used as the wantrecurse predicate for the saving pass.
    """
    if not already_saved(ent):
        return True
    # Already saved: fall back to whatever the entry's sha-missing flag says.
    return ent.sha_missing()
243 def find_hardlink_target(hlink_db, ent):
244 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
245 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
251 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
252 if not (ftotal % 10024):
253 qprogress('Reading index: %d\r' % ftotal)
254 exists = ent.exists()
255 hashvalid = already_saved(ent)
256 ent.set_sha_missing(not hashvalid)
257 if not opt.smaller or ent.size < opt.smaller:
258 if exists and not hashvalid:
261 progress('Reading index: %d, done.\n' % ftotal)
262 hashsplit.progress_callback = progress_report
264 # Root collisions occur when strip or graft options map more than one
265 # path to the same directory (paths which originally had separate
266 # parents). When that situation is detected, use empty metadata for
267 # the parent. Otherwise, use the metadata for the common parent.
268 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
270 # FIXME: Add collision tests, or handle collisions some other way.
272 # FIXME: Detect/handle strip/graft name collisions (other than root),
273 # i.e. if '/foo/bar' and '/bar' both map to '/'.
276 root_collision = None
278 count = subcount = fcount = 0
281 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
282 (dir, file) = os.path.split(ent.name)
283 exists = (ent.flags & index.IX_EXISTS)
284 hashvalid = already_saved(ent)
285 wasmissing = ent.sha_missing()
291 if ent.sha == index.EMPTY_SHA:
298 log('%s %-70s\n' % (status, ent.name))
299 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
300 if not lastdir.startswith(dir):
301 log('%s %-70s\n' % (status, os.path.join(dir, '')))
310 if opt.smaller and ent.size >= opt.smaller:
311 if exists and not hashvalid:
313 log('skipping large file "%s"\n' % ent.name)
314 lastskip_name = ent.name
317 assert(dir.startswith('/'))
319 dirp = stripped_path_components(dir, extra)
321 dirp = stripped_path_components(dir, [opt.strip_path])
323 dirp = grafted_path_components(graft_points, dir)
325 dirp = path_components(dir)
327 # At this point, dirp contains a representation of the archive
328 # path that looks like [(archive_dir_name, real_fs_path), ...].
329 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
330 # might look like this at some point:
331 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
333 # This dual representation supports stripping/grafting, where the
334 # archive path may not have a direct correspondence with the
335 # filesystem. The root directory is represented by an initial
336 # component named '', and any component that doesn't have a
337 # corresponding filesystem directory (due to grafting, for
338 # example) will have a real_fs_path of None, i.e. [('', None),
341 if first_root == None:
343 elif first_root != dirp[0]:
344 root_collision = True
346 # If switching to a new sub-tree, finish the current sub-tree.
347 while parts > [x[0] for x in dirp]:
348 _pop(force_tree = None)
350 # If switching to a new sub-tree, start a new sub-tree.
351 for path_component in dirp[len(parts):]:
352 dir_name, fs_path = path_component
353 # Not indexed, so just grab the FS metadata or use empty metadata.
355 meta = metadata.from_path(fs_path, normalized=True) \
356 if fs_path else metadata.Metadata()
357 except (OSError, IOError) as e:
359 lastskip_name = dir_name
360 meta = metadata.Metadata()
361 _push(dir_name, meta)
365 continue # We're at the top level -- keep the current root dir
366 # Since there's no filename, this is a subdir -- finish it.
367 oldtree = already_saved(ent) # may be None
368 newtree = _pop(force_tree = oldtree)
370 if lastskip_name and lastskip_name.startswith(ent.name):
373 ent.validate(GIT_MODE_TREE, newtree)
375 if exists and wasmissing:
379 # it's not a directory
383 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
384 git_info = (ent.gitmode, git_name, id)
385 shalists[-1].append(git_info)
386 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
387 meta = msr.metadata_at(ent.meta_ofs)
388 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
389 # Restore the times that were cleared to 0 in the metastore.
390 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
391 metalists[-1].append((sort_key, meta))
393 if stat.S_ISREG(ent.mode):
395 f = hashsplit.open_noatime(ent.name)
396 except (IOError, OSError) as e:
398 lastskip_name = ent.name
401 (mode, id) = hashsplit.split_to_blob_or_tree(
402 w.new_blob, w.new_tree, [f],
403 keep_boundaries=False)
404 except (IOError, OSError) as e:
405 add_error('%s: %s' % (ent.name, e))
406 lastskip_name = ent.name
408 if stat.S_ISDIR(ent.mode):
409 assert(0) # handled above
410 elif stat.S_ISLNK(ent.mode):
412 rl = os.readlink(ent.name)
413 except (OSError, IOError) as e:
415 lastskip_name = ent.name
417 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
419 # Everything else should be fully described by its
420 # metadata, so just record an empty blob, so the paths
421 # in the tree and .bupm will match up.
422 (mode, id) = (GIT_MODE_FILE, w.new_blob(""))
425 ent.validate(mode, id)
427 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
428 git_info = (mode, git_name, id)
429 shalists[-1].append(git_info)
430 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
431 hlink = find_hardlink_target(hlink_db, ent)
433 meta = metadata.from_path(ent.name, hardlink_target=hlink,
435 except (OSError, IOError) as e:
437 lastskip_name = ent.name
438 meta = metadata.Metadata()
439 metalists[-1].append((sort_key, meta))
441 if exists and wasmissing:
447 pct = total and count*100.0/total or 100
448 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
449 % (pct, count/1024, total/1024, fcount, ftotal))
451 while len(parts) > 1: # _pop() all the parts above the root
452 _pop(force_tree = None)
453 assert(len(shalists) == 1)
454 assert(len(metalists) == 1)
456 # Finish the root directory.
457 tree = _pop(force_tree = None,
458 # When there's a collision, use empty metadata for the root.
459 dir_metadata = metadata.Metadata() if root_collision else None)
462 print(tree.encode('hex'))
463 if opt.commit or opt.name:
464 msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv
465 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
466 commit = w.new_commit(tree, oldref, userline, date, None,
467 userline, date, None, msg)
469 print(commit.encode('hex'))
472 w.close() # must close before we can update the ref
476 cli.update_ref(refname, commit, oldref)
478 git.update_ref(refname, commit, oldref)
484 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))