3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from errno import EACCES
10 from io import BytesIO
11 import os, sys, stat, time, math
13 from bup import hashsplit, git, options, index, client, metadata, hlinkdb
14 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
15 from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
16 hostname, istty2, log, parse_date_or_fatal, parse_num,
17 path_components, progress, qprogress, resolve_parent,
18 saved_errors, stripped_path_components,
19 userfullname, username, valid_save_name)
23 bup save [-tc] [-n name] <filenames...>
25 r,remote= hostname:/path/to/repo of remote repository
26 t,tree output a tree id
27 c,commit output a commit id
28 n,name= name of backup set to update (if any)
29 d,date= date for the commit (seconds since the epoch)
30 v,verbose increase log output (can be used more than once)
31 q,quiet don't show progress meter
32 smaller= only back up files smaller than n bytes
33 bwlimit= maximum bytes/sec to transmit to server
34 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
35 strip strips the path to every filename given
36 strip-path= path-prefix to be stripped when saving
37 graft= a graft point *old_path*=*new_path* (can be used more than once)
38 #,compress= set compression level to # (0-9, 9 is highest) [1]
40 o = options.Options(optspec)
41 (opt, flags, extra) = o.parse(sys.argv[1:])
43 git.check_repo_or_die()
44 if not (opt.tree or opt.commit or opt.name):
45 o.fatal("use one or more of -t, -c, -n")
47 o.fatal("no filenames given")
49 opt.progress = (istty2 and not opt.quiet)
50 opt.smaller = parse_num(opt.smaller or 0)
52 client.bwlimit = parse_num(opt.bwlimit)
55 date = parse_date_or_fatal(opt.date, o.fatal)
59 if opt.strip and opt.strip_path:
60 o.fatal("--strip is incompatible with --strip-path")
65 o.fatal("--strip is incompatible with --graft")
68 o.fatal("--strip-path is incompatible with --graft")
70 for (option, parameter) in flags:
71 if option == "--graft":
72 splitted_parameter = parameter.split('=')
73 if len(splitted_parameter) != 2:
74 o.fatal("a graft point must be of the form old_path=new_path")
75 old_path, new_path = splitted_parameter
76 if not (old_path and new_path):
77 o.fatal("a graft point cannot be empty")
78 graft_points.append((resolve_parent(old_path),
79 resolve_parent(new_path)))
81 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
82 if is_reverse and opt.remote:
83 o.fatal("don't use -r in reverse mode; it's automatic")
85 if opt.name and not valid_save_name(opt.name):
86 o.fatal("'%s' is not a valid branch name" % opt.name)
87 refname = opt.name and 'refs/heads/%s' % opt.name or None
88 if opt.remote or is_reverse:
90 cli = client.Client(opt.remote)
91 except client.ClientError as e:
94 oldref = refname and cli.read_ref(refname) or None
95 w = cli.new_packwriter(compression_level=opt.compress)
98 oldref = refname and git.read_ref(refname) or None
99 w = git.PackWriter(compression_level=opt.compress)
105 if dir.endswith('/'):
110 # Metadata is stored in a file named .bupm in each directory. The
111 # first metadata entry will be the metadata for the current directory.
112 # The remaining entries will be for each of the other directory
113 # elements, in the order they're listed in the index.
115 # Since the git tree elements are sorted according to
116 # git.shalist_item_sort_key, the metalist items are accumulated as
117 # (sort_key, metadata) tuples, and then sorted when the .bupm file is
118 # created. The sort_key must be computed using the element's real
119 # name and mode rather than the git mode and (possibly mangled) name.
121 # Maintain a stack of information representing the current location in
122 # the archive being constructed. The current path is recorded in
123 # parts, which will be something like ['', 'home', 'someuser'], and
124 # the accumulated content and metadata for each of the dirs in parts is
125 # stored in parallel stacks in shalists and metalists.
# Three parallel stacks, one entry per directory on the current archive
# path; each starts out as its own distinct empty list.
#   parts     -- current archive position (stack of dir names)
#   shalists  -- hashes for each dir in parts
#   metalists -- metadata for each dir in parts
parts, shalists, metalists = [], [], []
132 def _push(part, metadata):
133 # Enter a new archive directory -- make it the current directory.
136 metalists.append([('', metadata)]) # This dir's metadata (no name).
139 def _pop(force_tree, dir_metadata=None):
140 # Leave the current archive directory and add its tree to its parent.
141 assert(len(parts) >= 1)
143 shalist = shalists.pop()
144 metalist = metalists.pop()
145 if metalist and not force_tree:
146 if dir_metadata: # Override the original metadata pushed for this dir.
147 metalist = [('', dir_metadata)] + metalist[1:]
148 sorted_metalist = sorted(metalist, key = lambda x : x[0])
149 metadata = ''.join([m[1].encode() for m in sorted_metalist])
150 metadata_f = BytesIO(metadata)
151 mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
153 keep_boundaries=False)
154 shalist.append((mode, '.bupm', id))
155 # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
163 if name in names_seen:
164 parent_path = '/'.join(parts) + '/'
165 add_error('error: ignoring duplicate path %r in %r'
166 % (name, parent_path))
170 tree = w.new_tree(clean_list)
172 shalists[-1].append((GIT_MODE_TREE,
173 git.mangle_name(part,
174 GIT_MODE_TREE, GIT_MODE_TREE),
180 def progress_report(n):
181 global count, subcount, lastremain
183 cc = count + subcount
184 pct = total and (cc*100.0/total) or 0
186 elapsed = now - tstart
187 kps = elapsed and int(cc/1024./elapsed)
188 kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
189 kps = int(kps/kps_frac)*kps_frac
191 remain = elapsed*1.0/cc * (total-cc)
194 if (lastremain and (remain > lastremain)
195 and ((remain - lastremain)/lastremain < 0.05)):
199 hours = int(remain/60/60)
200 mins = int(remain/60 - hours*60)
201 secs = int(remain - hours*60*60 - mins*60)
206 kpsstr = '%dk/s' % kps
208 remainstr = '%dh%dm' % (hours, mins)
210 remainstr = '%dm%d' % (mins, secs)
212 remainstr = '%ds' % secs
213 qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
214 % (pct, cc/1024, total/1024, fcount, ftotal,
218 indexfile = opt.indexfile or git.repo('bupindex')
219 r = index.Reader(indexfile)
221 msr = index.MetaStoreReader(indexfile + '.meta')
222 except IOError as ex:
223 if ex.errno != EACCES:
225 log('error: cannot access %r; have you run bup index?' % indexfile)
227 hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink')
def already_saved(ent):
    """Return ent.sha when the entry's object is already stored.

    The result is truthy only if the index entry reports itself valid
    and the pack writer `w` reports that ent.sha exists; in that case
    the sha itself is returned.  Otherwise the falsy value produced by
    the failing check is returned unchanged.
    """
    valid = ent.is_valid()
    if not valid:
        return valid
    present = w.exists(ent.sha)
    if not present:
        return present
    return ent.sha
def wantrecurse_pre(ent):
    """Recursion predicate for the index-reading pass.

    Returns True when the entry is not already saved, False otherwise.
    """
    saved = already_saved(ent)
    return not saved
def wantrecurse_during(ent):
    """Recursion predicate for the saving pass.

    Returns True when the entry is not already saved; for an entry that
    is already saved, returns whatever ent.sha_missing() reports.
    """
    if already_saved(ent):
        return ent.sha_missing()
    return True
238 def find_hardlink_target(hlink_db, ent):
239 if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
240 link_paths = hlink_db.node_paths(ent.dev, ent.ino)
246 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
247 if not (ftotal % 10024):
248 qprogress('Reading index: %d\r' % ftotal)
249 exists = ent.exists()
250 hashvalid = already_saved(ent)
251 ent.set_sha_missing(not hashvalid)
252 if not opt.smaller or ent.size < opt.smaller:
253 if exists and not hashvalid:
256 progress('Reading index: %d, done.\n' % ftotal)
257 hashsplit.progress_callback = progress_report
259 # Root collisions occur when strip or graft options map more than one
260 # path to the same directory (paths which originally had separate
261 # parents). When that situation is detected, use empty metadata for
262 # the parent. Otherwise, use the metadata for the common parent.
263 # Collision example: "bup save ... --strip /foo /foo/bar /bar".
265 # FIXME: Add collision tests, or handle collisions some other way.
267 # FIXME: Detect/handle strip/graft name collisions (other than root),
268 # i.e. if '/foo/bar' and '/bar' both map to '/'.
271 root_collision = None
273 count = subcount = fcount = 0
276 for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
277 (dir, file) = os.path.split(ent.name)
278 exists = (ent.flags & index.IX_EXISTS)
279 hashvalid = already_saved(ent)
280 wasmissing = ent.sha_missing()
286 if ent.sha == index.EMPTY_SHA:
293 log('%s %-70s\n' % (status, ent.name))
294 elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
295 if not lastdir.startswith(dir):
296 log('%s %-70s\n' % (status, os.path.join(dir, '')))
305 if opt.smaller and ent.size >= opt.smaller:
306 if exists and not hashvalid:
308 log('skipping large file "%s"\n' % ent.name)
309 lastskip_name = ent.name
312 assert(dir.startswith('/'))
314 dirp = stripped_path_components(dir, extra)
316 dirp = stripped_path_components(dir, [opt.strip_path])
318 dirp = grafted_path_components(graft_points, dir)
320 dirp = path_components(dir)
322 # At this point, dirp contains a representation of the archive
323 # path that looks like [(archive_dir_name, real_fs_path), ...].
324 # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
325 # might look like this at some point:
326 # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].
328 # This dual representation supports stripping/grafting, where the
329 # archive path may not have a direct correspondence with the
330 # filesystem. The root directory is represented by an initial
331 # component named '', and any component that doesn't have a
332 # corresponding filesystem directory (due to grafting, for
333 # example) will have a real_fs_path of None, i.e. [('', None),
336 if first_root == None:
338 elif first_root != dirp[0]:
339 root_collision = True
341 # If switching to a new sub-tree, finish the current sub-tree.
342 while parts > [x[0] for x in dirp]:
343 _pop(force_tree = None)
345 # If switching to a new sub-tree, start a new sub-tree.
346 for path_component in dirp[len(parts):]:
347 dir_name, fs_path = path_component
348 # Not indexed, so just grab the FS metadata or use empty metadata.
350 meta = metadata.from_path(fs_path, normalized=True) \
351 if fs_path else metadata.Metadata()
352 except (OSError, IOError) as e:
354 lastskip_name = dir_name
355 meta = metadata.Metadata()
356 _push(dir_name, meta)
360 continue # We're at the top level -- keep the current root dir
361 # Since there's no filename, this is a subdir -- finish it.
362 oldtree = already_saved(ent) # may be None
363 newtree = _pop(force_tree = oldtree)
365 if lastskip_name and lastskip_name.startswith(ent.name):
368 ent.validate(GIT_MODE_TREE, newtree)
370 if exists and wasmissing:
374 # it's not a directory
378 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
379 git_info = (ent.gitmode, git_name, id)
380 shalists[-1].append(git_info)
381 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
382 meta = msr.metadata_at(ent.meta_ofs)
383 meta.hardlink_target = find_hardlink_target(hlink_db, ent)
384 # Restore the times that were cleared to 0 in the metastore.
385 (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
386 metalists[-1].append((sort_key, meta))
388 if stat.S_ISREG(ent.mode):
390 f = hashsplit.open_noatime(ent.name)
391 except (IOError, OSError) as e:
393 lastskip_name = ent.name
396 (mode, id) = hashsplit.split_to_blob_or_tree(
397 w.new_blob, w.new_tree, [f],
398 keep_boundaries=False)
399 except (IOError, OSError) as e:
400 add_error('%s: %s' % (ent.name, e))
401 lastskip_name = ent.name
403 if stat.S_ISDIR(ent.mode):
404 assert(0) # handled above
405 elif stat.S_ISLNK(ent.mode):
407 rl = os.readlink(ent.name)
408 except (OSError, IOError) as e:
410 lastskip_name = ent.name
412 (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl))
414 # Everything else should be fully described by its
415 # metadata, so just record an empty blob, so the paths
416 # in the tree and .bupm will match up.
417 (mode, id) = (GIT_MODE_FILE, w.new_blob(""))
420 ent.validate(mode, id)
422 git_name = git.mangle_name(file, ent.mode, ent.gitmode)
423 git_info = (mode, git_name, id)
424 shalists[-1].append(git_info)
425 sort_key = git.shalist_item_sort_key((ent.mode, file, id))
426 hlink = find_hardlink_target(hlink_db, ent)
428 meta = metadata.from_path(ent.name, hardlink_target=hlink,
430 except (OSError, IOError) as e:
432 lastskip_name = ent.name
434 metalists[-1].append((sort_key, meta))
436 if exists and wasmissing:
442 pct = total and count*100.0/total or 100
443 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
444 % (pct, count/1024, total/1024, fcount, ftotal))
446 while len(parts) > 1: # _pop() all the parts above the root
447 _pop(force_tree = None)
448 assert(len(shalists) == 1)
449 assert(len(metalists) == 1)
451 # Finish the root directory.
452 tree = _pop(force_tree = None,
453 # When there's a collision, use empty metadata for the root.
454 dir_metadata = metadata.Metadata() if root_collision else None)
457 print(tree.encode('hex'))
458 if opt.commit or opt.name:
459 msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv
460 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
461 commit = w.new_commit(tree, oldref, userline, date, None,
462 userline, date, None, msg)
464 print(commit.encode('hex'))
467 w.close() # must close before we can update the ref
471 cli.update_ref(refname, commit, oldref)
473 git.update_ref(refname, commit, oldref)
479 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))