X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fsave-cmd.py;h=91d01ca84912dfa2f0e5ca5a1069f816fc06ede6;hb=c40b3dd5fd74e72024fbaad3daf5a958aefa1c54;hp=d8ab9486136bfdb4c742df2054626d18b01a4c72;hpb=2ba8e57424a6c53dcb12ea36d3b2f27979c431c7;p=bup.git diff --git a/cmd/save-cmd.py b/cmd/save-cmd.py index d8ab948..91d01ca 100755 --- a/cmd/save-cmd.py +++ b/cmd/save-cmd.py @@ -1,8 +1,22 @@ -#!/usr/bin/env python -import sys, stat, time, math -from bup import hashsplit, git, options, index, client -from bup.helpers import * +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import +from errno import EACCES +from io import BytesIO +import os, sys, stat, time, math + +from bup import hashsplit, git, options, index, client, metadata, hlinkdb from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK +from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c, + hostname, istty2, log, parse_date_or_fatal, parse_num, + path_components, progress, qprogress, resolve_parent, + saved_errors, stripped_path_components, + userfullname, username, valid_save_name) optspec = """ @@ -21,8 +35,7 @@ f,indexfile= the name of the index file (normally BUP_DIR/bupindex) strip strips the path to every filename given strip-path= path-prefix to be stripped when saving graft= a graft point *old_path*=*new_path* (can be used more than once) -0 set compression-level to 0 -9 set compression-level to 9 +#,compress= set compression level to # (0-9, 9 is highest) [1] """ o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) @@ -33,13 +46,6 @@ if not (opt.tree or opt.commit or opt.name): if not extra: o.fatal("no filenames given") -if opt['0']: - compression_level = 0 -elif opt['9']: - compression_level = 9 -else: - compression_level = 1 - opt.progress = (istty2 and not opt.quiet) opt.smaller = parse_num(opt.smaller or 0) if opt.bwlimit: @@ -66,24 +72,31 @@ if opt.graft: splitted_parameter = parameter.split('=') if len(splitted_parameter) != 2: o.fatal("a graft point must be of the form old_path=new_path") - graft_points.append((realpath(splitted_parameter[0]), - realpath(splitted_parameter[1]))) + old_path, new_path = splitted_parameter + if not (old_path and new_path): + o.fatal("a graft point cannot be empty") + graft_points.append((resolve_parent(old_path), + resolve_parent(new_path))) is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") -if opt.name and opt.name.startswith('.'): +if opt.name and not valid_save_name(opt.name): o.fatal("'%s' is not a valid branch name" % opt.name) refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.remote or is_reverse: - cli = client.Client(opt.remote) + try: + cli = client.Client(opt.remote) + except client.ClientError as e: + log('error: %s' % e) + sys.exit(1) oldref = refname and cli.read_ref(refname) or None - w = cli.new_packwriter() + w = cli.new_packwriter(compression_level=opt.compress) else: cli = None oldref = refname and git.read_ref(refname) or None - w = git.PackWriter(compression_level=compression_level) + w = git.PackWriter(compression_level=opt.compress) handle_ctrl_c() @@ -95,28 +108,75 @@ def eatslash(dir): return dir -parts = [''] -shalists = [[]] +# Metadata is stored in a file named .bupm in each directory. The +# first metadata entry will be the metadata for the current directory. +# The remaining entries will be for each of the other directory +# elements, in the order they're listed in the index. +# +# Since the git tree elements are sorted according to +# git.shalist_item_sort_key, the metalist items are accumulated as +# (sort_key, metadata) tuples, and then sorted when the .bupm file is +# created. The sort_key must be computed using the element's real +# name and mode rather than the git mode and (possibly mangled) name. + +# Maintain a stack of information representing the current location in +# the archive being constructed. The current path is recorded in +# parts, which will be something like ['', 'home', 'someuser'], and +# the accumulated content and metadata for of the dirs in parts is +# stored in parallel stacks in shalists and metalists. + +parts = [] # Current archive position (stack of dir names). +shalists = [] # Hashes for each dir in paths. +metalists = [] # Metadata for each dir in paths. -def _push(part): - assert(part) + +def _push(part, metadata): + # Enter a new archive directory -- make it the current directory. parts.append(part) shalists.append([]) + metalists.append([('', metadata)]) # This dir's metadata (no name). + -def _pop(force_tree): +def _pop(force_tree, dir_metadata=None): + # Leave the current archive directory and add its tree to its parent. assert(len(parts) >= 1) part = parts.pop() shalist = shalists.pop() - tree = force_tree or w.new_tree(shalist) + metalist = metalists.pop() + if metalist and not force_tree: + if dir_metadata: # Override the original metadata pushed for this dir. + metalist = [('', dir_metadata)] + metalist[1:] + sorted_metalist = sorted(metalist, key = lambda x : x[0]) + metadata = ''.join([m[1].encode() for m in sorted_metalist]) + metadata_f = BytesIO(metadata) + mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree, + [metadata_f], + keep_boundaries=False) + shalist.append((mode, '.bupm', id)) + # FIXME: only test if collision is possible (i.e. given --strip, etc.)? + if force_tree: + tree = force_tree + else: + names_seen = set() + clean_list = [] + for x in shalist: + name = x[1] + if name in names_seen: + parent_path = '/'.join(parts) + '/' + add_error('error: ignoring duplicate path %r in %r' + % (name, parent_path)) + else: + names_seen.add(name) + clean_list.append(x) + tree = w.new_tree(clean_list) if shalists: shalists[-1].append((GIT_MODE_TREE, git.mangle_name(part, GIT_MODE_TREE, GIT_MODE_TREE), tree)) - else: # this was the toplevel, so put it back for sanity - shalists.append(shalist) return tree + lastremain = None def progress_report(n): global count, subcount, lastremain @@ -158,6 +218,14 @@ def progress_report(n): indexfile = opt.indexfile or git.repo('bupindex') r = index.Reader(indexfile) +try: + msr = index.MetaStoreReader(indexfile + '.meta') +except IOError as ex: + if ex.errno != EACCES: + raise + log('error: cannot access %r; have you run bup index?' % indexfile) + sys.exit(1) +hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink') def already_saved(ent): return ent.is_valid() and w.exists(ent.sha) and ent.sha @@ -168,6 +236,12 @@ def wantrecurse_pre(ent): def wantrecurse_during(ent): return not already_saved(ent) or ent.sha_missing() +def find_hardlink_target(hlink_db, ent): + if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1: + link_paths = hlink_db.node_paths(ent.dev, ent.ino) + if link_paths: + return link_paths[0] + total = ftotal = 0 if opt.progress: for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre): @@ -183,6 +257,19 @@ if opt.progress: progress('Reading index: %d, done.\n' % ftotal) hashsplit.progress_callback = progress_report +# Root collisions occur when strip or graft options map more than one +# path to the same directory (paths which originally had separate +# parents). When that situation is detected, use empty metadata for +# the parent. Otherwise, use the metadata for the common parent. +# Collision example: "bup save ... --strip /foo /foo/bar /bar". + +# FIXME: Add collision tests, or handle collisions some other way. + +# FIXME: Detect/handle strip/graft name collisions (other than root), +# i.e. if '/foo/bar' and '/bar' both map to '/'. + +first_root = None +root_collision = None tstart = time.time() count = subcount = fcount = 0 lastskip_name = None @@ -218,30 +305,60 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): continue if opt.smaller and ent.size >= opt.smaller: if exists and not hashvalid: - add_error('skipping large file "%s"' % ent.name) + if opt.verbose: + log('skipping large file "%s"\n' % ent.name) lastskip_name = ent.name continue assert(dir.startswith('/')) if opt.strip: - stripped_base_path = strip_base_path(dir, extra) - dirp = stripped_base_path.split('/') + dirp = stripped_path_components(dir, extra) elif opt.strip_path: - dirp = strip_path(opt.strip_path, dir).split('/') + dirp = stripped_path_components(dir, [opt.strip_path]) elif graft_points: - grafted = graft_path(graft_points, dir) - dirp = grafted.split('/') + dirp = grafted_path_components(graft_points, dir) else: - dirp = dir.split('/') - while parts > dirp: + dirp = path_components(dir) + + # At this point, dirp contains a representation of the archive + # path that looks like [(archive_dir_name, real_fs_path), ...]. + # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp + # might look like this at some point: + # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...]. + + # This dual representation supports stripping/grafting, where the + # archive path may not have a direct correspondence with the + # filesystem. The root directory is represented by an initial + # component named '', and any component that doesn't have a + # corresponding filesystem directory (due to grafting, for + # example) will have a real_fs_path of None, i.e. [('', None), + # ...]. + + if first_root == None: + first_root = dirp[0] + elif first_root != dirp[0]: + root_collision = True + + # If switching to a new sub-tree, finish the current sub-tree. + while parts > [x[0] for x in dirp]: _pop(force_tree = None) - if dir != '/': - for part in dirp[len(parts):]: - _push(part) + + # If switching to a new sub-tree, start a new sub-tree. + for path_component in dirp[len(parts):]: + dir_name, fs_path = path_component + # Not indexed, so just grab the FS metadata or use empty metadata. + try: + meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() + except (OSError, IOError) as e: + add_error(e) + lastskip_name = dir_name + meta = metadata.Metadata() + _push(dir_name, meta) if not file: - # no filename portion means this is a subdir. But - # sub/parentdirectories already handled in the pop/push() part above. + if len(parts) == 1: + continue # We're at the top level -- keep the current root dir + # Since there's no filename, this is a subdir -- finish it. oldtree = already_saved(ent) # may be None newtree = _pop(force_tree = oldtree) if not oldtree: @@ -258,14 +375,20 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): id = None if hashvalid: id = ent.sha - shalists[-1].append((ent.gitmode, - git.mangle_name(file, ent.mode, ent.gitmode), - id)) + git_name = git.mangle_name(file, ent.mode, ent.gitmode) + git_info = (ent.gitmode, git_name, id) + shalists[-1].append(git_info) + sort_key = git.shalist_item_sort_key((ent.mode, file, id)) + meta = msr.metadata_at(ent.meta_ofs) + meta.hardlink_target = find_hardlink_target(hlink_db, ent) + # Restore the times that were cleared to 0 in the metastore. + (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime) + metalists[-1].append((sort_key, meta)) else: if stat.S_ISREG(ent.mode): try: f = hashsplit.open_noatime(ent.name) - except (IOError, OSError), e: + except (IOError, OSError) as e: add_error(e) lastskip_name = ent.name else: @@ -273,7 +396,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (mode, id) = hashsplit.split_to_blob_or_tree( w.new_blob, w.new_tree, [f], keep_boundaries=False) - except (IOError, OSError), e: + except (IOError, OSError) as e: add_error('%s: %s' % (ent.name, e)) lastskip_name = ent.name else: @@ -282,20 +405,33 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): elif stat.S_ISLNK(ent.mode): try: rl = os.readlink(ent.name) - except (OSError, IOError), e: + except (OSError, IOError) as e: add_error(e) lastskip_name = ent.name else: (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl)) else: - add_error(Exception('skipping special file "%s"' % ent.name)) - lastskip_name = ent.name + # Everything else should be fully described by its + # metadata, so just record an empty blob, so the paths + # in the tree and .bupm will match up. + (mode, id) = (GIT_MODE_FILE, w.new_blob("")) + if id: ent.validate(mode, id) ent.repack() - shalists[-1].append((mode, - git.mangle_name(file, ent.mode, ent.gitmode), - id)) + git_name = git.mangle_name(file, ent.mode, ent.gitmode) + git_info = (mode, git_name, id) + shalists[-1].append(git_info) + sort_key = git.shalist_item_sort_key((ent.mode, file, id)) + hlink = find_hardlink_target(hlink_db, ent) + try: + meta = metadata.from_path(ent.name, hardlink_target=hlink) + except (OSError, IOError) as e: + add_error(e) + lastskip_name = ent.name + else: + metalists[-1].append((sort_key, meta)) + if exists and wasmissing: count += oldsize subcount = 0 @@ -306,18 +442,27 @@ if opt.progress: progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n' % (pct, count/1024, total/1024, fcount, ftotal)) -while len(parts) > 1: +while len(parts) > 1: # _pop() all the parts above the root _pop(force_tree = None) assert(len(shalists) == 1) -tree = w.new_tree(shalists[-1]) +assert(len(metalists) == 1) + +# Finish the root directory. +tree = _pop(force_tree = None, + # When there's a collision, use empty metadata for the root. + dir_metadata = metadata.Metadata() if root_collision else None) + if opt.tree: print tree.encode('hex') if opt.commit or opt.name: - msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv - commit = w.new_commit(oldref, tree, date, msg) + msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv + userline = '%s <%s@%s>' % (userfullname(), username(), hostname()) + commit = w.new_commit(tree, oldref, userline, date, None, + userline, date, None, msg) if opt.commit: print commit.encode('hex') +msr.close() w.close() # must close before we can update the ref if opt.name: