X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Frestore-cmd.py;h=d52748914a52c5fe4943265a1881b4c4e8bd2704;hb=6c14d90cecf074255ce522032b107abf961e5e3d;hp=6066b6545412d697711d7b0f13763eb0c4962afc;hpb=bab7b16b760ba1fd686f6f5b274fc8ba2ad360e2;p=bup.git diff --git a/cmd/restore-cmd.py b/cmd/restore-cmd.py index 6066b65..d527489 100755 --- a/cmd/restore-cmd.py +++ b/cmd/restore-cmd.py @@ -1,70 +1,293 @@ #!/usr/bin/env python -import sys, stat, time -from bup import options, git, vfs +import copy, errno, sys, stat, re +from bup import options, git, metadata, vfs from bup.helpers import * +from bup._helpers import write_sparsely optspec = """ bup restore [-C outdir] -- -C,outdir= change to given outdir before extracting files -v,verbose increase log output (can be used more than once) -q,quiet don't show progress meter +C,outdir= change to given outdir before extracting files +numeric-ids restore numeric IDs (user, group, etc.) rather than names +exclude-rx= skip paths matching the unanchored regex (may be repeated) +exclude-rx-from= skip --exclude-rx patterns in file (may be repeated) +sparse create sparse files +v,verbose increase log output (can be used more than once) +map-user= given OLD=NEW, restore OLD user as NEW user +map-group= given OLD=NEW, restore OLD group as NEW group +map-uid= given OLD=NEW, restore OLD uid as NEW uid +map-gid= given OLD=NEW, restore OLD gid as NEW gid +q,quiet don't show progress meter """ -total_restored = last_progress = 0 +total_restored = 0 +# stdout should be flushed after each line, even when not connected to a tty +sys.stdout.flush() +sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1) def verbose1(s): - global last_progress if opt.verbose >= 1: print s - last_progress = 0 def verbose2(s): - global last_progress if opt.verbose >= 2: print s - last_progress = 0 def plog(s): - global last_progress if opt.quiet: return - now = time.time() - if now - last_progress > 0.2: - progress(s) - last_progress = now + qprogress(s) -def do_node(top, n): - global total_restored - fullname = n.fullname(stop_at=top) - unlink(fullname) +def valid_restore_path(path): + path = os.path.normpath(path) + if path.startswith('/'): + path = path[1:] + if '/' in path: + return True + + +def print_info(n, fullname): if stat.S_ISDIR(n.mode): verbose1('%s/' % fullname) - mkdirp(fullname) elif stat.S_ISLNK(n.mode): verbose2('%s@ -> %s' % (fullname, n.readlink())) - os.symlink(n.readlink(), fullname) else: verbose2(fullname) - outf = open(fullname, 'wb') - try: - for b in chunkyreader(n.open()): - outf.write(b) - finally: - outf.close() - total_restored += 1 - plog('Restoring: %d\r' % total_restored) - for sub in n: - do_node(top, sub) - - + + +def create_path(n, fullname, meta): + if meta: + meta.create_path(fullname) + else: + # These fallbacks are important -- meta could be null if, for + # example, save created a "fake" item, i.e. a new strip/graft + # path element, etc. You can find cases like that by + # searching for "Metadata()". 
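
# An aside on valid_restore_path() above (an illustrative sketch, not part of
# the patch): restore sources must name at least a branch and a revision,
# i.e. /BRANCH/REVISION[/path...], so a bare branch name or the VFS root is
# rejected later with "doesn't include a branch and revision".  The standalone
# re-implementation below uses a hypothetical name and only the stdlib.
import os.path

def _names_branch_and_revision(path):
    # Mirror valid_restore_path(): true only when something still follows the
    # branch component after normalization.
    path = os.path.normpath(path)
    if path.startswith('/'):
        path = path[1:]
    return '/' in path

assert not _names_branch_and_revision('/')                 # VFS root
assert not _names_branch_and_revision('/mybranch')         # branch only
assert _names_branch_and_revision('/mybranch/latest')      # branch + revision
assert _names_branch_and_revision('/mybranch/latest/etc')  # plus a subpath
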
+ unlink(fullname) + if stat.S_ISDIR(n.mode): + mkdirp(fullname) + elif stat.S_ISLNK(n.mode): + os.symlink(n.readlink(), fullname) + + +def parse_owner_mappings(type, options, fatal): + """Traverse the options and parse all --map-TYPEs, or call Option.fatal().""" + opt_name = '--map-' + type + value_rx = r'^([^=]+)=([^=]*)$' + if type in ('uid', 'gid'): + value_rx = r'^(-?[0-9]+)=(-?[0-9]+)$' + owner_map = {} + for flag in options: + (option, parameter) = flag + if option != opt_name: + continue + match = re.match(value_rx, parameter) + if not match: + raise fatal("couldn't parse %s as %s mapping" % (parameter, type)) + old_id, new_id = match.groups() + if type in ('uid', 'gid'): + old_id = int(old_id) + new_id = int(new_id) + owner_map[old_id] = new_id + return owner_map + + +def apply_metadata(meta, name, restore_numeric_ids, owner_map): + m = copy.deepcopy(meta) + m.user = owner_map['user'].get(m.user, m.user) + m.group = owner_map['group'].get(m.group, m.group) + m.uid = owner_map['uid'].get(m.uid, m.uid) + m.gid = owner_map['gid'].get(m.gid, m.gid) + m.apply_to_path(name, restore_numeric_ids = restore_numeric_ids) + + +# Track a list of (restore_path, vfs_path, meta) triples for each path +# we've written for a given hardlink_target. This allows us to handle +# the case where we restore a set of hardlinks out of order (with +# respect to the original save call(s)) -- i.e. when we don't restore +# the hardlink_target path first. This data also allows us to attempt +# to handle other situations like hardlink sets that change on disk +# during a save, or between index and save. +targets_written = {} + +def hardlink_compatible(target_path, target_vfs_path, target_meta, + src_node, src_meta): + global top + if not os.path.exists(target_path): + return False + target_node = top.lresolve(target_vfs_path) + if src_node.mode != target_node.mode \ + or src_node.mtime != target_node.mtime \ + or src_node.ctime != target_node.ctime \ + or src_node.hash != target_node.hash: + return False + if not src_meta.same_file(target_meta): + return False + return True + + +def hardlink_if_possible(fullname, node, meta): + """Find a suitable hardlink target, link to it, and return true, + otherwise return false.""" + # Expect the caller to handle restoring the metadata if + # hardlinking isn't possible. + global targets_written + target = meta.hardlink_target + target_versions = targets_written.get(target) + if target_versions: + # Check every path in the set that we've written so far for a match. 
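
# An expanded note on the loop below (an illustrative sketch, not part of the
# patch): restoring a hardlink set amounts to remembering where each
# hardlink_target was first materialized, trying os.link() against those
# earlier paths, and falling back to writing an ordinary file when nothing
# can be linked (e.g. EXDEV when the restore spans filesystems).  The helper
# below uses hypothetical names (link_or_remember, seen).
import errno, os

def link_or_remember(seen, key, dest):
    # seen maps a hardlink key to the paths already restored for it; return
    # True if dest could be created as a hardlink to one of those paths.
    for earlier in seen.get(key, []):
        try:
            os.link(earlier, dest)
            return True
        except OSError as e:
            if e.errno != errno.EXDEV:
                raise
    seen.setdefault(key, []).append(dest)
    return False  # caller restores dest as a regular file instead

# Typical use: if not link_or_remember(seen, meta.hardlink_target, fullname),
# write the file content and apply its metadata as usual.
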
+ for (target_path, target_vfs_path, target_meta) in target_versions: + if hardlink_compatible(target_path, target_vfs_path, target_meta, + node, meta): + try: + os.link(target_path, fullname) + return True + except OSError, e: + if e.errno != errno.EXDEV: + raise + else: + target_versions = [] + targets_written[target] = target_versions + full_vfs_path = node.fullname() + target_versions.append((fullname, full_vfs_path, meta)) + return False + + +def write_file_content(fullname, n): + outf = open(fullname, 'wb') + try: + for b in chunkyreader(n.open()): + outf.write(b) + finally: + outf.close() + + +def write_file_content_sparsely(fullname, n): + outfd = os.open(fullname, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0600) + try: + trailing_zeros = 0; + for b in chunkyreader(n.open()): + trailing_zeros = write_sparsely(outfd, b, 512, trailing_zeros) + pos = os.lseek(outfd, trailing_zeros, os.SEEK_END) + os.ftruncate(outfd, pos) + finally: + os.close(outfd) + + +def find_dir_item_metadata_by_name(dir, name): + """Find metadata in dir (a node) for an item with the given name, + or for the directory itself if the name is ''.""" + meta_stream = None + try: + mfile = dir.metadata_file() # VFS file -- cannot close(). + if mfile: + meta_stream = mfile.open() + # First entry is for the dir itself. + meta = metadata.Metadata.read(meta_stream) + if name == '': + return meta + for sub in dir: + if stat.S_ISDIR(sub.mode): + meta = find_dir_item_metadata_by_name(sub, '') + else: + meta = metadata.Metadata.read(meta_stream) + if sub.name == name: + return meta + finally: + if meta_stream: + meta_stream.close() + + +def do_root(n, sparse, owner_map, restore_root_meta = True): + # Very similar to do_node(), except that this function doesn't + # create a path for n's destination directory (and so ignores + # n.fullname). It assumes the destination is '.', and restores + # n's metadata and content there. + global total_restored, opt + meta_stream = None + try: + # Directory metadata is the first entry in any .bupm file in + # the directory. Get it. + mfile = n.metadata_file() # VFS file -- cannot close(). + root_meta = None + if mfile: + meta_stream = mfile.open() + root_meta = metadata.Metadata.read(meta_stream) + print_info(n, '.') + total_restored += 1 + plog('Restoring: %d\r' % total_restored) + for sub in n: + m = None + # Don't get metadata if this is a dir -- handled in sub do_node(). + if meta_stream and not stat.S_ISDIR(sub.mode): + m = metadata.Metadata.read(meta_stream) + do_node(n, sub, sparse, owner_map, meta = m) + if root_meta and restore_root_meta: + apply_metadata(root_meta, '.', opt.numeric_ids, owner_map) + finally: + if meta_stream: + meta_stream.close() + +def do_node(top, n, sparse, owner_map, meta = None): + # Create n.fullname(), relative to the current directory, and + # restore all of its metadata, when available. The meta argument + # will be None for dirs, or when there is no .bupm (i.e. no + # metadata). + global total_restored, opt + meta_stream = None + write_content = sparse and write_file_content_sparsely or write_file_content + try: + fullname = n.fullname(stop_at=top) + # Match behavior of index --exclude-rx with respect to paths. + exclude_candidate = '/' + fullname + if(stat.S_ISDIR(n.mode)): + exclude_candidate += '/' + if should_rx_exclude_path(exclude_candidate, exclude_rxs): + return + # If this is a directory, its metadata is the first entry in + # any .bupm file inside the directory. Get it. 
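
# An aside on write_file_content_sparsely() above (an illustrative sketch,
# not part of the patch): the sparse strategy is to seek over runs of zero
# bytes so the kernel leaves holes, then ftruncate() up to the final offset
# so a trailing hole still counts toward the file's length.  bup's
# write_sparsely() (from the bup._helpers C module) is also passed a minimum
# run length (the 512 above) and a running trailing-zeros count; the
# pure-Python approximation below, with hypothetical names, only skips
# chunks that are entirely zero.
import os

def _write_chunks_sparsely(path, chunks):
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        for chunk in chunks:
            if chunk and chunk.count(b'\0') == len(chunk):
                os.lseek(fd, len(chunk), os.SEEK_CUR)   # all zeros: leave a hole
            else:
                os.write(fd, chunk)
        os.ftruncate(fd, os.lseek(fd, 0, os.SEEK_CUR))  # fix up a trailing hole
    finally:
        os.close(fd)
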
+ if(stat.S_ISDIR(n.mode)): + mfile = n.metadata_file() # VFS file -- cannot close(). + if mfile: + meta_stream = mfile.open() + meta = metadata.Metadata.read(meta_stream) + print_info(n, fullname) + + created_hardlink = False + if meta and meta.hardlink_target: + created_hardlink = hardlink_if_possible(fullname, n, meta) + + if not created_hardlink: + create_path(n, fullname, meta) + if meta: + if stat.S_ISREG(meta.mode): + write_content(fullname, n) + elif stat.S_ISREG(n.mode): + write_content(fullname, n) + + total_restored += 1 + plog('Restoring: %d\r' % total_restored) + for sub in n: + m = None + # Don't get metadata if this is a dir -- handled in sub do_node(). + if meta_stream and not stat.S_ISDIR(sub.mode): + m = metadata.Metadata.read(meta_stream) + do_node(top, sub, sparse, owner_map, meta = m) + if meta and not created_hardlink: + apply_metadata(meta, fullname, opt.numeric_ids, owner_map) + finally: + if meta_stream: + meta_stream.close() + n.release() + + handle_ctrl_c() -o = options.Options('bup restore', optspec) +o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() @@ -73,12 +296,21 @@ top = vfs.RefList(None) if not extra: o.fatal('must specify at least one filename to restore') +exclude_rxs = parse_rx_excludes(flags, o.fatal) + +owner_map = {} +for map_type in ('user', 'group', 'uid', 'gid'): + owner_map[map_type] = parse_owner_mappings(map_type, flags, o.fatal) + if opt.outdir: mkdirp(opt.outdir) os.chdir(opt.outdir) ret = 0 for d in extra: + if not valid_restore_path(d): + add_error("ERROR: path %r doesn't include a branch and revision" % d) + continue path,name = os.path.split(d) try: n = top.lresolve(d) @@ -87,18 +319,31 @@ for d in extra: continue isdir = stat.S_ISDIR(n.mode) if not name or name == '.': - # trailing slash: extract children to cwd + # Source is /foo/what/ever/ or /foo/what/ever/. -- extract + # what/ever/* to the current directory, and if name == '.' + # (i.e. /foo/what/ever/.), then also restore what/ever's + # metadata to the current directory. if not isdir: add_error('%r: not a directory' % d) else: - for sub in n: - do_node(n, sub) + do_root(n, opt.sparse, owner_map, restore_root_meta = (name == '.')) else: - # no trailing slash: extract node and its children to cwd - do_node(n.parent, n) + # Source is /foo/what/ever -- extract ./ever to cwd. + if isinstance(n, vfs.FakeSymlink): + # Source is actually /foo/what, i.e. a top-level commit + # like /foo/latest, which is a symlink to ../.commit/SHA. + # So dereference it, and restore ../.commit/SHA/. to + # "./what/.". + target = n.dereference() + mkdirp(n.name) + os.chdir(n.name) + do_root(target, opt.sparse, owner_map) + else: # Not a directory or fake symlink. + meta = find_dir_item_metadata_by_name(n.parent, n.name) + do_node(n.parent, n, opt.sparse, owner_map, meta = meta) if not opt.quiet: - log('Restoring: %d, done.\n' % total_restored) + progress('Restoring: %d, done.\n' % total_restored) if saved_errors: log('WARNING: %d errors encountered while restoring.\n' % len(saved_errors))
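
A quick illustration of the source-path handling in the final loop above (a
sketch, not part of the patch itself): the trailing-slash cases fall straight
out of os.path.split(), which decides whether bup creates a new directory for
the subtree or restores its contents (and, for a trailing "/.", the
directory's own metadata as well) into the current directory.

    import os.path

    # /foo/what/ever   -> create ./ever and restore the subtree into it
    # /foo/what/ever/  -> restore ever's contents into the current directory
    # /foo/what/ever/. -> contents plus ever's own metadata into the current directory
    assert os.path.split('/foo/what/ever') == ('/foo/what', 'ever')
    assert os.path.split('/foo/what/ever/') == ('/foo/what/ever', '')
    assert os.path.split('/foo/what/ever/.') == ('/foo/what/ever', '.')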