X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Frestore-cmd.py;h=d52748914a52c5fe4943265a1881b4c4e8bd2704;hb=6c14d90cecf074255ce522032b107abf961e5e3d;hp=43209b3a3112d563e35c6c7440bac2076306c5ba;hpb=b5ae9433b8d43275f68d97a89a81d0816e777e15;p=bup.git diff --git a/cmd/restore-cmd.py b/cmd/restore-cmd.py index 43209b3..d527489 100755 --- a/cmd/restore-cmd.py +++ b/cmd/restore-cmd.py @@ -1,19 +1,30 @@ #!/usr/bin/env python -import errno, sys, stat +import copy, errno, sys, stat, re from bup import options, git, metadata, vfs from bup.helpers import * +from bup._helpers import write_sparsely optspec = """ bup restore [-C outdir] -- C,outdir= change to given outdir before extracting files numeric-ids restore numeric IDs (user, group, etc.) rather than names +exclude-rx= skip paths matching the unanchored regex (may be repeated) +exclude-rx-from= skip --exclude-rx patterns in file (may be repeated) +sparse create sparse files v,verbose increase log output (can be used more than once) +map-user= given OLD=NEW, restore OLD user as NEW user +map-group= given OLD=NEW, restore OLD group as NEW group +map-uid= given OLD=NEW, restore OLD uid as NEW uid +map-gid= given OLD=NEW, restore OLD gid as NEW gid q,quiet don't show progress meter """ total_restored = 0 +# stdout should be flushed after each line, even when not connected to a tty +sys.stdout.flush() +sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1) def verbose1(s): if opt.verbose >= 1: @@ -31,6 +42,14 @@ def plog(s): qprogress(s) +def valid_restore_path(path): + path = os.path.normpath(path) + if path.startswith('/'): + path = path[1:] + if '/' in path: + return True + + def print_info(n, fullname): if stat.S_ISDIR(n.mode): verbose1('%s/' % fullname) @@ -54,6 +73,38 @@ def create_path(n, fullname, meta): elif stat.S_ISLNK(n.mode): os.symlink(n.readlink(), fullname) + +def parse_owner_mappings(type, options, fatal): + """Traverse the options and parse all --map-TYPEs, or call Option.fatal().""" + opt_name = '--map-' + type + value_rx = r'^([^=]+)=([^=]*)$' + if type in ('uid', 'gid'): + value_rx = r'^(-?[0-9]+)=(-?[0-9]+)$' + owner_map = {} + for flag in options: + (option, parameter) = flag + if option != opt_name: + continue + match = re.match(value_rx, parameter) + if not match: + raise fatal("couldn't parse %s as %s mapping" % (parameter, type)) + old_id, new_id = match.groups() + if type in ('uid', 'gid'): + old_id = int(old_id) + new_id = int(new_id) + owner_map[old_id] = new_id + return owner_map + + +def apply_metadata(meta, name, restore_numeric_ids, owner_map): + m = copy.deepcopy(meta) + m.user = owner_map['user'].get(m.user, m.user) + m.group = owner_map['group'].get(m.group, m.group) + m.uid = owner_map['uid'].get(m.uid, m.uid) + m.gid = owner_map['gid'].get(m.gid, m.gid) + m.apply_to_path(name, restore_numeric_ids = restore_numeric_ids) + + # Track a list of (restore_path, vfs_path, meta) triples for each path # we've written for a given hardlink_target. This allows us to handle # the case where we restore a set of hardlinks out of order (with @@ -70,7 +121,6 @@ def hardlink_compatible(target_path, target_vfs_path, target_meta, return False target_node = top.lresolve(target_vfs_path) if src_node.mode != target_node.mode \ - or src_node.atime != target_node.atime \ or src_node.mtime != target_node.mtime \ or src_node.ctime != target_node.ctime \ or src_node.hash != target_node.hash: @@ -116,6 +166,18 @@ def write_file_content(fullname, n): outf.close() +def write_file_content_sparsely(fullname, n): + outfd = os.open(fullname, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0600) + try: + trailing_zeros = 0; + for b in chunkyreader(n.open()): + trailing_zeros = write_sparsely(outfd, b, 512, trailing_zeros) + pos = os.lseek(outfd, trailing_zeros, os.SEEK_END) + os.ftruncate(outfd, pos) + finally: + os.close(outfd) + + def find_dir_item_metadata_by_name(dir, name): """Find metadata in dir (a node) for an item with the given name, or for the directory itself if the name is ''.""" @@ -140,7 +202,7 @@ def find_dir_item_metadata_by_name(dir, name): meta_stream.close() -def do_root(n): +def do_root(n, sparse, owner_map, restore_root_meta = True): # Very similar to do_node(), except that this function doesn't # create a path for n's destination directory (and so ignores # n.fullname). It assumes the destination is '.', and restores @@ -151,9 +213,10 @@ def do_root(n): # Directory metadata is the first entry in any .bupm file in # the directory. Get it. mfile = n.metadata_file() # VFS file -- cannot close(). + root_meta = None if mfile: meta_stream = mfile.open() - meta = metadata.Metadata.read(meta_stream) + root_meta = metadata.Metadata.read(meta_stream) print_info(n, '.') total_restored += 1 plog('Restoring: %d\r' % total_restored) @@ -162,23 +225,29 @@ def do_root(n): # Don't get metadata if this is a dir -- handled in sub do_node(). if meta_stream and not stat.S_ISDIR(sub.mode): m = metadata.Metadata.read(meta_stream) - do_node(n, sub, m) - if meta: - meta.apply_to_path('.', restore_numeric_ids = opt.numeric_ids) + do_node(n, sub, sparse, owner_map, meta = m) + if root_meta and restore_root_meta: + apply_metadata(root_meta, '.', opt.numeric_ids, owner_map) finally: if meta_stream: meta_stream.close() - -def do_node(top, n, meta=None): +def do_node(top, n, sparse, owner_map, meta = None): # Create n.fullname(), relative to the current directory, and # restore all of its metadata, when available. The meta argument # will be None for dirs, or when there is no .bupm (i.e. no # metadata). global total_restored, opt meta_stream = None + write_content = sparse and write_file_content_sparsely or write_file_content try: fullname = n.fullname(stop_at=top) + # Match behavior of index --exclude-rx with respect to paths. + exclude_candidate = '/' + fullname + if(stat.S_ISDIR(n.mode)): + exclude_candidate += '/' + if should_rx_exclude_path(exclude_candidate, exclude_rxs): + return # If this is a directory, its metadata is the first entry in # any .bupm file inside the directory. Get it. if(stat.S_ISDIR(n.mode)): @@ -196,9 +265,9 @@ def do_node(top, n, meta=None): create_path(n, fullname, meta) if meta: if stat.S_ISREG(meta.mode): - write_file_content(fullname, n) + write_content(fullname, n) elif stat.S_ISREG(n.mode): - write_file_content(fullname, n) + write_content(fullname, n) total_restored += 1 plog('Restoring: %d\r' % total_restored) @@ -207,12 +276,14 @@ def do_node(top, n, meta=None): # Don't get metadata if this is a dir -- handled in sub do_node(). if meta_stream and not stat.S_ISDIR(sub.mode): m = metadata.Metadata.read(meta_stream) - do_node(top, sub, m) + do_node(top, sub, sparse, owner_map, meta = m) if meta and not created_hardlink: - meta.apply_to_path(fullname, restore_numeric_ids = opt.numeric_ids) + apply_metadata(meta, fullname, opt.numeric_ids, owner_map) finally: if meta_stream: meta_stream.close() + n.release() + handle_ctrl_c() @@ -225,12 +296,21 @@ top = vfs.RefList(None) if not extra: o.fatal('must specify at least one filename to restore') +exclude_rxs = parse_rx_excludes(flags, o.fatal) + +owner_map = {} +for map_type in ('user', 'group', 'uid', 'gid'): + owner_map[map_type] = parse_owner_mappings(map_type, flags, o.fatal) + if opt.outdir: mkdirp(opt.outdir) os.chdir(opt.outdir) ret = 0 for d in extra: + if not valid_restore_path(d): + add_error("ERROR: path %r doesn't include a branch and revision" % d) + continue path,name = os.path.split(d) try: n = top.lresolve(d) @@ -246,11 +326,7 @@ for d in extra: if not isdir: add_error('%r: not a directory' % d) else: - if name == '.': - do_root(n) - else: - for sub in n: - do_node(n, sub) + do_root(n, opt.sparse, owner_map, restore_root_meta = (name == '.')) else: # Source is /foo/what/ever -- extract ./ever to cwd. if isinstance(n, vfs.FakeSymlink): @@ -261,10 +337,10 @@ for d in extra: target = n.dereference() mkdirp(n.name) os.chdir(n.name) - do_root(target) + do_root(target, opt.sparse, owner_map) else: # Not a directory or fake symlink. meta = find_dir_item_metadata_by_name(n.parent, n.name) - do_node(n.parent, n, meta=meta) + do_node(n.parent, n, opt.sparse, owner_map, meta = meta) if not opt.quiet: progress('Restoring: %d, done.\n' % total_restored)