From bab7b16b760ba1fd686f6f5b274fc8ba2ad360e2 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 8 Sep 2010 02:37:54 -0700 Subject: [PATCH] cmd/restore: embarrassingly slow implementation of 'bup restore' Well, that was easy, since vfs.py already existed and is doing most of the hard work. Only 103 lines including all the log message handling and whatnot. Only one catch: the restoring code is definitely not optimized. Among other things (like the probably-excessive-for-our-restoring-needs layering in vfs.py), we're still calling into 'git cat-file --stdin' to retrieve our objects. This involves lots and lots of context switches, plus it can't use midx files for its lookups. The result is that restoring takes much more CPU time and memory than it really should. But oh well, we have to start somewhere. Signed-off-by: Avery Pennarun --- Documentation/bup-ftp.md | 2 +- Documentation/bup-fuse.md | 3 +- Documentation/bup-restore.md | 108 +++++++++++++++++++++++++++++++++++ Documentation/bup-save.md | 3 +- Documentation/bup-web.md | 2 +- Documentation/bup.md | 2 + README.md | 14 +---- cmd/restore-cmd.py | 105 ++++++++++++++++++++++++++++++++++ lib/bup/vfs.py | 10 ++-- main.py | 1 + t/test.sh | 9 +++ 11 files changed, 238 insertions(+), 21 deletions(-) create mode 100644 Documentation/bup-restore.md create mode 100755 cmd/restore-cmd.py diff --git a/Documentation/bup-ftp.md b/Documentation/bup-ftp.md index 04d0c72..8173320 100644 --- a/Documentation/bup-ftp.md +++ b/Documentation/bup-ftp.md @@ -80,7 +80,7 @@ quit # SEE ALSO -`bup-join`(1), `bup-fuse`(1), `bup-ls`(1), `bup-save`(1), `git-show`(1) +`bup-fuse`(1), `bup-ls`(1), `bup-save`(1), `bup-restore`(1) # BUP diff --git a/Documentation/bup-fuse.md b/Documentation/bup-fuse.md index 1a4469d..3304b0b 100644 --- a/Documentation/bup-fuse.md +++ b/Documentation/bup-fuse.md @@ -50,7 +50,8 @@ should unmount it with `umount`(8). 
# SEE ALSO -`fuse`(7), `fusermount`(1), `bup-ls`(1), `bup-ftp`(1) +`fuse`(7), `fusermount`(1), `bup-ls`(1), `bup-ftp`(1), +`bup-restore`(1), `bup-web`(1) # BUP diff --git a/Documentation/bup-restore.md b/Documentation/bup-restore.md new file mode 100644 index 0000000..8e2687d --- /dev/null +++ b/Documentation/bup-restore.md @@ -0,0 +1,108 @@ +% bup-restore(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-restore - extract files from a backup set + +# SYNOPSIS + +bup restore [--outdir=*outdir*] [-v] [-q] \<paths...\> + +# DESCRIPTION + +`bup restore` extracts files from a backup set (created +with `bup-save`(1)) to the local filesystem. + +The specified *paths* are of the form +/_branch_/_revision_/_path/to/file_. The components of the +path are as follows: + +branch +: the name of the backup set to restore from; this + corresponds to the --name (-n) option to `bup save`. + +revision +: the revision of the backup set to restore. The + revision *latest* is always the most recent + backup on the given branch. You can discover other + revisions using `bup ls /branch`. + +/path/to/file +: the original absolute filesystem path to the file you + want to restore. For example, `/etc/passwd`. + +Note: if the /path/to/file is a directory, `bup restore` +will restore that directory as well as recursively +restoring all its contents. + +If /path/to/file is a directory ending in a slash (i.e. +/path/to/dir/), `bup restore` will restore the children of +that directory directly to the current directory (or the +`--outdir`). If the directory does *not* end in a slash, +the children will be restored to a subdirectory of the +current directory. See the EXAMPLES section to see how +this works. + + +# OPTIONS + +-C, --outdir=*outdir* +: create and change to directory *outdir* before + extracting the files. + +-v, --verbose +: increase log output. Given once, prints every + directory as it is restored; given twice, prints every + file and directory. 
+ +-q, --quiet +: don't show the progress meter. Normally, if stderr is + a tty, a progress display is printed that shows the + total number of files restored. + +# EXAMPLES + +Create a simple test backup set: + + $ bup index -u /etc + $ bup save -n mybackup /etc/passwd /etc/profile + +Restore just one file: + + $ bup restore /mybackup/latest/etc/passwd + Restoring: 1, done. + + $ ls -l passwd + -rw-r--r-- 1 apenwarr apenwarr 1478 2010-09-08 03:06 passwd + +Restore the whole directory (no trailing slash): + + $ bup restore -C test1 /mybackup/latest/etc + Restoring: 3, done. + + $ find test1 + test1 + test1/etc + test1/etc/passwd + test1/etc/profile + +Restore the whole directory (trailing slash): + + $ bup restore -C test2 /mybackup/latest/etc/ + Restoring: 2, done. + + $ find test2 + test2 + test2/passwd + test2/profile + + +# SEE ALSO + +`bup-save`(1), `bup-ftp`(1), `bup-fuse`(1), `bup-web`(1) + +# BUP + +Part of the `bup`(1) suite. diff --git a/Documentation/bup-save.md b/Documentation/bup-save.md index debf64e..9471474 100644 --- a/Documentation/bup-save.md +++ b/Documentation/bup-save.md @@ -83,7 +83,8 @@ for `bup-index`(1). # SEE ALSO -`bup-index`(1), `bup-split`(1), `bup-on`(1) +`bup-index`(1), `bup-split`(1), `bup-on`(1), +`bup-restore`(1) # BUP diff --git a/Documentation/bup-web.md b/Documentation/bup-web.md index 23e82a8..706fab0 100644 --- a/Documentation/bup-web.md +++ b/Documentation/bup-web.md @@ -34,7 +34,7 @@ can omit the bind address to bind to all available interfaces: `:8080`. # SEE ALSO -`bup-fuse`(1), `bup-ls`(1), `bup-ftp`(1) +`bup-fuse`(1), `bup-ls`(1), `bup-ftp`(1), `bup-restore`(1) # BUP diff --git a/Documentation/bup.md b/Documentation/bup.md index 3dc6ee9..560e0d6 100644 --- a/Documentation/bup.md +++ b/Documentation/bup.md @@ -51,6 +51,8 @@ pages. 
: Create or display the index of files to back up `bup-on`(1) : Backup a remote machine to the local one +`bup-restore`(1) +: Extract files from a backup set `bup-save`(1) : Save files into a backup set (note: run "bup index" first) `bup-web`(1) diff --git a/README.md b/README.md index f53c87c..8a66b8e 100644 --- a/README.md +++ b/README.md @@ -230,23 +230,11 @@ Things that are stupid for now but which we'll fix later Help with any of these problems, or others, is very welcome. Join the mailing list (see below) if you'd like to help. - - 'bup save' doesn't know about file metadata. + - 'bup save' and 'bup restore' don't know about file metadata. That means we aren't saving file attributes, mtimes, ownership, hard links, MacOS resource forks, etc. Clearly this needs to be improved. - - There's no 'bup restore' yet. - - 'bup save' saves files in the standard git 'tree of blobs' format, so you - could then "restore" the files using something like 'git checkout'. But - that's a git command, not a bup command, so it's hard to explain and - doesn't support retrieving objects from a remote bup server without first - fetching and packing an entire (possibly huge) pack, which could be very - slow. Also, like 'bup save', you would need extra features in order to - properly restore file metadata. And files that bup has split into - chunks will need to be recombined. Although there's no restore tool, - 'bup fuse' does accomplish some of this already. - - 'bup index' is slower than it should be. 
It's still rather fast: it can iterate through all the filenames on my diff --git a/cmd/restore-cmd.py b/cmd/restore-cmd.py new file mode 100755 index 0000000..6066b65 --- /dev/null +++ b/cmd/restore-cmd.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +import sys, stat, time +from bup import options, git, vfs +from bup.helpers import * + +optspec = """ +bup restore [-C outdir] +-- +C,outdir= change to given outdir before extracting files +v,verbose increase log output (can be used more than once) +q,quiet don't show progress meter +""" + +total_restored = last_progress = 0 + + +def verbose1(s): + global last_progress + if opt.verbose >= 1: + print s + last_progress = 0 + + +def verbose2(s): + global last_progress + if opt.verbose >= 2: + print s + last_progress = 0 + + +def plog(s): + global last_progress + if opt.quiet: + return + now = time.time() + if now - last_progress > 0.2: + progress(s) + last_progress = now + + +def do_node(top, n): + global total_restored + fullname = n.fullname(stop_at=top) + unlink(fullname) + if stat.S_ISDIR(n.mode): + verbose1('%s/' % fullname) + mkdirp(fullname) + elif stat.S_ISLNK(n.mode): + verbose2('%s@ -> %s' % (fullname, n.readlink())) + os.symlink(n.readlink(), fullname) + else: + verbose2(fullname) + outf = open(fullname, 'wb') + try: + for b in chunkyreader(n.open()): + outf.write(b) + finally: + outf.close() + total_restored += 1 + plog('Restoring: %d\r' % total_restored) + for sub in n: + do_node(top, sub) + + +handle_ctrl_c() + +o = options.Options('bup restore', optspec) +(opt, flags, extra) = o.parse(sys.argv[1:]) + +git.check_repo_or_die() +top = vfs.RefList(None) + +if not extra: + o.fatal('must specify at least one filename to restore') + +if opt.outdir: + mkdirp(opt.outdir) + os.chdir(opt.outdir) + +ret = 0 +for d in extra: + path,name = os.path.split(d) + try: + n = top.lresolve(d) + except vfs.NodeError, e: + add_error(e) + continue + isdir = stat.S_ISDIR(n.mode) + if not name or name == '.': + # trailing slash: 
extract children to cwd + if not isdir: + add_error('%r: not a directory' % d) + else: + for sub in n: + do_node(n, sub) + else: + # no trailing slash: extract node and its children to cwd + do_node(n.parent, n) + +if not opt.quiet: + log('Restoring: %d, done.\n' % total_restored) + +if saved_errors: + log('WARNING: %d errors encountered while restoring.\n' % len(saved_errors)) + sys.exit(1) diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py index 479a905..9baa6b5 100644 --- a/lib/bup/vfs.py +++ b/lib/bup/vfs.py @@ -172,15 +172,17 @@ class Node(object): self._subs = None def __cmp__(a, b): - return cmp(a.name or None, b.name or None) + return cmp(a and a.name or None, b and b.name or None) def __iter__(self): return iter(self.subs()) - def fullname(self): + def fullname(self, stop_at=None): """Get this file's full path.""" - if self.parent: - return os.path.join(self.parent.fullname(), self.name) + assert(self != stop_at) # would be the empty string; too weird + if self.parent and self.parent != stop_at: + return os.path.join(self.parent.fullname(stop_at=stop_at), + self.name) else: return self.name diff --git a/main.py b/main.py index b55b619..35e28e0 100755 --- a/main.py +++ b/main.py @@ -39,6 +39,7 @@ def usage(): help = 'Print detailed help for the given command', index = 'Create or display the index of files to back up', on = 'Backup a remote machine to the local one', + restore = 'Extract files from a backup set', save = 'Save files into a backup set (note: run "bup index" first)', web = 'Launch a web server to examine backup sets', ) diff --git a/t/test.sh b/t/test.sh index e654255..1a904ae 100755 --- a/t/test.sh +++ b/t/test.sh @@ -166,6 +166,15 @@ WVSTART "save/git-fsck" WVPASS [ "$n" -eq 0 ] ) || exit 1 +WVSTART "restore" +rm -rf buprestore.tmp +WVFAIL bup restore boink +WVPASS bup restore -C buprestore.tmp "/master/latest/$TOP/$D" +WVPASSEQ "$(ls buprestore.tmp)" "bupdata.tmp" +rm -rf buprestore.tmp +WVPASS bup restore -C buprestore.tmp 
"/master/latest/$TOP/$D/" +WVPASS diff -ur $D/ buprestore.tmp/ + WVSTART "ftp" WVPASS bup ftp "cat /master/latest/$TOP/$D/b" >$D/b.new WVPASS bup ftp "cat /master/latest/$TOP/$D/f" >$D/f.new -- 2.39.2