From c4d3e3aebd973cdb27d3c8057db05ed7f81475b8 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Mon, 20 Oct 2014 18:44:06 -0500 Subject: [PATCH] Add probabilistic "bup gc", but require --unsafe WARNING: This code is UNSAFE, please test it with care. Among other things, it adds the first bup command that intends to modify packfiles. Also, the code assumes/requires a "quiet" repository; any simultaneous access is dangerous. Add a "bup gc" command that uses a Bloom filter to track the live objects during the mark phase. This means that the collection is probabilistic -- that it may retain some (known) percentage of garbage, but it can also work within a reasonable, fixed RAM budget for any particular percentage and repository size. In many cases, this may be sufficient and preferable, but in the longer run, we might also want to add a precise method that guarantees bup retains *no* unreachable/dead objects -- nothing here should prevent that. This command is potentially dangerous, so until we've had broader testing, require all invocations to specify --unsafe, and make it clear in the documentation that this command isn't considered stable. Thanks to Tim Riemenschneider for suggesting the pruning of already visited trees, and an earlier implementation thereof. Thanks to Ben Kelley for testing and reporting bugs in earlier versions of the current implementation. Thanks to Ben Kelly for help devising this Bloom-based approach, and for testing and reporting bugs in earlier versions. 
Signed-off-by: Rob Browning Tested-by: Rob Browning --- Documentation/bup-gc.md | 60 +++++++++ Makefile | 1 + cmd/gc-cmd.py | 278 ++++++++++++++++++++++++++++++++++++++++ lib/bup/bloom.py | 15 ++- lib/bup/client.py | 3 +- lib/bup/git.py | 20 ++- lib/bup/midx.py | 6 +- t/test-gc.sh | 177 +++++++++++++++++++++++++ 8 files changed, 551 insertions(+), 9 deletions(-) create mode 100644 Documentation/bup-gc.md create mode 100755 cmd/gc-cmd.py create mode 100755 t/test-gc.sh diff --git a/Documentation/bup-gc.md b/Documentation/bup-gc.md new file mode 100644 index 0000000..1088a85 --- /dev/null +++ b/Documentation/bup-gc.md @@ -0,0 +1,60 @@ +% bup-gc(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-gc - remove unreferenced, unneeded data (CAUTION: EXPERIMENTAL) + +# SYNOPSIS + +bup gc [-#|--verbose] [--threshold=N] --unsafe + +# DESCRIPTION + +`bup gc` removes (permanently deletes) unreachable data from the +repository, data that isn't referred to directly or indirectly by the +current set of branches (backup sets) and tags. But bear in mind that +given deduplication, deleting a save and running the garbage collector +might or might not actually delete anything (or reclaim any space). + +With the current, probabilistic implementation, some fraction of the +unreachable data may be retained. In exchange, the garbage collection +should require much less RAM than might be required by some more precise +approaches. + +Typically, the garbage collector would be invoked after some set of +invocations of `bup rm`. + +WARNING: This is one of the few bup commands that modifies your archive +in intentionally destructive ways. + +# OPTIONS + +\--threshold=N +: only rewrite a packfile if it's over N percent garbage; otherwise + leave it alone. The default threshold is 10%. + +-v, \--verbose +: increase verbosity (can be used more than once). With one -v, bup + reports its progress through each phase. With two -v, it also logs + every directory it scans, and with three -v, every file. 
+ +-*#*, \--compress=*#* +: set the compression level to # (a value from 0-9, where + 9 is the highest and 0 is no compression). The default + is 1. + +# EXAMPLES + + # Remove all saves of "home" and most of the otherwise unreferenced data. + $ bup rm home + $ bup gc --unsafe + +# SEE ALSO + +`bup-rm`(1) and `bup-fsck`(1) + +# BUP + +Part of the `bup`(1) suite. diff --git a/Makefile b/Makefile index 30e9aeb..51d04ac 100644 --- a/Makefile +++ b/Makefile @@ -146,6 +146,7 @@ runtests-python: all t/tmp cmdline_tests := \ t/test-rm.sh \ + t/test-gc.sh \ t/test-main.sh \ t/test-list-idx.sh \ t/test-index.sh \ diff --git a/cmd/gc-cmd.py b/cmd/gc-cmd.py new file mode 100755 index 0000000..dfb75a9 --- /dev/null +++ b/cmd/gc-cmd.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python +import glob, os, stat, subprocess, sys, tempfile +from bup import bloom, git, midx, options, vfs +from bup.git import walk_object +from bup.helpers import handle_ctrl_c, log, progress, qprogress, saved_errors +from os.path import basename + +# This command uses a Bloom filter to track the live objects during +# the mark phase. This means that the collection is probabilistic; it +# may retain some (known) percentage of garbage, but it can also work +# within a reasonable, fixed RAM budget for any particular percentage +# and repository size. +# +# The collection proceeds as follows: +# +# - Scan all live objects by walking all of the refs, and insert +# every hash encountered into a new Bloom "liveness" filter. +# Compute the size of the liveness filter based on the total +# number of objects in the repository. This is the "mark phase". +# +# - Clear the data that's dependent on the repository's object set, +# i.e. the reflog, the normal Bloom filter, and the midxes. +# +# - Traverse all of the pack files, consulting the liveness filter +# to decide which objects to keep. 
+# +# For each pack file, rewrite it iff it probably contains more +# than (currently) 10% garbage (computed by an initial traversal +# of the packfile in consultation with the liveness filter). To +# rewrite, traverse the packfile (again) and write each hash that +# tests positive against the liveness filter to a packwriter. +# +# During the traversal of all of the packfiles, delete redundant, +# old packfiles only after the packwriter has finished the pack +# that contains all of their live objects. +# +# The current code unconditionally tracks the set of tree hashes seen +# during the mark phase, and skips any that have already been visited. +# This should decrease the IO load at the cost of increased RAM use. + +# FIXME: add a bloom filter tuning parameter? + + +optspec = """ +bup gc [options...] +-- +v,verbose increase log output (can be used more than once) +threshold only rewrite a packfile if it's over this percent garbage [10] +#,compress= set compression level to # (0-9, 9 is highest) [1] +unsafe use the command even though it may be DANGEROUS +""" + + +class Nonlocal: + pass + + +def count_objects(dir): + # For now we'll just use open_idx(), but we could probably be much + # more efficient since all we need is a single integer (the last + # fanout entry) from each index. 
+ global opt + object_count = 0 + indexes = glob.glob(os.path.join(dir, '*.idx')) + for i, idx_name in enumerate(indexes): + if opt.verbose: + log('found %d objects (%d/%d %s)\r' + % (object_count, i + 1, len(indexes), + os.path.basename(idx_name))) + idx = git.open_idx(idx_name) + object_count += len(idx) + return object_count + + +def report_live_item(n, total, ref_name, ref_id, item): + global opt + status = 'scanned %02.2f%%' % (n * 100.0 / total) + hex_id = ref_id.encode('hex') + dirslash = '/' if item.type == 'tree' else '' + chunk_path = item.chunk_path + + if chunk_path: + if opt.verbose < 4: + return + ps = '/'.join(item.path) + chunk_ps = '/'.join(chunk_path) + log('%s %s:%s/%s%s\n' % (status, hex_id, ps, chunk_ps, dirslash)) + return + + # Top commit, for example has none. + demangled = git.demangle_name(item.path[-1], item.mode)[0] if item.path \ + else None + + # Don't print mangled paths unless the verbosity is over 3. + if demangled: + ps = '/'.join(item.path[:-1] + [demangled]) + if opt.verbose == 1: + qprogress('%s %s:%s%s\r' % (status, hex_id, ps, dirslash)) + elif (opt.verbose > 1 and item.type == 'tree') \ + or (opt.verbose > 2 and item.type == 'blob'): + log('%s %s:%s%s\n' % (status, hex_id, ps, dirslash)) + elif opt.verbose > 3: + ps = '/'.join(item.path) + log('%s %s:%s%s\n' % (status, hex_id, ps, dirslash)) + + +def find_live_objects(existing_count, cat_pipe, opt): + prune_visited_trees = True # In case we want a command line option later + pack_dir = git.repo('objects/pack') + ffd, bloom_filename = tempfile.mkstemp('.bloom', 'tmp-gc-', pack_dir) + os.close(ffd) + # FIXME: allow selection of k? + # FIXME: support ephemeral bloom filters (i.e. 
*never* written to disk) + live_objs = bloom.create(bloom_filename, expected=existing_count, k=None) + stop_at, trees_visited = None, None + if prune_visited_trees: + trees_visited = set() + stop_at = lambda (x): x.decode('hex') in trees_visited + approx_live_count = 0 + for ref_name, ref_id in git.list_refs(): + for item in walk_object(cat_pipe, ref_id.encode('hex'), + stop_at=stop_at, + include_data=None): + # FIXME: batch ids + if opt.verbose: + report_live_item(approx_live_count, existing_count, + ref_name, ref_id, item) + bin_id = item.id.decode('hex') + if trees_visited is not None and item.type == 'tree': + trees_visited.add(bin_id) + if opt.verbose: + if not live_objs.exists(bin_id): + live_objs.add(bin_id) + approx_live_count += 1 + else: + live_objs.add(bin_id) + trees_visited = None + if opt.verbose: + log('expecting to retain about %.2f%% unnecessary objects\n' + % live_objs.pfalse_positive()) + return live_objs + + +def sweep(live_objects, existing_count, cat_pipe, opt): + # Traverse all the packs, saving the (probably) live data. + + ns = Nonlocal() + ns.stale_files = [] + def remove_stale_files(new_pack_prefix): + if opt.verbose and new_pack_prefix: + log('created ' + basename(new_pack_prefix) + '\n') + for p in ns.stale_files: + if opt.verbose: + log('removing ' + basename(p) + '\n') + os.unlink(p) + ns.stale_files = [] + + writer = git.PackWriter(objcache_maker=None, + compression_level=opt.compress, + run_midx=False, + on_pack_finish=remove_stale_files) + + # FIXME: sanity check .idx names vs .pack names? 
+ collect_count = 0 + for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')): + if opt.verbose: + qprogress('preserving live data (%d%% complete)\r' + % ((float(collect_count) / existing_count) * 100)) + idx = git.open_idx(idx_name) + + idx_live_count = 0 + for i in xrange(0, len(idx)): + sha = idx.shatable[i * 20 : (i + 1) * 20] + if live_objects.exists(sha): + idx_live_count += 1 + + collect_count += idx_live_count + if idx_live_count == 0: + if opt.verbose: + log('deleting %s\n' + % git.repo_rel(basename(idx_name))) + ns.stale_files.append(idx_name) + ns.stale_files.append(idx_name[:-3] + 'pack') + continue + + live_frac = idx_live_count / float(len(idx)) + if live_frac > ((100 - opt.threshold) / 100.0): + if opt.verbose: + log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)), + live_frac * 100)) + continue + + if opt.verbose: + log('rewriting %s (%.2f%% live)\n' % (basename(idx_name), + live_frac * 100)) + for i in xrange(0, len(idx)): + sha = idx.shatable[i * 20 : (i + 1) * 20] + if live_objects.exists(sha): + item_it = cat_pipe.get(sha.encode('hex')) + type = item_it.next() + writer.write(sha, type, ''.join(item_it)) + + ns.stale_files.append(idx_name) + ns.stale_files.append(idx_name[:-3] + 'pack') + + if opt.verbose: + progress('preserving live data (%d%% complete)\n' + % ((float(collect_count) / existing_count) * 100)) + + # Nothing should have recreated midx/bloom yet. + pack_dir = git.repo('objects/pack') + assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom'))) + assert(not glob.glob(os.path.join(pack_dir, '*.midx'))) + + # try/catch should call writer.abort()? + # This will finally run midx. + writer.close() # Can only change refs (if needed) after this. + remove_stale_files(None) # In case we didn't write to the writer. + + if opt.verbose: + log('discarded %d%% of objects\n' + % ((existing_count - count_objects(pack_dir)) + / float(existing_count) * 100)) + + +# FIXME: server mode? 
+# FIXME: make sure client handles server-side changes reasonably + +handle_ctrl_c() + +o = options.Options(optspec) +(opt, flags, extra) = o.parse(sys.argv[1:]) + +if not opt.unsafe: + o.fatal('refusing to run dangerous, experimental command without --unsafe') + +if extra: + o.fatal('no positional parameters expected') + +if opt.threshold: + try: + opt.threshold = int(opt.threshold) + except ValueError: + o.fatal('threshold must be an integer percentage value') + if opt.threshold < 0 or opt.threshold > 100: + o.fatal('threshold must be an integer percentage value') + +git.check_repo_or_die() + +cat_pipe = vfs.cp() +existing_count = count_objects(git.repo('objects/pack')) +if opt.verbose: + log('found %d objects\n' % existing_count) +if not existing_count: + if opt.verbose: + log('nothing to collect\n') +else: + live_objects = find_live_objects(existing_count, cat_pipe, opt) + try: + # FIXME: just rename midxes and bloom, and restore them at the end if + # we didn't change any packs? + if opt.verbose: log('clearing midx files\n') + midx.clear_midxes() + if opt.verbose: log('clearing bloom filter\n') + bloom.clear_bloom(git.repo('objects/pack')) + if opt.verbose: log('clearing reflog\n') + expirelog_cmd = ['git', 'reflog', 'expire', '--all', '--expire=all'] + expirelog = subprocess.Popen(expirelog_cmd, preexec_fn = git._gitenv()) + git._git_wait(' '.join(expirelog_cmd), expirelog) + if opt.verbose: log('removing unreachable data\n') + sweep(live_objects, existing_count, cat_pipe, opt) + finally: + live_objects.close() + os.unlink(live_objects.name) + +if saved_errors: + log('WARNING: %d errors encountered during gc\n' % len(saved_errors)) + sys.exit(1) diff --git a/lib/bup/bloom.py b/lib/bup/bloom.py index ef896f5..7787866 100644 --- a/lib/bup/bloom.py +++ b/lib/bup/bloom.py @@ -84,7 +84,7 @@ import sys, os, math, mmap, struct from bup import _helpers from bup.helpers import (debug1, debug2, log, mmap_read, mmap_readwrite, - mmap_readwrite_private) + 
mmap_readwrite_private, unlink) BLOOM_VERSION = 2 @@ -197,11 +197,15 @@ class ShaBloom: k = self.k return 100*(1-math.exp(-k*float(n)/m))**k - def add_idx(self, ix): - """Add the object to the filter, return current pfalse_positive.""" + def add(self, ids): + """Add the hashes in ids (packed binary 20-bytes) to the filter.""" if not self.map: raise Exception("Cannot add to closed bloom") - self.entries += bloom_add(self.map, ix.shatable, self.bits, self.k) + self.entries += bloom_add(self.map, ids, self.bits, self.k) + + def add_idx(self, ix): + """Add the object to the filter.""" + self.add(ix.shatable) self.idxnames.append(os.path.basename(ix.name)) def exists(self, sha): @@ -244,3 +248,6 @@ def create(name, expected, delaywrite=None, f=None, k=None): expected = 1 return ShaBloom(name, f=f, readwrite=True, expected=expected) + +def clear_bloom(dir): + unlink(os.path.join(dir, 'bup.bloom')) diff --git a/lib/bup/client.py b/lib/bup/client.py index 6b0da1c..9d6f1d4 100644 --- a/lib/bup/client.py +++ b/lib/bup/client.py @@ -304,7 +304,8 @@ class PackWriter_Remote(git.PackWriter): self.onopen() self._packopen = True - def _end(self): + def _end(self, run_midx=True): + assert(run_midx) # We don't support this via remote yet if self._packopen and self.file: self.file.write('\0\0\0\0') self._packopen = False diff --git a/lib/bup/git.py b/lib/bup/git.py index ba616d5..0f770a4 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -579,9 +579,13 @@ def idxmerge(idxlist, final_progress=True): def _make_objcache(): return PackIdxList(repo('objects/pack')) +# bup-gc assumes that it can disable all PackWriter activities +# (bloom/midx/cache) via the constructor and close() arguments. 
+ class PackWriter: """Writes Git objects inside a pack file.""" - def __init__(self, objcache_maker=_make_objcache, compression_level=1): + def __init__(self, objcache_maker=_make_objcache, compression_level=1, + run_midx=True, on_pack_finish=None): self.file = None self.parentfd = None self.count = 0 @@ -591,6 +595,8 @@ class PackWriter: self.objcache_maker = objcache_maker self.objcache = None self.compression_level = compression_level + self.run_midx=run_midx + self.on_pack_finish = on_pack_finish def __del__(self): self.close() @@ -655,7 +661,7 @@ class PackWriter: def breakpoint(self): """Clear byte and object counts and return the last processed id.""" - id = self._end() + id = self._end(self.run_midx) self.outbytes = self.count = 0 return id @@ -671,11 +677,15 @@ class PackWriter: self._require_objcache() return self.objcache.exists(id, want_source=want_source) + def write(self, sha, type, content): + """Write an object to the pack file. Fails if sha exists().""" + self._write(sha, type, content) + def maybe_write(self, type, content): """Write an object to the pack file if not present and return its id.""" sha = calc_hash(type, content) if not self.exists(sha): - self._write(sha, type, content) + self.write(sha, type, content) self._require_objcache() self.objcache.add(sha) return sha @@ -769,6 +779,10 @@ class PackWriter: if run_midx: auto_midx(repo('objects/pack')) + + if self.on_pack_finish: + self.on_pack_finish(nameprefix) + return nameprefix def close(self, run_midx=True): diff --git a/lib/bup/midx.py b/lib/bup/midx.py index 99c47a2..1bf946c 100644 --- a/lib/bup/midx.py +++ b/lib/bup/midx.py @@ -1,5 +1,5 @@ -import mmap, struct +import git, glob, mmap, os, struct from bup import _helpers from bup.helpers import log, mmap_read @@ -121,3 +121,7 @@ class PackMidx: return int(self._fanget(self.entries-1)) +def clear_midxes(dir=None): + dir = dir or git.repo('objects/pack') + for midx in glob.glob(os.path.join(dir, '*.midx')): + os.unlink(midx) diff --git 
a/t/test-gc.sh b/t/test-gc.sh new file mode 100755 index 0000000..face6e5 --- /dev/null +++ b/t/test-gc.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash +. ./wvtest-bup.sh + +set -o pipefail + +top="$(WVPASS pwd)" || exit $? +tmpdir="$(WVPASS wvmktempdir)" || exit $? + +export BUP_DIR="$tmpdir/bup" +export GIT_DIR="$tmpdir/bup" + +GC_OPTS=--unsafe + +bup() { "$top/bup" "$@"; } +compare-trees() { "$top/t/compare-trees" "$@"; } + +WVPASS cd "$tmpdir" +WVPASS bup init + + +WVSTART "gc (unchanged repo)" + +WVPASS mkdir src-1 +WVPASS bup random 1k > src-1/1 +WVPASS bup index src-1 +WVPASS bup save --strip -n src-1 src-1 + +WVPASS bup gc $GC_OPTS -v + +WVPASS bup restore -C "$tmpdir/restore" /src-1/latest +WVPASS compare-trees src-1/ "$tmpdir/restore/latest/" + + +WVSTART "gc (unchanged, new branch)" + +WVPASS mkdir src-2 +WVPASS bup random 10M > src-2/1 +WVPASS bup index src-2 +WVPASS bup save --strip -n src-2 src-2 + +WVPASS bup gc $GC_OPTS -v + +WVPASS rm -r "$tmpdir/restore" +WVPASS bup restore -C "$tmpdir/restore" /src-1/latest +WVPASS compare-trees src-1/ "$tmpdir/restore/latest/" + +WVPASS rm -r "$tmpdir/restore" +WVPASS bup restore -C "$tmpdir/restore" /src-2/latest +WVPASS compare-trees src-2/ "$tmpdir/restore/latest/" + + +WVSTART "gc (removed branch)" + +size_before=$(WVPASS du -k -s "$BUP_DIR" | WVPASS cut -f1) || exit $? +WVPASS rm "$BUP_DIR/refs/heads/src-2" +WVPASS bup gc $GC_OPTS -v +size_after=$(WVPASS du -k -s "$BUP_DIR" | WVPASS cut -f1) || exit $? 
+ +WVPASS [ "$size_before" -gt 5000 ] +WVPASS [ "$size_after" -lt 500 ] + +WVPASS rm -r "$tmpdir/restore" +WVPASS bup restore -C "$tmpdir/restore" /src-1/latest +WVPASS compare-trees src-1/ "$tmpdir/restore/latest/" + +WVPASS rm -r "$tmpdir/restore" +WVFAIL bup restore -C "$tmpdir/restore" /src-2/latest + + +WVPASS mkdir src-ab-clean src-ab-clean/a src-ab-clean/b +WVPASS bup random 1k > src-ab-clean/a/1 +WVPASS bup random 10M > src-ab-clean/b/1 + + +WVSTART "gc (rewriting)" + +WVPASS rm -rf "$BUP_DIR" +WVPASS bup init +WVPASS rm -rf src-ab +WVPASS cp -pPR src-ab-clean src-ab + +WVPASS bup index src-ab +WVPASS bup save --strip -n src-ab src-ab +WVPASS bup index --clear +WVPASS bup index src-ab +WVPASS bup save -vvv --strip -n a src-ab/a + +size_before=$(WVPASS du -k -s "$BUP_DIR" | WVPASS cut -f1) || exit $? +WVPASS rm "$BUP_DIR/refs/heads/src-ab" +WVPASS bup gc $GC_OPTS -v +size_after=$(WVPASS du -k -s "$BUP_DIR" | WVPASS cut -f1) || exit $? + +WVPASS [ "$size_before" -gt 5000 ] +WVPASS [ "$size_after" -lt 500 ] + +WVPASS rm -r "$tmpdir/restore" +WVPASS bup restore -C "$tmpdir/restore" /a/latest +WVPASS compare-trees src-ab/a/ "$tmpdir/restore/latest/" + +WVPASS rm -r "$tmpdir/restore" +WVFAIL bup restore -C "$tmpdir/restore" /src-ab/latest + + +WVSTART "gc (save -r after repo rewriting)" + +WVPASS rm -rf "$BUP_DIR" +WVPASS bup init +WVPASS bup -d bup-remote init +WVPASS rm -rf src-ab +WVPASS cp -pPR src-ab-clean src-ab + +WVPASS bup index src-ab +WVPASS bup save -r :bup-remote --strip -n src-ab src-ab +WVPASS bup index --clear +WVPASS bup index src-ab +WVPASS bup save -r :bup-remote -vvv --strip -n a src-ab/a + +size_before=$(WVPASS du -k -s bup-remote | WVPASS cut -f1) || exit $? +WVPASS rm bup-remote/refs/heads/src-ab +WVPASS bup -d bup-remote gc $GC_OPTS -v +size_after=$(WVPASS du -k -s bup-remote | WVPASS cut -f1) || exit $? 
+ +WVPASS [ "$size_before" -gt 5000 ] +WVPASS [ "$size_after" -lt 500 ] + +WVPASS rm -rf "$tmpdir/restore" +WVPASS bup -d bup-remote restore -C "$tmpdir/restore" /a/latest +WVPASS compare-trees src-ab/a/ "$tmpdir/restore/latest/" + +WVPASS rm -r "$tmpdir/restore" +WVFAIL bup -d bup-remote restore -C "$tmpdir/restore" /src-ab/latest + +# Make sure a post-gc index/save that includes gc-ed data works +WVPASS bup index src-ab +WVPASS bup save -r :bup-remote --strip -n src-ab src-ab +WVPASS rm -r "$tmpdir/restore" +WVPASS bup -d bup-remote restore -C "$tmpdir/restore" /src-ab/latest +WVPASS compare-trees src-ab/ "$tmpdir/restore/latest/" + + +WVSTART "gc (bup on after repo rewriting)" + +WVPASS rm -rf "$BUP_DIR" +WVPASS bup init +WVPASS rm -rf src-ab +WVPASS cp -pPR src-ab-clean src-ab + +WVPASS bup on - index src-ab +WVPASS bup on - save --strip -n src-ab src-ab +WVPASS bup index --clear +WVPASS bup on - index src-ab +WVPASS bup on - save -vvv --strip -n a src-ab/a + +size_before=$(WVPASS du -k -s "$BUP_DIR" | WVPASS cut -f1) || exit $? +WVPASS rm "$BUP_DIR/refs/heads/src-ab" +WVPASS bup gc $GC_OPTS -v +size_after=$(WVPASS du -k -s "$BUP_DIR" | WVPASS cut -f1) || exit $? + +WVPASS [ "$size_before" -gt 5000 ] +WVPASS [ "$size_after" -lt 500 ] + +WVPASS rm -r "$tmpdir/restore" +WVPASS bup restore -C "$tmpdir/restore" /a/latest +WVPASS compare-trees src-ab/a/ "$tmpdir/restore/latest/" + +WVPASS rm -r "$tmpdir/restore" +WVFAIL bup restore -C "$tmpdir/restore" /src-ab/latest + +# Make sure a post-gc index/save that includes gc-ed data works +WVPASS bup on - index src-ab +WVPASS bup on - save --strip -n src-ab src-ab +WVPASS rm -r "$tmpdir/restore" +WVPASS bup restore -C "$tmpdir/restore" /src-ab/latest +WVPASS compare-trees src-ab/ "$tmpdir/restore/latest/" + + +WVPASS rm -rf "$tmpdir" -- 2.39.2