From: Rob Browning
Date: Tue, 29 Dec 2015 20:53:54 +0000 (-0600)
Subject: Add "bup rm", but require --unsafe invocation
X-Git-Tag: 0.28-rc1~20^2~1
X-Git-Url: https://arthur.barton.de/gitweb/?p=bup.git;a=commitdiff_plain;h=37b642f6143920cb775cef3416072e113a750328

Add "bup rm", but require --unsafe invocation

Allow the removal of branches, and the removal of saves from specified
branches.

This command only removes the references, so until "bup gc" is
available, all of the related data will still be in the repository,
though possibly difficult to reach, unless otherwise tagged.

This command is potentially dangerous, so until we've had broader
testing, require all invocations to specify --unsafe, and make it
clear in the documentation that this command isn't considered stable.

Thanks to Nix for reporting an earlier mistake in the manpage.

Signed-off-by: Rob Browning
Tested-by: Rob Browning
---

diff --git a/Documentation/bup-rm.md b/Documentation/bup-rm.md
new file mode 100644
index 0000000..a0382e1
--- /dev/null
+++ b/Documentation/bup-rm.md
@@ -0,0 +1,50 @@
+% bup-rm(1) Bup %BUP_VERSION%
+% Rob Browning
+% %BUP_DATE%
+
+# NAME
+
+bup-rm - remove references to archive content (CAUTION: EXPERIMENTAL)
+
+# SYNOPSIS
+
+bup rm --unsafe [-#|--verbose] <*branch*|*save*...>
+
+# DESCRIPTION
+
+`bup rm` removes the indicated *branch*es (backup sets) and *save*s.
+By itself, this command does not delete any actual data (nor recover
+any storage space), but it may make it very difficult or impossible to
+refer to the deleted items, unless there are other references to them
+(e.g. tags).
+
+A subsequent garbage collection, either by the forthcoming `bup gc`
+command, or by a normal `git gc`, may permanently delete data that is
+no longer reachable from the remaining branches or tags, and reclaim
+the related storage space.
+
+NOTE: This is one of the few bup commands that modifies your archive
+in intentionally destructive ways.
+
+# OPTIONS
+
+-v, \--verbose
+:   increase verbosity (can be used more than once).
+
+-*#*, \--compress=*#*
+:   set the compression level to # (a value from 0-9, where
+    9 is the highest and 0 is no compression).  The default
+    is 6.  Note that `bup rm` may only write new commits.
+
+# EXAMPLES
+
+    # Delete the backup set (branch) foo and a save in bar.
+    $ bup rm --unsafe /foo /bar/2014-10-21-214720
+
+# SEE ALSO
+
+`bup-save`(1), `bup-fsck`(1), and `bup-tag`(1)
+
+# BUP
+
+Part of the `bup`(1) suite.
diff --git a/Makefile b/Makefile
index 410613f..30e9aeb 100644
--- a/Makefile
+++ b/Makefile
@@ -145,6 +145,7 @@ runtests-python: all t/tmp
 	    | tee -a t/tmp/test-log/$$$$.log
 
 cmdline_tests := \
+  t/test-rm.sh \
   t/test-main.sh \
   t/test-list-idx.sh \
   t/test-index.sh \
diff --git a/cmd/rm-cmd.py b/cmd/rm-cmd.py
new file mode 100755
index 0000000..63c7445
--- /dev/null
+++ b/cmd/rm-cmd.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python
+
+import sys
+
+from bup import client, git, options, vfs
+from bup.git import get_commit_items
+from bup.helpers import add_error, handle_ctrl_c, log, saved_errors
+
+optspec = """
+bup rm
+--
+#,compress=  set compression level to # (0-9, 9 is highest) [6]
+v,verbose    increase verbosity (can be specified multiple times)
+unsafe       use the command even though it may be DANGEROUS
+"""
+
+def append_commit(hash, parent, cp, writer):
+    """Write a copy of the commit named by hash (hex) via writer.
+
+    The copy keeps the original tree, author, committer, and message,
+    but has parent as its parent.  Return (new_commit, tree).
+    """
+    ci = get_commit_items(hash, cp)
+    tree = ci.tree.decode('hex')
+    author = '%s <%s>' % (ci.author_name, ci.author_mail)
+    committer = '%s <%s>' % (ci.committer_name, ci.committer_mail)
+    c = writer.new_commit(tree, parent,
+                          author, ci.author_sec, ci.author_offset,
+                          committer, ci.committer_sec, ci.committer_offset,
+                          ci.message)
+    return c, tree
+
+
+def filter_branch(tip_commit_hex, exclude, writer):
+    """Rebuild the history reachable from tip_commit_hex without the
+    commits for which exclude(commit) is true, and return the new tip.
+
+    Commits that precede the first excluded one in the reversed
+    rev_list order are kept as they are; the kept commits after it are
+    rewritten with append_commit.
+    """
+    # May return None if everything is excluded.
+    commits = [c for _, c in git.rev_list(tip_commit_hex)]
+    commits.reverse()
+    last_c, tree = None, None
+    # Rather than assert that we always find an exclusion here, we'll
+    # just let the StopIteration signal the error.
+    first_exclusion = next(i for i, c in enumerate(commits) if exclude(c))
+    if first_exclusion != 0:
+        last_c = commits[first_exclusion - 1]
+        tree = get_commit_items(last_c.encode('hex'),
+                                git.cp()).tree.decode('hex')
+        commits = commits[first_exclusion:]
+    for c in commits:
+        if exclude(c):
+            continue
+        last_c, tree = append_commit(c.encode('hex'), last_c, git.cp(), writer)
+    return last_c
+
+
+def rm_saves(saves, writer):
+    """Compute the branch tip that results from dropping the given
+    saves, which must all have the same parent branch.
+
+    Return (orig_tip, new_tip); new_tip is the result of
+    filter_branch() with the saves' commits excluded.
+    """
+    assert(saves)
+    branch_node = saves[0].parent
+    for save in saves: # Be certain they're all on the same branch
+        assert(save.parent == branch_node)
+    rm_commits = frozenset([x.dereference().hash for x in saves])
+    orig_tip = branch_node.hash
+    assert(orig_tip)  # must hold before orig_tip is hex-encoded below
+    new_tip = filter_branch(orig_tip.encode('hex'),
+                            lambda x: x in rm_commits,
+                            writer)
+    assert(new_tip != orig_tip)
+    return orig_tip, new_tip
+
+
+def dead_items(vfs_top, paths):
+    """Return an optimized set of removals, reporting errors via
+    add_error, and if there are any errors, return None, None."""
+    dead_branches = {}
+    dead_saves = {}
+    # Scan for bad requests, and opportunities to optimize
+    for path in paths:
+        try:
+            n = vfs_top.lresolve(path)
+        except vfs.NodeError as e:
+            add_error('unable to resolve %s: %s' % (path, e))
+        else:
+            if isinstance(n, vfs.BranchList): # rm /foo
+                branchname = n.name
+                dead_branches[branchname] = n
+                dead_saves.pop(branchname, None) # rm /foo obviates rm /foo/bar
+            elif isinstance(n, vfs.FakeSymlink) and isinstance(n.parent,
+                                                               vfs.BranchList):
+                if n.name == 'latest':
+                    add_error("error: cannot delete 'latest' symlink")
+                else:
+                    branchname = n.parent.name
+                    if branchname not in dead_branches:
+                        dead_saves.setdefault(branchname, []).append(n)
+            else:
+                add_error("don't know how to remove %r yet" % n.fullname())
+    if saved_errors:
+        return None, None
+    return dead_branches, dead_saves
+
+
+handle_ctrl_c()
+
+o = options.Options(optspec)
+opt, flags, extra = o.parse(sys.argv[1:])
+
+if not opt.unsafe:
+    o.fatal('refusing to run dangerous, experimental command without --unsafe')
+
+if len(extra) < 1:
+    o.fatal('no paths specified')
+
+paths = extra
+
+git.check_repo_or_die()
+top = vfs.RefList(None)
+
+dead_branches, dead_saves = dead_items(top, paths)
+if saved_errors:
+    log('not proceeding with any removals\n')
+    sys.exit(1)
+
+updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
+writer = None
+
+if dead_saves:
+    writer = git.PackWriter(compression_level=opt.compress)
+
+for branch, saves in dead_saves.iteritems():
+    assert(saves)
+    updated_refs['refs/heads/' + branch] = rm_saves(saves, writer)
+
+for branch, node in dead_branches.iteritems():
+    ref = 'refs/heads/' + branch
+    assert(not ref in updated_refs)
+    updated_refs[ref] = (node.hash, None)
+
+if writer:
+    # Must close before we can update the ref(s) below.
+    writer.close()
+
+# Only update the refs here, at the very end, so that if something
+# goes wrong above, the old refs will be undisturbed.  Make an attempt
+# to update each ref.
+for ref_name, info in updated_refs.iteritems():
+    orig_ref, new_ref = info
+    try:
+        if not new_ref:
+            git.delete_ref(ref_name, orig_ref.encode('hex'))
+        else:
+            git.update_ref(ref_name, new_ref, orig_ref)
+            if opt.verbose:
+                new_hex = new_ref.encode('hex')
+                if orig_ref:
+                    orig_hex = orig_ref.encode('hex')
+                    log('updated %r (%s -> %s)\n'
+                        % (ref_name, orig_hex, new_hex))
+                else:
+                    log('updated %r (%s)\n' % (ref_name, new_hex))
+    except (git.GitError, client.ClientError) as ex:
+        if new_ref:
+            add_error('while trying to update %r (%s -> %s): %s' % (
+                ref_name, orig_ref.encode('hex'), new_ref.encode('hex'), ex))
+        else:
+            add_error('while trying to delete %r (%s): %s'
+                      % (ref_name, orig_ref.encode('hex'), ex))
+
+if saved_errors:
+    log('warning: %d errors encountered\n' % len(saved_errors))
+    sys.exit(1)
diff --git a/lib/bup/git.py b/lib/bup/git.py
index 73071fa..315d8f3 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -10,7 +10,8 @@ from itertools import islice
 from bup import _helpers, path, midx, bloom, xstat
 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                          fdatasync,
-                         hostname, log, merge_iter, mmap_read, mmap_readwrite,
+                         hostname, localtime, log, merge_iter,
+                         mmap_read, mmap_readwrite,
                          progress, qprogress, unlink, username, userfullname,
                          utc_offset_str)
 
@@ -951,10 +952,15 @@ def update_ref(refname, newval, oldval, repo_dir=None):
     _git_wait('git update-ref', p)
 
 
-def delete_ref(refname):
-    """Delete a repository reference."""
+def delete_ref(refname, oldvalue=None):
+    """Delete a repository reference (see git update-ref(1)).
+
+    If oldvalue is given, it is passed along to git update-ref -d as
+    the value the ref is expected to hold.
+    """
     assert(refname.startswith('refs/'))
-    p = subprocess.Popen(['git', 'update-ref', '-d', refname],
+    oldvalue = [] if not oldvalue else [oldvalue]
+    p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
                          preexec_fn = _gitenv())
     _git_wait('git update-ref', p)
 
diff --git a/t/test-rm.sh b/t/test-rm.sh
new file mode 100755
index 0000000..80824fb
--- /dev/null
+++ b/t/test-rm.sh
@@ -0,0 +1,234 @@
+#!/usr/bin/env bash
+. ./wvtest-bup.sh || exit $?
+. ./t/lib.sh || exit $?
+
+set -o pipefail
+
+# Perhaps this should check the rsync version instead, and not sure if
+# it's just darwin, or all of these.
+case "$(uname)" in
+    CYGWIN*|NetBSD)
+        rsx=''
+        ;;
+    Darwin)
+        rsx=.
+        ;;
+    *)
+        rsx=...
+        ;;
+esac
+
+if test "$(uname)" = Darwin; then
+    deleting=deleting
+else
+    deleting="deleting "
+    plusx=++
+fi
+
+top="$(WVPASS pwd)" || exit $?
+tmpdir="$(WVPASS wvmktempdir)" || exit $?
+
+export BUP_DIR="$tmpdir/bup"
+export GIT_DIR="$tmpdir/bup"
+
+
+bup() { "$top/bup" "$@"; }
+compare-trees() { "$top/t/compare-trees" "$@"; }
+
+
+WVPASS bup init
+WVPASS cd "$tmpdir"
+
+
+WVSTART "rm /foo (lone branch)"
+WVPASS mkdir src src/foo
+WVPASS echo twisty-maze > src/1
+WVPASS bup index src
+WVPASS bup save -n src src
+WVPASS "$top"/t/sync-tree bup/ bup-baseline/
+# FIXME: test -n
+WVPASS bup tick # Make sure we always get the timestamp changes below
+WVPASS bup rm --unsafe /src
+WVPASSEQ "$(compare-trees bup/ bup-baseline/)" \
+"*$deleting logs/refs/heads/src
+*$deleting refs/heads/src
+.d..t...${rsx} logs/refs/heads/
+.d..t...${rsx} refs/heads/"
+
+
+WVSTART "rm /foo (one of many)"
+WVPASS rm -rf bup
+WVPASS mv bup-baseline bup
+WVPASS echo twisty-maze > src/2
+WVPASS bup index src
+WVPASS bup save -n src-2 src
+WVPASS echo twisty-maze > src/3
+WVPASS bup index src
+WVPASS bup save -n src-3 src
+WVPASS "$top"/t/sync-tree bup/ bup-baseline/
+WVPASS bup tick # Make sure we always get the timestamp changes below
+WVPASS bup rm --unsafe /src
+WVPASSEQ "$(compare-trees bup/ bup-baseline/)" \
+"*$deleting logs/refs/heads/src
+*$deleting refs/heads/src
+.d..t...${rsx} logs/refs/heads/
+.d..t...${rsx} refs/heads/"
+
+
+WVSTART "rm /foo /bar (multiple of many)"
+WVPASS rm -rf bup
+WVPASS mv bup-baseline bup
+WVPASS echo twisty-maze > src/4
+WVPASS bup index src
+WVPASS bup save -n src-4 src
+WVPASS echo twisty-maze > src/5
+WVPASS bup index src
+WVPASS bup save -n src-5 src
+WVPASS "$top"/t/sync-tree bup/ bup-baseline/
+WVPASS bup tick # Make sure we always get the timestamp changes below
+WVPASS bup rm --unsafe /src-2 /src-4
+WVPASSEQ "$(compare-trees bup/ bup-baseline/)" \
+"*$deleting logs/refs/heads/src-4
+*$deleting logs/refs/heads/src-2
+*$deleting refs/heads/src-4
+*$deleting refs/heads/src-2
+.d..t...${rsx} logs/refs/heads/
+.d..t...${rsx} refs/heads/"
+
+
+WVSTART "rm /foo /bar (all)"
+WVPASS rm -rf bup
+WVPASS mv bup-baseline bup
+WVPASS "$top"/t/sync-tree bup/ bup-baseline/
+WVPASS bup tick # Make sure we always get the timestamp changes below
+WVPASS bup rm --unsafe /src /src-2 /src-3 /src-4 /src-5
+WVPASSEQ "$(compare-trees bup/ bup-baseline/)" \
+"*$deleting logs/refs/heads/src-5
+*$deleting logs/refs/heads/src-4
+*$deleting logs/refs/heads/src-3
+*$deleting logs/refs/heads/src-2
+*$deleting logs/refs/heads/src
+*$deleting refs/heads/src-5
+*$deleting refs/heads/src-4
+*$deleting refs/heads/src-3
+*$deleting refs/heads/src-2
+*$deleting refs/heads/src
+.d..t...${rsx} logs/refs/heads/
+.d..t...${rsx} refs/heads/"
+
+
+WVSTART "rm /foo/bar (lone save - equivalent to rm /foo)"
+WVPASS rm -rf bup bup-baseline src
+WVPASS bup init
+WVPASS mkdir src
+WVPASS echo twisty-maze > src/1
+WVPASS bup index src
+WVPASS bup save -n src src
+save1="$(WVPASS bup ls src | head -n 1)" || exit $?
+WVPASS "$top"/t/sync-tree bup/ bup-baseline/
+WVPASS bup tick # Make sure we always get the timestamp changes below
+WVFAIL bup rm --unsafe /src/latest
+WVPASS bup rm --unsafe /src/"$save1"
+WVPASSEQ "$(compare-trees bup/ bup-baseline/)" \
+"*$deleting logs/refs/heads/src
+*$deleting refs/heads/src
+.d..t...${rsx} logs/refs/heads/
+.d..t...${rsx} refs/heads/"
+
+
+verify-changes-caused-by-rewriting-save()
+(
+    local before="$1" after="$2"
+    local tmpdir  # declared separately so "|| exit" sees wvmktempdir's status
+    tmpdir="$(WVPASS wvmktempdir)" || exit $?
+    (WVPASS cd "$before" && WVPASS find . | WVPASS sort) > "$tmpdir/before"
+    (WVPASS cd "$after" && WVPASS find . | WVPASS sort) > "$tmpdir/after"
+    new_paths="$(WVPASS comm -13 "$tmpdir/before" "$tmpdir/after")" || exit $?
+    new_idx="$(echo "$new_paths" | WVPASS grep -E '^\./objects/pack/pack-.*\.idx$' | cut -b 3-)"
+    new_pack="$(echo "$new_paths" | WVPASS grep -E '^\./objects/pack/pack-.*\.pack$' | cut -b 3-)"
+    WVPASSEQ "$(compare-trees "$after/" "$before/")" \
+">fcst...${rsx} logs/refs/heads/src
+.d..t...${rsx} objects/
+.d..t...${rsx} objects/pack/
+>fcst...${rsx} objects/pack/bup.bloom
+>f+++++++${plusx} $new_idx
+>f+++++++${plusx} $new_pack
+.d..t...${rsx} refs/heads/
+>fc.t...${rsx} refs/heads/src"
+    WVPASS rm -rf "$tmpdir"
+)
+
+commit-hash-n()
+{
+    local n="$1" repo="$2" branch="$3"
+    GIT_DIR="$repo" WVPASS git rev-list --reverse "$branch" \
+        | WVPASS awk "FNR == $n"
+}
+
+rm-safe-cinfo()
+{
+    local n="$1" repo="$2" branch="$3" hash
+    hash="$(commit-hash-n "$n" "$repo" "$branch")" || exit $?
+    local fmt='Tree: %T%n'
+    fmt="${fmt}Author: %an <%ae> %ai%n"
+    fmt="${fmt}Committer: %cn <%ce> %ci%n"
+    fmt="${fmt}%n%s%n%b"
+    GIT_DIR="$repo" WVPASS git log -n1 --pretty=format:"$fmt" "$hash"
+}
+
+
+WVSTART 'rm /foo/BAR (setup)'
+WVPASS rm -rf bup bup-baseline src
+WVPASS bup init
+WVPASS mkdir src
+WVPASS echo twisty-maze > src/1
+WVPASS bup index src
+WVPASS bup save -n src src
+WVPASS echo twisty-maze > src/2
+WVPASS bup index src
+WVPASS bup tick
+WVPASS bup save -n src src
+WVPASS echo twisty-maze > src/3
+WVPASS bup index src
+WVPASS bup tick
+WVPASS bup save -n src src
+WVPASS mv bup bup-baseline
+WVPASS bup tick # Make sure we always get the timestamp changes below
+
+
+WVSTART "rm /foo/BAR (first of many)"
+WVPASS "$top"/t/sync-tree bup-baseline/ bup/
+victim="$(WVPASS bup ls src | head -n 1)" || exit $?
+WVPASS bup rm --unsafe /src/"$victim"
+verify-changes-caused-by-rewriting-save bup-baseline bup
+WVPASSEQ 2 $(git rev-list src | wc -l)
+WVPASSEQ "$(rm-safe-cinfo 1 bup src)" "$(rm-safe-cinfo 2 bup-baseline src)"
+WVPASSEQ "$(rm-safe-cinfo 2 bup src)" "$(rm-safe-cinfo 3 bup-baseline src)"
+
+
+WVSTART "rm /foo/BAR (one of many)"
+WVPASS "$top"/t/sync-tree bup-baseline/ bup/
+victim="$(WVPASS bup ls src | tail -n +2 | head -n 1)" || exit $?
+WVPASS bup rm --unsafe /src/"$victim"
+verify-changes-caused-by-rewriting-save bup-baseline bup
+WVPASSEQ 2 $(git rev-list src | wc -l)
+WVPASSEQ "$(commit-hash-n 1 bup src)" "$(commit-hash-n 1 bup-baseline src)"
+WVPASSEQ "$(rm-safe-cinfo 2 bup src)" "$(rm-safe-cinfo 3 bup-baseline src)"
+
+
+WVSTART "rm /foo/BAR (last of many)"
+WVPASS "$top"/t/sync-tree bup-baseline/ bup/
+victim="$(WVPASS bup ls src | tail -n 2 | head -n 1)" || exit $?
+WVPASS bup rm --unsafe -vv /src/"$victim"
+WVPASSEQ "$(compare-trees bup/ bup-baseline/)" \
+">fcst...${rsx} logs/refs/heads/src
+.d..t...${rsx} refs/heads/
+>fc.t...${rsx} refs/heads/src"
+WVPASSEQ 2 $(git rev-list src | wc -l)
+WVPASSEQ "$(commit-hash-n 1 bup src)" "$(commit-hash-n 1 bup-baseline src)"
+WVPASSEQ "$(commit-hash-n 2 bup src)" "$(commit-hash-n 2 bup-baseline src)"
+
+
+# FIXME: test that committer changes when rewriting, when appropriate.
+
+WVPASS rm -rf "$tmpdir"