From: Rob Browning Date: Sat, 21 Jul 2012 20:09:47 +0000 (-0500) Subject: Add support for "bup index --exclude-rx ...". X-Git-Tag: bup-0.25-rc2~29 X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?p=bup.git;a=commitdiff_plain;h=cb354ed6baf6699f92717f42eb70d9bb6ff82933 Add support for "bup index --exclude-rx ...". When --exclude-rx is provided to bup index, exclude any path matching the given pattern, which must be a Python regular expression (http://docs.python.org/library/re.html). The pattern will be compared against the full path, without anchoring, so "x/y" will match "ox/yard" or "box/yards". To exclude the contents of /tmp, but not the directory itself, use "^/tmp/.". You may check the behavior at runtime by setting BUP_DEBUG=2 in the environment. Thanks to Zoran Zaric for reporting a bug in an earlier version of this patch. Signed-off-by: Rob Browning Reviewed-by: Zoran Zaric --- diff --git a/Documentation/bup-index.md b/Documentation/bup-index.md index a2a4979..fe783b2 100644 --- a/Documentation/bup-index.md +++ b/Documentation/bup-index.md @@ -10,7 +10,7 @@ bup-index - print and/or update the bup filesystem index bup index \<-p|-m|-s|-u\> [-H] [-l] [-x] [\--fake-valid] [\--no-check-device] [\--fake-invalid] [\--check] [\--clear] [-f *indexfile*] [\--exclude *path*] -[\--exclude-from *filename*] [-v] \ +[\--exclude-from *filename*] [\--exclude-rx *pattern*] [-v] \ # DESCRIPTION @@ -153,6 +153,21 @@ does, due to the accommodations described above. : a file that contains exclude paths (can be used more than once) +\--exclude-rx=*pattern* +: exclude any path matching *pattern*, which must be a Python regular + expression (http://docs.python.org/library/re.html). The pattern + will be compared against the full path, without anchoring, so + "x/y" will match "ox/yard" or "box/yards". To exclude the + contents of /tmp, but not the directory itself, use + "^/tmp/.". 
(can be specified more than once) + + Examples: + + * '/foo$' - exclude any file named foo + * '/foo/$' - exclude any directory named foo + * '/foo/.' - exclude the content of any directory named foo + * '^/tmp/.' - exclude root-level /tmp's content, but not /tmp itself + \--no-check-device : don't mark a an entry invalid if the device number (stat(2) st_dev) changes. This can be useful when indexing remote, diff --git a/cmd/index-cmd.py b/cmd/index-cmd.py index 6f743c6..4904c01 100755 --- a/cmd/index-cmd.py +++ b/cmd/index-cmd.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import sys, stat, time, os, errno +import sys, stat, time, os, errno, re from bup import metadata, options, git, index, drecurse, hlinkdb from bup.helpers import * from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE @@ -82,7 +82,8 @@ def update_index(top, excluded_paths): bup_dir = os.path.abspath(git.repo()) for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev, bup_dir=bup_dir, - excluded_paths=excluded_paths): + excluded_paths=excluded_paths, + exclude_rxs=exclude_rxs): if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)): sys.stdout.write('%s\n' % path) sys.stdout.flush() @@ -186,6 +187,7 @@ fake-invalid mark all index entries as invalid f,indexfile= the name of the index file (normally BUP_DIR/bupindex) exclude= a path to exclude from the backup (can be used more than once) exclude-from= a file that contains exclude paths (can be used more than once) +exclude-rx= skip paths that match the unanchored regular expression v,verbose increase log output (can be used more than once) x,xdev,one-file-system don't cross filesystem boundaries """ @@ -225,6 +227,7 @@ if opt.clear: clear_index(indexfile) excluded_paths = parse_excludes(flags, o.fatal) +exclude_rxs = parse_rx_excludes(flags, o.fatal) paths = index.reduce_paths(extra) if opt.update: diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py index 5d49770..694c403 100644 --- a/lib/bup/drecurse.py +++ 
b/lib/bup/drecurse.py @@ -49,12 +49,17 @@ def _dirlist(): return l -def _recursive_dirlist(prepend, xdev, bup_dir=None, excluded_paths=None): +def _recursive_dirlist(prepend, xdev, bup_dir=None, + excluded_paths=None, + exclude_rxs=None): for (name,pst) in _dirlist(): + path = prepend + name if excluded_paths: - if os.path.normpath(prepend+name) in excluded_paths: - debug1('Skipping %r: excluded.\n' % (prepend+name)) + if os.path.normpath(path) in excluded_paths: + debug1('Skipping %r: excluded.\n' % path) continue + if exclude_rxs and should_rx_exclude_path(path, exclude_rxs): + continue if name.endswith('/'): if xdev != None and pst.st_dev != xdev: debug1('Skipping %r: different filesystem.\n' % (prepend+name)) @@ -70,13 +75,15 @@ def _recursive_dirlist(prepend, xdev, bup_dir=None, excluded_paths=None): else: for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev, bup_dir=bup_dir, - excluded_paths=excluded_paths): + excluded_paths=excluded_paths, + exclude_rxs=exclude_rxs): yield i os.chdir('..') yield (prepend + name, pst) -def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None): +def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None, + exclude_rxs=None): startdir = OsFile('.') try: assert(type(paths) != type('')) @@ -104,7 +111,8 @@ def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None): prepend = os.path.join(path, '') for i in _recursive_dirlist(prepend=prepend, xdev=xdev, bup_dir=bup_dir, - excluded_paths=excluded_paths): + excluded_paths=excluded_paths, + exclude_rxs=exclude_rxs): yield i startdir.fchdir() else: diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index c136aeb..339c6d7 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -728,6 +728,28 @@ def parse_excludes(options, fatal): return excluded_paths +def parse_rx_excludes(options, fatal): + """Traverse the options and extract all rx excludes, or call + Option.fatal().""" + rxs = [v for f, v in options if f == '--exclude-rx'] + for i in 
range(len(rxs)): + try: + rxs[i] = re.compile(rxs[i]) + except re.error, ex: + fatal('invalid --exclude-rx pattern (%s): %s' % (rxs[i], ex)) + return rxs + + +def should_rx_exclude_path(path, exclude_rxs): + """Return True if path matches a regular expression in exclude_rxs.""" + for rx in exclude_rxs: + if rx.search(path): + debug1('Skipping %r: excluded by rx pattern %r.\n' + % (path, rx.pattern)) + return True + return False + + # FIXME: Carefully consider the use of functions (os.path.*, etc.) # that resolve against the current filesystem in the strip/graft # functions for example, but elsewhere as well. I suspect bup's not diff --git a/t/test.sh b/t/test.sh index 32173c8..362c519 100755 --- a/t/test.sh +++ b/t/test.sh @@ -691,7 +691,6 @@ WVSTART "save disjoint top-level directories" WVPASSEQ "$(bup ls -a src/latest)" "$(echo -e "$top_dir/\ntmp/" | sort)" ) || WVFAIL - WVSTART "clear-index" D=clear-index.tmp export BUP_DIR="$TOP/$D/.bup" @@ -711,3 +710,88 @@ bup index -u $TOP/$D WVPASSEQ "$(bup index -p)" "$D/bar $D/ ./" + +# bup index --exclude-rx ... +( + set -e + export BUP_DIR="$TOP/buptest.tmp" + D=bupdata.tmp + + WVSTART "index --exclude-rx '^/foo' (root anchor)" + rm -rf "$D" "$BUP_DIR" buprestore.tmp + WVPASS bup init + mkdir $D + touch $D/a + touch $D/b + mkdir $D/sub1 + mkdir $D/sub2 + touch $D/sub1/a + touch $D/sub2/b + WVPASS bup index -u $D --exclude-rx "^$(pwd)/$D/sub1/" + bup save --strip -n bupdir $D + bup restore -C buprestore.tmp /bupdir/latest/ + WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ". +./a +./b +./sub2 +./sub2/b" + + WVSTART "index --exclude-rx '/foo$' (non-dir, tail anchor)" + rm -rf "$D" "$BUP_DIR" buprestore.tmp + WVPASS bup init + mkdir $D + touch $D/a + touch $D/b + touch $D/foo + mkdir $D/sub + mkdir $D/sub/foo + touch $D/sub/foo/a + WVPASS bup index -u $D --exclude-rx '/foo$' + bup save --strip -n bupdir $D + bup restore -C buprestore.tmp /bupdir/latest/ + WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ". 
+./a +./b +./sub +./sub/foo +./sub/foo/a" + + WVSTART "index --exclude-rx '/foo/$' (dir, tail anchor)" + rm -rf "$D" "$BUP_DIR" buprestore.tmp + WVPASS bup init + mkdir $D + touch $D/a + touch $D/b + touch $D/foo + mkdir $D/sub + mkdir $D/sub/foo + touch $D/sub/foo/a + WVPASS bup index -u $D --exclude-rx '/foo/$' + bup save --strip -n bupdir $D + bup restore -C buprestore.tmp /bupdir/latest/ + WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ". +./a +./b +./foo +./sub" + + WVSTART "index --exclude-rx '/foo/.' (dir content)" + rm -rf "$D" "$BUP_DIR" buprestore.tmp + WVPASS bup init + mkdir $D + touch $D/a + touch $D/b + touch $D/foo + mkdir $D/sub + mkdir $D/sub/foo + touch $D/sub/foo/a + WVPASS bup index -u $D --exclude-rx '/foo/.' + bup save --strip -n bupdir $D + bup restore -C buprestore.tmp /bupdir/latest/ + WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ". +./a +./b +./foo +./sub +./sub/foo" +) || WVFAIL