arthur.barton.de Git - bup.git/commitdiff
Add support for "bup index --exclude-rx <pattern> ...".
author Rob Browning <rlb@defaultvalue.org>
Sat, 21 Jul 2012 20:09:47 +0000 (15:09 -0500)
committer Rob Browning <rlb@defaultvalue.org>
Sun, 24 Mar 2013 00:57:29 +0000 (19:57 -0500)
When --exclude-rx <pattern> is provided to bup index, exclude any path
matching <pattern>, which must be a Python regular expression
(http://docs.python.org/library/re.html).  The pattern will be
compared against the full path, without anchoring, so "x/y" will match
"ox/yard" or "box/yards".  To exclude the contents of /tmp, but not
the directory itself, use "^/tmp/.".

You may check the behavior at runtime by setting BUP_DEBUG=2 in the
environment.

Thanks to Zoran Zaric <zz@zoranzaric.de> for reporting a bug in an
earlier version of this patch.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Reviewed-by: Zoran Zaric <zz@zoranzaric.de>
Documentation/bup-index.md
cmd/index-cmd.py
lib/bup/drecurse.py
lib/bup/helpers.py
t/test.sh

index a2a49797b9f76faf5599bf73112d0d183c646926..fe783b243affaf9353a4507aa03b68b3658034aa 100644 (file)
@@ -10,7 +10,7 @@ bup-index - print and/or update the bup filesystem index
 
 bup index \<-p|-m|-s|-u\> [-H] [-l] [-x] [\--fake-valid] [\--no-check-device]
 [\--fake-invalid] [\--check] [\--clear] [-f *indexfile*] [\--exclude *path*]
-[\--exclude-from *filename*] [-v] \<filenames...\>
+[\--exclude-from *filename*] [\--exclude-rx *pattern*] [-v] \<filenames...\>
 
 # DESCRIPTION
 
@@ -153,6 +153,21 @@ does, due to the accommodations described above.
 :   a file that contains exclude paths (can be used more
     than once)
 
+\--exclude-rx=*pattern*
+:   exclude any path matching *pattern*, which must be a Python regular
+    expression (http://docs.python.org/library/re.html).  The pattern
+    will be compared against the full path, without anchoring, so
+    "x/y" will match "ox/yard" or "box/yards".  To exclude the
+    contents of /tmp, but not the directory itself, use
+    "^/tmp/.".  This option can be specified more than once.
+
+    Examples:
+
+      * '/foo$' - exclude any file named foo
+      * '/foo/$' - exclude any directory named foo
+      * '/foo/.' - exclude the content of any directory named foo
+      * '^/tmp/.' - exclude root-level /tmp's content, but not /tmp itself
+
 \--no-check-device
 :   don't mark a an entry invalid if the device number (stat(2)
     st_dev) changes.  This can be useful when indexing remote,
index 6f743c6555034fdacc0bf556f978be29f77dc54f..4904c0110747f0f23b7babb90668691b50b17281 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-import sys, stat, time, os, errno
+import sys, stat, time, os, errno, re
 from bup import metadata, options, git, index, drecurse, hlinkdb
 from bup.helpers import *
 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
@@ -82,7 +82,8 @@ def update_index(top, excluded_paths):
     bup_dir = os.path.abspath(git.repo())
     for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
                                                  bup_dir=bup_dir,
-                                                 excluded_paths=excluded_paths):
+                                                 excluded_paths=excluded_paths,
+                                                 exclude_rxs=exclude_rxs):
         if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
             sys.stdout.write('%s\n' % path)
             sys.stdout.flush()
@@ -186,6 +187,7 @@ fake-invalid mark all index entries as invalid
 f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
 exclude=   a path to exclude from the backup (can be used more than once)
 exclude-from= a file that contains exclude paths (can be used more than once)
+exclude-rx= skip paths that match the unanchored regular expression
 v,verbose  increase log output (can be used more than once)
 x,xdev,one-file-system  don't cross filesystem boundaries
 """
@@ -225,6 +227,7 @@ if opt.clear:
     clear_index(indexfile)
 
 excluded_paths = parse_excludes(flags, o.fatal)
+exclude_rxs = parse_rx_excludes(flags, o.fatal)
 paths = index.reduce_paths(extra)
 
 if opt.update:
index 5d497709531350176465d853b209376aa78e1344..694c403594d1aec45c6979629c20314fd4c948ea 100644 (file)
@@ -49,12 +49,17 @@ def _dirlist():
     return l
 
 
-def _recursive_dirlist(prepend, xdev, bup_dir=None, excluded_paths=None):
+def _recursive_dirlist(prepend, xdev, bup_dir=None,
+                       excluded_paths=None,
+                       exclude_rxs=None):
     for (name,pst) in _dirlist():
+        path = prepend + name
         if excluded_paths:
-            if os.path.normpath(prepend+name) in excluded_paths:
-                debug1('Skipping %r: excluded.\n' % (prepend+name))
+            if os.path.normpath(path) in excluded_paths:
+                debug1('Skipping %r: excluded.\n' % path)
                 continue
+        if exclude_rxs and should_rx_exclude_path(path, exclude_rxs):
+            continue
         if name.endswith('/'):
             if xdev != None and pst.st_dev != xdev:
                 debug1('Skipping %r: different filesystem.\n' % (prepend+name))
@@ -70,13 +75,15 @@ def _recursive_dirlist(prepend, xdev, bup_dir=None, excluded_paths=None):
             else:
                 for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev,
                                             bup_dir=bup_dir,
-                                            excluded_paths=excluded_paths):
+                                            excluded_paths=excluded_paths,
+                                            exclude_rxs=exclude_rxs):
                     yield i
                 os.chdir('..')
         yield (prepend + name, pst)
 
 
-def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None):
+def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None,
+                      exclude_rxs=None):
     startdir = OsFile('.')
     try:
         assert(type(paths) != type(''))
@@ -104,7 +111,8 @@ def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None):
                 prepend = os.path.join(path, '')
                 for i in _recursive_dirlist(prepend=prepend, xdev=xdev,
                                             bup_dir=bup_dir,
-                                            excluded_paths=excluded_paths):
+                                            excluded_paths=excluded_paths,
+                                            exclude_rxs=exclude_rxs):
                     yield i
                 startdir.fchdir()
             else:
index c136aeb1c08bc0788f063438578184bc0eec5354..339c6d735277d0400d2bcbcdfc759192588704b3 100644 (file)
@@ -728,6 +728,28 @@ def parse_excludes(options, fatal):
     return excluded_paths
 
 
+def parse_rx_excludes(options, fatal):
+    """Compile every --exclude-rx pattern found in options.
+
+    options is a sequence of (flag, value) pairs; only the pairs
+    whose flag is '--exclude-rx' are used.  Return the compiled
+    patterns as a list, in the order they were given.
+
+    A pattern that re.compile() rejects is reported by calling
+    fatal() with a message naming the pattern and the error.  fatal
+    is presumably Option.fatal(), which is not expected to return; if
+    it does return, the bad pattern is left out of the result."""
+    rxs = []
+    for flag, pattern in options:
+        if flag != '--exclude-rx':
+            continue
+        try:
+            rxs.append(re.compile(pattern))
+        except re.error, ex:
+            # Use the fatal callback we were handed rather than a
+            # global option parser, and supply one %s per argument.
+            fatal('invalid --exclude-rx pattern (%s): %s' % (pattern, ex))
+    return rxs
+
+
+def should_rx_exclude_path(path, exclude_rxs):
+    """Tell whether any compiled pattern in exclude_rxs matches path.
+
+    Patterns are tried in order with search(), so a match anywhere
+    in path counts.  The first match is reported via debug1() and
+    True is returned; False is returned when nothing matches."""
+    for candidate in exclude_rxs:
+        if not candidate.search(path):
+            continue
+        message = ('Skipping %r: excluded by rx pattern %r.\n'
+                   % (path, candidate.pattern))
+        debug1(message)
+        return True
+    return False
+
+
 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 # that resolve against the current filesystem in the strip/graft
 # functions for example, but elsewhere as well.  I suspect bup's not
index 32173c83ad813463854efd60e048462f1230afb9..362c51922016becbce8ec7e82e65123ae5ea7b74 100755 (executable)
--- a/t/test.sh
+++ b/t/test.sh
@@ -691,7 +691,6 @@ WVSTART "save disjoint top-level directories"
     WVPASSEQ "$(bup ls -a src/latest)" "$(echo -e "$top_dir/\ntmp/" | sort)"
 ) || WVFAIL
 
-
 WVSTART "clear-index"
 D=clear-index.tmp
 export BUP_DIR="$TOP/$D/.bup"
@@ -711,3 +710,88 @@ bup index -u $TOP/$D
 WVPASSEQ "$(bup index -p)" "$D/bar
 $D/
 ./"
+
+# bup index --exclude-rx ...
+#
+# Each scenario below builds a small tree in $D, indexes it with one
+# --exclude-rx pattern, saves and restores it, and compares the sorted
+# listing of the restored tree with the expected survivors.
+(
+    # NOTE: this subshell is the left-hand side of "||", so the shell
+    # ignores "set -e" inside it; every bup command is therefore
+    # checked explicitly with WVPASS.
+    set -e
+    export BUP_DIR="$TOP/buptest.tmp"
+    D=bupdata.tmp
+
+    # The pattern is anchored to the absolute path of $D/sub1/, so that
+    # directory (and hence its content) must be missing afterwards.
+    WVSTART "index --exclude-rx '^/foo' (root anchor)"
+    rm -rf "$D" "$BUP_DIR" buprestore.tmp
+    WVPASS bup init
+    mkdir $D
+    touch $D/a
+    touch $D/b
+    mkdir $D/sub1
+    mkdir $D/sub2
+    touch $D/sub1/a
+    touch $D/sub2/b
+    WVPASS bup index -u $D --exclude-rx "^$(pwd)/$D/sub1/"
+    WVPASS bup save --strip -n bupdir $D
+    WVPASS bup restore -C buprestore.tmp /bupdir/latest/
+    WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ".
+./a
+./b
+./sub2
+./sub2/b"
+
+    # '/foo$' only matches paths that end in "foo".  Directory paths
+    # carry a trailing slash, so the directory sub/foo survives while
+    # the plain file foo is dropped.
+    WVSTART "index --exclude-rx '/foo$' (non-dir, tail anchor)"
+    rm -rf "$D" "$BUP_DIR" buprestore.tmp
+    WVPASS bup init
+    mkdir $D
+    touch $D/a
+    touch $D/b
+    touch $D/foo
+    mkdir $D/sub
+    mkdir $D/sub/foo
+    touch $D/sub/foo/a
+    WVPASS bup index -u $D --exclude-rx '/foo$'
+    WVPASS bup save --strip -n bupdir $D
+    WVPASS bup restore -C buprestore.tmp /bupdir/latest/
+    WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ".
+./a
+./b
+./sub
+./sub/foo
+./sub/foo/a"
+
+    # '/foo/$' matches only a directory path ending in "foo/": the
+    # directory sub/foo is dropped together with its content, while
+    # the plain file foo is kept.
+    WVSTART "index --exclude-rx '/foo/$' (dir, tail anchor)"
+    rm -rf "$D" "$BUP_DIR" buprestore.tmp
+    WVPASS bup init
+    mkdir $D
+    touch $D/a
+    touch $D/b
+    touch $D/foo
+    mkdir $D/sub
+    mkdir $D/sub/foo
+    touch $D/sub/foo/a
+    WVPASS bup index -u $D --exclude-rx '/foo/$'
+    WVPASS bup save --strip -n bupdir $D
+    WVPASS bup restore -C buprestore.tmp /bupdir/latest/
+    WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ".
+./a
+./b
+./foo
+./sub"
+
+    # '/foo/.' needs at least one character after "foo/", so it
+    # matches what is inside sub/foo but not the directory itself.
+    WVSTART "index --exclude-rx '/foo/.' (dir content)"
+    rm -rf "$D" "$BUP_DIR" buprestore.tmp
+    WVPASS bup init
+    mkdir $D
+    touch $D/a
+    touch $D/b
+    touch $D/foo
+    mkdir $D/sub
+    mkdir $D/sub/foo
+    touch $D/sub/foo/a
+    WVPASS bup index -u $D --exclude-rx '/foo/.'
+    WVPASS bup save --strip -n bupdir $D
+    WVPASS bup restore -C buprestore.tmp /bupdir/latest/
+    WVPASSEQ "$(cd buprestore.tmp && find . | sort)" ".
+./a
+./b
+./foo
+./sub
+./sub/foo"
+) || WVFAIL