From 38ab72c98ea162df2c8c2312cf48c889c9e0d3f8 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Sun, 10 Jan 2016 13:44:04 -0600 Subject: [PATCH] Never omit explicitly named mounts during index -x Assuming mounts like this: / /usr and an index invocation like "bup index -x / /usr", don't skip indexing /usr. Previously bup would skip /usr when -x was specified because reduce_paths() would drop /usr from the list of paths to index, assuming that /usr would be covered when indexing /. However, when /usr was actually reached, -x would cause it to be skipped. To fix that, exempt all mount points mentioned on the command line from -x during filesystem traversal, expand the (root only) tests to cover this situation, and check some corner cases. Thanks to Wayne Scott for reporting the problem, and Yves-Alexis Perez for a reminder. Signed-off-by: Rob Browning Tested-by: Rob Browning --- Documentation/bup-index.md | 2 +- cmd/index-cmd.py | 24 +++++---- lib/bup/drecurse.py | 18 ++++--- lib/bup/index.py | 35 ++++++++---- t/test-xdev.sh | 107 +++++++++++++++++++++++++++++++------ 5 files changed, 144 insertions(+), 42 deletions(-) diff --git a/Documentation/bup-index.md b/Documentation/bup-index.md index 38eee92..accbfec 100644 --- a/Documentation/bup-index.md +++ b/Documentation/bup-index.md @@ -139,7 +139,7 @@ does, due to the accommodations described above. format to the `-l` option to `ls`(1). -x, \--xdev, \--one-file-system -: don't cross filesystem boundaries when recursing through the +: don't cross filesystem boundaries when traversing the filesystem -- though as with tar and rsync, the mount points themselves will still be indexed. Only applicable if you're using `-u`. 
diff --git a/cmd/index-cmd.py b/cmd/index-cmd.py index 8390452..848eeaa 100755 --- a/cmd/index-cmd.py +++ b/cmd/index-cmd.py @@ -8,6 +8,7 @@ exec "$bup_python" "$0" ${1+"$@"} import sys, stat, time, os, errno, re from bup import metadata, options, git, index, drecurse, hlinkdb +from bup.drecurse import recursive_dirlist from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE from bup.helpers import (handle_ctrl_c, log, parse_excludes, parse_rx_excludes, progress, qprogress, saved_errors) @@ -70,7 +71,7 @@ def clear_index(indexfile): raise -def update_index(top, excluded_paths, exclude_rxs): +def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions): # tmax and start must be epoch nanoseconds. tmax = (time.time() - 1) * 10**9 ri = index.Reader(indexfile) @@ -89,10 +90,12 @@ def update_index(top, excluded_paths, exclude_rxs): total = 0 bup_dir = os.path.abspath(git.repo()) index_start = time.time() - for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev, - bup_dir=bup_dir, - excluded_paths=excluded_paths, - exclude_rxs=exclude_rxs): + for path, pst in recursive_dirlist([top], + xdev=opt.xdev, + bup_dir=bup_dir, + excluded_paths=excluded_paths, + exclude_rxs=exclude_rxs, + xdev_exceptions=xdev_exceptions): if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)): sys.stdout.write('%s\n' % path) sys.stdout.flush() @@ -253,15 +256,14 @@ if opt.clear: log('clear: clearing index.\n') clear_index(indexfile) -excluded_paths = parse_excludes(flags, o.fatal) -exclude_rxs = parse_rx_excludes(flags, o.fatal) -paths = index.reduce_paths(extra) - if opt.update: if not extra: o.fatal('update mode (-u) requested but no paths given') - for (rp,path) in paths: - update_index(rp, excluded_paths, exclude_rxs) + excluded_paths = parse_excludes(flags, o.fatal) + exclude_rxs = parse_rx_excludes(flags, o.fatal) + xexcept = index.unique_resolved_paths(extra) + for rp, path in index.reduce_paths(extra): + update_index(rp, excluded_paths, exclude_rxs, 
xdev_exceptions=xexcept) if opt['print'] or opt.status or opt.modified: for (name, ent) in index.Reader(indexfile).filter(extra or ['']): diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py index 37e189a..f52d8ec 100644 --- a/lib/bup/drecurse.py +++ b/lib/bup/drecurse.py @@ -54,7 +54,8 @@ def _dirlist(): def _recursive_dirlist(prepend, xdev, bup_dir=None, excluded_paths=None, - exclude_rxs=None): + exclude_rxs=None, + xdev_exceptions=frozenset()): for (name,pst) in _dirlist(): path = prepend + name if excluded_paths: @@ -68,7 +69,8 @@ def _recursive_dirlist(prepend, xdev, bup_dir=None, if os.path.normpath(path) == bup_dir: debug1('Skipping BUP_DIR.\n') continue - if xdev != None and pst.st_dev != xdev: + if xdev != None and pst.st_dev != xdev \ + and path not in xdev_exceptions: debug1('Skipping contents of %r: different filesystem.\n' % path) else: try: @@ -79,14 +81,17 @@ def _recursive_dirlist(prepend, xdev, bup_dir=None, for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev, bup_dir=bup_dir, excluded_paths=excluded_paths, - exclude_rxs=exclude_rxs): + exclude_rxs=exclude_rxs, + xdev_exceptions=xdev_exceptions): yield i os.chdir('..') yield (path, pst) -def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None, - exclude_rxs=None): +def recursive_dirlist(paths, xdev, bup_dir=None, + excluded_paths=None, + exclude_rxs=None, + xdev_exceptions=frozenset()): startdir = OsFile('.') try: assert(type(paths) != type('')) @@ -115,7 +120,8 @@ def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None, for i in _recursive_dirlist(prepend=prepend, xdev=xdev, bup_dir=bup_dir, excluded_paths=excluded_paths, - exclude_rxs=exclude_rxs): + exclude_rxs=exclude_rxs, + xdev_exceptions=xdev_exceptions): yield i startdir.fchdir() else: diff --git a/lib/bup/index.py b/lib/bup/index.py index a315e88..9c81d9c 100644 --- a/lib/bup/index.py +++ b/lib/bup/index.py @@ -543,18 +543,34 @@ class Writer: return Reader(self.tmpname) +def _slashappend_or_add_error(p, 
caller): + """Return p, after ensuring it has a single trailing slash if it names + a directory, unless there's an OSError, in which case, call + add_error() and return None.""" + try: + st = os.lstat(p) + except OSError as e: + add_error('%s: %s' % (caller, e)) + return None + else: + if stat.S_ISDIR(st.st_mode): + return slashappend(p) + return p + + +def unique_resolved_paths(paths): + "Return a collection of unique resolved paths." + rps = (_slashappend_or_add_error(resolve_parent(p), 'unique_resolved_paths') + for p in paths) + return frozenset((x for x in rps if x is not None)) + + def reduce_paths(paths): xpaths = [] for p in paths: - rp = resolve_parent(p) - try: - st = os.lstat(rp) - if stat.S_ISDIR(st.st_mode): - rp = slashappend(rp) - p = slashappend(p) - xpaths.append((rp, p)) - except OSError as e: - add_error('reduce_paths: %s' % e) + rp = _slashappend_or_add_error(resolve_parent(p), 'reduce_paths') + if rp: + xpaths.append((rp, slashappend(p) if rp.endswith('/') else p)) xpaths.sort() paths = [] @@ -568,6 +584,7 @@ def reduce_paths(paths): paths.sort(reverse=True) return paths + def merge(*iters): def pfunc(count, total): qprogress('bup: merging indexes (%d/%d)\r' % (count, total)) diff --git a/t/test-xdev.sh b/t/test-xdev.sh index c71df93..424692a 100755 --- a/t/test-xdev.sh +++ b/t/test-xdev.sh @@ -33,21 +33,39 @@ WVPASS pushd "$tmpdir" WVSTART 'drecurse' -WVPASS dd if=/dev/zero of=testfs.img bs=1M count=32 -WVPASS mkfs -F testfs.img # Don't care what type. 
-WVPASS mkdir -p src/mnt/{a,b,c} -WVPASS mount -o loop testfs.img src/mnt -WVPASS mkdir -p src/mnt/x -WVPASS touch src/1 src/mnt/2 src/mnt/x/3 - -WVPASSEQ "$(bup drecurse src | grep -vF lost+found)" "src/mnt/x/3 -src/mnt/x/ -src/mnt/2 -src/mnt/ +WVPASS dd if=/dev/zero of=testfs-1.img bs=1M count=32 +WVPASS dd if=/dev/zero of=testfs-2.img bs=1M count=32 +WVPASS mkfs -F testfs-1.img # Don't care what type (though must have symlinks) +WVPASS mkfs -F testfs-2.img # Don't care what type (though must have symlinks) +WVPASS mkdir -p src/mnt-1/hidden-1 src/mnt-2/hidden-2 +WVPASS mount -o loop testfs-1.img src/mnt-1 +WVPASS mount -o loop testfs-2.img src/mnt-2 + +WVPASS touch src/1 + +WVPASS mkdir -p src/mnt-1/x +WVPASS touch src/mnt-1/2 src/mnt-1/x/3 + +WVPASS touch src/mnt-2/4 + +(WVPASS cd src && WVPASS ln -s mnt-2 mnt-link) +(WVPASS cd src && WVPASS ln -s . top) + +WVPASSEQ "$(bup drecurse src | grep -vF lost+found)" "src/top +src/mnt-link +src/mnt-2/4 +src/mnt-2/ +src/mnt-1/x/3 +src/mnt-1/x/ +src/mnt-1/2 +src/mnt-1/ src/1 src/" -WVPASSEQ "$(bup drecurse -x src)" "src/mnt/ +WVPASSEQ "$(bup drecurse -x src)" "src/top +src/mnt-link +src/mnt-2/ +src/mnt-1/ src/1 src/" @@ -60,6 +78,7 @@ WVPASS bup restore -C src-restore "/src/latest$(pwd)/" WVPASS test -d src-restore/src WVPASS "$top/t/compare-trees" -c src/ src-restore/src/ +# Test -x when none of the mount points are explicitly indexed WVPASS rm -r "$BUP_DIR" src-restore WVPASS bup init WVPASS bup index -x src @@ -70,8 +89,66 @@ WVPASS test -d src-restore/src WVPASSEQ "$(cd src-restore/src && find . -not -name lost+found | LC_ALL=C sort)" \ ". ./1 -./mnt" +./mnt-1 +./mnt-2 +./mnt-link +./top" -WVPASS popd -WVPASS umount "$tmpdir/src/mnt" +# Test -x when a mount point is explicitly indexed. This should +# include the mount. 
+WVPASS rm -r "$BUP_DIR" src-restore +WVPASS bup init +WVPASS bup index -x src src/mnt-2 +WVPASS bup save -n src src +WVPASS mkdir src-restore +WVPASS bup restore -C src-restore "/src/latest$(pwd)/" +WVPASS test -d src-restore/src +WVPASSEQ "$(cd src-restore/src && find . -not -name lost+found | LC_ALL=C sort)" \ +". +./1 +./mnt-1 +./mnt-2 +./mnt-2/4 +./mnt-link +./top" + +# Test -x when a direct link to a mount point is explicitly indexed. +# This should *not* include the mount. +WVPASS rm -r "$BUP_DIR" src-restore +WVPASS bup init +WVPASS bup index -x src src/mnt-link +WVPASS bup save -n src src +WVPASS mkdir src-restore +WVPASS bup restore -C src-restore "/src/latest$(pwd)/" +WVPASS test -d src-restore/src +WVPASSEQ "$(cd src-restore/src && find . -not -name lost+found | LC_ALL=C sort)" \ +". +./1 +./mnt-1 +./mnt-2 +./mnt-link +./top" + +# Test -x when a path that resolves to a mount point is explicitly +# indexed (i.e. dir symlinks that redirect the leaf to a mount point). +# This should include the mount. +WVPASS rm -r "$BUP_DIR" src-restore +WVPASS bup init +WVPASS bup index -x src src/top/top/mnt-2 +WVPASS bup save -n src src +WVPASS mkdir src-restore +WVPASS bup restore -C src-restore "/src/latest$(pwd)/" +WVPASS test -d src-restore/src +WVPASSEQ "$(cd src-restore/src && find . -not -name lost+found | LC_ALL=C sort)" \ +". +./1 +./mnt-1 +./mnt-2 +./mnt-2/4 +./mnt-link +./top" + +WVPASS cd "$top" +WVPASS umount "$tmpdir/src/mnt-1" +WVPASS umount "$tmpdir/src/mnt-2" WVPASS rm -r "$tmpdir" -- 2.39.2