arthur.barton.de Git - bup.git/commitdiff
Split directory recursion stuff from cmd-index.py into drecurse.py.
author Avery Pennarun <apenwarr@gmail.com>
Wed, 3 Feb 2010 21:42:48 +0000 (16:42 -0500)
committer Avery Pennarun <apenwarr@gmail.com>
Wed, 3 Feb 2010 21:42:48 +0000 (16:42 -0500)
Also add a new command, 'bup drecurse', which just recurses through a
directory tree and prints all the filenames.  This is useful for timing
performance vs. the native 'find' command.

The result is a bit embarrassing; for my home directory of about 188000
files, drecurse is about 10x slower:

$ time bup drecurse -q ~
real 0m2.935s
user 0m2.312s
sys 0m0.580s

$ time find ~ -printf ''
real 0m0.385s
user 0m0.096s
sys 0m0.284s

$ time find ~ -printf '%s\n' >/dev/null
real 0m0.662s
user 0m0.208s
sys 0m0.456s

Makefile
cmd-drecurse.py [new file with mode: 0755]
cmd-index.py
drecurse.py [new file with mode: 0644]
helpers.py
index.py

index 8b3be5520f3ac4d6628cc2697a1b9b5e3ab7cc19..a007484646be1f3206681fdf97534ba11c522e7f 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ endif
 default: all
 
 all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
-       bup-midx bup-fuse bup-ls bup-damage bup-fsck bup-margin \
+       bup-midx bup-fuse bup-ls bup-damage bup-fsck bup-margin bup-drecurse \
        bup memtest randomgen$(EXT) _hashsplit$(SOEXT)
 
 randomgen$(EXT): randomgen.o
diff --git a/cmd-drecurse.py b/cmd-drecurse.py
new file mode 100755 (executable)
index 0000000..ffe2505
--- /dev/null
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+import options, drecurse
+from helpers import *
+
+# Option specification handed to options.Options() below.
+optspec = """
+bup drecurse <path>
+--
+x,xdev   don't cross filesystem boundaries
+q,quiet  don't actually print filenames
+"""
+o = options.Options('bup drecurse', optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+# presumably 'extra' holds the leftover non-option arguments -- confirm
+# against options.py
+if len(extra) != 1:
+    log("drecurse: exactly one filename expected\n")
+    # NOTE(review): assumes o.usage() terminates the process; if it returns,
+    # execution falls through to the loop below -- confirm
+    o.usage()
+
+# The traversal always runs in full; --quiet only suppresses the printing,
+# which is what makes this command usable for timing the recursion itself.
+for (name,st) in drecurse.recursive_dirlist(extra, opt.xdev):
+    if not opt.quiet:
+        print name
index fc6805fb0e1fb52d9e1cf34c4b2c671e6c2a801e..db97bed94cec3b8ebb04c24d60962e0b2cf93882 100755 (executable)
@@ -1,103 +1,9 @@
 #!/usr/bin/env python
 import os, sys, stat, time
-import options, git, index
+import options, git, index, drecurse
 from helpers import *
 
 
-try:
-    O_LARGEFILE = os.O_LARGEFILE
-except AttributeError:
-    O_LARGEFILE = 0
-
-
-class OsFile:
-    def __init__(self, path):
-        self.fd = None
-        self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
-        
-    def __del__(self):
-        if self.fd:
-            fd = self.fd
-            self.fd = None
-            os.close(fd)
-
-    def fchdir(self):
-        os.fchdir(self.fd)
-
-
-saved_errors = []
-def add_error(e):
-    saved_errors.append(e)
-    log('\n%s\n' % e)
-
-
-# the use of fchdir() and lstat() are for two reasons:
-#  - help out the kernel by not making it repeatedly look up the absolute path
-#  - avoid race conditions caused by doing listdir() on a changing symlink
-def dirlist(path):
-    l = []
-    try:
-        OsFile(path).fchdir()
-    except OSError, e:
-        add_error(e)
-        return l
-    for n in os.listdir('.'):
-        try:
-            st = os.lstat(n)
-        except OSError, e:
-            add_error(Exception('in %s: %s' % (index.realpath(path), str(e))))
-            continue
-        if stat.S_ISDIR(st.st_mode):
-            n += '/'
-        l.append((os.path.join(path, n), st))
-    l.sort(reverse=True)
-    return l
-
-
-def _recursive_dirlist(path, xdev):
-    olddir = OsFile('.')
-    for (path,pst) in dirlist(path):
-        if xdev != None and pst.st_dev != xdev:
-            log('Skipping %r: different filesystem.\n' % path)
-            continue
-        if stat.S_ISDIR(pst.st_mode):
-            for i in _recursive_dirlist(path, xdev=xdev):
-                yield i
-        yield (path,pst)
-    olddir.fchdir()
-
-
-def _matchlen(a,b):
-    bi = iter(b)
-    count = 0
-    for ai in a:
-        try:
-            if bi.next() == ai:
-                count += 1
-        except StopIteration:
-            break
-    return count
-
-
-def recursive_dirlist(paths):
-    last = ()
-    for path in paths:
-        pathsplit = index.pathsplit(path)
-        while _matchlen(pathsplit, last) < len(last):
-            yield (''.join(last), None)
-            last.pop()
-        pst = os.lstat(path)
-        if opt.xdev:
-            xdev = pst.st_dev
-        else:
-            xdev = None
-        if stat.S_ISDIR(pst.st_mode):
-            for i in _recursive_dirlist(path, xdev=xdev):
-                yield i
-        yield (path,pst)
-        last = pathsplit[:-1]
-
-
 def merge_indexes(out, r1, r2):
     log('bup: merging indexes.\n')
     for e in index._last_writer_wins_iter([r1, r2]):
@@ -132,7 +38,7 @@ def update_index(top):
 
     #log('doing: %r\n' % paths)
 
-    for (path,pst) in recursive_dirlist([top]):
+    for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev):
         #log('got: %r\n' % path)
         if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
             sys.stdout.write('%s\n' % path)
diff --git a/drecurse.py b/drecurse.py
new file mode 100644 (file)
index 0000000..b1f9e16
--- /dev/null
@@ -0,0 +1,92 @@
+import stat
+from helpers import *
+
+# Fall back to 0 when the os module has no O_LARGEFILE attribute, so the
+# name can always be OR'ed into the os.open() flags used by OsFile.
+try:
+    O_LARGEFILE = os.O_LARGEFILE
+except AttributeError:
+    O_LARGEFILE = 0
+
+
+class OsFile:
+    """Own a raw OS file descriptor for `path` and close it on destruction.
+
+    The descriptor is opened read-only with O_NOFOLLOW (plus O_LARGEFILE
+    when the platform defines it).  os.open() errors (OSError) propagate
+    to the caller.
+    """
+    def __init__(self, path):
+        # set fd before opening so __del__ is safe even if os.open() raises
+        self.fd = None
+        self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
+
+    def __del__(self):
+        # compare against None rather than testing truthiness: descriptor 0
+        # is a valid fd and would otherwise never be closed
+        if self.fd is not None:
+            fd = self.fd
+            self.fd = None
+            os.close(fd)
+
+    def fchdir(self):
+        """Change the current directory to the one this descriptor refers to."""
+        os.fchdir(self.fd)
+
+
+# the use of fchdir() and lstat() are for two reasons:
+#  - help out the kernel by not making it repeatedly look up the absolute path
+#  - avoid race conditions caused by doing listdir() on a changing symlink
+def dirlist(path):
+    """Return the entries of directory `path` as a list of (name, lstat) pairs.
+
+    Each name is `path` joined with the entry name; directories get a
+    trailing '/'.  The list is sorted in reverse order.
+
+    Side effect: the current directory is changed to `path` and is NOT
+    restored.  Errors are reported through add_error() rather than raised:
+    a directory that cannot be entered gives an empty list, and an entry
+    that cannot be lstat()ed is left out of the result.
+    """
+    l = []
+    try:
+        OsFile(path).fchdir()
+    except OSError, e:
+        add_error(e)
+        return l
+    for n in os.listdir('.'):
+        try:
+            st = os.lstat(n)
+        except OSError, e:
+            # this module does not import 'index', so index.realpath() would
+            # raise NameError here; os.path.realpath() is always in scope
+            where = os.path.realpath(path)
+            add_error(Exception('in %s: %s' % (where, str(e))))
+            continue
+        if stat.S_ISDIR(st.st_mode):
+            n += '/'
+        l.append((os.path.join(path, n), st))
+    l.sort(reverse=True)
+    return l
+
+
+def _recursive_dirlist(path, xdev):
+    """Yield (name, lstat) for everything below directory `path`, depth-first.
+
+    The contents of a subdirectory are yielded before the subdirectory
+    itself.  When `xdev` is not None, any entry whose st_dev differs from it
+    is logged and skipped (neither yielded nor descended into).
+
+    The directory that is current when iteration starts is made current
+    again once the generator has been run to exhaustion.
+    """
+    olddir = OsFile('.')
+    for (name,pst) in dirlist(path):
+        if xdev is not None and pst.st_dev != xdev:
+            log('Skipping %r: different filesystem.\n' % name)
+            continue
+        if stat.S_ISDIR(pst.st_mode):
+            # dirlist() left us inside `path`, but `name` is spelled relative
+            # to the directory we started in; go back there first, otherwise
+            # a relative start path cannot be opened from nested directories
+            olddir.fchdir()
+            for i in _recursive_dirlist(name, xdev=xdev):
+                yield i
+        yield (name,pst)
+    olddir.fchdir()
+
+
+def _matchlen(a,b):
+    """Count the positions at which `a` and `b` hold equal items.
+
+    Items are compared pairwise, stopping at the end of the shorter input.
+    """
+    matches = 0
+    for (ai, bi) in zip(a, b):
+        if bi == ai:
+            matches += 1
+    return matches
+
+
+def recursive_dirlist(paths, xdev):
+    """Yield (name, lstat) for each path in `paths` and everything below it.
+
+    `paths` must be a sequence of path strings, not a single string (this is
+    asserted).  If `xdev` is true, the walk below each path is restricted to
+    the device that path itself lives on.  A directory's contents are
+    yielded before the directory.
+
+    Before handling a path that does not lie under the parent directories
+    of the previous path, a (dirname, None) marker is yielded for each of
+    those parent directories being left, deepest first.
+
+    os.lstat() failures on the given paths propagate as OSError.
+    """
+    assert(type(paths) != type(''))
+    last = []  # parent components of the previous path; popped from below
+    for path in paths:
+        ps = pathsplit(path)
+        while _matchlen(ps, last) < len(last):
+            yield (''.join(last), None)
+            last.pop()
+        pst = os.lstat(path)
+        # keep the per-path device in its own variable: overwriting the
+        # `xdev` flag would let one path's st_dev (0 is false) decide whether
+        # the following paths are restricted at all
+        if xdev:
+            dev = pst.st_dev
+        else:
+            dev = None
+        if stat.S_ISDIR(pst.st_mode):
+            for i in _recursive_dirlist(path, xdev=dev):
+                yield i
+        yield (path,pst)
+        last = ps[:-1]
+
+
index 3209bfb0fca96530d1edd8afd03a1cb594080577..7f54430ac9a298f1637a694b4201162c3b6afa35 100644 (file)
@@ -37,6 +37,17 @@ def readpipe(argv):
     return r
 
 
+# FIXME: this function isn't very generic, because it splits the filename
+# in an odd way and depends on a terminating '/' to indicate directories.
+# But it's used in a couple of places, so let's put it here.
+def pathsplit(p):
+    """Split path `p` into components, keeping the '/' on each directory part.
+
+    For example 'a/b/c' gives ['a/', 'b/', 'c'] and 'a/b/' gives
+    ['a/', 'b/'].
+    """
+    pieces = p.split('/')
+    parts = [d + '/' for d in pieces[:-1]]
+    # an empty final piece just means `p` ended in '/' (or was empty)
+    if pieces[-1] != '':
+        parts.append(pieces[-1])
+    return parts
+
+
 _username = None
 def username():
     global _username
@@ -169,3 +180,9 @@ def mmap_read(f, len = 0):
 
 def mmap_readwrite(f, len = 0):
     return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
+
+
+# every error handed to add_error() so far, in the order it was reported
+saved_errors = []
+def add_error(e):
+    """Record `e` in saved_errors and log it immediately on a line of its own."""
+    saved_errors.append(e)
+    log('\n%s\n' % e)
index c41ac77776cd0f6a8733b1eb9ba1a7d5206b2ad8..9a6475fd18e2e8d92dc775c102bf4237f6f96f4e 100644 (file)
--- a/index.py
+++ b/index.py
@@ -139,12 +139,13 @@ class Reader:
         if f:
             b = f.read(len(INDEX_HDR))
             if b != INDEX_HDR:
-                raise Error('%s: header: expected %r, got %r'
+                log('warning: %s: header: expected %r, got %r'
                                  % (filename, INDEX_HDR, b))
-            st = os.fstat(f.fileno())
-            if st.st_size:
-                self.m = mmap_readwrite(f)
-                self.writable = True
+            else:
+                st = os.fstat(f.fileno())
+                if st.st_size:
+                    self.m = mmap_readwrite(f)
+                    self.writable = True
 
     def __del__(self):
         self.close()
@@ -216,14 +217,6 @@ def _last_writer_wins_iter(iters):
         l = filter(None, l)
 
 
-def pathsplit(p):
-    l = p.split('/')
-    l = list([i+'/' for i in l[:-1]]) + l[-1:]
-    if l[-1] == '':
-        l.pop()  # extra blank caused by terminating '/'
-    return l
-
-
 class Writer:
     def __init__(self, filename):
         self.stack = []