arthur.barton.de Git - bup.git/commitdiff
Move python library files to lib/bup/
author		Avery Pennarun <apenwarr@gmail.com>
		Sun, 28 Feb 2010 21:17:35 +0000 (16:17 -0500)
committer	Avery Pennarun <apenwarr@gmail.com>
		Sun, 28 Feb 2010 22:49:10 +0000 (17:49 -0500)
...and update other programs so that they import them correctly from their
new location.

This is necessary so that the bup library files can eventually be installed
somewhere other than wherever the 'bup' executable ends up.  Plus it's
clearer and safer to say 'from bup import options' instead of just 'import
options', in case someone else writes an 'options' module.
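
To illustrate (using 'options' as the example; the same pattern applies to
git, helpers, client, and the rest), each command changes from:

    import options
    from helpers import *

to:

    from bup import options
    from bup.helpers import *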

I wish I could have named the directory just 'bup', but I can't; there's
already a program with that name.

Also, in the name of sanity, rename memtest.py to 'bup memtest' so that it
can get the new paths automatically.
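
For example, a wrapper in the style of main.py can prepend the lib/
directory to sys.path before dispatching any subcommand. This is only a
sketch of the idea; the actual main.py is not shown in this diff, so the
exact mechanism here is an assumption:

    # Hypothetical main.py-style path setup: make 'from bup import ...'
    # resolve to lib/bup/ next to the 'bup' executable.
    import sys, os

    exepath = os.path.split(sys.argv[0])[0] or '.'
    sys.path.insert(0, os.path.join(exepath, 'lib'))  # assumes the lib/ layout above

    from bup import options  # now found at lib/bup/options.py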

51 files changed:
Makefile
_hashsplit.c [deleted file]
bup.py [deleted file]
client.py [deleted file]
cmd-damage.py
cmd-drecurse.py
cmd-fsck.py
cmd-ftp.py
cmd-fuse.py
cmd-index.py
cmd-init.py
cmd-join.py
cmd-ls.py
cmd-margin.py
cmd-memtest.py [new file with mode: 0755]
cmd-midx.py
cmd-newliner.py
cmd-random.py
cmd-save.py
cmd-server.py
cmd-split.py
cmd-tick.py
csetup.py [deleted file]
drecurse.py [deleted file]
git.py [deleted file]
hashsplit.py [deleted file]
helpers.py [deleted file]
index.py [deleted file]
lib/bup/__init__.py [new file with mode: 0644]
lib/bup/_hashsplit.c [new file with mode: 0644]
lib/bup/client.py [new file with mode: 0644]
lib/bup/csetup.py [new file with mode: 0644]
lib/bup/drecurse.py [new file with mode: 0644]
lib/bup/git.py [new file with mode: 0644]
lib/bup/hashsplit.py [new file with mode: 0644]
lib/bup/helpers.py [new file with mode: 0644]
lib/bup/index.py [new file with mode: 0644]
lib/bup/options.py [new file with mode: 0644]
lib/bup/shquote.py [new file with mode: 0644]
lib/bup/vfs.py [new file with mode: 0644]
main.py [new file with mode: 0755]
memtest.py [deleted file]
options.py [deleted file]
shquote.py [deleted file]
t/__init__.py
t/tgit.py
t/thelpers.py
t/tindex.py
t/toptions.py
t/tshquote.py
vfs.py [deleted file]

index eacedd047f83bdeaf0fb20816ad5f22f4c80f5ac..b47dad657bfb8581b2bb15485ba14bb5f85ebd8a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -21,8 +21,8 @@ default: all
 
 all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
        bup-midx bup-fuse bup-ls bup-damage bup-fsck bup-margin bup-drecurse \
-       bup-random bup-ftp bup-newliner \
-       bup memtest _hashsplit$(SOEXT) \
+       bup-random bup-ftp bup-newliner bup-memtest \
+       bup lib/bup/_hashsplit$(SOEXT) \
        Documentation/all
        
 %/all:
@@ -31,10 +31,10 @@ all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
 %/clean:
        $(MAKE) -C $* clean
 
-_hashsplit$(SOEXT): _hashsplit.c csetup.py
+lib/bup/_hashsplit$(SOEXT): lib/bup/_hashsplit.c lib/bup/csetup.py
        @rm -f $@
-       python csetup.py build
-       cp build/*/_hashsplit$(SOEXT) .
+       cd lib/bup && python csetup.py build
+       cp lib/bup/build/*/_hashsplit$(SOEXT) lib/bup/
        
 runtests: all runtests-python runtests-cmdline
 
@@ -53,7 +53,7 @@ test: all
 %: %.o
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
        
-bup: bup.py
+bup: main.py
        rm -f $@
        ln -s $< $@
        
@@ -73,7 +73,8 @@ bup-%: cmd-%.sh
        gcc -c -o $@ $< $(CPPFLAGS) $(CFLAGS)
 
 clean: Documentation/clean
-       rm -f *.o *.so *.dll *.exe *~ .*~ *.pyc */*.pyc */*~ \
+       rm -f *.o *.so */*/*.so *.dll *.exe .*~ *~ */*~ */*/*~ \
+		*.pyc */*.pyc */*/*.pyc \
                bup bup-* randomgen memtest \
                out[12] out2[tc] tags[12] tags2[tc]
-       rm -rf *.tmp build
+       rm -rf *.tmp build lib/bup/build
diff --git a/_hashsplit.c b/_hashsplit.c
deleted file mode 100644 (file)
index e78f597..0000000
--- a/_hashsplit.c
+++ /dev/null
@@ -1,145 +0,0 @@
-#include <Python.h>
-#include <assert.h>
-#include <stdint.h>
-
-#define BLOBBITS (13)
-#define BLOBSIZE (1<<BLOBBITS)
-#define WINDOWBITS (7)
-#define WINDOWSIZE (1<<(WINDOWBITS-1))
-
-
-// FIXME: replace this with a not-stupid rolling checksum algorithm,
-// such as the one used in rsync (Adler32?)
-static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
-{
-    return ((old<<1) | (old>>31)) ^ drop ^ add;
-}
-
-
-static int find_ofs(const unsigned char *buf, int len, int *bits)
-{
-    unsigned char window[WINDOWSIZE];
-    uint32_t sum = 0;
-    int i = 0, count;
-    memset(window, 0, sizeof(window));
-    
-    for (count = 0; count < len; count++)
-    {
-       sum = stupidsum_add(sum, window[i], buf[count]);
-       window[i] = buf[count];
-       i = (i + 1) % WINDOWSIZE;
-       if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
-       {
-           if (bits)
-           {
-               *bits = BLOBBITS;
-               sum >>= BLOBBITS;
-               for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
-                   ;
-           }
-           return count+1;
-       }
-    }
-    return 0;
-}
-
-
-static PyObject *blobbits(PyObject *self, PyObject *args)
-{
-    if (!PyArg_ParseTuple(args, ""))
-       return NULL;
-    return Py_BuildValue("i", BLOBBITS);
-}
-
-
-static PyObject *splitbuf(PyObject *self, PyObject *args)
-{
-    unsigned char *buf = NULL;
-    int len = 0, out = 0, bits = -1;
-
-    if (!PyArg_ParseTuple(args, "t#", &buf, &len))
-       return NULL;
-    out = find_ofs(buf, len, &bits);
-    return Py_BuildValue("ii", out, bits);
-}
-
-
-static PyObject *bitmatch(PyObject *self, PyObject *args)
-{
-    unsigned char *buf1 = NULL, *buf2 = NULL;
-    int len1 = 0, len2 = 0;
-    int byte, bit;
-
-    if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
-       return NULL;
-    
-    bit = 0;
-    for (byte = 0; byte < len1 && byte < len2; byte++)
-    {
-       int b1 = buf1[byte], b2 = buf2[byte];
-       if (b1 != b2)
-       {
-           for (bit = 0; bit < 8; bit++)
-               if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
-                   break;
-           break;
-       }
-    }
-    
-    return Py_BuildValue("i", byte*8 + bit);
-}
-
-
-// I would have made this a lower-level function that just fills in a buffer
-// with random values, and then written those values from python.  But that's
-// about 20% slower in my tests, and since we typically generate random
-// numbers for benchmarking other parts of bup, any slowness in generating
-// random bytes will make our benchmarks inaccurate.  Plus nobody wants
-// pseudorandom bytes much except for this anyway.
-static PyObject *write_random(PyObject *self, PyObject *args)
-{
-    uint32_t buf[1024/4];
-    int fd = -1, seed = 0;
-    ssize_t ret;
-    long long len = 0, kbytes = 0, written = 0;
-
-    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
-       return NULL;
-    
-    srandom(seed);
-    
-    for (kbytes = len/1024; kbytes > 0; kbytes--)
-    {
-       int i;
-       for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
-           buf[i] = random();
-       ret = write(fd, buf, sizeof(buf));
-       if (ret < 0)
-           ret = 0;
-       written += ret;
-       if (ret < sizeof(buf))
-           break;
-       if (!(kbytes%1024))
-           fprintf(stderr, ".");
-    }
-    
-    return Py_BuildValue("L", written);
-}
-
-
-static PyMethodDef hashsplit_methods[] = {
-    { "blobbits", blobbits, METH_VARARGS,
-       "Return the number of bits in the rolling checksum." },
-    { "splitbuf", splitbuf, METH_VARARGS,
-       "Split a list of strings based on a rolling checksum." },
-    { "bitmatch", bitmatch, METH_VARARGS,
-       "Count the number of matching prefix bits between two strings." },
-    { "write_random", write_random, METH_VARARGS,
-       "Write random bytes to the given file descriptor" },
-    { NULL, NULL, 0, NULL },  // sentinel
-};
-
-PyMODINIT_FUNC init_hashsplit(void)
-{
-    Py_InitModule("_hashsplit", hashsplit_methods);
-}
diff --git a/bup.py b/bup.py
deleted file mode 100755 (executable)
index 56f3afe..0000000
--- a/bup.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-import sys, os, subprocess
-import git
-from helpers import *
-
-argv = sys.argv
-exe = argv[0]
-exepath = os.path.split(exe)[0] or '.'
-
-def usage():
-    log('Usage: bup <subcmd> <options...>\n\n')
-    log('Available subcommands:\n')
-    for c in sorted(os.listdir(exepath)):
-        if c.startswith('bup-') and c.find('.') < 0:
-            log('\t%s\n' % c[4:])
-    sys.exit(99)
-
-if len(argv) < 2 or not argv[1] or argv[1][0] == '-':
-    usage()
-
-subcmd = argv[1]
-if subcmd == 'help':
-    usage()
-
-def subpath(s):
-    return os.path.join(exepath, 'bup-%s' % s)
-
-if not os.path.exists(subpath(subcmd)):
-    log('error: unknown command "%s"\n' % subcmd)
-    usage()
-
-
-already_fixed = atoi(os.environ.get('BUP_FORCE_TTY'))
-if subcmd in ['ftp']:
-    already_fixed = True
-fix_stdout = not already_fixed and os.isatty(1)
-fix_stderr = not already_fixed and os.isatty(2)
-
-def force_tty():
-    if fix_stdout or fix_stderr:
-        os.environ['BUP_FORCE_TTY'] = '1'
-
-if fix_stdout or fix_stderr:
-    realf = fix_stderr and 2 or 1
-    n = subprocess.Popen([subpath('newliner')],
-                         stdin=subprocess.PIPE, stdout=os.dup(realf),
-                         close_fds=True, preexec_fn=force_tty)
-    outf = fix_stdout and n.stdin.fileno() or 1
-    errf = fix_stderr and n.stdin.fileno() or 2
-else:
-    n = None
-    outf = 1
-    errf = 2
-
-ret = 95
-try:
-    try:
-        p = subprocess.Popen([subpath(subcmd)] + argv[2:],
-                             stdout=outf, stderr=errf, preexec_fn=force_tty)
-        ret = p.wait()
-    except OSError, e:
-        log('%s: %s\n' % (subpath(subcmd), e))
-        ret = 98
-    except KeyboardInterrupt, e:
-        ret = 94
-finally:
-    if n:
-        n.stdin.close()
-        try:
-            n.wait()
-        except:
-            pass
-sys.exit(ret)
diff --git a/client.py b/client.py
deleted file mode 100644 (file)
index a9e9c95..0000000
--- a/client.py
+++ /dev/null
@@ -1,258 +0,0 @@
-import re, struct, errno, select
-import git
-from helpers import *
-from subprocess import Popen, PIPE
-
-
-class ClientError(Exception):
-    pass
-
-
-class Client:
-    def __init__(self, remote, create=False):
-        self._busy = None
-        self.p = None
-        self.conn = None
-        rs = remote.split(':', 1)
-        nicedir = os.path.split(os.path.abspath(sys.argv[0]))[0]
-        nicedir = re.sub(r':', "_", nicedir)
-        if len(rs) == 1:
-            (host, dir) = ('NONE', remote)
-            def fixenv():
-                os.environ['PATH'] = ':'.join([nicedir,
-                                               os.environ.get('PATH', '')])
-            argv = ['bup', 'server']
-        else:
-            (host, dir) = rs
-            fixenv = None
-            # WARNING: shell quoting security holes are possible here, so we
-            # have to be super careful.  We have to use 'sh -c' because
-            # csh-derived shells can't handle PATH= notation.  We can't
-            # set PATH in advance, because ssh probably replaces it.  We
-            # can't exec *safely* using argv, because *both* ssh and 'sh -c'
-            # allow shellquoting.  So we end up having to double-shellquote
-            # stuff here.
-            escapedir = re.sub(r'([^\w/])', r'\\\\\\\1', nicedir)
-            cmd = r"""
-                       sh -c PATH=%s:'$PATH bup server'
-                   """ % escapedir
-            argv = ['ssh', host, '--', cmd.strip()]
-            #log('argv is: %r\n' % argv)
-        (self.host, self.dir) = (host, dir)
-        self.cachedir = git.repo('index-cache/%s'
-                                 % re.sub(r'[^@\w]', '_', 
-                                          "%s:%s" % (host, dir)))
-        try:
-            self.p = p = Popen(argv, stdin=PIPE, stdout=PIPE, preexec_fn=fixenv)
-        except OSError, e:
-            raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
-        self.conn = conn = Conn(p.stdout, p.stdin)
-        if dir:
-            dir = re.sub(r'[\r\n]', ' ', dir)
-            if create:
-                conn.write('init-dir %s\n' % dir)
-            else:
-                conn.write('set-dir %s\n' % dir)
-            self.check_ok()
-        self.sync_indexes_del()
-
-    def __del__(self):
-        try:
-            self.close()
-        except IOError, e:
-            if e.errno == errno.EPIPE:
-                pass
-            else:
-                raise
-
-    def close(self):
-        if self.conn and not self._busy:
-            self.conn.write('quit\n')
-        if self.p:
-            self.p.stdin.close()
-            while self.p.stdout.read(65536):
-                pass
-            self.p.stdout.close()
-            self.p.wait()
-            rv = self.p.wait()
-            if rv:
-                raise ClientError('server tunnel returned exit code %d' % rv)
-        self.conn = None
-        self.p = None
-
-    def check_ok(self):
-        rv = self.p.poll()
-        if rv != None:
-            raise ClientError('server exited unexpectedly with code %r' % rv)
-        try:
-            return self.conn.check_ok()
-        except Exception, e:
-            raise ClientError, e, sys.exc_info()[2]
-
-    def check_busy(self):
-        if self._busy:
-            raise ClientError('already busy with command %r' % self._busy)
-        
-    def _not_busy(self):
-        self._busy = None
-
-    def sync_indexes_del(self):
-        self.check_busy()
-        conn = self.conn
-        conn.write('list-indexes\n')
-        packdir = git.repo('objects/pack')
-        all = {}
-        needed = {}
-        for line in linereader(conn):
-            if not line:
-                break
-            all[line] = 1
-            assert(line.find('/') < 0)
-            if not os.path.exists(os.path.join(self.cachedir, line)):
-                needed[line] = 1
-        self.check_ok()
-
-        mkdirp(self.cachedir)
-        for f in os.listdir(self.cachedir):
-            if f.endswith('.idx') and not f in all:
-                log('pruning old index: %r\n' % f)
-                os.unlink(os.path.join(self.cachedir, f))
-
-    def sync_index(self, name):
-        #log('requesting %r\n' % name)
-        mkdirp(self.cachedir)
-        self.conn.write('send-index %s\n' % name)
-        n = struct.unpack('!I', self.conn.read(4))[0]
-        assert(n)
-        fn = os.path.join(self.cachedir, name)
-        f = open(fn + '.tmp', 'w')
-        count = 0
-        progress('Receiving index: %d/%d\r' % (count, n))
-        for b in chunkyreader(self.conn, n):
-            f.write(b)
-            count += len(b)
-            progress('Receiving index: %d/%d\r' % (count, n))
-        progress('Receiving index: %d/%d, done.\n' % (count, n))
-        self.check_ok()
-        f.close()
-        os.rename(fn + '.tmp', fn)
-
-    def _make_objcache(self):
-        ob = self._busy
-        self._busy = None
-        #self.sync_indexes()
-        self._busy = ob
-        return git.MultiPackIndex(self.cachedir)
-
-    def _suggest_pack(self, indexname):
-        log('received index suggestion: %s\n' % indexname)
-        ob = self._busy
-        if ob:
-            assert(ob == 'receive-objects')
-            self._busy = None
-            self.conn.write('\xff\xff\xff\xff')  # suspend receive-objects
-            self.conn.drain_and_check_ok()
-        self.sync_index(indexname)
-        if ob:
-            self.conn.write('receive-objects\n')
-            self._busy = ob
-
-    def new_packwriter(self):
-        self.check_busy()
-        self._busy = 'receive-objects'
-        return PackWriter_Remote(self.conn,
-                                 objcache_maker = self._make_objcache,
-                                 suggest_pack = self._suggest_pack,
-                                 onclose = self._not_busy)
-
-    def read_ref(self, refname):
-        self.check_busy()
-        self.conn.write('read-ref %s\n' % refname)
-        r = self.conn.readline().strip()
-        self.check_ok()
-        if r:
-            assert(len(r) == 40)   # hexified sha
-            return r.decode('hex')
-        else:
-            return None   # nonexistent ref
-
-    def update_ref(self, refname, newval, oldval):
-        self.check_busy()
-        self.conn.write('update-ref %s\n%s\n%s\n' 
-                        % (refname, newval.encode('hex'),
-                           (oldval or '').encode('hex')))
-        self.check_ok()
-
-    def cat(self, id):
-        self.check_busy()
-        self._busy = 'cat'
-        self.conn.write('cat %s\n' % re.sub(r'[\n\r]', '_', id))
-        while 1:
-            sz = struct.unpack('!I', self.conn.read(4))[0]
-            if not sz: break
-            yield self.conn.read(sz)
-        e = self.check_ok()
-        self._not_busy()
-        if e:
-            raise KeyError(str(e))
-
-
-class PackWriter_Remote(git.PackWriter):
-    def __init__(self, conn, objcache_maker, suggest_pack, onclose):
-        git.PackWriter.__init__(self, objcache_maker)
-        self.file = conn
-        self.filename = 'remote socket'
-        self.suggest_pack = suggest_pack
-        self.onclose = onclose
-        self._packopen = False
-
-    def _open(self):
-        if not self._packopen:
-            self._make_objcache()
-            self.file.write('receive-objects\n')
-            self._packopen = True
-
-    def _end(self):
-        if self._packopen and self.file:
-            self.file.write('\0\0\0\0')
-            self._packopen = False
-            while True:
-                line = self.file.readline().strip()
-                if line.startswith('index '):
-                    pass
-                else:
-                    break
-            id = line
-            self.file.check_ok()
-            self.objcache = None
-            if self.onclose:
-                self.onclose()
-            if self.suggest_pack:
-                self.suggest_pack(id)
-            return id
-
-    def close(self):
-        id = self._end()
-        self.file = None
-        return id
-
-    def abort(self):
-        raise GitError("don't know how to abort remote pack writing")
-
-    def _raw_write(self, datalist):
-        assert(self.file)
-        if not self._packopen:
-            self._open()
-        data = ''.join(datalist)
-        assert(len(data))
-        self.file.write(struct.pack('!I', len(data)) + data)
-        self.outbytes += len(data)
-        self.count += 1
-
-        if self.file.has_input():
-            line = self.file.readline().strip()
-            assert(line.startswith('index '))
-            idxname = line[6:]
-            if self.suggest_pack:
-                self.suggest_pack(idxname)
-                self.objcache.refresh()
index 685a109ea9bd255f0dd34a0814301d8acb61ee63..6f630fd130ca3e4f9318b7e4ad90b0e61004c430 100755 (executable)
--- a/cmd-damage.py
+++ b/cmd-damage.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, random
-import options
-from helpers import *
+from bup import options
+from bup.helpers import *
 
 
 def randblock(n):
index c2b70794bf6e80349026f69bc00928aaba6535a1..99780af7a33bbaff35e96c6d818a89d16f95b8a3 100755 (executable)
--- a/cmd-drecurse.py
+++ b/cmd-drecurse.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-import options, drecurse
-from helpers import *
+from bup import options, drecurse
+from bup.helpers import *
 
 optspec = """
 bup drecurse <path>
index 30f9b07c770dfa790a0b2f27574cc8edaad14de5..36c8a350a0c524c1f94356c6f549b6c1dd16056a 100755 (executable)
--- a/cmd-fsck.py
+++ b/cmd-fsck.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, glob, subprocess, time, sha
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
 
 par2_ok = 0
 nullf = open('/dev/null')
index 9594b39e3f4769a89466df7bd22b434c6e7697de..b59cf44318b78bd0389bdea78e72cac2219197ae 100755 (executable)
--- a/cmd-ftp.py
+++ b/cmd-ftp.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, re, stat, readline, fnmatch
-import options, git, shquote, vfs
-from helpers import *
+from bup import options, git, shquote, vfs
+from bup.helpers import *
 
 def print_node(text, n):
     if stat.S_ISDIR(n.mode):
index 0b0bab64ce286dce8125116b96c3e402bbd3ed64..ffcd036ffffd275ebfd73aad7b30f8d448d6cf39 100755 (executable)
--- a/cmd-fuse.py
+++ b/cmd-fuse.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, stat, errno, fuse, re, time, tempfile
-import options, git, vfs
-from helpers import *
+from bup import options, git, vfs
+from bup.helpers import *
 
 
 class Stat(fuse.Stat):
index fe1acb1d20b6b02f3332cb9c089e40bee2ef401d..683989007988e5a38a48a27256dd33a8efd106b9 100755 (executable)
--- a/cmd-index.py
+++ b/cmd-index.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import os, sys, stat, time
-import options, git, index, drecurse
-from helpers import *
+from bup import options, git, index, drecurse
+from bup.helpers import *
 
 
 def merge_indexes(out, r1, r2):
index 8dca178bd7d30475fd267a90a57c564811b97162..04f32484d6bd78a6ad2c37022e5ac8af6cedd6a8 100755 (executable)
--- a/cmd-init.py
+++ b/cmd-init.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-import git, options, client
-from helpers import *
+from bup import git, options, client
+from bup.helpers import *
 
 optspec = """
 [BUP_DIR=...] bup init [-r host:path]
index 26f390c43ce3efa581d38f39b96782695f591b64..0e1e1ffbc8108b3404d0dd3fdc3379b0d8dac72f 100755 (executable)
--- a/cmd-join.py
+++ b/cmd-join.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, time, struct
-import hashsplit, git, options, client
-from helpers import *
+from bup import hashsplit, git, options, client
+from bup.helpers import *
 from subprocess import PIPE
 
 
index 2ce6bb039b5f1dfb8b87728d5bb413aa879c2cef..5a22c1daafa078234c843e94344114a33a27dce5 100755 (executable)
--- a/cmd-ls.py
+++ b/cmd-ls.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, stat
-import options, git, vfs
-from helpers import *
+from bup import options, git, vfs
+from bup.helpers import *
 
 def print_node(text, n):
     prefix = ''
index 05f7d2b68e30bcce97b5a194645350b026672947..85b32890b493ea4df2ea4cd8b0f661d8f2253c4e 100755 (executable)
--- a/cmd-margin.py
+++ b/cmd-margin.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys
-import options, git, _hashsplit
-from helpers import *
+from bup import options, git, _hashsplit
+from bup.helpers import *
 
 
 optspec = """
diff --git a/cmd-memtest.py b/cmd-memtest.py
new file mode 100755 (executable)
index 0000000..cf106e4
--- /dev/null
+++ b/cmd-memtest.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+import sys, re, struct, mmap
+from bup import git, options
+from bup.helpers import *
+
+
+def s_from_bytes(bytes):
+    clist = [chr(b) for b in bytes]
+    return ''.join(clist)
+
+
+def report(count):
+    fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk']
+    d = {}
+    for line in open('/proc/self/status').readlines():
+        l = re.split(r':\s*', line.strip(), 1)
+        d[l[0]] = l[1]
+    if count >= 0:
+        e1 = count
+        fields = [d[k] for k in fields]
+    else:
+        e1 = ''
+    print ('%9s  ' + ('%10s ' * len(fields))) % tuple([e1] + fields)
+    sys.stdout.flush()
+
+
+optspec = """
+memtest [-n elements] [-c cycles]
+--
+n,number=  number of objects per cycle
+c,cycles=  number of cycles to run
+ignore-midx  ignore .midx files, use only .idx files
+"""
+o = options.Options(sys.argv[0], optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+if extra:
+    o.fatal('no arguments expected')
+
+git.ignore_midx = opt.ignore_midx
+
+git.check_repo_or_die()
+m = git.MultiPackIndex(git.repo('objects/pack'))
+
+cycles = opt.cycles or 100
+number = opt.number or 10000
+
+report(-1)
+f = open('/dev/urandom')
+a = mmap.mmap(-1, 20)
+report(0)
+for c in xrange(cycles):
+    for n in xrange(number):
+        b = f.read(3)
+        if 0:
+            bytes = list(struct.unpack('!BBB', b)) + [0]*17
+            bytes[2] &= 0xf0
+            bin = struct.pack('!20s', s_from_bytes(bytes))
+        else:
+            a[0:2] = b[0:2]
+            a[2] = chr(ord(b[2]) & 0xf0)
+            bin = str(a[0:20])
+        #print bin.encode('hex')
+        m.exists(bin)
+    report((c+1)*number)
index 36907ebaa0ee7569e68a652f221a12119eee5559..871111196c9ecabba994106da2e76a705c8749e9 100755 (executable)
--- a/cmd-midx.py
+++ b/cmd-midx.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, math, struct, glob, sha
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
 
 PAGE_SIZE=4096
 SHA_PER_PAGE=PAGE_SIZE/200.
index 0b665aa2a79127487523917ab9dadceb963c373c..6c4e60f336e7c0ce76f47f8a0334a5daec48b517 100755 (executable)
--- a/cmd-newliner.py
+++ b/cmd-newliner.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import sys, os, re
-import options
+from bup import options
 
 optspec = """
 bup newliner
index 518cea8bb5ba7247c2fbeb72599a7b28df41c67e..91820a858441384c93d1651322658bbf673295e1 100755 (executable)
--- a/cmd-random.py
+++ b/cmd-random.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, mmap
-import options, _hashsplit
-from helpers import *
+from bup import options, _hashsplit
+from bup.helpers import *
 
 optspec = """
 bup random [-S seed] <numbytes>
index e2b1f47358779c1ab426c735279c1ad769f1cacc..0350fff2ffdf45f2b4a7daeee6bb22d38339f8f8 100755 (executable)
--- a/cmd-save.py
+++ b/cmd-save.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, re, errno, stat, time, math
-import hashsplit, git, options, index, client
-from helpers import *
+from bup import hashsplit, git, options, index, client
+from bup.helpers import *
 
 
 optspec = """
index 92aa8c1dfadb479f1bef507b36cd23559d8244f1..59459d1fa3f2215b45ba520996659191fe662f6a 100755 (executable)
--- a/cmd-server.py
+++ b/cmd-server.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, struct, mmap
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
 
 suspended_w = None
 
index ae7b3806f42171bb78379462006f60356c1fc63d..0f8408c7071077b0fccbdaef7757509dbbc92378 100755 (executable)
--- a/cmd-split.py
+++ b/cmd-split.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, time, struct
-import hashsplit, git, options, client
-from helpers import *
+from bup import hashsplit, git, options, client
+from bup.helpers import *
 from subprocess import PIPE
 
 
index 12bd97eb077056c3f1c8630d9247a50b729aed6b..8375dee2b82e7ffed26ea18e39d4eed227780de0 100755 (executable)
--- a/cmd-tick.py
+++ b/cmd-tick.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import sys, time
-import options
+from bup import options
 
 optspec = """
 bup tick
diff --git a/csetup.py b/csetup.py
deleted file mode 100644 (file)
index b58932c..0000000
--- a/csetup.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from distutils.core import setup, Extension
-
-_hashsplit_mod = Extension('_hashsplit', sources=['_hashsplit.c'])
-
-setup(name='_hashsplit',
-      version='0.1',
-      description='hashsplit helper library for bup',
-      ext_modules=[_hashsplit_mod])
diff --git a/drecurse.py b/drecurse.py
deleted file mode 100644 (file)
index 33bcca4..0000000
--- a/drecurse.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import stat, heapq
-from helpers import *
-
-try:
-    O_LARGEFILE = os.O_LARGEFILE
-except AttributeError:
-    O_LARGEFILE = 0
-
-
-# the use of fchdir() and lstat() is for two reasons:
-#  - help out the kernel by not making it repeatedly look up the absolute path
-#  - avoid race conditions caused by doing listdir() on a changing symlink
-class OsFile:
-    def __init__(self, path):
-        self.fd = None
-        self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
-        
-    def __del__(self):
-        if self.fd:
-            fd = self.fd
-            self.fd = None
-            os.close(fd)
-
-    def fchdir(self):
-        os.fchdir(self.fd)
-
-    def stat(self):
-        return os.fstat(self.fd)
-
-
-_IFMT = stat.S_IFMT(0xffffffff)  # avoid function call in inner loop
-def _dirlist():
-    l = []
-    for n in os.listdir('.'):
-        try:
-            st = os.lstat(n)
-        except OSError, e:
-            add_error(Exception('%s: %s' % (realpath(n), str(e))))
-            continue
-        if (st.st_mode & _IFMT) == stat.S_IFDIR:
-            n += '/'
-        l.append((n,st))
-    l.sort(reverse=True)
-    return l
-
-
-def _recursive_dirlist(prepend, xdev):
-    for (name,pst) in _dirlist():
-        if name.endswith('/'):
-            if xdev != None and pst.st_dev != xdev:
-                log('Skipping %r: different filesystem.\n' % (prepend+name))
-                continue
-            try:
-                OsFile(name).fchdir()
-            except OSError, e:
-                add_error('%s: %s' % (prepend, e))
-            else:
-                for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev):
-                    yield i
-                os.chdir('..')
-        yield (prepend + name, pst)
-
-
-def recursive_dirlist(paths, xdev):
-    startdir = OsFile('.')
-    try:
-        assert(type(paths) != type(''))
-        for path in paths:
-            try:
-                pst = os.lstat(path)
-                if stat.S_ISLNK(pst.st_mode):
-                    yield (path, pst)
-                    continue
-            except OSError, e:
-                add_error(e)
-                continue
-            try:
-                pfile = OsFile(path)
-            except OSError, e:
-                add_error(e)
-                continue
-            pst = pfile.stat()
-            if xdev:
-                xdev = pst.st_dev
-            else:
-                xdev = None
-            if stat.S_ISDIR(pst.st_mode):
-                pfile.fchdir()
-                prepend = os.path.join(path, '')
-                for i in _recursive_dirlist(prepend=prepend, xdev=xdev):
-                    yield i
-                startdir.fchdir()
-            else:
-                prepend = path
-            yield (prepend,pst)
-    except:
-        try:
-            startdir.fchdir()
-        except:
-            pass
-        raise
diff --git a/git.py b/git.py
deleted file mode 100644 (file)
index c5381f0..0000000
--- a/git.py
+++ /dev/null
@@ -1,696 +0,0 @@
-import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
-import heapq
-from helpers import *
-
-verbose = 0
-ignore_midx = 0
-home_repodir = os.path.expanduser('~/.bup')
-repodir = None
-
-_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
-_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
-
-
-class GitError(Exception):
-    pass
-
-
-def repo(sub = ''):
-    global repodir
-    if not repodir:
-        raise GitError('You should call check_repo_or_die()')
-    gd = os.path.join(repodir, '.git')
-    if os.path.exists(gd):
-        repodir = gd
-    return os.path.join(repodir, sub)
-
-
-def _encode_packobj(type, content):
-    szout = ''
-    sz = len(content)
-    szbits = (sz & 0x0f) | (_typemap[type]<<4)
-    sz >>= 4
-    while 1:
-        if sz: szbits |= 0x80
-        szout += chr(szbits)
-        if not sz:
-            break
-        szbits = sz & 0x7f
-        sz >>= 7
-    z = zlib.compressobj(1)
-    yield szout
-    yield z.compress(content)
-    yield z.flush()
-
-
-def _encode_looseobj(type, content):
-    z = zlib.compressobj(1)
-    yield z.compress('%s %d\0' % (type, len(content)))
-    yield z.compress(content)
-    yield z.flush()
-
-
-def _decode_looseobj(buf):
-    assert(buf);
-    s = zlib.decompress(buf)
-    i = s.find('\0')
-    assert(i > 0)
-    l = s[:i].split(' ')
-    type = l[0]
-    sz = int(l[1])
-    content = s[i+1:]
-    assert(type in _typemap)
-    assert(sz == len(content))
-    return (type, content)
-
-
-def _decode_packobj(buf):
-    assert(buf)
-    c = ord(buf[0])
-    type = _typermap[(c & 0x70) >> 4]
-    sz = c & 0x0f
-    shift = 4
-    i = 0
-    while c & 0x80:
-        i += 1
-        c = ord(buf[i])
-        sz |= (c & 0x7f) << shift
-        shift += 7
-        if not (c & 0x80):
-            break
-    return (type, zlib.decompress(buf[i+1:]))
-
-
-class PackIndex:
-    def __init__(self, filename):
-        self.name = filename
-        self.map = mmap_read(open(filename))
-        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
-        self.fanout = list(struct.unpack('!256I',
-                                         str(buffer(self.map, 8, 256*4))))
-        self.fanout.append(0)  # entry "-1"
-        nsha = self.fanout[255]
-        self.ofstable = buffer(self.map,
-                               8 + 256*4 + nsha*20 + nsha*4,
-                               nsha*4)
-        self.ofs64table = buffer(self.map,
-                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
-
-    def _ofs_from_idx(self, idx):
-        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
-        if ofs & 0x80000000:
-            idx64 = ofs & 0x7fffffff
-            ofs = struct.unpack('!I',
-                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
-        return ofs
-
-    def _idx_from_hash(self, hash):
-        assert(len(hash) == 20)
-        b1 = ord(hash[0])
-        start = self.fanout[b1-1] # range -1..254
-        end = self.fanout[b1] # range 0..255
-        buf = buffer(self.map, 8 + 256*4, end*20)
-        want = str(hash)
-        while start < end:
-            mid = start + (end-start)/2
-            v = str(buf[mid*20:(mid+1)*20])
-            if v < want:
-                start = mid+1
-            elif v > want:
-                end = mid
-            else: # got it!
-                return mid
-        return None
-        
-    def find_offset(self, hash):
-        idx = self._idx_from_hash(hash)
-        if idx != None:
-            return self._ofs_from_idx(idx)
-        return None
-
-    def exists(self, hash):
-        return hash and (self._idx_from_hash(hash) != None) and True or None
-
-    def __iter__(self):
-        for i in xrange(self.fanout[255]):
-            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
-
-    def __len__(self):
-        return int(self.fanout[255])
-
-
-def extract_bits(buf, bits):
-    mask = (1<<bits) - 1
-    v = struct.unpack('!I', buf[0:4])[0]
-    v = (v >> (32-bits)) & mask
-    return v
-
-
-class PackMidx:
-    def __init__(self, filename):
-        self.name = filename
-        assert(filename.endswith('.midx'))
-        self.map = mmap_read(open(filename))
-        if str(self.map[0:8]) == 'MIDX\0\0\0\1':
-            log('Warning: ignoring old-style midx %r\n' % filename)
-            self.bits = 0
-            self.entries = 1
-            self.fanout = buffer('\0\0\0\0')
-            self.shalist = buffer('\0'*20)
-            self.idxnames = []
-        else:
-            assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
-            self.bits = struct.unpack('!I', self.map[8:12])[0]
-            self.entries = 2**self.bits
-            self.fanout = buffer(self.map, 12, self.entries*4)
-            shaofs = 12 + self.entries*4
-            nsha = self._fanget(self.entries-1)
-            self.shalist = buffer(self.map, shaofs, nsha*20)
-            self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
-
-    def _fanget(self, i):
-        start = i*4
-        s = self.fanout[start:start+4]
-        return struct.unpack('!I', s)[0]
-    
-    def exists(self, hash):
-        want = str(hash)
-        el = extract_bits(want, self.bits)
-        if el:
-            start = self._fanget(el-1)
-        else:
-            start = 0
-        end = self._fanget(el)
-        while start < end:
-            mid = start + (end-start)/2
-            v = str(self.shalist[mid*20:(mid+1)*20])
-            if v < want:
-                start = mid+1
-            elif v > want:
-                end = mid
-            else: # got it!
-                return True
-        return None
-    
-    def __iter__(self):
-        for i in xrange(self._fanget(self.entries-1)):
-            yield buffer(self.shalist, i*20, 20)
-    
-    def __len__(self):
-        return int(self._fanget(self.entries-1))
-
-
-_mpi_count = 0
-class MultiPackIndex:
-    def __init__(self, dir):
-        global _mpi_count
-        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
-        _mpi_count += 1
-        self.dir = dir
-        self.also = {}
-        self.packs = []
-        self.refresh()
-
-    def __del__(self):
-        global _mpi_count
-        _mpi_count -= 1
-        assert(_mpi_count == 0)
-
-    def __iter__(self):
-        return iter(idxmerge(self.packs))
-
-    def exists(self, hash):
-        if hash in self.also:
-            return True
-        for i in range(len(self.packs)):
-            p = self.packs[i]
-            if p.exists(hash):
-                # reorder so most recently used packs are searched first
-                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
-                return p.name
-        return None
-
-    def refresh(self, skip_midx = False, forget_packs = False):
-        if forget_packs:
-            self.packs = []
-        skip_midx = skip_midx or ignore_midx
-        d = dict((p.name, 1) for p in self.packs)
-        if os.path.exists(self.dir):
-            if not skip_midx:
-                midxl = []
-                for f in os.listdir(self.dir):
-                    full = os.path.join(self.dir, f)
-                    if f.endswith('.midx') and not d.get(full):
-                        mx = PackMidx(full)
-                        (mxd, mxf) = os.path.split(mx.name)
-                        broken = 0
-                        for n in mx.idxnames:
-                            if not os.path.exists(os.path.join(mxd, n)):
-                                log(('warning: index %s missing\n' +
-                                    '  used by %s\n') % (n, mxf))
-                                broken += 1
-                        if not broken:
-                            midxl.append(mx)
-                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
-                for ix in midxl:
-                    any = 0
-                    for sub in ix.idxnames:
-                        if not d.get(os.path.join(self.dir, sub)):
-                            self.packs.append(ix)
-                            d[ix.name] = 1
-                            for name in ix.idxnames:
-                                d[os.path.join(self.dir, name)] = 1
-                            any += 1
-                            break
-                    if not any:
-                        log('midx: removing redundant: %s\n' 
-                            % os.path.basename(ix.name))
-                        unlink(ix.name)
-            for f in os.listdir(self.dir):
-                full = os.path.join(self.dir, f)
-                if f.endswith('.idx') and not d.get(full):
-                    self.packs.append(PackIndex(full))
-                    d[full] = 1
-        log('MultiPackIndex: using %d index%s.\n' 
-            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
-
-    def add(self, hash):
-        self.also[hash] = 1
-
-    def zap_also(self):
-        self.also = {}
-
-
-def calc_hash(type, content):
-    header = '%s %d\0' % (type, len(content))
-    sum = sha.sha(header)
-    sum.update(content)
-    return sum.digest()
-
-
-def _shalist_sort_key(ent):
-    (mode, name, id) = ent
-    if stat.S_ISDIR(int(mode, 8)):
-        return name + '/'
-    else:
-        return name
-
-
-def idxmerge(idxlist):
-    total = sum(len(i) for i in idxlist)
-    iters = (iter(i) for i in idxlist)
-    heap = [(next(it), it) for it in iters]
-    heapq.heapify(heap)
-    count = 0
-    last = None
-    while heap:
-        if (count % 10024) == 0:
-            progress('Reading indexes: %.2f%% (%d/%d)\r'
-                     % (count*100.0/total, count, total))
-        (e, it) = heap[0]
-        if e != last:
-            yield e
-            last = e
-        count += 1
-        e = next(it)
-        if e:
-            heapq.heapreplace(heap, (e, it))
-        else:
-            heapq.heappop(heap)
-    log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
-
-    
-class PackWriter:
-    def __init__(self, objcache_maker=None):
-        self.count = 0
-        self.outbytes = 0
-        self.filename = None
-        self.file = None
-        self.objcache_maker = objcache_maker
-        self.objcache = None
-
-    def __del__(self):
-        self.close()
-
-    def _make_objcache(self):
-        if not self.objcache:
-            if self.objcache_maker:
-                self.objcache = self.objcache_maker()
-            else:
-                self.objcache = MultiPackIndex(repo('objects/pack'))
-
-    def _open(self):
-        if not self.file:
-            self._make_objcache()
-            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
-            self.file = os.fdopen(fd, 'w+b')
-            assert(name.endswith('.pack'))
-            self.filename = name[:-5]
-            self.file.write('PACK\0\0\0\2\0\0\0\0')
-
-    def _raw_write(self, datalist):
-        self._open()
-        f = self.file
-        for d in datalist:
-            f.write(d)
-            self.outbytes += len(d)
-        self.count += 1
-
-    def _write(self, bin, type, content):
-        if verbose:
-            log('>')
-        self._raw_write(_encode_packobj(type, content))
-        return bin
-
-    def breakpoint(self):
-        id = self._end()
-        self.outbytes = self.count = 0
-        return id
-
-    def write(self, type, content):
-        return self._write(calc_hash(type, content), type, content)
-
-    def exists(self, id):
-        if not self.objcache:
-            self._make_objcache()
-        return self.objcache.exists(id)
-
-    def maybe_write(self, type, content):
-        bin = calc_hash(type, content)
-        if not self.exists(bin):
-            self._write(bin, type, content)
-            self.objcache.add(bin)
-        return bin
-
-    def new_blob(self, blob):
-        return self.maybe_write('blob', blob)
-
-    def new_tree(self, shalist):
-        shalist = sorted(shalist, key = _shalist_sort_key)
-        l = []
-        for (mode,name,bin) in shalist:
-            assert(mode)
-            assert(mode != '0')
-            assert(mode[0] != '0')
-            assert(name)
-            assert(len(bin) == 20)
-            l.append('%s %s\0%s' % (mode,name,bin))
-        return self.maybe_write('tree', ''.join(l))
-
-    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
-        l = []
-        if tree: l.append('tree %s' % tree.encode('hex'))
-        if parent: l.append('parent %s' % parent.encode('hex'))
-        if author: l.append('author %s %s' % (author, _git_date(adate)))
-        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
-        l.append('')
-        l.append(msg)
-        return self.maybe_write('commit', '\n'.join(l))
-
-    def new_commit(self, parent, tree, msg):
-        now = time.time()
-        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
-        commit = self._new_commit(tree, parent,
-                                  userline, now, userline, now,
-                                  msg)
-        return commit
-
-    def abort(self):
-        f = self.file
-        if f:
-            self.file = None
-            f.close()
-            os.unlink(self.filename + '.pack')
-
-    def _end(self):
-        f = self.file
-        if not f: return None
-        self.file = None
-        self.objcache = None
-
-        # update object count
-        f.seek(8)
-        cp = struct.pack('!i', self.count)
-        assert(len(cp) == 4)
-        f.write(cp)
-
-        # calculate the pack sha1sum
-        f.seek(0)
-        sum = sha.sha()
-        while 1:
-            b = f.read(65536)
-            sum.update(b)
-            if not b: break
-        f.write(sum.digest())
-        
-        f.close()
-
-        p = subprocess.Popen(['git', 'index-pack', '-v',
-                              '--index-version=2',
-                              self.filename + '.pack'],
-                             preexec_fn = _gitenv,
-                             stdout = subprocess.PIPE)
-        out = p.stdout.read().strip()
-        _git_wait('git index-pack', p)
-        if not out:
-            raise GitError('git index-pack produced no output')
-        nameprefix = repo('objects/pack/%s' % out)
-        if os.path.exists(self.filename + '.map'):
-            os.unlink(self.filename + '.map')
-        os.rename(self.filename + '.pack', nameprefix + '.pack')
-        os.rename(self.filename + '.idx', nameprefix + '.idx')
-        return nameprefix
-
-    def close(self):
-        return self._end()
-
-
-def _git_date(date):
-    return time.strftime('%s %z', time.localtime(date))
-
-
-def _gitenv():
-    os.environ['GIT_DIR'] = os.path.abspath(repo())
-
-
-def list_refs(refname = None):
-    argv = ['git', 'show-ref', '--']
-    if refname:
-        argv += [refname]
-    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
-    out = p.stdout.read().strip()
-    rv = p.wait()  # not fatal
-    if rv:
-        assert(not out)
-    if out:
-        for d in out.split('\n'):
-            (sha, name) = d.split(' ', 1)
-            yield (name, sha.decode('hex'))
-
-
-def read_ref(refname):
-    l = list(list_refs(refname))
-    if l:
-        assert(len(l) == 1)
-        return l[0][1]
-    else:
-        return None
-
-
-def rev_list(ref):
-    assert(not ref.startswith('-'))
-    argv = ['git', 'rev-list', '--pretty=format:%ct', ref, '--']
-    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
-    commit = None
-    for row in p.stdout:
-        s = row.strip()
-        if s.startswith('commit '):
-            commit = s[7:].decode('hex')
-        else:
-            date = int(s)
-            yield (date, commit)
-    rv = p.wait()  # not fatal
-    if rv:
-        raise GitError, 'git rev-list returned error %d' % rv
-
-
-def update_ref(refname, newval, oldval):
-    if not oldval:
-        oldval = ''
-    assert(refname.startswith('refs/heads/'))
-    p = subprocess.Popen(['git', 'update-ref', refname,
-                          newval.encode('hex'), oldval.encode('hex')],
-                         preexec_fn = _gitenv)
-    _git_wait('git update-ref', p)
-
-
-def guess_repo(path=None):
-    global repodir
-    if path:
-        repodir = path
-    if not repodir:
-        repodir = os.environ.get('BUP_DIR')
-        if not repodir:
-            repodir = os.path.expanduser('~/.bup')
-
-
-def init_repo(path=None):
-    guess_repo(path)
-    d = repo()
-    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
-        raise GitError('"%d" exists but is not a directory\n' % d)
-    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
-                         preexec_fn = _gitenv)
-    _git_wait('git init', p)
-    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
-                         stdout=sys.stderr, preexec_fn = _gitenv)
-    _git_wait('git config', p)
-
-
-def check_repo_or_die(path=None):
-    guess_repo(path)
-    if not os.path.isdir(repo('objects/pack/.')):
-        if repodir == home_repodir:
-            init_repo()
-        else:
-            log('error: %r is not a bup/git repository\n' % repo())
-            sys.exit(15)
-
-
-def _treeparse(buf):
-    ofs = 0
-    while ofs < len(buf):
-        z = buf[ofs:].find('\0')
-        assert(z > 0)
-        spl = buf[ofs:ofs+z].split(' ', 1)
-        assert(len(spl) == 2)
-        sha = buf[ofs+z+1:ofs+z+1+20]
-        ofs += z+1+20
-        yield (spl[0], spl[1], sha)
-
-
-_ver = None
-def ver():
-    global _ver
-    if not _ver:
-        p = subprocess.Popen(['git', '--version'],
-                             stdout=subprocess.PIPE)
-        gvs = p.stdout.read()
-        _git_wait('git --version', p)
-        m = re.match(r'git version (\S+.\S+)', gvs)
-        if not m:
-            raise GitError('git --version weird output: %r' % gvs)
-        _ver = tuple(m.group(1).split('.'))
-    needed = ('1','5', '3', '1')
-    if _ver < needed:
-        raise GitError('git version %s or higher is required; you have %s'
-                       % ('.'.join(needed), '.'.join(_ver)))
-    return _ver
-
-
-def _git_wait(cmd, p):
-    rv = p.wait()
-    if rv != 0:
-        raise GitError('%s returned %d' % (cmd, rv))
-
-
-def _git_capture(argv):
-    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
-    r = p.stdout.read()
-    _git_wait(repr(argv), p)
-    return r
-
-
-_ver_warned = 0
-class CatPipe:
-    def __init__(self):
-        global _ver_warned
-        wanted = ('1','5','6')
-        if ver() < wanted:
-            if not _ver_warned:
-                log('warning: git version < %s; bup will be slow.\n'
-                    % '.'.join(wanted))
-                _ver_warned = 1
-            self.get = self._slow_get
-        else:
-            self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
-                                      stdin=subprocess.PIPE, 
-                                      stdout=subprocess.PIPE,
-                                      preexec_fn = _gitenv)
-            self.get = self._fast_get
-            self.inprogress = None
-
-    def _fast_get(self, id):
-        if self.inprogress:
-            log('_fast_get: opening %r while %r is open' 
-                % (id, self.inprogress))
-        assert(not self.inprogress)
-        assert(id.find('\n') < 0)
-        assert(id.find('\r') < 0)
-        assert(id[0] != '-')
-        self.inprogress = id
-        self.p.stdin.write('%s\n' % id)
-        hdr = self.p.stdout.readline()
-        if hdr.endswith(' missing\n'):
-            raise KeyError('blob %r is missing' % id)
-        spl = hdr.split(' ')
-        if len(spl) != 3 or len(spl[0]) != 40:
-            raise GitError('expected blob, got %r' % spl)
-        (hex, type, size) = spl
-
-        def ondone():
-            assert(self.p.stdout.readline() == '\n')
-            self.inprogress = None
-
-        it = AutoFlushIter(chunkyreader(self.p.stdout, int(spl[2])),
-                           ondone = ondone)
-        yield type
-        for blob in it:
-            yield blob
-        del it
-
-    def _slow_get(self, id):
-        assert(id.find('\n') < 0)
-        assert(id.find('\r') < 0)
-        assert(id[0] != '-')
-        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
-        yield type
-
-        p = subprocess.Popen(['git', 'cat-file', type, id],
-                             stdout=subprocess.PIPE,
-                             preexec_fn = _gitenv)
-        for blob in chunkyreader(p.stdout):
-            yield blob
-        _git_wait('git cat-file', p)
-
-    def _join(self, it):
-        type = it.next()
-        if type == 'blob':
-            for blob in it:
-                yield blob
-        elif type == 'tree':
-            treefile = ''.join(it)
-            for (mode, name, sha) in _treeparse(treefile):
-                for blob in self.join(sha.encode('hex')):
-                    yield blob
-        elif type == 'commit':
-            treeline = ''.join(it).split('\n')[0]
-            assert(treeline.startswith('tree '))
-            for blob in self.join(treeline[5:]):
-                yield blob
-        else:
-            raise GitError('invalid object type %r: expected blob/tree/commit'
-                           % type)
-
-    def join(self, id):
-        try:
-            for d in self._join(self.get(id)):
-                yield d
-        except StopIteration:
-            log('booger!\n')
-        
-
-def cat(id):
-    c = CatPipe()
-    for d in c.join(id):
-        yield d
diff --git a/hashsplit.py b/hashsplit.py
deleted file mode 100644 (file)
index f9bc48d..0000000
--- a/hashsplit.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import sys, math
-import git, _hashsplit
-from helpers import *
-
-BLOB_LWM = 8192*2
-BLOB_MAX = BLOB_LWM*2
-BLOB_HWM = 1024*1024
-MAX_PER_TREE = 256
-progress_callback = None
-max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
-max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
-fanout = 16
-
-class Buf:
-    def __init__(self):
-        self.data = ''
-        self.start = 0
-
-    def put(self, s):
-        if s:
-            self.data = buffer(self.data, self.start) + s
-            self.start = 0
-            
-    def peek(self, count):
-        return buffer(self.data, self.start, count)
-    
-    def eat(self, count):
-        self.start += count
-
-    def get(self, count):
-        v = buffer(self.data, self.start, count)
-        self.start += count
-        return v
-
-    def used(self):
-        return len(self.data) - self.start
-
-
-def splitbuf(buf):
-    b = buf.peek(buf.used())
-    (ofs, bits) = _hashsplit.splitbuf(b)
-    if ofs:
-        buf.eat(ofs)
-        return (buffer(b, 0, ofs), bits)
-    return (None, 0)
-
-
-def blobiter(files):
-    for f in files:
-        while 1:
-            b = f.read(BLOB_HWM)
-            if not b:
-                break
-            yield b
-
-
-def drainbuf(buf, finalize):
-    while 1:
-        (blob, bits) = splitbuf(buf)
-        if blob:
-            yield (blob, bits)
-        else:
-            break
-    if buf.used() > BLOB_MAX:
-        # limit max blob size
-        yield (buf.get(buf.used()), 0)
-    elif finalize and buf.used():
-        yield (buf.get(buf.used()), 0)
-
-
-def hashsplit_iter(files):
-    assert(BLOB_HWM > BLOB_MAX)
-    buf = Buf()
-    fi = blobiter(files)
-    while 1:
-        for i in drainbuf(buf, finalize=False):
-            yield i
-        while buf.used() < BLOB_HWM:
-            bnew = next(fi)
-            if not bnew:
-                # eof
-                for i in drainbuf(buf, finalize=True):
-                    yield i
-                return
-            buf.put(bnew)
-
-
-total_split = 0
-def _split_to_blobs(w, files):
-    global total_split
-    for (blob, bits) in hashsplit_iter(files):
-        sha = w.new_blob(blob)
-        total_split += len(blob)
-        if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
-            w.breakpoint()
-        if progress_callback:
-            progress_callback(len(blob))
-        yield (sha, len(blob), bits)
-
-
-def _make_shalist(l):
-    ofs = 0
-    shalist = []
-    for (mode, sha, size) in l:
-        shalist.append((mode, '%016x' % ofs, sha))
-        ofs += size
-    total = ofs
-    return (shalist, total)
-
-
-def _squish(w, stacks, n):
-    i = 0
-    while i<n or len(stacks[i]) > MAX_PER_TREE:
-        while len(stacks) <= i+1:
-            stacks.append([])
-        if len(stacks[i]) == 1:
-            stacks[i+1] += stacks[i]
-        elif stacks[i]:
-            (shalist, size) = _make_shalist(stacks[i])
-            tree = w.new_tree(shalist)
-            stacks[i+1].append(('40000', tree, size))
-        stacks[i] = []
-        i += 1
-
-
-def split_to_shalist(w, files):
-    sl = _split_to_blobs(w, files)
-    if not fanout:
-        shal = []
-        for (sha,size,bits) in sl:
-            shal.append(('100644', sha, size))
-        return _make_shalist(shal)[0]
-    else:
-        base_bits = _hashsplit.blobbits()
-        fanout_bits = int(math.log(fanout, 2))
-        def bits_to_idx(n):
-            assert(n >= base_bits)
-            return (n - base_bits)/fanout_bits
-        stacks = [[]]
-        for (sha,size,bits) in sl:
-            assert(bits <= 32)
-            stacks[0].append(('100644', sha, size))
-            if bits > base_bits:
-                _squish(w, stacks, bits_to_idx(bits))
-        #log('stacks: %r\n' % [len(i) for i in stacks])
-        _squish(w, stacks, len(stacks)-1)
-        #log('stacks: %r\n' % [len(i) for i in stacks])
-        return _make_shalist(stacks[-1])[0]
-
-
-def split_to_blob_or_tree(w, files):
-    shalist = list(split_to_shalist(w, files))
-    if len(shalist) == 1:
-        return (shalist[0][0], shalist[0][2])
-    elif len(shalist) == 0:
-        return ('100644', w.new_blob(''))
-    else:
-        return ('40000', w.new_tree(shalist))
diff --git a/helpers.py b/helpers.py
deleted file mode 100644 (file)
index 75cf09c..0000000
--- a/helpers.py
+++ /dev/null
@@ -1,269 +0,0 @@
-import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
-
-
-def log(s):
-    sys.stderr.write(s)
-
-
-def mkdirp(d):
-    try:
-        os.makedirs(d)
-    except OSError, e:
-        if e.errno == errno.EEXIST:
-            pass
-        else:
-            raise
-
-
-def next(it):
-    try:
-        return it.next()
-    except StopIteration:
-        return None
-    
-    
-def unlink(f):
-    try:
-        os.unlink(f)
-    except OSError, e:
-        if e.errno == errno.ENOENT:
-            pass  # it doesn't exist, that's what you asked for
-
-
-def readpipe(argv):
-    p = subprocess.Popen(argv, stdout=subprocess.PIPE)
-    r = p.stdout.read()
-    p.wait()
-    return r
-
-
-# FIXME: this function isn't very generic, because it splits the filename
-# in an odd way and depends on a terminating '/' to indicate directories.
-# But it's used in a couple of places, so let's put it here.
-def pathsplit(p):
-    l = p.split('/')
-    l = [i+'/' for i in l[:-1]] + l[-1:]
-    if l[-1] == '':
-        l.pop()  # extra blank caused by terminating '/'
-    return l
-
-
-# like os.path.realpath, but doesn't follow a symlink for the last element.
-# (ie. if 'p' itself is itself a symlink, this one won't follow it)
-def realpath(p):
-    try:
-        st = os.lstat(p)
-    except OSError:
-        st = None
-    if st and stat.S_ISLNK(st.st_mode):
-        (dir, name) = os.path.split(p)
-        dir = os.path.realpath(dir)
-        out = os.path.join(dir, name)
-    else:
-        out = os.path.realpath(p)
-    #log('realpathing:%r,%r\n' % (p, out))
-    return out
-
-
-_username = None
-def username():
-    global _username
-    if not _username:
-        uid = os.getuid()
-        try:
-            _username = pwd.getpwuid(uid)[0]
-        except KeyError:
-            _username = 'user%d' % uid
-    return _username
-
-
-_userfullname = None
-def userfullname():
-    global _userfullname
-    if not _userfullname:
-        uid = os.getuid()
-        try:
-            _userfullname = pwd.getpwuid(uid)[4].split(',')[0]
-        except KeyError:
-            _userfullname = 'user%d' % uid
-    return _userfullname
-
-
-_hostname = None
-def hostname():
-    global _hostname
-    if not _hostname:
-        _hostname = socket.getfqdn()
-    return _hostname
-
-
-class NotOk(Exception):
-    pass
-
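-# a line-oriented request/response channel over a pair of pipes; replies end
-# with '\nok\n' or '\nerror ...\n' so callers can check command status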
-class Conn:
-    def __init__(self, inp, outp):
-        self.inp = inp
-        self.outp = outp
-
-    def read(self, size):
-        self.outp.flush()
-        return self.inp.read(size)
-
-    def readline(self):
-        self.outp.flush()
-        return self.inp.readline()
-
-    def write(self, data):
-        #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
-        self.outp.write(data)
-
-    def has_input(self):
-        [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
-        if rl:
-            assert(rl[0] == self.inp.fileno())
-            return True
-        else:
-            return None
-
-    def ok(self):
-        self.write('\nok\n')
-
-    def error(self, s):
-        s = re.sub(r'\s+', ' ', str(s))
-        self.write('\nerror %s\n' % s)
-
-    def _check_ok(self, onempty):
-        self.outp.flush()
-        rl = ''
-        for rl in linereader(self.inp):
-            #log('%d got line: %r\n' % (os.getpid(), rl))
-            if not rl:  # empty line
-                continue
-            elif rl == 'ok':
-                return None
-            elif rl.startswith('error '):
-                #log('client: error: %s\n' % rl[6:])
-                return NotOk(rl[6:])
-            else:
-                onempty(rl)
-        raise Exception('server exited unexpectedly; see errors above')
-
-    def drain_and_check_ok(self):
-        def onempty(rl):
-            pass
-        return self._check_ok(onempty)
-
-    def check_ok(self):
-        def onempty(rl):
-            raise Exception('expected "ok", got %r' % rl)
-        return self._check_ok(onempty)
-
-
-def linereader(f):
-    while 1:
-        line = f.readline()
-        if not line:
-            break
-        yield line[:-1]
-
-
-def chunkyreader(f, count = None):
-    if count != None:
-        while count > 0:
-            b = f.read(min(count, 65536))
-            if not b:
-                raise IOError('EOF with %d bytes remaining' % count)
-            yield b
-            count -= len(b)
-    else:
-        while 1:
-            b = f.read(65536)
-            if not b: break
-            yield b
-
-
-class AutoFlushIter:
-    def __init__(self, it, ondone = None):
-        self.it = it
-        self.ondone = ondone
-
-    def __iter__(self):
-        return self
-        
-    def next(self):
-        return self.it.next()
-        
-    def __del__(self):
-        for i in self.it:
-            pass
-        if self.ondone:
-            self.ondone()
-
-
-def slashappend(s):
-    if s and not s.endswith('/'):
-        return s + '/'
-    else:
-        return s
-
-
-def _mmap_do(f, len, flags, prot):
-    if not len:
-        st = os.fstat(f.fileno())
-        len = st.st_size
-    map = mmap.mmap(f.fileno(), len, flags, prot)
-    f.close()  # map will persist beyond file close
-    return map
-
-
-def mmap_read(f, len = 0):
-    return _mmap_do(f, len, mmap.MAP_PRIVATE, mmap.PROT_READ)
-
-
-def mmap_readwrite(f, len = 0):
-    return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
-
-
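-# parse a human-style byte count; units are binary, so parse_num('1.5G')
-# returns 1610612736 and parse_num('42kb') returns 43008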
-def parse_num(s):
-    g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
-    if not g:
-        raise ValueError("can't parse %r as a number" % s)
-    (val, unit) = g.groups()
-    num = float(val)
-    unit = unit.lower()
-    if unit in ['t', 'tb']:
-        mult = 1024*1024*1024*1024
-    elif unit in ['g', 'gb']:
-        mult = 1024*1024*1024
-    elif unit in ['m', 'mb']:
-        mult = 1024*1024
-    elif unit in ['k', 'kb']:
-        mult = 1024
-    elif unit in ['', 'b']:
-        mult = 1
-    else:
-        raise ValueError("invalid unit %r in number %r" % (unit, s))
-    return int(num*mult)
-
-
-# count the number of elements in an iterator (consumes the iterator)
-def count(l):
-    return reduce(lambda x,y: x+1, l, 0)
-
-
-def atoi(s):
-    try:
-        return int(s or '0')
-    except ValueError:
-        return 0
-
-
-saved_errors = []
-def add_error(e):
-    saved_errors.append(e)
-    log('%-70s\n' % e)
-
-istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY'))
-def progress(s):
-    if istty:
-        log(s)
diff --git a/index.py b/index.py
deleted file mode 100644 (file)
index 536af8f..0000000
--- a/index.py
+++ /dev/null
@@ -1,426 +0,0 @@
-import os, stat, time, struct, tempfile
-from helpers import *
-
-EMPTY_SHA = '\0'*20
-FAKE_SHA = '\x01'*20
-INDEX_HDR = 'BUPI\0\0\0\2'
-INDEX_SIG = '!IIIIIQII20sHII'
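-# (dev, ctime, mtime, uid, gid, size, mode, gitmode, sha, flags,
-#  children_ofs, children_n), all big-endian; see Entry.packed()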
-ENTLEN = struct.calcsize(INDEX_SIG)
-FOOTER_SIG = '!Q'
-FOOTLEN = struct.calcsize(FOOTER_SIG)
-
-IX_EXISTS = 0x8000
-IX_HASHVALID = 0x4000
-
-class Error(Exception):
-    pass
-
-
-class Level:
-    def __init__(self, ename, parent):
-        self.parent = parent
-        self.ename = ename
-        self.list = []
-        self.count = 0
-
-    def write(self, f):
-        (ofs,n) = (f.tell(), len(self.list))
-        if self.list:
-            count = len(self.list)
-            #log('popping %r with %d entries\n' 
-            #    % (''.join(self.ename), count))
-            for e in self.list:
-                e.write(f)
-            if self.parent:
-                self.parent.count += count + self.count
-        return (ofs,n)
-
-
-def _golevel(level, f, ename, newentry):
-    # close nodes back up the tree
-    assert(level)
-    while ename[:len(level.ename)] != level.ename:
-        n = BlankNewEntry(level.ename[-1])
-        (n.children_ofs,n.children_n) = level.write(f)
-        level.parent.list.append(n)
-        level = level.parent
-
-    # create nodes down the tree
-    while len(level.ename) < len(ename):
-        level = Level(ename[:len(level.ename)+1], level)
-
-    # are we in precisely the right place?
-    assert(ename == level.ename)
-    n = newentry or BlankNewEntry(ename and level.ename[-1] or None)
-    (n.children_ofs,n.children_n) = level.write(f)
-    if level.parent:
-        level.parent.list.append(n)
-    level = level.parent
-
-    return level
-
-
-class Entry:
-    def __init__(self, basename, name):
-        self.basename = str(basename)
-        self.name = str(name)
-        self.children_ofs = 0
-        self.children_n = 0
-
-    def __repr__(self):
-        return ("(%s,0x%04x,%d,%d,%d,%d,%d,%s/%s,0x%04x,0x%08x/%d)" 
-                % (self.name, self.dev,
-                   self.ctime, self.mtime, self.uid, self.gid,
-                   self.size, oct(self.mode), oct(self.gitmode),
-                   self.flags, self.children_ofs, self.children_n))
-
-    def packed(self):
-        return struct.pack(INDEX_SIG,
-                           self.dev, self.ctime, self.mtime, 
-                           self.uid, self.gid, self.size, self.mode,
-                           self.gitmode, self.sha, self.flags,
-                           self.children_ofs, self.children_n)
-
-    def from_stat(self, st, tstart):
-        old = (self.dev, self.ctime, self.mtime,
-               self.uid, self.gid, self.size, self.flags & IX_EXISTS)
-        new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
-               st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
-        self.dev = st.st_dev
-        self.ctime = int(st.st_ctime)
-        self.mtime = int(st.st_mtime)
-        self.uid = st.st_uid
-        self.gid = st.st_gid
-        self.size = st.st_size
-        self.mode = st.st_mode
-        self.flags |= IX_EXISTS
-        if int(st.st_ctime) >= tstart or old != new \
-              or self.sha == EMPTY_SHA or not self.gitmode:
-            self.invalidate()
-
-    def is_valid(self):
-        f = IX_HASHVALID|IX_EXISTS
-        return (self.flags & f) == f
-
-    def invalidate(self):
-        self.flags &= ~IX_HASHVALID
-
-    def validate(self, gitmode, sha):
-        assert(sha)
-        assert(gitmode)
-        self.gitmode = gitmode
-        self.sha = sha
-        self.flags |= IX_HASHVALID|IX_EXISTS
-
-    def exists(self):
-        return not self.is_deleted()
-
-    def is_deleted(self):
-        return (self.flags & IX_EXISTS) == 0
-
-    def set_deleted(self):
-        if self.flags & IX_EXISTS:
-            self.flags &= ~(IX_EXISTS | IX_HASHVALID)
-
-    def is_real(self):
-        return not self.is_fake()
-
-    def is_fake(self):
-        return not self.ctime
-
-    def __cmp__(a, b):
-        return (cmp(a.name, b.name)
-                or -cmp(a.is_valid(), b.is_valid())
-                or -cmp(a.is_fake(), b.is_fake()))
-
-    def write(self, f):
-        f.write(self.basename + '\0' + self.packed())
-
-
-class NewEntry(Entry):
-    def __init__(self, basename, name, dev, ctime, mtime, uid, gid,
-                 size, mode, gitmode, sha, flags, children_ofs, children_n):
-        Entry.__init__(self, basename, name)
-        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
-         self.size, self.mode, self.gitmode, self.sha,
-         self.flags, self.children_ofs, self.children_n
-         ) = (dev, int(ctime), int(mtime), uid, gid,
-              size, mode, gitmode, sha, flags, children_ofs, children_n)
-
-
-class BlankNewEntry(NewEntry):
-    def __init__(self, basename):
-        NewEntry.__init__(self, basename, basename,
-                          0, 0, 0, 0, 0, 0, 0,
-                          0, EMPTY_SHA, 0, 0, 0)
-
-
-class ExistingEntry(Entry):
-    def __init__(self, parent, basename, name, m, ofs):
-        Entry.__init__(self, basename, name)
-        self.parent = parent
-        self._m = m
-        self._ofs = ofs
-        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
-         self.size, self.mode, self.gitmode, self.sha,
-         self.flags, self.children_ofs, self.children_n
-         ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN)))
-
-    def repack(self):
-        self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
-        if self.parent and not self.is_valid():
-            self.parent.invalidate()
-            self.parent.repack()
-
-    def iter(self, name=None, wantrecurse=None):
-        dname = name
-        if dname and not dname.endswith('/'):
-            dname += '/'
-        ofs = self.children_ofs
-        assert(ofs <= len(self._m))
-        assert(self.children_n < 1000000)
-        for i in xrange(self.children_n):
-            eon = self._m.find('\0', ofs)
-            assert(eon >= 0)
-            assert(eon >= ofs)
-            assert(eon > ofs)
-            basename = str(buffer(self._m, ofs, eon-ofs))
-            child = ExistingEntry(self, basename, self.name + basename,
-                                  self._m, eon+1)
-            if (not dname
-                 or child.name.startswith(dname)
-                 or child.name.endswith('/') and dname.startswith(child.name)):
-                if not wantrecurse or wantrecurse(child):
-                    for e in child.iter(name=name, wantrecurse=wantrecurse):
-                        yield e
-            if not name or child.name == name or child.name.startswith(dname):
-                yield child
-            ofs = eon + 1 + ENTLEN
-
-    def __iter__(self):
-        return self.iter()
-            
-
-class Reader:
-    def __init__(self, filename):
-        self.filename = filename
-        self.m = ''
-        self.writable = False
-        self.count = 0
-        f = None
-        try:
-            f = open(filename, 'r+')
-        except IOError, e:
-            if e.errno == errno.ENOENT:
-                pass
-            else:
-                raise
-        if f:
-            b = f.read(len(INDEX_HDR))
-            if b != INDEX_HDR:
-                log('warning: %s: header: expected %r, got %r'
-                                 % (filename, INDEX_HDR, b))
-            else:
-                st = os.fstat(f.fileno())
-                if st.st_size:
-                    self.m = mmap_readwrite(f)
-                    self.writable = True
-                    self.count = struct.unpack(FOOTER_SIG,
-                          str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
-
-    def __del__(self):
-        self.close()
-
-    def __len__(self):
-        return int(self.count)
-
-    def forward_iter(self):
-        ofs = len(INDEX_HDR)
-        while ofs+ENTLEN <= len(self.m)-FOOTLEN:
-            eon = self.m.find('\0', ofs)
-            assert(eon >= 0)
-            assert(eon >= ofs)
-            assert(eon > ofs)
-            basename = str(buffer(self.m, ofs, eon-ofs))
-            yield ExistingEntry(None, basename, basename, self.m, eon+1)
-            ofs = eon + 1 + ENTLEN
-
-    def iter(self, name=None, wantrecurse=None):
-        if len(self.m) > len(INDEX_HDR)+ENTLEN:
-            dname = name
-            if dname and not dname.endswith('/'):
-                dname += '/'
-            root = ExistingEntry(None, '/', '/',
-                                 self.m, len(self.m)-FOOTLEN-ENTLEN)
-            for sub in root.iter(name=name, wantrecurse=wantrecurse):
-                yield sub
-            if not dname or dname == root.name:
-                yield root
-
-    def __iter__(self):
-        return self.iter()
-
-    def exists(self):
-        return self.m
-
-    def save(self):
-        if self.writable and self.m:
-            self.m.flush()
-
-    def close(self):
-        self.save()
-        if self.writable and self.m:
-            self.m = None
-            self.writable = False
-
-    def filter(self, prefixes, wantrecurse=None):
-        for (rp, path) in reduce_paths(prefixes):
-            for e in self.iter(rp, wantrecurse=wantrecurse):
-                assert(e.name.startswith(rp))
-                name = path + e.name[len(rp):]
-                yield (name, e)
-
-
-class Writer:
-    def __init__(self, filename):
-        self.rootlevel = self.level = Level([], None)
-        self.f = None
-        self.count = 0
-        self.lastfile = None
-        self.filename = None
-        self.filename = filename = realpath(filename)
-        (dir,name) = os.path.split(filename)
-        (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
-        self.f = os.fdopen(ffd, 'wb', 65536)
-        self.f.write(INDEX_HDR)
-
-    def __del__(self):
-        self.abort()
-
-    def abort(self):
-        f = self.f
-        self.f = None
-        if f:
-            f.close()
-            os.unlink(self.tmpname)
-
-    def flush(self):
-        if self.level:
-            self.level = _golevel(self.level, self.f, [], None)
-            self.count = self.rootlevel.count
-            if self.count:
-                self.count += 1
-            self.f.write(struct.pack(FOOTER_SIG, self.count))
-            self.f.flush()
-        assert(self.level == None)
-
-    def close(self):
-        self.flush()
-        f = self.f
-        self.f = None
-        if f:
-            f.close()
-            os.rename(self.tmpname, self.filename)
-
-    def _add(self, ename, entry):
-        if self.lastfile and self.lastfile <= ename:
-            raise Error('%r must come before %r'
-                             % (''.join(ename), ''.join(self.lastfile)))
-        self.lastfile = ename
-        self.level = _golevel(self.level, self.f, ename, entry)
-
-    def add(self, name, st, hashgen = None):
-        endswith = name.endswith('/')
-        ename = pathsplit(name)
-        basename = ename[-1]
-        #log('add: %r %r\n' % (basename, name))
-        flags = IX_EXISTS
-        sha = None
-        if hashgen:
-            (gitmode, sha) = hashgen(name)
-            flags |= IX_HASHVALID
-        else:
-            (gitmode, sha) = (0, EMPTY_SHA)
-        if st:
-            isdir = stat.S_ISDIR(st.st_mode)
-            assert(isdir == endswith)
-            e = NewEntry(basename, name, st.st_dev, int(st.st_ctime),
-                         int(st.st_mtime), st.st_uid, st.st_gid,
-                         st.st_size, st.st_mode, gitmode, sha, flags,
-                         0, 0)
-        else:
-            assert(endswith)
-            e = BlankNewEntry(basename)
-            e.gitmode = gitmode
-            e.sha = sha
-            e.flags = flags
-        self._add(ename, e)
-
-    def add_ixentry(self, e):
-        e.children_ofs = e.children_n = 0
-        self._add(pathsplit(e.name), e)
-
-    def new_reader(self):
-        self.flush()
-        return Reader(self.tmpname)
-
-
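-# resolve each path, drop any already covered by an earlier parent directory,
-# and return (realpath, original) pairs reverse-sorted as Writer expects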
-def reduce_paths(paths):
-    xpaths = []
-    for p in paths:
-        rp = realpath(p)
-        try:
-            st = os.lstat(rp)
-            if stat.S_ISDIR(st.st_mode):
-                rp = slashappend(rp)
-                p = slashappend(p)
-        except OSError, e:
-            if e.errno != errno.ENOENT:
-                raise
-        xpaths.append((rp, p))
-    xpaths.sort()
-
-    paths = []
-    prev = None
-    for (rp, p) in xpaths:
-        if prev and (prev == rp 
-                     or (prev.endswith('/') and rp.startswith(prev))):
-            continue # already superseded by previous path
-        paths.append((rp, p))
-        prev = rp
-    paths.sort(reverse=True)
-    return paths
-
-
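-# merge several sorted index readers into one stream, yielding each name once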
-class MergeIter:
-    def __init__(self, iters):
-        self.iters = iters
-
-    def __len__(self):
-        # FIXME: doesn't remove duplicated entries between iters.
-        # That only happens for parent directories, but will mean the
-        # actual iteration returns fewer entries than this function counts.
-        return sum(len(it) for it in self.iters)
-
-    def __iter__(self):
-        total = len(self)
-        l = [iter(it) for it in self.iters]
-        l = [(next(it),it) for it in l]
-        l = filter(lambda x: x[0], l)
-        count = 0
-        lastname = None
-        while l:
-            if not (count % 1024):
-                progress('bup: merging indexes (%d/%d)\r' % (count, total))
-            l.sort()
-            (e,it) = l.pop()
-            if not e:
-                continue
-            if e.name != lastname:
-                yield e
-                lastname = e.name
-            n = next(it)
-            if n:
-                l.append((n,it))
-            count += 1
-        log('bup: merging indexes (%d/%d), done.\n' % (count, total))
diff --git a/lib/bup/__init__.py b/lib/bup/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/lib/bup/_hashsplit.c b/lib/bup/_hashsplit.c
new file mode 100644 (file)
index 0000000..e78f597
--- /dev/null
+++ b/lib/bup/_hashsplit.c
@@ -0,0 +1,145 @@
+#include <Python.h>
+#include <assert.h>
+#include <stdint.h>
+
+#define BLOBBITS (13)
+#define BLOBSIZE (1<<BLOBBITS)
+#define WINDOWBITS (7)
+#define WINDOWSIZE (1<<(WINDOWBITS-1))
+
+
+// FIXME: replace this with a not-stupid rolling checksum algorithm,
+// such as the one used in rsync (Adler32?)
+static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
+{
+    return ((old<<1) | (old>>31)) ^ drop ^ add;
+}
+
+
+static int find_ofs(const unsigned char *buf, int len, int *bits)
+{
+    unsigned char window[WINDOWSIZE];
+    uint32_t sum = 0;
+    int i = 0, count;
+    memset(window, 0, sizeof(window));
+    
+    for (count = 0; count < len; count++)
+    {
+       sum = stupidsum_add(sum, window[i], buf[count]);
+       window[i] = buf[count];
+       i = (i + 1) % WINDOWSIZE;
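+       // split where the low BLOBBITS bits of the rolling sum are all ones;
+       // extra consecutive 1 bits are counted so callers can fan out trees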
+       if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
+       {
+           if (bits)
+           {
+               *bits = BLOBBITS;
+               sum >>= BLOBBITS;
+               for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
+                   ;
+           }
+           return count+1;
+       }
+    }
+    return 0;
+}
+
+
+static PyObject *blobbits(PyObject *self, PyObject *args)
+{
+    if (!PyArg_ParseTuple(args, ""))
+       return NULL;
+    return Py_BuildValue("i", BLOBBITS);
+}
+
+
+static PyObject *splitbuf(PyObject *self, PyObject *args)
+{
+    unsigned char *buf = NULL;
+    int len = 0, out = 0, bits = -1;
+
+    if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+       return NULL;
+    out = find_ofs(buf, len, &bits);
+    return Py_BuildValue("ii", out, bits);
+}
+
+
+static PyObject *bitmatch(PyObject *self, PyObject *args)
+{
+    unsigned char *buf1 = NULL, *buf2 = NULL;
+    int len1 = 0, len2 = 0;
+    int byte, bit;
+
+    if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
+       return NULL;
+    
+    bit = 0;
+    for (byte = 0; byte < len1 && byte < len2; byte++)
+    {
+       int b1 = buf1[byte], b2 = buf2[byte];
+       if (b1 != b2)
+       {
+           for (bit = 0; bit < 8; bit++)
+               if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
+                   break;
+           break;
+       }
+    }
+    
+    return Py_BuildValue("i", byte*8 + bit);
+}
+
+
+// I would have made this a lower-level function that just fills in a buffer
+// with random values, and then written those values from python.  But that's
+// about 20% slower in my tests, and since we typically generate random
+// numbers for benchmarking other parts of bup, any slowness in generating
+// random bytes will make our benchmarks inaccurate.  Plus nobody wants
+// pseudorandom bytes much except for this anyway.
+static PyObject *write_random(PyObject *self, PyObject *args)
+{
+    uint32_t buf[1024/4];
+    int fd = -1, seed = 0;
+    ssize_t ret;
+    long long len = 0, kbytes = 0, written = 0;
+
+    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+       return NULL;
+    
+    srandom(seed);
+    
+    for (kbytes = len/1024; kbytes > 0; kbytes--)
+    {
+       int i;
+       for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
+           buf[i] = random();
+       ret = write(fd, buf, sizeof(buf));
+       if (ret < 0)
+           ret = 0;
+       written += ret;
+       if (ret < sizeof(buf))
+           break;
+       if (!(kbytes%1024))
+           fprintf(stderr, ".");
+    }
+    
+    return Py_BuildValue("L", written);
+}
+
+
+static PyMethodDef hashsplit_methods[] = {
+    { "blobbits", blobbits, METH_VARARGS,
+       "Return the number of low checksum bits that define a split point." },
+    { "splitbuf", splitbuf, METH_VARARGS,
+       "Return the offset and extra bits of the first split point in a buffer." },
+    { "bitmatch", bitmatch, METH_VARARGS,
+       "Count the number of matching prefix bits between two strings." },
+    { "write_random", write_random, METH_VARARGS,
+       "Write random bytes to the given file descriptor" },
+    { NULL, NULL, 0, NULL },  // sentinel
+};
+
+PyMODINIT_FUNC init_hashsplit(void)
+{
+    Py_InitModule("_hashsplit", hashsplit_methods);
+}
diff --git a/lib/bup/client.py b/lib/bup/client.py
new file mode 100644 (file)
index 0000000..6df1358
--- /dev/null
+++ b/lib/bup/client.py
@@ -0,0 +1,258 @@
+import re, struct, errno, select
+from bup import git
+from bup.helpers import *
+from subprocess import Popen, PIPE
+
+
+class ClientError(Exception):
+    pass
+
+
+class Client:
+    def __init__(self, remote, create=False):
+        self._busy = None
+        self.p = None
+        self.conn = None
+        rs = remote.split(':', 1)
+        nicedir = os.path.split(os.path.abspath(sys.argv[0]))[0]
+        nicedir = re.sub(r':', "_", nicedir)
+        if len(rs) == 1:
+            (host, dir) = ('NONE', remote)
+            def fixenv():
+                os.environ['PATH'] = ':'.join([nicedir,
+                                               os.environ.get('PATH', '')])
+            argv = ['bup', 'server']
+        else:
+            (host, dir) = rs
+            fixenv = None
+            # WARNING: shell quoting security holes are possible here, so we
+            # have to be super careful.  We have to use 'sh -c' because
+            # csh-derived shells can't handle PATH= notation.  We can't
+            # set PATH in advance, because ssh probably replaces it.  We
+            # can't exec *safely* using argv, because *both* ssh and 'sh -c'
+            # allow shellquoting.  So we end up having to double-shellquote
+            # stuff here.
+            escapedir = re.sub(r'([^\w/])', r'\\\\\\\1', nicedir)
+            cmd = r"""
+                       sh -c PATH=%s:'$PATH bup server'
+                   """ % escapedir
+            argv = ['ssh', host, '--', cmd.strip()]
+            #log('argv is: %r\n' % argv)
+        (self.host, self.dir) = (host, dir)
+        self.cachedir = git.repo('index-cache/%s'
+                                 % re.sub(r'[^@\w]', '_', 
+                                          "%s:%s" % (host, dir)))
+        try:
+            self.p = p = Popen(argv, stdin=PIPE, stdout=PIPE, preexec_fn=fixenv)
+        except OSError, e:
+            raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
+        self.conn = conn = Conn(p.stdout, p.stdin)
+        if dir:
+            dir = re.sub(r'[\r\n]', ' ', dir)
+            if create:
+                conn.write('init-dir %s\n' % dir)
+            else:
+                conn.write('set-dir %s\n' % dir)
+            self.check_ok()
+        self.sync_indexes_del()
+
+    def __del__(self):
+        try:
+            self.close()
+        except IOError, e:
+            if e.errno == errno.EPIPE:
+                pass
+            else:
+                raise
+
+    def close(self):
+        if self.conn and not self._busy:
+            self.conn.write('quit\n')
+        if self.p:
+            self.p.stdin.close()
+            while self.p.stdout.read(65536):
+                pass
+            self.p.stdout.close()
+            # the pipes are closed and drained, so wait() can't deadlock
+            rv = self.p.wait()
+            if rv:
+                raise ClientError('server tunnel returned exit code %d' % rv)
+        self.conn = None
+        self.p = None
+
+    def check_ok(self):
+        rv = self.p.poll()
+        if rv != None:
+            raise ClientError('server exited unexpectedly with code %r' % rv)
+        try:
+            return self.conn.check_ok()
+        except Exception, e:
+            raise ClientError, e, sys.exc_info()[2]
+
+    def check_busy(self):
+        if self._busy:
+            raise ClientError('already busy with command %r' % self._busy)
+        
+    def _not_busy(self):
+        self._busy = None
+
+    def sync_indexes_del(self):
+        self.check_busy()
+        conn = self.conn
+        conn.write('list-indexes\n')
+        packdir = git.repo('objects/pack')
+        all = {}
+        needed = {}
+        for line in linereader(conn):
+            if not line:
+                break
+            all[line] = 1
+            assert(line.find('/') < 0)
+            if not os.path.exists(os.path.join(self.cachedir, line)):
+                needed[line] = 1
+        self.check_ok()
+
+        mkdirp(self.cachedir)
+        for f in os.listdir(self.cachedir):
+            if f.endswith('.idx') and not f in all:
+                log('pruning old index: %r\n' % f)
+                os.unlink(os.path.join(self.cachedir, f))
+
+    def sync_index(self, name):
+        #log('requesting %r\n' % name)
+        mkdirp(self.cachedir)
+        self.conn.write('send-index %s\n' % name)
+        n = struct.unpack('!I', self.conn.read(4))[0]
+        assert(n)
+        fn = os.path.join(self.cachedir, name)
+        f = open(fn + '.tmp', 'w')
+        count = 0
+        progress('Receiving index: %d/%d\r' % (count, n))
+        for b in chunkyreader(self.conn, n):
+            f.write(b)
+            count += len(b)
+            progress('Receiving index: %d/%d\r' % (count, n))
+        progress('Receiving index: %d/%d, done.\n' % (count, n))
+        self.check_ok()
+        f.close()
+        os.rename(fn + '.tmp', fn)
+
+    def _make_objcache(self):
+        ob = self._busy
+        self._busy = None
+        #self.sync_indexes()
+        self._busy = ob
+        return git.MultiPackIndex(self.cachedir)
+
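+    # the server suggests an index when we upload an object it already has;
+    # suspend the transfer, fetch that index locally, then resume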
+    def _suggest_pack(self, indexname):
+        log('received index suggestion: %s\n' % indexname)
+        ob = self._busy
+        if ob:
+            assert(ob == 'receive-objects')
+            self._busy = None
+            self.conn.write('\xff\xff\xff\xff')  # suspend receive-objects
+            self.conn.drain_and_check_ok()
+        self.sync_index(indexname)
+        if ob:
+            self.conn.write('receive-objects\n')
+            self._busy = ob
+
+    def new_packwriter(self):
+        self.check_busy()
+        self._busy = 'receive-objects'
+        return PackWriter_Remote(self.conn,
+                                 objcache_maker = self._make_objcache,
+                                 suggest_pack = self._suggest_pack,
+                                 onclose = self._not_busy)
+
+    def read_ref(self, refname):
+        self.check_busy()
+        self.conn.write('read-ref %s\n' % refname)
+        r = self.conn.readline().strip()
+        self.check_ok()
+        if r:
+            assert(len(r) == 40)   # hexified sha
+            return r.decode('hex')
+        else:
+            return None   # nonexistent ref
+
+    def update_ref(self, refname, newval, oldval):
+        self.check_busy()
+        self.conn.write('update-ref %s\n%s\n%s\n' 
+                        % (refname, newval.encode('hex'),
+                           (oldval or '').encode('hex')))
+        self.check_ok()
+
+    def cat(self, id):
+        self.check_busy()
+        self._busy = 'cat'
+        self.conn.write('cat %s\n' % re.sub(r'[\n\r]', '_', id))
+        while 1:
+            sz = struct.unpack('!I', self.conn.read(4))[0]
+            if not sz: break
+            yield self.conn.read(sz)
+        e = self.check_ok()
+        self._not_busy()
+        if e:
+            raise KeyError(str(e))
+
+
+class PackWriter_Remote(git.PackWriter):
+    def __init__(self, conn, objcache_maker, suggest_pack, onclose):
+        git.PackWriter.__init__(self, objcache_maker)
+        self.file = conn
+        self.filename = 'remote socket'
+        self.suggest_pack = suggest_pack
+        self.onclose = onclose
+        self._packopen = False
+
+    def _open(self):
+        if not self._packopen:
+            self._make_objcache()
+            self.file.write('receive-objects\n')
+            self._packopen = True
+
+    def _end(self):
+        if self._packopen and self.file:
+            self.file.write('\0\0\0\0')
+            self._packopen = False
+            while True:
+                line = self.file.readline().strip()
+                if line.startswith('index '):
+                    pass
+                else:
+                    break
+            id = line
+            self.file.check_ok()
+            self.objcache = None
+            if self.onclose:
+                self.onclose()
+            if self.suggest_pack:
+                self.suggest_pack(id)
+            return id
+
+    def close(self):
+        id = self._end()
+        self.file = None
+        return id
+
+    def abort(self):
+        raise git.GitError("don't know how to abort remote pack writing")
+
+    def _raw_write(self, datalist):
+        assert(self.file)
+        if not self._packopen:
+            self._open()
+        data = ''.join(datalist)
+        assert(len(data))
+        self.file.write(struct.pack('!I', len(data)) + data)
+        self.outbytes += len(data)
+        self.count += 1
+
+        if self.file.has_input():
+            line = self.file.readline().strip()
+            assert(line.startswith('index '))
+            idxname = line[6:]
+            if self.suggest_pack:
+                self.suggest_pack(idxname)
+                self.objcache.refresh()
diff --git a/lib/bup/csetup.py b/lib/bup/csetup.py
new file mode 100644 (file)
index 0000000..b58932c
--- /dev/null
+++ b/lib/bup/csetup.py
@@ -0,0 +1,8 @@
+from distutils.core import setup, Extension
+
+_hashsplit_mod = Extension('_hashsplit', sources=['_hashsplit.c'])
+
+setup(name='_hashsplit',
+      version='0.1',
+      description='hashsplit helper library for bup',
+      ext_modules=[_hashsplit_mod])
diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py
new file mode 100644 (file)
index 0000000..c3daaa8
--- /dev/null
+++ b/lib/bup/drecurse.py
@@ -0,0 +1,101 @@
+import stat, heapq
+from bup.helpers import *
+
+try:
+    O_LARGEFILE = os.O_LARGEFILE
+except AttributeError:
+    O_LARGEFILE = 0
+
+
+# the use of fchdir() and lstat() is for two reasons:
+#  - help out the kernel by not making it repeatedly look up the absolute path
+#  - avoid race conditions caused by doing listdir() on a changing symlink
+class OsFile:
+    def __init__(self, path):
+        self.fd = None
+        self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
+        
+    def __del__(self):
+        if self.fd:
+            fd = self.fd
+            self.fd = None
+            os.close(fd)
+
+    def fchdir(self):
+        os.fchdir(self.fd)
+
+    def stat(self):
+        return os.fstat(self.fd)
+
+
+_IFMT = stat.S_IFMT(0xffffffff)  # avoid function call in inner loop
+def _dirlist():
+    l = []
+    for n in os.listdir('.'):
+        try:
+            st = os.lstat(n)
+        except OSError, e:
+            add_error(Exception('%s: %s' % (realpath(n), str(e))))
+            continue
+        if (st.st_mode & _IFMT) == stat.S_IFDIR:
+            n += '/'
+        l.append((n,st))
+    l.sort(reverse=True)
+    return l
+
+
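+# yield (path, lstat) for everything under the cwd, depth-first, children
+# before their parent directory so callers see leaves first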
+def _recursive_dirlist(prepend, xdev):
+    for (name,pst) in _dirlist():
+        if name.endswith('/'):
+            if xdev != None and pst.st_dev != xdev:
+                log('Skipping %r: different filesystem.\n' % (prepend+name))
+                continue
+            try:
+                OsFile(name).fchdir()
+            except OSError, e:
+                add_error('%s: %s' % (prepend, e))
+            else:
+                for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev):
+                    yield i
+                os.chdir('..')
+        yield (prepend + name, pst)
+
+
+def recursive_dirlist(paths, xdev):
+    startdir = OsFile('.')
+    try:
+        assert(type(paths) != type(''))
+        for path in paths:
+            try:
+                pst = os.lstat(path)
+                if stat.S_ISLNK(pst.st_mode):
+                    yield (path, pst)
+                    continue
+            except OSError, e:
+                add_error(e)
+                continue
+            try:
+                pfile = OsFile(path)
+            except OSError, e:
+                add_error(e)
+                continue
+            pst = pfile.stat()
+            if xdev:
+                xdev = pst.st_dev
+            else:
+                xdev = None
+            if stat.S_ISDIR(pst.st_mode):
+                pfile.fchdir()
+                prepend = os.path.join(path, '')
+                for i in _recursive_dirlist(prepend=prepend, xdev=xdev):
+                    yield i
+                startdir.fchdir()
+            else:
+                prepend = path
+            yield (prepend,pst)
+    except:
+        try:
+            startdir.fchdir()
+        except:
+            pass
+        raise
diff --git a/lib/bup/git.py b/lib/bup/git.py
new file mode 100644 (file)
index 0000000..77e90bf
--- /dev/null
+++ b/lib/bup/git.py
@@ -0,0 +1,696 @@
+import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
+import heapq
+from bup.helpers import *
+
+verbose = 0
+ignore_midx = 0
+home_repodir = os.path.expanduser('~/.bup')
+repodir = None
+
+_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
+_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
+
+
+class GitError(Exception):
+    pass
+
+
+def repo(sub = ''):
+    global repodir
+    if not repodir:
+        raise GitError('You should call check_repo_or_die()')
+    gd = os.path.join(repodir, '.git')
+    if os.path.exists(gd):
+        repodir = gd
+    return os.path.join(repodir, sub)
+
+
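+# git pack object header: object type in bits 4-6 of the first byte, size as
+# a base-128 varint (low 4 bits first, 7 more bits per continuation byte);
+# e.g. a 300-byte blob gets the header bytes 0xbc 0x12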
+def _encode_packobj(type, content):
+    szout = ''
+    sz = len(content)
+    szbits = (sz & 0x0f) | (_typemap[type]<<4)
+    sz >>= 4
+    while 1:
+        if sz: szbits |= 0x80
+        szout += chr(szbits)
+        if not sz:
+            break
+        szbits = sz & 0x7f
+        sz >>= 7
+    z = zlib.compressobj(1)
+    yield szout
+    yield z.compress(content)
+    yield z.flush()
+
+
+def _encode_looseobj(type, content):
+    z = zlib.compressobj(1)
+    yield z.compress('%s %d\0' % (type, len(content)))
+    yield z.compress(content)
+    yield z.flush()
+
+
+def _decode_looseobj(buf):
+    assert(buf);
+    s = zlib.decompress(buf)
+    i = s.find('\0')
+    assert(i > 0)
+    l = s[:i].split(' ')
+    type = l[0]
+    sz = int(l[1])
+    content = s[i+1:]
+    assert(type in _typemap)
+    assert(sz == len(content))
+    return (type, content)
+
+
+def _decode_packobj(buf):
+    assert(buf)
+    c = ord(buf[0])
+    type = _typermap[(c & 0x70) >> 4]
+    sz = c & 0x0f
+    shift = 4
+    i = 0
+    while c & 0x80:
+        i += 1
+        c = ord(buf[i])
+        sz |= (c & 0x7f) << shift
+        shift += 7
+        if not (c & 0x80):
+            break
+    return (type, zlib.decompress(buf[i+1:]))
+
+
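+# reader for git .idx version 2 files: 8-byte magic, 256-entry fanout table,
+# sha table, crc table, 4-byte offsets, then 8-byte offsets for large packs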
+class PackIndex:
+    def __init__(self, filename):
+        self.name = filename
+        self.map = mmap_read(open(filename))
+        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
+        self.fanout = list(struct.unpack('!256I',
+                                         str(buffer(self.map, 8, 256*4))))
+        self.fanout.append(0)  # entry "-1"
+        nsha = self.fanout[255]
+        self.ofstable = buffer(self.map,
+                               8 + 256*4 + nsha*20 + nsha*4,
+                               nsha*4)
+        self.ofs64table = buffer(self.map,
+                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
+
+    def _ofs_from_idx(self, idx):
+        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
+        if ofs & 0x80000000:
+            idx64 = ofs & 0x7fffffff
+            ofs = struct.unpack('!Q',
+                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
+        return ofs
+
+    def _idx_from_hash(self, hash):
+        assert(len(hash) == 20)
+        b1 = ord(hash[0])
+        start = self.fanout[b1-1] # range -1..254
+        end = self.fanout[b1] # range 0..255
+        buf = buffer(self.map, 8 + 256*4, end*20)
+        want = str(hash)
+        while start < end:
+            mid = start + (end-start)/2
+            v = str(buf[mid*20:(mid+1)*20])
+            if v < want:
+                start = mid+1
+            elif v > want:
+                end = mid
+            else: # got it!
+                return mid
+        return None
+        
+    def find_offset(self, hash):
+        idx = self._idx_from_hash(hash)
+        if idx != None:
+            return self._ofs_from_idx(idx)
+        return None
+
+    def exists(self, hash):
+        return hash and (self._idx_from_hash(hash) != None) and True or None
+
+    def __iter__(self):
+        for i in xrange(self.fanout[255]):
+            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
+
+    def __len__(self):
+        return int(self.fanout[255])
+
+
+def extract_bits(buf, bits):
+    mask = (1<<bits) - 1
+    v = struct.unpack('!I', buf[0:4])[0]
+    v = (v >> (32-bits)) & mask
+    return v
+
+
+class PackMidx:
+    def __init__(self, filename):
+        self.name = filename
+        assert(filename.endswith('.midx'))
+        self.map = mmap_read(open(filename))
+        if str(self.map[0:8]) == 'MIDX\0\0\0\1':
+            log('Warning: ignoring old-style midx %r\n' % filename)
+            self.bits = 0
+            self.entries = 1
+            self.fanout = buffer('\0\0\0\0')
+            self.shalist = buffer('\0'*20)
+            self.idxnames = []
+        else:
+            assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
+            self.bits = struct.unpack('!I', self.map[8:12])[0]
+            self.entries = 2**self.bits
+            self.fanout = buffer(self.map, 12, self.entries*4)
+            shaofs = 12 + self.entries*4
+            nsha = self._fanget(self.entries-1)
+            self.shalist = buffer(self.map, shaofs, nsha*20)
+            self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
+
+    def _fanget(self, i):
+        start = i*4
+        s = self.fanout[start:start+4]
+        return struct.unpack('!I', s)[0]
+    
+    def exists(self, hash):
+        want = str(hash)
+        el = extract_bits(want, self.bits)
+        if el:
+            start = self._fanget(el-1)
+        else:
+            start = 0
+        end = self._fanget(el)
+        while start < end:
+            mid = start + (end-start)/2
+            v = str(self.shalist[mid*20:(mid+1)*20])
+            if v < want:
+                start = mid+1
+            elif v > want:
+                end = mid
+            else: # got it!
+                return True
+        return None
+    
+    def __iter__(self):
+        for i in xrange(self._fanget(self.entries-1)):
+            yield buffer(self.shalist, i*20, 20)
+    
+    def __len__(self):
+        return int(self._fanget(self.entries-1))
+
+
+_mpi_count = 0
+class MultiPackIndex:
+    def __init__(self, dir):
+        global _mpi_count
+        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
+        _mpi_count += 1
+        self.dir = dir
+        self.also = {}
+        self.packs = []
+        self.refresh()
+
+    def __del__(self):
+        global _mpi_count
+        _mpi_count -= 1
+        assert(_mpi_count == 0)
+
+    def __iter__(self):
+        return iter(idxmerge(self.packs))
+
+    def exists(self, hash):
+        if hash in self.also:
+            return True
+        for i in range(len(self.packs)):
+            p = self.packs[i]
+            if p.exists(hash):
+                # reorder so most recently used packs are searched first
+                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
+                return p.name
+        return None
+
+    def refresh(self, skip_midx = False, forget_packs = False):
+        if forget_packs:
+            self.packs = []
+        skip_midx = skip_midx or ignore_midx
+        d = dict((p.name, 1) for p in self.packs)
+        if os.path.exists(self.dir):
+            if not skip_midx:
+                midxl = []
+                for f in os.listdir(self.dir):
+                    full = os.path.join(self.dir, f)
+                    if f.endswith('.midx') and not d.get(full):
+                        mx = PackMidx(full)
+                        (mxd, mxf) = os.path.split(mx.name)
+                        broken = 0
+                        for n in mx.idxnames:
+                            if not os.path.exists(os.path.join(mxd, n)):
+                                log(('warning: index %s missing\n' +
+                                    '  used by %s\n') % (n, mxf))
+                                broken += 1
+                        if not broken:
+                            midxl.append(mx)
+                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
+                for ix in midxl:
+                    any = 0
+                    for sub in ix.idxnames:
+                        if not d.get(os.path.join(self.dir, sub)):
+                            self.packs.append(ix)
+                            d[ix.name] = 1
+                            for name in ix.idxnames:
+                                d[os.path.join(self.dir, name)] = 1
+                            any += 1
+                            break
+                    if not any:
+                        log('midx: removing redundant: %s\n' 
+                            % os.path.basename(ix.name))
+                        unlink(ix.name)
+            for f in os.listdir(self.dir):
+                full = os.path.join(self.dir, f)
+                if f.endswith('.idx') and not d.get(full):
+                    self.packs.append(PackIndex(full))
+                    d[full] = 1
+        log('MultiPackIndex: using %d index%s.\n' 
+            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
+
+    def add(self, hash):
+        self.also[hash] = 1
+
+    def zap_also(self):
+        self.also = {}
+
+
+def calc_hash(type, content):
+    header = '%s %d\0' % (type, len(content))
+    sum = sha.sha(header)
+    sum.update(content)
+    return sum.digest()
+
+
+def _shalist_sort_key(ent):
+    (mode, name, id) = ent
+    if stat.S_ISDIR(int(mode, 8)):
+        return name + '/'
+    else:
+        return name
+
+
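+# merge-sort any number of sorted sha iterators into one deduplicated stream,
+# using a heap keyed on each iterator's next sha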
+def idxmerge(idxlist):
+    total = sum(len(i) for i in idxlist)
+    iters = (iter(i) for i in idxlist)
+    heap = [(next(it), it) for it in iters]
+    heapq.heapify(heap)
+    count = 0
+    last = None
+    while heap:
+        if (count % 10024) == 0:
+            progress('Reading indexes: %.2f%% (%d/%d)\r'
+                     % (count*100.0/total, count, total))
+        (e, it) = heap[0]
+        if e != last:
+            yield e
+            last = e
+        count += 1
+        e = next(it)
+        if e:
+            heapq.heapreplace(heap, (e, it))
+        else:
+            heapq.heappop(heap)
+    log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
+
+    
+class PackWriter:
+    def __init__(self, objcache_maker=None):
+        self.count = 0
+        self.outbytes = 0
+        self.filename = None
+        self.file = None
+        self.objcache_maker = objcache_maker
+        self.objcache = None
+
+    def __del__(self):
+        self.close()
+
+    def _make_objcache(self):
+        if not self.objcache:
+            if self.objcache_maker:
+                self.objcache = self.objcache_maker()
+            else:
+                self.objcache = MultiPackIndex(repo('objects/pack'))
+
+    def _open(self):
+        if not self.file:
+            self._make_objcache()
+            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
+            self.file = os.fdopen(fd, 'w+b')
+            assert(name.endswith('.pack'))
+            self.filename = name[:-5]
+            self.file.write('PACK\0\0\0\2\0\0\0\0')
+
+    def _raw_write(self, datalist):
+        self._open()
+        f = self.file
+        for d in datalist:
+            f.write(d)
+            self.outbytes += len(d)
+        self.count += 1
+
+    def _write(self, bin, type, content):
+        if verbose:
+            log('>')
+        self._raw_write(_encode_packobj(type, content))
+        return bin
+
+    def breakpoint(self):
+        id = self._end()
+        self.outbytes = self.count = 0
+        return id
+
+    def write(self, type, content):
+        return self._write(calc_hash(type, content), type, content)
+
+    def exists(self, id):
+        if not self.objcache:
+            self._make_objcache()
+        return self.objcache.exists(id)
+
+    def maybe_write(self, type, content):
+        bin = calc_hash(type, content)
+        if not self.exists(bin):
+            self._write(bin, type, content)
+            self.objcache.add(bin)
+        return bin
+
+    def new_blob(self, blob):
+        return self.maybe_write('blob', blob)
+
+    def new_tree(self, shalist):
+        shalist = sorted(shalist, key = _shalist_sort_key)
+        l = []
+        for (mode,name,bin) in shalist:
+            assert(mode)
+            assert(mode != '0')
+            assert(mode[0] != '0')
+            assert(name)
+            assert(len(bin) == 20)
+            l.append('%s %s\0%s' % (mode,name,bin))
+        return self.maybe_write('tree', ''.join(l))
+
+    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
+        l = []
+        if tree: l.append('tree %s' % tree.encode('hex'))
+        if parent: l.append('parent %s' % parent.encode('hex'))
+        if author: l.append('author %s %s' % (author, _git_date(adate)))
+        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
+        l.append('')
+        l.append(msg)
+        return self.maybe_write('commit', '\n'.join(l))
+
+    def new_commit(self, parent, tree, msg):
+        now = time.time()
+        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
+        commit = self._new_commit(tree, parent,
+                                  userline, now, userline, now,
+                                  msg)
+        return commit
+
+    def abort(self):
+        f = self.file
+        if f:
+            self.file = None
+            f.close()
+            os.unlink(self.filename + '.pack')
+
+    def _end(self):
+        f = self.file
+        if not f: return None
+        self.file = None
+        self.objcache = None
+
+        # update object count
+        f.seek(8)
+        cp = struct.pack('!i', self.count)
+        assert(len(cp) == 4)
+        f.write(cp)
+
+        # calculate the pack sha1sum
+        f.seek(0)
+        sum = sha.sha()
+        while 1:
+            b = f.read(65536)
+            sum.update(b)
+            if not b: break
+        f.write(sum.digest())
+        
+        f.close()
+
+        p = subprocess.Popen(['git', 'index-pack', '-v',
+                              '--index-version=2',
+                              self.filename + '.pack'],
+                             preexec_fn = _gitenv,
+                             stdout = subprocess.PIPE)
+        out = p.stdout.read().strip()
+        _git_wait('git index-pack', p)
+        if not out:
+            raise GitError('git index-pack produced no output')
+        nameprefix = repo('objects/pack/%s' % out)
+        if os.path.exists(self.filename + '.map'):
+            os.unlink(self.filename + '.map')
+        os.rename(self.filename + '.pack', nameprefix + '.pack')
+        os.rename(self.filename + '.idx', nameprefix + '.idx')
+        return nameprefix
+
+    def close(self):
+        return self._end()
+
+
+def _git_date(date):
+    return time.strftime('%s %z', time.localtime(date))
+
+
+def _gitenv():
+    os.environ['GIT_DIR'] = os.path.abspath(repo())
+
+
+def list_refs(refname = None):
+    argv = ['git', 'show-ref', '--']
+    if refname:
+        argv += [refname]
+    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
+    out = p.stdout.read().strip()
+    rv = p.wait()  # not fatal
+    if rv:
+        assert(not out)
+    if out:
+        for d in out.split('\n'):
+            (sha, name) = d.split(' ', 1)
+            yield (name, sha.decode('hex'))
+
+
+def read_ref(refname):
+    l = list(list_refs(refname))
+    if l:
+        assert(len(l) == 1)
+        return l[0][1]
+    else:
+        return None
+
+
+def rev_list(ref):
+    assert(not ref.startswith('-'))
+    argv = ['git', 'rev-list', '--pretty=format:%ct', ref, '--']
+    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
+    commit = None
+    for row in p.stdout:
+        s = row.strip()
+        if s.startswith('commit '):
+            commit = s[7:].decode('hex')
+        else:
+            date = int(s)
+            yield (date, commit)
+    rv = p.wait()  # not fatal
+    if rv:
+        raise GitError, 'git rev-list returned error %d' % rv
+
+
+def update_ref(refname, newval, oldval):
+    if not oldval:
+        oldval = ''
+    assert(refname.startswith('refs/heads/'))
+    p = subprocess.Popen(['git', 'update-ref', refname,
+                          newval.encode('hex'), oldval.encode('hex')],
+                         preexec_fn = _gitenv)
+    _git_wait('git update-ref', p)
+
+
+def guess_repo(path=None):
+    global repodir
+    if path:
+        repodir = path
+    if not repodir:
+        repodir = os.environ.get('BUP_DIR')
+        if not repodir:
+            repodir = os.path.expanduser('~/.bup')
+
+
+def init_repo(path=None):
+    guess_repo(path)
+    d = repo()
+    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
+        raise GitError('"%s" exists but is not a directory\n' % d)
+    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
+                         preexec_fn = _gitenv)
+    _git_wait('git init', p)
+    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
+                         stdout=sys.stderr, preexec_fn = _gitenv)
+    _git_wait('git config', p)
+
+
+def check_repo_or_die(path=None):
+    guess_repo(path)
+    if not os.path.isdir(repo('objects/pack/.')):
+        if repodir == home_repodir:
+            init_repo()
+        else:
+            log('error: %r is not a bup/git repository\n' % repo())
+            sys.exit(15)
+
+
+def _treeparse(buf):
+    ofs = 0
+    while ofs < len(buf):
+        z = buf[ofs:].find('\0')
+        assert(z > 0)
+        spl = buf[ofs:ofs+z].split(' ', 1)
+        assert(len(spl) == 2)
+        sha = buf[ofs+z+1:ofs+z+1+20]
+        ofs += z+1+20
+        yield (spl[0], spl[1], sha)
+
+
+_ver = None
+def ver():
+    global _ver
+    if not _ver:
+        p = subprocess.Popen(['git', '--version'],
+                             stdout=subprocess.PIPE)
+        gvs = p.stdout.read()
+        _git_wait('git --version', p)
+        m = re.match(r'git version (\S+\.\S+)', gvs)
+        if not m:
+            raise GitError('git --version weird output: %r' % gvs)
+        _ver = tuple(m.group(1).split('.'))
+    needed = ('1','5', '3', '1')
+    if _ver < needed:
+        raise GitError('git version %s or higher is required; you have %s'
+                       % ('.'.join(needed), '.'.join(_ver)))
+    return _ver
+
+
+def _git_wait(cmd, p):
+    rv = p.wait()
+    if rv != 0:
+        raise GitError('%s returned %d' % (cmd, rv))
+
+
+def _git_capture(argv):
+    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
+    r = p.stdout.read()
+    _git_wait(repr(argv), p)
+    return r
+
+
+_ver_warned = 0
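+# stream objects out of the repo through one long-lived 'git cat-file --batch'
+# process (or a slow per-object fallback for git older than 1.5.6)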
+class CatPipe:
+    def __init__(self):
+        global _ver_warned
+        wanted = ('1','5','6')
+        if ver() < wanted:
+            if not _ver_warned:
+                log('warning: git version < %s; bup will be slow.\n'
+                    % '.'.join(wanted))
+                _ver_warned = 1
+            self.get = self._slow_get
+        else:
+            self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
+                                      stdin=subprocess.PIPE, 
+                                      stdout=subprocess.PIPE,
+                                      preexec_fn = _gitenv)
+            self.get = self._fast_get
+            self.inprogress = None
+
+    def _fast_get(self, id):
+        if self.inprogress:
+            log('_fast_get: opening %r while %r is open' 
+                % (id, self.inprogress))
+        assert(not self.inprogress)
+        assert(id.find('\n') < 0)
+        assert(id.find('\r') < 0)
+        assert(id[0] != '-')
+        self.inprogress = id
+        self.p.stdin.write('%s\n' % id)
+        hdr = self.p.stdout.readline()
+        if hdr.endswith(' missing\n'):
+            raise KeyError('blob %r is missing' % id)
+        spl = hdr.split(' ')
+        if len(spl) != 3 or len(spl[0]) != 40:
+            raise GitError('expected blob, got %r' % spl)
+        (hex, type, size) = spl
+
+        def ondone():
+            assert(self.p.stdout.readline() == '\n')
+            self.inprogress = None
+
+        it = AutoFlushIter(chunkyreader(self.p.stdout, int(spl[2])),
+                           ondone = ondone)
+        yield type
+        for blob in it:
+            yield blob
+        del it
+
+    def _slow_get(self, id):
+        assert(id.find('\n') < 0)
+        assert(id.find('\r') < 0)
+        assert(id[0] != '-')
+        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
+        yield type
+
+        p = subprocess.Popen(['git', 'cat-file', type, id],
+                             stdout=subprocess.PIPE,
+                             preexec_fn = _gitenv)
+        for blob in chunkyreader(p.stdout):
+            yield blob
+        _git_wait('git cat-file', p)
+
+    def _join(self, it):
+        type = it.next()
+        if type == 'blob':
+            for blob in it:
+                yield blob
+        elif type == 'tree':
+            treefile = ''.join(it)
+            for (mode, name, sha) in _treeparse(treefile):
+                for blob in self.join(sha.encode('hex')):
+                    yield blob
+        elif type == 'commit':
+            treeline = ''.join(it).split('\n')[0]
+            assert(treeline.startswith('tree '))
+            for blob in self.join(treeline[5:]):
+                yield blob
+        else:
+            raise GitError('invalid object type %r: expected blob/tree/commit'
+                           % type)
+
+    def join(self, id):
+        try:
+            for d in self._join(self.get(id)):
+                yield d
+        except StopIteration:
+            log('booger!\n')
+        
+
+def cat(id):
+    c = CatPipe()
+    for d in c.join(id):
+        yield d
diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py
new file mode 100644 (file)
index 0000000..f85011d
--- /dev/null
+++ b/lib/bup/hashsplit.py
@@ -0,0 +1,158 @@
+import sys, math
+from bup import git, _hashsplit
+from bup.helpers import *
+
+BLOB_LWM = 8192*2
+BLOB_MAX = BLOB_LWM*2
+BLOB_HWM = 1024*1024
+MAX_PER_TREE = 256
+progress_callback = None
+max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
+max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
+fanout = 16
+
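+# A simple byte FIFO: put() appends data, peek()/get()/eat() consume from
+# the front.  buffer() slices keep peek() and get() cheap; put() compacts
+# the already-eaten prefix away.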
+class Buf:
+    def __init__(self):
+        self.data = ''
+        self.start = 0
+
+    def put(self, s):
+        if s:
+            self.data = buffer(self.data, self.start) + s
+            self.start = 0
+            
+    def peek(self, count):
+        return buffer(self.data, self.start, count)
+    
+    def eat(self, count):
+        self.start += count
+
+    def get(self, count):
+        v = buffer(self.data, self.start, count)
+        self.start += count
+        return v
+
+    def used(self):
+        return len(self.data) - self.start
+
+
+def splitbuf(buf):
+    b = buf.peek(buf.used())
+    (ofs, bits) = _hashsplit.splitbuf(b)
+    if ofs:
+        buf.eat(ofs)
+        return (buffer(b, 0, ofs), bits)
+    return (None, 0)
+
+
+def blobiter(files):
+    for f in files:
+        while 1:
+            b = f.read(BLOB_HWM)
+            if not b:
+                break
+            yield b
+
+
+def drainbuf(buf, finalize):
+    while 1:
+        (blob, bits) = splitbuf(buf)
+        if blob:
+            yield (blob, bits)
+        else:
+            break
+    while buf.used() > BLOB_MAX:
+        # limit max blob size
+        yield (buf.get(BLOB_MAX), 0)
+    if finalize and buf.used():
+        yield (buf.get(buf.used()), 0)
+
+
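+# Yield (blob, bits) chunks for the concatenated input files: keep the
+# buffer filled to at least BLOB_HWM, cut at rolling-checksum boundaries
+# found by _hashsplit.splitbuf(), and cap unsplit runs at BLOB_MAX.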
+def hashsplit_iter(files):
+    assert(BLOB_HWM > BLOB_MAX)
+    buf = Buf()
+    fi = blobiter(files)
+    while 1:
+        for i in drainbuf(buf, finalize=False):
+            yield i
+        while buf.used() < BLOB_HWM:
+            bnew = next(fi)
+            if not bnew:
+                # eof
+                for i in drainbuf(buf, finalize=True):
+                    yield i
+                return
+            buf.put(bnew)
+
+
+total_split = 0
+def _split_to_blobs(w, files):
+    global total_split
+    for (blob, bits) in hashsplit_iter(files):
+        sha = w.new_blob(blob)
+        total_split += len(blob)
+        if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
+            w.breakpoint()
+        if progress_callback:
+            progress_callback(len(blob))
+        yield (sha, len(blob), bits)
+
+
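+# Name each chunk by its byte offset as 16-digit hex so the tree entries
+# sort in stream order; e.g. the chunk at offset 4096 is named
+# '0000000000001000'.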
+def _make_shalist(l):
+    ofs = 0
+    shalist = []
+    for (mode, sha, size) in l:
+        shalist.append((mode, '%016x' % ofs, sha))
+        ofs += size
+    total = ofs
+    return (shalist, total)
+
+
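+# Fold finished levels of the fanout stack upward: each level below n (or
+# any level holding more than MAX_PER_TREE entries) is written as a git
+# tree and pushed as a single '40000' entry onto the level above it; a
+# level with only one entry is passed up as-is.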
+def _squish(w, stacks, n):
+    i = 0
+    while i<n or len(stacks[i]) > MAX_PER_TREE:
+        while len(stacks) <= i+1:
+            stacks.append([])
+        if len(stacks[i]) == 1:
+            stacks[i+1] += stacks[i]
+        elif stacks[i]:
+            (shalist, size) = _make_shalist(stacks[i])
+            tree = w.new_tree(shalist)
+            stacks[i+1].append(('40000', tree, size))
+        stacks[i] = []
+        i += 1
+
+
+def split_to_shalist(w, files):
+    sl = _split_to_blobs(w, files)
+    if not fanout:
+        shal = []
+        for (sha,size,bits) in sl:
+            shal.append(('100644', sha, size))
+        return _make_shalist(shal)[0]
+    else:
+        base_bits = _hashsplit.blobbits()
+        fanout_bits = int(math.log(fanout, 2))
+        def bits_to_idx(n):
+            assert(n >= base_bits)
+            return (n - base_bits)/fanout_bits
+        stacks = [[]]
+        for (sha,size,bits) in sl:
+            assert(bits <= 32)
+            stacks[0].append(('100644', sha, size))
+            if bits > base_bits:
+                _squish(w, stacks, bits_to_idx(bits))
+        #log('stacks: %r\n' % [len(i) for i in stacks])
+        _squish(w, stacks, len(stacks)-1)
+        #log('stacks: %r\n' % [len(i) for i in stacks])
+        return _make_shalist(stacks[-1])[0]
+
+
+def split_to_blob_or_tree(w, files):
+    shalist = list(split_to_shalist(w, files))
+    if len(shalist) == 1:
+        return (shalist[0][0], shalist[0][2])
+    elif len(shalist) == 0:
+        return ('100644', w.new_blob(''))
+    else:
+        return ('40000', w.new_tree(shalist))
diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
new file mode 100644 (file)
index 0000000..75cf09c
--- /dev/null
+++ b/lib/bup/helpers.py
@@ -0,0 +1,269 @@
+import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
+
+
+def log(s):
+    sys.stderr.write(s)
+
+
+def mkdirp(d):
+    try:
+        os.makedirs(d)
+    except OSError, e:
+        if e.errno == errno.EEXIST:
+            pass
+        else:
+            raise
+
+
+def next(it):
+    try:
+        return it.next()
+    except StopIteration:
+        return None
+    
+    
+def unlink(f):
+    try:
+        os.unlink(f)
+    except OSError, e:
+        if e.errno == errno.ENOENT:
+            pass  # it doesn't exist, that's what you asked for
+
+
+def readpipe(argv):
+    p = subprocess.Popen(argv, stdout=subprocess.PIPE)
+    r = p.stdout.read()
+    p.wait()
+    return r
+
+
+# FIXME: this function isn't very generic, because it splits the filename
+# in an odd way and depends on a terminating '/' to indicate directories.
+# But it's used in a couple of places, so let's put it here.
+def pathsplit(p):
+    l = p.split('/')
+    l = [i+'/' for i in l[:-1]] + l[-1:]
+    if l[-1] == '':
+        l.pop()  # extra blank caused by terminating '/'
+    return l
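+# Example: pathsplit('/usr/local/bin') == ['/', 'usr/', 'local/', 'bin'],
+# while pathsplit('etc/passwd/') == ['etc/', 'passwd/'].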
+
+
+# like os.path.realpath, but doesn't follow a symlink for the last element.
+# (ie. if 'p' itself is itself a symlink, this one won't follow it)
+def realpath(p):
+    try:
+        st = os.lstat(p)
+    except OSError:
+        st = None
+    if st and stat.S_ISLNK(st.st_mode):
+        (dir, name) = os.path.split(p)
+        dir = os.path.realpath(dir)
+        out = os.path.join(dir, name)
+    else:
+        out = os.path.realpath(p)
+    #log('realpathing:%r,%r\n' % (p, out))
+    return out
+
+
+_username = None
+def username():
+    global _username
+    if not _username:
+        uid = os.getuid()
+        try:
+            _username = pwd.getpwuid(uid)[0]
+        except KeyError:
+            _username = 'user%d' % uid
+    return _username
+
+
+_userfullname = None
+def userfullname():
+    global _userfullname
+    if not _userfullname:
+        uid = os.getuid()
+        try:
+            _userfullname = pwd.getpwuid(uid)[4].split(',')[0]
+        except KeyError:
+            _userfullname = 'user%d' % uid
+    return _userfullname
+
+
+_hostname = None
+def hostname():
+    global _hostname
+    if not _hostname:
+        _hostname = socket.getfqdn()
+    return _hostname
+
+
+class NotOk(Exception):
+    pass
+
+class Conn:
+    def __init__(self, inp, outp):
+        self.inp = inp
+        self.outp = outp
+
+    def read(self, size):
+        self.outp.flush()
+        return self.inp.read(size)
+
+    def readline(self):
+        self.outp.flush()
+        return self.inp.readline()
+
+    def write(self, data):
+        #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
+        self.outp.write(data)
+
+    def has_input(self):
+        [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
+        if rl:
+            assert(rl[0] == self.inp.fileno())
+            return True
+        else:
+            return None
+
+    def ok(self):
+        self.write('\nok\n')
+
+    def error(self, s):
+        s = re.sub(r'\s+', ' ', str(s))
+        self.write('\nerror %s\n' % s)
+
+    def _check_ok(self, onempty):
+        self.outp.flush()
+        rl = ''
+        for rl in linereader(self.inp):
+            #log('%d got line: %r\n' % (os.getpid(), rl))
+            if not rl:  # empty line
+                continue
+            elif rl == 'ok':
+                return None
+            elif rl.startswith('error '):
+                #log('client: error: %s\n' % rl[6:])
+                return NotOk(rl[6:])
+            else:
+                onempty(rl)
+        raise Exception('server exited unexpectedly; see errors above')
+
+    def drain_and_check_ok(self):
+        def onempty(rl):
+            pass
+        return self._check_ok(onempty)
+
+    def check_ok(self):
+        def onempty(rl):
+            raise Exception('expected "ok", got %r' % rl)
+        return self._check_ok(onempty)
+
+
+def linereader(f):
+    while 1:
+        line = f.readline()
+        if not line:
+            break
+        yield line[:-1]
+
+
+def chunkyreader(f, count = None):
+    if count != None:
+        while count > 0:
+            b = f.read(min(count, 65536))
+            if not b:
+                raise IOError('EOF with %d bytes remaining' % count)
+            yield b
+            count -= len(b)
+    else:
+        while 1:
+            b = f.read(65536)
+            if not b: break
+            yield b
+
+
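+# Wrap an iterator so that even if the consumer abandons it early, __del__
+# drains the remaining items and then calls ondone(); CatPipe._fast_get
+# uses this to keep its 'cat-file --batch' stream in sync.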
+class AutoFlushIter:
+    def __init__(self, it, ondone = None):
+        self.it = it
+        self.ondone = ondone
+
+    def __iter__(self):
+        return self
+        
+    def next(self):
+        return self.it.next()
+        
+    def __del__(self):
+        for i in self.it:
+            pass
+        if self.ondone:
+            self.ondone()
+
+
+def slashappend(s):
+    if s and not s.endswith('/'):
+        return s + '/'
+    else:
+        return s
+
+
+def _mmap_do(f, len, flags, prot):
+    if not len:
+        st = os.fstat(f.fileno())
+        len = st.st_size
+    map = mmap.mmap(f.fileno(), len, flags, prot)
+    f.close()  # map will persist beyond file close
+    return map
+
+
+def mmap_read(f, len = 0):
+    return _mmap_do(f, len, mmap.MAP_PRIVATE, mmap.PROT_READ)
+
+
+def mmap_readwrite(f, len = 0):
+    return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
+
+
+def parse_num(s):
+    g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
+    if not g:
+        raise ValueError("can't parse %r as a number" % s)
+    (val, unit) = g.groups()
+    num = float(val)
+    unit = unit.lower()
+    if unit in ['t', 'tb']:
+        mult = 1024*1024*1024*1024
+    elif unit in ['g', 'gb']:
+        mult = 1024*1024*1024
+    elif unit in ['m', 'mb']:
+        mult = 1024*1024
+    elif unit in ['k', 'kb']:
+        mult = 1024
+    elif unit in ['', 'b']:
+        mult = 1
+    else:
+        raise ValueError("invalid unit %r in number %r" % (unit, s))
+    return int(num*mult)
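+# Examples: parse_num('20k') == 20480, parse_num('1.5G') == 1610612736,
+# parse_num('42') == 42.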
+
+
+# count the number of elements in an iterator (consumes the iterator)
+def count(l):
+    return reduce(lambda x,y: x+1, l, 0)
+
+
+def atoi(s):
+    try:
+        return int(s or '0')
+    except ValueError:
+        return 0
+
+
+saved_errors = []
+def add_error(e):
+    saved_errors.append(e)
+    log('%-70s\n' % e)
+
+istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY'))
+def progress(s):
+    if istty:
+        log(s)
diff --git a/lib/bup/index.py b/lib/bup/index.py
new file mode 100644 (file)
index 0000000..39cae88
--- /dev/null
+++ b/lib/bup/index.py
@@ -0,0 +1,426 @@
+import os, stat, time, struct, tempfile
+from bup.helpers import *
+
+EMPTY_SHA = '\0'*20
+FAKE_SHA = '\x01'*20
+INDEX_HDR = 'BUPI\0\0\0\2'
+INDEX_SIG = '!IIIIIQII20sHII'
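+# INDEX_SIG field order (see Entry.packed below): dev, ctime, mtime, uid,
+# gid, size, mode, gitmode, sha, flags, children_ofs, children_n.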
+ENTLEN = struct.calcsize(INDEX_SIG)
+FOOTER_SIG = '!Q'
+FOOTLEN = struct.calcsize(FOOTER_SIG)
+
+IX_EXISTS = 0x8000
+IX_HASHVALID = 0x4000
+
+class Error(Exception):
+    pass
+
+
+class Level:
+    def __init__(self, ename, parent):
+        self.parent = parent
+        self.ename = ename
+        self.list = []
+        self.count = 0
+
+    def write(self, f):
+        (ofs,n) = (f.tell(), len(self.list))
+        if self.list:
+            count = len(self.list)
+            #log('popping %r with %d entries\n' 
+            #    % (''.join(self.ename), count))
+            for e in self.list:
+                e.write(f)
+            if self.parent:
+                self.parent.count += count + self.count
+        return (ofs,n)
+
+
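+# Move the current Level to 'ename': close any finished directory levels
+# (writing their entries to f) and open new ones.  Entries arrive in
+# reverse-sorted order, so children are always written before their parent.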
+def _golevel(level, f, ename, newentry):
+    # close nodes back up the tree
+    assert(level)
+    while ename[:len(level.ename)] != level.ename:
+        n = BlankNewEntry(level.ename[-1])
+        (n.children_ofs,n.children_n) = level.write(f)
+        level.parent.list.append(n)
+        level = level.parent
+
+    # create nodes down the tree
+    while len(level.ename) < len(ename):
+        level = Level(ename[:len(level.ename)+1], level)
+
+    # are we in precisely the right place?
+    assert(ename == level.ename)
+    n = newentry or BlankNewEntry(ename and level.ename[-1] or None)
+    (n.children_ofs,n.children_n) = level.write(f)
+    if level.parent:
+        level.parent.list.append(n)
+    level = level.parent
+
+    return level
+
+
+class Entry:
+    def __init__(self, basename, name):
+        self.basename = str(basename)
+        self.name = str(name)
+        self.children_ofs = 0
+        self.children_n = 0
+
+    def __repr__(self):
+        return ("(%s,0x%04x,%d,%d,%d,%d,%d,%s/%s,0x%04x,0x%08x/%d)" 
+                % (self.name, self.dev,
+                   self.ctime, self.mtime, self.uid, self.gid,
+                   self.size, oct(self.mode), oct(self.gitmode),
+                   self.flags, self.children_ofs, self.children_n))
+
+    def packed(self):
+        return struct.pack(INDEX_SIG,
+                           self.dev, self.ctime, self.mtime, 
+                           self.uid, self.gid, self.size, self.mode,
+                           self.gitmode, self.sha, self.flags,
+                           self.children_ofs, self.children_n)
+
+    def from_stat(self, st, tstart):
+        old = (self.dev, self.ctime, self.mtime,
+               self.uid, self.gid, self.size, self.flags & IX_EXISTS)
+        new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
+               st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
+        self.dev = st.st_dev
+        self.ctime = int(st.st_ctime)
+        self.mtime = int(st.st_mtime)
+        self.uid = st.st_uid
+        self.gid = st.st_gid
+        self.size = st.st_size
+        self.mode = st.st_mode
+        self.flags |= IX_EXISTS
+        if int(st.st_ctime) >= tstart or old != new \
+              or self.sha == EMPTY_SHA or not self.gitmode:
+            self.invalidate()
+
+    def is_valid(self):
+        f = IX_HASHVALID|IX_EXISTS
+        return (self.flags & f) == f
+
+    def invalidate(self):
+        self.flags &= ~IX_HASHVALID
+
+    def validate(self, gitmode, sha):
+        assert(sha)
+        assert(gitmode)
+        self.gitmode = gitmode
+        self.sha = sha
+        self.flags |= IX_HASHVALID|IX_EXISTS
+
+    def exists(self):
+        return not self.is_deleted()
+
+    def is_deleted(self):
+        return (self.flags & IX_EXISTS) == 0
+
+    def set_deleted(self):
+        if self.flags & IX_EXISTS:
+            self.flags &= ~(IX_EXISTS | IX_HASHVALID)
+
+    def is_real(self):
+        return not self.is_fake()
+
+    def is_fake(self):
+        return not self.ctime
+
+    def __cmp__(a, b):
+        return (cmp(a.name, b.name)
+                or -cmp(a.is_valid(), b.is_valid())
+                or -cmp(a.is_fake(), b.is_fake()))
+
+    def write(self, f):
+        f.write(self.basename + '\0' + self.packed())
+
+
+class NewEntry(Entry):
+    def __init__(self, basename, name, dev, ctime, mtime, uid, gid,
+                 size, mode, gitmode, sha, flags, children_ofs, children_n):
+        Entry.__init__(self, basename, name)
+        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
+         self.size, self.mode, self.gitmode, self.sha,
+         self.flags, self.children_ofs, self.children_n
+         ) = (dev, int(ctime), int(mtime), uid, gid,
+              size, mode, gitmode, sha, flags, children_ofs, children_n)
+
+
+class BlankNewEntry(NewEntry):
+    def __init__(self, basename):
+        NewEntry.__init__(self, basename, basename,
+                          0, 0, 0, 0, 0, 0, 0,
+                          0, EMPTY_SHA, 0, 0, 0)
+
+
+class ExistingEntry(Entry):
+    def __init__(self, parent, basename, name, m, ofs):
+        Entry.__init__(self, basename, name)
+        self.parent = parent
+        self._m = m
+        self._ofs = ofs
+        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
+         self.size, self.mode, self.gitmode, self.sha,
+         self.flags, self.children_ofs, self.children_n
+         ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN)))
+
+    def repack(self):
+        self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
+        if self.parent and not self.is_valid():
+            self.parent.invalidate()
+            self.parent.repack()
+
+    def iter(self, name=None, wantrecurse=None):
+        dname = name
+        if dname and not dname.endswith('/'):
+            dname += '/'
+        ofs = self.children_ofs
+        assert(ofs <= len(self._m))
+        assert(self.children_n < 1000000)
+        for i in xrange(self.children_n):
+            eon = self._m.find('\0', ofs)
+            assert(eon >= 0)
+            assert(eon >= ofs)
+            assert(eon > ofs)
+            basename = str(buffer(self._m, ofs, eon-ofs))
+            child = ExistingEntry(self, basename, self.name + basename,
+                                  self._m, eon+1)
+            if (not dname
+                 or child.name.startswith(dname)
+                 or child.name.endswith('/') and dname.startswith(child.name)):
+                if not wantrecurse or wantrecurse(child):
+                    for e in child.iter(name=name, wantrecurse=wantrecurse):
+                        yield e
+            if not name or child.name == name or child.name.startswith(dname):
+                yield child
+            ofs = eon + 1 + ENTLEN
+
+    def __iter__(self):
+        return self.iter()
+            
+
+class Reader:
+    def __init__(self, filename):
+        self.filename = filename
+        self.m = ''
+        self.writable = False
+        self.count = 0
+        f = None
+        try:
+            f = open(filename, 'r+')
+        except IOError, e:
+            if e.errno == errno.ENOENT:
+                pass
+            else:
+                raise
+        if f:
+            b = f.read(len(INDEX_HDR))
+            if b != INDEX_HDR:
+                log('warning: %s: header: expected %r, got %r\n'
+                                 % (filename, INDEX_HDR, b))
+            else:
+                st = os.fstat(f.fileno())
+                if st.st_size:
+                    self.m = mmap_readwrite(f)
+                    self.writable = True
+                    self.count = struct.unpack(FOOTER_SIG,
+                          str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
+
+    def __del__(self):
+        self.close()
+
+    def __len__(self):
+        return int(self.count)
+
+    def forward_iter(self):
+        ofs = len(INDEX_HDR)
+        while ofs+ENTLEN <= len(self.m)-FOOTLEN:
+            eon = self.m.find('\0', ofs)
+            assert(eon >= 0)
+            assert(eon >= ofs)
+            assert(eon > ofs)
+            basename = str(buffer(self.m, ofs, eon-ofs))
+            yield ExistingEntry(None, basename, basename, self.m, eon+1)
+            ofs = eon + 1 + ENTLEN
+
+    def iter(self, name=None, wantrecurse=None):
+        if len(self.m) > len(INDEX_HDR)+ENTLEN:
+            dname = name
+            if dname and not dname.endswith('/'):
+                dname += '/'
+            root = ExistingEntry(None, '/', '/',
+                                 self.m, len(self.m)-FOOTLEN-ENTLEN)
+            for sub in root.iter(name=name, wantrecurse=wantrecurse):
+                yield sub
+            if not dname or dname == root.name:
+                yield root
+
+    def __iter__(self):
+        return self.iter()
+
+    def exists(self):
+        return self.m
+
+    def save(self):
+        if self.writable and self.m:
+            self.m.flush()
+
+    def close(self):
+        self.save()
+        if self.writable and self.m:
+            self.m = None
+            self.writable = False
+
+    def filter(self, prefixes, wantrecurse=None):
+        for (rp, path) in reduce_paths(prefixes):
+            for e in self.iter(rp, wantrecurse=wantrecurse):
+                assert(e.name.startswith(rp))
+                name = path + e.name[len(rp):]
+                yield (name, e)
+
+
+class Writer:
+    def __init__(self, filename):
+        self.rootlevel = self.level = Level([], None)
+        self.f = None
+        self.count = 0
+        self.lastfile = None
+        self.filename = None
+        self.filename = filename = realpath(filename)
+        (dir,name) = os.path.split(filename)
+        (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
+        self.f = os.fdopen(ffd, 'wb', 65536)
+        self.f.write(INDEX_HDR)
+
+    def __del__(self):
+        self.abort()
+
+    def abort(self):
+        f = self.f
+        self.f = None
+        if f:
+            f.close()
+            os.unlink(self.tmpname)
+
+    def flush(self):
+        if self.level:
+            self.level = _golevel(self.level, self.f, [], None)
+            self.count = self.rootlevel.count
+            if self.count:
+                self.count += 1
+            self.f.write(struct.pack(FOOTER_SIG, self.count))
+            self.f.flush()
+        assert(self.level == None)
+
+    def close(self):
+        self.flush()
+        f = self.f
+        self.f = None
+        if f:
+            f.close()
+            os.rename(self.tmpname, self.filename)
+
+    def _add(self, ename, entry):
+        if self.lastfile and self.lastfile <= ename:
+            raise Error('%r must come before %r'
+                             % (''.join(ename), ''.join(self.lastfile)))
+        self.lastfile = ename
+        self.level = _golevel(self.level, self.f, ename, entry)
+
+    def add(self, name, st, hashgen = None):
+        endswith = name.endswith('/')
+        ename = pathsplit(name)
+        basename = ename[-1]
+        #log('add: %r %r\n' % (basename, name))
+        flags = IX_EXISTS
+        sha = None
+        if hashgen:
+            (gitmode, sha) = hashgen(name)
+            flags |= IX_HASHVALID
+        else:
+            (gitmode, sha) = (0, EMPTY_SHA)
+        if st:
+            isdir = stat.S_ISDIR(st.st_mode)
+            assert(isdir == endswith)
+            e = NewEntry(basename, name, st.st_dev, int(st.st_ctime),
+                         int(st.st_mtime), st.st_uid, st.st_gid,
+                         st.st_size, st.st_mode, gitmode, sha, flags,
+                         0, 0)
+        else:
+            assert(endswith)
+            e = BlankNewEntry(basename)
+            e.gitmode = gitmode
+            e.sha = sha
+            e.flags = flags
+        self._add(ename, e)
+
+    def add_ixentry(self, e):
+        e.children_ofs = e.children_n = 0
+        self._add(pathsplit(e.name), e)
+
+    def new_reader(self):
+        self.flush()
+        return Reader(self.tmpname)
+
+
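+# Normalize and prune a list of paths: realpath each one, append '/' to
+# directories, and drop any path already contained in an earlier one.
+# E.g. (assuming both directories exist) ['/usr/local', '/usr'] reduces
+# to [('/usr/', '/usr/')], returned in reverse-sorted order.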
+def reduce_paths(paths):
+    xpaths = []
+    for p in paths:
+        rp = realpath(p)
+        try:
+            st = os.lstat(rp)
+            if stat.S_ISDIR(st.st_mode):
+                rp = slashappend(rp)
+                p = slashappend(p)
+        except OSError, e:
+            if e.errno != errno.ENOENT:
+                raise
+        xpaths.append((rp, p))
+    xpaths.sort()
+
+    paths = []
+    prev = None
+    for (rp, p) in xpaths:
+        if prev and (prev == rp 
+                     or (prev.endswith('/') and rp.startswith(prev))):
+            continue # already superceded by previous path
+        paths.append((rp, p))
+        prev = rp
+    paths.sort(reverse=True)
+    return paths
+
+
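+# Merge several sorted index iterators into one descending stream, skipping
+# duplicate names that appear back to back; l.sort() + l.pop() always takes
+# the largest remaining entry.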
+class MergeIter:
+    def __init__(self, iters):
+        self.iters = iters
+
+    def __len__(self):
+        # FIXME: doesn't remove duplicated entries between iters.
+        # That only happens for parent directories, but will mean the
+        # actual iteration returns fewer entries than this function counts.
+        return sum(len(it) for it in self.iters)
+
+    def __iter__(self):
+        total = len(self)
+        l = [iter(it) for it in self.iters]
+        l = [(next(it),it) for it in l]
+        l = filter(lambda x: x[0], l)
+        count = 0
+        lastname = None
+        while l:
+            if not (count % 1024):
+                progress('bup: merging indexes (%d/%d)\r' % (count, total))
+            l.sort()
+            (e,it) = l.pop()
+            if not e:
+                continue
+            if e.name != lastname:
+                yield e
+                lastname = e.name
+            n = next(it)
+            if n:
+                l.append((n,it))
+            count += 1
+        log('bup: merging indexes (%d/%d), done.\n' % (count, total))
diff --git a/lib/bup/options.py b/lib/bup/options.py
new file mode 100644 (file)
index 0000000..7ae529f
--- /dev/null
+++ b/lib/bup/options.py
@@ -0,0 +1,119 @@
+import textwrap, getopt, re
+from bup.helpers import *
+
+class OptDict:
+    def __init__(self):
+        self._opts = {}
+
+    def __setitem__(self, k, v):
+        self._opts[k] = v
+        
+    def __getitem__(self, k):
+        return self._opts[k]
+
+    def __getattr__(self, k):
+        return self[k]
+
+
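+# Parse an 'optspec' DSL: usage synopses up to a line containing only '--',
+# then one option per line, e.g.
+#   n,number=  number of objects per cycle
+# declares -n/--number taking a value ('=') with that help text.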
+class Options:
+    def __init__(self, exe, optspec):
+        self.exe = exe
+        self.optspec = optspec
+        self._aliases = {}
+        self._shortopts = 'h?'
+        self._longopts = ['help']
+        self._hasparms = {}
+        self._usagestr = self._gen_usage()
+        
+    def _gen_usage(self):
+        out = []
+        lines = self.optspec.strip().split('\n')
+        lines.reverse()
+        first_syn = True
+        while lines:
+            l = lines.pop()
+            if l == '--': break
+            out.append('%s: %s\n' % (first_syn and 'usage' or '   or', l))
+            first_syn = False
+        out.append('\n')
+        while lines:
+            l = lines.pop()
+            if l.startswith(' '):
+                out.append('\n%s\n' % l.lstrip())
+            elif l:
+                (flags, extra) = l.split(' ', 1)
+                extra = extra.strip()
+                if flags.endswith('='):
+                    flags = flags[:-1]
+                    has_parm = 1
+                else:
+                    has_parm = 0
+                flagl = flags.split(',')
+                flagl_nice = []
+                for f in flagl:
+                    f_nice = re.sub(r'\W', '_', f)
+                    self._aliases[f] = flagl[0]
+                    self._aliases[f_nice] = flagl[0]
+                    self._hasparms[f] = has_parm
+                    if len(f) == 1:
+                        self._shortopts += f + (has_parm and ':' or '')
+                        flagl_nice.append('-' + f)
+                    else:
+                        assert(not f.startswith('no-')) # supported implicitly
+                        self._longopts.append(f + (has_parm and '=' or ''))
+                        self._longopts.append('no-' + f)
+                        flagl_nice.append('--' + f)
+                flags_nice = ', '.join(flagl_nice)
+                if has_parm:
+                    flags_nice += ' ...'
+                prefix = '    %-20s  ' % flags_nice
+                argtext = '\n'.join(textwrap.wrap(extra, width=70,
+                                                initial_indent=prefix,
+                                                subsequent_indent=' '*28))
+                out.append(argtext + '\n')
+            else:
+                out.append('\n')
+        return ''.join(out).rstrip() + '\n'
+    
+    def usage(self):
+        log(self._usagestr)
+        sys.exit(97)
+
+    def fatal(self, s):
+        log('error: %s\n' % s)
+        return self.usage()
+        
+    def parse(self, args):
+        try:
+            (flags,extra) = getopt.gnu_getopt(args,
+                                              self._shortopts, self._longopts)
+        except getopt.GetoptError, e:
+            self.fatal(e)
+
+        opt = OptDict()
+        for f in self._aliases.values():
+            opt[f] = None
+        for (k,v) in flags:
+            while k.startswith('-'):
+                k = k[1:]
+            if k in ['h', '?', 'help']:
+                self.usage()
+            if k.startswith('no-'):
+                k = self._aliases[k[3:]]
+                opt[k] = None
+            else:
+                k = self._aliases[k]
+                if not self._hasparms[k]:
+                    assert(v == '')
+                    opt[k] = (opt._opts.get(k) or 0) + 1
+                else:
+                    try:
+                        vv = int(v)
+                        if str(vv) == v:
+                            v = vv
+                    except ValueError:
+                        pass
+                    opt[k] = v
+        for (f1,f2) in self._aliases.items():
+            opt[f1] = opt[f2]
+        return (opt,flags,extra)
diff --git a/lib/bup/shquote.py b/lib/bup/shquote.py
new file mode 100644 (file)
index 0000000..dc339ec
--- /dev/null
+++ b/lib/bup/shquote.py
@@ -0,0 +1,87 @@
+import re
+
+q = "'"
+qq = '"'
+
+
+class QuoteError(Exception):
+    pass
+
+
+def _quotesplit(line):
+    inquote = None
+    inescape = None
+    wordstart = 0
+    word = ''
+    for i in range(len(line)):
+        c = line[i]
+        if inescape:
+            if inquote == q and c != q:
+                word += '\\'  # single-q backslashes can only quote single-q
+            word += c
+            inescape = False
+        elif c == '\\':
+            inescape = True
+        elif c == inquote:
+            inquote = None
+            # this is un-sh-like, but do it for sanity when autocompleting
+            yield (wordstart, word)
+            word = ''
+            wordstart = i+1
+        elif not inquote and not word and (c == q or c == qq):
+            # the 'not word' constraint on this is un-sh-like, but do it
+            # for sanity when autocompleting
+            inquote = c
+            wordstart = i
+        elif not inquote and c in [' ', '\n', '\r', '\t']:
+            if word:
+                yield (wordstart, word)
+            word = ''
+            wordstart = i+1
+        else:
+            word += c
+    if word:
+        yield (wordstart, word)
+    if inquote or inescape or word:
+        raise QuoteError()
+
+
+def quotesplit(line):
+    l = []
+    try:
+        for i in _quotesplit(line):
+            l.append(i)
+    except QuoteError:
+        pass
+    return l
+
+
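+# Return (quote_char, word) for the line's unfinished last word, or
+# (None, '') if the line parses cleanly.  When _quotesplit raises
+# QuoteError it has normally already yielded the trailing partial word,
+# so the for-loop targets below still hold it in the except clause.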
+def unfinished_word(line):
+    try:
+        for (wordstart,word) in _quotesplit(line):
+            pass
+    except QuoteError:
+        firstchar = line[wordstart]
+        if firstchar in [q, qq]:
+            return (firstchar, word)
+        else:
+            return (None, word)
+    else:
+        return (None, '')
+
+
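+# Example: with qtype == q, the word don't is rendered as 'don\'t'; with
+# qtype == None, spaces and quotes are simply backslash-escaped instead.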
+def quotify(qtype, word, terminate):
+    if qtype == qq:
+        return qq + word.replace(qq, '\\"') + (terminate and qq or '')
+    elif qtype == q:
+        return q + word.replace(q, "\\'") + (terminate and q or '')
+    else:
+        return re.sub(r'([\"\' \t\n\r])', r'\\\1', word)
+
+
+def what_to_add(qtype, origword, newword, terminate):
+    if not newword.startswith(origword):
+        return ''
+    else:
+        qold = quotify(qtype, origword, terminate=False)
+        return quotify(qtype, newword, terminate=terminate)[len(qold):]
diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py
new file mode 100644 (file)
index 0000000..efa0947
--- /dev/null
+++ b/lib/bup/vfs.py
@@ -0,0 +1,243 @@
+import os, re, stat, time
+from bup import git
+from bup.helpers import *
+
+EMPTY_SHA='\0'*20
+
+_cp = None
+def cp():
+    global _cp
+    if not _cp:
+        _cp = git.CatPipe()
+    return _cp
+
+class NodeError(Exception):
+    pass
+class NoSuchFile(NodeError):
+    pass
+class NotDir(NodeError):
+    pass
+class NotFile(NodeError):
+    pass
+class TooManySymlinks(NodeError):
+    pass
+
+
+class FileReader:
+    def __init__(self, node):
+        self.n = node
+        self.ofs = 0
+        self.size = self.n.size()
+
+    def seek(self, ofs):
+        if ofs > self.size:
+            self.ofs = self.size
+        elif ofs < 0:
+            self.ofs = 0
+        else:
+            self.ofs = ofs
+
+    def tell(self):
+        return self.ofs
+
+    def read(self, count = -1):
+        if count < 0:
+            count = self.size - self.ofs
+        buf = self.n.readbytes(self.ofs, count)
+        self.ofs += len(buf)
+        return buf
+
+
+class Node:
+    def __init__(self, parent, name, mode, hash):
+        self.parent = parent
+        self.name = name
+        self.mode = mode
+        self.hash = hash
+        self._subs = None
+        
+    def __cmp__(a, b):
+        return cmp(a.name or None, b.name or None)
+    
+    def __iter__(self):
+        return iter(self.subs())
+    
+    def fullname(self):
+        if self.parent:
+            return os.path.join(self.parent.fullname(), self.name)
+        else:
+            return self.name
+    
+    def _mksubs(self):
+        self._subs = {}
+        
+    def subs(self):
+        if self._subs == None:
+            self._mksubs()
+        return sorted(self._subs.values())
+        
+    def sub(self, name):
+        if self._subs == None:
+            self._mksubs()
+        ret = self._subs.get(name)
+        if not ret:
+            raise NoSuchFile("no file %r in %r" % (name, self.name))
+        return ret
+
+    def top(self):
+        if self.parent:
+            return self.parent.top()
+        else:
+            return self
+
+    def _lresolve(self, parts):
+        #log('_lresolve %r in %r\n' % (parts, self.name))
+        if not parts:
+            return self
+        (first, rest) = (parts[0], parts[1:])
+        if first == '.':
+            return self._lresolve(rest)
+        elif first == '..':
+            if not self.parent:
+                raise NoSuchFile("no parent dir for %r" % self.name)
+            return self.parent._lresolve(rest)
+        elif rest:
+            return self.sub(first)._lresolve(rest)
+        else:
+            return self.sub(first)
+
+    def lresolve(self, path):
+        start = self
+        if path.startswith('/'):
+            start = self.top()
+            path = path[1:]
+        parts = re.split(r'/+', path or '.')
+        if not parts[-1]:
+            parts[-1] = '.'
+        #log('parts: %r %r\n' % (path, parts))
+        return start._lresolve(parts)
+
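+    # Like lresolve(), but the trailing lresolve('') also dereferences a
+    # final symlink -- the stat() to lresolve()'s lstat().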
+    def resolve(self, path):
+        return self.lresolve(path).lresolve('')
+    
+    def nlinks(self):
+        if self._subs == None:
+            self._mksubs()
+        return 1
+
+    def size(self):
+        return 0
+
+    def open(self):
+        raise NotFile('%s is not a regular file' % self.name)
+    
+    def readbytes(self, ofs, count):
+        raise NotFile('%s is not a regular file' % self.name)
+    
+    def read(self, num = -1):
+        if num < 0:
+            num = self.size()
+        return self.readbytes(0, num)
+    
+    
+class File(Node):
+    def _content(self):
+        return cp().join(self.hash.encode('hex'))
+
+    def open(self):
+        return FileReader(self)
+    
+    def size(self):
+        # FIXME inefficient
+        return sum(len(blob) for blob in self._content())
+    
+    def readbytes(self, ofs, count):
+        # FIXME inefficient
+        buf = ''.join(self._content())
+        return buf[ofs:ofs+count]
+    
+
+_symrefs = 0
+class Symlink(File):
+    def __init__(self, parent, name, hash):
+        File.__init__(self, parent, name, 0120000, hash)
+
+    def readlink(self):
+        return self.read(1024)
+
+    def dereference(self):
+        global _symrefs
+        if _symrefs > 100:
+            raise TooManySymlinks('too many levels of symlinks: %r'
+                                  % self.fullname())
+        _symrefs += 1
+        try:
+            return self.parent.lresolve(self.readlink())
+        finally:
+            _symrefs -= 1
+
+    def _lresolve(self, parts):
+        return self.dereference()._lresolve(parts)
+    
+
+class FakeSymlink(Symlink):
+    def __init__(self, parent, name, toname):
+        Symlink.__init__(self, parent, name, EMPTY_SHA)
+        self.toname = toname
+        
+    def _content(self):
+        return self.toname
+    
+
+class Dir(Node):
+    def _mksubs(self):
+        self._subs = {}
+        it = cp().get(self.hash.encode('hex'))
+        type = it.next()
+        if type == 'commit':
+            del it
+            it = cp().get(self.hash.encode('hex') + ':')
+            type = it.next()
+        assert(type == 'tree')
+        for (mode,name,sha) in git._treeparse(''.join(it)):
+            mode = int(mode, 8)
+            if stat.S_ISDIR(mode):
+                self._subs[name] = Dir(self, name, mode, sha)
+            elif stat.S_ISLNK(mode):
+                self._subs[name] = Symlink(self, name, sha)
+            else:
+                self._subs[name] = File(self, name, mode, sha)
+                
+
+class CommitList(Node):
+    def __init__(self, parent, name, hash):
+        Node.__init__(self, parent, name, 040000, hash)
+        
+    def _mksubs(self):
+        self._subs = {}
+        revs = list(git.rev_list(self.hash.encode('hex')))
+        for (date, commit) in revs:
+            l = time.localtime(date)
+            ls = time.strftime('%Y-%m-%d-%H%M%S', l)
+            commithex = commit.encode('hex')
+            self._subs[commithex] = Dir(self, commithex, 040000, commit)
+            self._subs[ls] = FakeSymlink(self, ls, commit.encode('hex'))
+        latest = revs and max(revs) or None
+        if latest:
+            (date, commit) = latest
+            self._subs['latest'] = FakeSymlink(self, 'latest',
+                                               commit.encode('hex'))
+
+    
+class RefList(Node):
+    def __init__(self, parent):
+        Node.__init__(self, parent, '/', 040000, EMPTY_SHA)
+        
+    def _mksubs(self):
+        self._subs = {}
+        for (name,sha) in git.list_refs():
+            if name.startswith('refs/heads/'):
+                name = name[11:]
+                self._subs[name] = CommitList(self, name, sha)
+        
+
diff --git a/main.py b/main.py
new file mode 100755 (executable)
index 0000000..d5fab14
--- /dev/null
+++ b/main.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+import sys, os, subprocess
+
+argv = sys.argv
+exe = argv[0]
+exepath = os.path.split(exe)[0] or '.'
+
+# fix the PYTHONPATH to include our lib dir
+libpath = os.path.join(exepath, 'lib')
+sys.path[:0] = [libpath]
+os.environ['PYTHONPATH'] = libpath + ':' + os.environ.get('PYTHONPATH', '')
+
+from bup.helpers import *
+
+def usage():
+    log('Usage: bup <subcmd> <options...>\n\n')
+    log('Available subcommands:\n')
+    for c in sorted(os.listdir(exepath)):
+        if c.startswith('bup-') and c.find('.') < 0:
+            log('\t%s\n' % c[4:])
+    sys.exit(99)
+
+if len(argv) < 2 or not argv[1] or argv[1][0] == '-':
+    usage()
+
+subcmd = argv[1]
+if subcmd == 'help':
+    usage()
+
+def subpath(s):
+    return os.path.join(exepath, 'bup-%s' % s)
+
+if not os.path.exists(subpath(subcmd)):
+    log('error: unknown command "%s"\n' % subcmd)
+    usage()
+
+
+already_fixed = atoi(os.environ.get('BUP_FORCE_TTY'))
+if subcmd in ['ftp']:
+    already_fixed = True
+fix_stdout = not already_fixed and os.isatty(1)
+fix_stderr = not already_fixed and os.isatty(2)
+
+def force_tty():
+    if fix_stdout or fix_stderr:
+        os.environ['BUP_FORCE_TTY'] = '1'
+
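+# If stdout/stderr is a tty, filter the subcommand's output through
+# 'bup newliner' so partial progress lines (ending in \r) stay tidy.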
+if fix_stdout or fix_stderr:
+    realf = fix_stderr and 2 or 1
+    n = subprocess.Popen([subpath('newliner')],
+                         stdin=subprocess.PIPE, stdout=os.dup(realf),
+                         close_fds=True, preexec_fn=force_tty)
+    outf = fix_stdout and n.stdin.fileno() or 1
+    errf = fix_stderr and n.stdin.fileno() or 2
+else:
+    n = None
+    outf = 1
+    errf = 2
+
+ret = 95
+try:
+    try:
+        p = subprocess.Popen([subpath(subcmd)] + argv[2:],
+                             stdout=outf, stderr=errf, preexec_fn=force_tty)
+        ret = p.wait()
+    except OSError, e:
+        log('%s: %s\n' % (subpath(subcmd), e))
+        ret = 98
+    except KeyboardInterrupt, e:
+        ret = 94
+finally:
+    if n:
+        n.stdin.close()
+        try:
+            n.wait()
+        except:
+            pass
+sys.exit(ret)
diff --git a/memtest.py b/memtest.py
deleted file mode 100755 (executable)
index 7595259..0000000
--- a/memtest.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-import sys, re, struct, mmap
-import git, options
-from helpers import *
-
-
-def s_from_bytes(bytes):
-    clist = [chr(b) for b in bytes]
-    return ''.join(clist)
-
-
-def report(count):
-    fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk']
-    d = {}
-    for line in open('/proc/self/status').readlines():
-        l = re.split(r':\s*', line.strip(), 1)
-        d[l[0]] = l[1]
-    if count >= 0:
-        e1 = count
-        fields = [d[k] for k in fields]
-    else:
-        e1 = ''
-    print ('%9s  ' + ('%10s ' * len(fields))) % tuple([e1] + fields)
-
-
-optspec = """
-memtest [-n elements] [-c cycles]
---
-n,number=  number of objects per cycle
-c,cycles=  number of cycles to run
-ignore-midx  ignore .midx files, use only .idx files
-"""
-o = options.Options(sys.argv[0], optspec)
-(opt, flags, extra) = o.parse(sys.argv[1:])
-
-if extra:
-    o.fatal('no arguments expected')
-
-git.ignore_midx = opt.ignore_midx
-
-git.check_repo_or_die()
-m = git.MultiPackIndex(git.repo('objects/pack'))
-
-cycles = opt.cycles or 100
-number = opt.number or 10000
-
-report(-1)
-f = open('/dev/urandom')
-a = mmap.mmap(-1, 20)
-report(0)
-for c in xrange(cycles):
-    for n in xrange(number):
-        b = f.read(3)
-        if 0:
-            bytes = list(struct.unpack('!BBB', b)) + [0]*17
-            bytes[2] &= 0xf0
-            bin = struct.pack('!20s', s_from_bytes(bytes))
-        else:
-            a[0:2] = b[0:2]
-            a[2] = chr(ord(b[2]) & 0xf0)
-            bin = str(a[0:20])
-        #print bin.encode('hex')
-        m.exists(bin)
-    report((c+1)*number)
diff --git a/options.py b/options.py
deleted file mode 100644 (file)
index 165016c..0000000
--- a/options.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import textwrap, getopt, re
-from helpers import *
-
-class OptDict:
-    def __init__(self):
-        self._opts = {}
-
-    def __setitem__(self, k, v):
-        self._opts[k] = v
-        
-    def __getitem__(self, k):
-        return self._opts[k]
-
-    def __getattr__(self, k):
-        return self[k]
-
-
-class Options:
-    def __init__(self, exe, optspec):
-        self.exe = exe
-        self.optspec = optspec
-        self._aliases = {}
-        self._shortopts = 'h?'
-        self._longopts = ['help']
-        self._hasparms = {}
-        self._usagestr = self._gen_usage()
-        
-    def _gen_usage(self):
-        out = []
-        lines = self.optspec.strip().split('\n')
-        lines.reverse()
-        first_syn = True
-        while lines:
-            l = lines.pop()
-            if l == '--': break
-            out.append('%s: %s\n' % (first_syn and 'usage' or '   or', l))
-            first_syn = False
-        out.append('\n')
-        while lines:
-            l = lines.pop()
-            if l.startswith(' '):
-                out.append('\n%s\n' % l.lstrip())
-            elif l:
-                (flags, extra) = l.split(' ', 1)
-                extra = extra.strip()
-                if flags.endswith('='):
-                    flags = flags[:-1]
-                    has_parm = 1
-                else:
-                    has_parm = 0
-                flagl = flags.split(',')
-                flagl_nice = []
-                for f in flagl:
-                    f_nice = re.sub(r'\W', '_', f)
-                    self._aliases[f] = flagl[0]
-                    self._aliases[f_nice] = flagl[0]
-                    self._hasparms[f] = has_parm
-                    if len(f) == 1:
-                        self._shortopts += f + (has_parm and ':' or '')
-                        flagl_nice.append('-' + f)
-                    else:
-                        assert(not f.startswith('no-')) # supported implicitly
-                        self._longopts.append(f + (has_parm and '=' or ''))
-                        self._longopts.append('no-' + f)
-                        flagl_nice.append('--' + f)
-                flags_nice = ', '.join(flagl_nice)
-                if has_parm:
-                    flags_nice += ' ...'
-                prefix = '    %-20s  ' % flags_nice
-                argtext = '\n'.join(textwrap.wrap(extra, width=70,
-                                                initial_indent=prefix,
-                                                subsequent_indent=' '*28))
-                out.append(argtext + '\n')
-            else:
-                out.append('\n')
-        return ''.join(out).rstrip() + '\n'
-    
-    def usage(self):
-        log(self._usagestr)
-        sys.exit(97)
-
-    def fatal(self, s):
-        log('error: %s\n' % s)
-        return self.usage()
-        
-    def parse(self, args):
-        try:
-            (flags,extra) = getopt.gnu_getopt(args,
-                                              self._shortopts, self._longopts)
-        except getopt.GetoptError, e:
-            self.fatal(e)
-
-        opt = OptDict()
-        for f in self._aliases.values():
-            opt[f] = None
-        for (k,v) in flags:
-            while k.startswith('-'):
-                k = k[1:]
-            if k in ['h', '?', 'help']:
-                self.usage()
-            if k.startswith('no-'):
-                k = self._aliases[k[3:]]
-                opt[k] = None
-            else:
-                k = self._aliases[k]
-                if not self._hasparms[k]:
-                    assert(v == '')
-                    opt[k] = (opt._opts.get(k) or 0) + 1
-                else:
-                    try:
-                        vv = int(v)
-                        if str(vv) == v:
-                            v = vv
-                    except ValueError:
-                        pass
-                    opt[k] = v
-        for (f1,f2) in self._aliases.items():
-            opt[f1] = opt[f2]
-        return (opt,flags,extra)
diff --git a/shquote.py b/shquote.py
deleted file mode 100644 (file)
index dc339ec..0000000
--- a/shquote.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import re
-
-q = "'"
-qq = '"'
-
-
-class QuoteError(Exception):
-    pass
-
-
-def _quotesplit(line):
-    inquote = None
-    inescape = None
-    wordstart = 0
-    word = ''
-    for i in range(len(line)):
-        c = line[i]
-        if inescape:
-            if inquote == q and c != q:
-                word += '\\'  # single-q backslashes can only quote single-q
-            word += c
-            inescape = False
-        elif c == '\\':
-            inescape = True
-        elif c == inquote:
-            inquote = None
-            # this is un-sh-like, but do it for sanity when autocompleting
-            yield (wordstart, word)
-            word = ''
-            wordstart = i+1
-        elif not inquote and not word and (c == q or c == qq):
-            # the 'not word' constraint on this is un-sh-like, but do it
-            # for sanity when autocompleting
-            inquote = c
-            wordstart = i
-        elif not inquote and c in [' ', '\n', '\r', '\t']:
-            if word:
-                yield (wordstart, word)
-            word = ''
-            wordstart = i+1
-        else:
-            word += c
-    if word:
-        yield (wordstart, word)
-    if inquote or inescape or word:
-        raise QuoteError()
-
-
-def quotesplit(line):
-    l = []
-    try:
-        for i in _quotesplit(line):
-            l.append(i)
-    except QuoteError:
-        pass
-    return l
-
-
-def unfinished_word(line):
-    try:
-        for (wordstart,word) in _quotesplit(line):
-            pass
-    except QuoteError:
-        firstchar = line[wordstart]
-        if firstchar in [q, qq]:
-            return (firstchar, word)
-        else:
-            return (None, word)
-    else:
-        return (None, '')
-
-
-def quotify(qtype, word, terminate):
-    if qtype == qq:
-        return qq + word.replace(qq, '\\"') + (terminate and qq or '')
-    elif qtype == q:
-        return q + word.replace(q, "\\'") + (terminate and q or '')
-    else:
-        return re.sub(r'([\"\' \t\n\r])', r'\\\1', word)
-
-
-def what_to_add(qtype, origword, newword, terminate):
-    if not newword.startswith(origword):
-        return ''
-    else:
-        qold = quotify(qtype, origword, terminate=False)
-        return quotify(qtype, newword, terminate=terminate)[len(qold):]
diff --git a/t/__init__.py b/t/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..cb95e530a04c1aa60fd4696c6b5cc461dbf2529d 100644 (file)
--- a/t/__init__.py
+++ b/t/__init__.py
@@ -0,0 +1,2 @@
+import sys
+sys.path[:0] = ['lib']
diff --git a/t/tgit.py b/t/tgit.py
index 321a3430f1962d10424ef16e2e391d6479255d5b..f43ab53551ae5e083cf43b1cbc42f1b97a3e3139 100644 (file)
--- a/t/tgit.py
+++ b/t/tgit.py
@@ -1,6 +1,7 @@
-import git, time
+import time
+from bup import git
+from bup.helpers import *
 from wvtest import *
-from helpers import *
 
 
 @wvtest
diff --git a/t/thelpers.py b/t/thelpers.py
index 5e59bcaeb8e352f0f98338626770d236fe78ceae..9f24962644fb25072c27879968f98e38ee62fba8 100644 (file)
--- a/t/thelpers.py
+++ b/t/thelpers.py
@@ -1,4 +1,4 @@
-from helpers import *
+from bup.helpers import *
 from wvtest import *
 
 @wvtest
diff --git a/t/tindex.py b/t/tindex.py
index e5c043b7633ff48fb9a54038264dc179e34b0eb2..330b44467d0b61d1961ccf0a39f10d670c961daa 100644 (file)
--- a/t/tindex.py
+++ b/t/tindex.py
@@ -1,7 +1,7 @@
 import os
-import index
+from bup import index
+from bup.helpers import *
 from wvtest import *
-from helpers import *
 
 @wvtest
 def index_basic():
diff --git a/t/toptions.py b/t/toptions.py
index 937cf707d5339adda34190df683a8716945cdd88..4596e8b245ce593791f99e0d6457ffcd6886027a 100644 (file)
--- a/t/toptions.py
+++ b/t/toptions.py
@@ -1,4 +1,4 @@
-import options
+from bup import options
 from wvtest import *
 
 @wvtest
diff --git a/t/tshquote.py b/t/tshquote.py
index 9f9c8cc845c76177bdf850b2efa789974f41bb52..15b06ec1a53992435cf91fb529d4f85c77367727 100644 (file)
--- a/t/tshquote.py
+++ b/t/tshquote.py
@@ -1,5 +1,5 @@
+from bup import shquote
 from wvtest import *
-import shquote
 
 def qst(line):
     return [s[1] for s in shquote.quotesplit(line)]
diff --git a/vfs.py b/vfs.py
deleted file mode 100644 (file)
index a97d4f5..0000000
--- a/vfs.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import os, re, stat, time
-import git
-from helpers import *
-
-EMPTY_SHA='\0'*20
-
-_cp = None
-def cp():
-    global _cp
-    if not _cp:
-        _cp = git.CatPipe()
-    return _cp
-
-class NodeError(Exception):
-    pass
-class NoSuchFile(NodeError):
-    pass
-class NotDir(NodeError):
-    pass
-class NotFile(NodeError):
-    pass
-class TooManySymlinks(NodeError):
-    pass
-
-
-class FileReader:
-    def __init__(self, node):
-        self.n = node
-        self.ofs = 0
-        self.size = self.n.size()
-
-    def seek(self, ofs):
-        if ofs > self.size:
-            self.ofs = self.size
-        elif ofs < 0:
-            self.ofs = 0
-        else:
-            self.ofs = ofs
-
-    def tell(self):
-        return self.ofs
-
-    def read(self, count = -1):
-        if count < 0:
-            count = self.size - self.ofs
-        buf = self.n.readbytes(self.ofs, count)
-        self.ofs += len(buf)
-        return buf
-
-
-class Node:
-    def __init__(self, parent, name, mode, hash):
-        self.parent = parent
-        self.name = name
-        self.mode = mode
-        self.hash = hash
-        self._subs = None
-        
-    def __cmp__(a, b):
-        return cmp(a.name or None, b.name or None)
-    
-    def __iter__(self):
-        return iter(self.subs())
-    
-    def fullname(self):
-        if self.parent:
-            return os.path.join(self.parent.fullname(), self.name)
-        else:
-            return self.name
-    
-    def _mksubs(self):
-        self._subs = {}
-        
-    def subs(self):
-        if self._subs == None:
-            self._mksubs()
-        return sorted(self._subs.values())
-        
-    def sub(self, name):
-        if self._subs == None:
-            self._mksubs()
-        ret = self._subs.get(name)
-        if not ret:
-            raise NoSuchFile("no file %r in %r" % (name, self.name))
-        return ret
-
-    def top(self):
-        if self.parent:
-            return self.parent.top()
-        else:
-            return self
-
-    def _lresolve(self, parts):
-        #log('_lresolve %r in %r\n' % (parts, self.name))
-        if not parts:
-            return self
-        (first, rest) = (parts[0], parts[1:])
-        if first == '.':
-            return self._lresolve(rest)
-        elif first == '..':
-            if not self.parent:
-                raise NoSuchFile("no parent dir for %r" % self.name)
-            return self.parent._lresolve(rest)
-        elif rest:
-            return self.sub(first)._lresolve(rest)
-        else:
-            return self.sub(first)
-
-    def lresolve(self, path):
-        start = self
-        if path.startswith('/'):
-            start = self.top()
-            path = path[1:]
-        parts = re.split(r'/+', path or '.')
-        if not parts[-1]:
-            parts[-1] = '.'
-        #log('parts: %r %r\n' % (path, parts))
-        return start._lresolve(parts)
-
-    def resolve(self, path):
-        return self.lresolve(path).lresolve('')
-    
-    def nlinks(self):
-        if self._subs == None:
-            self._mksubs()
-        return 1
-
-    def size(self):
-        return 0
-
-    def open(self):
-        raise NotFile('%s is not a regular file' % self.name)
-    
-    def readbytes(self, ofs, count):
-        raise NotFile('%s is not a regular file' % self.name)
-    
-    def read(self, num = -1):
-        if num < 0:
-            num = self.size()
-        return self.readbytes(0, num)
-    
-    
-class File(Node):
-    def _content(self):
-        return cp().join(self.hash.encode('hex'))
-
-    def open(self):
-        return FileReader(self)
-    
-    def size(self):
-        # FIXME inefficient
-        return sum(len(blob) for blob in self._content())
-    
-    def readbytes(self, ofs, count):
-        # FIXME inefficient
-        buf = ''.join(self._content())
-        return buf[ofs:ofs+count]
-    
-
-_symrefs = 0
-class Symlink(File):
-    def __init__(self, parent, name, hash):
-        File.__init__(self, parent, name, 0120000, hash)
-
-    def readlink(self):
-        return self.read(1024)
-
-    def dereference(self):
-        global _symrefs
-        if _symrefs > 100:
-            raise TooManySymlinks('too many levels of symlinks: %r'
-                                  % self.fullname())
-        _symrefs += 1
-        try:
-            return self.parent.lresolve(self.readlink())
-        finally:
-            _symrefs -= 1
-
-    def _lresolve(self, parts):
-        return self.dereference()._lresolve(parts)
-    
-
-class FakeSymlink(Symlink):
-    def __init__(self, parent, name, toname):
-        Symlink.__init__(self, parent, name, EMPTY_SHA)
-        self.toname = toname
-        
-    def _content(self):
-        return self.toname
-    
-
-class Dir(Node):
-    def _mksubs(self):
-        self._subs = {}
-        it = cp().get(self.hash.encode('hex'))
-        type = it.next()
-        if type == 'commit':
-            del it
-            it = cp().get(self.hash.encode('hex') + ':')
-            type = it.next()
-        assert(type == 'tree')
-        for (mode,name,sha) in git._treeparse(''.join(it)):
-            mode = int(mode, 8)
-            if stat.S_ISDIR(mode):
-                self._subs[name] = Dir(self, name, mode, sha)
-            elif stat.S_ISLNK(mode):
-                self._subs[name] = Symlink(self, name, sha)
-            else:
-                self._subs[name] = File(self, name, mode, sha)
-                
-
-class CommitList(Node):
-    def __init__(self, parent, name, hash):
-        Node.__init__(self, parent, name, 040000, hash)
-        
-    def _mksubs(self):
-        self._subs = {}
-        revs = list(git.rev_list(self.hash.encode('hex')))
-        for (date, commit) in revs:
-            l = time.localtime(date)
-            ls = time.strftime('%Y-%m-%d-%H%M%S', l)
-            commithex = commit.encode('hex')
-            self._subs[commithex] = Dir(self, commithex, 040000, commit)
-            self._subs[ls] = FakeSymlink(self, ls, commit.encode('hex'))
-            latest = max(revs)
-        if latest:
-            (date, commit) = latest
-            self._subs['latest'] = FakeSymlink(self, 'latest',
-                                               commit.encode('hex'))
-
-    
-class RefList(Node):
-    def __init__(self, parent):
-        Node.__init__(self, parent, '/', 040000, EMPTY_SHA)
-        
-    def _mksubs(self):
-        self._subs = {}
-        for (name,sha) in git.list_refs():
-            if name.startswith('refs/heads/'):
-                name = name[11:]
-                self._subs[name] = CommitList(self, name, sha)
-        
-