arthur.barton.de Git - bup.git/commitdiff
Move python library files to lib/bup/
author		Avery Pennarun <apenwarr@gmail.com>
		Sun, 28 Feb 2010 21:17:35 +0000 (16:17 -0500)
committer	Avery Pennarun <apenwarr@gmail.com>
		Sun, 28 Feb 2010 22:49:10 +0000 (17:49 -0500)
...and update other programs so that they import them correctly from their
new location.

This is necessary so that the bup library files can eventually be installed
somewhere other than wherever the 'bup' executable ends up.  Plus it's
clearer and safer to say 'from bup import options' instead of just 'import
options', in case someone else writes an 'options' module.
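
To illustrate (using 'options' as the example; the same pattern applies to
git, helpers, client, and the rest), each command changes from:

    import options
    from helpers import *

to:

    from bup import options
    from bup.helpers import *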

I wish I could have named the directory just 'bup', but I can't; there's
already a program with that name.

Also, in the name of sanity, rename memtest.py to 'bup memtest' so that it
can get the new paths automatically.
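
For example, a wrapper in the style of main.py can prepend the lib/
directory to sys.path before dispatching any subcommand. This is only a
sketch of the idea; the actual main.py is not shown in this diff, so the
exact mechanism here is an assumption:

    # Hypothetical main.py-style path setup: make 'from bup import ...'
    # resolve to lib/bup/ next to the 'bup' executable.
    import sys, os

    exepath = os.path.split(sys.argv[0])[0] or '.'
    sys.path.insert(0, os.path.join(exepath, 'lib'))  # assumes the lib/ layout above

    from bup import options  # now found at lib/bup/options.py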

51 files changed:
Makefile
_hashsplit.c [deleted file]
bup.py [deleted file]
client.py [deleted file]
cmd-damage.py
cmd-drecurse.py
cmd-fsck.py
cmd-ftp.py
cmd-fuse.py
cmd-index.py
cmd-init.py
cmd-join.py
cmd-ls.py
cmd-margin.py
cmd-memtest.py [new file with mode: 0755]
cmd-midx.py
cmd-newliner.py
cmd-random.py
cmd-save.py
cmd-server.py
cmd-split.py
cmd-tick.py
csetup.py [deleted file]
drecurse.py [deleted file]
git.py [deleted file]
hashsplit.py [deleted file]
helpers.py [deleted file]
index.py [deleted file]
lib/bup/__init__.py [new file with mode: 0644]
lib/bup/_hashsplit.c [new file with mode: 0644]
lib/bup/client.py [new file with mode: 0644]
lib/bup/csetup.py [new file with mode: 0644]
lib/bup/drecurse.py [new file with mode: 0644]
lib/bup/git.py [new file with mode: 0644]
lib/bup/hashsplit.py [new file with mode: 0644]
lib/bup/helpers.py [new file with mode: 0644]
lib/bup/index.py [new file with mode: 0644]
lib/bup/options.py [new file with mode: 0644]
lib/bup/shquote.py [new file with mode: 0644]
lib/bup/vfs.py [new file with mode: 0644]
main.py [new file with mode: 0755]
memtest.py [deleted file]
options.py [deleted file]
shquote.py [deleted file]
t/__init__.py
t/tgit.py
t/thelpers.py
t/tindex.py
t/toptions.py
t/tshquote.py
vfs.py [deleted file]

index eacedd047f83bdeaf0fb20816ad5f22f4c80f5ac..b47dad657bfb8581b2bb15485ba14bb5f85ebd8a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -21,8 +21,8 @@ default: all
 
 all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
        bup-midx bup-fuse bup-ls bup-damage bup-fsck bup-margin bup-drecurse \
-       bup-random bup-ftp bup-newliner \
-       bup memtest _hashsplit$(SOEXT) \
+       bup-random bup-ftp bup-newliner bup-memtest \
+       bup lib/bup/_hashsplit$(SOEXT) \
        Documentation/all
        
 %/all:
@@ -31,10 +31,10 @@ all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
 %/clean:
        $(MAKE) -C $* clean
 
-_hashsplit$(SOEXT): _hashsplit.c csetup.py
+lib/bup/_hashsplit$(SOEXT): lib/bup/_hashsplit.c lib/bup/csetup.py
        @rm -f $@
-       python csetup.py build
-       cp build/*/_hashsplit$(SOEXT) .
+       cd lib/bup && python csetup.py build
+       cp lib/bup/build/*/_hashsplit$(SOEXT) lib/bup/
        
 runtests: all runtests-python runtests-cmdline
 
@@ -53,7 +53,7 @@ test: all
 %: %.o
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
        
-bup: bup.py
+bup: main.py
        rm -f $@
        ln -s $< $@
        
@@ -73,7 +73,8 @@ bup-%: cmd-%.sh
        gcc -c -o $@ $< $(CPPFLAGS) $(CFLAGS)
 
 clean: Documentation/clean
-       rm -f *.o *.so *.dll *.exe *~ .*~ *.pyc */*.pyc */*~ \
+       rm -f *.o *.so */*/*.so *.dll *.exe .*~ *~ */*~ */*/*~ \
+		*.pyc */*.pyc */*/*.pyc \
                bup bup-* randomgen memtest \
                out[12] out2[tc] tags[12] tags2[tc]
-       rm -rf *.tmp build
+       rm -rf *.tmp build lib/bup/build
diff --git a/_hashsplit.c b/_hashsplit.c
deleted file mode 100644 (file)
index e78f597..0000000
--- a/_hashsplit.c
+++ /dev/null
@@ -1,145 +0,0 @@
-#include <Python.h>
-#include <assert.h>
-#include <stdint.h>
-
-#define BLOBBITS (13)
-#define BLOBSIZE (1<<BLOBBITS)
-#define WINDOWBITS (7)
-#define WINDOWSIZE (1<<(WINDOWBITS-1))
-
-
-// FIXME: replace this with a not-stupid rolling checksum algorithm,
-// such as the one used in rsync (Adler32?)
-static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
-{
-    return ((old<<1) | (old>>31)) ^ drop ^ add;
-}
-
-
-static int find_ofs(const unsigned char *buf, int len, int *bits)
-{
-    unsigned char window[WINDOWSIZE];
-    uint32_t sum = 0;
-    int i = 0, count;
-    memset(window, 0, sizeof(window));
-    
-    for (count = 0; count < len; count++)
-    {
-       sum = stupidsum_add(sum, window[i], buf[count]);
-       window[i] = buf[count];
-       i = (i + 1) % WINDOWSIZE;
-       if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
-       {
-           if (bits)
-           {
-               *bits = BLOBBITS;
-               sum >>= BLOBBITS;
-               for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
-                   ;
-           }
-           return count+1;
-       }
-    }
-    return 0;
-}
-
-
-static PyObject *blobbits(PyObject *self, PyObject *args)
-{
-    if (!PyArg_ParseTuple(args, ""))
-       return NULL;
-    return Py_BuildValue("i", BLOBBITS);
-}
-
-
-static PyObject *splitbuf(PyObject *self, PyObject *args)
-{
-    unsigned char *buf = NULL;
-    int len = 0, out = 0, bits = -1;
-
-    if (!PyArg_ParseTuple(args, "t#", &buf, &len))
-       return NULL;
-    out = find_ofs(buf, len, &bits);
-    return Py_BuildValue("ii", out, bits);
-}
-
-
-static PyObject *bitmatch(PyObject *self, PyObject *args)
-{
-    unsigned char *buf1 = NULL, *buf2 = NULL;
-    int len1 = 0, len2 = 0;
-    int byte, bit;
-
-    if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
-       return NULL;
-    
-    bit = 0;
-    for (byte = 0; byte < len1 && byte < len2; byte++)
-    {
-       int b1 = buf1[byte], b2 = buf2[byte];
-       if (b1 != b2)
-       {
-           for (bit = 0; bit < 8; bit++)
-               if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
-                   break;
-           break;
-       }
-    }
-    
-    return Py_BuildValue("i", byte*8 + bit);
-}
-
-
-// I would have made this a lower-level function that just fills in a buffer
-// with random values, and then written those values from python.  But that's
-// about 20% slower in my tests, and since we typically generate random
-// numbers for benchmarking other parts of bup, any slowness in generating
-// random bytes will make our benchmarks inaccurate.  Plus nobody wants
-// pseudorandom bytes much except for this anyway.
-static PyObject *write_random(PyObject *self, PyObject *args)
-{
-    uint32_t buf[1024/4];
-    int fd = -1, seed = 0;
-    ssize_t ret;
-    long long len = 0, kbytes = 0, written = 0;
-
-    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
-       return NULL;
-    
-    srandom(seed);
-    
-    for (kbytes = len/1024; kbytes > 0; kbytes--)
-    {
-       int i;
-       for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
-           buf[i] = random();
-       ret = write(fd, buf, sizeof(buf));
-       if (ret < 0)
-           ret = 0;
-       written += ret;
-       if (ret < sizeof(buf))
-           break;
-       if (!(kbytes%1024))
-           fprintf(stderr, ".");
-    }
-    
-    return Py_BuildValue("L", written);
-}
-
-
-static PyMethodDef hashsplit_methods[] = {
-    { "blobbits", blobbits, METH_VARARGS,
-       "Return the number of bits in the rolling checksum." },
-    { "splitbuf", splitbuf, METH_VARARGS,
-       "Split a list of strings based on a rolling checksum." },
-    { "bitmatch", bitmatch, METH_VARARGS,
-       "Count the number of matching prefix bits between two strings." },
-    { "write_random", write_random, METH_VARARGS,
-       "Write random bytes to the given file descriptor" },
-    { NULL, NULL, 0, NULL },  // sentinel
-};
-
-PyMODINIT_FUNC init_hashsplit(void)
-{
-    Py_InitModule("_hashsplit", hashsplit_methods);
-}
diff --git a/bup.py b/bup.py
deleted file mode 100755 (executable)
index 56f3afe..0000000
--- a/bup.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-import sys, os, subprocess
-import git
-from helpers import *
-
-argv = sys.argv
-exe = argv[0]
-exepath = os.path.split(exe)[0] or '.'
-
-def usage():
-    log('Usage: bup <subcmd> <options...>\n\n')
-    log('Available subcommands:\n')
-    for c in sorted(os.listdir(exepath)):
-        if c.startswith('bup-') and c.find('.') < 0:
-            log('\t%s\n' % c[4:])
-    sys.exit(99)
-
-if len(argv) < 2 or not argv[1] or argv[1][0] == '-':
-    usage()
-
-subcmd = argv[1]
-if subcmd == 'help':
-    usage()
-
-def subpath(s):
-    return os.path.join(exepath, 'bup-%s' % s)
-
-if not os.path.exists(subpath(subcmd)):
-    log('error: unknown command "%s"\n' % subcmd)
-    usage()
-
-
-already_fixed = atoi(os.environ.get('BUP_FORCE_TTY'))
-if subcmd in ['ftp']:
-    already_fixed = True
-fix_stdout = not already_fixed and os.isatty(1)
-fix_stderr = not already_fixed and os.isatty(2)
-
-def force_tty():
-    if fix_stdout or fix_stderr:
-        os.environ['BUP_FORCE_TTY'] = '1'
-
-if fix_stdout or fix_stderr:
-    realf = fix_stderr and 2 or 1
-    n = subprocess.Popen([subpath('newliner')],
-                         stdin=subprocess.PIPE, stdout=os.dup(realf),
-                         close_fds=True, preexec_fn=force_tty)
-    outf = fix_stdout and n.stdin.fileno() or 1
-    errf = fix_stderr and n.stdin.fileno() or 2
-else:
-    n = None
-    outf = 1
-    errf = 2
-
-ret = 95
-try:
-    try:
-        p = subprocess.Popen([subpath(subcmd)] + argv[2:],
-                             stdout=outf, stderr=errf, preexec_fn=force_tty)
-        ret = p.wait()
-    except OSError, e:
-        log('%s: %s\n' % (subpath(subcmd), e))
-        ret = 98
-    except KeyboardInterrupt, e:
-        ret = 94
-finally:
-    if n:
-        n.stdin.close()
-        try:
-            n.wait()
-        except:
-            pass
-sys.exit(ret)
diff --git a/client.py b/client.py
deleted file mode 100644 (file)
index a9e9c95..0000000
--- a/client.py
+++ /dev/null
@@ -1,258 +0,0 @@
-import re, struct, errno, select
-import git
-from helpers import *
-from subprocess import Popen, PIPE
-
-
-class ClientError(Exception):
-    pass
-
-
-class Client:
-    def __init__(self, remote, create=False):
-        self._busy = None
-        self.p = None
-        self.conn = None
-        rs = remote.split(':', 1)
-        nicedir = os.path.split(os.path.abspath(sys.argv[0]))[0]
-        nicedir = re.sub(r':', "_", nicedir)
-        if len(rs) == 1:
-            (host, dir) = ('NONE', remote)
-            def fixenv():
-                os.environ['PATH'] = ':'.join([nicedir,
-                                               os.environ.get('PATH', '')])
-            argv = ['bup', 'server']
-        else:
-            (host, dir) = rs
-            fixenv = None
-            # WARNING: shell quoting security holes are possible here, so we
-            # have to be super careful.  We have to use 'sh -c' because
-            # csh-derived shells can't handle PATH= notation.  We can't
-            # set PATH in advance, because ssh probably replaces it.  We
-            # can't exec *safely* using argv, because *both* ssh and 'sh -c'
-            # allow shellquoting.  So we end up having to double-shellquote
-            # stuff here.
-            escapedir = re.sub(r'([^\w/])', r'\\\\\\\1', nicedir)
-            cmd = r"""
-                       sh -c PATH=%s:'$PATH bup server'
-                   """ % escapedir
-            argv = ['ssh', host, '--', cmd.strip()]
-            #log('argv is: %r\n' % argv)
-        (self.host, self.dir) = (host, dir)
-        self.cachedir = git.repo('index-cache/%s'
-                                 % re.sub(r'[^@\w]', '_', 
-                                          "%s:%s" % (host, dir)))
-        try:
-            self.p = p = Popen(argv, stdin=PIPE, stdout=PIPE, preexec_fn=fixenv)
-        except OSError, e:
-            raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
-        self.conn = conn = Conn(p.stdout, p.stdin)
-        if dir:
-            dir = re.sub(r'[\r\n]', ' ', dir)
-            if create:
-                conn.write('init-dir %s\n' % dir)
-            else:
-                conn.write('set-dir %s\n' % dir)
-            self.check_ok()
-        self.sync_indexes_del()
-
-    def __del__(self):
-        try:
-            self.close()
-        except IOError, e:
-            if e.errno == errno.EPIPE:
-                pass
-            else:
-                raise
-
-    def close(self):
-        if self.conn and not self._busy:
-            self.conn.write('quit\n')
-        if self.p:
-            self.p.stdin.close()
-            while self.p.stdout.read(65536):
-                pass
-            self.p.stdout.close()
-            self.p.wait()
-            rv = self.p.wait()
-            if rv:
-                raise ClientError('server tunnel returned exit code %d' % rv)
-        self.conn = None
-        self.p = None
-
-    def check_ok(self):
-        rv = self.p.poll()
-        if rv != None:
-            raise ClientError('server exited unexpectedly with code %r' % rv)
-        try:
-            return self.conn.check_ok()
-        except Exception, e:
-            raise ClientError, e, sys.exc_info()[2]
-
-    def check_busy(self):
-        if self._busy:
-            raise ClientError('already busy with command %r' % self._busy)
-        
-    def _not_busy(self):
-        self._busy = None
-
-    def sync_indexes_del(self):
-        self.check_busy()
-        conn = self.conn
-        conn.write('list-indexes\n')
-        packdir = git.repo('objects/pack')
-        all = {}
-        needed = {}
-        for line in linereader(conn):
-            if not line:
-                break
-            all[line] = 1
-            assert(line.find('/') < 0)
-            if not os.path.exists(os.path.join(self.cachedir, line)):
-                needed[line] = 1
-        self.check_ok()
-
-        mkdirp(self.cachedir)
-        for f in os.listdir(self.cachedir):
-            if f.endswith('.idx') and not f in all:
-                log('pruning old index: %r\n' % f)
-                os.unlink(os.path.join(self.cachedir, f))
-
-    def sync_index(self, name):
-        #log('requesting %r\n' % name)
-        mkdirp(self.cachedir)
-        self.conn.write('send-index %s\n' % name)
-        n = struct.unpack('!I', self.conn.read(4))[0]
-        assert(n)
-        fn = os.path.join(self.cachedir, name)
-        f = open(fn + '.tmp', 'w')
-        count = 0
-        progress('Receiving index: %d/%d\r' % (count, n))
-        for b in chunkyreader(self.conn, n):
-            f.write(b)
-            count += len(b)
-            progress('Receiving index: %d/%d\r' % (count, n))
-        progress('Receiving index: %d/%d, done.\n' % (count, n))
-        self.check_ok()
-        f.close()
-        os.rename(fn + '.tmp', fn)
-
-    def _make_objcache(self):
-        ob = self._busy
-        self._busy = None
-        #self.sync_indexes()
-        self._busy = ob
-        return git.MultiPackIndex(self.cachedir)
-
-    def _suggest_pack(self, indexname):
-        log('received index suggestion: %s\n' % indexname)
-        ob = self._busy
-        if ob:
-            assert(ob == 'receive-objects')
-            self._busy = None
-            self.conn.write('\xff\xff\xff\xff')  # suspend receive-objects
-            self.conn.drain_and_check_ok()
-        self.sync_index(indexname)
-        if ob:
-            self.conn.write('receive-objects\n')
-            self._busy = ob
-
-    def new_packwriter(self):
-        self.check_busy()
-        self._busy = 'receive-objects'
-        return PackWriter_Remote(self.conn,
-                                 objcache_maker = self._make_objcache,
-                                 suggest_pack = self._suggest_pack,
-                                 onclose = self._not_busy)
-
-    def read_ref(self, refname):
-        self.check_busy()
-        self.conn.write('read-ref %s\n' % refname)
-        r = self.conn.readline().strip()
-        self.check_ok()
-        if r:
-            assert(len(r) == 40)   # hexified sha
-            return r.decode('hex')
-        else:
-            return None   # nonexistent ref
-
-    def update_ref(self, refname, newval, oldval):
-        self.check_busy()
-        self.conn.write('update-ref %s\n%s\n%s\n' 
-                        % (refname, newval.encode('hex'),
-                           (oldval or '').encode('hex')))
-        self.check_ok()
-
-    def cat(self, id):
-        self.check_busy()
-        self._busy = 'cat'
-        self.conn.write('cat %s\n' % re.sub(r'[\n\r]', '_', id))
-        while 1:
-            sz = struct.unpack('!I', self.conn.read(4))[0]
-            if not sz: break
-            yield self.conn.read(sz)
-        e = self.check_ok()
-        self._not_busy()
-        if e:
-            raise KeyError(str(e))
-
-
-class PackWriter_Remote(git.PackWriter):
-    def __init__(self, conn, objcache_maker, suggest_pack, onclose):
-        git.PackWriter.__init__(self, objcache_maker)
-        self.file = conn
-        self.filename = 'remote socket'
-        self.suggest_pack = suggest_pack
-        self.onclose = onclose
-        self._packopen = False
-
-    def _open(self):
-        if not self._packopen:
-            self._make_objcache()
-            self.file.write('receive-objects\n')
-            self._packopen = True
-
-    def _end(self):
-        if self._packopen and self.file:
-            self.file.write('\0\0\0\0')
-            self._packopen = False
-            while True:
-                line = self.file.readline().strip()
-                if line.startswith('index '):
-                    pass
-                else:
-                    break
-            id = line
-            self.file.check_ok()
-            self.objcache = None
-            if self.onclose:
-                self.onclose()
-            if self.suggest_pack:
-                self.suggest_pack(id)
-            return id
-
-    def close(self):
-        id = self._end()
-        self.file = None
-        return id
-
-    def abort(self):
-        raise GitError("don't know how to abort remote pack writing")
-
-    def _raw_write(self, datalist):
-        assert(self.file)
-        if not self._packopen:
-            self._open()
-        data = ''.join(datalist)
-        assert(len(data))
-        self.file.write(struct.pack('!I', len(data)) + data)
-        self.outbytes += len(data)
-        self.count += 1
-
-        if self.file.has_input():
-            line = self.file.readline().strip()
-            assert(line.startswith('index '))
-            idxname = line[6:]
-            if self.suggest_pack:
-                self.suggest_pack(idxname)
-                self.objcache.refresh()
index 685a109ea9bd255f0dd34a0814301d8acb61ee63..6f630fd130ca3e4f9318b7e4ad90b0e61004c430 100755 (executable)
--- a/cmd-damage.py
+++ b/cmd-damage.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, random
-import options
-from helpers import *
+from bup import options
+from bup.helpers import *
 
 
 def randblock(n):
index c2b70794bf6e80349026f69bc00928aaba6535a1..99780af7a33bbaff35e96c6d818a89d16f95b8a3 100755 (executable)
--- a/cmd-drecurse.py
+++ b/cmd-drecurse.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-import options, drecurse
-from helpers import *
+from bup import options, drecurse
+from bup.helpers import *
 
 optspec = """
 bup drecurse <path>
index 30f9b07c770dfa790a0b2f27574cc8edaad14de5..36c8a350a0c524c1f94356c6f549b6c1dd16056a 100755 (executable)
--- a/cmd-fsck.py
+++ b/cmd-fsck.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, glob, subprocess, time, sha
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
 
 par2_ok = 0
 nullf = open('/dev/null')
index 9594b39e3f4769a89466df7bd22b434c6e7697de..b59cf44318b78bd0389bdea78e72cac2219197ae 100755 (executable)
--- a/cmd-ftp.py
+++ b/cmd-ftp.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, re, stat, readline, fnmatch
-import options, git, shquote, vfs
-from helpers import *
+from bup import options, git, shquote, vfs
+from bup.helpers import *
 
 def print_node(text, n):
     if stat.S_ISDIR(n.mode):
index 0b0bab64ce286dce8125116b96c3e402bbd3ed64..ffcd036ffffd275ebfd73aad7b30f8d448d6cf39 100755 (executable)
--- a/cmd-fuse.py
+++ b/cmd-fuse.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, stat, errno, fuse, re, time, tempfile
-import options, git, vfs
-from helpers import *
+from bup import options, git, vfs
+from bup.helpers import *
 
 
 class Stat(fuse.Stat):
index fe1acb1d20b6b02f3332cb9c089e40bee2ef401d..683989007988e5a38a48a27256dd33a8efd106b9 100755 (executable)
--- a/cmd-index.py
+++ b/cmd-index.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import os, sys, stat, time
-import options, git, index, drecurse
-from helpers import *
+from bup import options, git, index, drecurse
+from bup.helpers import *
 
 
 def merge_indexes(out, r1, r2):
index 8dca178bd7d30475fd267a90a57c564811b97162..04f32484d6bd78a6ad2c37022e5ac8af6cedd6a8 100755 (executable)
--- a/cmd-init.py
+++ b/cmd-init.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-import git, options, client
-from helpers import *
+from bup import git, options, client
+from bup.helpers import *
 
 optspec = """
 [BUP_DIR=...] bup init [-r host:path]
index 26f390c43ce3efa581d38f39b96782695f591b64..0e1e1ffbc8108b3404d0dd3fdc3379b0d8dac72f 100755 (executable)
--- a/cmd-join.py
+++ b/cmd-join.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, time, struct
-import hashsplit, git, options, client
-from helpers import *
+from bup import hashsplit, git, options, client
+from bup.helpers import *
 from subprocess import PIPE
 
 
index 2ce6bb039b5f1dfb8b87728d5bb413aa879c2cef..5a22c1daafa078234c843e94344114a33a27dce5 100755 (executable)
--- a/cmd-ls.py
+++ b/cmd-ls.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, os, stat
-import options, git, vfs
-from helpers import *
+from bup import options, git, vfs
+from bup.helpers import *
 
 def print_node(text, n):
     prefix = ''
index 05f7d2b68e30bcce97b5a194645350b026672947..85b32890b493ea4df2ea4cd8b0f661d8f2253c4e 100755 (executable)
--- a/cmd-margin.py
+++ b/cmd-margin.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys
-import options, git, _hashsplit
-from helpers import *
+from bup import options, git, _hashsplit
+from bup.helpers import *
 
 
 optspec = """
diff --git a/cmd-memtest.py b/cmd-memtest.py
new file mode 100755 (executable)
index 0000000..cf106e4
--- /dev/null
+++ b/cmd-memtest.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+import sys, re, struct, mmap
+from bup import git, options
+from bup.helpers import *
+
+
+def s_from_bytes(bytes):
+    clist = [chr(b) for b in bytes]
+    return ''.join(clist)
+
+
+def report(count):
+    fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk']
+    d = {}
+    for line in open('/proc/self/status').readlines():
+        l = re.split(r':\s*', line.strip(), 1)
+        d[l[0]] = l[1]
+    if count >= 0:
+        e1 = count
+        fields = [d[k] for k in fields]
+    else:
+        e1 = ''
+    print ('%9s  ' + ('%10s ' * len(fields))) % tuple([e1] + fields)
+    sys.stdout.flush()
+
+
+optspec = """
+memtest [-n elements] [-c cycles]
+--
+n,number=  number of objects per cycle
+c,cycles=  number of cycles to run
+ignore-midx  ignore .midx files, use only .idx files
+"""
+o = options.Options(sys.argv[0], optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+if extra:
+    o.fatal('no arguments expected')
+
+git.ignore_midx = opt.ignore_midx
+
+git.check_repo_or_die()
+m = git.MultiPackIndex(git.repo('objects/pack'))
+
+cycles = opt.cycles or 100
+number = opt.number or 10000
+
+report(-1)
+f = open('/dev/urandom')
+a = mmap.mmap(-1, 20)
+report(0)
+for c in xrange(cycles):
+    for n in xrange(number):
+        b = f.read(3)
+        if 0:
+            bytes = list(struct.unpack('!BBB', b)) + [0]*17
+            bytes[2] &= 0xf0
+            bin = struct.pack('!20s', s_from_bytes(bytes))
+        else:
+            a[0:2] = b[0:2]
+            a[2] = chr(ord(b[2]) & 0xf0)
+            bin = str(a[0:20])
+        #print bin.encode('hex')
+        m.exists(bin)
+    report((c+1)*number)
index 36907ebaa0ee7569e68a652f221a12119eee5559..871111196c9ecabba994106da2e76a705c8749e9 100755 (executable)
--- a/cmd-midx.py
+++ b/cmd-midx.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, math, struct, glob, sha
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
 
 PAGE_SIZE=4096
 SHA_PER_PAGE=PAGE_SIZE/200.
index 0b665aa2a79127487523917ab9dadceb963c373c..6c4e60f336e7c0ce76f47f8a0334a5daec48b517 100755 (executable)
--- a/cmd-newliner.py
+++ b/cmd-newliner.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import sys, os, re
-import options
+from bup import options
 
 optspec = """
 bup newliner
index 518cea8bb5ba7247c2fbeb72599a7b28df41c67e..91820a858441384c93d1651322658bbf673295e1 100755 (executable)
--- a/cmd-random.py
+++ b/cmd-random.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, mmap
-import options, _hashsplit
-from helpers import *
+from bup import options, _hashsplit
+from bup.helpers import *
 
 optspec = """
 bup random [-S seed] <numbytes>
index e2b1f47358779c1ab426c735279c1ad769f1cacc..0350fff2ffdf45f2b4a7daeee6bb22d38339f8f8 100755 (executable)
--- a/cmd-save.py
+++ b/cmd-save.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, re, errno, stat, time, math
-import hashsplit, git, options, index, client
-from helpers import *
+from bup import hashsplit, git, options, index, client
+from bup.helpers import *
 
 
 optspec = """
index 92aa8c1dfadb479f1bef507b36cd23559d8244f1..59459d1fa3f2215b45ba520996659191fe662f6a 100755 (executable)
--- a/cmd-server.py
+++ b/cmd-server.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, struct, mmap
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
 
 suspended_w = None
 
index ae7b3806f42171bb78379462006f60356c1fc63d..0f8408c7071077b0fccbdaef7757509dbbc92378 100755 (executable)
--- a/cmd-split.py
+++ b/cmd-split.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import sys, time, struct
-import hashsplit, git, options, client
-from helpers import *
+from bup import hashsplit, git, options, client
+from bup.helpers import *
 from subprocess import PIPE
 
 
index 12bd97eb077056c3f1c8630d9247a50b729aed6b..8375dee2b82e7ffed26ea18e39d4eed227780de0 100755 (executable)
--- a/cmd-tick.py
+++ b/cmd-tick.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import sys, time
-import options
+from bup import options
 
 optspec = """
 bup tick
diff --git a/csetup.py b/csetup.py
deleted file mode 100644 (file)
index b58932c..0000000
--- a/csetup.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from distutils.core import setup, Extension
-
-_hashsplit_mod = Extension('_hashsplit', sources=['_hashsplit.c'])
-
-setup(name='_hashsplit',
-      version='0.1',
-      description='hashsplit helper library for bup',
-      ext_modules=[_hashsplit_mod])
diff --git a/drecurse.py b/drecurse.py
deleted file mode 100644 (file)
index 33bcca4..0000000
--- a/drecurse.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import stat, heapq
-from helpers import *
-
-try:
-    O_LARGEFILE = os.O_LARGEFILE
-except AttributeError:
-    O_LARGEFILE = 0
-
-
-# the use of fchdir() and lstat() is for two reasons:
-#  - help out the kernel by not making it repeatedly look up the absolute path
-#  - avoid race conditions caused by doing listdir() on a changing symlink
-class OsFile:
-    def __init__(self, path):
-        self.fd = None
-        self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
-        
-    def __del__(self):
-        if self.fd:
-            fd = self.fd
-            self.fd = None
-            os.close(fd)
-
-    def fchdir(self):
-        os.fchdir(self.fd)
-
-    def stat(self):
-        return os.fstat(self.fd)
-
-
-_IFMT = stat.S_IFMT(0xffffffff)  # avoid function call in inner loop
-def _dirlist():
-    l = []
-    for n in os.listdir('.'):
-        try:
-            st = os.lstat(n)
-        except OSError, e:
-            add_error(Exception('%s: %s' % (realpath(n), str(e))))
-            continue
-        if (st.st_mode & _IFMT) == stat.S_IFDIR:
-            n += '/'
-        l.append((n,st))
-    l.sort(reverse=True)
-    return l
-
-
-def _recursive_dirlist(prepend, xdev):
-    for (name,pst) in _dirlist():
-        if name.endswith('/'):
-            if xdev != None and pst.st_dev != xdev:
-                log('Skipping %r: different filesystem.\n' % (prepend+name))
-                continue
-            try:
-                OsFile(name).fchdir()
-            except OSError, e:
-                add_error('%s: %s' % (prepend, e))
-            else:
-                for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev):
-                    yield i
-                os.chdir('..')
-        yield (prepend + name, pst)
-
-
-def recursive_dirlist(paths, xdev):
-    startdir = OsFile('.')
-    try:
-        assert(type(paths) != type(''))
-        for path in paths:
-            try:
-                pst = os.lstat(path)
-                if stat.S_ISLNK(pst.st_mode):
-                    yield (path, pst)
-                    continue
-            except OSError, e:
-                add_error(e)
-                continue
-            try:
-                pfile = OsFile(path)
-            except OSError, e:
-                add_error(e)
-                continue
-            pst = pfile.stat()
-            if xdev:
-                xdev = pst.st_dev
-            else:
-                xdev = None
-            if stat.S_ISDIR(pst.st_mode):
-                pfile.fchdir()
-                prepend = os.path.join(path, '')
-                for i in _recursive_dirlist(prepend=prepend, xdev=xdev):
-                    yield i
-                startdir.fchdir()
-            else:
-                prepend = path
-            yield (prepend,pst)
-    except:
-        try:
-            startdir.fchdir()
-        except:
-            pass
-        raise
diff --git a/git.py b/git.py
deleted file mode 100644 (file)
index c5381f0..0000000
--- a/git.py
+++ /dev/null
@@ -1,696 +0,0 @@
-import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
-import heapq
-from helpers import *
-
-verbose = 0
-ignore_midx = 0
-home_repodir = os.path.expanduser('~/.bup')
-repodir = None
-
-_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
-_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
-
-
-class GitError(Exception):
-    pass
-
-
-def repo(sub = ''):
-    global repodir
-    if not repodir:
-        raise GitError('You should call check_repo_or_die()')
-    gd = os.path.join(repodir, '.git')
-    if os.path.exists(gd):
-        repodir = gd
-    return os.path.join(repodir, sub)
-
-
-def _encode_packobj(type, content):
-    szout = ''
-    sz = len(content)
-    szbits = (sz & 0x0f) | (_typemap[type]<<4)
-    sz >>= 4
-    while 1:
-        if sz: szbits |= 0x80
-        szout += chr(szbits)
-        if not sz:
-            break
-        szbits = sz & 0x7f
-        sz >>= 7
-    z = zlib.compressobj(1)
-    yield szout
-    yield z.compress(content)
-    yield z.flush()
-
-
-def _encode_looseobj(type, content):
-    z = zlib.compressobj(1)
-    yield z.compress('%s %d\0' % (type, len(content)))
-    yield z.compress(content)
-    yield z.flush()
-
-
-def _decode_looseobj(buf):
-    assert(buf);
-    s = zlib.decompress(buf)
-    i = s.find('\0')
-    assert(i > 0)
-    l = s[:i].split(' ')
-    type = l[0]
-    sz = int(l[1])
-    content = s[i+1:]
-    assert(type in _typemap)
-    assert(sz == len(content))
-    return (type, content)
-
-
-def _decode_packobj(buf):
-    assert(buf)
-    c = ord(buf[0])
-    type = _typermap[(c & 0x70) >> 4]
-    sz = c & 0x0f
-    shift = 4
-    i = 0
-    while c & 0x80:
-        i += 1
-        c = ord(buf[i])
-        sz |= (c & 0x7f) << shift
-        shift += 7
-        if not (c & 0x80):
-            break
-    return (type, zlib.decompress(buf[i+1:]))
-
-
-class PackIndex:
-    def __init__(self, filename):
-        self.name = filename
-        self.map = mmap_read(open(filename))
-        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
-        self.fanout = list(struct.unpack('!256I',
-                                         str(buffer(self.map, 8, 256*4))))
-        self.fanout.append(0)  # entry "-1"
-        nsha = self.fanout[255]
-        self.ofstable = buffer(self.map,
-                               8 + 256*4 + nsha*20 + nsha*4,
-                               nsha*4)
-        self.ofs64table = buffer(self.map,
-                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
-
-    def _ofs_from_idx(self, idx):
-        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
-        if ofs & 0x80000000:
-            idx64 = ofs & 0x7fffffff
-            ofs = struct.unpack('!I',
-                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
-        return ofs
-
-    def _idx_from_hash(self, hash):
-        assert(len(hash) == 20)
-        b1 = ord(hash[0])
-        start = self.fanout[b1-1] # range -1..254
-        end = self.fanout[b1] # range 0..255
-        buf = buffer(self.map, 8 + 256*4, end*20)
-        want = str(hash)
-        while start < end:
-            mid = start + (end-start)/2
-            v = str(buf[mid*20:(mid+1)*20])
-            if v < want:
-                start = mid+1
-            elif v > want:
-                end = mid
-            else: # got it!
-                return mid
-        return None
-        
-    def find_offset(self, hash):
-        idx = self._idx_from_hash(hash)
-        if idx != None:
-            return self._ofs_from_idx(idx)
-        return None
-
-    def exists(self, hash):
-        return hash and (self._idx_from_hash(hash) != None) and True or None
-
-    def __iter__(self):
-        for i in xrange(self.fanout[255]):
-            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
-
-    def __len__(self):
-        return int(self.fanout[255])
-
-
-def extract_bits(buf, bits):
-    mask = (1<<bits) - 1
-    v = struct.unpack('!I', buf[0:4])[0]
-    v = (v >> (32-bits)) & mask
-    return v
-
-
-class PackMidx:
-    def __init__(self, filename):
-        self.name = filename
-        assert(filename.endswith('.midx'))
-        self.map = mmap_read(open(filename))
-        if str(self.map[0:8]) == 'MIDX\0\0\0\1':
-            log('Warning: ignoring old-style midx %r\n' % filename)
-            self.bits = 0
-            self.entries = 1
-            self.fanout = buffer('\0\0\0\0')
-            self.shalist = buffer('\0'*20)
-            self.idxnames = []
-        else:
-            assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
-            self.bits = struct.unpack('!I', self.map[8:12])[0]
-            self.entries = 2**self.bits
-            self.fanout = buffer(self.map, 12, self.entries*4)
-            shaofs = 12 + self.entries*4
-            nsha = self._fanget(self.entries-1)
-            self.shalist = buffer(self.map, shaofs, nsha*20)
-            self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
-
-    def _fanget(self, i):
-        start = i*4
-        s = self.fanout[start:start+4]
-        return struct.unpack('!I', s)[0]
-    
-    def exists(self, hash):
-        want = str(hash)
-        el = extract_bits(want, self.bits)
-        if el:
-            start = self._fanget(el-1)
-        else:
-            start = 0
-        end = self._fanget(el)
-        while start < end:
-            mid = start + (end-start)/2
-            v = str(self.shalist[mid*20:(mid+1)*20])
-            if v < want:
-                start = mid+1
-            elif v > want:
-                end = mid
-            else: # got it!
-                return True
-        return None
-    
-    def __iter__(self):
-        for i in xrange(self._fanget(self.entries-1)):
-            yield buffer(self.shalist, i*20, 20)
-    
-    def __len__(self):
-        return int(self._fanget(self.entries-1))
-
-
-_mpi_count = 0
-class MultiPackIndex:
-    def __init__(self, dir):
-        global _mpi_count
-        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
-        _mpi_count += 1
-        self.dir = dir
-        self.also = {}
-        self.packs = []
-        self.refresh()
-
-    def __del__(self):
-        global _mpi_count
-        _mpi_count -= 1
-        assert(_mpi_count == 0)
-
-    def __iter__(self):
-        return iter(idxmerge(self.packs))
-
-    def exists(self, hash):
-        if hash in self.also:
-            return True
-        for i in range(len(self.packs)):
-            p = self.packs[i]
-            if p.exists(hash):
-                # reorder so most recently used packs are searched first
-                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
-                return p.name
-        return None
-
-    def refresh(self, skip_midx = False, forget_packs = False):
-        if forget_packs:
-            self.packs = []
-        skip_midx = skip_midx or ignore_midx
-        d = dict((p.name, 1) for p in self.packs)
-        if os.path.exists(self.dir):
-            if not skip_midx:
-                midxl = []
-                for f in os.listdir(self.dir):
-                    full = os.path.join(self.dir, f)
-                    if f.endswith('.midx') and not d.get(full):
-                        mx = PackMidx(full)
-                        (mxd, mxf) = os.path.split(mx.name)
-                        broken = 0
-                        for n in mx.idxnames:
-                            if not os.path.exists(os.path.join(mxd, n)):
-                                log(('warning: index %s missing\n' +
-                                    '  used by %s\n') % (n, mxf))
-                                broken += 1
-                        if not broken:
-                            midxl.append(mx)
-                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
-                for ix in midxl:
-                    any = 0
-                    for sub in ix.idxnames:
-                        if not d.get(os.path.join(self.dir, sub)):
-                            self.packs.append(ix)
-                            d[ix.name] = 1
-                            for name in ix.idxnames:
-                                d[os.path.join(self.dir, name)] = 1
-                            any += 1
-                            break
-                    if not any:
-                        log('midx: removing redundant: %s\n' 
-                            % os.path.basename(ix.name))
-                        unlink(ix.name)
-            for f in os.listdir(self.dir):
-                full = os.path.join(self.dir, f)
-                if f.endswith('.idx') and not d.get(full):
-                    self.packs.append(PackIndex(full))
-                    d[full] = 1
-        log('MultiPackIndex: using %d index%s.\n' 
-            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
-
-    def add(self, hash):
-        self.also[hash] = 1
-
-    def zap_also(self):
-        self.also = {}
-
-
-def calc_hash(type, content):
-    header = '%s %d\0' % (type, len(content))
-    sum = sha.sha(header)
-    sum.update(content)
-    return sum.digest()
-
-
-def _shalist_sort_key(ent):
-    (mode, name, id) = ent
-    if stat.S_ISDIR(int(mode, 8)):
-        return name + '/'
-    else:
-        return name
-
-
-def idxmerge(idxlist):
-    total = sum(len(i) for i in idxlist)
-    iters = (iter(i) for i in idxlist)
-    heap = [(next(it), it) for it in iters]
-    heapq.heapify(heap)
-    count = 0
-    last = None
-    while heap:
-        if (count % 10024) == 0:
-            progress('Reading indexes: %.2f%% (%d/%d)\r'
-                     % (count*100.0/total, count, total))
-        (e, it) = heap[0]
-        if e != last:
-            yield e
-            last = e
-        count += 1
-        e = next(it)
-        if e:
-            heapq.heapreplace(heap, (e, it))
-        else:
-            heapq.heappop(heap)
-    log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
-
-    
-class PackWriter:
-    def __init__(self, objcache_maker=None):
-        self.count = 0
-        self.outbytes = 0
-        self.filename = None
-        self.file = None
-        self.objcache_maker = objcache_maker
-        self.objcache = None
-
-    def __del__(self):
-        self.close()
-
-    def _make_objcache(self):
-        if not self.objcache:
-            if self.objcache_maker:
-                self.objcache = self.objcache_maker()
-            else:
-                self.objcache = MultiPackIndex(repo('objects/pack'))
-
-    def _open(self):
-        if not self.file:
-            self._make_objcache()
-            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
-            self.file = os.fdopen(fd, 'w+b')
-            assert(name.endswith('.pack'))
-            self.filename = name[:-5]
-            self.file.write('PACK\0\0\0\2\0\0\0\0')
-
-    def _raw_write(self, datalist):
-        self._open()
-        f = self.file
-        for d in datalist:
-            f.write(d)
-            self.outbytes += len(d)
-        self.count += 1
-
-    def _write(self, bin, type, content):
-        if verbose:
-            log('>')
-        self._raw_write(_encode_packobj(type, content))
-        return bin
-
-    def breakpoint(self):
-        id = self._end()
-        self.outbytes = self.count = 0
-        return id
-
-    def write(self, type, content):
-        return self._write(calc_hash(type, content), type, content)
-
-    def exists(self, id):
-        if not self.objcache:
-            self._make_objcache()
-        return self.objcache.exists(id)
-
-    def maybe_write(self, type, content):
-        bin = calc_hash(type, content)
-        if not self.exists(bin):
-            self._write(bin, type, content)
-            self.objcache.add(bin)
-        return bin
-
-    def new_blob(self, blob):
-        return self.maybe_write('blob', blob)
-
-    def new_tree(self, shalist):
-        shalist = sorted(shalist, key = _shalist_sort_key)
-        l = []
-        for (mode,name,bin) in shalist:
-            assert(mode)
-            assert(mode != '0')
-            assert(mode[0] != '0')
-            assert(name)
-            assert(len(bin) == 20)
-            l.append('%s %s\0%s' % (mode,name,bin))
-        return self.maybe_write('tree', ''.join(l))
-
-    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
-        l = []
-        if tree: l.append('tree %s' % tree.encode('hex'))
-        if parent: l.append('parent %s' % parent.encode('hex'))
-        if author: l.append('author %s %s' % (author, _git_date(adate)))
-        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
-        l.append('')
-        l.append(msg)
-        return self.maybe_write('commit', '\n'.join(l))
-
-    def new_commit(self, parent, tree, msg):
-        now = time.time()
-        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
-        commit = self._new_commit(tree, parent,
-                                  userline, now, userline, now,
-                                  msg)
-        return commit
-
-    def abort(self):
-        f = self.file
-        if f:
-            self.file = None
-            f.close()
-            os.unlink(self.filename + '.pack')
-
-    def _end(self):
-        f = self.file
-        if not f: return None
-        self.file = None
-        self.objcache = None
-
-        # update object count
-        f.seek(8)
-        cp = struct.pack('!i', self.count)
-        assert(len(cp) == 4)
-        f.write(cp)
-
-        # calculate the pack sha1sum
-        f.seek(0)
-        sum = sha.sha()
-        while 1:
-            b = f.read(65536)
-            sum.update(b)
-            if not b: break
-        f.write(sum.digest())
-        
-        f.close()
-
-        p = subprocess.Popen(['git', 'index-pack', '-v',
-                              '--index-version=2',
-                              self.filename + '.pack'],
-                             preexec_fn = _gitenv,
-                             stdout = subprocess.PIPE)
-        out = p.stdout.read().strip()
-        _git_wait('git index-pack', p)
-        if not out:
-            raise GitError('git index-pack produced no output')
-        nameprefix = repo('objects/pack/%s' % out)
-        if os.path.exists(self.filename + '.map'):
-            os.unlink(self.filename + '.map')
-        os.rename(self.filename + '.pack', nameprefix + '.pack')
-        os.rename(self.filename + '.idx', nameprefix + '.idx')
-        return nameprefix
-
-    def close(self):
-        return self._end()
-
-
-def _git_date(date):
-    return time.strftime('%s %z', time.localtime(date))
-
-
-def _gitenv():
-    os.environ['GIT_DIR'] = os.path.abspath(repo())
-
-
-def list_refs(refname = None):
-    argv = ['git', 'show-ref', '--']
-    if refname:
-        argv += [refname]
-    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
-    out = p.stdout.read().strip()
-    rv = p.wait()  # not fatal
-    if rv:
-        assert(not out)
-    if out:
-        for d in out.split('\n'):
-            (sha, name) = d.split(' ', 1)
-            yield (name, sha.decode('hex'))
-
-
-def read_ref(refname):
-    l = list(list_refs(refname))
-    if l:
-        assert(len(l) == 1)
-        return l[0][1]
-    else:
-        return None
-
-
-def rev_list(ref):
-    assert(not ref.startswith('-'))
-    argv = ['git', 'rev-list', '--pretty=format:%ct', ref, '--']
-    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
-    commit = None
-    for row in p.stdout:
-        s = row.strip()
-        if s.startswith('commit '):
-            commit = s[7:].decode('hex')
-        else:
-            date = int(s)
-            yield (date, commit)
-    rv = p.wait()  # not fatal
-    if rv:
-        raise GitError, 'git rev-list returned error %d' % rv
-
-
-def update_ref(refname, newval, oldval):
-    if not oldval:
-        oldval = ''
-    assert(refname.startswith('refs/heads/'))
-    p = subprocess.Popen(['git', 'update-ref', refname,
-                          newval.encode('hex'), oldval.encode('hex')],
-                         preexec_fn = _gitenv)
-    _git_wait('git update-ref', p)
-
-
-def guess_repo(path=None):
-    global repodir
-    if path:
-        repodir = path
-    if not repodir:
-        repodir = os.environ.get('BUP_DIR')
-        if not repodir:
-            repodir = os.path.expanduser('~/.bup')
-
-
-def init_repo(path=None):
-    guess_repo(path)
-    d = repo()
-    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
-        raise GitError('"%d" exists but is not a directory\n' % d)
-    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
-                         preexec_fn = _gitenv)
-    _git_wait('git init', p)
-    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
-                         stdout=sys.stderr, preexec_fn = _gitenv)
-    _git_wait('git config', p)
-
-
-def check_repo_or_die(path=None):
-    guess_repo(path)
-    if not os.path.isdir(repo('objects/pack/.')):
-        if repodir == home_repodir:
-            init_repo()
-        else:
-            log('error: %r is not a bup/git repository\n' % repo())
-            sys.exit(15)
-
-
-def _treeparse(buf):
-    ofs = 0
-    while ofs < len(buf):
-        z = buf[ofs:].find('\0')
-        assert(z > 0)
-        spl = buf[ofs:ofs+z].split(' ', 1)
-        assert(len(spl) == 2)
-        sha = buf[ofs+z+1:ofs+z+1+20]
-        ofs += z+1+20
-        yield (spl[0], spl[1], sha)
-
-
-_ver = None
-def ver():
-    global _ver
-    if not _ver:
-        p = subprocess.Popen(['git', '--version'],
-                             stdout=subprocess.PIPE)
-        gvs = p.stdout.read()
-        _git_wait('git --version', p)
-        m = re.match(r'git version (\S+.\S+)', gvs)
-        if not m:
-            raise GitError('git --version weird output: %r' % gvs)
-        _ver = tuple(m.group(1).split('.'))
-    needed = ('1','5', '3', '1')
-    if _ver < needed:
-        raise GitError('git version %s or higher is required; you have %s'
-                       % ('.'.join(needed), '.'.join(_ver)))
-    return _ver
-
-
-def _git_wait(cmd, p):
-    rv = p.wait()
-    if rv != 0:
-        raise GitError('%s returned %d' % (cmd, rv))
-
-
-def _git_capture(argv):
-    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
-    r = p.stdout.read()
-    _git_wait(repr(argv), p)
-    return r
-
-
-_ver_warned = 0
-class CatPipe:
-    def __init__(self):
-        global _ver_warned
-        wanted = ('1','5','6')
-        if ver() < wanted:
-            if not _ver_warned:
-                log('warning: git version < %s; bup will be slow.\n'
-                    % '.'.join(wanted))
-                _ver_warned = 1
-            self.get = self._slow_get
-        else:
-            self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
-                                      stdin=subprocess.PIPE, 
-                                      stdout=subprocess.PIPE,
-                                      preexec_fn = _gitenv)
-            self.get = self._fast_get
-            self.inprogress = None
-
-    def _fast_get(self, id):
-        if self.inprogress:
-            log('_fast_get: opening %r while %r is open' 
-                % (id, self.inprogress))
-        assert(not self.inprogress)
-        assert(id.find('\n') < 0)
-        assert(id.find('\r') < 0)
-        assert(id[0] != '-')
-        self.inprogress = id
-        self.p.stdin.write('%s\n' % id)
-        hdr = self.p.stdout.readline()
-        if hdr.endswith(' missing\n'):
-            raise KeyError('blob %r is missing' % id)
-        spl = hdr.split(' ')
-        if len(spl) != 3 or len(spl[0]) != 40:
-            raise GitError('expected blob, got %r' % spl)
-        (hex, type, size) = spl
-
-        def ondone():
-            assert(self.p.stdout.readline() == '\n')
-            self.inprogress = None
-
-        it = AutoFlushIter(chunkyreader(self.p.stdout, int(spl[2])),
-                           ondone = ondone)
-        yield type
-        for blob in it:
-            yield blob
-        del it
-
-    def _slow_get(self, id):
-        assert(id.find('\n') < 0)
-        assert(id.find('\r') < 0)
-        assert(id[0] != '-')
-        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
-        yield type
-
-        p = subprocess.Popen(['git', 'cat-file', type, id],
-                             stdout=subprocess.PIPE,
-                             preexec_fn = _gitenv)
-        for blob in chunkyreader(p.stdout):
-            yield blob
-        _git_wait('git cat-file', p)
-
-    def _join(self, it):
-        type = it.next()
-        if type == 'blob':
-            for blob in it:
-                yield blob
-        elif type == 'tree':
-            treefile = ''.join(it)
-            for (mode, name, sha) in _treeparse(treefile):
-                for blob in self.join(sha.encode('hex')):
-                    yield blob
-        elif type == 'commit':
-            treeline = ''.join(it).split('\n')[0]
-            assert(treeline.startswith('tree '))
-            for blob in self.join(treeline[5:]):
-                yield blob
-        else:
-            raise GitError('invalid object type %r: expected blob/tree/commit'
-                           % type)
-
-    def join(self, id):
-        try:
-            for d in self._join(self.get(id)):
-                yield d
-        except StopIteration:
-            log('booger!\n')
-        
-
-def cat(id):
-    c = CatPipe()
-    for d in c.join(id):
-        yield d
diff --git a/hashsplit.py b/hashsplit.py
deleted file mode 100644 (file)
index f9bc48d..0000000
--- a/hashsplit.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import sys, math
-import git, _hashsplit
-from helpers import *
-
-BLOB_LWM = 8192*2
-BLOB_MAX = BLOB_LWM*2
-BLOB_HWM = 1024*1024
-MAX_PER_TREE = 256
-progress_callback = None
-max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
-max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
-fanout = 16
-
-class Buf:
-    def __init__(self):
-        self.data = ''
-        self.start = 0
-
-    def put(self, s):
-        if s:
-            self.data = buffer(self.data, self.start) + s
-            self.start = 0
-            
-    def peek(self, count):
-        return buffer(self.data, self.start, count)
-    
-    def eat(self, count):
-        self.start += count
-
-    def get(self, count):
-        v = buffer(self.data, self.start, count)
-        self.start += count
-        return v
-
-    def used(self):
-        return len(self.data) - self.start
-
-
-def splitbuf(buf):
-    b = buf.peek(buf.used())
-    (ofs, bits) = _hashsplit.splitbuf(b)
-    if ofs:
-        buf.eat(ofs)
-        return (buffer(b, 0, ofs), bits)
-    return (None, 0)
-
-
-def blobiter(files):
-    for f in files:
-        while 1:
-            b = f.read(BLOB_HWM)
-            if not b:
-                break
-            yield b
-
-
-def drainbuf(buf, finalize):
-    while 1:
-        (blob, bits) = splitbuf(buf)
-        if blob:
-            yield (blob, bits)
-        else:
-            break
-    if buf.used() > BLOB_MAX:
-        # limit max blob size
-        yield (buf.get(buf.used()), 0)
-    elif finalize and buf.used():
-        yield (buf.get(buf.used()), 0)
-
-
-def hashsplit_iter(files):
-    assert(BLOB_HWM > BLOB_MAX)
-    buf = Buf()
-    fi = blobiter(files)
-    while 1:
-        for i in drainbuf(buf, finalize=False):
-            yield i
-        while buf.used() < BLOB_HWM:
-            bnew = next(fi)
-            if not bnew:
-                # eof
-                for i in drainbuf(buf, finalize=True):
-                    yield i
-                return
-            buf.put(bnew)
-
-
-total_split = 0
-def _split_to_blobs(w, files):
-    global total_split
-    for (blob, bits) in hashsplit_iter(files):
-        sha = w.new_blob(blob)
-        total_split += len(blob)
-        if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
-            w.breakpoint()
-        if progress_callback:
-            progress_callback(len(blob))
-        yield (sha, len(blob), bits)
-
-
-def _make_shalist(l):
-    ofs = 0
-    shalist = []
-    for (mode, sha, size) in l:
-        shalist.append((mode, '%016x' % ofs, sha))
-        ofs += size
-    total = ofs
-    return (shalist, total)
-
-
-def _squish(w, stacks, n):
-    i = 0
-    while i<n or len(stacks[i]) > MAX_PER_TREE:
-        while len(stacks) <= i+1:
-            stacks.append([])
-        if len(stacks[i]) == 1:
-            stacks[i+1] += stacks[i]
-        elif stacks[i]:
-            (shalist, size) = _make_shalist(stacks[i])
-            tree = w.new_tree(shalist)
-            stacks[i+1].append(('40000', tree, size))
-        stacks[i] = []
-        i += 1
-
-
-def split_to_shalist(w, files):
-    sl = _split_to_blobs(w, files)
-    if not fanout:
-        shal = []
-        for (sha,size,bits) in sl:
-            shal.append(('100644', sha, size))
-        return _make_shalist(shal)[0]
-    else:
-        base_bits = _hashsplit.blobbits()
-        fanout_bits = int(math.log(fanout, 2))
-        def bits_to_idx(n):
-            assert(n >= base_bits)
-            return (n - base_bits)/fanout_bits
-        stacks = [[]]
-        for (sha,size,bits) in sl:
-            assert(bits <= 32)
-            stacks[0].append(('100644', sha, size))
-            if bits > base_bits:
-                _squish(w, stacks, bits_to_idx(bits))
-        #log('stacks: %r\n' % [len(i) for i in stacks])
-        _squish(w, stacks, len(stacks)-1)
-        #log('stacks: %r\n' % [len(i) for i in stacks])
-        return _make_shalist(stacks[-1])[0]
-
-
-def split_to_blob_or_tree(w, files):
-    shalist = list(split_to_shalist(w, files))
-    if len(shalist) == 1:
-        return (shalist[0][0], shalist[0][2])
-    elif len(shalist) == 0:
-        return ('100644', w.new_blob(''))
-    else:
-        return ('40000', w.new_tree(shalist))
diff --git a/helpers.py b/helpers.py
deleted file mode 100644 (file)
index 75cf09c..0000000
--- a/helpers.py
+++ /dev/null
@@ -1,269 +0,0 @@
-import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
-
-
-def log(s):
-    sys.stderr.write(s)
-
-
-def mkdirp(d):
-    try:
-        os.makedirs(d)
-    except OSError, e:
-        if e.errno == errno.EEXIST:
-            pass
-        else:
-            raise
-
-
-def next(it):
-    try:
-        return it.next()
-    except StopIteration:
-        return None
-    
-    
-def unlink(f):
-    try:
-        os.unlink(f)
-    except OSError, e:
-        if e.errno == errno.ENOENT:
-            pass  # it doesn't exist, that's what you asked for
-
-
-def readpipe(argv):
-    p = subprocess.Popen(argv, stdout=subprocess.PIPE)
-    r = p.stdout.read()
-    p.wait()
-    return r
-
-
-# FIXME: this function isn't very generic, because it splits the filename
-# in an odd way and depends on a terminating '/' to indicate directories.
-# But it's used in a couple of places, so let's put it here.
-def pathsplit(p):
-    l = p.split('/')
-    l = [i+'/' for i in l[:-1]] + l[-1:]
-    if l[-1] == '':
-        l.pop()  # extra blank caused by terminating '/'
-    return l
-
-
-# like os.path.realpath, but doesn't follow a symlink for the last element.
-# (ie. if 'p' itself is itself a symlink, this one won't follow it)
-def realpath(p):
-    try:
-        st = os.lstat(p)
-    except OSError:
-        st = None
-    if st and stat.S_ISLNK(st.st_mode):
-        (dir, name) = os.path.split(p)
-        dir = os.path.realpath(dir)
-        out = os.path.join(dir, name)
-    else:
-        out = os.path.realpath(p)
-    #log('realpathing:%r,%r\n' % (p, out))
-    return out
-
-
-_username = None
-def username():
-    global _username
-    if not _username:
-        uid = os.getuid()
-        try:
-            _username = pwd.getpwuid(uid)[0]
-        except KeyError:
-            _username = 'user%d' % uid
-    return _username
-
-
-_userfullname = None
-def userfullname():
-    global _userfullname
-    if not _userfullname:
-        uid = os.getuid()
-        try:
-            _userfullname = pwd.getpwuid(uid)[4].split(',')[0]
-        except KeyError:
-            _userfullname = 'user%d' % uid
-    return _userfullname
-
-
-_hostname = None
-def hostname():
-    global _hostname
-    if not _hostname:
-        _hostname = socket.getfqdn()
-    return _hostname
-
-
-class NotOk(Exception):
-    pass
-
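-# a line-oriented request/response channel over a pair of pipes; replies end
-# with '\nok\n' or '\nerror ...\n' so callers can check command status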
-class Conn:
-    def __init__(self, inp, outp):
-        self.inp = inp
-        self.outp = outp
-
-    def read(self, size):
-        self.outp.flush()
-        return self.inp.read(size)
-
-    def readline(self):
-        self.outp.flush()
-        return self.inp.readline()
-
-    def write(self, data):
-        #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
-        self.outp.write(data)
-
-    def has_input(self):
-        [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
-        if rl:
-            assert(rl[0] == self.inp.fileno())
-            return True
-        else:
-            return None
-
-    def ok(self):
-        self.write('\nok\n')
-
-    def error(self, s):
-        s = re.sub(r'\s+', ' ', str(s))
-        self.write('\nerror %s\n' % s)
-
-    def _check_ok(self, onempty):
-        self.outp.flush()
-        rl = ''
-        for rl in linereader(self.inp):
-            #log('%d got line: %r\n' % (os.getpid(), rl))
-            if not rl:  # empty line
-                continue
-            elif rl == 'ok':
-                return None
-            elif rl.startswith('error '):
-                #log('client: error: %s\n' % rl[6:])
-                return NotOk(rl[6:])
-            else:
-                onempty(rl)
-        raise Exception('server exited unexpectedly; see errors above')
-
-    def drain_and_check_ok(self):
-        def onempty(rl):
-            pass
-        return self._check_ok(onempty)
-
-    def check_ok(self):
-        def onempty(rl):
-            raise Exception('expected "ok", got %r' % rl)
-        return self._check_ok(onempty)
-
-
-def linereader(f):
-    while 1:
-        line = f.readline()
-        if not line:
-            break
-        yield line[:-1]
-
-
-def chunkyreader(f, count = None):
-    if count != None:
-        while count > 0:
-            b = f.read(min(count, 65536))
-            if not b:
-                raise IOError('EOF with %d bytes remaining' % count)
-            yield b
-            count -= len(b)
-    else:
-        while 1:
-            b = f.read(65536)
-            if not b: break
-            yield b
-
-
-class AutoFlushIter:
-    def __init__(self, it, ondone = None):
-        self.it = it
-        self.ondone = ondone
-
-    def __iter__(self):
-        return self
-        
-    def next(self):
-        return self.it.next()
-        
-    def __del__(self):
-        for i in self.it:
-            pass
-        if self.ondone:
-            self.ondone()
-
-
-def slashappend(s):
-    if s and not s.endswith('/'):
-        return s + '/'
-    else:
-        return s
-
-
-def _mmap_do(f, len, flags, prot):
-    if not len:
-        st = os.fstat(f.fileno())
-        len = st.st_size
-    map = mmap.mmap(f.fileno(), len, flags, prot)
-    f.close()  # map will persist beyond file close
-    return map
-
-
-def mmap_read(f, len = 0):
-    return _mmap_do(f, len, mmap.MAP_PRIVATE, mmap.PROT_READ)
-
-
-def mmap_readwrite(f, len = 0):
-    return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
-
-
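-# parse a human-style byte count; units are binary, so parse_num('1.5G')
-# returns 1610612736 and parse_num('42kb') returns 43008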
-def parse_num(s):
-    g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
-    if not g:
-        raise ValueError("can't parse %r as a number" % s)
-    (val, unit) = g.groups()
-    num = float(val)
-    unit = unit.lower()
-    if unit in ['t', 'tb']:
-        mult = 1024*1024*1024*1024
-    elif unit in ['g', 'gb']:
-        mult = 1024*1024*1024
-    elif unit in ['m', 'mb']:
-        mult = 1024*1024
-    elif unit in ['k', 'kb']:
-        mult = 1024
-    elif unit in ['', 'b']:
-        mult = 1
-    else:
-        raise ValueError("invalid unit %r in number %r" % (unit, s))
-    return int(num*mult)
-
-
-# count the number of elements in an iterator (consumes the iterator)
-def count(l):
-    return reduce(lambda x,y: x+1, l, 0)
-
-
-def atoi(s):
-    try:
-        return int(s or '0')
-    except ValueError:
-        return 0
-
-
-saved_errors = []
-def add_error(e):
-    saved_errors.append(e)
-    log('%-70s\n' % e)
-
-istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY'))
-def progress(s):
-    if istty:
-        log(s)
diff --git a/index.py b/index.py
deleted file mode 100644 (file)
index 536af8f..0000000
--- a/index.py
+++ /dev/null
@@ -1,426 +0,0 @@
-import os, stat, time, struct, tempfile
-from helpers import *
-
-EMPTY_SHA = '\0'*20
-FAKE_SHA = '\x01'*20
-INDEX_HDR = 'BUPI\0\0\0\2'
-INDEX_SIG = '!IIIIIQII20sHII'
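-# (dev, ctime, mtime, uid, gid, size, mode, gitmode, sha, flags,
-#  children_ofs, children_n), all big-endian; see Entry.packed()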
-ENTLEN = struct.calcsize(INDEX_SIG)
-FOOTER_SIG = '!Q'
-FOOTLEN = struct.calcsize(FOOTER_SIG)
-
-IX_EXISTS = 0x8000
-IX_HASHVALID = 0x4000
-
-class Error(Exception):
-    pass
-
-
-class Level:
-    def __init__(self, ename, parent):
-        self.parent = parent
-        self.ename = ename
-        self.list = []
-        self.count = 0
-
-    def write(self, f):
-        (ofs,n) = (f.tell(), len(self.list))
-        if self.list:
-            count = len(self.list)
-            #log('popping %r with %d entries\n' 
-            #    % (''.join(self.ename), count))
-            for e in self.list:
-                e.write(f)
-            if self.parent:
-                self.parent.count += count + self.count
-        return (ofs,n)
-
-
-def _golevel(level, f, ename, newentry):
-    # close nodes back up the tree
-    assert(level)
-    while ename[:len(level.ename)] != level.ename:
-        n = BlankNewEntry(level.ename[-1])
-        (n.children_ofs,n.children_n) = level.write(f)
-        level.parent.list.append(n)
-        level = level.parent
-
-    # create nodes down the tree
-    while len(level.ename) < len(ename):
-        level = Level(ename[:len(level.ename)+1], level)
-
-    # are we in precisely the right place?
-    assert(ename == level.ename)
-    n = newentry or BlankNewEntry(ename and level.ename[-1] or None)
-    (n.children_ofs,n.children_n) = level.write(f)
-    if level.parent:
-        level.parent.list.append(n)
-    level = level.parent
-
-    return level
-
-
-class Entry:
-    def __init__(self, basename, name):
-        self.basename = str(basename)
-        self.name = str(name)
-        self.children_ofs = 0
-        self.children_n = 0
-
-    def __repr__(self):
-        return ("(%s,0x%04x,%d,%d,%d,%d,%d,%s/%s,0x%04x,0x%08x/%d)" 
-                % (self.name, self.dev,
-                   self.ctime, self.mtime, self.uid, self.gid,
-                   self.size, oct(self.mode), oct(self.gitmode),
-                   self.flags, self.children_ofs, self.children_n))
-
-    def packed(self):
-        return struct.pack(INDEX_SIG,
-                           self.dev, self.ctime, self.mtime, 
-                           self.uid, self.gid, self.size, self.mode,
-                           self.gitmode, self.sha, self.flags,
-                           self.children_ofs, self.children_n)
-
-    def from_stat(self, st, tstart):
-        old = (self.dev, self.ctime, self.mtime,
-               self.uid, self.gid, self.size, self.flags & IX_EXISTS)
-        new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
-               st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
-        self.dev = st.st_dev
-        self.ctime = int(st.st_ctime)
-        self.mtime = int(st.st_mtime)
-        self.uid = st.st_uid
-        self.gid = st.st_gid
-        self.size = st.st_size
-        self.mode = st.st_mode
-        self.flags |= IX_EXISTS
-        if int(st.st_ctime) >= tstart or old != new \
-              or self.sha == EMPTY_SHA or not self.gitmode:
-            self.invalidate()
-
-    def is_valid(self):
-        f = IX_HASHVALID|IX_EXISTS
-        return (self.flags & f) == f
-
-    def invalidate(self):
-        self.flags &= ~IX_HASHVALID
-
-    def validate(self, gitmode, sha):
-        assert(sha)
-        assert(gitmode)
-        self.gitmode = gitmode
-        self.sha = sha
-        self.flags |= IX_HASHVALID|IX_EXISTS
-
-    def exists(self):
-        return not self.is_deleted()
-
-    def is_deleted(self):
-        return (self.flags & IX_EXISTS) == 0
-
-    def set_deleted(self):
-        if self.flags & IX_EXISTS:
-            self.flags &= ~(IX_EXISTS | IX_HASHVALID)
-
-    def is_real(self):
-        return not self.is_fake()
-
-    def is_fake(self):
-        return not self.ctime
-
-    def __cmp__(a, b):
-        return (cmp(a.name, b.name)
-                or -cmp(a.is_valid(), b.is_valid())
-                or -cmp(a.is_fake(), b.is_fake()))
-
-    def write(self, f):
-        f.write(self.basename + '\0' + self.packed())
-
-
-class NewEntry(Entry):
-    def __init__(self, basename, name, dev, ctime, mtime, uid, gid,
-                 size, mode, gitmode, sha, flags, children_ofs, children_n):
-        Entry.__init__(self, basename, name)
-        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
-         self.size, self.mode, self.gitmode, self.sha,
-         self.flags, self.children_ofs, self.children_n
-         ) = (dev, int(ctime), int(mtime), uid, gid,
-              size, mode, gitmode, sha, flags, children_ofs, children_n)
-
-
-class BlankNewEntry(NewEntry):
-    def __init__(self, basename):
-        NewEntry.__init__(self, basename, basename,
-                          0, 0, 0, 0, 0, 0, 0,
-                          0, EMPTY_SHA, 0, 0, 0)
-
-
-class ExistingEntry(Entry):
-    def __init__(self, parent, basename, name, m, ofs):
-        Entry.__init__(self, basename, name)
-        self.parent = parent
-        self._m = m
-        self._ofs = ofs
-        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
-         self.size, self.mode, self.gitmode, self.sha,
-         self.flags, self.children_ofs, self.children_n
-         ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN)))
-
-    def repack(self):
-        self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
-        if self.parent and not self.is_valid():
-            self.parent.invalidate()
-            self.parent.repack()
-
-    def iter(self, name=None, wantrecurse=None):
-        dname = name
-        if dname and not dname.endswith('/'):
-            dname += '/'
-        ofs = self.children_ofs
-        assert(ofs <= len(self._m))
-        assert(self.children_n < 1000000)
-        for i in xrange(self.children_n):
-            eon = self._m.find('\0', ofs)
-            assert(eon >= 0)
-            assert(eon >= ofs)
-            assert(eon > ofs)
-            basename = str(buffer(self._m, ofs, eon-ofs))
-            child = ExistingEntry(self, basename, self.name + basename,
-                                  self._m, eon+1)
-            if (not dname
-                 or child.name.startswith(dname)
-                 or child.name.endswith('/') and dname.startswith(child.name)):
-                if not wantrecurse or wantrecurse(child):
-                    for e in child.iter(name=name, wantrecurse=wantrecurse):
-                        yield e
-            if not name or child.name == name or child.name.startswith(dname):
-                yield child
-            ofs = eon + 1 + ENTLEN
-
-    def __iter__(self):
-        return self.iter()
-            
-
-class Reader:
-    def __init__(self, filename):
-        self.filename = filename
-        self.m = ''
-        self.writable = False
-        self.count = 0
-        f = None
-        try:
-            f = open(filename, 'r+')
-        except IOError, e:
-            if e.errno == errno.ENOENT:
-                pass
-            else:
-                raise
-        if f:
-            b = f.read(len(INDEX_HDR))
-            if b != INDEX_HDR:
-                log('warning: %s: header: expected %r, got %r'
-                                 % (filename, INDEX_HDR, b))
-            else:
-                st = os.fstat(f.fileno())
-                if st.st_size:
-                    self.m = mmap_readwrite(f)
-                    self.writable = True
-                    self.count = struct.unpack(FOOTER_SIG,
-                          str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
-
-    def __del__(self):
-        self.close()
-
-    def __len__(self):
-        return int(self.count)
-
-    def forward_iter(self):
-        ofs = len(INDEX_HDR)
-        while ofs+ENTLEN <= len(self.m)-FOOTLEN:
-            eon = self.m.find('\0', ofs)
-            assert(eon >= 0)
-            assert(eon >= ofs)
-            assert(eon > ofs)
-            basename = str(buffer(self.m, ofs, eon-ofs))
-            yield ExistingEntry(None, basename, basename, self.m, eon+1)
-            ofs = eon + 1 + ENTLEN
-
-    def iter(self, name=None, wantrecurse=None):
-        if len(self.m) > len(INDEX_HDR)+ENTLEN:
-            dname = name
-            if dname and not dname.endswith('/'):
-                dname += '/'
-            root = ExistingEntry(None, '/', '/',
-                                 self.m, len(self.m)-FOOTLEN-ENTLEN)
-            for sub in root.iter(name=name, wantrecurse=wantrecurse):
-                yield sub
-            if not dname or dname == root.name:
-                yield root
-
-    def __iter__(self):
-        return self.iter()
-
-    def exists(self):
-        return self.m
-
-    def save(self):
-        if self.writable and self.m:
-            self.m.flush()
-
-    def close(self):
-        self.save()
-        if self.writable and self.m:
-            self.m = None
-            self.writable = False
-
-    def filter(self, prefixes, wantrecurse=None):
-        for (rp, path) in reduce_paths(prefixes):
-            for e in self.iter(rp, wantrecurse=wantrecurse):
-                assert(e.name.startswith(rp))
-                name = path + e.name[len(rp):]
-                yield (name, e)
-
-
-class Writer:
-    def __init__(self, filename):
-        self.rootlevel = self.level = Level([], None)
-        self.f = None
-        self.count = 0
-        self.lastfile = None
-        self.filename = None
-        self.filename = filename = realpath(filename)
-        (dir,name) = os.path.split(filename)
-        (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
-        self.f = os.fdopen(ffd, 'wb', 65536)
-        self.f.write(INDEX_HDR)
-
-    def __del__(self):
-        self.abort()
-
-    def abort(self):
-        f = self.f
-        self.f = None
-        if f:
-            f.close()
-            os.unlink(self.tmpname)
-
-    def flush(self):
-        if self.level:
-            self.level = _golevel(self.level, self.f, [], None)
-            self.count = self.rootlevel.count
-            if self.count:
-                self.count += 1
-            self.f.write(struct.pack(FOOTER_SIG, self.count))
-            self.f.flush()
-        assert(self.level == None)
-
-    def close(self):
-        self.flush()
-        f = self.f
-        self.f = None
-        if f:
-            f.close()
-            os.rename(self.tmpname, self.filename)
-
-    def _add(self, ename, entry):
-        if self.lastfile and self.lastfile <= ename:
-            raise Error('%r must come before %r'
-                             % (''.join(ename), ''.join(self.lastfile)))
-        self.lastfile = ename
-        self.level = _golevel(self.level, self.f, ename, entry)
-
-    def add(self, name, st, hashgen = None):
-        endswith = name.endswith('/')
-        ename = pathsplit(name)
-        basename = ename[-1]
-        #log('add: %r %r\n' % (basename, name))
-        flags = IX_EXISTS
-        sha = None
-        if hashgen:
-            (gitmode, sha) = hashgen(name)
-            flags |= IX_HASHVALID
-        else:
-            (gitmode, sha) = (0, EMPTY_SHA)
-        if st:
-            isdir = stat.S_ISDIR(st.st_mode)
-            assert(isdir == endswith)
-            e = NewEntry(basename, name, st.st_dev, int(st.st_ctime),
-                         int(st.st_mtime), st.st_uid, st.st_gid,
-                         st.st_size, st.st_mode, gitmode, sha, flags,
-                         0, 0)
-        else:
-            assert(endswith)
-            e = BlankNewEntry(basename)
-            e.gitmode = gitmode
-            e.sha = sha
-            e.flags = flags
-        self._add(ename, e)
-
-    def add_ixentry(self, e):
-        e.children_ofs = e.children_n = 0
-        self._add(pathsplit(e.name), e)
-
-    def new_reader(self):
-        self.flush()
-        return Reader(self.tmpname)
-
-
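-# resolve each path, drop any already covered by an earlier parent directory,
-# and return (realpath, original) pairs reverse-sorted as Writer expects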
-def reduce_paths(paths):
-    xpaths = []
-    for p in paths:
-        rp = realpath(p)
-        try:
-            st = os.lstat(rp)
-            if stat.S_ISDIR(st.st_mode):
-                rp = slashappend(rp)
-                p = slashappend(p)
-        except OSError, e:
-            if e.errno != errno.ENOENT:
-                raise
-        xpaths.append((rp, p))
-    xpaths.sort()
-
-    paths = []
-    prev = None
-    for (rp, p) in xpaths:
-        if prev and (prev == rp 
-                     or (prev.endswith('/') and rp.startswith(prev))):
-            continue # already superseded by previous path
-        paths.append((rp, p))
-        prev = rp
-    paths.sort(reverse=True)
-    return paths
-
-
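-# merge several sorted index readers into one stream, yielding each name once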
-class MergeIter:
-    def __init__(self, iters):
-        self.iters = iters
-
-    def __len__(self):
-        # FIXME: doesn't remove duplicated entries between iters.
-        # That only happens for parent directories, but will mean the
-        # actual iteration returns fewer entries than this function counts.
-        return sum(len(it) for it in self.iters)
-
-    def __iter__(self):
-        total = len(self)
-        l = [iter(it) for it in self.iters]
-        l = [(next(it),it) for it in l]
-        l = filter(lambda x: x[0], l)
-        count = 0
-        lastname = None
-        while l:
-            if not (count % 1024):
-                progress('bup: merging indexes (%d/%d)\r' % (count, total))
-            l.sort()
-            (e,it) = l.pop()
-            if not e:
-                continue
-            if e.name != lastname:
-                yield e
-                lastname = e.name
-            n = next(it)
-            if n:
-                l.append((n,it))
-            count += 1
-        log('bup: merging indexes (%d/%d), done.\n' % (count, total))
diff --git a/lib/bup/__init__.py b/lib/bup/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/lib/bup/_hashsplit.c b/lib/bup/_hashsplit.c
new file mode 100644 (file)
index 0000000..e78f597
--- /dev/null
+++ b/lib/bup/_hashsplit.c
@@ -0,0 +1,145 @@
+#include <Python.h>
+#include <assert.h>
+#include <stdint.h>
+
+#define BLOBBITS (13)
+#define BLOBSIZE (1<<BLOBBITS)
+#define WINDOWBITS (7)
+#define WINDOWSIZE (1<<(WINDOWBITS-1))
+
+
+// FIXME: replace this with a not-stupid rolling checksum algorithm,
+// such as the one used in rsync (Adler32?)
+static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
+{
+    return ((old<<1) | (old>>31)) ^ drop ^ add;
+}
+
+
+static int find_ofs(const unsigned char *buf, int len, int *bits)
+{
+    unsigned char window[WINDOWSIZE];
+    uint32_t sum = 0;
+    int i = 0, count;
+    memset(window, 0, sizeof(window));
+    
+    for (count = 0; count < len; count++)
+    {
+       sum = stupidsum_add(sum, window[i], buf[count]);
+       window[i] = buf[count];
+       i = (i + 1) % WINDOWSIZE;
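+       // split where the low BLOBBITS bits of the rolling sum are all ones;
+       // extra consecutive 1 bits are counted so callers can fan out trees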
+       if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
+       {
+           if (bits)
+           {
+               *bits = BLOBBITS;
+               sum >>= BLOBBITS;
+               for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
+                   ;
+           }
+           return count+1;
+       }
+    }
+    return 0;
+}
+
+
+static PyObject *blobbits(PyObject *self, PyObject *args)
+{
+    if (!PyArg_ParseTuple(args, ""))
+       return NULL;
+    return Py_BuildValue("i", BLOBBITS);
+}
+
+
+static PyObject *splitbuf(PyObject *self, PyObject *args)
+{
+    unsigned char *buf = NULL;
+    int len = 0, out = 0, bits = -1;
+
+    if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+       return NULL;
+    out = find_ofs(buf, len, &bits);
+    return Py_BuildValue("ii", out, bits);
+}
+
+
+static PyObject *bitmatch(PyObject *self, PyObject *args)
+{
+    unsigned char *buf1 = NULL, *buf2 = NULL;
+    int len1 = 0, len2 = 0;
+    int byte, bit;
+
+    if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
+       return NULL;
+    
+    bit = 0;
+    for (byte = 0; byte < len1 && byte < len2; byte++)
+    {
+       int b1 = buf1[byte], b2 = buf2[byte];
+       if (b1 != b2)
+       {
+           for (bit = 0; bit < 8; bit++)
+               if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
+                   break;
+           break;
+       }
+    }
+    
+    return Py_BuildValue("i", byte*8 + bit);
+}
+
+
+// I would have made this a lower-level function that just fills in a buffer
+// with random values, and then written those values from python.  But that's
+// about 20% slower in my tests, and since we typically generate random
+// numbers for benchmarking other parts of bup, any slowness in generating
+// random bytes will make our benchmarks inaccurate.  Plus nobody wants
+// pseudorandom bytes much except for this anyway.
+static PyObject *write_random(PyObject *self, PyObject *args)
+{
+    uint32_t buf[1024/4];
+    int fd = -1, seed = 0;
+    ssize_t ret;
+    long long len = 0, kbytes = 0, written = 0;
+
+    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+       return NULL;
+    
+    srandom(seed);
+    
+    for (kbytes = len/1024; kbytes > 0; kbytes--)
+    {
+       int i;
+       for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
+           buf[i] = random();
+       ret = write(fd, buf, sizeof(buf));
+       if (ret < 0)
+           ret = 0;
+       written += ret;
+       if (ret < sizeof(buf))
+           break;
+       if (!(kbytes%1024))
+           fprintf(stderr, ".");
+    }
+    
+    return Py_BuildValue("L", written);
+}
+
+
+static PyMethodDef hashsplit_methods[] = {
+    { "blobbits", blobbits, METH_VARARGS,
+       "Return the number of low checksum bits that define a split point." },
+    { "splitbuf", splitbuf, METH_VARARGS,
+       "Return the offset and extra bits of the first split point in a buffer." },
+    { "bitmatch", bitmatch, METH_VARARGS,
+       "Count the number of matching prefix bits between two strings." },
+    { "write_random", write_random, METH_VARARGS,
+       "Write random bytes to the given file descriptor" },
+    { NULL, NULL, 0, NULL },  // sentinel
+};
+
+PyMODINIT_FUNC init_hashsplit(void)
+{
+    Py_InitModule("_hashsplit", hashsplit_methods);
+}
diff --git a/lib/bup/client.py b/lib/bup/client.py
new file mode 100644 (file)
index 0000000..6df1358
--- /dev/null
+++ b/lib/bup/client.py
@@ -0,0 +1,258 @@
+import re, struct, errno, select
+from bup import git
+from bup.helpers import *
+from subprocess import Popen, PIPE
+
+
+class ClientError(Exception):
+    pass
+
+
+class Client:
+    def __init__(self, remote, create=False):
+        self._busy = None
+        self.p = None
+        self.conn = None
+        rs = remote.split(':', 1)
+        nicedir = os.path.split(os.path.abspath(sys.argv[0]))[0]
+        nicedir = re.sub(r':', "_", nicedir)
+        if len(rs) == 1:
+            (host, dir) = ('NONE', remote)
+            def fixenv():
+                os.environ['PATH'] = ':'.join([nicedir,
+                                               os.environ.get('PATH', '')])
+            argv = ['bup', 'server']
+        else:
+            (host, dir) = rs
+            fixenv = None
+            # WARNING: shell quoting security holes are possible here, so we
+            # have to be super careful.  We have to use 'sh -c' because
+            # csh-derived shells can't handle PATH= notation.  We can't
+            # set PATH in advance, because ssh probably replaces it.  We
+            # can't exec *safely* using argv, because *both* ssh and 'sh -c'
+            # allow shellquoting.  So we end up having to double-shellquote
+            # stuff here.
+            escapedir = re.sub(r'([^\w/])', r'\\\\\\\1', nicedir)
+            cmd = r"""
+                       sh -c PATH=%s:'$PATH bup server'
+                   """ % escapedir
+            argv = ['ssh', host, '--', cmd.strip()]
+            #log('argv is: %r\n' % argv)
+        (self.host, self.dir) = (host, dir)
+        self.cachedir = git.repo('index-cache/%s'
+                                 % re.sub(r'[^@\w]', '_', 
+                                          "%s:%s" % (host, dir)))
+        try:
+            self.p = p = Popen(argv, stdin=PIPE, stdout=PIPE, preexec_fn=fixenv)
+        except OSError, e:
+            raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
+        self.conn = conn = Conn(p.stdout, p.stdin)
+        if dir:
+            dir = re.sub(r'[\r\n]', ' ', dir)
+            if create:
+                conn.write('init-dir %s\n' % dir)
+            else:
+                conn.write('set-dir %s\n' % dir)
+            self.check_ok()
+        self.sync_indexes_del()
+
+    def __del__(self):
+        try:
+            self.close()
+        except IOError, e:
+            if e.errno == errno.EPIPE:
+                pass
+            else:
+                raise
+
+    def close(self):
+        if self.conn and not self._busy:
+            self.conn.write('quit\n')
+        if self.p:
+            self.p.stdin.close()
+            while self.p.stdout.read(65536):
+                pass
+            self.p.stdout.close()
+            # the pipes are closed and drained, so wait() can't deadlock
+            rv = self.p.wait()
+            if rv:
+                raise ClientError('server tunnel returned exit code %d' % rv)
+        self.conn = None
+        self.p = None
+
+    def check_ok(self):
+        rv = self.p.poll()
+        if rv != None:
+            raise ClientError('server exited unexpectedly with code %r' % rv)
+        try:
+            return self.conn.check_ok()
+        except Exception, e:
+            raise ClientError, e, sys.exc_info()[2]
+
+    def check_busy(self):
+        if self._busy:
+            raise ClientError('already busy with command %r' % self._busy)
+        
+    def _not_busy(self):
+        self._busy = None
+
+    def sync_indexes_del(self):
+        self.check_busy()
+        conn = self.conn
+        conn.write('list-indexes\n')
+        packdir = git.repo('objects/pack')
+        all = {}
+        needed = {}
+        for line in linereader(conn):
+            if not line:
+                break
+            all[line] = 1
+            assert(line.find('/') < 0)
+            if not os.path.exists(os.path.join(self.cachedir, line)):
+                needed[line] = 1
+        self.check_ok()
+
+        mkdirp(self.cachedir)
+        for f in os.listdir(self.cachedir):
+            if f.endswith('.idx') and not f in all:
+                log('pruning old index: %r\n' % f)
+                os.unlink(os.path.join(self.cachedir, f))
+
+    def sync_index(self, name):
+        #log('requesting %r\n' % name)
+        mkdirp(self.cachedir)
+        self.conn.write('send-index %s\n' % name)
+        n = struct.unpack('!I', self.conn.read(4))[0]
+        assert(n)
+        fn = os.path.join(self.cachedir, name)
+        f = open(fn + '.tmp', 'w')
+        count = 0
+        progress('Receiving index: %d/%d\r' % (count, n))
+        for b in chunkyreader(self.conn, n):
+            f.write(b)
+            count += len(b)
+            progress('Receiving index: %d/%d\r' % (count, n))
+        progress('Receiving index: %d/%d, done.\n' % (count, n))
+        self.check_ok()
+        f.close()
+        os.rename(fn + '.tmp', fn)
+
+    def _make_objcache(self):
+        ob = self._busy
+        self._busy = None
+        #self.sync_indexes()
+        self._busy = ob
+        return git.MultiPackIndex(self.cachedir)
+
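+    # the server suggests an index when we upload an object it already has;
+    # suspend the transfer, fetch that index locally, then resume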
+    def _suggest_pack(self, indexname):
+        log('received index suggestion: %s\n' % indexname)
+        ob = self._busy
+        if ob:
+            assert(ob == 'receive-objects')
+            self._busy = None
+            self.conn.write('\xff\xff\xff\xff')  # suspend receive-objects
+            self.conn.drain_and_check_ok()
+        self.sync_index(indexname)
+        if ob:
+            self.conn.write('receive-objects\n')
+            self._busy = ob
+
+    def new_packwriter(self):
+        self.check_busy()
+        self._busy = 'receive-objects'
+        return PackWriter_Remote(self.conn,
+                                 objcache_maker = self._make_objcache,
+                                 suggest_pack = self._suggest_pack,
+                                 onclose = self._not_busy)
+
+    def read_ref(self, refname):
+        self.check_busy()
+        self.conn.write('read-ref %s\n' % refname)
+        r = self.conn.readline().strip()
+        self.check_ok()
+        if r:
+            assert(len(r) == 40)   # hexified sha
+            return r.decode('hex')
+        else:
+            return None   # nonexistent ref
+
+    def update_ref(self, refname, newval, oldval):
+        self.check_busy()
+        self.conn.write('update-ref %s\n%s\n%s\n' 
+                        % (refname, newval.encode('hex'),
+                           (oldval or '').encode('hex')))
+        self.check_ok()
+
+    def cat(self, id):
+        self.check_busy()
+        self._busy = 'cat'
+        self.conn.write('cat %s\n' % re.sub(r'[\n\r]', '_', id))
+        while 1:
+            sz = struct.unpack('!I', self.conn.read(4))[0]
+            if not sz: break
+            yield self.conn.read(sz)
+        e = self.check_ok()
+        self._not_busy()
+        if e:
+            raise KeyError(str(e))
+
+
+class PackWriter_Remote(git.PackWriter):
+    def __init__(self, conn, objcache_maker, suggest_pack, onclose):
+        git.PackWriter.__init__(self, objcache_maker)
+        self.file = conn
+        self.filename = 'remote socket'
+        self.suggest_pack = suggest_pack
+        self.onclose = onclose
+        self._packopen = False
+
+    def _open(self):
+        if not self._packopen:
+            self._make_objcache()
+            self.file.write('receive-objects\n')
+            self._packopen = True
+
+    def _end(self):
+        if self._packopen and self.file:
+            self.file.write('\0\0\0\0')
+            self._packopen = False
+            while True:
+                line = self.file.readline().strip()
+                if line.startswith('index '):
+                    pass
+                else:
+                    break
+            id = line
+            self.file.check_ok()
+            self.objcache = None
+            if self.onclose:
+                self.onclose()
+            if self.suggest_pack:
+                self.suggest_pack(id)
+            return id
+
+    def close(self):
+        id = self._end()
+        self.file = None
+        return id
+
+    def abort(self):
+        raise git.GitError("don't know how to abort remote pack writing")
+
+    def _raw_write(self, datalist):
+        assert(self.file)
+        if not self._packopen:
+            self._open()
+        data = ''.join(datalist)
+        assert(len(data))
+        self.file.write(struct.pack('!I', len(data)) + data)
+        self.outbytes += len(data)
+        self.count += 1
+
+        if self.file.has_input():
+            line = self.file.readline().strip()
+            assert(line.startswith('index '))
+            idxname = line[6:]
+            if self.suggest_pack:
+                self.suggest_pack(idxname)
+                self.objcache.refresh()
diff --git a/lib/bup/csetup.py b/lib/bup/csetup.py
new file mode 100644 (file)
index 0000000..b58932c
--- /dev/null
+++ b/lib/bup/csetup.py
@@ -0,0 +1,8 @@
+from distutils.core import setup, Extension
+
+_hashsplit_mod = Extension('_hashsplit', sources=['_hashsplit.c'])
+
+setup(name='_hashsplit',
+      version='0.1',
+      description='hashsplit helper library for bup',
+      ext_modules=[_hashsplit_mod])
diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py
new file mode 100644 (file)
index 0000000..c3daaa8
--- /dev/null
+++ b/lib/bup/drecurse.py
@@ -0,0 +1,101 @@
+import stat, heapq
+from bup.helpers import *
+
+try:
+    O_LARGEFILE = os.O_LARGEFILE
+except AttributeError:
+    O_LARGEFILE = 0
+
+
+# the use of fchdir() and lstat() is for two reasons:
+#  - help out the kernel by not making it repeatedly look up the absolute path
+#  - avoid race conditions caused by doing listdir() on a changing symlink
+class OsFile:
+    def __init__(self, path):
+        self.fd = None
+        self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
+        
+    def __del__(self):
+        if self.fd:
+            fd = self.fd
+            self.fd = None
+            os.close(fd)
+
+    def fchdir(self):
+        os.fchdir(self.fd)
+
+    def stat(self):
+        return os.fstat(self.fd)
+
+
+_IFMT = stat.S_IFMT(0xffffffff)  # avoid function call in inner loop
+def _dirlist():
+    l = []
+    for n in os.listdir('.'):
+        try:
+            st = os.lstat(n)
+        except OSError, e:
+            add_error(Exception('%s: %s' % (realpath(n), str(e))))
+            continue
+        if (st.st_mode & _IFMT) == stat.S_IFDIR:
+            n += '/'
+        l.append((n,st))
+    l.sort(reverse=True)
+    return l
+
+
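+# yield (path, lstat) for everything under the cwd, depth-first, children
+# before their parent directory so callers see leaves first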
+def _recursive_dirlist(prepend, xdev):
+    for (name,pst) in _dirlist():
+        if name.endswith('/'):
+            if xdev != None and pst.st_dev != xdev:
+                log('Skipping %r: different filesystem.\n' % (prepend+name))
+                continue
+            try:
+                OsFile(name).fchdir()
+            except OSError, e:
+                add_error('%s: %s' % (prepend, e))
+            else:
+                for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev):
+                    yield i
+                os.chdir('..')
+        yield (prepend + name, pst)
+
+
+def recursive_dirlist(paths, xdev):
+    startdir = OsFile('.')
+    try:
+        assert(type(paths) != type(''))
+        for path in paths:
+            try:
+                pst = os.lstat(path)
+                if stat.S_ISLNK(pst.st_mode):
+                    yield (path, pst)
+                    continue
+            except OSError, e:
+                add_error(e)
+                continue
+            try:
+                pfile = OsFile(path)
+            except OSError, e:
+                add_error(e)
+                continue
+            pst = pfile.stat()
+            if xdev:
+                xdev = pst.st_dev
+            else:
+                xdev = None
+            if stat.S_ISDIR(pst.st_mode):
+                pfile.fchdir()
+                prepend = os.path.join(path, '')
+                for i in _recursive_dirlist(prepend=prepend, xdev=xdev):
+                    yield i
+                startdir.fchdir()
+            else:
+                prepend = path
+            yield (prepend,pst)
+    except:
+        try:
+            startdir.fchdir()
+        except:
+            pass
+        raise
diff --git a/lib/bup/git.py b/lib/bup/git.py
new file mode 100644 (file)
index 0000000..77e90bf
--- /dev/null
+++ b/lib/bup/git.py
@@ -0,0 +1,696 @@
+import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
+import heapq
+from bup.helpers import *
+
+verbose = 0
+ignore_midx = 0
+home_repodir = os.path.expanduser('~/.bup')
+repodir = None
+
+_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
+_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
+
+
+class GitError(Exception):
+    pass
+
+
+def repo(sub = ''):
+    global repodir
+    if not repodir:
+        raise GitError('You should call check_repo_or_die()')
+    gd = os.path.join(repodir, '.git')
+    if os.path.exists(gd):
+        repodir = gd
+    return os.path.join(repodir, sub)
+
+
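+# git pack object header: object type in bits 4-6 of the first byte, size as
+# a base-128 varint (low 4 bits first, 7 more bits per continuation byte);
+# e.g. a 300-byte blob gets the header bytes 0xbc 0x12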
+def _encode_packobj(type, content):
+    szout = ''
+    sz = len(content)
+    szbits = (sz & 0x0f) | (_typemap[type]<<4)
+    sz >>= 4
+    while 1:
+        if sz: szbits |= 0x80
+        szout += chr(szbits)
+        if not sz:
+            break
+        szbits = sz & 0x7f
+        sz >>= 7
+    z = zlib.compressobj(1)
+    yield szout
+    yield z.compress(content)
+    yield z.flush()
+
+
+def _encode_looseobj(type, content):
+    z = zlib.compressobj(1)
+    yield z.compress('%s %d\0' % (type, len(content)))
+    yield z.compress(content)
+    yield z.flush()
+
+
+def _decode_looseobj(buf):
+    assert(buf);
+    s = zlib.decompress(buf)
+    i = s.find('\0')
+    assert(i > 0)
+    l = s[:i].split(' ')
+    type = l[0]
+    sz = int(l[1])
+    content = s[i+1:]
+    assert(type in _typemap)
+    assert(sz == len(content))
+    return (type, content)
+
+
+def _decode_packobj(buf):
+    assert(buf)
+    c = ord(buf[0])
+    type = _typermap[(c & 0x70) >> 4]
+    sz = c & 0x0f
+    shift = 4
+    i = 0
+    while c & 0x80:
+        i += 1
+        c = ord(buf[i])
+        sz |= (c & 0x7f) << shift
+        shift += 7
+        if not (c & 0x80):
+            break
+    return (type, zlib.decompress(buf[i+1:]))
+
+
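+# reader for git .idx version 2 files: 8-byte magic, 256-entry fanout table,
+# sha table, crc table, 4-byte offsets, then 8-byte offsets for large packs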
+class PackIndex:
+    def __init__(self, filename):
+        self.name = filename
+        self.map = mmap_read(open(filename))
+        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
+        self.fanout = list(struct.unpack('!256I',
+                                         str(buffer(self.map, 8, 256*4))))
+        self.fanout.append(0)  # entry "-1"
+        nsha = self.fanout[255]
+        self.ofstable = buffer(self.map,
+                               8 + 256*4 + nsha*20 + nsha*4,
+                               nsha*4)
+        self.ofs64table = buffer(self.map,
+                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
+
+    def _ofs_from_idx(self, idx):
+        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
+        if ofs & 0x80000000:
+            idx64 = ofs & 0x7fffffff
+            ofs = struct.unpack('!Q',
+                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
+        return ofs
+
+    def _idx_from_hash(self, hash):
+        assert(len(hash) == 20)
+        b1 = ord(hash[0])
+        start = self.fanout[b1-1] # range -1..254
+        end = self.fanout[b1] # range 0..255
+        buf = buffer(self.map, 8 + 256*4, end*20)
+        want = str(hash)
+        while start < end:
+            mid = start + (end-start)/2
+            v = str(buf[mid*20:(mid+1)*20])
+            if v < want:
+                start = mid+1
+            elif v > want:
+                end = mid
+            else: # got it!
+                return mid
+        return None
+        
+    def find_offset(self, hash):
+        idx = self._idx_from_hash(hash)
+        if idx != None:
+            return self._ofs_from_idx(idx)
+        return None
+
+    def exists(self, hash):
+        return hash and (self._idx_from_hash(hash) != None) and True or None
+
+    def __iter__(self):
+        for i in xrange(self.fanout[255]):
+            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
+
+    def __len__(self):
+        return int(self.fanout[255])
+
+
+def extract_bits(buf, bits):
+    mask = (1<<bits) - 1
+    v = struct.unpack('!I', buf[0:4])[0]
+    v = (v >> (32-bits)) & mask
+    return v
+
+
+class PackMidx:
+    def __init__(self, filename):
+        self.name = filename
+        assert(filename.endswith('.midx'))
+        self.map = mmap_read(open(filename))
+        if str(self.map[0:8]) == 'MIDX\0\0\0\1':
+            log('Warning: ignoring old-style midx %r\n' % filename)
+            self.bits = 0
+            self.entries = 1
+            self.fanout = buffer('\0\0\0\0')
+            self.shalist = buffer('\0'*20)
+            self.idxnames = []
+        else:
+            assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
+            self.bits = struct.unpack('!I', self.map[8:12])[0]
+            self.entries = 2**self.bits
+            self.fanout = buffer(self.map, 12, self.entries*4)
+            shaofs = 12 + self.entries*4
+            nsha = self._fanget(self.entries-1)
+            self.shalist = buffer(self.map, shaofs, nsha*20)
+            self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
+
+    def _fanget(self, i):
+        start = i*4
+        s = self.fanout[start:start+4]
+        return struct.unpack('!I', s)[0]
+    
+    def exists(self, hash):
+        want = str(hash)
+        el = extract_bits(want, self.bits)
+        if el:
+            start = self._fanget(el-1)
+        else:
+            start = 0
+        end = self._fanget(el)
+        while start < end:
+            mid = start + (end-start)/2
+            v = str(self.shalist[mid*20:(mid+1)*20])
+            if v < want:
+                start = mid+1
+            elif v > want:
+                end = mid
+            else: # got it!
+                return True
+        return None
+    
+    def __iter__(self):
+        for i in xrange(self._fanget(self.entries-1)):
+            yield buffer(self.shalist, i*20, 20)
+    
+    def __len__(self):
+        return int(self._fanget(self.entries-1))
+
+
+_mpi_count = 0
+class MultiPackIndex:
+    def __init__(self, dir):
+        global _mpi_count
+        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
+        _mpi_count += 1
+        self.dir = dir
+        self.also = {}
+        self.packs = []
+        self.refresh()
+
+    def __del__(self):
+        global _mpi_count
+        _mpi_count -= 1
+        assert(_mpi_count == 0)
+
+    def __iter__(self):
+        return iter(idxmerge(self.packs))
+
+    def exists(self, hash):
+        if hash in self.also:
+            return True
+        for i in range(len(self.packs)):
+            p = self.packs[i]
+            if p.exists(hash):
+                # reorder so most recently used packs are searched first
+                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
+                return p.name
+        return None
+
+    def refresh(self, skip_midx = False, forget_packs = False):
+        if forget_packs:
+            self.packs = []
+        skip_midx = skip_midx or ignore_midx
+        d = dict((p.name, 1) for p in self.packs)
+        if os.path.exists(self.dir):
+            if not skip_midx:
+                midxl = []
+                for f in os.listdir(self.dir):
+                    full = os.path.join(self.dir, f)
+                    if f.endswith('.midx') and not d.get(full):
+                        mx = PackMidx(full)
+                        (mxd, mxf) = os.path.split(mx.name)
+                        broken = 0
+                        for n in mx.idxnames:
+                            if not os.path.exists(os.path.join(mxd, n)):
+                                log(('warning: index %s missing\n' +
+                                    '  used by %s\n') % (n, mxf))
+                                broken += 1
+                        if not broken:
+                            midxl.append(mx)
+                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
+                for ix in midxl:
+                    any = 0
+                    for sub in ix.idxnames:
+                        if not d.get(os.path.join(self.dir, sub)):
+                            self.packs.append(ix)
+                            d[ix.name] = 1
+                            for name in ix.idxnames:
+                                d[os.path.join(self.dir, name)] = 1
+                            any += 1
+                            break
+                    if not any:
+                        log('midx: removing redundant: %s\n' 
+                            % os.path.basename(ix.name))
+                        unlink(ix.name)
+            for f in os.listdir(self.dir):
+                full = os.path.join(self.dir, f)
+                if f.endswith('.idx') and not d.get(full):
+                    self.packs.append(PackIndex(full))
+                    d[full] = 1
+        log('MultiPackIndex: using %d index%s.\n' 
+            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
+
+    def add(self, hash):
+        self.also[hash] = 1
+
+    def zap_also(self):
+        self.also = {}
+
+
+def calc_hash(type, content):
+    header = '%s %d\0' % (type, len(content))
+    sum = sha.sha(header)
+    sum.update(content)
+    return sum.digest()
+
+
+def _shalist_sort_key(ent):
+    (mode, name, id) = ent
+    if stat.S_ISDIR(int(mode, 8)):
+        return name + '/'
+    else:
+        return name
+
+
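+# merge-sort any number of sorted sha iterators into one deduplicated stream,
+# using a heap keyed on each iterator's next sha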
+def idxmerge(idxlist):
+    total = sum(len(i) for i in idxlist)
+    iters = (iter(i) for i in idxlist)
+    heap = [(next(it), it) for it in iters]
+    heapq.heapify(heap)
+    count = 0
+    last = None
+    while heap:
+        if (count % 10024) == 0:
+            progress('Reading indexes: %.2f%% (%d/%d)\r'
+                     % (count*100.0/total, count, total))
+        (e, it) = heap[0]
+        if e != last:
+            yield e
+            last = e
+        count += 1
+        e = next(it)
+        if e:
+            heapq.heapreplace(heap, (e, it))
+        else:
+            heapq.heappop(heap)
+    log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
+
+    
+class PackWriter:
+    def __init__(self, objcache_maker=None):
+        self.count = 0
+        self.outbytes = 0
+        self.filename = None
+        self.file = None
+        self.objcache_maker = objcache_maker
+        self.objcache = None
+
+    def __del__(self):
+        self.close()
+
+    def _make_objcache(self):
+        if not self.objcache:
+            if self.objcache_maker:
+                self.objcache = self.objcache_maker()
+            else:
+                self.objcache = MultiPackIndex(repo('objects/pack'))
+
+    def _open(self):
+        if not self.file:
+            self._make_objcache()
+            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
+            self.file = os.fdopen(fd, 'w+b')
+            assert(name.endswith('.pack'))
+            self.filename = name[:-5]
+            self.file.write('PACK\0\0\0\2\0\0\0\0')
+
+    def _raw_write(self, datalist):
+        self._open()
+        f = self.file
+        for d in datalist:
+            f.write(d)
+            self.outbytes += len(d)
+        self.count += 1
+
+    def _write(self, bin, type, content):
+        if verbose:
+            log('>')
+        self._raw_write(_encode_packobj(type, content))
+        return bin
+
+    def breakpoint(self):
+        id = self._end()
+        self.outbytes = self.count = 0
+        return id
+
+    def write(self, type, content):
+        return self._write(calc_hash(type, content), type, content)
+
+    def exists(self, id):
+        if not self.objcache:
+            self._make_objcache()
+        return self.objcache.exists(id)
+
+    def maybe_write(self, type, content):
+        bin = calc_hash(type, content)
+        if not self.exists(bin):
+            self._write(bin, type, content)
+            self.objcache.add(bin)
+        return bin
+
+    def new_blob(self, blob):
+        return self.maybe_write('blob', blob)
+
+    def new_tree(self, shalist):
+        shalist = sorted(shalist, key = _shalist_sort_key)
+        l = []
+        for (mode,name,bin) in shalist:
+            assert(mode)
+            assert(mode != '0')
+            assert(mode[0] != '0')
+            assert(name)
+            assert(len(bin) == 20)
+            l.append('%s %s\0%s' % (mode,name,bin))
+        return self.maybe_write('tree', ''.join(l))
+
+    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
+        l = []
+        if tree: l.append('tree %s' % tree.encode('hex'))
+        if parent: l.append('parent %s' % parent.encode('hex'))
+        if author: l.append('author %s %s' % (author, _git_date(adate)))
+        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
+        l.append('')
+        l.append(msg)
+        return self.maybe_write('commit', '\n'.join(l))
+
+    def new_commit(self, parent, tree, msg):
+        now = time.time()
+        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
+        commit = self._new_commit(tree, parent,
+                                  userline, now, userline, now,
+                                  msg)
+        return commit
+
+    def abort(self):
+        f = self.file
+        if f:
+            self.file = None
+            f.close()
+            os.unlink(self.filename + '.pack')
+
+    def _end(self):
+        f = self.file
+        if not f: return None
+        self.file = None
+        self.objcache = None
+
+        # update object count
+        f.seek(8)
+        cp = struct.pack('!i', self.count)
+        assert(len(cp) == 4)
+        f.write(cp)
+
+        # calculate the pack sha1sum
+        f.seek(0)
+        sum = sha.sha()
+        while 1:
+            b = f.read(65536)
+            sum.update(b)
+            if not b: break
+        f.write(sum.digest())
+        
+        f.close()
+
+        p = subprocess.Popen(['git', 'index-pack', '-v',
+                              '--index-version=2',
+                              self.filename + '.pack'],
+                             preexec_fn = _gitenv,
+                             stdout = subprocess.PIPE)
+        out = p.stdout.read().strip()
+        _git_wait('git index-pack', p)
+        if not out:
+            raise GitError('git index-pack produced no output')
+        nameprefix = repo('objects/pack/%s' % out)
+        if os.path.exists(self.filename + '.map'):
+            os.unlink(self.filename + '.map')
+        os.rename(self.filename + '.pack', nameprefix + '.pack')
+        os.rename(self.filename + '.idx', nameprefix + '.idx')
+        return nameprefix
+
+    def close(self):
+        return self._end()
+
+
+def _git_date(date):
+    return time.strftime('%s %z', time.localtime(date))
+
+
+def _gitenv():
+    os.environ['GIT_DIR'] = os.path.abspath(repo())
+
+
+def list_refs(refname = None):
+    argv = ['git', 'show-ref', '--']
+    if refname:
+        argv += [refname]
+    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
+    out = p.stdout.read().strip()
+    rv = p.wait()  # not fatal
+    if rv:
+        assert(not out)
+    if out:
+        for d in out.split('\n'):
+            (sha, name) = d.split(' ', 1)
+            yield (name, sha.decode('hex'))
+
+
+def read_ref(refname):
+    l = list(list_refs(refname))
+    if l:
+        assert(len(l) == 1)
+        return l[0][1]
+    else:
+        return None
+
+
+def rev_list(ref):
+    assert(not ref.startswith('-'))
+    argv = ['git', 'rev-list', '--pretty=format:%ct', ref, '--']
+    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
+    commit = None
+    for row in p.stdout:
+        s = row.strip()
+        if s.startswith('commit '):
+            commit = s[7:].decode('hex')
+        else:
+            date = int(s)
+            yield (date, commit)
+    rv = p.wait()  # not fatal
+    if rv:
+        raise GitError, 'git rev-list returned error %d' % rv
+
+
+def update_ref(refname, newval, oldval):
+    if not oldval:
+        oldval = ''
+    assert(refname.startswith('refs/heads/'))
+    p = subprocess.Popen(['git', 'update-ref', refname,
+                          newval.encode('hex'), oldval.encode('hex')],
+                         preexec_fn = _gitenv)
+    _git_wait('git update-ref', p)
+
+
+def guess_repo(path=None):
+    global repodir
+    if path:
+        repodir = path
+    if not repodir:
+        repodir = os.environ.get('BUP_DIR')
+        if not repodir:
+            repodir = os.path.expanduser('~/.bup')
+
+
+def init_repo(path=None):
+    guess_repo(path)
+    d = repo()
+    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
+        raise GitError('"%s" exists but is not a directory\n' % d)
+    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
+                         preexec_fn = _gitenv)
+    _git_wait('git init', p)
+    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
+                         stdout=sys.stderr, preexec_fn = _gitenv)
+    _git_wait('git config', p)
+
+
+def check_repo_or_die(path=None):
+    guess_repo(path)
+    if not os.path.isdir(repo('objects/pack/.')):
+        if repodir == home_repodir:
+            init_repo()
+        else:
+            log('error: %r is not a bup/git repository\n' % repo())
+            sys.exit(15)
+
+
+def _treeparse(buf):
+    ofs = 0
+    while ofs < len(buf):
+        z = buf[ofs:].find('\0')
+        assert(z > 0)
+        spl = buf[ofs:ofs+z].split(' ', 1)
+        assert(len(spl) == 2)
+        sha = buf[ofs+z+1:ofs+z+1+20]
+        ofs += z+1+20
+        yield (spl[0], spl[1], sha)
+
+
+_ver = None
+def ver():
+    global _ver
+    if not _ver:
+        p = subprocess.Popen(['git', '--version'],
+                             stdout=subprocess.PIPE)
+        gvs = p.stdout.read()
+        _git_wait('git --version', p)
+        m = re.match(r'git version (\S+\.\S+)', gvs)
+        if not m:
+            raise GitError('git --version weird output: %r' % gvs)
+        _ver = tuple(m.group(1).split('.'))
+    needed = ('1','5', '3', '1')
+    if _ver < needed:
+        raise GitError('git version %s or higher is required; you have %s'
+                       % ('.'.join(needed), '.'.join(_ver)))
+    return _ver
+
+
+def _git_wait(cmd, p):
+    rv = p.wait()
+    if rv != 0:
+        raise GitError('%s returned %d' % (cmd, rv))
+
+
+def _git_capture(argv):
+    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
+    r = p.stdout.read()
+    _git_wait(repr(argv), p)
+    return r
+
+
+_ver_warned = 0
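+# stream objects out of the repo through one long-lived 'git cat-file --batch'
+# process (or a slow per-object fallback for git older than 1.5.6)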
+class CatPipe:
+    def __init__(self):
+        global _ver_warned
+        wanted = ('1','5','6')
+        if ver() < wanted:
+            if not _ver_warned:
+                log('warning: git version < %s; bup will be slow.\n'
+                    % '.'.join(wanted))
+                _ver_warned = 1
+            self.get = self._slow_get
+        else:
+            self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
+                                      stdin=subprocess.PIPE, 
+                                      stdout=subprocess.PIPE,
+                                      preexec_fn = _gitenv)
+            self.get = self._fast_get
+            self.inprogress = None
+
+    def _fast_get(self, id):
+        if self.inprogress:
+            log('_fast_get: opening %r while %r is open' 
+                % (id, self.inprogress))
+        assert(not self.inprogress)
+        assert(id.find('\n') < 0)
+        assert(id.find('\r') < 0)
+        assert(id[0] != '-')
+        self.inprogress = id
+        self.p.stdin.write('%s\n' % id)
+        hdr = self.p.stdout.readline()
+        if hdr.endswith(' missing\n'):
+            raise KeyError('blob %r is missing' % id)
+        spl = hdr.split(' ')
+        if len(spl) != 3 or len(spl[0]) != 40:
+            raise GitError('expected blob, got %r' % spl)
+        (hex, type, size) = spl
+
+        def ondone():
+            assert(self.p.stdout.readline() == '\n')
+            self.inprogress = None
+
+        it = AutoFlushIter(chunkyreader(self.p.stdout, int(spl[2])),
+                           ondone = ondone)
+        yield type
+        for blob in it:
+            yield blob
+        del it
+
+    def _slow_get(self, id):
+        assert(id.find('\n') < 0)
+        assert(id.find('\r') < 0)
+        assert(id[0] != '-')
+        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
+        yield type
+
+        p = subprocess.Popen(['git', 'cat-file', type, id],
+                             stdout=subprocess.PIPE,
+                             preexec_fn = _gitenv)
+        for blob in chunkyreader(p.stdout):
+            yield blob
+        _git_wait('git cat-file', p)
+
+    def _join(self, it):
+        type = it.next()
+        if type == 'blob':
+            for blob in it:
+                yield blob
+        elif type == 'tree':
+            treefile = ''.join(it)
+            for (mode, name, sha) in _treeparse(treefile):
+                for blob in self.join(sha.encode('hex')):
+                    yield blob
+        elif type == 'commit':
+            treeline = ''.join(it).split('\n')[0]
+            assert(treeline.startswith('tree '))
+            for blob in self.join(treeline[5:]):
+                yield blob
+        else:
+            raise GitError('invalid object type %r: expected blob/tree/commit'
+                           % type)
+
+    def join(self, id):
+        try:
+            for d in self._join(self.get(id)):
+                yield d
+        except StopIteration:
+            log('booger!\n')
+        
+
+def cat(id):
+    c = CatPipe()
+    for d in c.join(id):
+        yield d
diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py
new file mode 100644 (file)
index 0000000..f85011d
--- /dev/null
+++ b/lib/bup/hashsplit.py
@@ -0,0 +1,158 @@
+import sys, math
+from bup import git, _hashsplit
+from bup.helpers import *
+
+BLOB_LWM = 8192*2
+BLOB_MAX = BLOB_LWM*2
+BLOB_HWM = 1024*1024
+MAX_PER_TREE = 256
+progress_callback = None
+max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
+max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
+fanout = 16
+
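+# A simple byte FIFO: put() appends data, peek()/get()/eat() consume from
+# the front.  buffer() slices keep peek() and get() cheap; put() compacts
+# the already-eaten prefix away.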
+class Buf:
+    def __init__(self):
+        self.data = ''
+        self.start = 0
+
+    def put(self, s):
+        if s:
+            self.data = buffer(self.data, self.start) + s
+            self.start = 0
+            
+    def peek(self, count):
+        return buffer(self.data, self.start, count)
+    
+    def eat(self, count):
+        self.start += count
+
+    def get(self, count):
+        v = buffer(self.data, self.start, count)
+        self.start += count
+        return v
+
+    def used(self):
+        return len(self.data) - self.start
+
+
+def splitbuf(buf):
+    b = buf.peek(buf.used())
+    (ofs, bits) = _hashsplit.splitbuf(b)
+    if ofs:
+        buf.eat(ofs)
+        return (buffer(b, 0, ofs), bits)
+    return (None, 0)
+
+
+def blobiter(files):
+    for f in files:
+        while 1:
+            b = f.read(BLOB_HWM)
+            if not b:
+                break
+            yield b
+
+
+def drainbuf(buf, finalize):
+    while 1:
+        (blob, bits) = splitbuf(buf)
+        if blob:
+            yield (blob, bits)
+        else:
+            break
+    while buf.used() > BLOB_MAX:
+        # limit max blob size
+        yield (buf.get(BLOB_MAX), 0)
+    if finalize and buf.used():
+        yield (buf.get(buf.used()), 0)
+
+
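+# Yield (blob, bits) chunks for the concatenated input files: keep the
+# buffer filled to at least BLOB_HWM, cut at rolling-checksum boundaries
+# found by _hashsplit.splitbuf(), and cap unsplit runs at BLOB_MAX.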
+def hashsplit_iter(files):
+    assert(BLOB_HWM > BLOB_MAX)
+    buf = Buf()
+    fi = blobiter(files)
+    while 1:
+        for i in drainbuf(buf, finalize=False):
+            yield i
+        while buf.used() < BLOB_HWM:
+            bnew = next(fi)
+            if not bnew:
+                # eof
+                for i in drainbuf(buf, finalize=True):
+                    yield i
+                return
+            buf.put(bnew)
+
+
+total_split = 0
+def _split_to_blobs(w, files):
+    global total_split
+    for (blob, bits) in hashsplit_iter(files):
+        sha = w.new_blob(blob)
+        total_split += len(blob)
+        if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
+            w.breakpoint()
+        if progress_callback:
+            progress_callback(len(blob))
+        yield (sha, len(blob), bits)
+
+
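+# Name each chunk by its byte offset as 16-digit hex so the tree entries
+# sort in stream order; e.g. the chunk at offset 4096 is named
+# '0000000000001000'.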
+def _make_shalist(l):
+    ofs = 0
+    shalist = []
+    for (mode, sha, size) in l:
+        shalist.append((mode, '%016x' % ofs, sha))
+        ofs += size
+    total = ofs
+    return (shalist, total)
+
+
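+# Fold finished levels of the fanout stack upward: each level below n (or
+# any level holding more than MAX_PER_TREE entries) is written as a git
+# tree and pushed as a single '40000' entry onto the level above it; a
+# level with only one entry is passed up as-is.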
+def _squish(w, stacks, n):
+    i = 0
+    while i<n or len(stacks[i]) > MAX_PER_TREE:
+        while len(stacks) <= i+1:
+            stacks.append([])
+        if len(stacks[i]) == 1:
+            stacks[i+1] += stacks[i]
+        elif stacks[i]:
+            (shalist, size) = _make_shalist(stacks[i])
+            tree = w.new_tree(shalist)
+            stacks[i+1].append(('40000', tree, size))
+        stacks[i] = []
+        i += 1
+
+
+def split_to_shalist(w, files):
+    sl = _split_to_blobs(w, files)
+    if not fanout:
+        shal = []
+        for (sha,size,bits) in sl:
+            shal.append(('100644', sha, size))
+        return _make_shalist(shal)[0]
+    else:
+        base_bits = _hashsplit.blobbits()
+        fanout_bits = int(math.log(fanout, 2))
+        def bits_to_idx(n):
+            assert(n >= base_bits)
+            return (n - base_bits)/fanout_bits
+        stacks = [[]]
+        for (sha,size,bits) in sl:
+            assert(bits <= 32)
+            stacks[0].append(('100644', sha, size))
+            if bits > base_bits:
+                _squish(w, stacks, bits_to_idx(bits))
+        #log('stacks: %r\n' % [len(i) for i in stacks])
+        _squish(w, stacks, len(stacks)-1)
+        #log('stacks: %r\n' % [len(i) for i in stacks])
+        return _make_shalist(stacks[-1])[0]
+
+
+def split_to_blob_or_tree(w, files):
+    shalist = list(split_to_shalist(w, files))
+    if len(shalist) == 1:
+        return (shalist[0][0], shalist[0][2])
+    elif len(shalist) == 0:
+        return ('100644', w.new_blob(''))
+    else:
+        return ('40000', w.new_tree(shalist))
diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
new file mode 100644 (file)
index 0000000..75cf09c
--- /dev/null
+++ b/lib/bup/helpers.py
@@ -0,0 +1,269 @@
+import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
+
+
+def log(s):
+    sys.stderr.write(s)
+
+
+def mkdirp(d):
+    try:
+        os.makedirs(d)
+    except OSError, e:
+        if e.errno == errno.EEXIST:
+            pass
+        else:
+            raise
+
+
+def next(it):
+    try:
+        return it.next()
+    except StopIteration:
+        return None
+    
+    
+def unlink(f):
+    try:
+        os.unlink(f)
+    except OSError, e:
+        if e.errno == errno.ENOENT:
+            pass  # it doesn't exist, that's what you asked for
+
+
+def readpipe(argv):
+    p = subprocess.Popen(argv, stdout=subprocess.PIPE)
+    r = p.stdout.read()
+    p.wait()
+    return r
+
+
+# FIXME: this function isn't very generic, because it splits the filename
+# in an odd way and depends on a terminating '/' to indicate directories.
+# But it's used in a couple of places, so let's put it here.
+def pathsplit(p):
+    l = p.split('/')
+    l = [i+'/' for i in l[:-1]] + l[-1:]
+    if l[-1] == '':
+        l.pop()  # extra blank caused by terminating '/'
+    return l
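+# Example: pathsplit('/usr/local/bin') == ['/', 'usr/', 'local/', 'bin'],
+# while pathsplit('etc/passwd/') == ['etc/', 'passwd/'].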
+
+
+# like os.path.realpath, but doesn't follow a symlink for the last element.
+# (ie. if 'p' itself is itself a symlink, this one won't follow it)
+def realpath(p):
+    try:
+        st = os.lstat(p)
+    except OSError:
+        st = None
+    if st and stat.S_ISLNK(st.st_mode):
+        (dir, name) = os.path.split(p)
+        dir = os.path.realpath(dir)
+        out = os.path.join(dir, name)
+    else:
+        out = os.path.realpath(p)
+    #log('realpathing:%r,%r\n' % (p, out))
+    return out
+
+
+_username = None
+def username():
+    global _username
+    if not _username:
+        uid = os.getuid()
+        try:
+            _username = pwd.getpwuid(uid)[0]
+        except KeyError:
+            _username = 'user%d' % uid
+    return _username
+
+
+_userfullname = None
+def userfullname():
+    global _userfullname
+    if not _userfullname:
+        uid = os.getuid()
+        try:
+            _userfullname = pwd.getpwuid(uid)[4].split(',')[0]
+        except KeyError:
+            _userfullname = 'user%d' % uid
+    return _userfullname
+
+
+_hostname = None
+def hostname():
+    global _hostname
+    if not _hostname:
+        _hostname = socket.getfqdn()
+    return _hostname
+
+
+class NotOk(Exception):
+    pass
+
+class Conn:
+    def __init__(self, inp, outp):
+        self.inp = inp
+        self.outp = outp
+
+    def read(self, size):
+        self.outp.flush()
+        return self.inp.read(size)
+
+    def readline(self):
+        self.outp.flush()
+        return self.inp.readline()
+
+    def write(self, data):
+        #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
+        self.outp.write(data)
+
+    def has_input(self):
+        [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
+        if rl:
+            assert(rl[0] == self.inp.fileno())
+            return True
+        else:
+            return None
+
+    def ok(self):
+        self.write('\nok\n')
+
+    def error(self, s):
+        s = re.sub(r'\s+', ' ', str(s))
+        self.write('\nerror %s\n' % s)
+
+    def _check_ok(self, onempty):
+        self.outp.flush()
+        rl = ''
+        for rl in linereader(self.inp):
+            #log('%d got line: %r\n' % (os.getpid(), rl))
+            if not rl:  # empty line
+                continue
+            elif rl == 'ok':
+                return None
+            elif rl.startswith('error '):
+                #log('client: error: %s\n' % rl[6:])
+                return NotOk(rl[6:])
+            else:
+                onempty(rl)
+        raise Exception('server exited unexpectedly; see errors above')
+
+    def drain_and_check_ok(self):
+        def onempty(rl):
+            pass
+        return self._check_ok(onempty)
+
+    def check_ok(self):
+        def onempty(rl):
+            raise Exception('expected "ok", got %r' % rl)
+        return self._check_ok(onempty)
+
+
+def linereader(f):
+    while 1:
+        line = f.readline()
+        if not line:
+            break
+        yield line[:-1]
+
+
+def chunkyreader(f, count = None):
+    if count != None:
+        while count > 0:
+            b = f.read(min(count, 65536))
+            if not b:
+                raise IOError('EOF with %d bytes remaining' % count)
+            yield b
+            count -= len(b)
+    else:
+        while 1:
+            b = f.read(65536)
+            if not b: break
+            yield b
+
+
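+# Wrap an iterator so that even if the consumer abandons it early, __del__
+# drains the remaining items and then calls ondone(); CatPipe._fast_get
+# uses this to keep its 'cat-file --batch' stream in sync.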
+class AutoFlushIter:
+    def __init__(self, it, ondone = None):
+        self.it = it
+        self.ondone = ondone
+
+    def __iter__(self):
+        return self
+        
+    def next(self):
+        return self.it.next()
+        
+    def __del__(self):
+        for i in self.it:
+            pass
+        if self.ondone:
+            self.ondone()
+
+
+def slashappend(s):
+    if s and not s.endswith('/'):
+        return s + '/'
+    else:
+        return s
+
+
+def _mmap_do(f, len, flags, prot):
+    if not len:
+        st = os.fstat(f.fileno())
+        len = st.st_size
+    map = mmap.mmap(f.fileno(), len, flags, prot)
+    f.close()  # map will persist beyond file close
+    return map
+
+
+def mmap_read(f, len = 0):
+    return _mmap_do(f, len, mmap.MAP_PRIVATE, mmap.PROT_READ)
+
+
+def mmap_readwrite(f, len = 0):
+    return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
+
+
+def parse_num(s):
+    g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
+    if not g:
+        raise ValueError("can't parse %r as a number" % s)
+    (val, unit) = g.groups()
+    num = float(val)
+    unit = unit.lower()
+    if unit in ['t', 'tb']:
+        mult = 1024*1024*1024*1024
+    elif unit in ['g', 'gb']:
+        mult = 1024*1024*1024
+    elif unit in ['m', 'mb']:
+        mult = 1024*1024
+    elif unit in ['k', 'kb']:
+        mult = 1024
+    elif unit in ['', 'b']:
+        mult = 1
+    else:
+        raise ValueError("invalid unit %r in number %r" % (unit, s))
+    return int(num*mult)
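+# Examples: parse_num('20k') == 20480, parse_num('1.5G') == 1610612736,
+# parse_num('42') == 42.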
+
+
+# count the number of elements in an iterator (consumes the iterator)
+def count(l):
+    return reduce(lambda x,y: x+1, l, 0)
+
+
+def atoi(s):
+    try:
+        return int(s or '0')
+    except ValueError:
+        return 0
+
+
+saved_errors = []
+def add_error(e):
+    saved_errors.append(e)
+    log('%-70s\n' % e)
+
+istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY'))
+def progress(s):
+    if istty:
+        log(s)
diff --git a/lib/bup/index.py b/lib/bup/index.py
new file mode 100644 (file)
index 0000000..39cae88
--- /dev/null
+++ b/lib/bup/index.py
@@ -0,0 +1,426 @@
+import os, stat, time, struct, tempfile
+from bup.helpers import *
+
+EMPTY_SHA = '\0'*20
+FAKE_SHA = '\x01'*20
+INDEX_HDR = 'BUPI\0\0\0\2'
+INDEX_SIG = '!IIIIIQII20sHII'
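+# INDEX_SIG field order (see Entry.packed below): dev, ctime, mtime, uid,
+# gid, size, mode, gitmode, sha, flags, children_ofs, children_n.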
+ENTLEN = struct.calcsize(INDEX_SIG)
+FOOTER_SIG = '!Q'
+FOOTLEN = struct.calcsize(FOOTER_SIG)
+
+IX_EXISTS = 0x8000
+IX_HASHVALID = 0x4000
+
+class Error(Exception):
+    pass
+
+
+class Level:
+    def __init__(self, ename, parent):
+        self.parent = parent
+        self.ename = ename
+        self.list = []
+        self.count = 0
+
+    def write(self, f):
+        (ofs,n) = (f.tell(), len(self.list))
+        if self.list:
+            count = len(self.list)
+            #log('popping %r with %d entries\n' 
+            #    % (''.join(self.ename), count))
+            for e in self.list:
+                e.write(f)
+            if self.parent:
+                self.parent.count += count + self.count
+        return (ofs,n)
+
+
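+# Move the current Level to 'ename': close any finished directory levels
+# (writing their entries to f) and open new ones.  Entries arrive in
+# reverse-sorted order, so children are always written before their parent.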
+def _golevel(level, f, ename, newentry):
+    # close nodes back up the tree
+    assert(level)
+    while ename[:len(level.ename)] != level.ename:
+        n = BlankNewEntry(level.ename[-1])
+        (n.children_ofs,n.children_n) = level.write(f)
+        level.parent.list.append(n)
+        level = level.parent
+
+    # create nodes down the tree
+    while len(level.ename) < len(ename):
+        level = Level(ename[:len(level.ename)+1], level)
+
+    # are we in precisely the right place?
+    assert(ename == level.ename)
+    n = newentry or BlankNewEntry(ename and level.ename[-1] or None)
+    (n.children_ofs,n.children_n) = level.write(f)
+    if level.parent:
+        level.parent.list.append(n)
+    level = level.parent
+
+    return level
+
+
+class Entry:
+    def __init__(self, basename, name):
+        self.basename = str(basename)
+        self.name = str(name)
+        self.children_ofs = 0
+        self.children_n = 0
+
+    def __repr__(self):
+        return ("(%s,0x%04x,%d,%d,%d,%d,%d,%s/%s,0x%04x,0x%08x/%d)" 
+                % (self.name, self.dev,
+                   self.ctime, self.mtime, self.uid, self.gid,
+                   self.size, oct(self.mode), oct(self.gitmode),
+                   self.flags, self.children_ofs, self.children_n))
+
+    def packed(self):
+        return struct.pack(INDEX_SIG,
+                           self.dev, self.ctime, self.mtime, 
+                           self.uid, self.gid, self.size, self.mode,
+                           self.gitmode, self.sha, self.flags,
+                           self.children_ofs, self.children_n)
+
+    def from_stat(self, st, tstart):
+        old = (self.dev, self.ctime, self.mtime,
+               self.uid, self.gid, self.size, self.flags & IX_EXISTS)
+        new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
+               st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
+        self.dev = st.st_dev
+        self.ctime = int(st.st_ctime)
+        self.mtime = int(st.st_mtime)
+        self.uid = st.st_uid
+        self.gid = st.st_gid
+        self.size = st.st_size
+        self.mode = st.st_mode
+        self.flags |= IX_EXISTS
+        if int(st.st_ctime) >= tstart or old != new \
+              or self.sha == EMPTY_SHA or not self.gitmode:
+            self.invalidate()
+
+    def is_valid(self):
+        f = IX_HASHVALID|IX_EXISTS
+        return (self.flags & f) == f
+
+    def invalidate(self):
+        self.flags &= ~IX_HASHVALID
+
+    def validate(self, gitmode, sha):
+        assert(sha)
+        assert(gitmode)
+        self.gitmode = gitmode
+        self.sha = sha
+        self.flags |= IX_HASHVALID|IX_EXISTS
+
+    def exists(self):
+        return not self.is_deleted()
+
+    def is_deleted(self):
+        return (self.flags & IX_EXISTS) == 0
+
+    def set_deleted(self):
+        if self.flags & IX_EXISTS:
+            self.flags &= ~(IX_EXISTS | IX_HASHVALID)
+
+    def is_real(self):
+        return not self.is_fake()
+
+    def is_fake(self):
+        return not self.ctime
+
+    def __cmp__(a, b):
+        return (cmp(a.name, b.name)
+                or -cmp(a.is_valid(), b.is_valid())
+                or -cmp(a.is_fake(), b.is_fake()))
+
+    def write(self, f):
+        f.write(self.basename + '\0' + self.packed())
+
+
+class NewEntry(Entry):
+    def __init__(self, basename, name, dev, ctime, mtime, uid, gid,
+                 size, mode, gitmode, sha, flags, children_ofs, children_n):
+        Entry.__init__(self, basename, name)
+        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
+         self.size, self.mode, self.gitmode, self.sha,
+         self.flags, self.children_ofs, self.children_n
+         ) = (dev, int(ctime), int(mtime), uid, gid,
+              size, mode, gitmode, sha, flags, children_ofs, children_n)
+
+
+class BlankNewEntry(NewEntry):
+    def __init__(self, basename):
+        NewEntry.__init__(self, basename, basename,
+                          0, 0, 0, 0, 0, 0, 0,
+                          0, EMPTY_SHA, 0, 0, 0)
+
+
+class ExistingEntry(Entry):
+    def __init__(self, parent, basename, name, m, ofs):
+        Entry.__init__(self, basename, name)
+        self.parent = parent
+        self._m = m
+        self._ofs = ofs
+        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
+         self.size, self.mode, self.gitmode, self.sha,
+         self.flags, self.children_ofs, self.children_n
+         ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN)))
+
+    def repack(self):
+        self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
+        if self.parent and not self.is_valid():
+            self.parent.invalidate()
+            self.parent.repack()
+
+    def iter(self, name=None, wantrecurse=None):
+        dname = name
+        if dname and not dname.endswith('/'):
+            dname += '/'
+        ofs = self.children_ofs
+        assert(ofs <= len(self._m))
+        assert(self.children_n < 1000000)
+        for i in xrange(self.children_n):
+            eon = self._m.find('\0', ofs)
+            assert(eon >= 0)
+            assert(eon >= ofs)
+            assert(eon > ofs)
+            basename = str(buffer(self._m, ofs, eon-ofs))
+            child = ExistingEntry(self, basename, self.name + basename,
+                                  self._m, eon+1)
+            if (not dname
+                 or child.name.startswith(dname)
+                 or child.name.endswith('/') and dname.startswith(child.name)):
+                if not wantrecurse or wantrecurse(child):
+                    for e in child.iter(name=name, wantrecurse=wantrecurse):
+                        yield e
+            if not name or child.name == name or child.name.startswith(dname):
+                yield child
+            ofs = eon + 1 + ENTLEN
+
+    def __iter__(self):
+        return self.iter()
+            
+
+class Reader:
+    def __init__(self, filename):
+        self.filename = filename
+        self.m = ''
+        self.writable = False
+        self.count = 0
+        f = None
+        try:
+            f = open(filename, 'r+')
+        except IOError, e:
+            if e.errno == errno.ENOENT:
+                pass
+            else:
+                raise
+        if f:
+            b = f.read(len(INDEX_HDR))
+            if b != INDEX_HDR:
+                log('warning: %s: header: expected %r, got %r\n'
+                                 % (filename, INDEX_HDR, b))
+            else:
+                st = os.fstat(f.fileno())
+                if st.st_size:
+                    self.m = mmap_readwrite(f)
+                    self.writable = True
+                    self.count = struct.unpack(FOOTER_SIG,
+                          str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
+
+    def __del__(self):
+        self.close()
+
+    def __len__(self):
+        return int(self.count)
+
+    def forward_iter(self):
+        ofs = len(INDEX_HDR)
+        while ofs+ENTLEN <= len(self.m)-FOOTLEN:
+            eon = self.m.find('\0', ofs)
+            assert(eon >= 0)
+            assert(eon >= ofs)
+            assert(eon > ofs)
+            basename = str(buffer(self.m, ofs, eon-ofs))
+            yield ExistingEntry(None, basename, basename, self.m, eon+1)
+            ofs = eon + 1 + ENTLEN
+
+    def iter(self, name=None, wantrecurse=None):
+        if len(self.m) > len(INDEX_HDR)+ENTLEN:
+            dname = name
+            if dname and not dname.endswith('/'):
+                dname += '/'
+            root = ExistingEntry(None, '/', '/',
+                                 self.m, len(self.m)-FOOTLEN-ENTLEN)
+            for sub in root.iter(name=name, wantrecurse=wantrecurse):
+                yield sub
+            if not dname or dname == root.name:
+                yield root
+
+    def __iter__(self):
+        return self.iter()
+
+    def exists(self):
+        return self.m
+
+    def save(self):
+        if self.writable and self.m:
+            self.m.flush()
+
+    def close(self):
+        self.save()
+        if self.writable and self.m:
+            self.m = None
+            self.writable = False
+
+    def filter(self, prefixes, wantrecurse=None):
+        for (rp, path) in reduce_paths(prefixes):
+            for e in self.iter(rp, wantrecurse=wantrecurse):
+                assert(e.name.startswith(rp))
+                name = path + e.name[len(rp):]
+                yield (name, e)
+
+
+class Writer:
+    def __init__(self, filename):
+        self.rootlevel = self.level = Level([], None)
+        self.f = None
+        self.count = 0
+        self.lastfile = None
+        self.filename = None
+        self.filename = filename = realpath(filename)
+        (dir,name) = os.path.split(filename)
+        (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
+        self.f = os.fdopen(ffd, 'wb', 65536)
+        self.f.write(INDEX_HDR)
+
+    def __del__(self):
+        self.abort()
+
+    def abort(self):
+        f = self.f
+        self.f = None
+        if f:
+            f.close()
+            os.unlink(self.tmpname)
+
+    def flush(self):
+        if self.level:
+            self.level = _golevel(self.level, self.f, [], None)
+            self.count = self.rootlevel.count
+            if self.count:
+                self.count += 1
+            self.f.write(struct.pack(FOOTER_SIG, self.count))
+            self.f.flush()
+        assert(self.level == None)
+
+    def close(self):
+        self.flush()
+        f = self.f
+        self.f = None
+        if f:
+            f.close()
+            os.rename(self.tmpname, self.filename)
+
+    def _add(self, ename, entry):
+        if self.lastfile and self.lastfile <= ename:
+            raise Error('%r must come before %r'
+                             % (''.join(ename), ''.join(self.lastfile)))
+        self.lastfile = ename
+        self.level = _golevel(self.level, self.f, ename, entry)
+
+    def add(self, name, st, hashgen = None):
+        endswith = name.endswith('/')
+        ename = pathsplit(name)
+        basename = ename[-1]
+        #log('add: %r %r\n' % (basename, name))
+        flags = IX_EXISTS
+        sha = None
+        if hashgen:
+            (gitmode, sha) = hashgen(name)
+            flags |= IX_HASHVALID
+        else:
+            (gitmode, sha) = (0, EMPTY_SHA)
+        if st:
+            isdir = stat.S_ISDIR(st.st_mode)
+            assert(isdir == endswith)
+            e = NewEntry(basename, name, st.st_dev, int(st.st_ctime),
+                         int(st.st_mtime), st.st_uid, st.st_gid,
+                         st.st_size, st.st_mode, gitmode, sha, flags,
+                         0, 0)
+        else:
+            assert(endswith)
+            e = BlankNewEntry(basename)
+            e.gitmode = gitmode
+            e.sha = sha
+            e.flags = flags
+        self._add(ename, e)
+
+    def add_ixentry(self, e):
+        e.children_ofs = e.children_n = 0
+        self._add(pathsplit(e.name), e)
+
+    def new_reader(self):
+        self.flush()
+        return Reader(self.tmpname)
+
+
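+# Normalize and prune a list of paths: realpath each one, append '/' to
+# directories, and drop any path already contained in an earlier one.
+# E.g. (assuming both directories exist) ['/usr/local', '/usr'] reduces
+# to [('/usr/', '/usr/')], returned in reverse-sorted order.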
+def reduce_paths(paths):
+    xpaths = []
+    for p in paths:
+        rp = realpath(p)
+        try:
+            st = os.lstat(rp)
+            if stat.S_ISDIR(st.st_mode):
+                rp = slashappend(rp)
+                p = slashappend(p)
+        except OSError, e:
+            if e.errno != errno.ENOENT:
+                raise
+        xpaths.append((rp, p))
+    xpaths.sort()
+
+    paths = []
+    prev = None
+    for (rp, p) in xpaths:
+        if prev and (prev == rp 
+                     or (prev.endswith('/') and rp.startswith(prev))):
+            continue # already superceded by previous path
+        paths.append((rp, p))
+        prev = rp
+    paths.sort(reverse=True)
+    return paths
+
+
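+# Merge several sorted index iterators into one descending stream, skipping
+# duplicate names that appear back to back; l.sort() + l.pop() always takes
+# the largest remaining entry.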
+class MergeIter:
+    def __init__(self, iters):
+        self.iters = iters
+
+    def __len__(self):
+        # FIXME: doesn't remove duplicated entries between iters.
+        # That only happens for parent directories, but will mean the
+        # actual iteration returns fewer entries than this function counts.
+        return sum(len(it) for it in self.iters)
+
+    def __iter__(self):
+        total = len(self)
+        l = [iter(it) for it in self.iters]
+        l = [(next(it),it) for it in l]
+        l = filter(lambda x: x[0], l)
+        count = 0
+        lastname = None
+        while l:
+            if not (count % 1024):
+                progress('bup: merging indexes (%d/%d)\r' % (count, total))
+            l.sort()
+            (e,it) = l.pop()
+            if not e:
+                continue
+            if e.name != lastname:
+                yield e
+                lastname = e.name
+            n = next(it)
+            if n:
+                l.append((n,it))
+            count += 1
+        log('bup: merging indexes (%d/%d), done.\n' % (count, total))
diff --git a/lib/bup/options.py b/lib/bup/options.py
new file mode 100644 (file)
index 0000000..7ae529f
--- /dev/null
+++ b/lib/bup/options.py
@@ -0,0 +1,119 @@
+import textwrap, getopt, re
+from bup.helpers import *
+
+class OptDict:
+    def __init__(self):
+        self._opts = {}
+
+    def __setitem__(self, k, v):
+        self._opts[k] = v
+        
+    def __getitem__(self, k):
+        return self._opts[k]
+
+    def __getattr__(self, k):
+        return self[k]
+
+
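+# Parse an 'optspec' DSL: usage synopses up to a line containing only '--',
+# then one option per line, e.g.
+#   n,number=  number of objects per cycle
+# declares -n/--number taking a value ('=') with that help text.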
+class Options:
+    def __init__(self, exe, optspec):
+        self.exe = exe
+        self.optspec = optspec
+        self._aliases = {}
+        self._shortopts = 'h?'
+        self._longopts = ['help']
+        self._hasparms = {}
+        self._usagestr = self._gen_usage()
+        
+    def _gen_usage(self):
+        out = []
+        lines = self.optspec.strip().split('\n')
+        lines.reverse()
+        first_syn = True
+        while lines:
+            l = lines.pop()
+            if l == '--': break
+            out.append('%s: %s\n' % (first_syn and 'usage' or '   or', l))
+            first_syn = False
+        out.append('\n')
+        while lines:
+            l = lines.pop()
+            if l.startswith(' '):
+                out.append('\n%s\n' % l.lstrip())
+            elif l:
+                (flags, extra) = l.split(' ', 1)
+                extra = extra.strip()
+                if flags.endswith('='):
+                    flags = flags[:-1]
+                    has_parm = 1
+                else:
+                    has_parm = 0
+                flagl = flags.split(',')
+                flagl_nice = []
+                for f in flagl:
+                    f_nice = re.sub(r'\W', '_', f)
+                    self._aliases[f] = flagl[0]
+                    self._aliases[f_nice] = flagl[0]
+                    self._hasparms[f] = has_parm
+                    if len(f) == 1:
+                        self._shortopts += f + (has_parm and ':' or '')
+                        flagl_nice.append('-' + f)
+                    else:
+                        assert(not f.startswith('no-')) # supported implicitly
+                        self._longopts.append(f + (has_parm and '=' or ''))
+                        self._longopts.append('no-' + f)
+                        flagl_nice.append('--' + f)
+                flags_nice = ', '.join(flagl_nice)
+                if has_parm:
+                    flags_nice += ' ...'
+                prefix = '    %-20s  ' % flags_nice
+                argtext = '\n'.join(textwrap.wrap(extra, width=70,
+                                                initial_indent=prefix,
+                                                subsequent_indent=' '*28))
+                out.append(argtext + '\n')
+            else:
+                out.append('\n')
+        return ''.join(out).rstrip() + '\n'
+    
+    def usage(self):
+        log(self._usagestr)
+        sys.exit(97)
+
+    def fatal(self, s):
+        log('error: %s\n' % s)
+        return self.usage()
+        
+    def parse(self, args):
+        try:
+            (flags,extra) = getopt.gnu_getopt(args,
+                                              self._shortopts, self._longopts)
+        except getopt.GetoptError, e:
+            self.fatal(e)
+
+        opt = OptDict()
+        for f in self._aliases.values():
+            opt[f] = None
+        for (k,v) in flags:
+            while k.startswith('-'):
+                k = k[1:]
+            if k in ['h', '?', 'help']:
+                self.usage()
+            if k.startswith('no-'):
+                k = self._aliases[k[3:]]
+                opt[k] = None
+            else:
+                k = self._aliases[k]
+                if not self._hasparms[k]:
+                    assert(v == '')
+                    opt[k] = (opt._opts.get(k) or 0) + 1
+                else:
+                    try:
+                        vv = int(v)
+                        if str(vv) == v:
+                            v = vv
+                    except ValueError:
+                        pass
+                    opt[k] = v
+        for (f1,f2) in self._aliases.items():
+            opt[f1] = opt[f2]
+        return (opt,flags,extra)
diff --git a/lib/bup/shquote.py b/lib/bup/shquote.py
new file mode 100644 (file)
index 0000000..dc339ec
--- /dev/null
+++ b/lib/bup/shquote.py
@@ -0,0 +1,87 @@
+import re
+
+q = "'"
+qq = '"'
+
+
+class QuoteError(Exception):
+    pass
+
+
+def _quotesplit(line):
+    inquote = None
+    inescape = None
+    wordstart = 0
+    word = ''
+    for i in range(len(line)):
+        c = line[i]
+        if inescape:
+            if inquote == q and c != q:
+                word += '\\'  # single-q backslashes can only quote single-q
+            word += c
+            inescape = False
+        elif c == '\\':
+            inescape = True
+        elif c == inquote:
+            inquote = None
+            # this is un-sh-like, but do it for sanity when autocompleting
+            yield (wordstart, word)
+            word = ''
+            wordstart = i+1
+        elif not inquote and not word and (c == q or c == qq):
+            # the 'not word' constraint on this is un-sh-like, but do it
+            # for sanity when autocompleting
+            inquote = c
+            wordstart = i
+        elif not inquote and c in [' ', '\n', '\r', '\t']:
+            if word:
+                yield (wordstart, word)
+            word = ''
+            wordstart = i+1
+        else:
+            word += c
+    if word:
+        yield (wordstart, word)
+    if inquote or inescape or word:
+        raise QuoteError()
+
+
+def quotesplit(line):
+    l = []
+    try:
+        for i in _quotesplit(line):
+            l.append(i)
+    except QuoteError:
+        pass
+    return l
+
+
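+# Return (quote_char, word) for the line's unfinished last word, or
+# (None, '') if the line parses cleanly.  When _quotesplit raises
+# QuoteError it has normally already yielded the trailing partial word,
+# so the for-loop targets below still hold it in the except clause.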
+def unfinished_word(line):
+    try:
+        for (wordstart,word) in _quotesplit(line):
+            pass
+    except QuoteError:
+        firstchar = line[wordstart]
+        if firstchar in [q, qq]:
+            return (firstchar, word)
+        else:
+            return (None, word)
+    else:
+        return (None, '')
+
+
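+# Example: with qtype == q, the word don't is rendered as 'don\'t'; with
+# qtype == None, spaces and quotes are simply backslash-escaped instead.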
+def quotify(qtype, word, terminate):
+    if qtype == qq:
+        return qq + word.replace(qq, '\\"') + (terminate and qq or '')
+    elif qtype == q:
+        return q + word.replace(q, "\\'") + (terminate and q or '')
+    else:
+        return re.sub(r'([\"\' \t\n\r])', r'\\\1', word)
+
+
+def what_to_add(qtype, origword, newword, terminate):
+    if not newword.startswith(origword):
+        return ''
+    else:
+        qold = quotify(qtype, origword, terminate=False)
+        return quotify(qtype, newword, terminate=terminate)[len(qold):]
diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py
new file mode 100644 (file)
index 0000000..efa0947
--- /dev/null
+++ b/lib/bup/vfs.py
@@ -0,0 +1,243 @@
+import os, re, stat, time
+from bup import git
+from bup.helpers import *
+
+EMPTY_SHA='\0'*20
+
+_cp = None
+def cp():
+    global _cp
+    if not _cp:
+        _cp = git.CatPipe()
+    return _cp
+
+class NodeError(Exception):
+    pass
+class NoSuchFile(NodeError):
+    pass
+class NotDir(NodeError):
+    pass
+class NotFile(NodeError):
+    pass
+class TooManySymlinks(NodeError):
+    pass
+
+
+class FileReader:
+    def __init__(self, node):
+        self.n = node
+        self.ofs = 0
+        self.size = self.n.size()
+
+    def seek(self, ofs):
+        if ofs > self.size:
+            self.ofs = self.size
+        elif ofs < 0:
+            self.ofs = 0
+        else:
+            self.ofs = ofs
+
+    def tell(self):
+        return self.ofs
+
+    def read(self, count = -1):
+        if count < 0:
+            count = self.size - self.ofs
+        buf = self.n.readbytes(self.ofs, count)
+        self.ofs += len(buf)
+        return buf
+
+
+class Node:
+    def __init__(self, parent, name, mode, hash):
+        self.parent = parent
+        self.name = name
+        self.mode = mode
+        self.hash = hash
+        self._subs = None
+        
+    def __cmp__(a, b):
+        return cmp(a.name or None, b.name or None)
+    
+    def __iter__(self):
+        return iter(self.subs())
+    
+    def fullname(self):
+        if self.parent:
+            return os.path.join(self.parent.fullname(), self.name)
+        else:
+            return self.name
+    
+    def _mksubs(self):
+        self._subs = {}
+        
+    def subs(self):
+        if self._subs == None:
+            self._mksubs()
+        return sorted(self._subs.values())
+        
+    def sub(self, name):
+        if self._subs == None:
+            self._mksubs()
+        ret = self._subs.get(name)
+        if not ret:
+            raise NoSuchFile("no file %r in %r" % (name, self.name))
+        return ret
+
+    def top(self):
+        if self.parent:
+            return self.parent.top()
+        else:
+            return self
+
+    def _lresolve(self, parts):
+        #log('_lresolve %r in %r\n' % (parts, self.name))
+        if not parts:
+            return self
+        (first, rest) = (parts[0], parts[1:])
+        if first == '.':
+            return self._lresolve(rest)
+        elif first == '..':
+            if not self.parent:
+                raise NoSuchFile("no parent dir for %r" % self.name)
+            return self.parent._lresolve(rest)
+        elif rest:
+            return self.sub(first)._lresolve(rest)
+        else:
+            return self.sub(first)
+
+    def lresolve(self, path):
+        start = self
+        if path.startswith('/'):
+            start = self.top()
+            path = path[1:]
+        parts = re.split(r'/+', path or '.')
+        if not parts[-1]:
+            parts[-1] = '.'
+        #log('parts: %r %r\n' % (path, parts))
+        return start._lresolve(parts)
+
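+    # Like lresolve(), but the trailing lresolve('') also dereferences a
+    # final symlink -- the stat() to lresolve()'s lstat().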
+    def resolve(self, path):
+        return self.lresolve(path).lresolve('')
+    
+    def nlinks(self):
+        if self._subs == None:
+            self._mksubs()
+        return 1
+
+    def size(self):
+        return 0
+
+    def open(self):
+        raise NotFile('%s is not a regular file' % self.name)
+    
+    def readbytes(self, ofs, count):
+        raise NotFile('%s is not a regular file' % self.name)
+    
+    def read(self, num = -1):
+        if num < 0:
+            num = self.size()
+        return self.readbytes(0, num)
+    
+    
+class File(Node):
+    def _content(self):
+        return cp().join(self.hash.encode('hex'))
+
+    def open(self):
+        return FileReader(self)
+    
+    def size(self):
+        # FIXME inefficient
+        return sum(len(blob) for blob in self._content())
+    
+    def readbytes(self, ofs, count):
+        # FIXME inefficient
+        buf = ''.join(self._content())
+        return buf[ofs:ofs+count]
+    
+
+_symrefs = 0
+class Symlink(File):
+    def __init__(self, parent, name, hash):
+        File.__init__(self, parent, name, 0120000, hash)
+
+    def readlink(self):
+        return self.read(1024)
+
+    def dereference(self):
+        global _symrefs
+        if _symrefs > 100:
+            raise TooManySymlinks('too many levels of symlinks: %r'
+                                  % self.fullname())
+        _symrefs += 1
+        try:
+            return self.parent.lresolve(self.readlink())
+        finally:
+            _symrefs -= 1
+
+    def _lresolve(self, parts):
+        return self.dereference()._lresolve(parts)
+    
+
+class FakeSymlink(Symlink):
+    def __init__(self, parent, name, toname):
+        Symlink.__init__(self, parent, name, EMPTY_SHA)
+        self.toname = toname
+        
+    def _content(self):
+        return self.toname
+    
+
+class Dir(Node):
+    def _mksubs(self):
+        self._subs = {}
+        it = cp().get(self.hash.encode('hex'))
+        type = it.next()
+        if type == 'commit':
+            del it
+            it = cp().get(self.hash.encode('hex') + ':')
+            type = it.next()
+        assert(type == 'tree')
+        for (mode,name,sha) in git._treeparse(''.join(it)):
+            mode = int(mode, 8)
+            if stat.S_ISDIR(mode):
+                self._subs[name] = Dir(self, name, mode, sha)
+            elif stat.S_ISLNK(mode):
+                self._subs[name] = Symlink(self, name, sha)
+            else:
+                self._subs[name] = File(self, name, mode, sha)
+                
+
+class CommitList(Node):
+    def __init__(self, parent, name, hash):
+        Node.__init__(self, parent, name, 040000, hash)
+        
+    def _mksubs(self):
+        self._subs = {}
+        revs = list(git.rev_list(self.hash.encode('hex')))
+        for (date, commit) in revs:
+            l = time.localtime(date)
+            ls = time.strftime('%Y-%m-%d-%H%M%S', l)
+            commithex = commit.encode('hex')
+            self._subs[commithex] = Dir(self, commithex, 040000, commit)
+            self._subs[ls] = FakeSymlink(self, ls, commit.encode('hex'))
+        latest = revs and max(revs) or None
+        if latest:
+            (date, commit) = latest
+            self._subs['latest'] = FakeSymlink(self, 'latest',
+                                               commit.encode('hex'))
+
+    
+class RefList(Node):
+    def __init__(self, parent):
+        Node.__init__(self, parent, '/', 040000, EMPTY_SHA)
+        
+    def _mksubs(self):
+        self._subs = {}
+        for (name,sha) in git.list_refs():
+            if name.startswith('refs/heads/'):
+                name = name[11:]
+                self._subs[name] = CommitList(self, name, sha)
+        
+
diff --git a/main.py b/main.py
new file mode 100755 (executable)
index 0000000..d5fab14
--- /dev/null
+++ b/main.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+import sys, os, subprocess
+
+argv = sys.argv
+exe = argv[0]
+exepath = os.path.split(exe)[0] or '.'
+
+# fix the PYTHONPATH to include our lib dir
+libpath = os.path.join(exepath, 'lib')
+sys.path[:0] = [libpath]
+os.environ['PYTHONPATH'] = libpath + ':' + os.environ.get('PYTHONPATH', '')
+
+from bup.helpers import *
+
+def usage():
+    log('Usage: bup <subcmd> <options...>\n\n')
+    log('Available subcommands:\n')
+    for c in sorted(os.listdir(exepath)):
+        if c.startswith('bup-') and c.find('.') < 0:
+            log('\t%s\n' % c[4:])
+    sys.exit(99)
+
+if len(argv) < 2 or not argv[1] or argv[1][0] == '-':
+    usage()
+
+subcmd = argv[1]
+if subcmd == 'help':
+    usage()
+
+def subpath(s):
+    return os.path.join(exepath, 'bup-%s' % s)
+
+if not os.path.exists(subpath(subcmd)):
+    log('error: unknown command "%s"\n' % subcmd)
+    usage()
+
+
+already_fixed = atoi(os.environ.get('BUP_FORCE_TTY'))
+if subcmd in ['ftp']:
+    already_fixed = True
+fix_stdout = not already_fixed and os.isatty(1)
+fix_stderr = not already_fixed and os.isatty(2)
+
+def force_tty():
+    if fix_stdout or fix_stderr:
+        os.environ['BUP_FORCE_TTY'] = '1'
+
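+# If stdout/stderr is a tty, filter the subcommand's output through
+# 'bup newliner' so partial progress lines (ending in \r) stay tidy.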
+if fix_stdout or fix_stderr:
+    realf = fix_stderr and 2 or 1
+    n = subprocess.Popen([subpath('newliner')],
+                         stdin=subprocess.PIPE, stdout=os.dup(realf),
+                         close_fds=True, preexec_fn=force_tty)
+    outf = fix_stdout and n.stdin.fileno() or 1
+    errf = fix_stderr and n.stdin.fileno() or 2
+else:
+    n = None
+    outf = 1
+    errf = 2
+
+ret = 95
+try:
+    try:
+        p = subprocess.Popen([subpath(subcmd)] + argv[2:],
+                             stdout=outf, stderr=errf, preexec_fn=force_tty)
+        ret = p.wait()
+    except OSError, e:
+        log('%s: %s\n' % (subpath(subcmd), e))
+        ret = 98
+    except KeyboardInterrupt, e:
+        ret = 94
+finally:
+    if n:
+        n.stdin.close()
+        try:
+            n.wait()
+        except:
+            pass
+sys.exit(ret)
diff --git a/memtest.py b/memtest.py
deleted file mode 100755 (executable)
index 7595259..0000000
--- a/memtest.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-import sys, re, struct, mmap
-import git, options
-from helpers import *
-
-
-def s_from_bytes(bytes):
-    clist = [chr(b) for b in bytes]
-    return ''.join(clist)
-
-
-def report(count):
-    fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk']
-    d = {}
-    for line in open('/proc/self/status').readlines():
-        l = re.split(r':\s*', line.strip(), 1)
-        d[l[0]] = l[1]
-    if count >= 0:
-        e1 = count
-        fields = [d[k] for k in fields]
-    else:
-        e1 = ''
-    print ('%9s  ' + ('%10s ' * len(fields))) % tuple([e1] + fields)
-
-
-optspec = """
-memtest [-n elements] [-c cycles]
---
-n,number=  number of objects per cycle
-c,cycles=  number of cycles to run
-ignore-midx  ignore .midx files, use only .idx files
-"""
-o = options.Options(sys.argv[0], optspec)
-(opt, flags, extra) = o.parse(sys.argv[1:])
-
-if extra:
-    o.fatal('no arguments expected')
-
-git.ignore_midx = opt.ignore_midx
-
-git.check_repo_or_die()
-m = git.MultiPackIndex(git.repo('objects/pack'))
-
-cycles = opt.cycles or 100
-number = opt.number or 10000
-
-report(-1)
-f = open('/dev/urandom')
-a = mmap.mmap(-1, 20)
-report(0)
-for c in xrange(cycles):
-    for n in xrange(number):
-        b = f.read(3)
-        if 0:
-            bytes = list(struct.unpack('!BBB', b)) + [0]*17
-            bytes[2] &= 0xf0
-            bin = struct.pack('!20s', s_from_bytes(bytes))
-        else:
-            a[0:2] = b[0:2]
-            a[2] = chr(ord(b[2]) & 0xf0)
-            bin = str(a[0:20])
-        #print bin.encode('hex')
-        m.exists(bin)
-    report((c+1)*number)
diff --git a/options.py b/options.py
deleted file mode 100644 (file)
index 165016c..0000000
--- a/options.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import textwrap, getopt, re
-from helpers import *
-
-class OptDict:
-    def __init__(self):
-        self._opts = {}
-
-    def __setitem__(self, k, v):
-        self._opts[k] = v
-        
-    def __getitem__(self, k):
-        return self._opts[k]
-
-    def __getattr__(self, k):
-        return self[k]
-
-
-class Options:
-    def __init__(self, exe, optspec):
-        self.exe = exe
-        self.optspec = optspec
-        self._aliases = {}
-        self._shortopts = 'h?'
-        self._longopts = ['help']
-        self._hasparms = {}
-        self._usagestr = self._gen_usage()
-        
-    def _gen_usage(self):
-        out = []
-        lines = self.optspec.strip().split('\n')
-        lines.reverse()
-        first_syn = True
-        while lines:
-            l = lines.pop()
-            if l == '--': break
-            out.append('%s: %s\n' % (first_syn and 'usage' or '   or', l))
-            first_syn = False
-        out.append('\n')
-        while lines:
-            l = lines.pop()
-            if l.startswith(' '):
-                out.append('\n%s\n' % l.lstrip())
-            elif l:
-                (flags, extra) = l.split(' ', 1)
-                extra = extra.strip()
-                if flags.endswith('='):
-                    flags = flags[:-1]
-                    has_parm = 1
-                else:
-                    has_parm = 0
-                flagl = flags.split(',')
-                flagl_nice = []
-                for f in flagl:
-                    f_nice = re.sub(r'\W', '_', f)
-                    self._aliases[f] = flagl[0]
-                    self._aliases[f_nice] = flagl[0]
-                    self._hasparms[f] = has_parm
-                    if len(f) == 1:
-                        self._shortopts += f + (has_parm and ':' or '')
-                        flagl_nice.append('-' + f)
-                    else:
-                        assert(not f.startswith('no-')) # supported implicitly
-                        self._longopts.append(f + (has_parm and '=' or ''))
-                        self._longopts.append('no-' + f)
-                        flagl_nice.append('--' + f)
-                flags_nice = ', '.join(flagl_nice)
-                if has_parm:
-                    flags_nice += ' ...'
-                prefix = '    %-20s  ' % flags_nice
-                argtext = '\n'.join(textwrap.wrap(extra, width=70,
-                                                initial_indent=prefix,
-                                                subsequent_indent=' '*28))
-                out.append(argtext + '\n')
-            else:
-                out.append('\n')
-        return ''.join(out).rstrip() + '\n'
-    
-    def usage(self):
-        log(self._usagestr)
-        sys.exit(97)
-
-    def fatal(self, s):
-        log('error: %s\n' % s)
-        return self.usage()
-        
-    def parse(self, args):
-        try:
-            (flags,extra) = getopt.gnu_getopt(args,
-                                              self._shortopts, self._longopts)
-        except getopt.GetoptError, e:
-            self.fatal(e)
-
-        opt = OptDict()
-        for f in self._aliases.values():
-            opt[f] = None
-        for (k,v) in flags:
-            while k.startswith('-'):
-                k = k[1:]
-            if k in ['h', '?', 'help']:
-                self.usage()
-            if k.startswith('no-'):
-                k = self._aliases[k[3:]]
-                opt[k] = None
-            else:
-                k = self._aliases[k]
-                if not self._hasparms[k]:
-                    assert(v == '')
-                    opt[k] = (opt._opts.get(k) or 0) + 1
-                else:
-                    try:
-                        vv = int(v)
-                        if str(vv) == v:
-                            v = vv
-                    except ValueError:
-                        pass
-                    opt[k] = v
-        for (f1,f2) in self._aliases.items():
-            opt[f1] = opt[f2]
-        return (opt,flags,extra)
diff --git a/shquote.py b/shquote.py
deleted file mode 100644 (file)
index dc339ec..0000000
--- a/shquote.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import re
-
-q = "'"
-qq = '"'
-
-
-class QuoteError(Exception):
-    pass
-
-
-def _quotesplit(line):
-    inquote = None
-    inescape = None
-    wordstart = 0
-    word = ''
-    for i in range(len(line)):
-        c = line[i]
-        if inescape:
-            if inquote == q and c != q:
-                word += '\\'  # single-q backslashes can only quote single-q
-            word += c
-            inescape = False
-        elif c == '\\':
-            inescape = True
-        elif c == inquote:
-            inquote = None
-            # this is un-sh-like, but do it for sanity when autocompleting
-            yield (wordstart, word)
-            word = ''
-            wordstart = i+1
-        elif not inquote and not word and (c == q or c == qq):
-            # the 'not word' constraint on this is un-sh-like, but do it
-            # for sanity when autocompleting
-            inquote = c
-            wordstart = i
-        elif not inquote and c in [' ', '\n', '\r', '\t']:
-            if word:
-                yield (wordstart, word)
-            word = ''
-            wordstart = i+1
-        else:
-            word += c
-    if word:
-        yield (wordstart, word)
-    if inquote or inescape or word:
-        raise QuoteError()
-
-
-def quotesplit(line):
-    l = []
-    try:
-        for i in _quotesplit(line):
-            l.append(i)
-    except QuoteError:
-        pass
-    return l
-
-
-def unfinished_word(line):
-    try:
-        for (wordstart,word) in _quotesplit(line):
-            pass
-    except QuoteError:
-        firstchar = line[wordstart]
-        if firstchar in [q, qq]:
-            return (firstchar, word)
-        else:
-            return (None, word)
-    else:
-        return (None, '')
-
-
-def quotify(qtype, word, terminate):
-    if qtype == qq:
-        return qq + word.replace(qq, '\\"') + (terminate and qq or '')
-    elif qtype == q:
-        return q + word.replace(q, "\\'") + (terminate and q or '')
-    else:
-        return re.sub(r'([\"\' \t\n\r])', r'\\\1', word)
-
-
-def what_to_add(qtype, origword, newword, terminate):
-    if not newword.startswith(origword):
-        return ''
-    else:
-        qold = quotify(qtype, origword, terminate=False)
-        return quotify(qtype, newword, terminate=terminate)[len(qold):]
diff --git a/t/__init__.py b/t/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..cb95e530a04c1aa60fd4696c6b5cc461dbf2529d 100644 (file)
--- a/t/__init__.py
+++ b/t/__init__.py
@@ -0,0 +1,2 @@
+import sys
+sys.path[:0] = ['lib']
diff --git a/t/tgit.py b/t/tgit.py
index 321a3430f1962d10424ef16e2e391d6479255d5b..f43ab53551ae5e083cf43b1cbc42f1b97a3e3139 100644 (file)
--- a/t/tgit.py
+++ b/t/tgit.py
@@ -1,6 +1,7 @@
-import git, time
+import time
+from bup import git
+from bup.helpers import *
 from wvtest import *
-from helpers import *
 
 
 @wvtest
diff --git a/t/thelpers.py b/t/thelpers.py
index 5e59bcaeb8e352f0f98338626770d236fe78ceae..9f24962644fb25072c27879968f98e38ee62fba8 100644 (file)
--- a/t/thelpers.py
+++ b/t/thelpers.py
@@ -1,4 +1,4 @@
-from helpers import *
+from bup.helpers import *
 from wvtest import *
 
 @wvtest
diff --git a/t/tindex.py b/t/tindex.py
index e5c043b7633ff48fb9a54038264dc179e34b0eb2..330b44467d0b61d1961ccf0a39f10d670c961daa 100644 (file)
--- a/t/tindex.py
+++ b/t/tindex.py
@@ -1,7 +1,7 @@
 import os
-import index
+from bup import index
+from bup.helpers import *
 from wvtest import *
-from helpers import *
 
 @wvtest
 def index_basic():
diff --git a/t/toptions.py b/t/toptions.py
index 937cf707d5339adda34190df683a8716945cdd88..4596e8b245ce593791f99e0d6457ffcd6886027a 100644 (file)
--- a/t/toptions.py
+++ b/t/toptions.py
@@ -1,4 +1,4 @@
-import options
+from bup import options
 from wvtest import *
 
 @wvtest
diff --git a/t/tshquote.py b/t/tshquote.py
index 9f9c8cc845c76177bdf850b2efa789974f41bb52..15b06ec1a53992435cf91fb529d4f85c77367727 100644 (file)
--- a/t/tshquote.py
+++ b/t/tshquote.py
@@ -1,5 +1,5 @@
+from bup import shquote
 from wvtest import *
-import shquote
 
 def qst(line):
     return [s[1] for s in shquote.quotesplit(line)]
diff --git a/vfs.py b/vfs.py
deleted file mode 100644 (file)
index a97d4f5..0000000
--- a/vfs.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import os, re, stat, time
-import git
-from helpers import *
-
-EMPTY_SHA='\0'*20
-
-_cp = None
-def cp():
-    global _cp
-    if not _cp:
-        _cp = git.CatPipe()
-    return _cp
-
-class NodeError(Exception):
-    pass
-class NoSuchFile(NodeError):
-    pass
-class NotDir(NodeError):
-    pass
-class NotFile(NodeError):
-    pass
-class TooManySymlinks(NodeError):
-    pass
-
-
-class FileReader:
-    def __init__(self, node):
-        self.n = node
-        self.ofs = 0
-        self.size = self.n.size()
-
-    def seek(self, ofs):
-        if ofs > self.size:
-            self.ofs = self.size
-        elif ofs < 0:
-            self.ofs = 0
-        else:
-            self.ofs = ofs
-
-    def tell(self):
-        return self.ofs
-
-    def read(self, count = -1):
-        if count < 0:
-            count = self.size - self.ofs
-        buf = self.n.readbytes(self.ofs, count)
-        self.ofs += len(buf)
-        return buf
-
-
-class Node:
-    def __init__(self, parent, name, mode, hash):
-        self.parent = parent
-        self.name = name
-        self.mode = mode
-        self.hash = hash
-        self._subs = None
-        
-    def __cmp__(a, b):
-        return cmp(a.name or None, b.name or None)
-    
-    def __iter__(self):
-        return iter(self.subs())
-    
-    def fullname(self):
-        if self.parent:
-            return os.path.join(self.parent.fullname(), self.name)
-        else:
-            return self.name
-    
-    def _mksubs(self):
-        self._subs = {}
-        
-    def subs(self):
-        if self._subs == None:
-            self._mksubs()
-        return sorted(self._subs.values())
-        
-    def sub(self, name):
-        if self._subs == None:
-            self._mksubs()
-        ret = self._subs.get(name)
-        if not ret:
-            raise NoSuchFile("no file %r in %r" % (name, self.name))
-        return ret
-
-    def top(self):
-        if self.parent:
-            return self.parent.top()
-        else:
-            return self
-
-    def _lresolve(self, parts):
-        #log('_lresolve %r in %r\n' % (parts, self.name))
-        if not parts:
-            return self
-        (first, rest) = (parts[0], parts[1:])
-        if first == '.':
-            return self._lresolve(rest)
-        elif first == '..':
-            if not self.parent:
-                raise NoSuchFile("no parent dir for %r" % self.name)
-            return self.parent._lresolve(rest)
-        elif rest:
-            return self.sub(first)._lresolve(rest)
-        else:
-            return self.sub(first)
-
-    def lresolve(self, path):
-        start = self
-        if path.startswith('/'):
-            start = self.top()
-            path = path[1:]
-        parts = re.split(r'/+', path or '.')
-        if not parts[-1]:
-            parts[-1] = '.'
-        #log('parts: %r %r\n' % (path, parts))
-        return start._lresolve(parts)
-
-    def resolve(self, path):
-        return self.lresolve(path).lresolve('')
-    
-    def nlinks(self):
-        if self._subs == None:
-            self._mksubs()
-        return 1
-
-    def size(self):
-        return 0
-
-    def open(self):
-        raise NotFile('%s is not a regular file' % self.name)
-    
-    def readbytes(self, ofs, count):
-        raise NotFile('%s is not a regular file' % self.name)
-    
-    def read(self, num = -1):
-        if num < 0:
-            num = self.size()
-        return self.readbytes(0, num)
-    
-    
-class File(Node):
-    def _content(self):
-        return cp().join(self.hash.encode('hex'))
-
-    def open(self):
-        return FileReader(self)
-    
-    def size(self):
-        # FIXME inefficient
-        return sum(len(blob) for blob in self._content())
-    
-    def readbytes(self, ofs, count):
-        # FIXME inefficient
-        buf = ''.join(self._content())
-        return buf[ofs:ofs+count]
-    
-
-_symrefs = 0
-class Symlink(File):
-    def __init__(self, parent, name, hash):
-        File.__init__(self, parent, name, 0120000, hash)
-
-    def readlink(self):
-        return self.read(1024)
-
-    def dereference(self):
-        global _symrefs
-        if _symrefs > 100:
-            raise TooManySymlinks('too many levels of symlinks: %r'
-                                  % self.fullname())
-        _symrefs += 1
-        try:
-            return self.parent.lresolve(self.readlink())
-        finally:
-            _symrefs -= 1
-
-    def _lresolve(self, parts):
-        return self.dereference()._lresolve(parts)
-    
-
-class FakeSymlink(Symlink):
-    def __init__(self, parent, name, toname):
-        Symlink.__init__(self, parent, name, EMPTY_SHA)
-        self.toname = toname
-        
-    def _content(self):
-        return self.toname
-    
-
-class Dir(Node):
-    def _mksubs(self):
-        self._subs = {}
-        it = cp().get(self.hash.encode('hex'))
-        type = it.next()
-        if type == 'commit':
-            del it
-            it = cp().get(self.hash.encode('hex') + ':')
-            type = it.next()
-        assert(type == 'tree')
-        for (mode,name,sha) in git._treeparse(''.join(it)):
-            mode = int(mode, 8)
-            if stat.S_ISDIR(mode):
-                self._subs[name] = Dir(self, name, mode, sha)
-            elif stat.S_ISLNK(mode):
-                self._subs[name] = Symlink(self, name, sha)
-            else:
-                self._subs[name] = File(self, name, mode, sha)
-                
-
-class CommitList(Node):
-    def __init__(self, parent, name, hash):
-        Node.__init__(self, parent, name, 040000, hash)
-        
-    def _mksubs(self):
-        self._subs = {}
-        revs = list(git.rev_list(self.hash.encode('hex')))
-        for (date, commit) in revs:
-            l = time.localtime(date)
-            ls = time.strftime('%Y-%m-%d-%H%M%S', l)
-            commithex = commit.encode('hex')
-            self._subs[commithex] = Dir(self, commithex, 040000, commit)
-            self._subs[ls] = FakeSymlink(self, ls, commit.encode('hex'))
-            latest = max(revs)
-        if latest:
-            (date, commit) = latest
-            self._subs['latest'] = FakeSymlink(self, 'latest',
-                                               commit.encode('hex'))
-
-    
-class RefList(Node):
-    def __init__(self, parent):
-        Node.__init__(self, parent, '/', 040000, EMPTY_SHA)
-        
-    def _mksubs(self):
-        self._subs = {}
-        for (name,sha) in git.list_refs():
-            if name.startswith('refs/heads/'):
-                name = name[11:]
-                self._subs[name] = CommitList(self, name, sha)
-        
-