all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
bup-midx bup-fuse bup-ls bup-damage bup-fsck bup-margin bup-drecurse \
- bup-random bup-ftp bup-newliner \
- bup memtest _hashsplit$(SOEXT) \
+ bup-random bup-ftp bup-newliner bup-memtest \
+ bup lib/bup/_hashsplit$(SOEXT) \
Documentation/all
%/all:
%/clean:
$(MAKE) -C $* clean
-_hashsplit$(SOEXT): _hashsplit.c csetup.py
+lib/bup/_hashsplit$(SOEXT): lib/bup/_hashsplit.c lib/bup/csetup.py
@rm -f $@
- python csetup.py build
- cp build/*/_hashsplit$(SOEXT) .
+ cd lib/bup && python csetup.py build
+ cp lib/bup/build/*/_hashsplit$(SOEXT) lib/bup/
runtests: all runtests-python runtests-cmdline
%: %.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
-bup: bup.py
+bup: main.py
rm -f $@
ln -s $< $@
gcc -c -o $@ $< $(CPPFLAGS) $(CFLAGS)
clean: Documentation/clean
- rm -f *.o *.so *.dll *.exe *~ .*~ *.pyc */*.pyc */*~ \
+ rm -f *.o *.so */*/*.so *.dll *.exe .*~ *~ */*~ */*/*~ \
+ *.pyc */*.pyc */*/*.pyc\
bup bup-* randomgen memtest \
out[12] out2[tc] tags[12] tags2[tc]
- rm -rf *.tmp build
+ rm -rf *.tmp build lib/bup/build
+++ /dev/null
-#include <Python.h>
-#include <assert.h>
-#include <stdint.h>
-
-#define BLOBBITS (13)
-#define BLOBSIZE (1<<BLOBBITS)
-#define WINDOWBITS (7)
-#define WINDOWSIZE (1<<(WINDOWBITS-1))
-
-
-// FIXME: replace this with a not-stupid rolling checksum algorithm,
-// such as the one used in rsync (Adler32?)
-static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
-{
- return ((old<<1) | (old>>31)) ^ drop ^ add;
-}
-
-
-static int find_ofs(const unsigned char *buf, int len, int *bits)
-{
- unsigned char window[WINDOWSIZE];
- uint32_t sum = 0;
- int i = 0, count;
- memset(window, 0, sizeof(window));
-
- for (count = 0; count < len; count++)
- {
- sum = stupidsum_add(sum, window[i], buf[count]);
- window[i] = buf[count];
- i = (i + 1) % WINDOWSIZE;
- if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
- {
- if (bits)
- {
- *bits = BLOBBITS;
- sum >>= BLOBBITS;
- for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
- ;
- }
- return count+1;
- }
- }
- return 0;
-}
-
-
-static PyObject *blobbits(PyObject *self, PyObject *args)
-{
- if (!PyArg_ParseTuple(args, ""))
- return NULL;
- return Py_BuildValue("i", BLOBBITS);
-}
-
-
-static PyObject *splitbuf(PyObject *self, PyObject *args)
-{
- unsigned char *buf = NULL;
- int len = 0, out = 0, bits = -1;
-
- if (!PyArg_ParseTuple(args, "t#", &buf, &len))
- return NULL;
- out = find_ofs(buf, len, &bits);
- return Py_BuildValue("ii", out, bits);
-}
-
-
-static PyObject *bitmatch(PyObject *self, PyObject *args)
-{
- unsigned char *buf1 = NULL, *buf2 = NULL;
- int len1 = 0, len2 = 0;
- int byte, bit;
-
- if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
- return NULL;
-
- bit = 0;
- for (byte = 0; byte < len1 && byte < len2; byte++)
- {
- int b1 = buf1[byte], b2 = buf2[byte];
- if (b1 != b2)
- {
- for (bit = 0; bit < 8; bit++)
- if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
- break;
- break;
- }
- }
-
- return Py_BuildValue("i", byte*8 + bit);
-}
-
-
-// I would have made this a lower-level function that just fills in a buffer
-// with random values, and then written those values from python. But that's
-// about 20% slower in my tests, and since we typically generate random
-// numbers for benchmarking other parts of bup, any slowness in generating
-// random bytes will make our benchmarks inaccurate. Plus nobody wants
-// pseudorandom bytes much except for this anyway.
-static PyObject *write_random(PyObject *self, PyObject *args)
-{
- uint32_t buf[1024/4];
- int fd = -1, seed = 0;
- ssize_t ret;
- long long len = 0, kbytes = 0, written = 0;
-
- if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
- return NULL;
-
- srandom(seed);
-
- for (kbytes = len/1024; kbytes > 0; kbytes--)
- {
- int i;
- for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
- buf[i] = random();
- ret = write(fd, buf, sizeof(buf));
- if (ret < 0)
- ret = 0;
- written += ret;
- if (ret < sizeof(buf))
- break;
- if (!(kbytes%1024))
- fprintf(stderr, ".");
- }
-
- return Py_BuildValue("L", written);
-}
-
-
-static PyMethodDef hashsplit_methods[] = {
- { "blobbits", blobbits, METH_VARARGS,
- "Return the number of bits in the rolling checksum." },
- { "splitbuf", splitbuf, METH_VARARGS,
- "Split a list of strings based on a rolling checksum." },
- { "bitmatch", bitmatch, METH_VARARGS,
- "Count the number of matching prefix bits between two strings." },
- { "write_random", write_random, METH_VARARGS,
- "Write random bytes to the given file descriptor" },
- { NULL, NULL, 0, NULL }, // sentinel
-};
-
-PyMODINIT_FUNC init_hashsplit(void)
-{
- Py_InitModule("_hashsplit", hashsplit_methods);
-}
+++ /dev/null
-#!/usr/bin/env python
-import sys, os, subprocess
-import git
-from helpers import *
-
-argv = sys.argv
-exe = argv[0]
-exepath = os.path.split(exe)[0] or '.'
-
-def usage():
- log('Usage: bup <subcmd> <options...>\n\n')
- log('Available subcommands:\n')
- for c in sorted(os.listdir(exepath)):
- if c.startswith('bup-') and c.find('.') < 0:
- log('\t%s\n' % c[4:])
- sys.exit(99)
-
-if len(argv) < 2 or not argv[1] or argv[1][0] == '-':
- usage()
-
-subcmd = argv[1]
-if subcmd == 'help':
- usage()
-
-def subpath(s):
- return os.path.join(exepath, 'bup-%s' % s)
-
-if not os.path.exists(subpath(subcmd)):
- log('error: unknown command "%s"\n' % subcmd)
- usage()
-
-
-already_fixed = atoi(os.environ.get('BUP_FORCE_TTY'))
-if subcmd in ['ftp']:
- already_fixed = True
-fix_stdout = not already_fixed and os.isatty(1)
-fix_stderr = not already_fixed and os.isatty(2)
-
-def force_tty():
- if fix_stdout or fix_stderr:
- os.environ['BUP_FORCE_TTY'] = '1'
-
-if fix_stdout or fix_stderr:
- realf = fix_stderr and 2 or 1
- n = subprocess.Popen([subpath('newliner')],
- stdin=subprocess.PIPE, stdout=os.dup(realf),
- close_fds=True, preexec_fn=force_tty)
- outf = fix_stdout and n.stdin.fileno() or 1
- errf = fix_stderr and n.stdin.fileno() or 2
-else:
- n = None
- outf = 1
- errf = 2
-
-ret = 95
-try:
- try:
- p = subprocess.Popen([subpath(subcmd)] + argv[2:],
- stdout=outf, stderr=errf, preexec_fn=force_tty)
- ret = p.wait()
- except OSError, e:
- log('%s: %s\n' % (subpath(subcmd), e))
- ret = 98
- except KeyboardInterrupt, e:
- ret = 94
-finally:
- if n:
- n.stdin.close()
- try:
- n.wait()
- except:
- pass
-sys.exit(ret)
+++ /dev/null
-import re, struct, errno, select
-import git
-from helpers import *
-from subprocess import Popen, PIPE
-
-
-class ClientError(Exception):
- pass
-
-
-class Client:
- def __init__(self, remote, create=False):
- self._busy = None
- self.p = None
- self.conn = None
- rs = remote.split(':', 1)
- nicedir = os.path.split(os.path.abspath(sys.argv[0]))[0]
- nicedir = re.sub(r':', "_", nicedir)
- if len(rs) == 1:
- (host, dir) = ('NONE', remote)
- def fixenv():
- os.environ['PATH'] = ':'.join([nicedir,
- os.environ.get('PATH', '')])
- argv = ['bup', 'server']
- else:
- (host, dir) = rs
- fixenv = None
- # WARNING: shell quoting security holes are possible here, so we
- # have to be super careful. We have to use 'sh -c' because
- # csh-derived shells can't handle PATH= notation. We can't
- # set PATH in advance, because ssh probably replaces it. We
- # can't exec *safely* using argv, because *both* ssh and 'sh -c'
- # allow shellquoting. So we end up having to double-shellquote
- # stuff here.
- escapedir = re.sub(r'([^\w/])', r'\\\\\\\1', nicedir)
- cmd = r"""
- sh -c PATH=%s:'$PATH bup server'
- """ % escapedir
- argv = ['ssh', host, '--', cmd.strip()]
- #log('argv is: %r\n' % argv)
- (self.host, self.dir) = (host, dir)
- self.cachedir = git.repo('index-cache/%s'
- % re.sub(r'[^@\w]', '_',
- "%s:%s" % (host, dir)))
- try:
- self.p = p = Popen(argv, stdin=PIPE, stdout=PIPE, preexec_fn=fixenv)
- except OSError, e:
- raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
- self.conn = conn = Conn(p.stdout, p.stdin)
- if dir:
- dir = re.sub(r'[\r\n]', ' ', dir)
- if create:
- conn.write('init-dir %s\n' % dir)
- else:
- conn.write('set-dir %s\n' % dir)
- self.check_ok()
- self.sync_indexes_del()
-
- def __del__(self):
- try:
- self.close()
- except IOError, e:
- if e.errno == errno.EPIPE:
- pass
- else:
- raise
-
- def close(self):
- if self.conn and not self._busy:
- self.conn.write('quit\n')
- if self.p:
- self.p.stdin.close()
- while self.p.stdout.read(65536):
- pass
- self.p.stdout.close()
- self.p.wait()
- rv = self.p.wait()
- if rv:
- raise ClientError('server tunnel returned exit code %d' % rv)
- self.conn = None
- self.p = None
-
- def check_ok(self):
- rv = self.p.poll()
- if rv != None:
- raise ClientError('server exited unexpectedly with code %r' % rv)
- try:
- return self.conn.check_ok()
- except Exception, e:
- raise ClientError, e, sys.exc_info()[2]
-
- def check_busy(self):
- if self._busy:
- raise ClientError('already busy with command %r' % self._busy)
-
- def _not_busy(self):
- self._busy = None
-
- def sync_indexes_del(self):
- self.check_busy()
- conn = self.conn
- conn.write('list-indexes\n')
- packdir = git.repo('objects/pack')
- all = {}
- needed = {}
- for line in linereader(conn):
- if not line:
- break
- all[line] = 1
- assert(line.find('/') < 0)
- if not os.path.exists(os.path.join(self.cachedir, line)):
- needed[line] = 1
- self.check_ok()
-
- mkdirp(self.cachedir)
- for f in os.listdir(self.cachedir):
- if f.endswith('.idx') and not f in all:
- log('pruning old index: %r\n' % f)
- os.unlink(os.path.join(self.cachedir, f))
-
- def sync_index(self, name):
- #log('requesting %r\n' % name)
- mkdirp(self.cachedir)
- self.conn.write('send-index %s\n' % name)
- n = struct.unpack('!I', self.conn.read(4))[0]
- assert(n)
- fn = os.path.join(self.cachedir, name)
- f = open(fn + '.tmp', 'w')
- count = 0
- progress('Receiving index: %d/%d\r' % (count, n))
- for b in chunkyreader(self.conn, n):
- f.write(b)
- count += len(b)
- progress('Receiving index: %d/%d\r' % (count, n))
- progress('Receiving index: %d/%d, done.\n' % (count, n))
- self.check_ok()
- f.close()
- os.rename(fn + '.tmp', fn)
-
- def _make_objcache(self):
- ob = self._busy
- self._busy = None
- #self.sync_indexes()
- self._busy = ob
- return git.MultiPackIndex(self.cachedir)
-
- def _suggest_pack(self, indexname):
- log('received index suggestion: %s\n' % indexname)
- ob = self._busy
- if ob:
- assert(ob == 'receive-objects')
- self._busy = None
- self.conn.write('\xff\xff\xff\xff') # suspend receive-objects
- self.conn.drain_and_check_ok()
- self.sync_index(indexname)
- if ob:
- self.conn.write('receive-objects\n')
- self._busy = ob
-
- def new_packwriter(self):
- self.check_busy()
- self._busy = 'receive-objects'
- return PackWriter_Remote(self.conn,
- objcache_maker = self._make_objcache,
- suggest_pack = self._suggest_pack,
- onclose = self._not_busy)
-
- def read_ref(self, refname):
- self.check_busy()
- self.conn.write('read-ref %s\n' % refname)
- r = self.conn.readline().strip()
- self.check_ok()
- if r:
- assert(len(r) == 40) # hexified sha
- return r.decode('hex')
- else:
- return None # nonexistent ref
-
- def update_ref(self, refname, newval, oldval):
- self.check_busy()
- self.conn.write('update-ref %s\n%s\n%s\n'
- % (refname, newval.encode('hex'),
- (oldval or '').encode('hex')))
- self.check_ok()
-
- def cat(self, id):
- self.check_busy()
- self._busy = 'cat'
- self.conn.write('cat %s\n' % re.sub(r'[\n\r]', '_', id))
- while 1:
- sz = struct.unpack('!I', self.conn.read(4))[0]
- if not sz: break
- yield self.conn.read(sz)
- e = self.check_ok()
- self._not_busy()
- if e:
- raise KeyError(str(e))
-
-
-class PackWriter_Remote(git.PackWriter):
- def __init__(self, conn, objcache_maker, suggest_pack, onclose):
- git.PackWriter.__init__(self, objcache_maker)
- self.file = conn
- self.filename = 'remote socket'
- self.suggest_pack = suggest_pack
- self.onclose = onclose
- self._packopen = False
-
- def _open(self):
- if not self._packopen:
- self._make_objcache()
- self.file.write('receive-objects\n')
- self._packopen = True
-
- def _end(self):
- if self._packopen and self.file:
- self.file.write('\0\0\0\0')
- self._packopen = False
- while True:
- line = self.file.readline().strip()
- if line.startswith('index '):
- pass
- else:
- break
- id = line
- self.file.check_ok()
- self.objcache = None
- if self.onclose:
- self.onclose()
- if self.suggest_pack:
- self.suggest_pack(id)
- return id
-
- def close(self):
- id = self._end()
- self.file = None
- return id
-
- def abort(self):
- raise GitError("don't know how to abort remote pack writing")
-
- def _raw_write(self, datalist):
- assert(self.file)
- if not self._packopen:
- self._open()
- data = ''.join(datalist)
- assert(len(data))
- self.file.write(struct.pack('!I', len(data)) + data)
- self.outbytes += len(data)
- self.count += 1
-
- if self.file.has_input():
- line = self.file.readline().strip()
- assert(line.startswith('index '))
- idxname = line[6:]
- if self.suggest_pack:
- self.suggest_pack(idxname)
- self.objcache.refresh()
#!/usr/bin/env python
import sys, os, random
-import options
-from helpers import *
+from bup import options
+from bup.helpers import *
def randblock(n):
#!/usr/bin/env python
-import options, drecurse
-from helpers import *
+from bup import options, drecurse
+from bup.helpers import *
optspec = """
bup drecurse <path>
#!/usr/bin/env python
import sys, os, glob, subprocess, time, sha
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
par2_ok = 0
nullf = open('/dev/null')
#!/usr/bin/env python
import sys, os, re, stat, readline, fnmatch
-import options, git, shquote, vfs
-from helpers import *
+from bup import options, git, shquote, vfs
+from bup.helpers import *
def print_node(text, n):
if stat.S_ISDIR(n.mode):
#!/usr/bin/env python
import sys, os, stat, errno, fuse, re, time, tempfile
-import options, git, vfs
-from helpers import *
+from bup import options, git, vfs
+from bup.helpers import *
class Stat(fuse.Stat):
#!/usr/bin/env python
import os, sys, stat, time
-import options, git, index, drecurse
-from helpers import *
+from bup import options, git, index, drecurse
+from bup.helpers import *
def merge_indexes(out, r1, r2):
#!/usr/bin/env python
-import git, options, client
-from helpers import *
+from bup import git, options, client
+from bup.helpers import *
optspec = """
[BUP_DIR=...] bup init [-r host:path]
#!/usr/bin/env python
import sys, time, struct
-import hashsplit, git, options, client
-from helpers import *
+from bup import hashsplit, git, options, client
+from bup.helpers import *
from subprocess import PIPE
#!/usr/bin/env python
import sys, os, stat
-import options, git, vfs
-from helpers import *
+from bup import options, git, vfs
+from bup.helpers import *
def print_node(text, n):
prefix = ''
#!/usr/bin/env python
import sys
-import options, git, _hashsplit
-from helpers import *
+from bup import options, git, _hashsplit
+from bup.helpers import *
optspec = """
--- /dev/null
+#!/usr/bin/env python
+import sys, re, struct, mmap
+from bup import git, options
+from bup.helpers import *
+
+
+def s_from_bytes(bytes):
+ clist = [chr(b) for b in bytes]
+ return ''.join(clist)
+
+
+def report(count):
+ fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk']
+ d = {}
+ for line in open('/proc/self/status').readlines():
+ l = re.split(r':\s*', line.strip(), 1)
+ d[l[0]] = l[1]
+ if count >= 0:
+ e1 = count
+ fields = [d[k] for k in fields]
+ else:
+ e1 = ''
+ print ('%9s ' + ('%10s ' * len(fields))) % tuple([e1] + fields)
+ sys.stdout.flush()
+
+
+optspec = """
+memtest [-n elements] [-c cycles]
+--
+n,number= number of objects per cycle
+c,cycles= number of cycles to run
+ignore-midx ignore .midx files, use only .idx files
+"""
+o = options.Options(sys.argv[0], optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+if extra:
+ o.fatal('no arguments expected')
+
+git.ignore_midx = opt.ignore_midx
+
+git.check_repo_or_die()
+m = git.MultiPackIndex(git.repo('objects/pack'))
+
+cycles = opt.cycles or 100
+number = opt.number or 10000
+
+report(-1)
+f = open('/dev/urandom')
+a = mmap.mmap(-1, 20)
+report(0)
+for c in xrange(cycles):
+ for n in xrange(number):
+ b = f.read(3)
+ if 0:
+ bytes = list(struct.unpack('!BBB', b)) + [0]*17
+ bytes[2] &= 0xf0
+ bin = struct.pack('!20s', s_from_bytes(bytes))
+ else:
+ a[0:2] = b[0:2]
+ a[2] = chr(ord(b[2]) & 0xf0)
+ bin = str(a[0:20])
+ #print bin.encode('hex')
+ m.exists(bin)
+ report((c+1)*number)
#!/usr/bin/env python
import sys, math, struct, glob, sha
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
PAGE_SIZE=4096
SHA_PER_PAGE=PAGE_SIZE/200.
#!/usr/bin/env python
import sys, os, re
-import options
+from bup import options
optspec = """
bup newliner
#!/usr/bin/env python
import sys, mmap
-import options, _hashsplit
-from helpers import *
+from bup import options, _hashsplit
+from bup.helpers import *
optspec = """
bup random [-S seed] <numbytes>
#!/usr/bin/env python
import sys, re, errno, stat, time, math
-import hashsplit, git, options, index, client
-from helpers import *
+from bup import hashsplit, git, options, index, client
+from bup.helpers import *
optspec = """
#!/usr/bin/env python
import sys, struct, mmap
-import options, git
-from helpers import *
+from bup import options, git
+from bup.helpers import *
suspended_w = None
#!/usr/bin/env python
import sys, time, struct
-import hashsplit, git, options, client
-from helpers import *
+from bup import hashsplit, git, options, client
+from bup.helpers import *
from subprocess import PIPE
#!/usr/bin/env python
import sys, time
-import options
+from bup import options
optspec = """
bup tick
+++ /dev/null
-from distutils.core import setup, Extension
-
-_hashsplit_mod = Extension('_hashsplit', sources=['_hashsplit.c'])
-
-setup(name='_hashsplit',
- version='0.1',
- description='hashsplit helper library for bup',
- ext_modules=[_hashsplit_mod])
+++ /dev/null
-import stat, heapq
-from helpers import *
-
-try:
- O_LARGEFILE = os.O_LARGEFILE
-except AttributeError:
- O_LARGEFILE = 0
-
-
-# the use of fchdir() and lstat() is for two reasons:
-# - help out the kernel by not making it repeatedly look up the absolute path
-# - avoid race conditions caused by doing listdir() on a changing symlink
-class OsFile:
- def __init__(self, path):
- self.fd = None
- self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
-
- def __del__(self):
- if self.fd:
- fd = self.fd
- self.fd = None
- os.close(fd)
-
- def fchdir(self):
- os.fchdir(self.fd)
-
- def stat(self):
- return os.fstat(self.fd)
-
-
-_IFMT = stat.S_IFMT(0xffffffff) # avoid function call in inner loop
-def _dirlist():
- l = []
- for n in os.listdir('.'):
- try:
- st = os.lstat(n)
- except OSError, e:
- add_error(Exception('%s: %s' % (realpath(n), str(e))))
- continue
- if (st.st_mode & _IFMT) == stat.S_IFDIR:
- n += '/'
- l.append((n,st))
- l.sort(reverse=True)
- return l
-
-
-def _recursive_dirlist(prepend, xdev):
- for (name,pst) in _dirlist():
- if name.endswith('/'):
- if xdev != None and pst.st_dev != xdev:
- log('Skipping %r: different filesystem.\n' % (prepend+name))
- continue
- try:
- OsFile(name).fchdir()
- except OSError, e:
- add_error('%s: %s' % (prepend, e))
- else:
- for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev):
- yield i
- os.chdir('..')
- yield (prepend + name, pst)
-
-
-def recursive_dirlist(paths, xdev):
- startdir = OsFile('.')
- try:
- assert(type(paths) != type(''))
- for path in paths:
- try:
- pst = os.lstat(path)
- if stat.S_ISLNK(pst.st_mode):
- yield (path, pst)
- continue
- except OSError, e:
- add_error(e)
- continue
- try:
- pfile = OsFile(path)
- except OSError, e:
- add_error(e)
- continue
- pst = pfile.stat()
- if xdev:
- xdev = pst.st_dev
- else:
- xdev = None
- if stat.S_ISDIR(pst.st_mode):
- pfile.fchdir()
- prepend = os.path.join(path, '')
- for i in _recursive_dirlist(prepend=prepend, xdev=xdev):
- yield i
- startdir.fchdir()
- else:
- prepend = path
- yield (prepend,pst)
- except:
- try:
- startdir.fchdir()
- except:
- pass
- raise
+++ /dev/null
-import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
-import heapq
-from helpers import *
-
-verbose = 0
-ignore_midx = 0
-home_repodir = os.path.expanduser('~/.bup')
-repodir = None
-
-_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
-_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
-
-
-class GitError(Exception):
- pass
-
-
-def repo(sub = ''):
- global repodir
- if not repodir:
- raise GitError('You should call check_repo_or_die()')
- gd = os.path.join(repodir, '.git')
- if os.path.exists(gd):
- repodir = gd
- return os.path.join(repodir, sub)
-
-
-def _encode_packobj(type, content):
- szout = ''
- sz = len(content)
- szbits = (sz & 0x0f) | (_typemap[type]<<4)
- sz >>= 4
- while 1:
- if sz: szbits |= 0x80
- szout += chr(szbits)
- if not sz:
- break
- szbits = sz & 0x7f
- sz >>= 7
- z = zlib.compressobj(1)
- yield szout
- yield z.compress(content)
- yield z.flush()
-
-
-def _encode_looseobj(type, content):
- z = zlib.compressobj(1)
- yield z.compress('%s %d\0' % (type, len(content)))
- yield z.compress(content)
- yield z.flush()
-
-
-def _decode_looseobj(buf):
- assert(buf);
- s = zlib.decompress(buf)
- i = s.find('\0')
- assert(i > 0)
- l = s[:i].split(' ')
- type = l[0]
- sz = int(l[1])
- content = s[i+1:]
- assert(type in _typemap)
- assert(sz == len(content))
- return (type, content)
-
-
-def _decode_packobj(buf):
- assert(buf)
- c = ord(buf[0])
- type = _typermap[(c & 0x70) >> 4]
- sz = c & 0x0f
- shift = 4
- i = 0
- while c & 0x80:
- i += 1
- c = ord(buf[i])
- sz |= (c & 0x7f) << shift
- shift += 7
- if not (c & 0x80):
- break
- return (type, zlib.decompress(buf[i+1:]))
-
-
-class PackIndex:
- def __init__(self, filename):
- self.name = filename
- self.map = mmap_read(open(filename))
- assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
- self.fanout = list(struct.unpack('!256I',
- str(buffer(self.map, 8, 256*4))))
- self.fanout.append(0) # entry "-1"
- nsha = self.fanout[255]
- self.ofstable = buffer(self.map,
- 8 + 256*4 + nsha*20 + nsha*4,
- nsha*4)
- self.ofs64table = buffer(self.map,
- 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
-
- def _ofs_from_idx(self, idx):
- ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
- if ofs & 0x80000000:
- idx64 = ofs & 0x7fffffff
- ofs = struct.unpack('!I',
- str(buffer(self.ofs64table, idx64*8, 8)))[0]
- return ofs
-
- def _idx_from_hash(self, hash):
- assert(len(hash) == 20)
- b1 = ord(hash[0])
- start = self.fanout[b1-1] # range -1..254
- end = self.fanout[b1] # range 0..255
- buf = buffer(self.map, 8 + 256*4, end*20)
- want = str(hash)
- while start < end:
- mid = start + (end-start)/2
- v = str(buf[mid*20:(mid+1)*20])
- if v < want:
- start = mid+1
- elif v > want:
- end = mid
- else: # got it!
- return mid
- return None
-
- def find_offset(self, hash):
- idx = self._idx_from_hash(hash)
- if idx != None:
- return self._ofs_from_idx(idx)
- return None
-
- def exists(self, hash):
- return hash and (self._idx_from_hash(hash) != None) and True or None
-
- def __iter__(self):
- for i in xrange(self.fanout[255]):
- yield buffer(self.map, 8 + 256*4 + 20*i, 20)
-
- def __len__(self):
- return int(self.fanout[255])
-
-
-def extract_bits(buf, bits):
- mask = (1<<bits) - 1
- v = struct.unpack('!I', buf[0:4])[0]
- v = (v >> (32-bits)) & mask
- return v
-
-
-class PackMidx:
- def __init__(self, filename):
- self.name = filename
- assert(filename.endswith('.midx'))
- self.map = mmap_read(open(filename))
- if str(self.map[0:8]) == 'MIDX\0\0\0\1':
- log('Warning: ignoring old-style midx %r\n' % filename)
- self.bits = 0
- self.entries = 1
- self.fanout = buffer('\0\0\0\0')
- self.shalist = buffer('\0'*20)
- self.idxnames = []
- else:
- assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
- self.bits = struct.unpack('!I', self.map[8:12])[0]
- self.entries = 2**self.bits
- self.fanout = buffer(self.map, 12, self.entries*4)
- shaofs = 12 + self.entries*4
- nsha = self._fanget(self.entries-1)
- self.shalist = buffer(self.map, shaofs, nsha*20)
- self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
-
- def _fanget(self, i):
- start = i*4
- s = self.fanout[start:start+4]
- return struct.unpack('!I', s)[0]
-
- def exists(self, hash):
- want = str(hash)
- el = extract_bits(want, self.bits)
- if el:
- start = self._fanget(el-1)
- else:
- start = 0
- end = self._fanget(el)
- while start < end:
- mid = start + (end-start)/2
- v = str(self.shalist[mid*20:(mid+1)*20])
- if v < want:
- start = mid+1
- elif v > want:
- end = mid
- else: # got it!
- return True
- return None
-
- def __iter__(self):
- for i in xrange(self._fanget(self.entries-1)):
- yield buffer(self.shalist, i*20, 20)
-
- def __len__(self):
- return int(self._fanget(self.entries-1))
-
-
-_mpi_count = 0
-class MultiPackIndex:
- def __init__(self, dir):
- global _mpi_count
- assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
- _mpi_count += 1
- self.dir = dir
- self.also = {}
- self.packs = []
- self.refresh()
-
- def __del__(self):
- global _mpi_count
- _mpi_count -= 1
- assert(_mpi_count == 0)
-
- def __iter__(self):
- return iter(idxmerge(self.packs))
-
- def exists(self, hash):
- if hash in self.also:
- return True
- for i in range(len(self.packs)):
- p = self.packs[i]
- if p.exists(hash):
- # reorder so most recently used packs are searched first
- self.packs = [p] + self.packs[:i] + self.packs[i+1:]
- return p.name
- return None
-
- def refresh(self, skip_midx = False, forget_packs = False):
- if forget_packs:
- self.packs = []
- skip_midx = skip_midx or ignore_midx
- d = dict((p.name, 1) for p in self.packs)
- if os.path.exists(self.dir):
- if not skip_midx:
- midxl = []
- for f in os.listdir(self.dir):
- full = os.path.join(self.dir, f)
- if f.endswith('.midx') and not d.get(full):
- mx = PackMidx(full)
- (mxd, mxf) = os.path.split(mx.name)
- broken = 0
- for n in mx.idxnames:
- if not os.path.exists(os.path.join(mxd, n)):
- log(('warning: index %s missing\n' +
- ' used by %s\n') % (n, mxf))
- broken += 1
- if not broken:
- midxl.append(mx)
- midxl.sort(lambda x,y: -cmp(len(x),len(y)))
- for ix in midxl:
- any = 0
- for sub in ix.idxnames:
- if not d.get(os.path.join(self.dir, sub)):
- self.packs.append(ix)
- d[ix.name] = 1
- for name in ix.idxnames:
- d[os.path.join(self.dir, name)] = 1
- any += 1
- break
- if not any:
- log('midx: removing redundant: %s\n'
- % os.path.basename(ix.name))
- unlink(ix.name)
- for f in os.listdir(self.dir):
- full = os.path.join(self.dir, f)
- if f.endswith('.idx') and not d.get(full):
- self.packs.append(PackIndex(full))
- d[full] = 1
- log('MultiPackIndex: using %d index%s.\n'
- % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
-
- def add(self, hash):
- self.also[hash] = 1
-
- def zap_also(self):
- self.also = {}
-
-
-def calc_hash(type, content):
- header = '%s %d\0' % (type, len(content))
- sum = sha.sha(header)
- sum.update(content)
- return sum.digest()
-
-
-def _shalist_sort_key(ent):
- (mode, name, id) = ent
- if stat.S_ISDIR(int(mode, 8)):
- return name + '/'
- else:
- return name
-
-
-def idxmerge(idxlist):
- total = sum(len(i) for i in idxlist)
- iters = (iter(i) for i in idxlist)
- heap = [(next(it), it) for it in iters]
- heapq.heapify(heap)
- count = 0
- last = None
- while heap:
- if (count % 10024) == 0:
- progress('Reading indexes: %.2f%% (%d/%d)\r'
- % (count*100.0/total, count, total))
- (e, it) = heap[0]
- if e != last:
- yield e
- last = e
- count += 1
- e = next(it)
- if e:
- heapq.heapreplace(heap, (e, it))
- else:
- heapq.heappop(heap)
- log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
-
-
-class PackWriter:
- def __init__(self, objcache_maker=None):
- self.count = 0
- self.outbytes = 0
- self.filename = None
- self.file = None
- self.objcache_maker = objcache_maker
- self.objcache = None
-
- def __del__(self):
- self.close()
-
- def _make_objcache(self):
- if not self.objcache:
- if self.objcache_maker:
- self.objcache = self.objcache_maker()
- else:
- self.objcache = MultiPackIndex(repo('objects/pack'))
-
- def _open(self):
- if not self.file:
- self._make_objcache()
- (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
- self.file = os.fdopen(fd, 'w+b')
- assert(name.endswith('.pack'))
- self.filename = name[:-5]
- self.file.write('PACK\0\0\0\2\0\0\0\0')
-
- def _raw_write(self, datalist):
- self._open()
- f = self.file
- for d in datalist:
- f.write(d)
- self.outbytes += len(d)
- self.count += 1
-
- def _write(self, bin, type, content):
- if verbose:
- log('>')
- self._raw_write(_encode_packobj(type, content))
- return bin
-
- def breakpoint(self):
- id = self._end()
- self.outbytes = self.count = 0
- return id
-
- def write(self, type, content):
- return self._write(calc_hash(type, content), type, content)
-
- def exists(self, id):
- if not self.objcache:
- self._make_objcache()
- return self.objcache.exists(id)
-
- def maybe_write(self, type, content):
- bin = calc_hash(type, content)
- if not self.exists(bin):
- self._write(bin, type, content)
- self.objcache.add(bin)
- return bin
-
- def new_blob(self, blob):
- return self.maybe_write('blob', blob)
-
- def new_tree(self, shalist):
- shalist = sorted(shalist, key = _shalist_sort_key)
- l = []
- for (mode,name,bin) in shalist:
- assert(mode)
- assert(mode != '0')
- assert(mode[0] != '0')
- assert(name)
- assert(len(bin) == 20)
- l.append('%s %s\0%s' % (mode,name,bin))
- return self.maybe_write('tree', ''.join(l))
-
- def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
- l = []
- if tree: l.append('tree %s' % tree.encode('hex'))
- if parent: l.append('parent %s' % parent.encode('hex'))
- if author: l.append('author %s %s' % (author, _git_date(adate)))
- if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
- l.append('')
- l.append(msg)
- return self.maybe_write('commit', '\n'.join(l))
-
- def new_commit(self, parent, tree, msg):
- now = time.time()
- userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
- commit = self._new_commit(tree, parent,
- userline, now, userline, now,
- msg)
- return commit
-
- def abort(self):
- f = self.file
- if f:
- self.file = None
- f.close()
- os.unlink(self.filename + '.pack')
-
- def _end(self):
- f = self.file
- if not f: return None
- self.file = None
- self.objcache = None
-
- # update object count
- f.seek(8)
- cp = struct.pack('!i', self.count)
- assert(len(cp) == 4)
- f.write(cp)
-
- # calculate the pack sha1sum
- f.seek(0)
- sum = sha.sha()
- while 1:
- b = f.read(65536)
- sum.update(b)
- if not b: break
- f.write(sum.digest())
-
- f.close()
-
- p = subprocess.Popen(['git', 'index-pack', '-v',
- '--index-version=2',
- self.filename + '.pack'],
- preexec_fn = _gitenv,
- stdout = subprocess.PIPE)
- out = p.stdout.read().strip()
- _git_wait('git index-pack', p)
- if not out:
- raise GitError('git index-pack produced no output')
- nameprefix = repo('objects/pack/%s' % out)
- if os.path.exists(self.filename + '.map'):
- os.unlink(self.filename + '.map')
- os.rename(self.filename + '.pack', nameprefix + '.pack')
- os.rename(self.filename + '.idx', nameprefix + '.idx')
- return nameprefix
-
- def close(self):
- return self._end()
-
-
-def _git_date(date):
- return time.strftime('%s %z', time.localtime(date))
-
-
-def _gitenv():
- os.environ['GIT_DIR'] = os.path.abspath(repo())
-
-
-def list_refs(refname = None):
- argv = ['git', 'show-ref', '--']
- if refname:
- argv += [refname]
- p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
- out = p.stdout.read().strip()
- rv = p.wait() # not fatal
- if rv:
- assert(not out)
- if out:
- for d in out.split('\n'):
- (sha, name) = d.split(' ', 1)
- yield (name, sha.decode('hex'))
-
-
-def read_ref(refname):
- l = list(list_refs(refname))
- if l:
- assert(len(l) == 1)
- return l[0][1]
- else:
- return None
-
-
-def rev_list(ref):
- assert(not ref.startswith('-'))
- argv = ['git', 'rev-list', '--pretty=format:%ct', ref, '--']
- p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
- commit = None
- for row in p.stdout:
- s = row.strip()
- if s.startswith('commit '):
- commit = s[7:].decode('hex')
- else:
- date = int(s)
- yield (date, commit)
- rv = p.wait() # not fatal
- if rv:
- raise GitError, 'git rev-list returned error %d' % rv
-
-
-def update_ref(refname, newval, oldval):
- if not oldval:
- oldval = ''
- assert(refname.startswith('refs/heads/'))
- p = subprocess.Popen(['git', 'update-ref', refname,
- newval.encode('hex'), oldval.encode('hex')],
- preexec_fn = _gitenv)
- _git_wait('git update-ref', p)
-
-
-def guess_repo(path=None):
- global repodir
- if path:
- repodir = path
- if not repodir:
- repodir = os.environ.get('BUP_DIR')
- if not repodir:
- repodir = os.path.expanduser('~/.bup')
-
-
-def init_repo(path=None):
- guess_repo(path)
- d = repo()
- if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
- raise GitError('"%d" exists but is not a directory\n' % d)
- p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
- preexec_fn = _gitenv)
- _git_wait('git init', p)
- p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
- stdout=sys.stderr, preexec_fn = _gitenv)
- _git_wait('git config', p)
-
-
-def check_repo_or_die(path=None):
- guess_repo(path)
- if not os.path.isdir(repo('objects/pack/.')):
- if repodir == home_repodir:
- init_repo()
- else:
- log('error: %r is not a bup/git repository\n' % repo())
- sys.exit(15)
-
-
-def _treeparse(buf):
- ofs = 0
- while ofs < len(buf):
- z = buf[ofs:].find('\0')
- assert(z > 0)
- spl = buf[ofs:ofs+z].split(' ', 1)
- assert(len(spl) == 2)
- sha = buf[ofs+z+1:ofs+z+1+20]
- ofs += z+1+20
- yield (spl[0], spl[1], sha)
-
-
-_ver = None
-def ver():
- global _ver
- if not _ver:
- p = subprocess.Popen(['git', '--version'],
- stdout=subprocess.PIPE)
- gvs = p.stdout.read()
- _git_wait('git --version', p)
- m = re.match(r'git version (\S+.\S+)', gvs)
- if not m:
- raise GitError('git --version weird output: %r' % gvs)
- _ver = tuple(m.group(1).split('.'))
- needed = ('1','5', '3', '1')
- if _ver < needed:
- raise GitError('git version %s or higher is required; you have %s'
- % ('.'.join(needed), '.'.join(_ver)))
- return _ver
-
-
-def _git_wait(cmd, p):
- rv = p.wait()
- if rv != 0:
- raise GitError('%s returned %d' % (cmd, rv))
-
-
-def _git_capture(argv):
- p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
- r = p.stdout.read()
- _git_wait(repr(argv), p)
- return r
-
-
-_ver_warned = 0
-class CatPipe:
- def __init__(self):
- global _ver_warned
- wanted = ('1','5','6')
- if ver() < wanted:
- if not _ver_warned:
- log('warning: git version < %s; bup will be slow.\n'
- % '.'.join(wanted))
- _ver_warned = 1
- self.get = self._slow_get
- else:
- self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- preexec_fn = _gitenv)
- self.get = self._fast_get
- self.inprogress = None
-
- def _fast_get(self, id):
- if self.inprogress:
- log('_fast_get: opening %r while %r is open'
- % (id, self.inprogress))
- assert(not self.inprogress)
- assert(id.find('\n') < 0)
- assert(id.find('\r') < 0)
- assert(id[0] != '-')
- self.inprogress = id
- self.p.stdin.write('%s\n' % id)
- hdr = self.p.stdout.readline()
- if hdr.endswith(' missing\n'):
- raise KeyError('blob %r is missing' % id)
- spl = hdr.split(' ')
- if len(spl) != 3 or len(spl[0]) != 40:
- raise GitError('expected blob, got %r' % spl)
- (hex, type, size) = spl
-
- def ondone():
- assert(self.p.stdout.readline() == '\n')
- self.inprogress = None
-
- it = AutoFlushIter(chunkyreader(self.p.stdout, int(spl[2])),
- ondone = ondone)
- yield type
- for blob in it:
- yield blob
- del it
-
- def _slow_get(self, id):
- assert(id.find('\n') < 0)
- assert(id.find('\r') < 0)
- assert(id[0] != '-')
- type = _git_capture(['git', 'cat-file', '-t', id]).strip()
- yield type
-
- p = subprocess.Popen(['git', 'cat-file', type, id],
- stdout=subprocess.PIPE,
- preexec_fn = _gitenv)
- for blob in chunkyreader(p.stdout):
- yield blob
- _git_wait('git cat-file', p)
-
- def _join(self, it):
- type = it.next()
- if type == 'blob':
- for blob in it:
- yield blob
- elif type == 'tree':
- treefile = ''.join(it)
- for (mode, name, sha) in _treeparse(treefile):
- for blob in self.join(sha.encode('hex')):
- yield blob
- elif type == 'commit':
- treeline = ''.join(it).split('\n')[0]
- assert(treeline.startswith('tree '))
- for blob in self.join(treeline[5:]):
- yield blob
- else:
- raise GitError('invalid object type %r: expected blob/tree/commit'
- % type)
-
- def join(self, id):
- try:
- for d in self._join(self.get(id)):
- yield d
- except StopIteration:
- log('booger!\n')
-
-
-def cat(id):
- c = CatPipe()
- for d in c.join(id):
- yield d
+++ /dev/null
-import sys, math
-import git, _hashsplit
-from helpers import *
-
-BLOB_LWM = 8192*2
-BLOB_MAX = BLOB_LWM*2
-BLOB_HWM = 1024*1024
-MAX_PER_TREE = 256
-progress_callback = None
-max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
-max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
-fanout = 16
-
-class Buf:
- def __init__(self):
- self.data = ''
- self.start = 0
-
- def put(self, s):
- if s:
- self.data = buffer(self.data, self.start) + s
- self.start = 0
-
- def peek(self, count):
- return buffer(self.data, self.start, count)
-
- def eat(self, count):
- self.start += count
-
- def get(self, count):
- v = buffer(self.data, self.start, count)
- self.start += count
- return v
-
- def used(self):
- return len(self.data) - self.start
-
-
-def splitbuf(buf):
- b = buf.peek(buf.used())
- (ofs, bits) = _hashsplit.splitbuf(b)
- if ofs:
- buf.eat(ofs)
- return (buffer(b, 0, ofs), bits)
- return (None, 0)
-
-
-def blobiter(files):
- for f in files:
- while 1:
- b = f.read(BLOB_HWM)
- if not b:
- break
- yield b
-
-
-def drainbuf(buf, finalize):
- while 1:
- (blob, bits) = splitbuf(buf)
- if blob:
- yield (blob, bits)
- else:
- break
- if buf.used() > BLOB_MAX:
- # limit max blob size
- yield (buf.get(buf.used()), 0)
- elif finalize and buf.used():
- yield (buf.get(buf.used()), 0)
-
-
-def hashsplit_iter(files):
- assert(BLOB_HWM > BLOB_MAX)
- buf = Buf()
- fi = blobiter(files)
- while 1:
- for i in drainbuf(buf, finalize=False):
- yield i
- while buf.used() < BLOB_HWM:
- bnew = next(fi)
- if not bnew:
- # eof
- for i in drainbuf(buf, finalize=True):
- yield i
- return
- buf.put(bnew)
-
-
-total_split = 0
-def _split_to_blobs(w, files):
- global total_split
- for (blob, bits) in hashsplit_iter(files):
- sha = w.new_blob(blob)
- total_split += len(blob)
- if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
- w.breakpoint()
- if progress_callback:
- progress_callback(len(blob))
- yield (sha, len(blob), bits)
-
-
-def _make_shalist(l):
- ofs = 0
- shalist = []
- for (mode, sha, size) in l:
- shalist.append((mode, '%016x' % ofs, sha))
- ofs += size
- total = ofs
- return (shalist, total)
-
-
-def _squish(w, stacks, n):
- i = 0
- while i<n or len(stacks[i]) > MAX_PER_TREE:
- while len(stacks) <= i+1:
- stacks.append([])
- if len(stacks[i]) == 1:
- stacks[i+1] += stacks[i]
- elif stacks[i]:
- (shalist, size) = _make_shalist(stacks[i])
- tree = w.new_tree(shalist)
- stacks[i+1].append(('40000', tree, size))
- stacks[i] = []
- i += 1
-
-
-def split_to_shalist(w, files):
- sl = _split_to_blobs(w, files)
- if not fanout:
- shal = []
- for (sha,size,bits) in sl:
- shal.append(('100644', sha, size))
- return _make_shalist(shal)[0]
- else:
- base_bits = _hashsplit.blobbits()
- fanout_bits = int(math.log(fanout, 2))
- def bits_to_idx(n):
- assert(n >= base_bits)
- return (n - base_bits)/fanout_bits
- stacks = [[]]
- for (sha,size,bits) in sl:
- assert(bits <= 32)
- stacks[0].append(('100644', sha, size))
- if bits > base_bits:
- _squish(w, stacks, bits_to_idx(bits))
- #log('stacks: %r\n' % [len(i) for i in stacks])
- _squish(w, stacks, len(stacks)-1)
- #log('stacks: %r\n' % [len(i) for i in stacks])
- return _make_shalist(stacks[-1])[0]
-
-
-def split_to_blob_or_tree(w, files):
- shalist = list(split_to_shalist(w, files))
- if len(shalist) == 1:
- return (shalist[0][0], shalist[0][2])
- elif len(shalist) == 0:
- return ('100644', w.new_blob(''))
- else:
- return ('40000', w.new_tree(shalist))
+++ /dev/null
-import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
-
-
-def log(s):
- sys.stderr.write(s)
-
-
-def mkdirp(d):
- try:
- os.makedirs(d)
- except OSError, e:
- if e.errno == errno.EEXIST:
- pass
- else:
- raise
-
-
-def next(it):
- try:
- return it.next()
- except StopIteration:
- return None
-
-
-def unlink(f):
- try:
- os.unlink(f)
- except OSError, e:
- if e.errno == errno.ENOENT:
- pass # it doesn't exist, that's what you asked for
-
-
-def readpipe(argv):
- p = subprocess.Popen(argv, stdout=subprocess.PIPE)
- r = p.stdout.read()
- p.wait()
- return r
-
-
-# FIXME: this function isn't very generic, because it splits the filename
-# in an odd way and depends on a terminating '/' to indicate directories.
-# But it's used in a couple of places, so let's put it here.
-def pathsplit(p):
- l = p.split('/')
- l = [i+'/' for i in l[:-1]] + l[-1:]
- if l[-1] == '':
- l.pop() # extra blank caused by terminating '/'
- return l
-
-
-# like os.path.realpath, but doesn't follow a symlink for the last element.
-# (ie. if 'p' itself is itself a symlink, this one won't follow it)
-def realpath(p):
- try:
- st = os.lstat(p)
- except OSError:
- st = None
- if st and stat.S_ISLNK(st.st_mode):
- (dir, name) = os.path.split(p)
- dir = os.path.realpath(dir)
- out = os.path.join(dir, name)
- else:
- out = os.path.realpath(p)
- #log('realpathing:%r,%r\n' % (p, out))
- return out
-
-
-_username = None
-def username():
- global _username
- if not _username:
- uid = os.getuid()
- try:
- _username = pwd.getpwuid(uid)[0]
- except KeyError:
- _username = 'user%d' % uid
- return _username
-
-
-_userfullname = None
-def userfullname():
- global _userfullname
- if not _userfullname:
- uid = os.getuid()
- try:
- _userfullname = pwd.getpwuid(uid)[4].split(',')[0]
- except KeyError:
- _userfullname = 'user%d' % uid
- return _userfullname
-
-
-_hostname = None
-def hostname():
- global _hostname
- if not _hostname:
- _hostname = socket.getfqdn()
- return _hostname
-
-
-class NotOk(Exception):
- pass
-
-class Conn:
- def __init__(self, inp, outp):
- self.inp = inp
- self.outp = outp
-
- def read(self, size):
- self.outp.flush()
- return self.inp.read(size)
-
- def readline(self):
- self.outp.flush()
- return self.inp.readline()
-
- def write(self, data):
- #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
- self.outp.write(data)
-
- def has_input(self):
- [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
- if rl:
- assert(rl[0] == self.inp.fileno())
- return True
- else:
- return None
-
- def ok(self):
- self.write('\nok\n')
-
- def error(self, s):
- s = re.sub(r'\s+', ' ', str(s))
- self.write('\nerror %s\n' % s)
-
- def _check_ok(self, onempty):
- self.outp.flush()
- rl = ''
- for rl in linereader(self.inp):
- #log('%d got line: %r\n' % (os.getpid(), rl))
- if not rl: # empty line
- continue
- elif rl == 'ok':
- return None
- elif rl.startswith('error '):
- #log('client: error: %s\n' % rl[6:])
- return NotOk(rl[6:])
- else:
- onempty(rl)
- raise Exception('server exited unexpectedly; see errors above')
-
- def drain_and_check_ok(self):
- def onempty(rl):
- pass
- return self._check_ok(onempty)
-
- def check_ok(self):
- def onempty(rl):
- raise Exception('expected "ok", got %r' % rl)
- return self._check_ok(onempty)
-
-
-def linereader(f):
- while 1:
- line = f.readline()
- if not line:
- break
- yield line[:-1]
-
-
-def chunkyreader(f, count = None):
- if count != None:
- while count > 0:
- b = f.read(min(count, 65536))
- if not b:
- raise IOError('EOF with %d bytes remaining' % count)
- yield b
- count -= len(b)
- else:
- while 1:
- b = f.read(65536)
- if not b: break
- yield b
-
-
-class AutoFlushIter:
- def __init__(self, it, ondone = None):
- self.it = it
- self.ondone = ondone
-
- def __iter__(self):
- return self
-
- def next(self):
- return self.it.next()
-
- def __del__(self):
- for i in self.it:
- pass
- if self.ondone:
- self.ondone()
-
-
-def slashappend(s):
- if s and not s.endswith('/'):
- return s + '/'
- else:
- return s
-
-
-def _mmap_do(f, len, flags, prot):
- if not len:
- st = os.fstat(f.fileno())
- len = st.st_size
- map = mmap.mmap(f.fileno(), len, flags, prot)
- f.close() # map will persist beyond file close
- return map
-
-
-def mmap_read(f, len = 0):
- return _mmap_do(f, len, mmap.MAP_PRIVATE, mmap.PROT_READ)
-
-
-def mmap_readwrite(f, len = 0):
- return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
-
-
-def parse_num(s):
- g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
- if not g:
- raise ValueError("can't parse %r as a number" % s)
- (val, unit) = g.groups()
- num = float(val)
- unit = unit.lower()
- if unit in ['t', 'tb']:
- mult = 1024*1024*1024*1024
- elif unit in ['g', 'gb']:
- mult = 1024*1024*1024
- elif unit in ['m', 'mb']:
- mult = 1024*1024
- elif unit in ['k', 'kb']:
- mult = 1024
- elif unit in ['', 'b']:
- mult = 1
- else:
- raise ValueError("invalid unit %r in number %r" % (unit, s))
- return int(num*mult)
-
-
-# count the number of elements in an iterator (consumes the iterator)
-def count(l):
- return reduce(lambda x,y: x+1, l)
-
-
-def atoi(s):
- try:
- return int(s or '0')
- except ValueError:
- return 0
-
-
-saved_errors = []
-def add_error(e):
- saved_errors.append(e)
- log('%-70s\n' % e)
-
-istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY'))
-def progress(s):
- if istty:
- log(s)
+++ /dev/null
-import os, stat, time, struct, tempfile
-from helpers import *
-
-EMPTY_SHA = '\0'*20
-FAKE_SHA = '\x01'*20
-INDEX_HDR = 'BUPI\0\0\0\2'
-INDEX_SIG = '!IIIIIQII20sHII'
-ENTLEN = struct.calcsize(INDEX_SIG)
-FOOTER_SIG = '!Q'
-FOOTLEN = struct.calcsize(FOOTER_SIG)
-
-IX_EXISTS = 0x8000
-IX_HASHVALID = 0x4000
-
-class Error(Exception):
- pass
-
-
-class Level:
- def __init__(self, ename, parent):
- self.parent = parent
- self.ename = ename
- self.list = []
- self.count = 0
-
- def write(self, f):
- (ofs,n) = (f.tell(), len(self.list))
- if self.list:
- count = len(self.list)
- #log('popping %r with %d entries\n'
- # % (''.join(self.ename), count))
- for e in self.list:
- e.write(f)
- if self.parent:
- self.parent.count += count + self.count
- return (ofs,n)
-
-
-def _golevel(level, f, ename, newentry):
- # close nodes back up the tree
- assert(level)
- while ename[:len(level.ename)] != level.ename:
- n = BlankNewEntry(level.ename[-1])
- (n.children_ofs,n.children_n) = level.write(f)
- level.parent.list.append(n)
- level = level.parent
-
- # create nodes down the tree
- while len(level.ename) < len(ename):
- level = Level(ename[:len(level.ename)+1], level)
-
- # are we in precisely the right place?
- assert(ename == level.ename)
- n = newentry or BlankNewEntry(ename and level.ename[-1] or None)
- (n.children_ofs,n.children_n) = level.write(f)
- if level.parent:
- level.parent.list.append(n)
- level = level.parent
-
- return level
-
-
-class Entry:
- def __init__(self, basename, name):
- self.basename = str(basename)
- self.name = str(name)
- self.children_ofs = 0
- self.children_n = 0
-
- def __repr__(self):
- return ("(%s,0x%04x,%d,%d,%d,%d,%d,%s/%s,0x%04x,0x%08x/%d)"
- % (self.name, self.dev,
- self.ctime, self.mtime, self.uid, self.gid,
- self.size, oct(self.mode), oct(self.gitmode),
- self.flags, self.children_ofs, self.children_n))
-
- def packed(self):
- return struct.pack(INDEX_SIG,
- self.dev, self.ctime, self.mtime,
- self.uid, self.gid, self.size, self.mode,
- self.gitmode, self.sha, self.flags,
- self.children_ofs, self.children_n)
-
- def from_stat(self, st, tstart):
- old = (self.dev, self.ctime, self.mtime,
- self.uid, self.gid, self.size, self.flags & IX_EXISTS)
- new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
- st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
- self.dev = st.st_dev
- self.ctime = int(st.st_ctime)
- self.mtime = int(st.st_mtime)
- self.uid = st.st_uid
- self.gid = st.st_gid
- self.size = st.st_size
- self.mode = st.st_mode
- self.flags |= IX_EXISTS
- if int(st.st_ctime) >= tstart or old != new \
- or self.sha == EMPTY_SHA or not self.gitmode:
- self.invalidate()
-
- def is_valid(self):
- f = IX_HASHVALID|IX_EXISTS
- return (self.flags & f) == f
-
- def invalidate(self):
- self.flags &= ~IX_HASHVALID
-
- def validate(self, gitmode, sha):
- assert(sha)
- assert(gitmode)
- self.gitmode = gitmode
- self.sha = sha
- self.flags |= IX_HASHVALID|IX_EXISTS
-
- def exists(self):
- return not self.is_deleted()
-
- def is_deleted(self):
- return (self.flags & IX_EXISTS) == 0
-
- def set_deleted(self):
- if self.flags & IX_EXISTS:
- self.flags &= ~(IX_EXISTS | IX_HASHVALID)
-
- def is_real(self):
- return not self.is_fake()
-
- def is_fake(self):
- return not self.ctime
-
- def __cmp__(a, b):
- return (cmp(a.name, b.name)
- or -cmp(a.is_valid(), b.is_valid())
- or -cmp(a.is_fake(), b.is_fake()))
-
- def write(self, f):
- f.write(self.basename + '\0' + self.packed())
-
-
-class NewEntry(Entry):
- def __init__(self, basename, name, dev, ctime, mtime, uid, gid,
- size, mode, gitmode, sha, flags, children_ofs, children_n):
- Entry.__init__(self, basename, name)
- (self.dev, self.ctime, self.mtime, self.uid, self.gid,
- self.size, self.mode, self.gitmode, self.sha,
- self.flags, self.children_ofs, self.children_n
- ) = (dev, int(ctime), int(mtime), uid, gid,
- size, mode, gitmode, sha, flags, children_ofs, children_n)
-
-
-class BlankNewEntry(NewEntry):
- def __init__(self, basename):
- NewEntry.__init__(self, basename, basename,
- 0, 0, 0, 0, 0, 0, 0,
- 0, EMPTY_SHA, 0, 0, 0)
-
-
-class ExistingEntry(Entry):
- def __init__(self, parent, basename, name, m, ofs):
- Entry.__init__(self, basename, name)
- self.parent = parent
- self._m = m
- self._ofs = ofs
- (self.dev, self.ctime, self.mtime, self.uid, self.gid,
- self.size, self.mode, self.gitmode, self.sha,
- self.flags, self.children_ofs, self.children_n
- ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN)))
-
- def repack(self):
- self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
- if self.parent and not self.is_valid():
- self.parent.invalidate()
- self.parent.repack()
-
- def iter(self, name=None, wantrecurse=None):
- dname = name
- if dname and not dname.endswith('/'):
- dname += '/'
- ofs = self.children_ofs
- assert(ofs <= len(self._m))
- assert(self.children_n < 1000000)
- for i in xrange(self.children_n):
- eon = self._m.find('\0', ofs)
- assert(eon >= 0)
- assert(eon >= ofs)
- assert(eon > ofs)
- basename = str(buffer(self._m, ofs, eon-ofs))
- child = ExistingEntry(self, basename, self.name + basename,
- self._m, eon+1)
- if (not dname
- or child.name.startswith(dname)
- or child.name.endswith('/') and dname.startswith(child.name)):
- if not wantrecurse or wantrecurse(child):
- for e in child.iter(name=name, wantrecurse=wantrecurse):
- yield e
- if not name or child.name == name or child.name.startswith(dname):
- yield child
- ofs = eon + 1 + ENTLEN
-
- def __iter__(self):
- return self.iter()
-
-
-class Reader:
- def __init__(self, filename):
- self.filename = filename
- self.m = ''
- self.writable = False
- self.count = 0
- f = None
- try:
- f = open(filename, 'r+')
- except IOError, e:
- if e.errno == errno.ENOENT:
- pass
- else:
- raise
- if f:
- b = f.read(len(INDEX_HDR))
- if b != INDEX_HDR:
- log('warning: %s: header: expected %r, got %r'
- % (filename, INDEX_HDR, b))
- else:
- st = os.fstat(f.fileno())
- if st.st_size:
- self.m = mmap_readwrite(f)
- self.writable = True
- self.count = struct.unpack(FOOTER_SIG,
- str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
-
- def __del__(self):
- self.close()
-
- def __len__(self):
- return int(self.count)
-
- def forward_iter(self):
- ofs = len(INDEX_HDR)
- while ofs+ENTLEN <= len(self.m)-FOOTLEN:
- eon = self.m.find('\0', ofs)
- assert(eon >= 0)
- assert(eon >= ofs)
- assert(eon > ofs)
- basename = str(buffer(self.m, ofs, eon-ofs))
- yield ExistingEntry(None, basename, basename, self.m, eon+1)
- ofs = eon + 1 + ENTLEN
-
- def iter(self, name=None, wantrecurse=None):
- if len(self.m) > len(INDEX_HDR)+ENTLEN:
- dname = name
- if dname and not dname.endswith('/'):
- dname += '/'
- root = ExistingEntry(None, '/', '/',
- self.m, len(self.m)-FOOTLEN-ENTLEN)
- for sub in root.iter(name=name, wantrecurse=wantrecurse):
- yield sub
- if not dname or dname == root.name:
- yield root
-
- def __iter__(self):
- return self.iter()
-
- def exists(self):
- return self.m
-
- def save(self):
- if self.writable and self.m:
- self.m.flush()
-
- def close(self):
- self.save()
- if self.writable and self.m:
- self.m = None
- self.writable = False
-
- def filter(self, prefixes, wantrecurse=None):
- for (rp, path) in reduce_paths(prefixes):
- for e in self.iter(rp, wantrecurse=wantrecurse):
- assert(e.name.startswith(rp))
- name = path + e.name[len(rp):]
- yield (name, e)
-
-
-class Writer:
- def __init__(self, filename):
- self.rootlevel = self.level = Level([], None)
- self.f = None
- self.count = 0
- self.lastfile = None
- self.filename = None
- self.filename = filename = realpath(filename)
- (dir,name) = os.path.split(filename)
- (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
- self.f = os.fdopen(ffd, 'wb', 65536)
- self.f.write(INDEX_HDR)
-
- def __del__(self):
- self.abort()
-
- def abort(self):
- f = self.f
- self.f = None
- if f:
- f.close()
- os.unlink(self.tmpname)
-
- def flush(self):
- if self.level:
- self.level = _golevel(self.level, self.f, [], None)
- self.count = self.rootlevel.count
- if self.count:
- self.count += 1
- self.f.write(struct.pack(FOOTER_SIG, self.count))
- self.f.flush()
- assert(self.level == None)
-
- def close(self):
- self.flush()
- f = self.f
- self.f = None
- if f:
- f.close()
- os.rename(self.tmpname, self.filename)
-
- def _add(self, ename, entry):
- if self.lastfile and self.lastfile <= ename:
- raise Error('%r must come before %r'
- % (''.join(e.name), ''.join(self.lastfile)))
- self.lastfile = e.name
- self.level = _golevel(self.level, self.f, ename, entry)
-
- def add(self, name, st, hashgen = None):
- endswith = name.endswith('/')
- ename = pathsplit(name)
- basename = ename[-1]
- #log('add: %r %r\n' % (basename, name))
- flags = IX_EXISTS
- sha = None
- if hashgen:
- (gitmode, sha) = hashgen(name)
- flags |= IX_HASHVALID
- else:
- (gitmode, sha) = (0, EMPTY_SHA)
- if st:
- isdir = stat.S_ISDIR(st.st_mode)
- assert(isdir == endswith)
- e = NewEntry(basename, name, st.st_dev, int(st.st_ctime),
- int(st.st_mtime), st.st_uid, st.st_gid,
- st.st_size, st.st_mode, gitmode, sha, flags,
- 0, 0)
- else:
- assert(endswith)
- e = BlankNewEntry(basename)
- e.gitmode = gitmode
- e.sha = sha
- e.flags = flags
- self._add(ename, e)
-
- def add_ixentry(self, e):
- e.children_ofs = e.children_n = 0
- self._add(pathsplit(e.name), e)
-
- def new_reader(self):
- self.flush()
- return Reader(self.tmpname)
-
-
-def reduce_paths(paths):
- xpaths = []
- for p in paths:
- rp = realpath(p)
- try:
- st = os.lstat(rp)
- if stat.S_ISDIR(st.st_mode):
- rp = slashappend(rp)
- p = slashappend(p)
- except OSError, e:
- if e.errno != errno.ENOENT:
- raise
- xpaths.append((rp, p))
- xpaths.sort()
-
- paths = []
- prev = None
- for (rp, p) in xpaths:
- if prev and (prev == rp
- or (prev.endswith('/') and rp.startswith(prev))):
- continue # already superceded by previous path
- paths.append((rp, p))
- prev = rp
- paths.sort(reverse=True)
- return paths
-
-
-class MergeIter:
- def __init__(self, iters):
- self.iters = iters
-
- def __len__(self):
- # FIXME: doesn't remove duplicated entries between iters.
- # That only happens for parent directories, but will mean the
- # actual iteration returns fewer entries than this function counts.
- return sum(len(it) for it in self.iters)
-
- def __iter__(self):
- total = len(self)
- l = [iter(it) for it in self.iters]
- l = [(next(it),it) for it in l]
- l = filter(lambda x: x[0], l)
- count = 0
- lastname = None
- while l:
- if not (count % 1024):
- progress('bup: merging indexes (%d/%d)\r' % (count, total))
- l.sort()
- (e,it) = l.pop()
- if not e:
- continue
- if e.name != lastname:
- yield e
- lastname = e.name
- n = next(it)
- if n:
- l.append((n,it))
- count += 1
- log('bup: merging indexes (%d/%d), done.\n' % (count, total))
--- /dev/null
+#include <Python.h>
+#include <assert.h>
+#include <stdint.h>
+
+#define BLOBBITS (13)
+#define BLOBSIZE (1<<BLOBBITS)
+#define WINDOWBITS (7)
+#define WINDOWSIZE (1<<(WINDOWBITS-1))
+
+
+// FIXME: replace this with a not-stupid rolling checksum algorithm,
+// such as the one used in rsync (Adler32?)
+static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
+{
+ return ((old<<1) | (old>>31)) ^ drop ^ add;
+}
+
+
+static int find_ofs(const unsigned char *buf, int len, int *bits)
+{
+ unsigned char window[WINDOWSIZE];
+ uint32_t sum = 0;
+ int i = 0, count;
+ memset(window, 0, sizeof(window));
+
+ for (count = 0; count < len; count++)
+ {
+ sum = stupidsum_add(sum, window[i], buf[count]);
+ window[i] = buf[count];
+ i = (i + 1) % WINDOWSIZE;
+ if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
+ {
+ if (bits)
+ {
+ *bits = BLOBBITS;
+ sum >>= BLOBBITS;
+ for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
+ ;
+ }
+ return count+1;
+ }
+ }
+ return 0;
+}
+
+
+static PyObject *blobbits(PyObject *self, PyObject *args)
+{
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+ return Py_BuildValue("i", BLOBBITS);
+}
+
+
+static PyObject *splitbuf(PyObject *self, PyObject *args)
+{
+ unsigned char *buf = NULL;
+ int len = 0, out = 0, bits = -1;
+
+ if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+ return NULL;
+ out = find_ofs(buf, len, &bits);
+ return Py_BuildValue("ii", out, bits);
+}
+
+
+static PyObject *bitmatch(PyObject *self, PyObject *args)
+{
+ unsigned char *buf1 = NULL, *buf2 = NULL;
+ int len1 = 0, len2 = 0;
+ int byte, bit;
+
+ if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
+ return NULL;
+
+ bit = 0;
+ for (byte = 0; byte < len1 && byte < len2; byte++)
+ {
+ int b1 = buf1[byte], b2 = buf2[byte];
+ if (b1 != b2)
+ {
+ for (bit = 0; bit < 8; bit++)
+ if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
+ break;
+ break;
+ }
+ }
+
+ return Py_BuildValue("i", byte*8 + bit);
+}
+
+
+// I would have made this a lower-level function that just fills in a buffer
+// with random values, and then written those values from python. But that's
+// about 20% slower in my tests, and since we typically generate random
+// numbers for benchmarking other parts of bup, any slowness in generating
+// random bytes will make our benchmarks inaccurate. Plus nobody wants
+// pseudorandom bytes much except for this anyway.
+static PyObject *write_random(PyObject *self, PyObject *args)
+{
+ uint32_t buf[1024/4];
+ int fd = -1, seed = 0;
+ ssize_t ret;
+ long long len = 0, kbytes = 0, written = 0;
+
+ if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+ return NULL;
+
+ srandom(seed);
+
+ for (kbytes = len/1024; kbytes > 0; kbytes--)
+ {
+ int i;
+ for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
+ buf[i] = random();
+ ret = write(fd, buf, sizeof(buf));
+ if (ret < 0)
+ ret = 0;
+ written += ret;
+ if (ret < sizeof(buf))
+ break;
+ if (!(kbytes%1024))
+ fprintf(stderr, ".");
+ }
+
+ return Py_BuildValue("L", written);
+}
+
+
+static PyMethodDef hashsplit_methods[] = {
+ { "blobbits", blobbits, METH_VARARGS,
+ "Return the number of bits in the rolling checksum." },
+ { "splitbuf", splitbuf, METH_VARARGS,
+ "Split a list of strings based on a rolling checksum." },
+ { "bitmatch", bitmatch, METH_VARARGS,
+ "Count the number of matching prefix bits between two strings." },
+ { "write_random", write_random, METH_VARARGS,
+ "Write random bytes to the given file descriptor" },
+ { NULL, NULL, 0, NULL }, // sentinel
+};
+
+PyMODINIT_FUNC init_hashsplit(void)
+{
+ Py_InitModule("_hashsplit", hashsplit_methods);
+}
--- /dev/null
+import re, struct, errno, select
+from bup import git
+from bup.helpers import *
+from subprocess import Popen, PIPE
+
+
+class ClientError(Exception):
+ pass
+
+
+class Client:
+ def __init__(self, remote, create=False):
+ self._busy = None
+ self.p = None
+ self.conn = None
+ rs = remote.split(':', 1)
+ nicedir = os.path.split(os.path.abspath(sys.argv[0]))[0]
+ nicedir = re.sub(r':', "_", nicedir)
+ if len(rs) == 1:
+ (host, dir) = ('NONE', remote)
+ def fixenv():
+ os.environ['PATH'] = ':'.join([nicedir,
+ os.environ.get('PATH', '')])
+ argv = ['bup', 'server']
+ else:
+ (host, dir) = rs
+ fixenv = None
+ # WARNING: shell quoting security holes are possible here, so we
+ # have to be super careful. We have to use 'sh -c' because
+ # csh-derived shells can't handle PATH= notation. We can't
+ # set PATH in advance, because ssh probably replaces it. We
+ # can't exec *safely* using argv, because *both* ssh and 'sh -c'
+ # allow shellquoting. So we end up having to double-shellquote
+ # stuff here.
+ escapedir = re.sub(r'([^\w/])', r'\\\\\\\1', nicedir)
+ cmd = r"""
+ sh -c PATH=%s:'$PATH bup server'
+ """ % escapedir
+ argv = ['ssh', host, '--', cmd.strip()]
+ #log('argv is: %r\n' % argv)
+ (self.host, self.dir) = (host, dir)
+ self.cachedir = git.repo('index-cache/%s'
+ % re.sub(r'[^@\w]', '_',
+ "%s:%s" % (host, dir)))
+ try:
+ self.p = p = Popen(argv, stdin=PIPE, stdout=PIPE, preexec_fn=fixenv)
+ except OSError, e:
+ raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
+ self.conn = conn = Conn(p.stdout, p.stdin)
+ if dir:
+ dir = re.sub(r'[\r\n]', ' ', dir)
+ if create:
+ conn.write('init-dir %s\n' % dir)
+ else:
+ conn.write('set-dir %s\n' % dir)
+ self.check_ok()
+ self.sync_indexes_del()
+
+ def __del__(self):
+ try:
+ self.close()
+ except IOError, e:
+ if e.errno == errno.EPIPE:
+ pass
+ else:
+ raise
+
+ def close(self):
+ if self.conn and not self._busy:
+ self.conn.write('quit\n')
+ if self.p:
+ self.p.stdin.close()
+ while self.p.stdout.read(65536):
+ pass
+ self.p.stdout.close()
+ self.p.wait()
+ rv = self.p.wait()
+ if rv:
+ raise ClientError('server tunnel returned exit code %d' % rv)
+ self.conn = None
+ self.p = None
+
+ def check_ok(self):
+ rv = self.p.poll()
+ if rv != None:
+ raise ClientError('server exited unexpectedly with code %r' % rv)
+ try:
+ return self.conn.check_ok()
+ except Exception, e:
+ raise ClientError, e, sys.exc_info()[2]
+
+ def check_busy(self):
+ if self._busy:
+ raise ClientError('already busy with command %r' % self._busy)
+
+ def _not_busy(self):
+ self._busy = None
+
+ def sync_indexes_del(self):
+ self.check_busy()
+ conn = self.conn
+ conn.write('list-indexes\n')
+ packdir = git.repo('objects/pack')
+ all = {}
+ needed = {}
+ for line in linereader(conn):
+ if not line:
+ break
+ all[line] = 1
+ assert(line.find('/') < 0)
+ if not os.path.exists(os.path.join(self.cachedir, line)):
+ needed[line] = 1
+ self.check_ok()
+
+ mkdirp(self.cachedir)
+ for f in os.listdir(self.cachedir):
+ if f.endswith('.idx') and not f in all:
+ log('pruning old index: %r\n' % f)
+ os.unlink(os.path.join(self.cachedir, f))
+
+ def sync_index(self, name):
+ #log('requesting %r\n' % name)
+ mkdirp(self.cachedir)
+ self.conn.write('send-index %s\n' % name)
+ n = struct.unpack('!I', self.conn.read(4))[0]
+ assert(n)
+ fn = os.path.join(self.cachedir, name)
+ f = open(fn + '.tmp', 'w')
+ count = 0
+ progress('Receiving index: %d/%d\r' % (count, n))
+ for b in chunkyreader(self.conn, n):
+ f.write(b)
+ count += len(b)
+ progress('Receiving index: %d/%d\r' % (count, n))
+ progress('Receiving index: %d/%d, done.\n' % (count, n))
+ self.check_ok()
+ f.close()
+ os.rename(fn + '.tmp', fn)
+
+ def _make_objcache(self):
+ ob = self._busy
+ self._busy = None
+ #self.sync_indexes()
+ self._busy = ob
+ return git.MultiPackIndex(self.cachedir)
+
+ def _suggest_pack(self, indexname):
+ log('received index suggestion: %s\n' % indexname)
+ ob = self._busy
+ if ob:
+ assert(ob == 'receive-objects')
+ self._busy = None
+ self.conn.write('\xff\xff\xff\xff') # suspend receive-objects
+ self.conn.drain_and_check_ok()
+ self.sync_index(indexname)
+ if ob:
+ self.conn.write('receive-objects\n')
+ self._busy = ob
+
+ def new_packwriter(self):
+ self.check_busy()
+ self._busy = 'receive-objects'
+ return PackWriter_Remote(self.conn,
+ objcache_maker = self._make_objcache,
+ suggest_pack = self._suggest_pack,
+ onclose = self._not_busy)
+
+ def read_ref(self, refname):
+ self.check_busy()
+ self.conn.write('read-ref %s\n' % refname)
+ r = self.conn.readline().strip()
+ self.check_ok()
+ if r:
+ assert(len(r) == 40) # hexified sha
+ return r.decode('hex')
+ else:
+ return None # nonexistent ref
+
+ def update_ref(self, refname, newval, oldval):
+ self.check_busy()
+ self.conn.write('update-ref %s\n%s\n%s\n'
+ % (refname, newval.encode('hex'),
+ (oldval or '').encode('hex')))
+ self.check_ok()
+
+ def cat(self, id):
+ self.check_busy()
+ self._busy = 'cat'
+ self.conn.write('cat %s\n' % re.sub(r'[\n\r]', '_', id))
+ while 1:
+ sz = struct.unpack('!I', self.conn.read(4))[0]
+ if not sz: break
+ yield self.conn.read(sz)
+ e = self.check_ok()
+ self._not_busy()
+ if e:
+ raise KeyError(str(e))
+
+
+class PackWriter_Remote(git.PackWriter):
+ def __init__(self, conn, objcache_maker, suggest_pack, onclose):
+ git.PackWriter.__init__(self, objcache_maker)
+ self.file = conn
+ self.filename = 'remote socket'
+ self.suggest_pack = suggest_pack
+ self.onclose = onclose
+ self._packopen = False
+
+ def _open(self):
+ if not self._packopen:
+ self._make_objcache()
+ self.file.write('receive-objects\n')
+ self._packopen = True
+
+ def _end(self):
+ if self._packopen and self.file:
+ self.file.write('\0\0\0\0')
+ self._packopen = False
+ while True:
+ line = self.file.readline().strip()
+ if line.startswith('index '):
+ pass
+ else:
+ break
+ id = line
+ self.file.check_ok()
+ self.objcache = None
+ if self.onclose:
+ self.onclose()
+ if self.suggest_pack:
+ self.suggest_pack(id)
+ return id
+
+ def close(self):
+ id = self._end()
+ self.file = None
+ return id
+
+ def abort(self):
+ raise GitError("don't know how to abort remote pack writing")
+
+ def _raw_write(self, datalist):
+ assert(self.file)
+ if not self._packopen:
+ self._open()
+ data = ''.join(datalist)
+ assert(len(data))
+ self.file.write(struct.pack('!I', len(data)) + data)
+ self.outbytes += len(data)
+ self.count += 1
+
+ if self.file.has_input():
+ line = self.file.readline().strip()
+ assert(line.startswith('index '))
+ idxname = line[6:]
+ if self.suggest_pack:
+ self.suggest_pack(idxname)
+ self.objcache.refresh()
--- /dev/null
+from distutils.core import setup, Extension
+
+_hashsplit_mod = Extension('_hashsplit', sources=['_hashsplit.c'])
+
+setup(name='_hashsplit',
+ version='0.1',
+ description='hashsplit helper library for bup',
+ ext_modules=[_hashsplit_mod])
--- /dev/null
+import stat, heapq
+from bup.helpers import *
+
+try:
+ O_LARGEFILE = os.O_LARGEFILE
+except AttributeError:
+ O_LARGEFILE = 0
+
+
+# the use of fchdir() and lstat() is for two reasons:
+# - help out the kernel by not making it repeatedly look up the absolute path
+# - avoid race conditions caused by doing listdir() on a changing symlink
+class OsFile:
+ def __init__(self, path):
+ self.fd = None
+ self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW)
+
+ def __del__(self):
+ if self.fd:
+ fd = self.fd
+ self.fd = None
+ os.close(fd)
+
+ def fchdir(self):
+ os.fchdir(self.fd)
+
+ def stat(self):
+ return os.fstat(self.fd)
+
+
+_IFMT = stat.S_IFMT(0xffffffff) # avoid function call in inner loop
+def _dirlist():
+ l = []
+ for n in os.listdir('.'):
+ try:
+ st = os.lstat(n)
+ except OSError, e:
+ add_error(Exception('%s: %s' % (realpath(n), str(e))))
+ continue
+ if (st.st_mode & _IFMT) == stat.S_IFDIR:
+ n += '/'
+ l.append((n,st))
+ l.sort(reverse=True)
+ return l
+
+
+def _recursive_dirlist(prepend, xdev):
+ for (name,pst) in _dirlist():
+ if name.endswith('/'):
+ if xdev != None and pst.st_dev != xdev:
+ log('Skipping %r: different filesystem.\n' % (prepend+name))
+ continue
+ try:
+ OsFile(name).fchdir()
+ except OSError, e:
+ add_error('%s: %s' % (prepend, e))
+ else:
+ for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev):
+ yield i
+ os.chdir('..')
+ yield (prepend + name, pst)
+
+
+def recursive_dirlist(paths, xdev):
+ startdir = OsFile('.')
+ try:
+ assert(type(paths) != type(''))
+ for path in paths:
+ try:
+ pst = os.lstat(path)
+ if stat.S_ISLNK(pst.st_mode):
+ yield (path, pst)
+ continue
+ except OSError, e:
+ add_error(e)
+ continue
+ try:
+ pfile = OsFile(path)
+ except OSError, e:
+ add_error(e)
+ continue
+ pst = pfile.stat()
+ if xdev:
+ xdev = pst.st_dev
+ else:
+ xdev = None
+ if stat.S_ISDIR(pst.st_mode):
+ pfile.fchdir()
+ prepend = os.path.join(path, '')
+ for i in _recursive_dirlist(prepend=prepend, xdev=xdev):
+ yield i
+ startdir.fchdir()
+ else:
+ prepend = path
+ yield (prepend,pst)
+ except:
+ try:
+ startdir.fchdir()
+ except:
+ pass
+ raise
--- /dev/null
+import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
+import heapq
+from bup.helpers import *
+
+verbose = 0
+ignore_midx = 0
+home_repodir = os.path.expanduser('~/.bup')
+repodir = None
+
+_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
+_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
+
+
+class GitError(Exception):
+ pass
+
+
+def repo(sub = ''):
+    """Return the path to the repository, or to subpath 'sub' inside it.
+
+    Requires that guess_repo()/check_repo_or_die() has set 'repodir'
+    first; raises GitError otherwise.
+    """
+    global repodir
+    if not repodir:
+        raise GitError('You should call check_repo_or_die()')
+    gd = os.path.join(repodir, '.git')
+    # A non-bare checkout keeps its metadata under .git/; prefer it if present.
+    if os.path.exists(gd):
+        repodir = gd
+    return os.path.join(repodir, sub)
+
+
+def _encode_packobj(type, content):
+    """Yield the packfile encoding of an object: a variable-length size
+    header (type bits plus 4 size bits in the first byte, then 7 size
+    bits per continuation byte), followed by zlib-compressed content.
+    """
+    szout = ''
+    sz = len(content)
+    # First byte: 3 type bits in the high nibble, low 4 bits of the size;
+    # bit 0x80 marks "more size bytes follow".
+    szbits = (sz & 0x0f) | (_typemap[type]<<4)
+    sz >>= 4
+    while 1:
+        if sz: szbits |= 0x80
+        szout += chr(szbits)
+        if not sz:
+            break
+        szbits = sz & 0x7f
+        sz >>= 7
+    z = zlib.compressobj(1)  # level 1: favor speed over compression ratio
+    yield szout
+    yield z.compress(content)
+    yield z.flush()
+
+
+def _encode_looseobj(type, content):
+ z = zlib.compressobj(1)
+ yield z.compress('%s %d\0' % (type, len(content)))
+ yield z.compress(content)
+ yield z.flush()
+
+
+def _decode_looseobj(buf):
+ assert(buf);
+ s = zlib.decompress(buf)
+ i = s.find('\0')
+ assert(i > 0)
+ l = s[:i].split(' ')
+ type = l[0]
+ sz = int(l[1])
+ content = s[i+1:]
+ assert(type in _typemap)
+ assert(sz == len(content))
+ return (type, content)
+
+
+def _decode_packobj(buf):
+    """Decode an object header produced by _encode_packobj().
+
+    Returns (type, content) where content is the decompressed payload.
+    """
+    assert(buf)
+    c = ord(buf[0])
+    type = _typermap[(c & 0x70) >> 4]
+    sz = c & 0x0f
+    shift = 4
+    i = 0
+    # Each continuation byte (0x80 set) contributes 7 more size bits.
+    while c & 0x80:
+        i += 1
+        c = ord(buf[i])
+        sz |= (c & 0x7f) << shift
+        shift += 7
+        if not (c & 0x80):
+            break
+    return (type, zlib.decompress(buf[i+1:]))
+
+
+class PackIndex:
+ def __init__(self, filename):
+ self.name = filename
+ self.map = mmap_read(open(filename))
+ assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
+ self.fanout = list(struct.unpack('!256I',
+ str(buffer(self.map, 8, 256*4))))
+ self.fanout.append(0) # entry "-1"
+ nsha = self.fanout[255]
+ self.ofstable = buffer(self.map,
+ 8 + 256*4 + nsha*20 + nsha*4,
+ nsha*4)
+ self.ofs64table = buffer(self.map,
+ 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
+
+ def _ofs_from_idx(self, idx):
+ ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
+ if ofs & 0x80000000:
+ idx64 = ofs & 0x7fffffff
+ ofs = struct.unpack('!I',
+ str(buffer(self.ofs64table, idx64*8, 8)))[0]
+ return ofs
+
+ def _idx_from_hash(self, hash):
+ assert(len(hash) == 20)
+ b1 = ord(hash[0])
+ start = self.fanout[b1-1] # range -1..254
+ end = self.fanout[b1] # range 0..255
+ buf = buffer(self.map, 8 + 256*4, end*20)
+ want = str(hash)
+ while start < end:
+ mid = start + (end-start)/2
+ v = str(buf[mid*20:(mid+1)*20])
+ if v < want:
+ start = mid+1
+ elif v > want:
+ end = mid
+ else: # got it!
+ return mid
+ return None
+
+ def find_offset(self, hash):
+ idx = self._idx_from_hash(hash)
+ if idx != None:
+ return self._ofs_from_idx(idx)
+ return None
+
+ def exists(self, hash):
+ return hash and (self._idx_from_hash(hash) != None) and True or None
+
+ def __iter__(self):
+ for i in xrange(self.fanout[255]):
+ yield buffer(self.map, 8 + 256*4 + 20*i, 20)
+
+ def __len__(self):
+ return int(self.fanout[255])
+
+
+def extract_bits(buf, bits):
+    """Return the top 'bits' bits of the first 32 bits of buf as an int.
+
+    Used to index the .midx fanout table by SHA-1 prefix.
+    """
+    mask = (1<<bits) - 1
+    v = struct.unpack('!I', buf[0:4])[0]
+    v = (v >> (32-bits)) & mask
+    return v
+
+
+class PackMidx:
+ def __init__(self, filename):
+ self.name = filename
+ assert(filename.endswith('.midx'))
+ self.map = mmap_read(open(filename))
+ if str(self.map[0:8]) == 'MIDX\0\0\0\1':
+ log('Warning: ignoring old-style midx %r\n' % filename)
+ self.bits = 0
+ self.entries = 1
+ self.fanout = buffer('\0\0\0\0')
+ self.shalist = buffer('\0'*20)
+ self.idxnames = []
+ else:
+ assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
+ self.bits = struct.unpack('!I', self.map[8:12])[0]
+ self.entries = 2**self.bits
+ self.fanout = buffer(self.map, 12, self.entries*4)
+ shaofs = 12 + self.entries*4
+ nsha = self._fanget(self.entries-1)
+ self.shalist = buffer(self.map, shaofs, nsha*20)
+ self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
+
+ def _fanget(self, i):
+ start = i*4
+ s = self.fanout[start:start+4]
+ return struct.unpack('!I', s)[0]
+
+ def exists(self, hash):
+ want = str(hash)
+ el = extract_bits(want, self.bits)
+ if el:
+ start = self._fanget(el-1)
+ else:
+ start = 0
+ end = self._fanget(el)
+ while start < end:
+ mid = start + (end-start)/2
+ v = str(self.shalist[mid*20:(mid+1)*20])
+ if v < want:
+ start = mid+1
+ elif v > want:
+ end = mid
+ else: # got it!
+ return True
+ return None
+
+ def __iter__(self):
+ for i in xrange(self._fanget(self.entries-1)):
+ yield buffer(self.shalist, i*20, 20)
+
+ def __len__(self):
+ return int(self._fanget(self.entries-1))
+
+
+_mpi_count = 0
+class MultiPackIndex:
+ def __init__(self, dir):
+ global _mpi_count
+ assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
+ _mpi_count += 1
+ self.dir = dir
+ self.also = {}
+ self.packs = []
+ self.refresh()
+
+ def __del__(self):
+ global _mpi_count
+ _mpi_count -= 1
+ assert(_mpi_count == 0)
+
+ def __iter__(self):
+ return iter(idxmerge(self.packs))
+
+ def exists(self, hash):
+ if hash in self.also:
+ return True
+ for i in range(len(self.packs)):
+ p = self.packs[i]
+ if p.exists(hash):
+ # reorder so most recently used packs are searched first
+ self.packs = [p] + self.packs[:i] + self.packs[i+1:]
+ return p.name
+ return None
+
+ def refresh(self, skip_midx = False, forget_packs = False):
+ if forget_packs:
+ self.packs = []
+ skip_midx = skip_midx or ignore_midx
+ d = dict((p.name, 1) for p in self.packs)
+ if os.path.exists(self.dir):
+ if not skip_midx:
+ midxl = []
+ for f in os.listdir(self.dir):
+ full = os.path.join(self.dir, f)
+ if f.endswith('.midx') and not d.get(full):
+ mx = PackMidx(full)
+ (mxd, mxf) = os.path.split(mx.name)
+ broken = 0
+ for n in mx.idxnames:
+ if not os.path.exists(os.path.join(mxd, n)):
+ log(('warning: index %s missing\n' +
+ ' used by %s\n') % (n, mxf))
+ broken += 1
+ if not broken:
+ midxl.append(mx)
+ midxl.sort(lambda x,y: -cmp(len(x),len(y)))
+ for ix in midxl:
+ any = 0
+ for sub in ix.idxnames:
+ if not d.get(os.path.join(self.dir, sub)):
+ self.packs.append(ix)
+ d[ix.name] = 1
+ for name in ix.idxnames:
+ d[os.path.join(self.dir, name)] = 1
+ any += 1
+ break
+ if not any:
+ log('midx: removing redundant: %s\n'
+ % os.path.basename(ix.name))
+ unlink(ix.name)
+ for f in os.listdir(self.dir):
+ full = os.path.join(self.dir, f)
+ if f.endswith('.idx') and not d.get(full):
+ self.packs.append(PackIndex(full))
+ d[full] = 1
+ log('MultiPackIndex: using %d index%s.\n'
+ % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
+
+ def add(self, hash):
+ self.also[hash] = 1
+
+ def zap_also(self):
+ self.also = {}
+
+
+def calc_hash(type, content):
+    """Return the binary sha1 of a git object: the '<type> <size>'
+    header, a NUL byte, and the content."""
+    header = '%s %d\0' % (type, len(content))
+    sum = sha.sha(header)
+    sum.update(content)
+    return sum.digest()
+
+
+def _shalist_sort_key(ent):
+ (mode, name, id) = ent
+ if stat.S_ISDIR(int(mode, 8)):
+ return name + '/'
+ else:
+ return name
+
+
+def idxmerge(idxlist):
+ total = sum(len(i) for i in idxlist)
+ iters = (iter(i) for i in idxlist)
+ heap = [(next(it), it) for it in iters]
+ heapq.heapify(heap)
+ count = 0
+ last = None
+ while heap:
+ if (count % 10024) == 0:
+ progress('Reading indexes: %.2f%% (%d/%d)\r'
+ % (count*100.0/total, count, total))
+ (e, it) = heap[0]
+ if e != last:
+ yield e
+ last = e
+ count += 1
+ e = next(it)
+ if e:
+ heapq.heapreplace(heap, (e, it))
+ else:
+ heapq.heappop(heap)
+ log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
+
+
+class PackWriter:
+ def __init__(self, objcache_maker=None):
+ self.count = 0
+ self.outbytes = 0
+ self.filename = None
+ self.file = None
+ self.objcache_maker = objcache_maker
+ self.objcache = None
+
+ def __del__(self):
+ self.close()
+
+ def _make_objcache(self):
+ if not self.objcache:
+ if self.objcache_maker:
+ self.objcache = self.objcache_maker()
+ else:
+ self.objcache = MultiPackIndex(repo('objects/pack'))
+
+ def _open(self):
+ if not self.file:
+ self._make_objcache()
+ (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
+ self.file = os.fdopen(fd, 'w+b')
+ assert(name.endswith('.pack'))
+ self.filename = name[:-5]
+ self.file.write('PACK\0\0\0\2\0\0\0\0')
+
+ def _raw_write(self, datalist):
+ self._open()
+ f = self.file
+ for d in datalist:
+ f.write(d)
+ self.outbytes += len(d)
+ self.count += 1
+
+ def _write(self, bin, type, content):
+ if verbose:
+ log('>')
+ self._raw_write(_encode_packobj(type, content))
+ return bin
+
+ def breakpoint(self):
+ id = self._end()
+ self.outbytes = self.count = 0
+ return id
+
+ def write(self, type, content):
+ return self._write(calc_hash(type, content), type, content)
+
+ def exists(self, id):
+ if not self.objcache:
+ self._make_objcache()
+ return self.objcache.exists(id)
+
+ def maybe_write(self, type, content):
+ bin = calc_hash(type, content)
+ if not self.exists(bin):
+ self._write(bin, type, content)
+ self.objcache.add(bin)
+ return bin
+
+ def new_blob(self, blob):
+ return self.maybe_write('blob', blob)
+
+ def new_tree(self, shalist):
+ shalist = sorted(shalist, key = _shalist_sort_key)
+ l = []
+ for (mode,name,bin) in shalist:
+ assert(mode)
+ assert(mode != '0')
+ assert(mode[0] != '0')
+ assert(name)
+ assert(len(bin) == 20)
+ l.append('%s %s\0%s' % (mode,name,bin))
+ return self.maybe_write('tree', ''.join(l))
+
+ def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
+ l = []
+ if tree: l.append('tree %s' % tree.encode('hex'))
+ if parent: l.append('parent %s' % parent.encode('hex'))
+ if author: l.append('author %s %s' % (author, _git_date(adate)))
+ if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
+ l.append('')
+ l.append(msg)
+ return self.maybe_write('commit', '\n'.join(l))
+
+ def new_commit(self, parent, tree, msg):
+ now = time.time()
+ userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
+ commit = self._new_commit(tree, parent,
+ userline, now, userline, now,
+ msg)
+ return commit
+
+ def abort(self):
+ f = self.file
+ if f:
+ self.file = None
+ f.close()
+ os.unlink(self.filename + '.pack')
+
+ def _end(self):
+ f = self.file
+ if not f: return None
+ self.file = None
+ self.objcache = None
+
+ # update object count
+ f.seek(8)
+ cp = struct.pack('!i', self.count)
+ assert(len(cp) == 4)
+ f.write(cp)
+
+ # calculate the pack sha1sum
+ f.seek(0)
+ sum = sha.sha()
+ while 1:
+ b = f.read(65536)
+ sum.update(b)
+ if not b: break
+ f.write(sum.digest())
+
+ f.close()
+
+ p = subprocess.Popen(['git', 'index-pack', '-v',
+ '--index-version=2',
+ self.filename + '.pack'],
+ preexec_fn = _gitenv,
+ stdout = subprocess.PIPE)
+ out = p.stdout.read().strip()
+ _git_wait('git index-pack', p)
+ if not out:
+ raise GitError('git index-pack produced no output')
+ nameprefix = repo('objects/pack/%s' % out)
+ if os.path.exists(self.filename + '.map'):
+ os.unlink(self.filename + '.map')
+ os.rename(self.filename + '.pack', nameprefix + '.pack')
+ os.rename(self.filename + '.idx', nameprefix + '.idx')
+ return nameprefix
+
+ def close(self):
+ return self._end()
+
+
+def _git_date(date):
+ return time.strftime('%s %z', time.localtime(date))
+
+
+def _gitenv():
+ os.environ['GIT_DIR'] = os.path.abspath(repo())
+
+
+def list_refs(refname = None):
+ argv = ['git', 'show-ref', '--']
+ if refname:
+ argv += [refname]
+ p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
+ out = p.stdout.read().strip()
+ rv = p.wait() # not fatal
+ if rv:
+ assert(not out)
+ if out:
+ for d in out.split('\n'):
+ (sha, name) = d.split(' ', 1)
+ yield (name, sha.decode('hex'))
+
+
+def read_ref(refname):
+ l = list(list_refs(refname))
+ if l:
+ assert(len(l) == 1)
+ return l[0][1]
+ else:
+ return None
+
+
+def rev_list(ref):
+ assert(not ref.startswith('-'))
+ argv = ['git', 'rev-list', '--pretty=format:%ct', ref, '--']
+ p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
+ commit = None
+ for row in p.stdout:
+ s = row.strip()
+ if s.startswith('commit '):
+ commit = s[7:].decode('hex')
+ else:
+ date = int(s)
+ yield (date, commit)
+ rv = p.wait() # not fatal
+ if rv:
+ raise GitError, 'git rev-list returned error %d' % rv
+
+
+def update_ref(refname, newval, oldval):
+ if not oldval:
+ oldval = ''
+ assert(refname.startswith('refs/heads/'))
+ p = subprocess.Popen(['git', 'update-ref', refname,
+ newval.encode('hex'), oldval.encode('hex')],
+ preexec_fn = _gitenv)
+ _git_wait('git update-ref', p)
+
+
+def guess_repo(path=None):
+ global repodir
+ if path:
+ repodir = path
+ if not repodir:
+ repodir = os.environ.get('BUP_DIR')
+ if not repodir:
+ repodir = os.path.expanduser('~/.bup')
+
+
+def init_repo(path=None):
+ guess_repo(path)
+ d = repo()
+ if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
+ raise GitError('"%d" exists but is not a directory\n' % d)
+ p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
+ preexec_fn = _gitenv)
+ _git_wait('git init', p)
+ p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
+ stdout=sys.stderr, preexec_fn = _gitenv)
+ _git_wait('git config', p)
+
+
+def check_repo_or_die(path=None):
+ guess_repo(path)
+ if not os.path.isdir(repo('objects/pack/.')):
+ if repodir == home_repodir:
+ init_repo()
+ else:
+ log('error: %r is not a bup/git repository\n' % repo())
+ sys.exit(15)
+
+
+def _treeparse(buf):
+ ofs = 0
+ while ofs < len(buf):
+ z = buf[ofs:].find('\0')
+ assert(z > 0)
+ spl = buf[ofs:ofs+z].split(' ', 1)
+ assert(len(spl) == 2)
+ sha = buf[ofs+z+1:ofs+z+1+20]
+ ofs += z+1+20
+ yield (spl[0], spl[1], sha)
+
+
+_ver = None
+def ver():
+ global _ver
+ if not _ver:
+ p = subprocess.Popen(['git', '--version'],
+ stdout=subprocess.PIPE)
+ gvs = p.stdout.read()
+ _git_wait('git --version', p)
+ m = re.match(r'git version (\S+.\S+)', gvs)
+ if not m:
+ raise GitError('git --version weird output: %r' % gvs)
+ _ver = tuple(m.group(1).split('.'))
+ needed = ('1','5', '3', '1')
+ if _ver < needed:
+ raise GitError('git version %s or higher is required; you have %s'
+ % ('.'.join(needed), '.'.join(_ver)))
+ return _ver
+
+
+def _git_wait(cmd, p):
+ rv = p.wait()
+ if rv != 0:
+ raise GitError('%s returned %d' % (cmd, rv))
+
+
+def _git_capture(argv):
+ p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
+ r = p.stdout.read()
+ _git_wait(repr(argv), p)
+ return r
+
+
+_ver_warned = 0
+class CatPipe:
+ def __init__(self):
+ global _ver_warned
+ wanted = ('1','5','6')
+ if ver() < wanted:
+ if not _ver_warned:
+ log('warning: git version < %s; bup will be slow.\n'
+ % '.'.join(wanted))
+ _ver_warned = 1
+ self.get = self._slow_get
+ else:
+ self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ preexec_fn = _gitenv)
+ self.get = self._fast_get
+ self.inprogress = None
+
+ def _fast_get(self, id):
+ if self.inprogress:
+ log('_fast_get: opening %r while %r is open'
+ % (id, self.inprogress))
+ assert(not self.inprogress)
+ assert(id.find('\n') < 0)
+ assert(id.find('\r') < 0)
+ assert(id[0] != '-')
+ self.inprogress = id
+ self.p.stdin.write('%s\n' % id)
+ hdr = self.p.stdout.readline()
+ if hdr.endswith(' missing\n'):
+ raise KeyError('blob %r is missing' % id)
+ spl = hdr.split(' ')
+ if len(spl) != 3 or len(spl[0]) != 40:
+ raise GitError('expected blob, got %r' % spl)
+ (hex, type, size) = spl
+
+ def ondone():
+ assert(self.p.stdout.readline() == '\n')
+ self.inprogress = None
+
+ it = AutoFlushIter(chunkyreader(self.p.stdout, int(spl[2])),
+ ondone = ondone)
+ yield type
+ for blob in it:
+ yield blob
+ del it
+
+ def _slow_get(self, id):
+ assert(id.find('\n') < 0)
+ assert(id.find('\r') < 0)
+ assert(id[0] != '-')
+ type = _git_capture(['git', 'cat-file', '-t', id]).strip()
+ yield type
+
+ p = subprocess.Popen(['git', 'cat-file', type, id],
+ stdout=subprocess.PIPE,
+ preexec_fn = _gitenv)
+ for blob in chunkyreader(p.stdout):
+ yield blob
+ _git_wait('git cat-file', p)
+
+ def _join(self, it):
+ type = it.next()
+ if type == 'blob':
+ for blob in it:
+ yield blob
+ elif type == 'tree':
+ treefile = ''.join(it)
+ for (mode, name, sha) in _treeparse(treefile):
+ for blob in self.join(sha.encode('hex')):
+ yield blob
+ elif type == 'commit':
+ treeline = ''.join(it).split('\n')[0]
+ assert(treeline.startswith('tree '))
+ for blob in self.join(treeline[5:]):
+ yield blob
+ else:
+ raise GitError('invalid object type %r: expected blob/tree/commit'
+ % type)
+
+ def join(self, id):
+ try:
+ for d in self._join(self.get(id)):
+ yield d
+ except StopIteration:
+ log('booger!\n')
+
+
+def cat(id):
+ c = CatPipe()
+ for d in c.join(id):
+ yield d
--- /dev/null
+import sys, math
+from bup import git, _hashsplit
+from bup.helpers import *
+
+BLOB_LWM = 8192*2
+BLOB_MAX = BLOB_LWM*2
+BLOB_HWM = 1024*1024
+MAX_PER_TREE = 256
+progress_callback = None
+max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
+max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
+fanout = 16
+
+class Buf:
+ def __init__(self):
+ self.data = ''
+ self.start = 0
+
+ def put(self, s):
+ if s:
+ self.data = buffer(self.data, self.start) + s
+ self.start = 0
+
+ def peek(self, count):
+ return buffer(self.data, self.start, count)
+
+ def eat(self, count):
+ self.start += count
+
+ def get(self, count):
+ v = buffer(self.data, self.start, count)
+ self.start += count
+ return v
+
+ def used(self):
+ return len(self.data) - self.start
+
+
+def splitbuf(buf):
+ b = buf.peek(buf.used())
+ (ofs, bits) = _hashsplit.splitbuf(b)
+ if ofs:
+ buf.eat(ofs)
+ return (buffer(b, 0, ofs), bits)
+ return (None, 0)
+
+
+def blobiter(files):
+ for f in files:
+ while 1:
+ b = f.read(BLOB_HWM)
+ if not b:
+ break
+ yield b
+
+
+def drainbuf(buf, finalize):
+ while 1:
+ (blob, bits) = splitbuf(buf)
+ if blob:
+ yield (blob, bits)
+ else:
+ break
+ if buf.used() > BLOB_MAX:
+ # limit max blob size
+ yield (buf.get(buf.used()), 0)
+ elif finalize and buf.used():
+ yield (buf.get(buf.used()), 0)
+
+
+def hashsplit_iter(files):
+ assert(BLOB_HWM > BLOB_MAX)
+ buf = Buf()
+ fi = blobiter(files)
+ while 1:
+ for i in drainbuf(buf, finalize=False):
+ yield i
+ while buf.used() < BLOB_HWM:
+ bnew = next(fi)
+ if not bnew:
+ # eof
+ for i in drainbuf(buf, finalize=True):
+ yield i
+ return
+ buf.put(bnew)
+
+
+total_split = 0
+def _split_to_blobs(w, files):
+ global total_split
+ for (blob, bits) in hashsplit_iter(files):
+ sha = w.new_blob(blob)
+ total_split += len(blob)
+ if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
+ w.breakpoint()
+ if progress_callback:
+ progress_callback(len(blob))
+ yield (sha, len(blob), bits)
+
+
+def _make_shalist(l):
+ ofs = 0
+ shalist = []
+ for (mode, sha, size) in l:
+ shalist.append((mode, '%016x' % ofs, sha))
+ ofs += size
+ total = ofs
+ return (shalist, total)
+
+
+def _squish(w, stacks, n):
+ i = 0
+ while i<n or len(stacks[i]) > MAX_PER_TREE:
+ while len(stacks) <= i+1:
+ stacks.append([])
+ if len(stacks[i]) == 1:
+ stacks[i+1] += stacks[i]
+ elif stacks[i]:
+ (shalist, size) = _make_shalist(stacks[i])
+ tree = w.new_tree(shalist)
+ stacks[i+1].append(('40000', tree, size))
+ stacks[i] = []
+ i += 1
+
+
+def split_to_shalist(w, files):
+ sl = _split_to_blobs(w, files)
+ if not fanout:
+ shal = []
+ for (sha,size,bits) in sl:
+ shal.append(('100644', sha, size))
+ return _make_shalist(shal)[0]
+ else:
+ base_bits = _hashsplit.blobbits()
+ fanout_bits = int(math.log(fanout, 2))
+ def bits_to_idx(n):
+ assert(n >= base_bits)
+ return (n - base_bits)/fanout_bits
+ stacks = [[]]
+ for (sha,size,bits) in sl:
+ assert(bits <= 32)
+ stacks[0].append(('100644', sha, size))
+ if bits > base_bits:
+ _squish(w, stacks, bits_to_idx(bits))
+ #log('stacks: %r\n' % [len(i) for i in stacks])
+ _squish(w, stacks, len(stacks)-1)
+ #log('stacks: %r\n' % [len(i) for i in stacks])
+ return _make_shalist(stacks[-1])[0]
+
+
+def split_to_blob_or_tree(w, files):
+ shalist = list(split_to_shalist(w, files))
+ if len(shalist) == 1:
+ return (shalist[0][0], shalist[0][2])
+ elif len(shalist) == 0:
+ return ('100644', w.new_blob(''))
+ else:
+ return ('40000', w.new_tree(shalist))
--- /dev/null
+import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
+
+
+def log(s):
+ sys.stderr.write(s)
+
+
+def mkdirp(d):
+ try:
+ os.makedirs(d)
+ except OSError, e:
+ if e.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+
+
+def next(it):
+    """Like it.next(), but return None (instead of raising StopIteration)
+    when the iterator is exhausted."""
+    try:
+        return it.next()
+    except StopIteration:
+        return None
+
+
+def unlink(f):
+ try:
+ os.unlink(f)
+ except OSError, e:
+ if e.errno == errno.ENOENT:
+ pass # it doesn't exist, that's what you asked for
+
+
+def readpipe(argv):
+ p = subprocess.Popen(argv, stdout=subprocess.PIPE)
+ r = p.stdout.read()
+ p.wait()
+ return r
+
+
+# FIXME: this function isn't very generic, because it splits the filename
+# in an odd way and depends on a terminating '/' to indicate directories.
+# But it's used in a couple of places, so let's put it here.
+def pathsplit(p):
+    """Split path 'p' into components, each directory part keeping its
+    trailing '/' (e.g. 'a/b/c' -> ['a/', 'b/', 'c'])."""
+    l = p.split('/')
+    l = [i+'/' for i in l[:-1]] + l[-1:]
+    if l[-1] == '':
+        l.pop() # extra blank caused by terminating '/'
+    return l
+
+
+# like os.path.realpath, but doesn't follow a symlink for the last element.
+# (ie. if 'p' itself is itself a symlink, this one won't follow it)
+def realpath(p):
+ try:
+ st = os.lstat(p)
+ except OSError:
+ st = None
+ if st and stat.S_ISLNK(st.st_mode):
+ (dir, name) = os.path.split(p)
+ dir = os.path.realpath(dir)
+ out = os.path.join(dir, name)
+ else:
+ out = os.path.realpath(p)
+ #log('realpathing:%r,%r\n' % (p, out))
+ return out
+
+
+_username = None
+def username():
+ global _username
+ if not _username:
+ uid = os.getuid()
+ try:
+ _username = pwd.getpwuid(uid)[0]
+ except KeyError:
+ _username = 'user%d' % uid
+ return _username
+
+
+_userfullname = None
+def userfullname():
+ global _userfullname
+ if not _userfullname:
+ uid = os.getuid()
+ try:
+ _userfullname = pwd.getpwuid(uid)[4].split(',')[0]
+ except KeyError:
+ _userfullname = 'user%d' % uid
+ return _userfullname
+
+
+_hostname = None
+def hostname():
+ global _hostname
+ if not _hostname:
+ _hostname = socket.getfqdn()
+ return _hostname
+
+
+class NotOk(Exception):
+ pass
+
+class Conn:
+ def __init__(self, inp, outp):
+ self.inp = inp
+ self.outp = outp
+
+ def read(self, size):
+ self.outp.flush()
+ return self.inp.read(size)
+
+ def readline(self):
+ self.outp.flush()
+ return self.inp.readline()
+
+ def write(self, data):
+ #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
+ self.outp.write(data)
+
+ def has_input(self):
+ [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
+ if rl:
+ assert(rl[0] == self.inp.fileno())
+ return True
+ else:
+ return None
+
+ def ok(self):
+ self.write('\nok\n')
+
+ def error(self, s):
+ s = re.sub(r'\s+', ' ', str(s))
+ self.write('\nerror %s\n' % s)
+
+ def _check_ok(self, onempty):
+ self.outp.flush()
+ rl = ''
+ for rl in linereader(self.inp):
+ #log('%d got line: %r\n' % (os.getpid(), rl))
+ if not rl: # empty line
+ continue
+ elif rl == 'ok':
+ return None
+ elif rl.startswith('error '):
+ #log('client: error: %s\n' % rl[6:])
+ return NotOk(rl[6:])
+ else:
+ onempty(rl)
+ raise Exception('server exited unexpectedly; see errors above')
+
+ def drain_and_check_ok(self):
+ def onempty(rl):
+ pass
+ return self._check_ok(onempty)
+
+ def check_ok(self):
+ def onempty(rl):
+ raise Exception('expected "ok", got %r' % rl)
+ return self._check_ok(onempty)
+
+
+def linereader(f):
+ while 1:
+ line = f.readline()
+ if not line:
+ break
+ yield line[:-1]
+
+
+def chunkyreader(f, count = None):
+ if count != None:
+ while count > 0:
+ b = f.read(min(count, 65536))
+ if not b:
+ raise IOError('EOF with %d bytes remaining' % count)
+ yield b
+ count -= len(b)
+ else:
+ while 1:
+ b = f.read(65536)
+ if not b: break
+ yield b
+
+
+class AutoFlushIter:
+ def __init__(self, it, ondone = None):
+ self.it = it
+ self.ondone = ondone
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ return self.it.next()
+
+ def __del__(self):
+ for i in self.it:
+ pass
+ if self.ondone:
+ self.ondone()
+
+
+def slashappend(s):
+ if s and not s.endswith('/'):
+ return s + '/'
+ else:
+ return s
+
+
+def _mmap_do(f, len, flags, prot):
+ if not len:
+ st = os.fstat(f.fileno())
+ len = st.st_size
+ map = mmap.mmap(f.fileno(), len, flags, prot)
+ f.close() # map will persist beyond file close
+ return map
+
+
+def mmap_read(f, len = 0):
+ return _mmap_do(f, len, mmap.MAP_PRIVATE, mmap.PROT_READ)
+
+
+def mmap_readwrite(f, len = 0):
+ return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
+
+
+def parse_num(s):
+    """Parse a human-readable size like '1.5G' or '42kb' into an integer
+    byte count.  Units are binary (k = 1024) and case-insensitive; no
+    unit (or 'b') means plain bytes.
+
+    Raises ValueError on unparseable input or an unknown unit.
+    """
+    g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
+    if not g:
+        raise ValueError("can't parse %r as a number" % s)
+    (val, unit) = g.groups()
+    num = float(val)
+    unit = unit.lower()
+    if unit in ['t', 'tb']:
+        mult = 1024*1024*1024*1024
+    elif unit in ['g', 'gb']:
+        mult = 1024*1024*1024
+    elif unit in ['m', 'mb']:
+        mult = 1024*1024
+    elif unit in ['k', 'kb']:
+        mult = 1024
+    elif unit in ['', 'b']:
+        mult = 1
+    else:
+        raise ValueError("invalid unit %r in number %r" % (unit, s))
+    return int(num*mult)
+
+
+# count the number of elements in an iterator (consumes the iterator)
+def count(l):
+ return reduce(lambda x,y: x+1, l)
+
+
+def atoi(s):
+ try:
+ return int(s or '0')
+ except ValueError:
+ return 0
+
+
+saved_errors = []
+def add_error(e):
+ saved_errors.append(e)
+ log('%-70s\n' % e)
+
+istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY'))
+def progress(s):
+ if istty:
+ log(s)
--- /dev/null
+import os, stat, time, struct, tempfile
+from bup.helpers import *
+
+EMPTY_SHA = '\0'*20
+FAKE_SHA = '\x01'*20
+INDEX_HDR = 'BUPI\0\0\0\2'
+INDEX_SIG = '!IIIIIQII20sHII'
+ENTLEN = struct.calcsize(INDEX_SIG)
+FOOTER_SIG = '!Q'
+FOOTLEN = struct.calcsize(FOOTER_SIG)
+
+IX_EXISTS = 0x8000
+IX_HASHVALID = 0x4000
+
+class Error(Exception):
+ pass
+
+
+class Level:
+ def __init__(self, ename, parent):
+ self.parent = parent
+ self.ename = ename
+ self.list = []
+ self.count = 0
+
+ def write(self, f):
+ (ofs,n) = (f.tell(), len(self.list))
+ if self.list:
+ count = len(self.list)
+ #log('popping %r with %d entries\n'
+ # % (''.join(self.ename), count))
+ for e in self.list:
+ e.write(f)
+ if self.parent:
+ self.parent.count += count + self.count
+ return (ofs,n)
+
+
+def _golevel(level, f, ename, newentry):
+ # close nodes back up the tree
+ assert(level)
+ while ename[:len(level.ename)] != level.ename:
+ n = BlankNewEntry(level.ename[-1])
+ (n.children_ofs,n.children_n) = level.write(f)
+ level.parent.list.append(n)
+ level = level.parent
+
+ # create nodes down the tree
+ while len(level.ename) < len(ename):
+ level = Level(ename[:len(level.ename)+1], level)
+
+ # are we in precisely the right place?
+ assert(ename == level.ename)
+ n = newentry or BlankNewEntry(ename and level.ename[-1] or None)
+ (n.children_ofs,n.children_n) = level.write(f)
+ if level.parent:
+ level.parent.list.append(n)
+ level = level.parent
+
+ return level
+
+
+class Entry:
+    # One file/directory in the index.  Subclasses supply the stat-like
+    # fields (dev, ctime, mtime, uid, gid, size, mode, gitmode, sha,
+    # flags); this base class handles naming, packing and flag logic.
+    def __init__(self, basename, name):
+        self.basename = str(basename)
+        self.name = str(name)
+        self.children_ofs = 0
+        self.children_n = 0
+
+    def __repr__(self):
+        return ("(%s,0x%04x,%d,%d,%d,%d,%d,%s/%s,0x%04x,0x%08x/%d)"
+                % (self.name, self.dev,
+                   self.ctime, self.mtime, self.uid, self.gid,
+                   self.size, oct(self.mode), oct(self.gitmode),
+                   self.flags, self.children_ofs, self.children_n))
+
+    def packed(self):
+        # Serialize the fixed-size part of the entry (INDEX_SIG layout).
+        return struct.pack(INDEX_SIG,
+                           self.dev, self.ctime, self.mtime,
+                           self.uid, self.gid, self.size, self.mode,
+                           self.gitmode, self.sha, self.flags,
+                           self.children_ofs, self.children_n)
+
+    def from_stat(self, st, tstart):
+        # Refresh fields from an os.stat() result.  Invalidate the stored
+        # hash if anything relevant changed, if the file changed during
+        # this run (ctime >= tstart), or if we never had a valid hash.
+        old = (self.dev, self.ctime, self.mtime,
+               self.uid, self.gid, self.size, self.flags & IX_EXISTS)
+        new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
+               st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
+        self.dev = st.st_dev
+        self.ctime = int(st.st_ctime)
+        self.mtime = int(st.st_mtime)
+        self.uid = st.st_uid
+        self.gid = st.st_gid
+        self.size = st.st_size
+        self.mode = st.st_mode
+        self.flags |= IX_EXISTS
+        if int(st.st_ctime) >= tstart or old != new \
+              or self.sha == EMPTY_SHA or not self.gitmode:
+            self.invalidate()
+
+    def is_valid(self):
+        # Valid = exists on disk AND the stored hash is current.
+        f = IX_HASHVALID|IX_EXISTS
+        return (self.flags & f) == f
+
+    def invalidate(self):
+        # Mark the stored hash as stale (file content must be re-read).
+        self.flags &= ~IX_HASHVALID
+
+    def validate(self, gitmode, sha):
+        assert(sha)
+        assert(gitmode)
+        self.gitmode = gitmode
+        self.sha = sha
+        self.flags |= IX_HASHVALID|IX_EXISTS
+
+    def exists(self):
+        return not self.is_deleted()
+
+    def is_deleted(self):
+        return (self.flags & IX_EXISTS) == 0
+
+    def set_deleted(self):
+        if self.flags & IX_EXISTS:
+            self.flags &= ~(IX_EXISTS | IX_HASHVALID)
+
+    def is_real(self):
+        return not self.is_fake()
+
+    def is_fake(self):
+        # Fake entries (e.g. blank placeholders) have no ctime.
+        return not self.ctime
+
+    def __cmp__(a, b):
+        # Order by name; among equal names, valid sorts before invalid and
+        # real before fake (note the negated cmp).
+        return (cmp(a.name, b.name)
+                or -cmp(a.is_valid(), b.is_valid())
+                or -cmp(a.is_fake(), b.is_fake()))
+
+    def write(self, f):
+        # On-disk form: NUL-terminated basename followed by packed fields.
+        f.write(self.basename + '\0' + self.packed())
+
+
+class NewEntry(Entry):
+    # An entry created in memory (not yet backed by an index file).
+    def __init__(self, basename, name, dev, ctime, mtime, uid, gid,
+                 size, mode, gitmode, sha, flags, children_ofs, children_n):
+        Entry.__init__(self, basename, name)
+        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
+         self.size, self.mode, self.gitmode, self.sha,
+         self.flags, self.children_ofs, self.children_n
+         ) = (dev, int(ctime), int(mtime), uid, gid,
+              size, mode, gitmode, sha, flags, children_ofs, children_n)
+
+
+class BlankNewEntry(NewEntry):
+    # A zeroed placeholder entry (used for intermediate directory nodes).
+    def __init__(self, basename):
+        NewEntry.__init__(self, basename, basename,
+                          0, 0, 0, 0, 0, 0, 0,
+                          0, EMPTY_SHA, 0, 0, 0)
+
+
+class ExistingEntry(Entry):
+    # An entry backed by a mmap'ed region of an existing index file.
+    def __init__(self, parent, basename, name, m, ofs):
+        Entry.__init__(self, basename, name)
+        self.parent = parent
+        self._m = m
+        self._ofs = ofs
+        (self.dev, self.ctime, self.mtime, self.uid, self.gid,
+         self.size, self.mode, self.gitmode, self.sha,
+         self.flags, self.children_ofs, self.children_n
+         ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN)))
+
+    def repack(self):
+        # Write changed fields back into the mmap in place, and propagate
+        # invalidation up through parent directories.
+        self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
+        if self.parent and not self.is_valid():
+            self.parent.invalidate()
+            self.parent.repack()
+
+    def iter(self, name=None, wantrecurse=None):
+        # Recursively yield children (depth-first, children before their
+        # parent), optionally restricted to the subtree under 'name'.
+        dname = name
+        if dname and not dname.endswith('/'):
+            dname += '/'
+        ofs = self.children_ofs
+        assert(ofs <= len(self._m))
+        assert(self.children_n < 1000000)
+        for i in xrange(self.children_n):
+            eon = self._m.find('\0', ofs)
+            # NOTE(review): the first two asserts are implied by the third.
+            assert(eon >= 0)
+            assert(eon >= ofs)
+            assert(eon > ofs)
+            basename = str(buffer(self._m, ofs, eon-ofs))
+            child = ExistingEntry(self, basename, self.name + basename,
+                                  self._m, eon+1)
+            if (not dname
+                or child.name.startswith(dname)
+                or child.name.endswith('/') and dname.startswith(child.name)):
+                if not wantrecurse or wantrecurse(child):
+                    for e in child.iter(name=name, wantrecurse=wantrecurse):
+                        yield e
+            if not name or child.name == name or child.name.startswith(dname):
+                yield child
+            ofs = eon + 1 + ENTLEN
+
+    def __iter__(self):
+        return self.iter()
+
+
+class Reader:
+    # Read (and flag-repack) an existing index file via a writable mmap.
+    def __init__(self, filename):
+        self.filename = filename
+        self.m = ''
+        self.writable = False
+        self.count = 0
+        f = None
+        try:
+            f = open(filename, 'r+')
+        except IOError, e:
+            # a missing index just reads as empty
+            if e.errno == errno.ENOENT:
+                pass
+            else:
+                raise
+        if f:
+            b = f.read(len(INDEX_HDR))
+            if b != INDEX_HDR:
+                log('warning: %s: header: expected %r, got %r'
+                    % (filename, INDEX_HDR, b))
+            else:
+                st = os.fstat(f.fileno())
+                if st.st_size:
+                    self.m = mmap_readwrite(f)
+                    self.writable = True
+                    # entry count is stored in the file footer
+                    self.count = struct.unpack(FOOTER_SIG,
+                        str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
+
+    def __del__(self):
+        self.close()
+
+    def __len__(self):
+        return int(self.count)
+
+    def forward_iter(self):
+        # Iterate entries in on-disk order (children before parents).
+        ofs = len(INDEX_HDR)
+        while ofs+ENTLEN <= len(self.m)-FOOTLEN:
+            eon = self.m.find('\0', ofs)
+            # NOTE(review): the first two asserts are implied by the third.
+            assert(eon >= 0)
+            assert(eon >= ofs)
+            assert(eon > ofs)
+            basename = str(buffer(self.m, ofs, eon-ofs))
+            yield ExistingEntry(None, basename, basename, self.m, eon+1)
+            ofs = eon + 1 + ENTLEN
+
+    def iter(self, name=None, wantrecurse=None):
+        # Recursive iteration starting at the root entry (stored last,
+        # just before the footer).
+        if len(self.m) > len(INDEX_HDR)+ENTLEN:
+            dname = name
+            if dname and not dname.endswith('/'):
+                dname += '/'
+            root = ExistingEntry(None, '/', '/',
+                                 self.m, len(self.m)-FOOTLEN-ENTLEN)
+            for sub in root.iter(name=name, wantrecurse=wantrecurse):
+                yield sub
+            if not dname or dname == root.name:
+                yield root
+
+    def __iter__(self):
+        return self.iter()
+
+    def exists(self):
+        return self.m
+
+    def save(self):
+        # Flush any repacked flags back to disk.
+        if self.writable and self.m:
+            self.m.flush()
+
+    def close(self):
+        self.save()
+        if self.writable and self.m:
+            self.m = None
+            self.writable = False
+
+    def filter(self, prefixes, wantrecurse=None):
+        # Yield (display_name, entry) pairs for the given path prefixes.
+        for (rp, path) in reduce_paths(prefixes):
+            for e in self.iter(rp, wantrecurse=wantrecurse):
+                assert(e.name.startswith(rp))
+                name = path + e.name[len(rp):]
+                yield (name, e)
+
+
+class Writer:
+ def __init__(self, filename):
+ self.rootlevel = self.level = Level([], None)
+ self.f = None
+ self.count = 0
+ self.lastfile = None
+ self.filename = None
+ self.filename = filename = realpath(filename)
+ (dir,name) = os.path.split(filename)
+ (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
+ self.f = os.fdopen(ffd, 'wb', 65536)
+ self.f.write(INDEX_HDR)
+
+ def __del__(self):
+ self.abort()
+
+ def abort(self):
+ f = self.f
+ self.f = None
+ if f:
+ f.close()
+ os.unlink(self.tmpname)
+
+ def flush(self):
+ if self.level:
+ self.level = _golevel(self.level, self.f, [], None)
+ self.count = self.rootlevel.count
+ if self.count:
+ self.count += 1
+ self.f.write(struct.pack(FOOTER_SIG, self.count))
+ self.f.flush()
+ assert(self.level == None)
+
+ def close(self):
+ self.flush()
+ f = self.f
+ self.f = None
+ if f:
+ f.close()
+ os.rename(self.tmpname, self.filename)
+
+ def _add(self, ename, entry):
+ if self.lastfile and self.lastfile <= ename:
+ raise Error('%r must come before %r'
+ % (''.join(e.name), ''.join(self.lastfile)))
+ self.lastfile = e.name
+ self.level = _golevel(self.level, self.f, ename, entry)
+
+ def add(self, name, st, hashgen = None):
+ endswith = name.endswith('/')
+ ename = pathsplit(name)
+ basename = ename[-1]
+ #log('add: %r %r\n' % (basename, name))
+ flags = IX_EXISTS
+ sha = None
+ if hashgen:
+ (gitmode, sha) = hashgen(name)
+ flags |= IX_HASHVALID
+ else:
+ (gitmode, sha) = (0, EMPTY_SHA)
+ if st:
+ isdir = stat.S_ISDIR(st.st_mode)
+ assert(isdir == endswith)
+ e = NewEntry(basename, name, st.st_dev, int(st.st_ctime),
+ int(st.st_mtime), st.st_uid, st.st_gid,
+ st.st_size, st.st_mode, gitmode, sha, flags,
+ 0, 0)
+ else:
+ assert(endswith)
+ e = BlankNewEntry(basename)
+ e.gitmode = gitmode
+ e.sha = sha
+ e.flags = flags
+ self._add(ename, e)
+
+ def add_ixentry(self, e):
+ e.children_ofs = e.children_n = 0
+ self._add(pathsplit(e.name), e)
+
+ def new_reader(self):
+ self.flush()
+ return Reader(self.tmpname)
+
+
+def reduce_paths(paths):
+    # Normalize 'paths' into (realpath, display_path) pairs, appending '/'
+    # to directories, and drop any path already contained in another.
+    # Result is sorted in reverse order.
+    xpaths = []
+    for p in paths:
+        rp = realpath(p)
+        try:
+            st = os.lstat(rp)
+            if stat.S_ISDIR(st.st_mode):
+                rp = slashappend(rp)
+                p = slashappend(p)
+        except OSError, e:
+            # nonexistent paths are allowed; other stat errors are not
+            if e.errno != errno.ENOENT:
+                raise
+        xpaths.append((rp, p))
+    xpaths.sort()
+
+    paths = []
+    prev = None
+    for (rp, p) in xpaths:
+        if prev and (prev == rp
+                     or (prev.endswith('/') and rp.startswith(prev))):
+            continue # already superceded by previous path
+        paths.append((rp, p))
+        prev = rp
+    paths.sort(reverse=True)
+    return paths
+
+
+class MergeIter:
+    # Merge several sorted Entry iterators into one, skipping consecutive
+    # entries with duplicate names.
+    def __init__(self, iters):
+        self.iters = iters
+
+    def __len__(self):
+        # FIXME: doesn't remove duplicated entries between iters.
+        # That only happens for parent directories, but will mean the
+        # actual iteration returns fewer entries than this function counts.
+        return sum(len(it) for it in self.iters)
+
+    def __iter__(self):
+        total = len(self)
+        l = [iter(it) for it in self.iters]
+        # NOTE(review): 'next' here appears to be a bup.helpers function
+        # that returns None at end-of-iterator (the next() builtin doesn't
+        # exist before python 2.6) -- confirm.
+        l = [(next(it),it) for it in l]
+        l = filter(lambda x: x[0], l)
+        count = 0
+        lastname = None
+        while l:
+            if not (count % 1024):
+                progress('bup: merging indexes (%d/%d)\r' % (count, total))
+            l.sort()
+            (e,it) = l.pop()   # largest remaining entry
+            if not e:
+                continue
+            if e.name != lastname:
+                yield e
+                lastname = e.name
+            n = next(it)
+            if n:
+                l.append((n,it))
+            count += 1
+        log('bup: merging indexes (%d/%d), done.\n' % (count, total))
--- /dev/null
+import textwrap, getopt, re
+from bup.helpers import *
+
+class OptDict:
+    # Dict-like option container whose keys are also readable as
+    # attributes (opt.foo == opt['foo']).
+    def __init__(self):
+        self._opts = {}
+
+    def __setitem__(self, k, v):
+        self._opts[k] = v
+
+    def __getitem__(self, k):
+        return self._opts[k]
+
+    def __getattr__(self, k):
+        return self[k]
+
+
+class Options:
+    # Command-line parser driven by a declarative 'optspec' string:
+    # usage synopsis line(s), then a line containing '--', then one option
+    # per line ("flag,alias[=] description"); a trailing '=' means the
+    # option takes a value.
+    def __init__(self, exe, optspec):
+        self.exe = exe
+        self.optspec = optspec
+        self._aliases = {}
+        self._shortopts = 'h?'
+        self._longopts = ['help']
+        self._hasparms = {}
+        self._usagestr = self._gen_usage()
+
+    def _gen_usage(self):
+        # Parse the optspec -- filling in the alias/parm tables as a side
+        # effect -- and return the formatted usage text.
+        out = []
+        lines = self.optspec.strip().split('\n')
+        lines.reverse()
+        first_syn = True
+        while lines:
+            l = lines.pop()
+            if l == '--': break
+            out.append('%s: %s\n' % (first_syn and 'usage' or ' or', l))
+            first_syn = False
+        out.append('\n')
+        while lines:
+            l = lines.pop()
+            if l.startswith(' '):
+                out.append('\n%s\n' % l.lstrip())
+            elif l:
+                (flags, extra) = l.split(' ', 1)
+                extra = extra.strip()
+                if flags.endswith('='):
+                    flags = flags[:-1]
+                    has_parm = 1
+                else:
+                    has_parm = 0
+                flagl = flags.split(',')
+                flagl_nice = []
+                for f in flagl:
+                    f_nice = re.sub(r'\W', '_', f)
+                    self._aliases[f] = flagl[0]
+                    self._aliases[f_nice] = flagl[0]
+                    self._hasparms[f] = has_parm
+                    if len(f) == 1:
+                        self._shortopts += f + (has_parm and ':' or '')
+                        flagl_nice.append('-' + f)
+                    else:
+                        assert(not f.startswith('no-')) # supported implicitly
+                        self._longopts.append(f + (has_parm and '=' or ''))
+                        self._longopts.append('no-' + f)
+                        flagl_nice.append('--' + f)
+                flags_nice = ', '.join(flagl_nice)
+                if has_parm:
+                    flags_nice += ' ...'
+                prefix = ' %-20s ' % flags_nice
+                argtext = '\n'.join(textwrap.wrap(extra, width=70,
+                                                  initial_indent=prefix,
+                                                  subsequent_indent=' '*28))
+                out.append(argtext + '\n')
+            else:
+                out.append('\n')
+        return ''.join(out).rstrip() + '\n'
+
+    def usage(self):
+        # Print usage and exit with status 97.
+        log(self._usagestr)
+        sys.exit(97)
+
+    def fatal(self, s):
+        log('error: %s\n' % s)
+        return self.usage()
+
+    def parse(self, args):
+        # Parse args.  Returns (opt, flags, extra): 'opt' maps every known
+        # flag and alias to its value; parameterless flags count their
+        # occurrences; a 'no-X' long option resets X to None.
+        try:
+            (flags,extra) = getopt.gnu_getopt(args,
+                                self._shortopts, self._longopts)
+        except getopt.GetoptError, e:
+            self.fatal(e)
+
+        opt = OptDict()
+        for f in self._aliases.values():
+            opt[f] = None
+        for (k,v) in flags:
+            while k.startswith('-'):
+                k = k[1:]
+            if k in ['h', '?', 'help']:
+                self.usage()
+            if k.startswith('no-'):
+                k = self._aliases[k[3:]]
+                opt[k] = None
+            else:
+                k = self._aliases[k]
+                if not self._hasparms[k]:
+                    assert(v == '')
+                    opt[k] = (opt._opts.get(k) or 0) + 1
+                else:
+                    # store numeric-looking values as ints
+                    try:
+                        vv = int(v)
+                        if str(vv) == v:
+                            v = vv
+                    except ValueError:
+                        pass
+                    opt[k] = v
+        for (f1,f2) in self._aliases.items():
+            opt[f1] = opt[f2]
+        return (opt,flags,extra)
--- /dev/null
+import re
+
+q = "'"
+qq = '"'
+
+
+class QuoteError(Exception):
+ pass
+
+
+def _quotesplit(line):
+    # Generator: yield (wordstart, word) for each sh-style word in 'line',
+    # honoring single/double quotes and backslash escapes.  Raises
+    # QuoteError at the end if a quote, escape, or word is unfinished.
+    inquote = None
+    inescape = None
+    wordstart = 0
+    word = ''
+    for i in range(len(line)):
+        c = line[i]
+        if inescape:
+            if inquote == q and c != q:
+                word += '\\' # single-q backslashes can only quote single-q
+            word += c
+            inescape = False
+        elif c == '\\':
+            inescape = True
+        elif c == inquote:
+            inquote = None
+            # this is un-sh-like, but do it for sanity when autocompleting
+            yield (wordstart, word)
+            word = ''
+            wordstart = i+1
+        elif not inquote and not word and (c == q or c == qq):
+            # the 'not word' constraint on this is un-sh-like, but do it
+            # for sanity when autocompleting
+            inquote = c
+            wordstart = i
+        elif not inquote and c in [' ', '\n', '\r', '\t']:
+            if word:
+                yield (wordstart, word)
+            word = ''
+            wordstart = i+1
+        else:
+            word += c
+    if word:
+        yield (wordstart, word)
+    if inquote or inescape or word:
+        raise QuoteError()
+
+
+def quotesplit(line):
+    # Like _quotesplit(), but returns a list and tolerates an unfinished
+    # trailing word/quote (keeping the words already yielded).
+    l = []
+    try:
+        for i in _quotesplit(line):
+            l.append(i)
+    except QuoteError:
+        pass
+    return l
+
+
+def unfinished_word(line):
+    # Return (quotechar, word) for the unfinished word at the end of
+    # 'line', or (None, '') if the line parsed cleanly.
+    try:
+        for (wordstart,word) in _quotesplit(line):
+            pass
+    except QuoteError:
+        firstchar = line[wordstart]
+        if firstchar in [q, qq]:
+            return (firstchar, word)
+        else:
+            return (None, word)
+    else:
+        return (None, '')
+
+
+def quotify(qtype, word, terminate):
+    # Quote 'word' in the given style (q, qq, or None for plain backslash
+    # escaping); 'terminate' adds the closing quote character.
+    if qtype == qq:
+        return qq + word.replace(qq, '\\"') + (terminate and qq or '')
+    elif qtype == q:
+        return q + word.replace(q, "\\'") + (terminate and q or '')
+    else:
+        return re.sub(r'([\"\' \t\n\r])', r'\\\1', word)
+
+
+def what_to_add(qtype, origword, newword, terminate):
+    # For autocompletion: the characters to append so that 'origword'
+    # becomes 'newword', preserving the quoting style.
+    if not newword.startswith(origword):
+        return ''
+    else:
+        qold = quotify(qtype, origword, terminate=False)
+        return quotify(qtype, newword, terminate=terminate)[len(qold):]
--- /dev/null
+import os, re, stat, time
+from bup import git
+from helpers import *
+
+EMPTY_SHA='\0'*20
+
+_cp = None
+def cp():
+ global _cp
+ if not _cp:
+ _cp = git.CatPipe()
+ return _cp
+
+# Lookup errors raised by the virtual filesystem.
+class NodeError(Exception):
+    pass
+class NoSuchFile(NodeError):
+    pass
+class NotDir(NodeError):
+    pass
+class NotFile(NodeError):
+    pass
+class TooManySymlinks(NodeError):
+    pass
+
+
+class FileReader:
+    # File-like wrapper (seek/tell/read) around a vfs File node.
+    def __init__(self, node):
+        self.n = node
+        self.ofs = 0
+        self.size = self.n.size()
+
+    def seek(self, ofs):
+        # Absolute seek, clamped to [0, size].
+        if ofs > self.size:
+            self.ofs = self.size
+        elif ofs < 0:
+            self.ofs = 0
+        else:
+            self.ofs = ofs
+
+    def tell(self):
+        return self.ofs
+
+    def read(self, count = -1):
+        # count < 0 means read to end-of-file.
+        if count < 0:
+            count = self.size - self.ofs
+        buf = self.n.readbytes(self.ofs, count)
+        self.ofs += len(buf)
+        return buf
+
+
+class Node:
+    # Base class for objects in the virtual filesystem tree.
+    def __init__(self, parent, name, mode, hash):
+        self.parent = parent
+        self.name = name
+        self.mode = mode
+        self.hash = hash
+        self._subs = None   # lazily-built {name: Node} of children
+
+    def __cmp__(a, b):
+        return cmp(a.name or None, b.name or None)
+
+    def __iter__(self):
+        return iter(self.subs())
+
+    def fullname(self):
+        # Absolute path of this node within the vfs.
+        if self.parent:
+            return os.path.join(self.parent.fullname(), self.name)
+        else:
+            return self.name
+
+    def _mksubs(self):
+        # Subclasses override this to populate self._subs.
+        self._subs = {}
+
+    def subs(self):
+        # Sorted list of child nodes (built lazily).
+        if self._subs == None:
+            self._mksubs()
+        return sorted(self._subs.values())
+
+    def sub(self, name):
+        # Look up a single child by name; raises NoSuchFile if absent.
+        if self._subs == None:
+            self._mksubs()
+        ret = self._subs.get(name)
+        if not ret:
+            raise NoSuchFile("no file %r in %r" % (name, self.name))
+        return ret
+
+    def top(self):
+        # Root of the tree.
+        if self.parent:
+            return self.parent.top()
+        else:
+            return self
+
+    def _lresolve(self, parts):
+        #log('_lresolve %r in %r\n' % (parts, self.name))
+        if not parts:
+            return self
+        (first, rest) = (parts[0], parts[1:])
+        if first == '.':
+            return self._lresolve(rest)
+        elif first == '..':
+            if not self.parent:
+                raise NoSuchFile("no parent dir for %r" % self.name)
+            return self.parent._lresolve(rest)
+        elif rest:
+            return self.sub(first)._lresolve(rest)
+        else:
+            return self.sub(first)
+
+    def lresolve(self, path):
+        # Resolve 'path' relative to this node without dereferencing a
+        # final symlink (lstat-like).
+        start = self
+        if path.startswith('/'):
+            start = self.top()
+            path = path[1:]
+        parts = re.split(r'/+', path or '.')
+        if not parts[-1]:
+            parts[-1] = '.'
+        #log('parts: %r %r\n' % (path, parts))
+        return start._lresolve(parts)
+
+    def resolve(self, path):
+        # Like lresolve(), but the extra lresolve('') dereferences a final
+        # symlink (Symlink overrides _lresolve).
+        return self.lresolve(path).lresolve('')
+
+    def nlinks(self):
+        if self._subs == None:
+            self._mksubs()
+        return 1
+
+    def size(self):
+        return 0
+
+    def open(self):
+        raise NotFile('%s is not a regular file' % self.name)
+
+    def readbytes(self, ofs, count):
+        raise NotFile('%s is not a regular file' % self.name)
+
+    def read(self, num = -1):
+        # num < 0 means read the whole content.
+        if num < 0:
+            num = self.size()
+        return self.readbytes(0, num)
+
+
+class File(Node):
+    # A regular file whose content is a git blob (fetched via CatPipe).
+    def _content(self):
+        return cp().join(self.hash.encode('hex'))
+
+    def open(self):
+        return FileReader(self)
+
+    def size(self):
+        # FIXME inefficient: fetches the whole content to get its length
+        return sum(len(blob) for blob in self._content())
+
+    def readbytes(self, ofs, count):
+        # FIXME inefficient: fetches and joins the whole content
+        buf = ''.join(self._content())
+        return buf[ofs:ofs+count]
+
+
+_symrefs = 0   # recursion depth counter for symlink dereferencing
+class Symlink(File):
+    def __init__(self, parent, name, hash):
+        File.__init__(self, parent, name, 0120000, hash)
+
+    def readlink(self):
+        # The blob content is the link target (capped at 1024 bytes).
+        return self.read(1024)
+
+    def dereference(self):
+        # Resolve the link target, guarding against symlink loops.
+        global _symrefs
+        if _symrefs > 100:
+            raise TooManySymlinks('too many levels of symlinks: %r'
+                                  % self.fullname())
+        _symrefs += 1
+        try:
+            return self.parent.lresolve(self.readlink())
+        finally:
+            _symrefs -= 1
+
+    def _lresolve(self, parts):
+        return self.dereference()._lresolve(parts)
+
+
+class FakeSymlink(Symlink):
+    # A synthetic symlink whose target is a literal string, not a blob.
+    def __init__(self, parent, name, toname):
+        Symlink.__init__(self, parent, name, EMPTY_SHA)
+        self.toname = toname
+
+    def _content(self):
+        return self.toname
+
+
+class Dir(Node):
+    # A directory backed by a git tree (a commit hash is first
+    # dereferenced to its tree via 'hash:').
+    def _mksubs(self):
+        self._subs = {}
+        it = cp().get(self.hash.encode('hex'))
+        type = it.next()
+        if type == 'commit':
+            del it
+            it = cp().get(self.hash.encode('hex') + ':')
+            type = it.next()
+        assert(type == 'tree')
+        for (mode,name,sha) in git._treeparse(''.join(it)):
+            mode = int(mode, 8)
+            if stat.S_ISDIR(mode):
+                self._subs[name] = Dir(self, name, mode, sha)
+            elif stat.S_ISLNK(mode):
+                self._subs[name] = Symlink(self, name, sha)
+            else:
+                self._subs[name] = File(self, name, mode, sha)
+
+
+class CommitList(Node):
+    # A branch: a directory of commits, each reachable both by hex id and
+    # by a YYYY-MM-DD-HHMMSS symlink, plus a 'latest' symlink.
+    def __init__(self, parent, name, hash):
+        Node.__init__(self, parent, name, 040000, hash)
+
+    def _mksubs(self):
+        self._subs = {}
+        revs = list(git.rev_list(self.hash.encode('hex')))
+        for (date, commit) in revs:
+            l = time.localtime(date)
+            ls = time.strftime('%Y-%m-%d-%H%M%S', l)
+            commithex = commit.encode('hex')
+            self._subs[commithex] = Dir(self, commithex, 040000, commit)
+            self._subs[ls] = FakeSymlink(self, ls, commit.encode('hex'))
+        # NOTE(review): max() raises ValueError on an empty revs list;
+        # presumably a branch always has at least one commit -- confirm.
+        latest = max(revs)
+        if latest:
+            (date, commit) = latest
+            self._subs['latest'] = FakeSymlink(self, 'latest',
+                                               commit.encode('hex'))
+
+
+class RefList(Node):
+    # The filesystem root: one CommitList per git branch (refs/heads/*).
+    def __init__(self, parent):
+        Node.__init__(self, parent, '/', 040000, EMPTY_SHA)
+
+    def _mksubs(self):
+        self._subs = {}
+        for (name,sha) in git.list_refs():
+            if name.startswith('refs/heads/'):
+                name = name[11:]   # strip 'refs/heads/'
+                self._subs[name] = CommitList(self, name, sha)
+
--- /dev/null
+#!/usr/bin/env python
+# Top-level 'bup' wrapper: finds and execs the requested bup-<subcmd>
+# binary, optionally piping its output through bup-newliner.
+import sys, os, subprocess
+
+argv = sys.argv
+exe = argv[0]
+exepath = os.path.split(exe)[0] or '.'
+
+# fix the PYTHONPATH to include our lib dir
+libpath = os.path.join(exepath, 'lib')
+sys.path[:0] = [libpath]
+os.environ['PYTHONPATH'] = libpath + ':' + os.environ.get('PYTHONPATH', '')
+
+from bup.helpers import *
+
+def usage():
+    # List available subcommands (bup-* files next to this script).
+    log('Usage: bup <subcmd> <options...>\n\n')
+    log('Available subcommands:\n')
+    for c in sorted(os.listdir(exepath)):
+        if c.startswith('bup-') and c.find('.') < 0:
+            log('\t%s\n' % c[4:])
+    sys.exit(99)
+
+if len(argv) < 2 or not argv[1] or argv[1][0] == '-':
+    usage()
+
+subcmd = argv[1]
+if subcmd == 'help':
+    usage()
+
+def subpath(s):
+    # Full path of the bup-<s> executable.
+    return os.path.join(exepath, 'bup-%s' % s)
+
+if not os.path.exists(subpath(subcmd)):
+    log('error: unknown command "%s"\n' % subcmd)
+    usage()
+
+
+# When stdout/stderr is a tty, run the subcommand's output through
+# bup-newliner (presumably to tidy \r-style progress lines), and set
+# BUP_FORCE_TTY so the child still behaves as if on a tty.
+already_fixed = atoi(os.environ.get('BUP_FORCE_TTY'))
+if subcmd in ['ftp']:
+    # interactive subcommands manage the tty themselves
+    already_fixed = True
+fix_stdout = not already_fixed and os.isatty(1)
+fix_stderr = not already_fixed and os.isatty(2)
+
+def force_tty():
+    if fix_stdout or fix_stderr:
+        os.environ['BUP_FORCE_TTY'] = '1'
+
+if fix_stdout or fix_stderr:
+    realf = fix_stderr and 2 or 1
+    n = subprocess.Popen([subpath('newliner')],
+                         stdin=subprocess.PIPE, stdout=os.dup(realf),
+                         close_fds=True, preexec_fn=force_tty)
+    outf = fix_stdout and n.stdin.fileno() or 1
+    errf = fix_stderr and n.stdin.fileno() or 2
+else:
+    n = None
+    outf = 1
+    errf = 2
+
+# Exit codes: 95 = internal failure, 98 = couldn't exec the subcommand,
+# 94 = interrupted; otherwise the subcommand's own exit status.
+ret = 95
+try:
+    try:
+        p = subprocess.Popen([subpath(subcmd)] + argv[2:],
+                             stdout=outf, stderr=errf, preexec_fn=force_tty)
+        ret = p.wait()
+    except OSError, e:
+        log('%s: %s\n' % (subpath(subcmd), e))
+        ret = 98
+    except KeyboardInterrupt, e:
+        ret = 94
+finally:
+    if n:
+        n.stdin.close()
+        try:
+            n.wait()
+        except:
+            pass
+sys.exit(ret)
+++ /dev/null
-#!/usr/bin/env python
-import sys, re, struct, mmap
-import git, options
-from helpers import *
-
-
-def s_from_bytes(bytes):
- clist = [chr(b) for b in bytes]
- return ''.join(clist)
-
-
-def report(count):
- fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk']
- d = {}
- for line in open('/proc/self/status').readlines():
- l = re.split(r':\s*', line.strip(), 1)
- d[l[0]] = l[1]
- if count >= 0:
- e1 = count
- fields = [d[k] for k in fields]
- else:
- e1 = ''
- print ('%9s ' + ('%10s ' * len(fields))) % tuple([e1] + fields)
-
-
-optspec = """
-memtest [-n elements] [-c cycles]
---
-n,number= number of objects per cycle
-c,cycles= number of cycles to run
-ignore-midx ignore .midx files, use only .idx files
-"""
-o = options.Options(sys.argv[0], optspec)
-(opt, flags, extra) = o.parse(sys.argv[1:])
-
-if extra:
- o.fatal('no arguments expected')
-
-git.ignore_midx = opt.ignore_midx
-
-git.check_repo_or_die()
-m = git.MultiPackIndex(git.repo('objects/pack'))
-
-cycles = opt.cycles or 100
-number = opt.number or 10000
-
-report(-1)
-f = open('/dev/urandom')
-a = mmap.mmap(-1, 20)
-report(0)
-for c in xrange(cycles):
- for n in xrange(number):
- b = f.read(3)
- if 0:
- bytes = list(struct.unpack('!BBB', b)) + [0]*17
- bytes[2] &= 0xf0
- bin = struct.pack('!20s', s_from_bytes(bytes))
- else:
- a[0:2] = b[0:2]
- a[2] = chr(ord(b[2]) & 0xf0)
- bin = str(a[0:20])
- #print bin.encode('hex')
- m.exists(bin)
- report((c+1)*number)
+++ /dev/null
-import textwrap, getopt, re
-from helpers import *
-
-class OptDict:
- def __init__(self):
- self._opts = {}
-
- def __setitem__(self, k, v):
- self._opts[k] = v
-
- def __getitem__(self, k):
- return self._opts[k]
-
- def __getattr__(self, k):
- return self[k]
-
-
-class Options:
- def __init__(self, exe, optspec):
- self.exe = exe
- self.optspec = optspec
- self._aliases = {}
- self._shortopts = 'h?'
- self._longopts = ['help']
- self._hasparms = {}
- self._usagestr = self._gen_usage()
-
- def _gen_usage(self):
- out = []
- lines = self.optspec.strip().split('\n')
- lines.reverse()
- first_syn = True
- while lines:
- l = lines.pop()
- if l == '--': break
- out.append('%s: %s\n' % (first_syn and 'usage' or ' or', l))
- first_syn = False
- out.append('\n')
- while lines:
- l = lines.pop()
- if l.startswith(' '):
- out.append('\n%s\n' % l.lstrip())
- elif l:
- (flags, extra) = l.split(' ', 1)
- extra = extra.strip()
- if flags.endswith('='):
- flags = flags[:-1]
- has_parm = 1
- else:
- has_parm = 0
- flagl = flags.split(',')
- flagl_nice = []
- for f in flagl:
- f_nice = re.sub(r'\W', '_', f)
- self._aliases[f] = flagl[0]
- self._aliases[f_nice] = flagl[0]
- self._hasparms[f] = has_parm
- if len(f) == 1:
- self._shortopts += f + (has_parm and ':' or '')
- flagl_nice.append('-' + f)
- else:
- assert(not f.startswith('no-')) # supported implicitly
- self._longopts.append(f + (has_parm and '=' or ''))
- self._longopts.append('no-' + f)
- flagl_nice.append('--' + f)
- flags_nice = ', '.join(flagl_nice)
- if has_parm:
- flags_nice += ' ...'
- prefix = ' %-20s ' % flags_nice
- argtext = '\n'.join(textwrap.wrap(extra, width=70,
- initial_indent=prefix,
- subsequent_indent=' '*28))
- out.append(argtext + '\n')
- else:
- out.append('\n')
- return ''.join(out).rstrip() + '\n'
-
- def usage(self):
- log(self._usagestr)
- sys.exit(97)
-
- def fatal(self, s):
- log('error: %s\n' % s)
- return self.usage()
-
- def parse(self, args):
- try:
- (flags,extra) = getopt.gnu_getopt(args,
- self._shortopts, self._longopts)
- except getopt.GetoptError, e:
- self.fatal(e)
-
- opt = OptDict()
- for f in self._aliases.values():
- opt[f] = None
- for (k,v) in flags:
- while k.startswith('-'):
- k = k[1:]
- if k in ['h', '?', 'help']:
- self.usage()
- if k.startswith('no-'):
- k = self._aliases[k[3:]]
- opt[k] = None
- else:
- k = self._aliases[k]
- if not self._hasparms[k]:
- assert(v == '')
- opt[k] = (opt._opts.get(k) or 0) + 1
- else:
- try:
- vv = int(v)
- if str(vv) == v:
- v = vv
- except ValueError:
- pass
- opt[k] = v
- for (f1,f2) in self._aliases.items():
- opt[f1] = opt[f2]
- return (opt,flags,extra)
+++ /dev/null
-import re
-
-q = "'"
-qq = '"'
-
-
-class QuoteError(Exception):
- pass
-
-
-def _quotesplit(line):
- inquote = None
- inescape = None
- wordstart = 0
- word = ''
- for i in range(len(line)):
- c = line[i]
- if inescape:
- if inquote == q and c != q:
- word += '\\' # single-q backslashes can only quote single-q
- word += c
- inescape = False
- elif c == '\\':
- inescape = True
- elif c == inquote:
- inquote = None
- # this is un-sh-like, but do it for sanity when autocompleting
- yield (wordstart, word)
- word = ''
- wordstart = i+1
- elif not inquote and not word and (c == q or c == qq):
- # the 'not word' constraint on this is un-sh-like, but do it
- # for sanity when autocompleting
- inquote = c
- wordstart = i
- elif not inquote and c in [' ', '\n', '\r', '\t']:
- if word:
- yield (wordstart, word)
- word = ''
- wordstart = i+1
- else:
- word += c
- if word:
- yield (wordstart, word)
- if inquote or inescape or word:
- raise QuoteError()
-
-
-def quotesplit(line):
- l = []
- try:
- for i in _quotesplit(line):
- l.append(i)
- except QuoteError:
- pass
- return l
-
-
-def unfinished_word(line):
- try:
- for (wordstart,word) in _quotesplit(line):
- pass
- except QuoteError:
- firstchar = line[wordstart]
- if firstchar in [q, qq]:
- return (firstchar, word)
- else:
- return (None, word)
- else:
- return (None, '')
-
-
-def quotify(qtype, word, terminate):
- if qtype == qq:
- return qq + word.replace(qq, '\\"') + (terminate and qq or '')
- elif qtype == q:
- return q + word.replace(q, "\\'") + (terminate and q or '')
- else:
- return re.sub(r'([\"\' \t\n\r])', r'\\\1', word)
-
-
-def what_to_add(qtype, origword, newword, terminate):
- if not newword.startswith(origword):
- return ''
- else:
- qold = quotify(qtype, origword, terminate=False)
- return quotify(qtype, newword, terminate=terminate)[len(qold):]
+import sys
+sys.path[:0] = ['lib']
-import git, time
+import time
+from bup import git
+from bup.helpers import *
from wvtest import *
-from helpers import *
@wvtest
-from helpers import *
+from bup.helpers import *
from wvtest import *
@wvtest
import os
-import index
+from bup import index
+from bup.helpers import *
from wvtest import *
-from helpers import *
@wvtest
def index_basic():
-import options
+from bup import options
from wvtest import *
@wvtest
+from bup import shquote
from wvtest import *
-import shquote
def qst(line):
return [s[1] for s in shquote.quotesplit(line)]
+++ /dev/null
-import os, re, stat, time
-import git
-from helpers import *
-
-EMPTY_SHA='\0'*20
-
-_cp = None
-def cp():
- global _cp
- if not _cp:
- _cp = git.CatPipe()
- return _cp
-
-class NodeError(Exception):
- pass
-class NoSuchFile(NodeError):
- pass
-class NotDir(NodeError):
- pass
-class NotFile(NodeError):
- pass
-class TooManySymlinks(NodeError):
- pass
-
-
-class FileReader:
- def __init__(self, node):
- self.n = node
- self.ofs = 0
- self.size = self.n.size()
-
- def seek(self, ofs):
- if ofs > self.size:
- self.ofs = self.size
- elif ofs < 0:
- self.ofs = 0
- else:
- self.ofs = ofs
-
- def tell(self):
- return self.ofs
-
- def read(self, count = -1):
- if count < 0:
- count = self.size - self.ofs
- buf = self.n.readbytes(self.ofs, count)
- self.ofs += len(buf)
- return buf
-
-
-class Node:
- def __init__(self, parent, name, mode, hash):
- self.parent = parent
- self.name = name
- self.mode = mode
- self.hash = hash
- self._subs = None
-
- def __cmp__(a, b):
- return cmp(a.name or None, b.name or None)
-
- def __iter__(self):
- return iter(self.subs())
-
- def fullname(self):
- if self.parent:
- return os.path.join(self.parent.fullname(), self.name)
- else:
- return self.name
-
- def _mksubs(self):
- self._subs = {}
-
- def subs(self):
- if self._subs == None:
- self._mksubs()
- return sorted(self._subs.values())
-
- def sub(self, name):
- if self._subs == None:
- self._mksubs()
- ret = self._subs.get(name)
- if not ret:
- raise NoSuchFile("no file %r in %r" % (name, self.name))
- return ret
-
- def top(self):
- if self.parent:
- return self.parent.top()
- else:
- return self
-
- def _lresolve(self, parts):
- #log('_lresolve %r in %r\n' % (parts, self.name))
- if not parts:
- return self
- (first, rest) = (parts[0], parts[1:])
- if first == '.':
- return self._lresolve(rest)
- elif first == '..':
- if not self.parent:
- raise NoSuchFile("no parent dir for %r" % self.name)
- return self.parent._lresolve(rest)
- elif rest:
- return self.sub(first)._lresolve(rest)
- else:
- return self.sub(first)
-
- def lresolve(self, path):
- start = self
- if path.startswith('/'):
- start = self.top()
- path = path[1:]
- parts = re.split(r'/+', path or '.')
- if not parts[-1]:
- parts[-1] = '.'
- #log('parts: %r %r\n' % (path, parts))
- return start._lresolve(parts)
-
- def resolve(self, path):
- return self.lresolve(path).lresolve('')
-
- def nlinks(self):
- if self._subs == None:
- self._mksubs()
- return 1
-
- def size(self):
- return 0
-
- def open(self):
- raise NotFile('%s is not a regular file' % self.name)
-
- def readbytes(self, ofs, count):
- raise NotFile('%s is not a regular file' % self.name)
-
- def read(self, num = -1):
- if num < 0:
- num = self.size()
- return self.readbytes(0, num)
-
-
-class File(Node):
- def _content(self):
- return cp().join(self.hash.encode('hex'))
-
- def open(self):
- return FileReader(self)
-
- def size(self):
- # FIXME inefficient
- return sum(len(blob) for blob in self._content())
-
- def readbytes(self, ofs, count):
- # FIXME inefficient
- buf = ''.join(self._content())
- return buf[ofs:ofs+count]
-
-
-_symrefs = 0
-class Symlink(File):
- def __init__(self, parent, name, hash):
- File.__init__(self, parent, name, 0120000, hash)
-
- def readlink(self):
- return self.read(1024)
-
- def dereference(self):
- global _symrefs
- if _symrefs > 100:
- raise TooManySymlinks('too many levels of symlinks: %r'
- % self.fullname())
- _symrefs += 1
- try:
- return self.parent.lresolve(self.readlink())
- finally:
- _symrefs -= 1
-
- def _lresolve(self, parts):
- return self.dereference()._lresolve(parts)
-
-
-class FakeSymlink(Symlink):
- def __init__(self, parent, name, toname):
- Symlink.__init__(self, parent, name, EMPTY_SHA)
- self.toname = toname
-
- def _content(self):
- return self.toname
-
-
-class Dir(Node):
- def _mksubs(self):
- self._subs = {}
- it = cp().get(self.hash.encode('hex'))
- type = it.next()
- if type == 'commit':
- del it
- it = cp().get(self.hash.encode('hex') + ':')
- type = it.next()
- assert(type == 'tree')
- for (mode,name,sha) in git._treeparse(''.join(it)):
- mode = int(mode, 8)
- if stat.S_ISDIR(mode):
- self._subs[name] = Dir(self, name, mode, sha)
- elif stat.S_ISLNK(mode):
- self._subs[name] = Symlink(self, name, sha)
- else:
- self._subs[name] = File(self, name, mode, sha)
-
-
-class CommitList(Node):
- def __init__(self, parent, name, hash):
- Node.__init__(self, parent, name, 040000, hash)
-
- def _mksubs(self):
- self._subs = {}
- revs = list(git.rev_list(self.hash.encode('hex')))
- for (date, commit) in revs:
- l = time.localtime(date)
- ls = time.strftime('%Y-%m-%d-%H%M%S', l)
- commithex = commit.encode('hex')
- self._subs[commithex] = Dir(self, commithex, 040000, commit)
- self._subs[ls] = FakeSymlink(self, ls, commit.encode('hex'))
- latest = max(revs)
- if latest:
- (date, commit) = latest
- self._subs['latest'] = FakeSymlink(self, 'latest',
- commit.encode('hex'))
-
-
-class RefList(Node):
- def __init__(self, parent):
- Node.__init__(self, parent, '/', 040000, EMPTY_SHA)
-
- def _mksubs(self):
- self._subs = {}
- for (name,sha) in git.list_refs():
- if name.startswith('refs/heads/'):
- name = name[11:]
- self._subs[name] = CommitList(self, name, sha)
-
-