From 8fa671984c14627182f89020758a0f46761fe11f Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Sun, 3 Jan 2010 05:00:38 -0500 Subject: [PATCH 1/1] Extremely basic 'bup server' support. It's enough to send a pack to the remote end with 'bup split', though 'bup save' doesn't support it yet, and we're not smart enough to do incremental backups, which means we generate the gigantic pack every single time. --- .gitignore | 5 +--- Makefile | 7 +++--- bup.py | 3 +-- cmd-save.py | 1 + cmd-server.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++ cmd-split.py | 30 ++++++++++++++++++++++-- git.py | 64 +++++++++++++++++++++++++++++++++++++-------------- helpers.py | 8 +++++++ 8 files changed, 150 insertions(+), 28 deletions(-) create mode 100755 cmd-server.py diff --git a/.gitignore b/.gitignore index 092f3a0..0645681 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,5 @@ bup -bup-split -bup-join -bup-save -bup-init +bup-* randomgen *.o *.so diff --git a/Makefile b/Makefile index fe91974..e19cd3b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ CFLAGS=-Wall -g -O2 -Werror -I/usr/include/python2.5 -g -fPIC default: all -all: bup-split bup-join bup-save bup-init bup randomgen chashsplit.so +all: bup-split bup-join bup-save bup-init bup-server bup randomgen chashsplit.so randomgen: randomgen.o @@ -38,5 +38,6 @@ bup-%: cmd-%.sh clean: rm -f *.o *.so *~ .*~ *.pyc */*.pyc */*~ \ - bup bup-split bup-join bup-save bup-init randomgen \ - out[12] out2[tc] tags[12] tags2[tc] *.tmp + bup bup-* randomgen \ + out[12] out2[tc] tags[12] tags2[tc] + rm -rf *.tmp diff --git a/bup.py b/bup.py index d15cc48..d5b5c76 100755 --- a/bup.py +++ b/bup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -import sys, os +import sys, os, git argv = sys.argv exe = argv[0] @@ -16,7 +16,6 @@ def usage(): log('\t%s\n' % c[4:]) exit(99) - if len(argv) < 2 or not argv[1] or argv[1][0] == '-': usage() diff --git a/cmd-save.py b/cmd-save.py index 2e571d1..14cacf1 100755 --- a/cmd-save.py +++ b/cmd-save.py @@ -105,6 +105,7 @@ v,verbose increase log output (can be used more than once) o = options.Options('bup save', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) +git.check_repo_or_die() if not (opt.tree or opt.commit or opt.name): log("bup save: use one or more of -t, -c, -n\n") o.usage() diff --git a/cmd-server.py b/cmd-server.py new file mode 100755 index 0000000..35f98a3 --- /dev/null +++ b/cmd-server.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +import sys, struct +import options, git +from helpers import * + + +def receive_objects(f): + w = git.PackWriter() + while 1: + ns = f.read(4) + if not ns: + w.abort() + raise Exception('object read: expected length header, got EOF\n') + n = struct.unpack('!I', ns)[0] + #log('expecting %d bytes\n' % n) + if not n: + w.close() + return + buf = f.read(n) + #log('read %d bytes\n' % n) + if len(buf) < n: + w.abort() + raise Exception('object read: expected %d bytes, got %d\n' + % (n, len(buf))) + w._raw_write(buf) + w.close() + + +optspec = """ +bup server +""" +o = options.Options('bup server', optspec) +(opt, flags, extra) = o.parse(sys.argv[1:]) + +if extra: + log('bup server: no arguments expected\n') + o.usage() + +log('bup server: reading from stdin.\n') + +f = sys.stdin +lr = linereader(f) +for _line in lr: + line = _line.strip() + if not line: + continue + log('bup server: command: %r\n' % line) + if line == 'quit': + break + elif line == 'set-dir': + git.repodir = lr.next() + git.check_repo_or_die() + log('bup server: bupdir is %r\n' % git.repodir) + elif line == 'receive-objects': + git.check_repo_or_die() + receive_objects(f) + else: + raise Exception('unknown server command: %r\n' % line) + +log('bup server: done\n') diff --git a/cmd-split.py b/cmd-split.py index 9d3d485..b013e6d 100755 --- a/cmd-split.py +++ b/cmd-split.py @@ -1,11 +1,12 @@ #!/usr/bin/env python -import sys, time +import sys, time, re import hashsplit, git, options from helpers import * optspec = """ bup split [-tcb] [-n name] [--bench] [filenames...] -- +r,remote= remote repository path b,blobs output a series of blob ids t,tree output a tree id c,commit output a commit id @@ -16,6 +17,7 @@ bench print benchmark timings to stderr o = options.Options('bup split', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) +git.check_repo_or_die() if not (opt.blobs or opt.tree or opt.commit or opt.name): log("bup split: use one or more of -b, -t, -c, -n\n") o.usage() @@ -27,7 +29,26 @@ if opt.verbose >= 2: start_time = time.time() -w = git.PackWriter() +def server_connect(remote): + rs = remote.split(':', 1) + if len(rs) == 1: + p = subprocess.Popen(['bup', 'server', '-d', opt.remote], + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + else: + (host, dir) = rs + p = subprocess.Popen(['ssh', host, '--', 'bup', 'server'], + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + dir = re.sub(r'[\r\n]', ' ', dir) + p.stdin.write('set-dir\n%s\n' % dir) + return p + +if opt.remote: + p = server_connect(opt.remote) + p.stdin.write('receive-objects\n') + w = git.PackWriter_Remote(p.stdin) +else: + w = git.PackWriter() + (shalist,tree) = hashsplit.split_to_tree(w, hashsplit.autofiles(extra)) if opt.verbose: @@ -44,6 +65,11 @@ if opt.commit or opt.name: if opt.commit: print commit.encode('hex') +if opt.remote: + w.close() + p.stdin.write('quit\n') + p.wait() + secs = time.time() - start_time size = hashsplit.total_split if opt.bench: diff --git a/git.py b/git.py index 7f09dcc..a5fcbe2 100644 --- a/git.py +++ b/git.py @@ -2,9 +2,10 @@ import os, errno, zlib, time, sha, subprocess, struct, mmap, stat from helpers import * verbose = 0 +repodir = os.environ.get('BUP_DIR', '.git') -def repodir(sub = ''): - return os.path.join(os.environ.get('BUP_DIR', '.git'), sub) +def repo(sub = ''): + return os.path.join(repodir, sub) class PackIndex: @@ -102,12 +103,11 @@ def _shalist_sort_key(ent): _typemap = dict(blob=3, tree=2, commit=1, tag=8) class PackWriter: - def __init__(self): + def __init__(self, objcache=None): self.count = 0 - self.binlist = [] - self.objcache = MultiPackIndex(repodir('objects/pack')) self.filename = None self.file = None + self.objcache = objcache or MultiPackIndex(repo('objects/pack')) def __del__(self): self.close() @@ -115,35 +115,40 @@ class PackWriter: def _open(self): assert(not self.file) self.objcache.zap_also() - self.filename = repodir('objects/bup%d' % os.getpid()) + self.filename = repo('objects/bup%d' % os.getpid()) self.file = open(self.filename + '.pack', 'w+') self.file.write('PACK\0\0\0\2\0\0\0\0') - def _write(self, bin, type, content): + def _raw_write(self, datalist): if not self.file: self._open() f = self.file + for d in datalist: + f.write(d) + self.count += 1 + def _write(self, bin, type, content): if verbose: log('>') - + + out = [] + sz = len(content) szbits = (sz & 0x0f) | (_typemap[type]<<4) sz >>= 4 while 1: if sz: szbits |= 0x80 - f.write(chr(szbits)) + out.append(chr(szbits)) if not sz: break szbits = sz & 0x7f sz >>= 7 - + z = zlib.compressobj(1) - f.write(z.compress(content)) - f.write(z.flush()) + out.append(z.compress(content)) + out.append(z.flush()) - self.count += 1 - self.binlist.append(bin) + self._raw_write(out) return bin def write(self, type, content): @@ -223,18 +228,37 @@ class PackWriter: out = p.stdout.read().strip() if p.wait() or not out: raise Exception('git index-pack returned an error') - nameprefix = repodir('objects/pack/%s' % out) + nameprefix = repo('objects/pack/%s' % out) os.rename(self.filename + '.pack', nameprefix + '.pack') os.rename(self.filename + '.idx', nameprefix + '.idx') return nameprefix +class PackWriter_Remote(PackWriter): + def __init__(self, file, objcache=None): + PackWriter.__init__(self, objcache) + self.file = file + self.filename = 'remote socket' + + def close(self): + if self.file: + self.file.write('\0\0\0\0') + self.file.flush() + self.file = None + + def _raw_write(self, datalist): + assert(self.file) + data = ''.join(datalist) + assert(len(data)) + self.file.write(struct.pack('!I', len(data)) + data) + + def _git_date(date): return time.strftime('%s %z', time.localtime(date)) def _gitenv(): - os.environ['GIT_DIR'] = os.path.abspath(repodir()) + os.environ['GIT_DIR'] = os.path.abspath(repo()) def _read_ref(refname): @@ -259,9 +283,15 @@ def _update_ref(refname, newval, oldval): def init_repo(): - d = repodir() + d = repo() if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')): raise Exception('"%d" exists but is not a directory\n' % d) p = subprocess.Popen(['git', 'init', '--bare'], preexec_fn = _gitenv) return p.wait() + + +def check_repo_or_die(): + if not os.path.isdir(repo('objects/pack/.')): + log('error: %r is not a git repository\n' % repo()) + exit(15) diff --git a/helpers.py b/helpers.py index 7e67b7a..1f89ce5 100644 --- a/helpers.py +++ b/helpers.py @@ -45,3 +45,11 @@ def hostname(): except OSError: pass return _hostname or 'localhost' + + +def linereader(f): + while 1: + line = f.readline() + if not line: + break + yield line[:-1] -- 2.39.2