#!/usr/bin/env python
-import sys, os, subprocess, errno, zlib, time, getopt
-import hashsplit
-import git
+import sys, time, struct
+import hashsplit, git, options, client
from helpers import *
-
-BLOB_LWM = 8192*2
-BLOB_MAX = BLOB_LWM*2
-BLOB_HWM = 1024*1024
-
-
-class Buf:
- def __init__(self):
- self.data = ''
- self.start = 0
-
- def put(self, s):
- #log('oldsize=%d+%d adding=%d\n' % (len(self.data), self.start, len(s)))
- if s:
- self.data = buffer(self.data, self.start) + s
- self.start = 0
-
- def peek(self, count):
- return buffer(self.data, self.start, count)
-
- def eat(self, count):
- self.start += count
-
- def get(self, count):
- v = buffer(self.data, self.start, count)
- self.start += count
- return v
-
- def used(self):
- return len(self.data) - self.start
-
-
-def splitbuf(buf):
- b = buf.peek(buf.used())
- ofs = hashsplit.splitbuf(b)
- if ofs:
- buf.eat(ofs)
- return buffer(b, 0, ofs)
- return None
-
-
-def hashsplit_iter(f):
- ofs = 0
- buf = Buf()
- blob = 1
-
- eof = 0
- lv = 0
- while blob or not eof:
- if not eof and (buf.used() < BLOB_LWM or not blob):
- bnew = sys.stdin.read(BLOB_HWM)
- if not len(bnew): eof = 1
- #log('got %d, total %d\n' % (len(bnew), buf.used()))
- buf.put(bnew)
-
- blob = splitbuf(buf)
- if eof and not blob:
- blob = buf.get(buf.used())
- if not blob and buf.used() >= BLOB_MAX:
- blob = buf.get(BLOB_MAX) # limit max blob size
- if not blob and not eof:
- continue
-
- if blob:
- yield (ofs, len(blob), git.hash_blob(blob))
- ofs += len(blob)
-
- nv = (ofs + buf.used())/1000000
- if nv != lv:
- log('%d\t' % nv)
- lv = nv
-
-
-def usage():
- log('Usage: bup split [-t] <filename\n')
- exit(97)
-
-gen_tree = False
-
-def argparse(usage, argv, shortopts, allow_extra):
- try:
- (flags,extra) = getopt.getopt(argv[1:], shortopts)
- except getopt.GetoptError, e:
- log('%s: %s\n' % (argv[0], e))
- usage()
- if extra and not allow_extra:
- log('%s: invalid argument "%s"\n' % (argv[0], extra[0]))
- usage()
- return flags
-
-
-flags = argparse(usage, sys.argv, 't', False)
-for (flag,parm) in flags:
- if flag == '-t':
- gen_tree = True
-
+from subprocess import PIPE
+
+
+# Option specification handed to options.Options below: a usage line, a
+# '--' separator, then one option per line.  The entries written with a
+# trailing '=' are the ones whose values are consumed later in this script
+# (opt.remote, opt.name, opt.max_pack_size, opt.max_pack_objects,
+# opt.fanout); the others are only tested for truth, except opt.verbose,
+# which is also compared as a number.
+# NOTE(review): the exact optspec grammar is defined by the options
+# module -- confirm there before changing the layout of this string.
+optspec = """
+bup split [-tcb] [-n name] [--bench] [filenames...]
+--
+r,remote= remote repository path
+b,blobs output a series of blob ids
+t,tree output a tree id
+c,commit output a commit id
+n,name= name of backup set to update (if any)
+v,verbose increase log output (can be used more than once)
+bench print benchmark timings to stderr
+max-pack-size= maximum bytes in a single pack
+max-pack-objects= maximum number of objects in a single pack
+fanout= maximum number of blobs in a single tree
+"""
+# opt exposes the parsed options as attributes (long names with dashes are
+# read back with underscores, e.g. opt.max_pack_size); extra is passed to
+# hashsplit.autofiles() further down as the inputs to split.
+o = options.Options('bup split', optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+# The repository check runs before any option validation.
+git.check_repo_or_die()
+
+# At least one kind of output (-b, -t, -c) or a ref to update (-n) must
+# have been requested.
+if not opt.blobs and not opt.tree and not opt.commit and not opt.name:
+    log("bup split: use one or more of -b, -t, -c, -n\n")
+    o.usage()
+
+# The -v count goes to hashsplit as-is; from the second -v on, git gets
+# one level less and the benchmark report is switched on as well.
+hashsplit.split_verbosely = opt.verbose
+if opt.verbose >= 2:
+    git.verbose = opt.verbose - 1
+    opt.bench = 1
+
+# Copy whichever numeric limits were given onto the hashsplit module,
+# converting them to int on the way.
+for knob in ('max_pack_size', 'max_pack_objects', 'fanout'):
+    requested = getattr(opt, knob)
+    if requested:
+        setattr(hashsplit, knob, int(requested))
+
+# -b takes precedence over any --fanout value set just above.
+if opt.blobs:
+    hashsplit.fanout = 0
start_time = time.time()
-shalist = []
-for (ofs, size, sha) in hashsplit_iter(sys.stdin):
- #log('SPLIT @ %-8d size=%-8d\n' % (ofs, size))
- if not gen_tree:
- print sha
- shalist.append(('100644', '%016x.bupchunk' % ofs, sha))
-if gen_tree:
- print git.gen_tree(shalist)
+# Full ref name for the backup set, or None when -n was not given.
+if opt.name:
+    refname = 'refs/heads/%s' % opt.name
+else:
+    refname = None
+
+# With -r the old ref is read through, and the pack writer created by, a
+# client for the remote repository; otherwise the local git module does
+# both.  oldref stays None when there is no ref name or the lookup
+# returned nothing.
+oldref = None
+if opt.remote:
+    cli = client.Client(opt.remote)
+    if refname:
+        oldref = cli.read_ref(refname) or None
+    w = cli.new_packwriter()
+else:
+    cli = None
+    if refname:
+        oldref = git.read_ref(refname) or None
+    w = git.PackWriter()
+
+# Split all inputs through the pack writer, then build a tree from the
+# resulting list of (mode, name, id) entries.
+shalist = hashsplit.split_to_shalist(w, hashsplit.autofiles(extra))
+tree = w.new_tree(shalist)
+
+if opt.verbose:
+    log('\n')
+if opt.blobs:
+    # One hex id per entry; only the third field of each tuple is printed.
+    for (mode, name, sha) in shalist:
+        print sha.encode('hex')
+if opt.tree:
+    print tree.encode('hex')
+if opt.commit or opt.name:
+    # A commit is needed both for -c (it is printed) and for -n (the ref
+    # is pointed at it after the pack writer has been closed).
+    msg = 'bup split\n\nGenerated by command:\n%r' % sys.argv
+    commit = w.new_commit(oldref, tree, msg)
+    if opt.commit:
+        print commit.encode('hex')
+
+# The pack writer has to be closed before the ref may be moved.
+w.close()
+
+if opt.name:
+    # Update through the remote client when there is one, otherwise
+    # through the local git module.
+    if cli:
+        update_ref = cli.update_ref
+    else:
+        update_ref = git.update_ref
+    update_ref(refname, commit, oldref)
+
+if cli:
+    cli.close()
secs = time.time() - start_time
-log('\n%.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
- % (ofs/1024., secs, ofs/1024./secs))
+size = hashsplit.total_split
+if opt.bench:
+    # time.time() can be coarse enough that a very short run measures 0
+    # elapsed seconds.  Report a rate of 0 in that case instead of dying
+    # with ZeroDivisionError after the pack and ref were already written.
+    if secs > 0:
+        rate = size/1024./secs
+    else:
+        rate = 0.0
+    log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
+        % (size/1024., secs, rate))