optspec = """
-bup split [-tcb] [-n name] [--bench] [filenames...]
+bup split <-t|-c|-b|-n name|--copy|--noop> [--bench] [filenames...]
--
-r,remote= remote repository path
+ Modes:
b,blobs output a series of blob ids
t,tree output a tree id
c,commit output a commit id
-n,name= name of backup set to update (if any)
+n,name= save the result under the given name
+noop split the input, but throw away the result
+copy split the input, copy it to stdout, don't save to repo
+ Options:
+r,remote= remote repository path
d,date= date for the commit (seconds since the epoch)
q,quiet don't print progress messages
v,verbose increase log output (can be used more than once)
git-ids read a list of git object ids from stdin and split their contents
keep-boundaries don't let one chunk span two input files
-noop don't actually save the data anywhere
-copy just copy input to output, hashsplitting along the way
bench print benchmark timings to stderr
max-pack-size= maximum bytes in a single pack
max-pack-objects= maximum number of objects in a single pack
fanout= maximum number of blobs in a single tree
bwlimit= maximum bytes/sec to transmit to server
+#,compress= set compression level to # (0-9, 9 is highest) [1]
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])
+# --noop and --copy never save the result, so the output-mode flags cannot apply
if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
opt.commit or opt.name):
- o.fatal('-N and --copy are incompatible with -b, -t, -c, -n')
+ o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
+if opt.blobs and (opt.tree or opt.commit or opt.name):
+ o.fatal('-b is incompatible with -t, -c, -n')
if extra and opt.git_ids:
o.fatal("don't provide filenames when using --git-ids")
git.verbose = opt.verbose - 1
opt.bench = 1
if opt.max_pack_size:
- hashsplit.max_pack_size = parse_num(opt.max_pack_size)
+ git.max_pack_size = parse_num(opt.max_pack_size)
if opt.max_pack_objects:
- hashsplit.max_pack_objects = parse_num(opt.max_pack_objects)
+ git.max_pack_objects = parse_num(opt.max_pack_objects)
if opt.fanout:
hashsplit.fanout = parse_num(opt.fanout)
if opt.blobs:
else:
date = time.time()
-
-last_prog = total_bytes = 0
+total_bytes = 0
def prog(filenum, nbytes):
- global last_prog, total_bytes
+ global total_bytes
total_bytes += nbytes
- now = time.time()
- if now - last_prog < 0.2:
- return
if filenum > 0:
- progress('Splitting: file #%d, %d kbytes\r'
- % (filenum+1, total_bytes/1024))
+ qprogress('Splitting: file #%d, %d kbytes\r'
+ % (filenum+1, total_bytes/1024))
else:
- progress('Splitting: %d kbytes\r' % (total_bytes/1024))
- last_prog = now
+ qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
is_reverse = os.environ.get('BUP_SERVER_REVERSE')
if opt.noop or opt.copy:
cli = pack_writer = oldref = None
elif opt.remote or is_reverse:
- cli = client.Client(opt.remote)
+ cli = client.Client(opt.remote, compression_level=opt.compress)
oldref = refname and cli.read_ref(refname) or None
pack_writer = cli.new_packwriter()
else:
cli = None
oldref = refname and git.read_ref(refname) or None
- pack_writer = git.PackWriter()
+ pack_writer = git.PackWriter(compression_level=opt.compress)
if opt.git_ids:
# the input is actually a series of git object ids that we should retrieve
# the input either comes from a series of files or from stdin.
files = extra and (open(fn) for fn in extra) or [sys.stdin]
-if pack_writer:
- shalist = hashsplit.split_to_shalist(pack_writer, files,
+if pack_writer and opt.blobs:
+ shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
+ keep_boundaries=opt.keep_boundaries,
+ progress=prog)
+ for (sha, size, level) in shalist:
+ print sha.encode('hex')
+ reprogress()
+elif pack_writer: # tree or commit or name
+ shalist = hashsplit.split_to_shalist(pack_writer.new_blob,
+ pack_writer.new_tree,
+ files,
keep_boundaries=opt.keep_boundaries,
progress=prog)
tree = pack_writer.new_tree(shalist)
else:
last = 0
- for (blob, bits) in hashsplit.hashsplit_iter(files,
- keep_boundaries=opt.keep_boundaries,
- progress=prog):
+ it = hashsplit.hashsplit_iter(files,
+ keep_boundaries=opt.keep_boundaries,
+ progress=prog)
+ for (blob, level) in it:
hashsplit.total_split += len(blob)
if opt.copy:
sys.stdout.write(str(blob))
megs = hashsplit.total_split/1024/1024
if not opt.quiet and last != megs:
- progress('%d Mbytes read\r' % megs)
last = megs
- progress('%d Mbytes read, done.\n' % megs)
if opt.verbose:
log('\n')
-if opt.blobs:
- for (mode,name,bin) in shalist:
- print bin.encode('hex')
if opt.tree:
print tree.encode('hex')
if opt.commit or opt.name:
secs = time.time() - start_time
size = hashsplit.total_split
if opt.bench:
- log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
+ log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
% (size/1024., secs, size/1024./secs))
if saved_errors: