bup split [-r *host*:*path*] <-b|-t|-c|-n *name*> [-v] [-q]
[--bench] [--max-pack-size=*bytes*]
- [--max-pack-objects=*n*] [--fanout=*count] [filenames...]
+ [--max-pack-objects=*n*] [--fanout=*count*]
+ [--keep-boundaries] [filenames...]
# DESCRIPTION
-v, --verbose
: increase verbosity (can be used more than once).
+--keep-boundaries
+: if multiple filenames are given on the command line,
+ they are normally concatenated together as if the
+ content all came from a single file. That is, the
+ set of blobs/trees produced is identical to what it
+ would have been if there had been a single input file.
+ However, if you use `--keep-boundaries`, each file is
+ split separately. You still only get a single tree or
+ commit or series of blobs, but each blob comes from
+ only one of the files; the end of one of the input
+ files always ends a blob.
+
--noop
: read the data and split it into blocks based on the "bupsplit"
rolling checksum algorithm, but don't do anything with
add_error(e)
lastskip_name = ent.name
else:
- (mode, id) = hashsplit.split_to_blob_or_tree(w, [f])
+ (mode, id) = hashsplit.split_to_blob_or_tree(w, [f], False)
else:
if stat.S_ISDIR(ent.mode):
assert(0) # handled above
d,date= date for the commit (seconds since the epoch)
q,quiet don't print progress messages
v,verbose increase log output (can be used more than once)
+keep-boundaries don't let one chunk span two input files
noop don't actually save the data anywhere
copy just copy input to output, hashsplitting along the way
bench print benchmark timings to stderr
files = extra and (open(fn) for fn in extra) or [sys.stdin]
if pack_writer:
- shalist = hashsplit.split_to_shalist(pack_writer, files)
+ shalist = hashsplit.split_to_shalist(pack_writer, files,
+ keep_boundaries=opt.keep_boundaries)
tree = pack_writer.new_tree(shalist)
else:
last = 0
- for (blob, bits) in hashsplit.hashsplit_iter(files):
+ for (blob, bits) in hashsplit.hashsplit_iter(files,
+ keep_boundaries=opt.keep_boundaries):
hashsplit.total_split += len(blob)
if opt.copy:
sys.stdout.write(str(blob))
yield (buf.get(buf.used()), 0)
-def hashsplit_iter(files):
+def _hashsplit_iter(files):
assert(BLOB_HWM > BLOB_MAX)
buf = Buf()
fi = blobiter(files)
buf.put(bnew)
+def _hashsplit_iter_keep_boundaries(files):
+ for f in files:
+ for i in _hashsplit_iter([f]):
+ yield i
+
+
+def hashsplit_iter(files, keep_boundaries):
+ if keep_boundaries:
+ return _hashsplit_iter_keep_boundaries(files)
+ else:
+ return _hashsplit_iter(files)
+
+
total_split = 0
-def _split_to_blobs(w, files):
+def _split_to_blobs(w, files, keep_boundaries):
global total_split
- for (blob, bits) in hashsplit_iter(files):
+ for (blob, bits) in hashsplit_iter(files, keep_boundaries):
sha = w.new_blob(blob)
total_split += len(blob)
if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
i += 1
-def split_to_shalist(w, files):
- sl = _split_to_blobs(w, files)
+def split_to_shalist(w, files, keep_boundaries):
+ sl = _split_to_blobs(w, files, keep_boundaries)
if not fanout:
shal = []
for (sha,size,bits) in sl:
return _make_shalist(stacks[-1])[0]
-def split_to_blob_or_tree(w, files):
- shalist = list(split_to_shalist(w, files))
+def split_to_blob_or_tree(w, files, keep_boundaries):
+ shalist = list(split_to_shalist(w, files, keep_boundaries))
if len(shalist) == 1:
return (shalist[0][0], shalist[0][2])
elif len(shalist) == 0:
WVFAIL bup save -r :$BUP_DIR -n r-test $D/fake/path
WVSTART "split"
+echo a >a.tmp
+echo b >b.tmp
+WVPASSEQ $(bup split -b a.tmp b.tmp | wc -l) 1
+WVPASSEQ $(bup split -b --keep-boundaries a.tmp b.tmp | wc -l) 2
WVPASS bup split --bench -b <t/testfile1 >tags1.tmp
WVPASS bup split -vvvv -b t/testfile2 >tags2.tmp
WVPASS bup margin