bup split [-r *host*:*path*] <-b|-t|-c|-n *name*> [-v] [-q]
[--bench] [--max-pack-size=*bytes*]
[--max-pack-objects=*n*] [--fanout=*count*]
- [--keep-boundaries] [filenames...]
+ [--git-ids] [--keep-boundaries] [filenames...]
# DESCRIPTION
-v, --verbose
: increase verbosity (can be used more than once).
+--git-ids
+: stdin is a list of git object ids instead of raw data.
+ `bup split` will read the contents of each named git
+ object (if it exists in the bup repository) and split
+ it. This might be useful for converting a git
+ repository with large binary files to use bup-style
+ hashsplitting instead. This option is probably most
+ useful when combined with `--keep-boundaries`.
+
--keep-boundaries
: if multiple filenames are given on the command line,
they are normally concatenated together as if the
d,date= date for the commit (seconds since the epoch)
q,quiet don't print progress messages
v,verbose increase log output (can be used more than once)
+git-ids read a list of git object ids from stdin and split their contents
keep-boundaries don't let one chunk span two input files
noop don't actually save the data anywhere
copy just copy input to output, hashsplitting along the way
if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
opt.commit or opt.name):
o.fatal('-N and --copy are incompatible with -b, -t, -c, -n')
+if extra and opt.git_ids:
+ o.fatal("don't provide filenames when using --git-ids")
if opt.verbose >= 2:
git.verbose = opt.verbose - 1
oldref = refname and git.read_ref(refname) or None
pack_writer = git.PackWriter()
-files = extra and (open(fn) for fn in extra) or [sys.stdin]
+if opt.git_ids:
+ # the input is actually a series of git object ids that we should retrieve
+ # and split.
+ #
+ # This is a bit messy, but basically it converts from a series of
+ # CatPipe.get() iterators into a series of file-type objects.
+ # It would be less ugly if either CatPipe.get() returned a file-like object
+ # (not very efficient), or split_to_shalist() expected an iterator instead
+ # of a file.
+ cp = git.CatPipe()
+ # Adapter that exposes an iterator of data chunks through a minimal
+ # file-like interface (only read() is provided; there is no fileno()).
+ class IterToFile:
+ def __init__(self, it):
+ # Accept any iterable; iter() normalizes it to an iterator.
+ self.it = iter(it)
+ def read(self, size):
+ # 'size' is ignored: each call hands back the iterator's next chunk,
+ # whatever its length.
+ # NOTE(review): 'v or ""' suggests next() here may return None when the
+ # iterator is exhausted (a project helper rather than the builtin, which
+ # raises StopIteration) -- confirm against the file's imports.
+ v = next(self.it)
+ return v or ''
+ def read_ids():
+ while 1:
+ line = sys.stdin.readline()
+ if not line:
+ break
+ if line:
+ line = line.strip()
+ try:
+ it = cp.get(line.strip())
+ next(it) # skip the file type
+ except KeyError, e:
+ add_error('error: %s' % e)
+ continue
+ yield IterToFile(it)
+ files = read_ids()
+else:
+ # the input either comes from a series of files or from stdin.
+ files = extra and (open(fn) for fn in extra) or [sys.stdin]
+
if pack_writer:
shalist = hashsplit.split_to_shalist(pack_writer, files,
keep_boundaries=opt.keep_boundaries)
if opt.bench:
log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
% (size/1024., secs, size/1024./secs))
+
+if saved_errors:
+ log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
+ sys.exit(1)
assert(not self.inprogress)
assert(id.find('\n') < 0)
assert(id.find('\r') < 0)
- assert(id[0] != '-')
+ assert(not id.startswith('-'))
self.inprogress = id
self.p.stdin.write('%s\n' % id)
hdr = self.p.stdout.readline()
if hdr.endswith(' missing\n'):
+ self.inprogress = None
raise KeyError('blob %r is missing' % id)
spl = hdr.split(' ')
if len(spl) != 3 or len(spl[0]) != 40:
def fadvise_done(f, ofs):
+ # Forward (f.fileno(), ofs) to _helpers.fadvise_done for a positive offset.
+ # NOTE(review): presumably a posix_fadvise-style hint that data before 'ofs'
+ # is no longer needed -- confirm in _helpers.
assert(ofs >= 0)
- if ofs > 0:
+ if ofs > 0 and hasattr(f, 'fileno'):
+ # Objects without a fileno() method (e.g. a wrapper that only implements
+ # read()) are silently skipped instead of raising AttributeError.
_helpers.fadvise_done(f.fileno(), ofs)
WVSTART "split"
echo a >a.tmp
echo b >b.tmp
-WVPASSEQ $(bup split -b a.tmp b.tmp | wc -l) 1
-WVPASSEQ $(bup split -b --keep-boundaries a.tmp b.tmp | wc -l) 2
+WVPASS bup split -b a.tmp >taga.tmp
+WVPASS bup split -b b.tmp >tagb.tmp
+cat a.tmp b.tmp | WVPASS bup split -b >tagab.tmp
+WVPASSEQ "$(cat taga.tmp | wc -l)" 1
+WVPASSEQ "$(cat tagb.tmp | wc -l)" 1
+WVPASSEQ "$(cat tagab.tmp | wc -l)" 1
+WVPASSEQ "$(cat tag[ab].tmp | wc -l)" 2
+WVPASSEQ "$(bup split -b a.tmp b.tmp)" "$(cat tagab.tmp)"
+WVPASSEQ "$(bup split -b --keep-boundaries a.tmp b.tmp)" "$(cat tag[ab].tmp)"
+WVPASSEQ "$(cat tag[ab].tmp | bup split -b --keep-boundaries --git-ids)" \
+ "$(cat tag[ab].tmp)"
+WVPASSEQ "$(cat tag[ab].tmp | bup split -b --git-ids)" \
+ "$(cat tagab.tmp)"
WVPASS bup split --bench -b <t/testfile1 >tags1.tmp
WVPASS bup split -vvvv -b t/testfile2 >tags2.tmp
WVPASS bup margin