X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fsplit-cmd.py;h=2a72bd673fb912d47b89f8713578342c0d5849b2;hb=5570b95f0519dfa7c2ce78413131d4433738cc8f;hp=94fba53bda7793f76ef1cdec467c5fb96059a762;hpb=91eb5521829376b4ee4e5cb872965583336d7416;p=bup.git

diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py
index 94fba53..2a72bd6 100755
--- a/cmd/split-cmd.py
+++ b/cmd/split-cmd.py
@@ -15,6 +15,8 @@ n,name=    name of backup set to update (if any)
 d,date=    date for the commit (seconds since the epoch)
 q,quiet    don't print progress messages
 v,verbose  increase log output (can be used more than once)
+git-ids    read a list of git object ids from stdin and split their contents
+keep-boundaries  don't let one chunk span two input files
 noop       don't actually save the data anywhere
 copy       just copy input to output, hashsplitting along the way
 bench      print benchmark timings to stderr
@@ -34,6 +36,8 @@ if not (opt.blobs or opt.tree or opt.commit or opt.name or
 if (opt.noop or opt.copy) and (opt.blobs or opt.tree or opt.commit or opt.name):
     o.fatal('-N and --copy are incompatible with -b, -t, -c, -n')
+if extra and opt.git_ids:
+    o.fatal("don't provide filenames when using --git-ids")
 if opt.verbose >= 2:
     git.verbose = opt.verbose - 1
@@ -76,13 +80,49 @@ else:
     oldref = refname and git.read_ref(refname) or None
     pack_writer = git.PackWriter()
 
-files = extra and (open(fn) for fn in extra) or [sys.stdin]
+if opt.git_ids:
+    # the input is actually a series of git object ids that we should retrieve
+    # and split.
+    #
+    # This is a bit messy, but basically it converts from a series of
+    # CatPipe.get() iterators into a series of file-type objects.
+    # It would be less ugly if either CatPipe.get() returned a file-like object
+    # (not very efficient), or split_to_shalist() expected an iterator instead
+    # of a file.
+    cp = git.CatPipe()
+    class IterToFile:
+        def __init__(self, it):
+            self.it = iter(it)
+        def read(self, size):
+            v = next(self.it)
+            return v or ''
+    def read_ids():
+        while 1:
+            line = sys.stdin.readline()
+            if not line:
+                break
+            if line:
+                line = line.strip()
+            try:
+                it = cp.get(line.strip())
+                next(it)  # skip the file type
+            except KeyError, e:
+                add_error('error: %s' % e)
+                continue
+            yield IterToFile(it)
+    files = read_ids()
+else:
+    # the input either comes from a series of files or from stdin.
+    files = extra and (open(fn) for fn in extra) or [sys.stdin]
+
 if pack_writer:
-    shalist = hashsplit.split_to_shalist(pack_writer, files)
+    shalist = hashsplit.split_to_shalist(pack_writer, files,
+                                         keep_boundaries=opt.keep_boundaries)
     tree = pack_writer.new_tree(shalist)
 else:
     last = 0
-    for (blob, bits) in hashsplit.hashsplit_iter(files):
+    for (blob, bits) in hashsplit.hashsplit_iter(files,
+                                    keep_boundaries=opt.keep_boundaries):
         hashsplit.total_split += len(blob)
         if opt.copy:
             sys.stdout.write(str(blob))
@@ -123,3 +163,7 @@ size = hashsplit.total_split
 if opt.bench:
     log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
         % (size/1024., secs, size/1024./secs))
+
+if saved_errors:
+    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
+    sys.exit(1)
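
A brief aside on the IterToFile adapter added in the patch above: it wraps an iterator of data chunks in a minimal file-like object so that hashsplit can consume retrieved git objects the same way it consumes ordinary input files. The standalone sketch below illustrates the same idea; it is not part of this patch, the ChunkReader name is made up, and it uses the Python builtin next() with a default instead of bup's own next() helper (which returns None when the iterator is exhausted).

    # Minimal iterator-to-file adapter, mirroring the IterToFile class above
    # (hypothetical sketch, not bup code).
    class ChunkReader:
        def __init__(self, it):
            self.it = iter(it)
        def read(self, size):
            # size is ignored: each call returns the next chunk, or '' once
            # the iterator is exhausted, which is enough for a consumer that
            # simply loops until read() returns no data.
            return next(self.it, '')

    # Usage: drain the adapter the way a plain file would be drained.
    f = ChunkReader(['hello ', 'world'])
    out = ''
    while True:
        buf = f.read(8192)
        if not buf:
            break
        out += buf
    assert out == 'hello world'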