]> arthur.barton.de Git - bup.git/blobdiff - lib/bup/hashsplit.py
cmd/split: add a new --keep-boundaries option.
[bup.git] / lib / bup / hashsplit.py
index 28e90d28a676f24aa121e070f3e60fe74d3f27e2..b9896a096ff84e8813842011d8cd170913cb20a3 100644 (file)
@@ -1,5 +1,5 @@
-import sys, math
-from bup import _faster
+import math
+from bup import _helpers
 from bup.helpers import *
 
 BLOB_LWM = 8192*2
@@ -38,7 +38,7 @@ class Buf:
 
 def splitbuf(buf):
     b = buf.peek(buf.used())
-    (ofs, bits) = _faster.splitbuf(b)
+    (ofs, bits) = _helpers.splitbuf(b)
     if ofs:
         buf.eat(ofs)
         return (buffer(b, 0, ofs), bits)
@@ -72,7 +72,7 @@ def drainbuf(buf, finalize):
         yield (buf.get(buf.used()), 0)
 
 
-def hashsplit_iter(files):
+def _hashsplit_iter(files):
     assert(BLOB_HWM > BLOB_MAX)
     buf = Buf()
     fi = blobiter(files)
@@ -89,10 +89,23 @@ def hashsplit_iter(files):
             buf.put(bnew)
 
 
+def _hashsplit_iter_keep_boundaries(files):
+    for f in files:
+        for i in _hashsplit_iter([f]):
+            yield i
+
+
+def hashsplit_iter(files, keep_boundaries):
+    if keep_boundaries:
+        return _hashsplit_iter_keep_boundaries(files)
+    else:
+        return _hashsplit_iter(files)
+
+
 total_split = 0
-def _split_to_blobs(w, files):
+def _split_to_blobs(w, files, keep_boundaries):
     global total_split
-    for (blob, bits) in hashsplit_iter(files):
+    for (blob, bits) in hashsplit_iter(files, keep_boundaries):
         sha = w.new_blob(blob)
         total_split += len(blob)
         if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
@@ -127,15 +140,15 @@ def _squish(w, stacks, n):
         i += 1
 
 
-def split_to_shalist(w, files):
-    sl = _split_to_blobs(w, files)
+def split_to_shalist(w, files, keep_boundaries):
+    sl = _split_to_blobs(w, files, keep_boundaries)
     if not fanout:
         shal = []
         for (sha,size,bits) in sl:
             shal.append(('100644', sha, size))
         return _make_shalist(shal)[0]
     else:
-        base_bits = _faster.blobbits()
+        base_bits = _helpers.blobbits()
         fanout_bits = int(math.log(fanout, 2))
         def bits_to_idx(n):
             assert(n >= base_bits)
@@ -152,8 +165,8 @@ def split_to_shalist(w, files):
         return _make_shalist(stacks[-1])[0]
 
 
-def split_to_blob_or_tree(w, files):
-    shalist = list(split_to_shalist(w, files))
+def split_to_blob_or_tree(w, files, keep_boundaries):
+    shalist = list(split_to_shalist(w, files, keep_boundaries))
     if len(shalist) == 1:
         return (shalist[0][0], shalist[0][2])
     elif len(shalist) == 0:
@@ -163,7 +176,7 @@ def split_to_blob_or_tree(w, files):
 
 
 def open_noatime(name):
-    fd = _faster.open_noatime(name)
+    fd = _helpers.open_noatime(name)
     try:
         return os.fdopen(fd, 'rb', 1024*1024)
     except:
@@ -177,4 +190,4 @@ def open_noatime(name):
 def fadvise_done(f, ofs):
     assert(ofs >= 0)
     if ofs > 0:
-        _faster.fadvise_done(f.fileno(), ofs)
+        _helpers.fadvise_done(f.fileno(), ofs)