arthur.barton.de Git - bup.git/commitdiff
hashsplit.py: convert from 'bits' to 'level' earlier in the sequence.
author Avery Pennarun <apenwarr@gmail.com>
Thu, 17 Feb 2011 10:30:47 +0000 (02:30 -0800)
committer Avery Pennarun <apenwarr@gmail.com>
Sun, 20 Feb 2011 05:38:28 +0000 (21:38 -0800)
The hierarchy level is a more directly useful measurement than the bit count,
although right now neither is used very heavily.

Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
cmd/split-cmd.py
lib/bup/_helpers.c
lib/bup/hashsplit.py

index 756d1b566a3937c7db7e3b62d48b98529b18579a..e016d1e8c897c266c0b6b8bc692585a4ea503f70 100755 (executable)
@@ -130,9 +130,10 @@ if pack_writer:
     tree = pack_writer.new_tree(shalist)
 else:
     last = 0
-    for (blob, bits) in hashsplit.hashsplit_iter(files,
-                                    keep_boundaries=opt.keep_boundaries,
-                                    progress=prog):
+    it = hashsplit.hashsplit_iter(files,
+                                  keep_boundaries=opt.keep_boundaries,
+                                  progress=prog)
+    for (blob, level) in it:
         hashsplit.total_split += len(blob)
         if opt.copy:
             sys.stdout.write(str(blob))
index 4d12ddfe9a7ea8d7cbf6d1d34f2fd795fbd62799..d077cd9710c08bd007f1a8424cd4ea2f326333ff 100644 (file)
@@ -87,6 +87,7 @@ static PyObject *splitbuf(PyObject *self, PyObject *args)
     if (!PyArg_ParseTuple(args, "t#", &buf, &len))
        return NULL;
     out = bupsplit_find_ofs(buf, len, &bits);
+    if (out) assert(bits >= BUP_BLOBBITS);
     return Py_BuildValue("ii", out, bits);
 }
 
index 6134b6111eb9a37b17c4cc8551d8ca814a5ec3d0..f9d5a4dfe44a96ce6f9e61a41e4034b16985a0a5 100644 (file)
@@ -54,7 +54,7 @@ def readfile_iter(files, progress=None):
             yield b
 
 
-def _splitbuf(buf):
+def _splitbuf(buf, basebits, fanbits):
     while 1:
         b = buf.peek(buf.used())
         (ofs, bits) = _helpers.splitbuf(b)
@@ -62,7 +62,8 @@ def _splitbuf(buf):
             ofs = BLOB_MAX
         if ofs:
             buf.eat(ofs)
-            yield buffer(b, 0, ofs), bits
+            level = (bits-basebits)//fanbits  # integer division
+            yield buffer(b, 0, ofs), level
         else:
             break
     while buf.used() >= BLOB_MAX:
@@ -72,11 +73,13 @@ def _splitbuf(buf):
 
 def _hashsplit_iter(files, progress):
     assert(BLOB_READ_SIZE > BLOB_MAX)
+    basebits = _helpers.blobbits()
+    fanbits = int(math.log(fanout or 128, 2))
     buf = Buf()
     for inblock in readfile_iter(files, progress):
         buf.put(inblock)
-        for buf_and_bits in _splitbuf(buf):
-            yield buf_and_bits
+        for buf_and_level in _splitbuf(buf, basebits, fanbits):
+            yield buf_and_level
     if buf.used():
         yield buf.get(buf.used()), 0
 
@@ -90,8 +93,8 @@ def _hashsplit_iter_keep_boundaries(files, progress):
                 return progress(real_filenum, nbytes)
         else:
             prog = None
-        for buf_and_bits in _hashsplit_iter([f], progress=prog):
-            yield buf_and_bits
+        for buf_and_level in _hashsplit_iter([f], progress=prog):
+            yield buf_and_level
 
 
 def hashsplit_iter(files, keep_boundaries, progress):
@@ -104,14 +107,14 @@ def hashsplit_iter(files, keep_boundaries, progress):
 total_split = 0
 def _split_to_blobs(w, files, keep_boundaries, progress):
     global total_split
-    for (blob, bits) in hashsplit_iter(files, keep_boundaries, progress):
+    for (blob, level) in hashsplit_iter(files, keep_boundaries, progress):
         sha = w.new_blob(blob)
         total_split += len(blob)
         if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
             w.breakpoint()
         if progress_callback:
             progress_callback(len(blob))
-        yield (sha, len(blob), bits)
+        yield (sha, len(blob), level)
 
 
 def _make_shalist(l):
@@ -143,21 +146,15 @@ def split_to_shalist(w, files, keep_boundaries, progress=None):
     sl = _split_to_blobs(w, files, keep_boundaries, progress)
     if not fanout:
         shal = []
-        for (sha,size,bits) in sl:
+        for (sha,size,level) in sl:
             shal.append(('100644', sha, size))
         return _make_shalist(shal)[0]
     else:
-        base_bits = _helpers.blobbits()
-        fanout_bits = int(math.log(fanout, 2))
-        def bits_to_idx(n):
-            assert(n >= base_bits)
-            return (n - base_bits)/fanout_bits
         stacks = [[]]
-        for (sha,size,bits) in sl:
-            assert(bits <= 32)
+        for (sha,size,level) in sl:
             stacks[0].append(('100644', sha, size))
-            if bits > base_bits:
-                _squish(w, stacks, bits_to_idx(bits))
+            if level:
+                _squish(w, stacks, level)
         #log('stacks: %r\n' % [len(i) for i in stacks])
         _squish(w, stacks, len(stacks)-1)
         #log('stacks: %r\n' % [len(i) for i in stacks])