Always return a level 0 blob from _splitbuf() for BLOB_MAX sized blobs.

author Aidan Hobson Sayers <aidanhs@cantab.net>

Sat, 23 Nov 2013 17:23:04 +0000 (17:23 +0000)

committer Rob Browning <rlb@defaultvalue.org>

Sat, 15 Feb 2014 18:45:09 +0000 (12:45 -0600)
author Aidan Hobson Sayers <aidanhs@cantab.net>
Sat, 23 Nov 2013 17:23:04 +0000 (17:23 +0000)
committer Rob Browning <rlb@defaultvalue.org>
Sat, 15 Feb 2014 18:45:09 +0000 (12:45 -0600)
diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py

index 345f67bc097273dc10ee3732d58320f8ec31de26..bd3cb24b9f727de92a098fc027f2a03ae1f277ec 100644 (file)
--- a/lib/bup/hashsplit.py
+++ b/lib/bup/hashsplit.py
@@ -61,11 +61,13 @@ def _splitbuf(buf, basebits, fanbits):
      while 1:
          b = buf.peek(buf.used())
          (ofs, bits) = _helpers.splitbuf(b)
-        if ofs > BLOB_MAX:
-            ofs = BLOB_MAX
          if ofs:
+            if ofs > BLOB_MAX:
+                ofs = BLOB_MAX
+                level = 0
+            else:
+                level = (bits-basebits)//fanbits  # integer division
              buf.eat(ofs)
-            level = (bits-basebits)//fanbits  # integer division
              yield buffer(b, 0, ofs), level
          else:
              break
diff --git a/lib/bup/t/thashsplit.py b/lib/bup/t/thashsplit.py

index 43d589b4124e6eebdcd95673e0217c7d253ac726..62d973e6410a51ca55e3d4ff6b86709388c3c511 100644 (file)
--- a/lib/bup/t/thashsplit.py
+++ b/lib/bup/t/thashsplit.py
@@ -1,6 +1,53 @@
  from bup import hashsplit, _helpers
  from wvtest import *
+from cStringIO import StringIO
  
  @wvtest
  def test_rolling_sums():
      WVPASS(_helpers.selftest())
+
+@wvtest
+def test_fanout_behaviour():
+
+    # Drop in replacement for bupsplit, but splitting if the int value of a
+    # byte >= BUP_BLOBBITS
+    basebits = _helpers.blobbits()
+    def splitbuf(buf):
+        ofs = 0
+        for c in buf:
+            ofs += 1
+            if ord(c) >= basebits:
+                return ofs, ord(c)
+        return 0, 0
+
+    old_splitbuf = _helpers.splitbuf
+    _helpers.splitbuf = splitbuf
+    old_BLOB_MAX = hashsplit.BLOB_MAX
+    hashsplit.BLOB_MAX = 4
+    old_BLOB_READ_SIZE = hashsplit.BLOB_READ_SIZE
+    hashsplit.BLOB_READ_SIZE = 10
+    old_fanout = hashsplit.fanout
+    hashsplit.fanout = 2
+
+    levels = lambda f: [(len(b), l) for b, l in
+        hashsplit.hashsplit_iter([f], True, None)]
+    # Return a string of n null bytes
+    z = lambda n: '\x00' * n
+    # Return a byte which will be split with a level of n
+    sb = lambda n: chr(basebits + n)
+
+    split_never = StringIO(z(16))
+    split_first = StringIO(z(1) + sb(3) + z(14))
+    split_end   = StringIO(z(13) + sb(1) + z(2))
+    split_many  = StringIO(sb(1) + z(3) + sb(2) + z(4) +
+                            sb(0) + z(4) + sb(5) + z(1))
+    WVPASSEQ(levels(split_never), [(4, 0), (4, 0), (4, 0), (4, 0)])
+    WVPASSEQ(levels(split_first), [(2, 3), (4, 0), (4, 0), (4, 0), (2, 0)])
+    WVPASSEQ(levels(split_end), [(4, 0), (4, 0), (4, 0), (2, 1), (2, 0)])
+    WVPASSEQ(levels(split_many),
+        [(1, 1), (4, 2), (4, 0), (1, 0), (4, 0), (1, 5), (1, 0)])
+
+    _helpers.splitbuf = old_splitbuf
+    hashsplit.BLOB_MAX = old_BLOB_MAX
+    hashsplit.BLOB_READ_SIZE = old_BLOB_READ_SIZE
+    hashsplit.fanout = old_fanout
author	Aidan Hobson Sayers <aidanhs@cantab.net>
	Sat, 23 Nov 2013 17:23:04 +0000 (17:23 +0000)
committer	Rob Browning <rlb@defaultvalue.org>
	Sat, 15 Feb 2014 18:45:09 +0000 (12:45 -0600)
lib/bup/hashsplit.py		patch | blob | history
lib/bup/t/thashsplit.py		patch | blob | history