Fix hashsplit for python 3; add compat.buffer

author Rob Browning <rlb@defaultvalue.org>

Wed, 9 Oct 2019 06:11:39 +0000 (01:11 -0500)

committer Rob Browning <rlb@defaultvalue.org>

Sun, 13 Oct 2019 17:48:24 +0000 (12:48 -0500)
author Rob Browning <rlb@defaultvalue.org>
Wed, 9 Oct 2019 06:11:39 +0000 (01:11 -0500)
committer Rob Browning <rlb@defaultvalue.org>
Sun, 13 Oct 2019 17:48:24 +0000 (12:48 -0500)
diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c

index 9078be3da723d8e7757662950a7521bdc275754f..b93e24c06427630be4b796fd525dd065201d3758 100644 (file)
--- a/lib/bup/_helpers.c
+++ b/lib/bup/_helpers.c
@@ -529,14 +529,29 @@ static PyObject *blobbits(PyObject *self, PyObject *args)
  
  static PyObject *splitbuf(PyObject *self, PyObject *args)
  {
-    unsigned char *buf = NULL;
-    Py_ssize_t len = 0;
+    // We stick to buffers in python 2 because they appear to be
+    // substantially smaller than memoryviews, and because
+    // zlib.compress() in python 2 can't accept a memoryview
+    // (cf. hashsplit.py).
      int out = 0, bits = -1;
-
-    if (!PyArg_ParseTuple(args, "t#", &buf, &len))
-       return NULL;
-    assert(len <= INT_MAX);
-    out = bupsplit_find_ofs(buf, len, &bits);
+    if (PY_MAJOR_VERSION > 2)
+    {
+        Py_buffer buf;
+        if (!PyArg_ParseTuple(args, "y*", &buf))
+            return NULL;
+        assert(buf.len <= INT_MAX);
+        out = bupsplit_find_ofs(buf.buf, buf.len, &bits);
+        PyBuffer_Release(&buf);
+    }
+    else
+    {
+        unsigned char *buf = NULL;
+        Py_ssize_t len = 0;
+        if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+            return NULL;
+        assert(len <= INT_MAX);
+        out = bupsplit_find_ofs(buf, len, &bits);
+    }
      if (out) assert(bits >= BUP_BLOBBITS);
      return Py_BuildValue("ii", out, bits);
  }
diff --git a/lib/bup/compat.py b/lib/bup/compat.py

index 53f52869e2c2d9dbd8d3b7987b4e202d3b3bae20..2458574f02dfc16ee274c81930e5da8ce847ef2a 100644 (file)
--- a/lib/bup/compat.py
+++ b/lib/bup/compat.py
@@ -1,5 +1,6 @@
  
  from __future__ import absolute_import, print_function
+from array import array
  from traceback import print_exception
  import sys
  
@@ -29,6 +30,23 @@ if py3:
      def bytes_from_uint(i):
          return bytes((i,))
  
+    byte_int = lambda x: x
+
+    def buffer(object, offset=None, size=None):
+        if size is not None:  # size=0 must give an empty view (like py2 buffer(obj, offset, 0)), not the tail
+            assert offset is not None
+            return memoryview(object)[offset:offset + size]
+        if offset:
+            return memoryview(object)[offset:]
+        return memoryview(object)
+
+    def buffer_concat(b1, b2):
+        if isinstance(b1, memoryview):
+            b1 = b1.tobytes()
+        if isinstance(b2, memoryview):  # previously re-tested b1, so b2 was never converted
+            b2 = b2.tobytes()
+        return b1 + b2
+
  else:  # Python 2
  
      from pipes import quote
@@ -75,6 +93,11 @@ else:  # Python 2
      def bytes_from_uint(i):
          return chr(i)
  
+    byte_int = ord
+
+    def buffer_concat(b1, b2):
+        return b1 + b2
+
  
  def wrap_main(main):
      """Run main() and raise a SystemExit with the return value if it
diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py

index 3cbcfc981b4e1280604712b2b0ffe2fbeff9411e..af0ad172be93a9e46188e0e3806b1cc6bdfa571c 100644 (file)
--- a/lib/bup/hashsplit.py
+++ b/lib/bup/hashsplit.py
@@ -2,9 +2,14 @@
  from __future__ import absolute_import
  import io, math, os
  
-from bup import _helpers, helpers
+from bup import _helpers, compat, helpers
+from bup.compat import buffer_concat
  from bup.helpers import sc_page_size
  
+if compat.py_maj > 2:
+    from bup.compat import buffer, buffer_concat
+
+
  _fmincore = getattr(helpers, 'fmincore', None)
  
  BLOB_MAX = 8192*4   # 8192 is the "typical" blob size for bupsplit
@@ -22,12 +27,12 @@ GIT_MODE_SYMLINK = 0o120000
  # be ok if we always only put() large amounts of data at a time.
  class Buf:
      def __init__(self):
-        self.data = ''
+        self.data = b''
          self.start = 0
  
      def put(self, s):
          if s:
-            self.data = buffer(self.data, self.start) + s
+            self.data = buffer_concat(buffer(self.data, self.start), s)
              self.start = 0
              
      def peek(self, count):
diff --git a/lib/bup/t/thashsplit.py b/lib/bup/t/thashsplit.py

index 5b995860442c16521c523216c7023f0f8efbf8c3..fc6a9ab31ae6f5bf8a3afdf89cb296e19d10a4e3 100644 (file)
--- a/lib/bup/t/thashsplit.py
+++ b/lib/bup/t/thashsplit.py
@@ -5,6 +5,7 @@ from io import BytesIO
  from wvtest import *
  
  from bup import hashsplit, _helpers, helpers
+from bup.compat import byte_int, bytes_from_uint
  from buptest import no_lingering_errors
  
  
@@ -97,10 +98,11 @@ def test_fanout_behaviour():
      basebits = _helpers.blobbits()
      def splitbuf(buf):
          ofs = 0
-        for c in buf:
+        for b in buf:
+            b = byte_int(b)
              ofs += 1
-            if ord(c) >= basebits:
-                return ofs, ord(c)
+            if b >= basebits:
+                return ofs, b
          return 0, 0
  
      with no_lingering_errors():
@@ -116,9 +118,9 @@ def test_fanout_behaviour():
          levels = lambda f: [(len(b), l) for b, l in
              hashsplit.hashsplit_iter([f], True, None)]
          # Return a string of n null bytes
-        z = lambda n: '\x00' * n
+        z = lambda n: b'\x00' * n
          # Return a byte which will be split with a level of n
-        sb = lambda n: chr(basebits + n)
+        sb = lambda n: bytes_from_uint(basebits + n)
  
          split_never = BytesIO(z(16))
          split_first = BytesIO(z(1) + sb(3) + z(14))
author	Rob Browning <rlb@defaultvalue.org>
	Wed, 9 Oct 2019 06:11:39 +0000 (01:11 -0500)
committer	Rob Browning <rlb@defaultvalue.org>
	Sun, 13 Oct 2019 17:48:24 +0000 (12:48 -0500)
lib/bup/_helpers.c		patch \| blob \| history
lib/bup/compat.py		patch \| blob \| history
lib/bup/hashsplit.py		patch \| blob \| history
lib/bup/t/thashsplit.py		patch \| blob \| history