In order to get hashsplit working, stick with buffer()s in python 2.
We can't use a Py_buffer (via memoryview()) across both python
versions because python 2's zlib.compress() never learned how to
handle them, and we don't want to have to make an extra tobytes() copy
of all the data just to simplify the code.
Instead, provide a python 3 only compat.buffer that returns a
memoryview (sliced when appropriate) in python 3.
Also noticed (and a bit unfortunate if accurate) that memoryview is
much larger than buffer, and for our bulk data, we have no need of any
additional sophistication:
$ python2
...
>>> import sys
>>> sys.getsizeof(buffer(''))
64
$ python3
...
>>> import sys
>>> sys.getsizeof(buffer(''))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'buffer' is not defined
>>>
>>> sys.getsizeof(memoryview(b''))
192
Of course, this overhead is worth keeping in mind when deciding between
using a memoryview and just copying the relevant region.
Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
static PyObject *splitbuf(PyObject *self, PyObject *args)
{
- unsigned char *buf = NULL;
- Py_ssize_t len = 0;
+ // We stick to buffers in python 2 because they appear to be
+ // substantially smaller than memoryviews, and because
+ // zlib.compress() in python 2 can't accept a memoryview
+ // (cf. hashsplit.py).
int out = 0, bits = -1;
-
- if (!PyArg_ParseTuple(args, "t#", &buf, &len))
- return NULL;
- assert(len <= INT_MAX);
- out = bupsplit_find_ofs(buf, len, &bits);
+ if (PY_MAJOR_VERSION > 2)
+ {
+ Py_buffer buf;
+ if (!PyArg_ParseTuple(args, "y*", &buf))
+ return NULL;
+ assert(buf.len <= INT_MAX);
+ out = bupsplit_find_ofs(buf.buf, buf.len, &bits);
+ PyBuffer_Release(&buf);
+ }
+ else
+ {
+ unsigned char *buf = NULL;
+ Py_ssize_t len = 0;
+ if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+ return NULL;
+ assert(len <= INT_MAX);
+ out = bupsplit_find_ofs(buf, len, &bits);
+ }
if (out) assert(bits >= BUP_BLOBBITS);
return Py_BuildValue("ii", out, bits);
}
from __future__ import absolute_import, print_function
+from array import array
from traceback import print_exception
import sys
def bytes_from_uint(i):
return bytes((i,))
+ byte_int = lambda x: x
+
+ def buffer(object, offset=None, size=None):
+ if size:
+ assert offset is not None
+ return memoryview(object)[offset:offset + size]
+ if offset:
+ return memoryview(object)[offset:]
+ return memoryview(object)
+
+ def buffer_concat(b1, b2):
+ if isinstance(b1, memoryview):
+ b1 = b1.tobytes()
+ if isinstance(b2, memoryview):
+ b2 = b2.tobytes()
+ return b1 + b2
+
else: # Python 2
from pipes import quote
def bytes_from_uint(i):
return chr(i)
+ byte_int = ord
+
+ def buffer_concat(b1, b2):
+ return b1 + b2
+
def wrap_main(main):
"""Run main() and raise a SystemExit with the return value if it
from __future__ import absolute_import
import io, math, os
-from bup import _helpers, helpers
+from bup import _helpers, compat, helpers
+from bup.compat import buffer_concat
from bup.helpers import sc_page_size
+if compat.py_maj > 2:
+ from bup.compat import buffer
+
+
_fmincore = getattr(helpers, 'fmincore', None)
BLOB_MAX = 8192*4 # 8192 is the "typical" blob size for bupsplit
# be ok if we always only put() large amounts of data at a time.
class Buf:
def __init__(self):
- self.data = ''
+ self.data = b''
self.start = 0
def put(self, s):
if s:
- self.data = buffer(self.data, self.start) + s
+ self.data = buffer_concat(buffer(self.data, self.start), s)
self.start = 0
def peek(self, count):
from wvtest import *
from bup import hashsplit, _helpers, helpers
+from bup.compat import byte_int, bytes_from_uint
from buptest import no_lingering_errors
basebits = _helpers.blobbits()
def splitbuf(buf):
ofs = 0
- for c in buf:
+ for b in buf:
+ b = byte_int(b)
ofs += 1
- if ord(c) >= basebits:
- return ofs, ord(c)
+ if b >= basebits:
+ return ofs, b
return 0, 0
with no_lingering_errors():
levels = lambda f: [(len(b), l) for b, l in
hashsplit.hashsplit_iter([f], True, None)]
# Return a string of n null bytes
- z = lambda n: '\x00' * n
+ z = lambda n: b'\x00' * n
# Return a byte which will be split with a level of n
- sb = lambda n: chr(basebits + n)
+ sb = lambda n: bytes_from_uint(basebits + n)
split_never = BytesIO(z(16))
split_first = BytesIO(z(1) + sb(3) + z(14))