From d87a9fadca4fddb8fd40299c62567bf192572122 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Mon, 14 Oct 2019 18:48:24 -0500 Subject: [PATCH 1/1] Make adjustments to fix tbloom for python 3 Make the changes needed to get the tbloom tests passing with python 3. Change the C helper argument conversions to types that work with each version (there didn't appear to be a good, shared common format string type). Switch to a Py_buffer in some places because python 3 didn't appear to have a conversion that would convert all the binary inputs (buffer/memoryview/...) to a plain C byte array the way python 2 can. Change all the other typical things, i.e. byte paths, '' to b'', and // for integral division, etc. With this, tbloom should pass with both python major versions (python 3 version tested was python 3.7.5). Signed-off-by: Rob Browning Tested-by: Rob Browning --- Makefile | 2 +- lib/bup/_helpers.c | 102 ++++++++++++++++++++++++++++---------------- lib/bup/bloom.py | 22 +++++----- lib/bup/helpers.py | 2 +- lib/bup/t/tbloom.py | 20 ++++----- 5 files changed, 89 insertions(+), 59 deletions(-) diff --git a/Makefile b/Makefile index 8fbc251..560652b 100644 --- a/Makefile +++ b/Makefile @@ -144,6 +144,7 @@ t/tmp: runtests: runtests-python runtests-cmdline python_tests := \ + lib/bup/t/tbloom.py \ lib/bup/t/thashsplit.py \ lib/bup/t/toptions.py \ lib/bup/t/tshquote.py \ @@ -152,7 +153,6 @@ python_tests := \ ifeq "2" "$(bup_python_majver)" python_tests += \ - lib/bup/t/tbloom.py \ lib/bup/t/tclient.py \ lib/bup/t/tgit.py \ lib/bup/t/thelpers.py \ diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c index d9bd66c..54e980a 100644 --- a/lib/bup/_helpers.c +++ b/lib/bup/_helpers.c @@ -71,13 +71,21 @@ typedef struct { int istty2; } state_t; +// cstr_argf: for byte vectors without null characters (e.g. paths) +// rbuf_argf: for read-only byte vectors +// wbuf_argf: for mutable byte vectors + #if PY_MAJOR_VERSION < 3 static state_t state; # define get_state(x) (&state) # define cstr_argf "s" +# define rbuf_argf "s#" +# define wbuf_argf "s*" #else # define get_state(x) ((state_t *) PyModule_GetState(x)) # define cstr_argf "y" +# define rbuf_argf "y#" +# define wbuf_argf "y*" #endif // PY_MAJOR_VERSION >= 3 @@ -678,72 +686,94 @@ BLOOM_GET_BIT(bloom_get_bit5, to_bloom_address_bitmask5, uint32_t) static PyObject *bloom_add(PyObject *self, PyObject *args) { - unsigned char *sha = NULL, *bloom = NULL; - unsigned char *end; - Py_ssize_t len = 0, blen = 0; + Py_buffer bloom, sha; int nbits = 0, k = 0; + if (!PyArg_ParseTuple(args, wbuf_argf wbuf_argf "ii", + &bloom, &sha, &nbits, &k)) + return NULL; - if (!PyArg_ParseTuple(args, "w#s#ii", &bloom, &blen, &sha, &len, &nbits, &k)) - return NULL; + PyObject *result = NULL; - if (blen < 16+(1< 29) - return NULL; - for (end = sha + len; sha < end; sha += 20/k) - bloom_set_bit5(bloom, sha, nbits); + if (nbits > 29) + goto clean_and_return; + unsigned char *cur = sha.buf; + for (unsigned char *end = cur + sha.len; cur < end; cur += 20/k) + bloom_set_bit5(bloom.buf, cur, nbits); } else if (k == 4) { - if (nbits > 37) - return NULL; - for (end = sha + len; sha < end; sha += 20/k) - bloom_set_bit4(bloom, sha, nbits); + if (nbits > 37) + goto clean_and_return; + unsigned char *cur = sha.buf; + unsigned char *end = cur + sha.len; + for (; cur < end; cur += 20/k) + bloom_set_bit4(bloom.buf, cur, nbits); } else - return NULL; + goto clean_and_return; + result = Py_BuildValue("n", sha.len / 20); - return Py_BuildValue("n", len/20); + clean_and_return: + PyBuffer_Release(&bloom); + PyBuffer_Release(&sha); + return result; } static PyObject *bloom_contains(PyObject *self, PyObject *args) { - unsigned char *sha = NULL, *bloom = NULL; - Py_ssize_t len = 0, blen = 0; + Py_buffer bloom; + unsigned char *sha = NULL; + Py_ssize_t len = 0; int nbits = 0, k = 0; - unsigned char *end; - int steps; + if (!PyArg_ParseTuple(args, wbuf_argf rbuf_argf "ii", + &bloom, &sha, &len, &nbits, &k)) + return NULL; - if (!PyArg_ParseTuple(args, "t#s#ii", &bloom, &blen, &sha, &len, &nbits, &k)) - return NULL; + PyObject *result = NULL; if (len != 20) - return NULL; + goto clean_and_return; if (k == 5) { - if (nbits > 29) - return NULL; - for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++) - if (!bloom_get_bit5(bloom, sha, nbits)) - return Py_BuildValue("Oi", Py_None, steps); + if (nbits > 29) + goto clean_and_return; + int steps; + unsigned char *end; + for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++) + if (!bloom_get_bit5(bloom.buf, sha, nbits)) + { + result = Py_BuildValue("Oi", Py_None, steps); + goto clean_and_return; + } } else if (k == 4) { - if (nbits > 37) - return NULL; - for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++) - if (!bloom_get_bit4(bloom, sha, nbits)) - return Py_BuildValue("Oi", Py_None, steps); + if (nbits > 37) + goto clean_and_return; + int steps; + unsigned char *end; + for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++) + if (!bloom_get_bit4(bloom.buf, sha, nbits)) + { + result = Py_BuildValue("Oi", Py_None, steps); + goto clean_and_return; + } } else - return NULL; + goto clean_and_return; - return Py_BuildValue("ii", 1, k); + result = Py_BuildValue("ii", 1, k); + + clean_and_return: + PyBuffer_Release(&bloom); + return result; } diff --git a/lib/bup/bloom.py b/lib/bup/bloom.py index 54da4b8..18ed460 100644 --- a/lib/bup/bloom.py +++ b/lib/bup/bloom.py @@ -109,7 +109,7 @@ class ShaBloom: self.name = filename self.rwfile = None self.map = None - assert(filename.endswith('.bloom')) + assert(filename.endswith(b'.bloom')) if readwrite: assert(expected > 0) self.rwfile = f = f or open(filename, 'r+b') @@ -131,7 +131,7 @@ class ShaBloom: # one bit flipped per memory page), let's use a "private" mmap, # which defeats Linux's ability to flush it to disk. Then we'll # flush it as one big lump during close(). - pages = os.fstat(f.fileno()).st_size / 4096 * 5 # assume k=5 + pages = os.fstat(f.fileno()).st_size // 4096 * 5 # assume k=5 self.delaywrite = expected > pages debug1('bloom: delaywrite=%r\n' % self.delaywrite) if self.delaywrite: @@ -142,8 +142,8 @@ class ShaBloom: self.rwfile = None f = f or open(filename, 'rb') self.map = mmap_read(f) - got = str(self.map[0:4]) - if got != 'BLOM': + got = self.map[0:4] + if got != b'BLOM': log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename)) return self._init_failed() ver = struct.unpack('!I', self.map[4:8])[0] @@ -157,9 +157,9 @@ class ShaBloom: return self._init_failed() self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16]) - idxnamestr = str(self.map[16 + 2**self.bits:]) + idxnamestr = self.map[16 + 2**self.bits:] if idxnamestr: - self.idxnames = idxnamestr.split('\0') + self.idxnames = idxnamestr.split(b'\0') else: self.idxnames = [] @@ -189,7 +189,7 @@ class ShaBloom: self.map.flush() self.rwfile.seek(16 + 2**self.bits) if self.idxnames: - self.rwfile.write('\0'.join(self.idxnames)) + self.rwfile.write(b'\0'.join(self.idxnames)) self._init_failed() def pfalse_positive(self, additional=0): @@ -220,7 +220,7 @@ class ShaBloom: _total_searches += 1 if not self.map: return None - found, steps = bloom_contains(self.map, str(sha), self.bits, self.k) + found, steps = bloom_contains(self.map, sha, self.bits, self.k) _total_steps += steps return found @@ -230,14 +230,14 @@ class ShaBloom: def create(name, expected, delaywrite=None, f=None, k=None): """Create and return a bloom filter for `expected` entries.""" - bits = int(math.floor(math.log(expected*MAX_BITS_EACH/8,2))) + bits = int(math.floor(math.log(expected * MAX_BITS_EACH // 8, 2))) k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4) if bits > MAX_BLOOM_BITS[k]: log('bloom: warning, max bits exceeded, non-optimal\n') bits = MAX_BLOOM_BITS[k] debug1('bloom: using 2^%d bytes and %d hash functions\n' % (bits, k)) f = f or open(name, 'w+b') - f.write('BLOM') + f.write(b'BLOM') f.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0)) assert(f.tell() == 16) # NOTE: On some systems this will not extend+zerofill, but it does on @@ -251,4 +251,4 @@ def create(name, expected, delaywrite=None, f=None, k=None): def clear_bloom(dir): - unlink(os.path.join(dir, 'bup.bloom')) + unlink(os.path.join(dir, b'bup.bloom')) diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index b962b73..64d5b26 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -149,7 +149,7 @@ def log(s): """Print a log message to stderr.""" global _last_prog sys.stdout.flush() - _hard_write(sys.stderr.fileno(), s) + _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode()) _last_prog = 0 diff --git a/lib/bup/t/tbloom.py b/lib/bup/t/tbloom.py index 2f17046..3fa9f35 100644 --- a/lib/bup/t/tbloom.py +++ b/lib/bup/t/tbloom.py @@ -1,5 +1,5 @@ -from __future__ import absolute_import +from __future__ import absolute_import, print_function import errno, platform, tempfile from wvtest import * @@ -12,32 +12,32 @@ from buptest import no_lingering_errors, test_tempdir @wvtest def test_bloom(): with no_lingering_errors(): - with test_tempdir('bup-tbloom-') as tmpdir: + with test_tempdir(b'bup-tbloom-') as tmpdir: hashes = [os.urandom(20) for i in range(100)] class Idx: pass ix = Idx() - ix.name='dummy.idx' - ix.shatable = ''.join(hashes) + ix.name = b'dummy.idx' + ix.shatable = b''.join(hashes) for k in (4, 5): - b = bloom.create(tmpdir + '/pybuptest.bloom', expected=100, k=k) + b = bloom.create(tmpdir + b'/pybuptest.bloom', expected=100, k=k) b.add_idx(ix) WVPASSLT(b.pfalse_positive(), .1) b.close() - b = bloom.ShaBloom(tmpdir + '/pybuptest.bloom') + b = bloom.ShaBloom(tmpdir + b'/pybuptest.bloom') all_present = True for h in hashes: - all_present &= b.exists(h) + all_present &= (b.exists(h) or False) WVPASS(all_present) false_positives = 0 for h in [os.urandom(20) for i in range(1000)]: if b.exists(h): false_positives += 1 WVPASSLT(false_positives, 5) - os.unlink(tmpdir + '/pybuptest.bloom') + os.unlink(tmpdir + b'/pybuptest.bloom') tf = tempfile.TemporaryFile(dir=tmpdir) - b = bloom.create('bup.bloom', f=tf, expected=100) + b = bloom.create(b'bup.bloom', f=tf, expected=100) WVPASSEQ(b.rwfile, tf) WVPASSEQ(b.k, 5) @@ -47,7 +47,7 @@ def test_bloom(): tf = tempfile.TemporaryFile(dir=tmpdir) skip_test = False try: - b = bloom.create('bup.bloom', f=tf, expected=2**28, + b = bloom.create(b'bup.bloom', f=tf, expected=2**28, delaywrite=False) except EnvironmentError as ex: (ptr_width, linkage) = platform.architecture() -- 2.39.2