summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
63a0592)
Make the changes needed to get the tbloom tests passing with python 3.
Change the C helper argument conversions to types that work with each
version (there didn't appear to be a good, shared common format string
type). Switch to a Py_buffer in some places because python 3 didn't
appear to have a conversion that would convert all the binary
inputs (buffer/memoryview/...) to a plain C byte array the way python
2 can.
Change all the other typical things, i.e. byte paths, '' to b'', and
// for integral division, etc.
With this, tbloom should pass with both python major versions (python
3 version tested was python 3.7.5).
Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
runtests: runtests-python runtests-cmdline
python_tests := \
runtests: runtests-python runtests-cmdline
python_tests := \
lib/bup/t/thashsplit.py \
lib/bup/t/toptions.py \
lib/bup/t/tshquote.py \
lib/bup/t/thashsplit.py \
lib/bup/t/toptions.py \
lib/bup/t/tshquote.py \
ifeq "2" "$(bup_python_majver)"
python_tests += \
ifeq "2" "$(bup_python_majver)"
python_tests += \
lib/bup/t/tclient.py \
lib/bup/t/tgit.py \
lib/bup/t/thelpers.py \
lib/bup/t/tclient.py \
lib/bup/t/tgit.py \
lib/bup/t/thelpers.py \
+// cstr_argf: for byte vectors without null characters (e.g. paths)
+// rbuf_argf: for read-only byte vectors
+// wbuf_argf: for mutable byte vectors
+
#if PY_MAJOR_VERSION < 3
static state_t state;
# define get_state(x) (&state)
# define cstr_argf "s"
#if PY_MAJOR_VERSION < 3
static state_t state;
# define get_state(x) (&state)
# define cstr_argf "s"
+# define rbuf_argf "s#"
+# define wbuf_argf "s*"
#else
# define get_state(x) ((state_t *) PyModule_GetState(x))
# define cstr_argf "y"
#else
# define get_state(x) ((state_t *) PyModule_GetState(x))
# define cstr_argf "y"
+# define rbuf_argf "y#"
+# define wbuf_argf "y*"
#endif // PY_MAJOR_VERSION >= 3
#endif // PY_MAJOR_VERSION >= 3
static PyObject *bloom_add(PyObject *self, PyObject *args)
{
static PyObject *bloom_add(PyObject *self, PyObject *args)
{
- unsigned char *sha = NULL, *bloom = NULL;
- unsigned char *end;
- Py_ssize_t len = 0, blen = 0;
+ if (!PyArg_ParseTuple(args, wbuf_argf wbuf_argf "ii",
+ &bloom, &sha, &nbits, &k))
+ return NULL;
- if (!PyArg_ParseTuple(args, "w#s#ii", &bloom, &blen, &sha, &len, &nbits, &k))
- return NULL;
+ PyObject *result = NULL;
- if (blen < 16+(1<<nbits) || len % 20 != 0)
- return NULL;
+ if (bloom.len < 16+(1<<nbits) || sha.len % 20 != 0)
+ goto clean_and_return;
- if (nbits > 29)
- return NULL;
- for (end = sha + len; sha < end; sha += 20/k)
- bloom_set_bit5(bloom, sha, nbits);
+ if (nbits > 29)
+ goto clean_and_return;
+ unsigned char *cur = sha.buf;
+ for (unsigned char *end = cur + sha.len; cur < end; cur += 20/k)
+ bloom_set_bit5(bloom.buf, cur, nbits);
- if (nbits > 37)
- return NULL;
- for (end = sha + len; sha < end; sha += 20/k)
- bloom_set_bit4(bloom, sha, nbits);
+ if (nbits > 37)
+ goto clean_and_return;
+ unsigned char *cur = sha.buf;
+ unsigned char *end = cur + sha.len;
+ for (; cur < end; cur += 20/k)
+ bloom_set_bit4(bloom.buf, cur, nbits);
+ result = Py_BuildValue("n", sha.len / 20);
- return Py_BuildValue("n", len/20);
+ clean_and_return:
+ PyBuffer_Release(&bloom);
+ PyBuffer_Release(&sha);
+ return result;
}
static PyObject *bloom_contains(PyObject *self, PyObject *args)
{
}
static PyObject *bloom_contains(PyObject *self, PyObject *args)
{
- unsigned char *sha = NULL, *bloom = NULL;
- Py_ssize_t len = 0, blen = 0;
+ Py_buffer bloom;
+ unsigned char *sha = NULL;
+ Py_ssize_t len = 0;
- unsigned char *end;
- int steps;
+ if (!PyArg_ParseTuple(args, wbuf_argf rbuf_argf "ii",
+ &bloom, &sha, &len, &nbits, &k))
+ return NULL;
- if (!PyArg_ParseTuple(args, "t#s#ii", &bloom, &blen, &sha, &len, &nbits, &k))
- return NULL;
+ PyObject *result = NULL;
- if (nbits > 29)
- return NULL;
- for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
- if (!bloom_get_bit5(bloom, sha, nbits))
- return Py_BuildValue("Oi", Py_None, steps);
+ if (nbits > 29)
+ goto clean_and_return;
+ int steps;
+ unsigned char *end;
+ for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
+ if (!bloom_get_bit5(bloom.buf, sha, nbits))
+ {
+ result = Py_BuildValue("Oi", Py_None, steps);
+ goto clean_and_return;
+ }
- if (nbits > 37)
- return NULL;
- for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
- if (!bloom_get_bit4(bloom, sha, nbits))
- return Py_BuildValue("Oi", Py_None, steps);
+ if (nbits > 37)
+ goto clean_and_return;
+ int steps;
+ unsigned char *end;
+ for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
+ if (!bloom_get_bit4(bloom.buf, sha, nbits))
+ {
+ result = Py_BuildValue("Oi", Py_None, steps);
+ goto clean_and_return;
+ }
- return Py_BuildValue("ii", 1, k);
+ result = Py_BuildValue("ii", 1, k);
+
+ clean_and_return:
+ PyBuffer_Release(&bloom);
+ return result;
self.name = filename
self.rwfile = None
self.map = None
self.name = filename
self.rwfile = None
self.map = None
- assert(filename.endswith('.bloom'))
+ assert(filename.endswith(b'.bloom'))
if readwrite:
assert(expected > 0)
self.rwfile = f = f or open(filename, 'r+b')
if readwrite:
assert(expected > 0)
self.rwfile = f = f or open(filename, 'r+b')
# one bit flipped per memory page), let's use a "private" mmap,
# which defeats Linux's ability to flush it to disk. Then we'll
# flush it as one big lump during close().
# one bit flipped per memory page), let's use a "private" mmap,
# which defeats Linux's ability to flush it to disk. Then we'll
# flush it as one big lump during close().
- pages = os.fstat(f.fileno()).st_size / 4096 * 5 # assume k=5
+ pages = os.fstat(f.fileno()).st_size // 4096 * 5 # assume k=5
self.delaywrite = expected > pages
debug1('bloom: delaywrite=%r\n' % self.delaywrite)
if self.delaywrite:
self.delaywrite = expected > pages
debug1('bloom: delaywrite=%r\n' % self.delaywrite)
if self.delaywrite:
self.rwfile = None
f = f or open(filename, 'rb')
self.map = mmap_read(f)
self.rwfile = None
f = f or open(filename, 'rb')
self.map = mmap_read(f)
- got = str(self.map[0:4])
- if got != 'BLOM':
+ got = self.map[0:4]
+ if got != b'BLOM':
log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
return self._init_failed()
ver = struct.unpack('!I', self.map[4:8])[0]
log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
return self._init_failed()
ver = struct.unpack('!I', self.map[4:8])[0]
return self._init_failed()
self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
return self._init_failed()
self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
- idxnamestr = str(self.map[16 + 2**self.bits:])
+ idxnamestr = self.map[16 + 2**self.bits:]
- self.idxnames = idxnamestr.split('\0')
+ self.idxnames = idxnamestr.split(b'\0')
self.map.flush()
self.rwfile.seek(16 + 2**self.bits)
if self.idxnames:
self.map.flush()
self.rwfile.seek(16 + 2**self.bits)
if self.idxnames:
- self.rwfile.write('\0'.join(self.idxnames))
+ self.rwfile.write(b'\0'.join(self.idxnames))
self._init_failed()
def pfalse_positive(self, additional=0):
self._init_failed()
def pfalse_positive(self, additional=0):
_total_searches += 1
if not self.map:
return None
_total_searches += 1
if not self.map:
return None
- found, steps = bloom_contains(self.map, str(sha), self.bits, self.k)
+ found, steps = bloom_contains(self.map, sha, self.bits, self.k)
_total_steps += steps
return found
_total_steps += steps
return found
def create(name, expected, delaywrite=None, f=None, k=None):
"""Create and return a bloom filter for `expected` entries."""
def create(name, expected, delaywrite=None, f=None, k=None):
"""Create and return a bloom filter for `expected` entries."""
- bits = int(math.floor(math.log(expected*MAX_BITS_EACH/8,2)))
+ bits = int(math.floor(math.log(expected * MAX_BITS_EACH // 8, 2)))
k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
if bits > MAX_BLOOM_BITS[k]:
log('bloom: warning, max bits exceeded, non-optimal\n')
bits = MAX_BLOOM_BITS[k]
debug1('bloom: using 2^%d bytes and %d hash functions\n' % (bits, k))
f = f or open(name, 'w+b')
k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
if bits > MAX_BLOOM_BITS[k]:
log('bloom: warning, max bits exceeded, non-optimal\n')
bits = MAX_BLOOM_BITS[k]
debug1('bloom: using 2^%d bytes and %d hash functions\n' % (bits, k))
f = f or open(name, 'w+b')
f.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0))
assert(f.tell() == 16)
# NOTE: On some systems this will not extend+zerofill, but it does on
f.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0))
assert(f.tell() == 16)
# NOTE: On some systems this will not extend+zerofill, but it does on
- unlink(os.path.join(dir, 'bup.bloom'))
+ unlink(os.path.join(dir, b'bup.bloom'))
"""Print a log message to stderr."""
global _last_prog
sys.stdout.flush()
"""Print a log message to stderr."""
global _last_prog
sys.stdout.flush()
- _hard_write(sys.stderr.fileno(), s)
+ _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
-from __future__ import absolute_import
+from __future__ import absolute_import, print_function
import errno, platform, tempfile
from wvtest import *
import errno, platform, tempfile
from wvtest import *
@wvtest
def test_bloom():
with no_lingering_errors():
@wvtest
def test_bloom():
with no_lingering_errors():
- with test_tempdir('bup-tbloom-') as tmpdir:
+ with test_tempdir(b'bup-tbloom-') as tmpdir:
hashes = [os.urandom(20) for i in range(100)]
class Idx:
pass
ix = Idx()
hashes = [os.urandom(20) for i in range(100)]
class Idx:
pass
ix = Idx()
- ix.name='dummy.idx'
- ix.shatable = ''.join(hashes)
+ ix.name = b'dummy.idx'
+ ix.shatable = b''.join(hashes)
- b = bloom.create(tmpdir + '/pybuptest.bloom', expected=100, k=k)
+ b = bloom.create(tmpdir + b'/pybuptest.bloom', expected=100, k=k)
b.add_idx(ix)
WVPASSLT(b.pfalse_positive(), .1)
b.close()
b.add_idx(ix)
WVPASSLT(b.pfalse_positive(), .1)
b.close()
- b = bloom.ShaBloom(tmpdir + '/pybuptest.bloom')
+ b = bloom.ShaBloom(tmpdir + b'/pybuptest.bloom')
all_present = True
for h in hashes:
all_present = True
for h in hashes:
- all_present &= b.exists(h)
+ all_present &= (b.exists(h) or False)
WVPASS(all_present)
false_positives = 0
for h in [os.urandom(20) for i in range(1000)]:
if b.exists(h):
false_positives += 1
WVPASSLT(false_positives, 5)
WVPASS(all_present)
false_positives = 0
for h in [os.urandom(20) for i in range(1000)]:
if b.exists(h):
false_positives += 1
WVPASSLT(false_positives, 5)
- os.unlink(tmpdir + '/pybuptest.bloom')
+ os.unlink(tmpdir + b'/pybuptest.bloom')
tf = tempfile.TemporaryFile(dir=tmpdir)
tf = tempfile.TemporaryFile(dir=tmpdir)
- b = bloom.create('bup.bloom', f=tf, expected=100)
+ b = bloom.create(b'bup.bloom', f=tf, expected=100)
WVPASSEQ(b.rwfile, tf)
WVPASSEQ(b.k, 5)
WVPASSEQ(b.rwfile, tf)
WVPASSEQ(b.k, 5)
tf = tempfile.TemporaryFile(dir=tmpdir)
skip_test = False
try:
tf = tempfile.TemporaryFile(dir=tmpdir)
skip_test = False
try:
- b = bloom.create('bup.bloom', f=tf, expected=2**28,
+ b = bloom.create(b'bup.bloom', f=tf, expected=2**28,
delaywrite=False)
except EnvironmentError as ex:
(ptr_width, linkage) = platform.architecture()
delaywrite=False)
except EnvironmentError as ex:
(ptr_width, linkage) = platform.architecture()