Brandon Low <lostlogic@lostlogicx.com> 2011-02-04
"""
-import sys, os, math, mmap, struct
+from __future__ import absolute_import
+import os, math, struct
from bup import _helpers
+from bup.compat import pending_raise
from bup.helpers import (debug1, debug2, log, mmap_read, mmap_readwrite,
mmap_readwrite_private, unlink)
class ShaBloom:
"""Wrapper which contains data from multiple index files. """
def __init__(self, filename, f=None, readwrite=False, expected=-1):
# Open and mmap the bloom file `filename` (or the already-open file object
# `f`, if one is given) and parse its header.  With readwrite=True,
# `expected` must be > 0; it is compared against five times the file's size
# in 4096-byte pages to choose between a private (delayed-write) mapping and
# a shared read/write mapping.  On a bad magic or a version mismatch a
# warning is logged, _init_failed() is called, and construction returns
# early.
+ self.closed = False
self.name = filename
- self.rwfile = None
+ self.readwrite = readwrite
+ self.file = None
self.map = None
- assert(filename.endswith('.bloom'))
# The patched side compares against a bytes suffix, so `filename` is bytes.
+ assert(filename.endswith(b'.bloom'))
if readwrite:
assert(expected > 0)
- self.rwfile = f = f or open(filename, 'r+b')
+ self.file = f = f or open(filename, 'r+b')
f.seek(0)
# Decide whether to mmap() the pages as writable ('immediate' write)
# or privately ('delayed' write).  If we expect to touch the whole
# table anyway (at least one bit flipped per memory page), let's use
# a "private" mmap, which defeats Linux's ability to flush it to disk.
# Then we'll flush it as one big lump during close().
- pages = os.fstat(f.fileno()).st_size / 4096 * 5 # assume k=5
+ pages = os.fstat(f.fileno()).st_size // 4096 * 5 # assume k=5
self.delaywrite = expected > pages
debug1('bloom: delaywrite=%r\n' % self.delaywrite)
if self.delaywrite:
- self.map = mmap_readwrite_private(self.rwfile, close=False)
+ self.map = mmap_readwrite_private(self.file, close=False)
else:
- self.map = mmap_readwrite(self.rwfile, close=False)
+ self.map = mmap_readwrite(self.file, close=False)
else:
- self.rwfile = None
- f = f or open(filename, 'rb')
- self.map = mmap_read(f)
- got = str(self.map[0:4])
- if got != 'BLOM':
+ self.file = f or open(filename, 'rb')
+ self.map = mmap_read(self.file)
# Bytes 0-4 of the file hold the magic marker.
+ got = self.map[0:4]
+ if got != b'BLOM':
log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
- return self._init_failed()
+ self._init_failed()
+ return
# Bytes 4-8 hold the format version as a network-order unsigned 32-bit int.
ver = struct.unpack('!I', self.map[4:8])[0]
if ver < BLOOM_VERSION:
- log('Warning: ignoring old-style (v%d) bloom %r\n'
+ log('Warning: ignoring old-style (v%d) bloom %r\n'
% (ver, filename))
- return self._init_failed()
+ self._init_failed()
+ return
if ver > BLOOM_VERSION:
log('Warning: ignoring too-new (v%d) bloom %r\n'
% (ver, filename))
- return self._init_failed()
+ self._init_failed()
+ return
# Bytes 8-16: two unsigned 16-bit fields and one unsigned 32-bit field.
self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
# Whatever follows the 16-byte header plus 2**bits bytes (presumably the
# filter table itself -- confirm) is a NUL-separated list of index names.
- idxnamestr = str(self.map[16 + 2**self.bits:])
+ idxnamestr = self.map[16 + 2**self.bits:]
if idxnamestr:
- self.idxnames = idxnamestr.split('\0')
+ self.idxnames = idxnamestr.split(b'\0')
else:
self.idxnames = []
def _init_failed(self):
# Reset this instance to the empty/invalid state and release the mmap and
# the file object.  Called by __init__ on a bad header and by close() once
# it has finished writing.
- if self.map:
- self.map = None
- if self.rwfile:
- self.rwfile.close()
- self.rwfile = None
self.idxnames = []
self.bits = self.entries = 0
# Detach both resources from self before closing them, so the instance
# never refers to a closed map or file even if one of the close() calls
# below raises.
+ self.map, tmp_map = None, self.map
+ self.file, tmp_file = None, self.file
+ try:
+ if tmp_map:
+ tmp_map.close()
+ finally: # This won't handle pending exceptions correctly in py2
# Test the saved handle, not self.file: self.file was set to None above,
# so checking it here would always be false and leak the open file.
+ if tmp_file:
+ tmp_file.close()
def valid(self):
# Truthy only when a map is attached and `bits` is non-zero.  The result is
# the falsy map or the value of `bits` itself, not a strict bool.
return self.map and self.bits
+ def close(self):
# Mark the filter closed and, for a writable map, persist its state before
# releasing the map and the file through _init_failed().
+ self.closed = True
+ try:
+ if self.map and self.readwrite:
+ debug2("bloom: closing with %d entries\n" % self.entries)
# Store the current entry count in header bytes 12-16.
+ self.map[12:16] = struct.pack('!I', self.entries)
+ if self.delaywrite:
# Delayed-write mode: copy the whole map out to the file from offset 0.
+ self.file.seek(0)
+ self.file.write(self.map)
+ else:
+ self.map.flush()
# The NUL-joined index names go after the 16-byte header and the 2**bits
# bytes that follow it.
+ self.file.seek(16 + 2**self.bits)
+ if self.idxnames:
+ self.file.write(b'\0'.join(self.idxnames))
+ finally: # This won't handle pending exceptions correctly in py2
# Always release the map and file, even if one of the writes above failed.
+ self._init_failed()
+
def __del__(self):
# The finalizer no longer closes the filter implicitly; it only checks that
# close() has already run (it sets self.closed), so an unclosed instance
# trips the assertion.
- self.close()
+ assert self.closed
- def close(self):
- if self.map and self.rwfile:
- debug2("bloom: closing with %d entries\n" % self.entries)
- self.map[12:16] = struct.pack('!I', self.entries)
- if self.delaywrite:
- self.rwfile.seek(0)
- self.rwfile.write(self.map)
- else:
- self.map.flush()
- self.rwfile.seek(16 + 2**self.bits)
- if self.idxnames:
- self.rwfile.write('\0'.join(self.idxnames))
- self._init_failed()
+ def __enter__(self):
# Context-manager entry: the filter itself is the managed object.
+ return self
+
+ def __exit__(self, type, value, traceback):
# Context-manager exit: always close the filter, whether or not the with
# body raised.  close() runs inside bup.compat.pending_raise.
# NOTE(review): presumably pending_raise attaches `value` as context to any
# exception close() raises and, with rethrow=False, does not re-raise
# `value` itself -- confirm against bup.compat.
+ with pending_raise(value, rethrow=False):
+ self.close()
def pfalse_positive(self, additional=0):
n = self.entries + additional
_total_searches += 1
if not self.map:
return None
- found, steps = bloom_contains(self.map, str(sha), self.bits, self.k)
+ found, steps = bloom_contains(self.map, sha, self.bits, self.k)
_total_steps += steps
return found
def create(name, expected, delaywrite=None, f=None, k=None):
"""Create and return a bloom filter for `expected` entries."""
- bits = int(math.floor(math.log(expected*MAX_BITS_EACH/8,2)))
+ bits = int(math.floor(math.log(expected * MAX_BITS_EACH // 8, 2)))
k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
if bits > MAX_BLOOM_BITS[k]:
log('bloom: warning, max bits exceeded, non-optimal\n')
bits = MAX_BLOOM_BITS[k]
debug1('bloom: using 2^%d bytes and %d hash functions\n' % (bits, k))
f = f or open(name, 'w+b')
- f.write('BLOM')
+ f.write(b'BLOM')
f.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0))
assert(f.tell() == 16)
# NOTE: On some systems this will not extend+zerofill, but it does on
def clear_bloom(dir):
# Remove the bloom filter file named bup.bloom from directory `dir`.
# The patched side joins a bytes component, so `dir` must be bytes as well
# (os.path.join rejects mixed str/bytes arguments on Python 3).
# NOTE(review): `unlink` is bup.helpers.unlink, not os.unlink; presumably it
# tolerates a missing file -- confirm.
- unlink(os.path.join(dir, 'bup.bloom'))
+ unlink(os.path.join(dir, b'bup.bloom'))