X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Fbloom.py;h=54da4b8cc8fed08971f17a56a08da5352354ca77;hb=c40b3dd5fd74e72024fbaad3daf5a958aefa1c54;hp=5974ee28a70da2d31716cb9f6e2583a2181e0ceb;hpb=50382bfbb56ebbf2ee37e24bd85c94601d1e18e3;p=bup.git diff --git a/lib/bup/bloom.py b/lib/bup/bloom.py index 5974ee2..54da4b8 100644 --- a/lib/bup/bloom.py +++ b/lib/bup/bloom.py @@ -79,9 +79,14 @@ None of this tells us what max_pfalse_positive to choose. Brandon Low 2011-02-04 """ -import sys, os, math, mmap + +from __future__ import absolute_import +import sys, os, math, mmap, struct + from bup import _helpers -from bup.helpers import * +from bup.helpers import (debug1, debug2, log, mmap_read, mmap_readwrite, + mmap_readwrite_private, unlink) + BLOOM_VERSION = 2 MAX_BITS_EACH = 32 # Kinda arbitrary, but 4 bytes per entry is pretty big @@ -94,6 +99,9 @@ _total_steps = 0 bloom_contains = _helpers.bloom_contains bloom_add = _helpers.bloom_add +# FIXME: check bloom create() and ShaBloom handling/ownership of "f". +# The ownership semantics should be clarified since the caller needs +# to know who is responsible for closing it. class ShaBloom: """Wrapper which contains data from multiple index files. """ @@ -190,11 +198,15 @@ class ShaBloom: k = self.k return 100*(1-math.exp(-k*float(n)/m))**k - def add_idx(self, ix): - """Add the object to the filter, return current pfalse_positive.""" + def add(self, ids): + """Add the hashes in ids (packed binary 20-bytes) to the filter.""" if not self.map: raise Exception("Cannot add to closed bloom") - self.entries += bloom_add(self.map, ix.shatable, self.bits, self.k) + self.entries += bloom_add(self.map, ids, self.bits, self.k) + + def add_idx(self, ix): + """Add the object to the filter.""" + self.add(ix.shatable) self.idxnames.append(os.path.basename(ix.name)) def exists(self, sha): @@ -237,3 +249,6 @@ def create(name, expected, delaywrite=None, f=None, k=None): expected = 1 return ShaBloom(name, f=f, readwrite=True, expected=expected) + +def clear_bloom(dir): + unlink(os.path.join(dir, 'bup.bloom'))