From b86fb21ae670f8d1dc42d26bd69252cb9cc0ab7f Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Sun, 12 Jan 2020 15:33:15 -0600 Subject: [PATCH] bloom: fix logic controlling bloom regeneration Add missing MAX_BLOOM_BITS index in the logic in bup bloom that determines whether or not we should regenerate the filter. We never noticed because: $ python2 >>> 0 < {1 : 2} True $ python3 >>> 0 < {1 : 2} Traceback (most recent call last): File "<stdin>", line 1, in <module> TypeError: '<' not supported between instances of 'int' and 'dict' Also regenerate if the -k value differs from the existing filter's k. Thanks to Johannes Berg for pointing out some nontrivial problems in an earlier version. Signed-off-by: Rob Browning Tested-by: Rob Browning --- cmd/bloom-cmd.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cmd/bloom-cmd.py b/cmd/bloom-cmd.py index 3d195ca..09ed081 100755 --- a/cmd/bloom-cmd.py +++ b/cmd/bloom-cmd.py @@ -59,8 +59,9 @@ def check_bloom(path, bloomfilename, idx): _first = None -def do_bloom(path, outfilename): +def do_bloom(path, outfilename, k): global _first + assert k in (None, 4, 5) b = None if os.path.exists(outfilename) and not opt.force: b = bloom.ShaBloom(outfilename) @@ -82,7 +83,6 @@ def do_bloom(path, outfilename): else: add.append(name) add_count += len(ix) - total = add_count + rest_count if not add: debug1("bloom: nothing to do.\n") @@ -93,7 +93,11 @@ def do_bloom(path, outfilename): debug1("bloom: size %d != idx total %d, regenerating\n" % (len(b), rest_count)) b = None - elif (b.bits < bloom.MAX_BLOOM_BITS and + elif k is not None and k != b.k: + debug1("bloom: new k %d != existing k %d, regenerating\n" + % (k, b.k)) + b = None + elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE): debug1("bloom: regenerating: adding %d entries gives " "%.2f%% false positives.\n" @@ -118,7 +122,7 @@ def do_bloom(path, outfilename): tfname = None if b is None: tfname = os.path.join(path,
'bup.tmp.bloom') - b = bloom.create(tfname, expected=add_count, k=opt.k) + b = bloom.create(tfname, expected=add_count, k=k) count = 0 icount = 0 for name in add: @@ -159,7 +163,7 @@ for path in paths: elif opt.ruin: ruin_bloom(outfilename) else: - do_bloom(path, outfilename) + do_bloom(path, outfilename, opt.k) if saved_errors: log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors)) -- 2.39.2