X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fbloom-cmd.py;h=d74cce2704567597fc1fce934032dfd025041e55;hb=refs%2Fheads%2Funused-variable-do_bloom;hp=033ad85f190c8b1a4bce787bf96dc7a7e57a3ada;hpb=49b04a52d00c99b26dac76b88e400ef546ed4389;p=bup.git diff --git a/cmd/bloom-cmd.py b/cmd/bloom-cmd.py index 033ad85..d74cce2 100755 --- a/cmd/bloom-cmd.py +++ b/cmd/bloom-cmd.py @@ -6,6 +6,7 @@ from bup.helpers import * optspec = """ bup bloom [options...] -- +ruin ruin the specified bloom file (clearing the bitfield) f,force ignore existing bloom file and regenerate it from scratch o,output= output bloom filename (default: auto) d,dir= input directory to look for idx files (default: auto) @@ -13,6 +14,17 @@ k,hashes= number of hash functions to use (4 or 5) (default: auto) c,check= check the given .idx file against the bloom filter """ + +def ruin_bloom(bloomfilename): + rbloomfilename = git.repo_rel(bloomfilename) + if not os.path.exists(bloomfilename): + log("%s\n" % bloomfilename) + add_error("bloom: %s not found to ruin\n" % rbloomfilename) + return + b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1) + b.map[16:16+2**b.bits] = '\0' * 2**b.bits + + def check_bloom(path, bloomfilename, idx): rbloomfilename = git.repo_rel(bloomfilename) ridx = git.repo_rel(idx) @@ -69,14 +81,14 @@ def do_bloom(path, outfilename): if b: if len(b) != rest_count: - log("bloom: size %d != idx total %d, regenerating\n" - % (len(b), rest_count)) + debug1("bloom: size %d != idx total %d, regenerating\n" + % (len(b), rest_count)) b = None elif (b.bits < bloom.MAX_BLOOM_BITS and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE): - log("bloom: regenerating: adding %d entries gives " - "%.2f%% false positives.\n" - % (add_count, b.pfalse_positive(add_count))) + debug1("bloom: regenerating: adding %d entries gives " + "%.2f%% false positives.\n" + % (add_count, b.pfalse_positive(add_count))) b = None else: b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count) @@ -89,7 +101,7 @@ def do_bloom(path, outfilename): msg = b is None and 'creating from' or 'adding' if not _first: _first = path dirprefix = (_first != path) and git.repo_rel(path)+': ' or '' - log('bloom: %s%s %d file%s (%d object%s).\n' + progress('bloom: %s%s %d file%s (%d object%s).\n' % (dirprefix, msg, len(add), len(add)!=1 and 's' or '', add_count, add_count!=1 and 's' or '')) @@ -97,18 +109,20 @@ def do_bloom(path, outfilename): tfname = None if b is None: tfname = os.path.join(path, 'bup.tmp.bloom') - tf = open(tfname, 'w+') - b = bloom.ShaBloom.create(tfname, f=tf, expected=add_count, k=opt.k) - count = 0 + b = bloom.create(tfname, expected=add_count, k=opt.k) + icount = 0 for name in add: ix = git.open_idx(name) qprogress('bloom: writing %.2f%% (%d/%d objects)\r' % (icount*100.0/add_count, icount, add_count)) b.add_idx(ix) - count += 1 icount += len(ix) + # Currently, there's an open file object for tfname inside b. + # Make sure it's closed before rename. + b.close() + if tfname: os.rename(tfname, outfilename) @@ -132,6 +146,8 @@ for path in paths: outfilename = opt.output or os.path.join(path, 'bup.bloom') if opt.check: check_bloom(path, outfilename, opt.check) + elif opt.ruin: + ruin_bloom(outfilename) else: do_bloom(path, outfilename) @@ -139,4 +155,4 @@ if saved_errors: log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors)) sys.exit(1) elif opt.check: - log('all tests passed.\n') + log('All tests passed.\n')