X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?p=bup.git;a=blobdiff_plain;f=cmd%2Fbloom-cmd.py;h=0e7fbf4986facb51491c9539e4c6e5ed28b401fe;hp=47b4294ba7eec5fac62110b7a54a766c32886f9b;hb=c40b3dd5fd74e72024fbaad3daf5a958aefa1c54;hpb=f34a2a14ab9ab0ebc40db17f3cbe47da6da61814 diff --git a/cmd/bloom-cmd.py b/cmd/bloom-cmd.py index 47b4294..0e7fbf4 100755 --- a/cmd/bloom-cmd.py +++ b/cmd/bloom-cmd.py @@ -1,11 +1,21 @@ -#!/usr/bin/env python -import sys, glob, tempfile +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import +import glob, os, sys, tempfile + from bup import options, git, bloom -from bup.helpers import * +from bup.helpers import (add_error, debug1, handle_ctrl_c, log, progress, qprogress, + saved_errors) optspec = """ bup bloom [options...] -- +ruin ruin the specified bloom file (clearing the bitfield) f,force ignore existing bloom file and regenerate it from scratch o,output= output bloom filename (default: auto) d,dir= input directory to look for idx files (default: auto) @@ -13,6 +23,17 @@ k,hashes= number of hash functions to use (4 or 5) (default: auto) c,check= check the given .idx file against the bloom filter """ + +def ruin_bloom(bloomfilename): + rbloomfilename = git.repo_rel(bloomfilename) + if not os.path.exists(bloomfilename): + log("%s\n" % bloomfilename) + add_error("bloom: %s not found to ruin\n" % rbloomfilename) + return + b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1) + b.map[16:16+2**b.bits] = '\0' * 2**b.bits + + def check_bloom(path, bloomfilename, idx): rbloomfilename = git.repo_rel(bloomfilename) ridx = git.repo_rel(idx) @@ -69,14 +90,14 @@ def do_bloom(path, outfilename): if b: if len(b) != rest_count: - log("bloom: size %d != idx total %d, regenerating\n" - % (len(b), rest_count)) + debug1("bloom: size %d != idx total %d, regenerating\n" + % (len(b), rest_count)) b = None elif (b.bits < bloom.MAX_BLOOM_BITS and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE): - log("bloom: regenerating: adding %d entries gives " - "%.2f%% false positives.\n" - % (add_count, b.pfalse_positive(add_count))) + debug1("bloom: regenerating: adding %d entries gives " + "%.2f%% false positives.\n" + % (add_count, b.pfalse_positive(add_count))) b = None else: b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count) @@ -89,7 +110,7 @@ def do_bloom(path, outfilename): msg = b is None and 'creating from' or 'adding' if not _first: _first = path dirprefix = (_first != path) and git.repo_rel(path)+': ' or '' - log('bloom: %s%s %d file%s (%d object%s).\n' + progress('bloom: %s%s %d file%s (%d object%s).\r' % (dirprefix, msg, len(add), len(add)!=1 and 's' or '', add_count, add_count!=1 and 's' or '')) @@ -97,8 +118,7 @@ def do_bloom(path, outfilename): tfname = None if b is None: tfname = os.path.join(path, 'bup.tmp.bloom') - tf = open(tfname, 'w+') - b = bloom.create(tfname, f=tf, expected=add_count, k=opt.k) + b = bloom.create(tfname, expected=add_count, k=opt.k) count = 0 icount = 0 for name in add: @@ -109,6 +129,10 @@ def do_bloom(path, outfilename): count += 1 icount += len(ix) + # Currently, there's an open file object for tfname inside b. + # Make sure it's closed before rename. + b.close() + if tfname: os.rename(tfname, outfilename) @@ -132,6 +156,8 @@ for path in paths: outfilename = opt.output or os.path.join(path, 'bup.bloom') if opt.check: check_bloom(path, outfilename, opt.check) + elif opt.ruin: + ruin_bloom(outfilename) else: do_bloom(path, outfilename) @@ -139,4 +165,4 @@ if saved_errors: log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors)) sys.exit(1) elif opt.check: - log('all tests passed.\n') + log('All tests passed.\n')