X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fbloom-cmd.py;h=d7537cafc2b9129b6339384107d58b95510a6e91;hb=e20bddc73e89e643b3f5226219137e8e6ce82cdb;hp=51e594bdf11784c24c63ac60714b7f9b85d0017b;hpb=1baade32f5fed243782d2be9677491d0d3396d19;p=bup.git diff --git a/cmd/bloom-cmd.py b/cmd/bloom-cmd.py index 51e594b..d7537ca 100755 --- a/cmd/bloom-cmd.py +++ b/cmd/bloom-cmd.py @@ -1,7 +1,19 @@ -#!/usr/bin/env python -import sys, glob, tempfile +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import +import glob, os, sys, tempfile + from bup import options, git, bloom -from bup.helpers import * +from bup.compat import argv_bytes, hexstr +from bup.helpers import (add_error, debug1, handle_ctrl_c, log, progress, qprogress, + saved_errors) +from bup.io import path_msg + optspec = """ bup bloom [options...] @@ -18,40 +30,40 @@ c,check= check the given .idx file against the bloom filter def ruin_bloom(bloomfilename): rbloomfilename = git.repo_rel(bloomfilename) if not os.path.exists(bloomfilename): - log("%s\n" % bloomfilename) - add_error("bloom: %s not found to ruin\n" % rbloomfilename) + log(path_msg(bloomfilename) + '\n') + add_error('bloom: %s not found to ruin\n' % path_msg(rbloomfilename)) return b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1) - b.map[16:16+2**b.bits] = '\0' * 2**b.bits + b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits def check_bloom(path, bloomfilename, idx): rbloomfilename = git.repo_rel(bloomfilename) ridx = git.repo_rel(idx) if not os.path.exists(bloomfilename): - log("bloom: %s: does not exist.\n" % rbloomfilename) + log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename)) return b = bloom.ShaBloom(bloomfilename) if not b.valid(): - add_error("bloom: %r is invalid.\n" % rbloomfilename) + add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename)) return base = os.path.basename(idx) if base not in b.idxnames: - log("bloom: %s does not contain the idx.\n" % rbloomfilename) + log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename)) return if base == idx: idx = os.path.join(path, idx) - log("bloom: bloom file: %s\n" % rbloomfilename) - log("bloom: checking %s\n" % ridx) + log('bloom: bloom file: %s\n' % path_msg(rbloomfilename)) + log('bloom: checking %s\n' % path_msg(ridx)) for objsha in git.open_idx(idx): if not b.exists(objsha): - add_error("bloom: ERROR: object %s missing" - % str(objsha).encode('hex')) + add_error('bloom: ERROR: object %s missing' % hexstr(objsha)) _first = None -def do_bloom(path, outfilename): +def do_bloom(path, outfilename, k): global _first + assert k in (None, 4, 5) b = None if os.path.exists(outfilename) and not opt.force: b = bloom.ShaBloom(outfilename) @@ -63,7 +75,7 @@ def do_bloom(path, outfilename): rest = [] add_count = 0 rest_count = 0 - for i,name in enumerate(glob.glob('%s/*.idx' % path)): + for i, name in enumerate(glob.glob(b'%s/*.idx' % path)): progress('bloom: counting: %d\r' % i) ix = git.open_idx(name) ixbase = os.path.basename(name) @@ -73,7 +85,6 @@ def do_bloom(path, outfilename): else: add.append(name) add_count += len(ix) - total = add_count + rest_count if not add: debug1("bloom: nothing to do.\n") @@ -84,7 +95,11 @@ def do_bloom(path, outfilename): debug1("bloom: size %d != idx total %d, regenerating\n" % (len(b), rest_count)) b = None - elif (b.bits < bloom.MAX_BLOOM_BITS and + elif k is not None and k != b.k: + debug1("bloom: new k %d != existing k %d, regenerating\n" + % (k, b.k)) + b = None + elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE): debug1("bloom: regenerating: adding %d entries gives " "%.2f%% false positives.\n" @@ -100,17 +115,16 @@ def do_bloom(path, outfilename): msg = b is None and 'creating from' or 'adding' if not _first: _first = path - dirprefix = (_first != path) and git.repo_rel(path)+': ' or '' - progress('bloom: %s%s %d file%s (%d object%s).\n' - % (dirprefix, msg, + dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b'' + progress('bloom: %s%s %d file%s (%d object%s).\r' + % (path_msg(dirprefix), msg, len(add), len(add)!=1 and 's' or '', add_count, add_count!=1 and 's' or '')) tfname = None if b is None: - tfname = os.path.join(path, 'bup.tmp.bloom') - tf = open(tfname, 'w+') - b = bloom.create(tfname, f=tf, expected=add_count, k=opt.k) + tfname = os.path.join(path, b'bup.tmp.bloom') + b = bloom.create(tfname, expected=add_count, k=k) count = 0 icount = 0 for name in add: @@ -121,6 +135,10 @@ def do_bloom(path, outfilename): count += 1 icount += len(ix) + # Currently, there's an open file object for tfname inside b. + # Make sure it's closed before rename. + b.close() + if tfname: os.rename(tfname, outfilename) @@ -133,21 +151,25 @@ o = options.Options(optspec) if extra: o.fatal('no positional parameters expected') -git.check_repo_or_die() - if not opt.check and opt.k and opt.k not in (4,5): o.fatal('only k values of 4 and 5 are supported') -paths = opt.dir and [opt.dir] or git.all_packdirs() +if opt.check: + opt.check = argv_bytes(opt.check) + +git.check_repo_or_die() + +output = argv_bytes(opt.output) if opt.output else None +paths = opt.dir and [argv_bytes(opt.dir)] or git.all_packdirs() for path in paths: - debug1('bloom: scanning %s\n' % path) - outfilename = opt.output or os.path.join(path, 'bup.bloom') + debug1('bloom: scanning %s\n' % path_msg(path)) + outfilename = output or os.path.join(path, b'bup.bloom') if opt.check: check_bloom(path, outfilename, opt.check) elif opt.ruin: ruin_bloom(outfilename) else: - do_bloom(path, outfilename) + do_bloom(path, outfilename, opt.k) if saved_errors: log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors))