From 23bb587af3ed059edc90af71d8a2e2a79b67b956 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 16 Feb 2011 16:01:30 -0800 Subject: [PATCH] cmd/{bloom,midx}: clean up progress messages. bloom was printing messages more often than necessary on fast computers, which could overwhelm the stderr output a bit. Also change to a percentage + number of objects, like midx and save do, rather than just printing the current file number. And don't print so many lines of output by default: now if bloom doesn't end up doing anything, it doesn't print any output. And if it does do something, it prints only one output line per file. bloom and midx now both print the name of the directory where they're creating their output files; if you have multiple directories in .bup/index-cache, it was a little confusing to see them doing multiple runs for no apparent reason. Signed-off-by: Avery Pennarun --- cmd/bloom-cmd.py | 27 ++++++++++++++++++--------- cmd/midx-cmd.py | 8 ++++++-- lib/bup/git.py | 12 ++++++++++++ lib/bup/helpers.py | 31 +++++++++++++++++++++++-------- 4 files changed, 59 insertions(+), 19 deletions(-) diff --git a/cmd/bloom-cmd.py b/cmd/bloom-cmd.py index 59f84eb..ed148fc 100755 --- a/cmd/bloom-cmd.py +++ b/cmd/bloom-cmd.py @@ -11,7 +11,9 @@ d,dir= input directory to look for idx files (default: auto) k,hashes= number of hash functions to use (4 or 5) (default: auto) """ +_first = None def do_bloom(path, outfilename): + global _first if not outfilename: assert(path) outfilename = os.path.join(path, 'bup.bloom') @@ -27,7 +29,8 @@ def do_bloom(path, outfilename): rest = [] add_count = 0 rest_count = 0 - for name in glob.glob('%s/*.idx' % path): + for i,name in enumerate(glob.glob('%s/*.idx' % path)): + progress('bloom: counting: %d\r' % i) ix = git.open_idx(name) ixbase = os.path.basename(name) if b and (ixbase in b.idxnames): @@ -39,7 +42,7 @@ def do_bloom(path, outfilename): total = add_count + rest_count if not add: - log("bloom: Nothing to do\n") + debug1("bloom: 
nothing to do.\n") return if b: @@ -49,7 +52,8 @@ def do_bloom(path, outfilename): b = None elif (b.bits < git.MAX_BLOOM_BITS and b.pfalse_positive(add_count) > git.MAX_PFALSE_POSITIVE): - log("bloom: %d more entries => %.2f false positive, regenerating\n" + log("bloom: regenerating: adding %d entries gives " + "%.2f%% false positives.\n" % (add_count, b.pfalse_positive(add_count))) b = None else: @@ -61,10 +65,12 @@ def do_bloom(path, outfilename): del rest_count msg = b is None and 'creating from' or 'adding' - log('bloom: %s %d file%s (%d object%s).\n' % (msg, len(add), - len(add)!=1 and 's' or '', - add_count, - add_count!=1 and 's' or '')) + if not _first: _first = path + dirprefix = (_first != path) and git.repo_rel(path)+': ' or '' + log('bloom: %s%s %d file%s (%d object%s).\n' + % (dirprefix, msg, + len(add), len(add)!=1 and 's' or '', + add_count, add_count!=1 and 's' or '')) tfname = None if b is None: @@ -72,12 +78,14 @@ def do_bloom(path, outfilename): tf = open(tfname, 'w+') b = git.ShaBloom.create(tfname, f=tf, expected=add_count, k=opt.k) count = 0 + icount = 0 for name in add: ix = git.open_idx(name) - progress('Writing bloom: %d/%d\r' % (count, len(add))) + qprogress('bloom: writing %.2f%% (%d/%d objects)\r' + % (icount*100.0/add_count, icount, add_count)) b.add_idx(ix) count += 1 - log('Writing bloom: %d/%d, done.\n' % (count, len(add))) + icount += len(ix) if tfname: os.rename(tfname, outfilename) @@ -98,4 +106,5 @@ git.check_repo_or_die() paths = opt.dir and [opt.dir] or git.all_packdirs() for path in paths: + debug1('bloom: scanning %s\n' % path) do_bloom(path, opt.output) diff --git a/cmd/midx-cmd.py b/cmd/midx-cmd.py index b4b37cf..5885d05 100755 --- a/cmd/midx-cmd.py +++ b/cmd/midx-cmd.py @@ -33,7 +33,9 @@ def max_files(): merge_into = _helpers.merge_into +_first = None def _do_midx(outdir, outfilename, infilenames, prefixstr): + global _first if not outfilename: assert(outdir) sum = Sha1('\0'.join(infilenames)).hexdigest() @@ -56,8 +58,10 
@@ def _do_midx(outdir, outfilename, infilenames, prefixstr): total += len(ix) inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20]))) - log('midx: %screating from %d files (%d objects).\n' - % (prefixstr, len(infilenames), total)) + if not _first: _first = outdir + dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or '' + log('midx: %s%screating from %d files (%d objects).\n' + % (dirprefix, prefixstr, len(infilenames), total)) if (not opt.force and (total < 1024 and len(infilenames) < 3)) \ or len(infilenames) < 2 \ or (opt.force and not total): diff --git a/lib/bup/git.py b/lib/bup/git.py index c503e49..659164f 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -125,6 +125,18 @@ def repo(sub = ''): return os.path.join(repodir, sub) +def repo_rel(path): + full = os.path.abspath(path) + fullrepo = os.path.abspath(repo('')) + if not fullrepo.endswith('/'): + fullrepo += '/' + if full.startswith(fullrepo): + path = full[len(fullrepo):] + if path.startswith('index-cache/'): + path = path[len('index-cache/'):] + return path + + def all_packdirs(): paths = [repo('objects/pack')] paths += glob.glob(repo('index-cache/*/.')) diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index e7c2f81..5b00283 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -1,7 +1,7 @@ """Helper functions and classes for bup.""" import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct -import heapq, operator +import heapq, operator, time from bup import _version, _helpers # This function should really be in helpers, not in bup.options. 
But we @@ -46,10 +46,14 @@ def _hard_write(fd, buf): assert(sz >= 0) buf = buf[sz:] + +_last_prog = 0 def log(s): """Print a log message to stderr.""" + global _last_prog sys.stdout.flush() _hard_write(sys.stderr.fileno(), s) + _last_prog = 0 def debug1(s): @@ -62,6 +66,24 @@ def debug2(s): log(s) +istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY')) +def progress(s): + """Calls log() if stderr is a TTY. Does nothing otherwise.""" + if istty: + log(s) + + +def qprogress(s): + """Calls progress() only if we haven't printed progress in a while. + + This avoids overloading the stderr buffer with excess junk.""" + global _last_prog + now = time.time() + if now - _last_prog > 0.1: + progress(s) + _last_prog = now + + def mkdirp(d, mode=None): """Recursively create directories on path 'd'. @@ -535,13 +557,6 @@ def add_error(e): log('%-70s\n' % e) -istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY')) -def progress(s): - """Calls log(s) if stderr is a TTY. Does nothing otherwise.""" - if istty: - log(s) - - def handle_ctrl_c(): """Replace the default exception handler for KeyboardInterrupt (Ctrl-C). -- 2.39.2