From a90f2ef771a19fcaf514aac9e7160674f1536eb0 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 17 Feb 2011 03:10:23 -0800 Subject: [PATCH] cmd/split: fixup progress message, and print -b output incrementally. As a side effect, you can no longer combine -b with -t, -c, or -n. But that was kind of a pointless thing to do anyway, because it silently enforced --fanout=0, which is almost certainly not what you wanted, precisely if you were using -t, -c, or -n. Signed-off-by: Avery Pennarun --- cmd/split-cmd.py | 30 ++++++++++++++++++------------ lib/bup/hashsplit.py | 5 +++-- lib/bup/helpers.py | 16 +++++++++++++++- t/test.sh | 2 +- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py index e016d1e..363896f 100755 --- a/cmd/split-cmd.py +++ b/cmd/split-cmd.py @@ -5,20 +5,22 @@ from bup.helpers import * optspec = """ -bup split [-tcb] [-n name] [--bench] [filenames...] +bup split <-t|-c|-b|-n name|--copy|--noop> [--bench] [filenames...] -- -r,remote= remote repository path + Modes: b,blobs output a series of blob ids t,tree output a tree id c,commit output a commit id -n,name= name of backup set to update (if any) +n,name= save the result under the given name +noop split the input, but throw away the result +copy split the input, copy it to stdout, don't save to repo + Options: +r,remote= remote repository path d,date= date for the commit (seconds since the epoch) q,quiet don't print progress messages v,verbose increase log output (can be used more than once) git-ids read a list of git object ids from stdin and split their contents keep-boundaries don't let one chunk span two input files -noop don't actually save the data anywhere -copy just copy input to output, hashsplitting along the way bench print benchmark timings to stderr max-pack-size= maximum bytes in a single pack max-pack-objects= maximum number of objects in a single pack @@ -36,6 +38,8 @@ if not (opt.blobs or opt.tree or opt.commit or opt.name or if (opt.noop or opt.copy) and (opt.blobs or opt.tree or opt.commit or opt.name): o.fatal('-N and --copy are incompatible with -b, -t, -c, -n') +if opt.blobs and (opt.tree or opt.commit or opt.name): + o.fatal('-b is incompatible with -t, -c, -n') if extra and opt.git_ids: o.fatal("don't provide filenames when using --git-ids") @@ -123,7 +127,14 @@ else: # the input either comes from a series of files or from stdin. files = extra and (open(fn) for fn in extra) or [sys.stdin] -if pack_writer: +if pack_writer and opt.blobs: + shalist = hashsplit.split_to_blobs(pack_writer, files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + for (sha, size, level) in shalist: + print sha.encode('hex') + reprogress() +elif pack_writer: # tree or commit or name shalist = hashsplit.split_to_shalist(pack_writer, files, keep_boundaries=opt.keep_boundaries, progress=prog) @@ -139,15 +150,10 @@ else: sys.stdout.write(str(blob)) megs = hashsplit.total_split/1024/1024 if not opt.quiet and last != megs: - progress('%d Mbytes read\r' % megs) last = megs - progress('%d Mbytes read, done.\n' % megs) if opt.verbose: log('\n') -if opt.blobs: - for (mode,name,bin) in shalist: - print bin.encode('hex') if opt.tree: print tree.encode('hex') if opt.commit or opt.name: @@ -172,7 +178,7 @@ if cli: secs = time.time() - start_time size = hashsplit.total_split if opt.bench: - log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n' + log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n' % (size/1024., secs, size/1024./secs)) if saved_errors: diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index f9d5a4d..439c63d 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -105,7 +105,7 @@ def hashsplit_iter(files, keep_boundaries, progress): total_split = 0 -def _split_to_blobs(w, files, keep_boundaries, progress): +def split_to_blobs(w, files, keep_boundaries, progress): global total_split for (blob, level) in hashsplit_iter(files, keep_boundaries, progress): sha = w.new_blob(blob) @@ -143,7 +143,8 @@ def _squish(w, stacks, n): def split_to_shalist(w, files, keep_boundaries, progress=None): - sl = _split_to_blobs(w, files, keep_boundaries, progress) + sl = split_to_blobs(w, files, keep_boundaries, progress) + assert(fanout != 0) if not fanout: shal = [] for (sha,size,level) in sl: diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index 5b00283..ed976bf 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -67,16 +67,20 @@ def debug2(s): istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY')) +_last_progress = '' def progress(s): """Calls log() if stderr is a TTY. Does nothing otherwise.""" + global _last_progress if istty: log(s) + _last_progress = s def qprogress(s): """Calls progress() only if we haven't printed progress in a while. - This avoids overloading the stderr buffer with excess junk.""" + This avoids overloading the stderr buffer with excess junk. + """ global _last_prog now = time.time() if now - _last_prog > 0.1: @@ -84,6 +88,16 @@ def qprogress(s): _last_prog = now +def reprogress(): + """Calls progress() to redisplay the most recent progress message. + + Useful after you've printed some other message that wipes out the + progress line. + """ + if _last_progress and _last_progress.endswith('\r'): + progress(_last_progress) + + def mkdirp(d, mode=None): """Recursively create directories on path 'd'. diff --git a/t/test.sh b/t/test.sh index 7186710..9b5e55a 100755 --- a/t/test.sh +++ b/t/test.sh @@ -208,7 +208,7 @@ WVSTART "save/git-fsck" #git prune (cd "$TOP/t/sampledata" && WVPASS bup save -vvn master /) || WVFAIL n=$(git fsck --full --strict 2>&1 | - egrep -v 'dangling (commit|tree)' | + egrep -v 'dangling (commit|tree|blob)' | tee -a /dev/stderr | wc -l) WVPASS [ "$n" -eq 0 ] -- 2.39.2