From d1df8e5eae825b5f45d80b836f1f42dc80e1d657 Mon Sep 17 00:00:00 2001 From: Robert Evans Date: Sun, 29 Apr 2018 06:47:30 -0400 Subject: [PATCH] Add bup split --noop <--blobs|--tree> This prints the resulting id without storing in the repo. Signed-off-by: Robert Evans [rlb@defaultvalue.org: remove trailing period from commit summary] Reviewed-by: Rob Browning Tested-by: Rob Browning --- Documentation/bup-split.md | 12 ++++++++---- cmd/split-cmd.py | 32 ++++++++++++++++++++------------ t/test-split-join.sh | 8 ++++++++ 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/Documentation/bup-split.md b/Documentation/bup-split.md index 22d63cb..89e4f03 100644 --- a/Documentation/bup-split.md +++ b/Documentation/bup-split.md @@ -12,7 +12,9 @@ bup split \[-t\] \[-c\] \[-n *name*\] COMMON\_OPTIONS bup split -b COMMON\_OPTIONS -bup split \<--noop \[--copy\]|--copy\> COMMON\_OPTIONS +bup split --copy COMMON\_OPTIONS + +bup split --noop \[\<-t|-b\>\] COMMON\_OPTIONS COMMON\_OPTIONS ~ \[-r *host*:*path*\] \[-v\] \[-q\] \[-d *seconds-since-epoch*\] \[\--bench\] @@ -76,9 +78,11 @@ being the most likely choice. \--noop : read the data and split it into blocks based on the "bupsplit" - rolling checksum algorithm, but don't do anything with the blocks. - This is mostly useful for benchmarking. Incompatible with -n, -t, - -c, and -b. + rolling checksum algorithm, but don't store anything in the repo. + Can be combined with -b or -t to compute (but not store) the git + blob or tree ids for the dataset. This is mostly useful for + benchmarking and validating the bupsplit algorithm. Incompatible + with -n and -c. \--copy : like `--noop`, but also write the data to stdout. 
This can be diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py index d3ac64b..021083a 100755 --- a/cmd/split-cmd.py +++ b/cmd/split-cmd.py @@ -18,7 +18,8 @@ from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num, optspec = """ bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...] bup split -b OPTIONS [--git-ids | filenames...] -bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...] +bup split --copy OPTIONS [--git-ids | filenames...] +bup split --noop [<-b|-t>] OPTIONS [--git-ids | filenames...] -- Modes: b,blobs output a series of blob ids. Implies --fanout=0. @@ -49,9 +50,10 @@ git.check_repo_or_die() if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop or opt.copy): o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy") -if (opt.noop or opt.copy) and (opt.blobs or opt.tree or - opt.commit or opt.name): - o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n') +if opt.copy and (opt.blobs or opt.tree): + o.fatal('--copy is incompatible with -b, -t') +if (opt.noop or opt.copy) and (opt.commit or opt.name): + o.fatal('--noop and --copy are incompatible with -c, -n') if opt.blobs and (opt.tree or opt.commit or opt.name): o.fatal('-b is incompatible with -t, -c, -n') if extra and opt.git_ids: @@ -148,28 +150,34 @@ else: # the input either comes from a series of files or from stdin. 
files = extra and (open(fn) for fn in extra) or [sys.stdin] -if pack_writer and opt.blobs: - shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files, +if pack_writer: + new_blob = pack_writer.new_blob + new_tree = pack_writer.new_tree +elif opt.blobs or opt.tree: + # --noop mode + new_blob = lambda content: git.calc_hash('blob', content) + new_tree = lambda shalist: git.calc_hash('tree', git.tree_encode(shalist)) + +if opt.blobs: + shalist = hashsplit.split_to_blobs(new_blob, files, keep_boundaries=opt.keep_boundaries, progress=prog) for (sha, size, level) in shalist: print sha.encode('hex') reprogress() -elif pack_writer: # tree or commit or name +elif opt.tree or opt.commit or opt.name: if opt.name: # insert dummy_name which may be used as a restore target mode, sha = \ - hashsplit.split_to_blob_or_tree(pack_writer.new_blob, - pack_writer.new_tree, - files, + hashsplit.split_to_blob_or_tree(new_blob, new_tree, files, keep_boundaries=opt.keep_boundaries, progress=prog) splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode) shalist = [(mode, splitfile_name, sha)] else: shalist = hashsplit.split_to_shalist( - pack_writer.new_blob, pack_writer.new_tree, files, + new_blob, new_tree, files, keep_boundaries=opt.keep_boundaries, progress=prog) - tree = pack_writer.new_tree(shalist) + tree = new_tree(shalist) else: last = 0 it = hashsplit.hashsplit_iter(files, diff --git a/t/test-split-join.sh b/t/test-split-join.sh index 8e3cc4d..e0627a8 100755 --- a/t/test-split-join.sh +++ b/t/test-split-join.sh @@ -15,6 +15,12 @@ WVPASS cd "$tmpdir" WVPASS bup init +WVSTART "split --noop" +WVPASS bup split --noop <"$top/t/testfile1" +WVPASS bup split --noop -b <"$top/t/testfile1" >tags1n.tmp +WVPASS bup split --noop -t <"$top/t/testfile2" >tags2tn.tmp +WVPASSEQ $(find "$BUP_DIR/objects/pack" -name '*.pack' | wc -l) 0 + WVSTART "split" WVPASS echo a >a.tmp WVPASS echo b >b.tmp @@ -66,6 +72,8 @@ WVPASS bup ls /lslr/latest WVPASS bup ls /lslr/latest/ #WVPASS bup 
ls /lslr/1971-01-01 # all dates always exist WVFAIL diff -u tags1.tmp tags2.tmp +WVPASS diff -u tags1.tmp tags1n.tmp +WVPASS diff -u tags2t.tmp tags2tn.tmp # fanout must be different from non-fanout WVFAIL diff tags2t.tmp tags2tf.tmp -- 2.39.2