X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fsplit-cmd.py;h=031a266acd3b8f42e8266a40de8b2c2fcfdada76;hb=c40b3dd5fd74e72024fbaad3daf5a958aefa1c54;hp=94fba53bda7793f76ef1cdec467c5fb96059a762;hpb=91eb5521829376b4ee4e5cb872965583336d7416;p=bup.git diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py index 94fba53..031a266 100755 --- a/cmd/split-cmd.py +++ b/cmd/split-cmd.py @@ -1,47 +1,73 @@ -#!/usr/bin/env python -import sys, time +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import +import os, sys, time + from bup import hashsplit, git, options, client -from bup.helpers import * +from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num, + qprogress, reprogress, saved_errors, + userfullname, username, valid_save_name, + parse_date_or_fatal) optspec = """ -bup split [-tcb] [-n name] [--bench] [filenames...] +bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...] +bup split -b OPTIONS [--git-ids | filenames...] +bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...] -- -r,remote= remote repository path -b,blobs output a series of blob ids + Modes: +b,blobs output a series of blob ids. Implies --fanout=0. t,tree output a tree id c,commit output a commit id -n,name= name of backup set to update (if any) +n,name= save the result under the given name +noop split the input, but throw away the result +copy split the input, copy it to stdout, don't save to repo + Options: +r,remote= remote repository path d,date= date for the commit (seconds since the epoch) q,quiet don't print progress messages v,verbose increase log output (can be used more than once) -noop don't actually save the data anywhere -copy just copy input to output, hashsplitting along the way +git-ids read a list of git object ids from stdin and split their contents +keep-boundaries don't let one chunk span two input files bench print benchmark timings to stderr max-pack-size= maximum bytes in a single pack max-pack-objects= maximum number of objects in a single pack -fanout= maximum number of blobs in a single tree +fanout= average number of blobs in a single tree bwlimit= maximum bytes/sec to transmit to server +#,compress= set compression level to # (0-9, 9 is highest) [1] """ -o = options.Options('bup split', optspec) +o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) handle_ctrl_c() git.check_repo_or_die() if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop or opt.copy): - o.fatal("use one or more of -b, -t, -c, -n, -N, --copy") -if (opt.noop or opt.copy) and (opt.blobs or opt.tree or + o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy") +if (opt.noop or opt.copy) and (opt.blobs or opt.tree or opt.commit or opt.name): - o.fatal('-N and --copy are incompatible with -b, -t, -c, -n') + o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n') +if opt.blobs and (opt.tree or opt.commit or opt.name): + o.fatal('-b is incompatible with -t, -c, -n') +if extra and opt.git_ids: + o.fatal("don't provide filenames when using --git-ids") if opt.verbose >= 2: git.verbose = opt.verbose - 1 opt.bench = 1 + +max_pack_size = None if opt.max_pack_size: - hashsplit.max_pack_size = parse_num(opt.max_pack_size) + max_pack_size = parse_num(opt.max_pack_size) +max_pack_objects = None if opt.max_pack_objects: - hashsplit.max_pack_objects = parse_num(opt.max_pack_objects) + max_pack_objects = parse_num(opt.max_pack_objects) + if opt.fanout: hashsplit.fanout = parse_num(opt.fanout) if opt.blobs: @@ -53,56 +79,120 @@ if opt.date: else: date = time.time() +total_bytes = 0 +def prog(filenum, nbytes): + global total_bytes + total_bytes += nbytes + if filenum > 0: + qprogress('Splitting: file #%d, %d kbytes\r' + % (filenum+1, total_bytes/1024)) + else: + qprogress('Splitting: %d kbytes\r' % (total_bytes/1024)) + is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") start_time = time.time() +if opt.name and not valid_save_name(opt.name): + o.fatal("'%s' is not a valid branch name." % opt.name) refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.noop or opt.copy: cli = pack_writer = oldref = None elif opt.remote or is_reverse: - if opt.remote and opt.remote.find(":") == -1: - o.fatal("--remote argument must contain a colon") - try: - cli = client.Client(opt.remote) - except client.ClientError: - o.fatal("server exited unexpectedly; see errors above") + cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None - pack_writer = cli.new_packwriter() + pack_writer = cli.new_packwriter(compression_level=opt.compress, + max_pack_size=max_pack_size, + max_pack_objects=max_pack_objects) else: cli = None oldref = refname and git.read_ref(refname) or None - pack_writer = git.PackWriter() + pack_writer = git.PackWriter(compression_level=opt.compress, + max_pack_size=max_pack_size, + max_pack_objects=max_pack_objects) -files = extra and (open(fn) for fn in extra) or [sys.stdin] -if pack_writer: - shalist = hashsplit.split_to_shalist(pack_writer, files) +if opt.git_ids: + # the input is actually a series of git object ids that we should retrieve + # and split. + # + # This is a bit messy, but basically it converts from a series of + # CatPipe.get() iterators into a series of file-type objects. + # It would be less ugly if either CatPipe.get() returned a file-like object + # (not very efficient), or split_to_shalist() expected an iterator instead + # of a file. + cp = git.CatPipe() + class IterToFile: + def __init__(self, it): + self.it = iter(it) + def read(self, size): + v = next(self.it, None) + return v or '' + def read_ids(): + while 1: + line = sys.stdin.readline() + if not line: + break + if line: + line = line.strip() + try: + it = cp.get(line.strip()) + next(it, None) # skip the file info + except KeyError as e: + add_error('error: %s' % e) + continue + yield IterToFile(it) + files = read_ids() +else: + # the input either comes from a series of files or from stdin. + files = extra and (open(fn) for fn in extra) or [sys.stdin] + +if pack_writer and opt.blobs: + shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + for (sha, size, level) in shalist: + print sha.encode('hex') + reprogress() +elif pack_writer: # tree or commit or name + if opt.name: # insert dummy_name which may be used as a restore target + mode, sha = \ + hashsplit.split_to_blob_or_tree(pack_writer.new_blob, + pack_writer.new_tree, + files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode) + shalist = [(mode, splitfile_name, sha)] + else: + shalist = hashsplit.split_to_shalist( + pack_writer.new_blob, pack_writer.new_tree, files, + keep_boundaries=opt.keep_boundaries, progress=prog) tree = pack_writer.new_tree(shalist) else: last = 0 - for (blob, bits) in hashsplit.hashsplit_iter(files): + it = hashsplit.hashsplit_iter(files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + for (blob, level) in it: hashsplit.total_split += len(blob) if opt.copy: sys.stdout.write(str(blob)) megs = hashsplit.total_split/1024/1024 if not opt.quiet and last != megs: - progress('%d Mbytes read\r' % megs) last = megs - progress('%d Mbytes read, done.\n' % megs) if opt.verbose: log('\n') -if opt.blobs: - for (mode,name,bin) in shalist: - print bin.encode('hex') if opt.tree: print tree.encode('hex') if opt.commit or opt.name: - msg = 'bup split\n\nGenerated by command:\n%r' % sys.argv + msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv ref = opt.name and ('refs/heads/%s' % opt.name) or None - commit = pack_writer.new_commit(oldref, tree, date, msg) + userline = '%s <%s@%s>' % (userfullname(), username(), hostname()) + commit = pack_writer.new_commit(tree, oldref, userline, date, None, + userline, date, None, msg) if opt.commit: print commit.encode('hex') @@ -121,5 +211,9 @@ if cli: secs = time.time() - start_time size = hashsplit.total_split if opt.bench: - log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n' + log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n' % (size/1024., secs, size/1024./secs)) + +if saved_errors: + log('WARNING: %d errors encountered while saving.\n' % len(saved_errors)) + sys.exit(1)