X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fsplit-cmd.py;h=bb4cf2e619bbb178c9974bed4e166b9776af8ba8;hb=093752b42c5548028c6f84c67f7741b2321c512f;hp=e8df4d30c66551b2b0984967ddf5c619641a4167;hpb=92ce9401f1beeafbb58edf5b5f567a7f35377f85;p=bup.git diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py index e8df4d3..bb4cf2e 100755 --- a/cmd/split-cmd.py +++ b/cmd/split-cmd.py @@ -1,100 +1,227 @@ -#!/usr/bin/env python -import sys, time, struct +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import, division, print_function +from binascii import hexlify +import os, sys, time + from bup import hashsplit, git, options, client -from bup.helpers import * -from subprocess import PIPE +from bup.compat import argv_bytes, environ +from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num, + qprogress, reprogress, saved_errors, + valid_save_name, + parse_date_or_fatal) +from bup.io import byte_stream +from bup.pwdgrp import userfullname, username optspec = """ -bup split [-tcb] [-n name] [--bench] [filenames...] +bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...] +bup split -b OPTIONS [--git-ids | filenames...] +bup split --copy OPTIONS [--git-ids | filenames...] +bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...] -- -r,remote= remote repository path -b,blobs output a series of blob ids + Modes: +b,blobs output a series of blob ids. Implies --fanout=0. t,tree output a tree id c,commit output a commit id -n,name= name of backup set to update (if any) -N,noop don't actually save the data anywhere +n,name= save the result under the given name +noop split the input, but throw away the result +copy split the input, copy it to stdout, don't save to repo + Options: +r,remote= remote repository path +d,date= date for the commit (seconds since the epoch) q,quiet don't print progress messages v,verbose increase log output (can be used more than once) -copy just copy input to output, hashsplitting along the way +git-ids read a list of git object ids from stdin and split their contents +keep-boundaries don't let one chunk span two input files bench print benchmark timings to stderr max-pack-size= maximum bytes in a single pack max-pack-objects= maximum number of objects in a single pack -fanout= maximum number of blobs in a single tree +fanout= average number of blobs in a single tree +bwlimit= maximum bytes/sec to transmit to server +#,compress= set compression level to # (0-9, 9 is highest) [1] """ -o = options.Options('bup split', optspec) +handle_ctrl_c() + +o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) +if opt.name: opt.name = argv_bytes(opt.name) +if opt.remote: opt.remote = argv_bytes(opt.remote) +if opt.verbose is None: opt.verbose = 0 -git.check_repo_or_die() if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop or opt.copy): - o.fatal("use one or more of -b, -t, -c, -n, -N, --copy") -if (opt.noop or opt.copy) and (opt.blobs or opt.tree or - opt.commit or opt.name): - o.fatal('-N is incompatible with -b, -t, -c, -n') + o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy") +if opt.copy and (opt.blobs or opt.tree): + o.fatal('--copy is incompatible with -b, -t') +if (opt.noop or opt.copy) and (opt.commit or opt.name): + o.fatal('--noop and --copy are incompatible with -c, -n') +if opt.blobs and (opt.tree or opt.commit or opt.name): + o.fatal('-b is incompatible with -t, -c, -n') +if extra and opt.git_ids: + o.fatal("don't provide filenames when using --git-ids") if opt.verbose >= 2: git.verbose = opt.verbose - 1 opt.bench = 1 + +max_pack_size = None if opt.max_pack_size: - hashsplit.max_pack_size = parse_num(opt.max_pack_size) + max_pack_size = parse_num(opt.max_pack_size) +max_pack_objects = None if opt.max_pack_objects: - hashsplit.max_pack_objects = parse_num(opt.max_pack_objects) + max_pack_objects = parse_num(opt.max_pack_objects) + if opt.fanout: hashsplit.fanout = parse_num(opt.fanout) if opt.blobs: hashsplit.fanout = 0 +if opt.bwlimit: + client.bwlimit = parse_num(opt.bwlimit) +if opt.date: + date = parse_date_or_fatal(opt.date, o.fatal) +else: + date = time.time() + +total_bytes = 0 +def prog(filenum, nbytes): + global total_bytes + total_bytes += nbytes + if filenum > 0: + qprogress('Splitting: file #%d, %d kbytes\r' + % (filenum+1, total_bytes // 1024)) + else: + qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024)) + -is_reverse = os.environ.get('BUP_SERVER_REVERSE') +is_reverse = environ.get(b'BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") start_time = time.time() -refname = opt.name and 'refs/heads/%s' % opt.name or None +if opt.name and not valid_save_name(opt.name): + o.fatal("'%r' is not a valid branch name." % opt.name) +refname = opt.name and b'refs/heads/%s' % opt.name or None + if opt.noop or opt.copy: - cli = w = oldref = None + cli = pack_writer = oldref = None elif opt.remote or is_reverse: + git.check_repo_or_die() cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None - w = cli.new_packwriter() + pack_writer = cli.new_packwriter(compression_level=opt.compress, + max_pack_size=max_pack_size, + max_pack_objects=max_pack_objects) else: + git.check_repo_or_die() cli = None oldref = refname and git.read_ref(refname) or None - w = git.PackWriter() + pack_writer = git.PackWriter(compression_level=opt.compress, + max_pack_size=max_pack_size, + max_pack_objects=max_pack_objects) + +input = byte_stream(sys.stdin) -files = extra and (open(fn) for fn in extra) or [sys.stdin] -if w: - shalist = hashsplit.split_to_shalist(w, files) - tree = w.new_tree(shalist) +if opt.git_ids: + # the input is actually a series of git object ids that we should retrieve + # and split. + # + # This is a bit messy, but basically it converts from a series of + # CatPipe.get() iterators into a series of file-type objects. + # It would be less ugly if either CatPipe.get() returned a file-like object + # (not very efficient), or split_to_shalist() expected an iterator instead + # of a file. + cp = git.CatPipe() + class IterToFile: + def __init__(self, it): + self.it = iter(it) + def read(self, size): + v = next(self.it, None) + return v or b'' + def read_ids(): + while 1: + line = input.readline() + if not line: + break + if line: + line = line.strip() + try: + it = cp.get(line.strip()) + next(it, None) # skip the file info + except KeyError as e: + add_error('error: %s' % e) + continue + yield IterToFile(it) + files = read_ids() +else: + # the input either comes from a series of files or from stdin. + files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input] + +if pack_writer: + new_blob = pack_writer.new_blob + new_tree = pack_writer.new_tree +elif opt.blobs or opt.tree: + # --noop mode + new_blob = lambda content: git.calc_hash(b'blob', content) + new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist)) + +sys.stdout.flush() +out = byte_stream(sys.stdout) + +if opt.blobs: + shalist = hashsplit.split_to_blobs(new_blob, files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + for (sha, size, level) in shalist: + out.write(hexlify(sha) + b'\n') + reprogress() +elif opt.tree or opt.commit or opt.name: + if opt.name: # insert dummy_name which may be used as a restore target + mode, sha = \ + hashsplit.split_to_blob_or_tree(new_blob, new_tree, files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode) + shalist = [(mode, splitfile_name, sha)] + else: + shalist = hashsplit.split_to_shalist( + new_blob, new_tree, files, + keep_boundaries=opt.keep_boundaries, progress=prog) + tree = new_tree(shalist) else: last = 0 - for (blob, bits) in hashsplit.hashsplit_iter(files): + it = hashsplit.hashsplit_iter(files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + for (blob, level) in it: hashsplit.total_split += len(blob) if opt.copy: sys.stdout.write(str(blob)) - megs = hashsplit.total_split/1024/1024 + megs = hashsplit.total_split // 1024 // 1024 if not opt.quiet and last != megs: - progress('%d Mbytes read\r' % megs) last = megs - progress('%d Mbytes read, done.\n' % megs) if opt.verbose: log('\n') -if opt.blobs: - for (mode,name,bin) in shalist: - print bin.encode('hex') if opt.tree: - print tree.encode('hex') + out.write(hexlify(tree) + b'\n') if opt.commit or opt.name: - msg = 'bup split\n\nGenerated by command:\n%r' % sys.argv - ref = opt.name and ('refs/heads/%s' % opt.name) or None - commit = w.new_commit(oldref, tree, msg) + msg = b'bup split\n\nGenerated by command:\n%r\n' % sys.argv + ref = opt.name and (b'refs/heads/%s' % opt.name) or None + userline = b'%s <%s@%s>' % (userfullname(), username(), hostname()) + commit = pack_writer.new_commit(tree, oldref, userline, date, None, + userline, date, None, msg) if opt.commit: - print commit.encode('hex') + out.write(hexlify(commit) + b'\n') + +if pack_writer: + pack_writer.close() # must close before we can update the ref -if w: - w.close() # must close before we can update the ref - if opt.name: if cli: cli.update_ref(refname, commit, oldref) @@ -107,5 +234,9 @@ if cli: secs = time.time() - start_time size = hashsplit.total_split if opt.bench: - log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n' - % (size/1024., secs, size/1024./secs)) + log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n' + % (size / 1024, secs, size / 1024 / secs)) + +if saved_errors: + log('WARNING: %d errors encountered while saving.\n' % len(saved_errors)) + sys.exit(1)