X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fsplit-cmd.py;h=021083a2f1762b87b24d7225b83fd9411c212f8d;hb=d1df8e5eae825b5f45d80b836f1f42dc80e1d657;hp=363896f792381f3b85d34cc3ab98339af0458d2d;hpb=a90f2ef771a19fcaf514aac9e7160674f1536eb0;p=bup.git diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py index 363896f..021083a 100755 --- a/cmd/split-cmd.py +++ b/cmd/split-cmd.py @@ -1,14 +1,28 @@ -#!/usr/bin/env python -import sys, time +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import +import os, sys, time + from bup import hashsplit, git, options, client -from bup.helpers import * +from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num, + qprogress, reprogress, saved_errors, + userfullname, username, valid_save_name, + parse_date_or_fatal) optspec = """ -bup split <-t|-c|-b|-n name|--copy|--noop> [--bench] [filenames...] +bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...] +bup split -b OPTIONS [--git-ids | filenames...] +bup split --copy OPTIONS [--git-ids | filenames...] +bup split --noop [<-b|-t>] OPTIONS [--git-ids | filenames...] -- Modes: -b,blobs output a series of blob ids +b,blobs output a series of blob ids. Implies --fanout=0. t,tree output a tree id c,commit output a commit id n,name= save the result under the given name @@ -24,8 +38,9 @@ keep-boundaries don't let one chunk span two input files bench print benchmark timings to stderr max-pack-size= maximum bytes in a single pack max-pack-objects= maximum number of objects in a single pack -fanout= maximum number of blobs in a single tree +fanout= average number of blobs in a single tree bwlimit= maximum bytes/sec to transmit to server +#,compress= set compression level to # (0-9, 9 is highest) [1] """ o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) @@ -34,10 +49,11 @@ handle_ctrl_c() git.check_repo_or_die() if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop or opt.copy): - o.fatal("use one or more of -b, -t, -c, -n, -N, --copy") -if (opt.noop or opt.copy) and (opt.blobs or opt.tree or - opt.commit or opt.name): - o.fatal('-N and --copy are incompatible with -b, -t, -c, -n') + o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy") +if opt.copy and (opt.blobs or opt.tree): + o.fatal('--copy is incompatible with -b, -t') +if (opt.noop or opt.copy) and (opt.commit or opt.name): + o.fatal('--noop and --copy are incompatible with -c, -n') if opt.blobs and (opt.tree or opt.commit or opt.name): o.fatal('-b is incompatible with -t, -c, -n') if extra and opt.git_ids: @@ -46,10 +62,14 @@ if extra and opt.git_ids: if opt.verbose >= 2: git.verbose = opt.verbose - 1 opt.bench = 1 + +max_pack_size = None if opt.max_pack_size: - hashsplit.max_pack_size = parse_num(opt.max_pack_size) + max_pack_size = parse_num(opt.max_pack_size) +max_pack_objects = None if opt.max_pack_objects: - hashsplit.max_pack_objects = parse_num(opt.max_pack_objects) + max_pack_objects = parse_num(opt.max_pack_objects) + if opt.fanout: hashsplit.fanout = parse_num(opt.fanout) if opt.blobs: @@ -61,7 +81,6 @@ if opt.date: else: date = time.time() - total_bytes = 0 def prog(filenum, nbytes): global total_bytes @@ -78,7 +97,7 @@ if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") start_time = time.time() -if opt.name and opt.name.startswith('.'): +if opt.name and not valid_save_name(opt.name): o.fatal("'%s' is not a valid branch name." % opt.name) refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.noop or opt.copy: @@ -86,11 +105,15 @@ if opt.noop or opt.copy: elif opt.remote or is_reverse: cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None - pack_writer = cli.new_packwriter() + pack_writer = cli.new_packwriter(compression_level=opt.compress, + max_pack_size=max_pack_size, + max_pack_objects=max_pack_objects) else: cli = None oldref = refname and git.read_ref(refname) or None - pack_writer = git.PackWriter() + pack_writer = git.PackWriter(compression_level=opt.compress, + max_pack_size=max_pack_size, + max_pack_objects=max_pack_objects) if opt.git_ids: # the input is actually a series of git object ids that we should retrieve @@ -106,7 +129,7 @@ if opt.git_ids: def __init__(self, it): self.it = iter(it) def read(self, size): - v = next(self.it) + v = next(self.it, None) return v or '' def read_ids(): while 1: @@ -117,8 +140,8 @@ if opt.git_ids: line = line.strip() try: it = cp.get(line.strip()) - next(it) # skip the file type - except KeyError, e: + next(it, None) # skip the file info + except KeyError as e: add_error('error: %s' % e) continue yield IterToFile(it) @@ -127,18 +150,34 @@ else: # the input either comes from a series of files or from stdin. files = extra and (open(fn) for fn in extra) or [sys.stdin] -if pack_writer and opt.blobs: - shalist = hashsplit.split_to_blobs(pack_writer, files, +if pack_writer: + new_blob = pack_writer.new_blob + new_tree = pack_writer.new_tree +elif opt.blobs or opt.tree: + # --noop mode + new_blob = lambda content: git.calc_hash('blob', content) + new_tree = lambda shalist: git.calc_hash('tree', git.tree_encode(shalist)) + +if opt.blobs: + shalist = hashsplit.split_to_blobs(new_blob, files, keep_boundaries=opt.keep_boundaries, progress=prog) for (sha, size, level) in shalist: print sha.encode('hex') reprogress() -elif pack_writer: # tree or commit or name - shalist = hashsplit.split_to_shalist(pack_writer, files, - keep_boundaries=opt.keep_boundaries, - progress=prog) - tree = pack_writer.new_tree(shalist) +elif opt.tree or opt.commit or opt.name: + if opt.name: # insert dummy_name which may be used as a restore target + mode, sha = \ + hashsplit.split_to_blob_or_tree(new_blob, new_tree, files, + keep_boundaries=opt.keep_boundaries, + progress=prog) + splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode) + shalist = [(mode, splitfile_name, sha)] + else: + shalist = hashsplit.split_to_shalist( + new_blob, new_tree, files, + keep_boundaries=opt.keep_boundaries, progress=prog) + tree = new_tree(shalist) else: last = 0 it = hashsplit.hashsplit_iter(files, @@ -157,9 +196,11 @@ if opt.verbose: if opt.tree: print tree.encode('hex') if opt.commit or opt.name: - msg = 'bup split\n\nGenerated by command:\n%r' % sys.argv + msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv ref = opt.name and ('refs/heads/%s' % opt.name) or None - commit = pack_writer.new_commit(oldref, tree, date, msg) + userline = '%s <%s@%s>' % (userfullname(), username(), hostname()) + commit = pack_writer.new_commit(tree, oldref, userline, date, None, + userline, date, None, msg) if opt.commit: print commit.encode('hex') @@ -178,7 +219,7 @@ if cli: secs = time.time() - start_time size = hashsplit.total_split if opt.bench: - log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n' + log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n' % (size/1024., secs, size/1024./secs)) if saved_errors: