lib/bup/cmd/split.py

   1
   2 from __future__ import absolute_import, division, print_function
   3 from binascii import hexlify
   4 import sys, time
   5
   6 from bup import compat, hashsplit, git, options, client
   7 from bup.compat import argv_bytes, environ
   8 from bup.helpers import (add_error, hostname, log, parse_num,
   9                          qprogress, reprogress, saved_errors,
  10                          valid_save_name,
  11                          parse_date_or_fatal)
  12 from bup.io import byte_stream
  13 from bup.pwdgrp import userfullname, username
  14
  15
   # Command-line specification for `bup split`; main() hands this string to
   # options.Options().  The lines before the `--` separator are the usage
   # synopses, and the lines after it declare the mode flags and options
   # together with their help text (exact syntax is defined by bup.options --
   # presumably "short,long=" marks an option taking a value; confirm there).
   16 optspec = """
   17 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
   18 bup split -b OPTIONS [--git-ids | filenames...]
   19 bup split --copy OPTIONS [--git-ids | filenames...]
   20 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
   21 --
   22  Modes:
   23 b,blobs    output a series of blob ids.  Implies --fanout=0.
   24 t,tree     output a tree id
   25 c,commit   output a commit id
   26 n,name=    save the result under the given name
   27 noop       split the input, but throw away the result
   28 copy       split the input, copy it to stdout, don't save to repo
   29  Options:
   30 r,remote=  remote repository path
   31 d,date=    date for the commit (seconds since the epoch)
   32 q,quiet    don't print progress messages
   33 v,verbose  increase log output (can be used more than once)
   34 git-ids    read a list of git object ids from stdin and split their contents
   35 keep-boundaries  don't let one chunk span two input files
   36 bench      print benchmark timings to stderr
   37 max-pack-size=  maximum bytes in a single pack
   38 max-pack-objects=  maximum number of objects in a single pack
   39 fanout=    average number of blobs in a single tree
   40 bwlimit=   maximum bytes/sec to transmit to server
   41 #,compress=  set compression level to # (0-9, 9 is highest) [1]
   42 """
  43
  44 def main(argv):
  45     o = options.Options(optspec)
  46     opt, flags, extra = o.parse_bytes(argv[1:])
  47     if opt.name: opt.name = argv_bytes(opt.name)
  48     if opt.remote: opt.remote = argv_bytes(opt.remote)
  49     if opt.verbose is None: opt.verbose = 0
  50
  51     if not (opt.blobs or opt.tree or opt.commit or opt.name or
  52             opt.noop or opt.copy):
  53         o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
  54     if opt.copy and (opt.blobs or opt.tree):
  55         o.fatal('--copy is incompatible with -b, -t')
  56     if (opt.noop or opt.copy) and (opt.commit or opt.name):
  57         o.fatal('--noop and --copy are incompatible with -c, -n')
  58     if opt.blobs and (opt.tree or opt.commit or opt.name):
  59         o.fatal('-b is incompatible with -t, -c, -n')
  60     if extra and opt.git_ids:
  61         o.fatal("don't provide filenames when using --git-ids")
  62
  63     if opt.verbose >= 2:
  64         git.verbose = opt.verbose - 1
  65         opt.bench = 1
  66
  67     max_pack_size = None
  68     if opt.max_pack_size:
  69         max_pack_size = parse_num(opt.max_pack_size)
  70     max_pack_objects = None
  71     if opt.max_pack_objects:
  72         max_pack_objects = parse_num(opt.max_pack_objects)
  73
  74     if opt.fanout:
  75         hashsplit.fanout = parse_num(opt.fanout)
  76     if opt.blobs:
  77         hashsplit.fanout = 0
  78     if opt.bwlimit:
  79         client.bwlimit = parse_num(opt.bwlimit)
  80     if opt.date:
  81         date = parse_date_or_fatal(opt.date, o.fatal)
  82     else:
  83         date = time.time()
  84
  85     # Hack around lack of nonlocal vars in python 2
  86     total_bytes = [0]
  87     def prog(filenum, nbytes):
  88         total_bytes[0] += nbytes
  89         if filenum > 0:
  90             qprogress('Splitting: file #%d, %d kbytes\r'
  91                       % (filenum+1, total_bytes[0] // 1024))
  92         else:
  93             qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))
  94
  95
  96     is_reverse = environ.get(b'BUP_SERVER_REVERSE')
  97     if is_reverse and opt.remote:
  98         o.fatal("don't use -r in reverse mode; it's automatic")
  99     start_time = time.time()
 100
 101     if opt.name and not valid_save_name(opt.name):
 102         o.fatal("'%r' is not a valid branch name." % opt.name)
 103     refname = opt.name and b'refs/heads/%s' % opt.name or None
 104
 105     if opt.noop or opt.copy:
 106         cli = pack_writer = oldref = None
 107     elif opt.remote or is_reverse:
 108         git.check_repo_or_die()
 109         cli = client.Client(opt.remote)
 110         oldref = refname and cli.read_ref(refname) or None
 111         pack_writer = cli.new_packwriter(compression_level=opt.compress,
 112                                          max_pack_size=max_pack_size,
 113                                          max_pack_objects=max_pack_objects)
 114     else:
 115         git.check_repo_or_die()
 116         cli = None
 117         oldref = refname and git.read_ref(refname) or None
 118         pack_writer = git.PackWriter(compression_level=opt.compress,
 119                                      max_pack_size=max_pack_size,
 120                                      max_pack_objects=max_pack_objects)
 121
 122     input = byte_stream(sys.stdin)
 123
 124     if opt.git_ids:
 125         # the input is actually a series of git object ids that we should retrieve
 126         # and split.
 127         #
 128         # This is a bit messy, but basically it converts from a series of
 129         # CatPipe.get() iterators into a series of file-type objects.
 130         # It would be less ugly if either CatPipe.get() returned a file-like object
 131         # (not very efficient), or split_to_shalist() expected an iterator instead
 132         # of a file.
 133         cp = git.CatPipe()
 134         class IterToFile:
 135             def __init__(self, it):
 136                 self.it = iter(it)
 137             def read(self, size):
 138                 v = next(self.it, None)
 139                 return v or b''
 140         def read_ids():
 141             while 1:
 142                 line = input.readline()
 143                 if not line:
 144                     break
 145                 if line:
 146                     line = line.strip()
 147                 try:
 148                     it = cp.get(line.strip())
 149                     next(it, None)  # skip the file info
 150                 except KeyError as e:
 151                     add_error('error: %s' % e)
 152                     continue
 153                 yield IterToFile(it)
 154         files = read_ids()
 155     else:
 156         # the input either comes from a series of files or from stdin.
 157         files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]
 158
 159     if pack_writer:
 160         new_blob = pack_writer.new_blob
 161         new_tree = pack_writer.new_tree
 162     elif opt.blobs or opt.tree:
 163         # --noop mode
 164         new_blob = lambda content: git.calc_hash(b'blob', content)
 165         new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))
 166
 167     sys.stdout.flush()
 168     out = byte_stream(sys.stdout)
 169
 170     if opt.blobs:
 171         shalist = hashsplit.split_to_blobs(new_blob, files,
 172                                            keep_boundaries=opt.keep_boundaries,
 173                                            progress=prog)
 174         for (sha, size, level) in shalist:
 175             out.write(hexlify(sha) + b'\n')
 176             reprogress()
 177     elif opt.tree or opt.commit or opt.name:
 178         if opt.name: # insert dummy_name which may be used as a restore target
 179             mode, sha = \
 180                 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
 181                                                 keep_boundaries=opt.keep_boundaries,
 182                                                 progress=prog)
 183             splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
 184             shalist = [(mode, splitfile_name, sha)]
 185         else:
 186             shalist = hashsplit.split_to_shalist(
 187                           new_blob, new_tree, files,
 188                           keep_boundaries=opt.keep_boundaries, progress=prog)
 189         tree = new_tree(shalist)
 190     else:
 191         last = 0
 192         it = hashsplit.hashsplit_iter(files,
 193                                       keep_boundaries=opt.keep_boundaries,
 194                                       progress=prog)
 195         for (blob, level) in it:
 196             hashsplit.total_split += len(blob)
 197             if opt.copy:
 198                 sys.stdout.write(str(blob))
 199             megs = hashsplit.total_split // 1024 // 1024
 200             if not opt.quiet and last != megs:
 201                 last = megs
 202
 203     if opt.verbose:
 204         log('\n')
 205     if opt.tree:
 206         out.write(hexlify(tree) + b'\n')
 207     if opt.commit or opt.name:
 208         msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
 209         ref = opt.name and (b'refs/heads/%s' % opt.name) or None
 210         userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
 211         commit = pack_writer.new_commit(tree, oldref, userline, date, None,
 212                                         userline, date, None, msg)
 213         if opt.commit:
 214             out.write(hexlify(commit) + b'\n')
 215
 216     if pack_writer:
 217         pack_writer.close()  # must close before we can update the ref
 218
 219     if opt.name:
 220         if cli:
 221             cli.update_ref(refname, commit, oldref)
 222         else:
 223             git.update_ref(refname, commit, oldref)
 224
 225     if cli:
 226         cli.close()
 227
 228     secs = time.time() - start_time
 229     size = hashsplit.total_split
 230     if opt.bench:
 231         log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
 232             % (size / 1024, secs, size / 1024 / secs))
 233
 234     if saved_errors:
 235         log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
 236         sys.exit(1)