Use the new qprogress() function in more places.

[bup.git] / cmd / split-cmd.py
diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py
index 44c3cdc0b960b8e7b6589c05f1ddb0d1bc72480c..756d1b566a3937c7db7e3b62d48b98529b18579a 100755
--- a/cmd/split-cmd.py
+++ b/cmd/split-cmd.py
@@ -15,6 +15,7 @@ n,name=    name of backup set to update (if any)
  d,date=    date for the commit (seconds since the epoch)
  q,quiet    don't print progress messages
  v,verbose  increase log output (can be used more than once)
+git-ids    read a list of git object ids from stdin and split their contents
  keep-boundaries  don't let one chunk span two input files
  noop       don't actually save the data anywhere
  copy       just copy input to output, hashsplitting along the way
@@ -24,7 +25,7 @@ max-pack-objects=  maximum number of objects in a single pack
  fanout=    maximum number of blobs in a single tree
  bwlimit=   maximum bytes/sec to transmit to server
  """
-o = options.Options('bup split', optspec)
+o = options.Options(optspec)
  (opt, flags, extra) = o.parse(sys.argv[1:])
  
  handle_ctrl_c()
@@ -35,6 +36,8 @@ if not (opt.blobs or opt.tree or opt.commit or opt.name or
  if (opt.noop or opt.copy) and (opt.blobs or opt.tree or 
                                 opt.commit or opt.name):
      o.fatal('-N and --copy are incompatible with -b, -t, -c, -n')
+if extra and opt.git_ids:
+    o.fatal("don't provide filenames when using --git-ids")
  
  if opt.verbose >= 2:
      git.verbose = opt.verbose - 1
@@ -55,21 +58,29 @@ else:
      date = time.time()
  
  
+total_bytes = 0
+def prog(filenum, nbytes):
+    global total_bytes
+    total_bytes += nbytes
+    if filenum > 0:
+        qprogress('Splitting: file #%d, %d kbytes\r'
+                  % (filenum+1, total_bytes/1024))
+    else:
+        qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
+
+
  is_reverse = os.environ.get('BUP_SERVER_REVERSE')
  if is_reverse and opt.remote:
      o.fatal("don't use -r in reverse mode; it's automatic")
  start_time = time.time()
  
+if opt.name and opt.name.startswith('.'):
+    o.fatal("'%s' is not a valid branch name." % opt.name)
  refname = opt.name and 'refs/heads/%s' % opt.name or None
  if opt.noop or opt.copy:
      cli = pack_writer = oldref = None
  elif opt.remote or is_reverse:
-    if opt.remote and opt.remote.find(":") == -1:
-        o.fatal("--remote argument must contain a colon")
-    try:
-        cli = client.Client(opt.remote)
-    except client.ClientError:
-        o.fatal("server exited unexpectedly; see errors above")
+    cli = client.Client(opt.remote)
      oldref = refname and cli.read_ref(refname) or None
      pack_writer = cli.new_packwriter()
  else:
@@ -77,15 +88,51 @@ else:
      oldref = refname and git.read_ref(refname) or None
      pack_writer = git.PackWriter()
  
-files = extra and (open(fn) for fn in extra) or [sys.stdin]
+if opt.git_ids:
+    # the input is actually a series of git object ids that we should retrieve
+    # and split.
+    #
+    # This is a bit messy, but basically it converts from a series of
+    # CatPipe.get() iterators into a series of file-type objects.
+    # It would be less ugly if either CatPipe.get() returned a file-like object
+    # (not very efficient), or split_to_shalist() expected an iterator instead
+    # of a file.
+    cp = git.CatPipe()
+    class IterToFile:
+        def __init__(self, it):
+            self.it = iter(it)
+        def read(self, size):
+            v = next(self.it)
+            return v or ''
+    def read_ids():
+        while 1:
+            line = sys.stdin.readline()
+            if not line:
+                break
+            if line:
+                line = line.strip()
+            try:
+                it = cp.get(line.strip())
+                next(it)  # skip the file type
+            except KeyError, e:
+                add_error('error: %s' % e)
+                continue
+            yield IterToFile(it)
+    files = read_ids()
+else:
+    # the input either comes from a series of files or from stdin.
+    files = extra and (open(fn) for fn in extra) or [sys.stdin]
+
  if pack_writer:
      shalist = hashsplit.split_to_shalist(pack_writer, files,
-                                         keep_boundaries=opt.keep_boundaries)
+                                         keep_boundaries=opt.keep_boundaries,
+                                         progress=prog)
      tree = pack_writer.new_tree(shalist)
  else:
      last = 0
      for (blob, bits) in hashsplit.hashsplit_iter(files,
-                                    keep_boundaries=opt.keep_boundaries):
+                                    keep_boundaries=opt.keep_boundaries,
+                                    progress=prog):
          hashsplit.total_split += len(blob)
          if opt.copy:
              sys.stdout.write(str(blob))
@@ -126,3 +173,7 @@ size = hashsplit.total_split
  if opt.bench:
      log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
          % (size/1024., secs, size/1024./secs))
+
+if saved_errors:
+    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
+    sys.exit(1)