arthur.barton.de Git - bup.git/commitdiff
cmd/split: print a progress counter.
authorAvery Pennarun <apenwarr@gmail.com>
Wed, 22 Sep 2010 14:13:18 +0000 (07:13 -0700)
committerAvery Pennarun <apenwarr@gmail.com>
Wed, 22 Sep 2010 14:33:51 +0000 (14:33 +0000)
We don't know how many bytes we're going to split in total, but we can at
least print the total number of bytes we've seen so far.

Also fix cmd/random to *not* print progress messages by default, since my
test situation is
bup random 100M | bup split -b
and they scribble over each other when they both print progress output.  bup
random now gets a '-v' option.

Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
Documentation/bup-random.md
cmd/random-cmd.py
cmd/split-cmd.py
lib/bup/_helpers.c
lib/bup/hashsplit.py

index fe710f194aa9af77d3f32ab772353364f375bb3d..7a4c3e54d7bc17748ca8166a872bceb1d5b8c131 100644 (file)
@@ -8,7 +8,7 @@ bup-random - generate a stream of random output
 
 # SYNOPSIS
 
-bup random [-S seed] [-f] <numbytes>
+bup random [-S seed] [-fv] <numbytes>
 
 # DESCRIPTION
 
@@ -47,6 +47,10 @@ can be helpful when running microbenchmarks.
 :   generate output even if stdout is a tty.  (Generating
     random data to a tty is generally considered
     ill-advised, but you can do it if you really want.)
+    
+-v, --verbose
+:   print a progress message showing the number of bytes that
+    have been output so far.
 
 # EXAMPLES
     
index 19732b9e18e519abd5848f6ca8a6f1954230638e..873b511f26428163d65f024fcd0666fca4d28395 100755 (executable)
@@ -8,6 +8,7 @@ bup random [-S seed] <numbytes>
 --
 S,seed=   optional random number seed [1]
 f,force   print random data to stdout even if it's a tty
+v,verbose print byte counter to stderr
 """
 o = options.Options('bup random', optspec)
 (opt, flags, extra) = o.parse(sys.argv[1:])
@@ -21,7 +22,8 @@ handle_ctrl_c()
 
 if opt.force or (not os.isatty(1) and
                  not atoi(os.environ.get('BUP_FORCE_TTY')) & 1):
-    _helpers.write_random(sys.stdout.fileno(), total, opt.seed)
+    _helpers.write_random(sys.stdout.fileno(), total, opt.seed,
+                          opt.verbose and 1 or 0)
 else:
     log('error: not writing binary data to a terminal. Use -f to force.\n')
     sys.exit(1)
index 2a72bd673fb912d47b89f8713578342c0d5849b2..4155b24014f6a079e06acc90d356a562bf4cd1af 100755 (executable)
@@ -58,6 +58,21 @@ else:
     date = time.time()
 
 
+last_prog = total_bytes = 0
+def prog(filenum, nbytes):
+    global last_prog, total_bytes
+    total_bytes += nbytes
+    now = time.time()
+    if now - last_prog < 0.2:
+        return
+    if filenum > 0:
+        progress('Splitting: file #%d, %d kbytes\r'
+                 % (filenum+1, total_bytes/1024))
+    else:
+        progress('Splitting: %d kbytes\r' % (total_bytes/1024))
+    last_prog = now
+
+
 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
 if is_reverse and opt.remote:
     o.fatal("don't use -r in reverse mode; it's automatic")
@@ -117,12 +132,14 @@ else:
 
 if pack_writer:
     shalist = hashsplit.split_to_shalist(pack_writer, files,
-                                         keep_boundaries=opt.keep_boundaries)
+                                         keep_boundaries=opt.keep_boundaries,
+                                         progress=prog)
     tree = pack_writer.new_tree(shalist)
 else:
     last = 0
     for (blob, bits) in hashsplit.hashsplit_iter(files,
-                                    keep_boundaries=opt.keep_boundaries):
+                                    keep_boundaries=opt.keep_boundaries,
+                                    progress=prog):
         hashsplit.total_split += len(blob)
         if opt.copy:
             sys.stdout.write(str(blob))
index 75d26030545fc9b59041937c5e35a46401bdd0cc..7a144d24baabfe6ff3799856b3f4da114077f37d 100644 (file)
@@ -105,11 +105,11 @@ static PyObject *extract_bits(PyObject *self, PyObject *args)
 static PyObject *write_random(PyObject *self, PyObject *args)
 {
     uint32_t buf[1024/4];
-    int fd = -1, seed = 0;
+    int fd = -1, seed = 0, verbose = 0;
     ssize_t ret;
     long long len = 0, kbytes = 0, written = 0;
 
-    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+    if (!PyArg_ParseTuple(args, "iLii", &fd, &len, &seed, &verbose))
        return NULL;
     
     srandom(seed);
@@ -125,7 +125,7 @@ static PyObject *write_random(PyObject *self, PyObject *args)
        written += ret;
        if (ret < (int)sizeof(buf))
            break;
-       if (kbytes/1024 > 0 && !(kbytes%1024))
+       if (verbose && kbytes/1024 > 0 && !(kbytes%1024))
            fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024);
     }
     
index d73e9f689acb27662241ff6af0e8d2b98b509566..5de6a3fa1870e580fe1cd89e402c2d2b6fe64aa5 100644 (file)
@@ -45,10 +45,13 @@ def splitbuf(buf):
     return (None, 0)
 
 
-def blobiter(files):
-    for f in files:
+def blobiter(files, progress=None):
+    for filenum,f in enumerate(files):
         ofs = 0
+        b = ''
         while 1:
+            if progress:
+                progress(filenum, len(b))
             fadvise_done(f, max(0, ofs - 1024*1024))
             b = f.read(BLOB_HWM)
             ofs += len(b)
@@ -72,10 +75,10 @@ def drainbuf(buf, finalize):
         yield (buf.get(buf.used()), 0)
 
 
-def _hashsplit_iter(files):
+def _hashsplit_iter(files, progress):
     assert(BLOB_HWM > BLOB_MAX)
     buf = Buf()
-    fi = blobiter(files)
+    fi = blobiter(files, progress)
     while 1:
         for i in drainbuf(buf, finalize=False):
             yield i
@@ -89,23 +92,30 @@ def _hashsplit_iter(files):
             buf.put(bnew)
 
 
-def _hashsplit_iter_keep_boundaries(files):
-    for f in files:
-        for i in _hashsplit_iter([f]):
+def _hashsplit_iter_keep_boundaries(files, progress):
+    for real_filenum,f in enumerate(files):
+        if progress:
+            def prog(filenum, nbytes):
+                # the inner _hashsplit_iter doesn't know the real file count,
+                # so we'll replace it here.
+                return progress(real_filenum, nbytes)
+        else:
+            prog = None
+        for i in _hashsplit_iter([f], progress=prog):
             yield i
 
 
-def hashsplit_iter(files, keep_boundaries):
+def hashsplit_iter(files, keep_boundaries, progress):
     if keep_boundaries:
-        return _hashsplit_iter_keep_boundaries(files)
+        return _hashsplit_iter_keep_boundaries(files, progress)
     else:
-        return _hashsplit_iter(files)
+        return _hashsplit_iter(files, progress)
 
 
 total_split = 0
-def _split_to_blobs(w, files, keep_boundaries):
+def _split_to_blobs(w, files, keep_boundaries, progress):
     global total_split
-    for (blob, bits) in hashsplit_iter(files, keep_boundaries):
+    for (blob, bits) in hashsplit_iter(files, keep_boundaries, progress):
         sha = w.new_blob(blob)
         total_split += len(blob)
         if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
@@ -140,8 +150,8 @@ def _squish(w, stacks, n):
         i += 1
 
 
-def split_to_shalist(w, files, keep_boundaries):
-    sl = _split_to_blobs(w, files, keep_boundaries)
+def split_to_shalist(w, files, keep_boundaries, progress=None):
+    sl = _split_to_blobs(w, files, keep_boundaries, progress)
     if not fanout:
         shal = []
         for (sha,size,bits) in sl: