arthur.barton.de Git - bup.git/commitdiff
cmd/split: print a progress counter.
authorAvery Pennarun <apenwarr@gmail.com>
Wed, 22 Sep 2010 14:13:18 +0000 (07:13 -0700)
committerAvery Pennarun <apenwarr@gmail.com>
Wed, 22 Sep 2010 14:33:51 +0000 (14:33 +0000)
We don't know how many bytes we're going to split in total, but we can at
least print the total number of bytes we've seen so far.

Also fix cmd/random to *not* print progress messages by default, since my
test situation is
bup random 100M | bup split -b
and they scribble over each other when they both print progress output.  bup
random now gets a '-v' option.

Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
Documentation/bup-random.md
cmd/random-cmd.py
cmd/split-cmd.py
lib/bup/_helpers.c
lib/bup/hashsplit.py

index fe710f194aa9af77d3f32ab772353364f375bb3d..7a4c3e54d7bc17748ca8166a872bceb1d5b8c131 100644 (file)
@@ -8,7 +8,7 @@ bup-random - generate a stream of random output
 
 # SYNOPSIS
 
-bup random [-S seed] [-f] <numbytes>
+bup random [-S seed] [-fv] <numbytes>
 
 # DESCRIPTION
 
@@ -47,6 +47,10 @@ can be helpful when running microbenchmarks.
 :   generate output even if stdout is a tty.  (Generating
     random data to a tty is generally considered
     ill-advised, but you can do it if you really want.)
+    
+-v, --verbose
+:   print a progress message showing the number of bytes that
+    have been output so far.
 
 # EXAMPLES
     
index 19732b9e18e519abd5848f6ca8a6f1954230638e..873b511f26428163d65f024fcd0666fca4d28395 100755 (executable)
@@ -8,6 +8,7 @@ bup random [-S seed] <numbytes>
 --
 S,seed=   optional random number seed [1]
 f,force   print random data to stdout even if it's a tty
+v,verbose print byte counter to stderr
 """
 o = options.Options('bup random', optspec)
 (opt, flags, extra) = o.parse(sys.argv[1:])
@@ -21,7 +22,8 @@ handle_ctrl_c()
 
 if opt.force or (not os.isatty(1) and
                  not atoi(os.environ.get('BUP_FORCE_TTY')) & 1):
-    _helpers.write_random(sys.stdout.fileno(), total, opt.seed)
+    _helpers.write_random(sys.stdout.fileno(), total, opt.seed,
+                          opt.verbose and 1 or 0)
 else:
     log('error: not writing binary data to a terminal. Use -f to force.\n')
     sys.exit(1)
index 2a72bd673fb912d47b89f8713578342c0d5849b2..4155b24014f6a079e06acc90d356a562bf4cd1af 100755 (executable)
@@ -58,6 +58,21 @@ else:
     date = time.time()
 
 
+last_prog = total_bytes = 0
+def prog(filenum, nbytes):
+    global last_prog, total_bytes
+    total_bytes += nbytes
+    now = time.time()
+    if now - last_prog < 0.2:
+        return
+    if filenum > 0:
+        progress('Splitting: file #%d, %d kbytes\r'
+                 % (filenum+1, total_bytes/1024))
+    else:
+        progress('Splitting: %d kbytes\r' % (total_bytes/1024))
+    last_prog = now
+
+
 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
 if is_reverse and opt.remote:
     o.fatal("don't use -r in reverse mode; it's automatic")
@@ -117,12 +132,14 @@ else:
 
 if pack_writer:
     shalist = hashsplit.split_to_shalist(pack_writer, files,
-                                         keep_boundaries=opt.keep_boundaries)
+                                         keep_boundaries=opt.keep_boundaries,
+                                         progress=prog)
     tree = pack_writer.new_tree(shalist)
 else:
     last = 0
     for (blob, bits) in hashsplit.hashsplit_iter(files,
-                                    keep_boundaries=opt.keep_boundaries):
+                                    keep_boundaries=opt.keep_boundaries,
+                                    progress=prog):
         hashsplit.total_split += len(blob)
         if opt.copy:
             sys.stdout.write(str(blob))
index 75d26030545fc9b59041937c5e35a46401bdd0cc..7a144d24baabfe6ff3799856b3f4da114077f37d 100644 (file)
@@ -105,11 +105,11 @@ static PyObject *extract_bits(PyObject *self, PyObject *args)
 static PyObject *write_random(PyObject *self, PyObject *args)
 {
     uint32_t buf[1024/4];
-    int fd = -1, seed = 0;
+    int fd = -1, seed = 0, verbose = 0;
     ssize_t ret;
     long long len = 0, kbytes = 0, written = 0;
 
-    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+    if (!PyArg_ParseTuple(args, "iLii", &fd, &len, &seed, &verbose))
        return NULL;
     
     srandom(seed);
@@ -125,7 +125,7 @@ static PyObject *write_random(PyObject *self, PyObject *args)
        written += ret;
        if (ret < (int)sizeof(buf))
            break;
-       if (kbytes/1024 > 0 && !(kbytes%1024))
+       if (verbose && kbytes/1024 > 0 && !(kbytes%1024))
            fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024);
     }
     
index d73e9f689acb27662241ff6af0e8d2b98b509566..5de6a3fa1870e580fe1cd89e402c2d2b6fe64aa5 100644 (file)
@@ -45,10 +45,13 @@ def splitbuf(buf):
     return (None, 0)
 
 
-def blobiter(files):
-    for f in files:
+def blobiter(files, progress=None):
+    for filenum,f in enumerate(files):
         ofs = 0
+        b = ''
         while 1:
+            if progress:
+                progress(filenum, len(b))
             fadvise_done(f, max(0, ofs - 1024*1024))
             b = f.read(BLOB_HWM)
             ofs += len(b)
@@ -72,10 +75,10 @@ def drainbuf(buf, finalize):
         yield (buf.get(buf.used()), 0)
 
 
-def _hashsplit_iter(files):
+def _hashsplit_iter(files, progress):
     assert(BLOB_HWM > BLOB_MAX)
     buf = Buf()
-    fi = blobiter(files)
+    fi = blobiter(files, progress)
     while 1:
         for i in drainbuf(buf, finalize=False):
             yield i
@@ -89,23 +92,30 @@ def _hashsplit_iter(files):
             buf.put(bnew)
 
 
-def _hashsplit_iter_keep_boundaries(files):
-    for f in files:
-        for i in _hashsplit_iter([f]):
+def _hashsplit_iter_keep_boundaries(files, progress):
+    for real_filenum,f in enumerate(files):
+        if progress:
+            def prog(filenum, nbytes):
+                # the inner _hashsplit_iter doesn't know the real file count,
+                # so we'll replace it here.
+                return progress(real_filenum, nbytes)
+        else:
+            prog = None
+        for i in _hashsplit_iter([f], progress=prog):
             yield i
 
 
-def hashsplit_iter(files, keep_boundaries):
+def hashsplit_iter(files, keep_boundaries, progress):
     if keep_boundaries:
-        return _hashsplit_iter_keep_boundaries(files)
+        return _hashsplit_iter_keep_boundaries(files, progress)
     else:
-        return _hashsplit_iter(files)
+        return _hashsplit_iter(files, progress)
 
 
 total_split = 0
-def _split_to_blobs(w, files, keep_boundaries):
+def _split_to_blobs(w, files, keep_boundaries, progress):
     global total_split
-    for (blob, bits) in hashsplit_iter(files, keep_boundaries):
+    for (blob, bits) in hashsplit_iter(files, keep_boundaries, progress):
         sha = w.new_blob(blob)
         total_split += len(blob)
         if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
@@ -140,8 +150,8 @@ def _squish(w, stacks, n):
         i += 1
 
 
-def split_to_shalist(w, files, keep_boundaries):
-    sl = _split_to_blobs(w, files, keep_boundaries)
+def split_to_shalist(w, files, keep_boundaries, progress=None):
+    sl = _split_to_blobs(w, files, keep_boundaries, progress)
     if not fanout:
         shal = []
         for (sha,size,bits) in sl: