arthur.barton.de Git - bup.git/commitdiff
cmd/bloom: map only one .idx file at a time.
author: Avery Pennarun <apenwarr@gmail.com>
Tue, 8 Feb 2011 01:41:00 +0000 (17:41 -0800)
committer: Avery Pennarun <apenwarr@gmail.com>
Tue, 8 Feb 2011 01:50:18 +0000 (17:50 -0800)
This massively decreases virtual memory allocation since we only ever need
to look at a single idx at once.

In theory, VM doesn't cost us anything, but on 32-bit systems we can
actually run out of address space if we try to map all the idx files at
once on a very large repo.

Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
cmd/bloom-cmd.py

index 92cdf8b6652430f925577ceadecdac9e900af499..8ee75869f9139d7068742bd4d0fa15823980b116 100755 (executable)
@@ -30,11 +30,11 @@ def do_bloom(path, outfilename):
     for name in glob.glob('%s/*.idx' % path):
         ix = git.open_idx(name)
         ixbase = os.path.basename(name)
-        if b is not None and ixbase in b.idxnames:
-            rest.append(ix)
+        if b and (ixbase in b.idxnames):
+            rest.append(name)
             rest_count += len(ix)
         else:
-            add.append(ix)
+            add.append(name)
             add_count += len(ix)
     total = add_count + rest_count
 
@@ -68,7 +68,8 @@ def do_bloom(path, outfilename):
         b = git.ShaBloom.create(
                 tfname, f=tf, readwrite=True, expected=add_count, k=opt.k)
     count = 0
-    for ix in add:
+    for name in add:
+        ix = git.open_idx(name)
         progress('Writing bloom: %d/%d\r' % (count, len(add)))
         b.add_idx(ix)
         count += 1