This massively decreases virtual memory allocation since we only ever need
to look at a single idx at a time.
In theory, virtual memory doesn't cost us anything, but on 32-bit systems
we can actually run out of address space if we try to map all the idx files
at once on a very large repo.
Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
for name in glob.glob('%s/*.idx' % path):
ix = git.open_idx(name)
ixbase = os.path.basename(name)
- if b is not None and ixbase in b.idxnames:
- rest.append(ix)
+ if b and (ixbase in b.idxnames):
+ rest.append(name)
rest_count += len(ix)
else:
- add.append(ix)
+ add.append(name)
add_count += len(ix)
total = add_count + rest_count
b = git.ShaBloom.create(
tfname, f=tf, readwrite=True, expected=add_count, k=opt.k)
count = 0
- for ix in add:
+ for name in add:
+ ix = git.open_idx(name)
progress('Writing bloom: %d/%d\r' % (count, len(add)))
b.add_idx(ix)
count += 1