+
+from __future__ import absolute_import
import glob, os, subprocess, sys, tempfile
from bup import bloom, git, midx
+from bup.compat import hexstr, range
from bup.git import MissingObject, walk_object
-from bup.helpers import log, progress, qprogress
+from bup.helpers import Nonlocal, log, progress, qprogress
from os.path import basename
# This garbage collector uses a Bloom filter to track the live objects
# FIXME: add a bloom filter tuning parameter?
-class Nonlocal:
- pass
-
-
def count_objects(dir, verbosity):
# For now we'll just use open_idx(), but we could probably be much
# more efficient since all we need is a single integer (the last
def report_live_item(n, total, ref_name, ref_id, item, verbosity):
status = 'scanned %02.2f%%' % (n * 100.0 / total)
- hex_id = ref_id.encode('hex')
+ hex_id = hexstr(ref_id)
dirslash = '/' if item.type == 'tree' else ''
chunk_path = item.chunk_path
# FIXME: allow selection of k?
# FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
live_objs = bloom.create(bloom_filename, expected=existing_count, k=None)
+ # Safe to unlink now: live_objs will hold on to the fd until close or exit
+ os.unlink(bloom_filename)
stop_at, trees_visited = None, None
if prune_visited_trees:
trees_visited = set()
- stop_at = lambda (x): x.decode('hex') in trees_visited
+ stop_at = lambda x: x.decode('hex') in trees_visited
approx_live_count = 0
for ref_name, ref_id in git.list_refs():
- for item in walk_object(cat_pipe, ref_id.encode('hex'),
+ for item in walk_object(cat_pipe.get, ref_id.encode('hex'),
stop_at=stop_at,
include_data=None):
# FIXME: batch ids
if verbosity:
report_live_item(approx_live_count, existing_count,
ref_name, ref_id, item, verbosity)
- bin_id = item.id.decode('hex')
if trees_visited is not None and item.type == 'tree':
- trees_visited.add(bin_id)
+ trees_visited.add(item.oid)
if verbosity:
- if not live_objs.exists(bin_id):
- live_objs.add(bin_id)
+ if not live_objs.exists(item.oid):
+ live_objs.add(item.oid)
approx_live_count += 1
else:
- live_objs.add(bin_id)
+ live_objs.add(item.oid)
trees_visited = None
if verbosity:
log('expecting to retain about %.2f%% unnecessary objects\n'
if verbosity and new_pack_prefix:
log('created ' + basename(new_pack_prefix) + '\n')
for p in ns.stale_files:
+ if new_pack_prefix and p.startswith(new_pack_prefix):
+ continue # Don't remove the new pack file
if verbosity:
log('removing ' + basename(p) + '\n')
os.unlink(p)
+ if ns.stale_files: # So git cat-pipe will close them
+ cat_pipe.restart()
ns.stale_files = []
writer = git.PackWriter(objcache_maker=None,
idx = git.open_idx(idx_name)
idx_live_count = 0
- for i in xrange(0, len(idx)):
+ for i in range(0, len(idx)):
sha = idx.shatable[i * 20 : (i + 1) * 20]
if live_objects.exists(sha):
idx_live_count += 1
if verbosity:
log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
live_frac * 100))
- for i in xrange(0, len(idx)):
+ for i in range(0, len(idx)):
sha = idx.shatable[i * 20 : (i + 1) * 20]
if live_objects.exists(sha):
item_it = cat_pipe.get(sha.encode('hex'))
- type = item_it.next()
- writer.just_write(sha, type, ''.join(item_it))
+ _, typ, _ = next(item_it)
+ writer.just_write(sha, typ, ''.join(item_it))
ns.stale_files.append(idx_name)
ns.stale_files.append(idx_name[:-3] + 'pack')
live_objects = find_live_objects(existing_count, cat_pipe,
verbosity=verbosity)
except MissingObject as ex:
- log('bup: missing object %r \n' % ex.id.encode('hex'))
+ log('bup: missing object %s \n' % hexstr(ex.oid))
sys.exit(1)
try:
# FIXME: just rename midxes and bloom, and restore them at the end if
# we didn't change any packs?
+ packdir = git.repo('objects/pack')
if verbosity: log('clearing midx files\n')
- midx.clear_midxes()
+ midx.clear_midxes(packdir)
if verbosity: log('clearing bloom filter\n')
- bloom.clear_bloom(git.repo('objects/pack'))
+ bloom.clear_bloom(packdir)
if verbosity: log('clearing reflog\n')
expirelog_cmd = ['git', 'reflog', 'expire', '--all', '--expire=all']
- expirelog = subprocess.Popen(expirelog_cmd, preexec_fn = git._gitenv())
+ expirelog = subprocess.Popen(expirelog_cmd, env=git._gitenv())
git._git_wait(' '.join(expirelog_cmd), expirelog)
if verbosity: log('removing unreachable data\n')
sweep(live_objects, existing_count, cat_pipe,
verbosity)
finally:
live_objects.close()
- os.unlink(live_objects.name)