arthur.barton.de Git - bup.git/blobdiff - lib/bup/gc.py
compat.hexstr: add and use
[bup.git] / lib / bup / gc.py
index 3167ef57334e7f71cb88ba54e91be156e334c6cc..abf157ed521192b496f7877af3d74f641b09f5d0 100644 (file)
@@ -1,7 +1,10 @@
+
+from __future__ import absolute_import
 import glob, os, subprocess, sys, tempfile
 from bup import bloom, git, midx
+from bup.compat import hexstr, range
 from bup.git import MissingObject, walk_object
-from bup.helpers import log, progress, qprogress
+from bup.helpers import Nonlocal, log, progress, qprogress
 from os.path import basename
 
 # This garbage collector uses a Bloom filter to track the live objects
@@ -40,10 +43,6 @@ from os.path import basename
 # FIXME: add a bloom filter tuning parameter?
 
 
-class Nonlocal:
-    pass
-
-
 def count_objects(dir, verbosity):
     # For now we'll just use open_idx(), but we could probably be much
     # more efficient since all we need is a single integer (the last
@@ -61,7 +60,7 @@ def count_objects(dir, verbosity):
 
 def report_live_item(n, total, ref_name, ref_id, item, verbosity):
     status = 'scanned %02.2f%%' % (n * 100.0 / total)
-    hex_id = ref_id.encode('hex')
+    hex_id = hexstr(ref_id)
     dirslash = '/' if item.type == 'tree' else ''
     chunk_path = item.chunk_path
 
@@ -98,28 +97,29 @@ def find_live_objects(existing_count, cat_pipe, verbosity=0):
     # FIXME: allow selection of k?
     # FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
     live_objs = bloom.create(bloom_filename, expected=existing_count, k=None)
+    # live_objs will hold on to the fd until close or exit
+    os.unlink(bloom_filename)
     stop_at, trees_visited = None, None
     if prune_visited_trees:
         trees_visited = set()
-        stop_at = lambda (x): x.decode('hex') in trees_visited
+        stop_at = lambda x: x.decode('hex') in trees_visited
     approx_live_count = 0
     for ref_name, ref_id in git.list_refs():
-        for item in walk_object(cat_pipe, ref_id.encode('hex'),
+        for item in walk_object(cat_pipe.get, ref_id.encode('hex'),
                                 stop_at=stop_at,
                                 include_data=None):
             # FIXME: batch ids
             if verbosity:
                 report_live_item(approx_live_count, existing_count,
                                  ref_name, ref_id, item, verbosity)
-            bin_id = item.id.decode('hex')
             if trees_visited is not None and item.type == 'tree':
-                trees_visited.add(bin_id)
+                trees_visited.add(item.oid)
             if verbosity:
-                if not live_objs.exists(bin_id):
-                    live_objs.add(bin_id)
+                if not live_objs.exists(item.oid):
+                    live_objs.add(item.oid)
                     approx_live_count += 1
             else:
-                live_objs.add(bin_id)
+                live_objs.add(item.oid)
     trees_visited = None
     if verbosity:
         log('expecting to retain about %.2f%% unnecessary objects\n'
@@ -137,9 +137,13 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
         if verbosity and new_pack_prefix:
             log('created ' + basename(new_pack_prefix) + '\n')
         for p in ns.stale_files:
+            if new_pack_prefix and p.startswith(new_pack_prefix):
+                continue  # Don't remove the new pack file
             if verbosity:
                 log('removing ' + basename(p) + '\n')
             os.unlink(p)
+        if ns.stale_files:  # So git cat-pipe will close them
+            cat_pipe.restart()
         ns.stale_files = []
 
     writer = git.PackWriter(objcache_maker=None,
@@ -156,7 +160,7 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
         idx = git.open_idx(idx_name)
 
         idx_live_count = 0
-        for i in xrange(0, len(idx)):
+        for i in range(0, len(idx)):
             sha = idx.shatable[i * 20 : (i + 1) * 20]
             if live_objects.exists(sha):
                 idx_live_count += 1
@@ -180,12 +184,12 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
         if verbosity:
             log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
                                                   live_frac * 100))
-        for i in xrange(0, len(idx)):
+        for i in range(0, len(idx)):
             sha = idx.shatable[i * 20 : (i + 1) * 20]
             if live_objects.exists(sha):
                 item_it = cat_pipe.get(sha.encode('hex'))
-                type = item_it.next()
-                writer.just_write(sha, type, ''.join(item_it))
+                _, typ, _ = next(item_it)
+                writer.just_write(sha, typ, ''.join(item_it))
 
         ns.stale_files.append(idx_name)
         ns.stale_files.append(idx_name[:-3] + 'pack')
@@ -223,18 +227,19 @@ def bup_gc(threshold=10, compression=1, verbosity=0):
             live_objects = find_live_objects(existing_count, cat_pipe,
                                              verbosity=verbosity)
         except MissingObject as ex:
-            log('bup: missing object %r \n' % ex.id.encode('hex'))
+            log('bup: missing object %s \n' % hexstr(ex.oid))
             sys.exit(1)
         try:
             # FIXME: just rename midxes and bloom, and restore them at the end if
             # we didn't change any packs?
+            packdir = git.repo('objects/pack')
             if verbosity: log('clearing midx files\n')
-            midx.clear_midxes()
+            midx.clear_midxes(packdir)
             if verbosity: log('clearing bloom filter\n')
-            bloom.clear_bloom(git.repo('objects/pack'))
+            bloom.clear_bloom(packdir)
             if verbosity: log('clearing reflog\n')
             expirelog_cmd = ['git', 'reflog', 'expire', '--all', '--expire=all']
-            expirelog = subprocess.Popen(expirelog_cmd, preexec_fn = git._gitenv())
+            expirelog = subprocess.Popen(expirelog_cmd, env=git._gitenv())
             git._git_wait(' '.join(expirelog_cmd), expirelog)
             if verbosity: log('removing unreachable data\n')
             sweep(live_objects, existing_count, cat_pipe,
@@ -242,4 +247,3 @@ def bup_gc(threshold=10, compression=1, verbosity=0):
                   verbosity)
         finally:
             live_objects.close()
-            os.unlink(live_objects.name)