arthur.barton.de Git - bup.git/commitdiff
When there are multiple overlapping .midx files, discard redundant ones.
author: Avery Pennarun <apenwarr@gmail.com>
Mon, 25 Jan 2010 06:41:44 +0000 (01:41 -0500)
committer: Avery Pennarun <apenwarr@gmail.com>
Mon, 25 Jan 2010 06:41:44 +0000 (01:41 -0500)
That way, if someone generates a .midx for a subset of .idx files, then
another for the *entire* set of .idx files, we'll automatically ignore the
former one, thus increasing search speed and reducing memory thrashing
even further.

git.py
memtest.py

diff --git a/git.py b/git.py
index 9dd9662efe4c6a382de0b3ccdb2326494bffbbdb..67e14510dd6d3527df226960b4be8f5aa795f6c3 100644 (file)
--- a/git.py
+++ b/git.py
@@ -2,6 +2,7 @@ import os, errno, zlib, time, sha, subprocess, struct, stat, re
 from helpers import *
 
 verbose = 0
+ignore_midx = 0
 home_repodir = os.path.expanduser('~/.bup')
 repodir = None
 
@@ -218,20 +219,29 @@ class MultiPackIndex:
         return None
 
     def refresh(self):
+        global ignore_midx
         d = dict([(p.name, 1) for p in self.packs])
         if os.path.exists(self.dir):
-            for f in os.listdir(self.dir):
-                full = os.path.join(self.dir, f)
-                if f.endswith('.midx') and not d.get(full):
-                    ix = PackMidx(full)
-                    self.packs.append(ix)
-                    for name in ix.idxnames:
-                        d[os.path.join(self.dir, name)] = 1
+            if not ignore_midx:
+                midxl = []
+                for f in os.listdir(self.dir):
+                    full = os.path.join(self.dir, f)
+                    if f.endswith('.midx') and not d.get(full):
+                        midxl.append(PackMidx(full))
+                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
+                for ix in midxl:
+                    any = 0
+                    for sub in ix.idxnames:
+                        if not d.get(os.path.join(self.dir, sub)):
+                            self.packs.append(ix)
+                            for name in ix.idxnames:
+                                d[os.path.join(self.dir, name)] = 1
+                            break
             for f in os.listdir(self.dir):
                 full = os.path.join(self.dir, f)
                 if f.endswith('.idx') and not d.get(full):
                     self.packs.append(PackIndex(full))
-        #log('MultiPackIndex: using %d packs.\n' % len(self.packs))
+        log('MultiPackIndex: using %d packs.\n' % len(self.packs))
 
     def add(self, hash):
         self.also[hash] = 1
diff --git a/memtest.py b/memtest.py
index efaf1517c31c9533fe58ad075cc8f5da8f45a7fa..de24163bd95ac69dfe0ce0b85bac50e48dc1dd83 100755 (executable)
--- a/memtest.py
+++ b/memtest.py
@@ -28,6 +28,7 @@ memtest [-n elements] [-c cycles]
 --
 n,number=  number of objects per cycle
 c,cycles=  number of cycles to run
+ignore-midx  ignore .midx files, use only .idx files
 """
 o = options.Options(sys.argv[0], optspec)
 (opt, flags, extra) = o.parse(sys.argv[1:])
@@ -35,6 +36,8 @@ o = options.Options(sys.argv[0], optspec)
 if extra:
     o.usage()
 
+git.ignore_midx = opt.ignore_midx
+
 git.check_repo_or_die()
 m = git.MultiPackIndex(git.repo('objects/pack'))