arthur.barton.de Git - bup.git/commitdiff
PackIdxList.refresh(): remember to exclude old midx files. bup-0.12b
author: Avery Pennarun <apenwarr@gmail.com>
Sun, 14 Mar 2010 06:59:45 +0000 (01:59 -0500)
committer: Avery Pennarun <apenwarr@gmail.com>
Sun, 14 Mar 2010 09:05:34 +0000 (05:05 -0400)
Previously, if you called refresh(), it would fail to consider
the contents of already-loaded .midx files as already-loaded.  That means
it would load all the constituent .idx files, so you'd actually lose all the
advantages of the .midx after the first refresh().

Thus, the midx optimization mainly worked before you filled up your first
pack (about 1GB of data saved) or until you got an index suggestion.  This
explains why backups would slow down significantly after running for a
while.

Also, get rid of the stupid forget_packs option; just automatically prune
the packs that aren't relevant after the refresh.  This avoids the
possibility of weird behaviour if you set forget_packs incorrectly (which we
did).

cmd/server-cmd.py
lib/bup/git.py
t/tclient.py

index 647657b88bc80f3a9b42eebf4bd99207e022d3c5..495fedaa915253056fee75ee5887ea27edeb8dfb 100755 (executable)
@@ -83,7 +83,7 @@ def receive_objects(conn, junk):
             # fix that deficiency of midx files eventually, although it'll
             # make the files bigger.  This method is certainly not very
             # efficient.
-            w.objcache.refresh(skip_midx = True, forget_packs = True)
+            w.objcache.refresh(skip_midx = True)
             oldpack = w.objcache.exists(sha)
             log('new suggestion: %r\n' % oldpack)
             assert(oldpack)
index 5a688931a48e68fea0cb4f46ef5d6472a34185f6..789e4b1926b417d8bed5aba5be5296338f160709 100644 (file)
@@ -230,14 +230,17 @@ class PackIdxList:
                 return p.name
         return None
 
-    def refresh(self, skip_midx = False, forget_packs = False):
-        if forget_packs:
-            self.packs = []
+    def refresh(self, skip_midx = False):
         skip_midx = skip_midx or ignore_midx
-        d = dict((p.name, 1) for p in self.packs)
+        d = dict((p.name, p) for p in self.packs
+                 if not skip_midx or not isinstance(p, PackMidx))
         if os.path.exists(self.dir):
             if not skip_midx:
                 midxl = []
+                for ix in self.packs:
+                    if isinstance(ix, PackMidx):
+                        for name in ix.idxnames:
+                            d[os.path.join(self.dir, name)] = ix
                 for f in os.listdir(self.dir):
                     full = os.path.join(self.dir, f)
                     if f.endswith('.midx') and not d.get(full):
@@ -255,11 +258,12 @@ class PackIdxList:
                 for ix in midxl:
                     any = 0
                     for sub in ix.idxnames:
-                        if not d.get(os.path.join(self.dir, sub)):
-                            self.packs.append(ix)
-                            d[ix.name] = 1
+                        found = d.get(os.path.join(self.dir, sub))
+                        if not found or isinstance(found, PackIdx):
+                            # doesn't exist, or exists but not in a midx
+                            d[ix.name] = ix
                             for name in ix.idxnames:
-                                d[os.path.join(self.dir, name)] = 1
+                                d[os.path.join(self.dir, name)] = ix
                             any += 1
                             break
                     if not any:
@@ -269,8 +273,9 @@ class PackIdxList:
             for f in os.listdir(self.dir):
                 full = os.path.join(self.dir, f)
                 if f.endswith('.idx') and not d.get(full):
-                    self.packs.append(PackIdx(full))
-                    d[full] = 1
+                    ix = PackIdx(full)
+                    d[full] = ix
+            self.packs = list(set(d.values()))
         log('PackIdxList: using %d index%s.\n' 
             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 
index 6e7dd9a45a3fa2639eb922b7386591500d09b367..9a6a8885dab893c7f6f3515444051b1954a54ec9 100644 (file)
@@ -1,4 +1,4 @@
-import os, time, random
+import os, time, random, subprocess
 from bup import client, git, hashsplit
 from wvtest import *
 
@@ -8,19 +8,18 @@ def randbytes(sz):
         s += chr(random.randrange(0,256))
     return s
 
+s1 = randbytes(10000)
+s2 = randbytes(10000)
+    
 @wvtest
 def test_server_split_with_indexes():
     os.environ['BUP_MAIN_EXE'] = './bup'
     os.environ['BUP_DIR'] = bupdir = 'buptest_tclient.tmp'
-    git.init_repo()
-    git.check_repo_or_die()
+    git.init_repo(bupdir)
     lw = git.PackWriter()
     c = client.Client(bupdir, create=True)
     rw = c.new_packwriter()
 
-    s1 = randbytes(10000)
-    s2 = randbytes(10000)
-    
     lw.new_blob(s1)
     lw.close()
 
@@ -28,3 +27,26 @@ def test_server_split_with_indexes():
     rw.breakpoint()
     rw.new_blob(s1)
     
+
+@wvtest
+def test_midx_refreshing():
+    os.environ['BUP_MAIN_EXE'] = bupmain = './bup'
+    os.environ['BUP_DIR'] = bupdir = 'buptest_tmidx.tmp'
+    subprocess.call(['rm', '-rf', bupdir])
+    git.init_repo(bupdir)
+    lw = git.PackWriter()
+    lw.new_blob(s1)
+    lw.breakpoint()
+    lw.new_blob(s2)
+    del lw
+    pi = git.PackIdxList(bupdir + '/objects/pack')
+    WVPASSEQ(len(pi.packs), 2)
+    pi.refresh()
+    WVPASSEQ(len(pi.packs), 2)
+    subprocess.call([bupmain, 'midx', '-f'])
+    pi.refresh()
+    WVPASSEQ(len(pi.packs), 1)
+    pi.refresh(skip_midx=True)
+    WVPASSEQ(len(pi.packs), 2)
+    pi.refresh(skip_midx=False)
+    WVPASSEQ(len(pi.packs), 1)