arthur.barton.de Git - bup.git/commitdiff
git: fix PackIdxList keeping deleted files open
author: Johannes Berg <johannes@sipsolutions.net>
Sat, 18 Jan 2020 21:26:16 +0000 (22:26 +0100)
committer: Rob Browning <rlb@defaultvalue.org>
Sat, 25 Apr 2020 19:27:48 +0000 (14:27 -0500)
When a midx is deleted underneath bup, usually by bup itself running
'bup midx --auto', PackIdxList may keep the deleted file open. This can
cause bup to run out of disk space easily, since these files can
be fairly big and can be recreated multiple times in a backup
run.

To fix this, remove any open PackMidx instances from the list and
close them explicitly.

Out of an abundance of caution, also explicitly close the bloom
instance if we have one - the same issue should apply here even if
I couldn't observe it, since the GC isn't guaranteed to clean up
the object immediately.

I remember debugging this issue years ago without coming to any
good conclusion, and it's been mentioned on the mailing list a few
times as well, e.g.
https://groups.google.com/d/msg/bup-list/AqIyv9n9WPE/-Wl2JVh5AQAJ

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Reviewed-by: Rob Browning <rlb@defaultvalue.org>
(cherry picked from commit 5c746e43600c059c52b5fd78212499e3e9700946)
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/git.py

index becaa73e8a080cd07eb06878d5f286994d114524..41a49d01a4b0c93ae1a86139280ba00db8ffb945 100644 (file)
@@ -509,6 +509,8 @@ class PackIdxList:
         The module-global variable 'ignore_midx' can force this function to
         always act as if skip_midx was True.
         """
+        if self.bloom is not None:
+            self.bloom.close()
         self.bloom = None # Always reopen the bloom as it may have been relaced
         self.do_bloom = False
         skip_midx = skip_midx or ignore_midx
@@ -517,11 +519,22 @@ class PackIdxList:
         if os.path.exists(self.dir):
             if not skip_midx:
                 midxl = []
+                midxes = set(glob.glob(os.path.join(self.dir, b'*.midx')))
+                # remove any *.midx files from our list that no longer exist
+                for ix in list(d.values()):
+                    if not isinstance(ix, midx.PackMidx):
+                        continue
+                    if ix.name in midxes:
+                        continue
+                    # remove the midx
+                    del d[ix.name]
+                    ix.close()
+                    self.packs.remove(ix)
                 for ix in self.packs:
                     if isinstance(ix, midx.PackMidx):
                         for name in ix.idxnames:
                             d[os.path.join(self.dir, name)] = ix
-                for full in glob.glob(os.path.join(self.dir,'*.midx')):
+                for full in midxes:
                     if not d.get(full):
                         mx = midx.PackMidx(full)
                         (mxd, mxf) = os.path.split(mx.name)