]> arthur.barton.de Git - bup.git/commitdiff
git/midx: provide context managers for idx classes
authorLuca Carlon <carlon.luca@gmail.com>
Thu, 21 May 2020 20:28:41 +0000 (22:28 +0200)
committerRob Browning <rlb@defaultvalue.org>
Sat, 23 May 2020 21:14:30 +0000 (16:14 -0500)
Opening files and then mmap()ing them keeps the files open at the
filesystem level, and then they cannot be fully removed until the
fd is closed when giving up the mapping.

On most filesystems, the file still exists but is no longer visible
ion this case. However, at least on CIFS this results in the file
still being visible in the folder, but it can no longer be opened
again, or such. This leads to a crash in 'bup gc' because it wants
to re-evaluate the idx it just tried to delete.

Teach the PackIdx classes the context manager protocol so we can
easily unmap once they're no longer needed, and use that in bup gc
(for now only there).

For consistency, already add the context manager protocol also to
the midx, even if it's not strictly needed yet since bup gc won't
actually do this to an midx.

Signed-off-by: Luca Carlon <carlon.luca@gmail.com>
[add commit message based on error description, add midx part,
remove shatable to avoid live pointers into unmapped region]
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Reviewed-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/gc.py
lib/bup/git.py
lib/bup/midx.py

index b23029d3b75477928dab1038f2e1035581c42d49..e663cc02889c8dad1ac4912681258fa4eb91db7a 100644 (file)
@@ -163,40 +163,39 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
         if verbosity:
             qprogress('preserving live data (%d%% complete)\r'
                       % ((float(collect_count) / existing_count) * 100))
-        idx = git.open_idx(idx_name)
-
-        idx_live_count = 0
-        for sha in idx:
-            if live_objects.exists(sha):
-                idx_live_count += 1
+        with git.open_idx(idx_name) as idx:
+            idx_live_count = 0
+            for sha in idx:
+                if live_objects.exists(sha):
+                    idx_live_count += 1
+
+            collect_count += idx_live_count
+            if idx_live_count == 0:
+                if verbosity:
+                    log('deleting %s\n'
+                        % path_msg(git.repo_rel(basename(idx_name))))
+                ns.stale_files.append(idx_name)
+                ns.stale_files.append(idx_name[:-3] + b'pack')
+                continue
+
+            live_frac = idx_live_count / float(len(idx))
+            if live_frac > ((100 - threshold) / 100.0):
+                if verbosity:
+                    log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
+                                                    live_frac * 100))
+                continue
 
-        collect_count += idx_live_count
-        if idx_live_count == 0:
             if verbosity:
-                log('deleting %s\n'
-                    % path_msg(git.repo_rel(basename(idx_name))))
+                log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
+                                                    live_frac * 100))
+            for sha in idx:
+                if live_objects.exists(sha):
+                    item_it = cat_pipe.get(hexlify(sha))
+                    _, typ, _ = next(item_it)
+                    writer.just_write(sha, typ, b''.join(item_it))
+
             ns.stale_files.append(idx_name)
             ns.stale_files.append(idx_name[:-3] + b'pack')
-            continue
-
-        live_frac = idx_live_count / float(len(idx))
-        if live_frac > ((100 - threshold) / 100.0):
-            if verbosity:
-                log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
-                                                  live_frac * 100))
-            continue
-
-        if verbosity:
-            log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
-                                                  live_frac * 100))
-        for sha in idx:
-            if live_objects.exists(sha):
-                item_it = cat_pipe.get(hexlify(sha))
-                _, typ, _ = next(item_it)
-                writer.just_write(sha, typ, b''.join(item_it))
-
-        ns.stale_files.append(idx_name)
-        ns.stale_files.append(idx_name[:-3] + b'pack')
 
     if verbosity:
         progress('preserving live data (%d%% complete)\n'
index 8777a58df06afd612ef9574327cc4a54c3e805b6..c0ac91f4360452edbedb29945f0bc6051fc1b1d5 100644 (file)
@@ -414,6 +414,12 @@ class PackIdxV1(PackIdx):
         # Avoid slicing shatable for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
@@ -434,6 +440,12 @@ class PackIdxV1(PackIdx):
         for ofs in range(start, start + 24 * self.nsha, 24):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
 
 class PackIdxV2(PackIdx):
     """Object representation of a Git pack index (version 2) file."""
@@ -452,6 +464,12 @@ class PackIdxV2(PackIdx):
         # Avoid slicing this for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha*20)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
@@ -477,6 +495,12 @@ class PackIdxV2(PackIdx):
         for ofs in range(start, start + 20 * self.nsha, 20):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
 
 _mpi_count = 0
 class PackIdxList:
index 0338906735fba53b3372916a588335677cfc044b..5edf88ecf892ffe27e8f1bddfc2dc7ba99f0a343 100644 (file)
@@ -58,6 +58,12 @@ class PackMidx:
     def __del__(self):
         self.close()
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.close()
+
     def _init_failed(self):
         self.bits = 0
         self.entries = 1