]> arthur.barton.de Git - bup.git/commitdiff
ShaBloom.__del__: replace with context management
authorRob Browning <rlb@defaultvalue.org>
Sun, 17 Oct 2021 17:17:36 +0000 (12:17 -0500)
committerRob Browning <rlb@defaultvalue.org>
Mon, 22 Nov 2021 16:35:28 +0000 (10:35 -0600)
These changes also just use finally in some cases, instead of the more
complex py2 compatible BaseException/with_pending_raise() catch becase
I'm leaning in favor of just dropping python 2 support soon.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/bloom.py
lib/bup/cmd/bloom.py
lib/bup/gc.py
lib/bup/git.py
test/int/test_bloom.py

index 05ebd714b696da13ee1172a24df85c1aae1517ae..97024b90f7da67def21e0296124ea653769392ed 100644 (file)
@@ -84,6 +84,7 @@ from __future__ import absolute_import
 import os, math, struct
 
 from bup import _helpers
+from bup.compat import pending_raise
 from bup.helpers import (debug1, debug2, log, mmap_read, mmap_readwrite,
                          mmap_readwrite_private, unlink)
 
@@ -107,12 +108,13 @@ class ShaBloom:
     """Wrapper which contains data from multiple index files. """
     def __init__(self, filename, f=None, readwrite=False, expected=-1):
         self.name = filename
-        self.rwfile = None
+        self.readwrite = readwrite
+        self.file = None
         self.map = None
         assert(filename.endswith(b'.bloom'))
         if readwrite:
             assert(expected > 0)
-            self.rwfile = f = f or open(filename, 'r+b')
+            self.file = f = f or open(filename, 'r+b')
             f.seek(0)
 
             # Decide if we want to mmap() the pages as writable ('immediate'
@@ -135,13 +137,12 @@ class ShaBloom:
             self.delaywrite = expected > pages
             debug1('bloom: delaywrite=%r\n' % self.delaywrite)
             if self.delaywrite:
-                self.map = mmap_readwrite_private(self.rwfile, close=False)
+                self.map = mmap_readwrite_private(self.file, close=False)
             else:
-                self.map = mmap_readwrite(self.rwfile, close=False)
+                self.map = mmap_readwrite(self.file, close=False)
         else:
-            self.rwfile = None
-            f = f or open(filename, 'rb')
-            self.map = mmap_read(f)
+            self.file = f or open(filename, 'rb')
+            self.map = mmap_read(self.file)
         got = self.map[0:4]
         if got != b'BLOM':
             log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
@@ -167,33 +168,42 @@ class ShaBloom:
             self.idxnames = []
 
     def _init_failed(self):
-        if self.map:
-            self.map = None
-        if self.rwfile:
-            self.rwfile.close()
-            self.rwfile = None
         self.idxnames = []
         self.bits = self.entries = 0
+        self.map, tmp_map = None, self.map
+        self.file, tmp_file = None, self.file
+        try:
+            if tmp_map:
+                tmp_map.close()
+        finally:  # This won't handle pending exceptions correctly in py2
+            if self.file:
+                tmp_file.close()
 
     def valid(self):
         return self.map and self.bits
 
-    def __del__(self):
-        self.close()
-
     def close(self):
-        if self.map and self.rwfile:
-            debug2("bloom: closing with %d entries\n" % self.entries)
-            self.map[12:16] = struct.pack('!I', self.entries)
-            if self.delaywrite:
-                self.rwfile.seek(0)
-                self.rwfile.write(self.map)
-            else:
-                self.map.flush()
-            self.rwfile.seek(16 + 2**self.bits)
-            if self.idxnames:
-                self.rwfile.write(b'\0'.join(self.idxnames))
-        self._init_failed()
+        try:
+            if self.map and self.readwrite:
+                debug2("bloom: closing with %d entries\n" % self.entries)
+                self.map[12:16] = struct.pack('!I', self.entries)
+                if self.delaywrite:
+                    self.file.seek(0)
+                    self.file.write(self.map)
+                else:
+                    self.map.flush()
+                self.file.seek(16 + 2**self.bits)
+                if self.idxnames:
+                    self.file.write(b'\0'.join(self.idxnames))
+        finally:  # This won't handle pending exceptions correctly in py2
+            self._init_failed()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
 
     def pfalse_positive(self, additional=0):
         n = self.entries + additional
index 1896f5e3341441aa9c0ad7f7f8dd9dfc41d13e8b..f9959642c0c1a7c0cf33b918224d9ffa45d16f4d 100755 (executable)
@@ -28,8 +28,8 @@ def ruin_bloom(bloomfilename):
         log(path_msg(bloomfilename) + '\n')
         add_error('bloom: %s not found to ruin\n' % path_msg(rbloomfilename))
         return
-    b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1)
-    b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits
+    with bloom.ShaBloom(bloomfilename, readwrite=True, expected=1) as b:
+        b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits
 
 
 def check_bloom(path, bloomfilename, idx):
@@ -38,21 +38,21 @@ def check_bloom(path, bloomfilename, idx):
     if not os.path.exists(bloomfilename):
         log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
         return
-    b = bloom.ShaBloom(bloomfilename)
-    if not b.valid():
-        add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
-        return
-    base = os.path.basename(idx)
-    if base not in b.idxnames:
-        log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
-        return
-    if base == idx:
-        idx = os.path.join(path, idx)
-    log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
-    log('bloom:   checking %s\n' % path_msg(ridx))
-    for objsha in git.open_idx(idx):
-        if not b.exists(objsha):
-            add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
+    with bloom.ShaBloom(bloomfilename) as b:
+        if not b.valid():
+            add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
+            return
+        base = os.path.basename(idx)
+        if base not in b.idxnames:
+            log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
+            return
+        if base == idx:
+            idx = os.path.join(path, idx)
+        log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
+        log('bloom:   checking %s\n' % path_msg(ridx))
+        for objsha in git.open_idx(idx):
+            if not b.exists(objsha):
+                add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
 
 
 _first = None
@@ -60,79 +60,86 @@ def do_bloom(path, outfilename, k, force):
     global _first
     assert k in (None, 4, 5)
     b = None
-    if os.path.exists(outfilename) and not force:
-        b = bloom.ShaBloom(outfilename)
-        if not b.valid():
-            debug1("bloom: Existing invalid bloom found, regenerating.\n")
-            b = None
-
-    add = []
-    rest = []
-    add_count = 0
-    rest_count = 0
-    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
-        progress('bloom: counting: %d\r' % i)
-        ix = git.open_idx(name)
-        ixbase = os.path.basename(name)
-        if b and (ixbase in b.idxnames):
-            rest.append(name)
-            rest_count += len(ix)
-        else:
-            add.append(name)
-            add_count += len(ix)
-
-    if not add:
-        debug1("bloom: nothing to do.\n")
-        return
-
-    if b:
-        if len(b) != rest_count:
-            debug1("bloom: size %d != idx total %d, regenerating\n"
-                   % (len(b), rest_count))
-            b = None
-        elif k is not None and k != b.k:
-            debug1("bloom: new k %d != existing k %d, regenerating\n"
-                   % (k, b.k))
-            b = None
-        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and
-              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
-            debug1("bloom: regenerating: adding %d entries gives "
-                   "%.2f%% false positives.\n"
-                   % (add_count, b.pfalse_positive(add_count)))
-            b = None
-        else:
-            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
-    if not b: # Need all idxs to build from scratch
-        add += rest
-        add_count += rest_count
-    del rest
-    del rest_count
-
-    msg = b is None and 'creating from' or 'adding'
-    if not _first: _first = path
-    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
-    progress('bloom: %s%s %d file%s (%d object%s).\r'
-        % (path_msg(dirprefix), msg,
-           len(add), len(add)!=1 and 's' or '',
-           add_count, add_count!=1 and 's' or ''))
-
-    tfname = None
-    if b is None:
-        tfname = os.path.join(path, b'bup.tmp.bloom')
-        b = bloom.create(tfname, expected=add_count, k=k)
-    count = 0
-    icount = 0
-    for name in add:
-        ix = git.open_idx(name)
-        qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
-                  % (icount*100.0/add_count, icount, add_count))
-        b.add_idx(ix)
-        count += 1
-        icount += len(ix)
-
-    # Currently, there's an open file object for tfname inside b.
-    # Make sure it's closed before rename.
-    b.close()
+    try:
+        if os.path.exists(outfilename) and not force:
+            b = bloom.ShaBloom(outfilename)
+            if not b.valid():
+                debug1("bloom: Existing invalid bloom found, regenerating.\n")
+                b.close()
+                b = None
+
+        add = []
+        rest = []
+        add_count = 0
+        rest_count = 0
+        for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
+            progress('bloom: counting: %d\r' % i)
+            ix = git.open_idx(name)
+            ixbase = os.path.basename(name)
+            if b and (ixbase in b.idxnames):
+                rest.append(name)
+                rest_count += len(ix)
+            else:
+                add.append(name)
+                add_count += len(ix)
+
+        if not add:
+            debug1("bloom: nothing to do.\n")
+            return
+
+        if b:
+            if len(b) != rest_count:
+                debug1("bloom: size %d != idx total %d, regenerating\n"
+                       % (len(b), rest_count))
+                b, b_tmp = None, b
+                b_tmp.close()
+            elif k is not None and k != b.k:
+                debug1("bloom: new k %d != existing k %d, regenerating\n"
+                       % (k, b.k))
+                b, b_tmp = None, b
+                b_tmp.close()
+            elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and
+                  b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
+                debug1("bloom: regenerating: adding %d entries gives "
+                       "%.2f%% false positives.\n"
+                       % (add_count, b.pfalse_positive(add_count)))
+                b, b_tmp = None, b
+                b_tmp.close()
+            else:
+                b = bloom.ShaBloom(outfilename, readwrite=True,
+                                   expected=add_count)
+        if not b: # Need all idxs to build from scratch
+            add += rest
+            add_count += rest_count
+        del rest
+        del rest_count
+
+        msg = b is None and 'creating from' or 'adding'
+        if not _first: _first = path
+        dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
+        progress('bloom: %s%s %d file%s (%d object%s).\r'
+            % (path_msg(dirprefix), msg,
+               len(add), len(add)!=1 and 's' or '',
+               add_count, add_count!=1 and 's' or ''))
+
+        tfname = None
+        if b is None:
+            tfname = os.path.join(path, b'bup.tmp.bloom')
+            b = bloom.create(tfname, expected=add_count, k=k)
+        count = 0
+        icount = 0
+        for name in add:
+            ix = git.open_idx(name)
+            qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
+                      % (icount*100.0/add_count, icount, add_count))
+            b.add_idx(ix)
+            count += 1
+            icount += len(ix)
+
+    finally:  # This won't handle pending exceptions correctly in py2
+        # Currently, there's an open file object for tfname inside b.
+        # Make sure it's closed before rename.
+        if b: b.close()
 
     if tfname:
         os.rename(tfname, outfilename)
index e8425ff0460a203614714c3073f672b44fa9ab4f..06bd3baf609353af1a1d238ebd02b32b1575d4e5 100644 (file)
@@ -236,7 +236,7 @@ def bup_gc(threshold=10, compression=1, verbosity=0):
         except MissingObject as ex:
             log('bup: missing object %r \n' % hexstr(ex.oid))
             sys.exit(1)
-        try:
+        with live_objects:
             # FIXME: just rename midxes and bloom, and restore them at the end if
             # we didn't change any packs?
             packdir = git.repo(b'objects/pack')
@@ -252,5 +252,3 @@ def bup_gc(threshold=10, compression=1, verbosity=0):
             sweep(live_objects, existing_count, cat_pipe,
                   threshold, compression,
                   verbosity)
-        finally:
-            live_objects.close()
index 4c1a95c52a985e5eaee1bb36ef4ca61651559c78..3f1d73463d62096d8c6cc63c15096631e18dad26 100644 (file)
@@ -643,14 +643,22 @@ class PackIdxList:
                         continue
                     d[full] = ix
             bfull = os.path.join(self.dir, b'bup.bloom')
-            if self.bloom is None and os.path.exists(bfull):
-                self.bloom = bloom.ShaBloom(bfull)
             self.packs = list(set(d.values()))
             self.packs.sort(reverse=True, key=lambda x: len(x))
-            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
-                self.do_bloom = True
-            else:
-                self.bloom = None
+            if self.bloom is None and os.path.exists(bfull):
+                self.bloom = bloom.ShaBloom(bfull)
+            try:
+                if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
+                    self.do_bloom = True
+                else:
+                    if self.bloom:
+                        self.bloom, bloom_tmp = None, self.bloom
+                        bloom_tmp.close()
+            except BaseException as ex:
+                with pending_raise(ex):
+                    if self.bloom:
+                        self.bloom.close()
+
         debug1('PackIdxList: using %d index%s.\n'
             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 
index 5f75d7e6f1e70252843674b064796a4a51552f47..4169227cb07b84ced1783eda8b04cc95b511cd1a 100644 (file)
@@ -14,26 +14,25 @@ def test_bloom(tmpdir):
     ix.name = b'dummy.idx'
     ix.shatable = b''.join(hashes)
     for k in (4, 5):
-        b = bloom.create(tmpdir + b'/pybuptest.bloom', expected=100, k=k)
-        b.add_idx(ix)
-        assert b.pfalse_positive() < .1
-        b.close()
-        b = bloom.ShaBloom(tmpdir + b'/pybuptest.bloom')
-        all_present = True
-        for h in hashes:
-            all_present &= (b.exists(h) or False)
-        assert all_present
-        false_positives = 0
-        for h in [os.urandom(20) for i in range(1000)]:
-            if b.exists(h):
-                false_positives += 1
-        assert false_positives < 5
+        with bloom.create(tmpdir + b'/pybuptest.bloom', expected=100, k=k) as b:
+            b.add_idx(ix)
+            assert b.pfalse_positive() < .1
+        with bloom.ShaBloom(tmpdir + b'/pybuptest.bloom') as b:
+            all_present = True
+            for h in hashes:
+                all_present &= (b.exists(h) or False)
+            assert all_present
+            false_positives = 0
+            for h in [os.urandom(20) for i in range(1000)]:
+                if b.exists(h):
+                    false_positives += 1
+            assert false_positives < 5
         os.unlink(tmpdir + b'/pybuptest.bloom')
 
     tf = tempfile.TemporaryFile(dir=tmpdir)
-    b = bloom.create(b'bup.bloom', f=tf, expected=100)
-    assert b.rwfile == tf
-    assert b.k == 5
+    with bloom.create(b'bup.bloom', f=tf, expected=100) as b:
+        assert b.file == tf
+        assert b.k == 5
 
     # Test large (~1GiB) filter.  This may fail on s390 (31-bit
     # architecture), and anywhere else where the address space is
@@ -41,9 +40,9 @@ def test_bloom(tmpdir):
     tf = tempfile.TemporaryFile(dir=tmpdir)
     skip_test = False
     try:
-        b = bloom.create(b'bup.bloom', f=tf, expected=2**28,
-                         delaywrite=False)
-        assert b.k == 4
+        with bloom.create(b'bup.bloom', f=tf, expected=2**28,
+                          delaywrite=False) as b:
+            assert b.k == 4
     except EnvironmentError as ex:
         (ptr_width, linkage) = platform.architecture()
         if ptr_width == '32bit' and ex.errno == errno.ENOMEM: