arthur.barton.de Git - bup.git/commitdiff
Fully (and explicitly) close PackIdxLists
author: Rob Browning <rlb@defaultvalue.org>
Sat, 23 Oct 2021 19:06:01 +0000 (14:06 -0500)
committer: Rob Browning <rlb@defaultvalue.org>
Mon, 22 Nov 2021 16:35:28 +0000 (10:35 -0600)
And stop checking _mpi_count in __del__ since there are no guarantees
about if/when it will run (and so could run after another has been
opened).

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/client.py
lib/bup/cmd/margin.py
lib/bup/cmd/memtest.py
lib/bup/cmd/tag.py
lib/bup/git.py
lib/bup/helpers.py
test/int/test_client.py
test/int/test_git.py

index 637737bb5828d62bc5e72e3ad13dd6ddf22dab13..ed902b74479c0f5db4ecb39689cd852919d470cd 100644 (file)
@@ -10,7 +10,7 @@ from bup import git, ssh, vfs
 from bup.compat import environ, pending_raise, range, reraise
 from bup.helpers import (Conn, atomically_replaced_file, chunkyreader, debug1,
                          debug2, linereader, lines_until_sentinel,
-                         mkdirp, progress, qprogress, DemuxConn)
+                         mkdirp, nullcontext_if_not, progress, qprogress, DemuxConn)
 from bup.io import path_msg
 from bup.vint import write_bvec
 
@@ -515,13 +515,16 @@ class PackWriter_Remote(git.PackWriter):
         # Called by other PackWriter methods like breakpoint().
         # Must not close the connection (self.file)
         assert(run_midx)  # We don't support this via remote yet
-        if self._packopen and self.file:
+        self.objcache, objcache = None, self.objcache
+        with nullcontext_if_not(objcache):
+            if not (self._packopen and self.file):
+                return None
             self.file.write(b'\0\0\0\0')
             self._packopen = False
             self.onclose() # Unbusy
-            self.objcache = None
+            if objcache is not None:
+                objcache.close()
             return self.suggest_packs() # Returns last idx received
-        return None
 
     def close(self):
         # Called by inherited __exit__
index 07f2b0f7780eff21f173284cf6cde2e1b68b897c..7836c7198a59819a0a916d9e9cb75a9c3dc576c6 100755 (executable)
@@ -24,44 +24,46 @@ def main(argv):
 
     git.check_repo_or_die()
 
-    mi = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)
+    with git.PackIdxList(git.repo(b'objects/pack'),
+                         ignore_midx=opt.ignore_midx) as mi:
 
-    def do_predict(ix, out):
-        total = len(ix)
-        maxdiff = 0
-        for count,i in enumerate(ix):
-            prefix = struct.unpack('!Q', i[:8])[0]
-            expected = prefix * total // (1 << 64)
-            diff = count - expected
-            maxdiff = max(maxdiff, abs(diff))
-        out.write(b'%d of %d (%.3f%%) '
-                  % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
-        out.flush()
-        assert(count+1 == len(ix))
+        def do_predict(ix, out):
+            total = len(ix)
+            maxdiff = 0
+            for count,i in enumerate(ix):
+                prefix = struct.unpack('!Q', i[:8])[0]
+                expected = prefix * total // (1 << 64)
+                diff = count - expected
+                maxdiff = max(maxdiff, abs(diff))
+            out.write(b'%d of %d (%.3f%%) '
+                      % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
+            out.flush()
+            assert(count+1 == len(ix))
 
-    sys.stdout.flush()
-    out = byte_stream(sys.stdout)
+        sys.stdout.flush()
+        out = byte_stream(sys.stdout)
 
-    if opt.predict:
-        if opt.ignore_midx:
-            for pack in mi.packs:
-                do_predict(pack, out)
+        if opt.predict:
+            if opt.ignore_midx:
+                for pack in mi.packs:
+                    do_predict(pack, out)
+            else:
+                do_predict(mi, out)
         else:
-            do_predict(mi, out)
-    else:
-        # default mode: find longest matching prefix
-        last = b'\0'*20
-        longmatch = 0
-        for i in mi:
-            if i == last:
-                continue
-            #assert(str(i) >= last)
-            pm = _helpers.bitmatch(last, i)
-            longmatch = max(longmatch, pm)
-            last = i
-        out.write(b'%d\n' % longmatch)
-        log('%d matching prefix bits\n' % longmatch)
-        doublings = math.log(len(mi), 2)
+            # default mode: find longest matching prefix
+            last = b'\0'*20
+            longmatch = 0
+            for i in mi:
+                if i == last:
+                    continue
+                #assert(str(i) >= last)
+                pm = _helpers.bitmatch(last, i)
+                longmatch = max(longmatch, pm)
+                last = i
+            out.write(b'%d\n' % longmatch)
+            log('%d matching prefix bits\n' % longmatch)
+            doublings = math.log(len(mi), 2)
+
         bpd = longmatch / doublings
         log('%.2f bits per doubling\n' % bpd)
         remain = 160 - longmatch
index b2027c42d83c7db580576ded97cf746fc0425abc..e20c1b67806f272f940a6d9e422e243a0e011bf8 100755 (executable)
@@ -79,8 +79,6 @@ def main(argv):
         o.fatal('no arguments expected')
 
     git.check_repo_or_die()
-    m = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)
-
     sys.stdout.flush()
     out = byte_stream(sys.stdout)
 
@@ -88,27 +86,30 @@ def main(argv):
     _helpers.random_sha()
     report(0, out)
 
-    if opt.existing:
-        def foreverit(mi):
-            while 1:
-                for e in mi:
-                    yield e
-        objit = iter(foreverit(m))
-
-    for c in range(opt.cycles):
-        for n in range(opt.number):
-            if opt.existing:
-                bin = next(objit)
-                assert(m.exists(bin))
-            else:
-                bin = _helpers.random_sha()
-
-                # technically, a randomly generated object id might exist.
-                # but the likelihood of that is the likelihood of finding
-                # a collision in sha-1 by accident, which is so unlikely that
-                # we don't care.
-                assert(not m.exists(bin))
-        report((c+1)*opt.number, out)
+    with git.PackIdxList(git.repo(b'objects/pack'),
+                         ignore_midx=opt.ignore_midx) as m:
+
+        if opt.existing:
+            def foreverit(mi):
+                while 1:
+                    for e in mi:
+                        yield e
+            objit = iter(foreverit(m))
+
+        for c in range(opt.cycles):
+            for n in range(opt.number):
+                if opt.existing:
+                    bin = next(objit)
+                    assert(m.exists(bin))
+                else:
+                    bin = _helpers.random_sha()
+
+                    # technically, a randomly generated object id might exist.
+                    # but the likelihood of that is the likelihood of finding
+                    # a collision in sha-1 by accident, which is so unlikely that
+                    # we don't care.
+                    assert(not m.exists(bin))
+            report((c+1)*opt.number, out)
 
     if bloom._total_searches:
         out.write(b'bloom: %d objects searched in %d steps: avg %.3f steps/object\n'
index 0f5475d5050d883e24a4451e2a02f1baa72ef488..3bb1b409a6a05305762a647157fabbdabdeedd49 100755 (executable)
@@ -74,10 +74,10 @@ def main(argv):
         log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
         sys.exit(2)
 
-    pL = git.PackIdxList(git.repo(b'objects/pack'))
-    if not pL.exists(hash):
-        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
-        sys.exit(2)
+    with git.PackIdxList(git.repo(b'objects/pack')) as pL:
+        if not pL.exists(hash):
+            log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
+            sys.exit(2)
 
     tag_file = git.repo(b'refs/tags/' + tag_name)
     try:
index 0dc6e729ab759ed05bbb629f62e4bd96c6e4ef07..e3b235c5b0f170b56f987f860956d07a4f178304 100644 (file)
@@ -14,6 +14,7 @@ from bup import _helpers, hashsplit, path, midx, bloom, xstat
 from bup.compat import (buffer,
                         byte_int, bytes_from_byte, bytes_from_uint,
                         environ,
+                        ExitStack,
                         items,
                         pending_raise,
                         range,
@@ -27,6 +28,7 @@ from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                          merge_dict,
                          merge_iter,
                          mmap_read, mmap_readwrite,
+                         nullcontext_if_not,
                          progress, qprogress, stat_if_exists,
                          unlink,
                          utc_offset_str)
@@ -517,8 +519,10 @@ _mpi_count = 0
 class PackIdxList:
     def __init__(self, dir, ignore_midx=False):
         global _mpi_count
+        # Q: was this also intended to prevent opening multiple repos?
         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
         _mpi_count += 1
+        self.open = True
         self.dir = dir
         self.also = set()
         self.packs = []
@@ -527,10 +531,32 @@ class PackIdxList:
         self.ignore_midx = ignore_midx
         self.refresh()
 
-    def __del__(self):
+    def close(self):
         global _mpi_count
+        if not self.open:
+            assert _mpi_count == 0
+            return
         _mpi_count -= 1
-        assert(_mpi_count == 0)
+        assert _mpi_count == 0
+        self.also = None
+        self.bloom, bloom = None, self.bloom
+        self.packs, packs = None, self.packs
+        self.open = False
+        with ExitStack() as stack:
+            for pack in packs:
+                stack.enter_context(pack)
+            if bloom:
+                bloom.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
+    def __del__(self):
+        assert not self.open
 
     def __iter__(self):
         return iter(idxmerge(self.packs))
@@ -721,7 +747,6 @@ def create_commit_blob(tree, parent,
     l.append(msg)
     return b'\n'.join(l)
 
-
 def _make_objcache():
     return PackIdxList(repo(b'objects/pack'))
 
@@ -878,45 +903,49 @@ class PackWriter:
         self.file, f = None, self.file
         self.idx, idx = None, self.idx
         self.parentfd, pfd, = None, self.parentfd
-        self.objcache = None
-
-        with finalized(pfd, lambda x: x is not None and os.close(x)), \
-             f:
 
-            if abort:
-                os.unlink(self.filename + b'.pack')
-                return None
+        try:
+            with nullcontext_if_not(self.objcache), \
+                 finalized(pfd, lambda x: x is not None and os.close(x)), \
+                 f:
+
+                if abort:
+                    os.unlink(self.filename + b'.pack')
+                    return None
+
+                # update object count
+                f.seek(8)
+                cp = struct.pack('!i', self.count)
+                assert len(cp) == 4
+                f.write(cp)
+
+                # calculate the pack sha1sum
+                f.seek(0)
+                sum = Sha1()
+                for b in chunkyreader(f):
+                    sum.update(b)
+                packbin = sum.digest()
+                f.write(packbin)
+                f.flush()
+                fdatasync(f.fileno())
+                f.close()
 
-            # update object count
-            f.seek(8)
-            cp = struct.pack('!i', self.count)
-            assert len(cp) == 4
-            f.write(cp)
-
-            # calculate the pack sha1sum
-            f.seek(0)
-            sum = Sha1()
-            for b in chunkyreader(f):
-                sum.update(b)
-            packbin = sum.digest()
-            f.write(packbin)
-            f.flush()
-            fdatasync(f.fileno())
-            f.close()
-
-            idx.write(self.filename + b'.idx', packbin)
-            nameprefix = os.path.join(self.repo_dir,
-                                      b'objects/pack/pack-' +  hexlify(packbin))
-            if os.path.exists(self.filename + b'.map'):
-                os.unlink(self.filename + b'.map')
-            os.rename(self.filename + b'.pack', nameprefix + b'.pack')
-            os.rename(self.filename + b'.idx', nameprefix + b'.idx')
-            os.fsync(pfd)
-            if run_midx:
-                auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
-            if self.on_pack_finish:
-                self.on_pack_finish(nameprefix)
-            return nameprefix
+                idx.write(self.filename + b'.idx', packbin)
+                nameprefix = os.path.join(self.repo_dir,
+                                          b'objects/pack/pack-' +  hexlify(packbin))
+                if os.path.exists(self.filename + b'.map'):
+                    os.unlink(self.filename + b'.map')
+                os.rename(self.filename + b'.pack', nameprefix + b'.pack')
+                os.rename(self.filename + b'.idx', nameprefix + b'.idx')
+                os.fsync(pfd)
+                if run_midx:
+                    auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+                if self.on_pack_finish:
+                    self.on_pack_finish(nameprefix)
+                return nameprefix
+        finally:
+            # Must be last -- some of the code above depends on it
+            self.objcache = None
 
     def abort(self):
         """Remove the pack file from disk."""
@@ -1090,16 +1119,15 @@ def rev_parse(committish, repo_dir=None):
         debug2("resolved from ref: commit = %s\n" % hexlify(head))
         return head
 
-    pL = PackIdxList(repo(b'objects/pack', repo_dir=repo_dir))
-
     if len(committish) == 40:
         try:
             hash = unhexlify(committish)
         except TypeError:
             return None
 
-        if pL.exists(hash):
-            return hash
+        with PackIdxList(repo(b'objects/pack', repo_dir=repo_dir)) as pL:
+            if pL.exists(hash):
+                return hash
 
     return None
 
index 3b37a2b72c065eccf1f9610575a1ee3f90212a8c..0530b51aa81575dadf67fd2101c507be31607beb 100644 (file)
@@ -12,7 +12,7 @@ import hashlib, heapq, math, operator, time, tempfile
 
 from bup import _helpers
 from bup import compat
-from bup.compat import argv_bytes, byte_int, pending_raise
+from bup.compat import argv_bytes, byte_int, nullcontext, pending_raise
 from bup.io import byte_stream, path_msg
 # This function should really be in helpers, not in bup.options.  But we
 # want options.py to be standalone so people can include it in other projects.
@@ -27,6 +27,10 @@ class Nonlocal:
     pass
 
 
+def nullcontext_if_not(manager):
+    return manager if manager is not None else nullcontext()
+
+
 @contextmanager
 def finalized(enter_result=None, finalize=None):
     assert finalize
index 757c3ab2f0fa0fcbc3a2c141ad20e284838d66d0..de17ecea74c682fa6ea1a44e72811b3b0b7709cb 100644 (file)
@@ -23,12 +23,10 @@ IDX_PAT = b'/*.idx'
 def test_server_split_with_indexes(tmpdir):
     environ[b'BUP_DIR'] = bupdir = tmpdir
     git.init_repo(bupdir)
-    with git.PackWriter() as lw, \
-         client.Client(bupdir, create=True) as c, \
-         c.new_packwriter() as rw:
+    with git.PackWriter() as lw:
         lw.new_blob(s1)
-        lw.close()
-
+    with client.Client(bupdir, create=True) as c, \
+         c.new_packwriter() as rw:
         rw.new_blob(s2)
         rw.breakpoint()
         rw.new_blob(s1)
@@ -122,25 +120,26 @@ def test_midx_refreshing(tmpdir):
         p2base = rw.close()
     p2name = os.path.join(c.cachedir, p2base)
 
-    pi = git.PackIdxList(bupdir + b'/objects/pack')
-    assert len(pi.packs) == 2
-    pi.refresh()
-    assert len(pi.packs) == 2
-    assert sorted([os.path.basename(i.name) for i in pi.packs]) == sorted([p1base, p2base])
-
-    with git.open_idx(p1name) as p1, \
-         git.open_idx(p2name) as p2:
-        assert p1.exists(s1sha)
-        assert not p2.exists(s1sha)
-        assert p2.exists(s2sha)
-
-    subprocess.call([path.exe(), b'midx', b'-f'])
-    pi.refresh()
-    assert len(pi.packs) == 1
-    pi.refresh(skip_midx=True)
-    assert len(pi.packs) == 2
-    pi.refresh(skip_midx=False)
-    assert len(pi.packs) == 1
+    with git.PackIdxList(bupdir + b'/objects/pack') as pi:
+        assert len(pi.packs) == 2
+        pi.refresh()
+        assert len(pi.packs) == 2
+        assert sorted([os.path.basename(i.name) for i in pi.packs]) \
+            == sorted([p1base, p2base])
+
+        with git.open_idx(p1name) as p1, \
+             git.open_idx(p2name) as p2:
+            assert p1.exists(s1sha)
+            assert not p2.exists(s1sha)
+            assert p2.exists(s2sha)
+
+        subprocess.call([path.exe(), b'midx', b'-f'])
+        pi.refresh()
+        assert len(pi.packs) == 1
+        pi.refresh(skip_midx=True)
+        assert len(pi.packs) == 2
+        pi.refresh(skip_midx=False)
+        assert len(pi.packs) == 1
 
 
 def test_remote_parsing():
index cae56caf312580fed32d611a1b5294374a80fd66..616fc6e70451cd7626383ec0c095f79ada36a634 100644 (file)
@@ -147,10 +147,10 @@ def test_packs(tmpdir):
 
         WVFAIL(r.find_offset(b'\0'*20))
 
-    r = git.PackIdxList(bupdir + b'/objects/pack')
-    WVPASS(r.exists(hashes[5]))
-    WVPASS(r.exists(hashes[6]))
-    WVFAIL(r.exists(b'\0'*20))
+    with git.PackIdxList(bupdir + b'/objects/pack') as r:
+        WVPASS(r.exists(hashes[5]))
+        WVPASS(r.exists(hashes[6]))
+        WVFAIL(r.exists(b'\0'*20))
 
 
 def test_pack_name_lookup(tmpdir):
@@ -169,11 +169,11 @@ def test_pack_name_lookup(tmpdir):
             log('\n')
             idxnames.append(os.path.basename(w.close() + b'.idx'))
 
-    r = git.PackIdxList(packdir)
-    WVPASSEQ(len(r.packs), 2)
-    for e,idxname in enumerate(idxnames):
-        for i in range(e*2, (e+1)*2):
-            WVPASSEQ(idxname, r.exists(hashes[i], want_source=True))
+    with git.PackIdxList(packdir) as r:
+        WVPASSEQ(len(r.packs), 2)
+        for e,idxname in enumerate(idxnames):
+            for i in range(e*2, (e+1)*2):
+                WVPASSEQ(idxname, r.exists(hashes[i], want_source=True))
 
 
 def test_long_index(tmpdir):
@@ -511,35 +511,35 @@ def test_midx_close(tmpdir):
     for i in range(10):
         _create_idx(tmpdir, i)
     git.auto_midx(tmpdir)
-    l = git.PackIdxList(tmpdir)
+    with git.PackIdxList(tmpdir) as l:
     # this doesn't exist (yet)
-    WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
-    for i in range(10, 15):
-        _create_idx(tmpdir, i)
-    # delete the midx ...
-    # TODO: why do we need to? git.auto_midx() below doesn't?!
-    for fn in os.listdir(tmpdir):
-        if fn.endswith(b'.midx'):
-            os.unlink(os.path.join(tmpdir, fn))
-    # and make a new one
-    git.auto_midx(tmpdir)
-    # check it still doesn't exist - we haven't refreshed
-    WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
-    # check that we still have the midx open, this really
-    # just checks more for the kernel API ('deleted' string)
-    for fn in openfiles():
-        if not b'midx-' in fn:
-            continue
-        WVPASSEQ(True, b'deleted' in fn)
-    # refresh the PackIdxList
-    l.refresh()
-    # and check that an object in pack 10 exists now
-    WVPASSEQ(True, l.exists(struct.pack('18xBB', 10, 0)))
-    for fn in openfiles():
-        if not b'midx-' in fn:
-            continue
-        # check that we don't have it open anymore
-        WVPASSEQ(False, b'deleted' in fn)
+        WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
+        for i in range(10, 15):
+            _create_idx(tmpdir, i)
+        # delete the midx ...
+        # TODO: why do we need to? git.auto_midx() below doesn't?!
+        for fn in os.listdir(tmpdir):
+            if fn.endswith(b'.midx'):
+                os.unlink(os.path.join(tmpdir, fn))
+        # and make a new one
+        git.auto_midx(tmpdir)
+        # check it still doesn't exist - we haven't refreshed
+        WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
+        # check that we still have the midx open, this really
+        # just checks more for the kernel API ('deleted' string)
+        for fn in openfiles():
+            if not b'midx-' in fn:
+                continue
+            WVPASSEQ(True, b'deleted' in fn)
+        # refresh the PackIdxList
+        l.refresh()
+        # and check that an object in pack 10 exists now
+        WVPASSEQ(True, l.exists(struct.pack('18xBB', 10, 0)))
+        for fn in openfiles():
+            if not b'midx-' in fn:
+                continue
+            # check that we don't have it open anymore
+            WVPASSEQ(False, b'deleted' in fn)
 
 def test_config():
     cfg_file = os.path.join(os.path.dirname(__file__), 'sample.conf')