From 1dc1f11918198d4a192e285de2c393dd6dc33a8b Mon Sep 17 00:00:00 2001
From: Rob Browning
Date: Sun, 26 Sep 2021 14:53:18 -0500
Subject: [PATCH 1/1] HLinkDB.__del__: replace with context management

Signed-off-by: Rob Browning
Tested-by: Rob Browning
---
 lib/bup/cmd/index.py | 229 ++++++++++++++++++++++---------------------
 lib/bup/cmd/save.py  |   8 +-
 lib/bup/hlinkdb.py   |   9 +-
 3 files changed, 126 insertions(+), 120 deletions(-)

diff --git a/lib/bup/cmd/index.py b/lib/bup/cmd/index.py
index 35d393a..d4d9cae 100755
--- a/lib/bup/cmd/index.py
+++ b/lib/bup/cmd/index.py
@@ -78,128 +78,129 @@ def update_index(top, excluded_paths, exclude_rxs, indexfile,
     wi = index.Writer(indexfile, msw, tmax)
     rig = IterHelper(ri.iter(name=top))
 
-    hlinks = hlinkdb.HLinkDB(indexfile + b'.hlink')
-
-    fake_hash = None
-    if fake_valid:
-        def fake_hash(name):
-            return (GIT_MODE_FILE, index.FAKE_SHA)
-
-    total = 0
-    bup_dir = os.path.abspath(git.repo())
-    index_start = time.time()
-    for path, pst in recursive_dirlist([top],
-                                       xdev=xdev,
-                                       bup_dir=bup_dir,
-                                       excluded_paths=excluded_paths,
-                                       exclude_rxs=exclude_rxs,
-                                       xdev_exceptions=xdev_exceptions):
-        if verbose>=2 or (verbose == 1 and stat.S_ISDIR(pst.st_mode)):
-            out.write(b'%s\n' % path)
-            out.flush()
-            elapsed = time.time() - index_start
-            paths_per_sec = total / elapsed if elapsed else 0
-            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
-        elif not (total % 128):
-            elapsed = time.time() - index_start
-            paths_per_sec = total / elapsed if elapsed else 0
-            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
-        total += 1
-
-        while rig.cur and rig.cur.name > path: # deleted paths
-            if rig.cur.exists():
-                rig.cur.set_deleted()
-                rig.cur.repack()
-                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
-                    hlinks.del_path(rig.cur.name)
-            rig.next()
-
-        if rig.cur and rig.cur.name == path: # paths that already existed
-            need_repack = False
-            if(rig.cur.stale(pst, check_device=check_device)):
+    with hlinkdb.HLinkDB(indexfile + b'.hlink') as hlinks:
+
+        fake_hash = None
+        if fake_valid:
+            def fake_hash(name):
+                return (GIT_MODE_FILE, index.FAKE_SHA)
+
+        total = 0
+        bup_dir = os.path.abspath(git.repo())
+        index_start = time.time()
+        for path, pst in recursive_dirlist([top],
+                                           xdev=xdev,
+                                           bup_dir=bup_dir,
+                                           excluded_paths=excluded_paths,
+                                           exclude_rxs=exclude_rxs,
+                                           xdev_exceptions=xdev_exceptions):
+            if verbose>=2 or (verbose == 1 and stat.S_ISDIR(pst.st_mode)):
+                out.write(b'%s\n' % path)
+                out.flush()
+                elapsed = time.time() - index_start
+                paths_per_sec = total / elapsed if elapsed else 0
+                qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
+            elif not (total % 128):
+                elapsed = time.time() - index_start
+                paths_per_sec = total / elapsed if elapsed else 0
+                qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
+            total += 1
+
+            while rig.cur and rig.cur.name > path: # deleted paths
+                if rig.cur.exists():
+                    rig.cur.set_deleted()
+                    rig.cur.repack()
+                    if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
+                        hlinks.del_path(rig.cur.name)
+                rig.next()
+
+            if rig.cur and rig.cur.name == path: # paths that already existed
+                need_repack = False
+                if(rig.cur.stale(pst, check_device=check_device)):
+                    try:
+                        meta = metadata.from_path(path, statinfo=pst)
+                    except (OSError, IOError) as e:
+                        add_error(e)
+                        rig.next()
+                        continue
+                    if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
+                        hlinks.del_path(rig.cur.name)
+                    if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
+                        hlinks.add_path(path, pst.st_dev, pst.st_ino)
+                    # Clear these so they don't bloat the store -- they're
+                    # already in the index (since they vary a lot and they're
+                    # fixed length). If you've noticed "tmax", you might
+                    # wonder why it's OK to do this, since that code may
+                    # adjust (mangle) the index mtime and ctime -- producing
+                    # fake values which must not end up in a .bupm. However,
+                    # it looks like that shouldn't be possible: (1) When
+                    # "save" validates the index entry, it always reads the
+                    # metadata from the filesytem. (2) Metadata is only
+                    # read/used from the index if hashvalid is true. (3)
+                    # "faked" entries will be stale(), and so we'll invalidate
+                    # them below.
+                    meta.ctime = meta.mtime = meta.atime = 0
+                    meta_ofs = msw.store(meta)
+                    rig.cur.update_from_stat(pst, meta_ofs)
+                    rig.cur.invalidate()
+                    need_repack = True
+                if not (rig.cur.flags & index.IX_HASHVALID):
+                    if fake_hash:
+                        if rig.cur.sha == index.EMPTY_SHA:
+                            rig.cur.gitmode, rig.cur.sha = fake_hash(path)
+                            rig.cur.flags |= index.IX_HASHVALID
+                            need_repack = True
+                if fake_invalid:
+                    rig.cur.invalidate()
+                    need_repack = True
+                if need_repack:
+                    rig.cur.repack()
+                rig.next()
+            else: # new paths
                 try:
                     meta = metadata.from_path(path, statinfo=pst)
                 except (OSError, IOError) as e:
                     add_error(e)
-                    rig.next()
                     continue
-                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
-                    hlinks.del_path(rig.cur.name)
+                # See same assignment to 0, above, for rationale.
+                meta.atime = meta.mtime = meta.ctime = 0
+                meta_ofs = msw.store(meta)
+                wi.add(path, pst, meta_ofs, hashgen=fake_hash)
                 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                     hlinks.add_path(path, pst.st_dev, pst.st_ino)
-                # Clear these so they don't bloat the store -- they're
-                # already in the index (since they vary a lot and they're
-                # fixed length). If you've noticed "tmax", you might
-                # wonder why it's OK to do this, since that code may
-                # adjust (mangle) the index mtime and ctime -- producing
-                # fake values which must not end up in a .bupm. However,
-                # it looks like that shouldn't be possible: (1) When
-                # "save" validates the index entry, it always reads the
-                # metadata from the filesytem. (2) Metadata is only
-                # read/used from the index if hashvalid is true. (3)
-                # "faked" entries will be stale(), and so we'll invalidate
-                # them below.
-                meta.ctime = meta.mtime = meta.atime = 0
-                meta_ofs = msw.store(meta)
-                rig.cur.update_from_stat(pst, meta_ofs)
-                rig.cur.invalidate()
-                need_repack = True
-            if not (rig.cur.flags & index.IX_HASHVALID):
-                if fake_hash:
-                    if rig.cur.sha == index.EMPTY_SHA:
-                        rig.cur.gitmode, rig.cur.sha = fake_hash(path)
-                        rig.cur.flags |= index.IX_HASHVALID
-                        need_repack = True
-            if fake_invalid:
-                rig.cur.invalidate()
-                need_repack = True
-            if need_repack:
-                rig.cur.repack()
-            rig.next()
-        else: # new paths
-            try:
-                meta = metadata.from_path(path, statinfo=pst)
-            except (OSError, IOError) as e:
-                add_error(e)
-                continue
-            # See same assignment to 0, above, for rationale.
-            meta.atime = meta.mtime = meta.ctime = 0
-            meta_ofs = msw.store(meta)
-            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
-            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
-                hlinks.add_path(path, pst.st_dev, pst.st_ino)
-
-    elapsed = time.time() - index_start
-    paths_per_sec = total / elapsed if elapsed else 0
-    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
-
-    hlinks.prepare_save()
-
-    if ri.exists():
-        ri.save()
-    wi.flush()
-    if wi.count:
-        wr = wi.new_reader()
-        if check:
-            log('check: before merging: oldfile\n')
-            check_index(ri, verbose)
-            log('check: before merging: newfile\n')
-            check_index(wr, verbose)
-        mi = index.Writer(indexfile, msw, tmax)
-
-        for e in index.merge(ri, wr):
-            # FIXME: shouldn't we remove deleted entries eventually? When?
-            mi.add_ixentry(e)
-
-        ri.close()
-        mi.close()
-        wr.close()
-        wi.abort()
-    else:
-        wi.close()
-    msw.close()
-    hlinks.commit_save()
+
+        elapsed = time.time() - index_start
+        paths_per_sec = total / elapsed if elapsed else 0
+        progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
+
+        hlinks.prepare_save()
+
+        if ri.exists():
+            ri.save()
+        wi.flush()
+        if wi.count:
+            wr = wi.new_reader()
+            if check:
+                log('check: before merging: oldfile\n')
+                check_index(ri, verbose)
+                log('check: before merging: newfile\n')
+                check_index(wr, verbose)
+            mi = index.Writer(indexfile, msw, tmax)
+
+            for e in index.merge(ri, wr):
+                # FIXME: shouldn't we remove deleted entries
+                # eventually? When?
+                mi.add_ixentry(e)
+
+            ri.close()
+            mi.close()
+            wr.close()
+            wi.abort()
+        else:
+            wi.close()
+
+        msw.close()
+        hlinks.commit_save()
 
 
 optspec = """
diff --git a/lib/bup/cmd/save.py b/lib/bup/cmd/save.py
index bd6ce29..f053b8a 100755
--- a/lib/bup/cmd/save.py
+++ b/lib/bup/cmd/save.py
@@ -109,7 +109,7 @@ def opts_from_cmdline(argv):
     return opt
 
 
-def save_tree(opt, w):
+def save_tree(opt, indexfile, hlink_db, w):
     # Metadata is stored in a file named .bupm in each directory. The
     # first metadata entry will be the metadata for the current directory.
     # The remaining entries will be for each of the other directory
@@ -225,7 +225,6 @@ def save_tree(opt, w):
                          remainstr, kpsstr))
 
-    indexfile = opt.indexfile or git.repo(b'bupindex')
     r = index.Reader(indexfile)
     try:
         msr = index.MetaStoreReader(indexfile + b'.meta')
@@ -235,7 +234,6 @@ def save_tree(opt, w):
         log('error: cannot access %r; have you run bup index?'
             % path_msg(indexfile))
         sys.exit(1)
-    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
 
     def already_saved(ent):
         return ent.is_valid() and w.exists(ent.sha) and ent.sha
@@ -521,7 +519,9 @@ def main(argv):
     else:
         refname = parent = None
 
-    tree = save_tree(opt, w)
+    indexfile = opt.indexfile or git.repo(b'bupindex')
+    with hlinkdb.HLinkDB(indexfile + b'.hlink') as hlink_db:
+        tree = save_tree(opt, indexfile, hlink_db, w)
     if opt.tree:
         out.write(hexlify(tree))
         out.write(b'\n')
diff --git a/lib/bup/hlinkdb.py b/lib/bup/hlinkdb.py
index f953a32..05a52ef 100644
--- a/lib/bup/hlinkdb.py
+++ b/lib/bup/hlinkdb.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import
 import errno, os, tempfile
 
 from bup import compat
+from bup.compat import pending_raise
 
 if compat.py_maj > 2:
     import pickle
@@ -92,8 +93,12 @@ class HLinkDB:
             os.unlink(self._tmpname)
             self._tmpname = None
 
-    def __del__(self):
-        self.abort_save()
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=True):
+            self.abort_save()
 
     def add_path(self, path, dev, ino):
         # Assume path is new.
-- 
2.39.2
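
Editor's note (not part of the patch): the change above swaps finalizer-based
cleanup for the context-management protocol. CPython makes no guarantee about
when, or even whether, __del__ runs (objects caught in reference cycles, or
still alive at interpreter shutdown, may never be finalized), while __exit__
runs deterministically when the with block ends, including when the body
raises. Below is a minimal, self-contained sketch of the same pattern under
stated assumptions: ExampleDB is hypothetical and only mirrors HLinkDB's
prepare_save/commit_save/abort_save life cycle, and a plain abort_save() call
stands in for bup.compat.pending_raise, which the real __exit__ uses so that
an exception raised during cleanup cannot silently mask one already
propagating out of the with body.

import os, tempfile

class ExampleDB:
    """Sketch of the __enter__/__exit__ cleanup pattern the patch adopts."""
    def __init__(self, path):
        self._path = path
        self._tmpname = None  # temp file created by prepare_save()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Runs at block exit even if the body raised; drop any
        # uncommitted save so no stale temp file survives.
        self.abort_save()

    def prepare_save(self):
        fd, self._tmpname = tempfile.mkstemp(prefix='exampledb-', dir='.')
        os.close(fd)

    def commit_save(self):
        os.rename(self._tmpname, self._path)
        self._tmpname = None

    def abort_save(self):
        if self._tmpname:
            os.unlink(self._tmpname)
            self._tmpname = None

# Usage: cleanup now happens at a well-defined point instead of at
# whatever moment the garbage collector finalizes the object.
with ExampleDB('example.db') as db:
    db.prepare_save()
    db.commit_save()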