From 9379dc60e4bd5de0e78e767dd1dfc120a11280e6 Mon Sep 17 00:00:00 2001
From: Rob Browning
Date: Sun, 15 Oct 2017 14:40:15 -0500
Subject: [PATCH 1/1] vfs2: suffix duplicate save names (e.g.
 1970-01-01-214640-07)

From now on, append zero-padded integers to the names of saves that
have the same commit time in order to avoid duplicates.  The sequence
number currently represents the save's reversed position in default
rev-list order, so that given:

  /foo/1970-01-01-214640-09
  /foo/1970-01-01-214640-10

In the normal case, the -10 save would indicate the next save made
after -09 (and the -09 save would be the single parent commit for
-10).

Thanks to Ben Kelley for helping me sort out some of the details.

Signed-off-by: Rob Browning
Tested-by: Rob Browning
---
 lib/bup/t/tvfs.py | 49 ++++++++++++++++++++++++++++++++++
 lib/bup/vfs2.py   | 67 +++++++++++++++++++++++++++++------------------
 2 files changed, 91 insertions(+), 25 deletions(-)

diff --git a/lib/bup/t/tvfs.py b/lib/bup/t/tvfs.py
index e8387c0..c4ac57d 100644
--- a/lib/bup/t/tvfs.py
+++ b/lib/bup/t/tvfs.py
@@ -129,6 +129,18 @@ def test_item_mode():
         wvpasseq(mode, vfs.item_mode(vfs.Item(oid=oid, meta=mode)))
         wvpasseq(meta.mode, vfs.item_mode(vfs.Item(oid=oid, meta=meta)))
 
+@wvtest
+def test_reverse_suffix_duplicates():
+    suffix = lambda x: tuple(vfs._reverse_suffix_duplicates(x))
+    wvpasseq(('x',), suffix(('x',)))
+    wvpasseq(('x', 'y'), suffix(('x', 'y')))
+    wvpasseq(('x-1', 'x-0'), suffix(('x',) * 2))
+    wvpasseq(['x-%02d' % n for n in reversed(range(11))],
+             list(suffix(('x',) * 11)))
+    wvpasseq(('x-1', 'x-0', 'y'), suffix(('x', 'x', 'y')))
+    wvpasseq(('x', 'y-1', 'y-0'), suffix(('x', 'y', 'y')))
+    wvpasseq(('x', 'y-1', 'y-0', 'z'), suffix(('x', 'y', 'y', 'z')))
+
 @wvtest
 def test_misc():
     with no_lingering_errors():
@@ -373,4 +385,41 @@ def test_contents_with_mismatched_bupm_git_ordering():
             name, item = next(((n, i) for n, i in contents if n == 'foo.'))
             wvpass(S_ISREG(item.meta.mode))
 
+@wvtest
+def test_duplicate_save_dates():
+    with no_lingering_errors():
+        with test_tempdir('bup-tvfs-') as tmpdir:
+            bup_dir = tmpdir + '/bup'
+            environ['GIT_DIR'] = bup_dir
+            environ['BUP_DIR'] = bup_dir
+            environ['TZ'] = 'UTC'
+            git.repodir = bup_dir
+            data_path = tmpdir + '/src'
+            os.mkdir(data_path)
+            with open(data_path + '/file', 'w+') as tmpfile:
+                tmpfile.write(b'canary\n')
+            ex((bup_path, 'init'))
+            ex((bup_path, 'index', '-v', data_path))
+            for i in range(11):
+                ex((bup_path, 'save', '-d', '100000', '-n', 'test', data_path))
+            repo = LocalRepo()
+            res = vfs.resolve(repo, '/test')
+            wvpasseq(2, len(res))
+            name, revlist = res[-1]
+            wvpasseq('test', name)
+            wvpasseq(('.',
+                      '1970-01-02-034640-10',
+                      '1970-01-02-034640-09',
+                      '1970-01-02-034640-08',
+                      '1970-01-02-034640-07',
+                      '1970-01-02-034640-06',
+                      '1970-01-02-034640-05',
+                      '1970-01-02-034640-04',
+                      '1970-01-02-034640-03',
+                      '1970-01-02-034640-02',
+                      '1970-01-02-034640-01',
+                      '1970-01-02-034640-00',
+                      'latest'),
+                     tuple(x[0] for x in vfs.contents(repo, revlist)))
+
 # FIXME: add tests for the want_meta=False cases.
diff --git a/lib/bup/vfs2.py b/lib/bup/vfs2.py
index 09f7f6a..2482779 100644
--- a/lib/bup/vfs2.py
+++ b/lib/bup/vfs2.py
@@ -46,7 +46,7 @@ S_ISDIR(item_mode(item)) refers to a tree.
from __future__ import print_function from collections import namedtuple from errno import ELOOP, ENOENT, ENOTDIR -from itertools import chain, dropwhile, izip +from itertools import chain, dropwhile, groupby, izip, tee from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG from time import localtime, strftime import exceptions, re, sys @@ -494,13 +494,37 @@ def tree_items_with_meta(repo, oid, tree_data, names): for item in tree_items(oid, tree_data, names, bupm): yield item -_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}$') +_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$') +def _reverse_suffix_duplicates(strs): + """Yields the elements of strs, with any runs of duplicate values + suffixed with -N suffixes, where the zero padded integer N + decreases to 0 by 1 (e.g. 10, 09, ..., 00). + + """ + for name, duplicates in groupby(strs): + ndup = len(tuple(duplicates)) + if ndup == 1: + yield name + else: + ndig = len(str(ndup - 1)) + fmt = '%s-' + '%0' + str(ndig) + 'd' + for i in xrange(ndup - 1, -1, -1): + yield fmt % (name, i) + +def _name_for_rev(rev): + commit, (tree_oidx, utc) = rev + assert len(commit) == 40 + return strftime('%Y-%m-%d-%H%M%S', localtime(utc)) + +def _item_for_rev(rev): + commit, (tree_oidx, utc) = rev + assert len(tree_oidx) == 40 + return Item(meta=default_dir_mode, oid=tree_oidx.decode('hex')) + def revlist_items(repo, oid, names): assert len(oid) == 20 oidx = oid.encode('hex') - - # There might well be duplicate names in this dir (time resolution is secs) names = frozenset(name for name in (names or tuple()) \ if _save_name_rx.match(name) or name in ('.', 'latest')) @@ -511,36 +535,33 @@ def revlist_items(repo, oid, names): revs = repo.rev_list((oidx,), format='%T %at', parse=parse_rev_auth_secs) first_rev = next(revs, None) revs = chain((first_rev,), revs) + rev_items, rev_names = tee(revs) + revs = None # Don't disturb the tees + rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in 
rev_names) + rev_items = (_item_for_rev(x) for x in rev_items) if not names: - for commit, (tree_oidx, utc) in revs: - assert len(tree_oidx) == 40 - name = strftime('%Y-%m-%d-%H%M%S', localtime(utc)) - yield name, Item(meta=default_dir_mode, oid=tree_oidx.decode('hex')) - if first_rev: - commit, (tree_oidx, utc) = first_rev - yield 'latest', Item(meta=default_dir_mode, - oid=tree_oidx.decode('hex')) + for item in rev_items: + yield next(rev_names), item + yield 'latest', _item_for_rev(first_rev) return # Revs are in reverse chronological order by default last_name = min(names) - for commit, (tree_oidx, utc) in revs: - assert len(tree_oidx) == 40 - name = strftime('%Y-%m-%d-%H%M%S', localtime(utc)) + for item in rev_items: + name = next(rev_names) # Might have -N dup suffix if name < last_name: break if not name in names: continue - yield name, Item(meta=default_dir_mode, oid=tree_oidx.decode('hex')) + yield name, item # FIXME: need real short circuit... - for _ in revs: - pass + for _ in rev_items: pass + for _ in rev_names: pass - if first_rev and 'latest' in names: - commit, (tree_oidx, utc) = first_rev - yield 'latest', Item(meta=default_dir_mode, oid=tree_oidx.decode('hex')) + if 'latest' in names: + yield 'latest', _item_for_rev(first_rev) def tags_items(repo, names): global _tags @@ -601,10 +622,6 @@ def contents(repo, item, names=None, want_meta=True): item) for all items, including, a first item named '.' representing the container itself. - Any given name might produce more than one result. For example, - saves to a branch that happen within the same second currently end - up with the same VFS timestmap, i.e. /foo/2017-09-10-150833/. - Note that want_meta is advisory. For any given item, item.meta might be a Metadata instance or a mode, and if the former, meta.size might be None. Missing sizes can be computed via via -- 2.39.2