arthur.barton.de Git - bup.git/blobdiff - lib/bup/t/tvfs.py
Don't return invalid data for offset reads (observed via fuse)
[bup.git] / lib / bup / t / tvfs.py
index e8387c0089eaf5b7a8eb884556459dc316f4d91e..3000992fcb034802318a77cc1d3f1010297b09c0 100644 (file)
@@ -1,15 +1,18 @@
 
-from __future__ import print_function
+from __future__ import absolute_import, print_function
 from collections import namedtuple
+from errno import ELOOP, ENOTDIR
 from io import BytesIO
 from os import environ, symlink
+from random import Random, randint
 from stat import S_IFDIR, S_IFREG, S_ISDIR, S_ISREG
 from sys import stderr
 from time import localtime, strftime
 
 from wvtest import *
 
-from bup import git, metadata, vfs2 as vfs
+from bup._helpers import write_random
+from bup import git, metadata, vfs
 from bup.git import BUP_CHUNKED
 from bup.helpers import exc, exo, shstr
 from bup.metadata import Metadata
@@ -25,6 +28,46 @@ def ex(cmd, **kwargs):
     print(shstr(cmd), file=stderr)
     return exc(cmd, **kwargs)
 
+@wvtest
+def test_cache_behavior():
+    """Exercise the vfs cache with its size limit lowered to two items."""
+    saved_limit = vfs._cache_max_items
+    try:
+        vfs._cache_max_items = 2
+        vfs.clear_cache()
+        wvpasseq({}, vfs._cache)
+        wvpasseq([], vfs._cache_keys)
+        wvfail(vfs._cache_keys)
+        wvexcept(AssertionError, vfs.cache_notice, 'x', 1)
+        oid_a, oid_b, oid_c = b'\0' * 20, b'\1' * 20, b'\2' * 20
+        everything = {oid_a: 'something', oid_b: 'something else',
+                      oid_c: 'and also'}
+        vfs.cache_notice(oid_a, 'something')
+        wvpasseq({oid_a: 'something'}, vfs._cache)
+        wvpasseq([oid_a], vfs._cache_keys)
+        vfs.cache_notice(oid_b, 'something else')
+        wvpasseq({oid_a: 'something', oid_b: 'something else'}, vfs._cache)
+        wvpasseq(frozenset((oid_a, oid_b)), frozenset(vfs._cache_keys))
+        # With the limit at two, only two of the three entries may remain.
+        vfs.cache_notice(oid_c, 'and also')
+        wvpasseq(2, len(vfs._cache))
+        wvpass(frozenset(vfs._cache.iteritems())
+               < frozenset(everything.iteritems()))
+        wvpasseq(2, len(vfs._cache_keys))
+        wvpass(frozenset(vfs._cache_keys) < frozenset(everything))
+        vfs.clear_cache()
+        wvpasseq({}, vfs._cache)
+        wvpasseq([], vfs._cache_keys)
+    finally:
+        vfs._cache_max_items = saved_limit
+        vfs.clear_cache()
+
+## The clear_cache() calls below are to make sure that the test starts
+## from a known state since at the moment the cache entry for a given
+## item (like a commit) can change.  For example, its meta value might
+## be promoted from a mode to a Metadata instance once the tree it
+## refers to is traversed.
+
 TreeDictValue = namedtuple('TreeDictValue', ('name', 'oid', 'meta'))
 
 def tree_items(repo, oid):
@@ -35,7 +78,7 @@ def tree_items(repo, oid):
     """
     # This is a simpler approach than the one in the vfs, used to
     # cross-check its behavior.
-    tree_data, bupm_oid = vfs._tree_data_and_bupm(repo, oid)
+    tree_data, bupm_oid = vfs.tree_data_and_bupm(repo, oid)
     bupm = vfs._FileReader(repo, bupm_oid) if bupm_oid else None
     try:
         maybe_meta = lambda : Metadata.read(bupm) if bupm else None
@@ -129,6 +172,18 @@ def test_item_mode():
         wvpasseq(mode, vfs.item_mode(vfs.Item(oid=oid, meta=mode)))
         wvpasseq(meta.mode, vfs.item_mode(vfs.Item(oid=oid, meta=meta)))
 
+@wvtest
+def test_reverse_suffix_duplicates():
+    """Duplicate names must gain -N suffixes that count down to zero."""
+    cases = ((('x',), ('x',)), (('x', 'y'), ('x', 'y')),
+             (('x-1', 'x-0'), ('x',) * 2),
+             (tuple('x-%02d' % n for n in reversed(range(11))), ('x',) * 11),
+             (('x-1', 'x-0', 'y'), ('x', 'x', 'y')),
+             (('x', 'y-1', 'y-0'), ('x', 'y', 'y')),
+             (('x', 'y-1', 'y-0', 'z'), ('x', 'y', 'y', 'z')))
+    for expected, names in cases:
+        wvpasseq(expected, tuple(vfs._reverse_suffix_duplicates(names)))
+
 @wvtest
 def test_misc():
     with no_lingering_errors():
@@ -201,9 +256,12 @@ def test_resolve():
             save_time = 100000
             save_time_str = strftime('%Y-%m-%d-%H%M%S', localtime(save_time))
             os.mkdir(data_path)
+            os.mkdir(data_path + '/dir')
             with open(data_path + '/file', 'w+') as tmpfile:
                 print('canary', file=tmpfile)
-            symlink('file', data_path + '/symlink')
+            symlink('file', data_path + '/file-symlink')
+            symlink('dir', data_path + '/dir-symlink')
+            symlink('not-there', data_path + '/bad-symlink')
             ex((bup_path, 'init'))
             ex((bup_path, 'index', '-v', data_path))
             ex((bup_path, 'save', '-d', str(save_time), '-tvvn', 'test',
@@ -214,17 +272,22 @@ def test_resolve():
             tip_hash = exo(('git', 'show-ref', 'refs/heads/test'))[0]
             tip_oidx = tip_hash.strip().split()[0]
             tip_oid = tip_oidx.decode('hex')
-            tip_meta = Metadata()
-            tip_meta.mode = S_IFDIR | 0o755
-            tip_meta.uid = tip_meta.gid = tip_meta.size = 0
-            tip_meta.atime = tip_meta.mtime = tip_meta.ctime = save_time * 10**9
-            test_revlist = vfs.RevList(meta=tip_meta, oid=tip_oid)
             tip_tree_oidx = exo(('git', 'log', '--pretty=%T', '-n1',
                                  tip_oidx))[0].strip()
             tip_tree_oid = tip_tree_oidx.decode('hex')
             tip_tree = tree_dict(repo, tip_tree_oid)
+            test_revlist_w_meta = vfs.RevList(meta=tip_tree['.'].meta,
+                                              oid=tip_oid)
+            expected_latest_item = vfs.Commit(meta=S_IFDIR | 0o755,
+                                              oid=tip_tree_oid,
+                                              coid=tip_oid)
+            expected_latest_item_w_meta = vfs.Commit(meta=tip_tree['.'].meta,
+                                                     oid=tip_tree_oid,
+                                                     coid=tip_oid)
+            expected_test_tag_item = expected_latest_item
 
             wvstart('resolve: /')
+            vfs.clear_cache()
             res = resolve(repo, '/')
             wvpasseq(1, len(res))
             wvpasseq((('', vfs._root),), res)
@@ -232,10 +295,26 @@ def test_resolve():
             root_content = frozenset(vfs.contents(repo, root_item))
             wvpasseq(frozenset([('.', root_item),
                                 ('.tag', vfs._tags),
-                                ('test', test_revlist)]),
+                                ('test', test_revlist_w_meta)]),
                      root_content)
+            for path in ('//', '/.', '/./', '/..', '/../',  # all name the root
+                         '/test/latest/dir/../../..',
+                         '/test/latest/dir/../../../',
+                         '/test/latest/dir/../../../.',
+                         '/test/latest/dir/../../..//',
+                         '/test//latest/dir/../../..',
+                         '/test/./latest/dir/../../..',
+                         '/test/././latest/dir/../../..',
+                         '/test/.//./latest/dir/../../..',
+                         '/test//.//.//latest/dir/../../..',
+                         '/test//./latest/dir/../../..'):
+                wvstart('resolve: ' + path)
+                vfs.clear_cache()
+                res = resolve(repo, path)
+                wvpasseq((('', vfs._root),), res)
 
             wvstart('resolve: /.tag')
+            vfs.clear_cache()
             res = resolve(repo, '/.tag')
             wvpasseq(2, len(res))
             wvpasseq((('', vfs._root), ('.tag', vfs._tags)),
@@ -243,78 +322,282 @@ def test_resolve():
             ignore, tag_item = res[1]
             tag_content = frozenset(vfs.contents(repo, tag_item))
             wvpasseq(frozenset([('.', tag_item),
-                                ('test-tag', test_revlist)]),
+                                ('test-tag', expected_test_tag_item)]),
                      tag_content)
 
             wvstart('resolve: /test')
+            vfs.clear_cache()
             res = resolve(repo, '/test')
             wvpasseq(2, len(res))
-            wvpasseq((('', vfs._root), ('test', test_revlist)), res)
+            wvpasseq((('', vfs._root), ('test', test_revlist_w_meta)), res)
             ignore, test_item = res[1]
             test_content = frozenset(vfs.contents(repo, test_item))
-            expected_latest_item = vfs.Item(meta=S_IFDIR | 0o755,
-                                                    oid=tip_tree_oid)
-            wvpasseq(frozenset([('.', test_revlist),
-                                (save_time_str, expected_latest_item),
-                                ('latest', expected_latest_item)]),
+            # latest has metadata here due to caching
+            wvpasseq(frozenset([('.', test_revlist_w_meta),
+                                (save_time_str, expected_latest_item_w_meta),
+                                ('latest', expected_latest_item_w_meta)]),
                      test_content)
 
             wvstart('resolve: /test/latest')
+            vfs.clear_cache()
             res = resolve(repo, '/test/latest')
             wvpasseq(3, len(res))
-            expected_latest_item_w_meta = vfs.Item(meta=tip_tree['.'].meta,
-                                                   oid=tip_tree_oid)
+            expected_latest_item_w_meta = vfs.Commit(meta=tip_tree['.'].meta,
+                                                     oid=tip_tree_oid,
+                                                     coid=tip_oid)
             expected = (('', vfs._root),
-                        ('test', test_revlist),
+                        ('test', test_revlist_w_meta),
                         ('latest', expected_latest_item_w_meta))
             wvpasseq(expected, res)
             ignore, latest_item = res[2]
             latest_content = frozenset(vfs.contents(repo, latest_item))
             expected = frozenset((x.name, vfs.Item(oid=x.oid, meta=x.meta))
                                  for x in (tip_tree[name]
-                                           for name in ('.', 'file',
-                                                        'symlink')))
+                                           for name in ('.',
+                                                        'bad-symlink',
+                                                        'dir',
+                                                        'dir-symlink',
+                                                        'file',
+                                                        'file-symlink')))
             wvpasseq(expected, latest_content)
 
-            wvstart('resolve: /test/latest/foo')
+            wvstart('resolve: /test/latest/file')
+            vfs.clear_cache()
             res = resolve(repo, '/test/latest/file')
             wvpasseq(4, len(res))
             expected_file_item_w_meta = vfs.Item(meta=tip_tree['file'].meta,
                                                  oid=tip_tree['file'].oid)
             expected = (('', vfs._root),
-                        ('test', test_revlist),
+                        ('test', test_revlist_w_meta),
                         ('latest', expected_latest_item_w_meta),
                         ('file', expected_file_item_w_meta))
             wvpasseq(expected, res)
 
-            wvstart('resolve: /test/latest/symlink')
-            res = resolve(repo, '/test/latest/symlink')
+            wvstart('resolve: /test/latest/bad-symlink')
+            vfs.clear_cache()
+            res = resolve(repo, '/test/latest/bad-symlink')
+            wvpasseq(4, len(res))
+            expected = (('', vfs._root),
+                        ('test', test_revlist_w_meta),
+                        ('latest', expected_latest_item_w_meta),
+                        ('not-there', None))
+            wvpasseq(expected, res)
+
+            wvstart('lresolve: /test/latest/bad-symlink')
+            vfs.clear_cache()
+            res = lresolve(repo, '/test/latest/bad-symlink')
+            wvpasseq(4, len(res))
+            bad_symlink_value = tip_tree['bad-symlink']
+            expected_bad_symlink_item_w_meta = vfs.Item(meta=bad_symlink_value.meta,
+                                                        oid=bad_symlink_value.oid)
+            expected = (('', vfs._root),
+                        ('test', test_revlist_w_meta),
+                        ('latest', expected_latest_item_w_meta),
+                        ('bad-symlink', expected_bad_symlink_item_w_meta))
+            wvpasseq(expected, res)
+
+            wvstart('resolve: /test/latest/file-symlink')
+            vfs.clear_cache()
+            res = resolve(repo, '/test/latest/file-symlink')
             wvpasseq(4, len(res))
             expected = (('', vfs._root),
-                        ('test', test_revlist),
+                        ('test', test_revlist_w_meta),
                         ('latest', expected_latest_item_w_meta),
                         ('file', expected_file_item_w_meta))
             wvpasseq(expected, res)
 
-            wvstart('lresolve: /test/latest/symlink')
-            res = lresolve(repo, '/test/latest/symlink')
+            wvstart('lresolve: /test/latest/file-symlink')
+            vfs.clear_cache()
+            res = lresolve(repo, '/test/latest/file-symlink')
             wvpasseq(4, len(res))
-            symlink_value = tip_tree['symlink']
-            expected_symlink_item_w_meta = vfs.Item(meta=symlink_value.meta,
-                                                    oid=symlink_value.oid)
+            file_symlink_value = tip_tree['file-symlink']
+            expected_file_symlink_item_w_meta = vfs.Item(meta=file_symlink_value.meta,
+                                                         oid=file_symlink_value.oid)
             expected = (('', vfs._root),
-                        ('test', test_revlist),
+                        ('test', test_revlist_w_meta),
                         ('latest', expected_latest_item_w_meta),
-                        ('symlink', expected_symlink_item_w_meta))
+                        ('file-symlink', expected_file_symlink_item_w_meta))
             wvpasseq(expected, res)
 
             wvstart('resolve: /test/latest/missing')
+            vfs.clear_cache()
             res = resolve(repo, '/test/latest/missing')
             wvpasseq(4, len(res))
             name, item = res[-1]
             wvpasseq('missing', name)
             wvpass(item is None)
 
+            for path in ('/test/latest/file/',
+                         '/test/latest/file/.',
+                         '/test/latest/file/..',
+                         '/test/latest/file/../',
+                         '/test/latest/file/../.',
+                         '/test/latest/file/../..',
+                         '/test/latest/file/foo'):
+                wvstart('resolve: ' + path)
+                vfs.clear_cache()
+                try:
+                    resolve(repo, path)
+                except vfs.IOError as res_ex:
+                    wvpasseq(ENOTDIR, res_ex.errno)
+                    wvpasseq(['', 'test', 'latest', 'file'],
+                             [name for name, item in res_ex.terminus])
+
+            for path in ('/test/latest/file-symlink/',
+                         '/test/latest/file-symlink/.',
+                         '/test/latest/file-symlink/..',
+                         '/test/latest/file-symlink/../',
+                         '/test/latest/file-symlink/../.',
+                         '/test/latest/file-symlink/../..'):
+                wvstart('lresolve: ' + path)
+                vfs.clear_cache()
+                try:
+                    lresolve(repo, path)
+                except vfs.IOError as res_ex:
+                    wvpasseq(ENOTDIR, res_ex.errno)
+                    wvpasseq(['', 'test', 'latest', 'file'],
+                             [name for name, item in res_ex.terminus])
+
+            wvstart('resolve: non-directory parent')
+            vfs.clear_cache()
+            file_res = resolve(repo, '/test/latest/file')
+            try:
+                resolve(repo, 'foo', parent=file_res)
+            except vfs.IOError as res_ex:
+                wvpasseq(ENOTDIR, res_ex.errno)
+                wvpasseq(None, res_ex.terminus)
+
+            wvstart('lresolve: /test/latest/dir-symlink')
+            vfs.clear_cache()
+            res = lresolve(repo, '/test/latest/dir-symlink')
+            wvpasseq(4, len(res))
+            dir_symlink_value = tip_tree['dir-symlink']
+            expected_dir_symlink_item_w_meta = vfs.Item(meta=dir_symlink_value.meta,
+                                                         oid=dir_symlink_value.oid)
+            expected = (('', vfs._root),
+                        ('test', test_revlist_w_meta),
+                        ('latest', expected_latest_item_w_meta),
+                        ('dir-symlink', expected_dir_symlink_item_w_meta))
+            wvpasseq(expected, res)
+
+            dir_value = tip_tree['dir']
+            expected_dir_item = vfs.Item(oid=dir_value.oid,
+                                         meta=tree_dict(repo, dir_value.oid)['.'].meta)
+            expected = (('', vfs._root),
+                        ('test', test_revlist_w_meta),
+                        ('latest', expected_latest_item_w_meta),
+                        ('dir', expected_dir_item))
+            for resname, resolver in (('resolve', resolve),
+                                      ('lresolve', lresolve)):
+                for path in ('/test/latest/dir-symlink/',
+                             '/test/latest/dir-symlink/.'):
+                    wvstart(resname + ': ' + path)
+                    vfs.clear_cache()
+                    res = resolver(repo, path)
+                    wvpasseq(4, len(res))
+                    wvpasseq(expected, res)
+            wvstart('resolve: /test/latest/dir-symlink')
+            vfs.clear_cache()
+            res = resolve(repo, path)
+            wvpasseq(4, len(res))
+            wvpasseq(expected, res)
+
+def write_sized_random_content(parent_dir, size, seed):
+    """Create parent_dir/<size> and let write_random() fill it (size, seed)."""
+    with open('%s/%d' % (parent_dir, size), 'wb') as out:
+        write_random(out.fileno(), size, seed, 0)
+
+def validate_vfs_streaming_read(repo, item, expected_path, read_sizes):
+    """Read item via vfs.fopen in read_size steps; compare to expected_path."""
+    for read_size in read_sizes:
+        with open(expected_path, 'rb') as expected:
+            with vfs.fopen(repo, item) as actual:
+                while True:
+                    ref, got = expected.read(read_size), actual.read(read_size)
+                    if not (ref and got):
+                        break
+                    wvpassge(read_size, len(ref))
+                    wvpassge(read_size, len(got))
+                    wvpasseq(len(ref), len(got))
+                    wvpass(ref == got)
+                wvpasseq('', ref)
+                wvpasseq('', got)
+
+def validate_vfs_seeking_read(repo, item, expected_path, read_sizes):
+    """Compare expected_path to item, reopening and seeking for each read."""
+    def read_at(offset, count):
+        # Fresh handle per read, so only seek() positions the reader.
+        with vfs.fopen(repo, item) as actual:
+            actual.seek(offset)
+            wvpasseq(offset, actual.tell())
+            data = actual.read(count)
+            wvpasseq(offset + len(data), actual.tell())
+            return offset + len(data), data
+
+    for read_size in read_sizes:
+        with open(expected_path, 'rb') as expected:
+            ref, got, pos = expected.read(read_size), None, 0
+            while ref:
+                pos, got = read_at(pos, read_size)
+                wvpassge(read_size, len(ref))
+                wvpassge(read_size, len(got))
+                wvpasseq(len(ref), len(got))
+                wvpass(ref == got)
+                if not got:
+                    break
+                ref = expected.read(read_size)
+            else:
+                # Expected file is exhausted; the vfs must be at EOF as well.
+                pos, got = read_at(pos, read_size)
+            wvpasseq('', ref)
+            wvpasseq('', got)
+
+@wvtest
+def test_read_and_seek():
+    """Round-trip random files through a save and read them back via the vfs.
+
+    Each source file is named after its size; its content must match what
+    the vfs returns for streaming and for seek-then-read access.
+    """
+    with no_lingering_errors():
+        with test_tempdir('bup-tvfs-read-') as tmpdir:
+            bup_dir = tmpdir + '/bup'
+            environ['GIT_DIR'] = bup_dir
+            environ['BUP_DIR'] = bup_dir
+            git.repodir = bup_dir
+            repo = LocalRepo()
+            data_path = tmpdir + '/src'
+            os.mkdir(data_path)
+            # Log the seed; presumably so a failing run can be reproduced.
+            seed = randint(-(1 << 31), (1 << 31) - 1)
+            rand = Random(seed)
+            print('test_read seed:', seed, file=sys.stderr)
+            max_size = 2 * 1024 * 1024
+            sizes = set(rand.randint(1, max_size) for _ in xrange(5))
+            # Always include the smallest and largest allowed sizes.
+            sizes.update((1, max_size))
+            for size in sizes:
+                write_sized_random_content(data_path, size, seed)
+            ex((bup_path, 'init'))
+            ex((bup_path, 'index', '-v', data_path))
+            ex((bup_path, 'save', '-d', '100000', '-tvvn', 'test', '--strip',
+                data_path))
+            read_sizes = set(rand.randint(1, max_size) for _ in xrange(10))
+            # NOTE(review): no-op, sizes already holds both values; possibly
+            # read_sizes was meant -- confirm before changing.
+            sizes.update((1, max_size))
+            print('test_read src sizes:', sizes, file=sys.stderr)
+            print('test_read read sizes:', read_sizes, file=sys.stderr)
+            # Check every saved file against every read size.
+            for size in sizes:
+                src_path = '%s/%d' % (data_path, size)
+                res = vfs.resolve(repo, '/test/latest/' + str(size))
+                _, item = res[-1]
+                wvpasseq(size, vfs.item_size(repo, item))
+                validate_vfs_streaming_read(repo, item, src_path, read_sizes)
+                validate_vfs_seeking_read(repo, item, src_path, read_sizes)
+
 @wvtest
 def test_resolve_loop():
     with no_lingering_errors():
@@ -333,7 +616,12 @@ def test_resolve_loop():
             ex((bup_path, 'index', '-v', data_path))
             ex((bup_path, 'save', '-d', '100000', '-tvvn', 'test', '--strip',
                 data_path))
-            wvexcept(vfs.Loop, resolve, repo, '/test/latest/loop')
+            try:
+                resolve(repo, '/test/latest/loop')
+            except vfs.IOError as res_ex:
+                wvpasseq(ELOOP, res_ex.errno)
+                wvpasseq(['', 'test', 'latest', 'loop'],
+                         [name for name, item in res_ex.terminus])
 
 @wvtest
 def test_contents_with_mismatched_bupm_git_ordering():
@@ -373,4 +661,41 @@ def test_contents_with_mismatched_bupm_git_ordering():
             name, item = next(((n, i) for n, i in contents if n == 'foo.'))
             wvpass(S_ISREG(item.meta.mode))
 
+@wvtest
+def test_duplicate_save_dates():
+    """Check the revlist names when several saves share one timestamp.
+
+    Eleven saves are made with the same -d value; each must be listed under
+    /test with its own zero-padded -NN suffix, next to '.' and 'latest'.
+    """
+    save_count = 11
+    with no_lingering_errors():
+        with test_tempdir('bup-tvfs-') as tmpdir:
+            bup_dir = tmpdir + '/bup'
+            environ['GIT_DIR'] = bup_dir
+            environ['BUP_DIR'] = bup_dir
+            # The expected names below are the UTC rendering of -d 100000.
+            environ['TZ'] = 'UTC'
+            git.repodir = bup_dir
+            data_path = tmpdir + '/src'
+            os.mkdir(data_path)
+            with open(data_path + '/file', 'w+') as tmpfile:
+                tmpfile.write(b'canary\n')
+            ex((bup_path, 'init'))
+            ex((bup_path, 'index', '-v', data_path))
+            # Identical -d on every save, so all of them collide on one date.
+            for _ in range(save_count):
+                ex((bup_path, 'save', '-d', '100000', '-n', 'test', data_path))
+            repo = LocalRepo()
+            # Expect two path entries; the last one should be named 'test'.
+            res = vfs.resolve(repo, '/test')
+            wvpasseq(2, len(res))
+            name, revlist = res[-1]
+            wvpasseq('test', name)
+            dated = ['1970-01-02-034640-%02d' % n
+                     for n in range(save_count)]
+            expected = tuple(['.'] + dated + ['latest'])
+            found = sorted(entry[0] for entry in vfs.contents(repo, revlist))
+            wvpasseq(expected, tuple(found))
+
 # FIXME: add tests for the want_meta=False cases.