Don't return invalid data for offset reads (observed via fuse)
author    Rob Browning <rlb@defaultvalue.org>
          Mon, 3 Dec 2018 18:58:03 +0000 (12:58 -0600)
committer Rob Browning <rlb@defaultvalue.org>
          Sat, 8 Dec 2018 22:12:24 +0000 (16:12 -0600)
Fix a serious bug in the vfs that could cause it to return invalid
data for chunked file read()s that don't start at the beginning of
the file.  The issue was first observed via fuse, which makes sense
given that fuse streams a file in chunks, each of which (currently)
comes from an independent FileReader seeked to a successively higher
offset.
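
To make that access pattern concrete, each fuse read amounts to
something like the sketch below (the loop and the 128k read size are
illustrative assumptions, not code from this commit); every chunk
after the first is fetched through a brand-new reader that must honor
a nonzero seek offset:

    ofs = 0
    while True:
        # each pass stands in for one fuse read(): a fresh FileReader,
        # seeked to wherever the previous read left off
        with vfs.fopen(repo, item) as f:
            f.seek(ofs)
            buf = f.read(128 * 1024)
        if not buf:
            break
        ofs += len(buf)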

The previous dropwhile() invocation in the _tree_chunks generator,
intended to skip past chunks that lie entirely before the requested
offset, was simple but wrong: it dropped every chunk whose starting
offset was less than the requested offset, including the chunk that
actually contains that offset, and so skipped too far.  Replace it
with _skip_chunks_before_offset(), which retains the last chunk
starting at or before the offset.
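
As a minimal illustration (with made-up chunk offsets and oids),
consider a read starting at 0x1800 in a file whose chunks begin at
0x0, 0x1000, and 0x2000; the old predicate discards the 0x1000 chunk
even though it contains the requested offset:

    from itertools import dropwhile

    # (mode, hex-offset name, oid) entries, as per tree_decode()
    chunks = [(0o100644, '0', 'oid0'),
              (0o100644, '1000', 'oid1'),
              (0o100644, '2000', 'oid2')]
    startofs = 0x1800
    kept = dropwhile(lambda ent: int(ent[1], 16) < startofs, chunks)
    print([name for _, name, _ in kept])
    # ['2000'] -- the '1000' chunk, which contains offset 0x1800,
    # has been dropped, so the read returns the wrong data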

Add randomized tests of both simple streaming reads and seek-offset
reads; these catch the problem, cover additional cases, and should
prevent regressions.

Thanks to voldial for reporting the problem and providing an easy way
to reproduce it.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/t/tvfs.py
lib/bup/vfs.py

diff --git a/lib/bup/t/tvfs.py b/lib/bup/t/tvfs.py
index d8670a71b774f5d35ff0e8eb4f6f24e26c759f7d..3000992fcb034802318a77cc1d3f1010297b09c0 100644
@@ -4,12 +4,14 @@ from collections import namedtuple
 from errno import ELOOP, ENOTDIR
 from io import BytesIO
 from os import environ, symlink
+from random import Random, randint
 from stat import S_IFDIR, S_IFREG, S_ISDIR, S_ISREG
 from sys import stderr
 from time import localtime, strftime
 
 from wvtest import *
 
+from bup._helpers import write_random
 from bup import git, metadata, vfs
 from bup.git import BUP_CHUNKED
 from bup.helpers import exc, exo, shstr
@@ -500,6 +502,102 @@ def test_resolve():
             wvpasseq(4, len(res))
             wvpasseq(expected, res)
 
+def write_sized_random_content(parent_dir, size, seed):
+    verbose = 0
+    with open('%s/%d' % (parent_dir, size), 'wb') as f:
+        write_random(f.fileno(), size, seed, verbose)
+
+def validate_vfs_streaming_read(repo, item, expected_path, read_sizes):
+    for read_size in read_sizes:
+        with open(expected_path, 'rb') as expected:
+            with vfs.fopen(repo, item) as actual:
+                ex_buf = expected.read(read_size)
+                act_buf = actual.read(read_size)
+                while ex_buf and act_buf:
+                    wvpassge(read_size, len(ex_buf))
+                    wvpassge(read_size, len(act_buf))
+                    wvpasseq(len(ex_buf), len(act_buf))
+                    wvpass(ex_buf == act_buf)
+                    ex_buf = expected.read(read_size)
+                    act_buf = actual.read(read_size)
+                wvpasseq('', ex_buf)
+                wvpasseq('', act_buf)
+
+def validate_vfs_seeking_read(repo, item, expected_path, read_sizes):
+    def read_act(act_pos):
+        with vfs.fopen(repo, item) as actual:
+            actual.seek(act_pos)
+            wvpasseq(act_pos, actual.tell())
+            act_buf = actual.read(read_size)
+            act_pos += len(act_buf)
+            wvpasseq(act_pos, actual.tell())
+            return act_pos, act_buf
+
+    for read_size in read_sizes:
+        with open(expected_path, 'rb') as expected:
+            ex_buf = expected.read(read_size)
+            act_buf = None
+            act_pos = 0
+            while ex_buf:
+                act_pos, act_buf = read_act(act_pos)
+                wvpassge(read_size, len(ex_buf))
+                wvpassge(read_size, len(act_buf))
+                wvpasseq(len(ex_buf), len(act_buf))
+                wvpass(ex_buf == act_buf)
+                if not act_buf:
+                    break
+                ex_buf = expected.read(read_size)
+            else:  # hit expected eof first
+                act_pos, act_buf = read_act(act_pos)
+            wvpasseq('', ex_buf)
+            wvpasseq('', act_buf)
+
+@wvtest
+def test_read_and_seek():
+    # Write a set of randomly sized files containing random data whose
+    # names are their sizes, and then verify that what we get back
+    # from the vfs when seeking and reading with various block sizes
+    # matches the original content.
+    with no_lingering_errors():
+        with test_tempdir('bup-tvfs-read-') as tmpdir:
+            resolve = vfs.resolve
+            bup_dir = tmpdir + '/bup'
+            environ['GIT_DIR'] = bup_dir
+            environ['BUP_DIR'] = bup_dir
+            git.repodir = bup_dir
+            repo = LocalRepo()
+            data_path = tmpdir + '/src'
+            os.mkdir(data_path)
+            seed = randint(-(1 << 31), (1 << 31) - 1)
+            rand = Random()
+            rand.seed(seed)
+            print('test_read seed:', seed, file=sys.stderr)
+            max_size = 2 * 1024 * 1024
+            sizes = set((rand.randint(1, max_size) for _ in xrange(5)))
+            sizes.add(1)
+            sizes.add(max_size)
+            for size in sizes:
+                write_sized_random_content(data_path, size, seed)
+            ex((bup_path, 'init'))
+            ex((bup_path, 'index', '-v', data_path))
+            ex((bup_path, 'save', '-d', '100000', '-tvvn', 'test', '--strip',
+                data_path))
+            read_sizes = set((rand.randint(1, max_size) for _ in xrange(10)))
+            read_sizes.add(1)
+            read_sizes.add(max_size)
+            print('test_read src sizes:', sizes, file=sys.stderr)
+            print('test_read read sizes:', read_sizes, file=sys.stderr)
+            for size in sizes:
+                res = resolve(repo, '/test/latest/' + str(size))
+                _, item = res[-1]
+                wvpasseq(size, vfs.item_size(repo, res[-1][1]))
+                validate_vfs_streaming_read(repo, item,
+                                            '%s/%d' % (data_path, size),
+                                            read_sizes)
+                validate_vfs_seeking_read(repo, item,
+                                          '%s/%d' % (data_path, size),
+                                          read_sizes)
+
 @wvtest
 def test_resolve_loop():
     with no_lingering_errors():
diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py
index b84e4c886f25a1dcf50e88cb417c286cd3806b36..99ca85d39426a0de4e35617625872d9b804f9491 100644
@@ -94,12 +94,25 @@ def _normal_or_chunked_file_size(repo, oid):
         _, obj_t, size = next(it)
     return ofs + sum(len(b) for b in it)
 
+def _skip_chunks_before_offset(tree, offset):
+    prev_ent = next(tree, None)
+    if not prev_ent:
+        return tree
+    ent = None
+    for ent in tree:
+        ent_ofs = int(ent[1], 16)
+        if ent_ofs > offset:
+            return chain([prev_ent, ent], tree)
+        if ent_ofs == offset:
+            return chain([ent], tree)
+        prev_ent = ent
+    return [prev_ent]
+
 def _tree_chunks(repo, tree, startofs):
     "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
     assert(startofs >= 0)
     # name is the chunk's hex offset in the original file
-    tree = dropwhile(lambda (_1, name, _2): int(name, 16) < startofs, tree)
-    for mode, name, oid in tree:
+    for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
         ofs = int(name, 16)
         skipmore = startofs - ofs
         if skipmore < 0:
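
For reference, the new helper behaves like this on the same made-up
entries as above (it's module-private, so importing it from bup.vfs
this way is illustration only, not a supported API):

    from bup.vfs import _skip_chunks_before_offset

    m = 0o100644
    tree = iter([(m, '0', 'o0'), (m, '1000', 'o1'), (m, '2000', 'o2')])
    kept = _skip_chunks_before_offset(tree, 0x1800)
    print([name for _, name, _ in kept])
    # ['1000', '2000'] -- the chunk containing offset 0x1800 is kept,
    # and _tree_chunks() then skips the remaining 0x800 bytes inside it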