]> arthur.barton.de Git - bup.git/commitdiff
vfs2._resolve_path: improve handling ENOTDIR, absolute paths, etc.
authorRob Browning <rlb@defaultvalue.org>
Sat, 9 Dec 2017 19:46:13 +0000 (13:46 -0600)
committerRob Browning <rlb@defaultvalue.org>
Sat, 16 Dec 2017 23:29:23 +0000 (17:29 -0600)
Handle and test various path_resolution(7) cases more carefully.
e.g. ensure a trailing '/' or '/.' forces resolution, ensure a
non-final path element that's not a directory produces ENOTDIR, ...
Add tests for the these, for bad symlink handling, and a for number of
other cases.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/t/tvfs.py
lib/bup/vfs2.py

index 7cbd9467a6ad381bfa15751d2dece79e13c36ed2..7dbc6e8f555aa071a51ba29cae142463c8de2cb5 100644 (file)
@@ -1,7 +1,7 @@
 
 from __future__ import print_function
 from collections import namedtuple
-from errno import ELOOP
+from errno import ELOOP, ENOTDIR
 from io import BytesIO
 from os import environ, symlink
 from stat import S_IFDIR, S_IFREG, S_ISDIR, S_ISREG
@@ -214,9 +214,12 @@ def test_resolve():
             save_time = 100000
             save_time_str = strftime('%Y-%m-%d-%H%M%S', localtime(save_time))
             os.mkdir(data_path)
+            os.mkdir(data_path + '/dir')
             with open(data_path + '/file', 'w+') as tmpfile:
                 print('canary', file=tmpfile)
-            symlink('file', data_path + '/symlink')
+            symlink('file', data_path + '/file-symlink')
+            symlink('dir', data_path + '/dir-symlink')
+            symlink('not-there', data_path + '/bad-symlink')
             ex((bup_path, 'init'))
             ex((bup_path, 'index', '-v', data_path))
             ex((bup_path, 'save', '-d', str(save_time), '-tvvn', 'test',
@@ -249,6 +252,20 @@ def test_resolve():
                                 ('.tag', vfs._tags),
                                 ('test', test_revlist)]),
                      root_content)
+            for path in ('//', '/.', '/./', '/..', '/../',
+                         '/test/latest/dir/../../..',
+                         '/test/latest/dir/../../../',
+                         '/test/latest/dir/../../../.',
+                         '/test/latest/dir/../../..//',
+                         '/test//latest/dir/../../..',
+                         '/test/./latest/dir/../../..',
+                         '/test/././latest/dir/../../..',
+                         '/test/.//./latest/dir/../../..',
+                         '/test//.//.//latest/dir/../../..'
+                         '/test//./latest/dir/../../..'):
+                wvstart('resolve: ' + path)
+                res = resolve(repo, path)
+                wvpasseq((('', vfs._root),), res)
 
             wvstart('resolve: /.tag')
             res = resolve(repo, '/.tag')
@@ -286,11 +303,15 @@ def test_resolve():
             latest_content = frozenset(vfs.contents(repo, latest_item))
             expected = frozenset((x.name, vfs.Item(oid=x.oid, meta=x.meta))
                                  for x in (tip_tree[name]
-                                           for name in ('.', 'file',
-                                                        'symlink')))
+                                           for name in ('.',
+                                                        'bad-symlink',
+                                                        'dir',
+                                                        'dir-symlink',
+                                                        'file',
+                                                        'file-symlink')))
             wvpasseq(expected, latest_content)
 
-            wvstart('resolve: /test/latest/foo')
+            wvstart('resolve: /test/latest/file')
             res = resolve(repo, '/test/latest/file')
             wvpasseq(4, len(res))
             expected_file_item_w_meta = vfs.Item(meta=tip_tree['file'].meta,
@@ -301,8 +322,29 @@ def test_resolve():
                         ('file', expected_file_item_w_meta))
             wvpasseq(expected, res)
 
-            wvstart('resolve: /test/latest/symlink')
-            res = resolve(repo, '/test/latest/symlink')
+            wvstart('resolve: /test/latest/bad-symlink')
+            res = resolve(repo, '/test/latest/bad-symlink')
+            wvpasseq(4, len(res))
+            expected = (('', vfs._root),
+                        ('test', test_revlist),
+                        ('latest', expected_latest_item_w_meta),
+                        ('not-there', None))
+            wvpasseq(expected, res)
+
+            wvstart('lresolve: /test/latest/bad-symlink')
+            res = lresolve(repo, '/test/latest/bad-symlink')
+            wvpasseq(4, len(res))
+            bad_symlink_value = tip_tree['bad-symlink']
+            expected_bad_symlink_item_w_meta = vfs.Item(meta=bad_symlink_value.meta,
+                                                        oid=bad_symlink_value.oid)
+            expected = (('', vfs._root),
+                        ('test', test_revlist),
+                        ('latest', expected_latest_item_w_meta),
+                        ('bad-symlink', expected_bad_symlink_item_w_meta))
+            wvpasseq(expected, res)
+
+            wvstart('resolve: /test/latest/file-symlink')
+            res = resolve(repo, '/test/latest/file-symlink')
             wvpasseq(4, len(res))
             expected = (('', vfs._root),
                         ('test', test_revlist),
@@ -310,16 +352,16 @@ def test_resolve():
                         ('file', expected_file_item_w_meta))
             wvpasseq(expected, res)
 
-            wvstart('lresolve: /test/latest/symlink')
-            res = lresolve(repo, '/test/latest/symlink')
+            wvstart('lresolve: /test/latest/file-symlink')
+            res = lresolve(repo, '/test/latest/file-symlink')
             wvpasseq(4, len(res))
-            symlink_value = tip_tree['symlink']
-            expected_symlink_item_w_meta = vfs.Item(meta=symlink_value.meta,
-                                                    oid=symlink_value.oid)
+            file_symlink_value = tip_tree['file-symlink']
+            expected_file_symlink_item_w_meta = vfs.Item(meta=file_symlink_value.meta,
+                                                         oid=file_symlink_value.oid)
             expected = (('', vfs._root),
                         ('test', test_revlist),
                         ('latest', expected_latest_item_w_meta),
-                        ('symlink', expected_symlink_item_w_meta))
+                        ('file-symlink', expected_file_symlink_item_w_meta))
             wvpasseq(expected, res)
 
             wvstart('resolve: /test/latest/missing')
@@ -329,6 +371,75 @@ def test_resolve():
             wvpasseq('missing', name)
             wvpass(item is None)
 
+            for path in ('/test/latest/file/',
+                         '/test/latest/file/.',
+                         '/test/latest/file/..',
+                         '/test/latest/file/../',
+                         '/test/latest/file/../.',
+                         '/test/latest/file/../..',
+                         '/test/latest/file/foo'):
+                wvstart('resolve: ' + path)
+                try:
+                    resolve(repo, path)
+                except vfs.IOError as res_ex:
+                    wvpasseq(ENOTDIR, res_ex.errno)
+                    wvpasseq(['', 'test', 'latest', 'file'],
+                             [name for name, item in res_ex.terminus])
+
+            for path in ('/test/latest/file-symlink/',
+                         '/test/latest/file-symlink/.',
+                         '/test/latest/file-symlink/..',
+                         '/test/latest/file-symlink/../',
+                         '/test/latest/file-symlink/../.',
+                         '/test/latest/file-symlink/../..'):
+                wvstart('lresolve: ' + path)
+                try:
+                    lresolve(repo, path)
+                except vfs.IOError as res_ex:
+                    wvpasseq(ENOTDIR, res_ex.errno)
+                    wvpasseq(['', 'test', 'latest', 'file'],
+                             [name for name, item in res_ex.terminus])
+
+            wvstart('resolve: non-directory parent')
+            file_res = resolve(repo, '/test/latest/file')
+            try:
+                resolve(repo, 'foo', parent=file_res)
+            except vfs.IOError as res_ex:
+                wvpasseq(ENOTDIR, res_ex.errno)
+                wvpasseq(None, res_ex.terminus)
+
+            wvstart('lresolve: /test/latest/dir-symlink')
+            res = lresolve(repo, '/test/latest/dir-symlink')
+            wvpasseq(4, len(res))
+            dir_symlink_value = tip_tree['dir-symlink']
+            expected_dir_symlink_item_w_meta = vfs.Item(meta=dir_symlink_value.meta,
+                                                         oid=dir_symlink_value.oid)
+            expected = (('', vfs._root),
+                        ('test', test_revlist),
+                        ('latest', expected_latest_item_w_meta),
+                        ('dir-symlink', expected_dir_symlink_item_w_meta))
+            wvpasseq(expected, res)
+
+            dir_value = tip_tree['dir']
+            expected_dir_item = vfs.Item(oid=dir_value.oid,
+                                         meta=tree_dict(repo, dir_value.oid)['.'].meta)
+            expected = (('', vfs._root),
+                        ('test', test_revlist),
+                        ('latest', expected_latest_item_w_meta),
+                        ('dir', expected_dir_item))
+            for resname, resolver in (('resolve', resolve),
+                                      ('lresolve', lresolve)):
+                for path in ('/test/latest/dir-symlink/',
+                             '/test/latest/dir-symlink/.'):
+                    wvstart(resname + ': ' + path)
+                    res = resolver(repo, path)
+                    wvpasseq(4, len(res))
+                    wvpasseq(expected, res)
+            wvstart('resolve: /test/latest/dir-symlink')
+            res = resolve(repo, path)
+            wvpasseq(4, len(res))
+            wvpasseq(expected, res)
+
 @wvtest
 def test_resolve_loop():
     with no_lingering_errors():
index 3ec956b20816e3b8110f44645864a16c96f1676b..dfc747a4db7f6dbf858cbc3b984eec05823850de 100644 (file)
@@ -193,16 +193,28 @@ class _FileReader(object):
 _multiple_slashes_rx = re.compile(r'//+')
 
 def _decompose_path(path):
-    """Return a reversed list of path elements, omitting any occurrences
-    of "."  and ignoring any leading or trailing slash."""
+    """Return a boolean indicating whether the path is absolute, and a
+    reversed list of path elements, omitting any occurrences of "."
+    and ignoring any leading or trailing slash.  If the path is
+    effectively '/' or '.', return an empty list.
+
+    """
     path = re.sub(_multiple_slashes_rx, '/', path)
+    if path == '/':
+        return True, True, []
+    is_absolute = must_be_dir = False
     if path.startswith('/'):
+        is_absolute = True
         path = path[1:]
-    if path.endswith('/'):
-        path = path[:-1]
-    result = [x for x in path.split('/') if x != '.']
-    result.reverse()
-    return result
+    for suffix in ('/', '/.'):
+        if path.endswith(suffix):
+            must_be_dir = True
+            path = path[:-len(suffix)]
+    parts = [x for x in path.split('/') if x != '.']
+    parts.reverse()
+    if not parts:
+        must_be_dir = True  # e.g. path was effectively '.' or '/', etc.
+    return is_absolute, must_be_dir, parts
     
 
 Item = namedtuple('Item', ('meta', 'oid'))
@@ -663,6 +675,13 @@ def contents(repo, item, names=None, want_meta=True):
         yield x
 
 def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
+    def raise_dir_required_but_not_dir(path, parent, past):
+        raise IOError(ENOTDIR,
+                      "path %r%s resolves to non-directory %r"
+                      % (path,
+                         ' (relative to %r)' % parent if parent else '',
+                         past),
+                      terminus=past)
     global _root
     assert repo
     assert len(path)
@@ -672,23 +691,34 @@ def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
             assert type(x[0]) in (bytes, str)
             assert type(x[1]) in item_types
         assert parent[0][1] == _root
-    future = _decompose_path(path)
-    if path.startswith('/'):
-        if future == ['']: # path was effectively '/'
+        if not S_ISDIR(item_mode(parent[-1][1])):
+            raise IOError(ENOTDIR,
+                          'path resolution parent %r is not a directory'
+                          % (parent,))
+    is_absolute, must_be_dir, future = _decompose_path(path)
+    if must_be_dir:
+        deref = True
+    if not future:  # path was effectively '.' or '/'
+        if is_absolute:
             return (('', _root),)
+        if parent:
+            return tuple(parent)
+        return [('', _root)]
+    if is_absolute:
         past = [('', _root)]
     else:
-        if parent:
-            past = list(parent)
-        else:
-            past = [('', _root)]
-    if not future:  # e.g. if path was effectively '.'
-        return tuple(past)
+        past = list(parent) if parent else [('', _root)]
     hops = 0
     while True:
+        if not future:
+            if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
+                raise_dir_required_but_not_dir(path, parent, past)
+            return tuple(past)
         segment = future.pop()
         if segment == '..':
+            assert len(past) > 0
             if len(past) > 1:  # .. from / is /
+                assert S_ISDIR(item_mode(past[-1][1]))
                 past.pop()
         else:
             parent_name, parent_item = past[-1]
@@ -708,42 +738,49 @@ def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
             mode = item_mode(item)
             if not S_ISLNK(mode):
                 if not S_ISDIR(mode):
-                    assert(not future)
                     past.append((segment, item),)
+                    if future:
+                        raise IOError(ENOTDIR,
+                                      'path %r%s ends internally in non-directory here: %r'
+                                      % (path,
+                                         ' (relative to %r)' % parent if parent else '',
+                                         past),
+                                      terminus=past)
+                    if must_be_dir:
+                        raise_dir_required_but_not_dir(path, parent, past)
                     return tuple(past)
                 # It's treeish
                 if want_meta and type(item) in real_tree_types:
                     dir_meta = _find_treeish_oid_metadata(repo, item.oid)
                     if dir_meta:
                         item = item._replace(meta=dir_meta)
-                if not future:
-                    past.append((segment, item),)
-                    return tuple(past)
                 past.append((segment, item))
-            else:  # symlink            
+            else:  # symlink
                 if not future and not deref:
                     past.append((segment, item),)
-                    return tuple(past)
+                    continue
+                if hops > 100:
+                    raise IOError(ELOOP,
+                                  'too many symlinks encountered while resolving %r%s'
+                                  % (path, ' relative to %r' % parent if parent else ''),
+                                  terminus=tuple(past + [(segment, item)]))
                 target = readlink(repo, item)
-                target_future = _decompose_path(target)
-                if target.startswith('/'):
-                    future = target_future
+                is_absolute, _, target_future = _decompose_path(target)
+                if is_absolute:
+                    if not target_future:  # path was effectively '/'
+                        return (('', _root),)
                     past = [('', _root)]
-                    if target_future == ['']:  # path was effectively '/'
-                        return tuple(past)
+                    future = target_future
                 else:
                     future.extend(target_future)
                 hops += 1
-                if hops > 100:
-                    raise IOError(ELOOP,
-                                  'too many symlinks encountered while resolving %r%s'
-                                  % (path, 'relative to %r' % parent if parent else ''),
-                                  terminus=tuple(past + [(segment, item)]))
                 
 def lresolve(repo, path, parent=None, want_meta=True):
-    """Perform exactly the same function as resolve(), except if the
-     final path element is a symbolic link, don't follow it, just
-     return it in the result."""
+    """Perform exactly the same function as resolve(), except if the final
+    path element is a symbolic link, don't follow it, just return it
+    in the result.
+
+    """
     return _resolve_path(repo, path, parent=parent, want_meta=want_meta,
                          deref=False)
 
@@ -757,6 +794,9 @@ def resolve(repo, path, parent=None, want_meta=True):
     resolution, the result will represent the location of the missing
     item, and that item in the result will be None.
 
+    Any attempt to traverse a non-directory will raise a VFS ENOTDIR
+    IOError exception.
+
     Any symlinks along the path, including at the end, will be
     resolved.  A VFS IOError with the errno attribute set to ELOOP
     will be raised if too many symlinks are traversed while following
@@ -765,13 +805,14 @@ def resolve(repo, path, parent=None, want_meta=True):
     describing the location of the failure, which will be a tuple of
     (name, info) elements.
 
-    Currently, a path ending in '/' will still resolve if it exists,
-    even if not a directory.  The parent, if specified, must be a
-    sequence of (name, item) tuples, and will provide the starting
-    point for the resolution of the path.  The result may include
-    elements of parent directly, so they must not be modified later.
-    If this is a concern, pass in "name, copy_item(item) for
-    name, item in parent" instead.
+    The parent, if specified, must be a sequence of (name, item)
+    tuples, and will provide the starting point for the resolution of
+    the path.  If no parent is specified, resolution will start at
+    '/'.
+
+    The result may include elements of parent directly, so they must
+    not be modified later.  If this is a concern, pass in "name,
+    copy_item(item) for name, item in parent" instead.
 
     When want_meta is true, detailed metadata will be included in each
     result item if it's avaiable, otherwise item.meta will be an