From 7884c51f475b5270a6b9b54110144348fe90ad38 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Sat, 9 Dec 2017 13:46:13 -0600 Subject: [PATCH] vfs2._resolve_path: improve handling ENOTDIR, absolute paths, etc. Handle and test various path_resolution(7) cases more carefully. e.g. ensure a trailing '/' or '/.' forces resolution, ensure a non-final path element that's not a directory produces ENOTDIR, ... Add tests for the these, for bad symlink handling, and a for number of other cases. Signed-off-by: Rob Browning Tested-by: Rob Browning --- lib/bup/t/tvfs.py | 137 +++++++++++++++++++++++++++++++++++++++++----- lib/bup/vfs2.py | 125 ++++++++++++++++++++++++++++-------------- 2 files changed, 207 insertions(+), 55 deletions(-) diff --git a/lib/bup/t/tvfs.py b/lib/bup/t/tvfs.py index 7cbd946..7dbc6e8 100644 --- a/lib/bup/t/tvfs.py +++ b/lib/bup/t/tvfs.py @@ -1,7 +1,7 @@ from __future__ import print_function from collections import namedtuple -from errno import ELOOP +from errno import ELOOP, ENOTDIR from io import BytesIO from os import environ, symlink from stat import S_IFDIR, S_IFREG, S_ISDIR, S_ISREG @@ -214,9 +214,12 @@ def test_resolve(): save_time = 100000 save_time_str = strftime('%Y-%m-%d-%H%M%S', localtime(save_time)) os.mkdir(data_path) + os.mkdir(data_path + '/dir') with open(data_path + '/file', 'w+') as tmpfile: print('canary', file=tmpfile) - symlink('file', data_path + '/symlink') + symlink('file', data_path + '/file-symlink') + symlink('dir', data_path + '/dir-symlink') + symlink('not-there', data_path + '/bad-symlink') ex((bup_path, 'init')) ex((bup_path, 'index', '-v', data_path)) ex((bup_path, 'save', '-d', str(save_time), '-tvvn', 'test', @@ -249,6 +252,20 @@ def test_resolve(): ('.tag', vfs._tags), ('test', test_revlist)]), root_content) + for path in ('//', '/.', '/./', '/..', '/../', + '/test/latest/dir/../../..', + '/test/latest/dir/../../../', + '/test/latest/dir/../../../.', + '/test/latest/dir/../../..//', + '/test//latest/dir/../../..', + '/test/./latest/dir/../../..', + '/test/././latest/dir/../../..', + '/test/.//./latest/dir/../../..', + '/test//.//.//latest/dir/../../..' + '/test//./latest/dir/../../..'): + wvstart('resolve: ' + path) + res = resolve(repo, path) + wvpasseq((('', vfs._root),), res) wvstart('resolve: /.tag') res = resolve(repo, '/.tag') @@ -286,11 +303,15 @@ def test_resolve(): latest_content = frozenset(vfs.contents(repo, latest_item)) expected = frozenset((x.name, vfs.Item(oid=x.oid, meta=x.meta)) for x in (tip_tree[name] - for name in ('.', 'file', - 'symlink'))) + for name in ('.', + 'bad-symlink', + 'dir', + 'dir-symlink', + 'file', + 'file-symlink'))) wvpasseq(expected, latest_content) - wvstart('resolve: /test/latest/foo') + wvstart('resolve: /test/latest/file') res = resolve(repo, '/test/latest/file') wvpasseq(4, len(res)) expected_file_item_w_meta = vfs.Item(meta=tip_tree['file'].meta, @@ -301,8 +322,29 @@ def test_resolve(): ('file', expected_file_item_w_meta)) wvpasseq(expected, res) - wvstart('resolve: /test/latest/symlink') - res = resolve(repo, '/test/latest/symlink') + wvstart('resolve: /test/latest/bad-symlink') + res = resolve(repo, '/test/latest/bad-symlink') + wvpasseq(4, len(res)) + expected = (('', vfs._root), + ('test', test_revlist), + ('latest', expected_latest_item_w_meta), + ('not-there', None)) + wvpasseq(expected, res) + + wvstart('lresolve: /test/latest/bad-symlink') + res = lresolve(repo, '/test/latest/bad-symlink') + wvpasseq(4, len(res)) + bad_symlink_value = tip_tree['bad-symlink'] + expected_bad_symlink_item_w_meta = vfs.Item(meta=bad_symlink_value.meta, + oid=bad_symlink_value.oid) + expected = (('', vfs._root), + ('test', test_revlist), + ('latest', expected_latest_item_w_meta), + ('bad-symlink', expected_bad_symlink_item_w_meta)) + wvpasseq(expected, res) + + wvstart('resolve: /test/latest/file-symlink') + res = resolve(repo, '/test/latest/file-symlink') wvpasseq(4, len(res)) expected = (('', vfs._root), ('test', test_revlist), @@ -310,16 +352,16 @@ def test_resolve(): ('file', expected_file_item_w_meta)) wvpasseq(expected, res) - wvstart('lresolve: /test/latest/symlink') - res = lresolve(repo, '/test/latest/symlink') + wvstart('lresolve: /test/latest/file-symlink') + res = lresolve(repo, '/test/latest/file-symlink') wvpasseq(4, len(res)) - symlink_value = tip_tree['symlink'] - expected_symlink_item_w_meta = vfs.Item(meta=symlink_value.meta, - oid=symlink_value.oid) + file_symlink_value = tip_tree['file-symlink'] + expected_file_symlink_item_w_meta = vfs.Item(meta=file_symlink_value.meta, + oid=file_symlink_value.oid) expected = (('', vfs._root), ('test', test_revlist), ('latest', expected_latest_item_w_meta), - ('symlink', expected_symlink_item_w_meta)) + ('file-symlink', expected_file_symlink_item_w_meta)) wvpasseq(expected, res) wvstart('resolve: /test/latest/missing') @@ -329,6 +371,75 @@ def test_resolve(): wvpasseq('missing', name) wvpass(item is None) + for path in ('/test/latest/file/', + '/test/latest/file/.', + '/test/latest/file/..', + '/test/latest/file/../', + '/test/latest/file/../.', + '/test/latest/file/../..', + '/test/latest/file/foo'): + wvstart('resolve: ' + path) + try: + resolve(repo, path) + except vfs.IOError as res_ex: + wvpasseq(ENOTDIR, res_ex.errno) + wvpasseq(['', 'test', 'latest', 'file'], + [name for name, item in res_ex.terminus]) + + for path in ('/test/latest/file-symlink/', + '/test/latest/file-symlink/.', + '/test/latest/file-symlink/..', + '/test/latest/file-symlink/../', + '/test/latest/file-symlink/../.', + '/test/latest/file-symlink/../..'): + wvstart('lresolve: ' + path) + try: + lresolve(repo, path) + except vfs.IOError as res_ex: + wvpasseq(ENOTDIR, res_ex.errno) + wvpasseq(['', 'test', 'latest', 'file'], + [name for name, item in res_ex.terminus]) + + wvstart('resolve: non-directory parent') + file_res = resolve(repo, '/test/latest/file') + try: + resolve(repo, 'foo', parent=file_res) + except vfs.IOError as res_ex: + wvpasseq(ENOTDIR, res_ex.errno) + wvpasseq(None, res_ex.terminus) + + wvstart('lresolve: /test/latest/dir-symlink') + res = lresolve(repo, '/test/latest/dir-symlink') + wvpasseq(4, len(res)) + dir_symlink_value = tip_tree['dir-symlink'] + expected_dir_symlink_item_w_meta = vfs.Item(meta=dir_symlink_value.meta, + oid=dir_symlink_value.oid) + expected = (('', vfs._root), + ('test', test_revlist), + ('latest', expected_latest_item_w_meta), + ('dir-symlink', expected_dir_symlink_item_w_meta)) + wvpasseq(expected, res) + + dir_value = tip_tree['dir'] + expected_dir_item = vfs.Item(oid=dir_value.oid, + meta=tree_dict(repo, dir_value.oid)['.'].meta) + expected = (('', vfs._root), + ('test', test_revlist), + ('latest', expected_latest_item_w_meta), + ('dir', expected_dir_item)) + for resname, resolver in (('resolve', resolve), + ('lresolve', lresolve)): + for path in ('/test/latest/dir-symlink/', + '/test/latest/dir-symlink/.'): + wvstart(resname + ': ' + path) + res = resolver(repo, path) + wvpasseq(4, len(res)) + wvpasseq(expected, res) + wvstart('resolve: /test/latest/dir-symlink') + res = resolve(repo, path) + wvpasseq(4, len(res)) + wvpasseq(expected, res) + @wvtest def test_resolve_loop(): with no_lingering_errors(): diff --git a/lib/bup/vfs2.py b/lib/bup/vfs2.py index 3ec956b..dfc747a 100644 --- a/lib/bup/vfs2.py +++ b/lib/bup/vfs2.py @@ -193,16 +193,28 @@ class _FileReader(object): _multiple_slashes_rx = re.compile(r'//+') def _decompose_path(path): - """Return a reversed list of path elements, omitting any occurrences - of "." and ignoring any leading or trailing slash.""" + """Return a boolean indicating whether the path is absolute, and a + reversed list of path elements, omitting any occurrences of "." + and ignoring any leading or trailing slash. If the path is + effectively '/' or '.', return an empty list. + + """ path = re.sub(_multiple_slashes_rx, '/', path) + if path == '/': + return True, True, [] + is_absolute = must_be_dir = False if path.startswith('/'): + is_absolute = True path = path[1:] - if path.endswith('/'): - path = path[:-1] - result = [x for x in path.split('/') if x != '.'] - result.reverse() - return result + for suffix in ('/', '/.'): + if path.endswith(suffix): + must_be_dir = True + path = path[:-len(suffix)] + parts = [x for x in path.split('/') if x != '.'] + parts.reverse() + if not parts: + must_be_dir = True # e.g. path was effectively '.' or '/', etc. + return is_absolute, must_be_dir, parts Item = namedtuple('Item', ('meta', 'oid')) @@ -663,6 +675,13 @@ def contents(repo, item, names=None, want_meta=True): yield x def _resolve_path(repo, path, parent=None, want_meta=True, deref=False): + def raise_dir_required_but_not_dir(path, parent, past): + raise IOError(ENOTDIR, + "path %r%s resolves to non-directory %r" + % (path, + ' (relative to %r)' % parent if parent else '', + past), + terminus=past) global _root assert repo assert len(path) @@ -672,23 +691,34 @@ def _resolve_path(repo, path, parent=None, want_meta=True, deref=False): assert type(x[0]) in (bytes, str) assert type(x[1]) in item_types assert parent[0][1] == _root - future = _decompose_path(path) - if path.startswith('/'): - if future == ['']: # path was effectively '/' + if not S_ISDIR(item_mode(parent[-1][1])): + raise IOError(ENOTDIR, + 'path resolution parent %r is not a directory' + % (parent,)) + is_absolute, must_be_dir, future = _decompose_path(path) + if must_be_dir: + deref = True + if not future: # path was effectively '.' or '/' + if is_absolute: return (('', _root),) + if parent: + return tuple(parent) + return [('', _root)] + if is_absolute: past = [('', _root)] else: - if parent: - past = list(parent) - else: - past = [('', _root)] - if not future: # e.g. if path was effectively '.' - return tuple(past) + past = list(parent) if parent else [('', _root)] hops = 0 while True: + if not future: + if must_be_dir and not S_ISDIR(item_mode(past[-1][1])): + raise_dir_required_but_not_dir(path, parent, past) + return tuple(past) segment = future.pop() if segment == '..': + assert len(past) > 0 if len(past) > 1: # .. from / is / + assert S_ISDIR(item_mode(past[-1][1])) past.pop() else: parent_name, parent_item = past[-1] @@ -708,42 +738,49 @@ def _resolve_path(repo, path, parent=None, want_meta=True, deref=False): mode = item_mode(item) if not S_ISLNK(mode): if not S_ISDIR(mode): - assert(not future) past.append((segment, item),) + if future: + raise IOError(ENOTDIR, + 'path %r%s ends internally in non-directory here: %r' + % (path, + ' (relative to %r)' % parent if parent else '', + past), + terminus=past) + if must_be_dir: + raise_dir_required_but_not_dir(path, parent, past) return tuple(past) # It's treeish if want_meta and type(item) in real_tree_types: dir_meta = _find_treeish_oid_metadata(repo, item.oid) if dir_meta: item = item._replace(meta=dir_meta) - if not future: - past.append((segment, item),) - return tuple(past) past.append((segment, item)) - else: # symlink + else: # symlink if not future and not deref: past.append((segment, item),) - return tuple(past) + continue + if hops > 100: + raise IOError(ELOOP, + 'too many symlinks encountered while resolving %r%s' + % (path, ' relative to %r' % parent if parent else ''), + terminus=tuple(past + [(segment, item)])) target = readlink(repo, item) - target_future = _decompose_path(target) - if target.startswith('/'): - future = target_future + is_absolute, _, target_future = _decompose_path(target) + if is_absolute: + if not target_future: # path was effectively '/' + return (('', _root),) past = [('', _root)] - if target_future == ['']: # path was effectively '/' - return tuple(past) + future = target_future else: future.extend(target_future) hops += 1 - if hops > 100: - raise IOError(ELOOP, - 'too many symlinks encountered while resolving %r%s' - % (path, 'relative to %r' % parent if parent else ''), - terminus=tuple(past + [(segment, item)])) def lresolve(repo, path, parent=None, want_meta=True): - """Perform exactly the same function as resolve(), except if the - final path element is a symbolic link, don't follow it, just - return it in the result.""" + """Perform exactly the same function as resolve(), except if the final + path element is a symbolic link, don't follow it, just return it + in the result. + + """ return _resolve_path(repo, path, parent=parent, want_meta=want_meta, deref=False) @@ -757,6 +794,9 @@ def resolve(repo, path, parent=None, want_meta=True): resolution, the result will represent the location of the missing item, and that item in the result will be None. + Any attempt to traverse a non-directory will raise a VFS ENOTDIR + IOError exception. + Any symlinks along the path, including at the end, will be resolved. A VFS IOError with the errno attribute set to ELOOP will be raised if too many symlinks are traversed while following @@ -765,13 +805,14 @@ def resolve(repo, path, parent=None, want_meta=True): describing the location of the failure, which will be a tuple of (name, info) elements. - Currently, a path ending in '/' will still resolve if it exists, - even if not a directory. The parent, if specified, must be a - sequence of (name, item) tuples, and will provide the starting - point for the resolution of the path. The result may include - elements of parent directly, so they must not be modified later. - If this is a concern, pass in "name, copy_item(item) for - name, item in parent" instead. + The parent, if specified, must be a sequence of (name, item) + tuples, and will provide the starting point for the resolution of + the path. If no parent is specified, resolution will start at + '/'. + + The result may include elements of parent directly, so they must + not be modified later. If this is a concern, pass in "name, + copy_item(item) for name, item in parent" instead. When want_meta is true, detailed metadata will be included in each result item if it's avaiable, otherwise item.meta will be an -- 2.39.2