X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=lib%2Fbup%2Fvfs.py;h=09bb1d37e33562bc9ff4cf1e8ed481a9edd9120f;hb=1de81e81a0ba24622dec3ec4c9b6acba3bba3cf2;hp=e3ea16ffae505d95c28a8d61ad7238f98730a94c;hpb=5fc56d8e45065211119c781de0546e83c8270479;p=bup.git diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py index e3ea16f..09bb1d3 100644 --- a/lib/bup/vfs.py +++ b/lib/bup/vfs.py @@ -47,26 +47,59 @@ item.coid. """ from __future__ import absolute_import, print_function +from binascii import hexlify, unhexlify from collections import namedtuple -from errno import EINVAL, ELOOP, ENOENT, ENOTDIR -from itertools import chain, dropwhile, groupby, tee +from errno import EINVAL, ELOOP, ENOTDIR +from itertools import chain, groupby, tee from random import randrange from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG from time import localtime, strftime -import exceptions, re, sys +import re, sys -from bup import client, git, metadata -from bup.compat import range -from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode +from bup import git, vint +from bup.compat import hexstr, range, str_type +from bup.git import BUP_CHUNKED, parse_commit, tree_decode from bup.helpers import debug2, last +from bup.io import path_msg from bup.metadata import Metadata +from bup.vint import read_bvec, write_bvec +from bup.vint import read_vint, write_vint +from bup.vint import read_vuint, write_vuint +if sys.version_info[0] < 3: + from exceptions import IOError as py_IOError +else: + py_IOError = IOError -class IOError(exceptions.IOError): +# We currently assume that it's always appropriate to just forward IOErrors +# to a remote client. + +class IOError(py_IOError): def __init__(self, errno, message, terminus=None): - exceptions.IOError.__init__(self, errno, message) + py_IOError.__init__(self, errno, message) self.terminus = terminus +def write_ioerror(port, ex): + assert isinstance(ex, IOError) + write_vuint(port, + (1 if ex.errno is not None else 0) + | (2 if ex.strerror is not None else 0) + | (4 if ex.terminus is not None else 0)) + if ex.errno is not None: + write_vint(port, ex.errno) + if ex.strerror is not None: + write_bvec(port, ex.strerror.encode('utf-8')) + if ex.terminus is not None: + write_resolution(port, ex.terminus) + +def read_ioerror(port): + mask = read_vuint(port) + no = read_vint(port) if 1 & mask else None + msg = read_bvec(port).decode('utf-8') if 2 & mask else None + term = read_resolution(port) if 4 & mask else None + return IOError(errno=no, message=msg, terminus=term) + + default_file_mode = S_IFREG | 0o644 default_dir_mode = S_IFDIR | 0o755 default_symlink_mode = S_IFLNK | 0o755 @@ -83,13 +116,13 @@ def _default_mode_for_gitmode(gitmode): def _normal_or_chunked_file_size(repo, oid): """Return the size of the normal or chunked file indicated by oid.""" # FIXME: --batch-format CatPipe? - it = repo.cat(oid.encode('hex')) + it = repo.cat(hexlify(oid)) _, obj_t, size = next(it) ofs = 0 - while obj_t == 'tree': - mode, name, last_oid = last(tree_decode(''.join(it))) + while obj_t == b'tree': + mode, name, last_oid = last(tree_decode(b''.join(it))) ofs += int(name, 16) - it = repo.cat(last_oid.encode('hex')) + it = repo.cat(hexlify(last_oid)) _, obj_t, size = next(it) return ofs + sum(len(b) for b in it) @@ -116,23 +149,23 @@ def _tree_chunks(repo, tree, startofs): skipmore = startofs - ofs if skipmore < 0: skipmore = 0 - it = repo.cat(oid.encode('hex')) + it = repo.cat(hexlify(oid)) _, obj_t, size = next(it) - data = ''.join(it) + data = b''.join(it) if S_ISDIR(mode): - assert obj_t == 'tree' + assert obj_t == b'tree' for b in _tree_chunks(repo, tree_decode(data), skipmore): yield b else: - assert obj_t == 'blob' + assert obj_t == b'blob' yield data[skipmore:] class _ChunkReader: def __init__(self, repo, oid, startofs): - it = repo.cat(oid.encode('hex')) + it = repo.cat(hexlify(oid)) _, obj_t, size = next(it) - isdir = obj_t == 'tree' - data = ''.join(it) + isdir = obj_t == b'tree' + data = b''.join(it) if isdir: self.it = _tree_chunks(repo, tree_decode(data), startofs) self.blob = None @@ -142,11 +175,11 @@ class _ChunkReader: self.ofs = startofs def next(self, size): - out = '' + out = b'' while len(out) < size: if self.it and not self.blob: try: - self.blob = self.it.next() + self.blob = next(self.it) except StopIteration: self.it = None if self.blob: @@ -172,7 +205,7 @@ class _FileReader(object): if not self._size: self._size = _normal_or_chunked_file_size(self._repo, self.oid) return self._size - + def seek(self, ofs): if ofs < 0 or ofs > self._compute_size(): raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs) @@ -184,7 +217,7 @@ class _FileReader(object): def read(self, count=-1): size = self._compute_size() if self.ofs >= size: - return '' + return b'' if count < 0: count = size - self.ofs if not self.reader or self.reader.ofs != self.ofs: @@ -206,7 +239,7 @@ class _FileReader(object): self.close() return False -_multiple_slashes_rx = re.compile(r'//+') +_multiple_slashes_rx = re.compile(br'//+') def _decompose_path(path): """Return a boolean indicating whether the path is absolute, and a @@ -215,23 +248,23 @@ def _decompose_path(path): effectively '/' or '.', return an empty list. """ - path = re.sub(_multiple_slashes_rx, '/', path) - if path == '/': + path = re.sub(_multiple_slashes_rx, b'/', path) + if path == b'/': return True, True, [] is_absolute = must_be_dir = False - if path.startswith('/'): + if path.startswith(b'/'): is_absolute = True path = path[1:] - for suffix in ('/', '/.'): + for suffix in (b'/', b'/.'): if path.endswith(suffix): must_be_dir = True path = path[:-len(suffix)] - parts = [x for x in path.split('/') if x != '.'] + parts = [x for x in path.split(b'/') if x != b'.'] parts.reverse() if not parts: must_be_dir = True # e.g. path was effectively '.' or '/', etc. return is_absolute, must_be_dir, parts - + Item = namedtuple('Item', ('meta', 'oid')) Chunky = namedtuple('Chunky', ('meta', 'oid')) @@ -241,8 +274,95 @@ Tags = namedtuple('Tags', ('meta')) RevList = namedtuple('RevList', ('meta', 'oid')) Commit = namedtuple('Commit', ('meta', 'oid', 'coid')) -item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit)) -real_tree_types = frozenset((Item, Commit)) +item_types = (Item, Chunky, Root, Tags, RevList, Commit) +real_tree_types = (Item, Commit) + +def write_item(port, item): + kind = type(item) + name = bytes(kind.__name__.encode('ascii')) + meta = item.meta + has_meta = 1 if isinstance(meta, Metadata) else 0 + if kind in (Item, Chunky, RevList): + assert len(item.oid) == 20 + if has_meta: + vint.send(port, 'sVs', name, has_meta, item.oid) + Metadata.write(meta, port, include_path=False) + else: + vint.send(port, 'sVsV', name, has_meta, item.oid, item.meta) + elif kind in (Root, Tags): + if has_meta: + vint.send(port, 'sV', name, has_meta) + Metadata.write(meta, port, include_path=False) + else: + vint.send(port, 'sVV', name, has_meta, item.meta) + elif kind == Commit: + assert len(item.oid) == 20 + assert len(item.coid) == 20 + if has_meta: + vint.send(port, 'sVss', name, has_meta, item.oid, item.coid) + Metadata.write(meta, port, include_path=False) + else: + vint.send(port, 'sVssV', name, has_meta, item.oid, item.coid, + item.meta) + elif kind == FakeLink: + if has_meta: + vint.send(port, 'sVs', name, has_meta, item.target) + Metadata.write(meta, port, include_path=False) + else: + vint.send(port, 'sVsV', name, has_meta, item.target, item.meta) + else: + assert False + +def read_item(port): + def read_m(port, has_meta): + if has_meta: + m = Metadata.read(port) + return m + return read_vuint(port) + kind, has_meta = vint.recv(port, 'sV') + if kind == b'Item': + oid, meta = read_bvec(port), read_m(port, has_meta) + return Item(oid=oid, meta=meta) + if kind == b'Chunky': + oid, meta = read_bvec(port), read_m(port, has_meta) + return Chunky(oid=oid, meta=meta) + if kind == b'RevList': + oid, meta = read_bvec(port), read_m(port, has_meta) + return RevList(oid=oid, meta=meta) + if kind == b'Root': + return Root(meta=read_m(port, has_meta)) + if kind == b'Tags': + return Tags(meta=read_m(port, has_meta)) + if kind == b'Commit': + oid, coid = vint.recv(port, 'ss') + meta = read_m(port, has_meta) + return Commit(oid=oid, coid=coid, meta=meta) + if kind == b'FakeLink': + target, meta = read_bvec(port), read_m(port, has_meta) + return FakeLink(target=target, meta=meta) + assert False + +def write_resolution(port, resolution): + write_vuint(port, len(resolution)) + for name, item in resolution: + write_bvec(port, name) + if item: + port.write(b'\x01') + write_item(port, item) + else: + port.write(b'\x00') + +def read_resolution(port): + n = read_vuint(port) + result = [] + for i in range(n): + name = read_bvec(port) + have_item = ord(port.read(1)) + assert have_item in (0, 1) + item = read_item(port) if have_item else None + result.append((name, item)) + return tuple(result) + _root = Root(meta=default_dir_mode) _tags = Tags(meta=default_dir_mode) @@ -272,12 +392,11 @@ def is_valid_cache_key(x): rvl:OID -> {'.', commit, '2012...', next_commit, ...} """ # Suspect we may eventually add "(container_oid, name) -> ...", and others. - x_t = type(x) - if x_t is bytes: + if isinstance(x, bytes): tag = x[:4] - if tag in ('itm:', 'rvl:') and len(x) == 24: + if tag in (b'itm:', b'rvl:') and len(x) == 24: return True - if tag == 'res:': + if tag == b'res:': return True def cache_get(key): @@ -286,11 +405,13 @@ def cache_get(key): raise Exception('invalid cache key: ' + repr(key)) return _cache.get(key) -def cache_notice(key, value): +def cache_notice(key, value, overwrite=False): global _cache, _cache_keys, _cache_max_items if not is_valid_cache_key(key): raise Exception('invalid cache key: ' + repr(key)) if key in _cache: + if overwrite: + _cache[key] = value return if len(_cache) < _cache_max_items: _cache_keys.append(key) @@ -302,6 +423,13 @@ def cache_notice(key, value): _cache_keys[victim_i] = key _cache[key] = value +def _has_metadata_if_needed(item, need_meta): + if not need_meta: + return True + if isinstance(item.meta, Metadata): + return True + return False + def cache_get_commit_item(oid, need_meta=True): """Return the requested tree item if it can be found in the cache. When need_meta is true don't return a cached item that only has a @@ -310,18 +438,14 @@ def cache_get_commit_item(oid, need_meta=True): commit_key = b'itm:' + oid item = cache_get(commit_key) if item: - if not need_meta: - return item - if isinstance(item.meta, Metadata): + if _has_metadata_if_needed(item, need_meta): return item entries = cache_get(b'rvl:' + oid) if entries: - return entries['.'] - -def cache_get_revlist_item(oid, need_meta=True): - commit = cache_get_commit_item(oid, need_meta=need_meta) - if commit: - return RevList(oid=oid, meta=commit.meta) + item = entries[b'.'] + if _has_metadata_if_needed(item, need_meta): + return item + return None def copy_item(item): """Return a completely independent copy of item, such that @@ -355,23 +479,23 @@ def tree_data_and_bupm(repo, oid): """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the tree has no metadata (i.e. older bup save, or non-bup tree). - """ + """ assert len(oid) == 20 - it = repo.cat(oid.encode('hex')) + it = repo.cat(hexlify(oid)) _, item_t, size = next(it) - data = ''.join(it) - if item_t == 'commit': + data = b''.join(it) + if item_t == b'commit': commit = parse_commit(data) it = repo.cat(commit.tree) _, item_t, size = next(it) - data = ''.join(it) - assert item_t == 'tree' - elif item_t != 'tree': - raise Exception('%r is not a tree or commit' % oid.encode('hex')) + data = b''.join(it) + assert item_t == b'tree' + elif item_t != b'tree': + raise Exception('%s is not a tree or commit' % hexstr(oid)) for _, mangled_name, sub_oid in tree_decode(data): - if mangled_name == '.bupm': + if mangled_name == b'.bupm': return data, sub_oid - if mangled_name > '.bupm': + if mangled_name > b'.bupm': break return data, None @@ -387,19 +511,19 @@ def _find_treeish_oid_metadata(repo, oid): return None def _readlink(repo, oid): - return ''.join(repo.join(oid.encode('hex'))) + return b''.join(repo.join(hexlify(oid))) def readlink(repo, item): """Return the link target of item, which must be a symlink. Reads the target from the repository if necessary.""" assert repo assert S_ISLNK(item_mode(item)) + if isinstance(item, FakeLink): + return item.target if isinstance(item.meta, Metadata): target = item.meta.symlink_target if target: return target - elif isinstance(item, FakeLink): - return item.target return _readlink(repo, item.oid) def _compute_item_size(repo, item): @@ -408,6 +532,8 @@ def _compute_item_size(repo, item): size = _normal_or_chunked_file_size(repo, item.oid) return size if S_ISLNK(mode): + if isinstance(item, FakeLink): + return len(item.target) return len(_readlink(repo, item.oid)) return 0 @@ -431,23 +557,23 @@ def fopen(repo, item): def _commit_item_from_data(oid, data): info = parse_commit(data) return Commit(meta=default_dir_mode, - oid=info.tree.decode('hex'), + oid=unhexlify(info.tree), coid=oid) def _commit_item_from_oid(repo, oid, require_meta): commit = cache_get_commit_item(oid, need_meta=require_meta) if commit and ((not require_meta) or isinstance(commit.meta, Metadata)): return commit - it = repo.cat(oid.encode('hex')) + it = repo.cat(hexlify(oid)) _, typ, size = next(it) - assert typ == 'commit' - commit = _commit_item_from_data(oid, ''.join(it)) + assert typ == b'commit' + commit = _commit_item_from_data(oid, b''.join(it)) if require_meta: meta = _find_treeish_oid_metadata(repo, commit.oid) if meta: commit = commit._replace(meta=meta) commit_key = b'itm:' + oid - cache_notice(commit_key, commit) + cache_notice(commit_key, commit, overwrite=True) return commit def _revlist_item_from_oid(repo, oid, require_meta): @@ -464,31 +590,31 @@ def root_items(repo, names=None, want_meta=True): global _root, _tags if not names: - yield '.', _root - yield '.tag', _tags + yield b'.', _root + yield b'.tag', _tags # FIXME: maybe eventually support repo.clone() or something # and pass in two repos, so we can drop the tuple() and stream # in parallel (i.e. meta vs refs). for name, oid in tuple(repo.refs([], limit_to_heads=True)): - assert(name.startswith('refs/heads/')) + assert(name.startswith(b'refs/heads/')) yield name[11:], _revlist_item_from_oid(repo, oid, want_meta) return - if '.' in names: - yield '.', _root - if '.tag' in names: - yield '.tag', _tags + if b'.' in names: + yield b'.', _root + if b'.tag' in names: + yield b'.tag', _tags for ref in names: - if ref in ('.', '.tag'): + if ref in (b'.', b'.tag'): continue - it = repo.cat('refs/heads/' + ref) + it = repo.cat(b'refs/heads/' + ref) oidx, typ, size = next(it) if not oidx: for _ in it: pass continue - assert typ == 'commit' - commit = parse_commit(''.join(it)) - yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), want_meta) + assert typ == b'commit' + commit = parse_commit(b''.join(it)) + yield ref, _revlist_item_from_oid(repo, unhexlify(oidx), want_meta) def ordered_tree_entries(tree_data, bupm=None): """Yields (name, mangled_name, kind, gitmode, oid) for each item in @@ -496,10 +622,12 @@ def ordered_tree_entries(tree_data, bupm=None): """ # Sadly, the .bupm entries currently aren't in git tree order, - # i.e. they don't account for the fact that git sorts trees - # (including our chunked trees) as if their names ended with "/", - # so "fo" sorts after "fo." iff fo is a directory. This makes - # streaming impossible when we need the metadata. + # but in unmangled name order. They _do_ account for the fact + # that git sorts trees (including chunked trees) as if their + # names ended with "/" (so "fo" sorts after "fo." iff fo is a + # directory), but we apply this on the unmangled names in save + # rather than on the mangled names. + # This makes streaming impossible when we need the metadata. def result_from_tree_entry(tree_entry): gitmode, mangled_name, oid = tree_entry name, kind = git.demangle_name(mangled_name, gitmode) @@ -510,7 +638,7 @@ def ordered_tree_entries(tree_data, bupm=None): tree_ents = sorted(tree_ents, key=lambda x: x[0]) for ent in tree_ents: yield ent - + def tree_items(oid, tree_data, names=frozenset(), bupm=None): def tree_item(ent_oid, kind, gitmode): @@ -522,43 +650,46 @@ def tree_items(oid, tree_data, names=frozenset(), bupm=None): # No metadata here (accessable via '.' inside ent_oid). return Item(meta=default_dir_mode, oid=ent_oid) - return Item(oid=ent_oid, - meta=(Metadata.read(bupm) if bupm \ - else _default_mode_for_gitmode(gitmode))) + meta = Metadata.read(bupm) if bupm else None + # handle the case of metadata being empty/missing in bupm + # (or there not being bupm at all) + if meta is None: + meta = _default_mode_for_gitmode(gitmode) + return Item(oid=ent_oid, meta=meta) assert len(oid) == 20 if not names: dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode - yield '.', Item(oid=oid, meta=dot_meta) + yield b'.', Item(oid=oid, meta=dot_meta) tree_entries = ordered_tree_entries(tree_data, bupm) for name, mangled_name, kind, gitmode, ent_oid in tree_entries: - if mangled_name == '.bupm': + if mangled_name == b'.bupm': continue - assert name != '.' + assert name != b'.' yield name, tree_item(ent_oid, kind, gitmode) return # Assumes the tree is properly formed, i.e. there are no # duplicates, and entries will be in git tree order. - if type(names) not in (frozenset, set): + if isinstance(names, (frozenset, set)): names = frozenset(names) remaining = len(names) # Account for the bupm sort order issue (cf. ordered_tree_entries above) - last_name = max(names) if bupm else max(names) + '/' + last_name = max(names) if bupm else max(names) + b'/' - if '.' in names: + if b'.' in names: dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode - yield '.', Item(oid=oid, meta=dot_meta) + yield b'.', Item(oid=oid, meta=dot_meta) if remaining == 1: return remaining -= 1 tree_entries = ordered_tree_entries(tree_data, bupm) for name, mangled_name, kind, gitmode, ent_oid in tree_entries: - if mangled_name == '.bupm': + if mangled_name == b'.bupm': continue - assert name != '.' + assert name != b'.' if name not in names: if name > last_name: break # given bupm sort order, we're finished @@ -577,16 +708,16 @@ def tree_items_with_meta(repo, oid, tree_data, names): assert len(oid) == 20 bupm = None for _, mangled_name, sub_oid in tree_decode(tree_data): - if mangled_name == '.bupm': + if mangled_name == b'.bupm': bupm = _FileReader(repo, sub_oid) break - if mangled_name > '.bupm': + if mangled_name > b'.bupm': break for item in tree_items(oid, tree_data, names, bupm): yield item -_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$') - +_save_name_rx = re.compile(br'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$') + def _reverse_suffix_duplicates(strs): """Yields the elements of strs, with any runs of duplicate values suffixed with -N suffixes, where the zero padded integer N @@ -599,7 +730,7 @@ def _reverse_suffix_duplicates(strs): yield name else: ndig = len(str(ndup - 1)) - fmt = '%s-' + '%0' + str(ndig) + 'd' + fmt = b'%s-' + b'%0' + (b'%d' % ndig) + b'd' for i in range(ndup - 1, -1, -1): yield fmt % (name, i) @@ -607,15 +738,15 @@ def parse_rev(f): items = f.readline().split(None) assert len(items) == 2 tree, auth_sec = items - return tree.decode('hex'), int(auth_sec) + return unhexlify(tree), int(auth_sec) def _name_for_rev(rev): commit_oidx, (tree_oid, utc) = rev - return strftime('%Y-%m-%d-%H%M%S', localtime(utc)) + return strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii') def _item_for_rev(rev): commit_oidx, (tree_oid, utc) = rev - coid = commit_oidx.decode('hex') + coid = unhexlify(commit_oidx) item = cache_get_commit_item(coid, need_meta=False) if item: return item @@ -624,15 +755,17 @@ def _item_for_rev(rev): cache_notice(commit_key, item) return item -def cache_commit(repo, oid): +# non-string singleton +_HAS_META_ENTRY = object() + +def cache_commit(repo, oid, require_meta=True): """Build, cache, and return a "name -> commit_item" dict of the entire commit rev-list. """ - # For now, always cache with full metadata entries = {} - entries['.'] = _revlist_item_from_oid(repo, oid, True) - revs = repo.rev_list((oid.encode('hex'),), format='%T %at', + entries[b'.'] = _revlist_item_from_oid(repo, oid, require_meta) + revs = repo.rev_list((hexlify(oid),), format=b'%T %at', parse=parse_rev) rev_items, rev_names = tee(revs) revs = None # Don't disturb the tees @@ -643,38 +776,43 @@ def cache_commit(repo, oid): name = next(rev_names) tip = tip or (name, item) entries[name] = item - entries['latest'] = FakeLink(meta=default_symlink_mode, target=tip[0]) + entries[b'latest'] = FakeLink(meta=default_symlink_mode, target=tip[0]) revlist_key = b'rvl:' + tip[1].coid - cache_notice(revlist_key, entries) + entries[_HAS_META_ENTRY] = require_meta + cache_notice(revlist_key, entries, overwrite=True) return entries -def revlist_items(repo, oid, names): +def revlist_items(repo, oid, names, require_meta=True): assert len(oid) == 20 # Special case '.' instead of caching the whole history since it's # the only way to get the metadata for the commit. - if names and all(x == '.' for x in names): - yield '.', _revlist_item_from_oid(repo, oid, True) + if names and all(x == b'.' for x in names): + yield b'.', _revlist_item_from_oid(repo, oid, require_meta) return # For now, don't worry about the possibility of the contents being # "too big" for the cache. revlist_key = b'rvl:' + oid entries = cache_get(revlist_key) + if entries and require_meta and not entries[_HAS_META_ENTRY]: + entries = None if not entries: - entries = cache_commit(repo, oid) + entries = cache_commit(repo, oid, require_meta) if not names: - for name in sorted(entries.keys()): + for name in sorted((n for n in entries.keys() if n != _HAS_META_ENTRY)): yield name, entries[name] return names = frozenset(name for name in names - if _save_name_rx.match(name) or name in ('.', 'latest')) + if _save_name_rx.match(name) or name in (b'.', b'latest')) - if '.' in names: - yield '.', entries['.'] - for name in (n for n in names if n != '.'): + if b'.' in names: + yield b'.', entries[b'.'] + for name in (n for n in names if n != b'.'): + if name == _HAS_META_ENTRY: + continue commit = entries.get(name) if commit: yield name, commit @@ -684,41 +822,42 @@ def tags_items(repo, names): def tag_item(oid): assert len(oid) == 20 - oidx = oid.encode('hex') + oidx = hexlify(oid) it = repo.cat(oidx) _, typ, size = next(it) - if typ == 'commit': + if typ == b'commit': return cache_get_commit_item(oid, need_meta=False) \ - or _commit_item_from_data(oid, ''.join(it)) + or _commit_item_from_data(oid, b''.join(it)) for _ in it: pass - if typ == 'blob': + if typ == b'blob': return Item(meta=default_file_mode, oid=oid) - elif typ == 'tree': + elif typ == b'tree': return Item(meta=default_dir_mode, oid=oid) - raise Exception('unexpected tag type ' + typ + ' for tag ' + name) + raise Exception('unexpected tag type ' + typ.decode('ascii') + + ' for tag ' + path_msg(name)) if not names: - yield '.', _tags + yield b'.', _tags # We have to pull these all into ram because tag_item calls cat() for name, oid in tuple(repo.refs(names, limit_to_tags=True)): - assert(name.startswith('refs/tags/')) + assert(name.startswith(b'refs/tags/')) name = name[10:] yield name, tag_item(oid) return # Assumes no duplicate refs - if type(names) not in (frozenset, set): + if isinstance(names, (frozenset, set)): names = frozenset(names) remaining = len(names) last_name = max(names) - if '.' in names: - yield '.', _tags + if b'.' in names: + yield b'.', _tags if remaining == 1: return remaining -= 1 for name, oid in repo.refs(names, limit_to_tags=True): - assert(name.startswith('refs/tags/')) + assert(name.startswith(b'refs/tags/')) name = name[10:] if name > last_name: return @@ -754,25 +893,25 @@ def contents(repo, item, names=None, want_meta=True): global _root, _tags assert repo assert S_ISDIR(item_mode(item)) - item_t = type(item) - if item_t in real_tree_types: - it = repo.cat(item.oid.encode('hex')) + if isinstance(item, real_tree_types): + it = repo.cat(hexlify(item.oid)) _, obj_t, size = next(it) - data = ''.join(it) - if obj_t != 'tree': + data = b''.join(it) + if obj_t != b'tree': for _ in it: pass # Note: it shouldn't be possible to see an Item with type # 'commit' since a 'commit' should always produce a Commit. - raise Exception('unexpected git ' + obj_t) + raise Exception('unexpected git ' + obj_t.decode('ascii')) if want_meta: item_gen = tree_items_with_meta(repo, item.oid, data, names) else: item_gen = tree_items(item.oid, data, names) - elif item_t == RevList: - item_gen = revlist_items(repo, item.oid, names) - elif item_t == Root: + elif isinstance(item, RevList): + item_gen = revlist_items(repo, item.oid, names, + require_meta=want_meta) + elif isinstance(item, Root): item_gen = root_items(repo, names, want_meta) - elif item_t == Tags: + elif isinstance(item, Tags): item_gen = tags_items(repo, names) else: raise Exception('unexpected VFS item ' + str(item)) @@ -782,8 +921,8 @@ def contents(repo, item, names=None, want_meta=True): def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): cache_key = b'res:%d%d%d:%s\0%s' \ % (bool(want_meta), bool(follow), repo.id(), - ('/'.join(x[0] for x in parent) if parent else ''), - '/'.join(path)) + (b'/'.join(x[0] for x in parent) if parent else b''), + path) resolution = cache_get(cache_key) if resolution: return resolution @@ -794,7 +933,7 @@ def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): def raise_dir_required_but_not_dir(path, parent, past): raise IOError(ENOTDIR, - "path %r%s resolves to non-directory %r" + "path %s%s resolves to non-directory %r" % (path, ' (relative to %r)' % parent if parent else '', past), @@ -805,8 +944,8 @@ def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): if parent: for x in parent: assert len(x) == 2 - assert type(x[0]) in (bytes, str) - assert type(x[1]) in item_types + assert isinstance(x[0], (bytes, str_type)) + assert isinstance(x[1], item_types) assert parent[0][1] == _root if not S_ISDIR(item_mode(parent[-1][1])): raise IOError(ENOTDIR, @@ -817,14 +956,14 @@ def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): follow = True if not future: # path was effectively '.' or '/' if is_absolute: - return notice_resolution((('', _root),)) + return notice_resolution(((b'', _root),)) if parent: return notice_resolution(tuple(parent)) - return notice_resolution((('', _root),)) + return notice_resolution(((b'', _root),)) if is_absolute: - past = [('', _root)] + past = [(b'', _root)] else: - past = list(parent) if parent else [('', _root)] + past = list(parent) if parent else [(b'', _root)] hops = 0 while True: if not future: @@ -832,14 +971,14 @@ def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): raise_dir_required_but_not_dir(path, parent, past) return notice_resolution(tuple(past)) segment = future.pop() - if segment == '..': + if segment == b'..': assert len(past) > 0 if len(past) > 1: # .. from / is / assert S_ISDIR(item_mode(past[-1][1])) past.pop() else: parent_name, parent_item = past[-1] - wanted = (segment,) if not want_meta else ('.', segment) + wanted = (segment,) if not want_meta else (b'.', segment) items = tuple(contents(repo, parent_item, names=wanted, want_meta=want_meta)) if not want_meta: @@ -847,7 +986,7 @@ def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): else: # First item will be '.' and have the metadata item = items[1][1] if len(items) == 2 else None dot, dot_item = items[0] - assert dot == '.' + assert dot == b'.' past[-1] = parent_name, parent_item if not item: past.append((segment, None),) @@ -867,7 +1006,7 @@ def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): raise_dir_required_but_not_dir(path, parent, past) return notice_resolution(tuple(past)) # It's treeish - if want_meta and type(item) in real_tree_types: + if want_meta and isinstance(item, real_tree_types): dir_meta = _find_treeish_oid_metadata(repo, item.oid) if dir_meta: item = item._replace(meta=dir_meta) @@ -885,13 +1024,13 @@ def _resolve_path(repo, path, parent=None, want_meta=True, follow=True): is_absolute, _, target_future = _decompose_path(target) if is_absolute: if not target_future: # path was effectively '/' - return notice_resolution((('', _root),)) - past = [('', _root)] + return notice_resolution(((b'', _root),)) + past = [(b'', _root)] future = target_future else: future.extend(target_future) hops += 1 - + def resolve(repo, path, parent=None, want_meta=True, follow=True): """Follow the path in the virtual filesystem and return a tuple representing the location, if any, denoted by the path. Each @@ -940,6 +1079,10 @@ def resolve(repo, path, parent=None, want_meta=True, follow=True): needed, make a copy via item.meta.copy() and modify that instead. """ + if repo.is_remote(): + # Redirect to the more efficient remote version + return repo.resolve(path, parent=parent, want_meta=want_meta, + follow=follow) result = _resolve_path(repo, path, parent=parent, want_meta=want_meta, follow=follow) _, leaf_item = result[-1] @@ -985,7 +1128,8 @@ def augment_item_meta(repo, item, include_size=False): # m is mode meta = Metadata() meta.mode = m - meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0 + meta.uid = meta.gid = None + meta.atime = meta.mtime = meta.ctime = 0 if S_ISLNK(m): if isinstance(item, FakeLink): target = item.target @@ -1005,9 +1149,9 @@ def fill_in_metadata_if_dir(repo, item): """ if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata): - items = tuple(contents(repo, item, ('.',), want_meta=True)) + items = tuple(contents(repo, item, (b'.',), want_meta=True)) assert len(items) == 1 - assert items[0][0] == '.' + assert items[0][0] == b'.' item = items[0][1] return item