X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=lib%2Fbup%2Fvfs.py;h=99ca85d39426a0de4e35617625872d9b804f9491;hb=aeb8ce7d7974a4e6f472ae546d3baab90e1e8aa1;hp=454355337c4b06bde89f231657be66647a7fea14;hpb=f3e3307ebee6b88b118a447b0f999232eb948696;p=bup.git

diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py
index 4543553..99ca85d 100644
--- a/lib/bup/vfs.py
+++ b/lib/bup/vfs.py
@@ -1,110 +1,145 @@
-"""Virtual File System representing bup's repository contents.
+"""Virtual File System interface to bup repository content.
+
+This module provides a path-based interface to the content of a bup
+repository.
+
+The VFS is structured like this:
+
+  /SAVE-NAME/latest/...
+  /SAVE-NAME/SAVE-DATE/...
+  /.tag/TAG-NAME/...
+
+Each path is represented by an item that has least an item.meta which
+may be either a Metadata object, or an integer mode.  Functions like
+item_mode() and item_size() will return the mode and size in either
+case.  Any item.meta Metadata instances must not be modified directly.
+Make a copy to modify via item.meta.copy() if needed, or call
+copy_item().
+
+The want_meta argument is advisory for calls that accept it, and it
+may not be honored.  Callers must be able to handle an item.meta value
+that is either an instance of Metadata or an integer mode, perhaps
+via item_mode() or augment_item_meta().
+
+Setting want_meta=False is rarely desirable since it can limit the VFS
+to only the metadata that git itself can represent, and so for
+example, fifos and sockets will appear to be regular files
+(e.g. S_ISREG(item_mode(item)) will be true).  But the option is still
+provided because it may be more efficient when just the path names or
+the more limited metadata is sufficient.
+
+Any given metadata object's size may be None, in which case the size
+can be computed via item_size() or augment_item_meta(...,
+include_size=True).
+
+When traversing a directory using functions like contents(), the meta
+value for any directories other than '.' will be a default directory
+mode, not a Metadata object.  This is because the actual metadata for
+a directory is stored inside the directory (see
+fill_in_metadata_if_dir() or ensure_item_has_metadata()).
+
+Commit items represent commits (e.g. /.tag/some-commit or
+/foo/latest), and for most purposes, they appear as the underlying
+tree.  S_ISDIR(item_mode(item)) will return true for both tree Items
+and Commits and the commit's oid is the tree hash; the commit hash is
+item.coid.
 
-The vfs.py library makes it possible to expose contents from bup's repository
-and abstracts internal name mangling and storage from the exposition layer.
 """
-import os, re, stat, time
-from bup import git
-from helpers import *
-from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
 
-EMPTY_SHA='\0'*20
-
-_cp = None
-def cp():
-    """Create a git.CatPipe object or reuse the already existing one."""
-    global _cp
-    if not _cp:
-        _cp = git.CatPipe()
-    return _cp
-
-class NodeError(Exception):
-    """VFS base exception."""
-    pass
-
-class NoSuchFile(NodeError):
-    """Request of a file that does not exist."""
-    pass
-
-class NotDir(NodeError):
-    """Attempt to do a directory action on a file that is not one."""
-    pass
-
-class NotFile(NodeError):
-    """Access to a node that does not represent a file."""
-    pass
-
-class TooManySymlinks(NodeError):
-    """Symlink dereferencing level is too deep."""
-    pass
-
-
-def _treeget(hash):
-    it = cp().get(hash.encode('hex'))
-    type = it.next()
-    assert(type == 'tree')
-    return git.tree_decode(''.join(it))
-
-
-def _tree_decode(hash):
-    tree = [(int(name,16),stat.S_ISDIR(mode),sha)
-            for (mode,name,sha)
-            in _treeget(hash)]
-    assert(tree == list(sorted(tree)))
-    return tree
-
-
-def _chunk_len(hash):
-    return sum(len(b) for b in cp().join(hash.encode('hex')))
-
-
-def _last_chunk_info(hash):
-    tree = _tree_decode(hash)
-    assert(tree)
-    (ofs,isdir,sha) = tree[-1]
-    if isdir:
-        (subofs, sublen) = _last_chunk_info(sha)
-        return (ofs+subofs, sublen)
-    else:
-        return (ofs, _chunk_len(sha))
-
-
-def _total_size(hash):
-    (lastofs, lastsize) = _last_chunk_info(hash)
-    return lastofs + lastsize
-
-
-def _chunkiter(hash, startofs):
+from __future__ import absolute_import, print_function
+from collections import namedtuple
+from errno import EINVAL, ELOOP, ENOENT, ENOTDIR
+from itertools import chain, dropwhile, groupby, tee
+from random import randrange
+from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
+from time import localtime, strftime
+import exceptions, re, sys
+
+from bup import client, git, metadata
+from bup.compat import range
+from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode
+from bup.helpers import debug2, last
+from bup.metadata import Metadata
+from bup.repo import LocalRepo, RemoteRepo
+
+
+class IOError(exceptions.IOError):
+    def __init__(self, errno, message, terminus=None):
+        exceptions.IOError.__init__(self, errno, message)
+        self.terminus = terminus
+
+default_file_mode = S_IFREG | 0o644
+default_dir_mode = S_IFDIR | 0o755
+default_symlink_mode = S_IFLNK | 0o755
+
+def _default_mode_for_gitmode(gitmode):
+    if S_ISREG(gitmode):
+        return default_file_mode
+    if S_ISDIR(gitmode):
+        return default_dir_mode
+    if S_ISLNK(gitmode):
+        return default_symlink_mode
+    raise Exception('unexpected git mode ' + oct(gitmode))
+
+def _normal_or_chunked_file_size(repo, oid):
+    """Return the size of the normal or chunked file indicated by oid."""
+    # FIXME: --batch-format CatPipe?
+    it = repo.cat(oid.encode('hex'))
+    _, obj_t, size = next(it)
+    ofs = 0
+    while obj_t == 'tree':
+        mode, name, last_oid = last(tree_decode(''.join(it)))
+        ofs += int(name, 16)
+        it = repo.cat(last_oid.encode('hex'))
+        _, obj_t, size = next(it)
+    return ofs + sum(len(b) for b in it)
+
+def _skip_chunks_before_offset(tree, offset):
+    prev_ent = next(tree, None)
+    if not prev_ent:
+        return tree
+    ent = None
+    for ent in tree:
+        ent_ofs = int(ent[1], 16)
+        if ent_ofs > offset:
+            return chain([prev_ent, ent], tree)
+        if ent_ofs == offset:
+            return chain([ent], tree)
+        prev_ent = ent
+    return [prev_ent]
+
+def _tree_chunks(repo, tree, startofs):
+    "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
     assert(startofs >= 0)
-    tree = _tree_decode(hash)
-
-    # skip elements before startofs
-    for i in xrange(len(tree)):
-        if i+1 >= len(tree) or tree[i+1][0] > startofs:
-            break
-    first = i
-
-    # iterate through what's left
-    for i in xrange(first, len(tree)):
-        (ofs,isdir,sha) = tree[i]
-        skipmore = startofs-ofs
+    # name is the chunk's hex offset in the original file
+    for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
+        ofs = int(name, 16)
+        skipmore = startofs - ofs
         if skipmore < 0:
             skipmore = 0
-        if isdir:
-            for b in _chunkiter(sha, skipmore):
+        it = repo.cat(oid.encode('hex'))
+        _, obj_t, size = next(it)
+        data = ''.join(it)            
+        if S_ISDIR(mode):
+            assert obj_t == 'tree'
+            for b in _tree_chunks(repo, tree_decode(data), skipmore):
                 yield b
         else:
-            yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
-
+            assert obj_t == 'blob'
+            yield data[skipmore:]
 
 class _ChunkReader:
-    def __init__(self, hash, isdir, startofs):
+    def __init__(self, repo, oid, startofs):
+        it = repo.cat(oid.encode('hex'))
+        _, obj_t, size = next(it)
+        isdir = obj_t == 'tree'
+        data = ''.join(it)
         if isdir:
-            self.it = _chunkiter(hash, startofs)
+            self.it = _tree_chunks(repo, tree_decode(data), startofs)
             self.blob = None
         else:
             self.it = None
-            self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
+            self.blob = data[startofs:]
         self.ofs = startofs
 
     def next(self, size):
@@ -125,31 +160,36 @@ class _ChunkReader:
         self.ofs += len(out)
         return out
 
-
 class _FileReader(object):
-    def __init__(self, hash, size, isdir):
-        self.hash = hash
+    def __init__(self, repo, oid, known_size=None):
+        assert len(oid) == 20
+        self.oid = oid
         self.ofs = 0
-        self.size = size
-        self.isdir = isdir
         self.reader = None
-
+        self._repo = repo
+        self._size = known_size
+
+    def _compute_size(self):
+        if not self._size:
+            self._size = _normal_or_chunked_file_size(self._repo, self.oid)
+        return self._size
+        
     def seek(self, ofs):
-        if ofs > self.size:
-            self.ofs = self.size
-        elif ofs < 0:
-            self.ofs = 0
-        else:
-            self.ofs = ofs
+        if ofs < 0 or ofs > self._compute_size():
+            raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
+        self.ofs = ofs
 
     def tell(self):
         return self.ofs
 
-    def read(self, count = -1):
+    def read(self, count=-1):
+        size = self._compute_size()
+        if self.ofs >= size:
+            return ''
         if count < 0:
-            count = self.size - self.ofs
+            count = size - self.ofs
         if not self.reader or self.reader.ofs != self.ofs:
-            self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
+            self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
         try:
             buf = self.reader.next(count)
         except:
@@ -161,374 +201,818 @@ class _FileReader(object):
     def close(self):
         pass
 
+    def __enter__(self):
+        return self
+    def __exit__(self, type, value, traceback):
+        self.close()
+        return False
 
-class Node:
-    """Base class for file representation."""
-    def __init__(self, parent, name, mode, hash):
-        self.parent = parent
-        self.name = name
-        self.mode = mode
-        self.hash = hash
-        self.ctime = self.mtime = self.atime = 0
-        self._subs = None
-
-    def __cmp__(a, b):
-        return cmp(a and a.name or None, b and b.name or None)
-
-    def __iter__(self):
-        return iter(self.subs())
-
-    def fullname(self, stop_at=None):
-        """Get this file's full path."""
-        assert(self != stop_at)  # would be the empty string; too weird
-        if self.parent and self.parent != stop_at:
-            return os.path.join(self.parent.fullname(stop_at=stop_at),
-                                self.name)
-        else:
-            return self.name
-
-    def _mksubs(self):
-        self._subs = {}
-
-    def subs(self):
-        """Get a list of nodes that are contained in this node."""
-        if self._subs == None:
-            self._mksubs()
-        return sorted(self._subs.values())
-
-    def sub(self, name):
-        """Get node named 'name' that is contained in this node."""
-        if self._subs == None:
-            self._mksubs()
-        ret = self._subs.get(name)
-        if not ret:
-            raise NoSuchFile("no file %r in %r" % (name, self.name))
-        return ret
-
-    def top(self):
-        """Return the very top node of the tree."""
-        if self.parent:
-            return self.parent.top()
+_multiple_slashes_rx = re.compile(r'//+')
+
+def _decompose_path(path):
+    """Return a boolean indicating whether the path is absolute, and a
+    reversed list of path elements, omitting any occurrences of "."
+    and ignoring any leading or trailing slash.  If the path is
+    effectively '/' or '.', return an empty list.
+
+    """
+    path = re.sub(_multiple_slashes_rx, '/', path)
+    if path == '/':
+        return True, True, []
+    is_absolute = must_be_dir = False
+    if path.startswith('/'):
+        is_absolute = True
+        path = path[1:]
+    for suffix in ('/', '/.'):
+        if path.endswith(suffix):
+            must_be_dir = True
+            path = path[:-len(suffix)]
+    parts = [x for x in path.split('/') if x != '.']
+    parts.reverse()
+    if not parts:
+        must_be_dir = True  # e.g. path was effectively '.' or '/', etc.
+    return is_absolute, must_be_dir, parts
+    
+
+Item = namedtuple('Item', ('meta', 'oid'))
+Chunky = namedtuple('Chunky', ('meta', 'oid'))
+Root = namedtuple('Root', ('meta'))
+Tags = namedtuple('Tags', ('meta'))
+RevList = namedtuple('RevList', ('meta', 'oid'))
+Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))
+
+item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
+real_tree_types = frozenset((Item, Commit))
+
+_root = Root(meta=default_dir_mode)
+_tags = Tags(meta=default_dir_mode)
+
+
+### vfs cache
+
+### A general purpose shared cache with (currently) cheap random
+### eviction.  At the moment there is no weighting so a single commit
+### item is just as likely to be evicted as an entire "rev-list".  See
+### is_valid_cache_key for a description of the expected content.
+
+_cache = {}
+_cache_keys = []
+_cache_max_items = 30000
+
+def clear_cache():
+    global _cache, _cache_keys
+    _cache = {}
+    _cache_keys = []
+
+def is_valid_cache_key(x):
+    """Return logically true if x looks like it could be a valid cache key
+    (with respect to structure).  Current valid cache entries:
+      res:... -> resolution
+      commit_oid -> commit
+      commit_oid + ':r' -> rev-list
+         i.e. rev-list -> {'.', commit, '2012...', next_commit, ...}
+    """
+    # Suspect we may eventually add "(container_oid, name) -> ...", and others.
+    x_t = type(x)
+    if x_t is bytes:
+        if len(x) == 20:
+            return True
+        if len(x) == 22 and x.endswith(b':r'):
+            return True
+        if x.startswith('res:'):
+            return True
+
+def cache_get(key):
+    global _cache
+    assert is_valid_cache_key(key)
+    return _cache.get(key)
+
+def cache_notice(key, value):
+    global _cache, _cache_keys, _cache_max_items
+    assert is_valid_cache_key(key)
+    if key in _cache:
+        return
+    if len(_cache) < _cache_max_items:
+        _cache_keys.append(key)
+        _cache[key] = value
+        return
+    victim_i = randrange(0, len(_cache_keys))
+    victim = _cache_keys[victim_i]
+    del _cache[victim]
+    _cache_keys[victim_i] = key
+    _cache[key] = value
+
+def cache_get_commit_item(oid, need_meta=True):
+    """Return the requested tree item if it can be found in the cache.
+    When need_meta is true don't return a cached item that only has a
+    mode."""
+    # tree might be stored independently, or as '.' with its entries.
+    item = cache_get(oid)
+    if item:
+        if not need_meta:
+            return item
+        if isinstance(item.meta, Metadata):
+            return item
+    entries = cache_get(oid + b':r')
+    if entries:
+        return entries['.']
+
+def cache_get_revlist_item(oid, need_meta=True):
+    commit = cache_get_commit_item(oid, need_meta=need_meta)
+    if commit:
+        return RevList(oid=oid, meta=commit.meta)
+
+def copy_item(item):
+    """Return a completely independent copy of item, such that
+    modifications will not affect the original.
+
+    """
+    meta = getattr(item, 'meta', None)
+    if isinstance(meta, Metadata):
+        return(item._replace(meta=meta.copy()))
+    return item
+
+def item_mode(item):
+    """Return the integer mode (stat st_mode) for item."""
+    m = item.meta
+    if isinstance(m, Metadata):
+        return m.mode
+    return m
+
+def _read_dir_meta(bupm):
+    # This is because save writes unmodified Metadata() entries for
+    # fake parents -- test-save-strip-graft.sh demonstrates.
+    m = Metadata.read(bupm)
+    if not m:
+        return default_dir_mode
+    assert m.mode is not None
+    if m.size is None:
+        m.size = 0
+    return m
+
+def tree_data_and_bupm(repo, oid):
+    """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
+    tree has no metadata (i.e. older bup save, or non-bup tree).
+
+    """    
+    assert len(oid) == 20
+    it = repo.cat(oid.encode('hex'))
+    _, item_t, size = next(it)
+    data = ''.join(it)
+    if item_t == 'commit':
+        commit = parse_commit(data)
+        it = repo.cat(commit.tree)
+        _, item_t, size = next(it)
+        data = ''.join(it)
+        assert item_t == 'tree'
+    elif item_t != 'tree':
+        raise Exception('%r is not a tree or commit' % oid.encode('hex'))
+    for _, mangled_name, sub_oid in tree_decode(data):
+        if mangled_name == '.bupm':
+            return data, sub_oid
+        if mangled_name > '.bupm':
+            break
+    return data, None
+
+def _find_treeish_oid_metadata(repo, oid):
+    """Return the metadata for the tree or commit oid, or None if the tree
+    has no metadata (i.e. older bup save, or non-bup tree).
+
+    """
+    tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
+    if bupm_oid:
+        with _FileReader(repo, bupm_oid) as meta_stream:
+            return _read_dir_meta(meta_stream)
+    return None
+
+def _readlink(repo, oid):
+    return ''.join(repo.join(oid.encode('hex')))
+
+def readlink(repo, item):
+    """Return the link target of item, which must be a symlink.  Reads the
+    target from the repository if necessary."""
+    assert repo
+    assert S_ISLNK(item_mode(item))
+    if isinstance(item.meta, Metadata):
+        target = item.meta.symlink_target
+        if target:
+            return target
+    return _readlink(repo, item.oid)
+
+def _compute_item_size(repo, item):
+    mode = item_mode(item)
+    if S_ISREG(mode):
+        size = _normal_or_chunked_file_size(repo, item.oid)
+        return size
+    if S_ISLNK(mode):
+        return len(_readlink(repo, item.oid))
+    return 0
+
+def item_size(repo, item):
+    """Return the size of item, computing it if necessary."""
+    m = item.meta
+    if isinstance(m, Metadata) and m.size is not None:
+        return m.size
+    return _compute_item_size(repo, item)
+
+def tree_data_reader(repo, oid):
+    """Return an open reader for all of the data contained within oid.  If
+    oid refers to a tree, recursively concatenate all of its contents."""
+    return _FileReader(repo, oid)
+
+def fopen(repo, item):
+    """Return an open reader for the given file item."""
+    assert S_ISREG(item_mode(item))
+    return tree_data_reader(repo, item.oid)
+
+def _commit_item_from_data(oid, data):
+    info = parse_commit(data)
+    return Commit(meta=default_dir_mode,
+                  oid=info.tree.decode('hex'),
+                  coid=oid)
+
+def _commit_item_from_oid(repo, oid, require_meta):
+    commit = cache_get_commit_item(oid, need_meta=require_meta)
+    if commit and ((not require_meta) or isinstance(commit.meta, Metadata)):
+        return commit
+    it = repo.cat(oid.encode('hex'))
+    _, typ, size = next(it)
+    assert typ == 'commit'
+    commit = _commit_item_from_data(oid, ''.join(it))
+    if require_meta:
+        meta = _find_treeish_oid_metadata(repo, commit.oid)
+        if meta:
+            commit = commit._replace(meta=meta)
+    cache_notice(oid, commit)
+    return commit
+
+def _revlist_item_from_oid(repo, oid, require_meta):
+    commit = _commit_item_from_oid(repo, oid, require_meta)
+    return RevList(oid=oid, meta=commit.meta)
+
+def root_items(repo, names=None, want_meta=True):
+    """Yield (name, item) for the items in '/' in the VFS.  Return
+    everything if names is logically false, otherwise return only
+    items with a name in the collection.
+
+    """
+    # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?
+
+    global _root, _tags
+    if not names:
+        yield '.', _root
+        yield '.tag', _tags
+        # FIXME: maybe eventually support repo.clone() or something
+        # and pass in two repos, so we can drop the tuple() and stream
+        # in parallel (i.e. meta vs refs).
+        for name, oid in tuple(repo.refs([], limit_to_heads=True)):
+            assert(name.startswith('refs/heads/'))
+            yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
+        return
+
+    if '.' in names:
+        yield '.', _root
+    if '.tag' in names:
+        yield '.tag', _tags
+    for ref in names:
+        if ref in ('.', '.tag'):
+            continue
+        it = repo.cat('refs/heads/' + ref)
+        oidx, typ, size = next(it)
+        if not oidx:
+            for _ in it: pass
+            continue
+        assert typ == 'commit'
+        commit = parse_commit(''.join(it))
+        yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), want_meta)
+
+def ordered_tree_entries(tree_data, bupm=None):
+    """Yields (name, mangled_name, kind, gitmode, oid) for each item in
+    tree, sorted by name.
+
+    """
+    # Sadly, the .bupm entries currently aren't in git tree order,
+    # i.e. they don't account for the fact that git sorts trees
+    # (including our chunked trees) as if their names ended with "/",
+    # so "fo" sorts after "fo." iff fo is a directory.  This makes
+    # streaming impossible when we need the metadata.
+    def result_from_tree_entry(tree_entry):
+        gitmode, mangled_name, oid = tree_entry
+        name, kind = git.demangle_name(mangled_name, gitmode)
+        return name, mangled_name, kind, gitmode, oid
+
+    tree_ents = (result_from_tree_entry(x) for x in tree_decode(tree_data))
+    if bupm:
+        tree_ents = sorted(tree_ents, key=lambda x: x[0])
+    for ent in tree_ents:
+        yield ent
+    
+def tree_items(oid, tree_data, names=frozenset(), bupm=None):
+
+    def tree_item(ent_oid, kind, gitmode):
+        if kind == BUP_CHUNKED:
+            meta = Metadata.read(bupm) if bupm else default_file_mode
+            return Chunky(oid=ent_oid, meta=meta)
+
+        if S_ISDIR(gitmode):
+            # No metadata here (accessable via '.' inside ent_oid).
+            return Item(meta=default_dir_mode, oid=ent_oid)
+
+        return Item(oid=ent_oid,
+                    meta=(Metadata.read(bupm) if bupm \
+                          else _default_mode_for_gitmode(gitmode)))
+
+    assert len(oid) == 20
+    if not names:
+        dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
+        yield '.', Item(oid=oid, meta=dot_meta)
+        tree_entries = ordered_tree_entries(tree_data, bupm)
+        for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
+            if mangled_name == '.bupm':
+                continue
+            assert name != '.'
+            yield name, tree_item(ent_oid, kind, gitmode)
+        return
+
+    # Assumes the tree is properly formed, i.e. there are no
+    # duplicates, and entries will be in git tree order.
+    if type(names) not in (frozenset, set):
+        names = frozenset(names)
+    remaining = len(names)
+
+    # Account for the bupm sort order issue (cf. ordered_tree_entries above)
+    last_name = max(names) if bupm else max(names) + '/'
+
+    if '.' in names:
+        dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
+        yield '.', Item(oid=oid, meta=dot_meta)
+        if remaining == 1:
+            return
+        remaining -= 1
+
+    tree_entries = ordered_tree_entries(tree_data, bupm)
+    for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
+        if mangled_name == '.bupm':
+            continue
+        assert name != '.'
+        if name not in names:
+            if name > last_name:
+                break  # given bupm sort order, we're finished
+            if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
+                Metadata.read(bupm)
+            continue
+        yield name, tree_item(ent_oid, kind, gitmode)
+        if remaining == 1:
+            break
+        remaining -= 1
+
+def tree_items_with_meta(repo, oid, tree_data, names):
+    # For now, the .bupm order doesn't quite match git's, and we don't
+    # load the tree data incrementally anyway, so we just work in RAM
+    # via tree_data.
+    assert len(oid) == 20
+    bupm = None
+    for _, mangled_name, sub_oid in tree_decode(tree_data):
+        if mangled_name == '.bupm':
+            bupm = _FileReader(repo, sub_oid)
+            break
+        if mangled_name > '.bupm':
+            break
+    for item in tree_items(oid, tree_data, names, bupm):
+        yield item
+
+_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
+        
+def _reverse_suffix_duplicates(strs):
+    """Yields the elements of strs, with any runs of duplicate values
+    suffixed with -N suffixes, where the zero padded integer N
+    decreases to 0 by 1 (e.g. 10, 09, ..., 00).
+
+    """
+    for name, duplicates in groupby(strs):
+        ndup = len(tuple(duplicates))
+        if ndup == 1:
+            yield name
         else:
-            return self
+            ndig = len(str(ndup - 1))
+            fmt = '%s-' + '%0' + str(ndig) + 'd'
+            for i in range(ndup - 1, -1, -1):
+                yield fmt % (name, i)
+
+def parse_rev(f):
+    items = f.readline().split(None)
+    assert len(items) == 2
+    tree, auth_sec = items
+    return tree.decode('hex'), int(auth_sec)
+
+def _name_for_rev(rev):
+    commit_oidx, (tree_oid, utc) = rev
+    return strftime('%Y-%m-%d-%H%M%S', localtime(utc))
+
+def _item_for_rev(rev):
+    commit_oidx, (tree_oid, utc) = rev
+    coid = commit_oidx.decode('hex')
+    item = cache_get_commit_item(coid, need_meta=False)
+    if item:
+        return item
+    item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
+    cache_notice(item.coid, item)
+    return item
+
+def cache_commit(repo, oid):
+    """Build, cache, and return a "name -> commit_item" dict of the entire
+    commit rev-list.
 
-    def fs_top(self):
-        """Return the top node of the particular backup set.
+    """
+    # For now, always cache with full metadata
+    entries = {}
+    entries['.'] = _revlist_item_from_oid(repo, oid, True)
+    revs = repo.rev_list((oid.encode('hex'),), format='%T %at',
+                         parse=parse_rev)
+    rev_items, rev_names = tee(revs)
+    revs = None  # Don't disturb the tees
+    rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
+    rev_items = (_item_for_rev(x) for x in rev_items)
+    latest = None
+    for item in rev_items:
+        latest = latest or item
+        name = next(rev_names)
+        entries[name] = item
+    entries['latest'] = latest
+    cache_notice(latest.coid + b':r', entries)
+    return entries
+
+def revlist_items(repo, oid, names):
+    assert len(oid) == 20
+
+    # Special case '.' instead of caching the whole history since it's
+    # the only way to get the metadata for the commit.
+    if names and all(x == '.' for x in names):
+        yield '.', _revlist_item_from_oid(repo, oid, True)
+        return
+
+    # For now, don't worry about the possibility of the contents being
+    # "too big" for the cache.
+    entries = cache_get(oid + b':r')
+    if not entries:
+        entries = cache_commit(repo, oid)
+
+    if not names:
+        for name in sorted(entries.keys()):
+            yield name, entries[name]
+        return
+
+    names = frozenset(name for name in names
+                      if _save_name_rx.match(name) or name in ('.', 'latest'))
+
+    if '.' in names:
+        yield '.', entries['.']
+    for name in (n for n in names if n != '.'):
+        commit = entries.get(name)
+        if commit:
+            yield name, commit
+
+def tags_items(repo, names):
+    global _tags
+
+    def tag_item(oid):
+        assert len(oid) == 20
+        oidx = oid.encode('hex')
+        it = repo.cat(oidx)
+        _, typ, size = next(it)
+        if typ == 'commit':
+            return cache_get_commit_item(oid, need_meta=False) \
+                or _commit_item_from_data(oid, ''.join(it))
+        for _ in it: pass
+        if typ == 'blob':
+            return Item(meta=default_file_mode, oid=oid)
+        elif typ == 'tree':
+            return Item(meta=default_dir_mode, oid=oid)
+        raise Exception('unexpected tag type ' + typ + ' for tag ' + name)
+
+    if not names:
+        yield '.', _tags
+        # We have to pull these all into ram because tag_item calls cat()
+        for name, oid in tuple(repo.refs(names, limit_to_tags=True)):
+            assert(name.startswith('refs/tags/'))
+            name = name[10:]
+            yield name, tag_item(oid)
+        return
+
+    # Assumes no duplicate refs
+    if type(names) not in (frozenset, set):
+        names = frozenset(names)
+    remaining = len(names)
+    last_name = max(names)
+    if '.' in names:
+        yield '.', _tags
+        if remaining == 1:
+            return
+        remaining -= 1
+
+    for name, oid in repo.refs(names, limit_to_tags=True):
+        assert(name.startswith('refs/tags/'))
+        name = name[10:]
+        if name > last_name:
+            return
+        if name not in names:
+            continue
+        yield name, tag_item(oid)
+        if remaining == 1:
+            return
+        remaining -= 1
+
+def contents(repo, item, names=None, want_meta=True):
+    """Yields information about the items contained in item.  Yields
+    (name, item) for each name in names, if the name exists, in an
+    unspecified order.  If there are no names, then yields (name,
+    item) for all items, including, a first item named '.'
+    representing the container itself.
+
+    The meta value for any directories other than '.' will be a
+    default directory mode, not a Metadata object.  This is because
+    the actual metadata for a directory is stored inside the directory
+    (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).
+
+    Note that want_meta is advisory.  For any given item, item.meta
+    might be a Metadata instance or a mode, and if the former,
+    meta.size might be None.  Missing sizes can be computed via via
+    item_size() or augment_item_meta(..., include_size=True).
+
+    Do not modify any item.meta Metadata instances directly.  If
+    needed, make a copy via item.meta.copy() and modify that instead.
 
-        If this node isn't inside a backup set, return the root level.
-        """
-        if self.parent and not isinstance(self.parent, CommitList):
-            return self.parent.fs_top()
+    """
+    # Q: are we comfortable promising '.' first when no names?
+    global _root, _tags
+    assert repo
+    assert S_ISDIR(item_mode(item))
+    item_t = type(item)
+    if item_t in real_tree_types:
+        it = repo.cat(item.oid.encode('hex'))
+        _, obj_t, size = next(it)
+        data = ''.join(it)
+        if obj_t != 'tree':
+            for _ in it: pass
+            # Note: it shouldn't be possible to see an Item with type
+            # 'commit' since a 'commit' should always produce a Commit.
+            raise Exception('unexpected git ' + obj_t)
+        if want_meta:
+            item_gen = tree_items_with_meta(repo, item.oid, data, names)
         else:
-            return self
-
-    def _lresolve(self, parts):
-        #debug2('_lresolve %r in %r\n' % (parts, self.name))
-        if not parts:
-            return self
-        (first, rest) = (parts[0], parts[1:])
-        if first == '.':
-            return self._lresolve(rest)
-        elif first == '..':
-            if not self.parent:
-                raise NoSuchFile("no parent dir for %r" % self.name)
-            return self.parent._lresolve(rest)
-        elif rest:
-            return self.sub(first)._lresolve(rest)
+            item_gen = tree_items(item.oid, data, names)
+    elif item_t == RevList:
+        item_gen = revlist_items(repo, item.oid, names)
+    elif item_t == Root:
+        item_gen = root_items(repo, names, want_meta)
+    elif item_t == Tags:
+        item_gen = tags_items(repo, names)
+    else:
+        raise Exception('unexpected VFS item ' + str(item))
+    for x in item_gen:
+        yield x
+
+def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
+    cache_key = b'res:%d%d%d:%s\0%s' \
+                % (bool(want_meta), bool(deref), repo.id(),
+                   ('/'.join(x[0] for x in parent) if parent else ''),
+                   '/'.join(path))
+    resolution = cache_get(cache_key)
+    if resolution:
+        return resolution
+
+    def notice_resolution(r):
+        cache_notice(cache_key, r)
+        return r
+
+    def raise_dir_required_but_not_dir(path, parent, past):
+        raise IOError(ENOTDIR,
+                      "path %r%s resolves to non-directory %r"
+                      % (path,
+                         ' (relative to %r)' % parent if parent else '',
+                         past),
+                      terminus=past)
+    global _root
+    assert repo
+    assert len(path)
+    if parent:
+        for x in parent:
+            assert len(x) == 2
+            assert type(x[0]) in (bytes, str)
+            assert type(x[1]) in item_types
+        assert parent[0][1] == _root
+        if not S_ISDIR(item_mode(parent[-1][1])):
+            raise IOError(ENOTDIR,
+                          'path resolution parent %r is not a directory'
+                          % (parent,))
+    is_absolute, must_be_dir, future = _decompose_path(path)
+    if must_be_dir:
+        deref = True
+    if not future:  # path was effectively '.' or '/'
+        if is_absolute:
+            return notice_resolution((('', _root),))
+        if parent:
+            return notice_resolution(tuple(parent))
+        return notice_resolution((('', _root),))
+    if is_absolute:
+        past = [('', _root)]
+    else:
+        past = list(parent) if parent else [('', _root)]
+    hops = 0
+    while True:
+        if not future:
+            if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
+                raise_dir_required_but_not_dir(path, parent, past)
+            return notice_resolution(tuple(past))
+        segment = future.pop()
+        if segment == '..':
+            assert len(past) > 0
+            if len(past) > 1:  # .. from / is /
+                assert S_ISDIR(item_mode(past[-1][1]))
+                past.pop()
         else:
-            return self.sub(first)
-
-    def lresolve(self, path, stay_inside_fs=False):
-        """Walk into a given sub-path of this node.
-
-        If the last element is a symlink, leave it as a symlink, don't resolve
-        it.  (like lstat())
-        """
-        start = self
-        if not path:
-            return start
-        if path.startswith('/'):
-            if stay_inside_fs:
-                start = self.fs_top()
-            else:
-                start = self.top()
-            path = path[1:]
-        parts = re.split(r'/+', path or '.')
-        if not parts[-1]:
-            parts[-1] = '.'
-        #debug2('parts: %r %r\n' % (path, parts))
-        return start._lresolve(parts)
-
-    def resolve(self, path = ''):
-        """Like lresolve(), and dereference it if it was a symlink."""
-        return self.lresolve(path).lresolve('.')
-
-    def try_resolve(self, path = ''):
-        """Like resolve(), but don't worry if a symlink uses an invalid path.
-
-        Returns an error if any intermediate nodes were invalid.
-        """
-        n = self.lresolve(path)
-        try:
-            n = n.lresolve('.')
-        except NoSuchFile:
-            pass
-        return n
-
-    def nlinks(self):
-        """Get the number of hard links to the current node."""
-        if self._subs == None:
-            self._mksubs()
-        return 1
-
-    def size(self):
-        """Get the size of the current node."""
-        return 0
-
-    def open(self):
-        """Open the current node. It is an error to open a non-file node."""
-        raise NotFile('%s is not a regular file' % self.name)
-
-
-class File(Node):
-    """A normal file from bup's repository."""
-    def __init__(self, parent, name, mode, hash, bupmode):
-        Node.__init__(self, parent, name, mode, hash)
-        self.bupmode = bupmode
-        self._cached_size = None
-        self._filereader = None
-
-    def open(self):
-        """Open the file."""
-        # You'd think FUSE might call this only once each time a file is
-        # opened, but no; it's really more of a refcount, and it's called
-        # once per read().  Thus, it's important to cache the filereader
-        # object here so we're not constantly re-seeking.
-        if not self._filereader:
-            self._filereader = _FileReader(self.hash, self.size(),
-                                           self.bupmode == git.BUP_CHUNKED)
-        self._filereader.seek(0)
-        return self._filereader
-
-    def size(self):
-        """Get this file's size."""
-        if self._cached_size == None:
-            debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
-            if self.bupmode == git.BUP_CHUNKED:
-                self._cached_size = _total_size(self.hash)
-            else:
-                self._cached_size = _chunk_len(self.hash)
-            debug1('<<<<File.size() done.\n')
-        return self._cached_size
-
-
-_symrefs = 0
-class Symlink(File):
-    """A symbolic link from bup's repository."""
-    def __init__(self, parent, name, hash, bupmode):
-        File.__init__(self, parent, name, 0120000, hash, bupmode)
-
-    def size(self):
-        """Get the file size of the file at which this link points."""
-        return len(self.readlink())
-
-    def readlink(self):
-        """Get the path that this link points at."""
-        return ''.join(cp().join(self.hash.encode('hex')))
-
-    def dereference(self):
-        """Get the node that this link points at.
-
-        If the path is invalid, raise a NoSuchFile exception. If the level of
-        indirection of symlinks is 100 levels deep, raise a TooManySymlinks
-        exception.
-        """
-        global _symrefs
-        if _symrefs > 100:
-            raise TooManySymlinks('too many levels of symlinks: %r'
-                                  % self.fullname())
-        _symrefs += 1
-        try:
-            try:
-                return self.parent.lresolve(self.readlink(),
-                                            stay_inside_fs=True)
-            except NoSuchFile:
-                raise NoSuchFile("%s: broken symlink to %r"
-                                 % (self.fullname(), self.readlink()))
-        finally:
-            _symrefs -= 1
-
-    def _lresolve(self, parts):
-        return self.dereference()._lresolve(parts)
-
-
-class FakeSymlink(Symlink):
-    """A symlink that is not stored in the bup repository."""
-    def __init__(self, parent, name, toname):
-        Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
-        self.toname = toname
-
-    def readlink(self):
-        """Get the path that this link points at."""
-        return self.toname
-
-
-class Dir(Node):
-    """A directory stored inside of bup's repository."""
-    def _mksubs(self):
-        self._subs = {}
-        it = cp().get(self.hash.encode('hex'))
-        type = it.next()
-        if type == 'commit':
-            del it
-            it = cp().get(self.hash.encode('hex') + ':')
-            type = it.next()
-        assert(type == 'tree')
-        for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
-            name = mangled_name
-            (name,bupmode) = git.demangle_name(mangled_name)
-            if bupmode == git.BUP_CHUNKED:
-                mode = GIT_MODE_FILE
-            if stat.S_ISDIR(mode):
-                self._subs[name] = Dir(self, name, mode, sha)
-            elif stat.S_ISLNK(mode):
-                self._subs[name] = Symlink(self, name, sha, bupmode)
-            else:
-                self._subs[name] = File(self, name, mode, sha, bupmode)
-
-
-class CommitDir(Node):
-    """A directory that contains all commits that are reachable by a ref.
-
-    Contains a set of subdirectories named after the commits' first byte in
-    hexadecimal. Each of those directories contain all commits with hashes that
-    start the same as the directory name. The name used for those
-    subdirectories is the hash of the commit without the first byte. This
-    separation helps us avoid having too much directories on the same level as
-    the number of commits grows big.
-    """
-    def __init__(self, parent, name):
-        Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
-
-    def _mksubs(self):
-        self._subs = {}
-        refs = git.list_refs()
-        for ref in refs:
-            #debug2('ref name: %s\n' % ref[0])
-            revs = git.rev_list(ref[1].encode('hex'))
-            for (date, commit) in revs:
-                #debug2('commit: %s  date: %s\n' % (commit.encode('hex'), date))
-                commithex = commit.encode('hex')
-                containername = commithex[:2]
-                dirname = commithex[2:]
-                n1 = self._subs.get(containername)
-                if not n1:
-                    n1 = CommitList(self, containername)
-                    self._subs[containername] = n1
-
-                if n1.commits.get(dirname):
-                    # Stop work for this ref, the rest should already be present
-                    break
-
-                n1.commits[dirname] = (commit, date)
-
-
-class CommitList(Node):
-    """A list of commits with hashes that start with the current node's name."""
-    def __init__(self, parent, name):
-        Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
-        self.commits = {}
-
-    def _mksubs(self):
-        self._subs = {}
-        for (name, (hash, date)) in self.commits.items():
-            n1 = Dir(self, name, GIT_MODE_TREE, hash)
-            n1.ctime = n1.mtime = date
-            self._subs[name] = n1
-
-
-class TagDir(Node):
-    """A directory that contains all tags in the repository."""
-    def __init__(self, parent, name):
-        Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
-
-    def _mksubs(self):
-        self._subs = {}
-        for (name, sha) in git.list_refs():
-            if name.startswith('refs/tags/'):
-                name = name[10:]
-                date = git.rev_get_date(sha.encode('hex'))
-                commithex = sha.encode('hex')
-                target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
-                tag1 = FakeSymlink(self, name, target)
-                tag1.ctime = tag1.mtime = date
-                self._subs[name] = tag1
-
-
-class BranchList(Node):
-    """A list of links to commits reachable by a branch in bup's repository.
-
-    Represents each commit as a symlink that points to the commit directory in
-    /.commit/??/ . The symlink is named after the commit date.
+            parent_name, parent_item = past[-1]
+            wanted = (segment,) if not want_meta else ('.', segment)
+            items = tuple(contents(repo, parent_item, names=wanted,
+                                   want_meta=want_meta))
+            if not want_meta:
+                item = items[0][1] if items else None
+            else:  # First item will be '.' and have the metadata
+                item = items[1][1] if len(items) == 2 else None
+                dot, dot_item = items[0]
+                assert dot == '.'
+                past[-1] = parent_name, parent_item
+            if not item:
+                past.append((segment, None),)
+                return notice_resolution(tuple(past))
+            mode = item_mode(item)
+            if not S_ISLNK(mode):
+                if not S_ISDIR(mode):
+                    past.append((segment, item),)
+                    if future:
+                        raise IOError(ENOTDIR,
+                                      'path %r%s ends internally in non-directory here: %r'
+                                      % (path,
+                                         ' (relative to %r)' % parent if parent else '',
+                                         past),
+                                      terminus=past)
+                    if must_be_dir:
+                        raise_dir_required_but_not_dir(path, parent, past)
+                    return notice_resolution(tuple(past))
+                # It's treeish
+                if want_meta and type(item) in real_tree_types:
+                    dir_meta = _find_treeish_oid_metadata(repo, item.oid)
+                    if dir_meta:
+                        item = item._replace(meta=dir_meta)
+                past.append((segment, item))
+            else:  # symlink
+                if not future and not deref:
+                    past.append((segment, item),)
+                    continue
+                if hops > 100:
+                    raise IOError(ELOOP,
+                                  'too many symlinks encountered while resolving %r%s'
+                                  % (path, ' relative to %r' % parent if parent else ''),
+                                  terminus=tuple(past + [(segment, item)]))
+                target = readlink(repo, item)
+                is_absolute, _, target_future = _decompose_path(target)
+                if is_absolute:
+                    if not target_future:  # path was effectively '/'
+                        return notice_resolution((('', _root),))
+                    past = [('', _root)]
+                    future = target_future
+                else:
+                    future.extend(target_future)
+                hops += 1
+                
+def lresolve(repo, path, parent=None, want_meta=True):
+    """Perform exactly the same function as resolve(), except if the final
+    path element is a symbolic link, don't follow it, just return it
+    in the result.
+
     """
-    def __init__(self, parent, name, hash):
-        Node.__init__(self, parent, name, GIT_MODE_TREE, hash)
-
-    def _mksubs(self):
-        self._subs = {}
-
-        tags = git.tags()
-
-        revs = list(git.rev_list(self.hash.encode('hex')))
-        for (date, commit) in revs:
-            l = time.localtime(date)
-            ls = time.strftime('%Y-%m-%d-%H%M%S', l)
-            commithex = commit.encode('hex')
-            target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
-            n1 = FakeSymlink(self, ls, target)
-            n1.ctime = n1.mtime = date
-            self._subs[ls] = n1
-
-            for tag in tags.get(commit, []):
-                t1 = FakeSymlink(self, tag, target)
-                t1.ctime = t1.mtime = date
-                self._subs[tag] = t1
-
-        latest = max(revs)
-        if latest:
-            (date, commit) = latest
-            commithex = commit.encode('hex')
-            target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
-            n1 = FakeSymlink(self, 'latest', target)
-            n1.ctime = n1.mtime = date
-            self._subs['latest'] = n1
-
-
-class RefList(Node):
-    """A list of branches in bup's repository.
-
-    The sub-nodes of the ref list are a series of CommitList for each commit
-    hash pointed to by a branch.
-
-    Also, a special sub-node named '.commit' contains all commit directories
-    that are reachable via a ref (e.g. a branch).  See CommitDir for details.
+    return _resolve_path(repo, path, parent=parent, want_meta=want_meta,
+                         deref=False)
+
+def resolve(repo, path, parent=None, want_meta=True):
+    """Follow the path in the virtual filesystem and return a tuple
+    representing the location, if any, denoted by the path.  Each
+    element in the result tuple will be (name, info), where info will
+    be a VFS item that can be passed to functions like item_mode().
+
+    If a path segment that does not exist is encountered during
+    resolution, the result will represent the location of the missing
+    item, and that item in the result will be None.
+
+    Any attempt to traverse a non-directory will raise a VFS ENOTDIR
+    IOError exception.
+
+    Any symlinks along the path, including at the end, will be
+    resolved.  A VFS IOError with the errno attribute set to ELOOP
+    will be raised if too many symlinks are traversed while following
+    the path.  That exception is effectively like a normal
+    ELOOP IOError exception, but will include a terminus element
+    describing the location of the failure, which will be a tuple of
+    (name, info) elements.
+
+    The parent, if specified, must be a sequence of (name, item)
+    tuples, and will provide the starting point for the resolution of
+    the path.  If no parent is specified, resolution will start at
+    '/'.
+
+    The result may include elements of parent directly, so they must
+    not be modified later.  If this is a concern, pass in "name,
+    copy_item(item) for name, item in parent" instead.
+
+    When want_meta is true, detailed metadata will be included in each
+    result item if it's avaiable, otherwise item.meta will be an
+    integer mode.  The metadata size may or may not be provided, but
+    can be computed by item_size() or augment_item_meta(...,
+    include_size=True).  Setting want_meta=False is rarely desirable
+    since it can limit the VFS to just the metadata git itself can
+    represent, and so, as an example, fifos and sockets will appear to
+    be regular files (e.g. S_ISREG(item_mode(item)) will be true) .
+    But the option is provided because it may be more efficient when
+    only the path names or the more limited metadata is sufficient.
+
+    Do not modify any item.meta Metadata instances directly.  If
+    needed, make a copy via item.meta.copy() and modify that instead.
+
     """
-    def __init__(self, parent):
-        Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA)
+    result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
+                           deref=True)
+    _, leaf_item = result[-1]
+    if leaf_item:
+        assert not S_ISLNK(item_mode(leaf_item))
+    return result
+
+def try_resolve(repo, path, parent=None, want_meta=True):
+    """If path does not refer to a symlink, does not exist, or refers to a
+    valid symlink, behave exactly like resolve().  If path refers to
+    an invalid symlink, behave like lresolve.
 
-    def _mksubs(self):
-        self._subs = {}
+    """
+    res = lresolve(repo, path, parent=parent, want_meta=want_meta)
+    leaf_name, leaf_item = res[-1]
+    if not leaf_item:
+        return res
+    if not S_ISLNK(item_mode(leaf_item)):
+        return res
+    deref = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
+    deref_name, deref_item = deref[-1]
+    if deref_item:
+        return deref
+    return res
+
+def augment_item_meta(repo, item, include_size=False):
+    """Ensure item has a Metadata instance for item.meta.  If item.meta is
+    currently a mode, replace it with a compatible "fake" Metadata
+    instance.  If include_size is true, ensure item.meta.size is
+    correct, computing it if needed.  If item.meta is a Metadata
+    instance, this call may modify it in place or replace it.
 
-        commit_dir = CommitDir(self, '.commit')
-        self._subs['.commit'] = commit_dir
+    """
+    # If we actually had parallelism, we'd need locking...
+    assert repo
+    m = item.meta
+    if isinstance(m, Metadata):
+        if include_size and m.size is None:
+            m.size = _compute_item_size(repo, item)
+            return item._replace(meta=m)
+        return item
+    # m is mode
+    meta = Metadata()
+    meta.mode = m
+    meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
+    if S_ISLNK(m):
+        target = _readlink(repo, item.oid)
+        meta.symlink_target = target
+        meta.size = len(target)
+    elif include_size:
+        meta.size = _compute_item_size(repo, item)
+    return item._replace(meta=meta)
+
+def fill_in_metadata_if_dir(repo, item):
+    """If item is a directory and item.meta is not a Metadata instance,
+    attempt to find the metadata for the directory.  If found, return
+    a new item augmented to include that metadata.  Otherwise, return
+    item.  May be useful for the output of contents().
 
-        tag_dir = TagDir(self, '.tag')
-        self._subs['.tag'] = tag_dir
+    """
+    if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
+        items = tuple(contents(repo, item, ('.',), want_meta=True))
+        assert len(items) == 1
+        assert items[0][0] == '.'
+        item = items[0][1]
+    return item
+
+def ensure_item_has_metadata(repo, item, include_size=False):
+    """If item is a directory, attempt to find and add its metadata.  If
+    the item still doesn't have a Metadata instance for item.meta,
+    give it one via augment_item_meta().  May be useful for the output
+    of contents().
 
-        for (name,sha) in git.list_refs():
-            if name.startswith('refs/heads/'):
-                name = name[11:]
-                date = git.rev_get_date(sha.encode('hex'))
-                n1 = BranchList(self, name, sha)
-                n1.ctime = n1.mtime = date
-                self._subs[name] = n1
+    """
+    return augment_item_meta(repo,
+                             fill_in_metadata_if_dir(repo, item),
+                             include_size=include_size)