# VFS item types.  Every item has a 'meta' field which holds either a
# Metadata instance or a plain integer mode (see item_mode).  For a
# Commit, 'oid' is the commit's tree and 'coid' is the commit itself
# (see _commit_item_from_data).
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
# A single field name needs the trailing comma to really be a tuple;
# without it the argument is just the string 'meta'.
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))

# All of the item types defined above.
item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
# The subset listed as "real tree" types.
real_tree_types = frozenset((Item, Commit))
+
# Shared items that root_items yields for '.' and '.tag'.  Their meta
# is just the default directory mode rather than a Metadata instance.
_root = Root(default_dir_mode)
_tags = Tags(default_dir_mode)
+
+
+### vfs cache
+
+### A general purpose shared cache with (currently) cheap random
+### eviction. There is currently no weighting so a single commit item
+### is just as likely to be evicted as an entire "rev-list". See
+### is_valid_cache_key for a description of the expected content.
+
# Maps cache key -> cached value; see is_valid_cache_key for the
# accepted key shapes.
_cache = dict()
# The list cache_notice draws its random eviction victim from.
_cache_keys = list()
# Size at which cache_notice starts evicting.
_cache_max_items = 30000
+
def clear_cache():
    """Drop every cached entry by rebinding the module-level cache dict
    and key list to fresh, empty containers.

    """
    global _cache, _cache_keys
    _cache, _cache_keys = {}, []
+
def is_valid_cache_key(x):
    """Return logically true if x is structurally plausible as a cache
    key, and None otherwise.  The key shapes currently accepted:
      commit_oid -> commit
      commit_oid + ':r' -> rev-list
         i.e. rev-list -> {'.', commit, '2012...', next_commit, ...}
    """
    # Suspect we may eventually add "(container_oid, name) -> ...", and others.
    if type(x) is not bytes:
        # Exact type match only; anything else is never a key.
        return None
    size = len(x)
    # Either a bare 20-byte oid, or an oid followed by the ':r' suffix.
    if size == 20 or (size == 22 and x.endswith(b':r')):
        return True
    return None
+
def cache_get(key):
    """Return the value cached under key, or None when nothing is
    cached for it.  key must satisfy is_valid_cache_key."""
    assert is_valid_cache_key(key)
    # Only reading the module-level dict, so no global declaration needed.
    return _cache.get(key, None)
+
def cache_notice(key, value):
    """Remember value under key unless key is already cached.

    While the cache holds fewer than _cache_max_items entries the key
    is simply added.  Once it is full, a randomly chosen existing entry
    is evicted and its slot in _cache_keys is reused for the new key,
    so _cache and _cache_keys always describe the same set of keys.
    key must satisfy is_valid_cache_key.

    """
    global _cache, _cache_keys, _cache_max_items
    assert is_valid_cache_key(key)
    if key in _cache:
        return
    if len(_cache) < _cache_max_items:
        # Record the key so that it can later be chosen as a victim;
        # without this the eviction below has nothing to pick from.
        _cache_keys.append(key)
        _cache[key] = value
        return
    if not _cache_keys:
        # Nothing evictable (e.g. a limit of zero): don't cache.
        return
    victim_i = random.randrange(0, len(_cache_keys))
    victim = _cache_keys[victim_i]
    # Evict before inserting so the new entry can never be the victim
    # and the cache never exceeds its limit.
    del _cache[victim]
    _cache_keys[victim_i] = key
    _cache[key] = value
+
+
def cache_get_commit_item(oid, need_meta=True):
    """Return the commit item for oid if the cache has it, else None.
    With need_meta true, a directly cached item whose meta is only a
    mode is skipped; the '.' entry of a cached rev-list, if any, is
    then returned as-is."""
    # tree might be stored independently, or as '.' with its entries.
    cached = cache_get(oid)
    if cached and (not need_meta or isinstance(cached.meta, Metadata)):
        return cached
    revlist = cache_get(oid + b':r')
    if revlist:
        return revlist['.']
    return None
+
def cache_get_revlist_item(oid, need_meta=True):
    """Return a RevList item for oid that borrows the meta of the
    cached commit item, or None when cache_get_commit_item comes up
    empty."""
    cached = cache_get_commit_item(oid, need_meta=need_meta)
    if not cached:
        return None
    return RevList(meta=cached.meta, oid=oid)
+
+
def copy_item(item):
    """Return a completely independent copy of item, such that
    modifications will not affect the original.

    Only the meta field needs copying.  item itself is returned
    unchanged when it has no meta attribute, when meta is logically
    false, or when meta has no copy() method -- which is the case for
    a plain integer mode.

    """
    meta = getattr(item, 'meta', None)
    if not meta:
        return item
    copy_meta = getattr(meta, 'copy', None)
    if copy_meta is None:
        # A bare mode (int) is immutable and has no copy(); calling it
        # unconditionally would raise AttributeError.
        return item
    return item._replace(meta=copy_meta())
+
def item_mode(item):
    """Return the integer mode (stat st_mode) for item: the mode of its
    Metadata when it has one, otherwise item.meta itself."""
    meta = item.meta
    return meta.mode if isinstance(meta, Metadata) else meta
+
def _read_dir_meta(bupm):
    """Read one Metadata record from bupm and return it with size
    defaulted to 0, or return default_dir_mode if the record read is
    logically false."""
    meta = Metadata.read(bupm)
    if meta:
        assert meta.mode is not None
        if meta.size is None:
            meta.size = 0
        return meta
    # save writes unmodified Metadata() entries for fake parents
    # (test-save-strip-graft.sh demonstrates), so fall back to the
    # default directory mode here.
    return default_dir_mode
+
def tree_data_and_bupm(repo, oid):
    """Return (tree_bytes, bupm_oid) for the tree named by oid, or for
    the tree of the commit named by oid.  bupm_oid is None when the
    tree has no '.bupm' entry (i.e. older bup save, or non-bup tree).
    Raises Exception if oid is neither a tree nor a commit.

    """
    assert len(oid) == 20
    hex_oid = oid.encode('hex')
    stream = repo.cat(hex_oid)
    _, kind, _ = next(stream)
    data = ''.join(stream)
    if kind == 'commit':
        # Step from the commit down to its tree.
        stream = repo.cat(parse_commit(data).tree)
        _, kind, _ = next(stream)
        data = ''.join(stream)
        assert kind == 'tree'
    elif kind != 'tree':
        raise Exception('%r is not a tree or commit' % oid.encode('hex'))
    bupm_oid = None
    for _, mangled_name, sub_oid in tree_decode(data):
        if mangled_name == '.bupm':
            bupm_oid = sub_oid
            break
        if mangled_name > '.bupm':
            # Stop scanning once the names compare greater than '.bupm'.
            break
    return data, bupm_oid
+
def _find_treeish_oid_metadata(repo, oid):
    """Return the directory metadata for the tree or commit oid, as
    read by _read_dir_meta from the tree's .bupm, or None if the tree
    has no .bupm (i.e. older bup save, or non-bup tree).

    """
    _, bupm_oid = tree_data_and_bupm(repo, oid)
    if not bupm_oid:
        return None
    with _FileReader(repo, bupm_oid) as meta_stream:
        return _read_dir_meta(meta_stream)
+
def _readlink(repo, oid):
    """Return everything repo.join() produces for oid, concatenated
    into a single string."""
    hex_oid = oid.encode('hex')
    pieces = repo.join(hex_oid)
    return ''.join(pieces)
+
def readlink(repo, item):
    """Return the link target of item, which must be a symlink.  A
    target already recorded in the item's Metadata is preferred;
    otherwise it is read from the repository."""
    assert repo
    assert S_ISLNK(item_mode(item))
    meta = item.meta
    recorded = meta.symlink_target if isinstance(meta, Metadata) else None
    if recorded:
        return recorded
    return _readlink(repo, item.oid)
+
def _compute_item_size(repo, item):
    """Return the size of item as determined from the repository: the
    file size for a regular file, the target length for a symlink, and
    0 for anything else."""
    mode = item_mode(item)
    if S_ISREG(mode):
        return _normal_or_chunked_file_size(repo, item.oid)
    elif S_ISLNK(mode):
        target = _readlink(repo, item.oid)
        return len(target)
    else:
        return 0
+
def item_size(repo, item):
    """Return the size of item, taking it from the item's Metadata when
    a size is recorded there and computing it otherwise."""
    meta = item.meta
    recorded = meta.size if isinstance(meta, Metadata) else None
    if recorded is None:
        return _compute_item_size(repo, item)
    return recorded
+
def tree_data_reader(repo, oid):
    """Return an open reader (a _FileReader) for all of the data
    contained within oid.  If oid refers to a tree, recursively
    concatenate all of its contents."""
    reader = _FileReader(repo, oid)
    return reader
+
def fopen(repo, item):
    """Return an open reader for the data of item, which must be a
    regular file."""
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
+
def _commit_item_from_data(oid, data):
    """Return a Commit item for the commit oid whose raw content is
    data.  The item's oid is the commit's tree, its coid is the commit
    itself, and its meta is just default_dir_mode."""
    tree_oid = parse_commit(data).tree.decode('hex')
    return Commit(meta=default_dir_mode, oid=tree_oid, coid=oid)
+
def _commit_item_from_oid(repo, oid, require_meta):
    """Return the Commit item for oid, from the cache when a suitable
    one is there, otherwise built from the repository and offered to
    the cache.  With require_meta true the item's meta is the tree's
    Metadata whenever the tree provides any; otherwise it stays a
    plain mode."""
    cached = cache_get_commit_item(oid, need_meta=require_meta)
    if cached:
        if not require_meta or isinstance(cached.meta, Metadata):
            return cached
    stream = repo.cat(oid.encode('hex'))
    _, typ, _ = next(stream)
    assert typ == 'commit'
    commit = _commit_item_from_data(oid, ''.join(stream))
    meta = None
    if require_meta:
        meta = _find_treeish_oid_metadata(repo, commit.oid)
    if meta:
        commit = commit._replace(meta=meta)
    # NOTE(review): cache_notice ignores keys that are already cached,
    # so a mode-only entry cached earlier is not replaced here.
    cache_notice(oid, commit)
    return commit
+
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList item for the commit oid, carrying the meta of
    the corresponding commit item."""
    commit_item = _commit_item_from_oid(repo, oid, require_meta)
    return RevList(meta=commit_item.meta, oid=oid)
+
def root_items(repo, names=None, want_meta=True):
    """Yield (name, item) for the items in '/' in the VFS.  When names
    is logically false, yield '.', '.tag', and one rev-list item per
    branch head; otherwise yield only the items whose name is in the
    names collection, silently skipping branches that don't exist.

    """
    # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz'?

    heads_prefix = 'refs/heads/'

    if names:
        if '.' in names:
            yield '.', _root
        if '.tag' in names:
            yield '.tag', _tags
        for ref in names:
            if ref in ('.', '.tag'):
                continue
            stream = repo.cat(heads_prefix + ref)
            oidx, typ, _ = next(stream)
            if not oidx:
                # No such branch; drain the stream and move on.
                for _ in stream:
                    pass
                continue
            assert typ == 'commit'
            # Consume the rest of the stream and parse it as a commit,
            # even though the parsed result itself isn't needed.
            parse_commit(''.join(stream))
            yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'),
                                              want_meta)
        return

    yield '.', _root
    yield '.tag', _tags
    # FIXME: maybe eventually support repo.clone() or something
    # and pass in two repos, so we can drop the tuple() and stream
    # in parallel (i.e. meta vs refs).
    heads = tuple(repo.refs([], limit_to_heads=True))
    for name, oid in heads:
        assert(name.startswith(heads_prefix))
        yield name[len(heads_prefix):], \
            _revlist_item_from_oid(repo, oid, want_meta)
+
def ordered_tree_entries(tree_data, bupm=None):
    """Yields (name, mangled_name, kind, gitmode, oid) for each item in
    tree.  The entries are sorted by (demangled) name when bupm is
    logically true, and otherwise come in tree_decode order.

    """
    # Sadly, the .bupm entries currently aren't in git tree order,
    # i.e. they don't account for the fact that git sorts trees
    # (including our chunked trees) as if their names ended with "/",
    # so "fo" sorts after "fo." iff fo is a directory.  This makes
    # streaming impossible when we need the metadata.
    def demangled(raw_entries):
        for gitmode, mangled_name, oid in raw_entries:
            name, kind = git.demangle_name(mangled_name, gitmode)
            yield name, mangled_name, kind, gitmode, oid

    entries = demangled(tree_decode(tree_data))
    if bupm:
        entries = sorted(entries, key=lambda ent: ent[0])
    for entry in entries:
        yield entry