+_multiple_slashes_rx = re.compile(r'//+')
+
+def _decompose_path(path):
+ """Return a boolean indicating whether the path is absolute, and a
+ reversed list of path elements, omitting any occurrences of "."
+ and ignoring any leading or trailing slash. If the path is
+ effectively '/' or '.', return an empty list.
+
+ """
+ path = re.sub(_multiple_slashes_rx, '/', path)
+ if path == '/':
+ return True, True, []
+ is_absolute = must_be_dir = False
+ if path.startswith('/'):
+ is_absolute = True
+ path = path[1:]
+ for suffix in ('/', '/.'):
+ if path.endswith(suffix):
+ must_be_dir = True
+ path = path[:-len(suffix)]
+ parts = [x for x in path.split('/') if x != '.']
+ parts.reverse()
+ if not parts:
+ must_be_dir = True # e.g. path was effectively '.' or '/', etc.
+ return is_absolute, must_be_dir, parts
+
+
# VFS node types.  An item's meta field is either a Metadata instance
# or a bare integer stat mode (see item_mode below).
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
# NOTE: the single-field specs below previously read ('meta') -- that
# is just the string 'meta' (which namedtuple happens to accept as a
# field spec), not a tuple.  Use an explicit one-element tuple so the
# declarations read the way they behave.
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))

# All item types, and the subset that represent real (saved) trees.
item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
real_tree_types = frozenset((Item, Commit))
+
# Shared placeholder items for the VFS root ('/') and the tag
# directory ('/.tag') yielded by root_items.  Their meta is
# default_dir_mode -- presumably a plain int mode (cf. item_mode),
# not a Metadata instance.
_root = Root(meta=default_dir_mode)
_tags = Tags(meta=default_dir_mode)
+
+
### vfs cache

### A general purpose shared cache with (currently) cheap random
### eviction. At the moment there is no weighting so a single commit
### item is just as likely to be evicted as an entire "rev-list". See
### is_valid_cache_key for a description of the expected content.

# _cache maps keys to values; _cache_keys mirrors the key set so a
# random eviction victim can be picked in O(1); _cache_max_items caps
# the population.
_cache = {}
_cache_keys = []
_cache_max_items = 30000

def clear_cache():
    """Discard every entry in the shared VFS cache."""
    global _cache, _cache_keys
    _cache = {}
    _cache_keys = []

def is_valid_cache_key(x):
    """Return logically true if x looks like it could be a valid cache key
    (with respect to structure). Current valid cache entries:
      res:... -> resolution
      commit_oid -> commit
      commit_oid + ':r' -> rev-list
         i.e. rev-list -> {'.', commit, '2012...', next_commit, ...}
    """
    # Suspect we may eventually add "(container_oid, name) -> ...", and others.
    x_t = type(x)
    if x_t is bytes:
        if len(x) == 20:  # a raw commit oid
            return True
        if len(x) == 22 and x.endswith(b':r'):  # oid + ':r' -> rev-list
            return True
    # Guard the 'res:' test by type: calling startswith('res:') on a
    # bytes key that fell through the checks above would raise
    # TypeError on Python 3 instead of reporting the key as invalid.
    if x_t is str and x.startswith('res:'):
        return True

def cache_get(key):
    """Return the cached value for key, or None when key isn't cached."""
    assert is_valid_cache_key(key)
    return _cache.get(key)

def cache_notice(key, value):
    """Record key -> value in the cache unless key is already present,
    randomly evicting an existing entry when the cache is full."""
    global _cache, _cache_keys
    assert is_valid_cache_key(key)
    if key in _cache:
        return  # first writer wins; never replace an existing entry
    if len(_cache) < _cache_max_items:
        _cache_keys.append(key)
        _cache[key] = value
        return
    # Full: evict a random victim and reuse its _cache_keys slot.
    victim_i = randrange(0, len(_cache_keys))
    victim = _cache_keys[victim_i]
    del _cache[victim]
    _cache_keys[victim_i] = key
    _cache[key] = value
+
def cache_get_commit_item(oid, need_meta=True):
    """Return the requested tree item if it can be found in the cache.
    When need_meta is true don't return a cached item that only has a
    mode."""
    # The commit may be cached directly, or as the '.' entry of its
    # cached rev-list.
    item = cache_get(oid)
    if item and (not need_meta or isinstance(item.meta, Metadata)):
        return item
    entries = cache_get(oid + b':r')
    if entries:
        return entries['.']
+
def cache_get_revlist_item(oid, need_meta=True):
    """Return a RevList for oid when its commit is cached, else None."""
    commit = cache_get_commit_item(oid, need_meta=need_meta)
    return RevList(oid=oid, meta=commit.meta) if commit else None
+
def copy_item(item):
    """Return a completely independent copy of item, such that
    modifications will not affect the original.

    """
    # Only a Metadata meta is mutable; everything else can be shared.
    meta = getattr(item, 'meta', None)
    if not isinstance(meta, Metadata):
        return item
    return item._replace(meta=meta.copy())
+
def item_mode(item):
    """Return the integer mode (stat st_mode) for item."""
    meta = item.meta
    return meta.mode if isinstance(meta, Metadata) else meta
+
def _read_dir_meta(bupm):
    """Read one directory Metadata entry from the bupm stream,
    substituting default_dir_mode for an empty entry and ensuring the
    result has a size."""
    # This is because save writes unmodified Metadata() entries for
    # fake parents -- test-save-strip-graft.sh demonstrates.
    meta = Metadata.read(bupm)
    if not meta:
        return default_dir_mode
    assert meta.mode is not None
    if meta.size is None:
        meta.size = 0
    return meta
+
def tree_data_and_bupm(repo, oid):
    """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
    tree has no metadata (i.e. older bup save, or non-bup tree).

    """
    assert len(oid) == 20
    it = repo.cat(oid.encode('hex'))
    _, item_t, size = next(it)
    data = ''.join(it)
    if item_t == 'commit':
        # For a commit, chase its tree and fetch that instead.
        commit = parse_commit(data)
        it = repo.cat(commit.tree)
        _, item_t, size = next(it)
        data = ''.join(it)
        assert item_t == 'tree'
    elif item_t != 'tree':
        raise Exception('%r is not a tree or commit' % oid.encode('hex'))
    # Tree entries are sorted, so stop as soon as we pass '.bupm'.
    for _, mangled_name, sub_oid in tree_decode(data):
        if mangled_name == '.bupm':
            return data, sub_oid
        if mangled_name > '.bupm':
            break
    return data, None
+
def _find_treeish_oid_metadata(repo, oid):
    """Return the metadata for the tree or commit oid, or None if the tree
    has no metadata (i.e. older bup save, or non-bup tree).

    """
    _, bupm_oid = tree_data_and_bupm(repo, oid)
    if not bupm_oid:
        return None
    with _FileReader(repo, bupm_oid) as meta_stream:
        return _read_dir_meta(meta_stream)
+
def _readlink(repo, oid):
    # A symlink's target is the entire content of its object.
    chunks = repo.join(oid.encode('hex'))
    return ''.join(chunks)
+
def readlink(repo, item):
    """Return the link target of item, which must be a symlink. Reads the
    target from the repository if necessary."""
    assert repo
    assert S_ISLNK(item_mode(item))
    meta = item.meta
    # Prefer the target recorded in the metadata, if present.
    if isinstance(meta, Metadata) and meta.symlink_target:
        return meta.symlink_target
    return _readlink(repo, item.oid)
+
def _compute_item_size(repo, item):
    # Regular files may be chunked; a symlink's size is the length of
    # its target; everything else reports zero.
    mode = item_mode(item)
    if S_ISREG(mode):
        return _normal_or_chunked_file_size(repo, item.oid)
    if S_ISLNK(mode):
        return len(_readlink(repo, item.oid))
    return 0
+
def item_size(repo, item):
    """Return the size of item, computing it if necessary."""
    meta = item.meta
    if isinstance(meta, Metadata) and meta.size is not None:
        return meta.size
    # No recorded size; derive it from the repository content.
    return _compute_item_size(repo, item)
+
def tree_data_reader(repo, oid):
    """Return an open reader for all of the data contained within oid. If
    oid refers to a tree, recursively concatenate all of its contents."""
    # _FileReader handles both cases; callers are responsible for
    # closing the reader (cf. the with-statement in
    # _find_treeish_oid_metadata).
    return _FileReader(repo, oid)
+
def fopen(repo, item):
    """Return an open reader for the given file item."""
    # Only regular files may be opened (symlinks go through readlink).
    assert S_ISREG(item_mode(item))
    return tree_data_reader(repo, item.oid)
+
def _commit_item_from_data(oid, data):
    """Build a Commit item from raw commit bytes; oid is the commit's
    own id, the item's oid is the commit's tree."""
    info = parse_commit(data)
    tree_oid = info.tree.decode('hex')
    return Commit(meta=default_dir_mode, oid=tree_oid, coid=oid)
+
def _commit_item_from_oid(repo, oid, require_meta):
    """Return a Commit item for oid, preferring the cache, otherwise
    fetching from the repo and caching the result.  When require_meta
    is true, try to attach full Metadata rather than just a mode."""
    cached = cache_get_commit_item(oid, need_meta=require_meta)
    if cached and (not require_meta or isinstance(cached.meta, Metadata)):
        return cached
    it = repo.cat(oid.encode('hex'))
    _, typ, size = next(it)
    assert typ == 'commit'
    commit = _commit_item_from_data(oid, ''.join(it))
    if require_meta:
        meta = _find_treeish_oid_metadata(repo, commit.oid)
        if meta:
            commit = commit._replace(meta=meta)
    cache_notice(oid, commit)
    return commit
+
def _revlist_item_from_oid(repo, oid, require_meta):
    # A rev-list item shares its commit's metadata.
    commit_meta = _commit_item_from_oid(repo, oid, require_meta).meta
    return RevList(oid=oid, meta=commit_meta)
+
def root_items(repo, names=None, want_meta=True):
    """Yield (name, item) for the items in '/' in the VFS. Return
    everything if names is logically false, otherwise return only
    items with a name in the collection.

    """
    # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?

    global _root, _tags
    if not names:
        yield '.', _root
        yield '.tag', _tags
        # FIXME: maybe eventually support repo.clone() or something
        # and pass in two repos, so we can drop the tuple() and stream
        # in parallel (i.e. meta vs refs).
        for name, oid in tuple(repo.refs([], limit_to_heads=True)):
            assert(name.startswith('refs/heads/'))
            # Strip the 'refs/heads/' prefix for the VFS name.
            yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
        return

    if '.' in names:
        yield '.', _root
    if '.tag' in names:
        yield '.tag', _tags
    for ref in names:
        if ref in ('.', '.tag'):
            continue  # already handled above
        it = repo.cat('refs/heads/' + ref)
        oidx, typ, size = next(it)
        if not oidx:
            # Ref doesn't exist; drain the cat iterator before moving on.
            for _ in it: pass
            continue
        assert typ == 'commit'
        # parse_commit both validates the data and drains the iterator;
        # its result is otherwise unused here.
        commit = parse_commit(''.join(it))
        yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), want_meta)
+
def ordered_tree_entries(tree_data, bupm=None):
    """Yields (name, mangled_name, kind, gitmode, oid) for each item in
    tree, sorted by name.

    """
    # Sadly, the .bupm entries currently aren't in git tree order,
    # i.e. they don't account for the fact that git sorts trees
    # (including our chunked trees) as if their names ended with "/",
    # so "fo" sorts after "fo." iff fo is a directory. This makes
    # streaming impossible when we need the metadata.
    def demangled(tree_entry):
        gitmode, mangled_name, oid = tree_entry
        name, kind = git.demangle_name(mangled_name, gitmode)
        return name, mangled_name, kind, gitmode, oid

    ents = (demangled(x) for x in tree_decode(tree_data))
    if not bupm:
        for ent in ents:
            yield ent
        return
    # Re-sort by the demangled name to match the .bupm stream's order.
    for ent in sorted(ents, key=lambda x: x[0]):
        yield ent
+
def tree_items(oid, tree_data, names=frozenset(), bupm=None):
    """Yield (name, item) for the entries of the tree whose encoded
    content is tree_data.  When names is logically false, yield '.'
    and every entry; otherwise yield only entries whose name is in
    names.  When bupm is provided, each yielded entry's Metadata is
    read from it, in lockstep with the name-sorted entry order (cf.
    ordered_tree_entries)."""

    def tree_item(ent_oid, kind, gitmode):
        # Consumes one Metadata record from bupm (when present) for
        # chunked files and non-directories.
        if kind == BUP_CHUNKED:
            meta = Metadata.read(bupm) if bupm else default_file_mode
            return Chunky(oid=ent_oid, meta=meta)

        if S_ISDIR(gitmode):
            # No metadata here (accessable via '.' inside ent_oid).
            return Item(meta=default_dir_mode, oid=ent_oid)

        return Item(oid=ent_oid,
                    meta=(Metadata.read(bupm) if bupm \
                          else _default_mode_for_gitmode(gitmode)))

    assert len(oid) == 20
    if not names:
        # Unfiltered: '.' first, then every entry except '.bupm' itself.
        dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
        yield '.', Item(oid=oid, meta=dot_meta)
        tree_entries = ordered_tree_entries(tree_data, bupm)
        for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
            if mangled_name == '.bupm':
                continue
            assert name != '.'
            yield name, tree_item(ent_oid, kind, gitmode)
        return

    # Assumes the tree is properly formed, i.e. there are no
    # duplicates, and entries will be in git tree order.
    if type(names) not in (frozenset, set):
        names = frozenset(names)
    remaining = len(names)

    # Account for the bupm sort order issue (cf. ordered_tree_entries above)
    last_name = max(names) if bupm else max(names) + '/'

    if '.' in names:
        dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
        yield '.', Item(oid=oid, meta=dot_meta)
        if remaining == 1:
            return
        remaining -= 1

    tree_entries = ordered_tree_entries(tree_data, bupm)
    for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
        if mangled_name == '.bupm':
            continue
        assert name != '.'
        if name not in names:
            if name > last_name:
                break  # given bupm sort order, we're finished
            # Discard the skipped entry's Metadata record so the bupm
            # stream stays in sync with the entry iteration.
            if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
                Metadata.read(bupm)
            continue
        yield name, tree_item(ent_oid, kind, gitmode)
        if remaining == 1:
            break
        remaining -= 1
+
def tree_items_with_meta(repo, oid, tree_data, names):
    """Like tree_items, but locate and open this tree's .bupm (if any)
    so the yielded items carry full Metadata."""
    # For now, the .bupm order doesn't quite match git's, and we don't
    # load the tree data incrementally anyway, so we just work in RAM
    # via tree_data.
    assert len(oid) == 20
    bupm = None
    for _, mangled_name, sub_oid in tree_decode(tree_data):
        if mangled_name == '.bupm':
            bupm = _FileReader(repo, sub_oid)
            break
        if mangled_name > '.bupm':
            break  # entries are sorted; this tree has no .bupm
    if bupm:
        # Previously the reader was never closed (a leak).  Use it as a
        # context manager -- as _find_treeish_oid_metadata does -- so
        # it is released once iteration finishes or is abandoned.
        with bupm:
            for item in tree_items(oid, tree_data, names, bupm):
                yield item
    else:
        for item in tree_items(oid, tree_data, names, None):
            yield item
+
# Matches save names like '2018-01-02-123456' or '2018-01-02-123456-7'
# (an optional numeric disambiguation suffix after the timestamp).
_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
+
+def _reverse_suffix_duplicates(strs):
+ """Yields the elements of strs, with any runs of duplicate values
+ suffixed with -N suffixes, where the zero padded integer N
+ decreases to 0 by 1 (e.g. 10, 09, ..., 00).
+
+ """
+ for name, duplicates in groupby(strs):
+ ndup = len(tuple(duplicates))
+ if ndup == 1:
+ yield name