- return self.sub(first)
-
def lresolve(self, path, stay_inside_fs=False):
    """Resolve path relative to this node, lstat()-style.

    A symlink in the final path position is returned as-is rather than
    being followed.  An empty path yields this node itself.  A path that
    begins with '/' is resolved from self.fs_top() when stay_inside_fs
    is true, and from self.top() otherwise.
    """
    if not path:
        return self
    origin = self
    if path.startswith('/'):
        origin = self.fs_top() if stay_inside_fs else self.top()
        path = path[1:]
    # Runs of slashes count as one separator; a trailing slash (or a
    # bare '/') leaves an empty last component, which stands for '.'.
    components = re.split(r'/+', path or '.')
    if not components[-1]:
        components[-1] = '.'
    return origin._lresolve(components)
-
def resolve(self, path = ''):
    """Resolve path like lresolve(), then follow a trailing symlink.

    The second step resolves '.' from the node found by the first.
    """
    found = self.lresolve(path)
    return found.lresolve('.')
-
def try_resolve(self, path = ''):
    """Resolve path like resolve(), tolerating a dangling final symlink.

    If dereferencing the final node raises NoSuchFile, the unresolved
    node is returned instead.  Errors raised while walking to that node
    are not caught and propagate to the caller.
    """
    node = self.lresolve(path)
    try:
        return node.lresolve('.')
    except NoSuchFile:
        return node
-
def nlinks(self):
    """Return the number of hard links to this node (always 1 here).

    As a side effect, the node's children are populated via _mksubs()
    if they have not been loaded yet.
    """
    # Identity test: only a literal None means "not loaded yet".
    if self._subs is None:
        self._mksubs()
    return 1
-
def size(self):
    """Return this node's size; this implementation always reports 0."""
    return 0
-
def open(self):
    """Refuse to open this node.

    Always raises NotFile, naming the node in the message.
    """
    message = '%s is not a regular file' % self.name
    raise NotFile(message)
-
-
class File(Node):
    """A normal file from bup's repository.

    The file's size and its reader object are computed on first use and
    then cached on the instance.
    """

    def __init__(self, parent, name, mode, hash, bupmode):
        Node.__init__(self, parent, name, mode, hash)
        self.bupmode = bupmode
        self._cached_size = None   # set by size() on first call
        self._filereader = None    # set by open() on first call

    def open(self):
        """Return a reader for this file, rewound to offset 0."""
        # You'd think FUSE might call this only once each time a file is
        # opened, but no; it's really more of a refcount, and it's called
        # once per read().  Thus, it's important to cache the filereader
        # object here so we're not constantly re-seeking.
        if not self._filereader:
            self._filereader = _FileReader(self.hash, self.size(),
                                           self.bupmode == git.BUP_CHUNKED)
        self._filereader.seek(0)
        return self._filereader

    def size(self):
        """Return this file's size, computing and caching it if needed."""
        # "is None" rather than "== None": a cached size of 0 is a valid
        # value and only a literal None means "not computed yet".
        if self._cached_size is None:
            debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
            if self.bupmode == git.BUP_CHUNKED:
                self._cached_size = _total_size(self.hash)
            else:
                self._cached_size = _chunk_len(self.hash)
            debug1('<<<<File.size() done.\n')
        return self._cached_size
-
-
# Number of symlink dereferences currently in progress; dereference()
# uses it to detect overly deep (or cyclic) chains of links.
_symrefs = 0
class Symlink(File):
    """A symbolic link from bup's repository."""

    def __init__(self, parent, name, hash, bupmode):
        # 0o120000 is the symlink file-type mode.  The "0o" spelling is
        # accepted by Python 2.6+ and Python 3; the bare leading-zero
        # form is a syntax error on Python 3.
        File.__init__(self, parent, name, 0o120000, hash, bupmode)

    def size(self):
        """Return the length of the path stored in this link."""
        return len(self.readlink())

    def readlink(self):
        """Get the path that this link points at."""
        return ''.join(cp().join(self.hash.encode('hex')))

    def dereference(self):
        """Get the node that this link points at.

        The target is resolved relative to this link's parent, with
        stay_inside_fs=True.  Raises NoSuchFile (with a "broken symlink"
        message) if the target cannot be resolved, and TooManySymlinks
        once more than 100 dereferences are already in progress.
        """
        global _symrefs
        if _symrefs > 100:
            raise TooManySymlinks('too many levels of symlinks: %r'
                                  % self.fullname())
        _symrefs += 1
        try:
            return self.parent.lresolve(self.readlink(),
                                        stay_inside_fs=True)
        except NoSuchFile:
            raise NoSuchFile("%s: broken symlink to %r"
                             % (self.fullname(), self.readlink()))
        finally:
            # Always undo the depth bump, whether we returned or raised.
            _symrefs -= 1

    def _lresolve(self, parts):
        """Continue resolving parts from the node this link points at."""
        return self.dereference()._lresolve(parts)
-
-
class FakeSymlink(Symlink):
    """A symlink whose target is held in memory, not in the repository.

    It is constructed with EMPTY_SHA as its hash and BUP_NORMAL as its
    bupmode; the target path is supplied directly by the creator.
    """

    def __init__(self, parent, name, toname):
        Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
        self.toname = toname

    def readlink(self):
        """Return the target path given at construction time."""
        return self.toname
-
-
-class Dir(Node):
- """A directory stored inside of bup's repository."""
-
- def __init__(self, *args):
- Node.__init__(self, *args)
- self._metadata = None
-
- def _mksubs(self):
- self._subs = {}
- it = cp().get(self.hash.encode('hex'))
- type = it.next()
- if type == 'commit':
- del it
- it = cp().get(self.hash.encode('hex') + ':')
- type = it.next()
- assert(type == 'tree')
- for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
+ vint.send(port, 'sVV', name, has_meta, item.meta)
+ elif kind == Commit:
+ assert len(item.oid) == 20
+ assert len(item.coid) == 20
+ if has_meta:
+ vint.send(port, 'sVss', name, has_meta, item.oid, item.coid)
+ Metadata.write(meta, port, include_path=False)
+ else:
+ vint.send(port, 'sVssV', name, has_meta, item.oid, item.coid,
+ item.meta)
+ elif kind == FakeLink:
+ if has_meta:
+ vint.send(port, 'sVs', name, has_meta, item.target)
+ Metadata.write(meta, port, include_path=False)
+ else:
+ vint.send(port, 'sVsV', name, has_meta, item.target, item.meta)
+ else:
+ assert False
+
def read_item(port):
    """Read one serialized VFS item from port and return it.

    The stream starts with the item kind (a byte string) and a has_meta
    flag, followed by kind-specific fields.  The metadata slot is a full
    Metadata record when has_meta is true, and a plain vuint otherwise.

    Raises AssertionError if the kind is not recognized.
    """
    def read_m(port, has_meta):
        # Metadata record when flagged, otherwise a bare vuint.
        if has_meta:
            return Metadata.read(port)
        return read_vuint(port)

    kind, has_meta = vint.recv(port, 'sV')
    if kind in (b'Item', b'Chunky', b'RevList'):
        # These three share a layout: oid first, then the metadata slot.
        oid = read_bvec(port)
        meta = read_m(port, has_meta)
        if kind == b'Item':
            return Item(oid=oid, meta=meta)
        if kind == b'Chunky':
            return Chunky(oid=oid, meta=meta)
        return RevList(oid=oid, meta=meta)
    if kind == b'Root':
        return Root(meta=read_m(port, has_meta))
    if kind == b'Tags':
        return Tags(meta=read_m(port, has_meta))
    if kind == b'Commit':
        oid, coid = vint.recv(port, 'ss')
        meta = read_m(port, has_meta)
        return Commit(oid=oid, coid=coid, meta=meta)
    if kind == b'FakeLink':
        target = read_bvec(port)
        meta = read_m(port, has_meta)
        return FakeLink(target=target, meta=meta)
    # Raise explicitly instead of "assert False": asserts are stripped
    # under -O, which would turn an unknown kind into a silent None.
    raise AssertionError('unexpected item kind: %r' % (kind,))
+
def write_resolution(port, resolution):
    """Serialize resolution, a sequence of (name, item) pairs, to port.

    Writes the pair count as a vuint, then for each pair the name as a
    bvec followed by a one-byte flag: b'\\1' and the serialized item
    when the item is logically true, b'\\0' alone otherwise.
    """
    write_vuint(port, len(resolution))
    for name, item in resolution:
        write_bvec(port, name)
        if not item:
            port.write(b'\0')
            continue
        port.write(b'\1')
        write_item(port, item)
+
def read_resolution(port):
    """Read a resolution from port and return it as a tuple.

    The stream holds a vuint count followed by that many entries; each
    entry is a bvec name, a one-byte presence flag (0 or 1), and, when
    the flag is 1, a serialized item.  Entries come back as
    (name, item) pairs, with item set to None when the flag was 0.
    """
    count = read_vuint(port)
    entries = []
    for _ in range(count):
        name = read_bvec(port)
        flag = ord(port.read(1))
        assert flag in (0, 1)
        if flag:
            entries.append((name, read_item(port)))
        else:
            entries.append((name, None))
    return tuple(entries)
+
+
# Shared items that root_items() yields as '.' and '.tag' respectively;
# both carry default_dir_mode as their metadata.
_root = Root(meta=default_dir_mode)
_tags = Tags(meta=default_dir_mode)
+
+
### vfs cache

### A general purpose shared cache with (currently) cheap random
### eviction.  At the moment there is no weighting, so a single commit
### item is just as likely to be evicted as an entire "rev-list".  See
### is_valid_cache_key for a description of the expected content.

_cache = {}  # key -> cached value
_cache_keys = []  # keys held in _cache; cache_notice picks eviction victims here
_cache_max_items = 30000  # at this many entries, each new insert evicts one
+
def clear_cache():
    """Drop every cached entry.

    The module-level containers are rebound to fresh empty objects
    rather than emptied in place.
    """
    global _cache, _cache_keys
    _cache, _cache_keys = {}, []
+
def is_valid_cache_key(x):
    """Return True if x looks like it could be a valid cache key
    (with respect to structure), and False otherwise.  Current valid
    cache entries:
      res:... -> resolution
      itm:OID -> Commit
      rvl:OID -> {'.', commit, '2012...', next_commit, ...}
    Keys are byte strings.  'itm:' and 'rvl:' keys must be exactly 24
    bytes long (the 4-byte tag plus a 20-byte OID); 'res:' keys may
    have any length.
    """
    # Suspect we may eventually add "(container_oid, name) -> ...", and others.
    if not isinstance(x, bytes):
        return False
    tag = x[:4]
    # Compare against bytes literals: callers build keys as b'itm:' + oid,
    # and on Python 3 a bytes tag never equals a str literal.
    if tag in (b'itm:', b'rvl:'):
        return len(x) == 24
    return tag == b'res:'
+
def cache_get(key):
    """Return the value cached under key, or None if there is none.

    Raises Exception if key fails is_valid_cache_key().
    """
    if is_valid_cache_key(key):
        return _cache.get(key)
    raise Exception('invalid cache key: ' + repr(key))
+
def cache_notice(key, value):
    """Offer value to the cache under key.

    A key that is already cached is left untouched (its value is not
    replaced).  While the cache holds fewer than _cache_max_items
    entries the new entry is simply added; after that, a randomly
    chosen existing entry is evicted and its slot in _cache_keys is
    reused for the new key.

    Raises Exception if key fails is_valid_cache_key().
    """
    if not is_valid_cache_key(key):
        raise Exception('invalid cache key: ' + repr(key))
    if key in _cache:
        return
    if len(_cache) >= _cache_max_items:
        # Full: overwrite a random slot so _cache_keys keeps its length.
        slot = randrange(0, len(_cache_keys))
        del _cache[_cache_keys[slot]]
        _cache_keys[slot] = key
    else:
        _cache_keys.append(key)
    _cache[key] = value
+
def cache_get_commit_item(oid, need_meta=True):
    """Return the cached commit item for oid, or None if nothing fits.

    The 'itm:' entry is preferred; when need_meta is true it is only
    used if its meta is a Metadata instance (not a bare mode).  Failing
    that, the '.' entry of a cached 'rvl:' listing for oid is returned
    if such a listing exists.
    """
    # tree might be stored independently, or as '.' with its entries.
    item = cache_get(b'itm:' + oid)
    if item and (not need_meta or isinstance(item.meta, Metadata)):
        return item
    revlist_entries = cache_get(b'rvl:' + oid)
    if revlist_entries:
        return revlist_entries['.']
    return None
+
def cache_get_revlist_item(oid, need_meta=True):
    """Return a RevList for oid built from the cached commit item.

    The RevList reuses the cached commit's meta.  Returns None when
    cache_get_commit_item() finds nothing for oid.
    """
    cached = cache_get_commit_item(oid, need_meta=need_meta)
    if not cached:
        return None
    return RevList(oid=oid, meta=cached.meta)
+
def copy_item(item):
    """Return a copy of item that can be modified without affecting
    the original.

    Only a Metadata-valued meta is duplicated (via its copy() method);
    an item without one is returned as-is.
    """
    meta = getattr(item, 'meta', None)
    if not isinstance(meta, Metadata):
        return item
    return item._replace(meta=meta.copy())
+
def item_mode(item):
    """Return the integer mode (stat st_mode) for item.

    item.meta is either a Metadata instance, whose mode attribute is
    used, or the mode value itself.
    """
    meta = item.meta
    return meta.mode if isinstance(meta, Metadata) else meta
+
def _read_dir_meta(bupm):
    """Read the next Metadata record from bupm, for use as a directory's
    metadata.

    Returns default_dir_mode if the record read is logically false;
    otherwise returns the record, with a missing size set to 0.
    """
    # The fallback exists because save writes unmodified Metadata()
    # entries for fake parents -- test-save-strip-graft.sh demonstrates.
    meta = Metadata.read(bupm)
    if meta:
        assert meta.mode is not None
        if meta.size is None:
            meta.size = 0
        return meta
    return default_dir_mode
+
def tree_data_and_bupm(repo, oid):
    """Return (tree_bytes, bupm_oid) for the tree or commit named by oid.

    bupm_oid is None if the tree has no '.bupm' entry (i.e. older bup
    save, or non-bup tree).  When oid names a commit, the commit's tree
    is fetched and used.  Raises Exception if oid is neither a tree nor
    a commit.
    """
    assert len(oid) == 20
    stream = repo.cat(oid.encode('hex'))
    _, kind, _ = next(stream)
    data = ''.join(stream)
    if kind == 'commit':
        # Replace the commit data with the data of the tree it names.
        stream = repo.cat(parse_commit(data).tree)
        _, kind, _ = next(stream)
        data = ''.join(stream)
        assert kind == 'tree'
    elif kind != 'tree':
        raise Exception('%s is not a tree or commit' % hexstr(oid))
    for _, mangled_name, sub_oid in tree_decode(data):
        if mangled_name == '.bupm':
            return data, sub_oid
        # Stop at the first name past '.bupm' -- presumably entries
        # arrive in sorted order, so it cannot appear later.
        if mangled_name > '.bupm':
            break
    return data, None
+
def _find_treeish_oid_metadata(repo, oid):
    """Return the directory metadata for the tree or commit oid.

    Returns None if the tree has no '.bupm' entry (i.e. older bup save,
    or non-bup tree); otherwise the first record of that entry, as
    interpreted by _read_dir_meta().
    """
    _, bupm_oid = tree_data_and_bupm(repo, oid)
    if not bupm_oid:
        return None
    with _FileReader(repo, bupm_oid) as meta_stream:
        return _read_dir_meta(meta_stream)
+
def _readlink(repo, oid):
    """Return the content stored under oid, joined into one string.

    The content is fetched with repo.join() using the hex form of oid.
    """
    hex_oid = oid.encode('hex')
    pieces = repo.join(hex_oid)
    return ''.join(pieces)
+
def readlink(repo, item):
    """Return the link target of item, which must be a symlink.

    A FakeLink supplies its own target.  Otherwise a non-empty
    symlink_target recorded in the item's Metadata is used, and only if
    that is unavailable is the target read from the repository.
    """
    assert repo
    assert S_ISLNK(item_mode(item))
    if isinstance(item, FakeLink):
        return item.target
    meta = item.meta
    recorded = meta.symlink_target if isinstance(meta, Metadata) else None
    if recorded:
        return recorded
    return _readlink(repo, item.oid)
+
def _compute_item_size(repo, item):
    """Work out item's size from the repository.

    Regular files are measured with _normal_or_chunked_file_size();
    symlinks report the length of their target; everything else is 0.
    """
    mode = item_mode(item)
    if S_ISREG(mode):
        return _normal_or_chunked_file_size(repo, item.oid)
    if not S_ISLNK(mode):
        return 0
    if isinstance(item, FakeLink):
        return len(item.target)
    return len(_readlink(repo, item.oid))
+
def item_size(repo, item):
    """Return the size of item.

    A size already recorded in the item's Metadata is returned directly;
    otherwise the size is computed from the repository.
    """
    meta = item.meta
    if isinstance(meta, Metadata):
        recorded = meta.size
        if recorded is not None:
            return recorded
    return _compute_item_size(repo, item)
+
def tree_data_reader(repo, oid):
    """Return an open reader over all of the data contained within oid.

    If oid refers to a tree, the reader covers the recursive
    concatenation of the tree's contents.
    """
    reader = _FileReader(repo, oid)
    return reader
+
def fopen(repo, item):
    """Return an open reader for item, which must be a regular file."""
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
+
def _commit_item_from_data(oid, data):
    """Build a Commit item from raw commit data.

    The item's oid is the commit's tree (decoded from hex), its coid is
    the given commit oid, and its meta is default_dir_mode.
    """
    tree_oid = parse_commit(data).tree.decode('hex')
    return Commit(meta=default_dir_mode, oid=tree_oid, coid=oid)
+
def _commit_item_from_oid(repo, oid, require_meta):
    """Return the Commit item for the commit oid, using the cache when
    possible.

    A cached item is used only if it satisfies require_meta, i.e. its
    meta is a Metadata instance when metadata is required.  Otherwise
    the commit is read from repo, given the tree's metadata when
    require_meta is true and the tree has any, and offered to the cache.
    """
    cached = cache_get_commit_item(oid, need_meta=require_meta)
    if cached:
        if not require_meta or isinstance(cached.meta, Metadata):
            return cached
    stream = repo.cat(oid.encode('hex'))
    _, typ, _ = next(stream)
    assert typ == 'commit'
    commit = _commit_item_from_data(oid, ''.join(stream))
    if require_meta:
        meta = _find_treeish_oid_metadata(repo, commit.oid)
        if meta:
            commit = commit._replace(meta=meta)
    cache_notice(b'itm:' + oid, commit)
    return commit
+
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList for the commit oid, carrying that commit's meta."""
    meta = _commit_item_from_oid(repo, oid, require_meta).meta
    return RevList(oid=oid, meta=meta)
+
def root_items(repo, names=None, want_meta=True):
    """Yield (name, item) for the items in '/' in the VFS.

    Everything is produced if names is logically false: '.', '.tag',
    and one RevList per head ref.  Otherwise only items whose name is in
    the names collection are produced, and a requested branch that the
    repository cannot find is silently skipped.
    """
    # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?

    if not names:
        yield '.', _root
        yield '.tag', _tags
        # FIXME: maybe eventually support repo.clone() or something
        # and pass in two repos, so we can drop the tuple() and stream
        # in parallel (i.e. meta vs refs).
        for refname, oid in tuple(repo.refs([], limit_to_heads=True)):
            assert(refname.startswith('refs/heads/'))
            # Strip the 11-character 'refs/heads/' prefix.
            branch = refname[11:]
            yield branch, _revlist_item_from_oid(repo, oid, want_meta)
        return

    if '.' in names:
        yield '.', _root
    if '.tag' in names:
        yield '.tag', _tags
    for ref in names:
        if ref in ('.', '.tag'):
            continue
        stream = repo.cat('refs/heads/' + ref)
        oidx, typ, _ = next(stream)
        if not oidx:
            # No such ref: drain the rest of the stream and move on.
            for _ in stream:
                pass
            continue
        assert typ == 'commit'
        # The parsed result is not used; the call consumes the stream.
        parse_commit(''.join(stream))
        yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), want_meta)
+
def ordered_tree_entries(tree_data, bupm=None):
    """Yield (name, mangled_name, kind, gitmode, oid) for each item in
    the tree.

    When bupm is logically true the entries are sorted by demangled
    name; otherwise they are yielded in the order tree_decode()
    produces them.
    """
    # Sadly, the .bupm entries currently aren't in git tree order,
    # i.e. they don't account for the fact that git sorts trees
    # (including our chunked trees) as if their names ended with "/",
    # so "fo" sorts after "fo." iff fo is a directory.  This makes
    # streaming impossible when we need the metadata.
    def demangled(raw_entries):
        for gitmode, mangled_name, oid in raw_entries:
            name, kind = git.demangle_name(mangled_name, gitmode)
            yield name, mangled_name, kind, gitmode, oid

    entries = demangled(tree_decode(tree_data))
    if bupm:
        entries = sorted(entries, key=lambda entry: entry[0])
    for entry in entries:
        yield entry
+
+def tree_items(oid, tree_data, names=frozenset(), bupm=None):
+
+ def tree_item(ent_oid, kind, gitmode):
+ if kind == BUP_CHUNKED:
+ meta = Metadata.read(bupm) if bupm else default_file_mode
+ return Chunky(oid=ent_oid, meta=meta)
+
+ if S_ISDIR(gitmode):
+ # No metadata here (accessable via '.' inside ent_oid).
+ return Item(meta=default_dir_mode, oid=ent_oid)
+
+ return Item(oid=ent_oid,
+ meta=(Metadata.read(bupm) if bupm \
+ else _default_mode_for_gitmode(gitmode)))
+
+ assert len(oid) == 20
+ if not names:
+ dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
+ yield '.', Item(oid=oid, meta=dot_meta)
+ tree_entries = ordered_tree_entries(tree_data, bupm)
+ for name, mangled_name, kind, gitmode, ent_oid in tree_entries: