- If this node isn't inside a backup set, return the root level.
- """
- if self.parent and not isinstance(self.parent, CommitList):
- return self.parent.fs_top()
- else:
- return self
-
- def _lresolve(self, parts):
- #debug2('_lresolve %r in %r\n' % (parts, self.name))
- if not parts:
- return self
- (first, rest) = (parts[0], parts[1:])
- if first == '.':
- return self._lresolve(rest)
- elif first == '..':
- if not self.parent:
- raise NoSuchFile("no parent dir for %r" % self.name)
- return self.parent._lresolve(rest)
- elif rest:
- return self.sub(first)._lresolve(rest)
- else:
- return self.sub(first)
-
- def lresolve(self, path, stay_inside_fs=False):
- """Walk into a given sub-path of this node.
-
- If the last element is a symlink, leave it as a symlink, don't resolve
- it. (like lstat())
- """
- start = self
- if not path:
- return start
- if path.startswith('/'):
- if stay_inside_fs:
- start = self.fs_top()
- else:
- start = self.top()
- path = path[1:]
- parts = re.split(r'/+', path or '.')
- if not parts[-1]:
- parts[-1] = '.'
- #debug2('parts: %r %r\n' % (path, parts))
- return start._lresolve(parts)
-
- def resolve(self, path = ''):
- """Like lresolve(), and dereference it if it was a symlink."""
- return self.lresolve(path).lresolve('.')
-
- def try_resolve(self, path = ''):
- """Like resolve(), but don't worry if a symlink uses an invalid path.
-
- Returns an error if any intermediate nodes were invalid.
- """
- n = self.lresolve(path)
- try:
- n = n.lresolve('.')
- except NoSuchFile:
- pass
- return n
-
- def nlinks(self):
- """Get the number of hard links to the current node."""
- if self._subs == None:
- self._mksubs()
- return 1
-
- def size(self):
- """Get the size of the current node."""
- return 0
-
- def open(self):
- """Open the current node. It is an error to open a non-file node."""
- raise NotFile('%s is not a regular file' % self.name)
-
-
class File(Node):
    """A normal file from bup's repository.

    bupmode is compared against git.BUP_CHUNKED to decide how the size
    is computed and how the reader is created.  Both the size and the
    reader are created lazily and cached on the instance.
    """
    def __init__(self, parent, name, mode, hash, bupmode):
        Node.__init__(self, parent, name, mode, hash)
        self.bupmode = bupmode
        self._cached_size = None
        self._filereader = None

    def open(self):
        """Open the file, returning the cached reader rewound to offset 0."""
        # You'd think FUSE might call this only once each time a file is
        # opened, but no; it's really more of a refcount, and it's called
        # once per read().  Thus, it's important to cache the filereader
        # object here so we're not constantly re-seeking.
        if not self._filereader:
            self._filereader = _FileReader(self.hash, self.size(),
                                           self.bupmode == git.BUP_CHUNKED)
        self._filereader.seek(0)
        return self._filereader

    def size(self):
        """Get this file's size, computing and caching it on first use."""
        # 'is None' rather than '== None': a legitimately cached size of
        # 0 must count as cached, and None is a singleton.
        if self._cached_size is None:
            debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
            if self.bupmode == git.BUP_CHUNKED:
                self._cached_size = _total_size(self.hash)
            else:
                self._cached_size = _chunk_len(self.hash)
            debug1('<<<<File.size() done.\n')
        return self._cached_size
-
-
# Number of Symlink.dereference() calls currently in progress; used to
# cut off runaway symlink chains.
_symrefs = 0
class Symlink(File):
    """A symbolic link from bup's repository."""
    def __init__(self, parent, name, hash, bupmode):
        # 0o120000 is the symlink file-type mode.  The 0o spelling is
        # accepted by Python 2.6+ and 3; the bare leading-zero form is a
        # syntax error on Python 3.
        File.__init__(self, parent, name, 0o120000, hash, bupmode)

    def size(self):
        """Get the length of the link target path."""
        return len(self.readlink())

    def readlink(self):
        """Get the path that this link points at."""
        return ''.join(cp().join(self.hash.encode('hex')))

    def dereference(self):
        """Get the node that this link points at.

        If the path is invalid, raise a NoSuchFile exception.  If more
        than 100 dereferences are already in progress, raise a
        TooManySymlinks exception.
        """
        global _symrefs
        if _symrefs > 100:
            raise TooManySymlinks('too many levels of symlinks: %r'
                                  % self.fullname())
        _symrefs += 1
        try:
            try:
                return self.parent.lresolve(self.readlink(),
                                            stay_inside_fs=True)
            except NoSuchFile:
                raise NoSuchFile("%s: broken symlink to %r"
                                 % (self.fullname(), self.readlink()))
        finally:
            # Always undo the depth bump, success or failure.
            _symrefs -= 1

    def _lresolve(self, parts):
        """Continue path resolution from the node this link points at."""
        return self.dereference()._lresolve(parts)
-
-
class FakeSymlink(Symlink):
    """A symlink that exists only in the VFS, not in the bup repository.

    It is constructed with EMPTY_SHA as its hash and keeps its target
    path in self.toname.
    """
    def __init__(self, parent, name, toname):
        Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
        self.toname = toname

    def readlink(self):
        """Return the stored target path; nothing is read from the repo."""
        return self.toname
-
-
-class Dir(Node):
- """A directory stored inside of bup's repository."""
-
- def __init__(self, *args):
- Node.__init__(self, *args)
- self._metadata_sha = None
-
- def _mksubs(self):
- self._subs = {}
- it = cp().get(self.hash.encode('hex'))
- type = it.next()
- if type == 'commit':
- del it
- it = cp().get(self.hash.encode('hex') + ':')
- type = it.next()
- assert(type == 'tree')
- for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
+ """
+ path = re.sub(_multiple_slashes_rx, '/', path)
+ if path == '/':
+ return True, True, []
+ is_absolute = must_be_dir = False
+ if path.startswith('/'):
+ is_absolute = True
+ path = path[1:]
+ for suffix in ('/', '/.'):
+ if path.endswith(suffix):
+ must_be_dir = True
+ path = path[:-len(suffix)]
+ parts = [x for x in path.split('/') if x != '.']
+ parts.reverse()
+ if not parts:
+ must_be_dir = True # e.g. path was effectively '.' or '/', etc.
+ return is_absolute, must_be_dir, parts
+
+
# VFS item types.  Every item carries 'meta'; elsewhere in this module
# it is treated as either a Metadata instance or a plain integer mode.
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
# ('meta',) with the trailing comma is a real one-element tuple; the
# bare ('meta') is just a parenthesised string.
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))

item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
real_tree_types = frozenset((Item, Commit))
+
# Shared items for the VFS root and its tags entry; both carry only
# the default directory mode as their meta.
_root = Root(default_dir_mode)
_tags = Tags(default_dir_mode)
+
+
+### vfs cache
+
+### A general purpose shared cache with (currently) cheap random
+### eviction. At the moment there is no weighting so a single commit
+### item is just as likely to be evicted as an entire "rev-list". See
+### is_valid_cache_key for a description of the expected content.
+
+_cache = {}
+_cache_keys = []
+_cache_max_items = 30000
+
def clear_cache():
    """Drop everything from the shared cache.

    The module-level containers are rebound to fresh empty objects
    rather than emptied in place.
    """
    global _cache, _cache_keys
    _cache, _cache_keys = {}, []
+
def is_valid_cache_key(x):
    """Return True if x looks like it could be a valid cache key (with
    respect to structure), False otherwise.  Current valid cache entries:
      (path, parent, want_meta, dref) -> resolution
      commit_oid -> commit
      commit_oid + ':r' -> rev-list
         i.e. rev-list -> {'.', commit, '2012...', next_commit, ...}
    """
    # Suspect we may eventually add "(container_oid, name) -> ...", and others.
    # Exact type checks are deliberate: tuple subclasses such as
    # namedtuples are not accepted as keys.
    x_t = type(x)
    if x_t is tuple:
        return len(x) == 4
    if x_t is bytes:
        if len(x) == 20:
            return True
        return len(x) == 22 and x.endswith(b':r')
    # Always answer with a real bool instead of falling off the end.
    return False
+
def cache_get(key):
    """Return the value cached under key, or None when it is absent.

    key must satisfy is_valid_cache_key().
    """
    assert is_valid_cache_key(key)
    return _cache.get(key)
+
def cache_notice(key, value, overwrite=False):
    """Offer key -> value to the shared cache.

    key must satisfy is_valid_cache_key().  An existing entry is left
    alone unless overwrite is true.  While the cache holds fewer than
    _cache_max_items entries the pair is simply added; after that a
    randomly chosen existing entry is evicted to make room.  If there is
    nothing to evict (i.e. _cache_max_items is not positive) the value
    is not cached.
    """
    assert is_valid_cache_key(key)
    if key in _cache:
        if overwrite:
            # Key already tracked in _cache_keys; only refresh the value.
            _cache[key] = value
        return
    if len(_cache) < _cache_max_items:
        _cache_keys.append(key)
        _cache[key] = value
        return
    if not _cache_keys:
        # Caching is disabled; randrange(0, 0) would raise ValueError.
        return
    victim_i = randrange(0, len(_cache_keys))
    victim = _cache_keys[victim_i]
    del _cache[victim]
    # Reuse the victim's slot so _cache_keys stays in step with _cache.
    _cache_keys[victim_i] = key
    _cache[key] = value
+
def cache_get_commit_item(oid, need_meta=True):
    """Return the cached item for commit oid, or None if there is none.

    A directly cached item is returned only if need_meta is false or
    its meta is a Metadata instance.  Failing that, the '.' entry of a
    cached rev-list (key oid + ':r') is returned when present; that
    fallback does not itself check need_meta.
    """
    # tree might be stored independently, or as '.' with its entries.
    item = cache_get(oid)
    if item and (not need_meta or isinstance(item.meta, Metadata)):
        return item
    entries = cache_get(oid + b':r')
    if entries:
        return entries['.']
    return None
+
def cache_get_revlist_item(oid, need_meta=True):
    """Return a RevList for oid built from the cached commit item's
    meta, or None when cache_get_commit_item() finds nothing."""
    commit = cache_get_commit_item(oid, need_meta=need_meta)
    if not commit:
        return None
    return RevList(meta=commit.meta, oid=oid)
+
def copy_item(item):
    """Return a copy of item that can be modified without affecting
    the original.

    Only a Metadata meta is copied (via meta.copy()); an item without
    one is returned unchanged.
    """
    meta = getattr(item, 'meta', None)
    if not isinstance(meta, Metadata):
        return item
    return item._replace(meta=meta.copy())
+
def item_mode(item):
    """Return the integer mode (stat st_mode) for item.

    item.meta is either a Metadata, whose .mode is used, or the mode
    itself.
    """
    meta = item.meta
    return meta.mode if isinstance(meta, Metadata) else meta
+
def _read_dir_meta(bupm):
    """Read one directory Metadata record from bupm.

    Returns default_dir_mode if the record read is logically false;
    otherwise returns the Metadata with a missing size set to 0.
    """
    # This is because save writes unmodified Metadata() entries for
    # fake parents -- test-save-strip-graft.sh demonstrates.
    meta = Metadata.read(bupm)
    if not meta:
        return default_dir_mode
    assert meta.mode is not None
    if meta.size is None:
        meta.size = 0
    return meta
+
def tree_data_and_bupm(repo, oid):
    """Return (tree_bytes, bupm_oid) for the tree or commit named by oid.

    If oid names a commit, the commit's tree is read instead.  bupm_oid
    is None if the tree has no '.bupm' entry (i.e. older bup save, or
    non-bup tree).  Anything that is neither a commit nor a tree raises
    Exception.
    """
    assert len(oid) == 20

    def fetch(ref):
        # Read one object: its type from the header, then all its data.
        stream = repo.cat(ref)
        _, kind, _ = next(stream)
        return kind, ''.join(stream)

    hex_oid = oid.encode('hex')
    item_t, data = fetch(hex_oid)
    if item_t == 'commit':
        item_t, data = fetch(parse_commit(data).tree)
        assert item_t == 'tree'
    elif item_t != 'tree':
        raise Exception('%r is not a tree or commit' % hex_oid)
    for _, mangled_name, sub_oid in tree_decode(data):
        if mangled_name == '.bupm':
            return data, sub_oid
        if mangled_name > '.bupm':
            # Stop at the first name that compares after '.bupm'.
            break
    return data, None
+
def _find_treeish_oid_metadata(repo, oid):
    """Return the directory metadata for the tree or commit oid, or
    None if the tree has no .bupm (i.e. older bup save, or non-bup
    tree).

    """
    _, bupm_oid = tree_data_and_bupm(repo, oid)
    if not bupm_oid:
        return None
    with _FileReader(repo, bupm_oid) as meta_stream:
        return _read_dir_meta(meta_stream)
+
def _readlink(repo, oid):
    """Return the content of object oid, joined from repo.join(), as
    the link target."""
    pieces = repo.join(oid.encode('hex'))
    return ''.join(pieces)
+
def readlink(repo, item):
    """Return the link target of item, which must be a symlink.

    A non-empty target recorded in the item's Metadata is used
    directly; otherwise the target is read from the repository.
    """
    assert repo
    assert S_ISLNK(item_mode(item))
    meta = item.meta
    target = meta.symlink_target if isinstance(meta, Metadata) else None
    if target:
        return target
    return _readlink(repo, item.oid)
+
def _compute_item_size(repo, item):
    """Work out item's size from the repository.

    Regular files use _normal_or_chunked_file_size(), symlinks use the
    length of their target, and everything else is 0.
    """
    mode = item_mode(item)
    if S_ISREG(mode):
        return _normal_or_chunked_file_size(repo, item.oid)
    if S_ISLNK(mode):
        return len(_readlink(repo, item.oid))
    return 0
+
def item_size(repo, item):
    """Return the size of item, using the size recorded in its Metadata
    when there is one and computing it from the repository otherwise."""
    meta = item.meta
    if not isinstance(meta, Metadata) or meta.size is None:
        return _compute_item_size(repo, item)
    return meta.size
+
def tree_data_reader(repo, oid):
    """Return an open reader for all of the data contained within oid.
    If oid refers to a tree, recursively concatenate all of its
    contents."""
    reader = _FileReader(repo, oid)
    return reader
+
def fopen(repo, item):
    """Return an open reader for item, which must be a regular file."""
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
+
def _commit_item_from_data(oid, data):
    """Build a Commit item from raw commit data.

    The item's oid is the commit's tree (hex-decoded), its coid is the
    commit oid passed in, and its meta is default_dir_mode.
    """
    tree_oid = parse_commit(data).tree.decode('hex')
    return Commit(meta=default_dir_mode, oid=tree_oid, coid=oid)
+
def _commit_item_from_oid(repo, oid, require_meta):
    """Return the Commit item for commit oid, using the cache if it can.

    A cached item is used only if require_meta is false or its meta is
    a Metadata.  Otherwise the commit is read from repo, given the
    tree's metadata when require_meta is true and the tree has some,
    and offered to the cache.
    """
    cached = cache_get_commit_item(oid, need_meta=require_meta)
    if cached:
        if not require_meta or isinstance(cached.meta, Metadata):
            return cached
    stream = repo.cat(oid.encode('hex'))
    _, typ, _ = next(stream)
    assert typ == 'commit'
    commit = _commit_item_from_data(oid, ''.join(stream))
    if require_meta:
        meta = _find_treeish_oid_metadata(repo, commit.oid)
        if meta:
            commit = commit._replace(meta=meta)
    cache_notice(oid, commit)
    return commit
+
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList item for oid that carries the meta of the
    corresponding commit item."""
    meta = _commit_item_from_oid(repo, oid, require_meta).meta
    return RevList(meta=meta, oid=oid)
+
def root_items(repo, names=None, want_meta=True):
    """Yield (name, item) for the items in '/' in the VFS.  Return
    everything if names is logically false, otherwise return only
    items with a name in the collection.

    Requested names with no matching 'refs/heads/' ref are silently
    skipped.

    """
    # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?

    if names:
        if '.' in names:
            yield '.', _root
        if '.tag' in names:
            yield '.tag', _tags
        for ref in names:
            if ref in ('.', '.tag'):
                continue
            it = repo.cat('refs/heads/' + ref)
            oidx, typ, size = next(it)
            if not oidx:
                # No such ref; drain the stream before moving on.
                for _ in it: pass
                continue
            assert typ == 'commit'
            # Result unused, but joining consumes the rest of the stream.
            parse_commit(''.join(it))
            yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'),
                                              want_meta)
        return

    yield '.', _root
    yield '.tag', _tags
    # FIXME: maybe eventually support repo.clone() or something
    # and pass in two repos, so we can drop the tuple() and stream
    # in parallel (i.e. meta vs refs).
    prefix_len = len('refs/heads/')
    for name, oid in tuple(repo.refs([], limit_to_heads=True)):
        assert(name.startswith('refs/heads/'))
        yield name[prefix_len:], _revlist_item_from_oid(repo, oid, want_meta)
+
def ordered_tree_entries(tree_data, bupm=None):
    """Yields (name, mangled_name, kind, gitmode, oid) for each item in
    tree.  Entries are sorted by demangled name only when bupm is
    given; otherwise they are yielded in tree_decode() order.

    """
    # Sadly, the .bupm entries currently aren't in git tree order,
    # i.e. they don't account for the fact that git sorts trees
    # (including our chunked trees) as if their names ended with "/",
    # so "fo" sorts after "fo." iff fo is a directory.  This makes
    # streaming impossible when we need the metadata.
    def demangled(entries):
        for gitmode, mangled_name, oid in entries:
            name, kind = git.demangle_name(mangled_name, gitmode)
            yield name, mangled_name, kind, gitmode, oid

    entries = demangled(tree_decode(tree_data))
    if bupm:
        entries = sorted(entries, key=lambda entry: entry[0])
    for entry in entries:
        yield entry
+
+def tree_items(oid, tree_data, names=frozenset(), bupm=None):
+
+ def tree_item(ent_oid, kind, gitmode):
+ if kind == BUP_CHUNKED:
+ meta = Metadata.read(bupm) if bupm else default_file_mode
+ return Chunky(oid=ent_oid, meta=meta)
+
+ if S_ISDIR(gitmode):
+ # No metadata here (accessable via '.' inside ent_oid).
+ return Item(meta=default_dir_mode, oid=ent_oid)
+
+ return Item(oid=ent_oid,
+ meta=(Metadata.read(bupm) if bupm \
+ else _default_mode_for_gitmode(gitmode)))
+
+ assert len(oid) == 20
+ if not names:
+ dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
+ yield '.', Item(oid=oid, meta=dot_meta)
+ tree_entries = ordered_tree_entries(tree_data, bupm)
+ for name, mangled_name, kind, gitmode, ent_oid in tree_entries: