1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
7 import os, re, stat, time
9 from bup import git, metadata
10 from helpers import debug1, debug2
11 from bup.git import BUP_NORMAL, BUP_CHUNKED, cp
12 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
class NodeError(Exception):
    """Common base class for the exceptions defined by the VFS module."""
class NoSuchFile(NodeError):
    """Raised when a requested file does not exist."""
class NotDir(NodeError):
    """Raised when a directory operation is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that is not a file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing nests too deeply."""
38 def _treeget(hash, repo_dir=None):
39 it = cp(repo_dir).get(hash.encode('hex'))
41 assert(type == 'tree')
42 return git.tree_decode(''.join(it))
45 def _tree_decode(hash, repo_dir=None):
46 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
48 in _treeget(hash, repo_dir)]
49 assert(tree == list(sorted(tree)))
def _chunk_len(hash, repo_dir=None):
    """Return the summed length of every piece produced for an object.

    The hash is hex-encoded and passed to join() on the object returned by
    cp(repo_dir); the lengths of all pieces it yields are added together.
    """
    pipe = cp(repo_dir)
    total = 0
    for piece in pipe.join(hash.encode('hex')):
        total += len(piece)
    return total
57 def _last_chunk_info(hash, repo_dir=None):
58 tree = _tree_decode(hash, repo_dir)
60 (ofs,isdir,sha) = tree[-1]
62 (subofs, sublen) = _last_chunk_info(sha, repo_dir)
63 return (ofs+subofs, sublen)
65 return (ofs, _chunk_len(sha))
def _total_size(hash, repo_dir=None):
    """Return the total size of the object identified by hash.

    The size is the sum of the two values reported by _last_chunk_info():
    the offset of the last chunk and that chunk's length.
    """
    offset, length = _last_chunk_info(hash, repo_dir)
    return offset + length
73 def _chunkiter(hash, startofs, repo_dir=None):
75 tree = _tree_decode(hash, repo_dir)
77 # skip elements before startofs
78 for i in xrange(len(tree)):
79 if i+1 >= len(tree) or tree[i+1][0] > startofs:
83 # iterate through what's left
84 for i in xrange(first, len(tree)):
85 (ofs,isdir,sha) = tree[i]
86 skipmore = startofs-ofs
90 for b in _chunkiter(sha, skipmore, repo_dir):
93 yield ''.join(cp(repo_dir).join(sha.encode('hex')))[skipmore:]
97 def __init__(self, hash, isdir, startofs, repo_dir=None):
99 self.it = _chunkiter(hash, startofs, repo_dir)
103 self.blob = ''.join(cp(repo_dir).join(hash.encode('hex')))[startofs:]
106 def next(self, size):
108 while len(out) < size:
109 if self.it and not self.blob:
111 self.blob = next(self.it)
112 except StopIteration:
115 want = size - len(out)
116 out += self.blob[:want]
117 self.blob = self.blob[want:]
120 debug2('next(%d) returned %d\n' % (size, len(out)))
125 class _FileReader(object):
126 def __init__(self, hash, size, isdir, repo_dir=None):
132 self._repo_dir = repo_dir
145 def read(self, count = -1):
147 count = self.size - self.ofs
148 if not self.reader or self.reader.ofs != self.ofs:
149 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs,
152 buf = self.reader.next(count)
155 raise # our offsets will be all screwed up otherwise
164 """Base class for file representation."""
165 def __init__(self, parent, name, mode, hash, repo_dir=None):
170 self.ctime = self.mtime = self.atime = 0
171 self._repo_dir = repo_dir
173 self._metadata = None
176 return "<%s object at %s - name:%r hash:%s parent:%r>" \
177 % (self.__class__, hex(id(self)),
178 self.name, self.hash.encode('hex'),
179 self.parent.name if self.parent else None)
184 return (cmp(a and a.parent, b and b.parent) or
185 cmp(a and a.name, b and b.name))
188 return iter(self.subs())
190 def fullname(self, stop_at=None):
191 """Get this file's full path."""
192 assert(self != stop_at) # would be the empty string; too weird
193 if self.parent and self.parent != stop_at:
194 return os.path.join(self.parent.fullname(stop_at=stop_at),
203 """Get a list of nodes that are contained in this node."""
204 if self._subs == None:
206 return sorted(self._subs.values())
209 """Get node named 'name' that is contained in this node."""
210 if self._subs == None:
212 ret = self._subs.get(name)
214 raise NoSuchFile("no file %r in %r" % (name, self.name))
218 """Return the very top node of the tree."""
220 return self.parent.top()
225 """Return the top node of the particular backup set.
227 If this node isn't inside a backup set, return the root level.
229 if self.parent and not isinstance(self.parent, CommitList):
230 return self.parent.fs_top()
234 def _lresolve(self, parts):
235 #debug2('_lresolve %r in %r\n' % (parts, self.name))
238 (first, rest) = (parts[0], parts[1:])
240 return self._lresolve(rest)
243 raise NoSuchFile("no parent dir for %r" % self.name)
244 return self.parent._lresolve(rest)
246 return self.sub(first)._lresolve(rest)
248 return self.sub(first)
250 def lresolve(self, path, stay_inside_fs=False):
251 """Walk into a given sub-path of this node.
253 If the last element is a symlink, leave it as a symlink, don't resolve
259 if path.startswith('/'):
261 start = self.fs_top()
265 parts = re.split(r'/+', path or '.')
268 #debug2('parts: %r %r\n' % (path, parts))
269 return start._lresolve(parts)
def resolve(self, path=''):
    """Resolve path like lresolve(), then dereference a final symlink.

    The node found by lresolve(path) is asked to lresolve('.') in turn and
    that second result is returned.
    """
    node = self.lresolve(path)
    return node.lresolve('.')
275 def try_resolve(self, path = ''):
276 """Like resolve(), but don't worry if a symlink uses an invalid path.
278 Returns an error if any intermediate nodes were invalid.
280 n = self.lresolve(path)
288 """Get the number of hard links to the current node."""
292 """Get the size of the current node."""
296 """Open the current node. It is an error to open a non-file node."""
297 raise NotFile('%s is not a regular file' % self.name)
299 def _populate_metadata(self, force=False):
300 # Only Dirs contain .bupm files, so by default, do nothing.
304 """Return this Node's Metadata() object, if any."""
305 if not self._metadata and self.parent:
306 self.parent._populate_metadata(force=True)
307 return self._metadata
310 """Release resources that can be automatically restored (at a cost)."""
311 self._metadata = None
316 """A normal file from bup's repository."""
317 def __init__(self, parent, name, mode, hash, bupmode, repo_dir=None):
318 Node.__init__(self, parent, name, mode, hash, repo_dir)
319 self.bupmode = bupmode
320 self._cached_size = None
321 self._filereader = None
325 # You'd think FUSE might call this only once each time a file is
326 # opened, but no; it's really more of a refcount, and it's called
327 # once per read(). Thus, it's important to cache the filereader
328 # object here so we're not constantly re-seeking.
329 if not self._filereader:
330 self._filereader = _FileReader(self.hash, self.size(),
331 self.bupmode == git.BUP_CHUNKED,
332 repo_dir = self._repo_dir)
333 self._filereader.seek(0)
334 return self._filereader
337 """Get this file's size."""
338 if self._cached_size == None:
339 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
340 if self.bupmode == git.BUP_CHUNKED:
341 self._cached_size = _total_size(self.hash,
342 repo_dir = self._repo_dir)
344 self._cached_size = _chunk_len(self.hash,
345 repo_dir = self._repo_dir)
346 debug1('<<<<File.size() done.\n')
347 return self._cached_size
352 """A symbolic link from bup's repository."""
353 def __init__(self, parent, name, hash, bupmode, repo_dir=None):
354 File.__init__(self, parent, name, 0o120000, hash, bupmode,
358 """Get the length of the path that this link points at."""
359 return len(self.readlink())
362 """Get the path that this link points at."""
363 return ''.join(cp(self._repo_dir).join(self.hash.encode('hex')))
365 def dereference(self):
366 """Get the node that this link points at.
368 If the path is invalid, raise a NoSuchFile exception. If the level of
369 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
374 raise TooManySymlinks('too many levels of symlinks: %r'
379 return self.parent.lresolve(self.readlink(),
382 raise NoSuchFile("%s: broken symlink to %r"
383 % (self.fullname(), self.readlink()))
387 def _lresolve(self, parts):
388 return self.dereference()._lresolve(parts)
391 class FakeSymlink(Symlink):
392 """A symlink that is not stored in the bup repository."""
393 def __init__(self, parent, name, toname, repo_dir=None):
394 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL,
399 """Get the path that this link points at."""
404 """A directory stored inside of bup's repository."""
406 def __init__(self, *args, **kwargs):
407 Node.__init__(self, *args, **kwargs)
410 def _populate_metadata(self, force=False):
411 if self._metadata and not force:
417 meta_stream = self._bupm.open()
418 dir_meta = metadata.Metadata.read(meta_stream)
420 if not stat.S_ISDIR(sub.mode):
421 sub._metadata = metadata.Metadata.read(meta_stream)
422 self._metadata = dir_meta
426 it = cp(self._repo_dir).get(self.hash.encode('hex'))
427 _, type, _ = next(it)
430 it = cp(self._repo_dir).get(self.hash.encode('hex') + ':')
431 _, type, _ = next(it)
432 assert(type == 'tree')
433 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
434 if mangled_name == '.bupm':
435 bupmode = stat.S_ISDIR(mode) and BUP_CHUNKED or BUP_NORMAL
436 self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
439 name, bupmode = git.demangle_name(mangled_name, mode)
440 if bupmode == git.BUP_CHUNKED:
442 if stat.S_ISDIR(mode):
443 self._subs[name] = Dir(self, name, mode, sha, self._repo_dir)
444 elif stat.S_ISLNK(mode):
445 self._subs[name] = Symlink(self, name, sha, bupmode,
448 self._subs[name] = File(self, name, mode, sha, bupmode,
452 """Return this Dir's Metadata() object, if any."""
453 self._populate_metadata()
454 return self._metadata
456 def metadata_file(self):
457 """Return this Dir's .bupm File, if any."""
463 """Release restorable resources held by this node."""
465 super(Dir, self).release()
468 class CommitDir(Node):
469 """A directory that contains all commits that are reachable by a ref.
471 Contains a set of subdirectories named after the commits' first byte in
472 hexadecimal. Each of those directories contains all commits with hashes that
473 start the same as the directory name. The name used for those
474 subdirectories is the hash of the commit without the first byte. This
475 separation helps us avoid having too many directories on the same level as
476 the number of commits grows big.
478 def __init__(self, parent, name, repo_dir=None):
479 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
483 refs = git.list_refs(repo_dir = self._repo_dir)
485 #debug2('ref name: %s\n' % ref[0])
486 revs = git.rev_list(ref[1].encode('hex'),
488 parse=lambda f: int(f.readline().strip()),
489 repo_dir=self._repo_dir)
490 for commithex, date in revs:
491 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
492 commit = commithex.decode('hex')
493 containername = commithex[:2]
494 dirname = commithex[2:]
495 n1 = self._subs.get(containername)
497 n1 = CommitList(self, containername, self._repo_dir)
498 self._subs[containername] = n1
500 if n1.commits.get(dirname):
501 # Stop work for this ref, the rest should already be present
504 n1.commits[dirname] = (commit, date)
507 class CommitList(Node):
508 """A list of commits with hashes that start with the current node's name."""
509 def __init__(self, parent, name, repo_dir=None):
510 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
515 for (name, (hash, date)) in self.commits.items():
516 n1 = Dir(self, name, GIT_MODE_TREE, hash, self._repo_dir)
517 n1.ctime = n1.mtime = date
518 self._subs[name] = n1
522 """A directory that contains all tags in the repository."""
523 def __init__(self, parent, name, repo_dir = None):
524 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
528 for (name, sha) in git.list_refs(repo_dir = self._repo_dir):
529 if name.startswith('refs/tags/'):
531 date = git.get_commit_dates([sha.encode('hex')],
532 repo_dir=self._repo_dir)[0]
533 commithex = sha.encode('hex')
534 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
535 tag1 = FakeSymlink(self, name, target, self._repo_dir)
536 tag1.ctime = tag1.mtime = date
537 self._subs[name] = tag1
540 class BranchList(Node):
541 """A list of links to commits reachable by a branch in bup's repository.
543 Represents each commit as a symlink that points to the commit directory in
544 /.commit/??/ . The symlink is named after the commit date.
546 def __init__(self, parent, name, hash, repo_dir=None):
547 Node.__init__(self, parent, name, GIT_MODE_TREE, hash, repo_dir)
553 revs = list(git.rev_list(self.hash.encode('hex'),
555 parse=lambda f: int(f.readline().strip()),
556 repo_dir=self._repo_dir))
558 for commithex, date in revs:
559 l = time.localtime(date)
560 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
561 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
562 n1 = FakeSymlink(self, ls, target, self._repo_dir)
563 n1.ctime = n1.mtime = date
566 commithex, date = latest
567 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
568 n1 = FakeSymlink(self, 'latest', target, self._repo_dir)
569 n1.ctime = n1.mtime = date
570 self._subs['latest'] = n1
574 """A list of branches in bup's repository.
576 The sub-nodes of the ref list are a series of BranchList for each commit
577 hash pointed to by a branch.
579 Also, a special sub-node named '.commit' contains all commit directories
580 that are reachable via a ref (e.g. a branch). See CommitDir for details.
582 def __init__(self, parent, repo_dir=None):
583 Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA, repo_dir)
588 commit_dir = CommitDir(self, '.commit', self._repo_dir)
589 self._subs['.commit'] = commit_dir
591 tag_dir = TagDir(self, '.tag', self._repo_dir)
592 self._subs['.tag'] = tag_dir
594 refs_info = [(name[11:], sha) for (name,sha)
595 in git.list_refs(limit_to_heads=True,
596 repo_dir=self._repo_dir)]
597 dates = git.get_commit_dates([sha.encode('hex')
598 for (name, sha) in refs_info],
599 repo_dir=self._repo_dir)
600 for (name, sha), date in zip(refs_info, dates):
601 n1 = BranchList(self, name, sha, self._repo_dir)
602 n1.ctime = n1.mtime = date
603 self._subs[name] = n1