1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
7 import os, re, stat, time
9 from bup import git, metadata
10 from helpers import debug1, debug2
11 from bup.git import BUP_NORMAL, BUP_CHUNKED, cp
12 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
class NodeError(Exception):
    """Common base class of the VFS-specific exceptions in this module.

    Catching NodeError also catches the more specific VFS errors that
    derive from it.
    """
class NoSuchFile(NodeError):
    """Raised when a requested file or path does not exist."""
class NotDir(NodeError):
    """Raised when a directory action is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that does not represent a file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing goes too many levels deep."""
38 def _treeget(hash, repo_dir=None):
39 it = cp(repo_dir).get(hash.encode('hex'))
41 assert(type == 'tree')
42 return git.tree_decode(''.join(it))
45 def _tree_decode(hash, repo_dir=None):
46 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
48 in _treeget(hash, repo_dir)]
49 assert(tree == list(sorted(tree)))
def _chunk_len(hash, repo_dir=None):
    """Return the combined length of all pieces of the object 'hash'.

    'hash' is hex-encoded and passed to cp(repo_dir).join(); the lengths of
    the pieces it yields are added up.
    """
    total = 0
    for piece in cp(repo_dir).join(hash.encode('hex')):
        total += len(piece)
    return total
57 def _last_chunk_info(hash, repo_dir=None):
58 tree = _tree_decode(hash, repo_dir)
60 (ofs,isdir,sha) = tree[-1]
62 (subofs, sublen) = _last_chunk_info(sha, repo_dir)
63 return (ofs+subofs, sublen)
65 return (ofs, _chunk_len(sha))
def _total_size(hash, repo_dir=None):
    """Return the total size of the chunked object 'hash'.

    The size is the offset of the last chunk plus the length of that chunk,
    both as reported by _last_chunk_info().
    """
    final_offset, final_length = _last_chunk_info(hash, repo_dir)
    return final_offset + final_length
73 def _chunkiter(hash, startofs, repo_dir=None):
75 tree = _tree_decode(hash, repo_dir)
77 # skip elements before startofs
78 for i in xrange(len(tree)):
79 if i+1 >= len(tree) or tree[i+1][0] > startofs:
83 # iterate through what's left
84 for i in xrange(first, len(tree)):
85 (ofs,isdir,sha) = tree[i]
86 skipmore = startofs-ofs
90 for b in _chunkiter(sha, skipmore, repo_dir):
93 yield ''.join(cp(repo_dir).join(sha.encode('hex')))[skipmore:]
97 def __init__(self, hash, isdir, startofs, repo_dir=None):
99 self.it = _chunkiter(hash, startofs, repo_dir)
103 self.blob = ''.join(cp(repo_dir).join(hash.encode('hex')))[startofs:]
106 def next(self, size):
108 while len(out) < size:
109 if self.it and not self.blob:
111 self.blob = self.it.next()
112 except StopIteration:
115 want = size - len(out)
116 out += self.blob[:want]
117 self.blob = self.blob[want:]
120 debug2('next(%d) returned %d\n' % (size, len(out)))
125 class _FileReader(object):
126 def __init__(self, hash, size, isdir, repo_dir=None):
132 self._repo_dir = repo_dir
145 def read(self, count = -1):
147 count = self.size - self.ofs
148 if not self.reader or self.reader.ofs != self.ofs:
149 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs,
152 buf = self.reader.next(count)
155 raise # our offsets will be all screwed up otherwise
164 """Base class for file representation."""
165 def __init__(self, parent, name, mode, hash, repo_dir=None):
170 self.ctime = self.mtime = self.atime = 0
171 self._repo_dir = repo_dir
173 self._metadata = None
176 return "<%s object at %s - name:%r hash:%s parent:%r>" \
177 % (self.__class__, hex(id(self)),
178 self.name, self.hash.encode('hex'),
179 self.parent.name if self.parent else None)
184 return (cmp(a and a.parent, b and b.parent) or
185 cmp(a and a.name, b and b.name))
188 return iter(self.subs())
190 def fullname(self, stop_at=None):
191 """Get this file's full path."""
192 assert(self != stop_at) # would be the empty string; too weird
193 if self.parent and self.parent != stop_at:
194 return os.path.join(self.parent.fullname(stop_at=stop_at),
203 """Get a list of nodes that are contained in this node."""
204 if self._subs == None:
206 return sorted(self._subs.values())
209 """Get node named 'name' that is contained in this node."""
210 if self._subs == None:
212 ret = self._subs.get(name)
214 raise NoSuchFile("no file %r in %r" % (name, self.name))
218 """Return the very top node of the tree."""
220 return self.parent.top()
225 """Return the top node of the particular backup set.
227 If this node isn't inside a backup set, return the root level.
229 if self.parent and not isinstance(self.parent, CommitList):
230 return self.parent.fs_top()
234 def _lresolve(self, parts):
235 #debug2('_lresolve %r in %r\n' % (parts, self.name))
238 (first, rest) = (parts[0], parts[1:])
240 return self._lresolve(rest)
243 raise NoSuchFile("no parent dir for %r" % self.name)
244 return self.parent._lresolve(rest)
246 return self.sub(first)._lresolve(rest)
248 return self.sub(first)
250 def lresolve(self, path, stay_inside_fs=False):
251 """Walk into a given sub-path of this node.
253 If the last element is a symlink, leave it as a symlink, don't resolve
259 if path.startswith('/'):
261 start = self.fs_top()
265 parts = re.split(r'/+', path or '.')
268 #debug2('parts: %r %r\n' % (path, parts))
269 return start._lresolve(parts)
def resolve(self, path = ''):
    """Walk to 'path' like lresolve(), then dereference a trailing symlink."""
    found = self.lresolve(path)
    # Resolving '.' from the node we landed on is what follows the link,
    # should that node be a symlink.
    return found.lresolve('.')
275 def try_resolve(self, path = ''):
276 """Like resolve(), but don't worry if a symlink uses an invalid path.
278 Returns an error if any intermediate nodes were invalid.
280 n = self.lresolve(path)
288 """Get the number of hard links to the current node."""
292 """Get the size of the current node."""
296 """Open the current node. It is an error to open a non-file node."""
297 raise NotFile('%s is not a regular file' % self.name)
299 def _populate_metadata(self, force=False):
300 # Only Dirs contain .bupm files, so by default, do nothing.
304 """Return this Node's Metadata() object, if any."""
305 if not self._metadata and self.parent:
306 self.parent._populate_metadata(force=True)
307 return self._metadata
310 """Release resources that can be automatically restored (at a cost)."""
311 self._metadata = None
316 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode, repo_dir=None):
    """Set up a file node.

    'parent', 'name', 'mode', 'hash' and 'repo_dir' are handed to
    Node.__init__() unchanged.  'bupmode' is stored on the node; it is
    later compared against git.BUP_CHUNKED when opening or sizing the file.
    """
    Node.__init__(self, parent, name, mode, hash, repo_dir)
    # Both caches start empty.
    self._filereader = None
    self._cached_size = None
    self.bupmode = bupmode
325 # You'd think FUSE might call this only once each time a file is
326 # opened, but no; it's really more of a refcount, and it's called
327 # once per read(). Thus, it's important to cache the filereader
328 # object here so we're not constantly re-seeking.
329 if not self._filereader:
330 self._filereader = _FileReader(self.hash, self.size(),
331 self.bupmode == git.BUP_CHUNKED,
332 repo_dir = self._repo_dir)
333 self._filereader.seek(0)
334 return self._filereader
337 """Get this file's size."""
338 if self._cached_size == None:
339 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
340 if self.bupmode == git.BUP_CHUNKED:
341 self._cached_size = _total_size(self.hash,
342 repo_dir = self._repo_dir)
344 self._cached_size = _chunk_len(self.hash,
345 repo_dir = self._repo_dir)
346 debug1('<<<<File.size() done.\n')
347 return self._cached_size
352 """A symbolic link from bup's repository."""
353 def __init__(self, parent, name, hash, bupmode, repo_dir=None):
354 File.__init__(self, parent, name, 0o120000, hash, bupmode,
358 """Get the size of this link, i.e. the length of the path it points at."""
359 return len(self.readlink())
362 """Get the path that this link points at."""
363 return ''.join(cp(self._repo_dir).join(self.hash.encode('hex')))
365 def dereference(self):
366 """Get the node that this link points at.
368 If the path is invalid, raise a NoSuchFile exception. If the level of
369 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
374 raise TooManySymlinks('too many levels of symlinks: %r'
379 return self.parent.lresolve(self.readlink(),
382 raise NoSuchFile("%s: broken symlink to %r"
383 % (self.fullname(), self.readlink()))
387 def _lresolve(self, parts):
388 return self.dereference()._lresolve(parts)
391 class FakeSymlink(Symlink):
392 """A symlink that is not stored in the bup repository."""
393 def __init__(self, parent, name, toname, repo_dir=None):
394 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL,
399 """Get the path that this link points at."""
404 """A directory stored inside of bup's repository."""
406 def __init__(self, *args, **kwargs):
407 Node.__init__(self, *args, **kwargs)
410 def _populate_metadata(self, force=False):
411 if self._metadata and not force:
417 meta_stream = self._bupm.open()
418 dir_meta = metadata.Metadata.read(meta_stream)
420 if not stat.S_ISDIR(sub.mode):
421 sub._metadata = metadata.Metadata.read(meta_stream)
422 self._metadata = dir_meta
426 it = cp(self._repo_dir).get(self.hash.encode('hex'))
430 it = cp(self._repo_dir).get(self.hash.encode('hex') + ':')
432 assert(type == 'tree')
433 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
434 if mangled_name == '.bupm':
435 bupmode = stat.S_ISDIR(mode) and BUP_CHUNKED or BUP_NORMAL
436 self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
439 name, bupmode = git.demangle_name(mangled_name, mode)
440 if bupmode == git.BUP_CHUNKED:
442 if stat.S_ISDIR(mode):
443 self._subs[name] = Dir(self, name, mode, sha, self._repo_dir)
444 elif stat.S_ISLNK(mode):
445 self._subs[name] = Symlink(self, name, sha, bupmode,
448 self._subs[name] = File(self, name, mode, sha, bupmode,
452 """Return this Dir's Metadata() object, if any."""
453 self._populate_metadata()
454 return self._metadata
456 def metadata_file(self):
457 """Return this Dir's .bupm File, if any."""
463 """Release restorable resources held by this node."""
465 super(Dir, self).release()
468 class CommitDir(Node):
469 """A directory that contains all commits that are reachable by a ref.
471 Contains a set of subdirectories named after the commits' first byte in
472 hexadecimal. Each of those directories contains all commits with hashes that
473 start the same as the directory name. The name used for those
474 subdirectories is the hash of the commit without the first byte. This
475 separation helps us avoid having too many directories on the same level as
476 the number of commits grows big.
def __init__(self, parent, name, repo_dir=None):
    """Create the commit directory node 'name' under 'parent'.

    The node is initialized with mode GIT_MODE_TREE and EMPTY_SHA as its
    hash.
    """
    Node.__init__(self, parent, name, mode=GIT_MODE_TREE, hash=EMPTY_SHA,
                  repo_dir=repo_dir)
483 refs = git.list_refs(repo_dir = self._repo_dir)
485 #debug2('ref name: %s\n' % ref[0])
486 revs = git.rev_list(ref[1].encode('hex'), repo_dir = self._repo_dir)
487 for (date, commit) in revs:
488 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
489 commithex = commit.encode('hex')
490 containername = commithex[:2]
491 dirname = commithex[2:]
492 n1 = self._subs.get(containername)
494 n1 = CommitList(self, containername, self._repo_dir)
495 self._subs[containername] = n1
497 if n1.commits.get(dirname):
498 # Stop work for this ref, the rest should already be present
501 n1.commits[dirname] = (commit, date)
504 class CommitList(Node):
505 """A list of commits with hashes that start with the current node's name."""
506 def __init__(self, parent, name, repo_dir=None):
507 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
512 for (name, (hash, date)) in self.commits.items():
513 n1 = Dir(self, name, GIT_MODE_TREE, hash, self._repo_dir)
514 n1.ctime = n1.mtime = date
515 self._subs[name] = n1
519 """A directory that contains all tags in the repository."""
def __init__(self, parent, name, repo_dir = None):
    """Create the tag directory node 'name' under 'parent'.

    The node is initialized with mode GIT_MODE_TREE and EMPTY_SHA as its
    hash.
    """
    Node.__init__(self, parent, name, mode=GIT_MODE_TREE, hash=EMPTY_SHA,
                  repo_dir=repo_dir)
525 for (name, sha) in git.list_refs(repo_dir = self._repo_dir):
526 if name.startswith('refs/tags/'):
528 date = git.get_commit_dates([sha.encode('hex')],
529 repo_dir=self._repo_dir)[0]
530 commithex = sha.encode('hex')
531 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
532 tag1 = FakeSymlink(self, name, target, self._repo_dir)
533 tag1.ctime = tag1.mtime = date
534 self._subs[name] = tag1
537 class BranchList(Node):
538 """A list of links to commits reachable by a branch in bup's repository.
540 Represents each commit as a symlink that points to the commit directory in
541 /.commit/??/ . The symlink is named after the commit date.
def __init__(self, parent, name, hash, repo_dir=None):
    """Create the branch node 'name' under 'parent'.

    'hash' is stored as the node's hash (it is later passed, hex-encoded,
    to git.rev_list()); the node's mode is GIT_MODE_TREE.
    """
    Node.__init__(self, parent, name, mode=GIT_MODE_TREE, hash=hash,
                  repo_dir=repo_dir)
549 revs = list(git.rev_list(self.hash.encode('hex'),
550 repo_dir=self._repo_dir))
552 for (date, commit) in revs:
553 l = time.localtime(date)
554 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
555 commithex = commit.encode('hex')
556 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
557 n1 = FakeSymlink(self, ls, target, self._repo_dir)
558 n1.ctime = n1.mtime = date
561 (date, commit) = latest
562 commithex = commit.encode('hex')
563 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
564 n1 = FakeSymlink(self, 'latest', target, self._repo_dir)
565 n1.ctime = n1.mtime = date
566 self._subs['latest'] = n1
570 """A list of branches in bup's repository.
572 The sub-nodes of the ref list are a series of BranchList nodes, one for
573 each commit hash pointed to by a branch.
575 Also, a special sub-node named '.commit' contains all commit directories
576 that are reachable via a ref (e.g. a branch). See CommitDir for details.
def __init__(self, parent, repo_dir=None):
    """Create the ref list node under 'parent'.

    The node is always named '/', with mode GIT_MODE_TREE and EMPTY_SHA as
    its hash.
    """
    Node.__init__(self, parent, '/', mode=GIT_MODE_TREE, hash=EMPTY_SHA,
                  repo_dir=repo_dir)
584 commit_dir = CommitDir(self, '.commit', self._repo_dir)
585 self._subs['.commit'] = commit_dir
587 tag_dir = TagDir(self, '.tag', self._repo_dir)
588 self._subs['.tag'] = tag_dir
590 refs_info = [(name[11:], sha) for (name,sha)
591 in git.list_refs(repo_dir=self._repo_dir)
592 if name.startswith('refs/heads/')]
593 dates = git.get_commit_dates([sha.encode('hex')
594 for (name, sha) in refs_info],
595 repo_dir=self._repo_dir)
596 for (name, sha), date in zip(refs_info, dates):
597 n1 = BranchList(self, name, sha, self._repo_dir)
598 n1.ctime = n1.mtime = date
599 self._subs[name] = n1