1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.git import BUP_NORMAL, BUP_CHUNKED, cp
10 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
class NodeError(Exception):
    """Common base class for every error raised by the VFS layer."""
class NoSuchFile(NodeError):
    """Raised when a requested file or sub-node does not exist."""
class NotDir(NodeError):
    """Raised when a directory operation is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that is not a regular file is used as one."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing nests too deeply."""
36 def _treeget(hash, repo_dir=None):
37 it = cp(repo_dir).get(hash.encode('hex'))
39 assert(type == 'tree')
40 return git.tree_decode(''.join(it))
43 def _tree_decode(hash, repo_dir=None):
44 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
46 in _treeget(hash, repo_dir)]
47 assert(tree == list(sorted(tree)))
def _chunk_len(hash, repo_dir=None):
    """Return the combined length of all pieces of the object `hash`.

    `hash` is hex-encoded before being handed to the cat-pipe obtained
    from cp(repo_dir); the lengths of the pieces yielded by its join()
    are added up without concatenating them.
    """
    hexid = hash.encode('hex')
    total = 0
    for piece in cp(repo_dir).join(hexid):
        total += len(piece)
    return total
55 def _last_chunk_info(hash, repo_dir=None):
56 tree = _tree_decode(hash, repo_dir)
58 (ofs,isdir,sha) = tree[-1]
60 (subofs, sublen) = _last_chunk_info(sha, repo_dir)
61 return (ofs+subofs, sublen)
63 return (ofs, _chunk_len(sha))
def _total_size(hash, repo_dir=None):
    """Return the total size of the chunked object `hash`.

    The size is the offset of the last chunk, as reported by
    _last_chunk_info(), plus the length of that chunk.
    """
    offset, length = _last_chunk_info(hash, repo_dir)
    total = offset + length
    return total
71 def _chunkiter(hash, startofs, repo_dir=None):
73 tree = _tree_decode(hash, repo_dir)
75 # skip elements before startofs
76 for i in xrange(len(tree)):
77 if i+1 >= len(tree) or tree[i+1][0] > startofs:
81 # iterate through what's left
82 for i in xrange(first, len(tree)):
83 (ofs,isdir,sha) = tree[i]
84 skipmore = startofs-ofs
88 for b in _chunkiter(sha, skipmore, repo_dir):
91 yield ''.join(cp(repo_dir).join(sha.encode('hex')))[skipmore:]
95 def __init__(self, hash, isdir, startofs, repo_dir=None):
97 self.it = _chunkiter(hash, startofs, repo_dir)
101 self.blob = ''.join(cp(repo_dir).join(hash.encode('hex')))[startofs:]
104 def next(self, size):
106 while len(out) < size:
107 if self.it and not self.blob:
109 self.blob = self.it.next()
110 except StopIteration:
113 want = size - len(out)
114 out += self.blob[:want]
115 self.blob = self.blob[want:]
118 debug2('next(%d) returned %d\n' % (size, len(out)))
123 class _FileReader(object):
124 def __init__(self, hash, size, isdir, repo_dir=None):
130 self._repo_dir = repo_dir
143 def read(self, count = -1):
145 count = self.size - self.ofs
146 if not self.reader or self.reader.ofs != self.ofs:
147 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs,
150 buf = self.reader.next(count)
153 raise # our offsets will be all screwed up otherwise
162 """Base class for file representation."""
163 def __init__(self, parent, name, mode, hash, repo_dir=None):
168 self.ctime = self.mtime = self.atime = 0
169 self._repo_dir = repo_dir
171 self._metadata = None
174 return "<%s object at %s - name:%r hash:%s parent:%r>" \
175 % (self.__class__, hex(id(self)),
176 self.name, self.hash.encode('hex'),
177 self.parent.name if self.parent else None)
182 return (cmp(a and a.parent, b and b.parent) or
183 cmp(a and a.name, b and b.name))
186 return iter(self.subs())
188 def fullname(self, stop_at=None):
189 """Get this file's full path."""
190 assert(self != stop_at) # would be the empty string; too weird
191 if self.parent and self.parent != stop_at:
192 return os.path.join(self.parent.fullname(stop_at=stop_at),
201 """Get a list of nodes that are contained in this node."""
202 if self._subs == None:
204 return sorted(self._subs.values())
207 """Get node named 'name' that is contained in this node."""
208 if self._subs == None:
210 ret = self._subs.get(name)
212 raise NoSuchFile("no file %r in %r" % (name, self.name))
216 """Return the very top node of the tree."""
218 return self.parent.top()
223 """Return the top node of the particular backup set.
225 If this node isn't inside a backup set, return the root level.
227 if self.parent and not isinstance(self.parent, CommitList):
228 return self.parent.fs_top()
232 def _lresolve(self, parts):
233 #debug2('_lresolve %r in %r\n' % (parts, self.name))
236 (first, rest) = (parts[0], parts[1:])
238 return self._lresolve(rest)
241 raise NoSuchFile("no parent dir for %r" % self.name)
242 return self.parent._lresolve(rest)
244 return self.sub(first)._lresolve(rest)
246 return self.sub(first)
248 def lresolve(self, path, stay_inside_fs=False):
249 """Walk into a given sub-path of this node.
251 If the last element is a symlink, leave it as a symlink, don't resolve
257 if path.startswith('/'):
259 start = self.fs_top()
263 parts = re.split(r'/+', path or '.')
266 #debug2('parts: %r %r\n' % (path, parts))
267 return start._lresolve(parts)
def resolve(self, path = ''):
    """Walk to `path` like lresolve(), then dereference a trailing symlink.

    The node found by lresolve() is asked to resolve '.' once more, so a
    symlink in the final position is followed instead of being returned.
    """
    found = self.lresolve(path)
    return found.lresolve('.')
273 def try_resolve(self, path = ''):
274 """Like resolve(), but don't worry if a symlink uses an invalid path.
276 Returns an error if any intermediate nodes were invalid.
278 n = self.lresolve(path)
286 """Get the number of hard links to the current node."""
290 """Get the size of the current node."""
294 """Open the current node. It is an error to open a non-file node."""
295 raise NotFile('%s is not a regular file' % self.name)
297 def _populate_metadata(self, force=False):
298 # Only Dirs contain .bupm files, so by default, do nothing.
302 """Return this Node's Metadata() object, if any."""
303 if not self._metadata and self.parent:
304 self.parent._populate_metadata(force=True)
305 return self._metadata
308 """Release resources that can be automatically restored (at a cost)."""
309 self._metadata = None
314 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode, repo_dir=None):
    """Initialise the generic Node state, then the File-specific fields.

    `bupmode` is stored as given; the rest of the class compares it
    against git.BUP_CHUNKED to decide how the content is read.
    """
    Node.__init__(self, parent, name, mode, hash, repo_dir=repo_dir)
    # Lazily filled caches: nothing is computed or opened up front.
    self._filereader = None
    self._cached_size = None
    self.bupmode = bupmode
323 # You'd think FUSE might call this only once each time a file is
324 # opened, but no; it's really more of a refcount, and it's called
325 # once per read(). Thus, it's important to cache the filereader
326 # object here so we're not constantly re-seeking.
327 if not self._filereader:
328 self._filereader = _FileReader(self.hash, self.size(),
329 self.bupmode == git.BUP_CHUNKED,
330 repo_dir = self._repo_dir)
331 self._filereader.seek(0)
332 return self._filereader
335 """Get this file's size."""
336 if self._cached_size == None:
337 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
338 if self.bupmode == git.BUP_CHUNKED:
339 self._cached_size = _total_size(self.hash,
340 repo_dir = self._repo_dir)
342 self._cached_size = _chunk_len(self.hash,
343 repo_dir = self._repo_dir)
344 debug1('<<<<File.size() done.\n')
345 return self._cached_size
350 """A symbolic link from bup's repository."""
351 def __init__(self, parent, name, hash, bupmode, repo_dir=None):
352 File.__init__(self, parent, name, 0120000, hash, bupmode,
"""Get the length of the path that this link points at."""
357 return len(self.readlink())
360 """Get the path that this link points at."""
361 return ''.join(cp(self._repo_dir).join(self.hash.encode('hex')))
363 def dereference(self):
364 """Get the node that this link points at.
366 If the path is invalid, raise a NoSuchFile exception. If the level of
367 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
372 raise TooManySymlinks('too many levels of symlinks: %r'
377 return self.parent.lresolve(self.readlink(),
380 raise NoSuchFile("%s: broken symlink to %r"
381 % (self.fullname(), self.readlink()))
def _lresolve(self, parts):
    """Continue resolving `parts` from the node this symlink points at."""
    target = self.dereference()
    return target._lresolve(parts)
389 class FakeSymlink(Symlink):
390 """A symlink that is not stored in the bup repository."""
391 def __init__(self, parent, name, toname, repo_dir=None):
392 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL,
397 """Get the path that this link points at."""
402 """A directory stored inside of bup's repository."""
404 def __init__(self, *args, **kwargs):
405 Node.__init__(self, *args, **kwargs)
408 def _populate_metadata(self, force=False):
409 if self._metadata and not force:
415 meta_stream = self._bupm.open()
416 dir_meta = metadata.Metadata.read(meta_stream)
418 if not stat.S_ISDIR(sub.mode):
419 sub._metadata = metadata.Metadata.read(meta_stream)
420 self._metadata = dir_meta
424 it = cp(self._repo_dir).get(self.hash.encode('hex'))
428 it = cp(self._repo_dir).get(self.hash.encode('hex') + ':')
430 assert(type == 'tree')
431 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
432 if mangled_name == '.bupm':
433 bupmode = stat.S_ISDIR(mode) and BUP_CHUNKED or BUP_NORMAL
434 self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
437 name, bupmode = git.demangle_name(mangled_name, mode)
438 if bupmode == git.BUP_CHUNKED:
440 if stat.S_ISDIR(mode):
441 self._subs[name] = Dir(self, name, mode, sha, self._repo_dir)
442 elif stat.S_ISLNK(mode):
443 self._subs[name] = Symlink(self, name, sha, bupmode,
446 self._subs[name] = File(self, name, mode, sha, bupmode,
450 """Return this Dir's Metadata() object, if any."""
451 self._populate_metadata()
452 return self._metadata
454 def metadata_file(self):
455 """Return this Dir's .bupm File, if any."""
461 """Release restorable resources held by this node."""
463 super(Dir, self).release()
466 class CommitDir(Node):
467 """A directory that contains all commits that are reachable by a ref.
Contains a set of subdirectories named after the commits' first byte in
hexadecimal. Each of those directories contains all commits with hashes that
start the same as the directory name. The name used for those
subdirectories is the hash of the commit without the first byte. This
separation helps us avoid having too many directories on the same level as
the number of commits grows big.
476 def __init__(self, parent, name, repo_dir=None):
477 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
481 refs = git.list_refs(repo_dir = self._repo_dir)
483 #debug2('ref name: %s\n' % ref[0])
484 revs = git.rev_list(ref[1].encode('hex'), repo_dir = self._repo_dir)
485 for (date, commit) in revs:
486 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
487 commithex = commit.encode('hex')
488 containername = commithex[:2]
489 dirname = commithex[2:]
490 n1 = self._subs.get(containername)
492 n1 = CommitList(self, containername, self._repo_dir)
493 self._subs[containername] = n1
495 if n1.commits.get(dirname):
496 # Stop work for this ref, the rest should already be present
499 n1.commits[dirname] = (commit, date)
502 class CommitList(Node):
503 """A list of commits with hashes that start with the current node's name."""
504 def __init__(self, parent, name, repo_dir=None):
505 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
510 for (name, (hash, date)) in self.commits.items():
511 n1 = Dir(self, name, GIT_MODE_TREE, hash, self._repo_dir)
512 n1.ctime = n1.mtime = date
513 self._subs[name] = n1
517 """A directory that contains all tags in the repository."""
518 def __init__(self, parent, name, repo_dir = None):
519 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
523 for (name, sha) in git.list_refs(repo_dir = self._repo_dir):
524 if name.startswith('refs/tags/'):
526 date = git.get_commit_dates([sha.encode('hex')],
527 repo_dir=self._repo_dir)[0]
528 commithex = sha.encode('hex')
529 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
530 tag1 = FakeSymlink(self, name, target, self._repo_dir)
531 tag1.ctime = tag1.mtime = date
532 self._subs[name] = tag1
535 class BranchList(Node):
536 """A list of links to commits reachable by a branch in bup's repository.
538 Represents each commit as a symlink that points to the commit directory in
539 /.commit/??/ . The symlink is named after the commit date.
541 def __init__(self, parent, name, hash, repo_dir=None):
542 Node.__init__(self, parent, name, GIT_MODE_TREE, hash, repo_dir)
547 revs = list(git.rev_list(self.hash.encode('hex'),
548 repo_dir=self._repo_dir))
550 for (date, commit) in revs:
551 l = time.localtime(date)
552 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
553 commithex = commit.encode('hex')
554 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
555 n1 = FakeSymlink(self, ls, target, self._repo_dir)
556 n1.ctime = n1.mtime = date
559 (date, commit) = latest
560 commithex = commit.encode('hex')
561 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
562 n1 = FakeSymlink(self, 'latest', target, self._repo_dir)
563 n1.ctime = n1.mtime = date
564 self._subs['latest'] = n1
568 """A list of branches in bup's repository.
The sub-nodes of the ref list are a series of BranchList nodes, one for each
branch (refs/heads/*) in the repository.
573 Also, a special sub-node named '.commit' contains all commit directories
574 that are reachable via a ref (e.g. a branch). See CommitDir for details.
576 def __init__(self, parent, repo_dir=None):
577 Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA, repo_dir)
582 commit_dir = CommitDir(self, '.commit', self._repo_dir)
583 self._subs['.commit'] = commit_dir
585 tag_dir = TagDir(self, '.tag', self._repo_dir)
586 self._subs['.tag'] = tag_dir
588 refs_info = [(name[11:], sha) for (name,sha)
589 in git.list_refs(repo_dir=self._repo_dir)
590 if name.startswith('refs/heads/')]
591 dates = git.get_commit_dates([sha.encode('hex')
592 for (name, sha) in refs_info],
593 repo_dir=self._repo_dir)
594 for (name, sha), date in zip(refs_info, dates):
595 n1 = BranchList(self, name, sha, self._repo_dir)
596 n1.ctime = n1.mtime = date
597 self._subs[name] = n1