1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.git import BUP_NORMAL, BUP_CHUNKED, cp
10 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
class NodeError(Exception):
    """Common base class for every error raised by the VFS layer."""
    pass
class NoSuchFile(NodeError):
    """Raised when a lookup names a file that does not exist."""
    pass
class NotDir(NodeError):
    """Raised when a directory operation is attempted on a non-directory."""
    pass
class NotFile(NodeError):
    """Raised when a node that is not a file is accessed as one."""
    pass
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing nests too deeply."""
    pass
def _treeget(hash, repo_dir=None):
    """Fetch the tree object `hash` and return its decoded entries.

    `hash` is hex-encoded before it is looked up through cp(repo_dir).
    Returns whatever git.tree_decode() produces for the object's data.

    Raises AssertionError if the object is not a tree.
    """
    it = cp(repo_dir).get(hash.encode('hex'))
    # The first item names the object's type; everything after it is the
    # object's raw data.  The type has to be read from the iterator --
    # comparing the builtin `type` against 'tree' can never succeed.
    obj_type = next(it)
    assert(obj_type == 'tree')
    return git.tree_decode(''.join(it))
def _tree_decode(hash, repo_dir=None):
    """Decode the tree `hash` into a list of (offset, isdir, sha) tuples.

    Every entry name is parsed as a base-16 integer (the offset), and
    `isdir` tells whether the entry's mode is a directory, i.e. whether
    `sha` names a nested tree rather than a blob.

    Raises AssertionError if the entries are not in ascending order.
    """
    tree = [(int(name, 16), stat.S_ISDIR(mode), sha)
            for (mode, name, sha) in _treeget(hash, repo_dir)]
    # Callers locate chunks by position (first/last entry), which only
    # works while the entries are ordered by offset.
    assert(tree == sorted(tree))
    return tree
def _chunk_len(hash, repo_dir=None):
    """Return the combined length of all pieces of the object `hash`."""
    total = 0
    for piece in cp(repo_dir).join(hash.encode('hex')):
        total += len(piece)
    return total
def _last_chunk_info(hash, repo_dir=None):
    """Return (offset, length) of the last chunk below the tree `hash`.

    The last entry of the tree is followed; when it is itself a tree the
    search recurses into it and the nested offset is added to the entry's
    own offset.
    """
    tree = _tree_decode(hash, repo_dir)
    (ofs, isdir, sha) = tree[-1]
    if isdir:
        (subofs, sublen) = _last_chunk_info(sha, repo_dir)
        return (ofs + subofs, sublen)
    # Measure the blob in the same repository the tree was read from.
    return (ofs, _chunk_len(sha, repo_dir))
def _total_size(hash, repo_dir=None):
    """Return the size of the chunked file rooted at the tree `hash`.

    The size is the offset of the last chunk plus that chunk's length.
    """
    tail = _last_chunk_info(hash, repo_dir)
    start, length = tail
    return start + length
def _chunkiter(hash, startofs, repo_dir=None):
    """Yield the data stored under the tree `hash`, starting at `startofs`.

    `startofs` is a byte offset relative to the start of this tree.  Blob
    entries are yielded as strings; nested trees are walked recursively.
    An empty tree yields nothing.
    """
    tree = _tree_decode(hash, repo_dir)

    # skip elements before startofs: settle on the last entry whose
    # successor does not start beyond startofs
    first = 0
    while first + 1 < len(tree) and tree[first + 1][0] <= startofs:
        first += 1

    # iterate through what's left
    for (ofs, isdir, sha) in tree[first:]:
        # Only the first entry can begin before startofs; for every later
        # entry the difference is negative and nothing must be skipped.
        skipmore = max(startofs - ofs, 0)
        if isdir:
            for b in _chunkiter(sha, skipmore, repo_dir):
                yield b
        else:
            yield ''.join(cp(repo_dir).join(sha.encode('hex')))[skipmore:]
def __init__(self, hash, isdir, startofs, repo_dir=None):
    """Prepare to read the object `hash` from byte `startofs` onward.

    When `isdir` is true the object is a tree of chunks and data is
    pulled lazily from _chunkiter(); otherwise it is a single blob that
    is loaded at once.  `ofs` records the position of the next byte that
    next() will return.
    """
    if isdir:
        self.it = _chunkiter(hash, startofs, repo_dir)
        self.blob = None
    else:
        self.it = None
        self.blob = ''.join(cp(repo_dir).join(hash.encode('hex')))[startofs:]
    self.ofs = startofs
def next(self, size):
    """Return the next `size` bytes, or fewer once the data runs out.

    Advances `self.ofs` by the number of bytes returned.
    """
    pieces = []
    remaining = size
    while remaining > 0:
        if self.it and not self.blob:
            # Current blob is used up: fetch the next one, if any.
            try:
                self.blob = self.it.next()
            except StopIteration:
                self.it = None
        if self.blob:
            piece = self.blob[:remaining]
            self.blob = self.blob[len(piece):]
            pieces.append(piece)
            remaining -= len(piece)
        if not self.it:
            # Nothing left to refill from; return what we have.
            break
    out = ''.join(pieces)
    debug2('next(%d) returned %d\n' % (size, len(out)))
    self.ofs += len(out)
    return out
class _FileReader(object):
    """Minimal seekable, read-only file object over a stored file.

    `hash` names the stored object, `size` is its length in bytes and
    `isdir` tells whether the object is a tree of chunks (passed on to
    _ChunkReader).
    """
    def __init__(self, hash, size, isdir, repo_dir=None):
        self.hash = hash
        self.ofs = 0
        self.size = size
        self.isdir = isdir
        self.reader = None  # created lazily by read()
        self._repo_dir = repo_dir

    def seek(self, ofs):
        """Move to absolute offset `ofs`, clamped to the range [0, size]."""
        self.ofs = min(max(ofs, 0), self.size)

    def tell(self):
        """Return the current offset."""
        return self.ofs

    def read(self, count = -1):
        """Read `count` bytes from the current offset (all the rest if < 0)."""
        if count < 0:
            count = self.size - self.ofs
        # Reuse the chunk reader only while it sits exactly at our offset;
        # after a seek a fresh one has to be positioned.
        if not self.reader or self.reader.ofs != self.ofs:
            self.reader = _ChunkReader(self.hash, self.isdir, self.ofs,
                                       self._repo_dir)
        try:
            buf = self.reader.next(count)
        except:
            # Deliberately broad: the error is re-raised, we only make
            # sure the half-advanced reader is not reused.
            self.reader = None
            raise  # our offsets will be all screwed up otherwise
        self.ofs += len(buf)
        return buf

    def close(self):
        """No-op, provided for file-object compatibility."""
        pass
162 """Base class for file representation."""
163 def __init__(self, parent, name, mode, hash, repo_dir=None):
168 self.ctime = self.mtime = self.atime = 0
169 self._repo_dir = repo_dir
171 self._metadata = None
174 return "<%s object at %s - name:%r hash:%s parent:%r>" \
175 % (self.__class__, hex(id(self)),
176 self.name, self.hash.encode('hex'),
177 self.parent.name if self.parent else None)
182 return (cmp(a and a.parent, b and b.parent) or
183 cmp(a and a.name, b and b.name))
186 return iter(self.subs())
def fullname(self, stop_at=None):
    """Get this file's full path.

    The names of this node and its ancestors are joined with
    os.path.join().  Ancestors are followed until a node without a parent
    is reached, or until the parent is `stop_at`, which is itself left
    out of the result.
    """
    assert(self != stop_at)  # would be the empty string; too weird
    if self.parent and self.parent != stop_at:
        return os.path.join(self.parent.fullname(stop_at=stop_at),
                            self.name)
    return self.name
201 """Get a list of nodes that are contained in this node."""
202 if self._subs == None:
204 return sorted(self._subs.values())
207 """Get node named 'name' that is contained in this node."""
208 if self._subs == None:
210 ret = self._subs.get(name)
212 raise NoSuchFile("no file %r in %r" % (name, self.name))
216 """Return the very top node of the tree."""
218 return self.parent.top()
223 """Return the top node of the particular backup set.
225 If this node isn't inside a backup set, return the root level.
227 if self.parent and not isinstance(self.parent, CommitList):
228 return self.parent.fs_top()
def _lresolve(self, parts):
    """Walk the path components in `parts`, starting at this node.

    '.' stays on the current node and '..' moves to the parent; any other
    component is looked up with sub().  The node found for the last
    component is returned as it is.  An empty `parts` returns this node.

    Raises NoSuchFile if '..' is used on a node that has no parent.
    """
    #debug2('_lresolve %r in %r\n' % (parts, self.name))
    if not parts:
        return self
    (first, rest) = (parts[0], parts[1:])
    if first == '.':
        return self._lresolve(rest)
    if first == '..':
        if not self.parent:
            raise NoSuchFile("no parent dir for %r" % self.name)
        return self.parent._lresolve(rest)
    if rest:
        return self.sub(first)._lresolve(rest)
    return self.sub(first)
248 def lresolve(self, path, stay_inside_fs=False):
249 """Walk into a given sub-path of this node.
251 If the last element is a symlink, leave it as a symlink, don't resolve
257 if path.startswith('/'):
259 start = self.fs_top()
263 parts = re.split(r'/+', path or '.')
266 #debug2('parts: %r %r\n' % (path, parts))
267 return start._lresolve(parts)
def resolve(self, path = ''):
    """Resolve `path` as lresolve() does, then follow a final symlink."""
    found = self.lresolve(path)
    return found.lresolve('.')
def try_resolve(self, path = ''):
    """Like resolve(), but don't worry if a symlink uses an invalid path.

    When the final dereference fails with NoSuchFile, the node found by
    lresolve() is returned instead.  Errors raised while walking `path`
    itself are not caught and propagate to the caller.
    """
    n = self.lresolve(path)
    try:
        n = n.lresolve('.')
    except NoSuchFile:
        # Dangling link: hand back the link node itself.
        pass
    return n
286 """Get the number of hard links to the current node."""
290 """Get the size of the current node."""
294 """Open the current node. It is an error to open a non-file node."""
295 raise NotFile('%s is not a regular file' % self.name)
297 def _populate_metadata(self, force=False):
298 # Only Dirs contain .bupm files, so by default, do nothing.
302 """Return this Node's Metadata() object, if any."""
303 if not self._metadata and self.parent:
304 self.parent._populate_metadata(force=True)
305 return self._metadata
308 """Release resources that can be automatically restored (at a cost)."""
309 self._metadata = None
314 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode, repo_dir=None):
    """Create a file node.

    `bupmode` is kept so it can later be compared against
    git.BUP_CHUNKED.  The size and the file reader are computed on
    demand and cached, so both start out unset.
    """
    Node.__init__(self, parent, name, mode, hash, repo_dir)
    self._filereader = None
    self._cached_size = None
    self.bupmode = bupmode
323 # You'd think FUSE might call this only once each time a file is
324 # opened, but no; it's really more of a refcount, and it's called
325 # once per read(). Thus, it's important to cache the filereader
326 # object here so we're not constantly re-seeking.
327 if not self._filereader:
328 self._filereader = _FileReader(self.hash, self.size(),
329 self.bupmode == git.BUP_CHUNKED,
330 repo_dir = self._repo_dir)
331 self._filereader.seek(0)
332 return self._filereader
335 """Get this file's size."""
336 if self._cached_size == None:
337 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
338 if self.bupmode == git.BUP_CHUNKED:
339 self._cached_size = _total_size(self.hash,
340 repo_dir = self._repo_dir)
342 self._cached_size = _chunk_len(self.hash,
343 repo_dir = self._repo_dir)
344 debug1('<<<<File.size() done.\n')
345 return self._cached_size
350 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode, repo_dir=None):
    """Create a symlink node; its mode is always the symlink file type."""
    # stat.S_IFLNK is the named form of the octal constant 0120000.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode,
                  repo_dir = repo_dir)
"""Get the size of this link, i.e. the length of the path it points at."""
357 return len(self.readlink())
360 """Get the path that this link points at."""
361 return ''.join(cp(self._repo_dir).join(self.hash.encode('hex')))
363 def dereference(self):
364 """Get the node that this link points at.
366 If the path is invalid, raise a NoSuchFile exception. If the level of
367 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
372 raise TooManySymlinks('too many levels of symlinks: %r'
377 return self.parent.lresolve(self.readlink(),
380 raise NoSuchFile("%s: broken symlink to %r"
381 % (self.fullname(), self.readlink()))
385 def _lresolve(self, parts):
386 return self.dereference()._lresolve(parts)
class FakeSymlink(Symlink):
    """A symlink that is not stored in the bup repository.

    The target path is supplied by the creator and kept on the node
    rather than being read from a repository object.
    """
    def __init__(self, parent, name, toname, repo_dir=None):
        # There is no backing object, hence the placeholder hash.
        Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL,
                         repo_dir = repo_dir)
        self.toname = toname

    def readlink(self):
        """Get the path that this link points at."""
        return self.toname
402 """A directory stored inside of bup's repository."""
def __init__(self, *args, **kwargs):
    """Create a directory node; all arguments are passed on to Node."""
    Node.__init__(self, *args, **kwargs)
    # The .bupm file is only assigned when such an entry is found among
    # the directory's tree entries; until then there is none.
    self._bupm = None
408 def _populate_metadata(self, force=False):
409 if self._metadata and not force:
415 meta_stream = self._bupm.open()
416 dir_meta = metadata.Metadata.read(meta_stream)
418 if not stat.S_ISDIR(sub.mode):
419 sub._metadata = metadata.Metadata.read(meta_stream)
420 self._metadata = dir_meta
424 it = cp(self._repo_dir).get(self.hash.encode('hex'))
428 it = cp(self._repo_dir).get(self.hash.encode('hex') + ':')
430 assert(type == 'tree')
431 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
432 if mangled_name == '.bupm':
433 bupmode = stat.S_ISDIR(mode) and BUP_CHUNKED or BUP_NORMAL
434 self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
438 (name,bupmode) = git.demangle_name(mangled_name)
439 if bupmode == git.BUP_CHUNKED:
441 if stat.S_ISDIR(mode):
442 self._subs[name] = Dir(self, name, mode, sha, self._repo_dir)
443 elif stat.S_ISLNK(mode):
444 self._subs[name] = Symlink(self, name, sha, bupmode,
447 self._subs[name] = File(self, name, mode, sha, bupmode,
451 """Return this Dir's Metadata() object, if any."""
452 self._populate_metadata()
453 return self._metadata
455 def metadata_file(self):
456 """Return this Dir's .bupm File, if any."""
462 """Release restorable resources held by this node."""
464 super(Dir, self).release()
467 class CommitDir(Node):
468 """A directory that contains all commits that are reachable by a ref.
Contains a set of subdirectories named after the commits' first byte in
hexadecimal. Each of those directories contains all commits with hashes that
start the same as the directory name. The name used for those
subdirectories is the hash of the commit without the first byte. This
separation helps us avoid having too many directories on the same level as
the number of commits grows big.
477 def __init__(self, parent, name, repo_dir=None):
478 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
482 refs = git.list_refs(repo_dir = self._repo_dir)
484 #debug2('ref name: %s\n' % ref[0])
485 revs = git.rev_list(ref[1].encode('hex'), repo_dir = self._repo_dir)
486 for (date, commit) in revs:
487 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
488 commithex = commit.encode('hex')
489 containername = commithex[:2]
490 dirname = commithex[2:]
491 n1 = self._subs.get(containername)
493 n1 = CommitList(self, containername, self._repo_dir)
494 self._subs[containername] = n1
496 if n1.commits.get(dirname):
497 # Stop work for this ref, the rest should already be present
500 n1.commits[dirname] = (commit, date)
503 class CommitList(Node):
504 """A list of commits with hashes that start with the current node's name."""
505 def __init__(self, parent, name, repo_dir=None):
506 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA, repo_dir)
511 for (name, (hash, date)) in self.commits.items():
512 n1 = Dir(self, name, GIT_MODE_TREE, hash, self._repo_dir)
513 n1.ctime = n1.mtime = date
514 self._subs[name] = n1
518 """A directory that contains all tags in the repository."""
def __init__(self, parent, name, repo_dir = None):
    """Create the tag directory as a tree node with the placeholder hash."""
    Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA,
                  repo_dir=repo_dir)
524 for (name, sha) in git.list_refs(repo_dir = self._repo_dir):
525 if name.startswith('refs/tags/'):
527 date = git.get_commit_dates([sha.encode('hex')],
528 repo_dir=self._repo_dir)[0]
529 commithex = sha.encode('hex')
530 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
531 tag1 = FakeSymlink(self, name, target, repo_dir, self._repo_dir)
532 tag1.ctime = tag1.mtime = date
533 self._subs[name] = tag1
536 class BranchList(Node):
537 """A list of links to commits reachable by a branch in bup's repository.
539 Represents each commit as a symlink that points to the commit directory in
540 /.commit/??/ . The symlink is named after the commit date.
542 def __init__(self, parent, name, hash, repo_dir=None):
543 Node.__init__(self, parent, name, GIT_MODE_TREE, hash, repo_dir)
548 tags = git.tags(repo_dir = self._repo_dir)
550 revs = list(git.rev_list(self.hash.encode('hex'),
551 repo_dir=self._repo_dir))
553 for (date, commit) in revs:
554 l = time.localtime(date)
555 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
556 commithex = commit.encode('hex')
557 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
558 n1 = FakeSymlink(self, ls, target, self._repo_dir)
559 n1.ctime = n1.mtime = date
562 for tag in tags.get(commit, []):
563 t1 = FakeSymlink(self, tag, target, self._repo_dir)
564 t1.ctime = t1.mtime = date
567 (date, commit) = latest
568 commithex = commit.encode('hex')
569 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
570 n1 = FakeSymlink(self, 'latest', target, self._repo_dir)
571 n1.ctime = n1.mtime = date
572 self._subs['latest'] = n1
576 """A list of branches in bup's repository.
The sub-nodes of the ref list are one BranchList per branch, each created
from the commit hash the branch points to.
581 Also, a special sub-node named '.commit' contains all commit directories
582 that are reachable via a ref (e.g. a branch). See CommitDir for details.
def __init__(self, parent, repo_dir=None):
    """Create the root node, named '/', as a tree with the placeholder hash."""
    Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA,
                  repo_dir=repo_dir)
590 commit_dir = CommitDir(self, '.commit', self._repo_dir)
591 self._subs['.commit'] = commit_dir
593 tag_dir = TagDir(self, '.tag', self._repo_dir)
594 self._subs['.tag'] = tag_dir
596 refs_info = [(name[11:], sha) for (name,sha)
597 in git.list_refs(repo_dir=self._repo_dir)
598 if name.startswith('refs/heads/')]
599 dates = git.get_commit_dates([sha.encode('hex')
600 for (name, sha) in refs_info],
601 repo_dir=self._repo_dir)
602 for (name, sha), date in zip(refs_info, dates):
603 n1 = BranchList(self, name, sha, self._repo_dir)
604 n1.ctime = n1.mtime = date
605 self._subs[name] = n1