1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.git import BUP_NORMAL, BUP_CHUNKED, cp
10 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
class NodeError(Exception):
    """Common base class for every error raised by the VFS layer."""
class NoSuchFile(NodeError):
    """Raised when a requested file does not exist."""
class NotDir(NodeError):
    """Raised when a directory operation is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that is not a regular file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing nests too deeply."""
def _treeget(hash):
    """Fetch the object named by the binary id 'hash' and decode it as a tree.

    The first item produced by cp().get() is the object type, which must be
    'tree'; the remaining items are joined and handed to git.tree_decode().
    """
    stream = cp().get(hash.encode('hex'))
    kind = next(stream)
    assert(kind == 'tree')
    payload = ''.join(stream)
    return git.tree_decode(payload)


def _tree_decode(hash):
    """Return the tree 'hash' as a list of (offset, isdir, sha) tuples.

    Each entry name is parsed as a hexadecimal integer (callers treat it as a
    byte offset), and the mode is reduced to an "is a directory" flag.  The
    result is asserted to be already sorted.
    """
    entries = []
    for (mode, name, sha) in _treeget(hash):
        entries.append((int(name, 16), stat.S_ISDIR(mode), sha))
    assert(entries == sorted(entries))
    return entries
def _chunk_len(hash):
    """Return the total length of the data that cp().join() yields for 'hash'."""
    total = 0
    for piece in cp().join(hash.encode('hex')):
        total += len(piece)
    return total
def _last_chunk_info(hash):
    """Return (offset, length) of the final chunk below the tree 'hash'.

    Follows the last entry of each tree level downwards, summing the entry
    offsets on the way, until it reaches a non-directory entry whose length
    is measured with _chunk_len().
    """
    base = 0
    while True:
        tree = _tree_decode(hash)
        assert(tree)
        (ofs, isdir, sha) = tree[-1]
        base += ofs
        if not isdir:
            return (base, _chunk_len(sha))
        # Descend into the last subtree; its offsets are relative to 'base'.
        hash = sha
def _total_size(hash):
    """Return the size of a chunked file: last chunk's offset plus its length."""
    offset, length = _last_chunk_info(hash)
    return offset + length
def _chunkiter(hash, startofs):
    """Yield the data below the tree 'hash', starting at offset 'startofs'.

    'startofs' is relative to the start of this tree and must be >= 0.
    Entries that end before 'startofs' are skipped; the first entry that is
    used is trimmed so that the first yielded string begins exactly at
    'startofs'.  Subtrees are walked recursively.  An empty tree yields
    nothing.
    """
    assert(startofs >= 0)
    tree = _tree_decode(hash)

    # Find the last entry whose offset is <= startofs (entry 0 if none is).
    # Starting from a preset 0 keeps this well defined for an empty tree.
    first = 0
    for idx, entry in enumerate(tree):
        if idx and entry[0] > startofs:
            break
        first = idx

    # Emit everything from that entry onwards.
    for (ofs, isdir, sha) in tree[first:]:
        # Only the first entry can start before startofs; later ones get 0.
        skipmore = max(startofs - ofs, 0)
        if isdir:
            for b in _chunkiter(sha, skipmore):
                yield b
        else:
            yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
class _ChunkReader:
    """Sequential reader over one stored object, starting at a given offset.

    For a chunked object (isdir true) data comes from _chunkiter(); otherwise
    the whole blob is loaded and sliced at 'startofs'.  'ofs' tracks the
    offset of the next byte that next() will return.
    """
    def __init__(self, hash, isdir, startofs):
        self.ofs = startofs
        if isdir:
            self.it = _chunkiter(hash, startofs)
            self.blob = None
        else:
            self.it = None
            self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]

    def next(self, size):
        """Return up to 'size' bytes; fewer only when the data runs out."""
        pieces = []
        have = 0
        while have < size:
            # Refill the current blob from the iterator when it is used up.
            if self.it and not self.blob:
                try:
                    self.blob = next(self.it)
                except StopIteration:
                    self.it = None
            if self.blob:
                want = size - have
                piece = self.blob[:want]
                self.blob = self.blob[want:]
                pieces.append(piece)
                have += len(piece)
            if not self.it:
                break
        out = ''.join(pieces)
        debug2('next(%d) returned %d\n' % (size, len(out)))
        self.ofs += len(out)
        return out
123 class _FileReader(object):
124 def __init__(self, hash, size, isdir):
142 def read(self, count = -1):
144 count = self.size - self.ofs
145 if not self.reader or self.reader.ofs != self.ofs:
146 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
148 buf = self.reader.next(count)
151 raise # our offsets will be all screwed up otherwise
class Node(object):
    """Base class for file representation.

    A node has a parent (None for the root), a name, a mode and an object
    hash.  Children are created on demand by _mksubs() and cached in the
    _subs dict, keyed by name; _subs is None while they are not loaded.
    """
    def __init__(self, parent, name, mode, hash):
        self.parent = parent
        self.name = name
        self.mode = mode
        self.hash = hash
        self.ctime = self.mtime = self.atime = 0
        self._subs = None
        self._metadata = None

    def __repr__(self):
        return "<%s object at %s - name:%r hash:%s parent:%r>" \
            % (self.__class__, hex(id(self)),
               self.name, self.hash.encode('hex'),
               self.parent.name if self.parent else None)

    def __cmp__(a, b):
        """Order nodes by parent first, then by name."""
        if a is b:
            return 0
        return (cmp(a and a.parent, b and b.parent) or
                cmp(a and a.name, b and b.name))

    def __iter__(self):
        return iter(self.subs())

    def fullname(self, stop_at=None):
        """Get this file's full path.

        Parent names are joined with os.path.join() up to, but not
        including, the node 'stop_at' (or up to the root when it is None).
        """
        assert(self != stop_at)  # would be the empty string; too weird
        if self.parent and self.parent != stop_at:
            return os.path.join(self.parent.fullname(stop_at=stop_at),
                                self.name)
        else:
            return self.name

    def _mksubs(self):
        """Populate self._subs; the base implementation has no children."""
        self._subs = {}

    def subs(self):
        """Get a sorted list of nodes that are contained in this node."""
        if self._subs is None:
            self._mksubs()
        return sorted(self._subs.values())

    def sub(self, name):
        """Get node named 'name' that is contained in this node.

        Raises NoSuchFile if there is no such child.
        """
        if self._subs is None:
            self._mksubs()
        ret = self._subs.get(name)
        if not ret:
            raise NoSuchFile("no file %r in %r" % (name, self.name))
        return ret

    def top(self):
        """Return the very top node of the tree."""
        if self.parent:
            return self.parent.top()
        else:
            return self

    def fs_top(self):
        """Return the top node of the particular backup set.

        If this node isn't inside a backup set, return the root level.
        """
        if self.parent and not isinstance(self.parent, CommitList):
            return self.parent.fs_top()
        else:
            return self

    def _lresolve(self, parts):
        """Walk the path components 'parts' starting from this node."""
        #debug2('_lresolve %r in %r\n' % (parts, self.name))
        if not parts:
            return self
        (first, rest) = (parts[0], parts[1:])
        if first == '.':
            return self._lresolve(rest)
        elif first == '..':
            if not self.parent:
                raise NoSuchFile("no parent dir for %r" % self.name)
            return self.parent._lresolve(rest)
        elif rest:
            return self.sub(first)._lresolve(rest)
        else:
            # Last component: return it as-is, without following it further.
            return self.sub(first)

    def lresolve(self, path, stay_inside_fs=False):
        """Walk into a given sub-path of this node.

        If the last element is a symlink, leave it as a symlink, don't resolve
        it.  (like lstat())

        An absolute path starts from top(), or from fs_top() when
        'stay_inside_fs' is true.  An empty path returns this node.
        """
        start = self
        if not path:
            return start
        if path.startswith('/'):
            if stay_inside_fs:
                start = self.fs_top()
            else:
                start = self.top()
            path = path[1:]
        parts = re.split(r'/+', path or '.')
        # A trailing slash leaves an empty last part; treat it as '.'.
        if not parts[-1]:
            parts[-1] = '.'
        #debug2('parts: %r %r\n' % (path, parts))
        return start._lresolve(parts)

    def resolve(self, path = ''):
        """Like lresolve(), and dereference it if it was a symlink."""
        return self.lresolve(path).lresolve('.')

    def try_resolve(self, path = ''):
        """Like resolve(), but don't worry if a symlink uses an invalid path.

        If the final dereference raises NoSuchFile, the unresolved node is
        returned instead.  NoSuchFile is still raised if any intermediate
        node is invalid.
        """
        n = self.lresolve(path)
        try:
            n = n.lresolve('.')
        except NoSuchFile:
            pass
        return n

    def nlinks(self):
        """Get the number of hard links to the current node."""
        return 1

    def size(self):
        """Get the size of the current node."""
        return 0

    def open(self):
        """Open the current node. It is an error to open a non-file node."""
        raise NotFile('%s is not a regular file' % self.name)

    def _populate_metadata(self, force=False):
        # Only Dirs contain .bupm files, so by default, do nothing.
        pass

    def metadata(self):
        """Return this Node's Metadata() object, if any."""
        if not self._metadata and self.parent:
            self.parent._populate_metadata(force=True)
        return self._metadata

    def release(self):
        """Release resources that can be automatically restored (at a cost)."""
        self._metadata = None
        self._subs = None
class File(Node):
    """A normal file from bup's repository."""
    def __init__(self, parent, name, mode, hash, bupmode):
        Node.__init__(self, parent, name, mode, hash)
        self.bupmode = bupmode
        self._cached_size = None    # computed lazily by size()
        self._filereader = None     # created lazily by open()

    def open(self):
        """Open the file and return its reader, rewound to offset 0."""
        # You'd think FUSE might call this only once each time a file is
        # opened, but no; it's really more of a refcount, and it's called
        # once per read().  Thus, it's important to cache the filereader
        # object here so we're not constantly re-seeking.
        if not self._filereader:
            self._filereader = _FileReader(self.hash, self.size(),
                                           self.bupmode == git.BUP_CHUNKED)
        self._filereader.seek(0)
        return self._filereader

    def size(self):
        """Get this file's size (computed on first use, then cached)."""
        if self._cached_size is None:
            debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
            if self.bupmode == git.BUP_CHUNKED:
                self._cached_size = _total_size(self.hash)
            else:
                self._cached_size = _chunk_len(self.hash)
            debug1('<<<<File.size() done.\n')
        return self._cached_size
# Current nesting depth of Symlink.dereference() calls.
_symrefs = 0
class Symlink(File):
    """A symbolic link from bup's repository."""
    def __init__(self, parent, name, hash, bupmode):
        # stat.S_IFLNK is the symlink file-type mode (octal 120000).
        File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)

    def size(self):
        """Get the size of the link itself: the length of its target path."""
        return len(self.readlink())

    def readlink(self):
        """Get the path that this link points at."""
        return ''.join(cp().join(self.hash.encode('hex')))

    def dereference(self):
        """Get the node that this link points at.

        If the path is invalid, raise a NoSuchFile exception. If the level of
        indirection of symlinks is more than 100 levels deep, raise a
        TooManySymlinks exception.
        """
        global _symrefs
        if _symrefs > 100:
            raise TooManySymlinks('too many levels of symlinks: %r'
                                  % self.fullname())
        _symrefs += 1
        try:
            return self.parent.lresolve(self.readlink(),
                                        stay_inside_fs=True)
        except NoSuchFile:
            raise NoSuchFile("%s: broken symlink to %r"
                             % (self.fullname(), self.readlink()))
        finally:
            # Always undo the depth increment, whether or not we succeeded.
            _symrefs -= 1

    def _lresolve(self, parts):
        """Resolve 'parts' relative to the node this link points at."""
        return self.dereference()._lresolve(parts)
class FakeSymlink(Symlink):
    """A symlink that is not stored in the bup repository.

    Its target is the 'toname' string given at construction time.
    """
    def __init__(self, parent, name, toname):
        Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
        self.toname = toname

    def readlink(self):
        """Get the path that this link points at."""
        target = self.toname
        return target
class Dir(Node):
    """A directory stored inside of bup's repository."""

    def __init__(self, *args, **kwargs):
        Node.__init__(self, *args, **kwargs)
        self._bupm = None   # the directory's '.bupm' File, once discovered

    def _populate_metadata(self, force=False):
        """Read the .bupm stream and attach Metadata to this dir and its subs.

        Does nothing if metadata is already loaded (unless 'force' is true)
        or if the directory has no .bupm entry.
        """
        if self._metadata and not force:
            return
        if self._subs is None:
            self._mksubs()
        if not self._bupm:
            return
        meta_stream = self._bupm.open()
        # The first record belongs to the directory itself ...
        dir_meta = metadata.Metadata.read(meta_stream)
        # ... then one record per non-directory child, in iteration order.
        for sub in self:
            if not stat.S_ISDIR(sub.mode):
                sub._metadata = metadata.Metadata.read(meta_stream)
        self._metadata = dir_meta

    def _mksubs(self):
        """Load this directory's entries from its tree object."""
        self._subs = {}
        it = cp().get(self.hash.encode('hex'))
        objtype = next(it)
        if objtype == 'commit':
            # The hash names a commit; ask for '<commit>:' (its tree) instead.
            # NOTE(review): the first stream is dropped before the second
            # request, presumably to finalize it - confirm against cp().
            del it
            it = cp().get(self.hash.encode('hex') + ':')
            objtype = next(it)
        assert(objtype == 'tree')
        for (mode, mangled_name, sha) in git.tree_decode(''.join(it)):
            if mangled_name == '.bupm':
                # Kept aside as the metadata file, not listed as a child.
                bupmode = BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL
                self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
                                  bupmode)
                continue
            (name, bupmode) = git.demangle_name(mangled_name)
            if bupmode == git.BUP_CHUNKED:
                # Chunked files are stored as trees but exposed as files.
                mode = GIT_MODE_FILE
            if stat.S_ISDIR(mode):
                self._subs[name] = Dir(self, name, mode, sha)
            elif stat.S_ISLNK(mode):
                self._subs[name] = Symlink(self, name, sha, bupmode)
            else:
                self._subs[name] = File(self, name, mode, sha, bupmode)

    def metadata(self):
        """Return this Dir's Metadata() object, if any."""
        self._populate_metadata()
        return self._metadata

    def metadata_file(self):
        """Return this Dir's .bupm File, if any."""
        if self._subs is None:
            self._mksubs()
        return self._bupm

    def release(self):
        """Release restorable resources held by this node."""
        self._bupm = None
        super(Dir, self).release()
class CommitDir(Node):
    """A directory that contains all commits that are reachable by a ref.

    Contains a set of subdirectories named after the first byte of the
    commits' hashes, in hexadecimal.  Each of those directories contains all
    commits whose hashes start with the directory name.  The name used for
    the entries inside is the commit hash without its first byte.  This
    separation avoids having too many directories on the same level as the
    number of commits grows.
    """
    def __init__(self, parent, name):
        Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)

    def _mksubs(self):
        """Create one CommitList per hash prefix and record its commits."""
        self._subs = {}
        for ref in git.list_refs():
            #debug2('ref name: %s\n' % ref[0])
            for (date, commit) in git.rev_list(ref[1].encode('hex')):
                #debug2('commit: %s  date: %s\n' % (commit.encode('hex'), date))
                commithex = commit.encode('hex')
                prefix, rest = commithex[:2], commithex[2:]
                bucket = self._subs.get(prefix)
                if not bucket:
                    bucket = CommitList(self, prefix)
                    self._subs[prefix] = bucket

                if bucket.commits.get(rest):
                    # Stop work for this ref, the rest should already be present
                    break

                bucket.commits[rest] = (commit, date)
class CommitList(Node):
    """A list of commits with hashes that start with the current node's name.

    'commits' maps an entry name to a (hash, date) pair.
    """
    def __init__(self, parent, name):
        Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
        self.commits = {}

    def _mksubs(self):
        """Create one Dir per recorded commit, dated with the commit date."""
        self._subs = {}
        for entry, info in self.commits.items():
            (hash, date) = info
            commit_dir = Dir(self, entry, GIT_MODE_TREE, hash)
            commit_dir.ctime = commit_dir.mtime = date
            self._subs[entry] = commit_dir
class TagDir(Node):
    """A directory that contains all tags in the repository."""
    def __init__(self, parent, name):
        Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)

    def _mksubs(self):
        """Create one symlink per 'refs/tags/' ref, pointing into /.commit."""
        self._subs = {}
        prefix = 'refs/tags/'
        for (refname, sha) in git.list_refs():
            if not refname.startswith(prefix):
                continue
            name = refname[len(prefix):]
            commithex = sha.encode('hex')
            date = git.get_commit_dates([commithex])[0]
            target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
            link = FakeSymlink(self, name, target)
            link.ctime = link.mtime = date
            self._subs[name] = link
class BranchList(Node):
    """A list of links to commits reachable by a branch in bup's repository.

    Represents each commit as a symlink that points to the commit directory in
    /.commit/??/ . The symlink is named after the commit date.  Commits that
    have tags also get a symlink per tag, and 'latest' points at the first
    commit returned by git.rev_list().
    """
    def __init__(self, parent, name, hash):
        Node.__init__(self, parent, name, GIT_MODE_TREE, hash)

    def _mksubs(self):
        """Create the date, tag and 'latest' symlinks for this branch."""
        self._subs = {}

        def add_link(linkname, commit, date):
            # Register a symlink named 'linkname' that targets 'commit'.
            commithex = commit.encode('hex')
            target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
            node = FakeSymlink(self, linkname, target)
            node.ctime = node.mtime = date
            self._subs[linkname] = node

        tags = git.tags()

        revs = list(git.rev_list(self.hash.encode('hex')))
        latest = revs[0]
        for (date, commit) in revs:
            stamp = time.strftime('%Y-%m-%d-%H%M%S', time.localtime(date))
            add_link(stamp, commit, date)

            for tag in tags.get(commit, []):
                add_link(tag, commit, date)

        (date, commit) = latest
        add_link('latest', commit, date)
class RefList(Node):
    """A list of branches in bup's repository.

    The sub-nodes of the ref list are one BranchList per 'refs/heads/' ref,
    named after the branch.

    Also, a special sub-node named '.commit' contains all commit directories
    that are reachable via a ref (e.g. a branch). See CommitDir for details.
    A second special sub-node, '.tag', is a TagDir.
    """
    def __init__(self, parent):
        Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA)

    def _mksubs(self):
        """Create the '.commit' and '.tag' nodes and one node per branch."""
        self._subs = {}

        self._subs['.commit'] = CommitDir(self, '.commit')
        self._subs['.tag'] = TagDir(self, '.tag')

        # Keep only branch refs, dropping the 11-character 'refs/heads/'.
        refs_info = []
        for (refname, sha) in git.list_refs():
            if refname.startswith('refs/heads/'):
                refs_info.append((refname[11:], sha))

        hexes = [sha.encode('hex') for (name, sha) in refs_info]
        dates = git.get_commit_dates(hexes)

        for (name, sha), date in zip(refs_info, dates):
            branch = BranchList(self, name, sha)
            branch.ctime = branch.mtime = date
            self._subs[name] = branch