1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.git import BUP_NORMAL, BUP_CHUNKED, cp
10 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
class NodeError(Exception):
    """Base class of the VFS exceptions (NoSuchFile, NotDir, NotFile, ...)."""
class NoSuchFile(NodeError):
    """Raised when a requested file does not exist."""
class NotDir(NodeError):
    """Raised when a directory action is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that does not represent a file is accessed."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing nests too deeply."""
37 it = cp().get(hash.encode('hex'))
39 assert(type == 'tree')
40 return git.tree_decode(''.join(it))
43 def _tree_decode(hash):
44 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
47 assert(tree == list(sorted(tree)))
52 return sum(len(b) for b in cp().join(hash.encode('hex')))
55 def _last_chunk_info(hash):
56 tree = _tree_decode(hash)
58 (ofs,isdir,sha) = tree[-1]
60 (subofs, sublen) = _last_chunk_info(sha)
61 return (ofs+subofs, sublen)
63 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the total size of the object identified by 'hash'.

    The size is the offset of the last chunk, as reported by
    _last_chunk_info(), plus the length of that chunk.
    """
    last_offset, last_length = _last_chunk_info(hash)
    total = last_offset + last_length
    return total
71 def _chunkiter(hash, startofs):
73 tree = _tree_decode(hash)
75 # skip elements before startofs
76 for i in xrange(len(tree)):
77 if i+1 >= len(tree) or tree[i+1][0] > startofs:
81 # iterate through what's left
82 for i in xrange(first, len(tree)):
83 (ofs,isdir,sha) = tree[i]
84 skipmore = startofs-ofs
88 for b in _chunkiter(sha, skipmore):
91 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
95 def __init__(self, hash, isdir, startofs):
97 self.it = _chunkiter(hash, startofs)
101 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
104 def next(self, size):
106 while len(out) < size:
107 if self.it and not self.blob:
109 self.blob = self.it.next()
110 except StopIteration:
113 want = size - len(out)
114 out += self.blob[:want]
115 self.blob = self.blob[want:]
118 debug2('next(%d) returned %d\n' % (size, len(out)))
123 class _FileReader(object):
124 def __init__(self, hash, size, isdir):
142 def read(self, count = -1):
144 count = self.size - self.ofs
145 if not self.reader or self.reader.ofs != self.ofs:
146 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
148 buf = self.reader.next(count)
151 raise # our offsets will be all screwed up otherwise
160 """Base class for file representation."""
161 def __init__(self, parent, name, mode, hash):
166 self.ctime = self.mtime = self.atime = 0
168 self._metadata = None
171 return "<%s object at %s - name:%r hash:%s parent:%r>" \
172 % (self.__class__, hex(id(self)),
173 self.name, self.hash.encode('hex'),
174 self.parent.name if self.parent else None)
179 return (cmp(a and a.parent, b and b.parent) or
180 cmp(a and a.name, b and b.name))
183 return iter(self.subs())
185 def fullname(self, stop_at=None):
186 """Get this file's full path."""
187 assert(self != stop_at) # would be the empty string; too weird
188 if self.parent and self.parent != stop_at:
189 return os.path.join(self.parent.fullname(stop_at=stop_at),
198 """Get a list of nodes that are contained in this node."""
199 if self._subs == None:
201 return sorted(self._subs.values())
204 """Get node named 'name' that is contained in this node."""
205 if self._subs == None:
207 ret = self._subs.get(name)
209 raise NoSuchFile("no file %r in %r" % (name, self.name))
213 """Return the very top node of the tree."""
215 return self.parent.top()
220 """Return the top node of the particular backup set.
222 If this node isn't inside a backup set, return the root level.
224 if self.parent and not isinstance(self.parent, CommitList):
225 return self.parent.fs_top()
229 def _lresolve(self, parts):
230 #debug2('_lresolve %r in %r\n' % (parts, self.name))
233 (first, rest) = (parts[0], parts[1:])
235 return self._lresolve(rest)
238 raise NoSuchFile("no parent dir for %r" % self.name)
239 return self.parent._lresolve(rest)
241 return self.sub(first)._lresolve(rest)
243 return self.sub(first)
245 def lresolve(self, path, stay_inside_fs=False):
246 """Walk into a given sub-path of this node.
248 If the last element is a symlink, leave it as a symlink, don't resolve
254 if path.startswith('/'):
256 start = self.fs_top()
260 parts = re.split(r'/+', path or '.')
263 #debug2('parts: %r %r\n' % (path, parts))
264 return start._lresolve(parts)
def resolve(self, path = ''):
    """Resolve 'path' like lresolve(), then dereference a final symlink.

    The node returned by lresolve(path) is itself asked to lresolve('.'),
    so that a symlink found at the end of the path is followed.
    """
    found = self.lresolve(path)
    return found.lresolve('.')
270 def try_resolve(self, path = ''):
271 """Like resolve(), but don't worry if a symlink uses an invalid path.
273 Returns an error if any intermediate nodes were invalid.
275 n = self.lresolve(path)
283 """Get the number of hard links to the current node."""
284 if self._subs == None:
289 """Get the size of the current node."""
293 """Open the current node. It is an error to open a non-file node."""
294 raise NotFile('%s is not a regular file' % self.name)
296 def _populate_metadata(self, force=False):
297 # Only Dirs contain .bupm files, so by default, do nothing.
301 """Return this Node's Metadata() object, if any."""
302 if not self._metadata and self.parent:
303 self.parent._populate_metadata(force=True)
304 return self._metadata
307 """Release resources that can be automatically restored (at a cost)."""
308 self._metadata = None
313 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode):
    """Create a file node; its size and file reader start out uncached."""
    Node.__init__(self, parent, name, mode, hash)
    # Neither the size nor the reader is computed here; both slots are
    # left empty until something fills them in.
    self._cached_size = self._filereader = None
    self.bupmode = bupmode
322 # You'd think FUSE might call this only once each time a file is
323 # opened, but no; it's really more of a refcount, and it's called
324 # once per read(). Thus, it's important to cache the filereader
325 # object here so we're not constantly re-seeking.
326 if not self._filereader:
327 self._filereader = _FileReader(self.hash, self.size(),
328 self.bupmode == git.BUP_CHUNKED)
329 self._filereader.seek(0)
330 return self._filereader
333 """Get this file's size."""
334 if self._cached_size == None:
335 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
336 if self.bupmode == git.BUP_CHUNKED:
337 self._cached_size = _total_size(self.hash)
339 self._cached_size = _chunk_len(self.hash)
340 debug1('<<<<File.size() done.\n')
341 return self._cached_size
346 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Create a symlink node.

    parent, name, hash and bupmode are forwarded unchanged to
    File.__init__(); the mode is always the symlink file type.
    """
    # stat.S_IFLNK has the same value as the old literal 0120000, but it is
    # self-describing and avoids the leading-zero octal syntax, which is a
    # SyntaxError on Python 3.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)
351 """Get the file size of the file at which this link points."""
352 return len(self.readlink())
355 """Get the path that this link points at."""
356 return ''.join(cp().join(self.hash.encode('hex')))
358 def dereference(self):
359 """Get the node that this link points at.
361 If the path is invalid, raise a NoSuchFile exception. If the level of
362 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
367 raise TooManySymlinks('too many levels of symlinks: %r'
372 return self.parent.lresolve(self.readlink(),
375 raise NoSuchFile("%s: broken symlink to %r"
376 % (self.fullname(), self.readlink()))
380 def _lresolve(self, parts):
381 return self.dereference()._lresolve(parts)
384 class FakeSymlink(Symlink):
385 """A symlink that is not stored in the bup repository."""
386 def __init__(self, parent, name, toname):
387 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
391 """Get the path that this link points at."""
396 """A directory stored inside of bup's repository."""
398 def __init__(self, *args, **kwargs):
399 Node.__init__(self, *args, **kwargs)
402 def _populate_metadata(self, force=False):
403 if self._metadata and not force:
409 meta_stream = self._bupm.open()
410 dir_meta = metadata.Metadata.read(meta_stream)
412 if not stat.S_ISDIR(sub.mode):
413 sub._metadata = metadata.Metadata.read(meta_stream)
414 self._metadata = dir_meta
418 it = cp().get(self.hash.encode('hex'))
422 it = cp().get(self.hash.encode('hex') + ':')
424 assert(type == 'tree')
425 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
426 if mangled_name == '.bupm':
427 bupmode = stat.S_ISDIR(mode) and BUP_CHUNKED or BUP_NORMAL
428 self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
432 (name,bupmode) = git.demangle_name(mangled_name)
433 if bupmode == git.BUP_CHUNKED:
435 if stat.S_ISDIR(mode):
436 self._subs[name] = Dir(self, name, mode, sha)
437 elif stat.S_ISLNK(mode):
438 self._subs[name] = Symlink(self, name, sha, bupmode)
440 self._subs[name] = File(self, name, mode, sha, bupmode)
443 """Return this Dir's Metadata() object, if any."""
444 self._populate_metadata()
445 return self._metadata
447 def metadata_file(self):
448 """Return this Dir's .bupm File, if any."""
454 """Release restorable resources held by this node."""
456 super(Dir, self).release()
459 class CommitDir(Node):
460 """A directory that contains all commits that are reachable by a ref.
462 Contains a set of subdirectories named after the commits' first byte in
hexadecimal. Each of those directories contains all commits with hashes that
start the same as the directory name. The name used for those
subdirectories is the hash of the commit without the first byte. This
separation helps us avoid having too many directories on the same level as
the number of commits grows big.
469 def __init__(self, parent, name):
470 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
474 refs = git.list_refs()
476 #debug2('ref name: %s\n' % ref[0])
477 revs = git.rev_list(ref[1].encode('hex'))
478 for (date, commit) in revs:
479 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
480 commithex = commit.encode('hex')
481 containername = commithex[:2]
482 dirname = commithex[2:]
483 n1 = self._subs.get(containername)
485 n1 = CommitList(self, containername)
486 self._subs[containername] = n1
488 if n1.commits.get(dirname):
489 # Stop work for this ref, the rest should already be present
492 n1.commits[dirname] = (commit, date)
495 class CommitList(Node):
496 """A list of commits with hashes that start with the current node's name."""
497 def __init__(self, parent, name):
498 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
503 for (name, (hash, date)) in self.commits.items():
504 n1 = Dir(self, name, GIT_MODE_TREE, hash)
505 n1.ctime = n1.mtime = date
506 self._subs[name] = n1
510 """A directory that contains all tags in the repository."""
511 def __init__(self, parent, name):
512 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
516 for (name, sha) in git.list_refs():
517 if name.startswith('refs/tags/'):
519 date = git.get_commit_dates([sha.encode('hex')])[0]
520 commithex = sha.encode('hex')
521 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
522 tag1 = FakeSymlink(self, name, target)
523 tag1.ctime = tag1.mtime = date
524 self._subs[name] = tag1
527 class BranchList(Node):
528 """A list of links to commits reachable by a branch in bup's repository.
530 Represents each commit as a symlink that points to the commit directory in
531 /.commit/??/ . The symlink is named after the commit date.
533 def __init__(self, parent, name, hash):
534 Node.__init__(self, parent, name, GIT_MODE_TREE, hash)
541 revs = list(git.rev_list(self.hash.encode('hex')))
543 for (date, commit) in revs:
544 l = time.localtime(date)
545 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
546 commithex = commit.encode('hex')
547 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
548 n1 = FakeSymlink(self, ls, target)
549 n1.ctime = n1.mtime = date
552 for tag in tags.get(commit, []):
553 t1 = FakeSymlink(self, tag, target)
554 t1.ctime = t1.mtime = date
557 (date, commit) = latest
558 commithex = commit.encode('hex')
559 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
560 n1 = FakeSymlink(self, 'latest', target)
561 n1.ctime = n1.mtime = date
562 self._subs['latest'] = n1
566 """A list of branches in bup's repository.
The sub-nodes of the ref list are a series of BranchList nodes, one for each
commit hash pointed to by a branch.
571 Also, a special sub-node named '.commit' contains all commit directories
572 that are reachable via a ref (e.g. a branch). See CommitDir for details.
574 def __init__(self, parent):
575 Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA)
580 commit_dir = CommitDir(self, '.commit')
581 self._subs['.commit'] = commit_dir
583 tag_dir = TagDir(self, '.tag')
584 self._subs['.tag'] = tag_dir
586 refs_info = [(name[11:], sha) for (name,sha) in git.list_refs() \
587 if name.startswith('refs/heads/')]
589 dates = git.get_commit_dates([sha.encode('hex')
590 for (name, sha) in refs_info])
592 for (name, sha), date in zip(refs_info, dates):
593 n1 = BranchList(self, name, sha)
594 n1.ctime = n1.mtime = date
595 self._subs[name] = n1