1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
9 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
15 """Create a git.CatPipe object or reuse the already existing one."""
21 class NodeError(Exception):
22 """VFS base exception."""
25 class NoSuchFile(NodeError):
26 """Request of a file that does not exist."""
29 class NotDir(NodeError):
30 """Attempt to do a directory action on a file that is not one."""
33 class NotFile(NodeError):
34 """Access to a node that does not represent a file."""
37 class TooManySymlinks(NodeError):
38 """Symlink dereferencing level is too deep."""
43 it = cp().get(hash.encode('hex'))
45 assert(type == 'tree')
46 return git.tree_decode(''.join(it))
49 def _tree_decode(hash):
50 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
53 assert(tree == list(sorted(tree)))
58 return sum(len(b) for b in cp().join(hash.encode('hex')))
61 def _last_chunk_info(hash):
62 tree = _tree_decode(hash)
64 (ofs,isdir,sha) = tree[-1]
66 (subofs, sublen) = _last_chunk_info(sha)
67 return (ofs+subofs, sublen)
69 return (ofs, _chunk_len(sha))
72 def _total_size(hash):
73 (lastofs, lastsize) = _last_chunk_info(hash)
74 return lastofs + lastsize
77 def _chunkiter(hash, startofs):
79 tree = _tree_decode(hash)
81 # skip elements before startofs
82 for i in xrange(len(tree)):
83 if i+1 >= len(tree) or tree[i+1][0] > startofs:
87 # iterate through what's left
88 for i in xrange(first, len(tree)):
89 (ofs,isdir,sha) = tree[i]
90 skipmore = startofs-ofs
94 for b in _chunkiter(sha, skipmore):
97 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
101 def __init__(self, hash, isdir, startofs):
103 self.it = _chunkiter(hash, startofs)
107 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
110 def next(self, size):
112 while len(out) < size:
113 if self.it and not self.blob:
115 self.blob = self.it.next()
116 except StopIteration:
119 want = size - len(out)
120 out += self.blob[:want]
121 self.blob = self.blob[want:]
124 debug2('next(%d) returned %d\n' % (size, len(out)))
129 class _FileReader(object):
130 def __init__(self, hash, size, isdir):
148 def read(self, count = -1):
150 count = self.size - self.ofs
151 if not self.reader or self.reader.ofs != self.ofs:
152 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
154 buf = self.reader.next(count)
157 raise # our offsets will be all screwed up otherwise
166 """Base class for file representation."""
167 def __init__(self, parent, name, mode, hash):
172 self.ctime = self.mtime = self.atime = 0
178 return (cmp(a and a.parent, b and b.parent) or
179 cmp(a and a.name, b and b.name))
182 return iter(self.subs())
184 def fullname(self, stop_at=None):
185 """Get this file's full path."""
186 assert(self != stop_at) # would be the empty string; too weird
187 if self.parent and self.parent != stop_at:
188 return os.path.join(self.parent.fullname(stop_at=stop_at),
197 """Get a list of nodes that are contained in this node."""
198 if self._subs == None:
200 return sorted(self._subs.values())
203 """Get node named 'name' that is contained in this node."""
204 if self._subs == None:
206 ret = self._subs.get(name)
208 raise NoSuchFile("no file %r in %r" % (name, self.name))
212 """Return the very top node of the tree."""
214 return self.parent.top()
219 """Return the top node of the particular backup set.
221 If this node isn't inside a backup set, return the root level.
223 if self.parent and not isinstance(self.parent, CommitList):
224 return self.parent.fs_top()
228 def _lresolve(self, parts):
229 #debug2('_lresolve %r in %r\n' % (parts, self.name))
232 (first, rest) = (parts[0], parts[1:])
234 return self._lresolve(rest)
237 raise NoSuchFile("no parent dir for %r" % self.name)
238 return self.parent._lresolve(rest)
240 return self.sub(first)._lresolve(rest)
242 return self.sub(first)
244 def lresolve(self, path, stay_inside_fs=False):
245 """Walk into a given sub-path of this node.
247 If the last element is a symlink, leave it as a symlink, don't resolve
253 if path.startswith('/'):
255 start = self.fs_top()
259 parts = re.split(r'/+', path or '.')
262 #debug2('parts: %r %r\n' % (path, parts))
263 return start._lresolve(parts)
265 def resolve(self, path = ''):
266 """Like lresolve(), and dereference it if it was a symlink."""
267 return self.lresolve(path).lresolve('.')
269 def try_resolve(self, path = ''):
270 """Like resolve(), but don't worry if a symlink uses an invalid path.
272 Returns an error if any intermediate nodes were invalid.
274 n = self.lresolve(path)
282 """Get the number of hard links to the current node."""
283 if self._subs == None:
288 """Get the size of the current node."""
292 """Open the current node. It is an error to open a non-file node."""
293 raise NotFile('%s is not a regular file' % self.name)
297 """A normal file from bup's repository."""
298 def __init__(self, parent, name, mode, hash, bupmode):
299 Node.__init__(self, parent, name, mode, hash)
300 self.bupmode = bupmode
301 self._cached_size = None
302 self._filereader = None
306 # You'd think FUSE might call this only once each time a file is
307 # opened, but no; it's really more of a refcount, and it's called
308 # once per read(). Thus, it's important to cache the filereader
309 # object here so we're not constantly re-seeking.
310 if not self._filereader:
311 self._filereader = _FileReader(self.hash, self.size(),
312 self.bupmode == git.BUP_CHUNKED)
313 self._filereader.seek(0)
314 return self._filereader
317 """Get this file's size."""
318 if self._cached_size == None:
319 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
320 if self.bupmode == git.BUP_CHUNKED:
321 self._cached_size = _total_size(self.hash)
323 self._cached_size = _chunk_len(self.hash)
324 debug1('<<<<File.size() done.\n')
325 return self._cached_size
330 """A symbolic link from bup's repository."""
331 def __init__(self, parent, name, hash, bupmode):
332 File.__init__(self, parent, name, 0120000, hash, bupmode)
335 """Get the file size of the file at which this link points."""
336 return len(self.readlink())
339 """Get the path that this link points at."""
340 return ''.join(cp().join(self.hash.encode('hex')))
342 def dereference(self):
343 """Get the node that this link points at.
345 If the path is invalid, raise a NoSuchFile exception. If the level of
346 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
351 raise TooManySymlinks('too many levels of symlinks: %r'
356 return self.parent.lresolve(self.readlink(),
359 raise NoSuchFile("%s: broken symlink to %r"
360 % (self.fullname(), self.readlink()))
364 def _lresolve(self, parts):
365 return self.dereference()._lresolve(parts)
368 class FakeSymlink(Symlink):
369 """A symlink that is not stored in the bup repository."""
370 def __init__(self, parent, name, toname):
371 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
375 """Get the path that this link points at."""
380 """A directory stored inside of bup's repository."""
383 it = cp().get(self.hash.encode('hex'))
387 it = cp().get(self.hash.encode('hex') + ':')
389 assert(type == 'tree')
390 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
392 (name,bupmode) = git.demangle_name(mangled_name)
393 if bupmode == git.BUP_CHUNKED:
395 if stat.S_ISDIR(mode):
396 self._subs[name] = Dir(self, name, mode, sha)
397 elif stat.S_ISLNK(mode):
398 self._subs[name] = Symlink(self, name, sha, bupmode)
400 self._subs[name] = File(self, name, mode, sha, bupmode)
403 class CommitDir(Node):
404 """A directory that contains all commits that are reachable by a ref.
406 Contains a set of subdirectories named after the commits' first byte in
407 hexadecimal. Each of those directories contain all commits with hashes that
408 start the same as the directory name. The name used for those
409 subdirectories is the hash of the commit without the first byte. This
410 separation helps us avoid having too much directories on the same level as
411 the number of commits grows big.
413 def __init__(self, parent, name):
414 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
418 refs = git.list_refs()
420 #debug2('ref name: %s\n' % ref[0])
421 revs = git.rev_list(ref[1].encode('hex'))
422 for (date, commit) in revs:
423 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
424 commithex = commit.encode('hex')
425 containername = commithex[:2]
426 dirname = commithex[2:]
427 n1 = self._subs.get(containername)
429 n1 = CommitList(self, containername)
430 self._subs[containername] = n1
432 if n1.commits.get(dirname):
433 # Stop work for this ref, the rest should already be present
436 n1.commits[dirname] = (commit, date)
439 class CommitList(Node):
440 """A list of commits with hashes that start with the current node's name."""
441 def __init__(self, parent, name):
442 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
447 for (name, (hash, date)) in self.commits.items():
448 n1 = Dir(self, name, GIT_MODE_TREE, hash)
449 n1.ctime = n1.mtime = date
450 self._subs[name] = n1
454 """A directory that contains all tags in the repository."""
455 def __init__(self, parent, name):
456 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
460 for (name, sha) in git.list_refs():
461 if name.startswith('refs/tags/'):
463 date = git.rev_get_date(sha.encode('hex'))
464 commithex = sha.encode('hex')
465 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
466 tag1 = FakeSymlink(self, name, target)
467 tag1.ctime = tag1.mtime = date
468 self._subs[name] = tag1
471 class BranchList(Node):
472 """A list of links to commits reachable by a branch in bup's repository.
474 Represents each commit as a symlink that points to the commit directory in
475 /.commit/??/ . The symlink is named after the commit date.
477 def __init__(self, parent, name, hash):
478 Node.__init__(self, parent, name, GIT_MODE_TREE, hash)
485 revs = list(git.rev_list(self.hash.encode('hex')))
486 for (date, commit) in revs:
487 l = time.localtime(date)
488 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
489 commithex = commit.encode('hex')
490 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
491 n1 = FakeSymlink(self, ls, target)
492 n1.ctime = n1.mtime = date
495 for tag in tags.get(commit, []):
496 t1 = FakeSymlink(self, tag, target)
497 t1.ctime = t1.mtime = date
502 (date, commit) = latest
503 commithex = commit.encode('hex')
504 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
505 n1 = FakeSymlink(self, 'latest', target)
506 n1.ctime = n1.mtime = date
507 self._subs['latest'] = n1
511 """A list of branches in bup's repository.
513 The sub-nodes of the ref list are a series of CommitList for each commit
514 hash pointed to by a branch.
516 Also, a special sub-node named '.commit' contains all commit directories
517 that are reachable via a ref (e.g. a branch). See CommitDir for details.
519 def __init__(self, parent):
520 Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA)
525 commit_dir = CommitDir(self, '.commit')
526 self._subs['.commit'] = commit_dir
528 tag_dir = TagDir(self, '.tag')
529 self._subs['.tag'] = tag_dir
531 for (name,sha) in git.list_refs():
532 if name.startswith('refs/heads/'):
534 date = git.rev_get_date(sha.encode('hex'))
535 n1 = BranchList(self, name, sha)
536 n1.ctime = n1.mtime = date
537 self._subs[name] = n1