1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
9 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
15 """Create a git.CatPipe object or reuse the already existing one."""
class NodeError(Exception):
    """Common ancestor of the errors raised by the VFS layer."""
class NoSuchFile(NodeError):
    """Raised when a requested file does not exist."""
class NotDir(NodeError):
    """Raised when a directory action is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node is accessed as a file but does not represent one."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing nests too deeply."""
43 it = cp().get(hash.encode('hex'))
45 assert(type == 'tree')
46 return git.tree_decode(''.join(it))
49 def _tree_decode(hash):
50 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
53 assert(tree == list(sorted(tree)))
58 return sum(len(b) for b in cp().join(hash.encode('hex')))
61 def _last_chunk_info(hash):
62 tree = _tree_decode(hash)
64 (ofs,isdir,sha) = tree[-1]
66 (subofs, sublen) = _last_chunk_info(sha)
67 return (ofs+subofs, sublen)
69 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the offset of the last chunk plus that chunk's length.

    Both numbers are obtained from _last_chunk_info(hash).
    """
    tail_offset, tail_length = _last_chunk_info(hash)
    total = tail_offset + tail_length
    return total
77 def _chunkiter(hash, startofs):
79 tree = _tree_decode(hash)
81 # skip elements before startofs
82 for i in xrange(len(tree)):
83 if i+1 >= len(tree) or tree[i+1][0] > startofs:
87 # iterate through what's left
88 for i in xrange(first, len(tree)):
89 (ofs,isdir,sha) = tree[i]
90 skipmore = startofs-ofs
94 for b in _chunkiter(sha, skipmore):
97 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
101 def __init__(self, hash, isdir, startofs):
103 self.it = _chunkiter(hash, startofs)
107 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
110 def next(self, size):
112 while len(out) < size:
113 if self.it and not self.blob:
115 self.blob = self.it.next()
116 except StopIteration:
119 want = size - len(out)
120 out += self.blob[:want]
121 self.blob = self.blob[want:]
124 debug2('next(%d) returned %d\n' % (size, len(out)))
129 class _FileReader(object):
130 def __init__(self, hash, size, isdir):
148 def read(self, count = -1):
150 count = self.size - self.ofs
151 if not self.reader or self.reader.ofs != self.ofs:
152 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
154 buf = self.reader.next(count)
157 raise # our offsets will be all screwed up otherwise
166 """Base class for file representation."""
167 def __init__(self, parent, name, mode, hash):
172 self.ctime = self.mtime = self.atime = 0
176 return cmp(a and a.name or None, b and b.name or None)
179 return iter(self.subs())
181 def fullname(self, stop_at=None):
182 """Get this file's full path."""
183 assert(self != stop_at) # would be the empty string; too weird
184 if self.parent and self.parent != stop_at:
185 return os.path.join(self.parent.fullname(stop_at=stop_at),
194 """Get a list of nodes that are contained in this node."""
195 if self._subs == None:
197 return sorted(self._subs.values())
200 """Get node named 'name' that is contained in this node."""
201 if self._subs == None:
203 ret = self._subs.get(name)
205 raise NoSuchFile("no file %r in %r" % (name, self.name))
209 """Return the very top node of the tree."""
211 return self.parent.top()
216 """Return the top node of the particular backup set.
218 If this node isn't inside a backup set, return the root level.
220 if self.parent and not isinstance(self.parent, CommitList):
221 return self.parent.fs_top()
225 def _lresolve(self, parts):
226 #debug2('_lresolve %r in %r\n' % (parts, self.name))
229 (first, rest) = (parts[0], parts[1:])
231 return self._lresolve(rest)
234 raise NoSuchFile("no parent dir for %r" % self.name)
235 return self.parent._lresolve(rest)
237 return self.sub(first)._lresolve(rest)
239 return self.sub(first)
241 def lresolve(self, path, stay_inside_fs=False):
242 """Walk into a given sub-path of this node.
244 If the last element is a symlink, leave it as a symlink, don't resolve
250 if path.startswith('/'):
252 start = self.fs_top()
256 parts = re.split(r'/+', path or '.')
259 #debug2('parts: %r %r\n' % (path, parts))
260 return start._lresolve(parts)
def resolve(self, path = ''):
    """Walk to 'path' like lresolve(), then dereference a trailing symlink.

    The walk itself is done by lresolve(path); calling lresolve('.') on the
    node it returns is what follows the link if that node is a symlink.
    """
    node = self.lresolve(path)
    return node.lresolve('.')
266 def try_resolve(self, path = ''):
267 """Like resolve(), but don't worry if a symlink uses an invalid path.
269 Returns an error if any intermediate nodes were invalid.
271 n = self.lresolve(path)
279 """Get the number of hard links to the current node."""
280 if self._subs == None:
285 """Get the size of the current node."""
289 """Open the current node. It is an error to open a non-file node."""
290 raise NotFile('%s is not a regular file' % self.name)
294 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode):
    """Initialise the node through Node.__init__ and reset cached state.

    bupmode is stored as given; the cached size and the cached file reader
    both start out as None.
    """
    Node.__init__(self, parent, name, mode, hash)
    # Nothing has been measured or opened yet.
    self._filereader = None
    self._cached_size = None
    self.bupmode = bupmode
303 # You'd think FUSE might call this only once each time a file is
304 # opened, but no; it's really more of a refcount, and it's called
305 # once per read(). Thus, it's important to cache the filereader
306 # object here so we're not constantly re-seeking.
307 if not self._filereader:
308 self._filereader = _FileReader(self.hash, self.size(),
309 self.bupmode == git.BUP_CHUNKED)
310 self._filereader.seek(0)
311 return self._filereader
314 """Get this file's size."""
315 if self._cached_size == None:
316 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
317 if self.bupmode == git.BUP_CHUNKED:
318 self._cached_size = _total_size(self.hash)
320 self._cached_size = _chunk_len(self.hash)
321 debug1('<<<<File.size() done.\n')
322 return self._cached_size
327 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Create a symlink node.

    All arguments are forwarded to File.__init__; the mode is not taken
    from the caller but fixed to stat.S_IFLNK, the symbolic-link file type.
    """
    # stat.S_IFLNK has the same value as the former bare literal 0120000,
    # but is self-describing and does not rely on the Python-2-only
    # leading-zero octal syntax.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)
332 """Get the file size of the file at which this link points."""
333 return len(self.readlink())
336 """Get the path that this link points at."""
337 return ''.join(cp().join(self.hash.encode('hex')))
339 def dereference(self):
340 """Get the node that this link points at.
342 If the path is invalid, raise a NoSuchFile exception. If the level of
343 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
348 raise TooManySymlinks('too many levels of symlinks: %r'
353 return self.parent.lresolve(self.readlink(),
356 raise NoSuchFile("%s: broken symlink to %r"
357 % (self.fullname(), self.readlink()))
361 def _lresolve(self, parts):
362 return self.dereference()._lresolve(parts)
365 class FakeSymlink(Symlink):
366 """A symlink that is not stored in the bup repository."""
367 def __init__(self, parent, name, toname):
368 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
372 """Get the path that this link points at."""
377 """A directory stored inside of bup's repository."""
380 it = cp().get(self.hash.encode('hex'))
384 it = cp().get(self.hash.encode('hex') + ':')
386 assert(type == 'tree')
387 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
389 (name,bupmode) = git.demangle_name(mangled_name)
390 if bupmode == git.BUP_CHUNKED:
392 if stat.S_ISDIR(mode):
393 self._subs[name] = Dir(self, name, mode, sha)
394 elif stat.S_ISLNK(mode):
395 self._subs[name] = Symlink(self, name, sha, bupmode)
397 self._subs[name] = File(self, name, mode, sha, bupmode)
400 class CommitDir(Node):
401 """A directory that contains all commits that are reachable by a ref.
403 Contains a set of subdirectories named after the commits' first byte in
404 hexadecimal. Each of those directories contains all commits with hashes that
405 start the same as the directory name. The name used for those
406 subdirectories is the hash of the commit without the first byte. This
407 separation helps us avoid having too many directories on the same level as
408 the number of commits grows big.
def __init__(self, parent, name):
    """Initialise through Node.__init__ with mode GIT_MODE_TREE and hash EMPTY_SHA."""
    mode, sha = GIT_MODE_TREE, EMPTY_SHA
    Node.__init__(self, parent, name, mode, sha)
415 refs = git.list_refs()
417 #debug2('ref name: %s\n' % ref[0])
418 revs = git.rev_list(ref[1].encode('hex'))
419 for (date, commit) in revs:
420 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
421 commithex = commit.encode('hex')
422 containername = commithex[:2]
423 dirname = commithex[2:]
424 n1 = self._subs.get(containername)
426 n1 = CommitList(self, containername)
427 self._subs[containername] = n1
429 if n1.commits.get(dirname):
430 # Stop work for this ref, the rest should already be present
433 n1.commits[dirname] = (commit, date)
436 class CommitList(Node):
437 """A list of commits with hashes that start with the current node's name."""
438 def __init__(self, parent, name):
439 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
444 for (name, (hash, date)) in self.commits.items():
445 n1 = Dir(self, name, GIT_MODE_TREE, hash)
446 n1.ctime = n1.mtime = date
447 self._subs[name] = n1
451 """A directory that contains all tags in the repository."""
def __init__(self, parent, name):
    """Initialise through Node.__init__ as a tree-mode node with hash EMPTY_SHA."""
    tree_mode = GIT_MODE_TREE
    no_hash = EMPTY_SHA
    Node.__init__(self, parent, name, tree_mode, no_hash)
457 for (name, sha) in git.list_refs():
458 if name.startswith('refs/tags/'):
460 date = git.rev_get_date(sha.encode('hex'))
461 commithex = sha.encode('hex')
462 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
463 tag1 = FakeSymlink(self, name, target)
464 tag1.ctime = tag1.mtime = date
465 self._subs[name] = tag1
468 class BranchList(Node):
469 """A list of links to commits reachable by a branch in bup's repository.
471 Represents each commit as a symlink that points to the commit directory in
472 /.commit/??/ . The symlink is named after the commit date.
def __init__(self, parent, name, hash):
    """Initialise through Node.__init__ as a tree-mode node for 'hash'."""
    tree_mode = GIT_MODE_TREE
    Node.__init__(self, parent, name, tree_mode, hash)
482 revs = list(git.rev_list(self.hash.encode('hex')))
483 for (date, commit) in revs:
484 l = time.localtime(date)
485 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
486 commithex = commit.encode('hex')
487 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
488 n1 = FakeSymlink(self, ls, target)
489 n1.ctime = n1.mtime = date
492 for tag in tags.get(commit, []):
493 t1 = FakeSymlink(self, tag, target)
494 t1.ctime = t1.mtime = date
499 (date, commit) = latest
500 commithex = commit.encode('hex')
501 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
502 n1 = FakeSymlink(self, 'latest', target)
503 n1.ctime = n1.mtime = date
504 self._subs['latest'] = n1
508 """A list of branches in bup's repository.
510 The sub-nodes of the ref list are a series of BranchList nodes, one for each
511 branch head (a ref under refs/heads/) in the repository.
513 Also, a special sub-node named '.commit' contains all commit directories
514 that are reachable via a ref (e.g. a branch). See CommitDir for details.
def __init__(self, parent):
    """Initialise through Node.__init__ as the tree-mode node named '/'.

    The hash passed on is EMPTY_SHA.
    """
    root_name = '/'
    Node.__init__(self, parent, root_name, GIT_MODE_TREE, EMPTY_SHA)
522 commit_dir = CommitDir(self, '.commit')
523 self._subs['.commit'] = commit_dir
525 tag_dir = TagDir(self, '.tag')
526 self._subs['.tag'] = tag_dir
528 for (name,sha) in git.list_refs():
529 if name.startswith('refs/heads/'):
531 date = git.rev_get_date(sha.encode('hex'))
532 n1 = BranchList(self, name, sha)
533 n1.ctime = n1.mtime = date
534 self._subs[name] = n1