1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
9 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
15 """Create a git.CatPipe object or reuse the already existing one."""
class NodeError(Exception):
    """Common base class for the errors defined by the VFS layer."""
class NoSuchFile(NodeError):
    """Raised when a requested file does not exist."""
class NotDir(NodeError):
    """Signals a directory operation attempted on a non-directory node."""
class NotFile(NodeError):
    """Raised when a node that is not a file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when symlinks are nested too deeply to be dereferenced."""
43 it = cp().get(hash.encode('hex'))
45 assert(type == 'tree')
46 return git.tree_decode(''.join(it))
49 def _tree_decode(hash):
50 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
53 assert(tree == list(sorted(tree)))
58 return sum(len(b) for b in cp().join(hash.encode('hex')))
61 def _last_chunk_info(hash):
62 tree = _tree_decode(hash)
64 (ofs,isdir,sha) = tree[-1]
66 (subofs, sublen) = _last_chunk_info(sha)
67 return (ofs+subofs, sublen)
69 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the total size of the chunked object identified by `hash`.

    The size is the offset at which the last chunk starts plus the length
    of that chunk, both as reported by _last_chunk_info().
    """
    last_chunk_start, last_chunk_len = _last_chunk_info(hash)
    total = last_chunk_start + last_chunk_len
    return total
77 def _chunkiter(hash, startofs):
79 tree = _tree_decode(hash)
81 # skip elements before startofs
82 for i in xrange(len(tree)):
83 if i+1 >= len(tree) or tree[i+1][0] > startofs:
87 # iterate through what's left
88 for i in xrange(first, len(tree)):
89 (ofs,isdir,sha) = tree[i]
90 skipmore = startofs-ofs
94 for b in _chunkiter(sha, skipmore):
97 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
101 def __init__(self, hash, isdir, startofs):
103 self.it = _chunkiter(hash, startofs)
107 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
110 def next(self, size):
112 while len(out) < size:
113 if self.it and not self.blob:
115 self.blob = self.it.next()
116 except StopIteration:
119 want = size - len(out)
120 out += self.blob[:want]
121 self.blob = self.blob[want:]
124 debug2('next(%d) returned %d\n' % (size, len(out)))
129 class _FileReader(object):
130 def __init__(self, hash, size, isdir):
148 def read(self, count = -1):
150 count = self.size - self.ofs
151 if not self.reader or self.reader.ofs != self.ofs:
152 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
154 buf = self.reader.next(count)
157 raise # our offsets will be all screwed up otherwise
166 """Base class for file representation."""
167 def __init__(self, parent, name, mode, hash):
172 self.ctime = self.mtime = self.atime = 0
176 return "<bup.vfs.Node object at X - name:%r hash:%s parent:%r>" \
177 % (self.name, self.hash.encode('hex'),
178 self.parent.name if self.parent.name else None)
183 return (cmp(a and a.parent, b and b.parent) or
184 cmp(a and a.name, b and b.name))
187 return iter(self.subs())
189 def fullname(self, stop_at=None):
190 """Get this file's full path."""
191 assert(self != stop_at) # would be the empty string; too weird
192 if self.parent and self.parent != stop_at:
193 return os.path.join(self.parent.fullname(stop_at=stop_at),
202 """Get a list of nodes that are contained in this node."""
203 if self._subs == None:
205 return sorted(self._subs.values())
208 """Get node named 'name' that is contained in this node."""
209 if self._subs == None:
211 ret = self._subs.get(name)
213 raise NoSuchFile("no file %r in %r" % (name, self.name))
217 """Return the very top node of the tree."""
219 return self.parent.top()
224 """Return the top node of the particular backup set.
226 If this node isn't inside a backup set, return the root level.
228 if self.parent and not isinstance(self.parent, CommitList):
229 return self.parent.fs_top()
233 def _lresolve(self, parts):
234 #debug2('_lresolve %r in %r\n' % (parts, self.name))
237 (first, rest) = (parts[0], parts[1:])
239 return self._lresolve(rest)
242 raise NoSuchFile("no parent dir for %r" % self.name)
243 return self.parent._lresolve(rest)
245 return self.sub(first)._lresolve(rest)
247 return self.sub(first)
249 def lresolve(self, path, stay_inside_fs=False):
250 """Walk into a given sub-path of this node.
252 If the last element is a symlink, leave it as a symlink, don't resolve
258 if path.startswith('/'):
260 start = self.fs_top()
264 parts = re.split(r'/+', path or '.')
267 #debug2('parts: %r %r\n' % (path, parts))
268 return start._lresolve(parts)
def resolve(self, path = ''):
    """Resolve `path` like lresolve(), then follow a trailing symlink.

    The node returned by lresolve(path) is itself asked to resolve '.',
    so a symlink found at the end of the path gets dereferenced.
    """
    found = self.lresolve(path)
    return found.lresolve('.')
274 def try_resolve(self, path = ''):
275 """Like resolve(), but don't worry if a symlink uses an invalid path.
277 Returns an error if any intermediate nodes were invalid.
279 n = self.lresolve(path)
287 """Get the number of hard links to the current node."""
288 if self._subs == None:
293 """Get the size of the current node."""
297 """Open the current node. It is an error to open a non-file node."""
298 raise NotFile('%s is not a regular file' % self.name)
302 """A normal file from bup's repository."""
303 def __init__(self, parent, name, mode, hash, bupmode):
304 Node.__init__(self, parent, name, mode, hash)
305 self.bupmode = bupmode
306 self._cached_size = None
307 self._filereader = None
311 # You'd think FUSE might call this only once each time a file is
312 # opened, but no; it's really more of a refcount, and it's called
313 # once per read(). Thus, it's important to cache the filereader
314 # object here so we're not constantly re-seeking.
315 if not self._filereader:
316 self._filereader = _FileReader(self.hash, self.size(),
317 self.bupmode == git.BUP_CHUNKED)
318 self._filereader.seek(0)
319 return self._filereader
322 """Get this file's size."""
323 if self._cached_size == None:
324 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
325 if self.bupmode == git.BUP_CHUNKED:
326 self._cached_size = _total_size(self.hash)
328 self._cached_size = _chunk_len(self.hash)
329 debug1('<<<<File.size() done.\n')
330 return self._cached_size
335 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Initialise the node as a File whose mode is the symlink file type.

    parent, name, hash and bupmode are passed to File.__init__() unchanged;
    only the mode is fixed here.
    """
    # stat.S_IFLNK is the symbolic-link file-type constant (octal 120000).
    # Naming it replaces the legacy `0120000` octal literal, which is a
    # magic number and a syntax error on Python 3.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)
340 """Get the file size of the file at which this link points."""
341 return len(self.readlink())
344 """Get the path that this link points at."""
345 return ''.join(cp().join(self.hash.encode('hex')))
347 def dereference(self):
348 """Get the node that this link points at.
350 If the path is invalid, raise a NoSuchFile exception. If the level of
351 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
356 raise TooManySymlinks('too many levels of symlinks: %r'
361 return self.parent.lresolve(self.readlink(),
364 raise NoSuchFile("%s: broken symlink to %r"
365 % (self.fullname(), self.readlink()))
369 def _lresolve(self, parts):
370 return self.dereference()._lresolve(parts)
373 class FakeSymlink(Symlink):
374 """A symlink that is not stored in the bup repository."""
375 def __init__(self, parent, name, toname):
376 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
380 """Get the path that this link points at."""
385 """A directory stored inside of bup's repository."""
387 def __init__(self, *args):
388 Node.__init__(self, *args)
389 self._metadata_sha = None
393 it = cp().get(self.hash.encode('hex'))
397 it = cp().get(self.hash.encode('hex') + ':')
399 assert(type == 'tree')
400 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
401 if mangled_name == '.bupm':
402 self._metadata_sha = sha
405 (name,bupmode) = git.demangle_name(mangled_name)
406 if bupmode == git.BUP_CHUNKED:
408 if stat.S_ISDIR(mode):
409 self._subs[name] = Dir(self, name, mode, sha)
410 elif stat.S_ISLNK(mode):
411 self._subs[name] = Symlink(self, name, sha, bupmode)
413 self._subs[name] = File(self, name, mode, sha, bupmode)
416 class CommitDir(Node):
417 """A directory that contains all commits that are reachable by a ref.
419 Contains a set of subdirectories named after the commits' first byte in
420 hexadecimal. Each of those directories contains all commits with hashes that
421 start the same as the directory name. The name used for those
422 subdirectories is the hash of the commit without the first byte. This
423 separation helps us avoid having too many directories on the same level as
424 the number of commits grows big.
426 def __init__(self, parent, name):
427 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
431 refs = git.list_refs()
433 #debug2('ref name: %s\n' % ref[0])
434 revs = git.rev_list(ref[1].encode('hex'))
435 for (date, commit) in revs:
436 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
437 commithex = commit.encode('hex')
438 containername = commithex[:2]
439 dirname = commithex[2:]
440 n1 = self._subs.get(containername)
442 n1 = CommitList(self, containername)
443 self._subs[containername] = n1
445 if n1.commits.get(dirname):
446 # Stop work for this ref, the rest should already be present
449 n1.commits[dirname] = (commit, date)
452 class CommitList(Node):
453 """A list of commits with hashes that start with the current node's name."""
454 def __init__(self, parent, name):
455 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
460 for (name, (hash, date)) in self.commits.items():
461 n1 = Dir(self, name, GIT_MODE_TREE, hash)
462 n1.ctime = n1.mtime = date
463 self._subs[name] = n1
467 """A directory that contains all tags in the repository."""
468 def __init__(self, parent, name):
469 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
473 for (name, sha) in git.list_refs():
474 if name.startswith('refs/tags/'):
476 date = git.rev_get_date(sha.encode('hex'))
477 commithex = sha.encode('hex')
478 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
479 tag1 = FakeSymlink(self, name, target)
480 tag1.ctime = tag1.mtime = date
481 self._subs[name] = tag1
484 class BranchList(Node):
485 """A list of links to commits reachable by a branch in bup's repository.
487 Represents each commit as a symlink that points to the commit directory in
488 /.commit/??/ . The symlink is named after the commit date.
490 def __init__(self, parent, name, hash):
491 Node.__init__(self, parent, name, GIT_MODE_TREE, hash)
498 revs = list(git.rev_list(self.hash.encode('hex')))
500 for (date, commit) in revs:
501 l = time.localtime(date)
502 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
503 commithex = commit.encode('hex')
504 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
505 n1 = FakeSymlink(self, ls, target)
506 n1.ctime = n1.mtime = date
509 for tag in tags.get(commit, []):
510 t1 = FakeSymlink(self, tag, target)
511 t1.ctime = t1.mtime = date
514 (date, commit) = latest
515 commithex = commit.encode('hex')
516 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
517 n1 = FakeSymlink(self, 'latest', target)
518 n1.ctime = n1.mtime = date
519 self._subs['latest'] = n1
523 """A list of branches in bup's repository.
525 The sub-nodes of the ref list are a series of BranchList nodes, one for
526 each branch (refs/heads/*) in the repository.
528 Also, a special sub-node named '.commit' contains all commit directories
529 that are reachable via a ref (e.g. a branch). See CommitDir for details.
531 def __init__(self, parent):
532 Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA)
537 commit_dir = CommitDir(self, '.commit')
538 self._subs['.commit'] = commit_dir
540 tag_dir = TagDir(self, '.tag')
541 self._subs['.tag'] = tag_dir
543 for (name,sha) in git.list_refs():
544 if name.startswith('refs/heads/'):
546 date = git.rev_get_date(sha.encode('hex'))
547 n1 = BranchList(self, name, sha)
548 n1.ctime = n1.mtime = date
549 self._subs[name] = n1