1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
15 """Create a git.CatPipe object or reuse the already existing one."""
21 class NodeError(Exception):
22 """VFS base exception."""
25 class NoSuchFile(NodeError):
26 """Request of a file that does not exist."""
29 class NotDir(NodeError):
30 """Attempt to do a directory action on a file that is not one."""
33 class NotFile(NodeError):
34 """Access to a node that does not represent a file."""
37 class TooManySymlinks(NodeError):
38 """Symlink dereferencing level is too deep."""
43 it = cp().get(hash.encode('hex'))
45 assert(type == 'tree')
46 return git.tree_decode(''.join(it))
49 def _tree_decode(hash):
50 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
53 assert(tree == list(sorted(tree)))
58 return sum(len(b) for b in cp().join(hash.encode('hex')))
61 def _last_chunk_info(hash):
62 tree = _tree_decode(hash)
64 (ofs,isdir,sha) = tree[-1]
66 (subofs, sublen) = _last_chunk_info(sha)
67 return (ofs+subofs, sublen)
69 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the total size of the chunked object stored at 'hash'.

    _last_chunk_info() reports where the final chunk starts and how long it
    is; the overall size is the position at which that final chunk ends.
    """
    final_offset, final_length = _last_chunk_info(hash)
    return final_offset + final_length
77 def _chunkiter(hash, startofs):
79 tree = _tree_decode(hash)
81 # skip elements before startofs
82 for i in xrange(len(tree)):
83 if i+1 >= len(tree) or tree[i+1][0] > startofs:
87 # iterate through what's left
88 for i in xrange(first, len(tree)):
89 (ofs,isdir,sha) = tree[i]
90 skipmore = startofs-ofs
94 for b in _chunkiter(sha, skipmore):
97 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
101 def __init__(self, hash, isdir, startofs):
103 self.it = _chunkiter(hash, startofs)
107 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
110 def next(self, size):
112 while len(out) < size:
113 if self.it and not self.blob:
115 self.blob = self.it.next()
116 except StopIteration:
119 want = size - len(out)
120 out += self.blob[:want]
121 self.blob = self.blob[want:]
124 debug2('next(%d) returned %d\n' % (size, len(out)))
129 class _FileReader(object):
130 def __init__(self, hash, size, isdir):
148 def read(self, count = -1):
150 count = self.size - self.ofs
151 if not self.reader or self.reader.ofs != self.ofs:
152 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
154 buf = self.reader.next(count)
157 raise # our offsets will be all screwed up otherwise
166 """Base class for file representation."""
167 def __init__(self, parent, name, mode, hash):
172 self.ctime = self.mtime = self.atime = 0
174 self._metadata = None
177 return "<bup.vfs.Node object at X - name:%r hash:%s parent:%r>" \
178 % (self.name, self.hash.encode('hex'),
179 self.parent.name if self.parent.name else None)
184 return (cmp(a and a.parent, b and b.parent) or
185 cmp(a and a.name, b and b.name))
188 return iter(self.subs())
190 def fullname(self, stop_at=None):
191 """Get this file's full path."""
192 assert(self != stop_at) # would be the empty string; too weird
193 if self.parent and self.parent != stop_at:
194 return os.path.join(self.parent.fullname(stop_at=stop_at),
203 """Get a list of nodes that are contained in this node."""
204 if self._subs == None:
206 return sorted(self._subs.values())
209 """Get node named 'name' that is contained in this node."""
210 if self._subs == None:
212 ret = self._subs.get(name)
214 raise NoSuchFile("no file %r in %r" % (name, self.name))
218 """Return the very top node of the tree."""
220 return self.parent.top()
225 """Return the top node of the particular backup set.
227 If this node isn't inside a backup set, return the root level.
229 if self.parent and not isinstance(self.parent, CommitList):
230 return self.parent.fs_top()
234 def _lresolve(self, parts):
235 #debug2('_lresolve %r in %r\n' % (parts, self.name))
238 (first, rest) = (parts[0], parts[1:])
240 return self._lresolve(rest)
243 raise NoSuchFile("no parent dir for %r" % self.name)
244 return self.parent._lresolve(rest)
246 return self.sub(first)._lresolve(rest)
248 return self.sub(first)
250 def lresolve(self, path, stay_inside_fs=False):
251 """Walk into a given sub-path of this node.
253 If the last element is a symlink, leave it as a symlink, don't resolve
259 if path.startswith('/'):
261 start = self.fs_top()
265 parts = re.split(r'/+', path or '.')
268 #debug2('parts: %r %r\n' % (path, parts))
269 return start._lresolve(parts)
def resolve(self, path = ''):
    """Walk to 'path' like lresolve(), then dereference a trailing symlink.

    The first lookup leaves a final symlink untouched; resolving '.' from
    the node it returns is what follows that link, if there is one.
    """
    found = self.lresolve(path)
    return found.lresolve('.')
275 def try_resolve(self, path = ''):
276 """Like resolve(), but don't worry if a symlink uses an invalid path.
278 Returns an error if any intermediate nodes were invalid.
280 n = self.lresolve(path)
288 """Get the number of hard links to the current node."""
289 if self._subs == None:
294 """Get the size of the current node."""
298 """Open the current node. It is an error to open a non-file node."""
299 raise NotFile('%s is not a regular file' % self.name)
301 def _populate_metadata(self):
302 # Only Dirs contain .bupm files, so by default, do nothing.
306 """Return this Node's Metadata() object, if any."""
308 self.parent._populate_metadata()
309 return self._metadata
313 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode):
    """Set up a file node; 'bupmode' is kept for later storage checks."""
    Node.__init__(self, parent, name, mode, hash)
    # Both caches start out empty and are only filled in on first use.
    self._filereader = self._cached_size = None
    self.bupmode = bupmode
322 # You'd think FUSE might call this only once each time a file is
323 # opened, but no; it's really more of a refcount, and it's called
324 # once per read(). Thus, it's important to cache the filereader
325 # object here so we're not constantly re-seeking.
326 if not self._filereader:
327 self._filereader = _FileReader(self.hash, self.size(),
328 self.bupmode == git.BUP_CHUNKED)
329 self._filereader.seek(0)
330 return self._filereader
333 """Get this file's size."""
334 if self._cached_size == None:
335 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
336 if self.bupmode == git.BUP_CHUNKED:
337 self._cached_size = _total_size(self.hash)
339 self._cached_size = _chunk_len(self.hash)
340 debug1('<<<<File.size() done.\n')
341 return self._cached_size
346 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Create a symlink node called 'name' under 'parent'.

    'hash' identifies the object that holds the link target, and 'bupmode'
    is handed to File unchanged.  The mode is always the symbolic-link
    file type.
    """
    # stat.S_IFLNK is the named form of the former 0120000 literal: it
    # matches the stat.S_ISLNK() check used when directories are populated
    # and avoids the Python-2-only octal spelling.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)
351 """Get the file size of the file at which this link points."""
352 return len(self.readlink())
355 """Get the path that this link points at."""
356 return ''.join(cp().join(self.hash.encode('hex')))
358 def dereference(self):
359 """Get the node that this link points at.
361 If the path is invalid, raise a NoSuchFile exception. If the level of
362 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
367 raise TooManySymlinks('too many levels of symlinks: %r'
372 return self.parent.lresolve(self.readlink(),
375 raise NoSuchFile("%s: broken symlink to %r"
376 % (self.fullname(), self.readlink()))
380 def _lresolve(self, parts):
381 return self.dereference()._lresolve(parts)
384 class FakeSymlink(Symlink):
385 """A symlink that is not stored in the bup repository."""
386 def __init__(self, parent, name, toname):
387 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
391 """Get the path that this link points at."""
396 """A directory stored inside of bup's repository."""
398 def __init__(self, *args):
399 Node.__init__(self, *args)
402 def _populate_metadata(self):
407 meta_stream = self._bupm.open()
408 self._metadata = metadata.Metadata.read(meta_stream)
410 if not stat.S_ISDIR(sub.mode):
411 sub._metadata = metadata.Metadata.read(meta_stream)
415 it = cp().get(self.hash.encode('hex'))
419 it = cp().get(self.hash.encode('hex') + ':')
421 assert(type == 'tree')
422 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
423 if mangled_name == '.bupm':
424 self._bupm = File(self, mangled_name, mode, sha, git.BUP_NORMAL)
427 (name,bupmode) = git.demangle_name(mangled_name)
428 if bupmode == git.BUP_CHUNKED:
430 if stat.S_ISDIR(mode):
431 self._subs[name] = Dir(self, name, mode, sha)
432 elif stat.S_ISLNK(mode):
433 self._subs[name] = Symlink(self, name, sha, bupmode)
435 self._subs[name] = File(self, name, mode, sha, bupmode)
438 """Return this Dir's Metadata() object, if any."""
439 self._populate_metadata()
440 return self._metadata
442 def metadata_file(self):
443 """Return this Dir's .bupm File, if any."""
444 self._populate_metadata()
448 class CommitDir(Node):
449 """A directory that contains all commits that are reachable by a ref.
451 Contains a set of subdirectories named after the commits' first byte in
452 hexadecimal. Each of those directories contains all commits with hashes that
453 start the same as the directory name. The name used for those
454 subdirectories is the hash of the commit without the first byte. This
455 separation helps us avoid having too many directories on the same level as
456 the number of commits grows big.
458 def __init__(self, parent, name):
459 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
463 refs = git.list_refs()
465 #debug2('ref name: %s\n' % ref[0])
466 revs = git.rev_list(ref[1].encode('hex'))
467 for (date, commit) in revs:
468 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
469 commithex = commit.encode('hex')
470 containername = commithex[:2]
471 dirname = commithex[2:]
472 n1 = self._subs.get(containername)
474 n1 = CommitList(self, containername)
475 self._subs[containername] = n1
477 if n1.commits.get(dirname):
478 # Stop work for this ref, the rest should already be present
481 n1.commits[dirname] = (commit, date)
484 class CommitList(Node):
485 """A list of commits with hashes that start with the current node's name."""
486 def __init__(self, parent, name):
487 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
492 for (name, (hash, date)) in self.commits.items():
493 n1 = Dir(self, name, GIT_MODE_TREE, hash)
494 n1.ctime = n1.mtime = date
495 self._subs[name] = n1
499 """A directory that contains all tags in the repository."""
500 def __init__(self, parent, name):
501 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
505 for (name, sha) in git.list_refs():
506 if name.startswith('refs/tags/'):
508 date = git.rev_get_date(sha.encode('hex'))
509 commithex = sha.encode('hex')
510 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
511 tag1 = FakeSymlink(self, name, target)
512 tag1.ctime = tag1.mtime = date
513 self._subs[name] = tag1
516 class BranchList(Node):
517 """A list of links to commits reachable by a branch in bup's repository.
519 Represents each commit as a symlink that points to the commit directory in
520 /.commit/??/ . The symlink is named after the commit date.
522 def __init__(self, parent, name, hash):
523 Node.__init__(self, parent, name, GIT_MODE_TREE, hash)
530 revs = list(git.rev_list(self.hash.encode('hex')))
532 for (date, commit) in revs:
533 l = time.localtime(date)
534 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
535 commithex = commit.encode('hex')
536 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
537 n1 = FakeSymlink(self, ls, target)
538 n1.ctime = n1.mtime = date
541 for tag in tags.get(commit, []):
542 t1 = FakeSymlink(self, tag, target)
543 t1.ctime = t1.mtime = date
546 (date, commit) = latest
547 commithex = commit.encode('hex')
548 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
549 n1 = FakeSymlink(self, 'latest', target)
550 n1.ctime = n1.mtime = date
551 self._subs['latest'] = n1
555 """A list of branches in bup's repository.
557 The sub-nodes of the ref list are a series of BranchList nodes, one for each
558 commit hash pointed to by a branch.
560 Also, a special sub-node named '.commit' contains all commit directories
561 that are reachable via a ref (e.g. a branch). See CommitDir for details.
563 def __init__(self, parent):
564 Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA)
569 commit_dir = CommitDir(self, '.commit')
570 self._subs['.commit'] = commit_dir
572 tag_dir = TagDir(self, '.tag')
573 self._subs['.tag'] = tag_dir
575 for (name,sha) in git.list_refs():
576 if name.startswith('refs/heads/'):
578 date = git.rev_get_date(sha.encode('hex'))
579 n1 = BranchList(self, name, sha)
580 n1.ctime = n1.mtime = date
581 self._subs[name] = n1