1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
15 """Create a git.CatPipe object or reuse the already existing one."""
class NodeError(Exception):
    """Common base class for the VFS-specific exceptions.

    NoSuchFile, NotDir, NotFile and TooManySymlinks all derive from it.
    """
class NoSuchFile(NodeError):
    """Raised when a lookup names a file that does not exist."""
class NotDir(NodeError):
    """Raised when a directory action is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that does not represent a file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing goes too many levels deep."""
43 it = cp().get(hash.encode('hex'))
45 assert(type == 'tree')
46 return git.tree_decode(''.join(it))
49 def _tree_decode(hash):
50 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
53 assert(tree == list(sorted(tree)))
58 return sum(len(b) for b in cp().join(hash.encode('hex')))
61 def _last_chunk_info(hash):
62 tree = _tree_decode(hash)
64 (ofs,isdir,sha) = tree[-1]
66 (subofs, sublen) = _last_chunk_info(sha)
67 return (ofs+subofs, sublen)
69 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the total size of the object identified by hash.

    The size is the offset of the last chunk plus that chunk's length,
    both as reported by _last_chunk_info().
    """
    last_chunk = _last_chunk_info(hash)
    start, length = last_chunk
    return start + length
77 def _chunkiter(hash, startofs):
79 tree = _tree_decode(hash)
81 # skip elements before startofs
82 for i in xrange(len(tree)):
83 if i+1 >= len(tree) or tree[i+1][0] > startofs:
87 # iterate through what's left
88 for i in xrange(first, len(tree)):
89 (ofs,isdir,sha) = tree[i]
90 skipmore = startofs-ofs
94 for b in _chunkiter(sha, skipmore):
97 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
101 def __init__(self, hash, isdir, startofs):
103 self.it = _chunkiter(hash, startofs)
107 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
110 def next(self, size):
112 while len(out) < size:
113 if self.it and not self.blob:
115 self.blob = self.it.next()
116 except StopIteration:
119 want = size - len(out)
120 out += self.blob[:want]
121 self.blob = self.blob[want:]
124 debug2('next(%d) returned %d\n' % (size, len(out)))
129 class _FileReader(object):
130 def __init__(self, hash, size, isdir):
148 def read(self, count = -1):
150 count = self.size - self.ofs
151 if not self.reader or self.reader.ofs != self.ofs:
152 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
154 buf = self.reader.next(count)
157 raise # our offsets will be all screwed up otherwise
166 """Base class for file representation."""
167 def __init__(self, parent, name, mode, hash):
172 self.ctime = self.mtime = self.atime = 0
174 self._metadata = None
177 return "<%s object at %s - name:%r hash:%s parent:%r>" \
178 % (self.__class__, hex(id(self)),
179 self.name, self.hash.encode('hex'),
180 self.parent.name if self.parent else None)
185 return (cmp(a and a.parent, b and b.parent) or
186 cmp(a and a.name, b and b.name))
189 return iter(self.subs())
191 def fullname(self, stop_at=None):
192 """Get this file's full path."""
193 assert(self != stop_at) # would be the empty string; too weird
194 if self.parent and self.parent != stop_at:
195 return os.path.join(self.parent.fullname(stop_at=stop_at),
204 """Get a list of nodes that are contained in this node."""
205 if self._subs == None:
207 return sorted(self._subs.values())
210 """Get node named 'name' that is contained in this node."""
211 if self._subs == None:
213 ret = self._subs.get(name)
215 raise NoSuchFile("no file %r in %r" % (name, self.name))
219 """Return the very top node of the tree."""
221 return self.parent.top()
226 """Return the top node of the particular backup set.
228 If this node isn't inside a backup set, return the root level.
230 if self.parent and not isinstance(self.parent, CommitList):
231 return self.parent.fs_top()
235 def _lresolve(self, parts):
236 #debug2('_lresolve %r in %r\n' % (parts, self.name))
239 (first, rest) = (parts[0], parts[1:])
241 return self._lresolve(rest)
244 raise NoSuchFile("no parent dir for %r" % self.name)
245 return self.parent._lresolve(rest)
247 return self.sub(first)._lresolve(rest)
249 return self.sub(first)
251 def lresolve(self, path, stay_inside_fs=False):
252 """Walk into a given sub-path of this node.
254 If the last element is a symlink, leave it as a symlink, don't resolve
260 if path.startswith('/'):
262 start = self.fs_top()
266 parts = re.split(r'/+', path or '.')
269 #debug2('parts: %r %r\n' % (path, parts))
270 return start._lresolve(parts)
def resolve(self, path = ''):
    """Resolve path like lresolve(), dereferencing a final symlink.

    The node returned by lresolve(path) is itself asked to resolve '.',
    and the result of that second lookup is returned.
    """
    node = self.lresolve(path)
    return node.lresolve('.')
276 def try_resolve(self, path = ''):
277 """Like resolve(), but don't worry if a symlink uses an invalid path.
279 Returns an error if any intermediate nodes were invalid.
281 n = self.lresolve(path)
289 """Get the number of hard links to the current node."""
290 if self._subs == None:
295 """Get the size of the current node."""
299 """Open the current node. It is an error to open a non-file node."""
300 raise NotFile('%s is not a regular file' % self.name)
302 def _populate_metadata(self):
303 # Only Dirs contain .bupm files, so by default, do nothing.
307 """Return this Node's Metadata() object, if any."""
309 self.parent._populate_metadata()
310 return self._metadata
314 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode):
    """Set up a file node.

    parent, name, mode and hash are handed to Node.__init__ unchanged;
    bupmode is stored on the instance.
    """
    Node.__init__(self, parent, name, mode, hash)
    # Neither the size nor a reader is known yet; both start out as None.
    self._cached_size = self._filereader = None
    self.bupmode = bupmode
323 # You'd think FUSE might call this only once each time a file is
324 # opened, but no; it's really more of a refcount, and it's called
325 # once per read(). Thus, it's important to cache the filereader
326 # object here so we're not constantly re-seeking.
327 if not self._filereader:
328 self._filereader = _FileReader(self.hash, self.size(),
329 self.bupmode == git.BUP_CHUNKED)
330 self._filereader.seek(0)
331 return self._filereader
334 """Get this file's size."""
335 if self._cached_size == None:
336 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
337 if self.bupmode == git.BUP_CHUNKED:
338 self._cached_size = _total_size(self.hash)
340 self._cached_size = _chunk_len(self.hash)
341 debug1('<<<<File.size() done.\n')
342 return self._cached_size
347 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Create a symlink node.

    The mode passed to File.__init__ is always the symbolic-link file
    type; parent, name, hash and bupmode are passed through unchanged.
    """
    # stat.S_IFLNK has the same value as the former literal 0120000, but
    # that old-style octal spelling is a syntax error on Python 3.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)
352 """Get the length of the path that this link points at."""
353 return len(self.readlink())
356 """Get the path that this link points at."""
357 return ''.join(cp().join(self.hash.encode('hex')))
359 def dereference(self):
360 """Get the node that this link points at.
362 If the path is invalid, raise a NoSuchFile exception. If the level of
363 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
368 raise TooManySymlinks('too many levels of symlinks: %r'
373 return self.parent.lresolve(self.readlink(),
376 raise NoSuchFile("%s: broken symlink to %r"
377 % (self.fullname(), self.readlink()))
def _lresolve(self, parts):
    """Continue resolving parts from the node this link dereferences to."""
    target = self.dereference()
    return target._lresolve(parts)
385 class FakeSymlink(Symlink):
386 """A symlink that is not stored in the bup repository."""
387 def __init__(self, parent, name, toname):
388 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
392 """Get the path that this link points at."""
397 """A directory stored inside of bup's repository."""
399 def __init__(self, *args, **kwargs):
400 Node.__init__(self, *args, **kwargs)
403 def _populate_metadata(self):
410 meta_stream = self._bupm.open()
411 dir_meta = metadata.Metadata.read(meta_stream)
413 if not stat.S_ISDIR(sub.mode):
414 sub._metadata = metadata.Metadata.read(meta_stream)
415 self._metadata = dir_meta
419 it = cp().get(self.hash.encode('hex'))
423 it = cp().get(self.hash.encode('hex') + ':')
425 assert(type == 'tree')
426 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
427 if mangled_name == '.bupm':
428 self._bupm = File(self, mangled_name, mode, sha, git.BUP_NORMAL)
431 (name,bupmode) = git.demangle_name(mangled_name)
432 if bupmode == git.BUP_CHUNKED:
434 if stat.S_ISDIR(mode):
435 self._subs[name] = Dir(self, name, mode, sha)
436 elif stat.S_ISLNK(mode):
437 self._subs[name] = Symlink(self, name, sha, bupmode)
439 self._subs[name] = File(self, name, mode, sha, bupmode)
442 """Return this Dir's Metadata() object, if any."""
443 self._populate_metadata()
444 return self._metadata
446 def metadata_file(self):
447 """Return this Dir's .bupm File, if any."""
453 class CommitDir(Node):
454 """A directory that contains all commits that are reachable by a ref.
456 Contains a set of subdirectories named after the commits' first byte in
457 hexadecimal. Each of those directories contains all commits with hashes that
458 start the same as the directory name. The name used for those
459 subdirectories is the hash of the commit without the first byte. This
460 separation helps us avoid having too many directories on the same level as
461 the number of commits grows big.
def __init__(self, parent, name):
    """Initialise the node with GIT_MODE_TREE as mode and EMPTY_SHA as hash."""
    Node.__init__(
        self, parent, name,
        GIT_MODE_TREE, EMPTY_SHA)
468 refs = git.list_refs()
470 #debug2('ref name: %s\n' % ref[0])
471 revs = git.rev_list(ref[1].encode('hex'))
472 for (date, commit) in revs:
473 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
474 commithex = commit.encode('hex')
475 containername = commithex[:2]
476 dirname = commithex[2:]
477 n1 = self._subs.get(containername)
479 n1 = CommitList(self, containername)
480 self._subs[containername] = n1
482 if n1.commits.get(dirname):
483 # Stop work for this ref, the rest should already be present
486 n1.commits[dirname] = (commit, date)
489 class CommitList(Node):
490 """A list of commits with hashes that start with the current node's name."""
491 def __init__(self, parent, name):
492 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
497 for (name, (hash, date)) in self.commits.items():
498 n1 = Dir(self, name, GIT_MODE_TREE, hash)
499 n1.ctime = n1.mtime = date
500 self._subs[name] = n1
504 """A directory that contains all tags in the repository."""
def __init__(self, parent, name):
    """Initialise the node with GIT_MODE_TREE as mode and EMPTY_SHA as hash."""
    Node.__init__(
        self, parent, name,
        GIT_MODE_TREE, EMPTY_SHA)
510 for (name, sha) in git.list_refs():
511 if name.startswith('refs/tags/'):
513 date = git.rev_get_date(sha.encode('hex'))
514 commithex = sha.encode('hex')
515 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
516 tag1 = FakeSymlink(self, name, target)
517 tag1.ctime = tag1.mtime = date
518 self._subs[name] = tag1
521 class BranchList(Node):
522 """A list of links to commits reachable by a branch in bup's repository.
524 Represents each commit as a symlink that points to the commit directory in
525 /.commit/??/ . The symlink is named after the commit date.
def __init__(self, parent, name, hash):
    """Initialise the node with GIT_MODE_TREE as mode and the given hash.

    parent, name and hash are handed to Node.__init__ unchanged.
    """
    Node.__init__(
        self, parent, name,
        GIT_MODE_TREE, hash)
535 revs = list(git.rev_list(self.hash.encode('hex')))
537 for (date, commit) in revs:
538 l = time.localtime(date)
539 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
540 commithex = commit.encode('hex')
541 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
542 n1 = FakeSymlink(self, ls, target)
543 n1.ctime = n1.mtime = date
546 for tag in tags.get(commit, []):
547 t1 = FakeSymlink(self, tag, target)
548 t1.ctime = t1.mtime = date
551 (date, commit) = latest
552 commithex = commit.encode('hex')
553 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
554 n1 = FakeSymlink(self, 'latest', target)
555 n1.ctime = n1.mtime = date
556 self._subs['latest'] = n1
560 """A list of branches in bup's repository.
562 The sub-nodes of the ref list are a series of BranchList, one for each commit
563 hash pointed to by a branch.
565 Also, a special sub-node named '.commit' contains all commit directories
566 that are reachable via a ref (e.g. a branch). See CommitDir for details.
def __init__(self, parent):
    """Initialise the node with the fixed name '/'.

    The mode is GIT_MODE_TREE and the hash is EMPTY_SHA.
    """
    root_name = '/'
    Node.__init__(self, parent, root_name, GIT_MODE_TREE, EMPTY_SHA)
574 commit_dir = CommitDir(self, '.commit')
575 self._subs['.commit'] = commit_dir
577 tag_dir = TagDir(self, '.tag')
578 self._subs['.tag'] = tag_dir
580 for (name,sha) in git.list_refs():
581 if name.startswith('refs/heads/'):
583 date = git.rev_get_date(sha.encode('hex'))
584 n1 = BranchList(self, name, sha)
585 n1.ctime = n1.mtime = date
586 self._subs[name] = n1