1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.git import BUP_NORMAL, BUP_CHUNKED
10 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
16 """Create a git.CatPipe object or reuse the already existing one."""
class NodeError(Exception):
    """Common base class for the exceptions raised by the VFS.

    Catching NodeError also catches the more specific VFS errors that
    derive from it (NoSuchFile, NotDir, NotFile, TooManySymlinks).
    """
class NoSuchFile(NodeError):
    """Raised when a requested file or path cannot be found in the VFS."""
class NotDir(NodeError):
    """Raised when a directory operation is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that is not a regular file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when following symlinks nests too many levels deep."""
44 it = cp().get(hash.encode('hex'))
46 assert(type == 'tree')
47 return git.tree_decode(''.join(it))
50 def _tree_decode(hash):
51 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
54 assert(tree == list(sorted(tree)))
59 return sum(len(b) for b in cp().join(hash.encode('hex')))
62 def _last_chunk_info(hash):
63 tree = _tree_decode(hash)
65 (ofs,isdir,sha) = tree[-1]
67 (subofs, sublen) = _last_chunk_info(sha)
68 return (ofs+subofs, sublen)
70 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the total size of the chunked object identified by `hash`.

    The size is the offset at which the final chunk starts plus the
    length of that final chunk, both as reported by _last_chunk_info().
    """
    final_offset, final_length = _last_chunk_info(hash)
    return final_offset + final_length
78 def _chunkiter(hash, startofs):
80 tree = _tree_decode(hash)
82 # skip elements before startofs
83 for i in xrange(len(tree)):
84 if i+1 >= len(tree) or tree[i+1][0] > startofs:
88 # iterate through what's left
89 for i in xrange(first, len(tree)):
90 (ofs,isdir,sha) = tree[i]
91 skipmore = startofs-ofs
95 for b in _chunkiter(sha, skipmore):
98 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
102 def __init__(self, hash, isdir, startofs):
104 self.it = _chunkiter(hash, startofs)
108 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
111 def next(self, size):
113 while len(out) < size:
114 if self.it and not self.blob:
116 self.blob = self.it.next()
117 except StopIteration:
120 want = size - len(out)
121 out += self.blob[:want]
122 self.blob = self.blob[want:]
125 debug2('next(%d) returned %d\n' % (size, len(out)))
130 class _FileReader(object):
131 def __init__(self, hash, size, isdir):
149 def read(self, count = -1):
151 count = self.size - self.ofs
152 if not self.reader or self.reader.ofs != self.ofs:
153 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
155 buf = self.reader.next(count)
158 raise # our offsets will be all screwed up otherwise
167 """Base class for file representation."""
168 def __init__(self, parent, name, mode, hash):
173 self.ctime = self.mtime = self.atime = 0
175 self._metadata = None
178 return "<%s object at %s - name:%r hash:%s parent:%r>" \
179 % (self.__class__, hex(id(self)),
180 self.name, self.hash.encode('hex'),
181 self.parent.name if self.parent else None)
186 return (cmp(a and a.parent, b and b.parent) or
187 cmp(a and a.name, b and b.name))
190 return iter(self.subs())
192 def fullname(self, stop_at=None):
193 """Get this file's full path."""
194 assert(self != stop_at) # would be the empty string; too weird
195 if self.parent and self.parent != stop_at:
196 return os.path.join(self.parent.fullname(stop_at=stop_at),
205 """Get a list of nodes that are contained in this node."""
206 if self._subs == None:
208 return sorted(self._subs.values())
211 """Get node named 'name' that is contained in this node."""
212 if self._subs == None:
214 ret = self._subs.get(name)
216 raise NoSuchFile("no file %r in %r" % (name, self.name))
220 """Return the very top node of the tree."""
222 return self.parent.top()
227 """Return the top node of the particular backup set.
229 If this node isn't inside a backup set, return the root level.
231 if self.parent and not isinstance(self.parent, CommitList):
232 return self.parent.fs_top()
236 def _lresolve(self, parts):
237 #debug2('_lresolve %r in %r\n' % (parts, self.name))
240 (first, rest) = (parts[0], parts[1:])
242 return self._lresolve(rest)
245 raise NoSuchFile("no parent dir for %r" % self.name)
246 return self.parent._lresolve(rest)
248 return self.sub(first)._lresolve(rest)
250 return self.sub(first)
252 def lresolve(self, path, stay_inside_fs=False):
253 """Walk into a given sub-path of this node.
255 If the last element is a symlink, leave it as a symlink, don't resolve
261 if path.startswith('/'):
263 start = self.fs_top()
267 parts = re.split(r'/+', path or '.')
270 #debug2('parts: %r %r\n' % (path, parts))
271 return start._lresolve(parts)
def resolve(self, path = ''):
    """Walk to `path` like lresolve(), then dereference a final symlink.

    The node found by lresolve() is itself asked to resolve '.', so a
    symlink in last position is followed instead of being returned as is.
    """
    found = self.lresolve(path)
    return found.lresolve('.')
277 def try_resolve(self, path = ''):
278 """Like resolve(), but don't worry if a symlink uses an invalid path.
280 Raises an error if any intermediate nodes were invalid.
282 n = self.lresolve(path)
290 """Get the number of hard links to the current node."""
291 if self._subs == None:
296 """Get the size of the current node."""
300 """Open the current node. It is an error to open a non-file node."""
301 raise NotFile('%s is not a regular file' % self.name)
303 def _populate_metadata(self):
304 # Only Dirs contain .bupm files, so by default, do nothing.
308 """Return this Node's Metadata() object, if any."""
310 self.parent._populate_metadata()
311 return self._metadata
315 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode):
    """Set up a file node and remember its bup storage mode.

    `bupmode` is later compared against git.BUP_CHUNKED to decide how
    the file's size and contents are read.
    """
    Node.__init__(self, parent, name, mode, hash)
    self.bupmode = bupmode
    # Both caches start empty and are filled on first use: size() stores
    # the computed size, open() stores the reader object it creates.
    self._cached_size = self._filereader = None
324 # You'd think FUSE might call this only once each time a file is
325 # opened, but no; it's really more of a refcount, and it's called
326 # once per read(). Thus, it's important to cache the filereader
327 # object here so we're not constantly re-seeking.
328 if not self._filereader:
329 self._filereader = _FileReader(self.hash, self.size(),
330 self.bupmode == git.BUP_CHUNKED)
331 self._filereader.seek(0)
332 return self._filereader
335 """Get this file's size."""
336 if self._cached_size == None:
337 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
338 if self.bupmode == git.BUP_CHUNKED:
339 self._cached_size = _total_size(self.hash)
341 self._cached_size = _chunk_len(self.hash)
342 debug1('<<<<File.size() done.\n')
343 return self._cached_size
348 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Set up a symlink node.

    The node's mode is always the symlink file type; the remaining
    arguments are passed straight through to File.__init__().
    """
    # stat.S_IFLNK replaces the bare octal literal 0120000: it names the
    # intent, pairs with the stat.S_ISLNK() test used when directory
    # entries are classified, and avoids the Python-2-only octal syntax.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)
353 """Get the length of the path that this link points at."""
354 return len(self.readlink())
357 """Get the path that this link points at."""
358 return ''.join(cp().join(self.hash.encode('hex')))
360 def dereference(self):
361 """Get the node that this link points at.
363 If the path is invalid, raise a NoSuchFile exception. If the level of
364 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
369 raise TooManySymlinks('too many levels of symlinks: %r'
374 return self.parent.lresolve(self.readlink(),
377 raise NoSuchFile("%s: broken symlink to %r"
378 % (self.fullname(), self.readlink()))
def _lresolve(self, parts):
    """Resolve `parts` starting from the node this symlink points at."""
    link_target = self.dereference()
    return link_target._lresolve(parts)
386 class FakeSymlink(Symlink):
387 """A symlink that is not stored in the bup repository."""
388 def __init__(self, parent, name, toname):
389 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
393 """Get the path that this link points at."""
398 """A directory stored inside of bup's repository."""
400 def __init__(self, *args, **kwargs):
401 Node.__init__(self, *args, **kwargs)
404 def _populate_metadata(self):
411 meta_stream = self._bupm.open()
412 dir_meta = metadata.Metadata.read(meta_stream)
414 if not stat.S_ISDIR(sub.mode):
415 sub._metadata = metadata.Metadata.read(meta_stream)
416 self._metadata = dir_meta
420 it = cp().get(self.hash.encode('hex'))
424 it = cp().get(self.hash.encode('hex') + ':')
426 assert(type == 'tree')
427 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
428 if mangled_name == '.bupm':
429 bupmode = stat.S_ISDIR(mode) and BUP_CHUNKED or BUP_NORMAL
430 self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
434 (name,bupmode) = git.demangle_name(mangled_name)
435 if bupmode == git.BUP_CHUNKED:
437 if stat.S_ISDIR(mode):
438 self._subs[name] = Dir(self, name, mode, sha)
439 elif stat.S_ISLNK(mode):
440 self._subs[name] = Symlink(self, name, sha, bupmode)
442 self._subs[name] = File(self, name, mode, sha, bupmode)
445 """Return this Dir's Metadata() object, if any."""
446 self._populate_metadata()
447 return self._metadata
449 def metadata_file(self):
450 """Return this Dir's .bupm File, if any."""
456 class CommitDir(Node):
457 """A directory that contains all commits that are reachable by a ref.
459 Contains a set of subdirectories named after the commits' first byte in
460 hexadecimal. Each of those directories contains all commits with hashes that
461 start the same as the directory name. The name used for those
462 subdirectories is the hash of the commit without the first byte. This
463 separation helps us avoid having too many directories on the same level as
464 the number of commits grows big.
def __init__(self, parent, name):
    """Register this node under `parent` as a tree-mode node."""
    # The node is created with tree mode and EMPTY_SHA as its hash.
    mode, sha = GIT_MODE_TREE, EMPTY_SHA
    Node.__init__(self, parent, name, mode, sha)
471 refs = git.list_refs()
473 #debug2('ref name: %s\n' % ref[0])
474 revs = git.rev_list(ref[1].encode('hex'))
475 for (date, commit) in revs:
476 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
477 commithex = commit.encode('hex')
478 containername = commithex[:2]
479 dirname = commithex[2:]
480 n1 = self._subs.get(containername)
482 n1 = CommitList(self, containername)
483 self._subs[containername] = n1
485 if n1.commits.get(dirname):
486 # Stop work for this ref, the rest should already be present
489 n1.commits[dirname] = (commit, date)
492 class CommitList(Node):
493 """A list of commits with hashes that start with the current node's name."""
494 def __init__(self, parent, name):
495 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
500 for (name, (hash, date)) in self.commits.items():
501 n1 = Dir(self, name, GIT_MODE_TREE, hash)
502 n1.ctime = n1.mtime = date
503 self._subs[name] = n1
507 """A directory that contains all tags in the repository."""
def __init__(self, parent, name):
    """Register this node under `parent` as a tree-mode node."""
    # The node is created with tree mode and EMPTY_SHA as its hash.
    mode, sha = GIT_MODE_TREE, EMPTY_SHA
    Node.__init__(self, parent, name, mode, sha)
513 for (name, sha) in git.list_refs():
514 if name.startswith('refs/tags/'):
516 date = git.rev_get_date(sha.encode('hex'))
517 commithex = sha.encode('hex')
518 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
519 tag1 = FakeSymlink(self, name, target)
520 tag1.ctime = tag1.mtime = date
521 self._subs[name] = tag1
524 class BranchList(Node):
525 """A list of links to commits reachable by a branch in bup's repository.
527 Represents each commit as a symlink that points to the commit directory in
528 /.commit/??/ . The symlink is named after the commit date.
def __init__(self, parent, name, hash):
    """Register this node under `parent` as a tree-mode node for `hash`."""
    mode = GIT_MODE_TREE
    Node.__init__(self, parent, name, mode, hash)
538 revs = list(git.rev_list(self.hash.encode('hex')))
540 for (date, commit) in revs:
541 l = time.localtime(date)
542 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
543 commithex = commit.encode('hex')
544 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
545 n1 = FakeSymlink(self, ls, target)
546 n1.ctime = n1.mtime = date
549 for tag in tags.get(commit, []):
550 t1 = FakeSymlink(self, tag, target)
551 t1.ctime = t1.mtime = date
554 (date, commit) = latest
555 commithex = commit.encode('hex')
556 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
557 n1 = FakeSymlink(self, 'latest', target)
558 n1.ctime = n1.mtime = date
559 self._subs['latest'] = n1
563 """A list of branches in bup's repository.
565 The sub-nodes of the ref list are a series of BranchList nodes, one for each
566 branch (each ref found under refs/heads/).
568 Also, a special sub-node named '.commit' contains all commit directories
569 that are reachable via a ref (e.g. a branch). See CommitDir for details.
def __init__(self, parent):
    """Create the node named '/' as a tree-mode node under `parent`."""
    # The node is created with tree mode and EMPTY_SHA as its hash.
    mode, sha = GIT_MODE_TREE, EMPTY_SHA
    Node.__init__(self, parent, '/', mode, sha)
577 commit_dir = CommitDir(self, '.commit')
578 self._subs['.commit'] = commit_dir
580 tag_dir = TagDir(self, '.tag')
581 self._subs['.tag'] = tag_dir
583 for (name,sha) in git.list_refs():
584 if name.startswith('refs/heads/'):
586 date = git.rev_get_date(sha.encode('hex'))
587 n1 = BranchList(self, name, sha)
588 n1.ctime = n1.mtime = date
589 self._subs[name] = n1