1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
14 """Create a git.CatPipe object or reuse the already existing one."""
class NodeError(Exception):
    """Root of the VFS exception hierarchy; other VFS errors derive from it."""
class NoSuchFile(NodeError):
    """Raised when a requested file does not exist."""
class NotDir(NodeError):
    """Raised when a directory operation is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that is not a regular file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when a chain of symlinks is nested too deeply to dereference."""
42 it = cp().get(hash.encode('hex'))
44 assert(type == 'tree')
45 return git.treeparse(''.join(it))
48 def _tree_decode(hash):
49 tree = [(int(name,16),stat.S_ISDIR(int(mode,8)),sha)
52 assert(tree == list(sorted(tree)))
57 return sum(len(b) for b in cp().join(hash.encode('hex')))
60 def _last_chunk_info(hash):
61 tree = _tree_decode(hash)
63 (ofs,isdir,sha) = tree[-1]
65 (subofs, sublen) = _last_chunk_info(sha)
66 return (ofs+subofs, sublen)
68 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the total size of the chunked object identified by `hash`.

    The size is the offset of the last chunk plus that chunk's length, both
    of which are reported by _last_chunk_info().
    """
    final_offset, final_length = _last_chunk_info(hash)
    return final_offset + final_length
76 def _chunkiter(hash, startofs):
78 tree = _tree_decode(hash)
80 # skip elements before startofs
81 for i in xrange(len(tree)):
82 if i+1 >= len(tree) or tree[i+1][0] > startofs:
86 # iterate through what's left
87 for i in xrange(first, len(tree)):
88 (ofs,isdir,sha) = tree[i]
89 skipmore = startofs-ofs
93 for b in _chunkiter(sha, skipmore):
96 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
99 class _ChunkReader(object):
100 def __init__(self, hash, isdir, startofs):
102 self.it = _chunkiter(hash, startofs)
106 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
109 def next(self, size):
111 while len(out) < size:
112 if self.it and not self.blob:
114 self.blob = self.it.next()
115 except StopIteration:
118 want = size - len(out)
119 out += self.blob[:want]
120 self.blob = self.blob[want:]
123 debug2('next(%d) returned %d\n' % (size, len(out)))
128 class _FileReader(object):
129 def __init__(self, hash, size, isdir):
147 def read(self, count = -1):
149 count = self.size - self.ofs
150 if not self.reader or self.reader.ofs != self.ofs:
151 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
153 buf = self.reader.next(count)
156 raise # our offsets will be all screwed up otherwise
165 """Base class for file representation."""
166 def __init__(self, parent, name, mode, hash):
171 self.ctime = self.mtime = self.atime = 0
175 return cmp(a and a.name or None, b and b.name or None)
178 return iter(self.subs())
180 def fullname(self, stop_at=None):
181 """Get this file's full path."""
182 assert(self != stop_at) # would be the empty string; too weird
183 if self.parent and self.parent != stop_at:
184 return os.path.join(self.parent.fullname(stop_at=stop_at),
193 """Get a list of nodes that are contained in this node."""
194 if self._subs == None:
196 return sorted(self._subs.values())
199 """Get node named 'name' that is contained in this node."""
200 if self._subs == None:
202 ret = self._subs.get(name)
204 raise NoSuchFile("no file %r in %r" % (name, self.name))
208 """Return the very top node of the tree."""
210 return self.parent.top()
215 """Return the top node of the particular backup set.
217 If this node isn't inside a backup set, return the root level.
219 if self.parent and not isinstance(self.parent, CommitList):
220 return self.parent.fs_top()
224 def _lresolve(self, parts):
225 #debug2('_lresolve %r in %r\n' % (parts, self.name))
228 (first, rest) = (parts[0], parts[1:])
230 return self._lresolve(rest)
233 raise NoSuchFile("no parent dir for %r" % self.name)
234 return self.parent._lresolve(rest)
236 return self.sub(first)._lresolve(rest)
238 return self.sub(first)
240 def lresolve(self, path, stay_inside_fs=False):
241 """Walk into a given sub-path of this node.
243 If the last element is a symlink, leave it as a symlink, don't resolve
249 if path.startswith('/'):
251 start = self.fs_top()
255 parts = re.split(r'/+', path or '.')
258 #debug2('parts: %r %r\n' % (path, parts))
259 return start._lresolve(parts)
def resolve(self, path = ''):
    """Walk to `path` like lresolve(), then follow a trailing symlink.

    The node found by lresolve() is asked to resolve '.' relative to
    itself, which dereferences it if it is a symlink.
    """
    found = self.lresolve(path)
    return found.lresolve('.')
265 def try_resolve(self, path = ''):
266 """Like resolve(), but don't worry if a symlink uses an invalid path.
268 Returns an error if any intermediate nodes were invalid.
270 n = self.lresolve(path)
278 """Get the number of hard links to the current node."""
279 if self._subs == None:
284 """Get the size of the current node."""
288 """Open the current node. It is an error to open a non-file node."""
289 raise NotFile('%s is not a regular file' % self.name)
293 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode):
    """Initialise a file node on top of the generic Node set-up.

    parent, name, mode and hash are handed to Node.__init__() unchanged;
    bupmode is kept on the instance for later comparison against the
    git.BUP_* storage modes.
    """
    Node.__init__(self, parent, name, mode, hash)
    # Neither the size nor the reader is computed up front; both caches
    # start out empty.
    self._filereader = self._cached_size = None
    self.bupmode = bupmode
302 # You'd think FUSE might call this only once each time a file is
303 # opened, but no; it's really more of a refcount, and it's called
304 # once per read(). Thus, it's important to cache the filereader
305 # object here so we're not constantly re-seeking.
306 if not self._filereader:
307 self._filereader = _FileReader(self.hash, self.size(),
308 self.bupmode == git.BUP_CHUNKED)
309 self._filereader.seek(0)
310 return self._filereader
313 """Get this file's size."""
314 if self._cached_size == None:
315 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
316 if self.bupmode == git.BUP_CHUNKED:
317 self._cached_size = _total_size(self.hash)
319 self._cached_size = _chunk_len(self.hash)
320 debug1('<<<<File.size() done.\n')
321 return self._cached_size
326 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Initialise a symlink node; the mode is always the symlink file type.

    parent, name, hash and bupmode are forwarded unchanged to
    File.__init__(); callers never supply a mode themselves.
    """
    # stat.S_IFLNK has the value 0120000 (octal); using the named constant
    # avoids a magic number and the Python-2-only octal literal syntax.
    File.__init__(self, parent, name, stat.S_IFLNK, hash, bupmode)
331 """Get the file size of the file at which this link points."""
332 return len(self.readlink())
335 """Get the path that this link points at."""
336 return ''.join(cp().join(self.hash.encode('hex')))
338 def dereference(self):
339 """Get the node that this link points at.
341 If the path is invalid, raise a NoSuchFile exception. If the level of
342 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
347 raise TooManySymlinks('too many levels of symlinks: %r'
352 return self.parent.lresolve(self.readlink(),
355 raise NoSuchFile("%s: broken symlink to %r"
356 % (self.fullname(), self.readlink()))
360 def _lresolve(self, parts):
361 return self.dereference()._lresolve(parts)
364 class FakeSymlink(Symlink):
365 """A symlink that is not stored in the bup repository."""
366 def __init__(self, parent, name, toname):
367 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
371 """Get the path that this link points at."""
376 """A directory stored inside of bup's repository."""
379 it = cp().get(self.hash.encode('hex'))
383 it = cp().get(self.hash.encode('hex') + ':')
385 assert(type == 'tree')
386 for (mode,mangled_name,sha) in git.treeparse(''.join(it)):
389 (name,bupmode) = git.demangle_name(mangled_name)
390 if bupmode == git.BUP_CHUNKED:
392 if stat.S_ISDIR(mode):
393 self._subs[name] = Dir(self, name, mode, sha)
394 elif stat.S_ISLNK(mode):
395 self._subs[name] = Symlink(self, name, sha, bupmode)
397 self._subs[name] = File(self, name, mode, sha, bupmode)
400 class CommitList(Node):
401 """A reverse-chronological list of commits on a branch in bup's repository.
403 Represents each commit as a directory and a symlink that points to the
404 directory. The symlink is named after the date. Prepends a dot to each hash
405 to make commits look like hidden directories.
def __init__(self, parent, name, hash):
    """Initialise the commit list as a directory-mode node for `hash`.

    parent, name and hash are forwarded unchanged to Node.__init__().
    """
    # stat.S_IFDIR has the value 040000 (octal); using the named constant
    # avoids a magic number and the Python-2-only octal literal syntax.
    Node.__init__(self, parent, name, stat.S_IFDIR, hash)
412 revs = list(git.rev_list(self.hash.encode('hex')))
413 for (date, commit) in revs:
414 l = time.localtime(date)
415 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
416 commithex = '.' + commit.encode('hex')
417 n1 = Dir(self, commithex, 040000, commit)
418 n2 = FakeSymlink(self, ls, commithex)
419 n1.ctime = n1.mtime = n2.ctime = n2.mtime = date
420 self._subs[commithex] = n1
424 (date, commit) = latest
425 commithex = '.' + commit.encode('hex')
426 n2 = FakeSymlink(self, 'latest', commithex)
427 n2.ctime = n2.mtime = date
428 self._subs['latest'] = n2
432 """A list of branches in bup's repository.
434 The sub-nodes of the ref list are a series of CommitList for each commit
435 hash pointed to by a branch.
def __init__(self, parent):
    """Initialise the ref list as a directory-mode node named '/'.

    The node is created with EMPTY_SHA as its hash; only `parent` is
    supplied by the caller.
    """
    # stat.S_IFDIR has the value 040000 (octal); using the named constant
    # avoids a magic number and the Python-2-only octal literal syntax.
    Node.__init__(self, parent, '/', stat.S_IFDIR, EMPTY_SHA)
442 for (name,sha) in git.list_refs():
443 if name.startswith('refs/heads/'):
445 date = git.rev_get_date(sha.encode('hex'))
446 n1 = CommitList(self, name, sha)
447 n1.ctime = n1.mtime = date
448 self._subs[name] = n1