1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
14 """Create a git.CatPipe object or reuse the already existing one."""
class NodeError(Exception):
    """Common base class for the errors raised by the VFS layer."""
class NoSuchFile(NodeError):
    """Raised when a requested file does not exist."""
class NotDir(NodeError):
    """Raised when a directory action is attempted on a non-directory."""
class NotFile(NodeError):
    """Raised when a node that is not a regular file is accessed as one."""
class TooManySymlinks(NodeError):
    """Raised when symlink dereferencing goes too many levels deep."""
42 it = cp().get(hash.encode('hex'))
44 assert(type == 'tree')
45 return git.treeparse(''.join(it))
48 def _tree_decode(hash):
49 tree = [(int(name,16),stat.S_ISDIR(int(mode,8)),sha)
52 assert(tree == list(sorted(tree)))
57 return sum(len(b) for b in cp().join(hash.encode('hex')))
60 def _last_chunk_info(hash):
61 tree = _tree_decode(hash)
63 (ofs,isdir,sha) = tree[-1]
65 (subofs, sublen) = _last_chunk_info(sha)
66 return (ofs+subofs, sublen)
68 return (ofs, _chunk_len(sha))
def _total_size(hash):
    """Return the end position of the last chunk reachable from 'hash'.

    _last_chunk_info() reports the last chunk as an (offset, length) pair;
    the total size is the sum of the two.
    """
    last_offset, last_length = _last_chunk_info(hash)
    total = last_offset + last_length
    return total
76 def _chunkiter(hash, startofs):
78 tree = _tree_decode(hash)
80 # skip elements before startofs
81 for i in xrange(len(tree)):
82 if i+1 >= len(tree) or tree[i+1][0] > startofs:
86 # iterate through what's left
87 for i in xrange(first, len(tree)):
88 (ofs,isdir,sha) = tree[i]
89 skipmore = startofs-ofs
93 for b in _chunkiter(sha, skipmore):
96 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
99 class _ChunkReader(object):
100 def __init__(self, hash, isdir, startofs):
102 self.it = _chunkiter(hash, startofs)
106 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
109 def next(self, size):
111 while len(out) < size:
112 if self.it and not self.blob:
114 self.blob = self.it.next()
115 except StopIteration:
118 want = size - len(out)
119 out += self.blob[:want]
120 self.blob = self.blob[want:]
123 log('next(%d) returned %d\n' % (size, len(out)))
128 class _FileReader(object):
129 def __init__(self, hash, size, isdir):
147 def read(self, count = -1):
149 count = self.size - self.ofs
150 if not self.reader or self.reader.ofs != self.ofs:
151 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
153 buf = self.reader.next(count)
156 raise # our offsets will be all screwed up otherwise
165 """Base class for file representation."""
166 def __init__(self, parent, name, mode, hash):
171 self.ctime = self.mtime = self.atime = 0
175 return cmp(a.name or None, b.name or None)
178 return iter(self.subs())
181 """Get this file's full path."""
183 return os.path.join(self.parent.fullname(), self.name)
191 """Get a list of nodes that are contained in this node."""
192 if self._subs == None:
194 return sorted(self._subs.values())
197 """Get node named 'name' that is contained in this node."""
198 if self._subs == None:
200 ret = self._subs.get(name)
202 raise NoSuchFile("no file %r in %r" % (name, self.name))
206 """Return the very top node of the tree."""
208 return self.parent.top()
213 """Return the top node of the particular backup set.
215 If this node isn't inside a backup set, return the root level.
217 if self.parent and not isinstance(self.parent, CommitList):
218 return self.parent.fs_top()
222 def _lresolve(self, parts):
223 #log('_lresolve %r in %r\n' % (parts, self.name))
226 (first, rest) = (parts[0], parts[1:])
228 return self._lresolve(rest)
231 raise NoSuchFile("no parent dir for %r" % self.name)
232 return self.parent._lresolve(rest)
234 return self.sub(first)._lresolve(rest)
236 return self.sub(first)
238 def lresolve(self, path, stay_inside_fs=False):
239 """Walk into a given sub-path of this node.
241 If the last element is a symlink, leave it as a symlink, don't resolve
247 if path.startswith('/'):
249 start = self.fs_top()
253 parts = re.split(r'/+', path or '.')
256 #log('parts: %r %r\n' % (path, parts))
257 return start._lresolve(parts)
def resolve(self, path = ''):
    """Walk to 'path' like lresolve(), then follow it if it is a symlink.

    lresolve() leaves a trailing symlink unresolved; resolving '.' on the
    node it returns is what dereferences that final link.
    """
    node = self.lresolve(path)
    return node.lresolve('.')
263 def try_resolve(self, path = ''):
264 """Like resolve(), but don't worry if a symlink uses an invalid path.
266 Returns an error if any intermediate nodes were invalid.
268 n = self.lresolve(path)
276 """Get the number of hard links to the current node."""
277 if self._subs == None:
282 """Get the size of the current node."""
286 """Open the current node. It is an error to open a non-file node."""
287 raise NotFile('%s is not a regular file' % self.name)
291 """A normal file from bup's repository."""
def __init__(self, parent, name, mode, hash, bupmode):
    """Set up a file node.

    parent, name, mode and hash are handed to Node.__init__() unchanged.
    bupmode is stored so it can later be compared with git.BUP_CHUNKED.
    """
    Node.__init__(self, parent, name, mode, hash)
    # Both caches start out empty and are filled in on first use.
    self._cached_size = self._filereader = None
    self.bupmode = bupmode
300 # You'd think FUSE might call this only once each time a file is
301 # opened, but no; it's really more of a refcount, and it's called
302 # once per read(). Thus, it's important to cache the filereader
303 # object here so we're not constantly re-seeking.
304 if not self._filereader:
305 self._filereader = _FileReader(self.hash, self.size(),
306 self.bupmode == git.BUP_CHUNKED)
307 self._filereader.seek(0)
308 return self._filereader
311 """Get this file's size."""
312 if self._cached_size == None:
313 log('<<<<File.size() is calculating...\n')
314 if self.bupmode == git.BUP_CHUNKED:
315 self._cached_size = _total_size(self.hash)
317 self._cached_size = _chunk_len(self.hash)
318 log('<<<<File.size() done.\n')
319 return self._cached_size
324 """A symbolic link from bup's repository."""
def __init__(self, parent, name, hash, bupmode):
    """Set up a symlink node; the mode is always the symlink file type."""
    # stat.S_IFLNK has the same value as the octal literal 0120000.
    link_mode = stat.S_IFLNK
    File.__init__(self, parent, name, link_mode, hash, bupmode)
329 """Get the file size of the file at which this link points."""
330 return len(self.readlink())
333 """Get the path that this link points at."""
334 return ''.join(cp().join(self.hash.encode('hex')))
336 def dereference(self):
337 """Get the node that this link points at.
339 If the path is invalid, raise a NoSuchFile exception. If the level of
340 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
345 raise TooManySymlinks('too many levels of symlinks: %r'
350 return self.parent.lresolve(self.readlink(),
353 raise NoSuchFile("%s: broken symlink to %r"
354 % (self.fullname(), self.readlink()))
358 def _lresolve(self, parts):
359 return self.dereference()._lresolve(parts)
362 class FakeSymlink(Symlink):
363 """A symlink that is not stored in the bup repository."""
364 def __init__(self, parent, name, toname):
365 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
369 """Get the path that this link points at."""
374 """A directory stored inside of bup's repository."""
377 it = cp().get(self.hash.encode('hex'))
381 it = cp().get(self.hash.encode('hex') + ':')
383 assert(type == 'tree')
384 for (mode,mangled_name,sha) in git.treeparse(''.join(it)):
387 (name,bupmode) = git.demangle_name(mangled_name)
388 if bupmode == git.BUP_CHUNKED:
390 if stat.S_ISDIR(mode):
391 self._subs[name] = Dir(self, name, mode, sha)
392 elif stat.S_ISLNK(mode):
393 self._subs[name] = Symlink(self, name, sha, bupmode)
395 self._subs[name] = File(self, name, mode, sha, bupmode)
398 class CommitList(Node):
399 """A reverse-chronological list of commits on a branch in bup's repository.
401 Represents each commit as a directory and a symlink that points to the
402 directory. The symlink is named after the date. Prepends a dot to each hash
403 to make commits look like hidden directories.
def __init__(self, parent, name, hash):
    """Set up the commit list as a directory-type node."""
    # stat.S_IFDIR has the same value as the octal literal 040000.
    dir_mode = stat.S_IFDIR
    Node.__init__(self, parent, name, dir_mode, hash)
410 revs = list(git.rev_list(self.hash.encode('hex')))
411 for (date, commit) in revs:
412 l = time.localtime(date)
413 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
414 commithex = '.' + commit.encode('hex')
415 n1 = Dir(self, commithex, 040000, commit)
416 n2 = FakeSymlink(self, ls, commithex)
417 n1.ctime = n1.mtime = n2.ctime = n2.mtime = date
418 self._subs[commithex] = n1
422 (date, commit) = latest
423 commithex = '.' + commit.encode('hex')
424 n2 = FakeSymlink(self, 'latest', commithex)
425 n2.ctime = n2.mtime = date
426 self._subs['latest'] = n2
430 """A list of branches in bup's repository.
432 The sub-nodes of the ref list are a series of CommitList for each commit
433 hash pointed to by a branch.
def __init__(self, parent):
    """Set up the ref list as a directory-type node named '/'.

    The node is given EMPTY_SHA as its hash.
    """
    # stat.S_IFDIR has the same value as the octal literal 040000.
    dir_mode = stat.S_IFDIR
    Node.__init__(self, parent, '/', dir_mode, EMPTY_SHA)
440 for (name,sha) in git.list_refs():
441 if name.startswith('refs/heads/'):
443 date = git.rev_get_date(sha.encode('hex'))
444 n1 = CommitList(self, name, sha)
445 n1.ctime = n1.mtime = date
446 self._subs[name] = n1