1 """Virtual File System representing bup's repository contents.
3 The vfs.py library makes it possible to expose contents from bup's repository
4 and abstracts internal name mangling and storage from the exposition layer.
6 import os, re, stat, time
7 from bup import git, metadata
9 from bup.git import BUP_NORMAL, BUP_CHUNKED
10 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
16 """Create a git.CatPipe object or reuse the already existing one."""
22 class NodeError(Exception):
23 """VFS base exception."""
26 class NoSuchFile(NodeError):
27 """Request of a file that does not exist."""
30 class NotDir(NodeError):
31 """Attempt to do a directory action on a file that is not one."""
34 class NotFile(NodeError):
35 """Access to a node that does not represent a file."""
38 class TooManySymlinks(NodeError):
39 """Symlink dereferencing level is too deep."""
44 it = cp().get(hash.encode('hex'))
46 assert(type == 'tree')
47 return git.tree_decode(''.join(it))
50 def _tree_decode(hash):
51 tree = [(int(name,16),stat.S_ISDIR(mode),sha)
54 assert(tree == list(sorted(tree)))
59 return sum(len(b) for b in cp().join(hash.encode('hex')))
62 def _last_chunk_info(hash):
63 tree = _tree_decode(hash)
65 (ofs,isdir,sha) = tree[-1]
67 (subofs, sublen) = _last_chunk_info(sha)
68 return (ofs+subofs, sublen)
70 return (ofs, _chunk_len(sha))
73 def _total_size(hash):
74 (lastofs, lastsize) = _last_chunk_info(hash)
75 return lastofs + lastsize
78 def _chunkiter(hash, startofs):
80 tree = _tree_decode(hash)
82 # skip elements before startofs
83 for i in xrange(len(tree)):
84 if i+1 >= len(tree) or tree[i+1][0] > startofs:
88 # iterate through what's left
89 for i in xrange(first, len(tree)):
90 (ofs,isdir,sha) = tree[i]
91 skipmore = startofs-ofs
95 for b in _chunkiter(sha, skipmore):
98 yield ''.join(cp().join(sha.encode('hex')))[skipmore:]
102 def __init__(self, hash, isdir, startofs):
104 self.it = _chunkiter(hash, startofs)
108 self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:]
111 def next(self, size):
113 while len(out) < size:
114 if self.it and not self.blob:
116 self.blob = self.it.next()
117 except StopIteration:
120 want = size - len(out)
121 out += self.blob[:want]
122 self.blob = self.blob[want:]
125 debug2('next(%d) returned %d\n' % (size, len(out)))
130 class _FileReader(object):
131 def __init__(self, hash, size, isdir):
149 def read(self, count = -1):
151 count = self.size - self.ofs
152 if not self.reader or self.reader.ofs != self.ofs:
153 self.reader = _ChunkReader(self.hash, self.isdir, self.ofs)
155 buf = self.reader.next(count)
158 raise # our offsets will be all screwed up otherwise
167 """Base class for file representation."""
168 def __init__(self, parent, name, mode, hash):
173 self.ctime = self.mtime = self.atime = 0
175 self._metadata = None
178 return "<%s object at %s - name:%r hash:%s parent:%r>" \
179 % (self.__class__, hex(id(self)),
180 self.name, self.hash.encode('hex'),
181 self.parent.name if self.parent else None)
186 return (cmp(a and a.parent, b and b.parent) or
187 cmp(a and a.name, b and b.name))
190 return iter(self.subs())
192 def fullname(self, stop_at=None):
193 """Get this file's full path."""
194 assert(self != stop_at) # would be the empty string; too weird
195 if self.parent and self.parent != stop_at:
196 return os.path.join(self.parent.fullname(stop_at=stop_at),
205 """Get a list of nodes that are contained in this node."""
206 if self._subs == None:
208 return sorted(self._subs.values())
211 """Get node named 'name' that is contained in this node."""
212 if self._subs == None:
214 ret = self._subs.get(name)
216 raise NoSuchFile("no file %r in %r" % (name, self.name))
220 """Return the very top node of the tree."""
222 return self.parent.top()
227 """Return the top node of the particular backup set.
229 If this node isn't inside a backup set, return the root level.
231 if self.parent and not isinstance(self.parent, CommitList):
232 return self.parent.fs_top()
236 def _lresolve(self, parts):
237 #debug2('_lresolve %r in %r\n' % (parts, self.name))
240 (first, rest) = (parts[0], parts[1:])
242 return self._lresolve(rest)
245 raise NoSuchFile("no parent dir for %r" % self.name)
246 return self.parent._lresolve(rest)
248 return self.sub(first)._lresolve(rest)
250 return self.sub(first)
252 def lresolve(self, path, stay_inside_fs=False):
253 """Walk into a given sub-path of this node.
255 If the last element is a symlink, leave it as a symlink, don't resolve
261 if path.startswith('/'):
263 start = self.fs_top()
267 parts = re.split(r'/+', path or '.')
270 #debug2('parts: %r %r\n' % (path, parts))
271 return start._lresolve(parts)
273 def resolve(self, path = ''):
274 """Like lresolve(), and dereference it if it was a symlink."""
275 return self.lresolve(path).lresolve('.')
277 def try_resolve(self, path = ''):
278 """Like resolve(), but don't worry if a symlink uses an invalid path.
280 Returns an error if any intermediate nodes were invalid.
282 n = self.lresolve(path)
290 """Get the number of hard links to the current node."""
291 if self._subs == None:
296 """Get the size of the current node."""
300 """Open the current node. It is an error to open a non-file node."""
301 raise NotFile('%s is not a regular file' % self.name)
303 def _populate_metadata(self, force=False):
304 # Only Dirs contain .bupm files, so by default, do nothing.
308 """Return this Node's Metadata() object, if any."""
309 if not self._metadata and self.parent:
310 self.parent._populate_metadata(force=True)
311 return self._metadata
314 """Release resources that can be automatically restored (at a cost)."""
315 self._metadata = None
320 """A normal file from bup's repository."""
321 def __init__(self, parent, name, mode, hash, bupmode):
322 Node.__init__(self, parent, name, mode, hash)
323 self.bupmode = bupmode
324 self._cached_size = None
325 self._filereader = None
329 # You'd think FUSE might call this only once each time a file is
330 # opened, but no; it's really more of a refcount, and it's called
331 # once per read(). Thus, it's important to cache the filereader
332 # object here so we're not constantly re-seeking.
333 if not self._filereader:
334 self._filereader = _FileReader(self.hash, self.size(),
335 self.bupmode == git.BUP_CHUNKED)
336 self._filereader.seek(0)
337 return self._filereader
340 """Get this file's size."""
341 if self._cached_size == None:
342 debug1('<<<<File.size() is calculating (for %r)...\n' % self.name)
343 if self.bupmode == git.BUP_CHUNKED:
344 self._cached_size = _total_size(self.hash)
346 self._cached_size = _chunk_len(self.hash)
347 debug1('<<<<File.size() done.\n')
348 return self._cached_size
353 """A symbolic link from bup's repository."""
354 def __init__(self, parent, name, hash, bupmode):
355 File.__init__(self, parent, name, 0120000, hash, bupmode)
358 """Get the file size of the file at which this link points."""
359 return len(self.readlink())
362 """Get the path that this link points at."""
363 return ''.join(cp().join(self.hash.encode('hex')))
365 def dereference(self):
366 """Get the node that this link points at.
368 If the path is invalid, raise a NoSuchFile exception. If the level of
369 indirection of symlinks is 100 levels deep, raise a TooManySymlinks
374 raise TooManySymlinks('too many levels of symlinks: %r'
379 return self.parent.lresolve(self.readlink(),
382 raise NoSuchFile("%s: broken symlink to %r"
383 % (self.fullname(), self.readlink()))
387 def _lresolve(self, parts):
388 return self.dereference()._lresolve(parts)
391 class FakeSymlink(Symlink):
392 """A symlink that is not stored in the bup repository."""
393 def __init__(self, parent, name, toname):
394 Symlink.__init__(self, parent, name, EMPTY_SHA, git.BUP_NORMAL)
398 """Get the path that this link points at."""
403 """A directory stored inside of bup's repository."""
405 def __init__(self, *args, **kwargs):
406 Node.__init__(self, *args, **kwargs)
409 def _populate_metadata(self, force=False):
410 if self._metadata and not force:
416 meta_stream = self._bupm.open()
417 dir_meta = metadata.Metadata.read(meta_stream)
419 if not stat.S_ISDIR(sub.mode):
420 sub._metadata = metadata.Metadata.read(meta_stream)
421 self._metadata = dir_meta
425 it = cp().get(self.hash.encode('hex'))
429 it = cp().get(self.hash.encode('hex') + ':')
431 assert(type == 'tree')
432 for (mode,mangled_name,sha) in git.tree_decode(''.join(it)):
433 if mangled_name == '.bupm':
434 bupmode = stat.S_ISDIR(mode) and BUP_CHUNKED or BUP_NORMAL
435 self._bupm = File(self, mangled_name, GIT_MODE_FILE, sha,
439 (name,bupmode) = git.demangle_name(mangled_name)
440 if bupmode == git.BUP_CHUNKED:
442 if stat.S_ISDIR(mode):
443 self._subs[name] = Dir(self, name, mode, sha)
444 elif stat.S_ISLNK(mode):
445 self._subs[name] = Symlink(self, name, sha, bupmode)
447 self._subs[name] = File(self, name, mode, sha, bupmode)
450 """Return this Dir's Metadata() object, if any."""
451 self._populate_metadata()
452 return self._metadata
454 def metadata_file(self):
455 """Return this Dir's .bupm File, if any."""
461 """Release restorable resources held by this node."""
463 super(Dir, self).release()
466 class CommitDir(Node):
467 """A directory that contains all commits that are reachable by a ref.
469 Contains a set of subdirectories named after the commits' first byte in
470 hexadecimal. Each of those directories contain all commits with hashes that
471 start the same as the directory name. The name used for those
472 subdirectories is the hash of the commit without the first byte. This
473 separation helps us avoid having too much directories on the same level as
474 the number of commits grows big.
476 def __init__(self, parent, name):
477 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
481 refs = git.list_refs()
483 #debug2('ref name: %s\n' % ref[0])
484 revs = git.rev_list(ref[1].encode('hex'))
485 for (date, commit) in revs:
486 #debug2('commit: %s date: %s\n' % (commit.encode('hex'), date))
487 commithex = commit.encode('hex')
488 containername = commithex[:2]
489 dirname = commithex[2:]
490 n1 = self._subs.get(containername)
492 n1 = CommitList(self, containername)
493 self._subs[containername] = n1
495 if n1.commits.get(dirname):
496 # Stop work for this ref, the rest should already be present
499 n1.commits[dirname] = (commit, date)
502 class CommitList(Node):
503 """A list of commits with hashes that start with the current node's name."""
504 def __init__(self, parent, name):
505 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
510 for (name, (hash, date)) in self.commits.items():
511 n1 = Dir(self, name, GIT_MODE_TREE, hash)
512 n1.ctime = n1.mtime = date
513 self._subs[name] = n1
517 """A directory that contains all tags in the repository."""
518 def __init__(self, parent, name):
519 Node.__init__(self, parent, name, GIT_MODE_TREE, EMPTY_SHA)
523 for (name, sha) in git.list_refs():
524 if name.startswith('refs/tags/'):
526 date = git.rev_get_date(sha.encode('hex'))
527 commithex = sha.encode('hex')
528 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
529 tag1 = FakeSymlink(self, name, target)
530 tag1.ctime = tag1.mtime = date
531 self._subs[name] = tag1
534 class BranchList(Node):
535 """A list of links to commits reachable by a branch in bup's repository.
537 Represents each commit as a symlink that points to the commit directory in
538 /.commit/??/ . The symlink is named after the commit date.
540 def __init__(self, parent, name, hash):
541 Node.__init__(self, parent, name, GIT_MODE_TREE, hash)
548 revs = list(git.rev_list(self.hash.encode('hex')))
550 for (date, commit) in revs:
551 l = time.localtime(date)
552 ls = time.strftime('%Y-%m-%d-%H%M%S', l)
553 commithex = commit.encode('hex')
554 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
555 n1 = FakeSymlink(self, ls, target)
556 n1.ctime = n1.mtime = date
559 for tag in tags.get(commit, []):
560 t1 = FakeSymlink(self, tag, target)
561 t1.ctime = t1.mtime = date
564 (date, commit) = latest
565 commithex = commit.encode('hex')
566 target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
567 n1 = FakeSymlink(self, 'latest', target)
568 n1.ctime = n1.mtime = date
569 self._subs['latest'] = n1
573 """A list of branches in bup's repository.
575 The sub-nodes of the ref list are a series of CommitList for each commit
576 hash pointed to by a branch.
578 Also, a special sub-node named '.commit' contains all commit directories
579 that are reachable via a ref (e.g. a branch). See CommitDir for details.
581 def __init__(self, parent):
582 Node.__init__(self, parent, '/', GIT_MODE_TREE, EMPTY_SHA)
587 commit_dir = CommitDir(self, '.commit')
588 self._subs['.commit'] = commit_dir
590 tag_dir = TagDir(self, '.tag')
591 self._subs['.tag'] = tag_dir
593 for (name,sha) in git.list_refs():
594 if name.startswith('refs/heads/'):
596 date = git.rev_get_date(sha.encode('hex'))
597 n1 = BranchList(self, name, sha)
598 n1.ctime = n1.mtime = date
599 self._subs[name] = n1