arthur.barton.de Git - bup.git/commitdiff
cmd-ls and cmd-fuse: toys for browsing your available backups.
author: Avery Pennarun <apenwarr@gmail.com>
Wed, 27 Jan 2010 00:30:30 +0000 (19:30 -0500)
committer: Avery Pennarun <apenwarr@gmail.com>
Wed, 27 Jan 2010 04:34:21 +0000 (23:34 -0500)
'bup ls' lets you browse the set of backups on your current system.  It's a
bit useless, so it might go away or be rewritten eventually.

'bup fuse' is a simple read-only FUSE filesystem that lets you mount your
backup sets as a filesystem (on Linux only).  You can then export this
filesystem over samba or NFS or whatever, and people will be able to restore
their own files from backups.

Warning: we still don't support file metadata in 'bup save', so all the file
permissions will be wrong (and users will probably be able to see things
they shouldn't!).  Also, anything that has been split into chunks will show
you the chunks instead of the full file, which is a bit silly.  There are
also tons of places where performance could be improved.

But it's a pretty neat toy nevertheless.  To try it out:

   mkdir /tmp/backups
   sudo bup fuse /tmp/backups

Makefile
cmd-fuse.py [new file with mode: 0755]
cmd-ls.py [new file with mode: 0755]
git.py

index 2723930586eaa4b6ddead0530101b9fcdb25b707..568ececc5056ba2e2d8a8f9c850fbdf9d38e01b9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ endif
 default: all
 
 all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
-       bup-midx \
+       bup-midx bup-fuse bup-ls \
        bup memtest randomgen$(EXT) _hashsplit$(SOEXT)
 
 randomgen$(EXT): randomgen.o
diff --git a/cmd-fuse.py b/cmd-fuse.py
new file mode 100755 (executable)
index 0000000..b69d5cb
--- /dev/null
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+import sys, os, stat, errno, fuse, re, time, tempfile
+import options, git
+from helpers import *
+
+
+def namesplit(path):
+    """Break an absolute path '/ref/date/dir' into a (ref, date, dir) tuple.
+
+    Components that are missing from the path come back as None, and an
+    empty ref (as in the root path '/') is reported as None too.  Anything
+    after the third slash is returned unsplit as dir.
+    """
+    parts = path.split('/', 3)
+    assert(parts[0] == '')
+    # Pad with None so that absent components simply read back as None.
+    (ref, date, dir) = (parts[1:] + [None, None, None])[:3]
+    return (ref or None, date, dir)
+
+
+# FIXME: iterating through a file just to check its size is super slow!
+def sz(it):
+    """Return the combined length of every chunk produced by iterable it."""
+    return sum(len(chunk) for chunk in it)
+
+
+def date_to_commit(ref, datestr):
+    """Return the commit of backup set ref that corresponds to datestr.
+
+    datestr is parsed as '%Y-%m-%d-%H%M%S', falling back to '%Y-%m-%d';
+    a string matching neither format raises ValueError.  The result is the
+    newest commit whose timestamp is not later than that local time.  If
+    every commit is newer than datestr, the oldest commit is returned.
+    """
+    # Sort a copy: dates_for_ref() hands back its cached list, and sorting
+    # that in place would reverse the order seen by every other caller.
+    dates = sorted(dates_for_ref(ref), reverse=True)
+    try:
+        dp = time.strptime(datestr, '%Y-%m-%d-%H%M%S')
+    except ValueError:
+        dp = time.strptime(datestr, '%Y-%m-%d')
+    dt = time.mktime(dp)
+    commit = None
+    # Newest first: stop at the first commit made at or before dt.
+    for (d,commit) in dates:
+        if d <= dt: break
+    assert(commit)
+    return commit
+
+
+refdates = {}
+def dates_for_ref(ref):
+    """Return the sorted (date, commit) pairs of ref, memoized in refdates.
+
+    The first request for a ref runs git.rev_list() and stores the sorted
+    list in refdates; later requests return that same list object.
+    """
+    cached = refdates.get(ref)
+    if cached:
+        return cached
+    refdates[ref] = fresh = sorted(git.rev_list(ref))
+    return fresh
+
+
+class Stat(fuse.Stat):
+    """A fuse.Stat whose st_* fields all start out as zero."""
+    def __init__(self):
+        for field in ('st_mode', 'st_ino', 'st_dev', 'st_nlink', 'st_uid',
+                      'st_gid', 'st_size', 'st_atime', 'st_mtime',
+                      'st_ctime'):
+            setattr(self, field, 0)
+
+
+statcache = {}
+filecache = {}
+
+
+class BupFs(fuse.Fuse):
+    """Read-only FUSE filesystem laid out as /<ref>/<date>/<path in backup>.
+
+    Handlers report failure by returning a negative errno value.  Stat
+    results are memoized in the module-level statcache, and the contents of
+    opened files are held in temporary files in the module-level filecache.
+    Both caches are keyed by path, so two simultaneous opens of the same
+    path share a single filecache entry.
+    """
+    def getattr(self, path):
+        """Return a Stat for path, or -errno.ENOENT if nothing is there."""
+        log('--getattr(%r)\n' % path)
+        sc = statcache.get(path)
+        if sc:
+            return sc
+        (ref,date,filename) = namesplit(path)
+        if not ref:
+            st = Stat()
+            st.st_mode = stat.S_IFDIR | 0755
+            st.st_nlink = 1  # FIXME
+            statcache[path] = st
+            return st
+        elif not date or not filename:
+            # git.read_ref() returns None for a ref that does not exist;
+            # report that as ENOENT rather than inventing a directory.
+            try:
+                found = git.read_ref(ref)
+            except git.GitError:
+                found = None
+            if found is None:
+                return -errno.ENOENT
+            st = Stat()
+            st.st_mode = stat.S_IFDIR | 0755
+            st.st_nlink = 1  # FIXME
+            statcache[path] = st
+            return st
+        else:
+            try:
+                commit = date_to_commit(ref, date)
+            except (ValueError, git.GitError):
+                # date is not a valid timestamp name, or git rejected ref
+                return -errno.ENOENT
+            (dir,name) = os.path.split(filename)
+            it = cp.get('%s:%s' % (commit.encode('hex'), dir))
+            try:
+                # cp.get() is a generator: a missing object is only
+                # reported (as KeyError) when the first item is requested.
+                objtype = it.next()
+            except KeyError:
+                return -errno.ENOENT
+            if objtype == 'tree':
+                for (mode,n,sha) in git._treeparse(''.join(it)):
+                    if n == name:
+                        st = Stat()
+                        st.st_mode = int(mode, 8)
+                        st.st_nlink = 1  # FIXME
+                        if stat.S_ISDIR(st.st_mode):
+                            st.st_size = 1024
+                        else:
+                            fileid = '%s:%s' % (commit.encode('hex'), filename)
+                            st.st_size = sz(cp.join(fileid))
+                        statcache[path] = st
+                        return st
+        return -errno.ENOENT
+
+    def readdir(self, path, offset):
+        """Yield a fuse.Direntry for each entry of the directory at path.
+
+        The root lists refs (with any 'refs/heads/' prefix removed), a ref
+        lists one timestamp name per commit, and anything deeper lists the
+        entries of the corresponding git tree.
+        """
+        log('--readdir(%r)\n' % path)
+        yield fuse.Direntry('.')
+        yield fuse.Direntry('..')
+        (ref,date,dir) = namesplit(path)
+        if not ref:
+            for (name,sha) in git.list_refs():
+                name = re.sub('^refs/heads/', '', name)
+                yield fuse.Direntry(name)
+        elif not date:
+            dates = dates_for_ref(ref)
+            for (date,commit) in dates:
+                l = time.localtime(date)
+                yield fuse.Direntry(time.strftime('%Y-%m-%d-%H%M%S', l))
+        else:
+            commit = date_to_commit(ref, date)
+            it = cp.get('%s:%s' % (commit.encode('hex'), dir or ''))
+            objtype = it.next()
+            if objtype == 'tree':
+                for (mode,n,sha) in git._treeparse(''.join(it)):
+                    yield fuse.Direntry(n)
+
+    def readlink(self, path):
+        """Return up to 10000 bytes of the blob at path, or a negative errno."""
+        log('--readlink(%r)\n' % path)
+        was_open = path in filecache
+        rv = self.open(path, os.O_RDONLY)
+        if rv is not None:
+            return rv  # open() failed; hand its errno back to the caller
+        try:
+            return self.read(path, 10000, 0)
+        finally:
+            # Drop the temporary file again, unless the path was already
+            # open and its entry is still owned by that earlier open().
+            if not was_open:
+                self.release(path, 0)
+
+    def open(self, path, flags):
+        """Copy the blob at path into a temporary file stored in filecache.
+
+        Returns None on success, -errno.ENOENT if the path does not name an
+        existing object, -errno.EINVAL if the object is not a blob, and
+        -errno.EACCES if flags request anything other than read-only access.
+        """
+        log('--open(%r)\n' % path)
+        (ref,date,dir) = namesplit(path)
+        if not dir:
+            return -errno.ENOENT
+        try:
+            commit = date_to_commit(ref, date)
+        except (ValueError, git.GitError):
+            return -errno.ENOENT
+        it = cp.get('%s:%s' % (commit.encode('hex'), dir))
+        try:
+            # cp.get() is a generator, so the KeyError for a missing object
+            # is raised here, by the first next(), not by the call above.
+            objtype = it.next()
+        except KeyError:
+            return -errno.ENOENT
+        if objtype != 'blob':
+            return -errno.EINVAL
+        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
+        if (flags & accmode) != os.O_RDONLY:
+            return -errno.EACCES
+
+        f = tempfile.TemporaryFile()
+        for blob in it:
+            f.write(blob)
+        f.flush()
+        filecache[path] = f
+
+    def release(self, path, flags):
+        """Forget the cached temporary file for path and close it."""
+        log('--release(%r)\n' % path)
+        # pop() with a default tolerates a path that was never opened.
+        f = filecache.pop(path, None)
+        if f is not None:
+            f.close()
+
+    def read(self, path, size, offset):
+        """Return up to size bytes at offset from the cached file for path.
+
+        Returns -errno.ENOENT if path has no entry in filecache.
+        """
+        log('--read(%r)\n' % path)
+        f = filecache.get(path)
+        if not f:
+            return -errno.ENOENT
+        f.seek(offset)
+        return f.read(size)
+
+
+# Startup script for 'bup fuse': check the fuse binding, parse the command
+# line, then mount a BupFs on the requested mountpoint.
+if not hasattr(fuse, '__version__'):
+    raise RuntimeError, "your fuse module is too old for fuse.__version__"
+# Must be set before BupFs is instantiated below.
+# NOTE(review): presumably selects version 0.2 of the python-fuse API - confirm.
+fuse.fuse_python_api = (0, 2)
+
+optspec = """
+bup fuse [mountpoint]
+--
+d,debug   increase debug level
+f,foreground  run in foreground
+"""
+o = options.Options('bup fuse', optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+# The single positional argument is the mountpoint.
+if len(extra) != 1:
+    log("bup fuse: exactly one argument expected\n")
+    o.usage()
+
+f = BupFs()
+f.fuse_args.mountpoint = extra[0]
+if opt.debug:
+    f.fuse_args.add('debug')
+if opt.foreground:
+    f.fuse_args.setmod('foreground')
+# NOTE(review): presumably 'allow_other' is what lets users other than the
+# one who mounted the filesystem browse the backups - confirm.
+f.fuse_args.add('allow_other')
+
+git.check_repo_or_die()
+# Module-level CatPipe used by the BupFs handlers to fetch git objects.
+cp = git.CatPipe()
+f.main()
diff --git a/cmd-ls.py b/cmd-ls.py
new file mode 100755 (executable)
index 0000000..a1a22ac
--- /dev/null
+++ b/cmd-ls.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+import sys, os, re, time
+import options, git
+
+
+def namesplit(path):
+    """Normalize path and split it into a (ref, date, dir) tuple.
+
+    Runs of slashes are collapsed and 'name/..' pairs are removed before
+    the path is split.  Components that are missing come back as None, as
+    does an empty ref; anything after the third slash is returned as dir.
+    """
+    path = re.sub(r'/+', '/', path)
+    # Keep removing 'name/..' pairs until the path stops changing.
+    previous = None
+    while previous != path:
+        previous = path
+        path = re.sub(r'/[^/]+/\.\.(/|$)', '/', path)  # handle ../ notation
+    parts = path.split('/', 3)
+    assert(parts[0] == '')
+    # Pad with None so that absent components simply read back as None.
+    (ref, date, dir) = (parts[1:] + [None, None, None])[:3]
+    return (ref or None, date, dir)
+
+
+# Main script for 'bup ls': list refs, the commits of a ref, or the contents
+# of a path inside one commit, depending on how deep each argument goes.
+optspec = """
+bup ls <dirs...>
+--
+s,hash   show hash for each file
+"""
+o = options.Options('bup ls', optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+git.check_repo_or_die()
+# The tree listing below fetches objects through cp; without this
+# assignment that branch fails with a NameError.
+cp = git.CatPipe()
+
+if not extra:
+    extra = ['/']
+
+for d in extra:
+    (ref, date, path) = namesplit(d)
+    if not ref:
+        # Top level: one line per ref, without any 'refs/heads/' prefix.
+        for (name,sha) in git.list_refs():
+            name = re.sub('^refs/heads/', '', name)
+            if opt.hash:
+                print '%s %s' % (sha.encode('hex'), name)
+            else:
+                print name
+    elif not date:
+        # A ref: one line per commit, oldest first.
+        dates = list(git.rev_list(ref))
+        dates.sort()
+        for (date,commit) in dates:
+            l = time.localtime(date)
+            print repr((time.strftime('%Y-%m-%d-%H%M%S', l),commit))
+    else:
+        # A dated path: pick the newest commit not later than the given
+        # date, then list the object found at path inside it.
+        dates = list(git.rev_list(ref))
+        dates.sort(reverse=True)
+        try:
+            dp = time.strptime(date, '%Y-%m-%d-%H%M%S')
+        except ValueError:
+            dp = time.strptime(date, '%Y-%m-%d')
+        dt = time.mktime(dp)
+        commit = None
+        for (d,commit) in dates:
+            if d <= dt: break
+        assert(commit)
+        it = cp.get('%s:%s' % (commit.encode('hex'), path or ''))
+        type = it.next()
+        if type == 'tree':
+            for (mode,name,sha) in git._treeparse(''.join(it)):
+                if opt.hash:
+                    print '%s %s' % (sha.encode('hex'), name)
+                else:
+                    print name
+        else:
+            (dir,name) = os.path.split(path)
+            if opt.hash:
+                print '%s %s' % ('?', name)  # FIXME
+            else:
+                print name
diff --git a/git.py b/git.py
index a29266bc729dfb568c52070ae801ec2f3749392c..998bc3c257609650bf490f55161882324907691c 100644 (file)
--- a/git.py
+++ b/git.py
@@ -417,20 +417,47 @@ def _gitenv():
     os.environ['GIT_DIR'] = os.path.abspath(repo())
 
 
-def read_ref(refname):
-    p = subprocess.Popen(['git', 'show-ref', '--', refname],
-                         preexec_fn = _gitenv,
-                         stdout = subprocess.PIPE)
+def list_refs(refname = None):
+    """Yield a (name, sha) pair for each line printed by 'git show-ref'.
+
+    If refname is given it is passed to 'git show-ref' as an argument.
+    Each sha is hex-decoded before it is yielded.  A non-zero exit status
+    is tolerated as long as the command printed nothing.
+    """
+    argv = ['git', 'show-ref', '--']
+    if refname:
+        argv += [refname]
+    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
     out = p.stdout.read().strip()
     rv = p.wait()  # not fatal
     if rv:
         assert(not out)
     if out:
-        return out.split()[0].decode('hex')
+        for d in out.split('\n'):
+            (sha, name) = d.split(' ', 1)
+            yield (name, sha.decode('hex'))
+
+
+def read_ref(refname):
+    """Return the sha list_refs() reports for refname, or None if none.
+
+    Asserts that list_refs() produced at most one entry for refname.
+    """
+    l = list(list_refs(refname))
+    if l:
+        assert(len(l) == 1)
+        return l[0][1]
     else:
         return None
 
 
+def rev_list(ref):
+    """Yield a (date, commit) pair for each commit 'git rev-list' reports.
+
+    date is the integer printed for the '%ct' format and commit is the
+    hex-decoded id from the preceding 'commit ' line.  Raises GitError
+    once the output is exhausted if git exited with a non-zero status.
+    """
+    assert(not ref.startswith('-'))
+    p = subprocess.Popen(['git', 'rev-list', '--pretty=format:%ct', ref, '--'],
+                         preexec_fn = _gitenv,
+                         stdout = subprocess.PIPE)
+    commit = None
+    for row in p.stdout:
+        s = row.strip()
+        if not s.startswith('commit '):
+            # a timestamp line, belonging to the most recent 'commit ' line
+            yield (int(s), commit)
+        else:
+            commit = s[7:].decode('hex')
+    rv = p.wait()
+    if rv:
+        raise GitError('git rev-list returned error %d' % rv)
+
+
 def update_ref(refname, newval, oldval):
     if not oldval:
         oldval = ''
@@ -485,6 +512,7 @@ def _treeparse(buf):
         ofs += z+1+20
         yield (spl[0], spl[1], sha)
 
+
 _ver = None
 def ver():
     global _ver
@@ -542,14 +570,22 @@ class CatPipe:
         self.p.stdin.write('%s\n' % id)
         hdr = self.p.stdout.readline()
         if hdr.endswith(' missing\n'):
-            raise GitError('blob %r is missing' % id)
+            raise KeyError('blob %r is missing' % id)
         spl = hdr.split(' ')
         if len(spl) != 3 or len(spl[0]) != 40:
             raise GitError('expected blob, got %r' % spl)
         (hex, type, size) = spl
-        yield type
-        for blob in chunkyreader(self.p.stdout, int(spl[2])):
-            yield blob
+        it = iter(chunkyreader(self.p.stdout, int(spl[2])))
+        try:
+            yield type
+            for blob in it:
+                yield blob
+        finally:
+            try:
+                while 1:
+                    it.next()
+            except StopIteration:
+                pass
         assert(self.p.stdout.readline() == '\n')
 
     def _slow_get(self, id):