]> arthur.barton.de Git - bup.git/commitdiff
Merge branches 'gf/ls', 'gf/tag', 'zz/import-rsnapshot' and 'bl/selfindex' bup-0.21-rc1
authorAvery Pennarun <apenwarr@gmail.com>
Mon, 3 Jan 2011 04:58:35 +0000 (20:58 -0800)
committerAvery Pennarun <apenwarr@gmail.com>
Mon, 3 Jan 2011 04:58:35 +0000 (20:58 -0800)
* gf/ls:
  ls-cmd: hide files with a leading dot by default

* gf/tag:
  Refuse branch/tag names that start with a dot
  tag-cmd: Some fixups

* zz/import-rsnapshot:
  Adds a testcase for import-rsnapshot.
  Makes import-rsnapshot use save's -f option.
  Adds -f option to save to use a given indexfile.
  Makefile: handle shell commands (cmd/*-cmd.sh)
  Adds documentation for bup-import-rsnapshot
  Adds import-rsnapshot command.
  Adds documentation for save's strip option.
  Adds testcases for --strip and --strip-path.
  Adds a strip and strip-path option to bup save.

* bl/selfindex:
  Rename receive-objects command to receive-objects-v2.
  Write idxs directly rather than using git-index-pack.
  Send SHAs from the client to reduce server load
  Use chunkyreader() instead of manually reading multiple blocks.

16 files changed:
Documentation/bup-import-rsnapshot.md [new file with mode: 0644]
Documentation/bup-save.md
Makefile
cmd/import-rsnapshot-cmd.sh [new file with mode: 0755]
cmd/ls-cmd.py
cmd/save-cmd.py
cmd/server-cmd.py
cmd/split-cmd.py
cmd/tag-cmd.py
lib/bup/client.py
lib/bup/git.py
lib/bup/helpers.py
lib/bup/t/tgit.py
lib/bup/t/thelpers.py
lib/bup/vfs.py
t/test.sh

diff --git a/Documentation/bup-import-rsnapshot.md b/Documentation/bup-import-rsnapshot.md
new file mode 100644 (file)
index 0000000..4a0214f
--- /dev/null
@@ -0,0 +1,33 @@
+% bup-import-rsnapshot(1) Bup %BUP_VERSION%
+% Zoran Zaric <zz@zoranzaric.de>
+% %BUP_DATE%
+
+# NAME
+
+bup-import-rsnapshot - import a rsnapshot archive
+
+# SYNOPSIS
+
+bup import-rsnapshot [-n] <path to snapshot_root> [<backuptarget>]
+
+# DESCRIPTION
+
+`bup import-rsnapshot` imports a rsnapshot archive. The
+timestamps for the backups are preserved and the path to
+the rsnapshot archive is stripped from the paths.
+
+`bup import-rsnapshot` either imports the whole archive
+or only imports all backups for a given backuptarget.
+
+# OPTIONS
+
+-n,--dry-run
+:   don't do anything just print out what would be done
+
+# EXAMPLE
+
+    $ bup import-rsnapshot /.snapshots
+
+# BUP
+
+Part of the `bup`(1) suite.
index 9471474cc8a4b29f0cd7ee535b63bb1f3a0310c4..638e5019d2710204e1ffb6f8e9b3d9830dea27b2 100644 (file)
@@ -8,8 +8,8 @@ bup-save - create a new bup backup set
 
 # SYNOPSIS
 
-bup save [-r *host*:*path*] <-t|-c|-n *name*> [-v] [-q]
-  [--smaller=*maxsize*] <paths...>
+bup save [-r *host*:*path*] <-t|-c|-n *name*> [-f *indexfile*]
+[-v] [-q] [--smaller=*maxsize*] <paths...>
 
 # DESCRIPTION
 
@@ -45,6 +45,10 @@ for `bup-index`(1).
     the same name, and later view the history of that
     backup set to see how files have changed over time.)
     
+-f, --indexfile=*indexfile*
+:   use a different index filename instead of
+    `~/.bup/bupindex`.
+
 -v, --verbose
 :   increase verbosity (can be used more than once).  With
     one -v, prints every directory name as it gets backed up.  With
@@ -70,6 +74,21 @@ for `bup-index`(1).
     like k, M, or G to specify multiples of 1024,
     1024*1024, 1024*1024*1024 respectively.
     
+--strip
+:   strips the path that is given from all files and directories.
+    
+    A directory */root/chroot/etc* saved with
+    "bup save -n chroot --strip /root/chroot" would be saved
+    as */etc*.
+    
+--strip-path=*path-prefix*
+:   strips the given path-prefix *path-prefix* from all
+    files and directories.
+    
+    A directory */root/chroots/webserver/etc* saved with
+    "bup save -n webserver --strip-path=/root/chroots" would
+    be saved as */webserver/etc*.
+    
 
 # EXAMPLE
     
index a9c8f04e2acb10ea055202568795af7835c04539..3194a06dde72d05d7293b8fb10caacb3af02c7ae 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -103,7 +103,9 @@ bup: main.py
        rm -f $@
        ln -s $< $@
 
-cmds: $(patsubst cmd/%-cmd.py,cmd/bup-%,$(wildcard cmd/*-cmd.py))
+cmds: \
+    $(patsubst cmd/%-cmd.py,cmd/bup-%,$(wildcard cmd/*-cmd.py)) \
+    $(patsubst cmd/%-cmd.sh,cmd/bup-%,$(wildcard cmd/*-cmd.sh))
 
 cmd/bup-%: cmd/%-cmd.py
        rm -f $@
@@ -117,6 +119,10 @@ bup-%: cmd-%.sh
        rm -f $@
        ln -s $< $@
 
+cmd/bup-%: cmd/%-cmd.sh
+       rm -f $@
+       ln -s $*-cmd.sh $@
+
 %.o: %.c
        gcc -c -o $@ $< $(CPPFLAGS) $(CFLAGS)
        
diff --git a/cmd/import-rsnapshot-cmd.sh b/cmd/import-rsnapshot-cmd.sh
new file mode 100755 (executable)
index 0000000..94db5c1
--- /dev/null
@@ -0,0 +1,72 @@
+#!/bin/sh
+# bup-import-rsnapshot.sh
+
+# Imports an rsnapshot archive into bup: every <snapshot>/<backuptarget>
+# directory below the snapshot root is indexed and saved to the branch
+# named after the backuptarget, dated with that directory's ctime.
+
+# Print the usage text on stderr and exit with a failure status.
+usage() {
+    echo "Usage: bup import-rsnapshot [-n]" \
+        "<path to snapshot_root> [<backuptarget>]" >&2
+    echo "-n,--dry-run: don't do anything just print out what would be done" >&2
+    exit 1
+}
+
+# Define bup() once, so the import loop below is the same in every mode.
+if [ "$1" = "-n" ] || [ "$1" = "--dry-run" ]; then
+    # dry run: only show the commands that would be executed
+    bup() { echo bup "$@" >&2; }
+    shift 1
+elif [ -n "$BUP_MAIN_EXE" ]; then
+    bup() { "$BUP_MAIN_EXE" "$@"; }
+else
+    # 'command' skips function lookup; a plain 'bup' here would make the
+    # function call itself forever.
+    bup() { command bup "$@"; }
+fi
+
+[ "$#" -eq 1 ] || [ "$#" -eq 2 ] || usage
+
+if [ ! -e "$1/." ]; then
+    echo "$1 isn't a directory!" >&2
+    exit 1
+fi
+
+# Optional second argument: import only this backuptarget.
+TARGET=
+[ "$#" -eq 2 ] && TARGET="$2"
+
+ABSPATH=`readlink -f "$1"`
+
+for SNAPSHOT in "$ABSPATH/"*; do
+    if [ -e "$SNAPSHOT/." ]; then
+        for BRANCH_PATH in "$SNAPSHOT/"*; do
+            if [ -e "$BRANCH_PATH/." ]; then
+                # Get the snapshot's ctime
+                DATE=`stat -c %Z "$BRANCH_PATH"`
+                BRANCH=`basename "$BRANCH_PATH"`
+                # scratch index for this branch, removed after the save
+                TMPIDX="/tmp/$BRANCH"
+
+                # '=' is the portable test operator; '==' is a bashism
+                # that fails under a strict /bin/sh.
+                if [ -z "$TARGET" ] || [ "$TARGET" = "$BRANCH" ]; then
+                    bup index -ux \
+                        -f "$TMPIDX" \
+                        "$BRANCH_PATH/"
+                    bup save \
+                        --strip \
+                        --date="$DATE" \
+                        -f "$TMPIDX" \
+                        -n "$BRANCH" \
+                        "$BRANCH_PATH/"
+
+                    # the index file does not exist after a dry run
+                    rm -f "$TMPIDX"
+                fi
+            fi
+        done
+    fi
+done
+
index 57f4275cf84a28cf0ebb1270de62f0d77d7f02ea..bbc0432987530946aefdb489b1c749e5b4ad8626 100755 (executable)
@@ -19,6 +19,7 @@ optspec = """
 bup ls <dirs...>
 --
 s,hash   show hash for each file
+a,all    show hidden files
 """
 o = options.Options('bup ls', optspec)
 (opt, flags, extra) = o.parse(sys.argv[1:])
@@ -35,9 +36,11 @@ for d in extra:
         n = top.lresolve(d)
         if stat.S_ISDIR(n.mode):
             for sub in n:
-                print_node(sub.name, sub)
+                if opt.all or not sub.name.startswith('.'):
+                    print_node(sub.name, sub)
         else:
-            print_node(d, n)
+            if opt.all or not sub.name.startswith('.'):
+                print_node(d, n)
     except vfs.NodeError, e:
         log('error: %s\n' % e)
         ret = 1
index df51a6c74934c8aca99631e70122752e9986c815..62c1e426bb5bad665c41004fb9e71af99d1dc2d5 100755 (executable)
@@ -16,6 +16,9 @@ v,verbose  increase log output (can be used more than once)
 q,quiet    don't show progress meter
 smaller=   only back up files smaller than n bytes
 bwlimit=   maximum bytes/sec to transmit to server
+f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
+strip      strips the path to every filename given
+strip-path= path-prefix to be stripped when saving
 """
 o = options.Options('bup save', optspec)
 (opt, flags, extra) = o.parse(sys.argv[1:])
@@ -36,10 +39,15 @@ if opt.date:
 else:
     date = time.time()
 
+if opt.strip and opt.strip_path:
+    o.fatal("--strip is incompatible with --strip-path")
+
 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
 if is_reverse and opt.remote:
     o.fatal("don't use -r in reverse mode; it's automatic")
 
+if opt.name and opt.name.startswith('.'):
+    o.fatal("'%s' is not a valid branch name" % opt.name)
 refname = opt.name and 'refs/heads/%s' % opt.name or None
 if opt.remote or is_reverse:
     cli = client.Client(opt.remote)
@@ -129,7 +137,9 @@ def vlog(s):
     log(s)
 
 
-r = index.Reader(git.repo('bupindex'))
+# -f/--indexfile overrides the default index file (BUP_DIR/bupindex)
+indexfile = opt.indexfile or git.repo('bupindex')
+r = index.Reader(indexfile)
 
 def already_saved(ent):
     return ent.is_valid() and w.exists(ent.sha) and ent.sha
@@ -195,7 +205,13 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
         continue
 
     assert(dir.startswith('/'))
-    dirp = dir.split('/')
+    if opt.strip:
+        stripped_base_path = strip_base_path(dir, extra)
+        dirp = stripped_base_path.split('/')
+    elif opt.strip_path:
+        dirp = strip_path(opt.strip_path, dir).split('/')
+    else:
+        dirp = dir.split('/')
     while parts > dirp:
         _pop(force_tree = None)
     if dir != '/':
@@ -283,7 +299,6 @@ if opt.tree:
     print tree.encode('hex')
 if opt.commit or opt.name:
     msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv
-    ref = opt.name and ('refs/heads/%s' % opt.name) or None
     commit = w.new_commit(oldref, tree, date, msg)
     if opt.commit:
         print commit.encode('hex')
index 299e28dda9a1de76896cda171ffce4a1bdc13db1..e44f302c7d7a7c8332b13141a107374702d1c022 100755 (executable)
@@ -36,7 +36,7 @@ def send_index(conn, name):
     conn.ok()
 
 
-def receive_objects(conn, junk):
+def receive_objects_v2(conn, junk):
     global suspended_w
     git.check_repo_or_die()
     suggested = {}
@@ -67,15 +67,16 @@ def receive_objects(conn, junk):
             conn.ok()
             return
             
+        shar = conn.read(20)
+        crcr = struct.unpack('!I', conn.read(4))[0]
+        n -= 20 + 4
         buf = conn.read(n)  # object sizes in bup are reasonably small
         #debug2('read %d bytes\n' % n)
         if len(buf) < n:
             w.abort()
             raise Exception('object read: expected %d bytes, got %d\n'
                             % (n, len(buf)))
-        (type, content) = git._decode_packobj(buf)
-        sha = git.calc_hash(type, content)
-        oldpack = w.exists(sha)
+        oldpack = w.exists(shar)
         # FIXME: we only suggest a single index per cycle, because the client
         # is currently too dumb to download more than one per cycle anyway.
         # Actually we should fix the client, but this is a minor optimization
@@ -88,7 +89,7 @@ def receive_objects(conn, junk):
             # fix that deficiency of midx files eventually, although it'll
             # make the files bigger.  This method is certainly not very
             # efficient.
-            oldpack = w.objcache.packname_containing(sha)
+            oldpack = w.objcache.packname_containing(shar)
             debug2('new suggestion: %r\n' % oldpack)
             assert(oldpack)
             assert(oldpack != True)
@@ -102,8 +103,16 @@ def receive_objects(conn, junk):
                 conn.write('index %s\n' % name)
                 suggested[name] = 1
         else:
-            w._raw_write([buf])
+            nw, crc = w._raw_write([buf], sha=shar)
+            _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
+            _check(w, n, nw, 'object read: expected %d bytes, got %d\n')
     # NOTREACHED
+    
+
+def _check(w, expected, actual, msg):
+    if expected != actual:
+        w.abort()
+        raise Exception(msg % (expected, actual))
 
 
 def read_ref(conn, refname):
@@ -156,7 +165,7 @@ commands = {
     'set-dir': set_dir,
     'list-indexes': list_indexes,
     'send-index': send_index,
-    'receive-objects': receive_objects,
+    'receive-objects-v2': receive_objects_v2,
     'read-ref': read_ref,
     'update-ref': update_ref,
     'cat': cat,
index 035e8de4b97cf95d6532e5909a9149de8cadc1c2..7ecc5d018cc865a76727580c2c612eb04a2c060e 100755 (executable)
@@ -78,6 +78,8 @@ if is_reverse and opt.remote:
     o.fatal("don't use -r in reverse mode; it's automatic")
 start_time = time.time()
 
+if opt.name and opt.name.startswith('.'):
+    o.fatal("'%s' is not a valid branch name." % opt.name)
 refname = opt.name and 'refs/heads/%s' % opt.name or None
 if opt.noop or opt.copy:
     cli = pack_writer = oldref = None
index 760dce7dd07c1b81d0c17bf4af6e44c81d51ce4c..a624e8b8ace127d1ea434cad72723468448a61c9 100755 (executable)
@@ -35,33 +35,38 @@ if opt.delete:
     try:
         os.unlink(tag_file)
     except OSError, e:
-        log("bup: error: unable to delete tag: %s" % e)
+        log("bup: error: unable to delete tag '%s': %s" % (opt.delete, e))
         sys.exit(1)
 
     sys.exit(0)
 
-tags = []
-for (t, dummy) in git.list_refs():
-    if t.startswith('refs/tags/'):
-        tags.append(t[10:])
+tags = [t for sublist in git.tags().values() for t in sublist]
 
 if not extra:
     for t in tags:
-        log("%s\n" % t)
+        print t
     sys.exit(0)
-elif len(extra) != 2:
-    log('bup: error: no ref or hash given.')
-    sys.exit(1)
+elif len(extra) < 2:
+    o.fatal('no commit ref or hash given.')
 
-tag_name = extra[0]
-commit = extra[1]
-debug1("from args: tag name = %s; commit = %s\n" % (tag_name, commit))
+(tag_name, commit) = extra[:2]
+if not tag_name:
+    o.fatal("tag name must not be empty.")
+debug1("args: tag name = %s; commit = %s\n" % (tag_name, commit))
 
 if tag_name in tags:
     log("bup: error: tag '%s' already exists" % tag_name)
     sys.exit(1)
 
-hash = git.rev_parse(commit)
+if tag_name.startswith('.'):
+    o.fatal("'%s' is not a valid tag name." % tag_name)
+
+try:
+    hash = git.rev_parse(commit)
+except git.GitError, e:
+    log("bup: error: %s" % e)
+    sys.exit(2)
+
 if not hash:
     log("bup: error: commit %s not found." % commit)
     sys.exit(2)
@@ -75,7 +80,7 @@ tag_file = git.repo('refs/tags/%s' % tag_name)
 try:
     tag = file(tag_file, 'w')
 except OSError, e:
-    log('bup: error: could not create tag %s: %s' % (tag_name, e))
+    log("bup: error: could not create tag '%s': %s" % (tag_name, e))
     sys.exit(3)
 
 tag.write(hash.encode('hex'))
index d1fdbbe155851563f026fc03d83e990b20cf1f26..c65ec0cef8caa9e6fbc90ade9446be4a91265dcb 100644 (file)
@@ -1,4 +1,4 @@
-import re, struct, errno, time
+import re, struct, errno, time, zlib
 from bup import git, ssh
 from bup.helpers import *
 
@@ -169,20 +169,20 @@ class Client:
         debug1('client: received index suggestion: %s\n' % indexname)
         ob = self._busy
         if ob:
-            assert(ob == 'receive-objects')
+            assert(ob == 'receive-objects-v2')
             self.conn.write('\xff\xff\xff\xff')  # suspend receive-objects
             self._busy = None
             self.conn.drain_and_check_ok()
         self.sync_index(indexname)
         if ob:
             self._busy = ob
-            self.conn.write('receive-objects\n')
+            self.conn.write('receive-objects-v2\n')
 
     def new_packwriter(self):
         self.check_busy()
         def _set_busy():
-            self._busy = 'receive-objects'
-            self.conn.write('receive-objects\n')
+            self._busy = 'receive-objects-v2'
+            self.conn.write('receive-objects-v2\n')
         return PackWriter_Remote(self.conn,
                                  objcache_maker = self._make_objcache,
                                  suggest_pack = self._suggest_pack,
@@ -271,18 +271,23 @@ class PackWriter_Remote(git.PackWriter):
     def abort(self):
         raise GitError("don't know how to abort remote pack writing")
 
-    def _raw_write(self, datalist):
+    def _raw_write(self, datalist, sha):
         assert(self.file)
         if not self._packopen:
             self._open()
         if self.ensure_busy:
             self.ensure_busy()
         data = ''.join(datalist)
-        assert(len(data))
-        outbuf = struct.pack('!I', len(data)) + data
+        assert(data)
+        assert(sha)
+        crc = zlib.crc32(data) & 0xffffffff
+        outbuf = ''.join((struct.pack('!I', len(data) + 20 + 4),
+                          sha,
+                          struct.pack('!I', crc),
+                          data))
         (self._bwcount, self._bwtime) = \
             _raw_write_bwlimit(self.file, outbuf, self._bwcount, self._bwtime)
-        self.outbytes += len(data)
+        self.outbytes += len(data) - 20 - 4 # Don't count sha1+crc
         self.count += 1
 
         if self.file.has_input():
@@ -292,3 +297,5 @@ class PackWriter_Remote(git.PackWriter):
             if self.suggest_pack:
                 self.suggest_pack(idxname)
                 self.objcache.refresh()
+
+        return sha, crc
index 6a65c435450233702dea291113c417e423c0419f..49e98019c2927c023de97a8683a6f4992c08c518 100644 (file)
@@ -2,8 +2,7 @@
 bup repositories are in Git format. This library allows us to
 interact with the Git data structures.
 """
-import os, zlib, time, subprocess, struct, stat, re, tempfile
-import heapq
+import os, zlib, time, subprocess, struct, stat, re, tempfile, heapq
 from bup.helpers import *
 from bup import _helpers
 
@@ -140,7 +139,7 @@ def _decode_packobj(buf):
 class PackIdx:
     def __init__(self):
         assert(0)
-    
+
     def find_offset(self, hash):
         """Get the offset of an object inside the index file."""
         idx = self._idx_from_hash(hash)
@@ -222,7 +221,7 @@ class PackIdxV2(PackIdx):
         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
         if ofs & 0x80000000:
             idx64 = ofs & 0x7fffffff
-            ofs = struct.unpack('!I',
+            ofs = struct.unpack('!Q',
                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
         return ofs
 
@@ -528,6 +527,7 @@ class PackWriter:
         self.outbytes = 0
         self.filename = None
         self.file = None
+        self.idx = None
         self.objcache_maker = objcache_maker
         self.objcache = None
 
@@ -549,8 +549,11 @@ class PackWriter:
             assert(name.endswith('.pack'))
             self.filename = name[:-5]
             self.file.write('PACK\0\0\0\2\0\0\0\0')
+            self.idx = list(list() for i in xrange(256))
 
-    def _raw_write(self, datalist):
+    # the 'sha' parameter is used in client.py's _raw_write(), but not needed
+    # in this basic version.
+    def _raw_write(self, datalist, sha):
         self._open()
         f = self.file
         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
@@ -560,14 +563,25 @@ class PackWriter:
         # but that's okay because we'll flush it in _end().
         oneblob = ''.join(datalist)
         f.write(oneblob)
-        self.outbytes += len(oneblob)
+        nw = len(oneblob)
+        crc = zlib.crc32(oneblob) & 0xffffffff
+        self._update_idx(sha, crc, nw)
+        self.outbytes += nw
         self.count += 1
+        return nw, crc
+
+    def _update_idx(self, sha, crc, size):
+        assert(sha)
+        if self.idx:
+            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
 
-    def _write(self, bin, type, content):
+    def _write(self, sha, type, content):
         if verbose:
             log('>')
-        self._raw_write(_encode_packobj(type, content))
-        return bin
+        if not sha:
+            sha = calc_hash(type, content)
+        size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
+        return sha
 
     def breakpoint(self):
         """Clear byte and object counts and return the last processed id."""
@@ -587,11 +601,11 @@ class PackWriter:
 
     def maybe_write(self, type, content):
         """Write an object to the pack file if not present and return its id."""
-        bin = calc_hash(type, content)
-        if not self.exists(bin):
-            self._write(bin, type, content)
-            self.objcache.add(bin)
-        return bin
+        sha = calc_hash(type, content)
+        if not self.exists(sha):
+            self._write(sha, type, content)
+            self.objcache.add(sha)
+        return sha
 
     def new_blob(self, blob):
         """Create a blob object in the pack with the supplied content."""
@@ -632,6 +646,7 @@ class PackWriter:
         """Remove the pack file from disk."""
         f = self.file
         if f:
+            self.idx = None
             self.file = None
             f.close()
             os.unlink(self.filename + '.pack')
@@ -641,6 +656,8 @@ class PackWriter:
         if not f: return None
         self.file = None
         self.objcache = None
+        idx = self.idx
+        self.idx = None
 
         # update object count
         f.seek(8)
@@ -651,24 +668,17 @@ class PackWriter:
         # calculate the pack sha1sum
         f.seek(0)
         sum = Sha1()
-        while 1:
-            b = f.read(65536)
+        for b in chunkyreader(f):
             sum.update(b)
-            if not b: break
-        f.write(sum.digest())
-
+        packbin = sum.digest()
+        f.write(packbin)
         f.close()
 
-        p = subprocess.Popen(['git', 'index-pack', '-v',
-                              '--index-version=2',
-                              self.filename + '.pack'],
-                             preexec_fn = _gitenv,
-                             stdout = subprocess.PIPE)
-        out = p.stdout.read().strip()
-        _git_wait('git index-pack', p)
-        if not out:
-            raise GitError('git index-pack produced no output')
-        nameprefix = repo('objects/pack/%s' % out)
+        idx_f = open(self.filename + '.idx', 'wb')
+        obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
+        idx_f.close()
+
+        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
         if os.path.exists(self.filename + '.map'):
             os.unlink(self.filename + '.map')
         os.rename(self.filename + '.pack', nameprefix + '.pack')
@@ -681,6 +691,44 @@ class PackWriter:
         """Close the pack file and move it to its definitive path."""
         return self._end()
 
+    def _write_pack_idx_v2(self, file, idx, packbin):
+        """Write a version 2 pack index for the entries in idx to file.
+
+        idx is a list of 256 lists of (sha, crc, offset) tuples, sorted
+        here in place. Returns the hex sha1 over the sorted shas.
+        """
+        sum = Sha1()
+        def write(data):
+            file.write(data)
+            sum.update(data)
+
+        write('\377tOc\0\0\0\2')
+        n = 0
+        for part in idx:
+            n += len(part)
+            write(struct.pack('!i', n))
+            part.sort(key=lambda x: x[0])
+        entries = [entry for part in idx for entry in part]
+        obj_list_sum = Sha1()
+        for entry in entries:
+            write(entry[0])
+            obj_list_sum.update(entry[0])
+        for entry in entries:
+            write(struct.pack('!I', entry[1]))
+        ofs64_list = []
+        for entry in entries:
+            # every offset >= 2**31 needs a 64-bit slot, not only bit 31
+            if entry[2] >= 0x80000000:
+                write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
+                ofs64_list.append(struct.pack('!Q', entry[2]))
+            else:
+                write(struct.pack('!i', entry[2]))
+        for ofs64 in ofs64_list:
+            write(ofs64)
+        write(packbin)
+        file.write(sum.digest())
+        return obj_list_sum.hexdigest()
+
 
 def _git_date(date):
     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
@@ -1031,3 +1079,16 @@ class CatPipe:
                 yield d
         except StopIteration:
             log('booger!\n')
+
+def tags():
+    """Map each tagged hash to the list of tag names that point at it.
+
+    Only refs below 'refs/tags/' are used; that prefix is cut off.
+    """
+    prefix = 'refs/tags/'
+    by_hash = {}
+    for (refname, target) in list_refs():
+        if refname.startswith(prefix):
+            # several tags may share one target
+            by_hash.setdefault(target, []).append(refname[len(prefix):])
+    return by_hash
index cb7854338ead99ad13358ad5ec41617dfb31e121..3c78c1890945b96046a9759061564e988b51805b 100644 (file)
@@ -411,6 +411,39 @@ def parse_date_or_fatal(str, fatal):
     else:
         return date
 
+def strip_path(prefix, path):
+    """Strips a given prefix from a path.
+
+    Both paths are normalized with realpath() before they are compared.
+    Returns the normalized path with the leading prefix removed, or the
+    path exactly as given when it does not start with the prefix.
+
+    Raises an Exception if no prefix is given.
+    """
+    if prefix is None:
+        raise Exception('no path given')
+
+    normalized_prefix = realpath(prefix)
+    normalized_path = realpath(path)
+    if normalized_path.startswith(normalized_prefix):
+        return normalized_path[len(normalized_prefix):]
+    else:
+        return path
+
+def strip_base_path(path, base_paths):
+    """Strips the longest matching base path from a given path.
+
+    The candidates in base_paths are tried from the longest string to
+    the shortest, so that a short base path cannot win over a longer,
+    more specific one. The first candidate whose realpath() is a prefix
+    of path is removed with strip_path(); when no candidate matches,
+    path is returned unchanged.
+    """
+    for candidate in sorted(base_paths, key=len, reverse=True):
+        if path.startswith(realpath(candidate)):
+            return strip_path(candidate, path)
+    return path
+
 
 # hashlib is only available in python 2.5 or higher, but the 'sha' module
 # produces a DeprecationWarning in python 2.6 or higher.  We want to support
index c61b351881201b4bee4c2e479056c7a50332b92a..fad720b06e413acf961ff9ee98bda1277b845b24 100644 (file)
@@ -1,4 +1,4 @@
-import time
+import struct, os, tempfile, time
 from bup import git
 from bup.helpers import *
 from wvtest import *
@@ -88,3 +88,29 @@ def testpacks():
     WVPASS(r.exists(hashes[5]))
     WVPASS(r.exists(hashes[6]))
     WVFAIL(r.exists('\0'*20))
+
+@wvtest
+def test_long_index():
+    w = git.PackWriter()
+    obj_bin = struct.pack('!IIIII',
+            0x00112233, 0x44556677, 0x88990011, 0x22334455, 0x66778899)
+    obj2_bin = struct.pack('!IIIII',
+            0x11223344, 0x55667788, 0x99001122, 0x33445566, 0x77889900)
+    obj3_bin = struct.pack('!IIIII',
+            0x22334455, 0x66778899, 0x00112233, 0x44556677, 0x88990011)
+    pack_bin = struct.pack('!IIIII',
+            0x99887766, 0x55443322, 0x11009988, 0x77665544, 0x33221100)
+    idx = list(list() for i in xrange(256))
+    idx[0].append((obj_bin, 1, 0xfffffffff))
+    idx[0x11].append((obj2_bin, 2, 0xffffffffff))
+    idx[0x22].append((obj3_bin, 3, 0xff))
+    (fd,name) = tempfile.mkstemp(suffix='.idx', dir=git.repo('objects'))
+    f = os.fdopen(fd, 'w+b')
+    r = w._write_pack_idx_v2(f, idx, pack_bin)
+    f.seek(0)
+    i = git.PackIdxV2(name, f)
+    WVPASS(i.find_offset(obj_bin)==0xfffffffff)
+    WVPASS(i.find_offset(obj2_bin)==0xffffffffff)
+    WVPASS(i.find_offset(obj3_bin)==0xff)
+    f.close()
+    os.remove(name)
index 9f24962644fb25072c27879968f98e38ee62fba8..306d39b943118b3dc0698354fd00e518ac3b2a9b 100644 (file)
@@ -10,3 +10,21 @@ def test_parse_num():
     WVPASSEQ(pn('2 gb'), 2*1024*1024*1024)
     WVPASSEQ(pn('1e+9 k'), 1000000000 * 1024)
     WVPASSEQ(pn('-3e-3mb'), int(-0.003 * 1024 * 1024))
+
+@wvtest
+def test_strip_path():
+    prefix = "/var/backup/daily.0/localhost"
+    empty_prefix = ""
+    non_matching_prefix = "/home"
+    path = "/var/backup/daily.0/localhost/etc/"
+
+    WVPASSEQ(strip_path(prefix, path), '/etc')
+    WVPASSEQ(strip_path(empty_prefix, path), path)
+    WVPASSEQ(strip_path(non_matching_prefix, path), path)
+    WVEXCEPT(Exception, strip_path, None, path)
+
+@wvtest
+def test_strip_base_path():
+    path = "/var/backup/daily.0/localhost/etc/"
+    base_paths = ["/var", "/var/backup", "/var/backup/daily.0/localhost"]
+    WVPASSEQ(strip_base_path(path, base_paths), '/etc')
index 9b7158f7a1a5b690c3d65e131ea52671a126c512..16a8d33b858999d05d37e74eeac947592d89fea0 100644 (file)
@@ -477,14 +477,7 @@ class BranchList(Node):
     def _mksubs(self):
         self._subs = {}
 
-        tags = {}
-        for (n,c) in git.list_refs():
-            if n.startswith('refs/tags/'):
-                name = n[10:]
-                if not c in tags:
-                    tags[c] = []
-
-                tags[c].append(name)
+        tags = git.tags()
 
         revs = list(git.rev_list(self.hash.encode('hex')))
         for (date, commit) in revs:
index 0c83ba7882b54911513194023c2379d3e7f43d91..832770f1619be0239e3e471f925933abca5edd58 100755 (executable)
--- a/t/test.sh
+++ b/t/test.sh
@@ -208,9 +208,9 @@ WVPASSEQ "$(sha1sum <$D/a)" "$(sha1sum <$D/a.new)"
 WVSTART "tag"
 WVFAIL bup tag -d v0.n 2>/dev/null
 WVFAIL bup tag v0.n non-existant 2>/dev/null
-WVPASSEQ "$(bup tag 2>&1)" ""
+WVPASSEQ "$(bup tag)" ""
 WVPASS bup tag v0.1 master
-WVPASSEQ "$(bup tag 2>&1)" "v0.1"
+WVPASSEQ "$(bup tag)" "v0.1"
 WVPASS bup tag -d v0.1
 
 # This section destroys data in the bup repository, so it is done last.
@@ -303,3 +303,78 @@ WVPASSEQ "$(bup ls exclude-from/latest/$TOP/$D/)" "a
 b
 f"
 rm $EXCLUDE_FILE
+
+WVSTART "strip"
+D=strip.tmp
+rm -rf $D
+mkdir $D
+export BUP_DIR="$D/.bup"
+WVPASS bup init
+touch $D/a
+WVPASS bup random 128k >$D/b
+mkdir $D/d $D/d/e
+WVPASS bup random 512 >$D/f
+WVPASS bup index -ux $D
+WVPASS bup save --strip -n strip $D  # the save itself must succeed
+WVPASSEQ "$(bup ls strip/latest/)" "a
+b
+d/
+f"
+
+WVSTART "strip-path"
+D=strip-path.tmp
+rm -rf $D
+mkdir $D
+export BUP_DIR="$D/.bup"
+WVPASS bup init
+touch $D/a
+WVPASS bup random 128k >$D/b
+mkdir $D/d $D/d/e
+WVPASS bup random 512 >$D/f
+WVPASS bup index -ux $D
+WVPASS bup save --strip-path $TOP -n strip-path $D  # the save itself must succeed
+WVPASSEQ "$(bup ls strip-path/latest/$D/)" "a
+b
+d/
+f"
+
+WVSTART "indexfile"
+D=indexfile.tmp
+INDEXFILE=tmpindexfile.tmp
+rm -f $INDEXFILE
+rm -rf $D
+mkdir $D
+export BUP_DIR="$D/.bup"
+WVPASS bup init
+touch $D/a
+touch $D/b
+mkdir $D/c
+WVPASS bup index -ux $D
+WVPASS bup save --strip -n bupdir $D  # the save itself must succeed
+WVPASSEQ "$(bup ls bupdir/latest/)" "a
+b
+c/"
+WVPASS bup index -f $INDEXFILE --exclude=$D/c -ux $D
+WVPASS bup save --strip -n indexfile -f $INDEXFILE $D  # same for the -f variant
+WVPASSEQ "$(bup ls indexfile/latest/)" "a
+b"
+
+WVSTART "import-rsnapshot"
+
+#set -x
+rm -rf "$BUP_DIR"
+WVPASS bup init
+
+D=bupdata.tmp
+rm -rf $D
+mkdir $D
+
+mkdir -p $D/hourly.0/buptest/a
+touch $D/hourly.0/buptest/a/b
+mkdir -p $D/hourly.0/buptest/c/d
+touch $D/hourly.0/buptest/c/d/e
+
+WVPASS bup import-rsnapshot $D/  # the import itself must succeed
+
+WVPASSEQ "$(bup ls buptest/latest/)" "a/
+c/"