arthur.barton.de Git - bup.git/commitdiff
Merge branches 'gf/ls', 'gf/tag', 'zz/import-rsnapshot' and 'bl/selfindex' bup-0.21-rc1
author Avery Pennarun <apenwarr@gmail.com>
Mon, 3 Jan 2011 04:58:35 +0000 (20:58 -0800)
committer Avery Pennarun <apenwarr@gmail.com>
Mon, 3 Jan 2011 04:58:35 +0000 (20:58 -0800)
* gf/ls:
  ls-cmd: hide files with a leading dot by default

* gf/tag:
  Refuse branch/tag names that start with a dot
  tag-cmd: Some fixups

* zz/import-rsnapshot:
  Adds a testcase for import-rsnapshot.
  Makes import-rsnapshot use save's -f option.
  Adds -f option to save to use a given indexfile.
  Makefile: handle shell commands (cmd/*-cmd.sh)
  Adds documentation for bup-import-rsnapshot
  Adds import-rsnapshot command.
  Adds documentation for save's strip option.
  Adds testcases for --strip and --strip-path.
  Adds a strip and strip-path option to bup save.

* bl/selfindex:
  Rename receive-objects command to receive-objects-v2.
  Write idxs directly rather than using git-index-pack.
  Send SHAs from the client to reduce server load
  Use chunkyreader() instead of manually reading multiple blocks.

cmd/ls-cmd.py
cmd/save-cmd.py
cmd/server-cmd.py
cmd/split-cmd.py
cmd/tag-cmd.py
lib/bup/client.py
lib/bup/git.py
lib/bup/t/tgit.py
lib/bup/vfs.py
t/test.sh

index 57f4275cf84a28cf0ebb1270de62f0d77d7f02ea..bbc0432987530946aefdb489b1c749e5b4ad8626 100755 (executable)
@@ -19,6 +19,7 @@ optspec = """
 bup ls <dirs...>
 --
 s,hash   show hash for each file
+a,all    show hidden files
 """
 o = options.Options('bup ls', optspec)
 (opt, flags, extra) = o.parse(sys.argv[1:])
@@ -35,9 +36,11 @@ for d in extra:
         n = top.lresolve(d)
         if stat.S_ISDIR(n.mode):
             for sub in n:
-                print_node(sub.name, sub)
+                if opt.all or not sub.name.startswith('.'):
+                    print_node(sub.name, sub)
         else:
-            print_node(d, n)
+            if opt.all or not sub.name.startswith('.'):
+                print_node(d, n)
     except vfs.NodeError, e:
         log('error: %s\n' % e)
         ret = 1
index d278e70727a1b07e7b18b8a755a8d5378854325f..62c1e426bb5bad665c41004fb9e71af99d1dc2d5 100755 (executable)
@@ -46,6 +46,8 @@ is_reverse = os.environ.get('BUP_SERVER_REVERSE')
 if is_reverse and opt.remote:
     o.fatal("don't use -r in reverse mode; it's automatic")
 
+if opt.name and opt.name.startswith('.'):
+    o.fatal("'%s' is not a valid branch name" % opt.name)
 refname = opt.name and 'refs/heads/%s' % opt.name or None
 if opt.remote or is_reverse:
     cli = client.Client(opt.remote)
@@ -297,7 +299,6 @@ if opt.tree:
     print tree.encode('hex')
 if opt.commit or opt.name:
     msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv
-    ref = opt.name and ('refs/heads/%s' % opt.name) or None
     commit = w.new_commit(oldref, tree, date, msg)
     if opt.commit:
         print commit.encode('hex')
index 299e28dda9a1de76896cda171ffce4a1bdc13db1..e44f302c7d7a7c8332b13141a107374702d1c022 100755 (executable)
@@ -36,7 +36,7 @@ def send_index(conn, name):
     conn.ok()
 
 
-def receive_objects(conn, junk):
+def receive_objects_v2(conn, junk):
     global suspended_w
     git.check_repo_or_die()
     suggested = {}
@@ -67,15 +67,16 @@ def receive_objects(conn, junk):
             conn.ok()
             return
             
+        shar = conn.read(20)
+        crcr = struct.unpack('!I', conn.read(4))[0]
+        n -= 20 + 4
         buf = conn.read(n)  # object sizes in bup are reasonably small
         #debug2('read %d bytes\n' % n)
         if len(buf) < n:
             w.abort()
             raise Exception('object read: expected %d bytes, got %d\n'
                             % (n, len(buf)))
-        (type, content) = git._decode_packobj(buf)
-        sha = git.calc_hash(type, content)
-        oldpack = w.exists(sha)
+        oldpack = w.exists(shar)
         # FIXME: we only suggest a single index per cycle, because the client
         # is currently too dumb to download more than one per cycle anyway.
         # Actually we should fix the client, but this is a minor optimization
@@ -88,7 +89,7 @@ def receive_objects(conn, junk):
             # fix that deficiency of midx files eventually, although it'll
             # make the files bigger.  This method is certainly not very
             # efficient.
-            oldpack = w.objcache.packname_containing(sha)
+            oldpack = w.objcache.packname_containing(shar)
             debug2('new suggestion: %r\n' % oldpack)
             assert(oldpack)
             assert(oldpack != True)
@@ -102,8 +103,16 @@ def receive_objects(conn, junk):
                 conn.write('index %s\n' % name)
                 suggested[name] = 1
         else:
-            w._raw_write([buf])
+            nw, crc = w._raw_write([buf], sha=shar)
+            _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
+            _check(w, n, nw, 'object read: expected %d bytes, got %d\n')
     # NOTREACHED
+    
+
+def _check(w, expected, actual, msg):
+    if expected != actual:
+        w.abort()
+        raise Exception(msg % (expected, actual))
 
 
 def read_ref(conn, refname):
@@ -156,7 +165,7 @@ commands = {
     'set-dir': set_dir,
     'list-indexes': list_indexes,
     'send-index': send_index,
-    'receive-objects': receive_objects,
+    'receive-objects-v2': receive_objects_v2,
     'read-ref': read_ref,
     'update-ref': update_ref,
     'cat': cat,
index 035e8de4b97cf95d6532e5909a9149de8cadc1c2..7ecc5d018cc865a76727580c2c612eb04a2c060e 100755 (executable)
@@ -78,6 +78,8 @@ if is_reverse and opt.remote:
     o.fatal("don't use -r in reverse mode; it's automatic")
 start_time = time.time()
 
+if opt.name and opt.name.startswith('.'):
+    o.fatal("'%s' is not a valid branch name." % opt.name)
 refname = opt.name and 'refs/heads/%s' % opt.name or None
 if opt.noop or opt.copy:
     cli = pack_writer = oldref = None
index 760dce7dd07c1b81d0c17bf4af6e44c81d51ce4c..a624e8b8ace127d1ea434cad72723468448a61c9 100755 (executable)
@@ -35,33 +35,38 @@ if opt.delete:
     try:
         os.unlink(tag_file)
     except OSError, e:
-        log("bup: error: unable to delete tag: %s" % e)
+        log("bup: error: unable to delete tag '%s': %s" % (opt.delete, e))
         sys.exit(1)
 
     sys.exit(0)
 
-tags = []
-for (t, dummy) in git.list_refs():
-    if t.startswith('refs/tags/'):
-        tags.append(t[10:])
+tags = [t for sublist in git.tags().values() for t in sublist]
 
 if not extra:
     for t in tags:
-        log("%s\n" % t)
+        print t
     sys.exit(0)
-elif len(extra) != 2:
-    log('bup: error: no ref or hash given.')
-    sys.exit(1)
+elif len(extra) < 2:
+    o.fatal('no commit ref or hash given.')
 
-tag_name = extra[0]
-commit = extra[1]
-debug1("from args: tag name = %s; commit = %s\n" % (tag_name, commit))
+(tag_name, commit) = extra[:2]
+if not tag_name:
+    o.fatal("tag name must not be empty.")
+debug1("args: tag name = %s; commit = %s\n" % (tag_name, commit))
 
 if tag_name in tags:
     log("bup: error: tag '%s' already exists" % tag_name)
     sys.exit(1)
 
-hash = git.rev_parse(commit)
+if tag_name.startswith('.'):
+    o.fatal("'%s' is not a valid tag name." % tag_name)
+
+try:
+    hash = git.rev_parse(commit)
+except git.GitError, e:
+    log("bup: error: %s" % e)
+    sys.exit(2)
+
 if not hash:
     log("bup: error: commit %s not found." % commit)
     sys.exit(2)
@@ -75,7 +80,7 @@ tag_file = git.repo('refs/tags/%s' % tag_name)
 try:
     tag = file(tag_file, 'w')
 except OSError, e:
-    log('bup: error: could not create tag %s: %s' % (tag_name, e))
+    log("bup: error: could not create tag '%s': %s" % (tag_name, e))
     sys.exit(3)
 
 tag.write(hash.encode('hex'))
index d1fdbbe155851563f026fc03d83e990b20cf1f26..c65ec0cef8caa9e6fbc90ade9446be4a91265dcb 100644 (file)
@@ -1,4 +1,4 @@
-import re, struct, errno, time
+import re, struct, errno, time, zlib
 from bup import git, ssh
 from bup.helpers import *
 
@@ -169,20 +169,20 @@ class Client:
         debug1('client: received index suggestion: %s\n' % indexname)
         ob = self._busy
         if ob:
-            assert(ob == 'receive-objects')
+            assert(ob == 'receive-objects-v2')
             self.conn.write('\xff\xff\xff\xff')  # suspend receive-objects
             self._busy = None
             self.conn.drain_and_check_ok()
         self.sync_index(indexname)
         if ob:
             self._busy = ob
-            self.conn.write('receive-objects\n')
+            self.conn.write('receive-objects-v2\n')
 
     def new_packwriter(self):
         self.check_busy()
         def _set_busy():
-            self._busy = 'receive-objects'
-            self.conn.write('receive-objects\n')
+            self._busy = 'receive-objects-v2'
+            self.conn.write('receive-objects-v2\n')
         return PackWriter_Remote(self.conn,
                                  objcache_maker = self._make_objcache,
                                  suggest_pack = self._suggest_pack,
@@ -271,18 +271,23 @@ class PackWriter_Remote(git.PackWriter):
     def abort(self):
         raise GitError("don't know how to abort remote pack writing")
 
-    def _raw_write(self, datalist):
+    def _raw_write(self, datalist, sha):
         assert(self.file)
         if not self._packopen:
             self._open()
         if self.ensure_busy:
             self.ensure_busy()
         data = ''.join(datalist)
-        assert(len(data))
-        outbuf = struct.pack('!I', len(data)) + data
+        assert(data)
+        assert(sha)
+        crc = zlib.crc32(data) & 0xffffffff
+        outbuf = ''.join((struct.pack('!I', len(data) + 20 + 4),
+                          sha,
+                          struct.pack('!I', crc),
+                          data))
         (self._bwcount, self._bwtime) = \
             _raw_write_bwlimit(self.file, outbuf, self._bwcount, self._bwtime)
-        self.outbytes += len(data)
+        self.outbytes += len(data) - 20 - 4 # Don't count sha1+crc
         self.count += 1
 
         if self.file.has_input():
@@ -292,3 +297,5 @@ class PackWriter_Remote(git.PackWriter):
             if self.suggest_pack:
                 self.suggest_pack(idxname)
                 self.objcache.refresh()
+
+        return sha, crc
index 6a65c435450233702dea291113c417e423c0419f..49e98019c2927c023de97a8683a6f4992c08c518 100644 (file)
@@ -2,8 +2,7 @@
 bup repositories are in Git format. This library allows us to
 interact with the Git data structures.
 """
-import os, zlib, time, subprocess, struct, stat, re, tempfile
-import heapq
+import os, zlib, time, subprocess, struct, stat, re, tempfile, heapq
 from bup.helpers import *
 from bup import _helpers
 
@@ -140,7 +139,7 @@ def _decode_packobj(buf):
 class PackIdx:
     def __init__(self):
         assert(0)
-    
+
     def find_offset(self, hash):
         """Get the offset of an object inside the index file."""
         idx = self._idx_from_hash(hash)
@@ -222,7 +221,7 @@ class PackIdxV2(PackIdx):
         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
         if ofs & 0x80000000:
             idx64 = ofs & 0x7fffffff
-            ofs = struct.unpack('!I',
+            ofs = struct.unpack('!Q',
                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
         return ofs
 
@@ -528,6 +527,7 @@ class PackWriter:
         self.outbytes = 0
         self.filename = None
         self.file = None
+        self.idx = None
         self.objcache_maker = objcache_maker
         self.objcache = None
 
@@ -549,8 +549,11 @@ class PackWriter:
             assert(name.endswith('.pack'))
             self.filename = name[:-5]
             self.file.write('PACK\0\0\0\2\0\0\0\0')
+            self.idx = list(list() for i in xrange(256))
 
-    def _raw_write(self, datalist):
+    # the 'sha' parameter is used in client.py's _raw_write(), but not needed
+    # in this basic version.
+    def _raw_write(self, datalist, sha):
         self._open()
         f = self.file
         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
@@ -560,14 +563,25 @@ class PackWriter:
         # but that's okay because we'll flush it in _end().
         oneblob = ''.join(datalist)
         f.write(oneblob)
-        self.outbytes += len(oneblob)
+        nw = len(oneblob)
+        crc = zlib.crc32(oneblob) & 0xffffffff
+        self._update_idx(sha, crc, nw)
+        self.outbytes += nw
         self.count += 1
+        return nw, crc
+
+    def _update_idx(self, sha, crc, size):
+        assert(sha)
+        if self.idx:
+            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
 
-    def _write(self, bin, type, content):
+    def _write(self, sha, type, content):
         if verbose:
             log('>')
-        self._raw_write(_encode_packobj(type, content))
-        return bin
+        if not sha:
+            sha = calc_hash(type, content)
+        size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
+        return sha
 
     def breakpoint(self):
         """Clear byte and object counts and return the last processed id."""
@@ -587,11 +601,11 @@ class PackWriter:
 
     def maybe_write(self, type, content):
         """Write an object to the pack file if not present and return its id."""
-        bin = calc_hash(type, content)
-        if not self.exists(bin):
-            self._write(bin, type, content)
-            self.objcache.add(bin)
-        return bin
+        sha = calc_hash(type, content)
+        if not self.exists(sha):
+            self._write(sha, type, content)
+            self.objcache.add(sha)
+        return sha
 
     def new_blob(self, blob):
         """Create a blob object in the pack with the supplied content."""
@@ -632,6 +646,7 @@ class PackWriter:
         """Remove the pack file from disk."""
         f = self.file
         if f:
+            self.idx = None
             self.file = None
             f.close()
             os.unlink(self.filename + '.pack')
@@ -641,6 +656,8 @@ class PackWriter:
         if not f: return None
         self.file = None
         self.objcache = None
+        idx = self.idx
+        self.idx = None
 
         # update object count
         f.seek(8)
@@ -651,24 +668,17 @@ class PackWriter:
         # calculate the pack sha1sum
         f.seek(0)
         sum = Sha1()
-        while 1:
-            b = f.read(65536)
+        for b in chunkyreader(f):
             sum.update(b)
-            if not b: break
-        f.write(sum.digest())
-
+        packbin = sum.digest()
+        f.write(packbin)
         f.close()
 
-        p = subprocess.Popen(['git', 'index-pack', '-v',
-                              '--index-version=2',
-                              self.filename + '.pack'],
-                             preexec_fn = _gitenv,
-                             stdout = subprocess.PIPE)
-        out = p.stdout.read().strip()
-        _git_wait('git index-pack', p)
-        if not out:
-            raise GitError('git index-pack produced no output')
-        nameprefix = repo('objects/pack/%s' % out)
+        idx_f = open(self.filename + '.idx', 'wb')
+        obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
+        idx_f.close()
+
+        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
         if os.path.exists(self.filename + '.map'):
             os.unlink(self.filename + '.map')
         os.rename(self.filename + '.pack', nameprefix + '.pack')
@@ -681,6 +691,44 @@ class PackWriter:
         """Close the pack file and move it to its definitive path."""
         return self._end()
 
+    def _write_pack_idx_v2(self, file, idx, packbin):
+        sum = Sha1()
+
+        def write(data):
+            file.write(data)
+            sum.update(data)
+
+        write('\377tOc\0\0\0\2')
+
+        n = 0
+        for part in idx:
+            n += len(part)
+            write(struct.pack('!i', n))
+            part.sort(key=lambda x: x[0])
+
+        obj_list_sum = Sha1()
+        for part in idx:
+            for entry in part:
+                write(entry[0])
+                obj_list_sum.update(entry[0])
+        for part in idx:
+            for entry in part:
+                write(struct.pack('!I', entry[1]))
+        ofs64_list = []
+        for part in idx:
+            for entry in part:
+                if entry[2] & 0x80000000:
+                    write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
+                    ofs64_list.append(struct.pack('!Q', entry[2]))
+                else:
+                    write(struct.pack('!i', entry[2]))
+        for ofs64 in ofs64_list:
+            write(ofs64)
+
+        write(packbin)
+        file.write(sum.digest())
+        return obj_list_sum.hexdigest()
+
 
 def _git_date(date):
     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
@@ -1031,3 +1079,16 @@ class CatPipe:
                 yield d
         except StopIteration:
             log('booger!\n')
+
+def tags():
+    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
+    tags = {}
+    for (n,c) in list_refs():
+        if n.startswith('refs/tags/'):
+            name = n[10:]
+            if not c in tags:
+                tags[c] = []
+
+            tags[c].append(name)  # more than one tag can point at 'c'
+
+    return tags
index c61b351881201b4bee4c2e479056c7a50332b92a..fad720b06e413acf961ff9ee98bda1277b845b24 100644 (file)
@@ -1,4 +1,4 @@
-import time
+import struct, os, tempfile, time
 from bup import git
 from bup.helpers import *
 from wvtest import *
@@ -88,3 +88,29 @@ def testpacks():
     WVPASS(r.exists(hashes[5]))
     WVPASS(r.exists(hashes[6]))
     WVFAIL(r.exists('\0'*20))
+
+@wvtest
+def test_long_index():
+    w = git.PackWriter()
+    obj_bin = struct.pack('!IIIII',
+            0x00112233, 0x44556677, 0x88990011, 0x22334455, 0x66778899)
+    obj2_bin = struct.pack('!IIIII',
+            0x11223344, 0x55667788, 0x99001122, 0x33445566, 0x77889900)
+    obj3_bin = struct.pack('!IIIII',
+            0x22334455, 0x66778899, 0x00112233, 0x44556677, 0x88990011)
+    pack_bin = struct.pack('!IIIII',
+            0x99887766, 0x55443322, 0x11009988, 0x77665544, 0x33221100)
+    idx = list(list() for i in xrange(256))
+    idx[0].append((obj_bin, 1, 0xfffffffff))
+    idx[0x11].append((obj2_bin, 2, 0xffffffffff))
+    idx[0x22].append((obj3_bin, 3, 0xff))
+    (fd,name) = tempfile.mkstemp(suffix='.idx', dir=git.repo('objects'))
+    f = os.fdopen(fd, 'w+b')
+    r = w._write_pack_idx_v2(f, idx, pack_bin)
+    f.seek(0)
+    i = git.PackIdxV2(name, f)
+    WVPASS(i.find_offset(obj_bin)==0xfffffffff)
+    WVPASS(i.find_offset(obj2_bin)==0xffffffffff)
+    WVPASS(i.find_offset(obj3_bin)==0xff)
+    f.close()
+    os.remove(name)
index 9b7158f7a1a5b690c3d65e131ea52671a126c512..16a8d33b858999d05d37e74eeac947592d89fea0 100644 (file)
@@ -477,14 +477,7 @@ class BranchList(Node):
     def _mksubs(self):
         self._subs = {}
 
-        tags = {}
-        for (n,c) in git.list_refs():
-            if n.startswith('refs/tags/'):
-                name = n[10:]
-                if not c in tags:
-                    tags[c] = []
-
-                tags[c].append(name)
+        tags = git.tags()
 
         revs = list(git.rev_list(self.hash.encode('hex')))
         for (date, commit) in revs:
index 08a4be53c87897d470d8edabc8e9b39fa76f47a0..832770f1619be0239e3e471f925933abca5edd58 100755 (executable)
--- a/t/test.sh
+++ b/t/test.sh
@@ -208,9 +208,9 @@ WVPASSEQ "$(sha1sum <$D/a)" "$(sha1sum <$D/a.new)"
 WVSTART "tag"
 WVFAIL bup tag -d v0.n 2>/dev/null
 WVFAIL bup tag v0.n non-existant 2>/dev/null
-WVPASSEQ "$(bup tag 2>&1)" ""
+WVPASSEQ "$(bup tag)" ""
 WVPASS bup tag v0.1 master
-WVPASSEQ "$(bup tag 2>&1)" "v0.1"
+WVPASSEQ "$(bup tag)" "v0.1"
 WVPASS bup tag -d v0.1
 
 # This section destroys data in the bup repository, so it is done last.