arthur.barton.de Git - bup.git/commitdiff
Write git pack files instead of loose object files.
author: Avery Pennarun <apenwarr@gmail.com>
Sat, 2 Jan 2010 09:16:25 +0000 (04:16 -0500)
committer: Avery Pennarun <apenwarr@gmail.com>
Sat, 2 Jan 2010 09:16:25 +0000 (04:16 -0500)
This causes much, much less disk grinding than creating zillions of files,
plus it's even more disk space efficient.

We could theoretically make it go even faster by generating the .idx file
ourselves, but for now, we just call "git index-pack" to do it.  That
helpfully also confirms that the data was written in a git-compatible way.

cmd-save.py
cmd-split.py
git.py

index 458dc1a179bdbc6ebcd4bf04e20a7d10b9d427b4..e63054bbe6d25a61af3c1c6cfc2d2ff39a424b67 100755 (executable)
@@ -124,5 +124,7 @@ if opt.commit or opt.name:
     if opt.commit:
         print commit
 
+git.flush_pack()
+
 if saved_errors:
     log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
index 18f2e05b8a6d401381bc38782e1d0c12d7e6544a..157b36c69325ce5e642bef68222acaedcd88e671 100755 (executable)
@@ -25,6 +25,7 @@ hashsplit.split_verbosely = opt.verbose
 start_time = time.time()
 
 (shalist,tree) = hashsplit.split_to_tree(hashsplit.autofiles(extra))
+
 if opt.blobs:
     for (mode,name,sum) in shalist:
         print sum
@@ -42,3 +43,5 @@ size = hashsplit.total_split
 if opt.bench:
     log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
         % (size/1024., secs, size/1024./secs))
+
+git.flush_pack()
diff --git a/git.py b/git.py
index 1e1c79fd6f9d1d4a9d87967c6aea279b9d37d378..1ff1b74325e54012fb767305447cb657d3c8391e 100644 (file)
--- a/git.py
+++ b/git.py
@@ -1,16 +1,10 @@
-import os, errno, zlib, time, sha, subprocess
+import os, errno, zlib, time, sha, subprocess, struct
 from helpers import *
 
-_objcache = {}
-def hash_raw(type, s):
-    global _objcache
-    header = '%s %d\0' % (type, len(s))
-    sum = sha.sha(header)
-    sum.update(s)
-    bin = sum.digest()
-    hex = sum.hexdigest()
-    if bin in _objcache:
-        return hex
+
+def _old_write_object(bin, type, content):
+    hex = bin.encode('hex')
+    header = '%s %d\0' % (type, len(content))
     dir = '.git/objects/%s' % hex[0:2]
     fn = '%s/%s' % (dir, hex[2:])
     if not os.path.exists(fn):
@@ -20,19 +14,105 @@ def hash_raw(type, s):
         except OSError, e:
             if e.errno != errno.EEXIST:
                 raise
-        tfn = '%s.%d' % (fn, os.getpid())
+        tfn = '.git/objects/bup%d.tmp' % os.getpid()
         f = open(tfn, 'w')
         z = zlib.compressobj(1)
         f.write(z.compress(header))
-        f.write(z.compress(s))
+        f.write(z.compress(content))
         f.write(z.flush())
         f.close()
         os.rename(tfn, fn)
+
+
+_typemap = dict(blob=3, tree=2, commit=1, tag=8)
+class PackWriter:
+    def __init__(self):
+        self.count = 0
+        self.binlist = []
+        self.filename = '.git/objects/bup%d' % os.getpid()
+        self.file = open(self.filename + '.pack', 'w+')
+        self.file.write('PACK\0\0\0\2\0\0\0\0')
+
+    def write(self, bin, type, content):
+        global _typemap
+        f = self.file
+
+        sz = len(content)
+        szbits = (sz & 0x0f) | (_typemap[type]<<4)
+        sz >>= 4
+        while 1:
+            if sz: szbits |= 0x80
+            f.write(chr(szbits))
+            if not sz:
+                break
+            szbits = sz & 0x7f
+            sz >>= 7
+        
+        z = zlib.compressobj(1)
+        f.write(z.compress(content))
+        f.write(z.flush())
+
+        self.count += 1
+        self.binlist.append(bin)
+
+    def close(self):
+        f = self.file
+
+        # update object count
+        f.seek(8)
+        cp = struct.pack('!i', self.count)
+        assert(len(cp) == 4)
+        f.write(cp)
+
+        # calculate the pack sha1sum
+        f.seek(0)
+        sum = sha.sha()
+        while 1:
+            b = f.read(65536)
+            sum.update(b)
+            if not b: break
+        f.write(sum.digest())
+        
+        f.close()
+
+        p = subprocess.Popen(['git', 'index-pack', '-v',
+                              self.filename + '.pack'],
+                             preexec_fn = lambda: _gitenv('.git'),
+                             stdout = subprocess.PIPE)
+        out = p.stdout.read().strip()
+        if p.wait() or not out:
+            raise Exception('git index-pack returned an error')
+        os.rename(self.filename + '.pack', '.git/objects/pack/%s.pack' % out)
+        os.rename(self.filename + '.idx', '.git/objects/pack/%s.idx' % out)
+
+_packout = None
+def _write_object(bin, type, content):
+    global _packout
+    if not _packout:
+        _packout = PackWriter()
+    _packout.write(bin, type, content)
+
+
+def flush_pack():
+    global _packout
+    if _packout:
+        _packout.close()
+
+
+_objcache = {}
+def hash_raw(type, s):
+    global _objcache
+    header = '%s %d\0' % (type, len(s))
+    sum = sha.sha(header)
+    sum.update(s)
+    bin = sum.digest()
+    hex = sum.hexdigest()
+    if bin in _objcache:
+        return hex
     else:
-        #log('exists %s' % fn)
-        pass
-    _objcache[bin] = 1
-    return hex
+        _write_object(bin, type, s)
+        _objcache[bin] = 1
+        return hex
 
 
 def hash_blob(blob):