'bup split': speed optimization for never-ending blocks.

author Avery Pennarun <apenwarr@gmail.com>

Sat, 2 Jan 2010 06:46:06 +0000 (01:46 -0500)

committer Avery Pennarun <apenwarr@gmail.com>

Sat, 2 Jan 2010 06:46:06 +0000 (01:46 -0500)
author Avery Pennarun <apenwarr@gmail.com>
Sat, 2 Jan 2010 06:46:06 +0000 (01:46 -0500)
committer Avery Pennarun <apenwarr@gmail.com>
Sat, 2 Jan 2010 06:46:06 +0000 (01:46 -0500)
diff --git a/git.py b/git.py

index 61e37cd57ed581965565592d3145c2aa32d93c94..1e1c79fd6f9d1d4a9d87967c6aea279b9d37d378 100644 (file)
--- a/git.py
+++ b/git.py
@@ -1,12 +1,16 @@
  import os, errno, zlib, time, sha, subprocess
  from helpers import *
  
-
+_objcache = {}
  def hash_raw(type, s):
+    global _objcache
      header = '%s %d\0' % (type, len(s))
      sum = sha.sha(header)
      sum.update(s)
+    bin = sum.digest()
      hex = sum.hexdigest()
+    if bin in _objcache:
+        return hex
      dir = '.git/objects/%s' % hex[0:2]
      fn = '%s/%s' % (dir, hex[2:])
      if not os.path.exists(fn):
@@ -27,6 +31,7 @@ def hash_raw(type, s):
      else:
          #log('exists %s' % fn)
          pass
+    _objcache[bin] = 1
      return hex
  
  
diff --git a/hashsplit.py b/hashsplit.py

index a991ca7bce5cd145f8773a3da6261cc0cc96feb5..80b54b1ffee342953217a8c815141720d8bcbae1 100644 (file)
--- a/hashsplit.py
+++ b/hashsplit.py
@@ -79,7 +79,7 @@ def hashsplit_iter(files):
          if eof and not blob:
              blob = buf.get(buf.used())
          if not blob and buf.used() >= BLOB_MAX:
-            blob = buf.get(BLOB_MAX)  # limit max blob size
+            blob = buf.get(buf.used())  # limit max blob size
          if not blob and not eof:
              continue
author	Avery Pennarun <apenwarr@gmail.com>
	Sat, 2 Jan 2010 06:46:06 +0000 (01:46 -0500)
committer	Avery Pennarun <apenwarr@gmail.com>
	Sat, 2 Jan 2010 06:46:06 +0000 (01:46 -0500)
git.py		patch | blob | history
hashsplit.py		patch | blob | history