hashsplit.py: remove PackWriter-specific knowledge.
author    Avery Pennarun <apenwarr@gmail.com>
          Thu, 17 Feb 2011 12:22:50 +0000 (04:22 -0800)
committer Avery Pennarun <apenwarr@gmail.com>
          Sun, 20 Feb 2011 05:38:28 +0000 (21:38 -0800)
Let's use callback functions explicitly instead of passing around special
objects; that makes the dependencies a bit clearer and hopefully opens
the way to some more refactoring for clarity.

Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
cmd/save-cmd.py
cmd/split-cmd.py
lib/bup/client.py
lib/bup/git.py
lib/bup/hashsplit.py
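
The shape of the change, sketched with hypothetical stand-ins (this is not
bup's code; FakeWriter just mimics the one method of git.PackWriter that
matters here): the split functions now receive bound methods as callbacks
instead of the writer object itself.

    import hashlib

    def split_to_blobs(makeblob, chunks):
        # Shaped like the new hashsplit.split_to_blobs: depend only on a
        # callable, never on the writer's other attributes.
        for chunk in chunks:
            yield (makeblob(chunk), len(chunk))

    class FakeWriter:
        def new_blob(self, content):
            # Stand-in for PackWriter.new_blob: hash, store nothing.
            return hashlib.sha1(content).hexdigest()

    w = FakeWriter()
    # A bound method is the callback, just as cmd/split-cmd.py now passes
    # pack_writer.new_blob.
    for (sha, size) in split_to_blobs(w.new_blob, [b'hello ', b'world']):
        print('%s %d' % (sha, size))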

cmd/save-cmd.py
index 8c2ba40abe61382ea5a46ebf99f9fee007432842..7219ab4b55441293136d74f58bfa99193aef1702 100755 (executable)
@@ -263,7 +263,8 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
                 lastskip_name = ent.name
             else:
                 try:
-                    (mode, id) = hashsplit.split_to_blob_or_tree(w, [f],
+                    (mode, id) = hashsplit.split_to_blob_or_tree(
+                                            w.new_blob, w.new_tree, [f],
                                             keep_boundaries=False)
                 except IOError, e:
                     add_error('%s: %s' % (ent.name, e))
cmd/split-cmd.py
index 363896f792381f3b85d34cc3ab98339af0458d2d..b243c61400e918fdae1b19a907177c38a61ea2a1 100755 (executable)
@@ -47,9 +47,9 @@ if opt.verbose >= 2:
     git.verbose = opt.verbose - 1
     opt.bench = 1
 if opt.max_pack_size:
-    hashsplit.max_pack_size = parse_num(opt.max_pack_size)
+    git.max_pack_size = parse_num(opt.max_pack_size)
 if opt.max_pack_objects:
-    hashsplit.max_pack_objects = parse_num(opt.max_pack_objects)
+    git.max_pack_objects = parse_num(opt.max_pack_objects)
 if opt.fanout:
     hashsplit.fanout = parse_num(opt.fanout)
 if opt.blobs:
@@ -128,14 +128,16 @@ else:
     files = extra and (open(fn) for fn in extra) or [sys.stdin]
 
 if pack_writer and opt.blobs:
-    shalist = hashsplit.split_to_blobs(pack_writer, files,
+    shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
                                        keep_boundaries=opt.keep_boundaries,
                                        progress=prog)
     for (sha, size, level) in shalist:
         print sha.encode('hex')
         reprogress()
 elif pack_writer:  # tree or commit or name
-    shalist = hashsplit.split_to_shalist(pack_writer, files,
+    shalist = hashsplit.split_to_shalist(pack_writer.new_blob,
+                                         pack_writer.new_tree,
+                                         files,
                                          keep_boundaries=opt.keep_boundaries,
                                          progress=prog)
     tree = pack_writer.new_tree(shalist)
lib/bup/client.py
index f941067237b1f574d93bb2b40668c5f5eb20dfca..5563905569e5d489b2ef46ae166bf03a5de0824e 100644 (file)
@@ -177,7 +177,6 @@ class Client:
             self.sync_index(idx)
         git.auto_midx(self.cachedir)
 
-
     def sync_index(self, name):
         #debug1('requesting %r\n' % name)
         self.check_busy()
lib/bup/git.py
index 6db392a2226fe5de78df7d4c15c44982a18bfdd2..fd364b542531e3499839b8ac156f43e7ddfb3fb1 100644 (file)
@@ -6,6 +6,8 @@ import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
 from bup.helpers import *
 from bup import _helpers, path, midx, bloom
 
+max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
+max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
 SEEK_END=2  # os.SEEK_END is not defined in python 2.4
 
 verbose = 0
@@ -509,6 +511,8 @@ class PackWriter:
         if not sha:
             sha = calc_hash(type, content)
         size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
+        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
+            self.breakpoint()
         return sha
 
     def breakpoint(self):
@@ -531,10 +535,10 @@ class PackWriter:
 
     def maybe_write(self, type, content):
         """Write an object to the pack file if not present and return its id."""
-        self._require_objcache()
         sha = calc_hash(type, content)
         if not self.exists(sha):
             self._write(sha, type, content)
+            self._require_objcache()
             self.objcache.add(sha)
         return sha
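
With the limits now owned by git.py, PackWriter polices itself: any code
path that writes an object can trigger a pack rotation, not just
hashsplit's loop. A standalone sketch of that behavior (an illustrative
class, not bup's actual PackWriter):

    max_pack_size = 1000*1000*1000   # same defaults the diff moves here
    max_pack_objects = 200*1000

    class Writer:
        def __init__(self):
            self.outbytes = self.count = self.packs_finished = 0

        def breakpoint(self):
            # Finish the current pack and start a fresh one (stubbed out).
            self.packs_finished += 1
            self.outbytes = self.count = 0

        def _write(self, content):
            self.outbytes += len(content)
            self.count += 1
            # The check the diff adds: rotate as soon as either limit trips.
            if (self.outbytes >= max_pack_size
                or self.count >= max_pack_objects):
                self.breakpoint()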
 
lib/bup/hashsplit.py
index 439c63db6c7374da6c868b3c1e123c05170b648c..1819294d1aa7d2029203316d78f9c300bc75b7c8 100644 (file)
@@ -6,8 +6,6 @@ BLOB_MAX = 8192*4   # 8192 is the "typical" blob size for bupsplit
 BLOB_READ_SIZE = 1024*1024
 MAX_PER_TREE = 256
 progress_callback = None
-max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
-max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
 fanout = 16
 
 # The purpose of this type of buffer is to avoid copying on peek(), get(),
@@ -105,13 +103,11 @@ def hashsplit_iter(files, keep_boundaries, progress):
 
 
 total_split = 0
-def split_to_blobs(w, files, keep_boundaries, progress):
+def split_to_blobs(makeblob, files, keep_boundaries, progress):
     global total_split
     for (blob, level) in hashsplit_iter(files, keep_boundaries, progress):
-        sha = w.new_blob(blob)
+        sha = makeblob(blob)
         total_split += len(blob)
-        if w.outbytes >= max_pack_size or w.count >= max_pack_objects:
-            w.breakpoint()
         if progress_callback:
             progress_callback(len(blob))
         yield (sha, len(blob), level)
@@ -127,7 +123,7 @@ def _make_shalist(l):
     return (shalist, total)
 
 
-def _squish(w, stacks, n):
+def _squish(maketree, stacks, n):
     i = 0
     while i<n or len(stacks[i]) > MAX_PER_TREE:
         while len(stacks) <= i+1:
@@ -136,14 +132,15 @@ def _squish(w, stacks, n):
             stacks[i+1] += stacks[i]
         elif stacks[i]:
             (shalist, size) = _make_shalist(stacks[i])
-            tree = w.new_tree(shalist)
+            tree = maketree(shalist)
             stacks[i+1].append(('40000', tree, size))
         stacks[i] = []
         i += 1
 
 
-def split_to_shalist(w, files, keep_boundaries, progress=None):
-    sl = split_to_blobs(w, files, keep_boundaries, progress)
+def split_to_shalist(makeblob, maketree, files,
+                     keep_boundaries, progress=None):
+    sl = split_to_blobs(makeblob, files, keep_boundaries, progress)
     assert(fanout != 0)
     if not fanout:
         shal = []
@@ -155,21 +152,22 @@ def split_to_shalist(w, files, keep_boundaries, progress=None):
         for (sha,size,level) in sl:
             stacks[0].append(('100644', sha, size))
             if level:
-                _squish(w, stacks, level)
+                _squish(maketree, stacks, level)
         #log('stacks: %r\n' % [len(i) for i in stacks])
-        _squish(w, stacks, len(stacks)-1)
+        _squish(maketree, stacks, len(stacks)-1)
         #log('stacks: %r\n' % [len(i) for i in stacks])
         return _make_shalist(stacks[-1])[0]
 
 
-def split_to_blob_or_tree(w, files, keep_boundaries):
-    shalist = list(split_to_shalist(w, files, keep_boundaries))
+def split_to_blob_or_tree(makeblob, maketree, files, keep_boundaries):
+    shalist = list(split_to_shalist(makeblob, maketree,
+                                    files, keep_boundaries))
     if len(shalist) == 1:
         return (shalist[0][0], shalist[0][2])
     elif len(shalist) == 0:
-        return ('100644', w.new_blob(''))
+        return ('100644', makeblob(''))
     else:
-        return ('40000', w.new_tree(shalist))
+        return ('40000', maketree(shalist))
 
 
 def open_noatime(name):
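
One payoff of the decoupling: the split functions can be exercised with no
repository and no PackWriter at all. A hypothetical smoke test along these
lines (Python 2, matching the codebase, and assuming a built bup on the
module path; the two callbacks are stand-ins, not real object writers):

    import hashlib
    from cStringIO import StringIO
    from bup import hashsplit

    def makeblob(content):
        # content may arrive as a buffer; normalize with str() first.
        return hashlib.sha1('blob ' + str(content)).digest()

    def maketree(shalist):
        return hashlib.sha1(repr(shalist)).digest()

    f = StringIO('x' * 100000)
    (mode, sha) = hashsplit.split_to_blob_or_tree(makeblob, maketree, [f],
                                                  keep_boundaries=False)
    print mode, sha.encode('hex')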