arthur.barton.de Git - bup.git/blobdiff - lib/bup/git.py
Support catpipe get(...,size=True); require git >= 1.5.6
[bup.git] / lib / bup / git.py
index 8dd92361f964c367d9ab74c0e2523368ff31f564..94016a4c7ec59af671b429af382a04e7b65428b3 100644 (file)
@@ -12,14 +12,11 @@ from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                          fdatasync,
                          hostname, localtime, log, merge_iter,
                          mmap_read, mmap_readwrite,
+                         parse_num,
                          progress, qprogress, stat_if_exists,
                          unlink, username, userfullname,
                          utc_offset_str)
 
-
-max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
-max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
-
 verbose = 0
 ignore_midx = 0
 repodir = None  # The default repository, once initialized
@@ -46,6 +43,18 @@ def _git_capture(argv):
     _git_wait(repr(argv), p)
     return r
 
+def git_config_get(option, repo_dir=None):  # look up one git config option
+    cmd = ('git', 'config', '--get', option)
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                         preexec_fn=_gitenv(repo_dir=repo_dir))
+    r = p.stdout.read()  # raw git output, returned unstripped
+    rc = p.wait()
+    if rc == 0:
+        return r
+    if rc != 1:  # status 1 falls through to None; any other failure raises
+        raise GitError('%s returned %d' % (cmd, rc))
+    return None
+
 
 def parse_tz_offset(s):
     """UTC offset in seconds."""
@@ -596,7 +605,9 @@ def _make_objcache():
 class PackWriter:
     """Writes Git objects inside a pack file."""
     def __init__(self, objcache_maker=_make_objcache, compression_level=1,
-                 run_midx=True, on_pack_finish=None):
+                 run_midx=True, on_pack_finish=None,
+                 max_pack_size=None, max_pack_objects=None):
+        self.repo_dir = repo()
         self.file = None
         self.parentfd = None
         self.count = 0
@@ -608,13 +619,25 @@ class PackWriter:
         self.compression_level = compression_level
         self.run_midx=run_midx
         self.on_pack_finish = on_pack_finish
+        if not max_pack_size:
+            max_pack_size = git_config_get('pack.packSizeLimit',
+                                           repo_dir=self.repo_dir)
+            if max_pack_size is not None:
+                max_pack_size = parse_num(max_pack_size)
+            if not max_pack_size:
+                # larger packs slow down pruning
+                max_pack_size = 1000 * 1000 * 1000
+        self.max_pack_size = max_pack_size
+        # cache memory usage is about 83 bytes per object
+        self.max_pack_objects = max_pack_objects if max_pack_objects \
+                                else max(1, self.max_pack_size // 5000)
 
     def __del__(self):
         self.close()
 
     def _open(self):
         if not self.file:
-            objdir = dir=repo('objects')
+            objdir = dir = os.path.join(self.repo_dir, 'objects')
             fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
             try:
                 self.file = os.fdopen(fd, 'w+b')
@@ -666,7 +689,8 @@ class PackWriter:
         size, crc = self._raw_write(_encode_packobj(type, content,
                                                     self.compression_level),
                                     sha=sha)
-        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
+        if self.outbytes >= self.max_pack_size \
+           or self.count >= self.max_pack_objects:
             self.breakpoint()
         return sha
 
@@ -778,8 +802,8 @@ class PackWriter:
             f.close()
 
         obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
-
-        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
+        nameprefix = os.path.join(self.repo_dir,
+                                  'objects/pack/pack-' +  obj_list_sha)
         if os.path.exists(self.filename + '.map'):
             os.unlink(self.filename + '.map')
         os.rename(self.filename + '.pack', nameprefix + '.pack')
@@ -790,7 +814,7 @@ class PackWriter:
             os.close(self.parentfd)
 
         if run_midx:
-            auto_midx(repo('objects/pack'))
+            auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
 
         if self.on_pack_finish:
             self.on_pack_finish(nameprefix)
@@ -1114,14 +1138,9 @@ class CatPipe:
         self.repo_dir = repo_dir
         wanted = ('1','5','6')
         if ver() < wanted:
-            if not _ver_warned:
-                log('warning: git version < %s; bup will be slow.\n'
-                    % '.'.join(wanted))
-                _ver_warned = 1
-            self.get = self._slow_get
-        else:
-            self.p = self.inprogress = None
-            self.get = self._fast_get
+            log('error: git version must be at least 1.5.6\n')
+            sys.exit(1)
+        self.p = self.inprogress = None
 
     def _abort(self):
         if self.p:
@@ -1139,15 +1158,19 @@ class CatPipe:
                                   bufsize = 4096,
                                   preexec_fn = _gitenv(self.repo_dir))
 
-    def _fast_get(self, id):
+    def get(self, id, size=False):
+        """Yield the object type, and then an iterator over the data referred
+        to by the id ref.  If size is true, yield (obj_type, obj_size)
+        instead of just the type.
+
+        """
         if not self.p or self.p.poll() != None:
             self.restart()
         assert(self.p)
         poll_result = self.p.poll()
         assert(poll_result == None)
         if self.inprogress:
-            log('_fast_get: opening %r while %r is open\n'
-                % (id, self.inprogress))
+            log('get: opening %r while %r is open\n' % (id, self.inprogress))
         assert(not self.inprogress)
         assert(id.find('\n') < 0)
         assert(id.find('\r') < 0)
@@ -1162,12 +1185,15 @@ class CatPipe:
         spl = hdr.split(' ')
         if len(spl) != 3 or len(spl[0]) != 40:
             raise GitError('expected blob, got %r' % spl)
-        (hex, type, size) = spl
-
-        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
-                           onabort = self._abort)
+        hex, typ, sz = spl
+        sz = int(sz)
+        it = _AbortableIter(chunkyreader(self.p.stdout, sz),
+                            onabort=self._abort)
         try:
-            yield type
+            if size:
+                yield typ, sz
+            else:
+                yield typ
             for blob in it:
                 yield blob
             readline_result = self.p.stdout.readline()
@@ -1177,20 +1203,6 @@ class CatPipe:
             it.abort()
             raise
 
-    def _slow_get(self, id):
-        assert(id.find('\n') < 0)
-        assert(id.find('\r') < 0)
-        assert(id[0] != '-')
-        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
-        yield type
-
-        p = subprocess.Popen(['git', 'cat-file', type, id],
-                             stdout=subprocess.PIPE,
-                             preexec_fn = _gitenv(self.repo_dir))
-        for blob in chunkyreader(p.stdout):
-            yield blob
-        _git_wait('git cat-file', p)
-
     def _join(self, it):
         type = it.next()
         if type == 'blob':