arthur.barton.de Git - bup.git/blobdiff - lib/bup/git.py
git/packwriter: open(..) prohibited in __del__
[bup.git] / lib / bup / git.py
index d1515e3f3f95823f77714e3d396810aa9a43e1e8..4c1a95c52a985e5eaee1bb36ef4ca61651559c78 100644 (file)
@@ -4,32 +4,32 @@ interact with the Git data structures.
 """
 
 from __future__ import absolute_import, print_function
-import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
+import os, sys, zlib, subprocess, struct, stat, re, tempfile, glob
 from array import array
 from binascii import hexlify, unhexlify
 from collections import namedtuple
 from itertools import islice
-from numbers import Integral
 
-from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
+from bup import _helpers, hashsplit, path, midx, bloom, xstat
 from bup.compat import (buffer,
                         byte_int, bytes_from_byte, bytes_from_uint,
                         environ,
                         items,
+                        pending_raise,
                         range,
                         reraise)
 from bup.io import path_msg
 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
+                         exo,
                          fdatasync,
-                         hostname, localtime, log,
+                         finalized,
+                         log,
                          merge_dict,
                          merge_iter,
                          mmap_read, mmap_readwrite,
-                         parse_num,
                          progress, qprogress, stat_if_exists,
                          unlink,
                          utc_offset_str)
-from bup.pwdgrp import username, userfullname
 
 
 verbose = 0
@@ -57,13 +57,40 @@ def _git_wait(cmd, p):
     if rv != 0:
         raise GitError('%r returned %d' % (cmd, rv))
 
-def git_config_get(option, repo_dir=None):
-    cmd = (b'git', b'config', b'--get', option)
-    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                         env=_gitenv(repo_dir=repo_dir))
-    r = p.stdout.read()
def _git_exo(cmd, **kwargs):
    """Run cmd via exo() with checking disabled, then raise GitError
    ourselves if the process exited non-zero.  Return the full exo()
    result tuple (out, err, proc)."""
    kwargs['check'] = False
    out_err_proc = exo(cmd, **kwargs)
    proc = out_err_proc[2]
    rc = proc.returncode
    if rc != 0:
        raise GitError('%r returned %d' % (cmd, rc))
    return out_err_proc
+
+def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None):
+    assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used"
+    cmd = [b'git', b'config', b'--null']
+    if cfg_file:
+        cmd.extend([b'--file', cfg_file])
+    if opttype == 'int':
+        cmd.extend([b'--int'])
+    elif opttype == 'bool':
+        cmd.extend([b'--bool'])
+    else:
+        assert opttype is None
+    cmd.extend([b'--get', option])
+    env=None
+    if repo_dir:
+        env = _gitenv(repo_dir=repo_dir)
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env,
+                         close_fds=True)
+    # with --null, git writes out a trailing \0 after the value
+    r = p.stdout.read()[:-1]
     rc = p.wait()
     if rc == 0:
+        if opttype == 'int':
+            return int(r)
+        elif opttype == 'bool':
+            # git converts to 'true' or 'false'
+            return r == b'true'
         return r
     if rc != 1:
         raise GitError('%r returned %d' % (cmd, rc))
@@ -77,9 +104,26 @@ def parse_tz_offset(s):
         return - tz_off
     return tz_off
 
def parse_commit_gpgsig(sig):
    """Return the original signature bytes.

    i.e. with the "gpgsig " header and the leading space character on
    each continuation line removed.

    """
    if not sig:
        return None
    header = b'gpgsig '
    assert sig.startswith(header)
    body = sig[len(header):]
    # Continuation lines carry exactly one leading space; strip it.
    return body.replace(b'\n ', b'\n')
 
 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
 # Make sure that's authoritative.
+
+# See also
+# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt
+# The continuation lines have only one leading space.
+
 _start_end_char = br'[^ .,:;<>"\'\0\n]'
 _content_char = br'[^\0\n<>]'
 _safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \
@@ -93,7 +137,7 @@ _mergetag_rx = br'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]
 _commit_rx = re.compile(br'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
-
+(?P<gpgsig>gpgsig .*\n(?: .*\n)*)?
 (?P<message>(?:.|\n)*)''' % (_parent_rx,
                              _safe_str_rx, _safe_str_rx, _tz_rx,
                              _safe_str_rx, _safe_str_rx, _tz_rx,
@@ -107,6 +151,7 @@ CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                        'author_sec', 'author_offset',
                                        'committer_name', 'committer_mail',
                                        'committer_sec', 'committer_offset',
+                                       'gpgsig',
                                        'message'])
 
 def parse_commit(content):
@@ -124,6 +169,7 @@ def parse_commit(content):
                       committer_mail=matches['committer_mail'],
                       committer_sec=int(matches['csec']),
                       committer_offset=parse_tz_offset(matches['ctz']),
+                      gpgsig=parse_commit_gpgsig(matches['gpgsig']),
                       message=matches['message'])
 
 
@@ -182,12 +228,6 @@ def repo_rel(path):
     return shorten_hash(path)
 
 
-def all_packdirs():
-    paths = [repo(b'objects/pack')]
-    paths += glob.glob(repo(b'index-cache/*/.'))
-    return paths
-
-
 def auto_midx(objdir):
     args = [path.exe(), b'midx', b'--auto', b'--dir', objdir]
     try:
@@ -244,8 +284,7 @@ def demangle_name(name, mode):
     elif name.endswith(b'.bupm'):
         return (name[:-5],
                 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
-    else:
-        return (name, BUP_NORMAL)
+    return (name, BUP_NORMAL)
 
 
 def calc_hash(type, content):
@@ -314,27 +353,6 @@ def _encode_packobj(type, content, compression_level=1):
     yield z.flush()
 
 
-def _encode_looseobj(type, content, compression_level=1):
-    z = zlib.compressobj(compression_level)
-    yield z.compress(b'%s %d\0' % (type, len(content)))
-    yield z.compress(content)
-    yield z.flush()
-
-
-def _decode_looseobj(buf):
-    assert(buf);
-    s = zlib.decompress(buf)
-    i = s.find(b'\0')
-    assert(i > 0)
-    l = s[:i].split(b' ')
-    type = l[0]
-    sz = int(l[1])
-    content = s[i+1:]
-    assert(type in _typemap)
-    assert(sz == len(content))
-    return (type, content)
-
-
 def _decode_packobj(buf):
     assert(buf)
     c = byte_int(buf[0])
@@ -405,6 +423,13 @@ class PackIdxV1(PackIdx):
         # Avoid slicing shatable for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
@@ -425,6 +450,12 @@ class PackIdxV1(PackIdx):
         for ofs in range(start, start + 24 * self.nsha, 24):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
 
 class PackIdxV2(PackIdx):
     """Object representation of a Git pack index (version 2) file."""
@@ -443,6 +474,13 @@ class PackIdxV2(PackIdx):
         # Avoid slicing this for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha*20)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
@@ -468,6 +506,12 @@ class PackIdxV2(PackIdx):
         for ofs in range(start, start + 20 * self.nsha, 20):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
 
 _mpi_count = 0
 class PackIdxList:
@@ -649,6 +693,28 @@ def idxmerge(idxlist, final_progress=True):
     return merge_iter(idxlist, 10024, pfunc, pfinal)
 
 
def create_commit_blob(tree, parent,
                       author, adate_sec, adate_tz,
                       committer, cdate_sec, cdate_tz,
                       msg):
    """Return the raw bytes of a git commit object.

    Dates are epoch seconds; a None tz selects the local timezone.
    Falsy tree/parent/author/committer fields are omitted from the
    header entirely.
    """
    adate_str = _git_date_str(adate_sec, adate_tz) if adate_tz is not None \
                else _local_git_date_str(adate_sec)
    cdate_str = _git_date_str(cdate_sec, cdate_tz) if cdate_tz is not None \
                else _local_git_date_str(cdate_sec)
    lines = []
    if tree:
        lines.append(b'tree %s' % hexlify(tree))
    if parent:
        lines.append(b'parent %s' % hexlify(parent))
    if author:
        lines.append(b'author %s %s' % (author, adate_str))
    if committer:
        lines.append(b'committer %s %s' % (committer, cdate_str))
    lines.append(b'')
    lines.append(msg)
    return b'\n'.join(lines)
+
+
 def _make_objcache():
     return PackIdxList(repo(b'objects/pack'))
 
@@ -674,9 +740,8 @@ class PackWriter:
         self.on_pack_finish = on_pack_finish
         if not max_pack_size:
             max_pack_size = git_config_get(b'pack.packSizeLimit',
-                                           repo_dir=self.repo_dir)
-            if max_pack_size is not None:
-                max_pack_size = parse_num(max_pack_size)
+                                           repo_dir=self.repo_dir,
+                                           opttype='int')
             if not max_pack_size:
                 # larger packs slow down pruning
                 max_pack_size = 1000 * 1000 * 1000
@@ -685,14 +750,12 @@ class PackWriter:
         self.max_pack_objects = max_pack_objects if max_pack_objects \
                                 else max(1, self.max_pack_size // 5000)
 
-    def __del__(self):
-        self.close()
-
     def __enter__(self):
         return self
 
     def __exit__(self, type, value, traceback):
-        self.close()
+        with pending_raise(value, rethrow=False):
+            self.close()
 
     def _open(self):
         if not self.file:
@@ -713,7 +776,7 @@ class PackWriter:
             assert name.endswith(b'.pack')
             self.filename = name[:-5]
             self.file.write(b'PACK\0\0\0\2\0\0\0\0')
-            self.idx = list(list() for i in range(256))
+            self.idx = PackIdxV2Writer()
 
     def _raw_write(self, datalist, sha):
         self._open()
@@ -738,8 +801,7 @@ class PackWriter:
     def _update_idx(self, sha, crc, size):
         assert(sha)
         if self.idx:
-            self.idx[byte_int(sha[0])].append((sha, crc,
-                                               self.file.tell() - size))
+            self.idx.add(sha, crc, self.file.tell() - size)
 
     def _write(self, sha, type, content):
         if verbose:
@@ -754,12 +816,6 @@ class PackWriter:
             self.breakpoint()
         return sha
 
-    def breakpoint(self):
-        """Clear byte and object counts and return the last processed id."""
-        id = self._end(self.run_midx)
-        self.outbytes = self.count = 0
-        return id
-
     def _require_objcache(self):
         if self.objcache is None and self.objcache_maker:
             self.objcache = self.objcache_maker()
@@ -802,53 +858,32 @@ class PackWriter:
                    msg):
         """Create a commit object in the pack.  The date_sec values must be
         epoch-seconds, and if a tz is None, the local timezone is assumed."""
-        if adate_tz:
-            adate_str = _git_date_str(adate_sec, adate_tz)
-        else:
-            adate_str = _local_git_date_str(adate_sec)
-        if cdate_tz:
-            cdate_str = _git_date_str(cdate_sec, cdate_tz)
-        else:
-            cdate_str = _local_git_date_str(cdate_sec)
-        l = []
-        if tree: l.append(b'tree %s' % hexlify(tree))
-        if parent: l.append(b'parent %s' % hexlify(parent))
-        if author: l.append(b'author %s %s' % (author, adate_str))
-        if committer: l.append(b'committer %s %s' % (committer, cdate_str))
-        l.append(b'')
-        l.append(msg)
-        return self.maybe_write(b'commit', b'\n'.join(l))
+        content = create_commit_blob(tree, parent,
+                                     author, adate_sec, adate_tz,
+                                     committer, cdate_sec, cdate_tz,
+                                     msg)
+        return self.maybe_write(b'commit', content)
+
+    def _end(self, run_midx=True, abort=False):
+        # Ignores run_midx during abort
+        if not self.file:
+            return None
+        self.file, f = None, self.file
+        self.idx, idx = None, self.idx
+        self.parentfd, pfd, = None, self.parentfd
+        self.objcache = None
 
-    def abort(self):
-        """Remove the pack file from disk."""
-        f = self.file
-        if f:
-            pfd = self.parentfd
-            self.file = None
-            self.parentfd = None
-            self.idx = None
-            try:
-                try:
-                    os.unlink(self.filename + b'.pack')
-                finally:
-                    f.close()
-            finally:
-                if pfd is not None:
-                    os.close(pfd)
+        with finalized(pfd, lambda x: x is not None and os.close(x)), \
+             f:
 
-    def _end(self, run_midx=True):
-        f = self.file
-        if not f: return None
-        self.file = None
-        try:
-            self.objcache = None
-            idx = self.idx
-            self.idx = None
+            if abort:
+                os.unlink(self.filename + b'.pack')
+                return None
 
             # update object count
             f.seek(8)
             cp = struct.pack('!i', self.count)
-            assert(len(cp) == 4)
+            assert len(cp) == 4
             f.write(cp)
 
             # calculate the pack sha1sum
@@ -858,38 +893,52 @@ class PackWriter:
                 sum.update(b)
             packbin = sum.digest()
             f.write(packbin)
+            f.flush()
             fdatasync(f.fileno())
-        finally:
             f.close()
 
-        obj_list_sha = self._write_pack_idx_v2(self.filename + b'.idx', idx,
-                                               packbin)
-        nameprefix = os.path.join(self.repo_dir,
-                                  b'objects/pack/pack-' +  obj_list_sha)
-        if os.path.exists(self.filename + b'.map'):
-            os.unlink(self.filename + b'.map')
-        os.rename(self.filename + b'.pack', nameprefix + b'.pack')
-        os.rename(self.filename + b'.idx', nameprefix + b'.idx')
-        try:
-            os.fsync(self.parentfd)
-        finally:
-            os.close(self.parentfd)
+            idx.write(self.filename + b'.idx', packbin)
+            nameprefix = os.path.join(self.repo_dir,
+                                      b'objects/pack/pack-' +  hexlify(packbin))
+            if os.path.exists(self.filename + b'.map'):
+                os.unlink(self.filename + b'.map')
+            os.rename(self.filename + b'.pack', nameprefix + b'.pack')
+            os.rename(self.filename + b'.idx', nameprefix + b'.idx')
+            os.fsync(pfd)
+            if run_midx:
+                auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+            if self.on_pack_finish:
+                self.on_pack_finish(nameprefix)
+            return nameprefix
 
-        if run_midx:
-            auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
-
-        if self.on_pack_finish:
-            self.on_pack_finish(nameprefix)
+    def abort(self):
+        """Remove the pack file from disk."""
+        self._end(abort=True)
 
-        return nameprefix
+    def breakpoint(self):
+        """Clear byte and object counts and return the last processed id."""
+        id = self._end(self.run_midx)
+        self.outbytes = self.count = 0
+        return id
 
     def close(self, run_midx=True):
         """Close the pack file and move it to its definitive path."""
         return self._end(run_midx=run_midx)
 
-    def _write_pack_idx_v2(self, filename, idx, packbin):
+
+class PackIdxV2Writer:
+    def __init__(self):
+        self.idx = list(list() for i in range(256))
+        self.count = 0
+
+    def add(self, sha, crc, offs):
+        assert(sha)
+        self.count += 1
+        self.idx[byte_int(sha[0])].append((sha, crc, offs))
+
+    def write(self, filename, packbin):
         ofs64_count = 0
-        for section in idx:
+        for section in self.idx:
             for entry in section:
                 if entry[2] >= 2**31:
                     ofs64_count += 1
@@ -903,7 +952,8 @@ class PackWriter:
             fdatasync(idx_f.fileno())
             idx_map = mmap_readwrite(idx_f, close=False)
             try:
-                count = _helpers.write_idx(filename, idx_map, idx, self.count)
+                count = _helpers.write_idx(filename, idx_map, self.idx,
+                                           self.count)
                 assert(count == self.count)
                 idx_map.flush()
             finally:
@@ -919,17 +969,13 @@ class PackWriter:
             b = idx_f.read(8 + 4*256)
             idx_sum.update(b)
 
-            obj_list_sum = Sha1()
-            for b in chunkyreader(idx_f, 20*self.count):
+            for b in chunkyreader(idx_f, 20 * self.count):
                 idx_sum.update(b)
-                obj_list_sum.update(b)
-            namebase = hexlify(obj_list_sum.digest())
 
             for b in chunkyreader(idx_f):
                 idx_sum.update(b)
             idx_f.write(idx_sum.digest())
             fdatasync(idx_f.fileno())
-            return namebase
         finally:
             idx_f.close()
 
@@ -951,7 +997,8 @@ def list_refs(patterns=None, repo_dir=None,
     argv.append(b'--')
     if patterns:
         argv.extend(patterns)
-    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
+    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE,
+                         close_fds=True)
     out = p.stdout.read().strip()
     rv = p.wait()  # not fatal
     if rv:
@@ -1002,7 +1049,8 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None):
     p = subprocess.Popen(rev_list_invocation(ref_or_refs,
                                              format=format),
                          env=_gitenv(repo_dir),
-                         stdout = subprocess.PIPE)
+                         stdout = subprocess.PIPE,
+                         close_fds=True)
     if not format:
         for line in p.stdout:
             yield line.strip()
@@ -1022,17 +1070,6 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None):
         raise GitError('git rev-list returned error %d' % rv)
 
 
-def get_commit_dates(refs, repo_dir=None):
-    """Get the dates for the specified commit refs.  For now, every unique
-       string in refs must resolve to a different commit or this
-       function will fail."""
-    result = []
-    for ref in refs:
-        commit = get_commit_items(ref, cp(repo_dir))
-        result.append(commit.author_sec)
-    return result
-
-
 def rev_parse(committish, repo_dir=None):
     """Resolve the full hash for 'committish', if it exists.
 
@@ -1068,7 +1105,8 @@ def update_ref(refname, newval, oldval, repo_dir=None):
         or refname.startswith(b'refs/tags/')
     p = subprocess.Popen([b'git', b'update-ref', refname,
                           hexlify(newval), hexlify(oldval)],
-                         env=_gitenv(repo_dir))
+                         env=_gitenv(repo_dir),
+                         close_fds=True)
     _git_wait(b'git update-ref', p)
 
 
@@ -1077,7 +1115,8 @@ def delete_ref(refname, oldvalue=None):
     assert refname.startswith(b'refs/')
     oldvalue = [] if not oldvalue else [oldvalue]
     p = subprocess.Popen([b'git', b'update-ref', b'-d', refname] + oldvalue,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git update-ref', p)
 
 
@@ -1108,16 +1147,17 @@ def init_repo(path=None):
     if os.path.exists(d) and not os.path.isdir(os.path.join(d, b'.')):
         raise GitError('"%s" exists but is not a directory\n' % path_msg(d))
     p = subprocess.Popen([b'git', b'--bare', b'init'], stdout=sys.stderr,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git init', p)
     # Force the index version configuration in order to ensure bup works
     # regardless of the version of the installed Git binary.
     p = subprocess.Popen([b'git', b'config', b'pack.indexVersion', '2'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
     # Enable the reflog
     p = subprocess.Popen([b'git', b'config', b'core.logAllRefUpdates', b'true'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
 
 
@@ -1138,31 +1178,54 @@ def check_repo_or_die(path=None):
     sys.exit(14)
 
 
-_ver = None
-def ver():
-    """Get Git's version and ensure a usable version is installed.
def is_suitable_git(ver_str):
    """Classify a `git --version` output line.

    Return 'unrecognized' when it doesn't look like git version output,
    'insufficient' for versions older than 1.5.6, and 'suitable'
    otherwise; exits with status 13 for version strings we can't parse
    at all.
    """
    prefix = b'git version '
    if not ver_str.startswith(prefix):
        return 'unrecognized'
    ver = ver_str[len(prefix):]
    if ver.startswith(b'0.'):
        return 'insufficient'
    if ver.startswith(b'1.'):
        # Anything before 1.5.6 (including its release candidates) is
        # too old.
        too_old = (br'1\.[012345]rc',
                   br'1\.[01234]\.',
                   br'1\.5\.[012345]($|\.)',
                   br'1\.5\.6-rc')
        if any(re.match(rx, ver) for rx in too_old):
            return 'insufficient'
        return 'suitable'
    if re.match(br'[0-9]+(\.|$)?', ver):
        return 'suitable'
    sys.exit(13)
+
+_git_great = None
+
def require_suitable_git(ver_str=None):
    """Raise GitError if the version of git isn't suitable.

    Rely on ver_str when provided, rather than invoking the git in the
    path.

    """
    global _git_great
    if _git_great is not None:
        return
    # Escape hatch: let the user declare their git acceptable.
    override = environ.get(b'BUP_GIT_VERSION_IS_FINE', b'').lower()
    if override in (b'yes', b'true', b'1'):
        _git_great = True
        return
    if not ver_str:
        ver_str, _, _ = _git_exo([b'git', b'--version'])
    status = is_suitable_git(ver_str)
    if status == 'suitable':
        _git_great = True
    elif status == 'insufficient':
        log('error: git version must be at least 1.5.6\n')
        sys.exit(1)
    elif status == 'unrecognized':
        raise GitError('Unexpected git --version output: %r' % ver_str)
    else:
        assert False
 
 
 class _AbortableIter:
@@ -1200,22 +1263,24 @@ class _AbortableIter:
 class CatPipe:
     """Link to 'git cat-file' that is used to retrieve blob data."""
     def __init__(self, repo_dir = None):
+        require_suitable_git()
         self.repo_dir = repo_dir
-        wanted = (1, 5, 6)
-        if ver() < wanted:
-            log('error: git version must be at least 1.5.6\n')
-            sys.exit(1)
         self.p = self.inprogress = None
 
-    def _abort(self):
-        if self.p:
-            self.p.stdout.close()
-            self.p.stdin.close()
+    def close(self, wait=False):
+        p = self.p
+        if p:
+            p.stdout.close()
+            p.stdin.close()
         self.p = None
         self.inprogress = None
+        if wait:
+            p.wait()
+            return p.returncode
+        return None
 
     def restart(self):
-        self._abort()
+        self.close()
         self.p = subprocess.Popen([b'git', b'cat-file', b'--batch'],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
@@ -1243,6 +1308,9 @@ class CatPipe:
         self.p.stdin.write(ref + b'\n')
         self.p.stdin.flush()
         hdr = self.p.stdout.readline()
+        if not hdr:
+            raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)'
+                           % (ref, self.p.poll() or 'none'))
         if hdr.endswith(b' missing\n'):
             self.inprogress = None
             yield None, None, None
@@ -1253,7 +1321,7 @@ class CatPipe:
         oidx, typ, size = info
         size = int(size)
         it = _AbortableIter(chunkyreader(self.p.stdout, size),
-                            onabort=self._abort)
+                            onabort=self.close)
         try:
             yield oidx, typ, size
             for blob in it:
@@ -1309,6 +1377,13 @@ def cp(repo_dir=None):
     return cp
 
 
def close_catpipes():
    """Close (and wait for) every cached CatPipe in _cp."""
    # FIXME: chain exceptions
    while _cp:
        item = _cp.popitem()
        item[1].close(wait=True)
+
+
 def tags(repo_dir = None):
     """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
     tags = {}
@@ -1324,7 +1399,7 @@ def tags(repo_dir = None):
class MissingObject(KeyError):
    """KeyError subclass raised when a requested object id is absent."""
    def __init__(self, oid):
        self.oid = oid
        super(MissingObject, self).__init__('object %r is missing'
                                            % hexlify(oid))
 
 
 WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',