Update base_version to 0.34~ for 0.34 development
[bup.git] / lib/bup/git.py
index 6f2dba3bfe4febc1a536021b145cea99a4b0eede..d6a745c02d7d9d355370bf23c85a9d5c645273e1 100644 (file)
@@ -4,39 +4,41 @@ interact with the Git data structures.
 """
 
 from __future__ import absolute_import, print_function
 """
 
 from __future__ import absolute_import, print_function
-import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
+import os, sys, zlib, subprocess, struct, stat, re, glob
 from array import array
 from binascii import hexlify, unhexlify
 from collections import namedtuple
+from contextlib import ExitStack
 from itertools import islice
-from numbers import Integral
+from shutil import rmtree
 
-from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
+from bup import _helpers, hashsplit, path, midx, bloom, xstat
 from bup.compat import (buffer,
                         byte_int, bytes_from_byte, bytes_from_uint,
                         environ,
-                        items,
-                        range,
+                        pending_raise,
                         reraise)
 from bup.io import path_msg
 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
+                         exo,
                          fdatasync,
-                         hostname, localtime, log,
+                         finalized,
+                         log,
                          merge_dict,
                          merge_iter,
                          mmap_read, mmap_readwrite,
-                         parse_num,
+                         nullcontext_if_not,
                          progress, qprogress, stat_if_exists,
+                         temp_dir,
                          unlink,
                          utc_offset_str)
-from bup.pwdgrp import username, userfullname
 
 
 verbose = 0
 repodir = None  # The default repository, once initialized
 
 _typemap =  {b'blob': 3, b'tree': 2, b'commit': 1, b'tag': 4}
-_typermap = {v: k for k, v in items(_typemap)}
+_typermap = {v: k for k, v in _typemap.items()}
 
 
 _total_searches = 0
@@ -57,19 +59,40 @@ def _git_wait(cmd, p):
     if rv != 0:
         raise GitError('%r returned %d' % (cmd, rv))
 
-def _git_capture(argv):
-    p = subprocess.Popen(argv, stdout=subprocess.PIPE, env=_gitenv())
-    r = p.stdout.read()
-    _git_wait(argv, p)
-    return r
-
-def git_config_get(option, repo_dir=None):
-    cmd = (b'git', b'config', b'--get', option)
-    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                         env=_gitenv(repo_dir=repo_dir))
-    r = p.stdout.read()
+def _git_exo(cmd, **kwargs):
+    kwargs['check'] = False
+    result = exo(cmd, **kwargs)
+    _, _, proc = result
+    if proc.returncode != 0:
+        raise GitError('%r returned %d' % (cmd, proc.returncode))
+    return result
+
+def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None):
+    assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used"
+    cmd = [b'git', b'config', b'--null']
+    if cfg_file:
+        cmd.extend([b'--file', cfg_file])
+    if opttype == 'int':
+        cmd.extend([b'--int'])
+    elif opttype == 'bool':
+        cmd.extend([b'--bool'])
+    else:
+        assert opttype is None
+    cmd.extend([b'--get', option])
+    env = None
+    if repo_dir:
+        env = _gitenv(repo_dir=repo_dir)
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env,
+                         close_fds=True)
+    # with --null, git writes out a trailing \0 after the value
+    r = p.stdout.read()[:-1]
     rc = p.wait()
     if rc == 0:
+        if opttype == 'int':
+            return int(r)
+        elif opttype == 'bool':
+            # git converts to 'true' or 'false'
+            return r == b'true'
         return r
     if rc != 1:
         raise GitError('%r returned %d' % (cmd, rc))
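
Note: with opttype, the value arrives already canonicalized by git itself (--int expands
suffixes such as "1g", --bool yields b'true'/b'false'), and --null makes the terminator
unambiguous. A usage sketch (the option names are only examples):

    size = git_config_get(b'pack.packSizeLimit', opttype='int')         # e.g. 1000000000, or None
    logall = git_config_get(b'core.logAllRefUpdates', opttype='bool')   # True/False, or None
    name = git_config_get(b'user.name')                                 # raw bytes, or None if unset
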
@@ -83,9 +106,26 @@ def parse_tz_offset(s):
         return - tz_off
     return tz_off
 
+def parse_commit_gpgsig(sig):
+    """Return the original signature bytes.
+
+    i.e. with the "gpgsig " header and the leading space character on
+    each continuation line removed.
+
+    """
+    if not sig:
+        return None
+    assert sig.startswith(b'gpgsig ')
+    sig = sig[7:]
+    return sig.replace(b'\n ', b'\n')
 
 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
 # Make sure that's authoritative.
+
+# See also
+# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt
+# The continuation lines have only one leading space.
+
 _start_end_char = br'[^ .,:;<>"\'\0\n]'
 _content_char = br'[^\0\n<>]'
 _safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \
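
Note: per the signature-format document, every line of the embedded signature after the
header is prefixed with a single space in the commit object; parse_commit_gpgsig()
reverses exactly that. For example (payload abbreviated):

    sig = (b'gpgsig -----BEGIN PGP SIGNATURE-----\n'
           b' \n'
           b' iQEcBAABAgAGBQJ...\n'
           b' -----END PGP SIGNATURE-----')
    assert parse_commit_gpgsig(sig) == (b'-----BEGIN PGP SIGNATURE-----\n'
                                        b'\n'
                                        b'iQEcBAABAgAGBQJ...\n'
                                        b'-----END PGP SIGNATURE-----')
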
@@ -99,7 +139,7 @@ _mergetag_rx = br'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]
 _commit_rx = re.compile(br'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
-
+(?P<gpgsig>gpgsig .*\n(?: .*\n)*)?
 (?P<message>(?:.|\n)*)''' % (_parent_rx,
                              _safe_str_rx, _safe_str_rx, _tz_rx,
                              _safe_str_rx, _safe_str_rx, _tz_rx,
@@ -113,6 +153,7 @@ CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                        'author_sec', 'author_offset',
                                        'committer_name', 'committer_mail',
                                        'committer_sec', 'committer_offset',
+                                       'gpgsig',
                                        'message'])
 
 def parse_commit(content):
@@ -130,6 +171,7 @@ def parse_commit(content):
                       committer_mail=matches['committer_mail'],
                       committer_sec=int(matches['csec']),
                       committer_offset=parse_tz_offset(matches['ctz']),
+                      gpgsig=parse_commit_gpgsig(matches['gpgsig']),
                       message=matches['message'])
 
 
@@ -188,12 +230,6 @@ def repo_rel(path):
     return shorten_hash(path)
 
 
-def all_packdirs():
-    paths = [repo(b'objects/pack')]
-    paths += glob.glob(repo(b'index-cache/*/.'))
-    return paths
-
-
 def auto_midx(objdir):
     args = [path.exe(), b'midx', b'--auto', b'--dir', objdir]
     try:
@@ -250,8 +286,7 @@ def demangle_name(name, mode):
     elif name.endswith(b'.bupm'):
         return (name[:-5],
                 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
-    else:
-        return (name, BUP_NORMAL)
+    return (name, BUP_NORMAL)
 
 
 def calc_hash(type, content):
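
Note: the dropped "else" is purely cosmetic; any name that matches no bup suffix still
falls through to BUP_NORMAL. Illustrative results:

    demangle_name(b'foo.bupm', stat.S_IFDIR)  # -> (b'foo', BUP_CHUNKED)
    demangle_name(b'foo.bupm', stat.S_IFREG)  # -> (b'foo', BUP_NORMAL)
    demangle_name(b'foo', stat.S_IFREG)       # -> (b'foo', BUP_NORMAL)
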
@@ -320,27 +355,6 @@ def _encode_packobj(type, content, compression_level=1):
     yield z.flush()
 
 
-def _encode_looseobj(type, content, compression_level=1):
-    z = zlib.compressobj(compression_level)
-    yield z.compress(b'%s %d\0' % (type, len(content)))
-    yield z.compress(content)
-    yield z.flush()
-
-
-def _decode_looseobj(buf):
-    assert(buf);
-    s = zlib.decompress(buf)
-    i = s.find(b'\0')
-    assert(i > 0)
-    l = s[:i].split(b' ')
-    type = l[0]
-    sz = int(l[1])
-    content = s[i+1:]
-    assert(type in _typemap)
-    assert(sz == len(content))
-    return (type, content)
-
-
 def _decode_packobj(buf):
     assert(buf)
     c = byte_int(buf[0])
@@ -358,10 +372,7 @@ def _decode_packobj(buf):
     return (type, zlib.decompress(buf[i+1:]))
 
 
-class PackIdx:
-    def __init__(self):
-        assert(0)
-
+class PackIdx(object):
     def find_offset(self, hash):
         """Get the offset of an object inside the index file."""
         idx = self._idx_from_hash(hash)
@@ -400,6 +411,8 @@ class PackIdx:
 class PackIdxV1(PackIdx):
     """Object representation of a Git pack index (version 1) file."""
     def __init__(self, filename, f):
+        super(PackIdxV1, self).__init__()
+        self.closed = False
         self.name = filename
         self.idxnames = [self.name]
         self.map = mmap_read(f)
@@ -411,6 +424,13 @@ class PackIdxV1(PackIdx):
         # Avoid slicing shatable for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
@@ -431,10 +451,22 @@ class PackIdxV1(PackIdx):
         for ofs in range(start, start + 24 * self.nsha, 24):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        self.closed = True
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
+    def __del__(self):
+        assert self.closed
+
 
 class PackIdxV2(PackIdx):
     """Object representation of a Git pack index (version 2) file."""
     def __init__(self, filename, f):
+        super(PackIdxV2, self).__init__()
+        self.closed = False
         self.name = filename
         self.idxnames = [self.name]
         self.map = mmap_read(f)
@@ -449,6 +481,13 @@ class PackIdxV2(PackIdx):
         # Avoid slicing this for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha*20)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
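
Note: both index classes now share one lifecycle: call close() explicitly or use them as
context managers; __del__ only asserts that one of the two happened. A sketch (idx_path
and some_sha are placeholders):

    with open(idx_path, 'rb') as f:
        with PackIdxV2(idx_path, f) as idx:   # __init__ mmaps f
            print(len(idx), 'objects')
            ofs = idx.find_offset(some_sha)   # pack offset, or None if absent
    # idx.close() has run here, so __del__ will not fire its assertion
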
@@ -474,25 +513,63 @@ class PackIdxV2(PackIdx):
         for ofs in range(start, start + 20 * self.nsha, 20):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        self.closed = True
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
+    def __del__(self):
+        assert self.closed
+
 
 _mpi_count = 0
 class PackIdxList:
     def __init__(self, dir, ignore_midx=False):
         global _mpi_count
+        # Q: was this also intended to prevent opening multiple repos?
         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
         _mpi_count += 1
+        self.open = True
         self.dir = dir
         self.also = set()
         self.packs = []
         self.do_bloom = False
         self.bloom = None
         self.ignore_midx = ignore_midx
-        self.refresh()
+        try:
+            self.refresh()
+        except BaseException as ex:
+            with pending_raise(ex):
+                self.close()
 
-    def __del__(self):
+    def close(self):
         global _mpi_count
+        if not self.open:
+            assert _mpi_count == 0
+            return
         _mpi_count -= 1
-        assert(_mpi_count == 0)
+        assert _mpi_count == 0
+        self.also = None
+        self.bloom, bloom = None, self.bloom
+        self.packs, packs = None, self.packs
+        self.open = False
+        with ExitStack() as stack:
+            for pack in packs:
+                stack.enter_context(pack)
+            if bloom:
+                bloom.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
+    def __del__(self):
+        assert not self.open
 
     def __iter__(self):
         return iter(idxmerge(self.packs))
@@ -535,6 +612,8 @@ class PackIdxList:
         The instance variable 'ignore_midx' can force this function to
         always act as if skip_midx was True.
         """
+        if self.bloom is not None:
+            self.bloom.close()
         self.bloom = None # Always reopen the bloom as it may have been replaced
         self.do_bloom = False
         skip_midx = skip_midx or self.ignore_midx
@@ -543,11 +622,22 @@ class PackIdxList:
         if os.path.exists(self.dir):
             if not skip_midx:
                 midxl = []
+                midxes = set(glob.glob(os.path.join(self.dir, b'*.midx')))
+                # remove any *.midx files from our list that no longer exist
+                for ix in list(d.values()):
+                    if not isinstance(ix, midx.PackMidx):
+                        continue
+                    if ix.name in midxes:
+                        continue
+                    # remove the midx
+                    del d[ix.name]
+                    ix.close()
+                    self.packs.remove(ix)
                 for ix in self.packs:
                     if isinstance(ix, midx.PackMidx):
                         for name in ix.idxnames:
                             d[os.path.join(self.dir, name)] = ix
-                for full in glob.glob(os.path.join(self.dir,b'*.midx')):
+                for full in midxes:
                     if not d.get(full):
                         mx = midx.PackMidx(full)
                         (mxd, mxf) = os.path.split(mx.name)
@@ -560,7 +650,6 @@ class PackIdxList:
                                 broken = True
                         if broken:
                             mx.close()
-                            del mx
                             unlink(full)
                         else:
                             midxl.append(mx)
@@ -592,14 +681,27 @@ class PackIdxList:
                         continue
                     d[full] = ix
             bfull = os.path.join(self.dir, b'bup.bloom')
+            new_packs = set(d.values())
+            for p in self.packs:
+                if not p in new_packs:
+                    p.close()
+            new_packs = list(new_packs)
+            new_packs.sort(reverse=True, key=lambda x: len(x))
+            self.packs = new_packs
             if self.bloom is None and os.path.exists(bfull):
                 self.bloom = bloom.ShaBloom(bfull)
-            self.packs = list(set(d.values()))
-            self.packs.sort(reverse=True, key=lambda x: len(x))
-            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
-                self.do_bloom = True
-            else:
-                self.bloom = None
+            try:
+                if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
+                    self.do_bloom = True
+                else:
+                    if self.bloom:
+                        self.bloom, bloom_tmp = None, self.bloom
+                        bloom_tmp.close()
+            except BaseException as ex:
+                with pending_raise(ex):
+                    if self.bloom:
+                        self.bloom.close()
+
         debug1('PackIdxList: using %d index%s.\n'
             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 
@@ -642,23 +744,45 @@ def idxmerge(idxlist, final_progress=True):
     return merge_iter(idxlist, 10024, pfunc, pfinal)
 
 
+def create_commit_blob(tree, parent,
+                       author, adate_sec, adate_tz,
+                       committer, cdate_sec, cdate_tz,
+                       msg):
+    if adate_tz is not None:
+        adate_str = _git_date_str(adate_sec, adate_tz)
+    else:
+        adate_str = _local_git_date_str(adate_sec)
+    if cdate_tz is not None:
+        cdate_str = _git_date_str(cdate_sec, cdate_tz)
+    else:
+        cdate_str = _local_git_date_str(cdate_sec)
+    l = []
+    if tree: l.append(b'tree %s' % hexlify(tree))
+    if parent: l.append(b'parent %s' % hexlify(parent))
+    if author: l.append(b'author %s %s' % (author, adate_str))
+    if committer: l.append(b'committer %s %s' % (committer, cdate_str))
+    l.append(b'')
+    l.append(msg)
+    return b'\n'.join(l)
+
 def _make_objcache():
     return PackIdxList(repo(b'objects/pack'))
 
 # bup-gc assumes that it can disable all PackWriter activities
 # (bloom/midx/cache) via the constructor and close() arguments.
 
 def _make_objcache():
     return PackIdxList(repo(b'objects/pack'))
 
 # bup-gc assumes that it can disable all PackWriter activities
 # (bloom/midx/cache) via the constructor and close() arguments.
 
-class PackWriter:
+class PackWriter(object):
     """Writes Git objects inside a pack file."""
     def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                  run_midx=True, on_pack_finish=None,
                  max_pack_size=None, max_pack_objects=None, repo_dir=None):
     """Writes Git objects inside a pack file."""
     def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                  run_midx=True, on_pack_finish=None,
                  max_pack_size=None, max_pack_objects=None, repo_dir=None):
+        self.closed = False
         self.repo_dir = repo_dir or repo()
         self.file = None
         self.parentfd = None
         self.count = 0
         self.outbytes = 0
         self.repo_dir = repo_dir or repo()
         self.file = None
         self.parentfd = None
         self.count = 0
         self.outbytes = 0
-        self.filename = None
+        self.tmpdir = None
         self.idx = None
         self.objcache_maker = objcache_maker
         self.objcache = None
         self.idx = None
         self.objcache_maker = objcache_maker
         self.objcache = None
@@ -667,9 +791,8 @@ class PackWriter:
         self.on_pack_finish = on_pack_finish
         if not max_pack_size:
             max_pack_size = git_config_get(b'pack.packSizeLimit',
         self.on_pack_finish = on_pack_finish
         if not max_pack_size:
             max_pack_size = git_config_get(b'pack.packSizeLimit',
-                                           repo_dir=self.repo_dir)
-            if max_pack_size is not None:
-                max_pack_size = parse_num(max_pack_size)
+                                           repo_dir=self.repo_dir,
+                                           opttype='int')
             if not max_pack_size:
                 # larger packs slow down pruning
                 max_pack_size = 1000 * 1000 * 1000
             if not max_pack_size:
                 # larger packs slow down pruning
                 max_pack_size = 1000 * 1000 * 1000
@@ -678,35 +801,24 @@ class PackWriter:
         self.max_pack_objects = max_pack_objects if max_pack_objects \
                                 else max(1, self.max_pack_size // 5000)
 
         self.max_pack_objects = max_pack_objects if max_pack_objects \
                                 else max(1, self.max_pack_size // 5000)
 
-    def __del__(self):
-        self.close()
-
     def __enter__(self):
         return self
 
     def __exit__(self, type, value, traceback):
     def __enter__(self):
         return self
 
     def __exit__(self, type, value, traceback):
-        self.close()
+        with pending_raise(value, rethrow=False):
+            self.close()
 
     def _open(self):
         if not self.file:
 
-            fd, name = tempfile.mkstemp(suffix=b'.pack', dir=objdir)
-            try:
-                self.file = os.fdopen(fd, 'w+b')
-            except:
-                os.close(fd)
-                raise
-            try:
-                self.parentfd = os.open(objdir, os.O_RDONLY)
-            except:
-                f = self.file
-                self.file = None
-                f.close()
-                raise
-            assert name.endswith(b'.pack')
-            self.filename = name[:-5]
-            self.file.write(b'PACK\0\0\0\2\0\0\0\0')
-            self.idx = list(list() for i in range(256))
+            with ExitStack() as err_stack:
+                objdir = dir = os.path.join(self.repo_dir, b'objects')
+                self.tmpdir = err_stack.enter_context(temp_dir(dir=objdir, prefix=b'pack-tmp-'))
+                self.file = err_stack.enter_context(open(self.tmpdir + b'/pack', 'w+b'))
+                self.parentfd = err_stack.enter_context(finalized(os.open(objdir, os.O_RDONLY),
+                                                                  lambda x: os.close(x)))
+                self.file.write(b'PACK\0\0\0\2\0\0\0\0')
+                self.idx = PackIdxV2Writer()
+                err_stack.pop_all()
 
     def _raw_write(self, datalist, sha):
         self._open()
@@ -731,14 +843,12 @@ class PackWriter:
     def _update_idx(self, sha, crc, size):
         assert(sha)
         if self.idx:
-            self.idx[byte_int(sha[0])].append((sha, crc,
-                                               self.file.tell() - size))
+            self.idx.add(sha, crc, self.file.tell() - size)
 
     def _write(self, sha, type, content):
         if verbose:
             log('>')
-        if not sha:
-            sha = calc_hash(type, content)
+        assert sha
         size, crc = self._raw_write(_encode_packobj(type, content,
                                                     self.compression_level),
                                     sha=sha)
@@ -747,12 +857,6 @@ class PackWriter:
             self.breakpoint()
         return sha
 
-    def breakpoint(self):
-        """Clear byte and object counts and return the last processed id."""
-        id = self._end(self.run_midx)
-        self.outbytes = self.count = 0
-        return id
-
     def _require_objcache(self):
         if self.objcache is None and self.objcache_maker:
             self.objcache = self.objcache_maker()
@@ -795,94 +899,92 @@ class PackWriter:
                    msg):
         """Create a commit object in the pack.  The date_sec values must be
         epoch-seconds, and if a tz is None, the local timezone is assumed."""
-        if adate_tz:
-            adate_str = _git_date_str(adate_sec, adate_tz)
-        else:
-            adate_str = _local_git_date_str(adate_sec)
-        if cdate_tz:
-            cdate_str = _git_date_str(cdate_sec, cdate_tz)
-        else:
-            cdate_str = _local_git_date_str(cdate_sec)
-        l = []
-        if tree: l.append(b'tree %s' % hexlify(tree))
-        if parent: l.append(b'parent %s' % hexlify(parent))
-        if author: l.append(b'author %s %s' % (author, adate_str))
-        if committer: l.append(b'committer %s %s' % (committer, cdate_str))
-        l.append(b'')
-        l.append(msg)
-        return self.maybe_write(b'commit', b'\n'.join(l))
-
-    def abort(self):
-        """Remove the pack file from disk."""
-        f = self.file
-        if f:
-            pfd = self.parentfd
-            self.file = None
-            self.parentfd = None
-            self.idx = None
-            try:
-                try:
-                    os.unlink(self.filename + b'.pack')
-                finally:
-                    f.close()
-            finally:
-                if pfd is not None:
-                    os.close(pfd)
-
-    def _end(self, run_midx=True):
-        f = self.file
-        if not f: return None
-        self.file = None
+        content = create_commit_blob(tree, parent,
+                                     author, adate_sec, adate_tz,
+                                     committer, cdate_sec, cdate_tz,
+                                     msg)
+        return self.maybe_write(b'commit', content)
+
+    def _end(self, run_midx=True, abort=False):
+        # Ignores run_midx during abort
+        self.tmpdir, tmpdir = None, self.tmpdir
+        self.parentfd, pfd, = None, self.parentfd
+        self.file, f = None, self.file
+        self.idx, idx = None, self.idx
         try:
-            self.objcache = None
-            idx = self.idx
-            self.idx = None
-
-            # update object count
-            f.seek(8)
-            cp = struct.pack('!i', self.count)
-            assert(len(cp) == 4)
-            f.write(cp)
-
-            # calculate the pack sha1sum
-            f.seek(0)
-            sum = Sha1()
-            for b in chunkyreader(f):
-                sum.update(b)
-            packbin = sum.digest()
-            f.write(packbin)
-            fdatasync(f.fileno())
-        finally:
-            f.close()
-
-        obj_list_sha = self._write_pack_idx_v2(self.filename + b'.idx', idx,
-                                               packbin)
-        nameprefix = os.path.join(self.repo_dir,
-                                  b'objects/pack/pack-' +  obj_list_sha)
-        if os.path.exists(self.filename + b'.map'):
-            os.unlink(self.filename + b'.map')
-        os.rename(self.filename + b'.pack', nameprefix + b'.pack')
-        os.rename(self.filename + b'.idx', nameprefix + b'.idx')
-        try:
-            os.fsync(self.parentfd)
-        finally:
-            os.close(self.parentfd)
+            with nullcontext_if_not(self.objcache), \
+                 finalized(pfd, lambda x: x is not None and os.close(x)), \
+                 nullcontext_if_not(f):
+                if abort or not f:
+                    return None
+
+                # update object count
+                f.seek(8)
+                cp = struct.pack('!i', self.count)
+                assert len(cp) == 4
+                f.write(cp)
+
+                # calculate the pack sha1sum
+                f.seek(0)
+                sum = Sha1()
+                for b in chunkyreader(f):
+                    sum.update(b)
+                packbin = sum.digest()
+                f.write(packbin)
+                f.flush()
+                fdatasync(f.fileno())
+                f.close()
 
-        if run_midx:
-            auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+                idx.write(tmpdir + b'/idx', packbin)
+                nameprefix = os.path.join(self.repo_dir,
+                                          b'objects/pack/pack-' +  hexlify(packbin))
+                os.rename(tmpdir + b'/pack', nameprefix + b'.pack')
+                os.rename(tmpdir + b'/idx', nameprefix + b'.idx')
+                os.fsync(pfd)
+                if run_midx:
+                    auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+                if self.on_pack_finish:
+                    self.on_pack_finish(nameprefix)
+                return nameprefix
+        finally:
+            if tmpdir:
+                rmtree(tmpdir)
+            # Must be last -- some of the code above depends on it
+            self.objcache = None
 
-        if self.on_pack_finish:
-            self.on_pack_finish(nameprefix)
+    def abort(self):
+        """Remove the pack file from disk."""
+        self.closed = True
+        self._end(abort=True)
 
-        return nameprefix
+    def breakpoint(self):
+        """Clear byte and object counts and return the last processed id."""
+        id = self._end(self.run_midx)
+        self.outbytes = self.count = 0
+        return id
 
     def close(self, run_midx=True):
         """Close the pack file and move it to its definitive path."""
+        self.closed = True
         return self._end(run_midx=run_midx)
 
-    def _write_pack_idx_v2(self, filename, idx, packbin):
+    def __del__(self):
+        assert self.closed
+
+
+class PackIdxV2Writer:
+    def __init__(self):
+        self.idx = list(list() for i in range(256))
+        self.count = 0
+
+    def add(self, sha, crc, offs):
+        assert(sha)
+        self.count += 1
+        self.idx[byte_int(sha[0])].append((sha, crc, offs))
+
+    def write(self, filename, packbin):
         ofs64_count = 0
-        for section in idx:
+        for section in self.idx:
             for entry in section:
                 if entry[2] >= 2**31:
                     ofs64_count += 1
@@ -896,7 +998,8 @@ class PackWriter:
             fdatasync(idx_f.fileno())
             idx_map = mmap_readwrite(idx_f, close=False)
             try:
-                count = _helpers.write_idx(filename, idx_map, idx, self.count)
+                count = _helpers.write_idx(filename, idx_map, self.idx,
+                                           self.count)
                 assert(count == self.count)
                 idx_map.flush()
             finally:
@@ -912,17 +1015,13 @@ class PackWriter:
             b = idx_f.read(8 + 4*256)
             idx_sum.update(b)
 
-            obj_list_sum = Sha1()
-            for b in chunkyreader(idx_f, 20*self.count):
+            for b in chunkyreader(idx_f, 20 * self.count):
                 idx_sum.update(b)
-                obj_list_sum.update(b)
-            namebase = hexlify(obj_list_sum.digest())
 
             for b in chunkyreader(idx_f):
                 idx_sum.update(b)
             idx_f.write(idx_sum.digest())
             fdatasync(idx_f.fileno())
-            return namebase
         finally:
             idx_f.close()
 
@@ -944,7 +1043,8 @@ def list_refs(patterns=None, repo_dir=None,
     argv.append(b'--')
     if patterns:
         argv.extend(patterns)
-    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
+    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE,
+                         close_fds=True)
     out = p.stdout.read().strip()
     rv = p.wait()  # not fatal
     if rv:
@@ -966,16 +1066,12 @@ def read_ref(refname, repo_dir = None):
         return None
 
 
-def rev_list_invocation(ref_or_refs, count=None, format=None):
+def rev_list_invocation(ref_or_refs, format=None):
     if isinstance(ref_or_refs, bytes):
         refs = (ref_or_refs,)
     else:
         refs = ref_or_refs
     argv = [b'git', b'rev-list']
-    if isinstance(count, Integral):
-        argv.extend([b'-n', b'%d' % count])
-    elif count:
-        raise ValueError('unexpected count argument %r' % count)
 
     if format:
         argv.append(b'--pretty=format:' + format)
@@ -986,7 +1082,7 @@ def rev_list_invocation(ref_or_refs, count=None, format=None):
     return argv
 
 
-def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
+def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None):
     """Yield information about commits as per "git rev-list".  If a format
     is not provided, yield one hex hash at a time.  If a format is
     provided, pass it to rev-list and call parse(git_stdout) for each
     """Yield information about commits as per "git rev-list".  If a format
     is not provided, yield one hex hash at a time.  If a format is
     provided, pass it to rev-list and call parse(git_stdout) for each
@@ -996,10 +1092,11 @@ def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
 
     """
     assert bool(parse) == bool(format)
-    p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
+    p = subprocess.Popen(rev_list_invocation(ref_or_refs,
                                              format=format),
                          env=_gitenv(repo_dir),
-                         stdout = subprocess.PIPE)
+                         stdout = subprocess.PIPE,
+                         close_fds=True)
     if not format:
         for line in p.stdout:
             yield line.strip()
@@ -1019,17 +1116,6 @@ def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
         raise GitError('git rev-list returned error %d' % rv)
 
 
-def get_commit_dates(refs, repo_dir=None):
-    """Get the dates for the specified commit refs.  For now, every unique
-       string in refs must resolve to a different commit or this
-       function will fail."""
-    result = []
-    for ref in refs:
-        commit = get_commit_items(ref, cp(repo_dir))
-        result.append(commit.author_sec)
-    return result
-
-
 def rev_parse(committish, repo_dir=None):
     """Resolve the full hash for 'committish', if it exists.
 
@@ -1043,29 +1129,39 @@ def rev_parse(committish, repo_dir=None):
         debug2("resolved from ref: commit = %s\n" % hexlify(head))
         return head
 
-    pL = PackIdxList(repo(b'objects/pack', repo_dir=repo_dir))
-
     if len(committish) == 40:
         try:
             hash = unhexlify(committish)
         except TypeError:
             return None
 
-        if pL.exists(hash):
-            return hash
+        with PackIdxList(repo(b'objects/pack', repo_dir=repo_dir)) as pL:
+            if pL.exists(hash):
+                return hash
 
     return None
 
 
-def update_ref(refname, newval, oldval, repo_dir=None):
-    """Update a repository reference."""
-    if not oldval:
-        oldval = b''
+def update_ref(refname, newval, oldval, repo_dir=None, force=False):
+    """Update a repository reference.
+
+    With force=True, don't care about the previous ref (oldval);
+    with force=False oldval must be either a sha1 or None (for an
+    entirely new branch)
+    """
+    if force:
+        assert oldval is None
+        oldarg = []
+    elif not oldval:
+        oldarg = [b'']
+    else:
+        oldarg = [hexlify(oldval)]
     assert refname.startswith(b'refs/heads/') \
         or refname.startswith(b'refs/tags/')
     p = subprocess.Popen([b'git', b'update-ref', refname,
-                          hexlify(newval), hexlify(oldval)],
-                         env=_gitenv(repo_dir))
+                          hexlify(newval)] + oldarg,
+                         env=_gitenv(repo_dir),
+                         close_fds=True)
     _git_wait(b'git update-ref', p)
 
 
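
Note: the three call shapes map onto "git update-ref" as follows (newval/oldval are
binary oids; the ref name is a placeholder):

    update_ref(b'refs/heads/x', newval, oldval)            # compare-and-swap on oldval
    update_ref(b'refs/heads/x', newval, None)              # ref must not exist yet (b'' old value)
    update_ref(b'refs/heads/x', newval, None, force=True)  # unconditional; no old value passed
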
@@ -1074,29 +1170,29 @@ def delete_ref(refname, oldvalue=None):
     assert refname.startswith(b'refs/')
     oldvalue = [] if not oldvalue else [oldvalue]
     p = subprocess.Popen([b'git', b'update-ref', b'-d', refname] + oldvalue,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git update-ref', p)
 
 
-def guess_repo(path=None):
-    """Set the path value in the global variable "repodir".
-    This makes bup look for an existing bup repository, but not fail if a
-    repository doesn't exist. Usually, if you are interacting with a bup
-    repository, you would not be calling this function but using
-    check_repo_or_die().
+def guess_repo():
+    """Return the global repodir or BUP_DIR when either is set, or ~/.bup.
+    Usually, if you are interacting with a bup repository, you would
+    not be calling this function but using check_repo_or_die().
+
     """
     """
-    global repodir
-    if path:
-        repodir = path
-    if not repodir:
-        repodir = environ.get(b'BUP_DIR')
-        if not repodir:
-            repodir = os.path.expanduser(b'~/.bup')
+    if repodir:
+        return repodir
+    repo = environ.get(b'BUP_DIR')
+    if not repo:
+        repo = os.path.expanduser(b'~/.bup')
+    return repo
 
 
 def init_repo(path=None):
     """Create the Git bare repository for bup in a given path."""
-    guess_repo(path)
+    global repodir
+    repodir = path or guess_repo()
     d = repo()  # appends a / to the path
     parent = os.path.dirname(os.path.dirname(d))
     if parent and not os.path.exists(parent):
@@ -1105,22 +1201,24 @@ def init_repo(path=None):
     if os.path.exists(d) and not os.path.isdir(os.path.join(d, b'.')):
         raise GitError('"%s" exists but is not a directory\n' % path_msg(d))
     p = subprocess.Popen([b'git', b'--bare', b'init'], stdout=sys.stderr,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git init', p)
     # Force the index version configuration in order to ensure bup works
     # regardless of the version of the installed Git binary.
     p = subprocess.Popen([b'git', b'config', b'pack.indexVersion', '2'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
     # Enable the reflog
     p = subprocess.Popen([b'git', b'config', b'core.logAllRefUpdates', b'true'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
 
 
 def check_repo_or_die(path=None):
     """Check to see if a bup repository probably exists, and abort if not."""
-    guess_repo(path)
+    global repodir
+    repodir = path or guess_repo()
     top = repo()
     pst = stat_if_exists(top + b'/objects/pack')
     if pst and stat.S_ISDIR(pst.st_mode):
@@ -1135,84 +1233,80 @@ def check_repo_or_die(path=None):
     sys.exit(14)
 
 
-_ver = None
-def ver():
-    """Get Git's version and ensure a usable version is installed.
-
-    The returned version is formatted as an ordered tuple with each position
-    representing a digit in the version tag. For example, the following tuple
-    would represent version 1.6.6.9:
+def is_suitable_git(ver_str):
+    if not ver_str.startswith(b'git version '):
+        return 'unrecognized'
+    ver_str = ver_str[len(b'git version '):]
+    if ver_str.startswith(b'0.'):
+        return 'insufficient'
+    if ver_str.startswith(b'1.'):
+        if re.match(br'1\.[012345]rc', ver_str):
+            return 'insufficient'
+        if re.match(br'1\.[01234]\.', ver_str):
+            return 'insufficient'
+        if re.match(br'1\.5\.[012345]($|\.)', ver_str):
+            return 'insufficient'
+        if re.match(br'1\.5\.6-rc', ver_str):
+            return 'insufficient'
+        return 'suitable'
+    if re.match(br'[0-9]+(\.|$)?', ver_str):
+        return 'suitable'
+    sys.exit(13)
+
+_git_great = None
+
+def require_suitable_git(ver_str=None):
+    """Raise GitError if the version of git isn't suitable.
+
+    Rely on ver_str when provided, rather than invoking the git in the
+    path.
 
-        (1, 6, 6, 9)
     """
     """
-    global _ver
-    if not _ver:
-        p = subprocess.Popen([b'git', b'--version'], stdout=subprocess.PIPE)
-        gvs = p.stdout.read()
-        _git_wait('git --version', p)
-        m = re.match(br'git version (\S+.\S+)', gvs)
-        if not m:
-            raise GitError('git --version weird output: %r' % gvs)
-        _ver = tuple(int(x) for x in m.group(1).split(b'.'))
-    needed = (1, 5, 3, 1)
-    if _ver < needed:
-        raise GitError('git version %s or higher is required; you have %s'
-                       % ('.'.join(str(x) for x in needed),
-                          '.'.join(str(x) for x in _ver)))
-    return _ver
-
-
-class _AbortableIter:
-    def __init__(self, it, onabort = None):
-        self.it = it
-        self.onabort = onabort
-        self.done = None
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        try:
-            return next(self.it)
-        except StopIteration as e:
-            self.done = True
-            raise
-        except:
-            self.abort()
-            raise
-
-    next = __next__
-
-    def abort(self):
-        """Abort iteration and call the abortion callback, if needed."""
-        if not self.done:
-            self.done = True
-            if self.onabort:
-                self.onabort()
-
-    def __del__(self):
-        self.abort()
+    global _git_great
+    if _git_great is not None:
+        return
+    if environ.get(b'BUP_GIT_VERSION_IS_FINE', b'').lower() \
+       in (b'yes', b'true', b'1'):
+        _git_great = True
+        return
+    if not ver_str:
+        ver_str, _, _ = _git_exo([b'git', b'--version'])
+    status = is_suitable_git(ver_str)
+    if status == 'unrecognized':
+        raise GitError('Unexpected git --version output: %r' % ver_str)
+    if status == 'insufficient':
+        log('error: git version must be at least 1.5.6\n')
+        sys.exit(1)
+    if status == 'suitable':
+        _git_great = True
+        return
+    assert False
 
 
 class CatPipe:
     """Link to 'git cat-file' that is used to retrieve blob data."""
     def __init__(self, repo_dir = None):
+        require_suitable_git()
         self.repo_dir = repo_dir
-        wanted = (1, 5, 6)
-        if ver() < wanted:
-            log('error: git version must be at least 1.5.6\n')
-            sys.exit(1)
         self.p = self.inprogress = None
 
-    def _abort(self):
-        if self.p:
-            self.p.stdout.close()
-            self.p.stdin.close()
-        self.p = None
+    def close(self, wait=False):
+        self.p, p = None, self.p
         self.inprogress = None
+        if p:
+            try:
+                p.stdout.close()
+            finally:
+                # This will handle pending exceptions correctly once
+                # we drop py2
+                p.stdin.close()
+        if wait:
+            p.wait()
+            return p.returncode
+        return None
 
     def restart(self):
-        self._abort()
+        self.close()
         self.p = subprocess.Popen([b'git', b'cat-file', b'--batch'],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
@@ -1240,6 +1334,9 @@ class CatPipe:
         self.p.stdin.write(ref + b'\n')
         self.p.stdin.flush()
         hdr = self.p.stdout.readline()
+        if not hdr:
+            raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)'
+                           % (ref, self.p.poll() or 'none'))
         if hdr.endswith(b' missing\n'):
             self.inprogress = None
             yield None, None, None
@@ -1249,18 +1346,17 @@ class CatPipe:
             raise GitError('expected object (id, type, size), got %r' % info)
         oidx, typ, size = info
         size = int(size)
-        it = _AbortableIter(chunkyreader(self.p.stdout, size),
-                            onabort=self._abort)
         try:
+            it = chunkyreader(self.p.stdout, size)
             yield oidx, typ, size
             for blob in it:
                 yield blob
             readline_result = self.p.stdout.readline()
             assert readline_result == b'\n'
             self.inprogress = None
-        except Exception as e:
-            it.abort()
-            raise
+        except Exception as ex:
+            with pending_raise(ex):
+                self.close()
 
     def _join(self, it):
         _, typ, _ = next(it)
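
Note: get() is a generator that yields the header tuple and then the data chunks;
callers must drain it before issuing the next request on the same CatPipe. Consumption
sketch (the catpipe instance and ref are placeholders):

    it = catpipe.get(b'refs/heads/main')
    oidx, typ, size = next(it)
    if oidx is None:
        pass                     # object is missing
    else:
        data = b''.join(it)      # drain the blob chunks
        assert len(data) == size
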
@@ -1287,11 +1383,8 @@ class CatPipe:
         or a commit. The content of all blobs that can be seen from trees or
         commits will be added to the list.
         """
-        try:
-            for d in self._join(self.get(id)):
-                yield d
-        except StopIteration:
-            log('booger!\n')
+        for d in self._join(self.get(id)):
+            yield d
 
 
 _cp = {}
@@ -1309,6 +1402,13 @@ def cp(repo_dir=None):
     return cp
 
 
+def close_catpipes():
+    # FIXME: chain exceptions
+    while _cp:
+        _, cp = _cp.popitem()
+        cp.close(wait=True)
+
+
 def tags(repo_dir = None):
     """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
     tags = {}
@@ -1324,7 +1424,7 @@ def tags(repo_dir = None):
 class MissingObject(KeyError):
     def __init__(self, oid):
         self.oid = oid
-        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
+        KeyError.__init__(self, 'object %r is missing' % hexlify(oid))
 
 
 WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',