Update base_version to 0.34~ for 0.34 development
diff --git a/lib/bup/git.py b/lib/bup/git.py
index d1515e3f3f95823f77714e3d396810aa9a43e1e8..d6a745c02d7d9d355370bf23c85a9d5c645273e1 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -4,39 +4,41 @@ interact with the Git data structures.
 """
 
 from __future__ import absolute_import, print_function
-import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
+import os, sys, zlib, subprocess, struct, stat, re, glob
 from array import array
 from binascii import hexlify, unhexlify
 from collections import namedtuple
+from contextlib import ExitStack
 from itertools import islice
-from numbers import Integral
+from shutil import rmtree
 
-from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
+from bup import _helpers, hashsplit, path, midx, bloom, xstat
 from bup.compat import (buffer,
                         byte_int, bytes_from_byte, bytes_from_uint,
                         environ,
-                        items,
-                        range,
+                        pending_raise,
                         reraise)
 from bup.io import path_msg
 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
+                         exo,
                          fdatasync,
-                         hostname, localtime, log,
+                         finalized,
+                         log,
                          merge_dict,
                          merge_iter,
                          mmap_read, mmap_readwrite,
-                         parse_num,
+                         nullcontext_if_not,
                          progress, qprogress, stat_if_exists,
+                         temp_dir,
                          unlink,
                          utc_offset_str)
-from bup.pwdgrp import username, userfullname
 
 
 verbose = 0
 repodir = None  # The default repository, once initialized
 
 _typemap =  {b'blob': 3, b'tree': 2, b'commit': 1, b'tag': 4}
-_typermap = {v: k for k, v in items(_typemap)}
+_typermap = {v: k for k, v in _typemap.items()}
 
 
 _total_searches = 0
@@ -57,13 +59,40 @@ def _git_wait(cmd, p):
     if rv != 0:
         raise GitError('%r returned %d' % (cmd, rv))
 
-def git_config_get(option, repo_dir=None):
-    cmd = (b'git', b'config', b'--get', option)
-    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                         env=_gitenv(repo_dir=repo_dir))
-    r = p.stdout.read()
+def _git_exo(cmd, **kwargs):
+    kwargs['check'] = False
+    result = exo(cmd, **kwargs)
+    _, _, proc = result
+    if proc.returncode != 0:
+        raise GitError('%r returned %d' % (cmd, proc.returncode))
+    return result
+
+def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None):
+    assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used"
+    cmd = [b'git', b'config', b'--null']
+    if cfg_file:
+        cmd.extend([b'--file', cfg_file])
+    if opttype == 'int':
+        cmd.extend([b'--int'])
+    elif opttype == 'bool':
+        cmd.extend([b'--bool'])
+    else:
+        assert opttype is None
+    cmd.extend([b'--get', option])
+    env = None
+    if repo_dir:
+        env = _gitenv(repo_dir=repo_dir)
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env,
+                         close_fds=True)
+    # with --null, git writes out a trailing \0 after the value
+    r = p.stdout.read()[:-1]
     rc = p.wait()
     if rc == 0:
+        if opttype == 'int':
+            return int(r)
+        elif opttype == 'bool':
+            # git converts to 'true' or 'false'
+            return r == b'true'
         return r
     if rc != 1:
         raise GitError('%r returned %d' % (cmd, rc))
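
A usage sketch for the reworked git_config_get() (the option names are
ones bup itself reads; the surrounding code is invented):

    # `git config --get` exits with status 1 when the option is unset,
    # which the function maps to a return value of None.
    limit = git_config_get(b'pack.packSizeLimit', opttype='int')
    if limit is None:
        limit = 1000 * 1000 * 1000   # PackWriter's fallback, see below

    # With opttype='bool', git itself normalizes the value to
    # b'true'/b'false', so a real Python bool can be returned.
    logged = git_config_get(b'core.logAllRefUpdates', opttype='bool')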
@@ -77,9 +106,26 @@ def parse_tz_offset(s):
         return - tz_off
     return tz_off
 
+def parse_commit_gpgsig(sig):
+    """Return the original signature bytes.
+
+    i.e. with the "gpgsig " header and the leading space character on
+    each continuation line removed.
+
+    """
+    if not sig:
+        return None
+    assert sig.startswith(b'gpgsig ')
+    sig = sig[7:]
+    return sig.replace(b'\n ', b'\n')
 
 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
 # Make sure that's authoritative.
+
+# See also
+# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt
+# The continuation lines have only one leading space.
+
 _start_end_char = br'[^ .,:;<>"\'\0\n]'
 _content_char = br'[^\0\n<>]'
 _safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \
@@ -93,7 +139,7 @@ _mergetag_rx = br'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]
 _commit_rx = re.compile(br'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
-
+(?P<gpgsig>gpgsig .*\n(?: .*\n)*)?
 (?P<message>(?:.|\n)*)''' % (_parent_rx,
                              _safe_str_rx, _safe_str_rx, _tz_rx,
                              _safe_str_rx, _safe_str_rx, _tz_rx,
@@ -107,6 +153,7 @@ CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                        'author_sec', 'author_offset',
                                        'committer_name', 'committer_mail',
                                        'committer_sec', 'committer_offset',
+                                       'gpgsig',
                                        'message'])
 
 def parse_commit(content):
@@ -124,6 +171,7 @@ def parse_commit(content):
                       committer_mail=matches['committer_mail'],
                       committer_sec=int(matches['csec']),
                       committer_offset=parse_tz_offset(matches['ctz']),
+                      gpgsig=parse_commit_gpgsig(matches['gpgsig']),
                       message=matches['message'])
 
 
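
A minimal round trip through the updated parser (tree, author, and
timestamps invented):

    commit = (b'tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n'
              b'author A U Thor <a@example.com> 1651234567 +0000\n'
              b'committer A U Thor <a@example.com> 1651234567 +0000\n'
              b'\n'
              b'some message\n')
    info = parse_commit(commit)
    assert info.tree == b'4b825dc642cb6eb9a060e54bf8d69288fbee4904'
    assert info.gpgsig is None        # no gpgsig header in this commit
    assert info.message == b'some message\n'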
@@ -182,12 +230,6 @@ def repo_rel(path):
     return shorten_hash(path)
 
 
-def all_packdirs():
-    paths = [repo(b'objects/pack')]
-    paths += glob.glob(repo(b'index-cache/*/.'))
-    return paths
-
-
 def auto_midx(objdir):
     args = [path.exe(), b'midx', b'--auto', b'--dir', objdir]
     try:
@@ -244,8 +286,7 @@ def demangle_name(name, mode):
     elif name.endswith(b'.bupm'):
         return (name[:-5],
                 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
-    else:
-        return (name, BUP_NORMAL)
+    return (name, BUP_NORMAL)
 
 
 def calc_hash(type, content):
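
The simplified tail of demangle_name() still returns the same pairs,
e.g. (sketch):

    assert demangle_name(b'data.bupm', stat.S_IFDIR) == (b'data', BUP_CHUNKED)
    assert demangle_name(b'data.bupm', stat.S_IFREG) == (b'data', BUP_NORMAL)
    assert demangle_name(b'plain.txt', stat.S_IFREG) == (b'plain.txt', BUP_NORMAL)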
@@ -314,27 +355,6 @@ def _encode_packobj(type, content, compression_level=1):
     yield z.flush()
 
 
-def _encode_looseobj(type, content, compression_level=1):
-    z = zlib.compressobj(compression_level)
-    yield z.compress(b'%s %d\0' % (type, len(content)))
-    yield z.compress(content)
-    yield z.flush()
-
-
-def _decode_looseobj(buf):
-    assert(buf);
-    s = zlib.decompress(buf)
-    i = s.find(b'\0')
-    assert(i > 0)
-    l = s[:i].split(b' ')
-    type = l[0]
-    sz = int(l[1])
-    content = s[i+1:]
-    assert(type in _typemap)
-    assert(sz == len(content))
-    return (type, content)
-
-
 def _decode_packobj(buf):
     assert(buf)
     c = byte_int(buf[0])
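
The loose-object helpers above are dropped as unused; the surviving
pack-object pair still round-trips, e.g.:

    packed = b''.join(_encode_packobj(b'blob', b'hello world'))
    assert _decode_packobj(packed) == (b'blob', b'hello world')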
@@ -352,10 +372,7 @@ def _decode_packobj(buf):
     return (type, zlib.decompress(buf[i+1:]))
 
 
-class PackIdx:
-    def __init__(self):
-        assert(0)
-
+class PackIdx(object):
     def find_offset(self, hash):
         """Get the offset of an object inside the index file."""
         idx = self._idx_from_hash(hash)
@@ -394,6 +411,8 @@ class PackIdx:
 class PackIdxV1(PackIdx):
     """Object representation of a Git pack index (version 1) file."""
     def __init__(self, filename, f):
+        super(PackIdxV1, self).__init__()
+        self.closed = False
         self.name = filename
         self.idxnames = [self.name]
         self.map = mmap_read(f)
@@ -405,6 +424,13 @@ class PackIdxV1(PackIdx):
         # Avoid slicing shatable for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
@@ -425,10 +451,22 @@ class PackIdxV1(PackIdx):
         for ofs in range(start, start + 24 * self.nsha, 24):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        self.closed = True
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
+    def __del__(self):
+        assert self.closed
+
 
 class PackIdxV2(PackIdx):
     """Object representation of a Git pack index (version 2) file."""
     def __init__(self, filename, f):
+        super(PackIdxV2, self).__init__()
+        self.closed = False
         self.name = filename
         self.idxnames = [self.name]
         self.map = mmap_read(f)
@@ -443,6 +481,13 @@ class PackIdxV2(PackIdx):
         # Avoid slicing this for individual hashes (very high overhead)
         self.shatable = buffer(self.map, self.sha_ofs, self.nsha*20)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
     def __len__(self):
         return int(self.nsha)  # int() from long for python 2
 
@@ -468,25 +513,63 @@ class PackIdxV2(PackIdx):
         for ofs in range(start, start + 20 * self.nsha, 20):
             yield self.map[ofs : ofs + 20]
 
+    def close(self):
+        self.closed = True
+        if self.map is not None:
+            self.shatable = None
+            self.map.close()
+            self.map = None
+
+    def __del__(self):
+        assert self.closed
+
 
 _mpi_count = 0
 class PackIdxList:
     def __init__(self, dir, ignore_midx=False):
         global _mpi_count
+        # Q: was this also intended to prevent opening multiple repos?
         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
         _mpi_count += 1
+        self.open = True
         self.dir = dir
         self.also = set()
         self.packs = []
         self.do_bloom = False
         self.bloom = None
         self.ignore_midx = ignore_midx
-        self.refresh()
+        try:
+            self.refresh()
+        except BaseException as ex:
+            with pending_raise(ex):
+                self.close()
 
-    def __del__(self):
+    def close(self):
         global _mpi_count
+        if not self.open:
+            assert _mpi_count == 0
+            return
         _mpi_count -= 1
-        assert(_mpi_count == 0)
+        assert _mpi_count == 0
+        self.also = None
+        self.bloom, bloom = None, self.bloom
+        self.packs, packs = None, self.packs
+        self.open = False
+        with ExitStack() as stack:
+            for pack in packs:
+                stack.enter_context(pack)
+            if bloom:
+                bloom.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
+
+    def __del__(self):
+        assert not self.open
 
     def __iter__(self):
         return iter(idxmerge(self.packs))
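
With __enter__/__exit__ in place, callers can scope the (mmap-heavy)
index list the way rev_parse() does further below; a sketch, assuming a
repository has already been selected via check_repo_or_die():

    oid = unhexlify(b'4b825dc642cb6eb9a060e54bf8d69288fbee4904')
    with PackIdxList(repo(b'objects/pack')) as pL:
        found = pL.exists(oid)
    # close() has now run, unmapping every pack index that was open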
@@ -567,7 +650,6 @@ class PackIdxList:
                                 broken = True
                         if broken:
                             mx.close()
-                            del mx
                             unlink(full)
                         else:
                             midxl.append(mx)
@@ -599,14 +681,27 @@ class PackIdxList:
                         continue
                     d[full] = ix
             bfull = os.path.join(self.dir, b'bup.bloom')
+            new_packs = set(d.values())
+            for p in self.packs:
+                if p not in new_packs:
+                    p.close()
+            new_packs = list(new_packs)
+            new_packs.sort(reverse=True, key=lambda x: len(x))
+            self.packs = new_packs
             if self.bloom is None and os.path.exists(bfull):
                 self.bloom = bloom.ShaBloom(bfull)
-            self.packs = list(set(d.values()))
-            self.packs.sort(reverse=True, key=lambda x: len(x))
-            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
-                self.do_bloom = True
-            else:
-                self.bloom = None
+            try:
+                if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
+                    self.do_bloom = True
+                else:
+                    if self.bloom:
+                        self.bloom, bloom_tmp = None, self.bloom
+                        bloom_tmp.close()
+            except BaseException as ex:
+                with pending_raise(ex):
+                    if self.bloom:
+                        self.bloom.close()
+
         debug1('PackIdxList: using %d index%s.\n'
             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 
@@ -649,23 +744,45 @@ def idxmerge(idxlist, final_progress=True):
     return merge_iter(idxlist, 10024, pfunc, pfinal)
 
 
+def create_commit_blob(tree, parent,
+                       author, adate_sec, adate_tz,
+                       committer, cdate_sec, cdate_tz,
+                       msg):
+    if adate_tz is not None:
+        adate_str = _git_date_str(adate_sec, adate_tz)
+    else:
+        adate_str = _local_git_date_str(adate_sec)
+    if cdate_tz is not None:
+        cdate_str = _git_date_str(cdate_sec, cdate_tz)
+    else:
+        cdate_str = _local_git_date_str(cdate_sec)
+    l = []
+    if tree: l.append(b'tree %s' % hexlify(tree))
+    if parent: l.append(b'parent %s' % hexlify(parent))
+    if author: l.append(b'author %s %s' % (author, adate_str))
+    if committer: l.append(b'committer %s %s' % (committer, cdate_str))
+    l.append(b'')
+    l.append(msg)
+    return b'\n'.join(l)
+
 def _make_objcache():
     return PackIdxList(repo(b'objects/pack'))
 
 # bup-gc assumes that it can disable all PackWriter activities
 # (bloom/midx/cache) via the constructor and close() arguments.
 
-class PackWriter:
+class PackWriter(object):
     """Writes Git objects inside a pack file."""
     def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                  run_midx=True, on_pack_finish=None,
                  max_pack_size=None, max_pack_objects=None, repo_dir=None):
+        self.closed = False
         self.repo_dir = repo_dir or repo()
         self.file = None
         self.parentfd = None
         self.count = 0
         self.outbytes = 0
-        self.filename = None
+        self.tmpdir = None
         self.idx = None
         self.objcache_maker = objcache_maker
         self.objcache = None
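
Since commit construction is now factored out of PackWriter, it can be
exercised directly; a sketch with invented values (tz offsets are in
seconds, and None means local time):

    tree = unhexlify(b'4b825dc642cb6eb9a060e54bf8d69288fbee4904')
    blob = create_commit_blob(tree, None,
                              b'A U Thor <a@example.com>', 1651234567, 0,
                              b'A U Thor <a@example.com>', 1651234567, 0,
                              b'initial import\n')
    # parent=None omits the parent line; the result parses back with
    # parse_commit() above.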
@@ -674,9 +791,8 @@ class PackWriter:
         self.on_pack_finish = on_pack_finish
         if not max_pack_size:
             max_pack_size = git_config_get(b'pack.packSizeLimit',
-                                           repo_dir=self.repo_dir)
-            if max_pack_size is not None:
-                max_pack_size = parse_num(max_pack_size)
+                                           repo_dir=self.repo_dir,
+                                           opttype='int')
             if not max_pack_size:
                 # larger packs slow down pruning
                 max_pack_size = 1000 * 1000 * 1000
@@ -685,35 +801,24 @@ class PackWriter:
         self.max_pack_objects = max_pack_objects if max_pack_objects \
                                 else max(1, self.max_pack_size // 5000)
 
-    def __del__(self):
-        self.close()
-
     def __enter__(self):
         return self
 
     def __exit__(self, type, value, traceback):
-        self.close()
+        with pending_raise(value, rethrow=False):
+            self.close()
 
     def _open(self):
         if not self.file:
-            objdir = dir = os.path.join(self.repo_dir, b'objects')
-            fd, name = tempfile.mkstemp(suffix=b'.pack', dir=objdir)
-            try:
-                self.file = os.fdopen(fd, 'w+b')
-            except:
-                os.close(fd)
-                raise
-            try:
-                self.parentfd = os.open(objdir, os.O_RDONLY)
-            except:
-                f = self.file
-                self.file = None
-                f.close()
-                raise
-            assert name.endswith(b'.pack')
-            self.filename = name[:-5]
-            self.file.write(b'PACK\0\0\0\2\0\0\0\0')
-            self.idx = list(list() for i in range(256))
+            with ExitStack() as err_stack:
+                objdir = dir = os.path.join(self.repo_dir, b'objects')
+                self.tmpdir = err_stack.enter_context(temp_dir(dir=objdir, prefix=b'pack-tmp-'))
+                self.file = err_stack.enter_context(open(self.tmpdir + b'/pack', 'w+b'))
+                self.parentfd = err_stack.enter_context(finalized(os.open(objdir, os.O_RDONLY),
+                                                                  lambda x: os.close(x)))
+                self.file.write(b'PACK\0\0\0\2\0\0\0\0')
+                self.idx = PackIdxV2Writer()
+                err_stack.pop_all()
 
     def _raw_write(self, datalist, sha):
         self._open()
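
Given the new close()/__del__ contract, a writer must be closed or
aborted explicitly; the context manager does that, e.g. (sketch,
assuming an initialized repository; maybe_write() appears below):

    with PackWriter() as w:
        sha = w.maybe_write(b'blob', b'hello world')
    # __exit__ called close(), finalizing and renaming the pack; if an
    # exception had escaped, the temp dir would still be cleaned up.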
@@ -738,14 +843,12 @@ class PackWriter:
     def _update_idx(self, sha, crc, size):
         assert(sha)
         if self.idx:
-            self.idx[byte_int(sha[0])].append((sha, crc,
-                                               self.file.tell() - size))
+            self.idx.add(sha, crc, self.file.tell() - size)
 
     def _write(self, sha, type, content):
         if verbose:
             log('>')
-        if not sha:
-            sha = calc_hash(type, content)
+        assert sha
         size, crc = self._raw_write(_encode_packobj(type, content,
                                                     self.compression_level),
                                     sha=sha)
@@ -754,12 +857,6 @@ class PackWriter:
             self.breakpoint()
         return sha
 
-    def breakpoint(self):
-        """Clear byte and object counts and return the last processed id."""
-        id = self._end(self.run_midx)
-        self.outbytes = self.count = 0
-        return id
-
     def _require_objcache(self):
         if self.objcache is None and self.objcache_maker:
             self.objcache = self.objcache_maker()
@@ -802,94 +899,92 @@ class PackWriter:
                    msg):
         """Create a commit object in the pack.  The date_sec values must be
         epoch-seconds, and if a tz is None, the local timezone is assumed."""
-        if adate_tz:
-            adate_str = _git_date_str(adate_sec, adate_tz)
-        else:
-            adate_str = _local_git_date_str(adate_sec)
-        if cdate_tz:
-            cdate_str = _git_date_str(cdate_sec, cdate_tz)
-        else:
-            cdate_str = _local_git_date_str(cdate_sec)
-        l = []
-        if tree: l.append(b'tree %s' % hexlify(tree))
-        if parent: l.append(b'parent %s' % hexlify(parent))
-        if author: l.append(b'author %s %s' % (author, adate_str))
-        if committer: l.append(b'committer %s %s' % (committer, cdate_str))
-        l.append(b'')
-        l.append(msg)
-        return self.maybe_write(b'commit', b'\n'.join(l))
-
-    def abort(self):
-        """Remove the pack file from disk."""
-        f = self.file
-        if f:
-            pfd = self.parentfd
-            self.file = None
-            self.parentfd = None
-            self.idx = None
-            try:
-                try:
-                    os.unlink(self.filename + b'.pack')
-                finally:
-                    f.close()
-            finally:
-                if pfd is not None:
-                    os.close(pfd)
-
-    def _end(self, run_midx=True):
-        f = self.file
-        if not f: return None
-        self.file = None
-        try:
-            self.objcache = None
-            idx = self.idx
-            self.idx = None
-
-            # update object count
-            f.seek(8)
-            cp = struct.pack('!i', self.count)
-            assert(len(cp) == 4)
-            f.write(cp)
-
-            # calculate the pack sha1sum
-            f.seek(0)
-            sum = Sha1()
-            for b in chunkyreader(f):
-                sum.update(b)
-            packbin = sum.digest()
-            f.write(packbin)
-            fdatasync(f.fileno())
-        finally:
-            f.close()
-
-        obj_list_sha = self._write_pack_idx_v2(self.filename + b'.idx', idx,
-                                               packbin)
-        nameprefix = os.path.join(self.repo_dir,
-                                  b'objects/pack/pack-' +  obj_list_sha)
-        if os.path.exists(self.filename + b'.map'):
-            os.unlink(self.filename + b'.map')
-        os.rename(self.filename + b'.pack', nameprefix + b'.pack')
-        os.rename(self.filename + b'.idx', nameprefix + b'.idx')
+        content = create_commit_blob(tree, parent,
+                                     author, adate_sec, adate_tz,
+                                     committer, cdate_sec, cdate_tz,
+                                     msg)
+        return self.maybe_write(b'commit', content)
+
+    def _end(self, run_midx=True, abort=False):
+        # Ignores run_midx during abort
+        self.tmpdir, tmpdir = None, self.tmpdir
+        self.parentfd, pfd = None, self.parentfd
+        self.file, f = None, self.file
+        self.idx, idx = None, self.idx
         try:
-            os.fsync(self.parentfd)
-        finally:
-            os.close(self.parentfd)
+            with nullcontext_if_not(self.objcache), \
+                 finalized(pfd, lambda x: x is not None and os.close(x)), \
+                 nullcontext_if_not(f):
+                if abort or not f:
+                    return None
+
+                # update object count
+                f.seek(8)
+                cp = struct.pack('!i', self.count)
+                assert len(cp) == 4
+                f.write(cp)
+
+                # calculate the pack sha1sum
+                f.seek(0)
+                sum = Sha1()
+                for b in chunkyreader(f):
+                    sum.update(b)
+                packbin = sum.digest()
+                f.write(packbin)
+                f.flush()
+                fdatasync(f.fileno())
+                f.close()
 
-        if run_midx:
-            auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+                idx.write(tmpdir + b'/idx', packbin)
+                nameprefix = os.path.join(self.repo_dir,
+                                          b'objects/pack/pack-' +  hexlify(packbin))
+                os.rename(tmpdir + b'/pack', nameprefix + b'.pack')
+                os.rename(tmpdir + b'/idx', nameprefix + b'.idx')
+                os.fsync(pfd)
+                if run_midx:
+                    auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+                if self.on_pack_finish:
+                    self.on_pack_finish(nameprefix)
+                return nameprefix
+        finally:
+            if tmpdir:
+                rmtree(tmpdir)
+            # Must be last -- some of the code above depends on it
+            self.objcache = None
 
-        if self.on_pack_finish:
-            self.on_pack_finish(nameprefix)
+    def abort(self):
+        """Remove the pack file from disk."""
+        self.closed = True
+        self._end(abort=True)
 
-        return nameprefix
+    def breakpoint(self):
+        """Clear byte and object counts and return the last processed id."""
+        id = self._end(self.run_midx)
+        self.outbytes = self.count = 0
+        return id
 
     def close(self, run_midx=True):
         """Close the pack file and move it to its definitive path."""
+        self.closed = True
         return self._end(run_midx=run_midx)
 
-    def _write_pack_idx_v2(self, filename, idx, packbin):
+    def __del__(self):
+        assert self.closed
+
+
+class PackIdxV2Writer:
+    def __init__(self):
+        self.idx = list(list() for i in range(256))
+        self.count = 0
+
+    def add(self, sha, crc, offs):
+        assert(sha)
+        self.count += 1
+        self.idx[byte_int(sha[0])].append((sha, crc, offs))
+
+    def write(self, filename, packbin):
         ofs64_count = 0
-        for section in idx:
+        for section in self.idx:
             for entry in section:
                 if entry[2] >= 2**31:
                     ofs64_count += 1
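
A sketch of the now-freestanding index writer (path and values
invented; the second write() argument is normally the finished pack's
sha1 digest):

    idxw = PackIdxV2Writer()
    oid = unhexlify(b'4b825dc642cb6eb9a060e54bf8d69288fbee4904')
    idxw.add(oid, zlib.crc32(b'payload'), 12)  # sha, crc, pack offset
    idxw.write(b'/tmp/demo.idx', b'\x00' * 20)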
@@ -903,7 +998,8 @@ class PackWriter:
             fdatasync(idx_f.fileno())
             idx_map = mmap_readwrite(idx_f, close=False)
             try:
-                count = _helpers.write_idx(filename, idx_map, idx, self.count)
+                count = _helpers.write_idx(filename, idx_map, self.idx,
+                                           self.count)
                 assert(count == self.count)
                 idx_map.flush()
             finally:
@@ -919,17 +1015,13 @@ class PackWriter:
             b = idx_f.read(8 + 4*256)
             idx_sum.update(b)
 
-            obj_list_sum = Sha1()
-            for b in chunkyreader(idx_f, 20*self.count):
+            for b in chunkyreader(idx_f, 20 * self.count):
                 idx_sum.update(b)
-                obj_list_sum.update(b)
-            namebase = hexlify(obj_list_sum.digest())
 
             for b in chunkyreader(idx_f):
                 idx_sum.update(b)
             idx_f.write(idx_sum.digest())
             fdatasync(idx_f.fileno())
-            return namebase
         finally:
             idx_f.close()
 
@@ -951,7 +1043,8 @@ def list_refs(patterns=None, repo_dir=None,
     argv.append(b'--')
     if patterns:
         argv.extend(patterns)
-    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
+    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE,
+                         close_fds=True)
     out = p.stdout.read().strip()
     rv = p.wait()  # not fatal
     if rv:
@@ -1002,7 +1095,8 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None):
     p = subprocess.Popen(rev_list_invocation(ref_or_refs,
                                              format=format),
                          env=_gitenv(repo_dir),
-                         stdout = subprocess.PIPE)
+                         stdout = subprocess.PIPE,
+                         close_fds=True)
     if not format:
         for line in p.stdout:
             yield line.strip()
@@ -1022,17 +1116,6 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None):
         raise GitError('git rev-list returned error %d' % rv)
 
 
-def get_commit_dates(refs, repo_dir=None):
-    """Get the dates for the specified commit refs.  For now, every unique
-       string in refs must resolve to a different commit or this
-       function will fail."""
-    result = []
-    for ref in refs:
-        commit = get_commit_items(ref, cp(repo_dir))
-        result.append(commit.author_sec)
-    return result
-
-
 def rev_parse(committish, repo_dir=None):
     """Resolve the full hash for 'committish', if it exists.
 
@@ -1046,29 +1129,39 @@ def rev_parse(committish, repo_dir=None):
         debug2("resolved from ref: commit = %s\n" % hexlify(head))
         return head
 
-    pL = PackIdxList(repo(b'objects/pack', repo_dir=repo_dir))
-
     if len(committish) == 40:
         try:
             hash = unhexlify(committish)
         except TypeError:
             return None
 
-        if pL.exists(hash):
-            return hash
+        with PackIdxList(repo(b'objects/pack', repo_dir=repo_dir)) as pL:
+            if pL.exists(hash):
+                return hash
 
     return None
 
 
-def update_ref(refname, newval, oldval, repo_dir=None):
-    """Update a repository reference."""
-    if not oldval:
-        oldval = b''
+def update_ref(refname, newval, oldval, repo_dir=None, force=False):
+    """Update a repository reference.
+
+    With force=True, don't care about the previous ref (oldval);
+    with force=False oldval must be either a sha1 or None (for an
+    entirely new branch)
+    """
+    if force:
+        assert oldval is None
+        oldarg = []
+    elif not oldval:
+        oldarg = [b'']
+    else:
+        oldarg = [hexlify(oldval)]
     assert refname.startswith(b'refs/heads/') \
         or refname.startswith(b'refs/tags/')
     p = subprocess.Popen([b'git', b'update-ref', refname,
-                          hexlify(newval), hexlify(oldval)],
-                         env=_gitenv(repo_dir))
+                          hexlify(newval)] + oldarg,
+                         env=_gitenv(repo_dir),
+                         close_fds=True)
     _git_wait(b'git update-ref', p)
 
 
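
Hedged examples of the new update_ref() calling convention (ref names
invented; newsha/oldsha stand for 20-byte binary oids):

    # Entirely new branch: oldval=None expands to b'', so git
    # update-ref insists the ref did not exist before.
    update_ref(b'refs/heads/new-branch', newsha, None)

    # Guarded update: only succeeds if the ref still points at oldsha.
    update_ref(b'refs/heads/main', newsha, oldsha)

    # Unconditional overwrite; force=True requires oldval to be None.
    update_ref(b'refs/heads/main', newsha, None, force=True)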
@@ -1077,29 +1170,29 @@ def delete_ref(refname, oldvalue=None):
     assert refname.startswith(b'refs/')
     oldvalue = [] if not oldvalue else [oldvalue]
     p = subprocess.Popen([b'git', b'update-ref', b'-d', refname] + oldvalue,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git update-ref', p)
 
 
-def guess_repo(path=None):
-    """Set the path value in the global variable "repodir".
-    This makes bup look for an existing bup repository, but not fail if a
-    repository doesn't exist. Usually, if you are interacting with a bup
-    repository, you would not be calling this function but using
-    check_repo_or_die().
+def guess_repo():
+    """Return the global repodir or BUP_DIR when either is set, or ~/.bup.
+    Usually, if you are interacting with a bup repository, you would
+    not be calling this function but using check_repo_or_die().
+
     """
-    global repodir
-    if path:
-        repodir = path
-    if not repodir:
-        repodir = environ.get(b'BUP_DIR')
-        if not repodir:
-            repodir = os.path.expanduser(b'~/.bup')
+    if repodir:
+        return repodir
+    repo = environ.get(b'BUP_DIR')
+    if not repo:
+        repo = os.path.expanduser(b'~/.bup')
+    return repo
 
 
 def init_repo(path=None):
     """Create the Git bare repository for bup in a given path."""
-    guess_repo(path)
+    global repodir
+    repodir = path or guess_repo()
     d = repo()  # appends a / to the path
     parent = os.path.dirname(os.path.dirname(d))
     if parent and not os.path.exists(parent):
@@ -1108,22 +1201,24 @@ def init_repo(path=None):
     if os.path.exists(d) and not os.path.isdir(os.path.join(d, b'.')):
         raise GitError('"%s" exists but is not a directory\n' % path_msg(d))
     p = subprocess.Popen([b'git', b'--bare', b'init'], stdout=sys.stderr,
-                         env=_gitenv())
+                         env=_gitenv(),
+                         close_fds=True)
     _git_wait('git init', p)
     # Force the index version configuration in order to ensure bup works
     # regardless of the version of the installed Git binary.
     p = subprocess.Popen([b'git', b'config', b'pack.indexVersion', '2'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
     # Enable the reflog
     p = subprocess.Popen([b'git', b'config', b'core.logAllRefUpdates', b'true'],
-                         stdout=sys.stderr, env=_gitenv())
+                         stdout=sys.stderr, env=_gitenv(), close_fds=True)
     _git_wait('git config', p)
 
 
 def check_repo_or_die(path=None):
     """Check to see if a bup repository probably exists, and abort if not."""
-    guess_repo(path)
+    global repodir
+    repodir = path or guess_repo()
     top = repo()
     pst = stat_if_exists(top + b'/objects/pack')
     if pst and stat.S_ISDIR(pst.st_mode):
@@ -1138,84 +1233,80 @@ def check_repo_or_die(path=None):
     sys.exit(14)
 
 
-_ver = None
-def ver():
-    """Get Git's version and ensure a usable version is installed.
+def is_suitable_git(ver_str):
+    if not ver_str.startswith(b'git version '):
+        return 'unrecognized'
+    ver_str = ver_str[len(b'git version '):]
+    if ver_str.startswith(b'0.'):
+        return 'insufficient'
+    if ver_str.startswith(b'1.'):
+        if re.match(br'1\.[012345]rc', ver_str):
+            return 'insufficient'
+        if re.match(br'1\.[01234]\.', ver_str):
+            return 'insufficient'
+        if re.match(br'1\.5\.[012345]($|\.)', ver_str):
+            return 'insufficient'
+        if re.match(br'1\.5\.6-rc', ver_str):
+            return 'insufficient'
+        return 'suitable'
+    if re.match(br'[0-9]+(\.|$)?', ver_str):
+        return 'suitable'
+    sys.exit(13)
+
+_git_great = None
+
+def require_suitable_git(ver_str=None):
+    """Raise GitError if the version of git isn't suitable.
+
+    Rely on ver_str when provided, rather than invoking the git in the
+    path.
 
-    The returned version is formatted as an ordered tuple with each position
-    representing a digit in the version tag. For example, the following tuple
-    would represent version 1.6.6.9:
-
-        (1, 6, 6, 9)
     """
-    global _ver
-    if not _ver:
-        p = subprocess.Popen([b'git', b'--version'], stdout=subprocess.PIPE)
-        gvs = p.stdout.read()
-        _git_wait('git --version', p)
-        m = re.match(br'git version (\S+.\S+)', gvs)
-        if not m:
-            raise GitError('git --version weird output: %r' % gvs)
-        _ver = tuple(int(x) for x in m.group(1).split(b'.'))
-    needed = (1, 5, 3, 1)
-    if _ver < needed:
-        raise GitError('git version %s or higher is required; you have %s'
-                       % ('.'.join(str(x) for x in needed),
-                          '.'.join(str(x) for x in _ver)))
-    return _ver
-
-
-class _AbortableIter:
-    def __init__(self, it, onabort = None):
-        self.it = it
-        self.onabort = onabort
-        self.done = None
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        try:
-            return next(self.it)
-        except StopIteration as e:
-            self.done = True
-            raise
-        except:
-            self.abort()
-            raise
-
-    next = __next__
-
-    def abort(self):
-        """Abort iteration and call the abortion callback, if needed."""
-        if not self.done:
-            self.done = True
-            if self.onabort:
-                self.onabort()
-
-    def __del__(self):
-        self.abort()
+    global _git_great
+    if _git_great is not None:
+        return
+    if environ.get(b'BUP_GIT_VERSION_IS_FINE', b'').lower() \
+       in (b'yes', b'true', b'1'):
+        _git_great = True
+        return
+    if not ver_str:
+        ver_str, _, _ = _git_exo([b'git', b'--version'])
+    status = is_suitable_git(ver_str)
+    if status == 'unrecognized':
+        raise GitError('Unexpected git --version output: %r' % ver_str)
+    if status == 'insufficient':
+        log('error: git version must be at least 1.5.6\n')
+        sys.exit(1)
+    if status == 'suitable':
+        _git_great = True
+        return
+    assert False
 
 
 class CatPipe:
     """Link to 'git cat-file' that is used to retrieve blob data."""
     def __init__(self, repo_dir = None):
+        require_suitable_git()
         self.repo_dir = repo_dir
-        wanted = (1, 5, 6)
-        if ver() < wanted:
-            log('error: git version must be at least 1.5.6\n')
-            sys.exit(1)
         self.p = self.inprogress = None
 
-    def _abort(self):
-        if self.p:
-            self.p.stdout.close()
-            self.p.stdin.close()
-        self.p = None
+    def close(self, wait=False):
+        self.p, p = None, self.p
         self.inprogress = None
+        if p:
+            try:
+                p.stdout.close()
+            finally:
+                # This will handle pending exceptions correctly once
+                # we drop py2
+                p.stdin.close()
+        if wait:
+            p.wait()
+            return p.returncode
+        return None
 
     def restart(self):
-        self._abort()
+        self.close()
         self.p = subprocess.Popen([b'git', b'cat-file', b'--batch'],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
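
For reference, the is_suitable_git() classifier above behaves like
this:

    assert is_suitable_git(b'git version 2.34.1') == 'suitable'
    assert is_suitable_git(b'git version 1.5.5') == 'insufficient'
    assert is_suitable_git(b'not git at all') == 'unrecognized'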
@@ -1243,6 +1334,9 @@ class CatPipe:
         self.p.stdin.write(ref + b'\n')
         self.p.stdin.flush()
         hdr = self.p.stdout.readline()
+        if not hdr:
+            raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)'
+                           % (ref, self.p.poll() or 'none'))
         if hdr.endswith(b' missing\n'):
             self.inprogress = None
             yield None, None, None
@@ -1252,18 +1346,17 @@ class CatPipe:
             raise GitError('expected object (id, type, size), got %r' % info)
         oidx, typ, size = info
         size = int(size)
-        it = _AbortableIter(chunkyreader(self.p.stdout, size),
-                            onabort=self._abort)
         try:
+            it = chunkyreader(self.p.stdout, size)
             yield oidx, typ, size
             for blob in it:
                 yield blob
             readline_result = self.p.stdout.readline()
             assert readline_result == b'\n'
             self.inprogress = None
-        except Exception as e:
-            it.abort()
-            raise
+        except Exception as ex:
+            with pending_raise(ex):
+                self.close()
 
     def _join(self, it):
         _, typ, _ = next(it)
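
Consumers drive the get() generator like this (sketch; the ref is
invented):

    cp = CatPipe()
    it = cp.get(b'refs/heads/main')
    oidx, typ, size = next(it)   # header triple always comes first
    if oidx is not None:
        data = b''.join(it)      # then the raw object bytes
    cp.close(wait=True)          # reaps the git cat-file subprocess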
@@ -1309,6 +1402,13 @@ def cp(repo_dir=None):
     return cp
 
 
+def close_catpipes():
+    # FIXME: chain exceptions
+    while _cp:
+        _, cp = _cp.popitem()
+        cp.close(wait=True)
+
+
 def tags(repo_dir = None):
     """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
     tags = {}
@@ -1324,7 +1424,7 @@ def tags(repo_dir = None):
 class MissingObject(KeyError):
     def __init__(self, oid):
         self.oid = oid
-        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
+        KeyError.__init__(self, 'object %r is missing' % hexlify(oid))
 
 
 WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',