X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=lib%2Fbup%2Fgit.py;h=4c1a95c52a985e5eaee1bb36ef4ca61651559c78;hb=20db61170c8a8fd4b452ec29f6a3133385c4fdad;hp=1a3ff1788e7aafad6f58924e3355156251307b97;hpb=f8627f4915c832c61f57df962f989b8cd6cbbfe6;p=bup.git diff --git a/lib/bup/git.py b/lib/bup/git.py index 1a3ff17..4c1a95c 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -4,33 +4,32 @@ interact with the Git data structures. """ from __future__ import absolute_import, print_function -import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob +import os, sys, zlib, subprocess, struct, stat, re, tempfile, glob from array import array from binascii import hexlify, unhexlify from collections import namedtuple from itertools import islice -from numbers import Integral -from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat +from bup import _helpers, hashsplit, path, midx, bloom, xstat from bup.compat import (buffer, byte_int, bytes_from_byte, bytes_from_uint, environ, items, + pending_raise, range, reraise) from bup.io import path_msg from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2, exo, fdatasync, - hostname, localtime, log, + finalized, + log, merge_dict, merge_iter, mmap_read, mmap_readwrite, - parse_num, progress, qprogress, stat_if_exists, unlink, utc_offset_str) -from bup.pwdgrp import username, userfullname verbose = 0 @@ -66,14 +65,32 @@ def _git_exo(cmd, **kwargs): raise GitError('%r returned %d' % (cmd, proc.returncode)) return result -def git_config_get(option, repo_dir=None): - cmd = (b'git', b'config', b'--get', option) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, - env=_gitenv(repo_dir=repo_dir), +def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None): + assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used" + cmd = [b'git', b'config', b'--null'] + if cfg_file: + cmd.extend([b'--file', cfg_file]) + if opttype == 'int': + cmd.extend([b'--int']) + elif opttype == 'bool': + cmd.extend([b'--bool']) + else: + assert opttype is None + cmd.extend([b'--get', option]) + env=None + if repo_dir: + env = _gitenv(repo_dir=repo_dir) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env, close_fds=True) - r = p.stdout.read() + # with --null, git writes out a trailing \0 after the value + r = p.stdout.read()[:-1] rc = p.wait() if rc == 0: + if opttype == 'int': + return int(r) + elif opttype == 'bool': + # git converts to 'true' or 'false' + return r == b'true' return r if rc != 1: raise GitError('%r returned %d' % (cmd, rc)) @@ -87,9 +104,26 @@ def parse_tz_offset(s): return - tz_off return tz_off +def parse_commit_gpgsig(sig): + """Return the original signature bytes. + + i.e. with the "gpgsig " header and the leading space character on + each continuation line removed. + + """ + if not sig: + return None + assert sig.startswith(b'gpgsig ') + sig = sig[7:] + return sig.replace(b'\n ', b'\n') # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt # Make sure that's authoritative. + +# See also +# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt +# The continuation lines have only one leading space. + _start_end_char = br'[^ .,:;<>"\'\0\n]' _content_char = br'[^\0\n<>]' _safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \ @@ -103,7 +137,7 @@ _mergetag_rx = br'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n] _commit_rx = re.compile(br'''tree (?P[abcdefABCDEF0123456789]{40}) (?P%s*)author (?P%s) <(?P%s)> (?P\d+) (?P%s) committer (?P%s) <(?P%s)> (?P\d+) (?P%s)(?P%s?) - +(?Pgpgsig .*\n(?: .*\n)*)? (?P(?:.|\n)*)''' % (_parent_rx, _safe_str_rx, _safe_str_rx, _tz_rx, _safe_str_rx, _safe_str_rx, _tz_rx, @@ -117,6 +151,7 @@ CommitInfo = namedtuple('CommitInfo', ['tree', 'parents', 'author_sec', 'author_offset', 'committer_name', 'committer_mail', 'committer_sec', 'committer_offset', + 'gpgsig', 'message']) def parse_commit(content): @@ -134,6 +169,7 @@ def parse_commit(content): committer_mail=matches['committer_mail'], committer_sec=int(matches['csec']), committer_offset=parse_tz_offset(matches['ctz']), + gpgsig=parse_commit_gpgsig(matches['gpgsig']), message=matches['message']) @@ -248,8 +284,7 @@ def demangle_name(name, mode): elif name.endswith(b'.bupm'): return (name[:-5], BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL) - else: - return (name, BUP_NORMAL) + return (name, BUP_NORMAL) def calc_hash(type, content): @@ -392,7 +427,8 @@ class PackIdxV1(PackIdx): return self def __exit__(self, type, value, traceback): - self.close() + with pending_raise(value, rethrow=False): + self.close() def __len__(self): return int(self.nsha) # int() from long for python 2 @@ -442,7 +478,8 @@ class PackIdxV2(PackIdx): return self def __exit__(self, type, value, traceback): - self.close() + with pending_raise(value, rethrow=False): + self.close() def __len__(self): return int(self.nsha) # int() from long for python 2 @@ -656,6 +693,28 @@ def idxmerge(idxlist, final_progress=True): return merge_iter(idxlist, 10024, pfunc, pfinal) +def create_commit_blob(tree, parent, + author, adate_sec, adate_tz, + committer, cdate_sec, cdate_tz, + msg): + if adate_tz is not None: + adate_str = _git_date_str(adate_sec, adate_tz) + else: + adate_str = _local_git_date_str(adate_sec) + if cdate_tz is not None: + cdate_str = _git_date_str(cdate_sec, cdate_tz) + else: + cdate_str = _local_git_date_str(cdate_sec) + l = [] + if tree: l.append(b'tree %s' % hexlify(tree)) + if parent: l.append(b'parent %s' % hexlify(parent)) + if author: l.append(b'author %s %s' % (author, adate_str)) + if committer: l.append(b'committer %s %s' % (committer, cdate_str)) + l.append(b'') + l.append(msg) + return b'\n'.join(l) + + def _make_objcache(): return PackIdxList(repo(b'objects/pack')) @@ -681,9 +740,8 @@ class PackWriter: self.on_pack_finish = on_pack_finish if not max_pack_size: max_pack_size = git_config_get(b'pack.packSizeLimit', - repo_dir=self.repo_dir) - if max_pack_size is not None: - max_pack_size = parse_num(max_pack_size) + repo_dir=self.repo_dir, + opttype='int') if not max_pack_size: # larger packs slow down pruning max_pack_size = 1000 * 1000 * 1000 @@ -692,14 +750,12 @@ class PackWriter: self.max_pack_objects = max_pack_objects if max_pack_objects \ else max(1, self.max_pack_size // 5000) - def __del__(self): - self.close() - def __enter__(self): return self def __exit__(self, type, value, traceback): - self.close() + with pending_raise(value, rethrow=False): + self.close() def _open(self): if not self.file: @@ -760,12 +816,6 @@ class PackWriter: self.breakpoint() return sha - def breakpoint(self): - """Clear byte and object counts and return the last processed id.""" - id = self._end(self.run_midx) - self.outbytes = self.count = 0 - return id - def _require_objcache(self): if self.objcache is None and self.objcache_maker: self.objcache = self.objcache_maker() @@ -808,53 +858,32 @@ class PackWriter: msg): """Create a commit object in the pack. The date_sec values must be epoch-seconds, and if a tz is None, the local timezone is assumed.""" - if adate_tz is not None: - adate_str = _git_date_str(adate_sec, adate_tz) - else: - adate_str = _local_git_date_str(adate_sec) - if cdate_tz is not None: - cdate_str = _git_date_str(cdate_sec, cdate_tz) - else: - cdate_str = _local_git_date_str(cdate_sec) - l = [] - if tree: l.append(b'tree %s' % hexlify(tree)) - if parent: l.append(b'parent %s' % hexlify(parent)) - if author: l.append(b'author %s %s' % (author, adate_str)) - if committer: l.append(b'committer %s %s' % (committer, cdate_str)) - l.append(b'') - l.append(msg) - return self.maybe_write(b'commit', b'\n'.join(l)) + content = create_commit_blob(tree, parent, + author, adate_sec, adate_tz, + committer, cdate_sec, cdate_tz, + msg) + return self.maybe_write(b'commit', content) + + def _end(self, run_midx=True, abort=False): + # Ignores run_midx during abort + if not self.file: + return None + self.file, f = None, self.file + self.idx, idx = None, self.idx + self.parentfd, pfd, = None, self.parentfd + self.objcache = None - def abort(self): - """Remove the pack file from disk.""" - f = self.file - if f: - pfd = self.parentfd - self.file = None - self.parentfd = None - self.idx = None - try: - try: - os.unlink(self.filename + b'.pack') - finally: - f.close() - finally: - if pfd is not None: - os.close(pfd) + with finalized(pfd, lambda x: x is not None and os.close(x)), \ + f: - def _end(self, run_midx=True): - f = self.file - if not f: return None - self.file = None - try: - self.objcache = None - idx = self.idx - self.idx = None + if abort: + os.unlink(self.filename + b'.pack') + return None # update object count f.seek(8) cp = struct.pack('!i', self.count) - assert(len(cp) == 4) + assert len(cp) == 4 f.write(cp) # calculate the pack sha1sum @@ -864,29 +893,33 @@ class PackWriter: sum.update(b) packbin = sum.digest() f.write(packbin) + f.flush() fdatasync(f.fileno()) - finally: f.close() - obj_list_sha = idx.write(self.filename + b'.idx', packbin) - nameprefix = os.path.join(self.repo_dir, - b'objects/pack/pack-' + obj_list_sha) - if os.path.exists(self.filename + b'.map'): - os.unlink(self.filename + b'.map') - os.rename(self.filename + b'.pack', nameprefix + b'.pack') - os.rename(self.filename + b'.idx', nameprefix + b'.idx') - try: - os.fsync(self.parentfd) - finally: - os.close(self.parentfd) - - if run_midx: - auto_midx(os.path.join(self.repo_dir, b'objects/pack')) + idx.write(self.filename + b'.idx', packbin) + nameprefix = os.path.join(self.repo_dir, + b'objects/pack/pack-' + hexlify(packbin)) + if os.path.exists(self.filename + b'.map'): + os.unlink(self.filename + b'.map') + os.rename(self.filename + b'.pack', nameprefix + b'.pack') + os.rename(self.filename + b'.idx', nameprefix + b'.idx') + os.fsync(pfd) + if run_midx: + auto_midx(os.path.join(self.repo_dir, b'objects/pack')) + if self.on_pack_finish: + self.on_pack_finish(nameprefix) + return nameprefix - if self.on_pack_finish: - self.on_pack_finish(nameprefix) + def abort(self): + """Remove the pack file from disk.""" + self._end(abort=True) - return nameprefix + def breakpoint(self): + """Clear byte and object counts and return the last processed id.""" + id = self._end(self.run_midx) + self.outbytes = self.count = 0 + return id def close(self, run_midx=True): """Close the pack file and move it to its definitive path.""" @@ -936,17 +969,13 @@ class PackIdxV2Writer: b = idx_f.read(8 + 4*256) idx_sum.update(b) - obj_list_sum = Sha1() for b in chunkyreader(idx_f, 20 * self.count): idx_sum.update(b) - obj_list_sum.update(b) - namebase = hexlify(obj_list_sum.digest()) for b in chunkyreader(idx_f): idx_sum.update(b) idx_f.write(idx_sum.digest()) fdatasync(idx_f.fileno()) - return namebase finally: idx_f.close() @@ -1041,17 +1070,6 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None): raise GitError('git rev-list returned error %d' % rv) -def get_commit_dates(refs, repo_dir=None): - """Get the dates for the specified commit refs. For now, every unique - string in refs must resolve to a different commit or this - function will fail.""" - result = [] - for ref in refs: - commit = get_commit_items(ref, cp(repo_dir)) - result.append(commit.author_sec) - return result - - def rev_parse(committish, repo_dir=None): """Resolve the full hash for 'committish', if it exists. @@ -1259,6 +1277,7 @@ class CatPipe: if wait: p.wait() return p.returncode + return None def restart(self): self.close() @@ -1289,6 +1308,9 @@ class CatPipe: self.p.stdin.write(ref + b'\n') self.p.stdin.flush() hdr = self.p.stdout.readline() + if not hdr: + raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)' + % (ref, self.p.poll() or 'none')) if hdr.endswith(b' missing\n'): self.inprogress = None yield None, None, None