X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=lib%2Fbup%2Fgit.py;h=dbfe78f6e81d48b7e8a21bdad7b682899ea91b01;hb=aa77c6b33c6d565801370aed96ad7d0075a073a3;hp=8777a58df06afd612ef9574327cc4a54c3e805b6;hpb=4443772e4a6cbce5479554e881830c500d27b17c;p=bup.git diff --git a/lib/bup/git.py b/lib/bup/git.py index 8777a58..dbfe78f 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -4,14 +4,13 @@ interact with the Git data structures. """ from __future__ import absolute_import, print_function -import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob +import os, sys, zlib, subprocess, struct, stat, re, tempfile, glob from array import array from binascii import hexlify, unhexlify from collections import namedtuple from itertools import islice -from numbers import Integral -from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat +from bup import _helpers, hashsplit, path, midx, bloom, xstat from bup.compat import (buffer, byte_int, bytes_from_byte, bytes_from_uint, environ, @@ -22,15 +21,13 @@ from bup.io import path_msg from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2, exo, fdatasync, - hostname, localtime, log, + log, merge_dict, merge_iter, mmap_read, mmap_readwrite, - parse_num, progress, qprogress, stat_if_exists, unlink, utc_offset_str) -from bup.pwdgrp import username, userfullname verbose = 0 @@ -66,13 +63,32 @@ def _git_exo(cmd, **kwargs): raise GitError('%r returned %d' % (cmd, proc.returncode)) return result -def git_config_get(option, repo_dir=None): - cmd = (b'git', b'config', b'--get', option) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, - env=_gitenv(repo_dir=repo_dir)) - r = p.stdout.read() +def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None): + assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used" + cmd = [b'git', b'config', b'--null'] + if cfg_file: + cmd.extend([b'--file', cfg_file]) + if opttype == 'int': + cmd.extend([b'--int']) + elif opttype == 'bool': + cmd.extend([b'--bool']) + else: + assert opttype is None + cmd.extend([b'--get', option]) + env=None + if repo_dir: + env = _gitenv(repo_dir=repo_dir) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env, + close_fds=True) + # with --null, git writes out a trailing \0 after the value + r = p.stdout.read()[:-1] rc = p.wait() if rc == 0: + if opttype == 'int': + return int(r) + elif opttype == 'bool': + # git converts to 'true' or 'false' + return r == b'true' return r if rc != 1: raise GitError('%r returned %d' % (cmd, rc)) @@ -191,12 +207,6 @@ def repo_rel(path): return shorten_hash(path) -def all_packdirs(): - paths = [repo(b'objects/pack')] - paths += glob.glob(repo(b'index-cache/*/.')) - return paths - - def auto_midx(objdir): args = [path.exe(), b'midx', b'--auto', b'--dir', objdir] try: @@ -253,8 +263,7 @@ def demangle_name(name, mode): elif name.endswith(b'.bupm'): return (name[:-5], BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL) - else: - return (name, BUP_NORMAL) + return (name, BUP_NORMAL) def calc_hash(type, content): @@ -323,27 +332,6 @@ def _encode_packobj(type, content, compression_level=1): yield z.flush() -def _encode_looseobj(type, content, compression_level=1): - z = zlib.compressobj(compression_level) - yield z.compress(b'%s %d\0' % (type, len(content))) - yield z.compress(content) - yield z.flush() - - -def _decode_looseobj(buf): - assert(buf); - s = zlib.decompress(buf) - i = s.find(b'\0') - assert(i > 0) - l = s[:i].split(b' ') - type = l[0] - sz = int(l[1]) - content = s[i+1:] - assert(type in _typemap) - assert(sz == len(content)) - return (type, content) - - def _decode_packobj(buf): assert(buf) c = byte_int(buf[0]) @@ -414,6 +402,12 @@ class PackIdxV1(PackIdx): # Avoid slicing shatable for individual hashes (very high overhead) self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24) + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + def __len__(self): return int(self.nsha) # int() from long for python 2 @@ -434,6 +428,12 @@ class PackIdxV1(PackIdx): for ofs in range(start, start + 24 * self.nsha, 24): yield self.map[ofs : ofs + 20] + def close(self): + if self.map is not None: + self.shatable = None + self.map.close() + self.map = None + class PackIdxV2(PackIdx): """Object representation of a Git pack index (version 2) file.""" @@ -452,6 +452,12 @@ class PackIdxV2(PackIdx): # Avoid slicing this for individual hashes (very high overhead) self.shatable = buffer(self.map, self.sha_ofs, self.nsha*20) + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + def __len__(self): return int(self.nsha) # int() from long for python 2 @@ -477,6 +483,12 @@ class PackIdxV2(PackIdx): for ofs in range(start, start + 20 * self.nsha, 20): yield self.map[ofs : ofs + 20] + def close(self): + if self.map is not None: + self.shatable = None + self.map.close() + self.map = None + _mpi_count = 0 class PackIdxList: @@ -658,6 +670,28 @@ def idxmerge(idxlist, final_progress=True): return merge_iter(idxlist, 10024, pfunc, pfinal) +def create_commit_blob(tree, parent, + author, adate_sec, adate_tz, + committer, cdate_sec, cdate_tz, + msg): + if adate_tz is not None: + adate_str = _git_date_str(adate_sec, adate_tz) + else: + adate_str = _local_git_date_str(adate_sec) + if cdate_tz is not None: + cdate_str = _git_date_str(cdate_sec, cdate_tz) + else: + cdate_str = _local_git_date_str(cdate_sec) + l = [] + if tree: l.append(b'tree %s' % hexlify(tree)) + if parent: l.append(b'parent %s' % hexlify(parent)) + if author: l.append(b'author %s %s' % (author, adate_str)) + if committer: l.append(b'committer %s %s' % (committer, cdate_str)) + l.append(b'') + l.append(msg) + return b'\n'.join(l) + + def _make_objcache(): return PackIdxList(repo(b'objects/pack')) @@ -683,9 +717,8 @@ class PackWriter: self.on_pack_finish = on_pack_finish if not max_pack_size: max_pack_size = git_config_get(b'pack.packSizeLimit', - repo_dir=self.repo_dir) - if max_pack_size is not None: - max_pack_size = parse_num(max_pack_size) + repo_dir=self.repo_dir, + opttype='int') if not max_pack_size: # larger packs slow down pruning max_pack_size = 1000 * 1000 * 1000 @@ -810,22 +843,11 @@ class PackWriter: msg): """Create a commit object in the pack. The date_sec values must be epoch-seconds, and if a tz is None, the local timezone is assumed.""" - if adate_tz is not None: - adate_str = _git_date_str(adate_sec, adate_tz) - else: - adate_str = _local_git_date_str(adate_sec) - if cdate_tz is not None: - cdate_str = _git_date_str(cdate_sec, cdate_tz) - else: - cdate_str = _local_git_date_str(cdate_sec) - l = [] - if tree: l.append(b'tree %s' % hexlify(tree)) - if parent: l.append(b'parent %s' % hexlify(parent)) - if author: l.append(b'author %s %s' % (author, adate_str)) - if committer: l.append(b'committer %s %s' % (committer, cdate_str)) - l.append(b'') - l.append(msg) - return self.maybe_write(b'commit', b'\n'.join(l)) + content = create_commit_blob(tree, parent, + author, adate_sec, adate_tz, + committer, cdate_sec, cdate_tz, + msg) + return self.maybe_write(b'commit', content) def abort(self): """Remove the pack file from disk.""" @@ -870,9 +892,9 @@ class PackWriter: finally: f.close() - obj_list_sha = idx.write(self.filename + b'.idx', packbin) + idx.write(self.filename + b'.idx', packbin) nameprefix = os.path.join(self.repo_dir, - b'objects/pack/pack-' + obj_list_sha) + b'objects/pack/pack-' + hexlify(packbin)) if os.path.exists(self.filename + b'.map'): os.unlink(self.filename + b'.map') os.rename(self.filename + b'.pack', nameprefix + b'.pack') @@ -938,17 +960,13 @@ class PackIdxV2Writer: b = idx_f.read(8 + 4*256) idx_sum.update(b) - obj_list_sum = Sha1() for b in chunkyreader(idx_f, 20 * self.count): idx_sum.update(b) - obj_list_sum.update(b) - namebase = hexlify(obj_list_sum.digest()) for b in chunkyreader(idx_f): idx_sum.update(b) idx_f.write(idx_sum.digest()) fdatasync(idx_f.fileno()) - return namebase finally: idx_f.close() @@ -970,7 +988,8 @@ def list_refs(patterns=None, repo_dir=None, argv.append(b'--') if patterns: argv.extend(patterns) - p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE) + p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE, + close_fds=True) out = p.stdout.read().strip() rv = p.wait() # not fatal if rv: @@ -1021,7 +1040,8 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None): p = subprocess.Popen(rev_list_invocation(ref_or_refs, format=format), env=_gitenv(repo_dir), - stdout = subprocess.PIPE) + stdout = subprocess.PIPE, + close_fds=True) if not format: for line in p.stdout: yield line.strip() @@ -1041,17 +1061,6 @@ def rev_list(ref_or_refs, parse=None, format=None, repo_dir=None): raise GitError('git rev-list returned error %d' % rv) -def get_commit_dates(refs, repo_dir=None): - """Get the dates for the specified commit refs. For now, every unique - string in refs must resolve to a different commit or this - function will fail.""" - result = [] - for ref in refs: - commit = get_commit_items(ref, cp(repo_dir)) - result.append(commit.author_sec) - return result - - def rev_parse(committish, repo_dir=None): """Resolve the full hash for 'committish', if it exists. @@ -1087,7 +1096,8 @@ def update_ref(refname, newval, oldval, repo_dir=None): or refname.startswith(b'refs/tags/') p = subprocess.Popen([b'git', b'update-ref', refname, hexlify(newval), hexlify(oldval)], - env=_gitenv(repo_dir)) + env=_gitenv(repo_dir), + close_fds=True) _git_wait(b'git update-ref', p) @@ -1096,7 +1106,8 @@ def delete_ref(refname, oldvalue=None): assert refname.startswith(b'refs/') oldvalue = [] if not oldvalue else [oldvalue] p = subprocess.Popen([b'git', b'update-ref', b'-d', refname] + oldvalue, - env=_gitenv()) + env=_gitenv(), + close_fds=True) _git_wait('git update-ref', p) @@ -1127,16 +1138,17 @@ def init_repo(path=None): if os.path.exists(d) and not os.path.isdir(os.path.join(d, b'.')): raise GitError('"%s" exists but is not a directory\n' % path_msg(d)) p = subprocess.Popen([b'git', b'--bare', b'init'], stdout=sys.stderr, - env=_gitenv()) + env=_gitenv(), + close_fds=True) _git_wait('git init', p) # Force the index version configuration in order to ensure bup works # regardless of the version of the installed Git binary. p = subprocess.Popen([b'git', b'config', b'pack.indexVersion', '2'], - stdout=sys.stderr, env=_gitenv()) + stdout=sys.stderr, env=_gitenv(), close_fds=True) _git_wait('git config', p) # Enable the reflog p = subprocess.Popen([b'git', b'config', b'core.logAllRefUpdates', b'true'], - stdout=sys.stderr, env=_gitenv()) + stdout=sys.stderr, env=_gitenv(), close_fds=True) _git_wait('git config', p) @@ -1246,15 +1258,20 @@ class CatPipe: self.repo_dir = repo_dir self.p = self.inprogress = None - def _abort(self): - if self.p: - self.p.stdout.close() - self.p.stdin.close() + def close(self, wait=False): + p = self.p + if p: + p.stdout.close() + p.stdin.close() self.p = None self.inprogress = None + if wait: + p.wait() + return p.returncode + return None def restart(self): - self._abort() + self.close() self.p = subprocess.Popen([b'git', b'cat-file', b'--batch'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, @@ -1282,6 +1299,9 @@ class CatPipe: self.p.stdin.write(ref + b'\n') self.p.stdin.flush() hdr = self.p.stdout.readline() + if not hdr: + raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)' + % (ref, self.p.poll() or 'none')) if hdr.endswith(b' missing\n'): self.inprogress = None yield None, None, None @@ -1292,7 +1312,7 @@ class CatPipe: oidx, typ, size = info size = int(size) it = _AbortableIter(chunkyreader(self.p.stdout, size), - onabort=self._abort) + onabort=self.close) try: yield oidx, typ, size for blob in it: @@ -1348,6 +1368,13 @@ def cp(repo_dir=None): return cp +def close_catpipes(): + # FIXME: chain exceptions + while _cp: + _, cp = _cp.popitem() + cp.close(wait=True) + + def tags(repo_dir = None): """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.""" tags = {}