"""
from __future__ import absolute_import, print_function
-import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
+import os, sys, zlib, subprocess, struct, stat, re, tempfile, glob
from array import array
from binascii import hexlify, unhexlify
from collections import namedtuple
from itertools import islice
-from numbers import Integral
-from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
+from bup import _helpers, hashsplit, path, midx, bloom, xstat
from bup.compat import (buffer,
byte_int, bytes_from_byte, bytes_from_uint,
environ,
- items,
- range,
+ ExitStack,
+ pending_raise,
reraise)
from bup.io import path_msg
from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
exo,
fdatasync,
- hostname, localtime, log,
+ finalized,
+ log,
merge_dict,
merge_iter,
mmap_read, mmap_readwrite,
- parse_num,
+ nullcontext_if_not,
progress, qprogress, stat_if_exists,
unlink,
utc_offset_str)
-from bup.pwdgrp import username, userfullname
verbose = 0
repodir = None # The default repository, once initialized
_typemap = {b'blob': 3, b'tree': 2, b'commit': 1, b'tag': 4}
-_typermap = {v: k for k, v in items(_typemap)}
+_typermap = {v: k for k, v in _typemap.items()}
_total_searches = 0
raise GitError('%r returned %d' % (cmd, proc.returncode))
return result
-def git_config_get(option, repo_dir=None):
- cmd = (b'git', b'config', b'--get', option)
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
- env=_gitenv(repo_dir=repo_dir))
- r = p.stdout.read()
+def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None):
+ assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used"
+ cmd = [b'git', b'config', b'--null']
+ if cfg_file:
+ cmd.extend([b'--file', cfg_file])
+ if opttype == 'int':
+ cmd.extend([b'--int'])
+ elif opttype == 'bool':
+ cmd.extend([b'--bool'])
+ else:
+ assert opttype is None
+ cmd.extend([b'--get', option])
+ env=None
+ if repo_dir:
+ env = _gitenv(repo_dir=repo_dir)
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env,
+ close_fds=True)
+ # with --null, git writes out a trailing \0 after the value
+ r = p.stdout.read()[:-1]
rc = p.wait()
if rc == 0:
+ if opttype == 'int':
+ return int(r)
+ elif opttype == 'bool':
+ # git converts to 'true' or 'false'
+ return r == b'true'
return r
if rc != 1:
raise GitError('%r returned %d' % (cmd, rc))
return - tz_off
return tz_off
+def parse_commit_gpgsig(sig):
+ """Return the original signature bytes.
+
+ i.e. with the "gpgsig " header and the leading space character on
+ each continuation line removed.
+
+ """
+ if not sig:
+ return None
+ assert sig.startswith(b'gpgsig ')
+ sig = sig[7:]
+ return sig.replace(b'\n ', b'\n')
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
+
+# See also
+# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt
+# The continuation lines have only one leading space.
+
_start_end_char = br'[^ .,:;<>"\'\0\n]'
_content_char = br'[^\0\n<>]'
_safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \
_commit_rx = re.compile(br'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
-
+(?P<gpgsig>gpgsig .*\n(?: .*\n)*)?
(?P<message>(?:.|\n)*)''' % (_parent_rx,
_safe_str_rx, _safe_str_rx, _tz_rx,
_safe_str_rx, _safe_str_rx, _tz_rx,
'author_sec', 'author_offset',
'committer_name', 'committer_mail',
'committer_sec', 'committer_offset',
+ 'gpgsig',
'message'])
def parse_commit(content):
committer_mail=matches['committer_mail'],
committer_sec=int(matches['csec']),
committer_offset=parse_tz_offset(matches['ctz']),
+ gpgsig=parse_commit_gpgsig(matches['gpgsig']),
message=matches['message'])
return shorten_hash(path)
-def all_packdirs():
- paths = [repo(b'objects/pack')]
- paths += glob.glob(repo(b'index-cache/*/.'))
- return paths
-
-
def auto_midx(objdir):
args = [path.exe(), b'midx', b'--auto', b'--dir', objdir]
try:
elif name.endswith(b'.bupm'):
return (name[:-5],
BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
- else:
- return (name, BUP_NORMAL)
+ return (name, BUP_NORMAL)
def calc_hash(type, content):
yield z.flush()
-def _encode_looseobj(type, content, compression_level=1):
- z = zlib.compressobj(compression_level)
- yield z.compress(b'%s %d\0' % (type, len(content)))
- yield z.compress(content)
- yield z.flush()
-
-
-def _decode_looseobj(buf):
- assert(buf);
- s = zlib.decompress(buf)
- i = s.find(b'\0')
- assert(i > 0)
- l = s[:i].split(b' ')
- type = l[0]
- sz = int(l[1])
- content = s[i+1:]
- assert(type in _typemap)
- assert(sz == len(content))
- return (type, content)
-
-
def _decode_packobj(buf):
assert(buf)
c = byte_int(buf[0])
return (type, zlib.decompress(buf[i+1:]))
-class PackIdx:
- def __init__(self):
- assert(0)
-
+class PackIdx(object):
def find_offset(self, hash):
"""Get the offset of an object inside the index file."""
idx = self._idx_from_hash(hash)
class PackIdxV1(PackIdx):
"""Object representation of a Git pack index (version 1) file."""
def __init__(self, filename, f):
+ super(PackIdxV1, self).__init__()
+ self.closed = False
self.name = filename
self.idxnames = [self.name]
self.map = mmap_read(f)
return self
def __exit__(self, type, value, traceback):
- self.close()
+ with pending_raise(value, rethrow=False):
+ self.close()
def __len__(self):
return int(self.nsha) # int() from long for python 2
yield self.map[ofs : ofs + 20]
def close(self):
+ self.closed = True
if self.map is not None:
self.shatable = None
self.map.close()
self.map = None
+ def __del__(self):
+ assert self.closed
+
class PackIdxV2(PackIdx):
"""Object representation of a Git pack index (version 2) file."""
def __init__(self, filename, f):
+ super(PackIdxV2, self).__init__()
+ self.closed = False
self.name = filename
self.idxnames = [self.name]
self.map = mmap_read(f)
return self
def __exit__(self, type, value, traceback):
- self.close()
+ with pending_raise(value, rethrow=False):
+ self.close()
def __len__(self):
return int(self.nsha) # int() from long for python 2
yield self.map[ofs : ofs + 20]
def close(self):
+ self.closed = True
if self.map is not None:
self.shatable = None
self.map.close()
self.map = None
+ def __del__(self):
+ assert self.closed
+
_mpi_count = 0
class PackIdxList:
def __init__(self, dir, ignore_midx=False):
global _mpi_count
+ # Q: was this also intended to prevent opening multiple repos?
assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
_mpi_count += 1
+ self.open = True
self.dir = dir
self.also = set()
self.packs = []
self.do_bloom = False
self.bloom = None
self.ignore_midx = ignore_midx
- self.refresh()
+ try:
+ self.refresh()
+ except BaseException as ex:
+ with pending_raise(ex):
+ self.close()
- def __del__(self):
+ def close(self):
global _mpi_count
+ if not self.open:
+ assert _mpi_count == 0
+ return
_mpi_count -= 1
- assert(_mpi_count == 0)
+ assert _mpi_count == 0
+ self.also = None
+ self.bloom, bloom = None, self.bloom
+ self.packs, packs = None, self.packs
+ self.open = False
+ with ExitStack() as stack:
+ for pack in packs:
+ stack.enter_context(pack)
+ if bloom:
+ bloom.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, traceback):
+ with pending_raise(value, rethrow=False):
+ self.close()
+
+ def __del__(self):
+ assert not self.open
def __iter__(self):
return iter(idxmerge(self.packs))
broken = True
if broken:
mx.close()
- del mx
unlink(full)
else:
midxl.append(mx)
continue
d[full] = ix
bfull = os.path.join(self.dir, b'bup.bloom')
+ new_packs = set(d.values())
+ for p in self.packs:
+ if not p in new_packs:
+ p.close()
+ new_packs = list(new_packs)
+ new_packs.sort(reverse=True, key=lambda x: len(x))
+ self.packs = new_packs
if self.bloom is None and os.path.exists(bfull):
self.bloom = bloom.ShaBloom(bfull)
- self.packs = list(set(d.values()))
- self.packs.sort(reverse=True, key=lambda x: len(x))
- if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
- self.do_bloom = True
- else:
- self.bloom = None
+ try:
+ if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
+ self.do_bloom = True
+ else:
+ if self.bloom:
+ self.bloom, bloom_tmp = None, self.bloom
+ bloom_tmp.close()
+ except BaseException as ex:
+ with pending_raise(ex):
+ if self.bloom:
+ self.bloom.close()
+
debug1('PackIdxList: using %d index%s.\n'
% (len(self.packs), len(self.packs)!=1 and 'es' or ''))
return merge_iter(idxlist, 10024, pfunc, pfinal)
+def create_commit_blob(tree, parent,
+ author, adate_sec, adate_tz,
+ committer, cdate_sec, cdate_tz,
+ msg):
+ if adate_tz is not None:
+ adate_str = _git_date_str(adate_sec, adate_tz)
+ else:
+ adate_str = _local_git_date_str(adate_sec)
+ if cdate_tz is not None:
+ cdate_str = _git_date_str(cdate_sec, cdate_tz)
+ else:
+ cdate_str = _local_git_date_str(cdate_sec)
+ l = []
+ if tree: l.append(b'tree %s' % hexlify(tree))
+ if parent: l.append(b'parent %s' % hexlify(parent))
+ if author: l.append(b'author %s %s' % (author, adate_str))
+ if committer: l.append(b'committer %s %s' % (committer, cdate_str))
+ l.append(b'')
+ l.append(msg)
+ return b'\n'.join(l)
+
def _make_objcache():
return PackIdxList(repo(b'objects/pack'))
# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.
-class PackWriter:
+class PackWriter(object):
"""Writes Git objects inside a pack file."""
def __init__(self, objcache_maker=_make_objcache, compression_level=1,
run_midx=True, on_pack_finish=None,
max_pack_size=None, max_pack_objects=None, repo_dir=None):
+ self.closed = False
self.repo_dir = repo_dir or repo()
self.file = None
self.parentfd = None
self.on_pack_finish = on_pack_finish
if not max_pack_size:
max_pack_size = git_config_get(b'pack.packSizeLimit',
- repo_dir=self.repo_dir)
- if max_pack_size is not None:
- max_pack_size = parse_num(max_pack_size)
+ repo_dir=self.repo_dir,
+ opttype='int')
if not max_pack_size:
# larger packs slow down pruning
max_pack_size = 1000 * 1000 * 1000
self.max_pack_objects = max_pack_objects if max_pack_objects \
else max(1, self.max_pack_size // 5000)
- def __del__(self):
- self.close()
-
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
- self.close()
+ with pending_raise(value, rethrow=False):
+ self.close()
def _open(self):
if not self.file:
self.breakpoint()
return sha
- def breakpoint(self):
- """Clear byte and object counts and return the last processed id."""
- id = self._end(self.run_midx)
- self.outbytes = self.count = 0
- return id
-
def _require_objcache(self):
if self.objcache is None and self.objcache_maker:
self.objcache = self.objcache_maker()
msg):
"""Create a commit object in the pack. The date_sec values must be
epoch-seconds, and if a tz is None, the local timezone is assumed."""
- if adate_tz is not None:
- adate_str = _git_date_str(adate_sec, adate_tz)
- else:
- adate_str = _local_git_date_str(adate_sec)
- if cdate_tz is not None:
- cdate_str = _git_date_str(cdate_sec, cdate_tz)
- else:
- cdate_str = _local_git_date_str(cdate_sec)
- l = []
- if tree: l.append(b'tree %s' % hexlify(tree))
- if parent: l.append(b'parent %s' % hexlify(parent))
- if author: l.append(b'author %s %s' % (author, adate_str))
- if committer: l.append(b'committer %s %s' % (committer, cdate_str))
- l.append(b'')
- l.append(msg)
- return self.maybe_write(b'commit', b'\n'.join(l))
+ content = create_commit_blob(tree, parent,
+ author, adate_sec, adate_tz,
+ committer, cdate_sec, cdate_tz,
+ msg)
+ return self.maybe_write(b'commit', content)
+
+ def _end(self, run_midx=True, abort=False):
+ # Ignores run_midx during abort
+ self.parentfd, pfd, = None, self.parentfd
+ self.file, f = None, self.file
+ self.idx, idx = None, self.idx
+ try:
+ with nullcontext_if_not(self.objcache), \
+ finalized(pfd, lambda x: x is not None and os.close(x)), \
+ nullcontext_if_not(f):
+ if not f:
+ return None
- def abort(self):
- """Remove the pack file from disk."""
- f = self.file
- if f:
- pfd = self.parentfd
- self.file = None
- self.parentfd = None
- self.idx = None
- try:
- try:
+ if abort:
os.unlink(self.filename + b'.pack')
- finally:
- f.close()
- finally:
- if pfd is not None:
- os.close(pfd)
+ return None
+
+ # update object count
+ f.seek(8)
+ cp = struct.pack('!i', self.count)
+ assert len(cp) == 4
+ f.write(cp)
+
+ # calculate the pack sha1sum
+ f.seek(0)
+ sum = Sha1()
+ for b in chunkyreader(f):
+ sum.update(b)
+ packbin = sum.digest()
+ f.write(packbin)
+ f.flush()
+ fdatasync(f.fileno())
+ f.close()
- def _end(self, run_midx=True):
- f = self.file
- if not f: return None
- self.file = None
- try:
- self.objcache = None
- idx = self.idx
- self.idx = None
-
- # update object count
- f.seek(8)
- cp = struct.pack('!i', self.count)
- assert(len(cp) == 4)
- f.write(cp)
-
- # calculate the pack sha1sum
- f.seek(0)
- sum = Sha1()
- for b in chunkyreader(f):
- sum.update(b)
- packbin = sum.digest()
- f.write(packbin)
- fdatasync(f.fileno())
+ idx.write(self.filename + b'.idx', packbin)
+ nameprefix = os.path.join(self.repo_dir,
+ b'objects/pack/pack-' + hexlify(packbin))
+ if os.path.exists(self.filename + b'.map'):
+ os.unlink(self.filename + b'.map')
+ os.rename(self.filename + b'.pack', nameprefix + b'.pack')
+ os.rename(self.filename + b'.idx', nameprefix + b'.idx')
+ os.fsync(pfd)
+ if run_midx:
+ auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+ if self.on_pack_finish:
+ self.on_pack_finish(nameprefix)
+ return nameprefix
finally:
- f.close()
-
- obj_list_sha = idx.write(self.filename + b'.idx', packbin)
- nameprefix = os.path.join(self.repo_dir,
- b'objects/pack/pack-' + obj_list_sha)
- if os.path.exists(self.filename + b'.map'):
- os.unlink(self.filename + b'.map')
- os.rename(self.filename + b'.pack', nameprefix + b'.pack')
- os.rename(self.filename + b'.idx', nameprefix + b'.idx')
- try:
- os.fsync(self.parentfd)
- finally:
- os.close(self.parentfd)
-
- if run_midx:
- auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+ # Must be last -- some of the code above depends on it
+ self.objcache = None
- if self.on_pack_finish:
- self.on_pack_finish(nameprefix)
+ def abort(self):
+ """Remove the pack file from disk."""
+ self.closed = True
+ self._end(abort=True)
- return nameprefix
+ def breakpoint(self):
+ """Clear byte and object counts and return the last processed id."""
+ id = self._end(self.run_midx)
+ self.outbytes = self.count = 0
+ return id
def close(self, run_midx=True):
"""Close the pack file and move it to its definitive path."""
+ self.closed = True
return self._end(run_midx=run_midx)
+ def __del__(self):
+ assert self.closed
+
class PackIdxV2Writer:
def __init__(self):
b = idx_f.read(8 + 4*256)
idx_sum.update(b)
- obj_list_sum = Sha1()
for b in chunkyreader(idx_f, 20 * self.count):
idx_sum.update(b)
- obj_list_sum.update(b)
- namebase = hexlify(obj_list_sum.digest())
for b in chunkyreader(idx_f):
idx_sum.update(b)
idx_f.write(idx_sum.digest())
fdatasync(idx_f.fileno())
- return namebase
finally:
idx_f.close()
argv.append(b'--')
if patterns:
argv.extend(patterns)
- p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
+ p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE,
+ close_fds=True)
out = p.stdout.read().strip()
rv = p.wait() # not fatal
if rv:
p = subprocess.Popen(rev_list_invocation(ref_or_refs,
format=format),
env=_gitenv(repo_dir),
- stdout = subprocess.PIPE)
+ stdout = subprocess.PIPE,
+ close_fds=True)
if not format:
for line in p.stdout:
yield line.strip()
raise GitError('git rev-list returned error %d' % rv)
-def get_commit_dates(refs, repo_dir=None):
- """Get the dates for the specified commit refs. For now, every unique
- string in refs must resolve to a different commit or this
- function will fail."""
- result = []
- for ref in refs:
- commit = get_commit_items(ref, cp(repo_dir))
- result.append(commit.author_sec)
- return result
-
-
def rev_parse(committish, repo_dir=None):
"""Resolve the full hash for 'committish', if it exists.
debug2("resolved from ref: commit = %s\n" % hexlify(head))
return head
- pL = PackIdxList(repo(b'objects/pack', repo_dir=repo_dir))
-
if len(committish) == 40:
try:
hash = unhexlify(committish)
except TypeError:
return None
- if pL.exists(hash):
- return hash
+ with PackIdxList(repo(b'objects/pack', repo_dir=repo_dir)) as pL:
+ if pL.exists(hash):
+ return hash
return None
-def update_ref(refname, newval, oldval, repo_dir=None):
- """Update a repository reference."""
- if not oldval:
- oldval = b''
+def update_ref(refname, newval, oldval, repo_dir=None, force=False):
+ """Update a repository reference.
+
+ With force=True, don't care about the previous ref (oldval);
+ with force=False oldval must be either a sha1 or None (for an
+ entirely new branch)
+ """
+ if force:
+ assert oldval is None
+ oldarg = []
+ elif not oldval:
+ oldarg = [b'']
+ else:
+ oldarg = [hexlify(oldval)]
assert refname.startswith(b'refs/heads/') \
or refname.startswith(b'refs/tags/')
p = subprocess.Popen([b'git', b'update-ref', refname,
- hexlify(newval), hexlify(oldval)],
- env=_gitenv(repo_dir))
+ hexlify(newval)] + oldarg,
+ env=_gitenv(repo_dir),
+ close_fds=True)
_git_wait(b'git update-ref', p)
assert refname.startswith(b'refs/')
oldvalue = [] if not oldvalue else [oldvalue]
p = subprocess.Popen([b'git', b'update-ref', b'-d', refname] + oldvalue,
- env=_gitenv())
+ env=_gitenv(),
+ close_fds=True)
_git_wait('git update-ref', p)
if os.path.exists(d) and not os.path.isdir(os.path.join(d, b'.')):
raise GitError('"%s" exists but is not a directory\n' % path_msg(d))
p = subprocess.Popen([b'git', b'--bare', b'init'], stdout=sys.stderr,
- env=_gitenv())
+ env=_gitenv(),
+ close_fds=True)
_git_wait('git init', p)
# Force the index version configuration in order to ensure bup works
# regardless of the version of the installed Git binary.
p = subprocess.Popen([b'git', b'config', b'pack.indexVersion', '2'],
- stdout=sys.stderr, env=_gitenv())
+ stdout=sys.stderr, env=_gitenv(), close_fds=True)
_git_wait('git config', p)
# Enable the reflog
p = subprocess.Popen([b'git', b'config', b'core.logAllRefUpdates', b'true'],
- stdout=sys.stderr, env=_gitenv())
+ stdout=sys.stderr, env=_gitenv(), close_fds=True)
_git_wait('git config', p)
assert False
-class _AbortableIter:
- def __init__(self, it, onabort = None):
- self.it = it
- self.onabort = onabort
- self.done = None
-
- def __iter__(self):
- return self
-
- def __next__(self):
- try:
- return next(self.it)
- except StopIteration as e:
- self.done = True
- raise
- except:
- self.abort()
- raise
-
- next = __next__
-
- def abort(self):
- """Abort iteration and call the abortion callback, if needed."""
- if not self.done:
- self.done = True
- if self.onabort:
- self.onabort()
-
- def __del__(self):
- self.abort()
-
-
class CatPipe:
"""Link to 'git cat-file' that is used to retrieve blob data."""
def __init__(self, repo_dir = None):
self.repo_dir = repo_dir
self.p = self.inprogress = None
- def _abort(self):
- if self.p:
- self.p.stdout.close()
- self.p.stdin.close()
- self.p = None
+ def close(self, wait=False):
+ self.p, p = None, self.p
self.inprogress = None
+ if p:
+ try:
+ p.stdout.close()
+ finally:
+ # This will handle pending exceptions correctly once
+ # we drop py2
+ p.stdin.close()
+ if wait:
+ p.wait()
+ return p.returncode
+ return None
def restart(self):
- self._abort()
+ self.close()
self.p = subprocess.Popen([b'git', b'cat-file', b'--batch'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
self.p.stdin.write(ref + b'\n')
self.p.stdin.flush()
hdr = self.p.stdout.readline()
+ if not hdr:
+ raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)'
+ % (ref, self.p.poll() or 'none'))
if hdr.endswith(b' missing\n'):
self.inprogress = None
yield None, None, None
raise GitError('expected object (id, type, size), got %r' % info)
oidx, typ, size = info
size = int(size)
- it = _AbortableIter(chunkyreader(self.p.stdout, size),
- onabort=self._abort)
try:
+ it = chunkyreader(self.p.stdout, size)
yield oidx, typ, size
- for blob in it:
+ for blob in chunkyreader(self.p.stdout, size):
yield blob
readline_result = self.p.stdout.readline()
assert readline_result == b'\n'
self.inprogress = None
- except Exception as e:
- it.abort()
- raise
+ except Exception as ex:
+ with pending_raise(ex):
+ self.close()
def _join(self, it):
_, typ, _ = next(it)
return cp
+def close_catpipes():
+ # FIXME: chain exceptions
+ while _cp:
+ _, cp = _cp.popitem()
+ cp.close(wait=True)
+
+
def tags(repo_dir = None):
"""Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
tags = {}