"""
from __future__ import absolute_import, print_function
-import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
+import os, sys, zlib, subprocess, struct, stat, re, tempfile, glob
from array import array
from binascii import hexlify, unhexlify
from collections import namedtuple
from itertools import islice
-from numbers import Integral
-from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
+from bup import _helpers, hashsplit, path, midx, bloom, xstat
from bup.compat import (buffer,
byte_int, bytes_from_byte, bytes_from_uint,
environ,
items,
+ pending_raise,
range,
reraise)
from bup.io import path_msg
from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
exo,
fdatasync,
- hostname, localtime, log,
+ finalized,
+ log,
merge_dict,
merge_iter,
mmap_read, mmap_readwrite,
- parse_num,
progress, qprogress, stat_if_exists,
unlink,
utc_offset_str)
-from bup.pwdgrp import username, userfullname
verbose = 0
raise GitError('%r returned %d' % (cmd, proc.returncode))
return result
-def git_config_get(option, repo_dir=None):
- cmd = (b'git', b'config', b'--get', option)
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
- env=_gitenv(repo_dir=repo_dir),
+def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None):  # run `git config --get` for one option; opttype may be None, 'int' or 'bool'
+ assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used"
+ cmd = [b'git', b'config', b'--null']
+ if cfg_file:  # query an explicit config file via --file
+ cmd.extend([b'--file', cfg_file])
+ if opttype == 'int':  # --int / --bool make git itself validate and canonicalize the value
+ cmd.extend([b'--int'])
+ elif opttype == 'bool':
+ cmd.extend([b'--bool'])
+ else:
+ assert opttype is None
+ cmd.extend([b'--get', option])
+ env=None  # env=None makes Popen inherit this process's environment
+ if repo_dir:  # only build a git environment via _gitenv() when a repo_dir was given
+ env = _gitenv(repo_dir=repo_dir)
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env,
close_fds=True)
- r = p.stdout.read()
+ # with --null, git writes out a trailing \0 after the value
+ r = p.stdout.read()[:-1]
rc = p.wait()
if rc == 0:
+ if opttype == 'int':
+ return int(r)  # int() accepts ASCII digit bytes directly
+ elif opttype == 'bool':
+ # git converts to 'true' or 'false'
+ return r == b'true'
return r  # untyped lookup: the raw bytes git printed
if rc != 1:  # exit status 1 does not raise (git-config documents it as "key not found"); any other failure does
raise GitError('%r returned %d' % (cmd, rc))
return - tz_off
return tz_off
+def parse_commit_gpgsig(sig):
+ """Return the original signature bytes.
+
+ i.e. with the "gpgsig " header and the leading space character on
+ each continuation line removed.
+
+ """
+ if not sig:  # None or empty input: no signature to decode, so the result is None
+ return None
+ assert sig.startswith(b'gpgsig ')
+ sig = sig[7:]  # 7 == len(b'gpgsig ')
+ return sig.replace(b'\n ', b'\n')  # drop the single space that follows each newline (continuation-line indent)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
+
+# See also
+# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt
+# The continuation lines have only one leading space.
+
_start_end_char = br'[^ .,:;<>"\'\0\n]'
_content_char = br'[^\0\n<>]'
_safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \
_commit_rx = re.compile(br'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
-
+(?P<gpgsig>gpgsig .*\n(?: .*\n)*)?
(?P<message>(?:.|\n)*)''' % (_parent_rx,
_safe_str_rx, _safe_str_rx, _tz_rx,
_safe_str_rx, _safe_str_rx, _tz_rx,
'author_sec', 'author_offset',
'committer_name', 'committer_mail',
'committer_sec', 'committer_offset',
+ 'gpgsig',
'message'])
def parse_commit(content):
committer_mail=matches['committer_mail'],
committer_sec=int(matches['csec']),
committer_offset=parse_tz_offset(matches['ctz']),
+ gpgsig=parse_commit_gpgsig(matches['gpgsig']),
message=matches['message'])
elif name.endswith(b'.bupm'):
return (name[:-5],
BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
- else:
- return (name, BUP_NORMAL)
+ return (name, BUP_NORMAL)
def calc_hash(type, content):
yield z.flush()
-def _encode_looseobj(type, content, compression_level=1):
- z = zlib.compressobj(compression_level)
- yield z.compress(b'%s %d\0' % (type, len(content)))
- yield z.compress(content)
- yield z.flush()
-
-
-def _decode_looseobj(buf):
- assert(buf);
- s = zlib.decompress(buf)
- i = s.find(b'\0')
- assert(i > 0)
- l = s[:i].split(b' ')
- type = l[0]
- sz = int(l[1])
- content = s[i+1:]
- assert(type in _typemap)
- assert(sz == len(content))
- return (type, content)
-
-
def _decode_packobj(buf):
assert(buf)
c = byte_int(buf[0])
return self
def __exit__(self, type, value, traceback):
- self.close()
+ with pending_raise(value, rethrow=False):  # pending_raise is imported from bup.compat; presumably it keeps `value` as the primary error if close() raises too - TODO confirm
+ self.close()
def __len__(self):
return int(self.nsha) # int() from long for python 2
return self
def __exit__(self, type, value, traceback):
- self.close()
+ with pending_raise(value, rethrow=False):  # pending_raise is imported from bup.compat; presumably it keeps `value` as the primary error if close() raises too - TODO confirm
+ self.close()
def __len__(self):
return int(self.nsha) # int() from long for python 2
return merge_iter(idxlist, 10024, pfunc, pfinal)
+def create_commit_blob(tree, parent,  # assemble the raw bytes of a commit object from its fields; returns bytes
+ author, adate_sec, adate_tz,
+ committer, cdate_sec, cdate_tz,
+ msg):
+ if adate_tz is not None:  # an explicit tz is formatted as given; None selects the local-time formatter
+ adate_str = _git_date_str(adate_sec, adate_tz)
+ else:
+ adate_str = _local_git_date_str(adate_sec)
+ if cdate_tz is not None:  # same rule for the committer date
+ cdate_str = _git_date_str(cdate_sec, cdate_tz)
+ else:
+ cdate_str = _local_git_date_str(cdate_sec)
+ l = []
+ if tree: l.append(b'tree %s' % hexlify(tree))  # each header is emitted only when its value is truthy; tree/parent are hex-encoded here, so presumably binary ids - confirm
+ if parent: l.append(b'parent %s' % hexlify(parent))
+ if author: l.append(b'author %s %s' % (author, adate_str))
+ if committer: l.append(b'committer %s %s' % (committer, cdate_str))
+ l.append(b'')  # empty element produces the blank line between the headers and the message
+ l.append(msg)
+ return b'\n'.join(l)
+
+
def _make_objcache():  # zero-argument factory: returns a PackIdxList built from repo(b'objects/pack')
return PackIdxList(repo(b'objects/pack'))
self.on_pack_finish = on_pack_finish
if not max_pack_size:
max_pack_size = git_config_get(b'pack.packSizeLimit',
- repo_dir=self.repo_dir)
- if max_pack_size is not None:
- max_pack_size = parse_num(max_pack_size)
+ repo_dir=self.repo_dir,
+ opttype='int')
if not max_pack_size:
# larger packs slow down pruning
max_pack_size = 1000 * 1000 * 1000
self.max_pack_objects = max_pack_objects if max_pack_objects \
else max(1, self.max_pack_size // 5000)
- def __del__(self):
- self.close()
-
def __enter__(self):  # context-manager entry: `with ... as w` binds the instance itself
return self
def __exit__(self, type, value, traceback):
- self.close()
+ with pending_raise(value, rethrow=False):  # pending_raise is imported from bup.compat; presumably it keeps `value` as the primary error if close() raises too - TODO confirm
+ self.close()
def _open(self):
if not self.file:
self.breakpoint()
return sha
- def breakpoint(self):
- """Clear byte and object counts and return the last processed id."""
- id = self._end(self.run_midx)
- self.outbytes = self.count = 0
- return id
-
def _require_objcache(self):
if self.objcache is None and self.objcache_maker:
self.objcache = self.objcache_maker()
msg):
"""Create a commit object in the pack. The date_sec values must be
epoch-seconds, and if a tz is None, the local timezone is assumed."""
- if adate_tz is not None:
- adate_str = _git_date_str(adate_sec, adate_tz)
- else:
- adate_str = _local_git_date_str(adate_sec)
- if cdate_tz is not None:
- cdate_str = _git_date_str(cdate_sec, cdate_tz)
- else:
- cdate_str = _local_git_date_str(cdate_sec)
- l = []
- if tree: l.append(b'tree %s' % hexlify(tree))
- if parent: l.append(b'parent %s' % hexlify(parent))
- if author: l.append(b'author %s %s' % (author, adate_str))
- if committer: l.append(b'committer %s %s' % (committer, cdate_str))
- l.append(b'')
- l.append(msg)
- return self.maybe_write(b'commit', b'\n'.join(l))
+ content = create_commit_blob(tree, parent,
+ author, adate_sec, adate_tz,
+ committer, cdate_sec, cdate_tz,
+ msg)
+ return self.maybe_write(b'commit', content)
+
+ def _end(self, run_midx=True, abort=False):  # finish the open pack, or discard it when abort=True; returns the final path prefix, or None if no pack was open or on abort
+ # Ignores run_midx during abort
+ if not self.file:
+ return None
+ self.file, f = None, self.file  # move the live handles into locals and clear them on self before doing any work
+ self.idx, idx = None, self.idx
+ self.parentfd, pfd, = None, self.parentfd
+ self.objcache = None
- def abort(self):
- """Remove the pack file from disk."""
- f = self.file
- if f:
- pfd = self.parentfd
- self.file = None
- self.parentfd = None
- self.idx = None
- try:
- try:
- os.unlink(self.filename + b'.pack')
- finally:
- f.close()
- finally:
- if pfd is not None:
- os.close(pfd)
+ with finalized(pfd, lambda x: x is not None and os.close(x)), \
+ f:  # `f` itself is the second context manager; finalized() is from bup.helpers - presumably it applies the lambda to pfd on exit, closing the fd when one is set (confirm)
- def _end(self, run_midx=True):
- f = self.file
- if not f: return None
- self.file = None
- try:
- self.objcache = None
- idx = self.idx
- self.idx = None
+ if abort:
+ os.unlink(self.filename + b'.pack')  # abort path: delete the partially written pack and stop here
+ return None
# update object count
f.seek(8)  # the count field is rewritten at byte offset 8
cp = struct.pack('!i', self.count)  # '!i' = network-order (big-endian) 4-byte signed int
- assert(len(cp) == 4)
+ assert len(cp) == 4
f.write(cp)
# calculate the pack sha1sum
sum.update(b)
packbin = sum.digest()
f.write(packbin)
+ f.flush()  # push Python-level buffered writes to the OS before fdatasync on the raw descriptor
fdatasync(f.fileno())
- finally:
f.close()
- obj_list_sha = idx.write(self.filename + b'.idx', packbin)
- nameprefix = os.path.join(self.repo_dir,
- b'objects/pack/pack-' + obj_list_sha)
- if os.path.exists(self.filename + b'.map'):
- os.unlink(self.filename + b'.map')
- os.rename(self.filename + b'.pack', nameprefix + b'.pack')
- os.rename(self.filename + b'.idx', nameprefix + b'.idx')
- try:
- os.fsync(self.parentfd)
- finally:
- os.close(self.parentfd)
+ idx.write(self.filename + b'.idx', packbin)
+ nameprefix = os.path.join(self.repo_dir,
+ b'objects/pack/pack-' + hexlify(packbin))  # named after the pack's own checksum; the removed code used the hash returned by idx.write()
+ if os.path.exists(self.filename + b'.map'):
+ os.unlink(self.filename + b'.map')
+ os.rename(self.filename + b'.pack', nameprefix + b'.pack')
+ os.rename(self.filename + b'.idx', nameprefix + b'.idx')
+ os.fsync(pfd)  # NOTE(review): pfd presumably refers to the pack directory, synced after the renames - confirm
+ if run_midx:
+ auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
+ if self.on_pack_finish:  # optional callback, invoked with the final path prefix
+ self.on_pack_finish(nameprefix)
+ return nameprefix
- if run_midx:
- auto_midx(os.path.join(self.repo_dir, b'objects/pack'))
-
- if self.on_pack_finish:
- self.on_pack_finish(nameprefix)
+ def abort(self):
+ """Remove the pack file from disk."""
+ self._end(abort=True)  # with abort=True, _end() unlinks the .pack and returns before any index, rename or midx work
- return nameprefix
+ def breakpoint(self):
+ """Clear byte and object counts and return the last processed id."""
+ id = self._end(self.run_midx)  # the value is whatever _end() returns: the finished pack's path prefix, or None if no pack was open
+ self.outbytes = self.count = 0  # reset the per-pack counters
+ return id
def close(self, run_midx=True):
"""Close the pack file and move it to its definitive path."""
b = idx_f.read(8 + 4*256)
idx_sum.update(b)
- obj_list_sum = Sha1()
for b in chunkyreader(idx_f, 20 * self.count):
idx_sum.update(b)
- obj_list_sum.update(b)
- namebase = hexlify(obj_list_sum.digest())
for b in chunkyreader(idx_f):
idx_sum.update(b)
idx_f.write(idx_sum.digest())
fdatasync(idx_f.fileno())
- return namebase
finally:
idx_f.close()
raise GitError('git rev-list returned error %d' % rv)
-def get_commit_dates(refs, repo_dir=None):
- """Get the dates for the specified commit refs. For now, every unique
- string in refs must resolve to a different commit or this
- function will fail."""
- result = []
- for ref in refs:
- commit = get_commit_items(ref, cp(repo_dir))
- result.append(commit.author_sec)
- return result
-
-
def rev_parse(committish, repo_dir=None):
"""Resolve the full hash for 'committish', if it exists.
if wait:
p.wait()
return p.returncode
+ return None
def restart(self):
self.close()
self.p.stdin.write(ref + b'\n')
self.p.stdin.flush()
hdr = self.p.stdout.readline()
+ if not hdr:
+ raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)'
+ % (ref, self.p.poll() or 'none'))
if hdr.endswith(b' missing\n'):
self.inprogress = None
yield None, None, None