"""Git interaction library.
bup repositories are in Git format. This library allows us to
interact with the Git data structures.
"""
from __future__ import absolute_import
import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
from collections import namedtuple
from itertools import islice
from numbers import Integral
from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
from bup.compat import range
from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                         fdatasync,
                         hostname, localtime, log, merge_iter,
                         mmap_read, mmap_readwrite,
                         parse_num,
                         progress, qprogress, shstr, stat_if_exists,
                         unlink, username, userfullname,
                         utc_offset_str)
repodir = None  # The default repository, once initialized

ignore_midx = 0

_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

_total_searches = 0
_total_steps = 0
class GitError(Exception):
    pass
def _git_wait(cmd, p):
    rv = p.wait()
    if rv != 0:
        raise GitError('%s returned %d' % (shstr(cmd), rv))
def _git_capture(argv):
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r
def git_config_get(option, repo_dir=None):
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         preexec_fn=_gitenv(repo_dir=repo_dir))
    r = p.stdout.read()
    rc = p.wait()
    if rc == 0:
        return r
    elif rc != 1:
        raise GitError('%s returned %d' % (cmd, rc))
    return None
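# E.g. git_config_get('pack.packSizeLimit') returns the option's raw string
# value when set (see the PackWriter constructor below for a real call);
# the rc == 1 case above is how 'git config --get' reports an unset option.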
def parse_tz_offset(s):
    """UTC offset in seconds."""
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    if s[0] == '-':
        return - tz_off
    return tz_off
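# E.g. parse_tz_offset('+0130') == 5400 and parse_tz_offset('-0130') == -5400,
# matching the [-+]HHMM form accepted by _tz_rx below.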
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    % (_start_end_char,
       _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Assumes every following line starting with a space is part of the
# mergetag.  Is there a formal commit blob spec?
_mergetag_rx = r'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]*)*)'
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)

(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _mergetag_rx))
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch
# seconds, and for now the mergetag is not included.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
                                       'message'])
def parse_commit(content):
    commit_match = re.match(_commit_rx, content)
    if not commit_match:
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
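# Illustrative sketch (the id and identities are fabricated):
#
#   info = parse_commit('tree ' + '0' * 40 + '\n'
#                       'author A U Thor <a@example.com> 1234567890 +0000\n'
#                       'committer A U Thor <a@example.com> 1234567890 +0000\n'
#                       '\n'
#                       'first line of the message\n')
#   assert info.parents == []
#   assert info.author_sec == 1234567890 and info.author_offset == 0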
def get_cat_data(cat_iterator, expected_type):
    _, kind, _ = next(cat_iterator)
    if kind != expected_type:
        raise Exception('expected %r, saw %r' % (expected_type, kind))
    return ''.join(cat_iterator)

def get_commit_items(id, cp):
    return parse_commit(get_cat_data(cp.get(id), 'commit'))
def _local_git_date_str(epoch_sec):
    return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))

def _git_date_str(epoch_sec, tz_offset_sec):
    offs = tz_offset_sec // 60
    return '%d %s%02d%02d' \
        % (epoch_sec,
           '+' if offs >= 0 else '-',
           abs(offs) // 60,
           abs(offs) % 60)
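# E.g. _git_date_str(1234567890, 3600) == '1234567890 +0100': git dates are
# the epoch seconds followed by a [-+]HHMM UTC offset.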
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    if not repo_dir:
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        repo_dir = gd

    return os.path.join(repo_dir, sub)
def shorten_hash(s):
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
                  r'\1\2*\3', s)
def repo_rel(path):
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
        fullrepo += '/'
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)
def all_packdirs():
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
    return paths
def auto_midx(objdir):
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        return
    if rv:
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        return
    if rv:
        add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    else:
        return name
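# A couple of illustrative cases (modes are octal, as in a git tree):
#
#   mangle_name('foo', 0o100644, 0o40000)      => 'foo.bup'  (chunked file)
#   mangle_name('foo.bup', 0o100644, 0o100644) => 'foo.bup.bupl'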
(BUP_NORMAL, BUP_CHUNKED) = (0,1)

def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename, mode), where mode is one
    of the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        return (name[:-5],
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    else:
        return (name, BUP_NORMAL)
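# ... and the corresponding inverse mappings:
#
#   demangle_name('foo.bup', 0o40000)       => ('foo', BUP_CHUNKED)
#   demangle_name('foo.bup.bupl', 0o100644) => ('foo.bup', BUP_NORMAL)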
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    header = '%s %d\0' % (type, len(content))
    sum = Sha1(header)
    sum.update(content)
    return sum.digest()
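# A git object id is SHA-1 over '<type> <size>\0' plus the content, so e.g.
# calc_hash('blob', '') is the well-known empty-blob id
# e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 (returned as 20 raw bytes).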
def shalist_item_sort_key(ent):
    (mode, name, id) = ent
    assert(mode+0 == mode)
    if stat.S_ISDIR(mode):
        # Add a trailing slash for directories, so that they sort
        # as git expects.
        return name + '/'
    else:
        return name
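# Git compares tree entries as though directory names ended with '/', so a
# file 'foo.txt' sorts before a subtree 'foo' ('.' is 0x2e, '/' is 0x2f).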
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    l = []
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        l.append(s)
    return ''.join(l)
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    ofs = 0
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode, name = spl
        sha = buf[z+1:z+1+20]
        ofs = z+1+20
        yield (int(mode, 8), name, sha)
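# Round-trip sketch (the 20-byte id is fabricated):
#
#   list(tree_decode(tree_encode([(0o100644, 'foo', '\x01'*20)])))
#     == [(0o100644, 'foo', '\x01'*20)]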
def _encode_packobj(type, content, compression_level=1):
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    szout = ''
    sz = len(content)
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    z = zlib.compressobj(compression_level)
    yield szout
    yield z.compress(content)
    yield z.flush()
def _encode_looseobj(type, content, compression_level=1):
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()
def _decode_looseobj(buf):
    assert(buf)
    s = zlib.decompress(buf)
    i = s.find('\0')
    l = s[:i].split(' ')
    assert(len(l) == 2)
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
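# The loose encoder and decoder above are inverses, e.g.:
#
#   _decode_looseobj(''.join(_encode_looseobj('blob', 'hello')))
#     == ('blob', 'hello')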
def _decode_packobj(buf):
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
    return (type, zlib.decompress(buf[i+1:]))
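# Likewise for the pack encoding; for a 5-byte blob the size/type header is
# the single byte 0x35, i.e. (_typemap['blob'] << 4) | 5:
#
#   _decode_packobj(''.join(_encode_packobj('blob', 'hello')))
#     == ('blob', 'hello')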
class PackIdx:
    def __init__(self):
        assert(0)

    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        if idx != None:
            return self._ofs_from_idx(idx)
        return None

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            return want_source and os.path.basename(self.name) or True
        return None

    def __len__(self):
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        global _total_searches, _total_steps
        _total_searches += 1
        assert(len(hash) == 20)
        b1 = ord(hash[0])
        start = self.fanout[b1-1]  # range -1..254
        end = self.fanout[b1]  # range 0..255
        _total_steps += 1  # lookup table is a step
        while start < end:
            _total_steps += 1
            mid = start + (end-start)//2
            v = self._idx_to_hash(mid)
            if v < hash:
                start = mid+1
            elif v > hash:
                end = mid
            else:  # got it!
                return mid
        return None
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*24+4 : idx*24+24])

    def __iter__(self):
        for i in range(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
                               nsha*4)
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        if ofs & 0x80000000:
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
        return ofs

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    def __iter__(self):
        for i in range(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
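# For reference, the version-2 .idx layout that PackIdxV2 walks, in on-disk
# order (a sketch; see git's pack-format documentation for the authoritative
# description):
#
#   8-byte header: '\377tOc' magic plus version 2
#   256 x 4-byte fanout entries (cumulative object counts by first byte)
#   nsha x 20-byte sorted object ids
#   nsha x 4-byte CRC32s
#   nsha x 4-byte pack offsets (high bit set => index into the next table)
#   8-byte entries for offsets >= 2**31 (the ofs64table above)
#   20-byte pack checksum and 20-byte idx checksum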
_mpi_count = 0

class PackIdxList:
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0)  # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()
        self.packs = []
        self.do_bloom = False
        self.bloom = None
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))

    def __len__(self):
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in range(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        self.do_bloom = True
        return None
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method checks whether any .midx files have been superseded
        (e.g. their contents are all contained in another, bigger .midx
        file) and removes the superseded files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None  # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            mx.close()
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        ix.close()
                        unlink(ix.name)
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError as e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            self.packs.sort(reverse=True, key=lambda x: len(x))
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
    def add(self, hash):
        """Insert an additional object in the list."""
        self.also.add(hash)
def open_idx(filename):
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        header = f.read(8)
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            if version == 2:
                return PackIdxV2(filename, f)
            else:
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
        else:
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        if final_progress:
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    return PackIdxList(repo('objects/pack'))

# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.
class PackWriter:
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None,
                 max_pack_size=None, max_pack_objects=None, repo_dir=None):
        self.repo_dir = repo_dir or repo()
        self.file = None
        self.parentfd = None
        self.count = 0
        self.outbytes = 0
        self.filename = None
        self.objcache_maker = objcache_maker
        self.objcache = None
        self.compression_level = compression_level
        self.run_midx = run_midx
        self.on_pack_finish = on_pack_finish
        if not max_pack_size:
            max_pack_size = git_config_get('pack.packSizeLimit',
                                           repo_dir=self.repo_dir)
            if max_pack_size is not None:
                max_pack_size = parse_num(max_pack_size)
            if not max_pack_size:
                # larger packs slow down pruning
                max_pack_size = 1000 * 1000 * 1000
        self.max_pack_size = max_pack_size
        # cache memory usage is about 83 bytes per object
        self.max_pack_objects = max_pack_objects if max_pack_objects \
                                else max(1, self.max_pack_size // 5000)
    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()
    def _open(self):
        if not self.file:
            objdir = dir = os.path.join(self.repo_dir, 'objects')
            fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
            try:
                self.file = os.fdopen(fd, 'w+b')
            except:
                os.close(fd)
                raise
            try:
                self.parentfd = os.open(objdir, os.O_RDONLY)
            except:
                f = self.file
                self.file = None
                f.close()
                raise
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            self.idx = list(list() for i in range(256))
    def _raw_write(self, datalist, sha):
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError as e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        assert(sha)
        if self.idx:
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
                                    sha=sha)
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:
            self.breakpoint()
        return sha

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
        return id
    def _require_objcache(self):
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)
    def just_write(self, sha, type, content):
        """Write an object to the pack file without checking for duplication."""
        self._write(sha, type, content)
        # If nothing else, gc doesn't have/want an objcache
        if self.objcache is not None:
            self.objcache.add(sha)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._require_objcache()
            self.just_write(sha, type, content)
        return sha

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack."""
        content = tree_encode(shalist)
        return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
                   msg):
        """Create a commit object in the pack.  The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        if adate_tz is not None:
            adate_str = _git_date_str(adate_sec, adate_tz)
        else:
            adate_str = _local_git_date_str(adate_sec)
        if cdate_tz is not None:
            cdate_str = _git_date_str(cdate_sec, cdate_tz)
        else:
            cdate_str = _local_git_date_str(cdate_sec)
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))
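    # Illustrative sketch, not part of the class: assuming repodir has been
    # set up (e.g. via check_repo_or_die()), writing a blob and a tree that
    # refers to it might look like this:
    #
    #   w = PackWriter()
    #   try:
    #       blob_id = w.new_blob('hello')  # returns the 20-byte raw sha
    #       tree_id = w.new_tree([(0o100644, 'hello.txt', blob_id)])
    #   finally:
    #       w.close()  # renames the temp pack and writes its .idx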
    def abort(self):
        """Remove the pack file from disk."""
        f = self.file
        if f:
            pfd = self.parentfd
            self.file = None
            self.parentfd = None
            self.idx = None
            try:
                try:
                    os.unlink(self.filename + '.pack')
                finally:
                    f.close()
            finally:
                if pfd is not None:
                    os.close(pfd)
    def _end(self, run_midx=True):
        f = self.file
        if not f: return None
        self.file = None
        try:
            self.objcache = None
            idx = self.idx
            self.idx = None

            # update object count
            f.seek(8)
            cp = struct.pack('!i', self.count)
            assert(len(cp) == 4)
            f.write(cp)

            # calculate the pack sha1sum
            f.seek(0)
            sum = Sha1()
            for b in chunkyreader(f):
                sum.update(b)
            packbin = sum.digest()
            f.write(packbin)
            fdatasync(f.fileno())
        finally:
            f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        try:
            os.fsync(self.parentfd)
        finally:
            os.close(self.parentfd)

        if run_midx:
            auto_midx(os.path.join(self.repo_dir, 'objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)

        return nameprefix
    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        ofs64_count = 0
        for section in idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
            try:
                count = _helpers.write_idx(filename, idx_map, idx, self.count)
                assert(count == self.count)
                idx_map.flush()
            finally:
                idx_map.close()
        finally:
            idx_f.close()

        idx_f = open(filename, 'a+b')
        try:
            idx_f.write(packbin)
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4*256)
            idx_sum.update(b)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
            return namebase
        finally:
            idx_f.close()
def _gitenv(repo_dir = None):
    if not repo_dir:
        repo_dir = repo()
    def env():
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
    return env
def list_refs(patterns=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    patterns are specified. In that case, only include tuples for
    refs matching those patterns (cf. git-show-ref(1)). The limits
    restrict the result items to refs/heads or refs/tags. If both
    limits are specified, items from both sources will be included.

    """
    argv = ['git', 'show-ref']
    if limit_to_heads:
        argv.append('--heads')
    if limit_to_tags:
        argv.append('--tags')
    argv.append('--')
    if patterns:
        argv.extend(patterns)
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
    l = tuple(islice(refs, 2))
    if l:
        assert(len(l) == 1)
        return l[0][1]
    else:
        return None
def rev_list_invocation(ref_or_refs, count=None, format=None):
    if isinstance(ref_or_refs, compat.str_type):
        refs = (ref_or_refs,)
    else:
        refs = ref_or_refs
    argv = ['git', 'rev-list']
    if isinstance(count, Integral):
        argv.extend(['-n', str(count)])
    elif count:
        raise ValueError('unexpected count argument %r' % count)

    if format:
        argv.append('--pretty=format:' + format)
    for ref in refs:
        assert not ref.startswith('-')
        argv.append(ref)
    argv.append('--')
    return argv
def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
    """Yield information about commits as per "git rev-list".  If a format
    is not provided, yield one hex hash at a time.  If a format is
    provided, pass it to rev-list and call parse(git_stdout) for each
    commit with the stream positioned just after the rev-list "commit
    HASH" header line.  When a format is provided yield (oidx,
    parse(git_stdout)) for each commit.

    """
    assert bool(parse) == bool(format)
    p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
                                             format=format),
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    if not format:
        for line in p.stdout:
            yield line.strip()
    else:
        line = p.stdout.readline()
        while line:
            s = line.strip()
            if not s.startswith('commit '):
                raise Exception('unexpected line ' + s)
            s = s[7:]
            assert len(s) == 40
            yield s, parse(p.stdout)
            line = p.stdout.readline()

    rv = p.wait()  # not fatal
    if rv:
        raise GitError, 'git rev-list returned error %d' % rv
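# Usage sketch: with no format, rev_list yields one hex hash per commit,
# e.g.:
#
#   for oidx in rev_list('refs/heads/master', repo_dir=some_repo_dir):
#       ...
#
# ('refs/heads/master' and some_repo_dir are stand-ins, not fixed values.)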
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
       string in refs must resolve to a different commit or this
       function will fail."""
    result = []
    for ref in refs:
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
    return result
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    head = read_ref(committish, repo_dir=repo_dir)
    if head:
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
        return head

    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))

    if len(committish) == 40:
        try:
            hash = committish.decode('hex')
        except TypeError:
            return None

        if pL.exists(hash):
            return hash

    return None
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert(refname.startswith('refs/'))
    oldvalue = [] if not oldvalue else [oldvalue]
    p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
                         preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die()."""
    global repodir
    if path:
        repodir = path
    if not repodir:
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    guess_repo(path)
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    guess_repo(path)
    top = repo()
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
        return
    if not pst:
        top_st = stat_if_exists(top)
        if not top_st:
            log('error: repository %r does not exist (see "bup help init")\n'
                % top)
            sys.exit(15)
    log('error: %r is not a repository\n' % top)
    sys.exit(14)
_ver = None
def ver():
    """Get Git's version and ensure a usable version is installed.

    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:

        ('1', '6', '6', '9')
    """
    global _ver
    if not _ver:
        p = subprocess.Popen(['git', '--version'],
                             stdout=subprocess.PIPE)
        gvs = p.stdout.read()
        _git_wait('git --version', p)
        m = re.match(r'git version (\S+.\S+)', gvs)
        if not m:
            raise GitError('git --version weird output: %r' % gvs)
        _ver = tuple(m.group(1).split('.'))
    needed = ('1','5', '3', '1')
    if _ver < needed:
        raise GitError('git version %s or higher is required; you have %s'
                       % ('.'.join(needed), '.'.join(_ver)))
    return _ver
class _AbortableIter:
    def __init__(self, it, onabort = None):
        self.it = it
        self.onabort = onabort
        self.done = None

    def __iter__(self):
        return self

    def next(self):
        try:
            return next(self.it)
        except StopIteration as e:
            self.done = True
            raise
        except:
            self.abort()
            raise

    def abort(self):
        """Abort iteration and call the abortion callback, if needed."""
        if not self.done:
            self.done = True
            if self.onabort:
                self.onabort()

    def __del__(self):
        self.abort()
class CatPipe:
    """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        wanted = ('1','5','6')
        if ver() < wanted:
            log('error: git version must be at least 1.5.6\n')
            sys.exit(1)
        self.p = self.inprogress = None

    def _abort(self):
        if self.p:
            self.p.stdout.close()
            self.p.stdin.close()
        self.p = None
        self.inprogress = None

    def restart(self):
        self._abort()
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  close_fds = True,
                                  bufsize = 4096,
                                  preexec_fn = _gitenv(self.repo_dir))
    def get(self, ref):
        """Yield (oidx, type, size), followed by the data referred to by ref.
        If ref does not exist, only yield (None, None, None).

        """
        if not self.p or self.p.poll() != None:
            self.restart()
        assert(self.p)
        poll_result = self.p.poll()
        assert(poll_result == None)
        if self.inprogress:
            log('get: opening %r while %r is open\n' % (ref, self.inprogress))
        assert(not self.inprogress)
        assert(ref.find('\n') < 0)
        assert(ref.find('\r') < 0)
        assert(not ref.startswith('-'))
        self.inprogress = ref
        self.p.stdin.write('%s\n' % ref)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            yield None, None, None
            return
        info = hdr.split(' ')
        if len(info) != 3 or len(info[0]) != 40:
            raise GitError('expected object (id, type, size), got %r' % info)
        oidx, typ, size = info
        size = int(size)
        it = _AbortableIter(chunkyreader(self.p.stdout, size),
                            onabort=self._abort)
        try:
            yield oidx, typ, size
            for blob in it:
                yield blob
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n')
            self.inprogress = None
        except Exception as e:
            it.abort()
            raise
    def _join(self, it):
        _, typ, _ = next(it)
        if typ == 'blob':
            for blob in it:
                yield blob
        elif typ == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
                    yield blob
        elif typ == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
                yield blob
        else:
            raise GitError('invalid object type %r: expected blob/tree/commit'
                           % typ)
    def join(self, id):
        """Generate a list of the content of all blobs that can be reached
        from an object.  The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
        """
        try:
            for d in self._join(self.get(id)):
                yield d
        except StopIteration:
            raise
_cp = {}

def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    global _cp, repodir
    if not repo_dir:
        repo_dir = repodir or repo()
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    if not cp:
        cp = CatPipe(repo_dir)
        _cp[repo_dir] = cp
    return cp
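# Usage sketch: combined with get_cat_data() above, fetching a blob's
# content looks like this (some_hex_oidx is a stand-in for a real hex id):
#
#   data = get_cat_data(cp().get(some_hex_oidx), 'blob')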
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    tags = {}
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        name = n[len('refs/tags/'):]
        if not c in tags:
            tags[c] = []
        tags[c].append(name)  # more than one tag can point at 'c'
    return tags
class MissingObject(KeyError):
    def __init__(self, oid):
        self.oid = oid
        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...].  The top-level path for a
# chunked file will have a chunk_path of [''].  So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
#   ...
def walk_object(get_ref, oidx, stop_at=None, include_data=None):
    """Yield everything reachable from oidx via get_ref (which must behave
    like CatPipe get) as a WalkItem, stopping whenever stop_at(oidx)
    returns true.  Throw MissingObject if a hash encountered is
    missing from the repository, and don't read or return blob content
    in the data field unless include_data is set.

    """
    # Maintain the pending stack on the heap to avoid stack overflow
    pending = [(oidx, [], [], None)]
    while len(pending):
        oidx, parent_path, chunk_path, mode = pending.pop()
        oid = oidx.decode('hex')
        if stop_at and stop_at(oidx):
            continue

        if (not include_data) and mode and stat.S_ISREG(mode):
            # If the object is a "regular file", then it's a leaf in
            # the graph, so we can skip reading the data if the caller
            # hasn't requested it.
            yield WalkItem(oid=oid, type='blob',
                           chunk_path=chunk_path, path=parent_path,
                           mode=mode,
                           data=None)
            continue

        item_it = get_ref(oidx)
        get_oidx, typ, _ = next(item_it)
        if not get_oidx:
            raise MissingObject(oidx.decode('hex'))
        if typ not in ('blob', 'commit', 'tree'):
            raise Exception('unexpected repository object type %r' % typ)

        # FIXME: set the mode based on the type when the mode is None
        if typ == 'blob' and not include_data:
            # Dump data until we can ask cat_pipe not to fetch it
            for ignored in item_it:
                pass
            data = None
        else:
            data = ''.join(item_it)

        yield WalkItem(oid=oid, type=typ,
                       chunk_path=chunk_path, path=parent_path,
                       mode=mode,
                       data=(data if include_data else None))

        if typ == 'commit':
            commit_items = parse_commit(data)
            for pid in commit_items.parents:
                pending.append((pid, parent_path, chunk_path, mode))
            pending.append((commit_items.tree, parent_path, chunk_path,
                            hashsplit.GIT_MODE_TREE))
        elif typ == 'tree':
            for mode, name, ent_id in tree_decode(data):
                demangled, bup_type = demangle_name(name, mode)
                if chunk_path:
                    sub_path = parent_path
                    sub_chunk_path = chunk_path + [name]
                else:
                    sub_path = parent_path + [name]
                    if bup_type == BUP_CHUNKED:
                        sub_chunk_path = ['']
                    else:
                        sub_chunk_path = chunk_path
                pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,
                                mode))