1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 from __future__ import absolute_import
7 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
8 from collections import namedtuple
9 from itertools import islice
10 from numbers import Integral
12 from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
13 from bup.compat import range
14 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
16 hostname, localtime, log,
19 mmap_read, mmap_readwrite,
21 progress, qprogress, shstr, stat_if_exists,
22 unlink, username, userfullname,
27 repodir = None # The default repository, once initialized
29 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
30 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
36 class GitError(Exception):
40 def _gitenv(repo_dir=None):
43 return merge_dict(os.environ, {'GIT_DIR': os.path.abspath(repo_dir)})
45 def _git_wait(cmd, p):
48 raise GitError('%s returned %d' % (shstr(cmd), rv))
50 def _git_capture(argv):
51 p = subprocess.Popen(argv, stdout=subprocess.PIPE, env=_gitenv())
53 _git_wait(repr(argv), p)
56 def git_config_get(option, repo_dir=None):
57 cmd = ('git', 'config', '--get', option)
58 p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
59 env=_gitenv(repo_dir=repo_dir))
65 raise GitError('%s returned %d' % (cmd, rc))
69 def parse_tz_offset(s):
70 """UTC offset in seconds."""
71 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
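# Hedged sanity check for the parser above (assumes the elided tail of
# parse_tz_offset() applies the leading '-' sign and returns the offset).
def _example_parse_tz_offset():
    assert parse_tz_offset('+0130') == 5400    # 1h30m east of UTC
    assert parse_tz_offset('-0500') == -18000  # 5h west of UTC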
77 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
78 # Make sure that's authoritative.
79 _start_end_char = r'[^ .,:;<>"\'\0\n]'
80 _content_char = r'[^\0\n<>]'
81 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
83 _start_end_char, _content_char, _start_end_char)
84 _tz_rx = r'[-+]\d\d[0-5]\d'
85 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
86 # Assumes every following line starting with a space is part of the
87 # mergetag. Is there a formal commit blob spec?
88 _mergetag_rx = r'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]*)*)'
89 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
90 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
91 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
93 (?P<message>(?:.|\n)*)''' % (_parent_rx,
94 _safe_str_rx, _safe_str_rx, _tz_rx,
95 _safe_str_rx, _safe_str_rx, _tz_rx,
97 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
99 # Note that the author_sec and committer_sec values are (UTC) epoch
100 # seconds, and for now the mergetag is not included.
101 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
102 'author_name', 'author_mail',
103 'author_sec', 'author_offset',
104 'committer_name', 'committer_mail',
105 'committer_sec', 'committer_offset',
108 def parse_commit(content):
109 commit_match = re.match(_commit_rx, content)
111 raise Exception('cannot parse commit %r' % content)
112 matches = commit_match.groupdict()
113 return CommitInfo(tree=matches['tree'],
114 parents=re.findall(_parent_hash_rx, matches['parents']),
115 author_name=matches['author_name'],
116 author_mail=matches['author_mail'],
117 author_sec=int(matches['asec']),
118 author_offset=parse_tz_offset(matches['atz']),
119 committer_name=matches['committer_name'],
120 committer_mail=matches['committer_mail'],
121 committer_sec=int(matches['csec']),
122 committer_offset=parse_tz_offset(matches['ctz']),
123 message=matches['message'])
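# Hedged illustration of parse_commit() on a minimal, hypothetical commit
# blob.  The offset asserts assume parse_tz_offset() applies the sign as
# its elided tail suggests.
def _example_parse_commit():
    sample = ('tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n'
              'author A U Thor <author@example.com> 1234567890 +0000\n'
              'committer C O Mitter <committer@example.com> 1234567890 -0500\n'
              '\n'
              'Example commit message.\n')
    info = parse_commit(sample)
    assert info.tree == '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
    assert info.parents == []
    assert info.author_sec == 1234567890
    assert info.author_offset == 0
    assert info.committer_offset == -18000
    assert info.message == 'Example commit message.\n'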
126 def get_cat_data(cat_iterator, expected_type):
127 _, kind, _ = next(cat_iterator)
128 if kind != expected_type:
129 raise Exception('expected %r, saw %r' % (expected_type, kind))
130 return ''.join(cat_iterator)
132 def get_commit_items(id, cp):
133 return parse_commit(get_cat_data(cp.get(id), 'commit'))
135 def _local_git_date_str(epoch_sec):
136 return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))
139 def _git_date_str(epoch_sec, tz_offset_sec):
140 offs = tz_offset_sec // 60
141 return '%d %s%02d%02d' \
143 '+' if offs >= 0 else '-',
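# Hedged example of the formatter above (assumes its elided tail renders
# abs(offs) as HHMM): git's "epoch-seconds +HHMM" commit date form.
def _example_git_date_str():
    assert _git_date_str(1234567890, -18000) == '1234567890 -0500'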
148 def repo(sub = '', repo_dir=None):
149 """Get the path to the git repository or one of its subdirectories."""
150 repo_dir = repo_dir or repodir
152 raise GitError('You should call check_repo_or_die()')
154 # If there's a .git subdirectory, then the actual repo is in there.
155 gd = os.path.join(repo_dir, '.git')
156 if os.path.exists(gd):
159 return os.path.join(repo_dir, sub)
163 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
168 full = os.path.abspath(path)
169 fullrepo = os.path.abspath(repo(''))
170 if not fullrepo.endswith('/'):
172 if full.startswith(fullrepo):
173 path = full[len(fullrepo):]
174 if path.startswith('index-cache/'):
175 path = path[len('index-cache/'):]
176 return shorten_hash(path)
180 paths = [repo('objects/pack')]
181 paths += glob.glob(repo('index-cache/*/.'))
185 def auto_midx(objdir):
186 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
188 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
190 # make sure 'args' gets printed to help with debugging
191 add_error('%r: exception: %s' % (args, e))
194 add_error('%r: returned %d' % (args, rv))
196 args = [path.exe(), 'bloom', '--dir', objdir]
198 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
200 # make sure 'args' gets printed to help with debugging
201 add_error('%r: exception: %s' % (args, e))
204 add_error('%r: returned %d' % (args, rv))
207 def mangle_name(name, mode, gitmode):
208 """Mangle a file name to present an abstract name for segmented files.
209 Mangled file names will have the ".bup" extension added to them. If a
210 file's name already ends with ".bup", a ".bupl" extension is added to
211 disambiguate normal files from segmented ones.
213 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
214 assert(stat.S_ISDIR(gitmode))
216 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
217 return name + '.bupl'
222 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
223 def demangle_name(name, mode):
224 """Remove name mangling from a file name, if necessary.
226 The return value is a tuple (demangled_filename,mode), where mode is one of
229 * BUP_NORMAL : files that should be read as-is from the repository
230 * BUP_CHUNKED : files that were chunked and need to be reassembled
232 For more information on the name mangling algorithm, see mangle_name()
234 if name.endswith('.bupl'):
235 return (name[:-5], BUP_NORMAL)
236 elif name.endswith('.bup'):
237 return (name[:-4], BUP_CHUNKED)
238 elif name.endswith('.bupm'):
240 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
242 return (name, BUP_NORMAL)
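# Hedged round-trip sketch for the mangling pair above.  The first assert
# relies on the elided branch of mangle_name() appending '.bup', per its
# docstring.
def _example_name_mangling():
    regfile = stat.S_IFREG | 0o644
    tree = stat.S_IFDIR | 0o755
    assert mangle_name('foo', regfile, tree) == 'foo.bup'
    assert demangle_name('foo.bup', regfile) == ('foo', BUP_CHUNKED)
    assert demangle_name('bar.bupl', regfile) == ('bar', BUP_NORMAL)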
245 def calc_hash(type, content):
246 """Calculate some content's hash in the Git fashion."""
247 header = '%s %d\0' % (type, len(content))
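# Hedged check: assuming calc_hash() returns the 20-byte binary digest
# (as maybe_write()'s use of it below implies), hashing an empty blob
# must reproduce git's well-known empty-blob id.
def _example_calc_hash():
    assert calc_hash('blob', '').encode('hex') == \
        'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'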
253 def shalist_item_sort_key(ent):
254 (mode, name, id) = ent
255 assert(mode+0 == mode)
256 if stat.S_ISDIR(mode):
262 def tree_encode(shalist):
263 """Generate a git tree object from (mode,name,hash) tuples."""
264 shalist = sorted(shalist, key = shalist_item_sort_key)
266 for (mode,name,bin) in shalist:
268 assert(mode+0 == mode)
270 assert(len(bin) == 20)
271 s = '%o %s\0%s' % (mode,name,bin)
272 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
277 def tree_decode(buf):
278 """Generate a list of (mode,name,hash) from the git tree object in buf."""
280 while ofs < len(buf):
281 z = buf.find('\0', ofs)
283 spl = buf[ofs:z].split(' ', 1)
284 assert(len(spl) == 2)
286 sha = buf[z+1:z+1+20]
288 yield (int(mode, 8), name, sha)
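# Hedged round-trip sketch (assumes the elided tail of tree_encode()
# concatenates and returns the encoded entries).
def _example_tree_roundtrip():
    sha = '\x01' * 20  # placeholder 20-byte object id
    buf = tree_encode([(0o100644, 'file', sha)])
    assert list(tree_decode(buf)) == [(0o100644, 'file', sha)]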
291 def _encode_packobj(type, content, compression_level=1):
292 if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
293 raise ValueError('invalid compression level %s' % compression_level)
296 szbits = (sz & 0x0f) | (_typemap[type]<<4)
299 if sz: szbits |= 0x80
305 z = zlib.compressobj(compression_level)
307 yield z.compress(content)
311 def _encode_looseobj(type, content, compression_level=1):
312 z = zlib.compressobj(compression_level)
313 yield z.compress('%s %d\0' % (type, len(content)))
314 yield z.compress(content)
318 def _decode_looseobj(buf):
320 s = zlib.decompress(buf)
327 assert(type in _typemap)
328 assert(sz == len(content))
329 return (type, content)
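# Hedged round-trip sketch for the loose-object codec (assumes the elided
# tail of _encode_looseobj() flushes the compressor).
def _example_looseobj_roundtrip():
    packed = ''.join(_encode_looseobj('blob', 'hi'))
    assert _decode_looseobj(packed) == ('blob', 'hi')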
332 def _decode_packobj(buf):
335 type = _typermap[(c & 0x70) >> 4]
342 sz |= (c & 0x7f) << shift
346 return (type, zlib.decompress(buf[i+1:]))
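# Hedged round-trip sketch for the pack-object codec above (again assumes
# the elided encoder tail flushes the compressor).
def _example_packobj_roundtrip():
    payload = 'hello bup'  # hypothetical content
    packed = ''.join(_encode_packobj('blob', payload))
    assert _decode_packobj(packed) == ('blob', payload)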
class PackIdx:
    def find_offset(self, hash):
354 """Get the offset of an object inside the index file."""
355 idx = self._idx_from_hash(hash)
357 return self._ofs_from_idx(idx)
360 def exists(self, hash, want_source=False):
361 """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) is not None):
363 return want_source and os.path.basename(self.name) or True
    def __len__(self):
        return int(self.fanout[255])
369 def _idx_from_hash(self, hash):
370 global _total_searches, _total_steps
372 assert(len(hash) == 20)
374 start = self.fanout[b1-1] # range -1..254
375 end = self.fanout[b1] # range 0..255
377 _total_steps += 1 # lookup table is a step
            mid = start + (end-start)//2
381 v = self._idx_to_hash(mid)
391 class PackIdxV1(PackIdx):
392 """Object representation of a Git pack index (version 1) file."""
393 def __init__(self, filename, f):
395 self.idxnames = [self.name]
396 self.map = mmap_read(f)
397 self.fanout = list(struct.unpack('!256I', buffer(self.map, 0, 256 * 4)))
398 self.fanout.append(0) # entry "-1"
399 nsha = self.fanout[255]
401 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
403 def _ofs_from_idx(self, idx):
405 return struct.unpack('!I', self.shatable[ofs : ofs + 4])[0]
407 def _idx_to_hash(self, idx):
409 return self.shatable[ofs : ofs + 20]
412 count = self.fanout[255]
414 for ofs in range(start, start + (24 * count), 24):
415 yield self.map[ofs : ofs + 20]
418 class PackIdxV2(PackIdx):
419 """Object representation of a Git pack index (version 2) file."""
420 def __init__(self, filename, f):
422 self.idxnames = [self.name]
423 self.map = mmap_read(f)
424 assert self.map[0:8] == b'\377tOc\0\0\0\2'
425 self.fanout = list(struct.unpack('!256I',
                                         buffer(self.map, 8, 256 * 4)))
427 self.fanout.append(0) # entry "-1"
428 nsha = self.fanout[255]
429 self.sha_ofs = 8 + 256*4
430 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
431 self.ofstable = buffer(self.map,
432 self.sha_ofs + nsha*20 + nsha*4,
434 self.ofs64table = buffer(self.map,
435 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
437 def _ofs_from_idx(self, idx):
439 ofs = struct.unpack('!I', self.ofstable[i : i + 4])[0]
441 idx64 = ofs & 0x7fffffff
443 ofs = struct.unpack('!Q', self.ofs64table[idx64_i : idx64_i + 8])[0]
446 def _idx_to_hash(self, idx):
447 return self.shatable[idx * 20 : (idx + 1) * 20]
450 count = self.fanout[255]
452 for ofs in range(start, start + (20 * count), 20):
453 yield self.map[ofs : ofs + 20]
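# Hedged illustration of the fanout table used by both index versions:
# fanout[b] counts the objects whose first byte is <= b, so the slice
# [fanout[b1-1], fanout[b1]) brackets every sha beginning with b1, and the
# binary search in _idx_from_hash() only has to scan that slice.
def _example_fanout_bracket(ix, first_byte):
    # ix: any PackIdx; first_byte: integer 0..255 (hypothetical inputs).
    return ix.fanout[first_byte - 1], ix.fanout[first_byte]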
class PackIdxList:
    def __init__(self, dir):
460 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
465 self.do_bloom = False
472 assert(_mpi_count == 0)
475 return iter(idxmerge(self.packs))
478 return sum(len(pack) for pack in self.packs)
480 def exists(self, hash, want_source=False):
481 """Return nonempty if the object exists in the index files."""
482 global _total_searches
484 if hash in self.also:
486 if self.do_bloom and self.bloom:
487 if self.bloom.exists(hash):
488 self.do_bloom = False
490 _total_searches -= 1 # was counted by bloom
        for i in range(len(self.packs)):
494 _total_searches -= 1 # will be incremented by sub-pack
495 ix = p.exists(hash, want_source=want_source)
497 # reorder so most recently used packs are searched first
498 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
503 def refresh(self, skip_midx = False):
504 """Refresh the index list.
        This method checks whether any .midx files have been superseded (i.e.,
        all of their contents are in another, bigger .midx file) and removes the superseded
509 If skip_midx is True, all work on .midx files will be skipped and .midx
510 files will be removed from the list.
512 The module-global variable 'ignore_midx' can force this function to
513 always act as if skip_midx was True.
        self.bloom = None # Always reopen the bloom as it may have been replaced
516 self.do_bloom = False
517 skip_midx = skip_midx or ignore_midx
518 d = dict((p.name, p) for p in self.packs
519 if not skip_midx or not isinstance(p, midx.PackMidx))
520 if os.path.exists(self.dir):
523 for ix in self.packs:
524 if isinstance(ix, midx.PackMidx):
525 for name in ix.idxnames:
526 d[os.path.join(self.dir, name)] = ix
527 for full in glob.glob(os.path.join(self.dir,'*.midx')):
529 mx = midx.PackMidx(full)
530 (mxd, mxf) = os.path.split(mx.name)
532 for n in mx.idxnames:
533 if not os.path.exists(os.path.join(mxd, n)):
534 log(('warning: index %s missing\n' +
535 ' used by %s\n') % (n, mxf))
543 midxl.sort(key=lambda ix:
544 (-len(ix), -xstat.stat(ix.name).st_mtime))
547 for sub in ix.idxnames:
548 found = d.get(os.path.join(self.dir, sub))
549 if not found or isinstance(found, PackIdx):
550 # doesn't exist, or exists but not in a midx
555 for name in ix.idxnames:
556 d[os.path.join(self.dir, name)] = ix
557 elif not ix.force_keep:
558 debug1('midx: removing redundant: %s\n'
559 % os.path.basename(ix.name))
562 for full in glob.glob(os.path.join(self.dir,'*.idx')):
566 except GitError as e:
570 bfull = os.path.join(self.dir, 'bup.bloom')
571 if self.bloom is None and os.path.exists(bfull):
572 self.bloom = bloom.ShaBloom(bfull)
573 self.packs = list(set(d.values()))
574 self.packs.sort(reverse=True, key=lambda x: len(x))
575 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
579 debug1('PackIdxList: using %d index%s.\n'
580 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
583 """Insert an additional object in the list."""
587 def open_idx(filename):
588 if filename.endswith('.idx'):
589 f = open(filename, 'rb')
591 if header[0:4] == '\377tOc':
592 version = struct.unpack('!I', header[4:8])[0]
594 return PackIdxV2(filename, f)
596 raise GitError('%s: expected idx file version 2, got %d'
597 % (filename, version))
598 elif len(header) == 8 and header[0:4] < '\377tOc':
599 return PackIdxV1(filename, f)
601 raise GitError('%s: unrecognized idx file header' % filename)
602 elif filename.endswith('.midx'):
603 return midx.PackMidx(filename)
605 raise GitError('idx filenames must end with .idx or .midx')
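# Hedged usage sketch: open_idx() dispatches purely on the file name, so
# callers can hand it any .idx/.midx path (the argument is hypothetical).
def _example_count_objects(idx_path):
    ix = open_idx(idx_path)
    return len(ix)  # number of objects the index covers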
608 def idxmerge(idxlist, final_progress=True):
609 """Generate a list of all the objects reachable in a PackIdxList."""
610 def pfunc(count, total):
611 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
612 % (count*100.0/total, count, total))
613 def pfinal(count, total):
615 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
616 % (100, total, total))
617 return merge_iter(idxlist, 10024, pfunc, pfinal)
620 def _make_objcache():
621 return PackIdxList(repo('objects/pack'))
623 # bup-gc assumes that it can disable all PackWriter activities
624 # (bloom/midx/cache) via the constructor and close() arguments.
627 """Writes Git objects inside a pack file."""
628 def __init__(self, objcache_maker=_make_objcache, compression_level=1,
629 run_midx=True, on_pack_finish=None,
630 max_pack_size=None, max_pack_objects=None, repo_dir=None):
631 self.repo_dir = repo_dir or repo()
638 self.objcache_maker = objcache_maker
640 self.compression_level = compression_level
641 self.run_midx=run_midx
642 self.on_pack_finish = on_pack_finish
643 if not max_pack_size:
644 max_pack_size = git_config_get('pack.packSizeLimit',
645 repo_dir=self.repo_dir)
646 if max_pack_size is not None:
647 max_pack_size = parse_num(max_pack_size)
648 if not max_pack_size:
649 # larger packs slow down pruning
650 max_pack_size = 1000 * 1000 * 1000
651 self.max_pack_size = max_pack_size
652 # cache memory usage is about 83 bytes per object
653 self.max_pack_objects = max_pack_objects if max_pack_objects \
654 else max(1, self.max_pack_size // 5000)
662 def __exit__(self, type, value, traceback):
667 objdir = dir = os.path.join(self.repo_dir, 'objects')
668 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
670 self.file = os.fdopen(fd, 'w+b')
675 self.parentfd = os.open(objdir, os.O_RDONLY)
681 assert(name.endswith('.pack'))
682 self.filename = name[:-5]
683 self.file.write('PACK\0\0\0\2\0\0\0\0')
        self.idx = list(list() for i in range(256))
686 def _raw_write(self, datalist, sha):
689 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
690 # the file never has a *partial* blob. So let's make sure it's
691 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
692 # to our hashsplit algorithm.) f.write() does its own buffering,
693 # but that's okay because we'll flush it in _end().
694 oneblob = ''.join(datalist)
698 raise GitError, e, sys.exc_info()[2]
700 crc = zlib.crc32(oneblob) & 0xffffffff
701 self._update_idx(sha, crc, nw)
706 def _update_idx(self, sha, crc, size):
709 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
711 def _write(self, sha, type, content):
715 sha = calc_hash(type, content)
716 size, crc = self._raw_write(_encode_packobj(type, content,
717 self.compression_level),
719 if self.outbytes >= self.max_pack_size \
720 or self.count >= self.max_pack_objects:
724 def breakpoint(self):
725 """Clear byte and object counts and return the last processed id."""
726 id = self._end(self.run_midx)
727 self.outbytes = self.count = 0
730 def _require_objcache(self):
731 if self.objcache is None and self.objcache_maker:
732 self.objcache = self.objcache_maker()
733 if self.objcache is None:
735 "PackWriter not opened or can't check exists w/o objcache")
737 def exists(self, id, want_source=False):
738 """Return non-empty if an object is found in the object cache."""
739 self._require_objcache()
740 return self.objcache.exists(id, want_source=want_source)
742 def just_write(self, sha, type, content):
743 """Write an object to the pack file without checking for duplication."""
744 self._write(sha, type, content)
745 # If nothing else, gc doesn't have/want an objcache
746 if self.objcache is not None:
747 self.objcache.add(sha)
749 def maybe_write(self, type, content):
750 """Write an object to the pack file if not present and return its id."""
751 sha = calc_hash(type, content)
752 if not self.exists(sha):
753 self._require_objcache()
754 self.just_write(sha, type, content)
757 def new_blob(self, blob):
758 """Create a blob object in the pack with the supplied content."""
759 return self.maybe_write('blob', blob)
761 def new_tree(self, shalist):
762 """Create a tree object in the pack."""
763 content = tree_encode(shalist)
764 return self.maybe_write('tree', content)
766 def new_commit(self, tree, parent,
767 author, adate_sec, adate_tz,
768 committer, cdate_sec, cdate_tz,
770 """Create a commit object in the pack. The date_sec values must be
771 epoch-seconds, and if a tz is None, the local timezone is assumed."""
773 adate_str = _git_date_str(adate_sec, adate_tz)
775 adate_str = _local_git_date_str(adate_sec)
777 cdate_str = _git_date_str(cdate_sec, cdate_tz)
779 cdate_str = _local_git_date_str(cdate_sec)
781 if tree: l.append('tree %s' % tree.encode('hex'))
782 if parent: l.append('parent %s' % parent.encode('hex'))
783 if author: l.append('author %s %s' % (author, adate_str))
784 if committer: l.append('committer %s %s' % (committer, cdate_str))
787 return self.maybe_write('commit', '\n'.join(l))
790 """Remove the pack file from disk."""
799 os.unlink(self.filename + '.pack')
806 def _end(self, run_midx=True):
808 if not f: return None
815 # update object count
817 cp = struct.pack('!i', self.count)
821 # calculate the pack sha1sum
824 for b in chunkyreader(f):
826 packbin = sum.digest()
828 fdatasync(f.fileno())
832 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
833 nameprefix = os.path.join(self.repo_dir,
834 'objects/pack/pack-' + obj_list_sha)
835 if os.path.exists(self.filename + '.map'):
836 os.unlink(self.filename + '.map')
837 os.rename(self.filename + '.pack', nameprefix + '.pack')
838 os.rename(self.filename + '.idx', nameprefix + '.idx')
840 os.fsync(self.parentfd)
842 os.close(self.parentfd)
845 auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
847 if self.on_pack_finish:
848 self.on_pack_finish(nameprefix)
852 def close(self, run_midx=True):
853 """Close the pack file and move it to its definitive path."""
854 return self._end(run_midx=run_midx)
856 def _write_pack_idx_v2(self, filename, idx, packbin):
859 for entry in section:
860 if entry[2] >= 2**31:
863 # Length: header + fan-out + shas-and-crcs + overflow-offsets
864 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
866 idx_f = open(filename, 'w+b')
868 idx_f.truncate(index_len)
869 fdatasync(idx_f.fileno())
870 idx_map = mmap_readwrite(idx_f, close=False)
872 count = _helpers.write_idx(filename, idx_map, idx, self.count)
873 assert(count == self.count)
880 idx_f = open(filename, 'a+b')
885 b = idx_f.read(8 + 4*256)
888 obj_list_sum = Sha1()
889 for b in chunkyreader(idx_f, 20*self.count):
891 obj_list_sum.update(b)
892 namebase = obj_list_sum.hexdigest()
894 for b in chunkyreader(idx_f):
896 idx_f.write(idx_sum.digest())
897 fdatasync(idx_f.fileno())
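# Hedged usage sketch of a writer session (assumes check_repo_or_die()
# has already located the repository).
def _example_pack_writer(data):
    w = PackWriter()
    try:
        sha = w.new_blob(data)  # write the blob, or dedupe if present
    finally:
        w.close()               # finish the pack and update the indexes
    return sha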
903 def list_refs(patterns=None, repo_dir=None,
904 limit_to_heads=False, limit_to_tags=False):
905 """Yield (refname, hash) tuples for all repository refs unless
906 patterns are specified. In that case, only include tuples for
907 refs matching those patterns (cf. git-show-ref(1)). The limits
908 restrict the result items to refs/heads or refs/tags. If both
909 limits are specified, items from both sources will be included.
912 argv = ['git', 'show-ref']
914 argv.append('--heads')
916 argv.append('--tags')
919 argv.extend(patterns)
920 p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
921 out = p.stdout.read().strip()
922 rv = p.wait() # not fatal
926 for d in out.split('\n'):
927 (sha, name) = d.split(' ', 1)
928 yield (name, sha.decode('hex'))
931 def read_ref(refname, repo_dir = None):
932 """Get the commit id of the most recent commit made on a given ref."""
933 refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
934 l = tuple(islice(refs, 2))
942 def rev_list_invocation(ref_or_refs, count=None, format=None):
943 if isinstance(ref_or_refs, compat.str_type):
944 refs = (ref_or_refs,)
947 argv = ['git', 'rev-list']
948 if isinstance(count, Integral):
949 argv.extend(['-n', str(count)])
951 raise ValueError('unexpected count argument %r' % count)
954 argv.append('--pretty=format:' + format)
956 assert not ref.startswith('-')
962 def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
963 """Yield information about commits as per "git rev-list". If a format
964 is not provided, yield one hex hash at a time. If a format is
965 provided, pass it to rev-list and call parse(git_stdout) for each
966 commit with the stream positioned just after the rev-list "commit
967 HASH" header line. When a format is provided yield (oidx,
968 parse(git_stdout)) for each commit.
971 assert bool(parse) == bool(format)
972 p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
974 env=_gitenv(repo_dir),
975 stdout = subprocess.PIPE)
977 for line in p.stdout:
980 line = p.stdout.readline()
983 if not s.startswith('commit '):
984 raise Exception('unexpected line ' + s)
987 yield s, parse(p.stdout)
988 line = p.stdout.readline()
    rv = p.wait()
992 raise GitError, 'git rev-list returned error %d' % rv
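# Hedged sketch of the format/parse pairing described above: the parser
# reads exactly the one line the format printed for each commit.
def _example_author_dates(ref):
    def parse(f):
        return int(f.readline().strip())
    return list(rev_list(ref, format='%at', parse=parse))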
995 def get_commit_dates(refs, repo_dir=None):
996 """Get the dates for the specified commit refs. For now, every unique
997 string in refs must resolve to a different commit or this
998 function will fail."""
1001 commit = get_commit_items(ref, cp(repo_dir))
1002 result.append(commit.author_sec)
1006 def rev_parse(committish, repo_dir=None):
1007 """Resolve the full hash for 'committish', if it exists.
1009 Should be roughly equivalent to 'git rev-parse'.
1011 Returns the hex value of the hash if it is found, None if 'committish' does
1012 not correspond to anything.
1014 head = read_ref(committish, repo_dir=repo_dir)
1016 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
1019 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
1021 if len(committish) == 40:
1023 hash = committish.decode('hex')
1033 def update_ref(refname, newval, oldval, repo_dir=None):
1034 """Update a repository reference."""
1037 assert(refname.startswith('refs/heads/') \
1038 or refname.startswith('refs/tags/'))
1039 p = subprocess.Popen(['git', 'update-ref', refname,
1040 newval.encode('hex'), oldval.encode('hex')],
1041 env=_gitenv(repo_dir))
1042 _git_wait('git update-ref', p)
1045 def delete_ref(refname, oldvalue=None):
1046 """Delete a repository reference (see git update-ref(1))."""
1047 assert(refname.startswith('refs/'))
1048 oldvalue = [] if not oldvalue else [oldvalue]
1049 p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
1051 _git_wait('git update-ref', p)
1054 def guess_repo(path=None):
1055 """Set the path value in the global variable "repodir".
1056 This makes bup look for an existing bup repository, but not fail if a
1057 repository doesn't exist. Usually, if you are interacting with a bup
1058 repository, you would not be calling this function but using
1059 check_repo_or_die().
1065 repodir = os.environ.get('BUP_DIR')
1067 repodir = os.path.expanduser('~/.bup')
1070 def init_repo(path=None):
1071 """Create the Git bare repository for bup in a given path."""
1073 d = repo() # appends a / to the path
1074 parent = os.path.dirname(os.path.dirname(d))
1075 if parent and not os.path.exists(parent):
1076 raise GitError('parent directory "%s" does not exist\n' % parent)
1077 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
1078 raise GitError('"%s" exists but is not a directory\n' % d)
1079 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
1081 _git_wait('git init', p)
1082 # Force the index version configuration in order to ensure bup works
1083 # regardless of the version of the installed Git binary.
1084 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
1085 stdout=sys.stderr, env=_gitenv())
1086 _git_wait('git config', p)
1088 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
1089 stdout=sys.stderr, env=_gitenv())
1090 _git_wait('git config', p)
1093 def check_repo_or_die(path=None):
1094 """Check to see if a bup repository probably exists, and abort if not."""
1097 pst = stat_if_exists(top + '/objects/pack')
1098 if pst and stat.S_ISDIR(pst.st_mode):
1101 top_st = stat_if_exists(top)
1103 log('error: repository %r does not exist (see "bup help init")\n'
1106 log('error: %r is not a repository\n' % top)
1112 """Get Git's version and ensure a usable version is installed.
1114 The returned version is formatted as an ordered tuple with each position
1115 representing a digit in the version tag. For example, the following tuple
1116 would represent version 1.6.6.9:
1118 ('1', '6', '6', '9')
1122 p = subprocess.Popen(['git', '--version'],
1123 stdout=subprocess.PIPE)
1124 gvs = p.stdout.read()
1125 _git_wait('git --version', p)
    m = re.match(r'git version (\S+\.\S+)', gvs)
1128 raise GitError('git --version weird output: %r' % gvs)
1129 _ver = tuple(m.group(1).split('.'))
1130 needed = ('1','5', '3', '1')
1132 raise GitError('git version %s or higher is required; you have %s'
1133 % ('.'.join(needed), '.'.join(_ver)))
1137 class _AbortableIter:
1138 def __init__(self, it, onabort = None):
1140 self.onabort = onabort
1148 return next(self.it)
1149 except StopIteration as e:
1157 """Abort iteration and call the abortion callback, if needed."""
1169 """Link to 'git cat-file' that is used to retrieve blob data."""
1170 def __init__(self, repo_dir = None):
1172 self.repo_dir = repo_dir
1173 wanted = ('1','5','6')
1175 log('error: git version must be at least 1.5.6\n')
1177 self.p = self.inprogress = None
1181 self.p.stdout.close()
1182 self.p.stdin.close()
1184 self.inprogress = None
1188 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1189 stdin=subprocess.PIPE,
1190 stdout=subprocess.PIPE,
1193 env=_gitenv(self.repo_dir))
1196 """Yield (oidx, type, size), followed by the data referred to by ref.
1197 If ref does not exist, only yield (None, None, None).
        if not self.p or self.p.poll() is not None:
        poll_result = self.p.poll()
        assert(poll_result is None)
1206 log('get: opening %r while %r is open\n' % (ref, self.inprogress))
1207 assert(not self.inprogress)
1208 assert(ref.find('\n') < 0)
1209 assert(ref.find('\r') < 0)
1210 assert(not ref.startswith('-'))
1211 self.inprogress = ref
1212 self.p.stdin.write('%s\n' % ref)
1213 self.p.stdin.flush()
1214 hdr = self.p.stdout.readline()
1215 if hdr.endswith(' missing\n'):
1216 self.inprogress = None
1217 yield None, None, None
1219 info = hdr.split(' ')
1220 if len(info) != 3 or len(info[0]) != 40:
1221 raise GitError('expected object (id, type, size), got %r' % info)
1222 oidx, typ, size = info
1224 it = _AbortableIter(chunkyreader(self.p.stdout, size),
1225 onabort=self._abort)
1227 yield oidx, typ, size
1230 readline_result = self.p.stdout.readline()
1231 assert(readline_result == '\n')
1232 self.inprogress = None
1233 except Exception as e:
1237 def _join(self, it):
1238 _, typ, _ = next(it)
1243 treefile = ''.join(it)
1244 for (mode, name, sha) in tree_decode(treefile):
1245 for blob in self.join(sha.encode('hex')):
1247 elif typ == 'commit':
1248 treeline = ''.join(it).split('\n')[0]
1249 assert(treeline.startswith('tree '))
1250 for blob in self.join(treeline[5:]):
1253 raise GitError('invalid object type %r: expected blob/tree/commit'
1257 """Generate a list of the content of all blobs that can be reached
1258 from an object. The hash given in 'id' must point to a blob, a tree
1259 or a commit. The content of all blobs that can be seen from trees or
1260 commits will be added to the list.
1263 for d in self._join(self.get(id)):
1265 except StopIteration:
1271 def cp(repo_dir=None):
1272 """Create a CatPipe object or reuse the already existing one."""
1275 repo_dir = repodir or repo()
1276 repo_dir = os.path.abspath(repo_dir)
1277 cp = _cp.get(repo_dir)
1279 cp = CatPipe(repo_dir)
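# Hedged usage sketch: stream one object through the shared CatPipe
# (assumes cp() returns the pipe it creates, per its docstring).  get()
# yields an (oidx, type, size) header tuple and then the data chunks; a
# missing ref yields (None, None, None) only.
def _example_cat(oidx):
    it = cp().get(oidx)
    oidx_, typ, size = next(it)
    if oidx_ is None:
        return None
    return typ, ''.join(it)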
1284 def tags(repo_dir = None):
1285 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1287 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1288 assert(n.startswith('refs/tags/'))
1292 tags[c].append(name) # more than one tag can point at 'c'
1296 class MissingObject(KeyError):
1297 def __init__(self, oid):
1299 KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
1302 WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
1303 'path', 'chunk_path', 'data'])
1304 # The path is the mangled path, and if an item represents a fragment
1305 # of a chunked file, the chunk_path will be the chunked subtree path
1306 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1307 # chunked file will have a chunk_path of ['']. So some chunk subtree
1308 # of the file '/foo/bar/baz' might look like this:
1310 # item.path = ['foo', 'bar', 'baz.bup']
1311 # item.chunk_path = ['', '2d3115e', '016b097']
1312 # item.type = 'tree'
1316 def walk_object(get_ref, oidx, stop_at=None, include_data=None):
1317 """Yield everything reachable from oidx via get_ref (which must behave
1318 like CatPipe get) as a WalkItem, stopping whenever stop_at(oidx)
1319 returns true. Throw MissingObject if a hash encountered is
1320 missing from the repository, and don't read or return blob content
1321 in the data field unless include_data is set.
1324 # Maintain the pending stack on the heap to avoid stack overflow
1325 pending = [(oidx, [], [], None)]
1327 oidx, parent_path, chunk_path, mode = pending.pop()
1328 oid = oidx.decode('hex')
1329 if stop_at and stop_at(oidx):
1332 if (not include_data) and mode and stat.S_ISREG(mode):
1333 # If the object is a "regular file", then it's a leaf in
1334 # the graph, so we can skip reading the data if the caller
1335 # hasn't requested it.
1336 yield WalkItem(oid=oid, type='blob',
1337 chunk_path=chunk_path, path=parent_path,
1342 item_it = get_ref(oidx)
1343 get_oidx, typ, _ = next(item_it)
1345 raise MissingObject(oidx.decode('hex'))
1346 if typ not in ('blob', 'commit', 'tree'):
1347 raise Exception('unexpected repository object type %r' % typ)
1349 # FIXME: set the mode based on the type when the mode is None
1350 if typ == 'blob' and not include_data:
1351 # Dump data until we can ask cat_pipe not to fetch it
1352 for ignored in item_it:
1356 data = ''.join(item_it)
1358 yield WalkItem(oid=oid, type=typ,
1359 chunk_path=chunk_path, path=parent_path,
1361 data=(data if include_data else None))
1364 commit_items = parse_commit(data)
1365 for pid in commit_items.parents:
1366 pending.append((pid, parent_path, chunk_path, mode))
1367 pending.append((commit_items.tree, parent_path, chunk_path,
1368 hashsplit.GIT_MODE_TREE))
1370 for mode, name, ent_id in tree_decode(data):
1371 demangled, bup_type = demangle_name(name, mode)
1373 sub_path = parent_path
1374 sub_chunk_path = chunk_path + [name]
1376 sub_path = parent_path + [name]
1377 if bup_type == BUP_CHUNKED:
1378 sub_chunk_path = ['']
1380 sub_chunk_path = chunk_path
1381 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,