1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
9 from numbers import Integral
11 from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
12 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
14 hostname, localtime, log, merge_iter,
15 mmap_read, mmap_readwrite,
17 progress, qprogress, stat_if_exists,
18 unlink, username, userfullname,
repodir = None    # The default repository, once initialized

# Mapping between git object type names and the numeric type ids used in
# pack files (and back).  These must agree with git's pack format.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
class GitError(Exception):
    """Raised for any git/repository-related failure in this module."""


def _git_wait(cmd, p):
    """Wait for subprocess `p` (running `cmd`) and fail if it exited nonzero."""
    # NOTE(review): the wait call assigning `rv` is elided from this view.
    raise GitError('%s returned %d' % (cmd, rv))


def _git_capture(argv):
    # Run `argv` with GIT_DIR pointed at the repo and capture its stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    # NOTE(review): the line reading p.stdout and the return are elided here.
    _git_wait(repr(argv), p)


def git_config_get(option, repo_dir=None):
    # Return the value of git config `option` via `git config --get`.
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         preexec_fn=_gitenv(repo_dir=repo_dir))
    # NOTE(review): the lines reading stdout, defining `rc`, and the
    # surrounding success/not-set handling are elided from this view.
    raise GitError('%s returned %d' % (cmd, rc))
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # `s` looks like '+HHMM'/'-HHMM'; convert hours and minutes to seconds.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    # NOTE(review): the sign handling and return statement are elided here.
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.

# Characters allowed at the edges / interior of a git "safe string"
# (author/committer names and emails in commit headers).
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
# NOTE(review): the `% (` line of this format expression is elided here.
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    _start_end_char, _content_char, _start_end_char)
# Timezone suffix like +0500 / -0130.
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches an entire commit object: tree, zero or more parents, the
# author/committer header lines, then the free-form message.
# NOTE(review): one line of the pattern (between committer and message)
# is elided from this view.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts each parent hash from the `parents` group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')

# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
# NOTE(review): the trailing field names / closing bracket are elided here.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse the text of a git commit object into a CommitInfo namedtuple."""
    commit_match = re.match(_commit_rx, content)
    # NOTE(review): the `if not commit_match:` guard is elided from this view.
    raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch commit `id` through cat-pipe `cp` and return it as a CommitInfo."""
    item_iter = cp.get(id)
    _, obj_type, _ = next(item_iter)
    assert(obj_type == 'commit')
    return parse_commit(''.join(item_iter))
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as git's '<seconds> <±HHMM>' using the local tz."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
def _git_date_str(epoch_sec, tz_offset_sec):
    """Format epoch_sec with an explicit tz offset as '<seconds> <±HHMM>'."""
    offs = tz_offset_sec // 60    # whole minutes of offset
    # NOTE(review): the `% (` argument line between the format string and
    # the sign expression, plus the HH/MM operands, are elided here.
    return '%d %s%02d%02d' \
        '+' if offs >= 0 else '-',
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    # NOTE(review): the `if not repo_dir:` guard before this raise is elided.
    raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    # NOTE(review): the if-body (redirecting repo_dir into .git) is elided.
    return os.path.join(repo_dir, sub)


# NOTE(review): def line for shorten_hash() is elided from this view; this
# re.sub abbreviates 40-hex-digit ids down to their first 7 characters.
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',


# NOTE(review): body of repo_rel(path) -- its def line is elided.  Makes a
# path relative to the repository root before shortening hashes in it.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)


# NOTE(review): body of all_packdirs() -- its def line is elided.
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    # Opportunistically rebuild the .midx and bloom indexes for objdir by
    # invoking the 'bup midx' and 'bup bloom' subcommands.  Failures are
    # recorded via add_error() rather than raised.
    # NOTE(review): the try/except lines defining `e` are elided from this
    # view.  Also, open('/dev/null', 'w') is never explicitly closed --
    # consider a context manager or os.devnull reuse.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        # A "regular file" stored as a git tree: a hashsplit-chunked file.
        assert(stat.S_ISDIR(gitmode))
        # NOTE(review): the `return name + '.bup'` line is elided here.
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    # NOTE(review): the final fall-through return is elided from this view.
# How a demangled name should be interpreted (see demangle_name()).
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of

    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        # NOTE(review): the first line of this return expression is elided;
        # a .bupm entry is chunked iff its tree entry is a directory.
        BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git hashes '<type> <size>\0' + content.
    header = '%s %d\0' % (type, len(content))
    # NOTE(review): the Sha1 update/digest lines are elided from this view.


def shalist_item_sort_key(ent):
    # Sort key matching git's tree ordering, where directory entries sort
    # as if their names had a trailing '/'.
    (mode, name, id) = ent
    assert(mode+0 == mode)    # mode must be an integer
    if stat.S_ISDIR(mode):
    # NOTE(review): the return expressions for both branches are elided.


def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    # NOTE(review): the accumulator setup and final return are elided here.
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    # NOTE(review): the `ofs = 0` initialization, the mode/name unpacking
    # from `spl`, and the ofs advance are all elided from this view.
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Yield the pack-object encoding of `content`: a variable-length
    # size/type header followed by a zlib stream.
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    # Low nibble of size plus the 3-bit type id; 0x80 marks continuation.
    # NOTE(review): the `sz` setup and header-emission loop are elided here.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    z = zlib.compressobj(compression_level)
    yield z.compress(content)


def _encode_looseobj(type, content, compression_level=1):
    # Loose objects are '<type> <size>\0' + content, zlib-compressed.
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    # NOTE(review): the final `yield z.flush()` appears to be elided.


def _decode_looseobj(buf):
    # Inverse of _encode_looseobj: decompress and split off the header.
    s = zlib.decompress(buf)
    # NOTE(review): the header-parsing lines defining type/sz/content are
    # elided from this view.
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)


def _decode_packobj(buf):
    # Inverse of _encode_packobj: parse the size/type header, decompress.
    # NOTE(review): the byte-scanning loop defining c/sz/shift/i is elided.
    type = _typermap[(c & 0x70) >> 4]
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        # NOTE(review): the `if idx != None:` guard is elided from this view.
        return self._ofs_from_idx(idx)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            return want_source and os.path.basename(self.name) or True

    # NOTE(review): __len__ body -- its def line is elided from this view.
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        # Binary-search the fanout-delimited sha range for `hash`.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        # NOTE(review): lines defining b1 and the search loop are elided.
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        # NOTE(review): the line setting self.name (and self.sha_ofs) is
        # elided from this view.
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: 256-entry fanout table, then 24-byte (ofs,sha) rows.
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # First 4 bytes of each row: network-order pack offset.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Remaining 20 bytes of the row: the sha1 itself.
        return str(self.shatable[idx*24+4 : idx*24+24])

    # NOTE(review): __iter__ body -- its def line is elided from this view.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        # NOTE(review): the line setting self.name is elided from this view.
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v2 magic '\377tOc' plus version number 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        # v2 layout: header, fanout, sha table, crc table, 32-bit offset
        # table, then the 64-bit overflow offset table.
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # NOTE(review): the high-bit (64-bit overflow) test is elided here.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    # NOTE(review): __iter__ body -- its def line is elided from this view.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        # NOTE(review): lines setting self.dir/self.also/self.packs etc.
        # are elided from this view.
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False

    # NOTE(review): __del__/__iter__/__len__ fragments -- def lines elided.
        assert(_mpi_count == 0)
        return iter(idxmerge(self.packs))
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        # NOTE(review): several guard/return lines (and the loop variable
        # `p` assignment) are elided from this view.
        if hash in self.also:
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        # NOTE(review): numerous lines are elided from this view (the
        # skip_midx branch, midxl accumulation, try/except around open_idx,
        # and the final do_bloom toggle), so the nesting below is partly
        # reconstructed.
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Start from the indexes already open, keyed by file name.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             ' used by %s\n') % (n, mxf))
            # Prefer the largest, then most recently modified, midx files.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                except GitError as e:
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        # Biggest indexes first (py2 cmp-style sort).
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    # NOTE(review): add() fragment -- its def line and body are elided.
    """Insert an additional object in the list."""
def open_idx(filename):
    # Open a pack index by extension: .idx (v1 or v2) or .midx.
    # NOTE(review): the `header = f.read(8)` line and several else/guard
    # lines are elided from this view.
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            return PackIdxV2(filename, f)
            raise GitError('%s: expected idx file version 2, got %d'
                           % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # v1 indexes have no magic; they start directly with the fanout.
            return PackIdxV1(filename, f)
        raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        # Incremental progress while merging.
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        # NOTE(review): the `if final_progress:` guard is elided here.
        progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                 % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Build the default PackWriter object cache: a PackIdxList over the
    repository's pack directory."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.

# NOTE(review): the `class PackWriter...` line is elided from this view.
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None,
                 max_pack_size=None, max_pack_objects=None):
        self.repo_dir = repo()
        # NOTE(review): lines initializing file/idx/objcache/counters are
        # elided from this view.
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        self.run_midx=run_midx
        self.on_pack_finish = on_pack_finish
        if not max_pack_size:
            # Fall back to the repository's configured limit, if any.
            max_pack_size = git_config_get('pack.packSizeLimit',
                                           repo_dir=self.repo_dir)
            if max_pack_size is not None:
                max_pack_size = parse_num(max_pack_size)
            if not max_pack_size:
                # larger packs slow down pruning
                max_pack_size = 1000 * 1000 * 1000
        self.max_pack_size = max_pack_size
        # cache memory usage is about 83 bytes per object
        self.max_pack_objects = max_pack_objects if max_pack_objects \
                                else max(1, self.max_pack_size // 5000)

    # NOTE(review): _open() fragment -- its def line is elided.  Lazily
    # creates the temporary pack file in the repo's objects directory.
        objdir = dir = os.path.join(self.repo_dir, 'objects')
        fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
        self.file = os.fdopen(fd, 'w+b')
        # Keep the objects dir open so it can be fsync'd in _end().
        self.parentfd = os.open(objdir, os.O_RDONLY)
        assert(name.endswith('.pack'))
        self.filename = name[:-5]
        # Pack header: magic, version 2, object count (rewritten in _end()).
        self.file.write('PACK\0\0\0\2\0\0\0\0')
        # One idx bucket per possible first sha byte.
        self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        # NOTE(review): the try/except and lines defining `e`/`nw` are
        # elided.  Python 2 three-arg raise preserves the traceback.
        raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)

    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, offset-of-object-start) in the bucket keyed by
        # the sha's first byte.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        # NOTE(review): guard lines and the closing of the _raw_write call
        # are elided from this view.
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Roll over to a fresh pack when size/object limits are reached.
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
        # NOTE(review): the `return id` line is elided from this view.

    def _require_objcache(self):
        # Instantiate the object cache on demand; fail if impossible.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            # NOTE(review): the `raise GitError(` line is elided here.
                "PackWriter not opened or can't check exists w/o objcache")
712 def exists(self, id, want_source=False):
713 """Return non-empty if an object is found in the object cache."""
714 self._require_objcache()
715 return self.objcache.exists(id, want_source=want_source)
    def just_write(self, sha, type, content):
        """Write an object to the pack file, bypassing the objcache check."""
        self._write(sha, type, content)
722 def maybe_write(self, type, content):
723 """Write an object to the pack file if not present and return its id."""
724 sha = calc_hash(type, content)
725 if not self.exists(sha):
726 self.just_write(sha, type, content)
727 self._require_objcache()
728 self.objcache.add(sha)
731 def new_blob(self, blob):
732 """Create a blob object in the pack with the supplied content."""
733 return self.maybe_write('blob', blob)
735 def new_tree(self, shalist):
736 """Create a tree object in the pack."""
737 content = tree_encode(shalist)
738 return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
        """Create a commit object in the pack. The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        # NOTE(review): the if/else lines selecting between the explicit-tz
        # and local-tz formatters, plus the `l = []` setup, are elided --
        # the adjacent assignments below are alternative branches, not
        # sequential statements.
        adate_str = _git_date_str(adate_sec, adate_tz)
        adate_str = _local_git_date_str(adate_sec)
        cdate_str = _git_date_str(cdate_sec, cdate_tz)
        cdate_str = _local_git_date_str(cdate_sec)
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        return self.maybe_write('commit', '\n'.join(l))

    # NOTE(review): abort() fragment -- its def line and most of its body
    # are elided from this view.
        """Remove the pack file from disk."""
        os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        # Finish the pack: fix up the header's object count, append the
        # pack checksum, write the .idx, and rename both into place.
        # NOTE(review): many lines (f assignment, seek calls, Sha1 setup,
        # idx flattening, try/finally) are elided from this view.
        if not f: return None
        # update object count
        cp = struct.pack('!i', self.count)
        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        fdatasync(f.fileno())
        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Make the renames durable before reporting success.
        os.fsync(self.parentfd)
        os.close(self.parentfd)
        auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)
826 def close(self, run_midx=True):
827 """Close the pack file and move it to its definitive path."""
828 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Write a v2 .idx for this pack and return the hex name base.
        # NOTE(review): the ofs64_count initialization and loop setup lines
        # are elided from this view.
        # Count entries that need 64-bit offsets (>= 2**31).
        for entry in section:
            if entry[2] >= 2**31:
        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        fdatasync(idx_f.fileno())
        idx_map = mmap_readwrite(idx_f, close=False)
        # The C helper fills in the mmap'd index in one pass.
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        idx_f = open(filename, 'a+b')
        # Skip the header+fanout, then hash the sha listing to derive the
        # pack's name; finally append the idx checksum.
        b = idx_f.read(8 + 4*256)
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
        fdatasync(idx_f.fileno())
def _gitenv(repo_dir = None):
    # Build a preexec_fn that points GIT_DIR at repo_dir for child gits.
    # NOTE(review): the default-handling and the nested function returned
    # here are elided from this view.
    os.environ['GIT_DIR'] = os.path.abspath(repo_dir)


def list_refs(patterns=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    patterns are specified. In that case, only include tuples for
    refs matching those patterns (cf. git-show-ref(1)). The limits
    restrict the result items to refs/heads or refs/tags. If both
    limits are specified, items from both sources will be included.
    """
    # NOTE(review): the `if limit_to_heads:`/`if limit_to_tags:`/
    # `if patterns:` guard lines and the empty-output check are elided.
    argv = ['git', 'show-ref']
    argv.append('--heads')
    argv.append('--tags')
    argv.extend(patterns)
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    for d in out.split('\n'):
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches so we can detect ambiguity.
    l = tuple(islice(refs, 2))
    # NOTE(review): the result unpacking / return lines are elided here.
def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
    """Yield information about commits as per "git rev-list". If a format
    is not provided, yield one hex hash at a time. If a format is
    provided, pass it to rev-list and call parse(git_stdout) for each
    commit with the stream positioned just after the rev-list "commit
    HASH" header line. When a format is provided yield (oidx,
    parse(git_stdout)) for each commit.
    """
    # parse and format must be supplied together.
    assert bool(parse) == bool(format)
    if isinstance(ref_or_refs, compat.str_type):
        refs = (ref_or_refs,)
    # NOTE(review): the else branch, the per-ref loop, and the two
    # format/no-format output loops are partially elided below, so some of
    # these lines belong to alternative branches.
    argv = ['git', 'rev-list']
    if isinstance(count, Integral):
        argv.extend(['-n', str(count)])
    argv.append('--pretty=format:' + format)
    assert not ref.startswith('-')    # guard against option injection
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    for line in p.stdout:
    line = p.stdout.readline()
    if not s.startswith('commit '):
        raise Exception('unexpected line ' + s)
    yield s[7:], parse(p.stdout)
    line = p.stdout.readline()
    rv = p.wait() # not fatal
    # Python 2 raise-with-message form.
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # NOTE(review): the `result = []` setup, the `for ref in refs:` loop
    # line, and the return are elided from this view.
    commit = get_commit_items(ref, cp(repo_dir))
    result.append(commit.author_sec)


def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    head = read_ref(committish, repo_dir=repo_dir)
    # NOTE(review): the `if head:` guard and the pack-index lookup/return
    # lines are elided from this view.
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # NOTE(review): two lines (presumably validating newval/oldval) are
    # elided before this assertion.  Only heads and tags may be updated.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert(refname.startswith('refs/'))
    cmd = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        cmd.append(oldvalue)
    p = subprocess.Popen(cmd, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    # NOTE(review): the `global repodir` declaration and the guards that
    # make these assignments fallbacks (rather than sequential overwrites)
    # are elided from this view.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')


def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    # NOTE(review): the guess_repo(path) call is elided from this view.
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Record all ref updates (reflog) in the new repository.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)


def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    # NOTE(review): the guess_repo call, `top = repo()`, and the
    # success/sys.exit branches are elided from this view.
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
    top_st = stat_if_exists(top)
    log('error: repository %r does not exist (see "bup help init")\n'
    log('error: %r is not a repository\n' % top)
1088 """Get Git's version and ensure a usable version is installed.
1090 The returned version is formatted as an ordered tuple with each position
1091 representing a digit in the version tag. For example, the following tuple
1092 would represent version 1.6.6.9:
1094 ('1', '6', '6', '9')
1098 p = subprocess.Popen(['git', '--version'],
1099 stdout=subprocess.PIPE)
1100 gvs = p.stdout.read()
1101 _git_wait('git --version', p)
1102 m = re.match(r'git version (\S+.\S+)', gvs)
1104 raise GitError('git --version weird output: %r' % gvs)
1105 _ver = tuple(m.group(1).split('.'))
1106 needed = ('1','5', '3', '1')
1108 raise GitError('git version %s or higher is required; you have %s'
1109 % ('.'.join(needed), '.'.join(_ver)))
1113 class _AbortableIter:
1114 def __init__(self, it, onabort = None):
1116 self.onabort = onabort
1124 return next(self.it)
1125 except StopIteration as e:
1133 """Abort iteration and call the abortion callback, if needed."""
    # NOTE(review): the `class CatPipe...` line is elided from this view.
    """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        # NOTE(review): several lines (version check guard, exit call) are
        # elided from this view.
        self.repo_dir = repo_dir
        wanted = ('1','5','6')
        log('error: git version must be at least 1.5.6\n')
        self.p = self.inprogress = None

    # NOTE(review): close()/_restart() fragments -- def lines elided.
        self.p.stdout.close()
        self.p.stdin.close()
        self.inprogress = None
        # One long-lived `git cat-file --batch` child serves all requests.
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  preexec_fn = _gitenv(self.repo_dir))

    # NOTE(review): get() fragment -- its def line is elided.
        """Yield (oidx, type, size), followed by the data referred to by ref.
        If ref does not exist, only yield (None, None, None).
        """
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        log('get: opening %r while %r is open\n' % (ref, self.inprogress))
        assert(not self.inprogress)
        # Refuse refs that would corrupt the line-oriented batch protocol.
        assert(ref.find('\n') < 0)
        assert(ref.find('\r') < 0)
        assert(not ref.startswith('-'))
        self.inprogress = ref
        self.p.stdin.write('%s\n' % ref)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            yield None, None, None
        info = hdr.split(' ')
        if len(info) != 3 or len(info[0]) != 40:
            # FIXME(review): `spl` is not defined in this function -- this
            # raise would itself fail with a NameError; it should
            # interpolate `info` (or the raw `hdr`) instead.
            raise GitError('expected object (id, type, size), got %r' % spl)
        oidx, typ, size = info
        it = _AbortableIter(chunkyreader(self.p.stdout, size),
                            onabort=self._abort)
        yield oidx, typ, size
        # cat-file terminates each object's data with a newline.
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
        except Exception as e:

    def _join(self, it):
        # Recursively yield the blob contents reachable from one object.
        # NOTE(review): the `if typ == 'blob':`/`elif typ == 'tree':`
        # branch lines and inner yields are elided from this view.
        _, typ, _ = next(it)
        treefile = ''.join(it)
        for (mode, name, sha) in tree_decode(treefile):
            for blob in self.join(sha.encode('hex')):
        elif typ == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'

    # NOTE(review): join() fragment -- its def line is elided.
        """Generate a list of the content of all blobs that can be reached
        from an object. The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
        """
        for d in self._join(self.get(id)):
        except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # NOTE(review): the `global _cp` declaration and the cache-miss guard
    # lines are elided from this view.
    repo_dir = repodir or repo()
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)    # per-repo cache of CatPipe instances
    cp = CatPipe(repo_dir)


def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    # NOTE(review): the `tags = {}` setup, the `name = n[10:]` extraction,
    # the first-tag initialization, and the return are elided here.
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'


class MissingObject(KeyError):
    # Raised when a repository object id cannot be found.
    def __init__(self, oid):
        # NOTE(review): the `self.oid = oid` line appears to be elided.
        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
# chunked file will have a chunk_path of ['']. So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'


# NOTE(review): this function's remaining signature lines, the main
# `while pending:` loop line, several guards/continues, and the tail of
# the final pending.append() are elided from this view, so some adjacent
# lines below belong to alternative branches.
def walk_object(cat_pipe, oidx,
    """Yield everything reachable from oidx via cat_pipe as a WalkItem,
    stopping whenever stop_at(oidx) returns true. Throw MissingObject
    if a hash encountered is missing from the repository, and don't
    read or return blob content in the data field unless include_data
    is set.
    """
    # Maintain the pending stack on the heap to avoid stack overflow
    pending = [(oidx, [], [], None)]
    oidx, parent_path, chunk_path, mode = pending.pop()
    oid = oidx.decode('hex')
    if stop_at and stop_at(oidx):
    if (not include_data) and mode and stat.S_ISREG(mode):
        # If the object is a "regular file", then it's a leaf in
        # the graph, so we can skip reading the data if the caller
        # hasn't requested it.
        yield WalkItem(oid=oid, type='blob',
                       chunk_path=chunk_path, path=parent_path,
    item_it = cat_pipe.get(oidx)
    get_oidx, typ, _ = next(item_it)
    raise MissingObject(oidx.decode('hex'))
    if typ not in ('blob', 'commit', 'tree'):
        raise Exception('unexpected repository object type %r' % typ)
    # FIXME: set the mode based on the type when the mode is None
    if typ == 'blob' and not include_data:
        # Dump data until we can ask cat_pipe not to fetch it
        for ignored in item_it:
    data = ''.join(item_it)
    yield WalkItem(oid=oid, type=typ,
                   chunk_path=chunk_path, path=parent_path,
                   data=(data if include_data else None))
    # Commits enqueue their parents and their tree.
    commit_items = parse_commit(data)
    for pid in commit_items.parents:
        pending.append((pid, parent_path, chunk_path, mode))
    pending.append((commit_items.tree, parent_path, chunk_path,
                    hashsplit.GIT_MODE_TREE))
    # Trees enqueue each entry, tracking chunked-file subtree paths.
    for mode, name, ent_id in tree_decode(data):
        demangled, bup_type = demangle_name(name, mode)
        sub_path = parent_path
        sub_chunk_path = chunk_path + [name]
        sub_path = parent_path + [name]
        if bup_type == BUP_CHUNKED:
            sub_chunk_path = ['']
        sub_chunk_path = chunk_path
        pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,