1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
9 from numbers import Integral
11 from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
12 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
14 hostname, localtime, log, merge_iter,
15 mmap_read, mmap_readwrite,
17 progress, qprogress, shstr, stat_if_exists,
18 unlink, username, userfullname,
23 repodir = None # The default repository, once initialized
25 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
26 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
32 class GitError(Exception):
36 def _git_wait(cmd, p):
39 raise GitError('%s returned %d' % (shstr(cmd), rv))
41 def _git_capture(argv):
42 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
44 _git_wait(repr(argv), p)
47 def git_config_get(option, repo_dir=None):
48 cmd = ('git', 'config', '--get', option)
49 p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
50 preexec_fn=_gitenv(repo_dir=repo_dir))
56 raise GitError('%s returned %d' % (cmd, rc))
60 def parse_tz_offset(s):
61 """UTC offset in seconds."""
62 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
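    # For example (assuming the standard Git '[+-]HHMM' offset format):
    # parse_tz_offset('+0130') returns 5400, and a leading '-' negates it.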
68 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
69 # Make sure that's authoritative.
70 _start_end_char = r'[^ .,:;<>"\'\0\n]'
71 _content_char = r'[^\0\n<>]'
72 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
74 _start_end_char, _content_char, _start_end_char)
75 _tz_rx = r'[-+]\d\d[0-5]\d'
76 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
77 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
78 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
79 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
81 (?P<message>(?:.|\n)*)''' % (_parent_rx,
82 _safe_str_rx, _safe_str_rx, _tz_rx,
83 _safe_str_rx, _safe_str_rx, _tz_rx))
84 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
87 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
88 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
89 'author_name', 'author_mail',
90 'author_sec', 'author_offset',
91 'committer_name', 'committer_mail',
92 'committer_sec', 'committer_offset',
95 def parse_commit(content):
96 commit_match = re.match(_commit_rx, content)
98 raise Exception('cannot parse commit %r' % content)
99 matches = commit_match.groupdict()
100 return CommitInfo(tree=matches['tree'],
101 parents=re.findall(_parent_hash_rx, matches['parents']),
102 author_name=matches['author_name'],
103 author_mail=matches['author_mail'],
104 author_sec=int(matches['asec']),
105 author_offset=parse_tz_offset(matches['atz']),
106 committer_name=matches['committer_name'],
107 committer_mail=matches['committer_mail'],
108 committer_sec=int(matches['csec']),
109 committer_offset=parse_tz_offset(matches['ctz']),
110 message=matches['message'])
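# Usage sketch; the commit text is hypothetical, but legal per _commit_rx:
#   c = parse_commit('tree %s\n'
#                    'author A U Thor <a@example.com> 1 +0000\n'
#                    'committer A U Thor <a@example.com> 1 +0000\n'
#                    '\nhello\n' % ('0' * 40))
#   assert c.author_sec == 1 and c.committer_offset == 0 and c.parents == []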
113 def get_commit_items(id, cp):
114 commit_it = cp.get(id)
115 _, typ, _ = next(commit_it)
116 assert(typ == 'commit')
117 commit_content = ''.join(commit_it)
118 return parse_commit(commit_content)
121 def _local_git_date_str(epoch_sec):
122 return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))
125 def _git_date_str(epoch_sec, tz_offset_sec):
126 offs = tz_offset_sec // 60
127 return '%d %s%02d%02d' \
129 '+' if offs >= 0 else '-',
134 def repo(sub = '', repo_dir=None):
135 """Get the path to the git repository or one of its subdirectories."""
137 repo_dir = repo_dir or repodir
139 raise GitError('You should call check_repo_or_die()')
141 # If there's a .git subdirectory, then the actual repo is in there.
142 gd = os.path.join(repo_dir, '.git')
143 if os.path.exists(gd):
146 return os.path.join(repo_dir, sub)
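# For example, repo('objects/pack') returns '<repo_dir>/objects/pack' (or the
# corresponding path under the .git subdirectory when one exists).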
150 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
155 full = os.path.abspath(path)
156 fullrepo = os.path.abspath(repo(''))
157 if not fullrepo.endswith('/'):
159 if full.startswith(fullrepo):
160 path = full[len(fullrepo):]
161 if path.startswith('index-cache/'):
162 path = path[len('index-cache/'):]
163 return shorten_hash(path)
167 paths = [repo('objects/pack')]
168 paths += glob.glob(repo('index-cache/*/.'))
172 def auto_midx(objdir):
173 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
175 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
177 # make sure 'args' gets printed to help with debugging
178 add_error('%r: exception: %s' % (args, e))
181 add_error('%r: returned %d' % (args, rv))
183 args = [path.exe(), 'bloom', '--dir', objdir]
185 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
187 # make sure 'args' gets printed to help with debugging
188 add_error('%r: exception: %s' % (args, e))
191 add_error('%r: returned %d' % (args, rv))
194 def mangle_name(name, mode, gitmode):
195 """Mangle a file name to present an abstract name for segmented files.
196 Mangled file names will have the ".bup" extension added to them. If a
197 file's name already ends with ".bup", a ".bupl" extension is added to
198 disambiguate normal files from segmented ones.
200 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
201 assert(stat.S_ISDIR(gitmode))
203 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
204 return name + '.bupl'
209 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
210 def demangle_name(name, mode):
211 """Remove name mangling from a file name, if necessary.
213 The return value is a tuple (demangled_filename,mode), where mode is one of
216 * BUP_NORMAL : files that should be read as-is from the repository
217 * BUP_CHUNKED : files that were chunked and need to be reassembled
219 For more information on the name mangling algorithm, see mangle_name()
221 if name.endswith('.bupl'):
222 return (name[:-5], BUP_NORMAL)
223 elif name.endswith('.bup'):
224 return (name[:-4], BUP_CHUNKED)
225 elif name.endswith('.bupm'):
227 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
229 return (name, BUP_NORMAL)
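# For example: demangle_name('foo.bup', mode) yields ('foo', BUP_CHUNKED), and
# demangle_name('foo.bupl', mode) yields ('foo', BUP_NORMAL); mangle_name()
# produces those names in the first place.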
232 def calc_hash(type, content):
233 """Calculate some content's hash in the Git fashion."""
234 header = '%s %d\0' % (type, len(content))
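    # This is the standard Git object header, so e.g. calc_hash('blob', 'xyzzy')
    # hashes the bytes 'blob 5\0xyzzy'.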
240 def shalist_item_sort_key(ent):
241 (mode, name, id) = ent
242 assert(mode+0 == mode)
243 if stat.S_ISDIR(mode):
249 def tree_encode(shalist):
250 """Generate a git tree object from (mode,name,hash) tuples."""
251 shalist = sorted(shalist, key = shalist_item_sort_key)
253 for (mode,name,bin) in shalist:
255 assert(mode+0 == mode)
257 assert(len(bin) == 20)
258 s = '%o %s\0%s' % (mode,name,bin)
259 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
264 def tree_decode(buf):
265 """Generate a list of (mode,name,hash) from the git tree object in buf."""
267 while ofs < len(buf):
268 z = buf.find('\0', ofs)
270 spl = buf[ofs:z].split(' ', 1)
271 assert(len(spl) == 2)
273 sha = buf[z+1:z+1+20]
275 yield (int(mode, 8), name, sha)
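# Round-trip sketch: tree_decode(tree_encode(entries)) should yield the same
# (mode, name, sha) entries back, reordered into git's tree sort order.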
278 def _encode_packobj(type, content, compression_level=1):
279 if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
280 raise ValueError('invalid compression level %s' % compression_level)
283 szbits = (sz & 0x0f) | (_typemap[type]<<4)
286 if sz: szbits |= 0x80
292 z = zlib.compressobj(compression_level)
294 yield z.compress(content)
298 def _encode_looseobj(type, content, compression_level=1):
299 z = zlib.compressobj(compression_level)
300 yield z.compress('%s %d\0' % (type, len(content)))
301 yield z.compress(content)
305 def _decode_looseobj(buf):
307 s = zlib.decompress(buf)
314 assert(type in _typemap)
315 assert(sz == len(content))
316 return (type, content)
319 def _decode_packobj(buf):
322 type = _typermap[(c & 0x70) >> 4]
329 sz |= (c & 0x7f) << shift
333 return (type, zlib.decompress(buf[i+1:]))
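# In the pack encoding above, the size is a little-endian base-128 varint:
# the low 4 bits share the first byte with the type nibble, and each
# continuation byte (flagged by 0x80) contributes 7 more bits.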
340 def find_offset(self, hash):
341 """Get the offset of an object inside the index file."""
342 idx = self._idx_from_hash(hash)
344 return self._ofs_from_idx(idx)
347 def exists(self, hash, want_source=False):
348 """Return nonempty if the object exists in this index."""
349 if hash and (self._idx_from_hash(hash) != None):
350 return want_source and os.path.basename(self.name) or True
354 return int(self.fanout[255])
356 def _idx_from_hash(self, hash):
357 global _total_searches, _total_steps
359 assert(len(hash) == 20)
361 start = self.fanout[b1-1] # range -1..254
362 end = self.fanout[b1] # range 0..255
364 _total_steps += 1 # lookup table is a step
367 mid = start + (end-start)/2
368 v = self._idx_to_hash(mid)
378 class PackIdxV1(PackIdx):
379 """Object representation of a Git pack index (version 1) file."""
380 def __init__(self, filename, f):
382 self.idxnames = [self.name]
383 self.map = mmap_read(f)
384 self.fanout = list(struct.unpack('!256I',
385 str(buffer(self.map, 0, 256*4))))
386 self.fanout.append(0) # entry "-1"
387 nsha = self.fanout[255]
389 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
391 def _ofs_from_idx(self, idx):
392 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
394 def _idx_to_hash(self, idx):
395 return str(self.shatable[idx*24+4 : idx*24+24])
398 for i in xrange(self.fanout[255]):
399 yield buffer(self.map, 256*4 + 24*i + 4, 20)
402 class PackIdxV2(PackIdx):
403 """Object representation of a Git pack index (version 2) file."""
404 def __init__(self, filename, f):
406 self.idxnames = [self.name]
407 self.map = mmap_read(f)
408 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
409 self.fanout = list(struct.unpack('!256I',
410 str(buffer(self.map, 8, 256*4))))
411 self.fanout.append(0) # entry "-1"
412 nsha = self.fanout[255]
413 self.sha_ofs = 8 + 256*4
414 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
415 self.ofstable = buffer(self.map,
416 self.sha_ofs + nsha*20 + nsha*4,
418 self.ofs64table = buffer(self.map,
419 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
421 def _ofs_from_idx(self, idx):
422 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
424 idx64 = ofs & 0x7fffffff
425 ofs = struct.unpack('!Q',
426 str(buffer(self.ofs64table, idx64*8, 8)))[0]
429 def _idx_to_hash(self, idx):
430 return str(self.shatable[idx*20:(idx+1)*20])
433 for i in xrange(self.fanout[255]):
434 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
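# On-disk layout of a v2 .idx file, as mapped above: an 8-byte header, a
# 256*4-byte fanout table, a 20*n sha table, a 4*n crc table, a 4*n table of
# 32-bit offsets, and finally 8-byte entries for any offsets >= 2**31.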
439 def __init__(self, dir):
441 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
446 self.do_bloom = False
453 assert(_mpi_count == 0)
456 return iter(idxmerge(self.packs))
459 return sum(len(pack) for pack in self.packs)
461 def exists(self, hash, want_source=False):
462 """Return nonempty if the object exists in the index files."""
463 global _total_searches
465 if hash in self.also:
467 if self.do_bloom and self.bloom:
468 if self.bloom.exists(hash):
469 self.do_bloom = False
471 _total_searches -= 1 # was counted by bloom
473 for i in xrange(len(self.packs)):
475 _total_searches -= 1 # will be incremented by sub-pack
476 ix = p.exists(hash, want_source=want_source)
478 # reorder so most recently used packs are searched first
479 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
484 def refresh(self, skip_midx = False):
485 """Refresh the index list.
        This method checks whether any .midx files have been superseded (e.g.
        when all of their contents are in another, bigger .midx file) and
        removes the superseded files.
490 If skip_midx is True, all work on .midx files will be skipped and .midx
491 files will be removed from the list.
493 The module-global variable 'ignore_midx' can force this function to
494 always act as if skip_midx was True.
        self.bloom = None # Always reopen the bloom as it may have been replaced
497 self.do_bloom = False
498 skip_midx = skip_midx or ignore_midx
499 d = dict((p.name, p) for p in self.packs
500 if not skip_midx or not isinstance(p, midx.PackMidx))
501 if os.path.exists(self.dir):
504 for ix in self.packs:
505 if isinstance(ix, midx.PackMidx):
506 for name in ix.idxnames:
507 d[os.path.join(self.dir, name)] = ix
508 for full in glob.glob(os.path.join(self.dir,'*.midx')):
510 mx = midx.PackMidx(full)
511 (mxd, mxf) = os.path.split(mx.name)
513 for n in mx.idxnames:
514 if not os.path.exists(os.path.join(mxd, n)):
515 log(('warning: index %s missing\n' +
516 ' used by %s\n') % (n, mxf))
524 midxl.sort(key=lambda ix:
525 (-len(ix), -xstat.stat(ix.name).st_mtime))
528 for sub in ix.idxnames:
529 found = d.get(os.path.join(self.dir, sub))
530 if not found or isinstance(found, PackIdx):
531 # doesn't exist, or exists but not in a midx
536 for name in ix.idxnames:
537 d[os.path.join(self.dir, name)] = ix
538 elif not ix.force_keep:
539 debug1('midx: removing redundant: %s\n'
540 % os.path.basename(ix.name))
543 for full in glob.glob(os.path.join(self.dir,'*.idx')):
547 except GitError as e:
551 bfull = os.path.join(self.dir, 'bup.bloom')
552 if self.bloom is None and os.path.exists(bfull):
553 self.bloom = bloom.ShaBloom(bfull)
554 self.packs = list(set(d.values()))
555 self.packs.sort(reverse=True, key=lambda x: len(x))
556 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
560 debug1('PackIdxList: using %d index%s.\n'
561 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
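    # Typical use (sketch): mi = PackIdxList(repo('objects/pack')), then
    # mi.exists(sha) consults the bloom filter, any .midx files, and the
    # individual .idx files, most recently used first.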
564 """Insert an additional object in the list."""
568 def open_idx(filename):
569 if filename.endswith('.idx'):
570 f = open(filename, 'rb')
572 if header[0:4] == '\377tOc':
573 version = struct.unpack('!I', header[4:8])[0]
575 return PackIdxV2(filename, f)
577 raise GitError('%s: expected idx file version 2, got %d'
578 % (filename, version))
579 elif len(header) == 8 and header[0:4] < '\377tOc':
580 return PackIdxV1(filename, f)
582 raise GitError('%s: unrecognized idx file header' % filename)
583 elif filename.endswith('.midx'):
584 return midx.PackMidx(filename)
586 raise GitError('idx filenames must end with .idx or .midx')
589 def idxmerge(idxlist, final_progress=True):
590 """Generate a list of all the objects reachable in a PackIdxList."""
591 def pfunc(count, total):
592 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
593 % (count*100.0/total, count, total))
594 def pfinal(count, total):
596 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
597 % (100, total, total))
598 return merge_iter(idxlist, 10024, pfunc, pfinal)
601 def _make_objcache():
602 return PackIdxList(repo('objects/pack'))
604 # bup-gc assumes that it can disable all PackWriter activities
605 # (bloom/midx/cache) via the constructor and close() arguments.
608 """Writes Git objects inside a pack file."""
609 def __init__(self, objcache_maker=_make_objcache, compression_level=1,
610 run_midx=True, on_pack_finish=None,
611 max_pack_size=None, max_pack_objects=None):
612 self.repo_dir = repo()
619 self.objcache_maker = objcache_maker
621 self.compression_level = compression_level
622 self.run_midx=run_midx
623 self.on_pack_finish = on_pack_finish
624 if not max_pack_size:
625 max_pack_size = git_config_get('pack.packSizeLimit',
626 repo_dir=self.repo_dir)
627 if max_pack_size is not None:
628 max_pack_size = parse_num(max_pack_size)
629 if not max_pack_size:
630 # larger packs slow down pruning
631 max_pack_size = 1000 * 1000 * 1000
632 self.max_pack_size = max_pack_size
633 # cache memory usage is about 83 bytes per object
634 self.max_pack_objects = max_pack_objects if max_pack_objects \
635 else max(1, self.max_pack_size // 5000)
642 objdir = dir = os.path.join(self.repo_dir, 'objects')
643 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
645 self.file = os.fdopen(fd, 'w+b')
650 self.parentfd = os.open(objdir, os.O_RDONLY)
656 assert(name.endswith('.pack'))
657 self.filename = name[:-5]
658 self.file.write('PACK\0\0\0\2\0\0\0\0')
659 self.idx = list(list() for i in xrange(256))
661 def _raw_write(self, datalist, sha):
        # in case we get interrupted (e.g. KeyboardInterrupt), it's best if
665 # the file never has a *partial* blob. So let's make sure it's
666 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
667 # to our hashsplit algorithm.) f.write() does its own buffering,
668 # but that's okay because we'll flush it in _end().
669 oneblob = ''.join(datalist)
673 raise GitError, e, sys.exc_info()[2]
675 crc = zlib.crc32(oneblob) & 0xffffffff
676 self._update_idx(sha, crc, nw)
681 def _update_idx(self, sha, crc, size):
684 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
686 def _write(self, sha, type, content):
690 sha = calc_hash(type, content)
691 size, crc = self._raw_write(_encode_packobj(type, content,
692 self.compression_level),
694 if self.outbytes >= self.max_pack_size \
695 or self.count >= self.max_pack_objects:
699 def breakpoint(self):
700 """Clear byte and object counts and return the last processed id."""
701 id = self._end(self.run_midx)
702 self.outbytes = self.count = 0
705 def _require_objcache(self):
706 if self.objcache is None and self.objcache_maker:
707 self.objcache = self.objcache_maker()
708 if self.objcache is None:
710 "PackWriter not opened or can't check exists w/o objcache")
712 def exists(self, id, want_source=False):
713 """Return non-empty if an object is found in the object cache."""
714 self._require_objcache()
715 return self.objcache.exists(id, want_source=want_source)
717 def just_write(self, sha, type, content):
718 """Write an object to the pack file, bypassing the objcache. Fails if
720 self._write(sha, type, content)
722 def maybe_write(self, type, content):
723 """Write an object to the pack file if not present and return its id."""
724 sha = calc_hash(type, content)
725 if not self.exists(sha):
726 self.just_write(sha, type, content)
727 self._require_objcache()
728 self.objcache.add(sha)
731 def new_blob(self, blob):
732 """Create a blob object in the pack with the supplied content."""
733 return self.maybe_write('blob', blob)
735 def new_tree(self, shalist):
736 """Create a tree object in the pack."""
737 content = tree_encode(shalist)
738 return self.maybe_write('tree', content)
740 def new_commit(self, tree, parent,
741 author, adate_sec, adate_tz,
742 committer, cdate_sec, cdate_tz,
744 """Create a commit object in the pack. The date_sec values must be
745 epoch-seconds, and if a tz is None, the local timezone is assumed."""
747 adate_str = _git_date_str(adate_sec, adate_tz)
749 adate_str = _local_git_date_str(adate_sec)
751 cdate_str = _git_date_str(cdate_sec, cdate_tz)
753 cdate_str = _local_git_date_str(cdate_sec)
755 if tree: l.append('tree %s' % tree.encode('hex'))
756 if parent: l.append('parent %s' % parent.encode('hex'))
757 if author: l.append('author %s %s' % (author, adate_str))
758 if committer: l.append('committer %s %s' % (committer, cdate_str))
761 return self.maybe_write('commit', '\n'.join(l))
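    # A minimal write cycle (sketch, assuming an initialized repository):
    #   w = PackWriter()
    #   sha = w.new_blob('some content')   # 20-byte binary id
    #   w.close()                          # finish the pack and write its .idx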
764 """Remove the pack file from disk."""
773 os.unlink(self.filename + '.pack')
780 def _end(self, run_midx=True):
782 if not f: return None
789 # update object count
791 cp = struct.pack('!i', self.count)
795 # calculate the pack sha1sum
798 for b in chunkyreader(f):
800 packbin = sum.digest()
802 fdatasync(f.fileno())
806 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
807 nameprefix = os.path.join(self.repo_dir,
808 'objects/pack/pack-' + obj_list_sha)
809 if os.path.exists(self.filename + '.map'):
810 os.unlink(self.filename + '.map')
811 os.rename(self.filename + '.pack', nameprefix + '.pack')
812 os.rename(self.filename + '.idx', nameprefix + '.idx')
814 os.fsync(self.parentfd)
816 os.close(self.parentfd)
819 auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
821 if self.on_pack_finish:
822 self.on_pack_finish(nameprefix)
826 def close(self, run_midx=True):
827 """Close the pack file and move it to its definitive path."""
828 return self._end(run_midx=run_midx)
830 def _write_pack_idx_v2(self, filename, idx, packbin):
833 for entry in section:
834 if entry[2] >= 2**31:
837 # Length: header + fan-out + shas-and-crcs + overflow-offsets
838 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
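        # e.g. for 2 objects with no 64-bit offsets: 8 + 1024 + 56 + 0 = 1088
        # bytes, before the two trailing sha1s are appended below.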
840 idx_f = open(filename, 'w+b')
842 idx_f.truncate(index_len)
843 fdatasync(idx_f.fileno())
844 idx_map = mmap_readwrite(idx_f, close=False)
846 count = _helpers.write_idx(filename, idx_map, idx, self.count)
847 assert(count == self.count)
854 idx_f = open(filename, 'a+b')
859 b = idx_f.read(8 + 4*256)
862 obj_list_sum = Sha1()
863 for b in chunkyreader(idx_f, 20*self.count):
865 obj_list_sum.update(b)
866 namebase = obj_list_sum.hexdigest()
868 for b in chunkyreader(idx_f):
870 idx_f.write(idx_sum.digest())
871 fdatasync(idx_f.fileno())
877 def _gitenv(repo_dir = None):
881 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
885 def list_refs(patterns=None, repo_dir=None,
886 limit_to_heads=False, limit_to_tags=False):
887 """Yield (refname, hash) tuples for all repository refs unless
888 patterns are specified. In that case, only include tuples for
889 refs matching those patterns (cf. git-show-ref(1)). The limits
890 restrict the result items to refs/heads or refs/tags. If both
891 limits are specified, items from both sources will be included.
894 argv = ['git', 'show-ref']
896 argv.append('--heads')
898 argv.append('--tags')
901 argv.extend(patterns)
902 p = subprocess.Popen(argv,
903 preexec_fn = _gitenv(repo_dir),
904 stdout = subprocess.PIPE)
905 out = p.stdout.read().strip()
906 rv = p.wait() # not fatal
910 for d in out.split('\n'):
911 (sha, name) = d.split(' ', 1)
912 yield (name, sha.decode('hex'))
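# For example, dict(list_refs(limit_to_heads=True)) maps 'refs/heads/...'
# names to 20-byte binary hashes.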
915 def read_ref(refname, repo_dir = None):
916 """Get the commit id of the most recent commit made on a given ref."""
917 refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
918 l = tuple(islice(refs, 2))
926 def rev_list_invocation(ref_or_refs, count=None, format=None):
927 if isinstance(ref_or_refs, compat.str_type):
928 refs = (ref_or_refs,)
931 argv = ['git', 'rev-list']
932 if isinstance(count, Integral):
933 argv.extend(['-n', str(count)])
935 raise ValueError('unexpected count argument %r' % count)
938 argv.append('--pretty=format:' + format)
940 assert not ref.startswith('-')
946 def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
947 """Yield information about commits as per "git rev-list". If a format
948 is not provided, yield one hex hash at a time. If a format is
949 provided, pass it to rev-list and call parse(git_stdout) for each
950 commit with the stream positioned just after the rev-list "commit
951 HASH" header line. When a format is provided yield (oidx,
952 parse(git_stdout)) for each commit.
955 assert bool(parse) == bool(format)
956 p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
958 preexec_fn = _gitenv(repo_dir),
959 stdout = subprocess.PIPE)
961 for line in p.stdout:
964 line = p.stdout.readline()
967 if not s.startswith('commit '):
968 raise Exception('unexpected line ' + s)
969 yield s[7:], parse(p.stdout)
970 line = p.stdout.readline()
972 rv = p.wait() # not fatal
974 raise GitError, 'git rev-list returned error %d' % rv
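# Usage sketch (hypothetical format/parse pair):
#   for oidx, ts in rev_list('refs/heads/master', format='%at',
#                            parse=lambda f: int(f.readline().strip())):
#       ...  # ts is the author timestamp for commit oidx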
977 def get_commit_dates(refs, repo_dir=None):
978 """Get the dates for the specified commit refs. For now, every unique
979 string in refs must resolve to a different commit or this
980 function will fail."""
983 commit = get_commit_items(ref, cp(repo_dir))
984 result.append(commit.author_sec)
988 def rev_parse(committish, repo_dir=None):
989 """Resolve the full hash for 'committish', if it exists.
991 Should be roughly equivalent to 'git rev-parse'.
993 Returns the hex value of the hash if it is found, None if 'committish' does
994 not correspond to anything.
996 head = read_ref(committish, repo_dir=repo_dir)
998 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
1001 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
1003 if len(committish) == 40:
1005 hash = committish.decode('hex')
1015 def update_ref(refname, newval, oldval, repo_dir=None):
1016 """Update a repository reference."""
1019 assert(refname.startswith('refs/heads/') \
1020 or refname.startswith('refs/tags/'))
1021 p = subprocess.Popen(['git', 'update-ref', refname,
1022 newval.encode('hex'), oldval.encode('hex')],
1023 preexec_fn = _gitenv(repo_dir))
1024 _git_wait('git update-ref', p)
1027 def delete_ref(refname, oldvalue=None):
1028 """Delete a repository reference (see git update-ref(1))."""
1029 assert(refname.startswith('refs/'))
1030 oldvalue = [] if not oldvalue else [oldvalue]
1031 p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
1032 preexec_fn = _gitenv())
1033 _git_wait('git update-ref', p)
1036 def guess_repo(path=None):
1037 """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but does not fail if
    one doesn't exist. Usually, when interacting with a bup repository, you
    would not call this function directly; use check_repo_or_die() instead.
1047 repodir = os.environ.get('BUP_DIR')
1049 repodir = os.path.expanduser('~/.bup')
1052 def init_repo(path=None):
1053 """Create the Git bare repository for bup in a given path."""
1055 d = repo() # appends a / to the path
1056 parent = os.path.dirname(os.path.dirname(d))
1057 if parent and not os.path.exists(parent):
1058 raise GitError('parent directory "%s" does not exist\n' % parent)
1059 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
1060 raise GitError('"%s" exists but is not a directory\n' % d)
1061 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
1062 preexec_fn = _gitenv())
1063 _git_wait('git init', p)
1064 # Force the index version configuration in order to ensure bup works
1065 # regardless of the version of the installed Git binary.
1066 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
1067 stdout=sys.stderr, preexec_fn = _gitenv())
1068 _git_wait('git config', p)
1070 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
1071 stdout=sys.stderr, preexec_fn = _gitenv())
1072 _git_wait('git config', p)
1075 def check_repo_or_die(path=None):
1076 """Check to see if a bup repository probably exists, and abort if not."""
1079 pst = stat_if_exists(top + '/objects/pack')
1080 if pst and stat.S_ISDIR(pst.st_mode):
1083 top_st = stat_if_exists(top)
1085 log('error: repository %r does not exist (see "bup help init")\n'
1088 log('error: %r is not a repository\n' % top)
1094 """Get Git's version and ensure a usable version is installed.
    The returned version is an ordered tuple of strings, one per component
    of the version string. For example, the following tuple would represent
    version 1.6.6.9:
1100 ('1', '6', '6', '9')
1104 p = subprocess.Popen(['git', '--version'],
1105 stdout=subprocess.PIPE)
1106 gvs = p.stdout.read()
1107 _git_wait('git --version', p)
    m = re.match(r'git version (\S+\.\S+)', gvs)
1110 raise GitError('git --version weird output: %r' % gvs)
1111 _ver = tuple(m.group(1).split('.'))
1112 needed = ('1','5', '3', '1')
1114 raise GitError('git version %s or higher is required; you have %s'
1115 % ('.'.join(needed), '.'.join(_ver)))
1119 class _AbortableIter:
1120 def __init__(self, it, onabort = None):
1122 self.onabort = onabort
1130 return next(self.it)
1131 except StopIteration as e:
1139 """Abort iteration and call the abortion callback, if needed."""
1151 """Link to 'git cat-file' that is used to retrieve blob data."""
1152 def __init__(self, repo_dir = None):
1154 self.repo_dir = repo_dir
1155 wanted = ('1','5','6')
1157 log('error: git version must be at least 1.5.6\n')
1159 self.p = self.inprogress = None
1163 self.p.stdout.close()
1164 self.p.stdin.close()
1166 self.inprogress = None
1170 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1171 stdin=subprocess.PIPE,
1172 stdout=subprocess.PIPE,
1175 preexec_fn = _gitenv(self.repo_dir))
1178 """Yield (oidx, type, size), followed by the data referred to by ref.
1179 If ref does not exist, only yield (None, None, None).
1182 if not self.p or self.p.poll() != None:
1185 poll_result = self.p.poll()
1186 assert(poll_result == None)
1188 log('get: opening %r while %r is open\n' % (ref, self.inprogress))
1189 assert(not self.inprogress)
1190 assert(ref.find('\n') < 0)
1191 assert(ref.find('\r') < 0)
1192 assert(not ref.startswith('-'))
1193 self.inprogress = ref
1194 self.p.stdin.write('%s\n' % ref)
1195 self.p.stdin.flush()
1196 hdr = self.p.stdout.readline()
1197 if hdr.endswith(' missing\n'):
1198 self.inprogress = None
1199 yield None, None, None
1201 info = hdr.split(' ')
1202 if len(info) != 3 or len(info[0]) != 40:
            raise GitError('expected object (id, type, size), got %r' % info)
1204 oidx, typ, size = info
1206 it = _AbortableIter(chunkyreader(self.p.stdout, size),
1207 onabort=self._abort)
1209 yield oidx, typ, size
1212 readline_result = self.p.stdout.readline()
1213 assert(readline_result == '\n')
1214 self.inprogress = None
1215 except Exception as e:
1219 def _join(self, it):
1220 _, typ, _ = next(it)
1225 treefile = ''.join(it)
1226 for (mode, name, sha) in tree_decode(treefile):
1227 for blob in self.join(sha.encode('hex')):
1229 elif typ == 'commit':
1230 treeline = ''.join(it).split('\n')[0]
1231 assert(treeline.startswith('tree '))
1232 for blob in self.join(treeline[5:]):
1235 raise GitError('invalid object type %r: expected blob/tree/commit'
1239 """Generate a list of the content of all blobs that can be reached
1240 from an object. The hash given in 'id' must point to a blob, a tree
1241 or a commit. The content of all blobs that can be seen from trees or
1242 commits will be added to the list.
1245 for d in self._join(self.get(id)):
1247 except StopIteration:
1253 def cp(repo_dir=None):
1254 """Create a CatPipe object or reuse the already existing one."""
1257 repo_dir = repodir or repo()
1258 repo_dir = os.path.abspath(repo_dir)
1259 cp = _cp.get(repo_dir)
1261 cp = CatPipe(repo_dir)
1266 def tags(repo_dir = None):
1267 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1269 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1270 assert(n.startswith('refs/tags/'))
1274 tags[c].append(name) # more than one tag can point at 'c'
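# e.g. tags().get(commit_bin) might be ['1.0', 'stable'] if both tags point
# at that commit (the 'refs/tags/' prefix is stripped from each name).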
1278 class MissingObject(KeyError):
1279 def __init__(self, oid):
1281 KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
1284 WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
1285 'path', 'chunk_path', 'data'])
1286 # The path is the mangled path, and if an item represents a fragment
1287 # of a chunked file, the chunk_path will be the chunked subtree path
1288 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1289 # chunked file will have a chunk_path of ['']. So some chunk subtree
1290 # of the file '/foo/bar/baz' might look like this:
1292 # item.path = ['foo', 'bar', 'baz.bup']
1293 # item.chunk_path = ['', '2d3115e', '016b097']
1294 # item.type = 'tree'
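# Traversal sketch (hypothetical ref):
#   for item in walk_object(cp(), some_commit_hex, include_data=False):
#       print item.type, '/'.join(item.path)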
1298 def walk_object(cat_pipe, oidx,
1301 """Yield everything reachable from oidx via cat_pipe as a WalkItem,
1302 stopping whenever stop_at(oidx) returns true. Throw MissingObject
1303 if a hash encountered is missing from the repository, and don't
    read or return blob content in the data field unless include_data is set.
1307 # Maintain the pending stack on the heap to avoid stack overflow
1308 pending = [(oidx, [], [], None)]
1310 oidx, parent_path, chunk_path, mode = pending.pop()
1311 oid = oidx.decode('hex')
1312 if stop_at and stop_at(oidx):
1315 if (not include_data) and mode and stat.S_ISREG(mode):
1316 # If the object is a "regular file", then it's a leaf in
1317 # the graph, so we can skip reading the data if the caller
1318 # hasn't requested it.
1319 yield WalkItem(oid=oid, type='blob',
1320 chunk_path=chunk_path, path=parent_path,
1325 item_it = cat_pipe.get(oidx)
1326 get_oidx, typ, _ = next(item_it)
1328 raise MissingObject(oidx.decode('hex'))
1329 if typ not in ('blob', 'commit', 'tree'):
1330 raise Exception('unexpected repository object type %r' % typ)
1332 # FIXME: set the mode based on the type when the mode is None
1333 if typ == 'blob' and not include_data:
1334 # Dump data until we can ask cat_pipe not to fetch it
1335 for ignored in item_it:
1339 data = ''.join(item_it)
1341 yield WalkItem(oid=oid, type=typ,
1342 chunk_path=chunk_path, path=parent_path,
1344 data=(data if include_data else None))
1347 commit_items = parse_commit(data)
1348 for pid in commit_items.parents:
1349 pending.append((pid, parent_path, chunk_path, mode))
1350 pending.append((commit_items.tree, parent_path, chunk_path,
1351 hashsplit.GIT_MODE_TREE))
1353 for mode, name, ent_id in tree_decode(data):
1354 demangled, bup_type = demangle_name(name, mode)
1356 sub_path = parent_path
1357 sub_chunk_path = chunk_path + [name]
1359 sub_path = parent_path + [name]
1360 if bup_type == BUP_CHUNKED:
1361 sub_chunk_path = ['']
1363 sub_chunk_path = chunk_path
1364 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,