1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
6 from collections import namedtuple
8 from bup.helpers import *
9 from bup import _helpers, path, midx, bloom, xstat
# Hard limits for a single pack file; when either is exceeded the writer
# finishes the current pack and starts a new one (see PackWriter._write).
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
# Git pack object type codes (OBJ_COMMIT=1, OBJ_TREE=2, OBJ_BLOB=3, OBJ_TAG=4)
# and the inverse mapping, used when encoding/decoding pack objects.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
class GitError(Exception):
    """Base exception for errors while operating on a bup/git repository."""
    pass
def parse_tz_offset(s):
    """UTC offset in seconds.

    `s` is a git-style timezone string like '+0130' or '-0500'; the sign
    character selects the result's sign.
    """
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    if s[0] == '-':
        return - tz_off
    return tz_off
37 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
38 # Make sure that's authoritative.
39 _start_end_char = r'[^ .,:;<>"\'\0\n]'
40 _content_char = r'[^\0\n<>]'
41 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
43 _start_end_char, _content_char, _start_end_char)
44 _tz_rx = r'[-+]\d\d[0-5]\d'
45 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
46 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
47 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
48 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
50 (?P<message>(?:.|\n)*)''' % (_parent_rx,
51 _safe_str_rx, _safe_str_rx, _tz_rx,
52 _safe_str_rx, _safe_str_rx, _tz_rx))
53 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
56 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
57 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
58 'author_name', 'author_mail',
59 'author_sec', 'author_offset',
60 'committer_name', 'committer_mail',
61 'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a raw git commit object into a CommitInfo namedtuple.

    Raises Exception when content does not match the commit grammar.
    """
    commit_match = re.match(_commit_rx, content)
    if not commit_match:
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch commit `id` through cat-pipe `cp` and return it as CommitInfo."""
    item_it = cp.get(id)
    kind = item_it.next()
    assert(kind == 'commit')
    return parse_commit(''.join(item_it))
90 """Get the path to the git repository or one of its subdirectories."""
93 raise GitError('You should call check_repo_or_die()')
95 # If there's a .git subdirectory, then the actual repo is in there.
96 gd = os.path.join(repodir, '.git')
97 if os.path.exists(gd):
100 return os.path.join(repodir, sub)
def shorten_hash(s):
    """Abbreviate any 40-hex-digit ids embedded in `s` to 7 digits + '*'."""
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
                  r'\1\2*\3', s)
def repo_rel(path):
    """Return `path` relative to the repo (and index-cache), hashes shortened."""
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
        fullrepo += '/'
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)
def all_packdirs():
    """Return the pack directory plus every per-remote index-cache directory."""
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
    return paths
def auto_midx(objdir):
    """Regenerate the .midx and bloom files for `objdir` via bup subcommands.

    Failures are reported through add_error() rather than raised, so callers
    treat index regeneration as best-effort.
    """
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        rv = 1
    if rv:
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        rv = 1
    if rv:
        add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        # a regular file stored as a git tree => it was chunked
        assert(stat.S_ISDIR(gitmode))
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    else:
        return name
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    else:
        return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion.

    Hashes the "<type> <len>\\0" header followed by the content, as git does
    for loose objects.  Returns the binary digest.
    """
    header = '%s %d\0' % (type, len(content))
    sum = Sha1()
    sum.update(header)
    sum.update(content)
    return sum.digest()
def shalist_item_sort_key(ent):
    """Sort key for tree entries: git sorts directory names as 'name/'."""
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be an integer
    if stat.S_ISDIR(mode):
        return name + '/'
    else:
        return name
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    l = []
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)  # binary sha1
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        l.append(s)
    return ''.join(l)
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    ofs = 0
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)  # every entry has a "<mode> <name>\0" header
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        sha = buf[z+1:z+1+20]  # 20-byte binary sha follows the NUL
        ofs = z+1+20
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    """Yield the pack-object encoding of `content`: the variable-length
    size/type header, then the zlib-compressed payload.

    NOTE(review): header loop restored from upstream bup; verify against
    history.
    """
    szout = ''
    sz = len(content)
    # low 4 bits of size plus the 3-bit type code, then 7 bits per byte
    # with the high bit as a continuation flag (git pack format).
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield szout
    yield z.compress(content)
    yield z.flush()
def _encode_looseobj(type, content, compression_level=1):
    """Yield the zlib-compressed loose-object encoding of `content`.

    Without the final flush() the stream would be truncated.
    """
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()
def _decode_looseobj(buf):
    """Decode a zlib-compressed loose object; return (type, content).

    NOTE(review): decompress/split lines restored from upstream bup; verify
    against history.
    """
    s = zlib.decompress(buf)
    spl = s.split('\0', 1)
    assert(len(spl) == 2)
    hdr, content = spl
    (type, sz) = hdr.split(' ', 1)
    sz = int(sz)
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
def _decode_packobj(buf):
    """Decode one pack object: parse the variable-length size/type header,
    then decompress the payload.  Returns (type, content).

    NOTE(review): header-walk restored from upstream bup; verify against
    history.
    """
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:  # high bit set => more size bytes follow
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
    return (type, zlib.decompress(buf[i+1:]))
293 def find_offset(self, hash):
294 """Get the offset of an object inside the index file."""
295 idx = self._idx_from_hash(hash)
297 return self._ofs_from_idx(idx)
300 def exists(self, hash, want_source=False):
301 """Return nonempty if the object exists in this index."""
302 if hash and (self._idx_from_hash(hash) != None):
303 return want_source and os.path.basename(self.name) or True
307 return int(self.fanout[255])
309 def _idx_from_hash(self, hash):
310 global _total_searches, _total_steps
312 assert(len(hash) == 20)
314 start = self.fanout[b1-1] # range -1..254
315 end = self.fanout[b1] # range 0..255
317 _total_steps += 1 # lookup table is a step
320 mid = start + (end-start)/2
321 v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file.

    v1 layout: 256-entry fanout table, then 24-byte records of
    (4-byte offset, 20-byte sha) sorted by sha.
    """
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*24+4 : idx*24+24])

    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file.

    v2 layout: magic+version header, 256-entry fanout, sha table, crc
    table, 32-bit offset table, then a 64-bit offset overflow table.
    """
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
                               nsha*4)
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        if ofs & 0x80000000:
            # high bit set => index into the 64-bit overflow table
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
        return ofs

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
392 def __init__(self, dir):
394 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
399 self.do_bloom = False
406 assert(_mpi_count == 0)
409 return iter(idxmerge(self.packs))
412 return sum(len(pack) for pack in self.packs)
414 def exists(self, hash, want_source=False):
415 """Return nonempty if the object exists in the index files."""
416 global _total_searches
418 if hash in self.also:
420 if self.do_bloom and self.bloom:
421 if self.bloom.exists(hash):
422 self.do_bloom = False
424 _total_searches -= 1 # was counted by bloom
426 for i in xrange(len(self.packs)):
428 _total_searches -= 1 # will be incremented by sub-pack
429 ix = p.exists(hash, want_source=want_source)
431 # reorder so most recently used packs are searched first
432 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
437 def refresh(self, skip_midx = False):
438 """Refresh the index list.
439 This method verifies if .midx files were superseded (e.g. all of its
440 contents are in another, bigger .midx file) and removes the superseded
443 If skip_midx is True, all work on .midx files will be skipped and .midx
444 files will be removed from the list.
446 The module-global variable 'ignore_midx' can force this function to
447 always act as if skip_midx was True.
449 self.bloom = None # Always reopen the bloom as it may have been relaced
450 self.do_bloom = False
451 skip_midx = skip_midx or ignore_midx
452 d = dict((p.name, p) for p in self.packs
453 if not skip_midx or not isinstance(p, midx.PackMidx))
454 if os.path.exists(self.dir):
457 for ix in self.packs:
458 if isinstance(ix, midx.PackMidx):
459 for name in ix.idxnames:
460 d[os.path.join(self.dir, name)] = ix
461 for full in glob.glob(os.path.join(self.dir,'*.midx')):
463 mx = midx.PackMidx(full)
464 (mxd, mxf) = os.path.split(mx.name)
466 for n in mx.idxnames:
467 if not os.path.exists(os.path.join(mxd, n)):
468 log(('warning: index %s missing\n' +
469 ' used by %s\n') % (n, mxf))
477 midxl.sort(key=lambda ix:
478 (-len(ix), -xstat.stat(ix.name).st_mtime))
481 for sub in ix.idxnames:
482 found = d.get(os.path.join(self.dir, sub))
483 if not found or isinstance(found, PackIdx):
484 # doesn't exist, or exists but not in a midx
489 for name in ix.idxnames:
490 d[os.path.join(self.dir, name)] = ix
491 elif not ix.force_keep:
492 debug1('midx: removing redundant: %s\n'
493 % os.path.basename(ix.name))
496 for full in glob.glob(os.path.join(self.dir,'*.idx')):
504 bfull = os.path.join(self.dir, 'bup.bloom')
505 if self.bloom is None and os.path.exists(bfull):
506 self.bloom = bloom.ShaBloom(bfull)
507 self.packs = list(set(d.values()))
508 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
509 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
513 debug1('PackIdxList: using %d index%s.\n'
514 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
517 """Insert an additional object in the list."""
def open_idx(filename):
    """Open a .idx (v1 or v2) or .midx index file and return an index object.

    Raises GitError for unknown headers, versions, or extensions.
    """
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        header = f.read(8)
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            if version == 2:
                return PackIdxV2(filename, f)
            else:
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # v1 files have no magic; their first fanout entry sorts below it
            return PackIdxV1(filename, f)
        else:
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        if final_progress:
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    # Default objcache factory for PackWriter: an index list over the
    # repository's pack directory.
    return PackIdxList(repo('objects/pack'))
558 """Writes Git objects inside a pack file."""
559 def __init__(self, objcache_maker=_make_objcache, compression_level=1):
565 self.objcache_maker = objcache_maker
567 self.compression_level = compression_level
574 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
575 self.file = os.fdopen(fd, 'w+b')
576 assert(name.endswith('.pack'))
577 self.filename = name[:-5]
578 self.file.write('PACK\0\0\0\2\0\0\0\0')
579 self.idx = list(list() for i in xrange(256))
581 def _raw_write(self, datalist, sha):
584 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
585 # the file never has a *partial* blob. So let's make sure it's
586 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
587 # to our hashsplit algorithm.) f.write() does its own buffering,
588 # but that's okay because we'll flush it in _end().
589 oneblob = ''.join(datalist)
593 raise GitError, e, sys.exc_info()[2]
595 crc = zlib.crc32(oneblob) & 0xffffffff
596 self._update_idx(sha, crc, nw)
601 def _update_idx(self, sha, crc, size):
604 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
606 def _write(self, sha, type, content):
610 sha = calc_hash(type, content)
611 size, crc = self._raw_write(_encode_packobj(type, content,
612 self.compression_level),
614 if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
618 def breakpoint(self):
619 """Clear byte and object counts and return the last processed id."""
621 self.outbytes = self.count = 0
624 def _require_objcache(self):
625 if self.objcache is None and self.objcache_maker:
626 self.objcache = self.objcache_maker()
627 if self.objcache is None:
629 "PackWriter not opened or can't check exists w/o objcache")
    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache.

        want_source is forwarded to the cache; the cache's exists() then
        reports which index contains the object (see PackIdx.exists).
        """
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)
636 def maybe_write(self, type, content):
637 """Write an object to the pack file if not present and return its id."""
638 sha = calc_hash(type, content)
639 if not self.exists(sha):
640 self._write(sha, type, content)
641 self._require_objcache()
642 self.objcache.add(sha)
    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content.

        Returns the object id (as computed by calc_hash via maybe_write).
        """
        return self.maybe_write('blob', blob)
649 def new_tree(self, shalist):
650 """Create a tree object in the pack."""
651 content = tree_encode(shalist)
652 return self.maybe_write('tree', content)
654 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
656 if tree: l.append('tree %s' % tree.encode('hex'))
657 if parent: l.append('parent %s' % parent.encode('hex'))
658 if author: l.append('author %s %s' % (author, _git_date(adate)))
659 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
662 return self.maybe_write('commit', '\n'.join(l))
664 def new_commit(self, parent, tree, date, msg):
665 """Create a commit object in the pack."""
666 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
667 commit = self._new_commit(tree, parent,
668 userline, date, userline, date,
673 """Remove the pack file from disk."""
679 os.unlink(self.filename + '.pack')
681 def _end(self, run_midx=True):
683 if not f: return None
689 # update object count
691 cp = struct.pack('!i', self.count)
695 # calculate the pack sha1sum
698 for b in chunkyreader(f):
700 packbin = sum.digest()
704 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
706 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
707 if os.path.exists(self.filename + '.map'):
708 os.unlink(self.filename + '.map')
709 os.rename(self.filename + '.pack', nameprefix + '.pack')
710 os.rename(self.filename + '.idx', nameprefix + '.idx')
713 auto_midx(repo('objects/pack'))
    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path.

        Delegates to _end(); run_midx controls whether midx regeneration
        is triggered afterwards.
        """
        return self._end(run_midx=run_midx)
720 def _write_pack_idx_v2(self, filename, idx, packbin):
723 for entry in section:
724 if entry[2] >= 2**31:
727 # Length: header + fan-out + shas-and-crcs + overflow-offsets
728 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
730 idx_f = open(filename, 'w+b')
732 idx_f.truncate(index_len)
733 idx_map = mmap_readwrite(idx_f, close=False)
734 count = _helpers.write_idx(filename, idx_map, idx, self.count)
735 assert(count == self.count)
737 if idx_map: idx_map.close()
740 idx_f = open(filename, 'a+b')
745 b = idx_f.read(8 + 4*256)
748 obj_list_sum = Sha1()
749 for b in chunkyreader(idx_f, 20*self.count):
751 obj_list_sum.update(b)
752 namebase = obj_list_sum.hexdigest()
754 for b in chunkyreader(idx_f):
756 idx_f.write(idx_sum.digest())
763 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
def _gitenv(repo_dir = None):
    """Return a preexec_fn that points GIT_DIR at `repo_dir` (default: repo())."""
    if not repo_dir:
        repo_dir = repo()
    def env():
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
    return env
def list_refs(refname = None):
    """Generate a list of tuples in the form (refname,hash).
    If a ref name is specified, list only this particular ref.
    """
    argv = ['git', 'show-ref', '--']
    if refname:
        argv += [refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv(), stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    if rv:
        assert(not out)  # show-ref prints nothing when it fails
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
def read_ref(refname):
    """Get the commit id of the most recent commit made on a given ref."""
    l = list(list_refs(refname))
    if l:
        assert(len(l) == 1)  # a fully-qualified refname is unique
        return l[0][1]
    else:
        return None
802 def rev_list(ref, count=None):
803 """Generate a list of reachable commits in reverse chronological order.
805 This generator walks through commits, from child to parent, that are
806 reachable via the specified ref and yields a series of tuples of the form
809 If count is a non-zero integer, limit the number of commits to "count"
812 assert(not ref.startswith('-'))
815 opts += ['-n', str(atoi(count))]
816 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
817 p = subprocess.Popen(argv, preexec_fn = _gitenv(), stdout = subprocess.PIPE)
821 if s.startswith('commit '):
822 commit = s[7:].decode('hex')
826 rv = p.wait() # not fatal
828 raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    result = []
    for ref in refs:
        commit = get_commit_items(ref, cp())
        result.append(commit.author_sec)
    return result
def rev_parse(committish):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.

    NOTE(review): fallback path restored from upstream bup; verify against
    history.
    """
    head = read_ref(committish)
    if head:
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
        return head

    pL = PackIdxList(repo('objects/pack'))

    if len(committish) == 40:
        try:
            hash = committish.decode('hex')
        except TypeError:
            return None
        if pL.exists(hash):
            return hash

    return None
def update_ref(refname, newval, oldval):
    """Change the commit pointed to by a branch.

    newval/oldval are binary shas; only refs/heads/* may be updated.
    """
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    global repodir
    if path:
        repodir = path
    if not repodir:
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    guess_repo(path)
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    guess_repo(path)
    try:
        os.stat(repo('objects/pack/.'))
    except OSError as e:
        if e.errno == errno.ENOENT:
            log('error: %r is not a bup repository; run "bup init"\n'
                % repo())
            sys.exit(15)
        else:
            log('error: %s\n' % e)
            sys.exit(14)
939 """Get Git's version and ensure a usable version is installed.
941 The returned version is formatted as an ordered tuple with each position
942 representing a digit in the version tag. For example, the following tuple
943 would represent version 1.6.6.9:
949 p = subprocess.Popen(['git', '--version'],
950 stdout=subprocess.PIPE)
951 gvs = p.stdout.read()
952 _git_wait('git --version', p)
953 m = re.match(r'git version (\S+.\S+)', gvs)
955 raise GitError('git --version weird output: %r' % gvs)
956 _ver = tuple(m.group(1).split('.'))
957 needed = ('1','5', '3', '1')
959 raise GitError('git version %s or higher is required; you have %s'
960 % ('.'.join(needed), '.'.join(_ver)))
964 def _git_wait(cmd, p):
967 raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    """Run a git command in the repo and return its stdout as a string."""
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r
977 class _AbortableIter:
978 def __init__(self, it, onabort = None):
980 self.onabort = onabort
988 return self.it.next()
989 except StopIteration, e:
997 """Abort iteration and call the abortion callback, if needed."""
1009 """Link to 'git cat-file' that is used to retrieve blob data."""
1010 def __init__(self, repo_dir = None):
1012 self.repo_dir = repo_dir
1013 wanted = ('1','5','6')
1016 log('warning: git version < %s; bup will be slow.\n'
1019 self.get = self._slow_get
1021 self.p = self.inprogress = None
1022 self.get = self._fast_get
1026 self.p.stdout.close()
1027 self.p.stdin.close()
1029 self.inprogress = None
1033 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1034 stdin=subprocess.PIPE,
1035 stdout=subprocess.PIPE,
1038 preexec_fn = _gitenv(self.repo_dir))
1040 def _fast_get(self, id):
1041 if not self.p or self.p.poll() != None:
1044 poll_result = self.p.poll()
1045 assert(poll_result == None)
1047 log('_fast_get: opening %r while %r is open\n'
1048 % (id, self.inprogress))
1049 assert(not self.inprogress)
1050 assert(id.find('\n') < 0)
1051 assert(id.find('\r') < 0)
1052 assert(not id.startswith('-'))
1053 self.inprogress = id
1054 self.p.stdin.write('%s\n' % id)
1055 self.p.stdin.flush()
1056 hdr = self.p.stdout.readline()
1057 if hdr.endswith(' missing\n'):
1058 self.inprogress = None
1059 raise KeyError('blob %r is missing' % id)
1060 spl = hdr.split(' ')
1061 if len(spl) != 3 or len(spl[0]) != 40:
1062 raise GitError('expected blob, got %r' % spl)
1063 (hex, type, size) = spl
1065 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1066 onabort = self._abort)
1071 readline_result = self.p.stdout.readline()
1072 assert(readline_result == '\n')
1073 self.inprogress = None
1074 except Exception, e:
1078 def _slow_get(self, id):
1079 assert(id.find('\n') < 0)
1080 assert(id.find('\r') < 0)
1081 assert(id[0] != '-')
1082 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1085 p = subprocess.Popen(['git', 'cat-file', type, id],
1086 stdout=subprocess.PIPE,
1087 preexec_fn = _gitenv(self.repo_dir))
1088 for blob in chunkyreader(p.stdout):
1090 _git_wait('git cat-file', p)
1092 def _join(self, it):
1097 elif type == 'tree':
1098 treefile = ''.join(it)
1099 for (mode, name, sha) in tree_decode(treefile):
1100 for blob in self.join(sha.encode('hex')):
1102 elif type == 'commit':
1103 treeline = ''.join(it).split('\n')[0]
1104 assert(treeline.startswith('tree '))
1105 for blob in self.join(treeline[5:]):
1108 raise GitError('invalid object type %r: expected blob/tree/commit'
1112 """Generate a list of the content of all blobs that can be reached
1113 from an object. The hash given in 'id' must point to a blob, a tree
1114 or a commit. The content of all blobs that can be seen from trees or
1115 commits will be added to the list.
1118 for d in self._join(self.get(id)):
1120 except StopIteration:
1127 """Create a CatPipe object or reuse an already existing one."""
1130 cur_dir = os.path.realpath(repo())
1131 if cur_dir != cp_dir:
1138 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1140 for (n,c) in list_refs():
1141 if n.startswith('refs/tags/'):
1146 tags[c].append(name) # more than one tag can point at 'c'