1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
6 from collections import namedtuple
8 from bup.helpers import *
9 from bup import _helpers, path, midx, bloom, xstat
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Mapping between git object type names and the numeric type codes used in
# pack files (and the reverse mapping, for decoding).
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
25 class GitError(Exception):
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s looks like a git timezone field, e.g. '+0130' / '-0500': hours in
    # s[1:3], minutes in s[3:5].  NOTE(review): sign handling (s[0]) is not
    # visible here -- presumably applied afterwards; confirm in full source.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
# Character classes for "safe" author/committer name and email strings.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    _start_end_char, _content_char, _start_end_char)
# Timezone field: sign, two-digit hours, minutes 00-59 (e.g. '+0130').
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches a whole commit object; the named groups are read by parse_commit().
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Pulls each 40-hex-digit parent id out of the 'parents' group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
# Parsed form of a commit object; produced by parse_commit() below.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a raw commit object into a CommitInfo namedtuple.

    Raises an Exception if *content* does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
    raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      # each parent is a 40-hex-digit id
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch commit *id* through cat-pipe *cp* and return its CommitInfo."""
    item_iter = cp.get(id)
    assert(item_iter.next() == 'commit')
    return parse_commit(''.join(item_iter))
90 """Get the path to the git repository or one of its subdirectories."""
93 raise GitError('You should call check_repo_or_die()')
95 # If there's a .git subdirectory, then the actual repo is in there.
96 gd = os.path.join(repodir, '.git')
97 if os.path.exists(gd):
100 return os.path.join(repodir, sub)
104 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
109 full = os.path.abspath(path)
110 fullrepo = os.path.abspath(repo(''))
111 if not fullrepo.endswith('/'):
113 if full.startswith(fullrepo):
114 path = full[len(fullrepo):]
115 if path.startswith('index-cache/'):
116 path = path[len('index-cache/'):]
117 return shorten_hash(path)
121 paths = [repo('objects/pack')]
122 paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    # Run 'bup midx --auto' over objdir in a subprocess, then refresh the
    # bloom filter the same way; failures are recorded but not fatal.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    # NOTE(review): the open('/dev/null') handle is never explicitly closed;
    # it leaks until garbage collection.
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    # A regular file stored as something else in git (i.e. chunked):
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
    # name[:-1] also catches names with one trailing character after '.bup'.
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
# Mode constants returned by demangle_name().
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of

    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be assembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git object ids hash "<type> <length>\0" followed by the content.
    header = '%s %d\0' % (type, len(content))
def shalist_item_sort_key(ent):
    # Sort key for tree entries; git orders directories as if their names
    # had a trailing '/'.
    (mode, name, id) = ent
    # cheap sanity check: mode must be an integer
    assert(mode+0 == mode)
    if stat.S_ISDIR(mode):
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    # Entries must be in git's directory-aware sort order.
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        # bin is the raw 20-byte sha1, not hex
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    while ofs < len(buf):
        # Each entry is "<octal mode> <name>\0" followed by a raw 20-byte sha.
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Pack object header byte: object type code in bits 4-6, low size bits
    # in bits 0-3; high bit set means the size varint continues.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    # Clamp the zlib level into its legal 0..9 range.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    # Loose objects are "<type> <length>\0<content>", zlib-deflated as a
    # single stream.
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
def _decode_looseobj(buf):
    # Inverse of _encode_looseobj: inflate, then split the header from the
    # body and sanity-check both.
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
def _decode_packobj(buf):
    # First byte holds the type code in bits 4-6; the size varint continues
    # in 7-bit groups while the high bit is set.
    type = _typermap[(c & 0x70) >> 4]
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        # hash is the raw 20-byte sha1, not hex.
        idx = self._idx_from_hash(hash)
        return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # With want_source, return this index's filename (truthy) rather
            # than a bare True.
            return want_source and os.path.basename(self.name) or True
306 return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        # Binary search within the fanout-delimited slice for the hash's
        # first byte; search statistics are tracked in module globals.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: a 256-entry fanout of big-endian 32-bit counts, then
        # 24-byte entries (4-byte offset + raw 20-byte sha each).
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # The pack offset is the first 4 bytes of the 24-byte entry.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # The raw sha is the last 20 bytes of the 24-byte entry.
        return str(self.shatable[idx*24+4 : idx*24+24])

        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # Header must be the '\377tOc' magic followed by version 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        # v2 layout: header, fanout, sha table, crc table, 4-byte offset
        # table, then a 64-bit offset table for offsets >= 2**31.
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # A set high bit means the real offset lives in the 64-bit table.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
391 def __init__(self, dir):
393 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
398 self.do_bloom = False
405 assert(_mpi_count == 0)
408 return iter(idxmerge(self.packs))
411 return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        if self.do_bloom and self.bloom:
            # Let the bloom filter rule the object out cheaply; on a
            # (possibly false) positive, fall through to the real packs.
            if self.bloom.exists(hash):
                self.do_bloom = False
                _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Start from the packs we already have open (minus midxes if we're
        # skipping them), keyed by filename.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                # Warn about midxes that reference missing .idx files.
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             ' used by %s\n') % (n, mxf))
            # Prefer bigger, then newer, midx files.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
                for name in ix.idxnames:
                    d[os.path.join(self.dir, name)] = ix
            elif not ix.force_keep:
                debug1('midx: removing redundant: %s\n'
                       % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
            bfull = os.path.join(self.dir, 'bup.bloom')
            # Reopen the bloom filter if one exists on disk.
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        # Biggest indexes first (py2 comparator sort).
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
516 """Insert an additional object in the list."""
def open_idx(filename):
    # Dispatch on the filename extension and the file header to the right
    # index class (v1 idx, v2 idx, or midx).
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            return PackIdxV2(filename, f)
            raise GitError('%s: expected idx file version 2, got %d'
                           % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # No v2 magic: treat as a version 1 index.
            return PackIdxV1(filename, f)
        raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                 % (100, total, total))
    # 10024 is the merge batch size handed to merge_iter.
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory: a PackIdxList over the repo's pack dir."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
557 """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1):
        # objcache_maker lazily builds the exists-check index (see
        # _require_objcache); compression_level is handed to zlib.
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
573 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
574 self.file = os.fdopen(fd, 'w+b')
575 assert(name.endswith('.pack'))
576 self.filename = name[:-5]
577 self.file.write('PACK\0\0\0\2\0\0\0\0')
578 self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        # Re-raise a failed write as GitError, preserving the traceback
        # (py2 three-argument raise).
        raise GitError, e, sys.exc_info()[2]
        # crc32 masked to an unsigned 32-bit value for the idx crc table.
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Bucket the entry by its first sha byte (mirrors the idx fanout);
        # the object started *size* bytes before the current file position.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Roll over to a fresh pack once this one gets too big.
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Lazily build the object cache; fail loudly if no maker was given.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
                "PackWriter not opened or can't check exists w/o objcache")
630 def exists(self, id, want_source=False):
631 """Return non-empty if an object is found in the object cache."""
632 self._require_objcache()
633 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            # Record the new object so later exists() calls see it.
            self._require_objcache()
            self.objcache.add(sha)
644 def new_blob(self, blob):
645 """Create a blob object in the pack with the supplied content."""
646 return self.maybe_write('blob', blob)
648 def new_tree(self, shalist):
649 """Create a tree object in the pack."""
650 content = tree_encode(shalist)
651 return self.maybe_write('tree', content)
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        # Assemble the commit object line by line; tree/parent ids arrive as
        # raw binary and are hex-encoded here.
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        return self.maybe_write('commit', '\n'.join(l))
    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # The current user is recorded as both author and committer.
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
672 """Remove the pack file from disk."""
678 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        if not f: return None

        # update object count
        cp = struct.pack('!i', self.count)

        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        # Rename pack and idx into their final content-addressed names.
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        auto_midx(repo('objects/pack'))
715 def close(self, run_midx=True):
716 """Close the pack file and move it to its definitive path."""
717 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Count entries whose pack offset needs the 64-bit overflow table
        # (offsets >= 2**31 cannot fit the 4-byte offset slot).
        for entry in section:
            if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)

        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        # The C helper fills the mmapped index in place.
        idx_map = mmap_readwrite(idx_f, close=False)
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        if idx_map: idx_map.close()
        idx_f = open(filename, 'a+b')
        # Skip header+fanout, then hash the sha table to derive the final
        # content-addressed pack name.
        b = idx_f.read(8 + 4*256)
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
762 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
766 os.environ['GIT_DIR'] = os.path.abspath(repo())
def list_refs(refname = None):
    """Generate a list of tuples in the form (refname,hash).
    If a ref name is specified, list only this particular ref.
    """
    argv = ['git', 'show-ref', '--']
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    for d in out.split('\n'):
        # show-ref output lines are "<hex sha> <refname>".
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname):
    """Get the commit id of the most recent commit made on a given ref."""
    l = list(list_refs(refname))
def rev_list(ref, count=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    (date, commit).

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    # Guard against ref being mistaken for a command-line option.
    assert(not ref.startswith('-'))
    opts += ['-n', str(atoi(count))]
    # %at prints the author timestamp for each commit.
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    if s.startswith('commit '):
        commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # author_sec is the commit's author timestamp in (UTC) epoch seconds.
    commit = get_commit_items(ref, cp())
    result.append(commit.author_sec)
def rev_parse(committish):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # First try treating committish as a ref name.
    head = read_ref(committish)
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    # Otherwise it may be a full 40-hex-digit object id; check the packs.
    pL = PackIdxList(repo('objects/pack'))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval):
    """Change the commit pointed to by a branch."""
    # Only branch refs may be updated here; newval/oldval are raw binary ids.
    assert(refname.startswith('refs/heads/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv)
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    # The BUP_DIR environment variable takes precedence; fall back to ~/.bup.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo() # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv)
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv)
    _git_wait('git config', p)
    # Enable the reflog so ref updates leave a record.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv)
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    # Probing the pack directory doubles as the repository validity check.
    os.stat(repo('objects/pack/.'))
    if e.errno == errno.ENOENT:
        log('error: %r is not a bup repository; run "bup init"\n'
    log('error: %s\n' % e)
934 """Get Git's version and ensure a usable version is installed.
936 The returned version is formatted as an ordered tuple with each position
937 representing a digit in the version tag. For example, the following tuple
938 would represent version 1.6.6.9:
944 p = subprocess.Popen(['git', '--version'],
945 stdout=subprocess.PIPE)
946 gvs = p.stdout.read()
947 _git_wait('git --version', p)
948 m = re.match(r'git version (\S+.\S+)', gvs)
950 raise GitError('git --version weird output: %r' % gvs)
951 _ver = tuple(m.group(1).split('.'))
952 needed = ('1','5', '3', '1')
954 raise GitError('git version %s or higher is required; you have %s'
955 % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    # Wait for subprocess *p*; a non-zero exit becomes a GitError naming cmd.
    raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    # Run argv with the repo's git environment and capture its stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
    _git_wait(repr(argv), p)
class _AbortableIter:
    # Wraps an iterator so iteration can be aborted mid-stream, invoking an
    # optional onabort callback so the underlying source can be cleaned up.
    def __init__(self, it, onabort = None):
        self.onabort = onabort
        return self.it.next()
    except StopIteration, e:
        """Abort iteration and call the abortion callback, if needed."""
1004 """Link to 'git cat-file' that is used to retrieve blob data."""
1007 wanted = ('1','5','6')
1010 log('warning: git version < %s; bup will be slow.\n'
1013 self.get = self._slow_get
1015 self.p = self.inprogress = None
1016 self.get = self._fast_get
1020 self.p.stdout.close()
1021 self.p.stdin.close()
1023 self.inprogress = None
1027 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1028 stdin=subprocess.PIPE,
1029 stdout=subprocess.PIPE,
1032 preexec_fn = _gitenv)
    def _fast_get(self, id):
        # Restart the long-lived 'git cat-file --batch' child if it died.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        log('_fast_get: opening %r while %r is open\n'
            % (id, self.inprogress))
        # Only one object may be streamed from the child at a time.
        assert(not self.inprogress)
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        # Batch-mode header: "<sha> <type> <size>" or "<id> missing".
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl
        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
        # The child terminates each object's data with a newline.
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
        except Exception, e:
    def _slow_get(self, id):
        # Fallback path for old git: one 'git cat-file' subprocess per call.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv)
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        # Recursively yield the contents of every blob reachable from the
        # object streamed by *it*.
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            # A commit's first line names its tree; recurse into that.
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'
1106 """Generate a list of the content of all blobs that can be reached
1107 from an object. The hash given in 'id' must point to a blob, a tree
1108 or a commit. The content of all blobs that can be seen from trees or
1109 commits will be added to the list.
1112 for d in self._join(self.get(id)):
1114 except StopIteration:
1121 """Create a CatPipe object or reuse an already existing one."""
1124 cur_dir = os.path.realpath(repo())
1125 if cur_dir != cp_dir:
1132 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1134 for (n,c) in list_refs():
1135 if n.startswith('refs/tags/'):
1140 tags[c].append(name) # more than one tag can point at 'c'