"""Git interaction library.
bup repositories are in Git format. This library allows us to
interact with the Git data structures.
"""
5 import os, zlib, time, subprocess, struct, stat, re, tempfile, heapq
6 from bup.helpers import *
7 from bup import _helpers
13 home_repodir = os.path.expanduser('~/.bup')
16 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
17 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
23 class GitError(Exception):
28 """Get the path to the git repository or one of its subdirectories."""
31 raise GitError('You should call check_repo_or_die()')
33 # If there's a .git subdirectory, then the actual repo is in there.
34 gd = os.path.join(repodir, '.git')
35 if os.path.exists(gd):
38 return os.path.join(repodir, sub)
41 def auto_midx(objdir):
42 main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
43 args = [main_exe, 'midx', '--auto', '--dir', objdir]
44 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
46 add_error('%r: returned %d' % (args, rv))
49 def mangle_name(name, mode, gitmode):
50 """Mangle a file name to present an abstract name for segmented files.
51 Mangled file names will have the ".bup" extension added to them. If a
52 file's name already ends with ".bup", a ".bupl" extension is added to
53 disambiguate normal files from semgmented ones.
55 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
57 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one
    of the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be assembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    else:
        # Names that carry no mangling suffix are passed through untouched;
        # without this explicit branch, plain names would fall off the end
        # of the function and yield None instead of a (name, mode) tuple.
        return (name, BUP_NORMAL)
83 def _encode_packobj(type, content):
86 szbits = (sz & 0x0f) | (_typemap[type]<<4)
95 z = zlib.compressobj(1)
97 yield z.compress(content)
def _encode_looseobj(type, content):
    """Yield the zlib-compressed loose-object encoding of 'content'.

    A loose object is the header '<type> <size>\\0' followed by the raw
    content, all deflated as one stream.  The final z.flush() is required:
    without it the compressor's buffered tail is never emitted and the
    encoded object is truncated.
    """
    z = zlib.compressobj(1)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()
108 def _decode_looseobj(buf):
110 s = zlib.decompress(buf)
117 assert(type in _typemap)
118 assert(sz == len(content))
119 return (type, content)
122 def _decode_packobj(buf):
125 type = _typermap[(c & 0x70) >> 4]
132 sz |= (c & 0x7f) << shift
136 return (type, zlib.decompress(buf[i+1:]))
143 def find_offset(self, hash):
144 """Get the offset of an object inside the index file."""
145 idx = self._idx_from_hash(hash)
147 return self._ofs_from_idx(idx)
150 def exists(self, hash):
151 """Return nonempty if the object exists in this index."""
152 return hash and (self._idx_from_hash(hash) != None) and True or None
155 return int(self.fanout[255])
157 def _idx_from_hash(self, hash):
158 global _total_searches, _total_steps
160 assert(len(hash) == 20)
162 start = self.fanout[b1-1] # range -1..254
163 end = self.fanout[b1] # range 0..255
165 _total_steps += 1 # lookup table is a step
168 mid = start + (end-start)/2
169 v = self._idx_to_hash(mid)
179 class PackIdxV1(PackIdx):
180 """Object representation of a Git pack index (version 1) file."""
181 def __init__(self, filename, f):
183 self.idxnames = [self.name]
184 self.map = mmap_read(f)
185 self.fanout = list(struct.unpack('!256I',
186 str(buffer(self.map, 0, 256*4))))
187 self.fanout.append(0) # entry "-1"
188 nsha = self.fanout[255]
189 self.shatable = buffer(self.map, 256*4, nsha*24)
191 def _ofs_from_idx(self, idx):
192 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
194 def _idx_to_hash(self, idx):
195 return str(self.shatable[idx*24+4 : idx*24+24])
198 for i in xrange(self.fanout[255]):
199 yield buffer(self.map, 256*4 + 24*i + 4, 20)
202 class PackIdxV2(PackIdx):
203 """Object representation of a Git pack index (version 2) file."""
204 def __init__(self, filename, f):
206 self.idxnames = [self.name]
207 self.map = mmap_read(f)
208 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
209 self.fanout = list(struct.unpack('!256I',
210 str(buffer(self.map, 8, 256*4))))
211 self.fanout.append(0) # entry "-1"
212 nsha = self.fanout[255]
213 self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
214 self.ofstable = buffer(self.map,
215 8 + 256*4 + nsha*20 + nsha*4,
217 self.ofs64table = buffer(self.map,
218 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
220 def _ofs_from_idx(self, idx):
221 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
223 idx64 = ofs & 0x7fffffff
224 ofs = struct.unpack('!Q',
225 str(buffer(self.ofs64table, idx64*8, 8)))[0]
228 def _idx_to_hash(self, idx):
229 return str(self.shatable[idx*20:(idx+1)*20])
232 for i in xrange(self.fanout[255]):
233 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
236 extract_bits = _helpers.extract_bits
240 """Wrapper which contains data from multiple index files.
241 Multiple index (.midx) files constitute a wrapper around index (.idx) files
242 and make it possible for bup to expand Git's indexing capabilities to vast
245 def __init__(self, filename):
247 self.force_keep = False
248 assert(filename.endswith('.midx'))
249 self.map = mmap_read(open(filename))
250 if str(self.map[0:4]) != 'MIDX':
251 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
252 self.force_keep = True
253 return self._init_failed()
254 ver = struct.unpack('!I', self.map[4:8])[0]
255 if ver < MIDX_VERSION:
256 log('Warning: ignoring old-style (v%d) midx %r\n'
258 self.force_keep = False # old stuff is boring
259 return self._init_failed()
260 if ver > MIDX_VERSION:
261 log('Warning: ignoring too-new (v%d) midx %r\n'
263 self.force_keep = True # new stuff is exciting
264 return self._init_failed()
266 self.bits = _helpers.firstword(self.map[8:12])
267 self.entries = 2**self.bits
268 self.fanout = buffer(self.map, 12, self.entries*4)
269 shaofs = 12 + self.entries*4
270 nsha = self._fanget(self.entries-1)
271 self.shalist = buffer(self.map, shaofs, nsha*20)
272 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
274 def _init_failed(self):
277 self.fanout = buffer('\0\0\0\0')
278 self.shalist = buffer('\0'*20)
281 def _fanget(self, i):
283 s = self.fanout[start:start+4]
284 return _helpers.firstword(s)
287 return str(self.shalist[i*20:(i+1)*20])
289 def exists(self, hash):
290 """Return nonempty if the object exists in the index files."""
291 global _total_searches, _total_steps
294 el = extract_bits(want, self.bits)
296 start = self._fanget(el-1)
297 startv = el << (32-self.bits)
301 end = self._fanget(el)
302 endv = (el+1) << (32-self.bits)
303 _total_steps += 1 # lookup table is a step
304 hashv = _helpers.firstword(hash)
305 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
308 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
309 mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
310 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
312 #print ' %08x' % self._num(v)
315 startv = _helpers.firstword(v)
318 endv = _helpers.firstword(v)
324 for i in xrange(self._fanget(self.entries-1)):
325 yield buffer(self.shalist, i*20, 20)
328 return int(self._fanget(self.entries-1))
333 def __init__(self, dir):
335 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
345 assert(_mpi_count == 0)
348 return iter(idxmerge(self.packs))
351 return sum(len(pack) for pack in self.packs)
353 def exists(self, hash):
354 """Return nonempty if the object exists in the index files."""
355 global _total_searches
357 if hash in self.also:
359 for i in range(len(self.packs)):
361 _total_searches -= 1 # will be incremented by sub-pack
363 # reorder so most recently used packs are searched first
364 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
368 def refresh(self, skip_midx = False):
369 """Refresh the index list.
370 This method verifies if .midx files were superseded (e.g. all of its
371 contents are in another, bigger .midx file) and removes the superseded
374 If skip_midx is True, all work on .midx files will be skipped and .midx
375 files will be removed from the list.
377 The module-global variable 'ignore_midx' can force this function to
378 always act as if skip_midx was True.
380 skip_midx = skip_midx or ignore_midx
381 d = dict((p.name, p) for p in self.packs
382 if not skip_midx or not isinstance(p, PackMidx))
383 if os.path.exists(self.dir):
386 for ix in self.packs:
387 if isinstance(ix, PackMidx):
388 for name in ix.idxnames:
389 d[os.path.join(self.dir, name)] = ix
390 for f in os.listdir(self.dir):
391 full = os.path.join(self.dir, f)
392 if f.endswith('.midx') and not d.get(full):
394 (mxd, mxf) = os.path.split(mx.name)
396 for n in mx.idxnames:
397 if not os.path.exists(os.path.join(mxd, n)):
398 log(('warning: index %s missing\n' +
399 ' used by %s\n') % (n, mxf))
406 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
409 for sub in ix.idxnames:
410 found = d.get(os.path.join(self.dir, sub))
411 if not found or isinstance(found, PackIdx):
412 # doesn't exist, or exists but not in a midx
414 for name in ix.idxnames:
415 d[os.path.join(self.dir, name)] = ix
418 if not any and not ix.force_keep:
419 debug1('midx: removing redundant: %s\n'
420 % os.path.basename(ix.name))
422 for f in os.listdir(self.dir):
423 full = os.path.join(self.dir, f)
424 if f.endswith('.idx') and not d.get(full):
431 self.packs = list(set(d.values()))
432 debug1('PackIdxList: using %d index%s.\n'
433 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
435 def packname_containing(self, hash):
436 # figure out which pack contains a given hash.
437 # FIXME: if the midx file format would just *store* this information,
438 # we could calculate it a lot more efficiently. But it's not needed
439 # often, so let's do it like this.
440 for f in os.listdir(self.dir):
441 if f.endswith('.idx'):
442 full = os.path.join(self.dir, f)
452 """Insert an additional object in the list."""
456 """Remove all additional objects from the list."""
460 def calc_hash(type, content):
461 """Calculate some content's hash in the Git fashion."""
462 header = '%s %d\0' % (type, len(content))
468 def _shalist_sort_key(ent):
469 (mode, name, id) = ent
470 if stat.S_ISDIR(int(mode, 8)):
476 def open_idx(filename):
477 if filename.endswith('.idx'):
478 f = open(filename, 'rb')
480 if header[0:4] == '\377tOc':
481 version = struct.unpack('!I', header[4:8])[0]
483 return PackIdxV2(filename, f)
485 raise GitError('%s: expected idx file version 2, got %d'
486 % (filename, version))
487 elif len(header) == 8 and header[0:4] < '\377tOc':
488 return PackIdxV1(filename, f)
490 raise GitError('%s: unrecognized idx file header' % filename)
491 elif filename.endswith('.midx'):
492 return PackMidx(filename)
494 raise GitError('idx filenames must end with .idx or .midx')
497 def idxmerge(idxlist, final_progress=True):
498 """Generate a list of all the objects reachable in a PackIdxList."""
499 total = sum(len(i) for i in idxlist)
500 iters = (iter(i) for i in idxlist)
501 heap = [(next(it), it) for it in iters]
506 if (count % 10024) == 0:
507 progress('Reading indexes: %.2f%% (%d/%d)\r'
508 % (count*100.0/total, count, total))
516 heapq.heapreplace(heap, (e, it))
520 log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
def _make_objcache():
    """Default objcache factory: an index list over the repo's pack dir."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
    """Writes Git objects inside a pack file."""
528 def __init__(self, objcache_maker=_make_objcache):
534 self.objcache_maker = objcache_maker
542 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
543 self.file = os.fdopen(fd, 'w+b')
544 assert(name.endswith('.pack'))
545 self.filename = name[:-5]
546 self.file.write('PACK\0\0\0\2\0\0\0\0')
547 self.idx = list(list() for i in xrange(256))
549 # the 'sha' parameter is used in client.py's _raw_write(), but not needed
550 # in this basic version.
551 def _raw_write(self, datalist, sha):
554 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
555 # the file never has a *partial* blob. So let's make sure it's
556 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
557 # to our hashsplit algorithm.) f.write() does its own buffering,
558 # but that's okay because we'll flush it in _end().
559 oneblob = ''.join(datalist)
562 crc = zlib.crc32(oneblob) & 0xffffffff
563 self._update_idx(sha, crc, nw)
568 def _update_idx(self, sha, crc, size):
571 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
573 def _write(self, sha, type, content):
577 sha = calc_hash(type, content)
578 size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
581 def breakpoint(self):
582 """Clear byte and object counts and return the last processed id."""
584 self.outbytes = self.count = 0
587 def write(self, type, content):
588 """Write an object in this pack file."""
589 return self._write(calc_hash(type, content), type, content)
591 def _require_objcache(self):
592 if self.objcache is None and self.objcache_maker:
593 self.objcache = self.objcache_maker()
594 if self.objcache is None:
596 "PackWriter not opened or can't check exists w/o objcache")
598 def exists(self, id):
599 """Return non-empty if an object is found in the object cache."""
600 self._require_objcache()
601 return self.objcache.exists(id)
603 def maybe_write(self, type, content):
604 """Write an object to the pack file if not present and return its id."""
605 self._require_objcache()
606 sha = calc_hash(type, content)
607 if not self.exists(sha):
608 self._write(sha, type, content)
609 self.objcache.add(sha)
612 def new_blob(self, blob):
613 """Create a blob object in the pack with the supplied content."""
614 return self.maybe_write('blob', blob)
616 def new_tree(self, shalist):
617 """Create a tree object in the pack."""
618 shalist = sorted(shalist, key = _shalist_sort_key)
620 for (mode,name,bin) in shalist:
623 assert(mode[0] != '0')
625 assert(len(bin) == 20)
626 l.append('%s %s\0%s' % (mode,name,bin))
627 return self.maybe_write('tree', ''.join(l))
629 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
631 if tree: l.append('tree %s' % tree.encode('hex'))
632 if parent: l.append('parent %s' % parent.encode('hex'))
633 if author: l.append('author %s %s' % (author, _git_date(adate)))
634 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
637 return self.maybe_write('commit', '\n'.join(l))
639 def new_commit(self, parent, tree, date, msg):
640 """Create a commit object in the pack."""
641 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
642 commit = self._new_commit(tree, parent,
643 userline, date, userline, date,
648 """Remove the pack file from disk."""
654 os.unlink(self.filename + '.pack')
656 def _end(self, run_midx=True):
658 if not f: return None
664 # update object count
666 cp = struct.pack('!i', self.count)
670 # calculate the pack sha1sum
673 for b in chunkyreader(f):
675 packbin = sum.digest()
679 idx_f = open(self.filename + '.idx', 'wb')
680 obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
683 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
684 if os.path.exists(self.filename + '.map'):
685 os.unlink(self.filename + '.map')
686 os.rename(self.filename + '.pack', nameprefix + '.pack')
687 os.rename(self.filename + '.idx', nameprefix + '.idx')
690 auto_midx(repo('objects/pack'))
693 def close(self, run_midx=True):
694 """Close the pack file and move it to its definitive path."""
695 return self._end(run_midx=run_midx)
697 def _write_pack_idx_v2(self, file, idx, packbin):
704 write('\377tOc\0\0\0\2')
709 write(struct.pack('!i', n))
710 part.sort(key=lambda x: x[0])
712 obj_list_sum = Sha1()
716 obj_list_sum.update(entry[0])
719 write(struct.pack('!I', entry[1]))
723 if entry[2] & 0x80000000:
724 write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
725 ofs64_list.append(struct.pack('!Q', entry[2]))
727 write(struct.pack('!i', entry[2]))
728 for ofs64 in ofs64_list:
732 file.write(sum.digest())
733 return obj_list_sum.hexdigest()
737 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
741 os.environ['GIT_DIR'] = os.path.abspath(repo())
744 def list_refs(refname = None):
745 """Generate a list of tuples in the form (refname,hash).
746 If a ref name is specified, list only this particular ref.
748 argv = ['git', 'show-ref', '--']
751 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
752 out = p.stdout.read().strip()
753 rv = p.wait() # not fatal
757 for d in out.split('\n'):
758 (sha, name) = d.split(' ', 1)
759 yield (name, sha.decode('hex'))
762 def read_ref(refname):
763 """Get the commit id of the most recent commit made on a given ref."""
764 l = list(list_refs(refname))
772 def rev_list(ref, count=None):
773 """Generate a list of reachable commits in reverse chronological order.
775 This generator walks through commits, from child to parent, that are
776 reachable via the specified ref and yields a series of tuples of the form
779 If count is a non-zero integer, limit the number of commits to "count"
782 assert(not ref.startswith('-'))
785 opts += ['-n', str(atoi(count))]
786 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
787 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
791 if s.startswith('commit '):
792 commit = s[7:].decode('hex')
796 rv = p.wait() # not fatal
798 raise GitError, 'git rev-list returned error %d' % rv
801 def rev_get_date(ref):
802 """Get the date of the latest commit on the specified ref."""
803 for (date, commit) in rev_list(ref, count=1):
805 raise GitError, 'no such commit %r' % ref
808 def rev_parse(committish):
809 """Resolve the full hash for 'committish', if it exists.
811 Should be roughly equivalent to 'git rev-parse'.
813 Returns the hex value of the hash if it is found, None if 'committish' does
814 not correspond to anything.
816 head = read_ref(committish)
818 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
821 pL = PackIdxList(repo('objects/pack'))
823 if len(committish) == 40:
825 hash = committish.decode('hex')
835 def update_ref(refname, newval, oldval):
836 """Change the commit pointed to by a branch."""
839 assert(refname.startswith('refs/heads/'))
840 p = subprocess.Popen(['git', 'update-ref', refname,
841 newval.encode('hex'), oldval.encode('hex')],
842 preexec_fn = _gitenv)
843 _git_wait('git update-ref', p)
846 def guess_repo(path=None):
847 """Set the path value in the global variable "repodir".
848 This makes bup look for an existing bup repository, but not fail if a
849 repository doesn't exist. Usually, if you are interacting with a bup
850 repository, you would not be calling this function but using
857 repodir = os.environ.get('BUP_DIR')
859 repodir = os.path.expanduser('~/.bup')
862 def init_repo(path=None):
863 """Create the Git bare repository for bup in a given path."""
865 d = repo() # appends a / to the path
866 parent = os.path.dirname(os.path.dirname(d))
867 if parent and not os.path.exists(parent):
868 raise GitError('parent directory "%s" does not exist\n' % parent)
869 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
870 raise GitError('"%d" exists but is not a directory\n' % d)
871 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
872 preexec_fn = _gitenv)
873 _git_wait('git init', p)
874 # Force the index version configuration in order to ensure bup works
875 # regardless of the version of the installed Git binary.
876 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
877 stdout=sys.stderr, preexec_fn = _gitenv)
878 _git_wait('git config', p)
881 def check_repo_or_die(path=None):
882 """Make sure a bup repository exists, and abort if not.
883 If the path to a particular repository was not specified, this function
884 initializes the default repository automatically.
887 if not os.path.isdir(repo('objects/pack/.')):
888 if repodir == home_repodir:
891 log('error: %r is not a bup/git repository\n' % repo())
896 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
898 while ofs < len(buf):
899 z = buf[ofs:].find('\0')
901 spl = buf[ofs:ofs+z].split(' ', 1)
902 assert(len(spl) == 2)
903 sha = buf[ofs+z+1:ofs+z+1+20]
905 yield (spl[0], spl[1], sha)
910 """Get Git's version and ensure a usable version is installed.
912 The returned version is formatted as an ordered tuple with each position
913 representing a digit in the version tag. For example, the following tuple
914 would represent version 1.6.6.9:
920 p = subprocess.Popen(['git', '--version'],
921 stdout=subprocess.PIPE)
922 gvs = p.stdout.read()
923 _git_wait('git --version', p)
924 m = re.match(r'git version (\S+.\S+)', gvs)
926 raise GitError('git --version weird output: %r' % gvs)
927 _ver = tuple(m.group(1).split('.'))
928 needed = ('1','5', '3', '1')
930 raise GitError('git version %s or higher is required; you have %s'
931 % ('.'.join(needed), '.'.join(_ver)))
935 def _git_wait(cmd, p):
938 raise GitError('%s returned %d' % (cmd, rv))
941 def _git_capture(argv):
942 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
944 _git_wait(repr(argv), p)
948 class _AbortableIter:
949 def __init__(self, it, onabort = None):
951 self.onabort = onabort
959 return self.it.next()
960 except StopIteration, e:
968 """Abort iteration and call the abortion callback, if needed."""
980 """Link to 'git cat-file' that is used to retrieve blob data."""
983 wanted = ('1','5','6')
986 log('warning: git version < %s; bup will be slow.\n'
989 self.get = self._slow_get
991 self.p = self.inprogress = None
992 self.get = self._fast_get
996 self.p.stdout.close()
999 self.inprogress = None
1003 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1004 stdin=subprocess.PIPE,
1005 stdout=subprocess.PIPE,
1008 preexec_fn = _gitenv)
1010 def _fast_get(self, id):
1011 if not self.p or self.p.poll() != None:
1014 assert(self.p.poll() == None)
1016 log('_fast_get: opening %r while %r is open'
1017 % (id, self.inprogress))
1018 assert(not self.inprogress)
1019 assert(id.find('\n') < 0)
1020 assert(id.find('\r') < 0)
1021 assert(not id.startswith('-'))
1022 self.inprogress = id
1023 self.p.stdin.write('%s\n' % id)
1024 self.p.stdin.flush()
1025 hdr = self.p.stdout.readline()
1026 if hdr.endswith(' missing\n'):
1027 self.inprogress = None
1028 raise KeyError('blob %r is missing' % id)
1029 spl = hdr.split(' ')
1030 if len(spl) != 3 or len(spl[0]) != 40:
1031 raise GitError('expected blob, got %r' % spl)
1032 (hex, type, size) = spl
1034 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1035 onabort = self._abort)
1040 assert(self.p.stdout.readline() == '\n')
1041 self.inprogress = None
1042 except Exception, e:
1046 def _slow_get(self, id):
1047 assert(id.find('\n') < 0)
1048 assert(id.find('\r') < 0)
1049 assert(id[0] != '-')
1050 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1053 p = subprocess.Popen(['git', 'cat-file', type, id],
1054 stdout=subprocess.PIPE,
1055 preexec_fn = _gitenv)
1056 for blob in chunkyreader(p.stdout):
1058 _git_wait('git cat-file', p)
1060 def _join(self, it):
1065 elif type == 'tree':
1066 treefile = ''.join(it)
1067 for (mode, name, sha) in treeparse(treefile):
1068 for blob in self.join(sha.encode('hex')):
1070 elif type == 'commit':
1071 treeline = ''.join(it).split('\n')[0]
1072 assert(treeline.startswith('tree '))
1073 for blob in self.join(treeline[5:]):
1076 raise GitError('invalid object type %r: expected blob/tree/commit'
1080 """Generate a list of the content of all blobs that can be reached
1081 from an object. The hash given in 'id' must point to a blob, a tree
1082 or a commit. The content of all blobs that can be seen from trees or
1083 commits will be added to the list.
1086 for d in self._join(self.get(id)):
1088 except StopIteration:
1092 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1094 for (n,c) in list_refs():
1095 if n.startswith('refs/tags/'):
1100 tags[c].append(name) # more than one tag can point at 'c'