1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, heapq
6 from bup.helpers import *
7 from bup import _helpers
13 home_repodir = os.path.expanduser('~/.bup')
16 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
17 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
# GitError: base exception type for all errors raised by this library.
23 class GitError(Exception):
# repo(sub): resolve a path inside the active bup repository.  Requires the
# module-global 'repodir' to have been set first (see the raise below).
# NOTE(review): the def line and parts of the body are not visible in this
# excerpt — confirm against the full file.
28 """Get the path to the git repository or one of its subdirectories."""
31 raise GitError('You should call check_repo_or_die()')
33 # If there's a .git subdirectory, then the actual repo is in there.
34 gd = os.path.join(repodir, '.git')
35 if os.path.exists(gd):
38 return os.path.join(repodir, sub)
# Spawn 'bup midx --auto' as a subprocess on the given object directory.
# Failures are recorded via add_error() rather than raised, so pack writing
# can proceed even if midx regeneration fails.
41 def auto_midx(objdir):
# Prefer the wrapper script recorded by the main bup executable, falling
# back to argv[0].
42 main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
43 args = [main_exe, 'midx', '--auto', '--dir', objdir]
45 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
47 # make sure 'args' gets printed to help with debugging
48 add_error('%r: exception: %s' % (args, e))
# Nonzero exit status is reported but not fatal.
51 add_error('%r: returned %d' % (args, rv))
54 def mangle_name(name, mode, gitmode):
55 """Mangle a file name to present an abstract name for segmented files.
56 Mangled file names will have the ".bup" extension added to them. If a
57 file's name already ends with ".bup", a ".bupl" extension is added to
58 disambiguate normal files from semgmented ones.
60 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
62 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
(BUP_NORMAL, BUP_CHUNKED) = (0,1)


def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename, mode), where mode is
    one of the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be assembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        # '.bupl' marks a file whose real name ends in '.bup'; strip the tag.
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        # '.bup' marks a chunked (segmented) file.
        return (name[:-4], BUP_CHUNKED)
    else:
        return (name, BUP_NORMAL)
# Generator: encode one object in git packfile format — a variable-length
# header (low 4 bits of size plus the type code in bits 4-6 of the first
# byte) followed by zlib-compressed content.  Interior lines of this
# function are not visible in this excerpt.
88 def _encode_packobj(type, content):
91 szbits = (sz & 0x0f) | (_typemap[type]<<4)
# Compression level 1: favor speed over ratio.
100 z = zlib.compressobj(1)
102 yield z.compress(content)
# Generator: encode one object in git "loose object" format — a zlib stream
# of '<type> <size>\0' followed by the content.
106 def _encode_looseobj(type, content):
107 z = zlib.compressobj(1)
108 yield z.compress('%s %d\0' % (type, len(content)))
109 yield z.compress(content)
# NOTE(review): no final z.flush() is visible in this excerpt; without it
# the zlib stream would be truncated — confirm against the full file.
# Inverse of _encode_looseobj(): decompress 'buf' and split it into the
# '<type> <size>\0' header and the content.  The header-parsing lines are
# not visible in this excerpt.
113 def _decode_looseobj(buf):
115 s = zlib.decompress(buf)
# Sanity checks: type must be a known git object type and the declared
# size must match the actual content length.
122 assert(type in _typemap)
123 assert(sz == len(content))
124 return (type, content)
# Inverse of _encode_packobj(): parse the variable-length pack header
# (type code in bits 4-6 of the first byte, size as a base-128 varint with
# continuation bits), then decompress the remaining payload.
127 def _decode_packobj(buf):
130 type = _typermap[(c & 0x70) >> 4]
# Accumulate 7 size bits per continuation byte.
137 sz |= (c & 0x7f) << shift
141 return (type, zlib.decompress(buf[i+1:]))
148 def find_offset(self, hash):
149 """Get the offset of an object inside the index file."""
150 idx = self._idx_from_hash(hash)
# presumably guarded by 'if idx != None:' on the missing line — the
# not-found return path is not visible in this excerpt.
152 return self._ofs_from_idx(idx)
def exists(self, hash):
    """Return nonempty if the object exists in this index."""
    # Guard: an empty/None hash can never exist.
    if not hash:
        return None
    if self._idx_from_hash(hash) != None:
        return True
    return None
# Tail of __len__: fanout[255] holds the total object count.
160 return int(self.fanout[255])
# Binary-search for a 20-byte sha within the fanout bucket selected by its
# first byte; updates the module-global search/step counters for stats.
162 def _idx_from_hash(self, hash):
163 global _total_searches, _total_steps
165 assert(len(hash) == 20)
167 start = self.fanout[b1-1] # range -1..254
168 end = self.fanout[b1] # range 0..255
170 _total_steps += 1 # lookup table is a step
# Classic bisection within [start, end).
173 mid = start + (end-start)/2
174 v = self._idx_to_hash(mid)
184 class PackIdxV1(PackIdx):
185 """Object representation of a Git pack index (version 1) file."""
186 def __init__(self, filename, f):
188 self.idxnames = [self.name]
189 self.map = mmap_read(f)
# v1 layout: 256-entry fanout table of network-order uint32, then 24-byte
# rows of (4-byte offset, 20-byte sha1).
190 self.fanout = list(struct.unpack('!256I',
191 str(buffer(self.map, 0, 256*4))))
192 self.fanout.append(0) # entry "-1"
193 nsha = self.fanout[255]
194 self.shatable = buffer(self.map, 256*4, nsha*24)
196 def _ofs_from_idx(self, idx):
197 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
199 def _idx_to_hash(self, idx):
200 return str(self.shatable[idx*24+4 : idx*24+24])
# __iter__ body (def line not visible): yield each 20-byte sha1 in index
# order as a zero-copy buffer into the mmap.
203 for i in xrange(self.fanout[255]):
204 yield buffer(self.map, 256*4 + 24*i + 4, 20)
207 class PackIdxV2(PackIdx):
208 """Object representation of a Git pack index (version 2) file."""
209 def __init__(self, filename, f):
211 self.idxnames = [self.name]
212 self.map = mmap_read(f)
# v2 magic '\377tOc' plus version number 2.
213 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
214 self.fanout = list(struct.unpack('!256I',
215 str(buffer(self.map, 8, 256*4))))
216 self.fanout.append(0) # entry "-1"
217 nsha = self.fanout[255]
# v2 layout after the fanout: sha table, crc table (nsha*4, skipped here),
# 32-bit offset table, then a 64-bit offset table for huge packs.
218 self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
219 self.ofstable = buffer(self.map,
220 8 + 256*4 + nsha*20 + nsha*4,
222 self.ofs64table = buffer(self.map,
223 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
225 def _ofs_from_idx(self, idx):
226 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
# If the high bit is set, the low 31 bits index into the 64-bit table.
228 idx64 = ofs & 0x7fffffff
229 ofs = struct.unpack('!Q',
230 str(buffer(self.ofs64table, idx64*8, 8)))[0]
233 def _idx_to_hash(self, idx):
234 return str(self.shatable[idx*20:(idx+1)*20])
# __iter__ body (def line not visible): yield each sha1 in index order.
237 for i in xrange(self.fanout[255]):
238 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
# C-accelerated helper: extract the top N bits of a sha as an int.
241 extract_bits = _helpers.extract_bits
# PackMidx (class header not visible in this excerpt):
245 """Wrapper which contains data from multiple index files.
246 Multiple index (.midx) files constitute a wrapper around index (.idx) files
247 and make it possible for bup to expand Git's indexing capabilities to vast
250 def __init__(self, filename):
# force_keep tells refresh() whether this file may be auto-deleted.
252 self.force_keep = False
253 assert(filename.endswith('.midx'))
254 self.map = mmap_read(open(filename))
255 if str(self.map[0:4]) != 'MIDX':
256 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
257 self.force_keep = True
258 return self._init_failed()
259 ver = struct.unpack('!I', self.map[4:8])[0]
260 if ver < MIDX_VERSION:
261 log('Warning: ignoring old-style (v%d) midx %r\n'
263 self.force_keep = False # old stuff is boring
264 return self._init_failed()
265 if ver > MIDX_VERSION:
266 log('Warning: ignoring too-new (v%d) midx %r\n'
268 self.force_keep = True # new stuff is exciting
269 return self._init_failed()
# File layout: 4-byte magic, 4-byte version, 4-byte bit count, fanout of
# 2**bits uint32 entries, then the sorted sha list and idx file names.
271 self.bits = _helpers.firstword(self.map[8:12])
272 self.entries = 2**self.bits
273 self.fanout = buffer(self.map, 12, self.entries*4)
274 shaofs = 12 + self.entries*4
275 nsha = self._fanget(self.entries-1)
276 self.shalist = buffer(self.map, shaofs, nsha*20)
277 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
# Degrade to an empty (zero-entry) midx so callers still work.
279 def _init_failed(self):
282 self.fanout = buffer('\0\0\0\0')
283 self.shalist = buffer('\0'*20)
286 def _fanget(self, i):
288 s = self.fanout[start:start+4]
289 return _helpers.firstword(s)
# tail of a sha accessor (def line not visible).
292 return str(self.shalist[i*20:(i+1)*20])
294 def exists(self, hash):
295 """Return nonempty if the object exists in the index files."""
296 global _total_searches, _total_steps
# Interpolation search: use the first word of the sha to estimate where in
# the fanout bucket the entry should live, narrowing [start,end) each step.
299 el = extract_bits(want, self.bits)
301 start = self._fanget(el-1)
302 startv = el << (32-self.bits)
306 end = self._fanget(el)
307 endv = (el+1) << (32-self.bits)
308 _total_steps += 1 # lookup table is a step
309 hashv = _helpers.firstword(hash)
310 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
313 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
314 mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
315 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
317 #print ' %08x' % self._num(v)
320 startv = _helpers.firstword(v)
323 endv = _helpers.firstword(v)
# __iter__ body (def line not visible): yield every sha in sorted order.
329 for i in xrange(self._fanget(self.entries-1)):
330 yield buffer(self.shalist, i*20, 20)
# __len__ body: the last fanout entry is the total object count.
333 return int(self._fanget(self.entries-1))
# PackIdxList (class header not visible): aggregates all .idx/.midx files
# in one objects/pack directory and searches them as a unit.
338 def __init__(self, dir):
340 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
350 assert(_mpi_count == 0)
# __iter__ body: merged iteration over all member indexes.
353 return iter(idxmerge(self.packs))
# __len__ body: total object count across all member indexes.
356 return sum(len(pack) for pack in self.packs)
358 def exists(self, hash):
359 """Return nonempty if the object exists in the index files."""
360 global _total_searches
# 'also' is a set of extra hashes registered via add() below.
362 if hash in self.also:
364 for i in range(len(self.packs)):
366 _total_searches -= 1 # will be incremented by sub-pack
368 # reorder so most recently used packs are searched first
369 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
373 def refresh(self, skip_midx = False):
374 """Refresh the index list.
375 This method verifies if .midx files were superseded (e.g. all of its
376 contents are in another, bigger .midx file) and removes the superseded
379 If skip_midx is True, all work on .midx files will be skipped and .midx
380 files will be removed from the list.
382 The module-global variable 'ignore_midx' can force this function to
383 always act as if skip_midx was True.
385 skip_midx = skip_midx or ignore_midx
# 'd' maps on-disk index paths to their loaded index objects.
386 d = dict((p.name, p) for p in self.packs
387 if not skip_midx or not isinstance(p, PackMidx))
388 if os.path.exists(self.dir):
391 for ix in self.packs:
392 if isinstance(ix, PackMidx):
393 for name in ix.idxnames:
394 d[os.path.join(self.dir, name)] = ix
# Load any midx files not already known.
395 for f in os.listdir(self.dir):
396 full = os.path.join(self.dir, f)
397 if f.endswith('.midx') and not d.get(full):
399 (mxd, mxf) = os.path.split(mx.name)
401 for n in mx.idxnames:
402 if not os.path.exists(os.path.join(mxd, n)):
403 log(('warning: index %s missing\n' +
404 ' used by %s\n') % (n, mxf))
# Prefer larger midx files; they supersede smaller ones.
411 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
414 for sub in ix.idxnames:
415 found = d.get(os.path.join(self.dir, sub))
416 if not found or isinstance(found, PackIdx):
417 # doesn't exist, or exists but not in a midx
419 for name in ix.idxnames:
420 d[os.path.join(self.dir, name)] = ix
# A midx fully covered by others is redundant (unless force_keep).
423 if not any and not ix.force_keep:
424 debug1('midx: removing redundant: %s\n'
425 % os.path.basename(ix.name))
# Load any plain .idx files not already covered by a midx.
427 for f in os.listdir(self.dir):
428 full = os.path.join(self.dir, f)
429 if f.endswith('.idx') and not d.get(full):
436 self.packs = list(set(d.values()))
437 debug1('PackIdxList: using %d index%s.\n'
438 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
440 def packname_containing(self, hash):
441 # figure out which pack contains a given hash.
442 # FIXME: if the midx file format would just *store* this information,
443 # we could calculate it a lot more efficiently. But it's not needed
444 # often, so let's do it like this.
445 for f in os.listdir(self.dir):
446 if f.endswith('.idx'):
447 full = os.path.join(self.dir, f)
# add() body (def line not visible): register an extra known hash.
457 """Insert an additional object in the list."""
# zap_also() body (def line not visible): clear the extra-hash set.
461 """Remove all additional objects from the list."""
465 def calc_hash(type, content):
466 """Calculate some content's hash in the Git fashion."""
# Git object id = sha1 of '<type> <size>\0' + content.  The hashing lines
# are not visible in this excerpt.
467 header = '%s %d\0' % (type, len(content))
# Sort key for tree entries.  Git sorts tree entries as though directory
# names had a trailing '/'; the mode string is octal.
473 def _shalist_sort_key(ent):
474 (mode, name, id) = ent
475 if stat.S_ISDIR(int(mode, 8)):
# Open an index file by extension, returning a PackIdxV1, PackIdxV2 or
# PackMidx object as appropriate.
481 def open_idx(filename):
482 if filename.endswith('.idx'):
483 f = open(filename, 'rb')
# '\377tOc' magic marks a v2 .idx; v1 files have no magic and begin with
# the fanout table directly.
485 if header[0:4] == '\377tOc':
486 version = struct.unpack('!I', header[4:8])[0]
488 return PackIdxV2(filename, f)
490 raise GitError('%s: expected idx file version 2, got %d'
491 % (filename, version))
492 elif len(header) == 8 and header[0:4] < '\377tOc':
493 return PackIdxV1(filename, f)
495 raise GitError('%s: unrecognized idx file header' % filename)
496 elif filename.endswith('.midx'):
497 return PackMidx(filename)
499 raise GitError('idx filenames must end with .idx or .midx')
502 def idxmerge(idxlist, final_progress=True):
503 """Generate a list of all the objects reachable in a PackIdxList."""
# K-way merge using a heap seeded with the first entry of each index.
504 total = sum(len(i) for i in idxlist)
505 iters = (iter(i) for i in idxlist)
506 heap = [(next(it), it) for it in iters]
# Progress reporting is throttled to every 10024 objects.
511 if (count % 10024) == 0:
512 progress('Reading indexes: %.2f%% (%d/%d)\r'
513 % (count*100.0/total, count, total))
521 heapq.heapreplace(heap, (e, it))
525 log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
# Default objcache factory for PackWriter: an index list over the local
# repository's pack directory, used for existence checks before writing.
528 def _make_objcache():
529 return PackIdxList(repo('objects/pack'))
532 """Writes Git objects inside a pack file."""
533 def __init__(self, objcache_maker=_make_objcache):
539 self.objcache_maker = objcache_maker
# _open() body (def line not visible): create the temporary .pack file and
# write the 12-byte pack header ('PACK', version 2, zero object count —
# the count is patched in by _end()).
547 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
548 self.file = os.fdopen(fd, 'w+b')
549 assert(name.endswith('.pack'))
550 self.filename = name[:-5]
551 self.file.write('PACK\0\0\0\2\0\0\0\0')
# Per-first-byte buckets of (sha, crc, offset) for the index written later.
552 self.idx = list(list() for i in xrange(256))
554 # the 'sha' parameter is used in client.py's _raw_write(), but not needed
555 # in this basic version.
556 def _raw_write(self, datalist, sha):
559 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
560 # the file never has a *partial* blob. So let's make sure it's
561 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
562 # to our hashsplit algorithm.) f.write() does its own buffering,
563 # but that's okay because we'll flush it in _end().
564 oneblob = ''.join(datalist)
# Re-raise write failures as GitError, preserving the traceback.
568 raise GitError, e, sys.exc_info()[2]
570 crc = zlib.crc32(oneblob) & 0xffffffff
571 self._update_idx(sha, crc, nw)
576 def _update_idx(self, sha, crc, size):
# Record the object's start offset (current position minus bytes written).
579 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
581 def _write(self, sha, type, content):
# In verbose/paranoid mode the sha is recomputed here (guard not visible).
585 sha = calc_hash(type, content)
586 size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
589 def breakpoint(self):
590 """Clear byte and object counts and return the last processed id."""
592 self.outbytes = self.count = 0
595 def write(self, type, content):
596 """Write an object in this pack file."""
597 return self._write(calc_hash(type, content), type, content)
# Lazily build the object cache; raise if none can be made.
599 def _require_objcache(self):
600 if self.objcache is None and self.objcache_maker:
601 self.objcache = self.objcache_maker()
602 if self.objcache is None:
604 "PackWriter not opened or can't check exists w/o objcache")
606 def exists(self, id):
607 """Return non-empty if an object is found in the object cache."""
608 self._require_objcache()
609 return self.objcache.exists(id)
611 def maybe_write(self, type, content):
612 """Write an object to the pack file if not present and return its id."""
613 self._require_objcache()
614 sha = calc_hash(type, content)
615 if not self.exists(sha):
616 self._write(sha, type, content)
617 self.objcache.add(sha)
620 def new_blob(self, blob):
621 """Create a blob object in the pack with the supplied content."""
622 return self.maybe_write('blob', blob)
624 def new_tree(self, shalist):
625 """Create a tree object in the pack."""
# Entries must be sorted the way git sorts trees (see _shalist_sort_key).
626 shalist = sorted(shalist, key = _shalist_sort_key)
628 for (mode,name,bin) in shalist:
# git mode strings never have a leading zero, and 'bin' is a raw sha1.
631 assert(mode[0] != '0')
633 assert(len(bin) == 20)
634 l.append('%s %s\0%s' % (mode,name,bin))
635 return self.maybe_write('tree', ''.join(l))
# Build a commit object body line by line; all fields are optional here.
637 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
639 if tree: l.append('tree %s' % tree.encode('hex'))
640 if parent: l.append('parent %s' % parent.encode('hex'))
641 if author: l.append('author %s %s' % (author, _git_date(adate)))
642 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
645 return self.maybe_write('commit', '\n'.join(l))
647 def new_commit(self, parent, tree, date, msg):
648 """Create a commit object in the pack."""
649 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
650 commit = self._new_commit(tree, parent,
651 userline, date, userline, date,
# abort() body (def line not visible): discard the partial pack file.
656 """Remove the pack file from disk."""
662 os.unlink(self.filename + '.pack')
# Finish the pack: patch the object count into the header, append the pack
# sha1 trailer, write the .idx, and rename both to their content-addressed
# final names.  Several interior lines are not visible in this excerpt.
664 def _end(self, run_midx=True):
666 if not f: return None
672 # update object count
674 cp = struct.pack('!i', self.count)
678 # calculate the pack sha1sum
681 for b in chunkyreader(f):
683 packbin = sum.digest()
687 idx_f = open(self.filename + '.idx', 'wb')
688 obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
# Final name is derived from the sha of the sorted object list.
691 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
692 if os.path.exists(self.filename + '.map'):
693 os.unlink(self.filename + '.map')
694 os.rename(self.filename + '.pack', nameprefix + '.pack')
695 os.rename(self.filename + '.idx', nameprefix + '.idx')
# Optionally regenerate the midx now that a new pack exists.
698 auto_midx(repo('objects/pack'))
701 def close(self, run_midx=True):
702 """Close the pack file and move it to its definitive path."""
703 return self._end(run_midx=run_midx)
# Write a version-2 pack index for the accumulated (sha, crc, offset)
# entries; returns the hex sha1 of the sorted object list (used to name
# the pack).  Several interior lines are not visible in this excerpt.
705 def _write_pack_idx_v2(self, file, idx, packbin):
# v2 magic + version.
712 write('\377tOc\0\0\0\2')
# Fanout: cumulative object count per first-byte bucket.
717 write(struct.pack('!i', n))
718 part.sort(key=lambda x: x[0])
720 obj_list_sum = Sha1()
724 obj_list_sum.update(entry[0])
# CRC table entry.
727 write(struct.pack('!I', entry[1]))
# Offsets >= 2**31 go into the trailing 64-bit table; the 32-bit slot
# stores the 64-bit table index with the high bit set.
731 if entry[2] & 0x80000000:
732 write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
733 ofs64_list.append(struct.pack('!Q', entry[2]))
735 write(struct.pack('!i', entry[2]))
736 for ofs64 in ofs64_list:
740 file.write(sum.digest())
741 return obj_list_sum.hexdigest()
# Tail of _git_date (def line not visible): git-style '<epoch> <tz>' string.
745 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
# Tail of _gitenv (def line not visible): point GIT_DIR at the bup repo;
# used as a subprocess preexec_fn below.
749 os.environ['GIT_DIR'] = os.path.abspath(repo())
752 def list_refs(refname = None):
753 """Generate a list of tuples in the form (refname,hash).
754 If a ref name is specified, list only this particular ref.
756 argv = ['git', 'show-ref', '--']
759 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
760 out = p.stdout.read().strip()
761 rv = p.wait() # not fatal
# Each output line is '<sha> <refname>'; yield (name, binary sha).
765 for d in out.split('\n'):
766 (sha, name) = d.split(' ', 1)
767 yield (name, sha.decode('hex'))
770 def read_ref(refname):
771 """Get the commit id of the most recent commit made on a given ref."""
# The extraction of the sha from 'l' is not visible in this excerpt.
772 l = list(list_refs(refname))
780 def rev_list(ref, count=None):
781 """Generate a list of reachable commits in reverse chronological order.
783 This generator walks through commits, from child to parent, that are
784 reachable via the specified ref and yields a series of tuples of the form
787 If count is a non-zero integer, limit the number of commits to "count"
# A leading '-' would be interpreted as a git option — refuse it.
790 assert(not ref.startswith('-'))
793 opts += ['-n', str(atoi(count))]
# '--pretty=format:%ct' makes each commit line be followed by its
# committer timestamp.
794 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
795 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
799 if s.startswith('commit '):
800 commit = s[7:].decode('hex')
804 rv = p.wait() # not fatal
806 raise GitError, 'git rev-list returned error %d' % rv
809 def rev_get_date(ref):
810 """Get the date of the latest commit on the specified ref."""
# The 'return date' line is not visible in this excerpt.
811 for (date, commit) in rev_list(ref, count=1):
813 raise GitError, 'no such commit %r' % ref
816 def rev_parse(committish):
817 """Resolve the full hash for 'committish', if it exists.
819 Should be roughly equivalent to 'git rev-parse'.
821 Returns the hex value of the hash if it is found, None if 'committish' does
822 not correspond to anything.
# First try it as a ref name.
824 head = read_ref(committish)
826 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
# Otherwise, if it looks like a full 40-char hex sha, check the pack
# indexes directly.
829 pL = PackIdxList(repo('objects/pack'))
831 if len(committish) == 40:
833 hash = committish.decode('hex')
843 def update_ref(refname, newval, oldval):
844 """Change the commit pointed to by a branch."""
# Only branch heads may be updated; newval/oldval are binary sha1s.
847 assert(refname.startswith('refs/heads/'))
848 p = subprocess.Popen(['git', 'update-ref', refname,
849 newval.encode('hex'), oldval.encode('hex')],
850 preexec_fn = _gitenv)
851 _git_wait('git update-ref', p)
854 def guess_repo(path=None):
855 """Set the path value in the global variable "repodir".
856 This makes bup look for an existing bup repository, but not fail if a
857 repository doesn't exist. Usually, if you are interacting with a bup
858 repository, you would not be calling this function but using
# Fall back to $BUP_DIR, then ~/.bup, when no explicit path is given.
865 repodir = os.environ.get('BUP_DIR')
867 repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path.

    Raises GitError if the parent directory is missing or the target path
    exists but is not a directory, or if the underlying git commands fail.
    """
    guess_repo(path)  # select the repodir global from 'path'/$BUP_DIR/~/.bup
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        # FIX: this previously used '%d' (integer format) on the path
        # string, which raised TypeError instead of the intended GitError.
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv)
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv)
    _git_wait('git config', p)
889 def check_repo_or_die(path=None):
890 """Make sure a bup repository exists, and abort if not.
891 If the path to a particular repository was not specified, this function
892 initializes the default repository automatically.
# A valid repository must at least contain an objects/pack directory.
895 if not os.path.isdir(repo('objects/pack/.')):
896 if repodir == home_repodir:
# For any other directory, report the error and exit (exit call not
# visible in this excerpt).
899 log('error: %r is not a bup/git repository\n' % repo())
# Tree parser (def line not visible in this excerpt).  Raw git tree format
# is a sequence of '<octal mode> <name>\0<20-byte sha>' records.
904 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
906 while ofs < len(buf):
907 z = buf[ofs:].find('\0')
909 spl = buf[ofs:ofs+z].split(' ', 1)
910 assert(len(spl) == 2)
911 sha = buf[ofs+z+1:ofs+z+1+20]
913 yield (spl[0], spl[1], sha)
# Git version check (def line not visible in this excerpt).
918 """Get Git's version and ensure a usable version is installed.
920 The returned version is formatted as an ordered tuple with each position
921 representing a digit in the version tag. For example, the following tuple
922 would represent version 1.6.6.9:
928 p = subprocess.Popen(['git', '--version'],
929 stdout=subprocess.PIPE)
930 gvs = p.stdout.read()
931 _git_wait('git --version', p)
932 m = re.match(r'git version (\S+.\S+)', gvs)
934 raise GitError('git --version weird output: %r' % gvs)
# NOTE(review): the version parts are compared as strings, so e.g.
# '10' < '9' lexicographically — adequate for the 1.5.x minimum checked
# here, but worth confirming.
935 _ver = tuple(m.group(1).split('.'))
936 needed = ('1','5', '3', '1')
938 raise GitError('git version %s or higher is required; you have %s'
939 % ('.'.join(needed), '.'.join(_ver)))
# Wait for a git subprocess and raise GitError on nonzero exit (the wait
# call itself is not visible in this excerpt).
943 def _git_wait(cmd, p):
946 raise GitError('%s returned %d' % (cmd, rv))
# Run a git command in the repo and return its captured stdout.
949 def _git_capture(argv):
950 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
952 _git_wait(repr(argv), p)
# Iterator wrapper that lets a consumer stop mid-stream and notify the
# producer via an 'onabort' callback (used by CatPipe to resynchronize
# its 'git cat-file --batch' subprocess).
956 class _AbortableIter:
957 def __init__(self, it, onabort = None):
959 self.onabort = onabort
# next() body: delegate to the wrapped iterator; StopIteration marks
# completion (handling lines not fully visible in this excerpt).
967 return self.it.next()
968 except StopIteration, e:
976 """Abort iteration and call the abortion callback, if needed."""
# CatPipe (class header not visible in this excerpt).
988 """Link to 'git cat-file' that is used to retrieve blob data."""
# Git older than 1.5.6 lacks 'cat-file --batch'; fall back to one
# subprocess per object.
991 wanted = ('1','5','6')
994 log('warning: git version < %s; bup will be slow.\n'
997 self.get = self._slow_get
999 self.p = self.inprogress = None
1000 self.get = self._fast_get
# _abort() body: tear down the batch subprocess so _fast_get restarts it.
1004 self.p.stdout.close()
1005 self.p.stdin.close()
1007 self.inprogress = None
# _restart() body: (re)spawn the long-lived batch subprocess.
1011 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1012 stdin=subprocess.PIPE,
1013 stdout=subprocess.PIPE,
1016 preexec_fn = _gitenv)
1018 def _fast_get(self, id):
1019 if not self.p or self.p.poll() != None:
1022 assert(self.p.poll() == None)
# Only one object may be streamed from the batch pipe at a time.
1024 log('_fast_get: opening %r while %r is open'
1025 % (id, self.inprogress))
1026 assert(not self.inprogress)
# Reject ids that could confuse the line-oriented batch protocol.
1027 assert(id.find('\n') < 0)
1028 assert(id.find('\r') < 0)
1029 assert(not id.startswith('-'))
1030 self.inprogress = id
1031 self.p.stdin.write('%s\n' % id)
1032 self.p.stdin.flush()
# Response header: '<sha> <type> <size>\n', or '<id> missing\n'.
1033 hdr = self.p.stdout.readline()
1034 if hdr.endswith(' missing\n'):
1035 self.inprogress = None
1036 raise KeyError('blob %r is missing' % id)
1037 spl = hdr.split(' ')
1038 if len(spl) != 3 or len(spl[0]) != 40:
1039 raise GitError('expected blob, got %r' % spl)
1040 (hex, type, size) = spl
# Stream exactly 'size' bytes; aborting resynchronizes the pipe.
1042 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1043 onabort = self._abort)
# The batch protocol terminates each object with a newline.
1048 assert(self.p.stdout.readline() == '\n')
1049 self.inprogress = None
1050 except Exception, e:
# Fallback path for old git: one 'git cat-file' subprocess per object.
1054 def _slow_get(self, id):
1055 assert(id.find('\n') < 0)
1056 assert(id.find('\r') < 0)
1057 assert(id[0] != '-')
1058 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1061 p = subprocess.Popen(['git', 'cat-file', type, id],
1062 stdout=subprocess.PIPE,
1063 preexec_fn = _gitenv)
1064 for blob in chunkyreader(p.stdout):
1066 _git_wait('git cat-file', p)
# Recursively yield the blob contents reachable from one object: blobs
# yield themselves, trees recurse into entries, commits recurse into
# their root tree.
1068 def _join(self, it):
1073 elif type == 'tree':
1074 treefile = ''.join(it)
1075 for (mode, name, sha) in treeparse(treefile):
1076 for blob in self.join(sha.encode('hex')):
1078 elif type == 'commit':
1079 treeline = ''.join(it).split('\n')[0]
1080 assert(treeline.startswith('tree '))
1081 for blob in self.join(treeline[5:]):
1084 raise GitError('invalid object type %r: expected blob/tree/commit'
# join(): public wrapper around _join() (def line not visible).
1088 """Generate a list of the content of all blobs that can be reached
1089 from an object. The hash given in 'id' must point to a blob, a tree
1090 or a commit. The content of all blobs that can be seen from trees or
1091 commits will be added to the list.
1094 for d in self._join(self.get(id)):
1096 except StopIteration:
# tags() (def line not visible in this excerpt): collect refs/tags/*.
1100 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1102 for (n,c) in list_refs():
1103 if n.startswith('refs/tags/'):
1108 tags[c].append(name) # more than one tag can point at 'c'