1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
6 from bup.helpers import *
7 from bup import _helpers, path, bloom
# Module-level constants.
10 SEEK_END=2 # os.SEEK_END is not defined in python 2.4
# Default repository location used when BUP_DIR is not set.
14 home_repodir = os.path.expanduser('~/.bup')
# Map git object type name <-> the 3-bit type code used in pack object headers.
17 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
18 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
# Base exception type for git-related failures raised throughout this module.
# NOTE(review): listing is fragmented here; the class body and several
# surrounding function definitions are missing intermediate lines.
24 class GitError(Exception):
# --- fragment of repo(sub): resolve a path inside the active repository ---
29 """Get the path to the git repository or one of its subdirectories."""
# Guard: the global repodir must have been set by check_repo_or_die() first.
32 raise GitError('You should call check_repo_or_die()')
34 # If there's a .git subdirectory, then the actual repo is in there.
35 gd = os.path.join(repodir, '.git')
36 if os.path.exists(gd):
39 return os.path.join(repodir, sub)
# --- fragment: rewrite an absolute path so it is relative to the repo root,
# stripping any leading 'index-cache/' component ---
43 full = os.path.abspath(path)
44 fullrepo = os.path.abspath(repo(''))
45 if not fullrepo.endswith('/'):
47 if full.startswith(fullrepo):
48 path = full[len(fullrepo):]
49 if path.startswith('index-cache/'):
50 path = path[len('index-cache/'):]
# --- fragment: collect every directory that may contain pack indexes:
# the repo's own pack dir plus each remote's index-cache dir ---
55 paths = [repo('objects/pack')]
56 paths += glob.glob(repo('index-cache/*/.'))
# Best-effort maintenance: run 'bup midx --auto' and then 'bup bloom' on the
# given object directory.  Failures are reported via add_error() rather than
# raised, so callers are never interrupted by index maintenance problems.
# NOTE(review): the try/except lines around each subprocess.call are missing
# from this listing.
60 def auto_midx(objdir):
61 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
# Output is discarded; only the return code matters.
63 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
65 # make sure 'args' gets printed to help with debugging
66 add_error('%r: exception: %s' % (args, e))
69 add_error('%r: returned %d' % (args, rv))
# Second pass: regenerate the bloom filter for the same directory.
71 args = [path.exe(), 'bloom', '--dir', objdir]
73 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
75 # make sure 'args' gets printed to help with debugging
76 add_error('%r: exception: %s' % (args, e))
79 add_error('%r: returned %d' % (args, rv))
82 def mangle_name(name, mode, gitmode):
83 """Mangle a file name to present an abstract name for segmented files.
84 Mangled file names will have the ".bup" extension added to them. If a
85 file's name already ends with ".bup", a ".bupl" extension is added to
86 disambiguate normal files from semgmented ones.
88 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
90 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
# Modes returned by demangle_name(); see its docstring.
(BUP_NORMAL, BUP_CHUNKED) = (0,1)

def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename, mode), where mode is
    one of the following constants:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be assembled

    For more information on the name mangling algorithm, see mangle_name().
    """
    if name.endswith('.bupl'):
        # '.bupl' marks a normal file whose original name looked mangled.
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        # '.bup' marks a segmented (chunked) file.
        return (name[:-4], BUP_CHUNKED)
    else:
        return (name, BUP_NORMAL)
# Encode an object as a git *pack* object: a variable-length header (3-bit
# type code in bits 4-6 of the first byte, size in 4 + 7*n little-endian
# bits, 0x80 continuation flag) followed by the zlib-compressed content.
# NOTE(review): several header-loop lines are missing from this listing.
116 def _encode_packobj(type, content):
119 szbits = (sz & 0x0f) | (_typemap[type]<<4)
122 if sz: szbits |= 0x80
# Compression level 1: favour speed over ratio.
128 z = zlib.compressobj(1)
130 yield z.compress(content)
# Encode an object in git's *loose* format: zlib('<type> <size>\0' + content).
# NOTE(review): the trailing 'yield z.flush()' line is missing here.
134 def _encode_looseobj(type, content):
135 z = zlib.compressobj(1)
136 yield z.compress('%s %d\0' % (type, len(content)))
137 yield z.compress(content)
# Inverse of _encode_looseobj(): split the decompressed buffer at the first
# NUL into '<type> <size>' header and content, then sanity-check both.
141 def _decode_looseobj(buf):
143 s = zlib.decompress(buf)
150 assert(type in _typemap)
151 assert(sz == len(content))
152 return (type, content)
# Inverse of _encode_packobj(): read the varint header (7 bits per
# continuation byte), then decompress the remainder.
155 def _decode_packobj(buf):
158 type = _typermap[(c & 0x70) >> 4]
165 sz |= (c & 0x7f) << shift
169 return (type, zlib.decompress(buf[i+1:]))
# --- PackIdx base-class fragments: shared binary search over a sorted table
# of 20-byte shas, using the 256-entry fanout table for the first byte. ---
176 def find_offset(self, hash):
177 """Get the offset of an object inside the index file."""
178 idx = self._idx_from_hash(hash)
# Only reached when the hash was found; otherwise falls through (missing
# lines in this listing presumably return None).
180 return self._ofs_from_idx(idx)
183 def exists(self, hash, want_source=False):
184 """Return nonempty if the object exists in this index."""
# Returns the owning .idx basename when want_source is set, else True.
185 if hash and (self._idx_from_hash(hash) != None):
186 return want_source and os.path.basename(self.name) or True
# __len__ body: fanout[255] is the total object count.
190 return int(self.fanout[255])
# Binary search for 'hash'; returns its position in the sha table or None.
192 def _idx_from_hash(self, hash):
193 global _total_searches, _total_steps
195 assert(len(hash) == 20)
# The fanout entry for byte b1-1 gives the start of the range of shas whose
# first byte is b1; the entry for b1 gives its end (entry "-1" is 0).
197 start = self.fanout[b1-1] # range -1..254
198 end = self.fanout[b1] # range 0..255
200 _total_steps += 1 # lookup table is a step
# Classic bisection; Python 2 '/' here is integer division.
203 mid = start + (end-start)/2
204 v = self._idx_to_hash(mid)
214 class PackIdxV1(PackIdx):
215 """Object representation of a Git pack index (version 1) file."""
216 def __init__(self, filename, f):
218 self.idxnames = [self.name]
# mmap the whole idx file; v1 layout: 256 x 4-byte fanout, then 24-byte
# records of (4-byte network-order offset + 20-byte sha) per object.
219 self.map = mmap_read(f)
220 self.fanout = list(struct.unpack('!256I',
221 str(buffer(self.map, 0, 256*4))))
222 self.fanout.append(0) # entry "-1"
223 nsha = self.fanout[255]
225 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
# Pack-file offset of object #idx: first 4 bytes of its 24-byte record.
227 def _ofs_from_idx(self, idx):
228 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
# Binary sha of object #idx: last 20 bytes of its 24-byte record.
230 def _idx_to_hash(self, idx):
231 return str(self.shatable[idx*24+4 : idx*24+24])
# __iter__ body: yield each 20-byte sha in table order.
234 for i in xrange(self.fanout[255]):
235 yield buffer(self.map, 256*4 + 24*i + 4, 20)
238 class PackIdxV2(PackIdx):
239 """Object representation of a Git pack index (version 2) file."""
240 def __init__(self, filename, f):
242 self.idxnames = [self.name]
243 self.map = mmap_read(f)
# v2 magic: '\377tOc' followed by version number 2.
244 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
245 self.fanout = list(struct.unpack('!256I',
246 str(buffer(self.map, 8, 256*4))))
247 self.fanout.append(0) # entry "-1"
248 nsha = self.fanout[255]
# v2 layout: header, fanout, sha table (20B each), crc table (4B each),
# 32-bit offset table (4B each), then the 64-bit offset spill table.
249 self.sha_ofs = 8 + 256*4
250 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
251 self.ofstable = buffer(self.map,
252 self.sha_ofs + nsha*20 + nsha*4,
254 self.ofs64table = buffer(self.map,
255 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
257 def _ofs_from_idx(self, idx):
258 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
# A set high bit means the low 31 bits index the 64-bit offset table
# (for packs > 2GB); the test line is missing from this listing.
260 idx64 = ofs & 0x7fffffff
261 ofs = struct.unpack('!Q',
262 str(buffer(self.ofs64table, idx64*8, 8)))[0]
# Binary sha of object #idx from the contiguous 20-byte sha table.
265 def _idx_to_hash(self, idx):
266 return str(self.shatable[idx*20:(idx+1)*20])
# __iter__ body: yield each 20-byte sha in table order.
269 for i in xrange(self.fanout[255]):
270 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
# C-accelerated helper: take the first 'bits' bits of a sha as an integer.
273 extract_bits = _helpers.extract_bits
276 """Wrapper which contains data from multiple index files.
277 Multiple index (.midx) files constitute a wrapper around index (.idx) files
278 and make it possible for bup to expand Git's indexing capabilities to vast
281 def __init__(self, filename):
# force_keep tells refresh() not to delete this file even if it looks
# redundant (set below for corrupt or too-new files).
283 self.force_keep = False
284 assert(filename.endswith('.midx'))
285 self.map = mmap_read(open(filename))
286 if str(self.map[0:4]) != 'MIDX':
287 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
288 self.force_keep = True
289 return self._init_failed()
290 ver = struct.unpack('!I', self.map[4:8])[0]
291 if ver < MIDX_VERSION:
292 log('Warning: ignoring old-style (v%d) midx %r\n'
294 self.force_keep = False # old stuff is boring
295 return self._init_failed()
296 if ver > MIDX_VERSION:
297 log('Warning: ignoring too-new (v%d) midx %r\n'
299 self.force_keep = True # new stuff is exciting
300 return self._init_failed()
# Layout: 12-byte header, 2**bits 4-byte fanout entries, 20-byte shas,
# one 4-byte "which idx" entry per sha, then NUL-separated idx names.
302 self.bits = _helpers.firstword(self.map[8:12])
303 self.entries = 2**self.bits
304 self.fanout = buffer(self.map, 12, self.entries*4)
305 self.sha_ofs = 12 + self.entries*4
306 self.nsha = nsha = self._fanget(self.entries-1)
307 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
308 self.which_ofs = self.sha_ofs + 20*nsha
309 self.whichlist = buffer(self.map, self.which_ofs, nsha*4)
310 self.idxnames = str(self.map[self.which_ofs + 4*nsha:]).split('\0')
# Give the object a harmless empty shape so callers see zero entries.
312 def _init_failed(self):
315 self.fanout = buffer('\0\0\0\0')
316 self.shatable = buffer('\0'*20)
# Read fanout entry i as a big-endian 32-bit integer.
319 def _fanget(self, i):
321 s = self.fanout[start:start+4]
322 return _helpers.firstword(s)
# Fragment of the sha accessor: 20-byte sha #i from the table.
325 return str(self.shatable[i*20:(i+1)*20])
327 def _get_idx_i(self, i):
328 return struct.unpack('!I', self.whichlist[i*4:(i+1)*4])[0]
330 def _get_idxname(self, i):
331 return self.idxnames[self._get_idx_i(i)]
# Interpolation search over the sorted sha table: because shas are uniformly
# distributed, the first word of a sha predicts its position in [start,end).
333 def exists(self, hash, want_source=False):
334 """Return nonempty if the object exists in the index files."""
335 global _total_searches, _total_steps
# Bucket boundaries from the fanout table for this hash prefix.
338 el = extract_bits(want, self.bits)
340 start = self._fanget(el-1)
341 startv = el << (32-self.bits)
345 end = self._fanget(el)
346 endv = (el+1) << (32-self.bits)
347 _total_steps += 1 # lookup table is a step
348 hashv = _helpers.firstword(hash)
349 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
352 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
# Interpolate the probe position; Python 2 '/' is integer division here.
353 mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
354 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
356 #print ' %08x' % self._num(v)
# Narrow the value bounds along with the index bounds after each probe.
359 startv = _helpers.firstword(v)
362 endv = _helpers.firstword(v)
# Found: return the owning .idx name if requested, else just truthy.
364 return want_source and self._get_idxname(mid) or True
# __iter__ body: yield each 20-byte sha in table order.
368 for i in xrange(self._fanget(self.entries-1)):
369 yield buffer(self.shatable, i*20, 20)
# __len__ body: the last fanout entry is the total sha count.
372 return int(self._fanget(self.entries-1))
# --- PackIdxList fragments: an aggregate over all .idx/.midx files in a
# directory, with an optional bloom-filter fast path. ---
377 def __init__(self, dir):
# Enforce a single live instance (mmaps are expensive).
379 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
384 self.do_bloom = False
391 assert(_mpi_count == 0)
# __iter__ body: merged iteration over every contained pack index.
394 return iter(idxmerge(self.packs))
# __len__ body: total object count across all packs.
397 return sum(len(pack) for pack in self.packs)
399 def exists(self, hash, want_source=False):
400 """Return nonempty if the object exists in the index files."""
401 global _total_searches
# 'also' is a set of shas added via add() but not yet in any index.
403 if hash in self.also:
# Bloom filter: a negative answer is definitive, so we can return early;
# a positive one falls through to the real per-pack search.
405 if self.do_bloom and self.bloom is not None:
406 _total_searches -= 1 # will be incremented by bloom
407 if self.bloom.exists(hash):
408 self.do_bloom = False
411 for i in xrange(len(self.packs)):
413 _total_searches -= 1 # will be incremented by sub-pack
414 ix = p.exists(hash, want_source=want_source)
416 # reorder so most recently used packs are searched first
417 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
422 def refresh(self, skip_midx = False):
423 """Refresh the index list.
424 This method verifies if .midx files were superseded (e.g. all of its
425 contents are in another, bigger .midx file) and removes the superseded
428 If skip_midx is True, all work on .midx files will be skipped and .midx
429 files will be removed from the list.
431 The module-global variable 'ignore_midx' can force this function to
432 always act as if skip_midx was True.
434 self.bloom = None # Always reopen the bloom as it may have been replaced
435 self.do_bloom = False
436 skip_midx = skip_midx or ignore_midx
# Seed the working dict with packs we already have open (keyed by name),
# dropping midx files when they are being skipped.
437 d = dict((p.name, p) for p in self.packs
438 if not skip_midx or not isinstance(p, PackMidx))
439 if os.path.exists(self.dir):
# Register the component .idx names of every open midx so the later .idx
# scan does not reopen indexes that a midx already covers.
442 for ix in self.packs:
443 if isinstance(ix, PackMidx):
444 for name in ix.idxnames:
445 d[os.path.join(self.dir, name)] = ix
# Open any midx files that appeared on disk since the last refresh.
446 for full in glob.glob(os.path.join(self.dir,'*.midx')):
449 (mxd, mxf) = os.path.split(mx.name)
# A midx is only usable if every .idx it references still exists.
451 for n in mx.idxnames:
452 if not os.path.exists(os.path.join(mxd, n)):
453 log(('warning: index %s missing\n' +
454 ' used by %s\n') % (n, mxf))
# Biggest midx first, so smaller (redundant) ones can be discarded.
461 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
464 for sub in ix.idxnames:
465 found = d.get(os.path.join(self.dir, sub))
466 if not found or isinstance(found, PackIdx):
467 # doesn't exist, or exists but not in a midx
472 for name in ix.idxnames:
473 d[os.path.join(self.dir, name)] = ix
# Fully-covered midx files are deleted unless force_keep is set.
474 elif not ix.force_keep:
475 debug1('midx: removing redundant: %s\n'
476 % os.path.basename(ix.name))
# Finally pick up any plain .idx files not yet covered by a midx.
478 for full in glob.glob(os.path.join(self.dir,'*.idx')):
486 bfull = os.path.join(self.dir, 'bup.bloom')
487 if self.bloom is None and os.path.exists(bfull):
488 self.bloom = bloom.ShaBloom(bfull)
489 self.packs = list(set(d.values()))
490 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
# Only trust the bloom filter if it covers at least every indexed object.
491 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
495 debug1('PackIdxList: using %d index%s.\n'
496 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
499 """Insert an additional object in the list."""
503 def calc_hash(type, content):
504 """Calculate some content's hash in the Git fashion."""
505 header = '%s %d\0' % (type, len(content))
511 def _shalist_sort_key(ent):
512 (mode, name, id) = ent
513 if stat.S_ISDIR(int(mode, 8)):
# Factory: open a pack index file and return the matching reader class.
# NOTE(review): the header-read line and some else branches are missing.
519 def open_idx(filename):
520 if filename.endswith('.idx'):
521 f = open(filename, 'rb')
# '\377tOc' magic marks a v2 index; v1 files start directly with fanout
# counts, which always compare below the magic.
523 if header[0:4] == '\377tOc':
524 version = struct.unpack('!I', header[4:8])[0]
526 return PackIdxV2(filename, f)
528 raise GitError('%s: expected idx file version 2, got %d'
529 % (filename, version))
530 elif len(header) == 8 and header[0:4] < '\377tOc':
531 return PackIdxV1(filename, f)
533 raise GitError('%s: unrecognized idx file header' % filename)
534 elif filename.endswith('.midx'):
535 return PackMidx(filename)
537 raise GitError('idx filenames must end with .idx or .midx')
540 def idxmerge(idxlist, final_progress=True):
541 """Generate a list of all the objects reachable in a PackIdxList."""
# Progress callback used while merging (overwrites the same line with \r).
542 def pfunc(count, total):
543 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
544 % (count*100.0/total, count, total))
# Final callback; only prints when final_progress is set (the guard line
# is missing from this listing).
545 def pfinal(count, total):
547 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
548 % (100, total, total))
549 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory: index list over the repo's pack directory."""
    packdir = repo('objects/pack')
    return PackIdxList(packdir)
556 """Writes Git objects insid a pack file."""
557 def __init__(self, objcache_maker=_make_objcache):
563 self.objcache_maker = objcache_maker
571 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
572 self.file = os.fdopen(fd, 'w+b')
573 assert(name.endswith('.pack'))
574 self.filename = name[:-5]
575 self.file.write('PACK\0\0\0\2\0\0\0\0')
576 self.idx = list(list() for i in xrange(256))
578 def _raw_write(self, datalist, sha):
581 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
582 # the file never has a *partial* blob. So let's make sure it's
583 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
584 # to our hashsplit algorithm.) f.write() does its own buffering,
585 # but that's okay because we'll flush it in _end().
586 oneblob = ''.join(datalist)
590 raise GitError, e, sys.exc_info()[2]
592 crc = zlib.crc32(oneblob) & 0xffffffff
593 self._update_idx(sha, crc, nw)
598 def _update_idx(self, sha, crc, size):
601 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
603 def _write(self, sha, type, content):
607 sha = calc_hash(type, content)
608 size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
611 def breakpoint(self):
612 """Clear byte and object counts and return the last processed id."""
614 self.outbytes = self.count = 0
617 def _require_objcache(self):
618 if self.objcache is None and self.objcache_maker:
619 self.objcache = self.objcache_maker()
620 if self.objcache is None:
622 "PackWriter not opened or can't check exists w/o objcache")
def exists(self, id, want_source=False):
    """Return non-empty if an object is found in the object cache."""
    # Make sure the objcache has been created (raises GitError otherwise).
    self._require_objcache()
    found = self.objcache.exists(id, want_source=want_source)
    return found
# Deduplicating write: only emit the object if the objcache has not seen
# its sha yet.  NOTE(review): the trailing 'return sha' line is missing
# from this listing.
629 def maybe_write(self, type, content):
630 """Write an object to the pack file if not present and return its id."""
631 self._require_objcache()
632 sha = calc_hash(type, content)
633 if not self.exists(sha):
634 self._write(sha, type, content)
# Remember the sha so a second maybe_write() of the same content is a no-op.
635 self.objcache.add(sha)
def new_blob(self, blob):
    """Create a blob object in the pack with the supplied content."""
    sha = self.maybe_write('blob', blob)
    return sha
642 def new_tree(self, shalist):
643 """Create a tree object in the pack."""
# Git requires tree entries sorted with the directory-slash rule.
644 shalist = sorted(shalist, key = _shalist_sort_key)
646 for (mode,name,bin) in shalist:
# Git never writes a leading zero on the mode field.
649 assert(mode[0] != '0')
651 assert(len(bin) == 20)
# Tree entry format: '<octal mode> <name>\0<20-byte binary sha>'.
652 l.append('%s %s\0%s' % (mode,name,bin))
653 return self.maybe_write('tree', ''.join(l))
# Build a raw commit object; each argument is optional so callers can
# produce root commits (no parent) etc.
655 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
657 if tree: l.append('tree %s' % tree.encode('hex'))
658 if parent: l.append('parent %s' % parent.encode('hex'))
659 if author: l.append('author %s %s' % (author, _git_date(adate)))
660 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
663 return self.maybe_write('commit', '\n'.join(l))
665 def new_commit(self, parent, tree, date, msg):
666 """Create a commit object in the pack."""
# Same identity is used for both author and committer.
667 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
668 commit = self._new_commit(tree, parent,
669 userline, date, userline, date,
674 """Remove the pack file from disk."""
680 os.unlink(self.filename + '.pack')
682 def _end(self, run_midx=True):
684 if not f: return None
690 # update object count
692 cp = struct.pack('!i', self.count)
696 # calculate the pack sha1sum
699 for b in chunkyreader(f):
701 packbin = sum.digest()
705 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
707 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
708 if os.path.exists(self.filename + '.map'):
709 os.unlink(self.filename + '.map')
710 os.rename(self.filename + '.pack', nameprefix + '.pack')
711 os.rename(self.filename + '.idx', nameprefix + '.idx')
714 auto_midx(repo('objects/pack'))
def close(self, run_midx=True):
    """Close the pack file and move it to its definitive path."""
    result = self._end(run_midx=run_midx)
    return result
# Write a v2 pack index for this pack.  The bulk of the table is filled in
# by the C helper via a read-write mmap; the 64-bit offset spill area (if
# any) is appended afterwards at SEEK_END.
721 def _write_pack_idx_v2(self, filename, idx, packbin):
722 idx_f = open(filename, 'w+b')
# v2 magic '\377tOc' + version 2.
723 idx_f.write('\377tOc\0\0\0\2')
# Fixed-size portion: header + fanout + 28 bytes (20 sha + 4 crc + 4 ofs)
# per object; pre-truncate so the mmap covers it all.
725 ofs64_ofs = 8 + 4*256 + 28*self.count
726 idx_f.truncate(ofs64_ofs)
728 idx_map = mmap_readwrite(idx_f, close=False)
729 idx_f.seek(0, SEEK_END)
730 count = _helpers.write_idx(idx_f, idx_map, idx, self.count)
731 assert(count == self.count)
# Re-read what was written to compute the two trailing checksums.
737 b = idx_f.read(8 + 4*256)
# The pack's name is the sha1 of its sorted object list.
740 obj_list_sum = Sha1()
741 for b in chunkyreader(idx_f, 20*self.count):
743 obj_list_sum.update(b)
744 namebase = obj_list_sum.hexdigest()
746 for b in chunkyreader(idx_f):
# Trailing sha1 of the entire idx file, as git requires.
748 idx_f.write(idx_sum.digest())
# _git_date() fragment: git's "<epoch> <tz>" timestamp format.
755 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
# _gitenv() fragment: point git at the bup repository (used as preexec_fn).
759 os.environ['GIT_DIR'] = os.path.abspath(repo())
762 def list_refs(refname = None):
763 """Generate a list of tuples in the form (refname,hash).
764 If a ref name is specified, list only this particular ref.
766 argv = ['git', 'show-ref', '--']
769 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
770 out = p.stdout.read().strip()
# A nonzero exit just means "no matching refs"; not an error here.
771 rv = p.wait() # not fatal
# show-ref output lines are '<sha> <refname>'.
775 for d in out.split('\n'):
776 (sha, name) = d.split(' ', 1)
777 yield (name, sha.decode('hex'))
780 def read_ref(refname):
781 """Get the commit id of the most recent commit made on a given ref."""
# Materialize the generator; the missing lines presumably pick the single
# matching entry or return None.
782 l = list(list_refs(refname))
790 def rev_list(ref, count=None):
791 """Generate a list of reachable commits in reverse chronological order.
793 This generator walks through commits, from child to parent, that are
794 reachable via the specified ref and yields a series of tuples of the form
797 If count is a non-zero integer, limit the number of commits to "count"
# Defense against option injection via a ref that looks like a flag.
800 assert(not ref.startswith('-'))
803 opts += ['-n', str(atoi(count))]
# '--pretty=format:%ct' makes each commit print its committer timestamp.
804 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
805 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
# Output alternates 'commit <hex>' headers with timestamp lines.
809 if s.startswith('commit '):
810 commit = s[7:].decode('hex')
814 rv = p.wait() # not fatal
816 raise GitError, 'git rev-list returned error %d' % rv
819 def rev_get_date(ref):
820 """Get the date of the latest commit on the specified ref."""
# Only the newest commit is needed, hence count=1; the loop's return line
# is missing from this listing.
821 for (date, commit) in rev_list(ref, count=1):
823 raise GitError, 'no such commit %r' % ref
826 def rev_parse(committish):
827 """Resolve the full hash for 'committish', if it exists.
829 Should be roughly equivalent to 'git rev-parse'.
831 Returns the hex value of the hash if it is found, None if 'committish' does
832 not correspond to anything.
# First try it as a ref name...
834 head = read_ref(committish)
836 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
# ...then as a full 40-character hex sha looked up in the pack indexes.
839 pL = PackIdxList(repo('objects/pack'))
841 if len(committish) == 40:
843 hash = committish.decode('hex')
853 def update_ref(refname, newval, oldval):
854 """Change the commit pointed to by a branch."""
# Only branch heads may be updated through this helper.
857 assert(refname.startswith('refs/heads/'))
# git update-ref checks oldval atomically before moving the ref.
858 p = subprocess.Popen(['git', 'update-ref', refname,
859 newval.encode('hex'), oldval.encode('hex')],
860 preexec_fn = _gitenv)
861 _git_wait('git update-ref', p)
864 def guess_repo(path=None):
865 """Set the path value in the global variable "repodir".
866 This makes bup look for an existing bup repository, but not fail if a
867 repository doesn't exist. Usually, if you are interacting with a bup
868 repository, you would not be calling this function but using
# Precedence: explicit path (missing lines), then $BUP_DIR, then ~/.bup.
875 repodir = os.environ.get('BUP_DIR')
877 repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path.

    Raises GitError if the parent directory is missing or if the target
    exists but is not a directory.
    """
    guess_repo(path)
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        # BUGFIX: was '%d', which raises TypeError on a string path instead
        # of producing the intended error message.
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv)
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv)
    _git_wait('git config', p)
899 def check_repo_or_die(path=None):
900 """Make sure a bup repository exists, and abort if not.
901 If the path to a particular repository was not specified, this function
902 initializes the default repository automatically.
# A usable repo must at least contain an objects/pack directory.
905 if not os.path.isdir(repo('objects/pack/.')):
# Only the default ~/.bup repo is auto-created on first use.
906 if repodir == home_repodir:
909 log('error: %r is not a bup/git repository\n' % repo())
# --- tree-parsing fragment (the def line is missing from this listing):
# walk raw tree-object bytes, one '<mode> <name>\0<20-byte sha>' entry at
# a time. ---
914 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
916 while ofs < len(buf):
917 z = buf[ofs:].find('\0')
919 spl = buf[ofs:ofs+z].split(' ', 1)
920 assert(len(spl) == 2)
921 sha = buf[ofs+z+1:ofs+z+1+20]
923 yield (spl[0], spl[1], sha)
928 """Get Git's version and ensure a usable version is installed.
930 The returned version is formatted as an ordered tuple with each position
931 representing a digit in the version tag. For example, the following tuple
932 would represent version 1.6.6.9:
938 p = subprocess.Popen(['git', '--version'],
939 stdout=subprocess.PIPE)
940 gvs = p.stdout.read()
941 _git_wait('git --version', p)
942 m = re.match(r'git version (\S+.\S+)', gvs)
944 raise GitError('git --version weird output: %r' % gvs)
945 _ver = tuple(m.group(1).split('.'))
946 needed = ('1','5', '3', '1')
948 raise GitError('git version %s or higher is required; you have %s'
949 % ('.'.join(needed), '.'.join(_ver)))
# Wait for subprocess p, raising GitError (tagged with cmd) on nonzero exit.
953 def _git_wait(cmd, p):
956 raise GitError('%s returned %d' % (cmd, rv))
# Run argv inside the repo environment and return its captured stdout.
959 def _git_capture(argv):
960 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
962 _git_wait(repr(argv), p)
# Iterator wrapper that invokes an 'onabort' callback if iteration is
# abandoned early (used by CatPipe to keep its subprocess pipe in sync).
966 class _AbortableIter:
967 def __init__(self, it, onabort = None):
969 self.onabort = onabort
# next() fragment: delegate to the wrapped Py2 iterator.
977 return self.it.next()
978 except StopIteration, e:
# abort() fragment.
986 """Abort iteration and call the abortion callback, if needed."""
998 """Link to 'git cat-file' that is used to retrieve blob data."""
1001 wanted = ('1','5','6')
1004 log('warning: git version < %s; bup will be slow.\n'
1007 self.get = self._slow_get
1009 self.p = self.inprogress = None
1010 self.get = self._fast_get
1014 self.p.stdout.close()
1015 self.p.stdin.close()
1017 self.inprogress = None
1021 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1022 stdin=subprocess.PIPE,
1023 stdout=subprocess.PIPE,
1026 preexec_fn = _gitenv)
1028 def _fast_get(self, id):
1029 if not self.p or self.p.poll() != None:
1032 assert(self.p.poll() == None)
1034 log('_fast_get: opening %r while %r is open'
1035 % (id, self.inprogress))
1036 assert(not self.inprogress)
1037 assert(id.find('\n') < 0)
1038 assert(id.find('\r') < 0)
1039 assert(not id.startswith('-'))
1040 self.inprogress = id
1041 self.p.stdin.write('%s\n' % id)
1042 self.p.stdin.flush()
1043 hdr = self.p.stdout.readline()
1044 if hdr.endswith(' missing\n'):
1045 self.inprogress = None
1046 raise KeyError('blob %r is missing' % id)
1047 spl = hdr.split(' ')
1048 if len(spl) != 3 or len(spl[0]) != 40:
1049 raise GitError('expected blob, got %r' % spl)
1050 (hex, type, size) = spl
1052 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1053 onabort = self._abort)
1058 assert(self.p.stdout.readline() == '\n')
1059 self.inprogress = None
1060 except Exception, e:
# Fallback for old git: one 'git cat-file' subprocess per object.
1064 def _slow_get(self, id):
# Same injection/corruption guards as _fast_get.
1065 assert(id.find('\n') < 0)
1066 assert(id.find('\r') < 0)
1067 assert(id[0] != '-')
# First query the object's type, then stream its content.
1068 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1071 p = subprocess.Popen(['git', 'cat-file', type, id],
1072 stdout=subprocess.PIPE,
1073 preexec_fn = _gitenv)
# Yield lines are missing from this listing; chunks come from the pipe.
1074 for blob in chunkyreader(p.stdout):
1076 _git_wait('git cat-file', p)
# Recursively expand an object into blob contents: blobs yield themselves,
# trees recurse into each entry, commits recurse into their root tree.
1078 def _join(self, it):
1083 elif type == 'tree':
1084 treefile = ''.join(it)
1085 for (mode, name, sha) in treeparse(treefile):
1086 for blob in self.join(sha.encode('hex')):
1088 elif type == 'commit':
# The first line of a commit object is always 'tree <hex>'.
1089 treeline = ''.join(it).split('\n')[0]
1090 assert(treeline.startswith('tree '))
1091 for blob in self.join(treeline[5:]):
1094 raise GitError('invalid object type %r: expected blob/tree/commit'
# join() fragment: public wrapper around _join()/get().
1098 """Generate a list of the content of all blobs that can be reached
1099 from an object. The hash given in 'id' must point to a blob, a tree
1100 or a commit. The content of all blobs that can be seen from trees or
1101 commits will be added to the list.
1104 for d in self._join(self.get(id)):
1106 except StopIteration:
# tags() fragment: invert list_refs() into {commit_hash: [tag names]}.
1110 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1112 for (n,c) in list_refs():
1113 if n.startswith('refs/tags/'):
1118 tags[c].append(name) # more than one tag can point at 'c'