1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, zlib, time, subprocess, struct, stat, re, tempfile
7 from bup.helpers import *
11 home_repodir = os.path.expanduser('~/.bup')
14 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
15 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
18 class GitError(Exception):
# Module-wide exception type: raised for any git-interaction failure in
# this library. (Class body is elided in this listing.)
23 """Get the path to the git repository or one of its subdirectories."""
26 raise GitError('You should call check_repo_or_die()')
28 # If there's a .git subdirectory, then the actual repo is in there.
29 gd = os.path.join(repodir, '.git')
30 if os.path.exists(gd):
33 return os.path.join(repodir, sub)
36 def mangle_name(name, mode, gitmode):
37 """Mangle a file name to present an abstract name for segmented files.
38 Mangled file names will have the ".bup" extension added to them. If a
39 file's name already ends with ".bup", a ".bupl" extension is added to
40 disambiguate normal files from segmented ones.
# First case: a regular file (by filesystem mode) stored as something other
# than a regular git blob, i.e. a file that was chunked by bup.
42 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
# Second case: the name collides with the mangling suffix itself
# (ends with '.bup', or would after the trailing char is dropped).
# Return statements for each branch are elided in this listing.
44 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
# Sentinel values describing how a stored file should be read back.
50 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
51 def demangle_name(name):
52 """Remove name mangling from a file name, if necessary.
54 The return value is a tuple (demangled_filename,mode), where mode is one of
57 * BUP_NORMAL : files that should be read as-is from the repository
58 * BUP_CHUNKED : files that were chunked and need to be assembled
60 For more information on the name mangling algorithm, see mangle_name()
# '.bupl' (5 chars) marks a normal file whose real name ended in '.bup';
# '.bup' (4 chars) marks a chunked file; anything else is untouched.
62 if name.endswith('.bupl'):
63 return (name[:-5], BUP_NORMAL)
64 elif name.endswith('.bup'):
65 return (name[:-4], BUP_CHUNKED)
67 return (name, BUP_NORMAL)
# Encode an object as a git *packfile* entry: variable-length size header
# (low 4 bits of size plus the 3-bit type code) followed by zlib data.
70 def _encode_packobj(type, content):
73 szbits = (sz & 0x0f) | (_typemap[type]<<4)
82 z = zlib.compressobj(1)
84 yield z.compress(content)
# Encode an object in git *loose object* format: zlib-compressed
# '<type> <size>\0' header followed by the raw content.
88 def _encode_looseobj(type, content):
89 z = zlib.compressobj(1)
90 yield z.compress('%s %d\0' % (type, len(content)))
91 yield z.compress(content)
# Inverse of _encode_looseobj: returns (type, content), asserting that the
# declared type is known and the declared size matches.
95 def _decode_looseobj(buf):
97 s = zlib.decompress(buf)
104 assert(type in _typemap)
105 assert(sz == len(content))
106 return (type, content)
# Inverse of _encode_packobj: parse the variable-length header (7 bits of
# size per continuation byte), then decompress the remainder.
109 def _decode_packobj(buf):
112 type = _typermap[(c & 0x70) >> 4]
119 sz |= (c & 0x7f) << shift
123 return (type, zlib.decompress(buf[i+1:]))
127 """Object representation of a Git pack index file."""
128 def __init__(self, filename):
130 self.map = mmap_read(open(filename))
131 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
132 self.fanout = list(struct.unpack('!256I',
133 str(buffer(self.map, 8, 256*4))))
134 self.fanout.append(0) # entry "-1"
135 nsha = self.fanout[255]
136 self.ofstable = buffer(self.map,
137 8 + 256*4 + nsha*20 + nsha*4,
139 self.ofs64table = buffer(self.map,
140 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
# Translate a position in the sha table into a pack-file byte offset.
# Offsets with the high bit set are indirections into the 64-bit table.
142 def _ofs_from_idx(self, idx):
143 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
145 idx64 = ofs & 0x7fffffff
# NOTE(review): this unpacks an 8-byte slice with '!I', which expects
# exactly 4 bytes and would raise struct.error; the 64-bit entry should
# be read with '!Q'. Verify against upstream before relying on >2GB packs.
146 ofs = struct.unpack('!I',
147 str(buffer(self.ofs64table, idx64*8, 8)))[0]
# Binary-search the sorted sha table for a 20-byte binary hash, using the
# fanout table to narrow the initial [start, end) range by first byte.
150 def _idx_from_hash(self, hash):
151 assert(len(hash) == 20)
153 start = self.fanout[b1-1] # range -1..254
154 end = self.fanout[b1] # range 0..255
155 buf = buffer(self.map, 8 + 256*4, end*20)
158 mid = start + (end-start)/2
159 v = str(buf[mid*20:(mid+1)*20])
168 def find_offset(self, hash):
169 """Get the offset of an object inside the index file."""
170 idx = self._idx_from_hash(hash)
172 return self._ofs_from_idx(idx)
175 def exists(self, hash):
176 """Return nonempty if the object exists in this index."""
# NOTE(review): 'x != None' should be 'x is not None'; the and/or chain
# predates conditional expressions (Python 2 idiom).
177 return hash and (self._idx_from_hash(hash) != None) and True or None
# __iter__/__len__ (headers elided): yield each 20-byte sha buffer in
# order; length is the total object count from the fanout table.
180 for i in xrange(self.fanout[255]):
181 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
184 return int(self.fanout[255])
187 def extract_bits(buf, nbits):
188 """Take the first 'nbits' bits from 'buf' and return them as an integer."""
# Read the first 4 bytes big-endian and keep only the top nbits;
# requires 0 < nbits <= 32 and len(buf) >= 4.
189 mask = (1<<nbits) - 1
190 v = struct.unpack('!I', buf[0:4])[0]
191 v = (v >> (32-nbits)) & mask
196 """Wrapper which contains data from multiple index files.
197 Multiple index (.midx) files constitute a wrapper around index (.idx) files
198 and make it possible for bup to expand Git's indexing capabilities to vast
201 def __init__(self, filename):
203 assert(filename.endswith('.midx'))
204 self.map = mmap_read(open(filename))
205 if str(self.map[0:8]) == 'MIDX\0\0\0\1':
206 log('Warning: ignoring old-style midx %r\n' % filename)
209 self.fanout = buffer('\0\0\0\0')
210 self.shalist = buffer('\0'*20)
213 assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
214 self.bits = struct.unpack('!I', self.map[8:12])[0]
215 self.entries = 2**self.bits
216 self.fanout = buffer(self.map, 12, self.entries*4)
217 shaofs = 12 + self.entries*4
218 nsha = self._fanget(self.entries-1)
219 self.shalist = buffer(self.map, shaofs, nsha*20)
220 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
222 def _fanget(self, i):
224 s = self.fanout[start:start+4]
225 return struct.unpack('!I', s)[0]
227 def exists(self, hash):
228 """Return nonempty if the object exists in the index files."""
# Use the top self.bits bits of the hash to index the fanout, then
# binary-search the narrowed sha range (loop body partially elided).
230 el = extract_bits(want, self.bits)
232 start = self._fanget(el-1)
235 end = self._fanget(el)
237 mid = start + (end-start)/2
238 v = str(self.shalist[mid*20:(mid+1)*20])
# __iter__/__len__ (headers elided): iterate every 20-byte sha; length is
# the final fanout entry, i.e. the total object count.
248 for i in xrange(self._fanget(self.entries-1)):
249 yield buffer(self.shalist, i*20, 20)
252 return int(self._fanget(self.entries-1))
# PackIdxList aggregates every .idx/.midx in a directory behind a single
# exists() interface (class header elided in this listing).
257 def __init__(self, dir):
# _mpi_count appears to be a module-global guard enforcing at most one
# live PackIdxList -- TODO confirm; surrounding lines are elided.
259 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
269 assert(_mpi_count == 0)
# __iter__/__len__ (headers elided): merge-iterate all packs / sum counts.
272 return iter(idxmerge(self.packs))
275 return sum(len(pack) for pack in self.packs)
277 def exists(self, hash):
278 """Return nonempty if the object exists in the index files."""
# 'self.also' is a set of extra hashes registered via add() below.
279 if hash in self.also:
281 for i in range(len(self.packs)):
284 # reorder so most recently used packs are searched first
285 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
289 def refresh(self, skip_midx = False):
290 """Refresh the index list.
291 This method verifies if .midx files were superseded (e.g. all of its
292 contents are in another, bigger .midx file) and removes the superseded
295 If skip_midx is True, all work on .midx files will be skipped and .midx
296 files will be removed from the list.
298 The module-global variable 'ignore_midx' can force this function to
299 always act as if skip_midx was True.
301 skip_midx = skip_midx or ignore_midx
# Start from the already-loaded packs, keyed by filename, dropping any
# midx entries when midx handling is disabled.
302 d = dict((p.name, p) for p in self.packs
303 if not skip_midx or not isinstance(p, PackMidx))
304 if os.path.exists(self.dir):
# Map each .idx file that is already covered by a loaded midx to that
# midx, so it isn't opened a second time below.
307 for ix in self.packs:
308 if isinstance(ix, PackMidx):
309 for name in ix.idxnames:
310 d[os.path.join(self.dir, name)] = ix
# Load any .midx files not seen yet; warn when a midx references an
# .idx that no longer exists on disk.
311 for f in os.listdir(self.dir):
312 full = os.path.join(self.dir, f)
313 if f.endswith('.midx') and not d.get(full):
315 (mxd, mxf) = os.path.split(mx.name)
317 for n in mx.idxnames:
318 if not os.path.exists(os.path.join(mxd, n)):
319 log(('warning: index %s missing\n' +
320 ' used by %s\n') % (n, mxf))
# Biggest midx first, so smaller (likely redundant) ones lose below.
324 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
327 for sub in ix.idxnames:
328 found = d.get(os.path.join(self.dir, sub))
329 if not found or isinstance(found, PackIdx):
330 # doesn't exist, or exists but not in a midx
332 for name in ix.idxnames:
333 d[os.path.join(self.dir, name)] = ix
337 log('midx: removing redundant: %s\n'
338 % os.path.basename(ix.name))
# Finally pick up plain .idx files not already covered by a midx.
340 for f in os.listdir(self.dir):
341 full = os.path.join(self.dir, f)
342 if f.endswith('.idx') and not d.get(full):
345 self.packs = list(set(d.values()))
346 log('PackIdxList: using %d index%s.\n'
347 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
350 """Insert an additional object in the list."""
354 """Remove all additional objects from the list."""
358 def calc_hash(type, content):
359 """Calculate some content's hash in the Git fashion."""
# Git object ids are SHA-1 over '<type> <size>\0' + content; the hashing
# lines themselves are elided from this listing.
360 header = '%s %d\0' % (type, len(content))
# Sort key matching git's tree ordering: directories sort as if their
# name had a trailing '/' (the return lines are elided).
366 def _shalist_sort_key(ent):
367 (mode, name, id) = ent
368 if stat.S_ISDIR(int(mode, 8)):
374 def idxmerge(idxlist):
375 """Generate a list of all the objects reachable in a PackIdxList."""
# k-way merge over the (already sorted) per-index iterators via a heap;
# heap-pop/duplicate-skip lines are elided from this listing.
376 total = sum(len(i) for i in idxlist)
377 iters = (iter(i) for i in idxlist)
378 heap = [(next(it), it) for it in iters]
# Progress line roughly every 10k objects to keep logging cheap.
383 if (count % 10024) == 0:
384 progress('Reading indexes: %.2f%% (%d/%d)\r'
385 % (count*100.0/total, count, total))
393 heapq.heapreplace(heap, (e, it))
396 log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
400 """Writes Git objects insid a pack file."""
401 def __init__(self, objcache_maker=None):
406 self.objcache_maker = objcache_maker
412 def _make_objcache(self):
413 if self.objcache == None:
414 if self.objcache_maker:
415 self.objcache = self.objcache_maker()
417 self.objcache = PackIdxList(repo('objects/pack'))
421 self._make_objcache()
422 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
423 self.file = os.fdopen(fd, 'w+b')
424 assert(name.endswith('.pack'))
425 self.filename = name[:-5]
426 self.file.write('PACK\0\0\0\2\0\0\0\0')
428 def _raw_write(self, datalist):
431 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
432 # the file never has a *partial* blob. So let's make sure it's
433 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
434 # to our hashsplit algorithm.) f.write() does its own buffering,
435 # but that's okay because we'll flush it in _end().
436 oneblob = ''.join(datalist)
438 self.outbytes += len(oneblob)
# Write one object: pack-encode the content and append it (the object
# count bookkeeping around this call is elided from this listing).
441 def _write(self, bin, type, content):
444 self._raw_write(_encode_packobj(type, content))
447 def breakpoint(self):
448 """Clear byte and object counts and return the last processed id."""
450 self.outbytes = self.count = 0
453 def write(self, type, content):
454 """Write an object in this pack file."""
455 return self._write(calc_hash(type, content), type, content)
457 def exists(self, id):
458 """Return non-empty if an object is found in the object cache."""
# Lazily build the cache on first use so construction stays cheap.
459 if not self.objcache:
460 self._make_objcache()
461 return self.objcache.exists(id)
463 def maybe_write(self, type, content):
464 """Write an object to the pack file if not present and return its id."""
465 bin = calc_hash(type, content)
466 if not self.exists(bin):
467 self._write(bin, type, content)
# Record the new id so a repeat maybe_write() call dedups it.
468 self.objcache.add(bin)
471 def new_blob(self, blob):
472 """Create a blob object in the pack with the supplied content."""
473 return self.maybe_write('blob', blob)
475 def new_tree(self, shalist):
476 """Create a tree object in the pack."""
# Entries must follow git's tree ordering (see _shalist_sort_key) or the
# resulting tree sha would not match git's.
477 shalist = sorted(shalist, key = _shalist_sort_key)
479 for (mode,name,bin) in shalist:
# Git never stores a leading zero in the octal mode field.
482 assert(mode[0] != '0')
484 assert(len(bin) == 20)
# Tree entry wire format: '<octal mode> <name>\0' + 20-byte binary sha.
485 l.append('%s %s\0%s' % (mode,name,bin))
486 return self.maybe_write('tree', ''.join(l))
# Build a commit object line by line; every field is optional here so the
# caller controls exactly what appears (tree/parent are binary shas).
488 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
490 if tree: l.append('tree %s' % tree.encode('hex'))
491 if parent: l.append('parent %s' % parent.encode('hex'))
492 if author: l.append('author %s %s' % (author, _git_date(adate)))
493 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
496 return self.maybe_write('commit', '\n'.join(l))
498 def new_commit(self, parent, tree, msg):
499 """Create a commit object in the pack."""
# Identity is synthesized from the local user/host; the same value is
# used for both author and committer, with 'now' for both dates.
501 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
502 commit = self._new_commit(tree, parent,
503 userline, now, userline, now,
508 """Remove the pack file from disk."""
513 os.unlink(self.filename + '.pack')
517 if not f: return None
521 # update object count
523 cp = struct.pack('!i', self.count)
527 # calculate the pack sha1sum
534 f.write(sum.digest())
538 p = subprocess.Popen(['git', 'index-pack', '-v',
540 self.filename + '.pack'],
541 preexec_fn = _gitenv,
542 stdout = subprocess.PIPE)
543 out = p.stdout.read().strip()
544 _git_wait('git index-pack', p)
546 raise GitError('git index-pack produced no output')
547 nameprefix = repo('objects/pack/%s' % out)
548 if os.path.exists(self.filename + '.map'):
549 os.unlink(self.filename + '.map')
550 os.rename(self.filename + '.pack', nameprefix + '.pack')
551 os.rename(self.filename + '.idx', nameprefix + '.idx')
555 """Close the pack file and move it to its definitive path."""
# _git_date (header elided): render a timestamp in git's
# '<epoch-seconds> <utc-offset>' format using the local timezone.
560 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
# _gitenv (header elided): used as a subprocess preexec_fn so child git
# commands operate on the bup repository.
564 os.environ['GIT_DIR'] = os.path.abspath(repo())
567 def list_refs(refname = None):
568 """Generate a list of tuples in the form (refname,hash).
569 If a ref name is specified, list only this particular ref.
571 argv = ['git', 'show-ref', '--']
574 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
575 out = p.stdout.read().strip()
# A nonzero exit (e.g. no refs at all) is tolerated here on purpose.
576 rv = p.wait() # not fatal
# show-ref output lines are '<sha-hex> <refname>'.
580 for d in out.split('\n'):
581 (sha, name) = d.split(' ', 1)
582 yield (name, sha.decode('hex'))
585 def read_ref(refname):
586 """Get the commit id of the most recent commit made on a given ref."""
# At most one match is expected for a fully-qualified refname; the
# selection/return lines are elided from this listing.
587 l = list(list_refs(refname))
595 def rev_list(ref, count=None):
596 """Generate a list of reachable commits in reverse chronological order.
598 This generator walks through commits, from child to parent, that are
599 reachable via the specified ref and yields a series of tuples of the form
602 If count is a non-zero integer, limit the number of commits to "count"
# Refuse refs that look like options, so they can't be misparsed by git.
605 assert(not ref.startswith('-'))
608 opts += ['-n', str(atoi(count))]
# '--pretty=format:%ct' makes each commit emit its committer timestamp on
# the line after the 'commit <sha>' line.
609 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
610 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
614 if s.startswith('commit '):
615 commit = s[7:].decode('hex')
619 rv = p.wait() # not fatal
# Python 2 raise-with-args syntax; message carries git's exit status.
621 raise GitError, 'git rev-list returned error %d' % rv
624 def rev_get_date(ref):
625 """Get the date of the latest commit on the specified ref."""
# count=1 means the loop runs at most once; falling through to the raise
# below means the ref resolved to no commits at all.
626 for (date, commit) in rev_list(ref, count=1):
628 raise GitError, 'no such commit %r' % ref
631 def update_ref(refname, newval, oldval):
632 """Change the commit pointed to by a branch."""
# Only branch refs may be moved; newval/oldval are binary shas, passed to
# git as hex. Supplying oldval makes the update compare-and-swap safe.
635 assert(refname.startswith('refs/heads/'))
636 p = subprocess.Popen(['git', 'update-ref', refname,
637 newval.encode('hex'), oldval.encode('hex')],
638 preexec_fn = _gitenv)
639 _git_wait('git update-ref', p)
642 def guess_repo(path=None):
643 """Set the path value in the global variable "repodir".
644 This makes bup look for an existing bup repository, but not fail if a
645 repository doesn't exist. Usually, if you are interacting with a bup
646 repository, you would not be calling this function but using
# Fallback chain (some branches elided): explicit path, then $BUP_DIR,
# then the default ~/.bup.
653 repodir = os.environ.get('BUP_DIR')
655 repodir = os.path.expanduser('~/.bup')
658 def init_repo(path=None):
659 """Create the Git bare repository for bup in a given path."""
# The join with '.' distinguishes a real directory from e.g. a plain file.
662 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
# NOTE(review): '%d' is the wrong format code here -- 'd' is a path
# string, so this raise would itself fail with a TypeError; it should
# be '%s'. Flagged only, since this block is a comments-only update.
663 raise GitError('"%d" exists but is not a directory\n' % d)
664 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
665 preexec_fn = _gitenv)
666 _git_wait('git init', p)
667 # Force the index version configuration in order to ensure bup works
668 # regardless of the version of the installed Git binary.
669 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
670 stdout=sys.stderr, preexec_fn = _gitenv)
671 _git_wait('git config', p)
674 def check_repo_or_die(path=None):
675 """Make sure a bup repository exists, and abort if not.
676 If the path to a particular repository was not specified, this function
677 initializes the default repository automatically.
# A usable repo must have an objects/pack directory; the home-directory
# default repo is auto-created instead of failing (branch body elided).
680 if not os.path.isdir(repo('objects/pack/.')):
681 if repodir == home_repodir:
684 log('error: %r is not a bup/git repository\n' % repo())
689 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
691 while ofs < len(buf):
692 z = buf[ofs:].find('\0')
694 spl = buf[ofs:ofs+z].split(' ', 1)
695 assert(len(spl) == 2)
696 sha = buf[ofs+z+1:ofs+z+1+20]
698 yield (spl[0], spl[1], sha)
703 """Get Git's version and ensure a usable version is installed.
705 The returned version is formatted as an ordered tuple with each position
706 representing a digit in the version tag. For example, the following tuple
707 would represent version 1.6.6.9:
713 p = subprocess.Popen(['git', '--version'],
714 stdout=subprocess.PIPE)
715 gvs = p.stdout.read()
716 _git_wait('git --version', p)
717 m = re.match(r'git version (\S+.\S+)', gvs)
719 raise GitError('git --version weird output: %r' % gvs)
720 _ver = tuple(m.group(1).split('.'))
721 needed = ('1','5', '3', '1')
723 raise GitError('git version %s or higher is required; you have %s'
724 % ('.'.join(needed), '.'.join(_ver)))
# Wait for a git subprocess and raise GitError on a nonzero exit status.
728 def _git_wait(cmd, p):
731 raise GitError('%s returned %d' % (cmd, rv))
# Run a git command in the repo environment and return its stdout
# (the read line is elided from this listing).
734 def _git_capture(argv):
735 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
737 _git_wait(repr(argv), p)
# Iterator wrapper that lets the consumer bail out early while still
# notifying the producer (via onabort) so shared state can be cleaned up.
741 class _AbortableIter:
742 def __init__(self, it, onabort = None):
744 self.onabort = onabort
# next() (header elided): delegate to the wrapped iterator; StopIteration
# is intercepted (Python 2 'except X, e' syntax) -- handler body elided.
752 return self.it.next()
753 except StopIteration, e:
761 """Abort iteration and call the abortion callback, if needed."""
773 """Link to 'git cat-file' that is used to retrieve blob data."""
776 wanted = ('1','5','6')
779 log('warning: git version < %s; bup will be slow.\n'
782 self.get = self._slow_get
784 self.p = self.inprogress = None
785 self.get = self._fast_get
789 self.p.stdout.close()
792 self.inprogress = None
796 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
797 stdin=subprocess.PIPE,
798 stdout=subprocess.PIPE,
800 preexec_fn = _gitenv)
802 def _fast_get(self, id):
# Restart the worker if it was never started or has died.
803 if not self.p or self.p.poll() != None:
806 assert(self.p.poll() == None)
# Only one object may be streamed at a time: a previous caller that did
# not exhaust its iterator would leave the protocol mid-stream.
808 log('_fast_get: opening %r while %r is open'
809 % (id, self.inprogress))
810 assert(not self.inprogress)
# The id is written on a line of the batch protocol, so it must not
# contain line terminators.
811 assert(id.find('\n') < 0)
812 assert(id.find('\r') < 0)
815 self.p.stdin.write('%s\n' % id)
# Batch reply header: '<sha-hex> <type> <size>' or '<id> missing'.
816 hdr = self.p.stdout.readline()
817 if hdr.endswith(' missing\n'):
818 raise KeyError('blob %r is missing' % id)
820 if len(spl) != 3 or len(spl[0]) != 40:
821 raise GitError('expected blob, got %r' % spl)
822 (hex, type, size) = spl
# Stream exactly 'size' bytes; aborting mid-stream triggers _abort so
# the worker process gets restarted rather than left desynchronized.
824 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
825 onabort = self._abort)
# The batch protocol terminates each object with a single newline.
830 assert(self.p.stdout.readline() == '\n')
831 self.inprogress = None
836 def _slow_get(self, id):
# Same line-safety requirement as _fast_get: id goes on a command line.
837 assert(id.find('\n') < 0)
838 assert(id.find('\r') < 0)
# Two subprocesses per object (type probe, then content fetch) -- this is
# the compatibility path for old git versions.
840 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
843 p = subprocess.Popen(['git', 'cat-file', type, id],
844 stdout=subprocess.PIPE,
845 preexec_fn = _gitenv)
846 for blob in chunkyreader(p.stdout):
848 _git_wait('git cat-file', p)
# _join (header elided): recursively expand an object into its blob
# contents -- trees recurse into entries, commits recurse into their tree.
856 treefile = ''.join(it)
857 for (mode, name, sha) in treeparse(treefile):
858 for blob in self.join(sha.encode('hex')):
860 elif type == 'commit':
# The first line of a commit object is 'tree <sha-hex>'.
861 treeline = ''.join(it).split('\n')[0]
862 assert(treeline.startswith('tree '))
863 for blob in self.join(treeline[5:]):
866 raise GitError('invalid object type %r: expected blob/tree/commit'
870 """Generate a list of the content of all blobs that can be reached
871 from an object. The hash given in 'id' must point to a blob, a tree
872 or a commit. The content of all blobs that can be seen from trees or
873 commits will be added to the list.
876 for d in self._join(self.get(id)):
878 except StopIteration: