1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, errno, zlib, time, subprocess, struct, stat, re, tempfile, heapq
7 from bup.helpers import *
11 home_repodir = os.path.expanduser('~/.bup')
14 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
15 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
18 class GitError(Exception):
23 """Get the path to the git repository or one of its subdirectories."""
26 raise GitError('You should call check_repo_or_die()')
28 # If there's a .git subdirectory, then the actual repo is in there.
29 gd = os.path.join(repodir, '.git')
30 if os.path.exists(gd):
33 return os.path.join(repodir, sub)
36 def mangle_name(name, mode, gitmode):
37 """Mangle a file name to present an abstract name for segmented files.
38 Mangled file names will have the ".bup" extension added to them. If a
39 file's name already ends with ".bup", a ".bupl" extension is added to
40 disambiguate normal files from segmented ones.
42 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
44 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
50 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
51 def demangle_name(name):
52 """Remove name mangling from a file name, if necessary.
54 The return value is a tuple (demangled_filename,mode), where mode is one of
57 * BUP_NORMAL : files that should be read as-is from the repository
58 * BUP_CHUNKED : files that were chunked and need to be assembled
60 For more information on the name mangling algorithm, see mangle_name()
62 if name.endswith('.bupl'):
63 return (name[:-5], BUP_NORMAL)
64 elif name.endswith('.bup'):
65 return (name[:-4], BUP_CHUNKED)
67 return (name, BUP_NORMAL)
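# Illustrative round trip through mangle_name()/demangle_name(). The demo
# function below is not part of the original module and the file names and
# modes are invented: a regular file that bup split into a chunked tree gains
# a '.bup' suffix, while a file whose real name already ends in '.bup' gains
# '.bupl' so it still demangles to a normal file.
def _mangle_name_example():
    n = mangle_name('kernel.img', 0100644, 040000)     # stored as a tree
    assert n == 'kernel.img.bup'
    assert demangle_name(n) == ('kernel.img', BUP_CHUNKED)
    n = mangle_name('notes.bup', 0100644, 0100644)      # stored as-is
    assert n == 'notes.bup.bupl'
    assert demangle_name(n) == ('notes.bup', BUP_NORMAL)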
70 def _encode_packobj(type, content):
73 szbits = (sz & 0x0f) | (_typemap[type]<<4)
82 z = zlib.compressobj(1)
84 yield z.compress(content)
88 def _encode_looseobj(type, content):
89 z = zlib.compressobj(1)
90 yield z.compress('%s %d\0' % (type, len(content)))
91 yield z.compress(content)
95 def _decode_looseobj(buf):
97 s = zlib.decompress(buf)
104 assert(type in _typemap)
105 assert(sz == len(content))
106 return (type, content)
109 def _decode_packobj(buf):
112 type = _typermap[(c & 0x70) >> 4]
119 sz |= (c & 0x7f) << shift
123 return (type, zlib.decompress(buf[i+1:]))
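# A small sanity check of the pack-object header encoding above: the first
# byte carries the type in bits 4-6 and the low 4 bits of the size, the high
# bit marks continuation bytes of 7 size bits each, and the payload is
# zlib-deflated. (Demo function name and content are made up.)
def _packobj_example():
    content = 'x' * 300                        # needs two size bytes
    buf = ''.join(_encode_packobj('blob', content))
    assert _decode_packobj(buf) == ('blob', content)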
127 """Object representation of a Git pack index file."""
128 def __init__(self, filename):
130 self.map = mmap_read(open(filename))
131 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
132 self.fanout = list(struct.unpack('!256I',
133 str(buffer(self.map, 8, 256*4))))
134 self.fanout.append(0) # entry "-1"
135 nsha = self.fanout[255]
136 self.ofstable = buffer(self.map,
137 8 + 256*4 + nsha*20 + nsha*4,
139 self.ofs64table = buffer(self.map,
140 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
142 def _ofs_from_idx(self, idx):
143 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
145 idx64 = ofs & 0x7fffffff
146 ofs = struct.unpack('!Q',
147 str(buffer(self.ofs64table, idx64*8, 8)))[0]
150 def _idx_from_hash(self, hash):
151 assert(len(hash) == 20)
153 start = self.fanout[b1-1] # range -1..254
154 end = self.fanout[b1] # range 0..255
155 buf = buffer(self.map, 8 + 256*4, end*20)
158 mid = start + (end-start)/2
159 v = str(buf[mid*20:(mid+1)*20])
168 def find_offset(self, hash):
169 """Get the offset of an object inside the index file."""
170 idx = self._idx_from_hash(hash)
172 return self._ofs_from_idx(idx)
175 def exists(self, hash):
176 """Return nonempty if the object exists in this index."""
177 return hash and (self._idx_from_hash(hash) != None) and True or None
180 for i in xrange(self.fanout[255]):
181 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
184 return int(self.fanout[255])
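# The fanout table used by _idx_from_hash() is cumulative: fanout[b] is the
# number of stored sha1s whose first byte is <= b, so the sha1s starting with
# byte b occupy rows fanout[b-1]..fanout[b]-1 of the sorted sha table, and
# fanout[255] is the total object count. A stand-alone illustration with
# three invented first bytes:
def _fanout_example():
    firstbytes = [0x00, 0x00, 0xfe]
    fanout = [0] * 256
    for b in firstbytes:
        for i in xrange(b, 256):
            fanout[i] += 1
    assert fanout[0x00] == 2                   # two sha1s start with 0x00
    assert (fanout[0xfd], fanout[0xfe]) == (2, 3)
    assert fanout[255] == len(firstbytes)      # == len(PackIdx)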
187 def extract_bits(buf, nbits):
188 """Take the first 'nbits' bits from 'buf' and return them as an integer."""
189 mask = (1<<nbits) - 1
190 v = struct.unpack('!I', buf[0:4])[0]
191 v = (v >> (32-nbits)) & mask
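# extract_bits() simply keeps the top 'nbits' of the first 32 bits of the
# buffer, read big-endian; a couple of invented values for illustration:
def _extract_bits_example():
    sha = '\xfa\xce\xb0\x0c' + '\0' * 16
    assert extract_bits(sha, 8) == 0xfa
    assert extract_bits(sha, 12) == 0xfac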
196 """Wrapper which contains data from multiple index files.
197 Multiple index (.midx) files constitute a wrapper around index (.idx) files
198 and make it possible for bup to expand Git's indexing capabilities to vast
201 def __init__(self, filename):
203 assert(filename.endswith('.midx'))
204 self.map = mmap_read(open(filename))
205 if str(self.map[0:8]) == 'MIDX\0\0\0\1':
206 log('Warning: ignoring old-style midx %r\n' % filename)
209 self.fanout = buffer('\0\0\0\0')
210 self.shalist = buffer('\0'*20)
213 assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
214 self.bits = struct.unpack('!I', self.map[8:12])[0]
215 self.entries = 2**self.bits
216 self.fanout = buffer(self.map, 12, self.entries*4)
217 shaofs = 12 + self.entries*4
218 nsha = self._fanget(self.entries-1)
219 self.shalist = buffer(self.map, shaofs, nsha*20)
220 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
222 def _fanget(self, i):
224 s = self.fanout[start:start+4]
225 return struct.unpack('!I', s)[0]
227 def exists(self, hash):
228 """Return nonempty if the object exists in the index files."""
230 el = extract_bits(want, self.bits)
232 start = self._fanget(el-1)
235 end = self._fanget(el)
237 mid = start + (end-start)/2
238 v = str(self.shalist[mid*20:(mid+1)*20])
248 for i in xrange(self._fanget(self.entries-1)):
249 yield buffer(self.shalist, i*20, 20)
252 return int(self._fanget(self.entries-1))
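# Minimal header dump mirroring the MIDX2 parsing in PackMidx.__init__()
# above: 8-byte magic, then the number of fanout bits, then 2**bits fanout
# entries followed by the sorted sha1 list and the '\0'-separated .idx names.
# (The helper and its argument are hypothetical; it only reads the header.)
def _midx_header_example(path):
    f = open(path, 'rb')
    magic = f.read(8)                          # 'MIDX\0\0\0\2' for current files
    bits = struct.unpack('!I', f.read(4))[0]
    f.close()
    return (magic, bits, 2**bits)              # (magic, bits, fanout entries)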
257 def __init__(self, dir):
259 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
269 assert(_mpi_count == 0)
272 return iter(idxmerge(self.packs))
274 def exists(self, hash):
275 """Return nonempty if the object exists in the index files."""
276 if hash in self.also:
278 for i in range(len(self.packs)):
281 # reorder so most recently used packs are searched first
282 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
286 def refresh(self, skip_midx = False):
287 """Refresh the index list.
288 This method verifies whether any .midx files were superseded (e.g. all of
289 their contents are in another, bigger .midx file) and removes the superseded
292 If skip_midx is True, all work on .midx files will be skipped and .midx
293 files will be removed from the list.
295 The module-global variable 'ignore_midx' can force this function to
296 always act as if skip_midx was True.
298 skip_midx = skip_midx or ignore_midx
299 d = dict((p.name, p) for p in self.packs
300 if not skip_midx or not isinstance(p, PackMidx))
301 if os.path.exists(self.dir):
304 for ix in self.packs:
305 if isinstance(ix, PackMidx):
306 for name in ix.idxnames:
307 d[os.path.join(self.dir, name)] = ix
308 for f in os.listdir(self.dir):
309 full = os.path.join(self.dir, f)
310 if f.endswith('.midx') and not d.get(full):
312 (mxd, mxf) = os.path.split(mx.name)
314 for n in mx.idxnames:
315 if not os.path.exists(os.path.join(mxd, n)):
316 log(('warning: index %s missing\n' +
317 ' used by %s\n') % (n, mxf))
321 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
324 for sub in ix.idxnames:
325 found = d.get(os.path.join(self.dir, sub))
326 if not found or isinstance(found, PackIdx):
327 # doesn't exist, or exists but not in a midx
329 for name in ix.idxnames:
330 d[os.path.join(self.dir, name)] = ix
334 log('midx: removing redundant: %s\n'
335 % os.path.basename(ix.name))
337 for f in os.listdir(self.dir):
338 full = os.path.join(self.dir, f)
339 if f.endswith('.idx') and not d.get(full):
342 self.packs = list(set(d.values()))
343 log('PackIdxList: using %d index%s.\n'
344 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
347 """Insert an additional object in the list."""
351 """Remove all additional objects from the list."""
355 def calc_hash(type, content):
356 """Calculate some content's hash in the Git fashion."""
357 header = '%s %d\0' % (type, len(content))
363 def _shalist_sort_key(ent):
364 (mode, name, id) = ent
365 if stat.S_ISDIR(int(mode, 8)):
371 def idxmerge(idxlist):
372 """Generate a list of all the objects reachable in a PackIdxList."""
373 total = sum(len(i) for i in idxlist)
374 iters = (iter(i) for i in idxlist)
375 heap = [(next(it), it) for it in iters]
380 if (count % 10024) == 0:
381 progress('Reading indexes: %.2f%% (%d/%d)\r'
382 % (count*100.0/total, count, total))
390 heapq.heapreplace(heap, (e, it))
393 log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
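# The same merge-by-heap pattern as idxmerge(), shown on two plain sorted
# lists so the loop above is easier to follow (illustrative only; idxmerge
# additionally skips duplicate entries and reports progress):
def _idxmerge_example():
    iters = [iter(['apple', 'cherry']), iter(['banana', 'date'])]
    heap = [(it.next(), it) for it in iters]
    heapq.heapify(heap)
    out = []
    while heap:
        (e, it) = heap[0]
        out.append(e)
        try:
            heapq.heapreplace(heap, (it.next(), it))
        except StopIteration:
            heapq.heappop(heap)
    assert out == ['apple', 'banana', 'cherry', 'date']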
397 """Writes Git objects insid a pack file."""
398 def __init__(self, objcache_maker=None):
403 self.objcache_maker = objcache_maker
409 def _make_objcache(self):
410 if not self.objcache:
411 if self.objcache_maker:
412 self.objcache = self.objcache_maker()
414 self.objcache = PackIdxList(repo('objects/pack'))
418 self._make_objcache()
419 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
420 self.file = os.fdopen(fd, 'w+b')
421 assert(name.endswith('.pack'))
422 self.filename = name[:-5]
423 self.file.write('PACK\0\0\0\2\0\0\0\0')
425 def _raw_write(self, datalist):
428 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
429 # the file never has a *partial* blob. So let's make sure it's
430 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
431 # to our hashsplit algorithm.) f.write() does its own buffering,
432 # but that's okay because we'll flush it in _end().
433 oneblob = ''.join(datalist)
435 self.outbytes += len(oneblob)
438 def _write(self, bin, type, content):
441 self._raw_write(_encode_packobj(type, content))
444 def breakpoint(self):
445 """Clear byte and object counts and return the last processed id."""
447 self.outbytes = self.count = 0
450 def write(self, type, content):
451 """Write an object in this pack file."""
452 return self._write(calc_hash(type, content), type, content)
454 def exists(self, id):
455 """Return non-empty if an object is found in the object cache."""
456 if not self.objcache:
457 self._make_objcache()
458 return self.objcache.exists(id)
460 def maybe_write(self, type, content):
461 """Write an object to the pack file if not present and return its id."""
462 bin = calc_hash(type, content)
463 if not self.exists(bin):
464 self._write(bin, type, content)
465 self.objcache.add(bin)
468 def new_blob(self, blob):
469 """Create a blob object in the pack with the supplied content."""
470 return self.maybe_write('blob', blob)
472 def new_tree(self, shalist):
473 """Create a tree object in the pack."""
474 shalist = sorted(shalist, key = _shalist_sort_key)
476 for (mode,name,bin) in shalist:
479 assert(mode[0] != '0')
481 assert(len(bin) == 20)
482 l.append('%s %s\0%s' % (mode,name,bin))
483 return self.maybe_write('tree', ''.join(l))
485 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
487 if tree: l.append('tree %s' % tree.encode('hex'))
488 if parent: l.append('parent %s' % parent.encode('hex'))
489 if author: l.append('author %s %s' % (author, _git_date(adate)))
490 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
493 return self.maybe_write('commit', '\n'.join(l))
495 def new_commit(self, parent, tree, msg):
496 """Create a commit object in the pack."""
498 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
499 commit = self._new_commit(tree, parent,
500 userline, now, userline, now,
505 """Remove the pack file from disk."""
510 os.unlink(self.filename + '.pack')
514 if not f: return None
518 # update object count
520 cp = struct.pack('!i', self.count)
524 # calculate the pack sha1sum
531 f.write(sum.digest())
535 p = subprocess.Popen(['git', 'index-pack', '-v',
537 self.filename + '.pack'],
538 preexec_fn = _gitenv,
539 stdout = subprocess.PIPE)
540 out = p.stdout.read().strip()
541 _git_wait('git index-pack', p)
543 raise GitError('git index-pack produced no output')
544 nameprefix = repo('objects/pack/%s' % out)
545 if os.path.exists(self.filename + '.map'):
546 os.unlink(self.filename + '.map')
547 os.rename(self.filename + '.pack', nameprefix + '.pack')
548 os.rename(self.filename + '.idx', nameprefix + '.idx')
552 """Close the pack file and move it to its definitive path."""
557 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
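# Rough usage sketch for the PackWriter class above (all values invented; a
# real caller needs a repository first, see check_repo_or_die() below):
def _packwriter_example():
    w = PackWriter()
    try:
        blob = w.new_blob('hello world\n')                  # 20-byte sha1
        tree = w.new_tree([('100644', 'hello.txt', blob)])
        assert len(blob) == len(tree) == 20
    finally:
        w.close()        # or w.abort() to discard the half-written pack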
561 os.environ['GIT_DIR'] = os.path.abspath(repo())
564 def list_refs(refname = None):
565 """Generate a list of tuples in the form (refname,hash).
566 If a ref name is specified, list only this particular ref.
568 argv = ['git', 'show-ref', '--']
571 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
572 out = p.stdout.read().strip()
573 rv = p.wait() # not fatal
577 for d in out.split('\n'):
578 (sha, name) = d.split(' ', 1)
579 yield (name, sha.decode('hex'))
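# Example of walking the refs with list_refs() (needs a valid repository; the
# output format in the comment is only indicative):
def _list_refs_example():
    for (name, sha) in list_refs():
        log('%s %s\n' % (sha.encode('hex'), name))   # e.g. 'f00f... refs/heads/master'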
582 def read_ref(refname):
583 """Get the commit id of the most recent commit made on a given ref."""
584 l = list(list_refs(refname))
592 def rev_list(ref, count=None):
593 """Generate a list of reachable commits in reverse chronological order.
595 This generator walks through commits, from child to parent, that are
596 reachable via the specified ref and yields a series of tuples of the form
599 If count is a non-zero integer, limit the number of commits to "count"
602 assert(not ref.startswith('-'))
605 opts += ['-n', str(atoi(count))]
606 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
607 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
611 if s.startswith('commit '):
612 commit = s[7:].decode('hex')
616 rv = p.wait() # not fatal
618 raise GitError('git rev-list returned error %d' % rv)
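# Hypothetical walk over the three newest commits reachable from a ref, using
# rev_list() above (the branch name is only an example):
def _rev_list_example(ref='refs/heads/master'):
    for (date, commit) in rev_list(ref, count=3):
        log('%s %s\n' % (time.strftime('%Y-%m-%d', time.localtime(date)),
                         commit.encode('hex')))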
621 def rev_get_date(ref):
622 """Get the date of the latest commit on the specified ref."""
623 for (date, commit) in rev_list(ref, count=1):
625 raise GitError('no such commit %r' % ref)
628 def update_ref(refname, newval, oldval):
629 """Change the commit pointed to by a branch."""
632 assert(refname.startswith('refs/heads/'))
633 p = subprocess.Popen(['git', 'update-ref', refname,
634 newval.encode('hex'), oldval.encode('hex')],
635 preexec_fn = _gitenv)
636 _git_wait('git update-ref', p)
639 def guess_repo(path=None):
640 """Set the path value in the global variable "repodir".
641 This makes bup look for an existing bup repository, but not fail if a
642 repository doesn't exist. Usually, if you are interacting with a bup
643 repository, you would not be calling this function but using
650 repodir = os.environ.get('BUP_DIR')
652 repodir = os.path.expanduser('~/.bup')
655 def init_repo(path=None):
656 """Create the Git bare repository for bup in a given path."""
659 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
660 raise GitError('"%d" exists but is not a directory\n' % d)
661 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
662 preexec_fn = _gitenv)
663 _git_wait('git init', p)
664 # Force the index version configuration in order to ensure bup works
665 # regardless of the version of the installed Git binary.
666 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
667 stdout=sys.stderr, preexec_fn = _gitenv)
668 _git_wait('git config', p)
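# Typical bootstrap for a script using this module: point BUP_DIR somewhere,
# create the bare repository if needed, then let check_repo_or_die() set the
# module-global 'repodir' so repo() works (the directory name is an example):
def _init_repo_example():
    os.environ['BUP_DIR'] = '/tmp/demo.bup'
    init_repo()
    check_repo_or_die()
    log('repository is at %r\n' % repo())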
671 def check_repo_or_die(path=None):
672 """Make sure a bup repository exists, and abort if not.
673 If the path to a particular repository was not specified, this function
674 initializes the default repository automatically.
677 if not os.path.isdir(repo('objects/pack/.')):
678 if repodir == home_repodir:
681 log('error: %r is not a bup/git repository\n' % repo())
687 while ofs < len(buf):
688 z = buf[ofs:].find('\0')
690 spl = buf[ofs:ofs+z].split(' ', 1)
691 assert(len(spl) == 2)
692 sha = buf[ofs+z+1:ofs+z+1+20]
694 yield (spl[0], spl[1], sha)
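# The raw tree-entry format parsed above is 'MODE NAME\0' followed by the
# 20-byte binary sha1 of the entry; a tiny hand-built example (values made up):
def _treeparse_example():
    sha = '\xab' * 20
    buf = '100644 hello.txt\0' + sha
    assert list(_treeparse(buf)) == [('100644', 'hello.txt', sha)]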
699 """Get Git's version and ensure a usable version is installed.
701 The returned version is formatted as an ordered tuple with each position
702 representing a digit in the version tag. For example, the following tuple
703 would represent version 1.6.6.9:
709 p = subprocess.Popen(['git', '--version'],
710 stdout=subprocess.PIPE)
711 gvs = p.stdout.read()
712 _git_wait('git --version', p)
713 m = re.match(r'git version (\S+\.\S+)', gvs)
715 raise GitError('git --version weird output: %r' % gvs)
716 _ver = tuple(m.group(1).split('.'))
717 needed = ('1','5', '3', '1')
719 raise GitError('git version %s or higher is required; you have %s'
720 % ('.'.join(needed), '.'.join(_ver)))
724 def _git_wait(cmd, p):
727 raise GitError('%s returned %d' % (cmd, rv))
730 def _git_capture(argv):
731 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
733 _git_wait(repr(argv), p)
737 class _AbortableIter:
738 def __init__(self, it, onabort = None):
740 self.onabort = onabort
748 return self.it.next()
749 except StopIteration, e:
757 """Abort iteration and call the abortion callback, if needed."""
769 """Link to 'git cat-file' that is used to retrieve blob data."""
772 wanted = ('1','5','6')
775 log('warning: git version < %s; bup will be slow.\n'
778 self.get = self._slow_get
780 self.p = self.inprogress = None
781 self.get = self._fast_get
785 self.p.stdout.close()
788 self.inprogress = None
792 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
793 stdin=subprocess.PIPE,
794 stdout=subprocess.PIPE,
795 preexec_fn = _gitenv)
797 def _fast_get(self, id):
798 if not self.p or self.p.poll() != None:
801 assert(self.p.poll() == None)
803 log('_fast_get: opening %r while %r is open'
804 % (id, self.inprogress))
805 assert(not self.inprogress)
806 assert(id.find('\n') < 0)
807 assert(id.find('\r') < 0)
810 self.p.stdin.write('%s\n' % id)
811 hdr = self.p.stdout.readline()
812 if hdr.endswith(' missing\n'):
813 raise KeyError('blob %r is missing' % id)
815 if len(spl) != 3 or len(spl[0]) != 40:
816 raise GitError('expected blob, got %r' % spl)
817 (hex, type, size) = spl
819 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
820 onabort = self._abort)
825 assert(self.p.stdout.readline() == '\n')
826 self.inprogress = None
831 def _slow_get(self, id):
832 assert(id.find('\n') < 0)
833 assert(id.find('\r') < 0)
835 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
838 p = subprocess.Popen(['git', 'cat-file', type, id],
839 stdout=subprocess.PIPE,
840 preexec_fn = _gitenv)
841 for blob in chunkyreader(p.stdout):
843 _git_wait('git cat-file', p)
851 treefile = ''.join(it)
852 for (mode, name, sha) in _treeparse(treefile):
853 for blob in self.join(sha.encode('hex')):
855 elif type == 'commit':
856 treeline = ''.join(it).split('\n')[0]
857 assert(treeline.startswith('tree '))
858 for blob in self.join(treeline[5:]):
861 raise GitError('invalid object type %r: expected blob/tree/commit'
865 """Generate a list of the content of all blobs that can be reached
866 from an object. The hash given in 'id' must point to a blob, a tree
867 or a commit. The content of all blobs that can be seen from trees or
868 commits will be added to the list.
871 for d in self._join(self.get(id)):
873 except StopIteration: