lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, zlib, time, subprocess, struct, stat, re, tempfile
   6 import heapq
   7 from bup.helpers import *
   8 from bup import _helpers
   9
  10 verbose = 0
  11 ignore_midx = 0
  12 home_repodir = os.path.expanduser('~/.bup')
  13 repodir = None
  14
  15 _typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
  16 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
  17
  18 _total_searches = 0
  19 _total_steps = 0
  20
  21
  22 class GitError(Exception):
  23     pass
  24
  25
  26 def repo(sub = ''):
  27     """Get the path to the git repository or one of its subdirectories."""
  28     global repodir
  29     if not repodir:
  30         raise GitError('You should call check_repo_or_die()')
  31
  32     # If there's a .git subdirectory, then the actual repo is in there.
  33     gd = os.path.join(repodir, '.git')
  34     if os.path.exists(gd):
  35         repodir = gd
  36
  37     return os.path.join(repodir, sub)
  38
  39
  40 def mangle_name(name, mode, gitmode):
  41     """Mangle a file name to present an abstract name for segmented files.
  42     Mangled file names will have the ".bup" extension added to them. If a
  43     file's name already ends with ".bup", a ".bupl" extension is added to
  44     disambiguate normal files from semgmented ones.
  45     """
  46     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  47         return name + '.bup'
  48     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  49         return name + '.bupl'
  50     else:
  51         return name
  52
  53
  54 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  55 def demangle_name(name):
  56     """Remove name mangling from a file name, if necessary.
  57
  58     The return value is a tuple (demangled_filename,mode), where mode is one of
  59     the following:
  60
  61     * BUP_NORMAL  : files that should be read as-is from the repository
  62     * BUP_CHUNKED : files that were chunked and need to be assembled
  63
  64     For more information on the name mangling algorythm, see mangle_name()
  65     """
  66     if name.endswith('.bupl'):
  67         return (name[:-5], BUP_NORMAL)
  68     elif name.endswith('.bup'):
  69         return (name[:-4], BUP_CHUNKED)
  70     else:
  71         return (name, BUP_NORMAL)
  72
  73
  74 def _encode_packobj(type, content):
  75     szout = ''
  76     sz = len(content)
  77     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  78     sz >>= 4
  79     while 1:
  80         if sz: szbits |= 0x80
  81         szout += chr(szbits)
  82         if not sz:
  83             break
  84         szbits = sz & 0x7f
  85         sz >>= 7
  86     z = zlib.compressobj(1)
  87     yield szout
  88     yield z.compress(content)
  89     yield z.flush()
  90
  91
  92 def _encode_looseobj(type, content):
  93     z = zlib.compressobj(1)
  94     yield z.compress('%s %d\0' % (type, len(content)))
  95     yield z.compress(content)
  96     yield z.flush()
  97
  98
  99 def _decode_looseobj(buf):
 100     assert(buf);
 101     s = zlib.decompress(buf)
 102     i = s.find('\0')
 103     assert(i > 0)
 104     l = s[:i].split(' ')
 105     type = l[0]
 106     sz = int(l[1])
 107     content = s[i+1:]
 108     assert(type in _typemap)
 109     assert(sz == len(content))
 110     return (type, content)
 111
 112
 113 def _decode_packobj(buf):
 114     assert(buf)
 115     c = ord(buf[0])
 116     type = _typermap[(c & 0x70) >> 4]
 117     sz = c & 0x0f
 118     shift = 4
 119     i = 0
 120     while c & 0x80:
 121         i += 1
 122         c = ord(buf[i])
 123         sz |= (c & 0x7f) << shift
 124         shift += 7
 125         if not (c & 0x80):
 126             break
 127     return (type, zlib.decompress(buf[i+1:]))
 128
 129
 130 class PackIdx:
 131     """Object representation of a Git pack index file."""
 132     def __init__(self, filename):
 133         self.name = filename
 134         self.map = mmap_read(open(filename))
 135         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 136         self.fanout = list(struct.unpack('!256I',
 137                                          str(buffer(self.map, 8, 256*4))))
 138         self.fanout.append(0)  # entry "-1"
 139         nsha = self.fanout[255]
 140         self.ofstable = buffer(self.map,
 141                                8 + 256*4 + nsha*20 + nsha*4,
 142                                nsha*4)
 143         self.ofs64table = buffer(self.map,
 144                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 145
 146     def _ofs_from_idx(self, idx):
 147         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 148         if ofs & 0x80000000:
 149             idx64 = ofs & 0x7fffffff
 150             ofs = struct.unpack('!I',
 151                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 152         return ofs
 153
 154     def _idx_from_hash(self, hash):
 155         global _total_searches, _total_steps
 156         _total_searches += 1
 157         assert(len(hash) == 20)
 158         b1 = ord(hash[0])
 159         start = self.fanout[b1-1] # range -1..254
 160         end = self.fanout[b1] # range 0..255
 161         buf = buffer(self.map, 8 + 256*4, end*20)
 162         want = str(hash)
 163         _total_steps += 1  # lookup table is a step
 164         while start < end:
 165             _total_steps += 1
 166             mid = start + (end-start)/2
 167             v = str(buf[mid*20:(mid+1)*20])
 168             if v < want:
 169                 start = mid+1
 170             elif v > want:
 171                 end = mid
 172             else: # got it!
 173                 return mid
 174         return None
 175
 176     def find_offset(self, hash):
 177         """Get the offset of an object inside the index file."""
 178         idx = self._idx_from_hash(hash)
 179         if idx != None:
 180             return self._ofs_from_idx(idx)
 181         return None
 182
 183     def exists(self, hash):
 184         """Return nonempty if the object exists in this index."""
 185         return hash and (self._idx_from_hash(hash) != None) and True or None
 186
 187     def __iter__(self):
 188         for i in xrange(self.fanout[255]):
 189             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 190
 191     def __len__(self):
 192         return int(self.fanout[255])
 193
 194
 195 def extract_bits(buf, nbits):
 196     """Take the first 'nbits' bits from 'buf' and return them as an integer."""
 197     mask = (1<<nbits) - 1
 198     v = _helpers.firstword(buf)
 199     v = (v >> (32-nbits)) & mask
 200     return v
 201
 202
 203 class PackMidx:
 204     """Wrapper which contains data from multiple index files.
 205     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 206     and make it possible for bup to expand Git's indexing capabilities to vast
 207     amounts of files.
 208     """
 209     def __init__(self, filename):
 210         self.name = filename
 211         assert(filename.endswith('.midx'))
 212         self.map = mmap_read(open(filename))
 213         if str(self.map[0:8]) == 'MIDX\0\0\0\1':
 214             log('Warning: ignoring old-style midx %r\n' % filename)
 215             self.bits = 0
 216             self.entries = 1
 217             self.fanout = buffer('\0\0\0\0')
 218             self.shalist = buffer('\0'*20)
 219             self.idxnames = []
 220         else:
 221             assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
 222             self.bits = _helpers.firstword(self.map[8:12])
 223             self.entries = 2**self.bits
 224             self.fanout = buffer(self.map, 12, self.entries*4)
 225             shaofs = 12 + self.entries*4
 226             nsha = self._fanget(self.entries-1)
 227             self.shalist = buffer(self.map, shaofs, nsha*20)
 228             self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 229
 230     def _fanget(self, i):
 231         start = i*4
 232         s = self.fanout[start:start+4]
 233         return _helpers.firstword(s)
 234
 235     def _get(self, i):
 236         return str(self.shalist[i*20:(i+1)*20])
 237
 238     def exists(self, hash):
 239         """Return nonempty if the object exists in the index files."""
 240         global _total_searches, _total_steps
 241         _total_searches += 1
 242         want = str(hash)
 243         el = extract_bits(want, self.bits)
 244         if el:
 245             start = self._fanget(el-1)
 246             startv = el << (32-self.bits)
 247         else:
 248             start = 0
 249             startv = 0
 250         end = self._fanget(el)
 251         endv = (el+1) << (32-self.bits)
 252         _total_steps += 1   # lookup table is a step
 253         hashv = _helpers.firstword(hash)
 254         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 255         while start < end:
 256             _total_steps += 1
 257             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 258             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 259             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 260             v = self._get(mid)
 261             #print '    %08x' % self._num(v)
 262             if v < want:
 263                 start = mid+1
 264                 startv = _helpers.firstword(v)
 265             elif v > want:
 266                 end = mid
 267                 endv = _helpers.firstword(v)
 268             else: # got it!
 269                 return True
 270         return None
 271
 272     def __iter__(self):
 273         for i in xrange(self._fanget(self.entries-1)):
 274             yield buffer(self.shalist, i*20, 20)
 275
 276     def __len__(self):
 277         return int(self._fanget(self.entries-1))
 278
 279
 280 _mpi_count = 0
 281 class PackIdxList:
 282     def __init__(self, dir):
 283         global _mpi_count
 284         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
 285         _mpi_count += 1
 286         self.dir = dir
 287         self.also = {}
 288         self.packs = []
 289         self.refresh()
 290
 291     def __del__(self):
 292         global _mpi_count
 293         _mpi_count -= 1
 294         assert(_mpi_count == 0)
 295
 296     def __iter__(self):
 297         return iter(idxmerge(self.packs))
 298
 299     def __len__(self):
 300         return sum(len(pack) for pack in self.packs)
 301
 302     def exists(self, hash):
 303         """Return nonempty if the object exists in the index files."""
 304         global _total_searches
 305         _total_searches += 1
 306         if hash in self.also:
 307             return True
 308         for i in range(len(self.packs)):
 309             p = self.packs[i]
 310             _total_searches -= 1  # will be incremented by sub-pack
 311             if p.exists(hash):
 312                 # reorder so most recently used packs are searched first
 313                 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
 314                 return p.name
 315         return None
 316
 317     def refresh(self, skip_midx = False):
 318         """Refresh the index list.
 319         This method verifies if .midx files were superseded (e.g. all of its
 320         contents are in another, bigger .midx file) and removes the superseded
 321         files.
 322
 323         If skip_midx is True, all work on .midx files will be skipped and .midx
 324         files will be removed from the list.
 325
 326         The module-global variable 'ignore_midx' can force this function to
 327         always act as if skip_midx was True.
 328         """
 329         skip_midx = skip_midx or ignore_midx
 330         d = dict((p.name, p) for p in self.packs
 331                  if not skip_midx or not isinstance(p, PackMidx))
 332         if os.path.exists(self.dir):
 333             if not skip_midx:
 334                 midxl = []
 335                 for ix in self.packs:
 336                     if isinstance(ix, PackMidx):
 337                         for name in ix.idxnames:
 338                             d[os.path.join(self.dir, name)] = ix
 339                 for f in os.listdir(self.dir):
 340                     full = os.path.join(self.dir, f)
 341                     if f.endswith('.midx') and not d.get(full):
 342                         mx = PackMidx(full)
 343                         (mxd, mxf) = os.path.split(mx.name)
 344                         broken = 0
 345                         for n in mx.idxnames:
 346                             if not os.path.exists(os.path.join(mxd, n)):
 347                                 log(('warning: index %s missing\n' +
 348                                     '  used by %s\n') % (n, mxf))
 349                                 broken += 1
 350                         if not broken:
 351                             midxl.append(mx)
 352                 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
 353                 for ix in midxl:
 354                     any = 0
 355                     for sub in ix.idxnames:
 356                         found = d.get(os.path.join(self.dir, sub))
 357                         if not found or isinstance(found, PackIdx):
 358                             # doesn't exist, or exists but not in a midx
 359                             d[ix.name] = ix
 360                             for name in ix.idxnames:
 361                                 d[os.path.join(self.dir, name)] = ix
 362                             any += 1
 363                             break
 364                     if not any:
 365                         log('midx: removing redundant: %s\n'
 366                             % os.path.basename(ix.name))
 367                         unlink(ix.name)
 368             for f in os.listdir(self.dir):
 369                 full = os.path.join(self.dir, f)
 370                 if f.endswith('.idx') and not d.get(full):
 371                     ix = PackIdx(full)
 372                     d[full] = ix
 373             self.packs = list(set(d.values()))
 374         log('PackIdxList: using %d index%s.\n'
 375             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 376
 377     def add(self, hash):
 378         """Insert an additional object in the list."""
 379         self.also[hash] = 1
 380
 381     def zap_also(self):
 382         """Remove all additional objects from the list."""
 383         self.also = {}
 384
 385
 386 def calc_hash(type, content):
 387     """Calculate some content's hash in the Git fashion."""
 388     header = '%s %d\0' % (type, len(content))
 389     sum = Sha1(header)
 390     sum.update(content)
 391     return sum.digest()
 392
 393
 394 def _shalist_sort_key(ent):
 395     (mode, name, id) = ent
 396     if stat.S_ISDIR(int(mode, 8)):
 397         return name + '/'
 398     else:
 399         return name
 400
 401
 402 def idxmerge(idxlist):
 403     """Generate a list of all the objects reachable in a PackIdxList."""
 404     total = sum(len(i) for i in idxlist)
 405     iters = (iter(i) for i in idxlist)
 406     heap = [(next(it), it) for it in iters]
 407     heapq.heapify(heap)
 408     count = 0
 409     last = None
 410     while heap:
 411         if (count % 10024) == 0:
 412             progress('Reading indexes: %.2f%% (%d/%d)\r'
 413                      % (count*100.0/total, count, total))
 414         (e, it) = heap[0]
 415         if e != last:
 416             yield e
 417             last = e
 418         count += 1
 419         e = next(it)
 420         if e:
 421             heapq.heapreplace(heap, (e, it))
 422         else:
 423             heapq.heappop(heap)
 424     log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 425
 426
 427 class PackWriter:
 428     """Writes Git objects insid a pack file."""
 429     def __init__(self, objcache_maker=None):
 430         self.count = 0
 431         self.outbytes = 0
 432         self.filename = None
 433         self.file = None
 434         self.objcache_maker = objcache_maker
 435         self.objcache = None
 436
 437     def __del__(self):
 438         self.close()
 439
 440     def _make_objcache(self):
 441         if self.objcache == None:
 442             if self.objcache_maker:
 443                 self.objcache = self.objcache_maker()
 444             else:
 445                 self.objcache = PackIdxList(repo('objects/pack'))
 446
 447     def _open(self):
 448         if not self.file:
 449             self._make_objcache()
 450             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 451             self.file = os.fdopen(fd, 'w+b')
 452             assert(name.endswith('.pack'))
 453             self.filename = name[:-5]
 454             self.file.write('PACK\0\0\0\2\0\0\0\0')
 455
 456     def _raw_write(self, datalist):
 457         self._open()
 458         f = self.file
 459         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 460         # the file never has a *partial* blob.  So let's make sure it's
 461         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 462         # to our hashsplit algorithm.)  f.write() does its own buffering,
 463         # but that's okay because we'll flush it in _end().
 464         oneblob = ''.join(datalist)
 465         f.write(oneblob)
 466         self.outbytes += len(oneblob)
 467         self.count += 1
 468
 469     def _write(self, bin, type, content):
 470         if verbose:
 471             log('>')
 472         self._raw_write(_encode_packobj(type, content))
 473         return bin
 474
 475     def breakpoint(self):
 476         """Clear byte and object counts and return the last processed id."""
 477         id = self._end()
 478         self.outbytes = self.count = 0
 479         return id
 480
 481     def write(self, type, content):
 482         """Write an object in this pack file."""
 483         return self._write(calc_hash(type, content), type, content)
 484
 485     def exists(self, id):
 486         """Return non-empty if an object is found in the object cache."""
 487         if not self.objcache:
 488             self._make_objcache()
 489         return self.objcache.exists(id)
 490
 491     def maybe_write(self, type, content):
 492         """Write an object to the pack file if not present and return its id."""
 493         bin = calc_hash(type, content)
 494         if not self.exists(bin):
 495             self._write(bin, type, content)
 496             self.objcache.add(bin)
 497         return bin
 498
 499     def new_blob(self, blob):
 500         """Create a blob object in the pack with the supplied content."""
 501         return self.maybe_write('blob', blob)
 502
 503     def new_tree(self, shalist):
 504         """Create a tree object in the pack."""
 505         shalist = sorted(shalist, key = _shalist_sort_key)
 506         l = []
 507         for (mode,name,bin) in shalist:
 508             assert(mode)
 509             assert(mode != '0')
 510             assert(mode[0] != '0')
 511             assert(name)
 512             assert(len(bin) == 20)
 513             l.append('%s %s\0%s' % (mode,name,bin))
 514         return self.maybe_write('tree', ''.join(l))
 515
 516     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 517         l = []
 518         if tree: l.append('tree %s' % tree.encode('hex'))
 519         if parent: l.append('parent %s' % parent.encode('hex'))
 520         if author: l.append('author %s %s' % (author, _git_date(adate)))
 521         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 522         l.append('')
 523         l.append(msg)
 524         return self.maybe_write('commit', '\n'.join(l))
 525
 526     def new_commit(self, parent, tree, msg):
 527         """Create a commit object in the pack."""
 528         now = time.time()
 529         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 530         commit = self._new_commit(tree, parent,
 531                                   userline, now, userline, now,
 532                                   msg)
 533         return commit
 534
 535     def abort(self):
 536         """Remove the pack file from disk."""
 537         f = self.file
 538         if f:
 539             self.file = None
 540             f.close()
 541             os.unlink(self.filename + '.pack')
 542
 543     def _end(self):
 544         f = self.file
 545         if not f: return None
 546         self.file = None
 547         self.objcache = None
 548
 549         # update object count
 550         f.seek(8)
 551         cp = struct.pack('!i', self.count)
 552         assert(len(cp) == 4)
 553         f.write(cp)
 554
 555         # calculate the pack sha1sum
 556         f.seek(0)
 557         sum = Sha1()
 558         while 1:
 559             b = f.read(65536)
 560             sum.update(b)
 561             if not b: break
 562         f.write(sum.digest())
 563
 564         f.close()
 565
 566         p = subprocess.Popen(['git', 'index-pack', '-v',
 567                               '--index-version=2',
 568                               self.filename + '.pack'],
 569                              preexec_fn = _gitenv,
 570                              stdout = subprocess.PIPE)
 571         out = p.stdout.read().strip()
 572         _git_wait('git index-pack', p)
 573         if not out:
 574             raise GitError('git index-pack produced no output')
 575         nameprefix = repo('objects/pack/%s' % out)
 576         if os.path.exists(self.filename + '.map'):
 577             os.unlink(self.filename + '.map')
 578         os.rename(self.filename + '.pack', nameprefix + '.pack')
 579         os.rename(self.filename + '.idx', nameprefix + '.idx')
 580         return nameprefix
 581
 582     def close(self):
 583         """Close the pack file and move it to its definitive path."""
 584         return self._end()
 585
 586
 587 def _git_date(date):
 588     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 589
 590
 591 def _gitenv():
 592     os.environ['GIT_DIR'] = os.path.abspath(repo())
 593
 594
 595 def list_refs(refname = None):
 596     """Generate a list of tuples in the form (refname,hash).
 597     If a ref name is specified, list only this particular ref.
 598     """
 599     argv = ['git', 'show-ref', '--']
 600     if refname:
 601         argv += [refname]
 602     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 603     out = p.stdout.read().strip()
 604     rv = p.wait()  # not fatal
 605     if rv:
 606         assert(not out)
 607     if out:
 608         for d in out.split('\n'):
 609             (sha, name) = d.split(' ', 1)
 610             yield (name, sha.decode('hex'))
 611
 612
 613 def read_ref(refname):
 614     """Get the commit id of the most recent commit made on a given ref."""
 615     l = list(list_refs(refname))
 616     if l:
 617         assert(len(l) == 1)
 618         return l[0][1]
 619     else:
 620         return None
 621
 622
 623 def rev_list(ref, count=None):
 624     """Generate a list of reachable commits in reverse chronological order.
 625
 626     This generator walks through commits, from child to parent, that are
 627     reachable via the specified ref and yields a series of tuples of the form
 628     (date,hash).
 629
 630     If count is a non-zero integer, limit the number of commits to "count"
 631     objects.
 632     """
 633     assert(not ref.startswith('-'))
 634     opts = []
 635     if count:
 636         opts += ['-n', str(atoi(count))]
 637     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 638     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 639     commit = None
 640     for row in p.stdout:
 641         s = row.strip()
 642         if s.startswith('commit '):
 643             commit = s[7:].decode('hex')
 644         else:
 645             date = int(s)
 646             yield (date, commit)
 647     rv = p.wait()  # not fatal
 648     if rv:
 649         raise GitError, 'git rev-list returned error %d' % rv
 650
 651
 652 def rev_get_date(ref):
 653     """Get the date of the latest commit on the specified ref."""
 654     for (date, commit) in rev_list(ref, count=1):
 655         return date
 656     raise GitError, 'no such commit %r' % ref
 657
 658
 659 def update_ref(refname, newval, oldval):
 660     """Change the commit pointed to by a branch."""
 661     if not oldval:
 662         oldval = ''
 663     assert(refname.startswith('refs/heads/'))
 664     p = subprocess.Popen(['git', 'update-ref', refname,
 665                           newval.encode('hex'), oldval.encode('hex')],
 666                          preexec_fn = _gitenv)
 667     _git_wait('git update-ref', p)
 668
 669
 670 def guess_repo(path=None):
 671     """Set the path value in the global variable "repodir".
 672     This makes bup look for an existing bup repository, but not fail if a
 673     repository doesn't exist. Usually, if you are interacting with a bup
 674     repository, you would not be calling this function but using
 675     check_repo_or_die().
 676     """
 677     global repodir
 678     if path:
 679         repodir = path
 680     if not repodir:
 681         repodir = os.environ.get('BUP_DIR')
 682         if not repodir:
 683             repodir = os.path.expanduser('~/.bup')
 684
 685
 686 def init_repo(path=None):
 687     """Create the Git bare repository for bup in a given path."""
 688     guess_repo(path)
 689     d = repo()
 690     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 691         raise GitError('"%d" exists but is not a directory\n' % d)
 692     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 693                          preexec_fn = _gitenv)
 694     _git_wait('git init', p)
 695     # Force the index version configuration in order to ensure bup works
 696     # regardless of the version of the installed Git binary.
 697     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 698                          stdout=sys.stderr, preexec_fn = _gitenv)
 699     _git_wait('git config', p)
 700
 701
 702 def check_repo_or_die(path=None):
 703     """Make sure a bup repository exists, and abort if not.
 704     If the path to a particular repository was not specified, this function
 705     initializes the default repository automatically.
 706     """
 707     guess_repo(path)
 708     if not os.path.isdir(repo('objects/pack/.')):
 709         if repodir == home_repodir:
 710             init_repo()
 711         else:
 712             log('error: %r is not a bup/git repository\n' % repo())
 713             sys.exit(15)
 714
 715
 716 def treeparse(buf):
 717     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 718     ofs = 0
 719     while ofs < len(buf):
 720         z = buf[ofs:].find('\0')
 721         assert(z > 0)
 722         spl = buf[ofs:ofs+z].split(' ', 1)
 723         assert(len(spl) == 2)
 724         sha = buf[ofs+z+1:ofs+z+1+20]
 725         ofs += z+1+20
 726         yield (spl[0], spl[1], sha)
 727
 728
 729 _ver = None
 730 def ver():
 731     """Get Git's version and ensure a usable version is installed.
 732
 733     The returned version is formatted as an ordered tuple with each position
 734     representing a digit in the version tag. For example, the following tuple
 735     would represent version 1.6.6.9:
 736
 737         ('1', '6', '6', '9')
 738     """
 739     global _ver
 740     if not _ver:
 741         p = subprocess.Popen(['git', '--version'],
 742                              stdout=subprocess.PIPE)
 743         gvs = p.stdout.read()
 744         _git_wait('git --version', p)
 745         m = re.match(r'git version (\S+.\S+)', gvs)
 746         if not m:
 747             raise GitError('git --version weird output: %r' % gvs)
 748         _ver = tuple(m.group(1).split('.'))
 749     needed = ('1','5', '3', '1')
 750     if _ver < needed:
 751         raise GitError('git version %s or higher is required; you have %s'
 752                        % ('.'.join(needed), '.'.join(_ver)))
 753     return _ver
 754
 755
 756 def _git_wait(cmd, p):
 757     rv = p.wait()
 758     if rv != 0:
 759         raise GitError('%s returned %d' % (cmd, rv))
 760
 761
 762 def _git_capture(argv):
 763     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 764     r = p.stdout.read()
 765     _git_wait(repr(argv), p)
 766     return r
 767
 768
 769 class _AbortableIter:
 770     def __init__(self, it, onabort = None):
 771         self.it = it
 772         self.onabort = onabort
 773         self.done = None
 774
 775     def __iter__(self):
 776         return self
 777
 778     def next(self):
 779         try:
 780             return self.it.next()
 781         except StopIteration, e:
 782             self.done = True
 783             raise
 784         except:
 785             self.abort()
 786             raise
 787
 788     def abort(self):
 789         """Abort iteration and call the abortion callback, if needed."""
 790         if not self.done:
 791             self.done = True
 792             if self.onabort:
 793                 self.onabort()
 794
 795     def __del__(self):
 796         self.abort()
 797
 798
 799 _ver_warned = 0
 800 class CatPipe:
 801     """Link to 'git cat-file' that is used to retrieve blob data."""
 802     def __init__(self):
 803         global _ver_warned
 804         wanted = ('1','5','6')
 805         if ver() < wanted:
 806             if not _ver_warned:
 807                 log('warning: git version < %s; bup will be slow.\n'
 808                     % '.'.join(wanted))
 809                 _ver_warned = 1
 810             self.get = self._slow_get
 811         else:
 812             self.p = self.inprogress = None
 813             self.get = self._fast_get
 814
 815     def _abort(self):
 816         if self.p:
 817             self.p.stdout.close()
 818             self.p.stdin.close()
 819         self.p = None
 820         self.inprogress = None
 821
 822     def _restart(self):
 823         self._abort()
 824         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 825                                   stdin=subprocess.PIPE,
 826                                   stdout=subprocess.PIPE,
 827                                   close_fds = True,
 828                                   preexec_fn = _gitenv)
 829
 830     def _fast_get(self, id):
 831         if not self.p or self.p.poll() != None:
 832             self._restart()
 833         assert(self.p)
 834         assert(self.p.poll() == None)
 835         if self.inprogress:
 836             log('_fast_get: opening %r while %r is open'
 837                 % (id, self.inprogress))
 838         assert(not self.inprogress)
 839         assert(id.find('\n') < 0)
 840         assert(id.find('\r') < 0)
 841         assert(id[0] != '-')
 842         self.inprogress = id
 843         self.p.stdin.write('%s\n' % id)
 844         hdr = self.p.stdout.readline()
 845         if hdr.endswith(' missing\n'):
 846             raise KeyError('blob %r is missing' % id)
 847         spl = hdr.split(' ')
 848         if len(spl) != 3 or len(spl[0]) != 40:
 849             raise GitError('expected blob, got %r' % spl)
 850         (hex, type, size) = spl
 851
 852         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 853                            onabort = self._abort)
 854         try:
 855             yield type
 856             for blob in it:
 857                 yield blob
 858             assert(self.p.stdout.readline() == '\n')
 859             self.inprogress = None
 860         except Exception, e:
 861             it.abort()
 862             raise
 863
 864     def _slow_get(self, id):
 865         assert(id.find('\n') < 0)
 866         assert(id.find('\r') < 0)
 867         assert(id[0] != '-')
 868         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 869         yield type
 870
 871         p = subprocess.Popen(['git', 'cat-file', type, id],
 872                              stdout=subprocess.PIPE,
 873                              preexec_fn = _gitenv)
 874         for blob in chunkyreader(p.stdout):
 875             yield blob
 876         _git_wait('git cat-file', p)
 877
 878     def _join(self, it):
 879         type = it.next()
 880         if type == 'blob':
 881             for blob in it:
 882                 yield blob
 883         elif type == 'tree':
 884             treefile = ''.join(it)
 885             for (mode, name, sha) in treeparse(treefile):
 886                 for blob in self.join(sha.encode('hex')):
 887                     yield blob
 888         elif type == 'commit':
 889             treeline = ''.join(it).split('\n')[0]
 890             assert(treeline.startswith('tree '))
 891             for blob in self.join(treeline[5:]):
 892                 yield blob
 893         else:
 894             raise GitError('invalid object type %r: expected blob/tree/commit'
 895                            % type)
 896
 897     def join(self, id):
 898         """Generate a list of the content of all blobs that can be reached
 899         from an object.  The hash given in 'id' must point to a blob, a tree
 900         or a commit. The content of all blobs that can be seen from trees or
 901         commits will be added to the list.
 902         """
 903         try:
 904             for d in self._join(self.get(id)):
 905                 yield d
 906         except StopIteration:
 907             log('booger!\n')