1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, heapq
7 from bup.helpers import *
8 from bup import _helpers
14 home_repodir = os.path.expanduser('~/.bup')
17 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
18 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
24 class GitError(Exception):
29 """Get the path to the git repository or one of its subdirectories."""
32 raise GitError('You should call check_repo_or_die()')
34 # If there's a .git subdirectory, then the actual repo is in there.
35 gd = os.path.join(repodir, '.git')
36 if os.path.exists(gd):
39 return os.path.join(repodir, sub)
42 def auto_midx(objdir):
43 main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
44 args = [main_exe, 'midx', '--auto', '--dir', objdir]
45 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
47 add_error('%r: returned %d' % (args, rv))
50 def mangle_name(name, mode, gitmode):
51 """Mangle a file name to present an abstract name for segmented files.
52 Mangled file names will have the ".bup" extension added to them. If a
53 file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
56 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
58 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
64 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
65 def demangle_name(name):
66 """Remove name mangling from a file name, if necessary.
68 The return value is a tuple (demangled_filename,mode), where mode is one of
71 * BUP_NORMAL : files that should be read as-is from the repository
72 * BUP_CHUNKED : files that were chunked and need to be assembled
    For more information on the name mangling algorithm, see mangle_name()
76 if name.endswith('.bupl'):
77 return (name[:-5], BUP_NORMAL)
78 elif name.endswith('.bup'):
79 return (name[:-4], BUP_CHUNKED)
81 return (name, BUP_NORMAL)
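# Illustrative examples of the mangling rules above (hypothetical names,
# shown as expected values rather than executed code):
#
#   mangle_name('foo', 0100644, 040000)       -> 'foo.bup'       (chunked)
#   mangle_name('foo.bup', 0100644, 0100644)  -> 'foo.bup.bupl'  (normal)
#   demangle_name('foo.bup')       -> ('foo', BUP_CHUNKED)
#   demangle_name('foo.bup.bupl')  -> ('foo.bup', BUP_NORMAL)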
84 def _encode_packobj(type, content):
87 szbits = (sz & 0x0f) | (_typemap[type]<<4)
96 z = zlib.compressobj(1)
98 yield z.compress(content)
102 def _encode_looseobj(type, content):
103 z = zlib.compressobj(1)
104 yield z.compress('%s %d\0' % (type, len(content)))
105 yield z.compress(content)
109 def _decode_looseobj(buf):
111 s = zlib.decompress(buf)
118 assert(type in _typemap)
119 assert(sz == len(content))
120 return (type, content)
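# Round-trip sketch (illustrative only): a loose object is just
# zlib('<type> <size>\0<content>'), so encoding and decoding are inverses:
#
#   raw = ''.join(_encode_looseobj('blob', 'hello'))
#   _decode_looseobj(raw)  ->  ('blob', 'hello')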
123 def _decode_packobj(buf):
126 type = _typermap[(c & 0x70) >> 4]
133 sz |= (c & 0x7f) << shift
137 return (type, zlib.decompress(buf[i+1:]))
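# Worked example of the packed header: for a 300-byte blob, _typemap['blob']
# is 3, so the first byte is 0x80 | (3 << 4) | (300 & 0x0f) = 0xbc (the 0x80
# bit marks a continuation) and the second byte is 300 >> 4 = 0x12.
# _decode_packobj() reverses this: sz = 0x0c | (0x12 << 4) = 300, and
# everything after the header is the zlib-compressed content.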
141 """Object representation of a Git pack index file."""
142 def __init__(self, filename, f):
144 self.idxnames = [self.name]
145 self.map = mmap_read(f)
146 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
147 self.fanout = list(struct.unpack('!256I',
148 str(buffer(self.map, 8, 256*4))))
149 self.fanout.append(0) # entry "-1"
150 nsha = self.fanout[255]
151 self.ofstable = buffer(self.map,
152 8 + 256*4 + nsha*20 + nsha*4,
154 self.ofs64table = buffer(self.map,
155 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
157 def _ofs_from_idx(self, idx):
158 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
160 idx64 = ofs & 0x7fffffff
161 ofs = struct.unpack('!I',
162 str(buffer(self.ofs64table, idx64*8, 8)))[0]
165 def _idx_from_hash(self, hash):
166 global _total_searches, _total_steps
168 assert(len(hash) == 20)
170 start = self.fanout[b1-1] # range -1..254
171 end = self.fanout[b1] # range 0..255
172 buf = buffer(self.map, 8 + 256*4, end*20)
174 _total_steps += 1 # lookup table is a step
177 mid = start + (end-start)/2
178 v = str(buf[mid*20:(mid+1)*20])
187 def find_offset(self, hash):
188 """Get the offset of an object inside the index file."""
189 idx = self._idx_from_hash(hash)
191 return self._ofs_from_idx(idx)
194 def exists(self, hash):
195 """Return nonempty if the object exists in this index."""
        return hash and (self._idx_from_hash(hash) is not None) and True or None
199 for i in xrange(self.fanout[255]):
200 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
203 return int(self.fanout[255])
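# A self-contained sketch (not used by bup) of the fanout-bounded binary
# search that PackIdx performs above.  'shas' stands in for the sorted sha
# table and 'fanout' for the 257-entry fanout list built in __init__.
def _packidx_search_sketch(shas, fanout, want):
    b1 = ord(want[0])
    start = fanout[b1-1]    # for b1 == 0 this hits the appended "-1" entry
    end = fanout[b1]
    while start < end:
        mid = start + (end-start)/2
        if shas[mid] < want:
            start = mid+1
        elif shas[mid] > want:
            end = mid
        else:
            return mid      # index of 'want' in the sha table
    return None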
206 extract_bits = _helpers.extract_bits
210 """Wrapper which contains data from multiple index files.
211 Multiple index (.midx) files constitute a wrapper around index (.idx) files
    and make it possible for bup to expand Git's indexing capabilities to vast
    amounts of files.
215 def __init__(self, filename):
217 self.force_keep = False
218 assert(filename.endswith('.midx'))
219 self.map = mmap_read(open(filename))
220 if str(self.map[0:4]) != 'MIDX':
221 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
222 self.force_keep = True
223 return self._init_failed()
224 ver = struct.unpack('!I', self.map[4:8])[0]
225 if ver < MIDX_VERSION:
226 log('Warning: ignoring old-style (v%d) midx %r\n'
228 self.force_keep = False # old stuff is boring
229 return self._init_failed()
230 if ver > MIDX_VERSION:
231 log('Warning: ignoring too-new (v%d) midx %r\n'
233 self.force_keep = True # new stuff is exciting
234 return self._init_failed()
236 self.bits = _helpers.firstword(self.map[8:12])
237 self.entries = 2**self.bits
238 self.fanout = buffer(self.map, 12, self.entries*4)
239 shaofs = 12 + self.entries*4
240 nsha = self._fanget(self.entries-1)
241 self.shalist = buffer(self.map, shaofs, nsha*20)
242 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
244 def _init_failed(self):
247 self.fanout = buffer('\0\0\0\0')
248 self.shalist = buffer('\0'*20)
251 def _fanget(self, i):
253 s = self.fanout[start:start+4]
254 return _helpers.firstword(s)
257 return str(self.shalist[i*20:(i+1)*20])
259 def exists(self, hash):
260 """Return nonempty if the object exists in the index files."""
261 global _total_searches, _total_steps
264 el = extract_bits(want, self.bits)
266 start = self._fanget(el-1)
267 startv = el << (32-self.bits)
271 end = self._fanget(el)
272 endv = (el+1) << (32-self.bits)
273 _total_steps += 1 # lookup table is a step
274 hashv = _helpers.firstword(hash)
275 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
278 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
279 mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
280 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
282 #print ' %08x' % self._num(v)
285 startv = _helpers.firstword(v)
288 endv = _helpers.firstword(v)
294 for i in xrange(self._fanget(self.entries-1)):
295 yield buffer(self.shalist, i*20, 20)
298 return int(self._fanget(self.entries-1))
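# A self-contained sketch (illustrative only) of the lookup idea behind
# PackMidx.exists(): the top 'bits' bits of the hash select a fanout bucket
# bounded by (start, end) and prefix values (startv, endv); within it we
# guess positions by linear interpolation rather than plain bisection.
def _midx_search_sketch(shas, want, start, end, startv, endv):
    hashv = struct.unpack('!I', want[:4])[0]
    while start < end:
        # proportional guess; max(..., 1) only guards this sketch against a
        # zero-width value range
        mid = start + (hashv-startv)*(end-start-1)/max(endv-startv, 1)
        v = shas[mid]
        if v < want:
            start, startv = mid+1, struct.unpack('!I', v[:4])[0]
        elif v > want:
            end, endv = mid, struct.unpack('!I', v[:4])[0]
        else:
            return mid
    return None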
303 def __init__(self, dir):
305 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
315 assert(_mpi_count == 0)
318 return iter(idxmerge(self.packs))
321 return sum(len(pack) for pack in self.packs)
323 def exists(self, hash):
324 """Return nonempty if the object exists in the index files."""
325 global _total_searches
327 if hash in self.also:
329 for i in range(len(self.packs)):
331 _total_searches -= 1 # will be incremented by sub-pack
333 # reorder so most recently used packs are searched first
334 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
338 def refresh(self, skip_midx = False):
339 """Refresh the index list.
        This method verifies whether any .midx files have been superseded
        (e.g. all of their contents are in another, bigger .midx file) and
        removes the superseded files.

344 If skip_midx is True, all work on .midx files will be skipped and .midx
345 files will be removed from the list.
347 The module-global variable 'ignore_midx' can force this function to
348 always act as if skip_midx was True.
350 skip_midx = skip_midx or ignore_midx
351 d = dict((p.name, p) for p in self.packs
352 if not skip_midx or not isinstance(p, PackMidx))
353 if os.path.exists(self.dir):
356 for ix in self.packs:
357 if isinstance(ix, PackMidx):
358 for name in ix.idxnames:
359 d[os.path.join(self.dir, name)] = ix
360 for f in os.listdir(self.dir):
361 full = os.path.join(self.dir, f)
362 if f.endswith('.midx') and not d.get(full):
364 (mxd, mxf) = os.path.split(mx.name)
366 for n in mx.idxnames:
367 if not os.path.exists(os.path.join(mxd, n)):
368 log(('warning: index %s missing\n' +
369 ' used by %s\n') % (n, mxf))
                midxl.sort(key=len, reverse=True)
376 for sub in ix.idxnames:
377 found = d.get(os.path.join(self.dir, sub))
378 if not found or isinstance(found, PackIdx):
379 # doesn't exist, or exists but not in a midx
381 for name in ix.idxnames:
382 d[os.path.join(self.dir, name)] = ix
385 if not any and not ix.force_keep:
386 debug1('midx: removing redundant: %s\n'
387 % os.path.basename(ix.name))
389 for f in os.listdir(self.dir):
390 full = os.path.join(self.dir, f)
391 if f.endswith('.idx') and not d.get(full):
394 self.packs = list(set(d.values()))
395 debug1('PackIdxList: using %d index%s.\n'
396 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
399 """Insert an additional object in the list."""
403 """Remove all additional objects from the list."""
407 def calc_hash(type, content):
408 """Calculate some content's hash in the Git fashion."""
409 header = '%s %d\0' % (type, len(content))
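    # For example, the 6-byte blob 'hello\n' gets the header 'blob 6\0', and
    # sha1('blob 6\0hello\n').hexdigest() is
    # 'ce013625030ba8dba906f756967f9e9ca394464a' -- exactly what
    # `git hash-object` prints for the same data.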
415 def _shalist_sort_key(ent):
416 (mode, name, id) = ent
417 if stat.S_ISDIR(int(mode, 8)):
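    # Example: git sorts a directory 'foo' *after* a file 'foo.c', because
    # the directory's sort key is 'foo/' and '/' (0x2f) compares greater
    # than '.' (0x2e).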
423 def open_idx(filename):
424 if filename.endswith('.idx'):
425 f = open(filename, 'rb')
427 if header[0:4] == '\377tOc':
428 version = struct.unpack('!I', header[4:8])[0]
430 return PackIdxV2(filename, f)
432 raise GitError('%s: expected idx file version 2, got %d'
433 % (filename, version))
435 raise GitError('version 1 idx files not supported')
436 elif filename.endswith('.midx'):
437 return PackMidx(filename)
439 raise GitError('idx filenames must end with .idx or .midx')
442 def idxmerge(idxlist, final_progress=True):
443 """Generate a list of all the objects reachable in a PackIdxList."""
444 total = sum(len(i) for i in idxlist)
445 iters = (iter(i) for i in idxlist)
446 heap = [(next(it), it) for it in iters]
451 if (count % 10024) == 0:
452 progress('Reading indexes: %.2f%% (%d/%d)\r'
453 % (count*100.0/total, count, total))
461 heapq.heapreplace(heap, (e, it))
465 log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
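# A minimal self-contained sketch of the same merge (illustrative only):
# heapq.merge performs the k-way merge of the already-sorted id streams, and
# duplicates are dropped because one object may appear in several .idx files.
def _idxmerge_sketch(sorted_iters):
    last = None
    for e in heapq.merge(*sorted_iters):
        if e != last:
            yield e
            last = e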
469 """Writes Git objects insid a pack file."""
470 def __init__(self, objcache_maker=None):
475 self.objcache_maker = objcache_maker
481 def _make_objcache(self):
        if self.objcache is None:
483 if self.objcache_maker:
484 self.objcache = self.objcache_maker()
486 self.objcache = PackIdxList(repo('objects/pack'))
490 self._make_objcache()
491 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
492 self.file = os.fdopen(fd, 'w+b')
493 assert(name.endswith('.pack'))
494 self.filename = name[:-5]
495 self.file.write('PACK\0\0\0\2\0\0\0\0')
497 def _raw_write(self, datalist):
        # in case we get interrupted (e.g. KeyboardInterrupt), it's best if
501 # the file never has a *partial* blob. So let's make sure it's
502 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
503 # to our hashsplit algorithm.) f.write() does its own buffering,
504 # but that's okay because we'll flush it in _end().
505 oneblob = ''.join(datalist)
507 self.outbytes += len(oneblob)
510 def _write(self, bin, type, content):
513 self._raw_write(_encode_packobj(type, content))
516 def breakpoint(self):
517 """Clear byte and object counts and return the last processed id."""
519 self.outbytes = self.count = 0
522 def write(self, type, content):
523 """Write an object in this pack file."""
524 return self._write(calc_hash(type, content), type, content)
526 def exists(self, id):
527 """Return non-empty if an object is found in the object cache."""
528 if not self.objcache:
529 self._make_objcache()
530 return self.objcache.exists(id)
532 def maybe_write(self, type, content):
533 """Write an object to the pack file if not present and return its id."""
534 bin = calc_hash(type, content)
535 if not self.exists(bin):
536 self._write(bin, type, content)
537 self.objcache.add(bin)
540 def new_blob(self, blob):
541 """Create a blob object in the pack with the supplied content."""
542 return self.maybe_write('blob', blob)
544 def new_tree(self, shalist):
545 """Create a tree object in the pack."""
546 shalist = sorted(shalist, key = _shalist_sort_key)
548 for (mode,name,bin) in shalist:
551 assert(mode[0] != '0')
553 assert(len(bin) == 20)
554 l.append('%s %s\0%s' % (mode,name,bin))
555 return self.maybe_write('tree', ''.join(l))
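    # For example (with a hypothetical id), one serialized tree entry is:
    #
    #   sha = '\xaa' * 20
    #   '100644 hello.txt\0' + sha
    #
    # i.e. the octal mode without a leading zero, a space, the name, a NUL,
    # and the raw 20-byte id (binary, not hex).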
557 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
559 if tree: l.append('tree %s' % tree.encode('hex'))
560 if parent: l.append('parent %s' % parent.encode('hex'))
561 if author: l.append('author %s %s' % (author, _git_date(adate)))
562 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
565 return self.maybe_write('commit', '\n'.join(l))
567 def new_commit(self, parent, tree, date, msg):
568 """Create a commit object in the pack."""
569 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
570 commit = self._new_commit(tree, parent,
571 userline, date, userline, date,
576 """Remove the pack file from disk."""
581 os.unlink(self.filename + '.pack')
585 if not f: return None
589 # update object count
591 cp = struct.pack('!i', self.count)
595 # calculate the pack sha1sum
602 f.write(sum.digest())
606 p = subprocess.Popen(['git', 'index-pack', '-v',
608 self.filename + '.pack'],
609 preexec_fn = _gitenv,
610 stdout = subprocess.PIPE)
611 out = p.stdout.read().strip()
612 _git_wait('git index-pack', p)
614 raise GitError('git index-pack produced no output')
615 nameprefix = repo('objects/pack/%s' % out)
616 if os.path.exists(self.filename + '.map'):
617 os.unlink(self.filename + '.map')
618 os.rename(self.filename + '.pack', nameprefix + '.pack')
619 os.rename(self.filename + '.idx', nameprefix + '.idx')
621 auto_midx(repo('objects/pack'))
625 """Close the pack file and move it to its definitive path."""
630 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
634 os.environ['GIT_DIR'] = os.path.abspath(repo())
637 def list_refs(refname = None):
638 """Generate a list of tuples in the form (refname,hash).
639 If a ref name is specified, list only this particular ref.
641 argv = ['git', 'show-ref', '--']
644 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
645 out = p.stdout.read().strip()
646 rv = p.wait() # not fatal
650 for d in out.split('\n'):
651 (sha, name) = d.split(' ', 1)
652 yield (name, sha.decode('hex'))
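# Typical use (illustrative): iterate over all refs, hex-encoding the binary
# hashes this generator yields.
#
#   for (name, sha) in list_refs():
#       print name, sha.encode('hex')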
655 def read_ref(refname):
656 """Get the commit id of the most recent commit made on a given ref."""
657 l = list(list_refs(refname))
665 def rev_list(ref, count=None):
666 """Generate a list of reachable commits in reverse chronological order.
668 This generator walks through commits, from child to parent, that are
669 reachable via the specified ref and yields a series of tuples of the form
672 If count is a non-zero integer, limit the number of commits to "count"
675 assert(not ref.startswith('-'))
678 opts += ['-n', str(atoi(count))]
679 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
680 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
684 if s.startswith('commit '):
685 commit = s[7:].decode('hex')
689 rv = p.wait() # not fatal
        raise GitError('git rev-list returned error %d' % rv)
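# Illustrative use: walk the newest ten commits on a (hypothetical) branch.
# Each yielded tuple is (commit timestamp, binary commit id).
#
#   for (date, commit) in rev_list('refs/heads/master', count=10):
#       print date, commit.encode('hex')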
694 def rev_get_date(ref):
695 """Get the date of the latest commit on the specified ref."""
696 for (date, commit) in rev_list(ref, count=1):
    raise GitError('no such commit %r' % ref)
701 def update_ref(refname, newval, oldval):
702 """Change the commit pointed to by a branch."""
705 assert(refname.startswith('refs/heads/'))
706 p = subprocess.Popen(['git', 'update-ref', refname,
707 newval.encode('hex'), oldval.encode('hex')],
708 preexec_fn = _gitenv)
709 _git_wait('git update-ref', p)
712 def guess_repo(path=None):
713 """Set the path value in the global variable "repodir".
714 This makes bup look for an existing bup repository, but not fail if a
715 repository doesn't exist. Usually, if you are interacting with a bup
716 repository, you would not be calling this function but using
723 repodir = os.environ.get('BUP_DIR')
725 repodir = os.path.expanduser('~/.bup')
728 def init_repo(path=None):
729 """Create the Git bare repository for bup in a given path."""
732 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
734 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
735 preexec_fn = _gitenv)
736 _git_wait('git init', p)
737 # Force the index version configuration in order to ensure bup works
738 # regardless of the version of the installed Git binary.
739 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
740 stdout=sys.stderr, preexec_fn = _gitenv)
741 _git_wait('git config', p)
744 def check_repo_or_die(path=None):
745 """Make sure a bup repository exists, and abort if not.
746 If the path to a particular repository was not specified, this function
747 initializes the default repository automatically.
750 if not os.path.isdir(repo('objects/pack/.')):
751 if repodir == home_repodir:
754 log('error: %r is not a bup/git repository\n' % repo())
759 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
761 while ofs < len(buf):
762 z = buf[ofs:].find('\0')
764 spl = buf[ofs:ofs+z].split(' ', 1)
765 assert(len(spl) == 2)
766 sha = buf[ofs+z+1:ofs+z+1+20]
768 yield (spl[0], spl[1], sha)
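# Round-trip sketch (illustrative only): treeparse() consumes exactly the
# byte layout that PackWriter.new_tree() produces.
#
#   sha = '\xda' * 20                                   # hypothetical id
#   buf = '100644 a.txt\0%s40000 sub\0%s' % (sha, sha)
#   list(treeparse(buf))
#     -> [('100644', 'a.txt', sha), ('40000', 'sub', sha)]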
773 """Get Git's version and ensure a usable version is installed.
775 The returned version is formatted as an ordered tuple with each position
776 representing a digit in the version tag. For example, the following tuple
777 would represent version 1.6.6.9:
783 p = subprocess.Popen(['git', '--version'],
784 stdout=subprocess.PIPE)
785 gvs = p.stdout.read()
786 _git_wait('git --version', p)
    m = re.match(r'git version (\S+\.\S+)', gvs)
789 raise GitError('git --version weird output: %r' % gvs)
790 _ver = tuple(m.group(1).split('.'))
791 needed = ('1','5', '3', '1')
793 raise GitError('git version %s or higher is required; you have %s'
794 % ('.'.join(needed), '.'.join(_ver)))
798 def _git_wait(cmd, p):
801 raise GitError('%s returned %d' % (cmd, rv))
804 def _git_capture(argv):
805 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
807 _git_wait(repr(argv), p)
811 class _AbortableIter:
812 def __init__(self, it, onabort = None):
814 self.onabort = onabort
822 return self.it.next()
823 except StopIteration, e:
831 """Abort iteration and call the abortion callback, if needed."""
843 """Link to 'git cat-file' that is used to retrieve blob data."""
846 wanted = ('1','5','6')
849 log('warning: git version < %s; bup will be slow.\n'
852 self.get = self._slow_get
854 self.p = self.inprogress = None
855 self.get = self._fast_get
859 self.p.stdout.close()
862 self.inprogress = None
866 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
867 stdin=subprocess.PIPE,
868 stdout=subprocess.PIPE,
870 preexec_fn = _gitenv)
872 def _fast_get(self, id):
        if not self.p or self.p.poll() is not None:
        assert(self.p.poll() is None)
878 log('_fast_get: opening %r while %r is open'
879 % (id, self.inprogress))
880 assert(not self.inprogress)
881 assert(id.find('\n') < 0)
882 assert(id.find('\r') < 0)
883 assert(not id.startswith('-'))
885 self.p.stdin.write('%s\n' % id)
886 hdr = self.p.stdout.readline()
887 if hdr.endswith(' missing\n'):
888 self.inprogress = None
889 raise KeyError('blob %r is missing' % id)
891 if len(spl) != 3 or len(spl[0]) != 40:
892 raise GitError('expected blob, got %r' % spl)
893 (hex, type, size) = spl
895 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
896 onabort = self._abort)
901 assert(self.p.stdout.readline() == '\n')
902 self.inprogress = None
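    # The '--batch' protocol used by _fast_get(), as a transcript sketch:
    #
    #   we send:    '<id>\n'
    #   git sends:  '<40-hex-sha> <type> <size>\n'   (or '<id> missing\n')
    #               <size> bytes of raw object data
    #               '\n'                              (a trailing terminator)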
907 def _slow_get(self, id):
908 assert(id.find('\n') < 0)
909 assert(id.find('\r') < 0)
911 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
914 p = subprocess.Popen(['git', 'cat-file', type, id],
915 stdout=subprocess.PIPE,
916 preexec_fn = _gitenv)
917 for blob in chunkyreader(p.stdout):
919 _git_wait('git cat-file', p)
927 treefile = ''.join(it)
928 for (mode, name, sha) in treeparse(treefile):
929 for blob in self.join(sha.encode('hex')):
931 elif type == 'commit':
932 treeline = ''.join(it).split('\n')[0]
933 assert(treeline.startswith('tree '))
934 for blob in self.join(treeline[5:]):
937 raise GitError('invalid object type %r: expected blob/tree/commit'
941 """Generate a list of the content of all blobs that can be reached
942 from an object. The hash given in 'id' must point to a blob, a tree
943 or a commit. The content of all blobs that can be seen from trees or
944 commits will be added to the list.
947 for d in self._join(self.get(id)):
949 except StopIteration: