lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, zlib, time, subprocess, struct, stat, re, tempfile
   6 import heapq
   7 from bup.helpers import *
   8 from bup import _helpers
   9
# The only .midx format version PackMidx accepts; files carrying any other
# version are ignored with a warning.
MIDX_VERSION = 2

# When true, PackWriter._write() logs a '>' for every object written.
verbose = 0
# When true, PackIdxList.refresh() always behaves as if skip_midx was set.
ignore_midx = 0
# The repository that check_repo_or_die() is willing to create on demand.
home_repodir = os.path.expanduser('~/.bup')
# Path of the active repository; set by guess_repo() and read by repo().
repodir = None

# Object type name <-> numeric type code used in pack object headers.
_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Lookup statistics, incremented by the index classes while searching.
_total_searches = 0
_total_steps = 0
  22
  23
  24 class GitError(Exception):
  25     pass
  26
  27
  28 def repo(sub = ''):
  29     """Get the path to the git repository or one of its subdirectories."""
  30     global repodir
  31     if not repodir:
  32         raise GitError('You should call check_repo_or_die()')
  33
  34     # If there's a .git subdirectory, then the actual repo is in there.
  35     gd = os.path.join(repodir, '.git')
  36     if os.path.exists(gd):
  37         repodir = gd
  38
  39     return os.path.join(repodir, sub)
  40
  41
  42 def auto_midx(objdir):
  43     main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
  44     args = [main_exe, 'midx', '--auto', '--dir', objdir]
  45     rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  46     if rv:
  47         add_error('%r: returned %d' % (args, rv))
  48
  49
  50 def mangle_name(name, mode, gitmode):
  51     """Mangle a file name to present an abstract name for segmented files.
  52     Mangled file names will have the ".bup" extension added to them. If a
  53     file's name already ends with ".bup", a ".bupl" extension is added to
  54     disambiguate normal files from semgmented ones.
  55     """
  56     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  57         return name + '.bup'
  58     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  59         return name + '.bupl'
  60     else:
  61         return name
  62
  63
  64 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  65 def demangle_name(name):
  66     """Remove name mangling from a file name, if necessary.
  67
  68     The return value is a tuple (demangled_filename,mode), where mode is one of
  69     the following:
  70
  71     * BUP_NORMAL  : files that should be read as-is from the repository
  72     * BUP_CHUNKED : files that were chunked and need to be assembled
  73
  74     For more information on the name mangling algorythm, see mangle_name()
  75     """
  76     if name.endswith('.bupl'):
  77         return (name[:-5], BUP_NORMAL)
  78     elif name.endswith('.bup'):
  79         return (name[:-4], BUP_CHUNKED)
  80     else:
  81         return (name, BUP_NORMAL)
  82
  83
  84 def _encode_packobj(type, content):
  85     szout = ''
  86     sz = len(content)
  87     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  88     sz >>= 4
  89     while 1:
  90         if sz: szbits |= 0x80
  91         szout += chr(szbits)
  92         if not sz:
  93             break
  94         szbits = sz & 0x7f
  95         sz >>= 7
  96     z = zlib.compressobj(1)
  97     yield szout
  98     yield z.compress(content)
  99     yield z.flush()
 100
 101
 102 def _encode_looseobj(type, content):
 103     z = zlib.compressobj(1)
 104     yield z.compress('%s %d\0' % (type, len(content)))
 105     yield z.compress(content)
 106     yield z.flush()
 107
 108
 109 def _decode_looseobj(buf):
 110     assert(buf);
 111     s = zlib.decompress(buf)
 112     i = s.find('\0')
 113     assert(i > 0)
 114     l = s[:i].split(' ')
 115     type = l[0]
 116     sz = int(l[1])
 117     content = s[i+1:]
 118     assert(type in _typemap)
 119     assert(sz == len(content))
 120     return (type, content)
 121
 122
 123 def _decode_packobj(buf):
 124     assert(buf)
 125     c = ord(buf[0])
 126     type = _typermap[(c & 0x70) >> 4]
 127     sz = c & 0x0f
 128     shift = 4
 129     i = 0
 130     while c & 0x80:
 131         i += 1
 132         c = ord(buf[i])
 133         sz |= (c & 0x7f) << shift
 134         shift += 7
 135         if not (c & 0x80):
 136             break
 137     return (type, zlib.decompress(buf[i+1:]))
 138
 139
 140 class PackIdx:
 141     """Object representation of a Git pack index file."""
 142     def __init__(self, filename):
 143         self.name = filename
 144         self.idxnames = [self.name]
 145         self.map = mmap_read(open(filename))
 146         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 147         self.fanout = list(struct.unpack('!256I',
 148                                          str(buffer(self.map, 8, 256*4))))
 149         self.fanout.append(0)  # entry "-1"
 150         nsha = self.fanout[255]
 151         self.ofstable = buffer(self.map,
 152                                8 + 256*4 + nsha*20 + nsha*4,
 153                                nsha*4)
 154         self.ofs64table = buffer(self.map,
 155                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 156
 157     def _ofs_from_idx(self, idx):
 158         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 159         if ofs & 0x80000000:
 160             idx64 = ofs & 0x7fffffff
 161             ofs = struct.unpack('!I',
 162                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 163         return ofs
 164
 165     def _idx_from_hash(self, hash):
 166         global _total_searches, _total_steps
 167         _total_searches += 1
 168         assert(len(hash) == 20)
 169         b1 = ord(hash[0])
 170         start = self.fanout[b1-1] # range -1..254
 171         end = self.fanout[b1] # range 0..255
 172         buf = buffer(self.map, 8 + 256*4, end*20)
 173         want = str(hash)
 174         _total_steps += 1  # lookup table is a step
 175         while start < end:
 176             _total_steps += 1
 177             mid = start + (end-start)/2
 178             v = str(buf[mid*20:(mid+1)*20])
 179             if v < want:
 180                 start = mid+1
 181             elif v > want:
 182                 end = mid
 183             else: # got it!
 184                 return mid
 185         return None
 186
 187     def find_offset(self, hash):
 188         """Get the offset of an object inside the index file."""
 189         idx = self._idx_from_hash(hash)
 190         if idx != None:
 191             return self._ofs_from_idx(idx)
 192         return None
 193
 194     def exists(self, hash):
 195         """Return nonempty if the object exists in this index."""
 196         return hash and (self._idx_from_hash(hash) != None) and True or None
 197
 198     def __iter__(self):
 199         for i in xrange(self.fanout[255]):
 200             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 201
 202     def __len__(self):
 203         return int(self.fanout[255])
 204
 205
# Short module-level alias for _helpers.extract_bits; PackMidx.exists() uses
# it to pick the fanout bucket for a hash.
extract_bits = _helpers.extract_bits
 207
 208
 209 class PackMidx:
 210     """Wrapper which contains data from multiple index files.
 211     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 212     and make it possible for bup to expand Git's indexing capabilities to vast
 213     amounts of files.
 214     """
 215     def __init__(self, filename):
 216         self.name = filename
 217         self.force_keep = False
 218         assert(filename.endswith('.midx'))
 219         self.map = mmap_read(open(filename))
 220         if str(self.map[0:4]) != 'MIDX':
 221             log('Warning: skipping: invalid MIDX header in %r\n' % filename)
 222             self.force_keep = True
 223             return self._init_failed()
 224         ver = struct.unpack('!I', self.map[4:8])[0]
 225         if ver < MIDX_VERSION:
 226             log('Warning: ignoring old-style (v%d) midx %r\n'
 227                 % (ver, filename))
 228             self.force_keep = False  # old stuff is boring
 229             return self._init_failed()
 230         if ver > MIDX_VERSION:
 231             log('Warning: ignoring too-new (v%d) midx %r\n'
 232                 % (ver, filename))
 233             self.force_keep = True  # new stuff is exciting
 234             return self._init_failed()
 235
 236         self.bits = _helpers.firstword(self.map[8:12])
 237         self.entries = 2**self.bits
 238         self.fanout = buffer(self.map, 12, self.entries*4)
 239         shaofs = 12 + self.entries*4
 240         nsha = self._fanget(self.entries-1)
 241         self.shalist = buffer(self.map, shaofs, nsha*20)
 242         self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 243
 244     def _init_failed(self):
 245         self.bits = 0
 246         self.entries = 1
 247         self.fanout = buffer('\0\0\0\0')
 248         self.shalist = buffer('\0'*20)
 249         self.idxnames = []
 250
 251     def _fanget(self, i):
 252         start = i*4
 253         s = self.fanout[start:start+4]
 254         return _helpers.firstword(s)
 255
 256     def _get(self, i):
 257         return str(self.shalist[i*20:(i+1)*20])
 258
 259     def exists(self, hash):
 260         """Return nonempty if the object exists in the index files."""
 261         global _total_searches, _total_steps
 262         _total_searches += 1
 263         want = str(hash)
 264         el = extract_bits(want, self.bits)
 265         if el:
 266             start = self._fanget(el-1)
 267             startv = el << (32-self.bits)
 268         else:
 269             start = 0
 270             startv = 0
 271         end = self._fanget(el)
 272         endv = (el+1) << (32-self.bits)
 273         _total_steps += 1   # lookup table is a step
 274         hashv = _helpers.firstword(hash)
 275         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 276         while start < end:
 277             _total_steps += 1
 278             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 279             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 280             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 281             v = self._get(mid)
 282             #print '    %08x' % self._num(v)
 283             if v < want:
 284                 start = mid+1
 285                 startv = _helpers.firstword(v)
 286             elif v > want:
 287                 end = mid
 288                 endv = _helpers.firstword(v)
 289             else: # got it!
 290                 return True
 291         return None
 292
 293     def __iter__(self):
 294         for i in xrange(self._fanget(self.entries-1)):
 295             yield buffer(self.shalist, i*20, 20)
 296
 297     def __len__(self):
 298         return int(self._fanget(self.entries-1))
 299
 300
# Number of live PackIdxList instances; the class asserts it never exceeds 1.
_mpi_count = 0
class PackIdxList:
    """Searchable collection of the .idx and .midx files found in a directory.

    self.packs holds the opened PackIdx/PackMidx objects and self.also holds
    extra hashes registered through add().
    """
    def __init__(self, dir):
        """Open every index found in 'dir' (see refresh())."""
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = {}
        self.packs = []
        self.refresh()

    def __del__(self):
        # Release the single-instance slot claimed in __init__.
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        """Iterate over the merged contents of all indexes (see idxmerge())."""
        return iter(idxmerge(self.packs))

    def __len__(self):
        """Return the sum of the lengths of all indexes."""
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash):
        """Return nonempty if the object exists in the index files.

        The result is True for a hash registered with add(), the name of the
        index that contains the hash otherwise, or None if it is not found.
        """
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        for i in range(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            if p.exists(hash):
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return p.name
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        skip_midx = skip_midx or ignore_midx
        # d maps a full index path to the object that serves it.  An .idx
        # covered by a midx is mapped to that midx object.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                # Mark everything covered by the midx files already loaded.
                for ix in self.packs:
                    if isinstance(ix, PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                # Open the midx files that are not loaded yet, dropping any
                # that refer to an .idx file that no longer exists.
                # NOTE(review): 'not d.get(full)' is also true for an index
                # object whose len() is 0, since both index classes define
                # __len__; such an entry is treated as missing -- confirm
                # that this is intended.
                for f in os.listdir(self.dir):
                    full = os.path.join(self.dir, f)
                    if f.endswith('.midx') and not d.get(full):
                        mx = PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = 0
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                    '  used by %s\n') % (n, mxf))
                                broken += 1
                        if not broken:
                            midxl.append(mx)
                # Largest midx first, so smaller ones whose contents are
                # already covered are the ones found redundant below.
                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                for ix in midxl:
                    any = 0
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            d[ix.name] = ix
                            for name in ix.idxnames:
                                d[os.path.join(self.dir, name)] = ix
                            any += 1
                            break
                    # Nothing new in this midx: delete the file unless it
                    # was flagged to be kept.
                    if not any and not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        unlink(ix.name)
            # Open each .idx that is neither loaded nor covered by a midx.
            for f in os.listdir(self.dir):
                full = os.path.join(self.dir, f)
                if f.endswith('.idx') and not d.get(full):
                    ix = PackIdx(full)
                    d[full] = ix
            # Several paths can map to the same midx; keep each object once.
            self.packs = list(set(d.values()))
        debug1('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def add(self, hash):
        """Insert an additional object in the list."""
        self.also[hash] = 1

    def zap_also(self):
        """Remove all additional objects from the list."""
        self.also = {}
 405
 406
 407 def calc_hash(type, content):
 408     """Calculate some content's hash in the Git fashion."""
 409     header = '%s %d\0' % (type, len(content))
 410     sum = Sha1(header)
 411     sum.update(content)
 412     return sum.digest()
 413
 414
 415 def _shalist_sort_key(ent):
 416     (mode, name, id) = ent
 417     if stat.S_ISDIR(int(mode, 8)):
 418         return name + '/'
 419     else:
 420         return name
 421
 422
 423 def open_idx(filename):
 424     if filename.endswith('.idx'):
 425         return PackIdx(filename)
 426     elif filename.endswith('.midx'):
 427         return PackMidx(filename)
 428     else:
 429         raise GitError('idx filenames must end with .idx or .midx')
 430
 431
 432 def idxmerge(idxlist, final_progress=True):
 433     """Generate a list of all the objects reachable in a PackIdxList."""
 434     total = sum(len(i) for i in idxlist)
 435     iters = (iter(i) for i in idxlist)
 436     heap = [(next(it), it) for it in iters]
 437     heapq.heapify(heap)
 438     count = 0
 439     last = None
 440     while heap:
 441         if (count % 10024) == 0:
 442             progress('Reading indexes: %.2f%% (%d/%d)\r'
 443                      % (count*100.0/total, count, total))
 444         (e, it) = heap[0]
 445         if e != last:
 446             yield e
 447             last = e
 448         count += 1
 449         e = next(it)
 450         if e:
 451             heapq.heapreplace(heap, (e, it))
 452         else:
 453             heapq.heappop(heap)
 454     if final_progress:
 455         log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 456
 457
 458 class PackWriter:
 459     """Writes Git objects insid a pack file."""
 460     def __init__(self, objcache_maker=None):
 461         self.count = 0
 462         self.outbytes = 0
 463         self.filename = None
 464         self.file = None
 465         self.objcache_maker = objcache_maker
 466         self.objcache = None
 467
 468     def __del__(self):
 469         self.close()
 470
 471     def _make_objcache(self):
 472         if self.objcache == None:
 473             if self.objcache_maker:
 474                 self.objcache = self.objcache_maker()
 475             else:
 476                 self.objcache = PackIdxList(repo('objects/pack'))
 477
 478     def _open(self):
 479         if not self.file:
 480             self._make_objcache()
 481             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 482             self.file = os.fdopen(fd, 'w+b')
 483             assert(name.endswith('.pack'))
 484             self.filename = name[:-5]
 485             self.file.write('PACK\0\0\0\2\0\0\0\0')
 486
 487     def _raw_write(self, datalist):
 488         self._open()
 489         f = self.file
 490         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 491         # the file never has a *partial* blob.  So let's make sure it's
 492         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 493         # to our hashsplit algorithm.)  f.write() does its own buffering,
 494         # but that's okay because we'll flush it in _end().
 495         oneblob = ''.join(datalist)
 496         f.write(oneblob)
 497         self.outbytes += len(oneblob)
 498         self.count += 1
 499
 500     def _write(self, bin, type, content):
 501         if verbose:
 502             log('>')
 503         self._raw_write(_encode_packobj(type, content))
 504         return bin
 505
 506     def breakpoint(self):
 507         """Clear byte and object counts and return the last processed id."""
 508         id = self._end()
 509         self.outbytes = self.count = 0
 510         return id
 511
 512     def write(self, type, content):
 513         """Write an object in this pack file."""
 514         return self._write(calc_hash(type, content), type, content)
 515
 516     def exists(self, id):
 517         """Return non-empty if an object is found in the object cache."""
 518         if not self.objcache:
 519             self._make_objcache()
 520         return self.objcache.exists(id)
 521
 522     def maybe_write(self, type, content):
 523         """Write an object to the pack file if not present and return its id."""
 524         bin = calc_hash(type, content)
 525         if not self.exists(bin):
 526             self._write(bin, type, content)
 527             self.objcache.add(bin)
 528         return bin
 529
 530     def new_blob(self, blob):
 531         """Create a blob object in the pack with the supplied content."""
 532         return self.maybe_write('blob', blob)
 533
 534     def new_tree(self, shalist):
 535         """Create a tree object in the pack."""
 536         shalist = sorted(shalist, key = _shalist_sort_key)
 537         l = []
 538         for (mode,name,bin) in shalist:
 539             assert(mode)
 540             assert(mode != '0')
 541             assert(mode[0] != '0')
 542             assert(name)
 543             assert(len(bin) == 20)
 544             l.append('%s %s\0%s' % (mode,name,bin))
 545         return self.maybe_write('tree', ''.join(l))
 546
 547     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 548         l = []
 549         if tree: l.append('tree %s' % tree.encode('hex'))
 550         if parent: l.append('parent %s' % parent.encode('hex'))
 551         if author: l.append('author %s %s' % (author, _git_date(adate)))
 552         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 553         l.append('')
 554         l.append(msg)
 555         return self.maybe_write('commit', '\n'.join(l))
 556
 557     def new_commit(self, parent, tree, date, msg):
 558         """Create a commit object in the pack."""
 559         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 560         commit = self._new_commit(tree, parent,
 561                                   userline, date, userline, date,
 562                                   msg)
 563         return commit
 564
 565     def abort(self):
 566         """Remove the pack file from disk."""
 567         f = self.file
 568         if f:
 569             self.file = None
 570             f.close()
 571             os.unlink(self.filename + '.pack')
 572
 573     def _end(self):
 574         f = self.file
 575         if not f: return None
 576         self.file = None
 577         self.objcache = None
 578
 579         # update object count
 580         f.seek(8)
 581         cp = struct.pack('!i', self.count)
 582         assert(len(cp) == 4)
 583         f.write(cp)
 584
 585         # calculate the pack sha1sum
 586         f.seek(0)
 587         sum = Sha1()
 588         while 1:
 589             b = f.read(65536)
 590             sum.update(b)
 591             if not b: break
 592         f.write(sum.digest())
 593
 594         f.close()
 595
 596         p = subprocess.Popen(['git', 'index-pack', '-v',
 597                               '--index-version=2',
 598                               self.filename + '.pack'],
 599                              preexec_fn = _gitenv,
 600                              stdout = subprocess.PIPE)
 601         out = p.stdout.read().strip()
 602         _git_wait('git index-pack', p)
 603         if not out:
 604             raise GitError('git index-pack produced no output')
 605         nameprefix = repo('objects/pack/%s' % out)
 606         if os.path.exists(self.filename + '.map'):
 607             os.unlink(self.filename + '.map')
 608         os.rename(self.filename + '.pack', nameprefix + '.pack')
 609         os.rename(self.filename + '.idx', nameprefix + '.idx')
 610
 611         auto_midx(repo('objects/pack'))
 612         return nameprefix
 613
 614     def close(self):
 615         """Close the pack file and move it to its definitive path."""
 616         return self._end()
 617
 618
 619 def _git_date(date):
 620     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 621
 622
 623 def _gitenv():
 624     os.environ['GIT_DIR'] = os.path.abspath(repo())
 625
 626
 627 def list_refs(refname = None):
 628     """Generate a list of tuples in the form (refname,hash).
 629     If a ref name is specified, list only this particular ref.
 630     """
 631     argv = ['git', 'show-ref', '--']
 632     if refname:
 633         argv += [refname]
 634     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 635     out = p.stdout.read().strip()
 636     rv = p.wait()  # not fatal
 637     if rv:
 638         assert(not out)
 639     if out:
 640         for d in out.split('\n'):
 641             (sha, name) = d.split(' ', 1)
 642             yield (name, sha.decode('hex'))
 643
 644
 645 def read_ref(refname):
 646     """Get the commit id of the most recent commit made on a given ref."""
 647     l = list(list_refs(refname))
 648     if l:
 649         assert(len(l) == 1)
 650         return l[0][1]
 651     else:
 652         return None
 653
 654
 655 def rev_list(ref, count=None):
 656     """Generate a list of reachable commits in reverse chronological order.
 657
 658     This generator walks through commits, from child to parent, that are
 659     reachable via the specified ref and yields a series of tuples of the form
 660     (date,hash).
 661
 662     If count is a non-zero integer, limit the number of commits to "count"
 663     objects.
 664     """
 665     assert(not ref.startswith('-'))
 666     opts = []
 667     if count:
 668         opts += ['-n', str(atoi(count))]
 669     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 670     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 671     commit = None
 672     for row in p.stdout:
 673         s = row.strip()
 674         if s.startswith('commit '):
 675             commit = s[7:].decode('hex')
 676         else:
 677             date = int(s)
 678             yield (date, commit)
 679     rv = p.wait()  # not fatal
 680     if rv:
 681         raise GitError, 'git rev-list returned error %d' % rv
 682
 683
 684 def rev_get_date(ref):
 685     """Get the date of the latest commit on the specified ref."""
 686     for (date, commit) in rev_list(ref, count=1):
 687         return date
 688     raise GitError, 'no such commit %r' % ref
 689
 690
 691 def update_ref(refname, newval, oldval):
 692     """Change the commit pointed to by a branch."""
 693     if not oldval:
 694         oldval = ''
 695     assert(refname.startswith('refs/heads/'))
 696     p = subprocess.Popen(['git', 'update-ref', refname,
 697                           newval.encode('hex'), oldval.encode('hex')],
 698                          preexec_fn = _gitenv)
 699     _git_wait('git update-ref', p)
 700
 701
 702 def guess_repo(path=None):
 703     """Set the path value in the global variable "repodir".
 704     This makes bup look for an existing bup repository, but not fail if a
 705     repository doesn't exist. Usually, if you are interacting with a bup
 706     repository, you would not be calling this function but using
 707     check_repo_or_die().
 708     """
 709     global repodir
 710     if path:
 711         repodir = path
 712     if not repodir:
 713         repodir = os.environ.get('BUP_DIR')
 714         if not repodir:
 715             repodir = os.path.expanduser('~/.bup')
 716
 717
 718 def init_repo(path=None):
 719     """Create the Git bare repository for bup in a given path."""
 720     guess_repo(path)
 721     d = repo()
 722     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 723         raise GitError('"%d" exists but is not a directory\n' % d)
 724     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 725                          preexec_fn = _gitenv)
 726     _git_wait('git init', p)
 727     # Force the index version configuration in order to ensure bup works
 728     # regardless of the version of the installed Git binary.
 729     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 730                          stdout=sys.stderr, preexec_fn = _gitenv)
 731     _git_wait('git config', p)
 732
 733
 734 def check_repo_or_die(path=None):
 735     """Make sure a bup repository exists, and abort if not.
 736     If the path to a particular repository was not specified, this function
 737     initializes the default repository automatically.
 738     """
 739     guess_repo(path)
 740     if not os.path.isdir(repo('objects/pack/.')):
 741         if repodir == home_repodir:
 742             init_repo()
 743         else:
 744             log('error: %r is not a bup/git repository\n' % repo())
 745             sys.exit(15)
 746
 747
 748 def treeparse(buf):
 749     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 750     ofs = 0
 751     while ofs < len(buf):
 752         z = buf[ofs:].find('\0')
 753         assert(z > 0)
 754         spl = buf[ofs:ofs+z].split(' ', 1)
 755         assert(len(spl) == 2)
 756         sha = buf[ofs+z+1:ofs+z+1+20]
 757         ofs += z+1+20
 758         yield (spl[0], spl[1], sha)
 759
 760
 761 _ver = None
 762 def ver():
 763     """Get Git's version and ensure a usable version is installed.
 764
 765     The returned version is formatted as an ordered tuple with each position
 766     representing a digit in the version tag. For example, the following tuple
 767     would represent version 1.6.6.9:
 768
 769         ('1', '6', '6', '9')
 770     """
 771     global _ver
 772     if not _ver:
 773         p = subprocess.Popen(['git', '--version'],
 774                              stdout=subprocess.PIPE)
 775         gvs = p.stdout.read()
 776         _git_wait('git --version', p)
 777         m = re.match(r'git version (\S+.\S+)', gvs)
 778         if not m:
 779             raise GitError('git --version weird output: %r' % gvs)
 780         _ver = tuple(m.group(1).split('.'))
 781     needed = ('1','5', '3', '1')
 782     if _ver < needed:
 783         raise GitError('git version %s or higher is required; you have %s'
 784                        % ('.'.join(needed), '.'.join(_ver)))
 785     return _ver
 786
 787
 788 def _git_wait(cmd, p):
 789     rv = p.wait()
 790     if rv != 0:
 791         raise GitError('%s returned %d' % (cmd, rv))
 792
 793
 794 def _git_capture(argv):
 795     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 796     r = p.stdout.read()
 797     _git_wait(repr(argv), p)
 798     return r
 799
 800
 801 class _AbortableIter:
 802     def __init__(self, it, onabort = None):
 803         self.it = it
 804         self.onabort = onabort
 805         self.done = None
 806
 807     def __iter__(self):
 808         return self
 809
 810     def next(self):
 811         try:
 812             return self.it.next()
 813         except StopIteration, e:
 814             self.done = True
 815             raise
 816         except:
 817             self.abort()
 818             raise
 819
 820     def abort(self):
 821         """Abort iteration and call the abortion callback, if needed."""
 822         if not self.done:
 823             self.done = True
 824             if self.onabort:
 825                 self.onabort()
 826
 827     def __del__(self):
 828         self.abort()
 829
 830
 831 _ver_warned = 0
 832 class CatPipe:
 833     """Link to 'git cat-file' that is used to retrieve blob data."""
 834     def __init__(self):
 835         global _ver_warned
 836         wanted = ('1','5','6')
 837         if ver() < wanted:
 838             if not _ver_warned:
 839                 log('warning: git version < %s; bup will be slow.\n'
 840                     % '.'.join(wanted))
 841                 _ver_warned = 1
 842             self.get = self._slow_get
 843         else:
 844             self.p = self.inprogress = None
 845             self.get = self._fast_get
 846
 847     def _abort(self):
 848         if self.p:
 849             self.p.stdout.close()
 850             self.p.stdin.close()
 851         self.p = None
 852         self.inprogress = None
 853
 854     def _restart(self):
 855         self._abort()
 856         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 857                                   stdin=subprocess.PIPE,
 858                                   stdout=subprocess.PIPE,
 859                                   close_fds = True,
 860                                   preexec_fn = _gitenv)
 861
 862     def _fast_get(self, id):
 863         if not self.p or self.p.poll() != None:
 864             self._restart()
 865         assert(self.p)
 866         assert(self.p.poll() == None)
 867         if self.inprogress:
 868             log('_fast_get: opening %r while %r is open'
 869                 % (id, self.inprogress))
 870         assert(not self.inprogress)
 871         assert(id.find('\n') < 0)
 872         assert(id.find('\r') < 0)
 873         assert(not id.startswith('-'))
 874         self.inprogress = id
 875         self.p.stdin.write('%s\n' % id)
 876         hdr = self.p.stdout.readline()
 877         if hdr.endswith(' missing\n'):
 878             self.inprogress = None
 879             raise KeyError('blob %r is missing' % id)
 880         spl = hdr.split(' ')
 881         if len(spl) != 3 or len(spl[0]) != 40:
 882             raise GitError('expected blob, got %r' % spl)
 883         (hex, type, size) = spl
 884
 885         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 886                            onabort = self._abort)
 887         try:
 888             yield type
 889             for blob in it:
 890                 yield blob
 891             assert(self.p.stdout.readline() == '\n')
 892             self.inprogress = None
 893         except Exception, e:
 894             it.abort()
 895             raise
 896
 897     def _slow_get(self, id):
 898         assert(id.find('\n') < 0)
 899         assert(id.find('\r') < 0)
 900         assert(id[0] != '-')
 901         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 902         yield type
 903
 904         p = subprocess.Popen(['git', 'cat-file', type, id],
 905                              stdout=subprocess.PIPE,
 906                              preexec_fn = _gitenv)
 907         for blob in chunkyreader(p.stdout):
 908             yield blob
 909         _git_wait('git cat-file', p)
 910
 911     def _join(self, it):
 912         type = it.next()
 913         if type == 'blob':
 914             for blob in it:
 915                 yield blob
 916         elif type == 'tree':
 917             treefile = ''.join(it)
 918             for (mode, name, sha) in treeparse(treefile):
 919                 for blob in self.join(sha.encode('hex')):
 920                     yield blob
 921         elif type == 'commit':
 922             treeline = ''.join(it).split('\n')[0]
 923             assert(treeline.startswith('tree '))
 924             for blob in self.join(treeline[5:]):
 925                 yield blob
 926         else:
 927             raise GitError('invalid object type %r: expected blob/tree/commit'
 928                            % type)
 929
 930     def join(self, id):
 931         """Generate a list of the content of all blobs that can be reached
 932         from an object.  The hash given in 'id' must point to a blob, a tree
 933         or a commit. The content of all blobs that can be seen from trees or
 934         commits will be added to the list.
 935         """
 936         try:
 937             for d in self._join(self.get(id)):
 938                 yield d
 939         except StopIteration:
 940             log('booger!\n')