lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
   6 from bup.helpers import *
   7 from bup import _helpers, path, midx, bloom
   8
# Thresholds at which PackWriter._write() closes the current pack (through
# breakpoint()) so that a new one gets started.
max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
SEEK_END=2  # os.SEEK_END is not defined in python 2.4

verbose = 0  # when nonzero, PackWriter logs a '>' for each object written
ignore_midx = 0  # when nonzero, PackIdxList.refresh() skips all .midx work
home_repodir = os.path.expanduser('~/.bup')  # repository that may be auto-initialized
repodir = None  # selected repository path; set by guess_repo(), adjusted by repo()

# Git object type names and the numeric codes used in pack object headers.
_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Lookup statistics, updated by the index search code in this module.
_total_searches = 0
_total_steps = 0
  23
  24
  25 class GitError(Exception):
  26     pass
  27
  28
  29 def repo(sub = ''):
  30     """Get the path to the git repository or one of its subdirectories."""
  31     global repodir
  32     if not repodir:
  33         raise GitError('You should call check_repo_or_die()')
  34
  35     # If there's a .git subdirectory, then the actual repo is in there.
  36     gd = os.path.join(repodir, '.git')
  37     if os.path.exists(gd):
  38         repodir = gd
  39
  40     return os.path.join(repodir, sub)
  41
  42
  43 def shorten_hash(s):
  44     return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
  45                   r'\1\2*\3', s)
  46
  47
  48 def repo_rel(path):
  49     full = os.path.abspath(path)
  50     fullrepo = os.path.abspath(repo(''))
  51     if not fullrepo.endswith('/'):
  52         fullrepo += '/'
  53     if full.startswith(fullrepo):
  54         path = full[len(fullrepo):]
  55     if path.startswith('index-cache/'):
  56         path = path[len('index-cache/'):]
  57     return shorten_hash(path)
  58
  59
  60 def all_packdirs():
  61     paths = [repo('objects/pack')]
  62     paths += glob.glob(repo('index-cache/*/.'))
  63     return paths
  64
  65
  66 def auto_midx(objdir):
  67     args = [path.exe(), 'midx', '--auto', '--dir', objdir]
  68     try:
  69         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  70     except OSError, e:
  71         # make sure 'args' gets printed to help with debugging
  72         add_error('%r: exception: %s' % (args, e))
  73         raise
  74     if rv:
  75         add_error('%r: returned %d' % (args, rv))
  76
  77     args = [path.exe(), 'bloom', '--dir', objdir]
  78     try:
  79         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  80     except OSError, e:
  81         # make sure 'args' gets printed to help with debugging
  82         add_error('%r: exception: %s' % (args, e))
  83         raise
  84     if rv:
  85         add_error('%r: returned %d' % (args, rv))
  86
  87
  88 def mangle_name(name, mode, gitmode):
  89     """Mangle a file name to present an abstract name for segmented files.
  90     Mangled file names will have the ".bup" extension added to them. If a
  91     file's name already ends with ".bup", a ".bupl" extension is added to
  92     disambiguate normal files from semgmented ones.
  93     """
  94     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  95         return name + '.bup'
  96     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  97         return name + '.bupl'
  98     else:
  99         return name
 100
 101
 102 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
 103 def demangle_name(name):
 104     """Remove name mangling from a file name, if necessary.
 105
 106     The return value is a tuple (demangled_filename,mode), where mode is one of
 107     the following:
 108
 109     * BUP_NORMAL  : files that should be read as-is from the repository
 110     * BUP_CHUNKED : files that were chunked and need to be assembled
 111
 112     For more information on the name mangling algorythm, see mangle_name()
 113     """
 114     if name.endswith('.bupl'):
 115         return (name[:-5], BUP_NORMAL)
 116     elif name.endswith('.bup'):
 117         return (name[:-4], BUP_CHUNKED)
 118     else:
 119         return (name, BUP_NORMAL)
 120
 121
 122 def _encode_packobj(type, content):
 123     szout = ''
 124     sz = len(content)
 125     szbits = (sz & 0x0f) | (_typemap[type]<<4)
 126     sz >>= 4
 127     while 1:
 128         if sz: szbits |= 0x80
 129         szout += chr(szbits)
 130         if not sz:
 131             break
 132         szbits = sz & 0x7f
 133         sz >>= 7
 134     z = zlib.compressobj(1)
 135     yield szout
 136     yield z.compress(content)
 137     yield z.flush()
 138
 139
 140 def _encode_looseobj(type, content):
 141     z = zlib.compressobj(1)
 142     yield z.compress('%s %d\0' % (type, len(content)))
 143     yield z.compress(content)
 144     yield z.flush()
 145
 146
 147 def _decode_looseobj(buf):
 148     assert(buf);
 149     s = zlib.decompress(buf)
 150     i = s.find('\0')
 151     assert(i > 0)
 152     l = s[:i].split(' ')
 153     type = l[0]
 154     sz = int(l[1])
 155     content = s[i+1:]
 156     assert(type in _typemap)
 157     assert(sz == len(content))
 158     return (type, content)
 159
 160
 161 def _decode_packobj(buf):
 162     assert(buf)
 163     c = ord(buf[0])
 164     type = _typermap[(c & 0x70) >> 4]
 165     sz = c & 0x0f
 166     shift = 4
 167     i = 0
 168     while c & 0x80:
 169         i += 1
 170         c = ord(buf[i])
 171         sz |= (c & 0x7f) << shift
 172         shift += 7
 173         if not (c & 0x80):
 174             break
 175     return (type, zlib.decompress(buf[i+1:]))
 176
 177
 178 class PackIdx:
 179     def __init__(self):
 180         assert(0)
 181
 182     def find_offset(self, hash):
 183         """Get the offset of an object inside the index file."""
 184         idx = self._idx_from_hash(hash)
 185         if idx != None:
 186             return self._ofs_from_idx(idx)
 187         return None
 188
 189     def exists(self, hash, want_source=False):
 190         """Return nonempty if the object exists in this index."""
 191         if hash and (self._idx_from_hash(hash) != None):
 192             return want_source and os.path.basename(self.name) or True
 193         return None
 194
 195     def __len__(self):
 196         return int(self.fanout[255])
 197
 198     def _idx_from_hash(self, hash):
 199         global _total_searches, _total_steps
 200         _total_searches += 1
 201         assert(len(hash) == 20)
 202         b1 = ord(hash[0])
 203         start = self.fanout[b1-1] # range -1..254
 204         end = self.fanout[b1] # range 0..255
 205         want = str(hash)
 206         _total_steps += 1  # lookup table is a step
 207         while start < end:
 208             _total_steps += 1
 209             mid = start + (end-start)/2
 210             v = self._idx_to_hash(mid)
 211             if v < want:
 212                 start = mid+1
 213             elif v > want:
 214                 end = mid
 215             else: # got it!
 216                 return mid
 217         return None
 218
 219
 220 class PackIdxV1(PackIdx):
 221     """Object representation of a Git pack index (version 1) file."""
 222     def __init__(self, filename, f):
 223         self.name = filename
 224         self.idxnames = [self.name]
 225         self.map = mmap_read(f)
 226         self.fanout = list(struct.unpack('!256I',
 227                                          str(buffer(self.map, 0, 256*4))))
 228         self.fanout.append(0)  # entry "-1"
 229         nsha = self.fanout[255]
 230         self.sha_ofs = 256*4
 231         self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
 232
 233     def _ofs_from_idx(self, idx):
 234         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 235
 236     def _idx_to_hash(self, idx):
 237         return str(self.shatable[idx*24+4 : idx*24+24])
 238
 239     def __iter__(self):
 240         for i in xrange(self.fanout[255]):
 241             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 242
 243
 244 class PackIdxV2(PackIdx):
 245     """Object representation of a Git pack index (version 2) file."""
 246     def __init__(self, filename, f):
 247         self.name = filename
 248         self.idxnames = [self.name]
 249         self.map = mmap_read(f)
 250         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 251         self.fanout = list(struct.unpack('!256I',
 252                                          str(buffer(self.map, 8, 256*4))))
 253         self.fanout.append(0)  # entry "-1"
 254         nsha = self.fanout[255]
 255         self.sha_ofs = 8 + 256*4
 256         self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
 257         self.ofstable = buffer(self.map,
 258                                self.sha_ofs + nsha*20 + nsha*4,
 259                                nsha*4)
 260         self.ofs64table = buffer(self.map,
 261                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 262
 263     def _ofs_from_idx(self, idx):
 264         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 265         if ofs & 0x80000000:
 266             idx64 = ofs & 0x7fffffff
 267             ofs = struct.unpack('!Q',
 268                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 269         return ofs
 270
 271     def _idx_to_hash(self, idx):
 272         return str(self.shatable[idx*20:(idx+1)*20])
 273
 274     def __iter__(self):
 275         for i in xrange(self.fanout[255]):
 276             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 277
 278
# Number of live PackIdxList instances; the class asserts there is only one.
_mpi_count = 0
class PackIdxList:
    """Searchable collection of the pack indexes found in one directory.

    Holds the opened .idx/.midx objects for 'dir' in self.packs, an optional
    bloom filter in self.bloom, and a set of extra hashes (self.also) that
    were registered through add().
    """
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()
        self.packs = []
        self.do_bloom = False
        self.bloom = None
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        # Iterate over the merged contents of all indexes.
        return iter(idxmerge(self.packs))

    def __len__(self):
        # Total object count, summed over the individual indexes.
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        # Hashes registered with add() are reported as present right away.
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                # The bloom filter says "maybe": stop consulting it until a
                # full search fails again (do_bloom is re-enabled below).
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in xrange(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        self.do_bloom = True
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # d maps a file path to the index object that covers it; start from
        # the already-open indexes (dropping midx objects when skipping them).
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                # Mark every .idx covered by an already-open midx as handled.
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                # Open the .midx files that are not known yet; a midx that
                # refers to a missing .idx is deleted from disk.
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                    '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                # Consider the largest (then newest) midx files first.
                midxl.sort(key=lambda ix:
                           (-len(ix), -os.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        unlink(ix.name)
            # Open the .idx files that nothing above accounts for; files that
            # fail with GitError are reported and skipped.
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError, e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            # Several paths may map to the same midx object, hence the set.
            self.packs = list(set(d.values()))
            # Largest indexes first.
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            # Only use the bloom filter when it is valid and holds at least
            # as many entries as the indexes do.
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def add(self, hash):
        """Insert an additional object in the list."""
        self.also.add(hash)
 406
 407
 408 def calc_hash(type, content):
 409     """Calculate some content's hash in the Git fashion."""
 410     header = '%s %d\0' % (type, len(content))
 411     sum = Sha1(header)
 412     sum.update(content)
 413     return sum.digest()
 414
 415
 416 def _shalist_sort_key(ent):
 417     (mode, name, id) = ent
 418     if stat.S_ISDIR(int(mode, 8)):
 419         return name + '/'
 420     else:
 421         return name
 422
 423
 424 def open_idx(filename):
 425     if filename.endswith('.idx'):
 426         f = open(filename, 'rb')
 427         header = f.read(8)
 428         if header[0:4] == '\377tOc':
 429             version = struct.unpack('!I', header[4:8])[0]
 430             if version == 2:
 431                 return PackIdxV2(filename, f)
 432             else:
 433                 raise GitError('%s: expected idx file version 2, got %d'
 434                                % (filename, version))
 435         elif len(header) == 8 and header[0:4] < '\377tOc':
 436             return PackIdxV1(filename, f)
 437         else:
 438             raise GitError('%s: unrecognized idx file header' % filename)
 439     elif filename.endswith('.midx'):
 440         return midx.PackMidx(filename)
 441     else:
 442         raise GitError('idx filenames must end with .idx or .midx')
 443
 444
 445 def idxmerge(idxlist, final_progress=True):
 446     """Generate a list of all the objects reachable in a PackIdxList."""
 447     def pfunc(count, total):
 448         qprogress('Reading indexes: %.2f%% (%d/%d)\r'
 449                   % (count*100.0/total, count, total))
 450     def pfinal(count, total):
 451         if final_progress:
 452             progress('Reading indexes: %.2f%% (%d/%d), done.\n'
 453                      % (100, total, total))
 454     return merge_iter(idxlist, 10024, pfunc, pfinal)
 455
 456
 457 def _make_objcache():
 458     return PackIdxList(repo('objects/pack'))
 459
class PackWriter:
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache):
        # objcache_maker is called lazily (see _require_objcache) to build the
        # object used for existence checks.
        self.count = 0          # objects written to the current pack
        self.outbytes = 0       # bytes written to the current pack
        self.filename = None    # temporary pack path without the '.pack' suffix
        self.file = None        # open pack file, or None when no pack is open
        self.idx = None         # 256 buckets of (sha, crc, offset), by first byte
        self.objcache_maker = objcache_maker
        self.objcache = None

    def __del__(self):
        self.close()

    def _open(self):
        """Create the temporary pack file if none is open yet."""
        if not self.file:
            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
            self.file = os.fdopen(fd, 'w+b')
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            # Pack header; the trailing object count is filled in by _end().
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        """Write the joined 'datalist' to the pack and record it in the idx.

        Returns a (bytes_written, crc32) tuple.  An IOError from the write is
        re-raised as GitError with the original traceback.
        """
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob.  So let's make sure it's
        # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.)  f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError, e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        """Record (sha, crc, offset) for an object that was just written."""
        assert(sha)
        if self.idx:
            # The object starts 'size' bytes before the current file position.
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        """Write one object unconditionally and return its hash.

        The hash is computed when 'sha' is empty.  When the pack reaches
        max_pack_size or max_pack_objects, it is finished via breakpoint().
        """
        if verbose:
            log('>')
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
            self.breakpoint()
        return sha

    def breakpoint(self):
        """Finish the current pack and reset the byte and object counts.

        Returns the value of _end(): the path prefix of the finished pack, or
        None when no pack was open.
        """
        id = self._end()
        self.outbytes = self.count = 0
        return id

    def _require_objcache(self):
        """Make sure self.objcache exists, building it if needed.

        Raises GitError when there is no objcache and no maker to build one.
        """
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                    "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            # _write() may have finished the pack, which drops the objcache
            # (see _end), so make sure there is one before adding to it.
            self._require_objcache()
            self.objcache.add(sha)
        return sha

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack.

        'shalist' holds (mode, name, binary hash) entries; they are sorted
        with _shalist_sort_key before being serialized.
        """
        shalist = sorted(shalist, key = _shalist_sort_key)
        l = []
        for (mode,name,bin) in shalist:
            assert(mode)
            assert(mode != '0')
            assert(mode[0] != '0')
            assert(name)
            assert(len(bin) == 20)
            l.append('%s %s\0%s' % (mode,name,bin))
        return self.maybe_write('tree', ''.join(l))

    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        """Build a commit object from its parts and write it if missing.

        Header lines whose value is empty are left out; 'tree' and 'parent'
        are binary hashes and are written in hex.
        """
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        # Blank line between the headers and the message.
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))

    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # The current user is used as both author and committer.
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
                                  msg)
        return commit

    def abort(self):
        """Remove the pack file from disk."""
        f = self.file
        if f:
            self.idx = None
            self.file = None
            f.close()
            os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        """Finish the open pack, write its index and move both into place.

        Returns the final path prefix (without extension) of the pack, or
        None when no pack is open.  Unless run_midx is false, auto_midx() is
        run on the pack directory afterwards.
        """
        f = self.file
        if not f: return None
        self.file = None
        self.objcache = None
        idx = self.idx
        self.idx = None

        # update object count
        f.seek(8)
        cp = struct.pack('!i', self.count)
        assert(len(cp) == 4)
        f.write(cp)

        # calculate the pack sha1sum
        f.seek(0)
        sum = Sha1()
        for b in chunkyreader(f):
            sum.update(b)
        packbin = sum.digest()
        f.write(packbin)
        f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        # The final name is derived from the hash of the sorted object list.
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')

        if run_midx:
            auto_midx(repo('objects/pack'))
        return nameprefix

    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        return self._end(run_midx=run_midx)

    def _write_pack_idx_v2(self, filename, idx, packbin):
        """Write a version 2 index for the pack to 'filename'.

        'idx' is the bucket list built by _update_idx() and 'packbin' the
        pack's SHA-1 digest.  Returns the hex SHA-1 of the hash table, which
        _end() uses to name the pack.
        """
        idx_f = open(filename, 'w+b')
        idx_f.write('\377tOc\0\0\0\2')

        # Header (8) + fanout (4*256) + 28 bytes per object (20-byte hash,
        # 4-byte crc, 4-byte offset); 64-bit offsets would follow this point.
        ofs64_ofs = 8 + 4*256 + 28*self.count
        idx_f.truncate(ofs64_ofs)
        idx_f.seek(0)
        idx_map = mmap_readwrite(idx_f, close=False)
        idx_f.seek(0, SEEK_END)
        count = _helpers.write_idx(idx_f, idx_map, idx, self.count)
        assert(count == self.count)
        idx_map.close()
        idx_f.write(packbin)

        # Checksum the whole index, and separately the hash table alone.
        idx_f.seek(0)
        idx_sum = Sha1()
        b = idx_f.read(8 + 4*256)
        idx_sum.update(b)

        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            idx_sum.update(b)
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()

        for b in chunkyreader(idx_f):
            idx_sum.update(b)
        idx_f.write(idx_sum.digest())
        idx_f.close()

        return namebase
 659
 660
 661 def _git_date(date):
 662     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 663
 664
 665 def _gitenv():
 666     os.environ['GIT_DIR'] = os.path.abspath(repo())
 667
 668
 669 def list_refs(refname = None):
 670     """Generate a list of tuples in the form (refname,hash).
 671     If a ref name is specified, list only this particular ref.
 672     """
 673     argv = ['git', 'show-ref', '--']
 674     if refname:
 675         argv += [refname]
 676     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 677     out = p.stdout.read().strip()
 678     rv = p.wait()  # not fatal
 679     if rv:
 680         assert(not out)
 681     if out:
 682         for d in out.split('\n'):
 683             (sha, name) = d.split(' ', 1)
 684             yield (name, sha.decode('hex'))
 685
 686
 687 def read_ref(refname):
 688     """Get the commit id of the most recent commit made on a given ref."""
 689     l = list(list_refs(refname))
 690     if l:
 691         assert(len(l) == 1)
 692         return l[0][1]
 693     else:
 694         return None
 695
 696
 697 def rev_list(ref, count=None):
 698     """Generate a list of reachable commits in reverse chronological order.
 699
 700     This generator walks through commits, from child to parent, that are
 701     reachable via the specified ref and yields a series of tuples of the form
 702     (date,hash).
 703
 704     If count is a non-zero integer, limit the number of commits to "count"
 705     objects.
 706     """
 707     assert(not ref.startswith('-'))
 708     opts = []
 709     if count:
 710         opts += ['-n', str(atoi(count))]
 711     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 712     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 713     commit = None
 714     for row in p.stdout:
 715         s = row.strip()
 716         if s.startswith('commit '):
 717             commit = s[7:].decode('hex')
 718         else:
 719             date = int(s)
 720             yield (date, commit)
 721     rv = p.wait()  # not fatal
 722     if rv:
 723         raise GitError, 'git rev-list returned error %d' % rv
 724
 725
 726 def rev_get_date(ref):
 727     """Get the date of the latest commit on the specified ref."""
 728     for (date, commit) in rev_list(ref, count=1):
 729         return date
 730     raise GitError, 'no such commit %r' % ref
 731
 732
 733 def rev_parse(committish):
 734     """Resolve the full hash for 'committish', if it exists.
 735
 736     Should be roughly equivalent to 'git rev-parse'.
 737
 738     Returns the hex value of the hash if it is found, None if 'committish' does
 739     not correspond to anything.
 740     """
 741     head = read_ref(committish)
 742     if head:
 743         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 744         return head
 745
 746     pL = PackIdxList(repo('objects/pack'))
 747
 748     if len(committish) == 40:
 749         try:
 750             hash = committish.decode('hex')
 751         except TypeError:
 752             return None
 753
 754         if pL.exists(hash):
 755             return hash
 756
 757     return None
 758
 759
 760 def update_ref(refname, newval, oldval):
 761     """Change the commit pointed to by a branch."""
 762     if not oldval:
 763         oldval = ''
 764     assert(refname.startswith('refs/heads/'))
 765     p = subprocess.Popen(['git', 'update-ref', refname,
 766                           newval.encode('hex'), oldval.encode('hex')],
 767                          preexec_fn = _gitenv)
 768     _git_wait('git update-ref', p)
 769
 770
 771 def guess_repo(path=None):
 772     """Set the path value in the global variable "repodir".
 773     This makes bup look for an existing bup repository, but not fail if a
 774     repository doesn't exist. Usually, if you are interacting with a bup
 775     repository, you would not be calling this function but using
 776     check_repo_or_die().
 777     """
 778     global repodir
 779     if path:
 780         repodir = path
 781     if not repodir:
 782         repodir = os.environ.get('BUP_DIR')
 783         if not repodir:
 784             repodir = os.path.expanduser('~/.bup')
 785
 786
 787 def init_repo(path=None):
 788     """Create the Git bare repository for bup in a given path."""
 789     guess_repo(path)
 790     d = repo()  # appends a / to the path
 791     parent = os.path.dirname(os.path.dirname(d))
 792     if parent and not os.path.exists(parent):
 793         raise GitError('parent directory "%s" does not exist\n' % parent)
 794     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 795         raise GitError('"%d" exists but is not a directory\n' % d)
 796     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 797                          preexec_fn = _gitenv)
 798     _git_wait('git init', p)
 799     # Force the index version configuration in order to ensure bup works
 800     # regardless of the version of the installed Git binary.
 801     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 802                          stdout=sys.stderr, preexec_fn = _gitenv)
 803     _git_wait('git config', p)
 804
 805
 806 def check_repo_or_die(path=None):
 807     """Make sure a bup repository exists, and abort if not.
 808     If the path to a particular repository was not specified, this function
 809     initializes the default repository automatically.
 810     """
 811     guess_repo(path)
 812     if not os.path.isdir(repo('objects/pack/.')):
 813         if repodir == home_repodir:
 814             init_repo()
 815         else:
 816             log('error: %r is not a bup/git repository\n' % repo())
 817             sys.exit(15)
 818
 819
 820 def treeparse(buf):
 821     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 822     ofs = 0
 823     while ofs < len(buf):
 824         z = buf[ofs:].find('\0')
 825         assert(z > 0)
 826         spl = buf[ofs:ofs+z].split(' ', 1)
 827         assert(len(spl) == 2)
 828         sha = buf[ofs+z+1:ofs+z+1+20]
 829         ofs += z+1+20
 830         yield (spl[0], spl[1], sha)
 831
 832
 833 _ver = None
 834 def ver():
 835     """Get Git's version and ensure a usable version is installed.
 836
 837     The returned version is formatted as an ordered tuple with each position
 838     representing a digit in the version tag. For example, the following tuple
 839     would represent version 1.6.6.9:
 840
 841         ('1', '6', '6', '9')
 842     """
 843     global _ver
 844     if not _ver:
 845         p = subprocess.Popen(['git', '--version'],
 846                              stdout=subprocess.PIPE)
 847         gvs = p.stdout.read()
 848         _git_wait('git --version', p)
 849         m = re.match(r'git version (\S+.\S+)', gvs)
 850         if not m:
 851             raise GitError('git --version weird output: %r' % gvs)
 852         _ver = tuple(m.group(1).split('.'))
 853     needed = ('1','5', '3', '1')
 854     if _ver < needed:
 855         raise GitError('git version %s or higher is required; you have %s'
 856                        % ('.'.join(needed), '.'.join(_ver)))
 857     return _ver
 858
 859
 860 def _git_wait(cmd, p):
 861     rv = p.wait()
 862     if rv != 0:
 863         raise GitError('%s returned %d' % (cmd, rv))
 864
 865
 866 def _git_capture(argv):
 867     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 868     r = p.stdout.read()
 869     _git_wait(repr(argv), p)
 870     return r
 871
 872
 873 class _AbortableIter:
 874     def __init__(self, it, onabort = None):
 875         self.it = it
 876         self.onabort = onabort
 877         self.done = None
 878
 879     def __iter__(self):
 880         return self
 881
 882     def next(self):
 883         try:
 884             return self.it.next()
 885         except StopIteration, e:
 886             self.done = True
 887             raise
 888         except:
 889             self.abort()
 890             raise
 891
 892     def abort(self):
 893         """Abort iteration and call the abortion callback, if needed."""
 894         if not self.done:
 895             self.done = True
 896             if self.onabort:
 897                 self.onabort()
 898
 899     def __del__(self):
 900         self.abort()
 901
 902
 903 _ver_warned = 0
 904 class CatPipe:
 905     """Link to 'git cat-file' that is used to retrieve blob data."""
 906     def __init__(self):
 907         global _ver_warned
 908         wanted = ('1','5','6')
 909         if ver() < wanted:
 910             if not _ver_warned:
 911                 log('warning: git version < %s; bup will be slow.\n'
 912                     % '.'.join(wanted))
 913                 _ver_warned = 1
 914             self.get = self._slow_get
 915         else:
 916             self.p = self.inprogress = None
 917             self.get = self._fast_get
 918
 919     def _abort(self):
 920         if self.p:
 921             self.p.stdout.close()
 922             self.p.stdin.close()
 923         self.p = None
 924         self.inprogress = None
 925
 926     def _restart(self):
 927         self._abort()
 928         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 929                                   stdin=subprocess.PIPE,
 930                                   stdout=subprocess.PIPE,
 931                                   close_fds = True,
 932                                   bufsize = 4096,
 933                                   preexec_fn = _gitenv)
 934
 935     def _fast_get(self, id):
 936         if not self.p or self.p.poll() != None:
 937             self._restart()
 938         assert(self.p)
 939         assert(self.p.poll() == None)
 940         if self.inprogress:
 941             log('_fast_get: opening %r while %r is open'
 942                 % (id, self.inprogress))
 943         assert(not self.inprogress)
 944         assert(id.find('\n') < 0)
 945         assert(id.find('\r') < 0)
 946         assert(not id.startswith('-'))
 947         self.inprogress = id
 948         self.p.stdin.write('%s\n' % id)
 949         self.p.stdin.flush()
 950         hdr = self.p.stdout.readline()
 951         if hdr.endswith(' missing\n'):
 952             self.inprogress = None
 953             raise KeyError('blob %r is missing' % id)
 954         spl = hdr.split(' ')
 955         if len(spl) != 3 or len(spl[0]) != 40:
 956             raise GitError('expected blob, got %r' % spl)
 957         (hex, type, size) = spl
 958
 959         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 960                            onabort = self._abort)
 961         try:
 962             yield type
 963             for blob in it:
 964                 yield blob
 965             assert(self.p.stdout.readline() == '\n')
 966             self.inprogress = None
 967         except Exception, e:
 968             it.abort()
 969             raise
 970
 971     def _slow_get(self, id):
 972         assert(id.find('\n') < 0)
 973         assert(id.find('\r') < 0)
 974         assert(id[0] != '-')
 975         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 976         yield type
 977
 978         p = subprocess.Popen(['git', 'cat-file', type, id],
 979                              stdout=subprocess.PIPE,
 980                              preexec_fn = _gitenv)
 981         for blob in chunkyreader(p.stdout):
 982             yield blob
 983         _git_wait('git cat-file', p)
 984
 985     def _join(self, it):
 986         type = it.next()
 987         if type == 'blob':
 988             for blob in it:
 989                 yield blob
 990         elif type == 'tree':
 991             treefile = ''.join(it)
 992             for (mode, name, sha) in treeparse(treefile):
 993                 for blob in self.join(sha.encode('hex')):
 994                     yield blob
 995         elif type == 'commit':
 996             treeline = ''.join(it).split('\n')[0]
 997             assert(treeline.startswith('tree '))
 998             for blob in self.join(treeline[5:]):
 999                 yield blob
1000         else:
1001             raise GitError('invalid object type %r: expected blob/tree/commit'
1002                            % type)
1003
1004     def join(self, id):
1005         """Generate a list of the content of all blobs that can be reached
1006         from an object.  The hash given in 'id' must point to a blob, a tree
1007         or a commit. The content of all blobs that can be seen from trees or
1008         commits will be added to the list.
1009         """
1010         try:
1011             for d in self._join(self.get(id)):
1012                 yield d
1013         except StopIteration:
1014             log('booger!\n')
1015
def tags():
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    prefix = 'refs/tags/'
    found = {}
    for (refname, target) in list_refs():
        if not refname.startswith(prefix):
            continue
        # Several tags can point at the same target, so names are
        # collected in a list per target.
        found.setdefault(target, []).append(refname[len(prefix):])
    return found