lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
   6 from bup.helpers import *
   7 from bup import _helpers, path, midx, bloom
   8
   9 SEEK_END=2  # os.SEEK_END is not defined in python 2.4
  10
  11 verbose = 0
  12 ignore_midx = 0
  13 home_repodir = os.path.expanduser('~/.bup')
  14 repodir = None
  15
  16 _typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
  17 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
  18
  19 _total_searches = 0
  20 _total_steps = 0
  21
  22
  23 class GitError(Exception):
  24     pass
  25
  26
  27 def repo(sub = ''):
  28     """Get the path to the git repository or one of its subdirectories."""
  29     global repodir
  30     if not repodir:
  31         raise GitError('You should call check_repo_or_die()')
  32
  33     # If there's a .git subdirectory, then the actual repo is in there.
  34     gd = os.path.join(repodir, '.git')
  35     if os.path.exists(gd):
  36         repodir = gd
  37
  38     return os.path.join(repodir, sub)
  39
  40
  41 def repo_rel(path):
  42     full = os.path.abspath(path)
  43     fullrepo = os.path.abspath(repo(''))
  44     if not fullrepo.endswith('/'):
  45         fullrepo += '/'
  46     if full.startswith(fullrepo):
  47         path = full[len(fullrepo):]
  48     if path.startswith('index-cache/'):
  49         path = path[len('index-cache/'):]
  50     return path
  51
  52
  53 def all_packdirs():
  54     paths = [repo('objects/pack')]
  55     paths += glob.glob(repo('index-cache/*/.'))
  56     return paths
  57
  58
  59 def auto_midx(objdir):
  60     args = [path.exe(), 'midx', '--auto', '--dir', objdir]
  61     try:
  62         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  63     except OSError, e:
  64         # make sure 'args' gets printed to help with debugging
  65         add_error('%r: exception: %s' % (args, e))
  66         raise
  67     if rv:
  68         add_error('%r: returned %d' % (args, rv))
  69
  70     args = [path.exe(), 'bloom', '--dir', objdir]
  71     try:
  72         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  73     except OSError, e:
  74         # make sure 'args' gets printed to help with debugging
  75         add_error('%r: exception: %s' % (args, e))
  76         raise
  77     if rv:
  78         add_error('%r: returned %d' % (args, rv))
  79
  80
  81 def mangle_name(name, mode, gitmode):
  82     """Mangle a file name to present an abstract name for segmented files.
  83     Mangled file names will have the ".bup" extension added to them. If a
  84     file's name already ends with ".bup", a ".bupl" extension is added to
  85     disambiguate normal files from semgmented ones.
  86     """
  87     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  88         return name + '.bup'
  89     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  90         return name + '.bupl'
  91     else:
  92         return name
  93
  94
  95 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  96 def demangle_name(name):
  97     """Remove name mangling from a file name, if necessary.
  98
  99     The return value is a tuple (demangled_filename,mode), where mode is one of
 100     the following:
 101
 102     * BUP_NORMAL  : files that should be read as-is from the repository
 103     * BUP_CHUNKED : files that were chunked and need to be assembled
 104
 105     For more information on the name mangling algorythm, see mangle_name()
 106     """
 107     if name.endswith('.bupl'):
 108         return (name[:-5], BUP_NORMAL)
 109     elif name.endswith('.bup'):
 110         return (name[:-4], BUP_CHUNKED)
 111     else:
 112         return (name, BUP_NORMAL)
 113
 114
 115 def _encode_packobj(type, content):
 116     szout = ''
 117     sz = len(content)
 118     szbits = (sz & 0x0f) | (_typemap[type]<<4)
 119     sz >>= 4
 120     while 1:
 121         if sz: szbits |= 0x80
 122         szout += chr(szbits)
 123         if not sz:
 124             break
 125         szbits = sz & 0x7f
 126         sz >>= 7
 127     z = zlib.compressobj(1)
 128     yield szout
 129     yield z.compress(content)
 130     yield z.flush()
 131
 132
 133 def _encode_looseobj(type, content):
 134     z = zlib.compressobj(1)
 135     yield z.compress('%s %d\0' % (type, len(content)))
 136     yield z.compress(content)
 137     yield z.flush()
 138
 139
 140 def _decode_looseobj(buf):
 141     assert(buf);
 142     s = zlib.decompress(buf)
 143     i = s.find('\0')
 144     assert(i > 0)
 145     l = s[:i].split(' ')
 146     type = l[0]
 147     sz = int(l[1])
 148     content = s[i+1:]
 149     assert(type in _typemap)
 150     assert(sz == len(content))
 151     return (type, content)
 152
 153
 154 def _decode_packobj(buf):
 155     assert(buf)
 156     c = ord(buf[0])
 157     type = _typermap[(c & 0x70) >> 4]
 158     sz = c & 0x0f
 159     shift = 4
 160     i = 0
 161     while c & 0x80:
 162         i += 1
 163         c = ord(buf[i])
 164         sz |= (c & 0x7f) << shift
 165         shift += 7
 166         if not (c & 0x80):
 167             break
 168     return (type, zlib.decompress(buf[i+1:]))
 169
 170
 171 class PackIdx:
 172     def __init__(self):
 173         assert(0)
 174
 175     def find_offset(self, hash):
 176         """Get the offset of an object inside the index file."""
 177         idx = self._idx_from_hash(hash)
 178         if idx != None:
 179             return self._ofs_from_idx(idx)
 180         return None
 181
 182     def exists(self, hash, want_source=False):
 183         """Return nonempty if the object exists in this index."""
 184         if hash and (self._idx_from_hash(hash) != None):
 185             return want_source and os.path.basename(self.name) or True
 186         return None
 187
 188     def __len__(self):
 189         return int(self.fanout[255])
 190
 191     def _idx_from_hash(self, hash):
 192         global _total_searches, _total_steps
 193         _total_searches += 1
 194         assert(len(hash) == 20)
 195         b1 = ord(hash[0])
 196         start = self.fanout[b1-1] # range -1..254
 197         end = self.fanout[b1] # range 0..255
 198         want = str(hash)
 199         _total_steps += 1  # lookup table is a step
 200         while start < end:
 201             _total_steps += 1
 202             mid = start + (end-start)/2
 203             v = self._idx_to_hash(mid)
 204             if v < want:
 205                 start = mid+1
 206             elif v > want:
 207                 end = mid
 208             else: # got it!
 209                 return mid
 210         return None
 211
 212
 213 class PackIdxV1(PackIdx):
 214     """Object representation of a Git pack index (version 1) file."""
 215     def __init__(self, filename, f):
 216         self.name = filename
 217         self.idxnames = [self.name]
 218         self.map = mmap_read(f)
 219         self.fanout = list(struct.unpack('!256I',
 220                                          str(buffer(self.map, 0, 256*4))))
 221         self.fanout.append(0)  # entry "-1"
 222         nsha = self.fanout[255]
 223         self.sha_ofs = 256*4
 224         self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
 225
 226     def _ofs_from_idx(self, idx):
 227         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 228
 229     def _idx_to_hash(self, idx):
 230         return str(self.shatable[idx*24+4 : idx*24+24])
 231
 232     def __iter__(self):
 233         for i in xrange(self.fanout[255]):
 234             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 235
 236
 237 class PackIdxV2(PackIdx):
 238     """Object representation of a Git pack index (version 2) file."""
 239     def __init__(self, filename, f):
 240         self.name = filename
 241         self.idxnames = [self.name]
 242         self.map = mmap_read(f)
 243         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 244         self.fanout = list(struct.unpack('!256I',
 245                                          str(buffer(self.map, 8, 256*4))))
 246         self.fanout.append(0)  # entry "-1"
 247         nsha = self.fanout[255]
 248         self.sha_ofs = 8 + 256*4
 249         self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
 250         self.ofstable = buffer(self.map,
 251                                self.sha_ofs + nsha*20 + nsha*4,
 252                                nsha*4)
 253         self.ofs64table = buffer(self.map,
 254                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 255
 256     def _ofs_from_idx(self, idx):
 257         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 258         if ofs & 0x80000000:
 259             idx64 = ofs & 0x7fffffff
 260             ofs = struct.unpack('!Q',
 261                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 262         return ofs
 263
 264     def _idx_to_hash(self, idx):
 265         return str(self.shatable[idx*20:(idx+1)*20])
 266
 267     def __iter__(self):
 268         for i in xrange(self.fanout[255]):
 269             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 270
 271
# Number of PackIdxList objects currently alive; __init__ asserts it is 0.
_mpi_count = 0
class PackIdxList:
    """A searchable list of the pack indexes found in one directory.

    refresh() loads index objects for the .midx and .idx files of 'dir'
    into 'packs'; exists() searches them, consulting the bup.bloom filter
    first while 'do_bloom' is set.  Hashes passed to add() are kept in
    'also' and are reported as existing.
    """
    def __init__(self, dir):
        """Set up an empty list for directory 'dir' and refresh() it."""
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()
        self.packs = []
        self.do_bloom = False
        self.bloom = None
        self.refresh()

    def __del__(self):
        """Release this object's slot in the _mpi_count instance counter."""
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        """Iterate over the merged contents of all loaded indexes."""
        return iter(idxmerge(self.packs))

    def __len__(self):
        """Return the sum of the lengths of all loaded indexes."""
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        # hashes registered through add() count as present
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                # bloom reported a hit: search the packs, and skip the
                # bloom check until a pack search misses again (see below)
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in xrange(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        # nothing found in any pack: use the bloom filter again next time
        self.do_bloom = True
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # d maps index file paths to the index object that covers them
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                # every index named by an already loaded midx is covered by it
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                # open the .midx files not seen before; one that names a
                # missing index file is deleted instead of being used
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                    '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                # consider the largest midx files first
                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        unlink(ix.name)
            # open the .idx files that nothing in d covers yet; an index
            # that cannot be opened is reported and skipped
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError, e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            # several paths may map to one object, so de-duplicate, then
            # order the indexes from largest to smallest
            self.packs = list(set(d.values()))
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            # the bloom filter is only used when it is valid and holds at
            # least as many entries as the indexes do
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def add(self, hash):
        """Insert an additional object in the list."""
        self.also.add(hash)
 398
 399
 400 def calc_hash(type, content):
 401     """Calculate some content's hash in the Git fashion."""
 402     header = '%s %d\0' % (type, len(content))
 403     sum = Sha1(header)
 404     sum.update(content)
 405     return sum.digest()
 406
 407
 408 def _shalist_sort_key(ent):
 409     (mode, name, id) = ent
 410     if stat.S_ISDIR(int(mode, 8)):
 411         return name + '/'
 412     else:
 413         return name
 414
 415
 416 def open_idx(filename):
 417     if filename.endswith('.idx'):
 418         f = open(filename, 'rb')
 419         header = f.read(8)
 420         if header[0:4] == '\377tOc':
 421             version = struct.unpack('!I', header[4:8])[0]
 422             if version == 2:
 423                 return PackIdxV2(filename, f)
 424             else:
 425                 raise GitError('%s: expected idx file version 2, got %d'
 426                                % (filename, version))
 427         elif len(header) == 8 and header[0:4] < '\377tOc':
 428             return PackIdxV1(filename, f)
 429         else:
 430             raise GitError('%s: unrecognized idx file header' % filename)
 431     elif filename.endswith('.midx'):
 432         return midx.PackMidx(filename)
 433     else:
 434         raise GitError('idx filenames must end with .idx or .midx')
 435
 436
 437 def idxmerge(idxlist, final_progress=True):
 438     """Generate a list of all the objects reachable in a PackIdxList."""
 439     def pfunc(count, total):
 440         qprogress('Reading indexes: %.2f%% (%d/%d)\r'
 441                   % (count*100.0/total, count, total))
 442     def pfinal(count, total):
 443         if final_progress:
 444             progress('Reading indexes: %.2f%% (%d/%d), done.\n'
 445                      % (100, total, total))
 446     return merge_iter(idxlist, 10024, pfunc, pfinal)
 447
 448
 449 def _make_objcache():
 450     return PackIdxList(repo('objects/pack'))
 451
class PackWriter:
    """Writes Git objects inside a pack file.

    Objects are appended to a temporary pack file under the repository's
    'objects' directory.  _end() (reached through close() or
    breakpoint()) finishes the pack, writes its version 2 index and moves
    both files into objects/pack.
    """
    def __init__(self, objcache_maker=_make_objcache):
        """Create a writer; nothing is opened until an object is written.

        'objcache_maker' is called without arguments the first time an
        object cache is needed (see _require_objcache()).
        """
        self.count = 0       # objects written so far
        self.outbytes = 0    # encoded bytes written so far
        self.filename = None # temporary pack path without its '.pack' suffix
        self.file = None
        self.idx = None      # 256 lists of (sha, crc, offset), by first sha byte
        self.objcache_maker = objcache_maker
        self.objcache = None

    def __del__(self):
        self.close()

    def _open(self):
        """Create the temporary pack file if none is open yet."""
        if not self.file:
            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
            self.file = os.fdopen(fd, 'w+b')
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            # 12-byte header with a zero object count; _end() seeks back
            # to offset 8 and stores the real count
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        """Write the joined pieces of 'datalist' as one object.

        Returns (bytes written, crc32 of those bytes).  An IOError from
        the write is re-raised as GitError with the original traceback.
        """
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob.  So let's make sure it's
        # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.)  f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError, e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        """Record 'sha' in the in-memory index for the object just written.

        The stored offset is the current file position minus 'size', that
        is, where the object starts in the pack.
        """
        assert(sha)
        if self.idx:
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        """Encode and write one object; return its sha.

        The sha is computed with calc_hash() when 'sha' is empty.
        """
        if verbose:
            log('>')
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
        return sha

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end()
        self.outbytes = self.count = 0
        return id

    def _require_objcache(self):
        """Make sure self.objcache is set, creating it if possible.

        Raises GitError when no cache exists and none can be made.
        """
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                    "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        self._require_objcache()
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            # remember it so the same object is not written twice
            self.objcache.add(sha)
        return sha

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack.

        'shalist' holds (mode, name, bin) entries where 'mode' is a string
        without a leading '0' and 'bin' is a 20-byte id.  The entries are
        sorted with _shalist_sort_key before being written.
        """
        shalist = sorted(shalist, key = _shalist_sort_key)
        l = []
        for (mode,name,bin) in shalist:
            assert(mode)
            assert(mode != '0')
            assert(mode[0] != '0')
            assert(name)
            assert(len(bin) == 20)
            l.append('%s %s\0%s' % (mode,name,bin))
        return self.maybe_write('tree', ''.join(l))

    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        """Build and write a commit object; return its id.

        Header lines are only emitted for the arguments that are
        non-empty; 'tree' and 'parent' are hex-encoded into the text.
        """
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        # a blank line separates the headers from the message
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))

    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # the same identity and date are used for author and committer
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
                                  msg)
        return commit

    def abort(self):
        """Remove the pack file from disk."""
        f = self.file
        if f:
            self.idx = None
            self.file = None
            f.close()
            os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        """Finish the current pack and move it into objects/pack.

        Returns the final path prefix (without '.pack'/'.idx'), or None
        when no pack file is open.  auto_midx() is run on objects/pack
        afterwards unless 'run_midx' is false.
        """
        f = self.file
        if not f: return None
        self.file = None
        self.objcache = None
        idx = self.idx
        self.idx = None

        # update object count
        f.seek(8)
        cp = struct.pack('!i', self.count)
        assert(len(cp) == 4)
        f.write(cp)

        # calculate the pack sha1sum
        f.seek(0)
        sum = Sha1()
        for b in chunkyreader(f):
            sum.update(b)
        packbin = sum.digest()
        # the checksum of everything before it is appended as a trailer
        f.write(packbin)
        f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        # the final file names are derived from the value that
        # _write_pack_idx_v2() returns
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')

        if run_midx:
            auto_midx(repo('objects/pack'))
        return nameprefix

    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        return self._end(run_midx=run_midx)

    def _write_pack_idx_v2(self, filename, idx, packbin):
        """Write the version 2 index for the pack to 'filename'.

        'idx' is the in-memory index built by _update_idx() and 'packbin'
        the pack's checksum.  Returns the hex SHA-1 of the index's table
        of object hashes.
        """
        idx_f = open(filename, 'w+b')
        idx_f.write('\377tOc\0\0\0\2')

        # 8-byte header, 256 fanout words, then 28 bytes per object
        # (matching the 20+4+4 per-object layout that PackIdxV2 reads)
        ofs64_ofs = 8 + 4*256 + 28*self.count
        idx_f.truncate(ofs64_ofs)
        idx_f.seek(0)
        idx_map = mmap_readwrite(idx_f, close=False)
        idx_f.seek(0, SEEK_END)
        # NOTE(review): _helpers.write_idx is not visible here; presumably
        # it fills the mapped tables and appends any 64-bit offsets to
        # idx_f -- confirm against its implementation
        count = _helpers.write_idx(idx_f, idx_map, idx, self.count)
        assert(count == self.count)
        idx_map.close()
        idx_f.write(packbin)

        # checksum the whole index, and separately the table of hashes
        idx_f.seek(0)
        idx_sum = Sha1()
        b = idx_f.read(8 + 4*256)
        idx_sum.update(b)

        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            idx_sum.update(b)
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()

        for b in chunkyreader(idx_f):
            idx_sum.update(b)
        idx_f.write(idx_sum.digest())
        idx_f.close()

        return namebase
 649
 650
 651 def _git_date(date):
 652     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 653
 654
 655 def _gitenv():
 656     os.environ['GIT_DIR'] = os.path.abspath(repo())
 657
 658
 659 def list_refs(refname = None):
 660     """Generate a list of tuples in the form (refname,hash).
 661     If a ref name is specified, list only this particular ref.
 662     """
 663     argv = ['git', 'show-ref', '--']
 664     if refname:
 665         argv += [refname]
 666     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 667     out = p.stdout.read().strip()
 668     rv = p.wait()  # not fatal
 669     if rv:
 670         assert(not out)
 671     if out:
 672         for d in out.split('\n'):
 673             (sha, name) = d.split(' ', 1)
 674             yield (name, sha.decode('hex'))
 675
 676
 677 def read_ref(refname):
 678     """Get the commit id of the most recent commit made on a given ref."""
 679     l = list(list_refs(refname))
 680     if l:
 681         assert(len(l) == 1)
 682         return l[0][1]
 683     else:
 684         return None
 685
 686
 687 def rev_list(ref, count=None):
 688     """Generate a list of reachable commits in reverse chronological order.
 689
 690     This generator walks through commits, from child to parent, that are
 691     reachable via the specified ref and yields a series of tuples of the form
 692     (date,hash).
 693
 694     If count is a non-zero integer, limit the number of commits to "count"
 695     objects.
 696     """
 697     assert(not ref.startswith('-'))
 698     opts = []
 699     if count:
 700         opts += ['-n', str(atoi(count))]
 701     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 702     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 703     commit = None
 704     for row in p.stdout:
 705         s = row.strip()
 706         if s.startswith('commit '):
 707             commit = s[7:].decode('hex')
 708         else:
 709             date = int(s)
 710             yield (date, commit)
 711     rv = p.wait()  # not fatal
 712     if rv:
 713         raise GitError, 'git rev-list returned error %d' % rv
 714
 715
 716 def rev_get_date(ref):
 717     """Get the date of the latest commit on the specified ref."""
 718     for (date, commit) in rev_list(ref, count=1):
 719         return date
 720     raise GitError, 'no such commit %r' % ref
 721
 722
 723 def rev_parse(committish):
 724     """Resolve the full hash for 'committish', if it exists.
 725
 726     Should be roughly equivalent to 'git rev-parse'.
 727
 728     Returns the hex value of the hash if it is found, None if 'committish' does
 729     not correspond to anything.
 730     """
 731     head = read_ref(committish)
 732     if head:
 733         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 734         return head
 735
 736     pL = PackIdxList(repo('objects/pack'))
 737
 738     if len(committish) == 40:
 739         try:
 740             hash = committish.decode('hex')
 741         except TypeError:
 742             return None
 743
 744         if pL.exists(hash):
 745             return hash
 746
 747     return None
 748
 749
 750 def update_ref(refname, newval, oldval):
 751     """Change the commit pointed to by a branch."""
 752     if not oldval:
 753         oldval = ''
 754     assert(refname.startswith('refs/heads/'))
 755     p = subprocess.Popen(['git', 'update-ref', refname,
 756                           newval.encode('hex'), oldval.encode('hex')],
 757                          preexec_fn = _gitenv)
 758     _git_wait('git update-ref', p)
 759
 760
 761 def guess_repo(path=None):
 762     """Set the path value in the global variable "repodir".
 763     This makes bup look for an existing bup repository, but not fail if a
 764     repository doesn't exist. Usually, if you are interacting with a bup
 765     repository, you would not be calling this function but using
 766     check_repo_or_die().
 767     """
 768     global repodir
 769     if path:
 770         repodir = path
 771     if not repodir:
 772         repodir = os.environ.get('BUP_DIR')
 773         if not repodir:
 774             repodir = os.path.expanduser('~/.bup')
 775
 776
 777 def init_repo(path=None):
 778     """Create the Git bare repository for bup in a given path."""
 779     guess_repo(path)
 780     d = repo()  # appends a / to the path
 781     parent = os.path.dirname(os.path.dirname(d))
 782     if parent and not os.path.exists(parent):
 783         raise GitError('parent directory "%s" does not exist\n' % parent)
 784     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 785         raise GitError('"%d" exists but is not a directory\n' % d)
 786     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 787                          preexec_fn = _gitenv)
 788     _git_wait('git init', p)
 789     # Force the index version configuration in order to ensure bup works
 790     # regardless of the version of the installed Git binary.
 791     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 792                          stdout=sys.stderr, preexec_fn = _gitenv)
 793     _git_wait('git config', p)
 794
 795
 796 def check_repo_or_die(path=None):
 797     """Make sure a bup repository exists, and abort if not.
 798     If the path to a particular repository was not specified, this function
 799     initializes the default repository automatically.
 800     """
 801     guess_repo(path)
 802     if not os.path.isdir(repo('objects/pack/.')):
 803         if repodir == home_repodir:
 804             init_repo()
 805         else:
 806             log('error: %r is not a bup/git repository\n' % repo())
 807             sys.exit(15)
 808
 809
 810 def treeparse(buf):
 811     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 812     ofs = 0
 813     while ofs < len(buf):
 814         z = buf[ofs:].find('\0')
 815         assert(z > 0)
 816         spl = buf[ofs:ofs+z].split(' ', 1)
 817         assert(len(spl) == 2)
 818         sha = buf[ofs+z+1:ofs+z+1+20]
 819         ofs += z+1+20
 820         yield (spl[0], spl[1], sha)
 821
 822
 823 _ver = None
 824 def ver():
 825     """Get Git's version and ensure a usable version is installed.
 826
 827     The returned version is formatted as an ordered tuple with each position
 828     representing a digit in the version tag. For example, the following tuple
 829     would represent version 1.6.6.9:
 830
 831         ('1', '6', '6', '9')
 832     """
 833     global _ver
 834     if not _ver:
 835         p = subprocess.Popen(['git', '--version'],
 836                              stdout=subprocess.PIPE)
 837         gvs = p.stdout.read()
 838         _git_wait('git --version', p)
 839         m = re.match(r'git version (\S+.\S+)', gvs)
 840         if not m:
 841             raise GitError('git --version weird output: %r' % gvs)
 842         _ver = tuple(m.group(1).split('.'))
 843     needed = ('1','5', '3', '1')
 844     if _ver < needed:
 845         raise GitError('git version %s or higher is required; you have %s'
 846                        % ('.'.join(needed), '.'.join(_ver)))
 847     return _ver
 848
 849
 850 def _git_wait(cmd, p):
 851     rv = p.wait()
 852     if rv != 0:
 853         raise GitError('%s returned %d' % (cmd, rv))
 854
 855
 856 def _git_capture(argv):
 857     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 858     r = p.stdout.read()
 859     _git_wait(repr(argv), p)
 860     return r
 861
 862
 863 class _AbortableIter:
 864     def __init__(self, it, onabort = None):
 865         self.it = it
 866         self.onabort = onabort
 867         self.done = None
 868
 869     def __iter__(self):
 870         return self
 871
 872     def next(self):
 873         try:
 874             return self.it.next()
 875         except StopIteration, e:
 876             self.done = True
 877             raise
 878         except:
 879             self.abort()
 880             raise
 881
 882     def abort(self):
 883         """Abort iteration and call the abortion callback, if needed."""
 884         if not self.done:
 885             self.done = True
 886             if self.onabort:
 887                 self.onabort()
 888
 889     def __del__(self):
 890         self.abort()
 891
 892
 893 _ver_warned = 0
 894 class CatPipe:
 895     """Link to 'git cat-file' that is used to retrieve blob data."""
 896     def __init__(self):
 897         global _ver_warned
 898         wanted = ('1','5','6')
 899         if ver() < wanted:
 900             if not _ver_warned:
 901                 log('warning: git version < %s; bup will be slow.\n'
 902                     % '.'.join(wanted))
 903                 _ver_warned = 1
 904             self.get = self._slow_get
 905         else:
 906             self.p = self.inprogress = None
 907             self.get = self._fast_get
 908
 909     def _abort(self):
 910         if self.p:
 911             self.p.stdout.close()
 912             self.p.stdin.close()
 913         self.p = None
 914         self.inprogress = None
 915
 916     def _restart(self):
 917         self._abort()
 918         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 919                                   stdin=subprocess.PIPE,
 920                                   stdout=subprocess.PIPE,
 921                                   close_fds = True,
 922                                   bufsize = 4096,
 923                                   preexec_fn = _gitenv)
 924
 925     def _fast_get(self, id):
 926         if not self.p or self.p.poll() != None:
 927             self._restart()
 928         assert(self.p)
 929         assert(self.p.poll() == None)
 930         if self.inprogress:
 931             log('_fast_get: opening %r while %r is open'
 932                 % (id, self.inprogress))
 933         assert(not self.inprogress)
 934         assert(id.find('\n') < 0)
 935         assert(id.find('\r') < 0)
 936         assert(not id.startswith('-'))
 937         self.inprogress = id
 938         self.p.stdin.write('%s\n' % id)
 939         self.p.stdin.flush()
 940         hdr = self.p.stdout.readline()
 941         if hdr.endswith(' missing\n'):
 942             self.inprogress = None
 943             raise KeyError('blob %r is missing' % id)
 944         spl = hdr.split(' ')
 945         if len(spl) != 3 or len(spl[0]) != 40:
 946             raise GitError('expected blob, got %r' % spl)
 947         (hex, type, size) = spl
 948
 949         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 950                            onabort = self._abort)
 951         try:
 952             yield type
 953             for blob in it:
 954                 yield blob
 955             assert(self.p.stdout.readline() == '\n')
 956             self.inprogress = None
 957         except Exception, e:
 958             it.abort()
 959             raise
 960
 961     def _slow_get(self, id):
 962         assert(id.find('\n') < 0)
 963         assert(id.find('\r') < 0)
 964         assert(id[0] != '-')
 965         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 966         yield type
 967
 968         p = subprocess.Popen(['git', 'cat-file', type, id],
 969                              stdout=subprocess.PIPE,
 970                              preexec_fn = _gitenv)
 971         for blob in chunkyreader(p.stdout):
 972             yield blob
 973         _git_wait('git cat-file', p)
 974
 975     def _join(self, it):
 976         type = it.next()
 977         if type == 'blob':
 978             for blob in it:
 979                 yield blob
 980         elif type == 'tree':
 981             treefile = ''.join(it)
 982             for (mode, name, sha) in treeparse(treefile):
 983                 for blob in self.join(sha.encode('hex')):
 984                     yield blob
 985         elif type == 'commit':
 986             treeline = ''.join(it).split('\n')[0]
 987             assert(treeline.startswith('tree '))
 988             for blob in self.join(treeline[5:]):
 989                 yield blob
 990         else:
 991             raise GitError('invalid object type %r: expected blob/tree/commit'
 992                            % type)
 993
 994     def join(self, id):
 995         """Generate a list of the content of all blobs that can be reached
 996         from an object.  The hash given in 'id' must point to a blob, a tree
 997         or a commit. The content of all blobs that can be seen from trees or
 998         commits will be added to the list.
 999         """
1000         try:
1001             for d in self._join(self.get(id)):
1002                 yield d
1003         except StopIteration:
1004             log('booger!\n')
1005
def tags():
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    prefix = 'refs/tags/'
    by_hash = {}
    for (refname, sha) in list_refs():
        if not refname.startswith(prefix):
            continue
        # more than one tag can point at the same hash
        by_hash.setdefault(sha, []).append(refname[len(prefix):])
    return by_hash