lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, zlib, time, subprocess, struct, stat, re, tempfile
   6 import heapq
   7 from bup.helpers import *
   8 from bup import _helpers
   9
  10 MIDX_VERSION = 2
  11
  12 verbose = 0
  13 ignore_midx = 0
  14 home_repodir = os.path.expanduser('~/.bup')
  15 repodir = None
  16
  17 _typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
  18 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
  19
  20 _total_searches = 0
  21 _total_steps = 0
  22
  23
  24 class GitError(Exception):
  25     pass
  26
  27
  28 def repo(sub = ''):
  29     """Get the path to the git repository or one of its subdirectories."""
  30     global repodir
  31     if not repodir:
  32         raise GitError('You should call check_repo_or_die()')
  33
  34     # If there's a .git subdirectory, then the actual repo is in there.
  35     gd = os.path.join(repodir, '.git')
  36     if os.path.exists(gd):
  37         repodir = gd
  38
  39     return os.path.join(repodir, sub)
  40
  41
  42 def auto_midx(objdir):
  43     main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
  44     args = [main_exe, 'midx', '--auto', '--dir', objdir]
  45     rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  46     if rv:
  47         add_error('%r: returned %d' % (args, rv))
  48
  49
  50 def mangle_name(name, mode, gitmode):
  51     """Mangle a file name to present an abstract name for segmented files.
  52     Mangled file names will have the ".bup" extension added to them. If a
  53     file's name already ends with ".bup", a ".bupl" extension is added to
  54     disambiguate normal files from semgmented ones.
  55     """
  56     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  57         return name + '.bup'
  58     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  59         return name + '.bupl'
  60     else:
  61         return name
  62
  63
  64 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  65 def demangle_name(name):
  66     """Remove name mangling from a file name, if necessary.
  67
  68     The return value is a tuple (demangled_filename,mode), where mode is one of
  69     the following:
  70
  71     * BUP_NORMAL  : files that should be read as-is from the repository
  72     * BUP_CHUNKED : files that were chunked and need to be assembled
  73
  74     For more information on the name mangling algorythm, see mangle_name()
  75     """
  76     if name.endswith('.bupl'):
  77         return (name[:-5], BUP_NORMAL)
  78     elif name.endswith('.bup'):
  79         return (name[:-4], BUP_CHUNKED)
  80     else:
  81         return (name, BUP_NORMAL)
  82
  83
  84 def _encode_packobj(type, content):
  85     szout = ''
  86     sz = len(content)
  87     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  88     sz >>= 4
  89     while 1:
  90         if sz: szbits |= 0x80
  91         szout += chr(szbits)
  92         if not sz:
  93             break
  94         szbits = sz & 0x7f
  95         sz >>= 7
  96     z = zlib.compressobj(1)
  97     yield szout
  98     yield z.compress(content)
  99     yield z.flush()
 100
 101
 102 def _encode_looseobj(type, content):
 103     z = zlib.compressobj(1)
 104     yield z.compress('%s %d\0' % (type, len(content)))
 105     yield z.compress(content)
 106     yield z.flush()
 107
 108
 109 def _decode_looseobj(buf):
 110     assert(buf);
 111     s = zlib.decompress(buf)
 112     i = s.find('\0')
 113     assert(i > 0)
 114     l = s[:i].split(' ')
 115     type = l[0]
 116     sz = int(l[1])
 117     content = s[i+1:]
 118     assert(type in _typemap)
 119     assert(sz == len(content))
 120     return (type, content)
 121
 122
 123 def _decode_packobj(buf):
 124     assert(buf)
 125     c = ord(buf[0])
 126     type = _typermap[(c & 0x70) >> 4]
 127     sz = c & 0x0f
 128     shift = 4
 129     i = 0
 130     while c & 0x80:
 131         i += 1
 132         c = ord(buf[i])
 133         sz |= (c & 0x7f) << shift
 134         shift += 7
 135         if not (c & 0x80):
 136             break
 137     return (type, zlib.decompress(buf[i+1:]))
 138
 139
 140 class PackIdx:
 141     def __init__(self):
 142         assert(0)
 143
 144     def find_offset(self, hash):
 145         """Get the offset of an object inside the index file."""
 146         idx = self._idx_from_hash(hash)
 147         if idx != None:
 148             return self._ofs_from_idx(idx)
 149         return None
 150
 151     def exists(self, hash):
 152         """Return nonempty if the object exists in this index."""
 153         return hash and (self._idx_from_hash(hash) != None) and True or None
 154
 155     def __len__(self):
 156         return int(self.fanout[255])
 157
 158     def _idx_from_hash(self, hash):
 159         global _total_searches, _total_steps
 160         _total_searches += 1
 161         assert(len(hash) == 20)
 162         b1 = ord(hash[0])
 163         start = self.fanout[b1-1] # range -1..254
 164         end = self.fanout[b1] # range 0..255
 165         want = str(hash)
 166         _total_steps += 1  # lookup table is a step
 167         while start < end:
 168             _total_steps += 1
 169             mid = start + (end-start)/2
 170             v = self._idx_to_hash(mid)
 171             if v < want:
 172                 start = mid+1
 173             elif v > want:
 174                 end = mid
 175             else: # got it!
 176                 return mid
 177         return None
 178
 179
 180 class PackIdxV1(PackIdx):
 181     """Object representation of a Git pack index (version 1) file."""
 182     def __init__(self, filename, f):
 183         self.name = filename
 184         self.idxnames = [self.name]
 185         self.map = mmap_read(f)
 186         self.fanout = list(struct.unpack('!256I',
 187                                          str(buffer(self.map, 0, 256*4))))
 188         self.fanout.append(0)  # entry "-1"
 189         nsha = self.fanout[255]
 190         self.shatable = buffer(self.map, 256*4, nsha*24)
 191
 192     def _ofs_from_idx(self, idx):
 193         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 194
 195     def _idx_to_hash(self, idx):
 196         return str(self.shatable[idx*24+4 : idx*24+24])
 197
 198     def __iter__(self):
 199         for i in xrange(self.fanout[255]):
 200             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 201
 202
 203 class PackIdxV2(PackIdx):
 204     """Object representation of a Git pack index (version 2) file."""
 205     def __init__(self, filename, f):
 206         self.name = filename
 207         self.idxnames = [self.name]
 208         self.map = mmap_read(f)
 209         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 210         self.fanout = list(struct.unpack('!256I',
 211                                          str(buffer(self.map, 8, 256*4))))
 212         self.fanout.append(0)  # entry "-1"
 213         nsha = self.fanout[255]
 214         self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
 215         self.ofstable = buffer(self.map,
 216                                8 + 256*4 + nsha*20 + nsha*4,
 217                                nsha*4)
 218         self.ofs64table = buffer(self.map,
 219                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 220
 221     def _ofs_from_idx(self, idx):
 222         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 223         if ofs & 0x80000000:
 224             idx64 = ofs & 0x7fffffff
 225             ofs = struct.unpack('!I',
 226                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 227         return ofs
 228
 229     def _idx_to_hash(self, idx):
 230         return str(self.shatable[idx*20:(idx+1)*20])
 231
 232     def __iter__(self):
 233         for i in xrange(self.fanout[255]):
 234             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 235
 236
 237 extract_bits = _helpers.extract_bits
 238
 239
 240 class PackMidx:
 241     """Wrapper which contains data from multiple index files.
 242     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 243     and make it possible for bup to expand Git's indexing capabilities to vast
 244     amounts of files.
 245     """
 246     def __init__(self, filename):
 247         self.name = filename
 248         self.force_keep = False
 249         assert(filename.endswith('.midx'))
 250         self.map = mmap_read(open(filename))
 251         if str(self.map[0:4]) != 'MIDX':
 252             log('Warning: skipping: invalid MIDX header in %r\n' % filename)
 253             self.force_keep = True
 254             return self._init_failed()
 255         ver = struct.unpack('!I', self.map[4:8])[0]
 256         if ver < MIDX_VERSION:
 257             log('Warning: ignoring old-style (v%d) midx %r\n'
 258                 % (ver, filename))
 259             self.force_keep = False  # old stuff is boring
 260             return self._init_failed()
 261         if ver > MIDX_VERSION:
 262             log('Warning: ignoring too-new (v%d) midx %r\n'
 263                 % (ver, filename))
 264             self.force_keep = True  # new stuff is exciting
 265             return self._init_failed()
 266
 267         self.bits = _helpers.firstword(self.map[8:12])
 268         self.entries = 2**self.bits
 269         self.fanout = buffer(self.map, 12, self.entries*4)
 270         shaofs = 12 + self.entries*4
 271         nsha = self._fanget(self.entries-1)
 272         self.shalist = buffer(self.map, shaofs, nsha*20)
 273         self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 274
 275     def _init_failed(self):
 276         self.bits = 0
 277         self.entries = 1
 278         self.fanout = buffer('\0\0\0\0')
 279         self.shalist = buffer('\0'*20)
 280         self.idxnames = []
 281
 282     def _fanget(self, i):
 283         start = i*4
 284         s = self.fanout[start:start+4]
 285         return _helpers.firstword(s)
 286
 287     def _get(self, i):
 288         return str(self.shalist[i*20:(i+1)*20])
 289
 290     def exists(self, hash):
 291         """Return nonempty if the object exists in the index files."""
 292         global _total_searches, _total_steps
 293         _total_searches += 1
 294         want = str(hash)
 295         el = extract_bits(want, self.bits)
 296         if el:
 297             start = self._fanget(el-1)
 298             startv = el << (32-self.bits)
 299         else:
 300             start = 0
 301             startv = 0
 302         end = self._fanget(el)
 303         endv = (el+1) << (32-self.bits)
 304         _total_steps += 1   # lookup table is a step
 305         hashv = _helpers.firstword(hash)
 306         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 307         while start < end:
 308             _total_steps += 1
 309             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 310             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 311             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 312             v = self._get(mid)
 313             #print '    %08x' % self._num(v)
 314             if v < want:
 315                 start = mid+1
 316                 startv = _helpers.firstword(v)
 317             elif v > want:
 318                 end = mid
 319                 endv = _helpers.firstword(v)
 320             else: # got it!
 321                 return True
 322         return None
 323
 324     def __iter__(self):
 325         for i in xrange(self._fanget(self.entries-1)):
 326             yield buffer(self.shalist, i*20, 20)
 327
 328     def __len__(self):
 329         return int(self._fanget(self.entries-1))
 330
 331
 332 _mpi_count = 0
 333 class PackIdxList:
 334     def __init__(self, dir):
 335         global _mpi_count
 336         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
 337         _mpi_count += 1
 338         self.dir = dir
 339         self.also = {}
 340         self.packs = []
 341         self.refresh()
 342
 343     def __del__(self):
 344         global _mpi_count
 345         _mpi_count -= 1
 346         assert(_mpi_count == 0)
 347
 348     def __iter__(self):
 349         return iter(idxmerge(self.packs))
 350
 351     def __len__(self):
 352         return sum(len(pack) for pack in self.packs)
 353
 354     def exists(self, hash):
 355         """Return nonempty if the object exists in the index files."""
 356         global _total_searches
 357         _total_searches += 1
 358         if hash in self.also:
 359             return True
 360         for i in range(len(self.packs)):
 361             p = self.packs[i]
 362             _total_searches -= 1  # will be incremented by sub-pack
 363             if p.exists(hash):
 364                 # reorder so most recently used packs are searched first
 365                 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
 366                 return p.name
 367         return None
 368
 369     def refresh(self, skip_midx = False):
 370         """Refresh the index list.
 371         This method verifies if .midx files were superseded (e.g. all of its
 372         contents are in another, bigger .midx file) and removes the superseded
 373         files.
 374
 375         If skip_midx is True, all work on .midx files will be skipped and .midx
 376         files will be removed from the list.
 377
 378         The module-global variable 'ignore_midx' can force this function to
 379         always act as if skip_midx was True.
 380         """
 381         skip_midx = skip_midx or ignore_midx
 382         d = dict((p.name, p) for p in self.packs
 383                  if not skip_midx or not isinstance(p, PackMidx))
 384         if os.path.exists(self.dir):
 385             if not skip_midx:
 386                 midxl = []
 387                 for ix in self.packs:
 388                     if isinstance(ix, PackMidx):
 389                         for name in ix.idxnames:
 390                             d[os.path.join(self.dir, name)] = ix
 391                 for f in os.listdir(self.dir):
 392                     full = os.path.join(self.dir, f)
 393                     if f.endswith('.midx') and not d.get(full):
 394                         mx = PackMidx(full)
 395                         (mxd, mxf) = os.path.split(mx.name)
 396                         broken = 0
 397                         for n in mx.idxnames:
 398                             if not os.path.exists(os.path.join(mxd, n)):
 399                                 log(('warning: index %s missing\n' +
 400                                     '  used by %s\n') % (n, mxf))
 401                                 broken += 1
 402                         if not broken:
 403                             midxl.append(mx)
 404                 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
 405                 for ix in midxl:
 406                     any = 0
 407                     for sub in ix.idxnames:
 408                         found = d.get(os.path.join(self.dir, sub))
 409                         if not found or isinstance(found, PackIdx):
 410                             # doesn't exist, or exists but not in a midx
 411                             d[ix.name] = ix
 412                             for name in ix.idxnames:
 413                                 d[os.path.join(self.dir, name)] = ix
 414                             any += 1
 415                             break
 416                     if not any and not ix.force_keep:
 417                         debug1('midx: removing redundant: %s\n'
 418                                % os.path.basename(ix.name))
 419                         unlink(ix.name)
 420             for f in os.listdir(self.dir):
 421                 full = os.path.join(self.dir, f)
 422                 if f.endswith('.idx') and not d.get(full):
 423                     ix = open_idx(full)
 424                     d[full] = ix
 425             self.packs = list(set(d.values()))
 426         debug1('PackIdxList: using %d index%s.\n'
 427             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 428
 429     def add(self, hash):
 430         """Insert an additional object in the list."""
 431         self.also[hash] = 1
 432
 433     def zap_also(self):
 434         """Remove all additional objects from the list."""
 435         self.also = {}
 436
 437
 438 def calc_hash(type, content):
 439     """Calculate some content's hash in the Git fashion."""
 440     header = '%s %d\0' % (type, len(content))
 441     sum = Sha1(header)
 442     sum.update(content)
 443     return sum.digest()
 444
 445
 446 def _shalist_sort_key(ent):
 447     (mode, name, id) = ent
 448     if stat.S_ISDIR(int(mode, 8)):
 449         return name + '/'
 450     else:
 451         return name
 452
 453
 454 def open_idx(filename):
 455     if filename.endswith('.idx'):
 456         f = open(filename, 'rb')
 457         header = f.read(8)
 458         if header[0:4] == '\377tOc':
 459             version = struct.unpack('!I', header[4:8])[0]
 460             if version == 2:
 461                 return PackIdxV2(filename, f)
 462             else:
 463                 raise GitError('%s: expected idx file version 2, got %d'
 464                                % (filename, version))
 465         else:
 466             return PackIdxV1(filename, f)
 467     elif filename.endswith('.midx'):
 468         return PackMidx(filename)
 469     else:
 470         raise GitError('idx filenames must end with .idx or .midx')
 471
 472
 473 def idxmerge(idxlist, final_progress=True):
 474     """Generate a list of all the objects reachable in a PackIdxList."""
 475     total = sum(len(i) for i in idxlist)
 476     iters = (iter(i) for i in idxlist)
 477     heap = [(next(it), it) for it in iters]
 478     heapq.heapify(heap)
 479     count = 0
 480     last = None
 481     while heap:
 482         if (count % 10024) == 0:
 483             progress('Reading indexes: %.2f%% (%d/%d)\r'
 484                      % (count*100.0/total, count, total))
 485         (e, it) = heap[0]
 486         if e != last:
 487             yield e
 488             last = e
 489         count += 1
 490         e = next(it)
 491         if e:
 492             heapq.heapreplace(heap, (e, it))
 493         else:
 494             heapq.heappop(heap)
 495     if final_progress:
 496         log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 497
 498
 499 class PackWriter:
 500     """Writes Git objects insid a pack file."""
 501     def __init__(self, objcache_maker=None):
 502         self.count = 0
 503         self.outbytes = 0
 504         self.filename = None
 505         self.file = None
 506         self.objcache_maker = objcache_maker
 507         self.objcache = None
 508
 509     def __del__(self):
 510         self.close()
 511
 512     def _make_objcache(self):
 513         if self.objcache == None:
 514             if self.objcache_maker:
 515                 self.objcache = self.objcache_maker()
 516             else:
 517                 self.objcache = PackIdxList(repo('objects/pack'))
 518
 519     def _open(self):
 520         if not self.file:
 521             self._make_objcache()
 522             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 523             self.file = os.fdopen(fd, 'w+b')
 524             assert(name.endswith('.pack'))
 525             self.filename = name[:-5]
 526             self.file.write('PACK\0\0\0\2\0\0\0\0')
 527
 528     def _raw_write(self, datalist):
 529         self._open()
 530         f = self.file
 531         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 532         # the file never has a *partial* blob.  So let's make sure it's
 533         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 534         # to our hashsplit algorithm.)  f.write() does its own buffering,
 535         # but that's okay because we'll flush it in _end().
 536         oneblob = ''.join(datalist)
 537         f.write(oneblob)
 538         self.outbytes += len(oneblob)
 539         self.count += 1
 540
 541     def _write(self, bin, type, content):
 542         if verbose:
 543             log('>')
 544         self._raw_write(_encode_packobj(type, content))
 545         return bin
 546
 547     def breakpoint(self):
 548         """Clear byte and object counts and return the last processed id."""
 549         id = self._end()
 550         self.outbytes = self.count = 0
 551         return id
 552
 553     def write(self, type, content):
 554         """Write an object in this pack file."""
 555         return self._write(calc_hash(type, content), type, content)
 556
 557     def exists(self, id):
 558         """Return non-empty if an object is found in the object cache."""
 559         if not self.objcache:
 560             self._make_objcache()
 561         return self.objcache.exists(id)
 562
 563     def maybe_write(self, type, content):
 564         """Write an object to the pack file if not present and return its id."""
 565         bin = calc_hash(type, content)
 566         if not self.exists(bin):
 567             self._write(bin, type, content)
 568             self.objcache.add(bin)
 569         return bin
 570
 571     def new_blob(self, blob):
 572         """Create a blob object in the pack with the supplied content."""
 573         return self.maybe_write('blob', blob)
 574
 575     def new_tree(self, shalist):
 576         """Create a tree object in the pack."""
 577         shalist = sorted(shalist, key = _shalist_sort_key)
 578         l = []
 579         for (mode,name,bin) in shalist:
 580             assert(mode)
 581             assert(mode != '0')
 582             assert(mode[0] != '0')
 583             assert(name)
 584             assert(len(bin) == 20)
 585             l.append('%s %s\0%s' % (mode,name,bin))
 586         return self.maybe_write('tree', ''.join(l))
 587
 588     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 589         l = []
 590         if tree: l.append('tree %s' % tree.encode('hex'))
 591         if parent: l.append('parent %s' % parent.encode('hex'))
 592         if author: l.append('author %s %s' % (author, _git_date(adate)))
 593         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 594         l.append('')
 595         l.append(msg)
 596         return self.maybe_write('commit', '\n'.join(l))
 597
 598     def new_commit(self, parent, tree, date, msg):
 599         """Create a commit object in the pack."""
 600         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 601         commit = self._new_commit(tree, parent,
 602                                   userline, date, userline, date,
 603                                   msg)
 604         return commit
 605
 606     def abort(self):
 607         """Remove the pack file from disk."""
 608         f = self.file
 609         if f:
 610             self.file = None
 611             f.close()
 612             os.unlink(self.filename + '.pack')
 613
 614     def _end(self):
 615         f = self.file
 616         if not f: return None
 617         self.file = None
 618         self.objcache = None
 619
 620         # update object count
 621         f.seek(8)
 622         cp = struct.pack('!i', self.count)
 623         assert(len(cp) == 4)
 624         f.write(cp)
 625
 626         # calculate the pack sha1sum
 627         f.seek(0)
 628         sum = Sha1()
 629         while 1:
 630             b = f.read(65536)
 631             sum.update(b)
 632             if not b: break
 633         f.write(sum.digest())
 634
 635         f.close()
 636
 637         p = subprocess.Popen(['git', 'index-pack', '-v',
 638                               '--index-version=2',
 639                               self.filename + '.pack'],
 640                              preexec_fn = _gitenv,
 641                              stdout = subprocess.PIPE)
 642         out = p.stdout.read().strip()
 643         _git_wait('git index-pack', p)
 644         if not out:
 645             raise GitError('git index-pack produced no output')
 646         nameprefix = repo('objects/pack/%s' % out)
 647         if os.path.exists(self.filename + '.map'):
 648             os.unlink(self.filename + '.map')
 649         os.rename(self.filename + '.pack', nameprefix + '.pack')
 650         os.rename(self.filename + '.idx', nameprefix + '.idx')
 651
 652         auto_midx(repo('objects/pack'))
 653         return nameprefix
 654
 655     def close(self):
 656         """Close the pack file and move it to its definitive path."""
 657         return self._end()
 658
 659
 660 def _git_date(date):
 661     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 662
 663
 664 def _gitenv():
 665     os.environ['GIT_DIR'] = os.path.abspath(repo())
 666
 667
 668 def list_refs(refname = None):
 669     """Generate a list of tuples in the form (refname,hash).
 670     If a ref name is specified, list only this particular ref.
 671     """
 672     argv = ['git', 'show-ref', '--']
 673     if refname:
 674         argv += [refname]
 675     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 676     out = p.stdout.read().strip()
 677     rv = p.wait()  # not fatal
 678     if rv:
 679         assert(not out)
 680     if out:
 681         for d in out.split('\n'):
 682             (sha, name) = d.split(' ', 1)
 683             yield (name, sha.decode('hex'))
 684
 685
 686 def read_ref(refname):
 687     """Get the commit id of the most recent commit made on a given ref."""
 688     l = list(list_refs(refname))
 689     if l:
 690         assert(len(l) == 1)
 691         return l[0][1]
 692     else:
 693         return None
 694
 695
 696 def rev_list(ref, count=None):
 697     """Generate a list of reachable commits in reverse chronological order.
 698
 699     This generator walks through commits, from child to parent, that are
 700     reachable via the specified ref and yields a series of tuples of the form
 701     (date,hash).
 702
 703     If count is a non-zero integer, limit the number of commits to "count"
 704     objects.
 705     """
 706     assert(not ref.startswith('-'))
 707     opts = []
 708     if count:
 709         opts += ['-n', str(atoi(count))]
 710     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 711     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 712     commit = None
 713     for row in p.stdout:
 714         s = row.strip()
 715         if s.startswith('commit '):
 716             commit = s[7:].decode('hex')
 717         else:
 718             date = int(s)
 719             yield (date, commit)
 720     rv = p.wait()  # not fatal
 721     if rv:
 722         raise GitError, 'git rev-list returned error %d' % rv
 723
 724
 725 def rev_get_date(ref):
 726     """Get the date of the latest commit on the specified ref."""
 727     for (date, commit) in rev_list(ref, count=1):
 728         return date
 729     raise GitError, 'no such commit %r' % ref
 730
 731
 732 def rev_parse(committish):
 733     """Resolve the full hash for 'committish', if it exists.
 734
 735     Should be roughly equivalent to 'git rev-parse'.
 736
 737     Returns the hex value of the hash if it is found, None if 'committish' does
 738     not correspond to anything.
 739     """
 740     head = read_ref(committish)
 741     if head:
 742         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 743         return head
 744
 745     pL = PackIdxList(repo('objects/pack'))
 746
 747     if len(committish) == 40:
 748         try:
 749             hash = committish.decode('hex')
 750         except TypeError:
 751             return None
 752
 753         if pL.exists(hash):
 754             return hash
 755
 756     return None
 757
 758
 759 def update_ref(refname, newval, oldval):
 760     """Change the commit pointed to by a branch."""
 761     if not oldval:
 762         oldval = ''
 763     assert(refname.startswith('refs/heads/'))
 764     p = subprocess.Popen(['git', 'update-ref', refname,
 765                           newval.encode('hex'), oldval.encode('hex')],
 766                          preexec_fn = _gitenv)
 767     _git_wait('git update-ref', p)
 768
 769
 770 def guess_repo(path=None):
 771     """Set the path value in the global variable "repodir".
 772     This makes bup look for an existing bup repository, but not fail if a
 773     repository doesn't exist. Usually, if you are interacting with a bup
 774     repository, you would not be calling this function but using
 775     check_repo_or_die().
 776     """
 777     global repodir
 778     if path:
 779         repodir = path
 780     if not repodir:
 781         repodir = os.environ.get('BUP_DIR')
 782         if not repodir:
 783             repodir = os.path.expanduser('~/.bup')
 784
 785
 786 def init_repo(path=None):
 787     """Create the Git bare repository for bup in a given path."""
 788     guess_repo(path)
 789     d = repo()
 790     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 791         raise GitError('"%d" exists but is not a directory\n' % d)
 792     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 793                          preexec_fn = _gitenv)
 794     _git_wait('git init', p)
 795     # Force the index version configuration in order to ensure bup works
 796     # regardless of the version of the installed Git binary.
 797     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 798                          stdout=sys.stderr, preexec_fn = _gitenv)
 799     _git_wait('git config', p)
 800
 801
 802 def check_repo_or_die(path=None):
 803     """Make sure a bup repository exists, and abort if not.
 804     If the path to a particular repository was not specified, this function
 805     initializes the default repository automatically.
 806     """
 807     guess_repo(path)
 808     if not os.path.isdir(repo('objects/pack/.')):
 809         if repodir == home_repodir:
 810             init_repo()
 811         else:
 812             log('error: %r is not a bup/git repository\n' % repo())
 813             sys.exit(15)
 814
 815
 816 def treeparse(buf):
 817     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 818     ofs = 0
 819     while ofs < len(buf):
 820         z = buf[ofs:].find('\0')
 821         assert(z > 0)
 822         spl = buf[ofs:ofs+z].split(' ', 1)
 823         assert(len(spl) == 2)
 824         sha = buf[ofs+z+1:ofs+z+1+20]
 825         ofs += z+1+20
 826         yield (spl[0], spl[1], sha)
 827
 828
 829 _ver = None
 830 def ver():
 831     """Get Git's version and ensure a usable version is installed.
 832
 833     The returned version is formatted as an ordered tuple with each position
 834     representing a digit in the version tag. For example, the following tuple
 835     would represent version 1.6.6.9:
 836
 837         ('1', '6', '6', '9')
 838     """
 839     global _ver
 840     if not _ver:
 841         p = subprocess.Popen(['git', '--version'],
 842                              stdout=subprocess.PIPE)
 843         gvs = p.stdout.read()
 844         _git_wait('git --version', p)
 845         m = re.match(r'git version (\S+.\S+)', gvs)
 846         if not m:
 847             raise GitError('git --version weird output: %r' % gvs)
 848         _ver = tuple(m.group(1).split('.'))
 849     needed = ('1','5', '3', '1')
 850     if _ver < needed:
 851         raise GitError('git version %s or higher is required; you have %s'
 852                        % ('.'.join(needed), '.'.join(_ver)))
 853     return _ver
 854
 855
 856 def _git_wait(cmd, p):
 857     rv = p.wait()
 858     if rv != 0:
 859         raise GitError('%s returned %d' % (cmd, rv))
 860
 861
 862 def _git_capture(argv):
 863     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 864     r = p.stdout.read()
 865     _git_wait(repr(argv), p)
 866     return r
 867
 868
 869 class _AbortableIter:
 870     def __init__(self, it, onabort = None):
 871         self.it = it
 872         self.onabort = onabort
 873         self.done = None
 874
 875     def __iter__(self):
 876         return self
 877
 878     def next(self):
 879         try:
 880             return self.it.next()
 881         except StopIteration, e:
 882             self.done = True
 883             raise
 884         except:
 885             self.abort()
 886             raise
 887
 888     def abort(self):
 889         """Abort iteration and call the abortion callback, if needed."""
 890         if not self.done:
 891             self.done = True
 892             if self.onabort:
 893                 self.onabort()
 894
 895     def __del__(self):
 896         self.abort()
 897
 898
 899 _ver_warned = 0
 900 class CatPipe:
 901     """Link to 'git cat-file' that is used to retrieve blob data."""
 902     def __init__(self):
 903         global _ver_warned
 904         wanted = ('1','5','6')
 905         if ver() < wanted:
 906             if not _ver_warned:
 907                 log('warning: git version < %s; bup will be slow.\n'
 908                     % '.'.join(wanted))
 909                 _ver_warned = 1
 910             self.get = self._slow_get
 911         else:
 912             self.p = self.inprogress = None
 913             self.get = self._fast_get
 914
 915     def _abort(self):
 916         if self.p:
 917             self.p.stdout.close()
 918             self.p.stdin.close()
 919         self.p = None
 920         self.inprogress = None
 921
 922     def _restart(self):
 923         self._abort()
 924         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 925                                   stdin=subprocess.PIPE,
 926                                   stdout=subprocess.PIPE,
 927                                   close_fds = True,
 928                                   preexec_fn = _gitenv)
 929
 930     def _fast_get(self, id):
 931         if not self.p or self.p.poll() != None:
 932             self._restart()
 933         assert(self.p)
 934         assert(self.p.poll() == None)
 935         if self.inprogress:
 936             log('_fast_get: opening %r while %r is open'
 937                 % (id, self.inprogress))
 938         assert(not self.inprogress)
 939         assert(id.find('\n') < 0)
 940         assert(id.find('\r') < 0)
 941         assert(not id.startswith('-'))
 942         self.inprogress = id
 943         self.p.stdin.write('%s\n' % id)
 944         hdr = self.p.stdout.readline()
 945         if hdr.endswith(' missing\n'):
 946             self.inprogress = None
 947             raise KeyError('blob %r is missing' % id)
 948         spl = hdr.split(' ')
 949         if len(spl) != 3 or len(spl[0]) != 40:
 950             raise GitError('expected blob, got %r' % spl)
 951         (hex, type, size) = spl
 952
 953         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 954                            onabort = self._abort)
 955         try:
 956             yield type
 957             for blob in it:
 958                 yield blob
 959             assert(self.p.stdout.readline() == '\n')
 960             self.inprogress = None
 961         except Exception, e:
 962             it.abort()
 963             raise
 964
 965     def _slow_get(self, id):
 966         assert(id.find('\n') < 0)
 967         assert(id.find('\r') < 0)
 968         assert(id[0] != '-')
 969         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 970         yield type
 971
 972         p = subprocess.Popen(['git', 'cat-file', type, id],
 973                              stdout=subprocess.PIPE,
 974                              preexec_fn = _gitenv)
 975         for blob in chunkyreader(p.stdout):
 976             yield blob
 977         _git_wait('git cat-file', p)
 978
 979     def _join(self, it):
 980         type = it.next()
 981         if type == 'blob':
 982             for blob in it:
 983                 yield blob
 984         elif type == 'tree':
 985             treefile = ''.join(it)
 986             for (mode, name, sha) in treeparse(treefile):
 987                 for blob in self.join(sha.encode('hex')):
 988                     yield blob
 989         elif type == 'commit':
 990             treeline = ''.join(it).split('\n')[0]
 991             assert(treeline.startswith('tree '))
 992             for blob in self.join(treeline[5:]):
 993                 yield blob
 994         else:
 995             raise GitError('invalid object type %r: expected blob/tree/commit'
 996                            % type)
 997
 998     def join(self, id):
 999         """Generate a list of the content of all blobs that can be reached
1000         from an object.  The hash given in 'id' must point to a blob, a tree
1001         or a commit. The content of all blobs that can be seen from trees or
1002         commits will be added to the list.
1003         """
1004         try:
1005             for d in self._join(self.get(id)):
1006                 yield d
1007         except StopIteration:
1008             log('booger!\n')