1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, heapq
7 from bup.helpers import *
8 from bup import _helpers
14 home_repodir = os.path.expanduser('~/.bup')
17 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
18 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
24 class GitError(Exception):
29 """Get the path to the git repository or one of its subdirectories."""
32 raise GitError('You should call check_repo_or_die()')
34 # If there's a .git subdirectory, then the actual repo is in there.
35 gd = os.path.join(repodir, '.git')
36 if os.path.exists(gd):
39 return os.path.join(repodir, sub)
42 def auto_midx(objdir):
43 main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
44 args = [main_exe, 'midx', '--auto', '--dir', objdir]
45 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
47 add_error('%r: returned %d' % (args, rv))
50 def mangle_name(name, mode, gitmode):
51 """Mangle a file name to present an abstract name for segmented files.
52 Mangled file names will have the ".bup" extension added to them. If a
53 file's name already ends with ".bup", a ".bupl" extension is added to
54 disambiguate normal files from segmented ones.
56 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
58 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
64 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
65 def demangle_name(name):
66 """Remove name mangling from a file name, if necessary.
68 The return value is a tuple (demangled_filename,mode), where mode is one of
71 * BUP_NORMAL : files that should be read as-is from the repository
72 * BUP_CHUNKED : files that were chunked and need to be assembled
74 For more information on the name mangling algorithm, see mangle_name()
76 if name.endswith('.bupl'):
77 return (name[:-5], BUP_NORMAL)
78 elif name.endswith('.bup'):
79 return (name[:-4], BUP_CHUNKED)
81 return (name, BUP_NORMAL)
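# Illustrative example (not exercised by the library itself): a chunked
# regular file named 'foo' is stored as a git tree, so it gets the '.bup'
# suffix, and demangle_name() undoes the mangling:
#
#   mangle_name('foo', 0100644, 040000)  == 'foo.bup'
#   demangle_name('foo.bup')             == ('foo', BUP_CHUNKED)
#   demangle_name('foo.bupl')            == ('foo', BUP_NORMAL)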
84 def _encode_packobj(type, content):
87 szbits = (sz & 0x0f) | (_typemap[type]<<4)
96 z = zlib.compressobj(1)
98 yield z.compress(content)
102 def _encode_looseobj(type, content):
103 z = zlib.compressobj(1)
104 yield z.compress('%s %d\0' % (type, len(content)))
105 yield z.compress(content)
109 def _decode_looseobj(buf):
111 s = zlib.decompress(buf)
118 assert(type in _typemap)
119 assert(sz == len(content))
120 return (type, content)
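# Sketch of the expected loose-object round trip (illustrative only):
#
#   raw = ''.join(_encode_looseobj('blob', 'hello'))
#   _decode_looseobj(raw)  == ('blob', 'hello')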
123 def _decode_packobj(buf):
126 type = _typermap[(c & 0x70) >> 4]
133 sz |= (c & 0x7f) << shift
137 return (type, zlib.decompress(buf[i+1:]))
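# The pack-object header is Git's variable-length size encoding: the low four
# bits of the first byte hold the size's low bits, bits 4-6 hold the type, and
# the high bit flags a continuation byte. Encode/decode should round-trip
# (illustrative sketch):
#
#   packed = ''.join(_encode_packobj('tree', 'some tree body'))
#   _decode_packobj(packed)  == ('tree', 'some tree body')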
144 def find_offset(self, hash):
145 """Get the offset of an object inside the index file."""
146 idx = self._idx_from_hash(hash)
148 return self._ofs_from_idx(idx)
151 def exists(self, hash):
152 """Return nonempty if the object exists in this index."""
153 return hash and (self._idx_from_hash(hash) != None) and True or None
156 return int(self.fanout[255])
158 def _idx_from_hash(self, hash):
159 global _total_searches, _total_steps
161 assert(len(hash) == 20)
163 start = self.fanout[b1-1] # range -1..254
164 end = self.fanout[b1] # range 0..255
166 _total_steps += 1 # lookup table is a step
169 mid = start + (end-start)/2
170 v = self._idx_to_hash(mid)
180 class PackIdxV1(PackIdx):
181 """Object representation of a Git pack index (version 1) file."""
182 def __init__(self, filename, f):
184 self.idxnames = [self.name]
185 self.map = mmap_read(f)
186 self.fanout = list(struct.unpack('!256I',
187 str(buffer(self.map, 0, 256*4))))
188 self.fanout.append(0) # entry "-1"
189 nsha = self.fanout[255]
190 self.shatable = buffer(self.map, 256*4, nsha*24)
192 def _ofs_from_idx(self, idx):
193 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
195 def _idx_to_hash(self, idx):
196 return str(self.shatable[idx*24+4 : idx*24+24])
199 for i in xrange(self.fanout[255]):
200 yield buffer(self.map, 256*4 + 24*i + 4, 20)
203 class PackIdxV2(PackIdx):
204 """Object representation of a Git pack index (version 2) file."""
205 def __init__(self, filename, f):
207 self.idxnames = [self.name]
208 self.map = mmap_read(f)
209 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
210 self.fanout = list(struct.unpack('!256I',
211 str(buffer(self.map, 8, 256*4))))
212 self.fanout.append(0) # entry "-1"
213 nsha = self.fanout[255]
214 self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
215 self.ofstable = buffer(self.map,
216 8 + 256*4 + nsha*20 + nsha*4,
218 self.ofs64table = buffer(self.map,
219 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
221 def _ofs_from_idx(self, idx):
222 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
224 idx64 = ofs & 0x7fffffff
225 ofs = struct.unpack('!Q',  # 64-bit offset entries are 8 bytes
226 str(buffer(self.ofs64table, idx64*8, 8)))[0]
229 def _idx_to_hash(self, idx):
230 return str(self.shatable[idx*20:(idx+1)*20])
233 for i in xrange(self.fanout[255]):
234 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
237 extract_bits = _helpers.extract_bits
241 """Wrapper which contains data from multiple index files.
242 Multiple index (.midx) files constitute a wrapper around index (.idx) files
243 and make it possible for bup to expand Git's indexing capabilities to vast
246 def __init__(self, filename):
248 self.force_keep = False
249 assert(filename.endswith('.midx'))
250 self.map = mmap_read(open(filename))
251 if str(self.map[0:4]) != 'MIDX':
252 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
253 self.force_keep = True
254 return self._init_failed()
255 ver = struct.unpack('!I', self.map[4:8])[0]
256 if ver < MIDX_VERSION:
257 log('Warning: ignoring old-style (v%d) midx %r\n'
259 self.force_keep = False # old stuff is boring
260 return self._init_failed()
261 if ver > MIDX_VERSION:
262 log('Warning: ignoring too-new (v%d) midx %r\n'
264 self.force_keep = True # new stuff is exciting
265 return self._init_failed()
267 self.bits = _helpers.firstword(self.map[8:12])
268 self.entries = 2**self.bits
269 self.fanout = buffer(self.map, 12, self.entries*4)
270 shaofs = 12 + self.entries*4
271 nsha = self._fanget(self.entries-1)
272 self.shalist = buffer(self.map, shaofs, nsha*20)
273 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
275 def _init_failed(self):
278 self.fanout = buffer('\0\0\0\0')
279 self.shalist = buffer('\0'*20)
282 def _fanget(self, i):
284 s = self.fanout[start:start+4]
285 return _helpers.firstword(s)
288 return str(self.shalist[i*20:(i+1)*20])
290 def exists(self, hash):
291 """Return nonempty if the object exists in the index files."""
292 global _total_searches, _total_steps
295 el = extract_bits(want, self.bits)
297 start = self._fanget(el-1)
298 startv = el << (32-self.bits)
302 end = self._fanget(el)
303 endv = (el+1) << (32-self.bits)
304 _total_steps += 1 # lookup table is a step
305 hashv = _helpers.firstword(hash)
306 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
309 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
310 mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
311 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
313 #print ' %08x' % self._num(v)
316 startv = _helpers.firstword(v)
319 endv = _helpers.firstword(v)
325 for i in xrange(self._fanget(self.entries-1)):
326 yield buffer(self.shalist, i*20, 20)
329 return int(self._fanget(self.entries-1))
334 def __init__(self, dir):
336 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
346 assert(_mpi_count == 0)
349 return iter(idxmerge(self.packs))
352 return sum(len(pack) for pack in self.packs)
354 def exists(self, hash):
355 """Return nonempty if the object exists in the index files."""
356 global _total_searches
358 if hash in self.also:
360 for i in range(len(self.packs)):
362 _total_searches -= 1 # will be incremented by sub-pack
364 # reorder so most recently used packs are searched first
365 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
369 def refresh(self, skip_midx = False):
370 """Refresh the index list.
371 This method verifies if .midx files were superseded (e.g. all of their
372 contents are in another, bigger .midx file) and removes the superseded
375 If skip_midx is True, all work on .midx files will be skipped and .midx
376 files will be removed from the list.
378 The module-global variable 'ignore_midx' can force this function to
379 always act as if skip_midx was True.
381 skip_midx = skip_midx or ignore_midx
382 d = dict((p.name, p) for p in self.packs
383 if not skip_midx or not isinstance(p, PackMidx))
384 if os.path.exists(self.dir):
387 for ix in self.packs:
388 if isinstance(ix, PackMidx):
389 for name in ix.idxnames:
390 d[os.path.join(self.dir, name)] = ix
391 for f in os.listdir(self.dir):
392 full = os.path.join(self.dir, f)
393 if f.endswith('.midx') and not d.get(full):
395 (mxd, mxf) = os.path.split(mx.name)
397 for n in mx.idxnames:
398 if not os.path.exists(os.path.join(mxd, n)):
399 log(('warning: index %s missing\n' +
400 ' used by %s\n') % (n, mxf))
407 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
410 for sub in ix.idxnames:
411 found = d.get(os.path.join(self.dir, sub))
412 if not found or isinstance(found, PackIdx):
413 # doesn't exist, or exists but not in a midx
415 for name in ix.idxnames:
416 d[os.path.join(self.dir, name)] = ix
419 if not any and not ix.force_keep:
420 debug1('midx: removing redundant: %s\n'
421 % os.path.basename(ix.name))
423 for f in os.listdir(self.dir):
424 full = os.path.join(self.dir, f)
425 if f.endswith('.idx') and not d.get(full):
428 self.packs = list(set(d.values()))
429 debug1('PackIdxList: using %d index%s.\n'
430 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
433 """Insert an additional object in the list."""
437 """Remove all additional objects from the list."""
441 def calc_hash(type, content):
442 """Calculate some content's hash in the Git fashion."""
443 header = '%s %d\0' % (type, len(content))
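# This is the same id `git hash-object` would compute: the SHA-1 of
# '<type> <length>\0' followed by the content, e.g. (illustrative):
#
#   import hashlib
#   calc_hash('blob', 'foo')  == hashlib.sha1('blob 3\0foo').digest()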
449 def _shalist_sort_key(ent):
450 (mode, name, id) = ent
451 if stat.S_ISDIR(int(mode, 8)):
457 def open_idx(filename):
458 if filename.endswith('.idx'):
459 f = open(filename, 'rb')
461 if header[0:4] == '\377tOc':
462 version = struct.unpack('!I', header[4:8])[0]
464 return PackIdxV2(filename, f)
466 raise GitError('%s: expected idx file version 2, got %d'
467 % (filename, version))
469 return PackIdxV1(filename, f)
470 elif filename.endswith('.midx'):
471 return PackMidx(filename)
473 raise GitError('idx filenames must end with .idx or .midx')
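# Illustrative usage (the pack name below is hypothetical):
#
#   ix = open_idx(repo('objects/pack/pack-deadbeef.idx'))
#   if ix.exists(binsha):              # binsha: a 20-byte binary SHA-1
#       print ix.find_offset(binsha)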
476 def idxmerge(idxlist, final_progress=True):
477 """Generate a list of all the objects reachable in a PackIdxList."""
478 total = sum(len(i) for i in idxlist)
479 iters = (iter(i) for i in idxlist)
480 heap = [(next(it), it) for it in iters]
485 if (count % 10024) == 0:
486 progress('Reading indexes: %.2f%% (%d/%d)\r'
487 % (count*100.0/total, count, total))
495 heapq.heapreplace(heap, (e, it))
499 log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
503 """Writes Git objects insid a pack file."""
504 def __init__(self, objcache_maker=None):
509 self.objcache_maker = objcache_maker
515 def _make_objcache(self):
516 if self.objcache == None:
517 if self.objcache_maker:
518 self.objcache = self.objcache_maker()
520 self.objcache = PackIdxList(repo('objects/pack'))
524 self._make_objcache()
525 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
526 self.file = os.fdopen(fd, 'w+b')
527 assert(name.endswith('.pack'))
528 self.filename = name[:-5]
529 self.file.write('PACK\0\0\0\2\0\0\0\0')
531 def _raw_write(self, datalist):
534 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
535 # the file never has a *partial* blob. So let's make sure it's
536 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
537 # to our hashsplit algorithm.) f.write() does its own buffering,
538 # but that's okay because we'll flush it in _end().
539 oneblob = ''.join(datalist)
541 self.outbytes += len(oneblob)
544 def _write(self, bin, type, content):
547 self._raw_write(_encode_packobj(type, content))
550 def breakpoint(self):
551 """Clear byte and object counts and return the last processed id."""
553 self.outbytes = self.count = 0
556 def write(self, type, content):
557 """Write an object in this pack file."""
558 return self._write(calc_hash(type, content), type, content)
560 def exists(self, id):
561 """Return non-empty if an object is found in the object cache."""
562 if not self.objcache:
563 self._make_objcache()
564 return self.objcache.exists(id)
566 def maybe_write(self, type, content):
567 """Write an object to the pack file if not present and return its id."""
568 bin = calc_hash(type, content)
569 if not self.exists(bin):
570 self._write(bin, type, content)
571 self.objcache.add(bin)
574 def new_blob(self, blob):
575 """Create a blob object in the pack with the supplied content."""
576 return self.maybe_write('blob', blob)
578 def new_tree(self, shalist):
579 """Create a tree object in the pack."""
580 shalist = sorted(shalist, key = _shalist_sort_key)
582 for (mode,name,bin) in shalist:
585 assert(mode[0] != '0')
587 assert(len(bin) == 20)
588 l.append('%s %s\0%s' % (mode,name,bin))
589 return self.maybe_write('tree', ''.join(l))
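# For reference, each serialized tree entry is '<octal mode> <name>\0' followed
# immediately by the entry's 20-byte binary SHA-1, e.g. (sketch):
#
#   '100644 hello.c\0' + '\xee'*20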
591 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
593 if tree: l.append('tree %s' % tree.encode('hex'))
594 if parent: l.append('parent %s' % parent.encode('hex'))
595 if author: l.append('author %s %s' % (author, _git_date(adate)))
596 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
599 return self.maybe_write('commit', '\n'.join(l))
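# The assembled commit body follows Git's usual layout (sketch; ids and names
# are made up):
#
#   tree 68aeb3f...
#   parent 3f1b9c0...
#   author A U Thor <a@example.com> 1260000000 -0800
#   committer A U Thor <a@example.com> 1260000000 -0800
#   <blank line>
#   commit message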
601 def new_commit(self, parent, tree, date, msg):
602 """Create a commit object in the pack."""
603 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
604 commit = self._new_commit(tree, parent,
605 userline, date, userline, date,
610 """Remove the pack file from disk."""
615 os.unlink(self.filename + '.pack')
619 if not f: return None
623 # update object count
625 cp = struct.pack('!i', self.count)
629 # calculate the pack sha1sum
636 f.write(sum.digest())
640 p = subprocess.Popen(['git', 'index-pack', '-v',
642 self.filename + '.pack'],
643 preexec_fn = _gitenv,
644 stdout = subprocess.PIPE)
645 out = p.stdout.read().strip()
646 _git_wait('git index-pack', p)
648 raise GitError('git index-pack produced no output')
649 nameprefix = repo('objects/pack/%s' % out)
650 if os.path.exists(self.filename + '.map'):
651 os.unlink(self.filename + '.map')
652 os.rename(self.filename + '.pack', nameprefix + '.pack')
653 os.rename(self.filename + '.idx', nameprefix + '.idx')
655 auto_midx(repo('objects/pack'))
659 """Close the pack file and move it to its definitive path."""
664 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
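# e.g. (sketch, assuming the local timezone is UTC-8):
#
#   _git_date(1260000000)  ->  '1260000000 -0800'
#
# i.e. Unix seconds plus the local UTC offset, the format Git expects in
# author/committer lines.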
668 os.environ['GIT_DIR'] = os.path.abspath(repo())
671 def list_refs(refname = None):
672 """Generate a list of tuples in the form (refname,hash).
673 If a ref name is specified, list only this particular ref.
675 argv = ['git', 'show-ref', '--']
678 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
679 out = p.stdout.read().strip()
680 rv = p.wait() # not fatal
684 for d in out.split('\n'):
685 (sha, name) = d.split(' ', 1)
686 yield (name, sha.decode('hex'))
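# Illustrative usage:
#
#   for (name, sha) in list_refs():
#       print name, sha.encode('hex')    # e.g. 'refs/heads/master', hex id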
689 def read_ref(refname):
690 """Get the commit id of the most recent commit made on a given ref."""
691 l = list(list_refs(refname))
699 def rev_list(ref, count=None):
700 """Generate a list of reachable commits in reverse chronological order.
702 This generator walks through commits, from child to parent, that are
703 reachable via the specified ref and yields a series of tuples of the form
706 If count is a non-zero integer, limit the number of commits to "count"
709 assert(not ref.startswith('-'))
712 opts += ['-n', str(atoi(count))]
713 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
714 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
718 if s.startswith('commit '):
719 commit = s[7:].decode('hex')
723 rv = p.wait() # not fatal
725 raise GitError('git rev-list returned error %d' % rv)
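# Illustrative usage (assumes the named branch exists in the repository):
#
#   for (date, commit) in rev_list('refs/heads/master', count=10):
#       print date, commit.encode('hex')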
728 def rev_get_date(ref):
729 """Get the date of the latest commit on the specified ref."""
730 for (date, commit) in rev_list(ref, count=1):
732 raise GitError('no such commit %r' % ref)
735 def rev_parse(committish):
736 """Resolve the full hash for 'committish', if it exists.
738 Should be roughly equivalent to 'git rev-parse'.
740 Returns the hex value of the hash if it is found, None if 'committish' does
741 not correspond to anything.
743 head = read_ref(committish)
745 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
748 pL = PackIdxList(repo('objects/pack'))
750 if len(committish) == 40:
752 hash = committish.decode('hex')
762 def update_ref(refname, newval, oldval):
763 """Change the commit pointed to by a branch."""
766 assert(refname.startswith('refs/heads/'))
767 p = subprocess.Popen(['git', 'update-ref', refname,
768 newval.encode('hex'), oldval.encode('hex')],
769 preexec_fn = _gitenv)
770 _git_wait('git update-ref', p)
773 def guess_repo(path=None):
774 """Set the path value in the global variable "repodir".
775 This makes bup look for an existing bup repository, but does not fail if a
776 repository doesn't exist. Usually, if you are interacting with a bup
777 repository, you would not be calling this function but using
784 repodir = os.environ.get('BUP_DIR')
786 repodir = os.path.expanduser('~/.bup')
789 def init_repo(path=None):
790 """Create the Git bare repository for bup in a given path."""
793 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
794 raise GitError('"%s" exists but is not a directory\n' % d)
795 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
796 preexec_fn = _gitenv)
797 _git_wait('git init', p)
798 # Force the index version configuration in order to ensure bup works
799 # regardless of the version of the installed Git binary.
800 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
801 stdout=sys.stderr, preexec_fn = _gitenv)
802 _git_wait('git config', p)
805 def check_repo_or_die(path=None):
806 """Make sure a bup repository exists, and abort if not.
807 If the path to a particular repository was not specified, this function
808 initializes the default repository automatically.
811 if not os.path.isdir(repo('objects/pack/.')):
812 if repodir == home_repodir:
815 log('error: %r is not a bup/git repository\n' % repo())
820 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
822 while ofs < len(buf):
823 z = buf[ofs:].find('\0')
825 spl = buf[ofs:ofs+z].split(' ', 1)
826 assert(len(spl) == 2)
827 sha = buf[ofs+z+1:ofs+z+1+20]
829 yield (spl[0], spl[1], sha)
834 """Get Git's version and ensure a usable version is installed.
836 The returned version is formatted as an ordered tuple with each position
837 representing a component of the version tag. For example, the following tuple
838 would represent version 1.6.6.9:
844 p = subprocess.Popen(['git', '--version'],
845 stdout=subprocess.PIPE)
846 gvs = p.stdout.read()
847 _git_wait('git --version', p)
848 m = re.match(r'git version (\S+.\S+)', gvs)
850 raise GitError('git --version weird output: %r' % gvs)
851 _ver = tuple(m.group(1).split('.'))
852 needed = ('1','5', '3', '1')
854 raise GitError('git version %s or higher is required; you have %s'
855 % ('.'.join(needed), '.'.join(_ver)))
859 def _git_wait(cmd, p):
862 raise GitError('%s returned %d' % (cmd, rv))
865 def _git_capture(argv):
866 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
868 _git_wait(repr(argv), p)
872 class _AbortableIter:
873 def __init__(self, it, onabort = None):
875 self.onabort = onabort
883 return self.it.next()
884 except StopIteration, e:
892 """Abort iteration and call the abortion callback, if needed."""
904 """Link to 'git cat-file' that is used to retrieve blob data."""
907 wanted = ('1','5','6')
910 log('warning: git version < %s; bup will be slow.\n'
913 self.get = self._slow_get
915 self.p = self.inprogress = None
916 self.get = self._fast_get
920 self.p.stdout.close()
923 self.inprogress = None
927 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
928 stdin=subprocess.PIPE,
929 stdout=subprocess.PIPE,
931 preexec_fn = _gitenv)
933 def _fast_get(self, id):
934 if not self.p or self.p.poll() != None:
937 assert(self.p.poll() == None)
939 log('_fast_get: opening %r while %r is open'
940 % (id, self.inprogress))
941 assert(not self.inprogress)
942 assert(id.find('\n') < 0)
943 assert(id.find('\r') < 0)
944 assert(not id.startswith('-'))
946 self.p.stdin.write('%s\n' % id)
947 hdr = self.p.stdout.readline()
948 if hdr.endswith(' missing\n'):
949 self.inprogress = None
950 raise KeyError('blob %r is missing' % id)
952 if len(spl) != 3 or len(spl[0]) != 40:
953 raise GitError('expected blob, got %r' % spl)
954 (hex, type, size) = spl
956 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
957 onabort = self._abort)
962 assert(self.p.stdout.readline() == '\n')
963 self.inprogress = None
968 def _slow_get(self, id):
969 assert(id.find('\n') < 0)
970 assert(id.find('\r') < 0)
972 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
975 p = subprocess.Popen(['git', 'cat-file', type, id],
976 stdout=subprocess.PIPE,
977 preexec_fn = _gitenv)
978 for blob in chunkyreader(p.stdout):
980 _git_wait('git cat-file', p)
988 treefile = ''.join(it)
989 for (mode, name, sha) in treeparse(treefile):
990 for blob in self.join(sha.encode('hex')):
992 elif type == 'commit':
993 treeline = ''.join(it).split('\n')[0]
994 assert(treeline.startswith('tree '))
995 for blob in self.join(treeline[5:]):
998 raise GitError('invalid object type %r: expected blob/tree/commit'
1002 """Generate a list of the content of all blobs that can be reached
1003 from an object. The hash given in 'id' must point to a blob, a tree
1004 or a commit. The content of all blobs that can be seen from trees or
1005 commits will be added to the list.
1008 for d in self._join(self.get(id)):
1010 except StopIteration: