lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, zlib, time, subprocess, struct, stat, re, tempfile
   6 import heapq
   7 from bup.helpers import *
   8 from bup import _helpers
   9
  10 MIDX_VERSION = 2
  11
  12 verbose = 0
  13 ignore_midx = 0
  14 home_repodir = os.path.expanduser('~/.bup')
  15 repodir = None
  16
  17 _typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
  18 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
  19
  20 _total_searches = 0
  21 _total_steps = 0
  22
  23
  24 class GitError(Exception):
  25     pass
  26
  27
  28 def repo(sub = ''):
  29     """Get the path to the git repository or one of its subdirectories."""
  30     global repodir
  31     if not repodir:
  32         raise GitError('You should call check_repo_or_die()')
  33
  34     # If there's a .git subdirectory, then the actual repo is in there.
  35     gd = os.path.join(repodir, '.git')
  36     if os.path.exists(gd):
  37         repodir = gd
  38
  39     return os.path.join(repodir, sub)
  40
  41
  42 def auto_midx(objdir):
  43     main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
  44     args = [main_exe, 'midx', '--auto', '--dir', objdir]
  45     rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  46     if rv:
  47         add_error('%r: returned %d' % (args, rv))
  48
  49
  50 def mangle_name(name, mode, gitmode):
  51     """Mangle a file name to present an abstract name for segmented files.
  52     Mangled file names will have the ".bup" extension added to them. If a
  53     file's name already ends with ".bup", a ".bupl" extension is added to
  54     disambiguate normal files from semgmented ones.
  55     """
  56     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  57         return name + '.bup'
  58     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  59         return name + '.bupl'
  60     else:
  61         return name
  62
  63
  64 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  65 def demangle_name(name):
  66     """Remove name mangling from a file name, if necessary.
  67
  68     The return value is a tuple (demangled_filename,mode), where mode is one of
  69     the following:
  70
  71     * BUP_NORMAL  : files that should be read as-is from the repository
  72     * BUP_CHUNKED : files that were chunked and need to be assembled
  73
  74     For more information on the name mangling algorythm, see mangle_name()
  75     """
  76     if name.endswith('.bupl'):
  77         return (name[:-5], BUP_NORMAL)
  78     elif name.endswith('.bup'):
  79         return (name[:-4], BUP_CHUNKED)
  80     else:
  81         return (name, BUP_NORMAL)
  82
  83
  84 def _encode_packobj(type, content):
  85     szout = ''
  86     sz = len(content)
  87     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  88     sz >>= 4
  89     while 1:
  90         if sz: szbits |= 0x80
  91         szout += chr(szbits)
  92         if not sz:
  93             break
  94         szbits = sz & 0x7f
  95         sz >>= 7
  96     z = zlib.compressobj(1)
  97     yield szout
  98     yield z.compress(content)
  99     yield z.flush()
 100
 101
 102 def _encode_looseobj(type, content):
 103     z = zlib.compressobj(1)
 104     yield z.compress('%s %d\0' % (type, len(content)))
 105     yield z.compress(content)
 106     yield z.flush()
 107
 108
 109 def _decode_looseobj(buf):
 110     assert(buf);
 111     s = zlib.decompress(buf)
 112     i = s.find('\0')
 113     assert(i > 0)
 114     l = s[:i].split(' ')
 115     type = l[0]
 116     sz = int(l[1])
 117     content = s[i+1:]
 118     assert(type in _typemap)
 119     assert(sz == len(content))
 120     return (type, content)
 121
 122
 123 def _decode_packobj(buf):
 124     assert(buf)
 125     c = ord(buf[0])
 126     type = _typermap[(c & 0x70) >> 4]
 127     sz = c & 0x0f
 128     shift = 4
 129     i = 0
 130     while c & 0x80:
 131         i += 1
 132         c = ord(buf[i])
 133         sz |= (c & 0x7f) << shift
 134         shift += 7
 135         if not (c & 0x80):
 136             break
 137     return (type, zlib.decompress(buf[i+1:]))
 138
 139
 140 class PackIdx:
 141     def __init__(self):
 142         assert(0)
 143
 144     def find_offset(self, hash):
 145         """Get the offset of an object inside the index file."""
 146         idx = self._idx_from_hash(hash)
 147         if idx != None:
 148             return self._ofs_from_idx(idx)
 149         return None
 150
 151     def exists(self, hash):
 152         """Return nonempty if the object exists in this index."""
 153         return hash and (self._idx_from_hash(hash) != None) and True or None
 154
 155     def __len__(self):
 156         return int(self.fanout[255])
 157
 158     def _idx_from_hash(self, hash):
 159         global _total_searches, _total_steps
 160         _total_searches += 1
 161         assert(len(hash) == 20)
 162         b1 = ord(hash[0])
 163         start = self.fanout[b1-1] # range -1..254
 164         end = self.fanout[b1] # range 0..255
 165         want = str(hash)
 166         _total_steps += 1  # lookup table is a step
 167         while start < end:
 168             _total_steps += 1
 169             mid = start + (end-start)/2
 170             v = self._idx_to_hash(mid)
 171             if v < want:
 172                 start = mid+1
 173             elif v > want:
 174                 end = mid
 175             else: # got it!
 176                 return mid
 177         return None
 178
 179
 180 class PackIdxV1(PackIdx):
 181     """Object representation of a Git pack index (version 1) file."""
 182     def __init__(self, filename, f):
 183         self.name = filename
 184         self.idxnames = [self.name]
 185         self.map = mmap_read(f)
 186         self.fanout = list(struct.unpack('!256I',
 187                                          str(buffer(self.map, 0, 256*4))))
 188         self.fanout.append(0)  # entry "-1"
 189         nsha = self.fanout[255]
 190         self.shatable = buffer(self.map, 256*4, nsha*24)
 191
 192     def _ofs_from_idx(self, idx):
 193         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 194
 195     def _idx_to_hash(self, idx):
 196         return str(self.shatable[idx*24+4 : idx*24+24])
 197
 198     def __iter__(self):
 199         for i in xrange(self.fanout[255]):
 200             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 201
 202
 203 class PackIdxV2(PackIdx):
 204     """Object representation of a Git pack index (version 2) file."""
 205     def __init__(self, filename, f):
 206         self.name = filename
 207         self.idxnames = [self.name]
 208         self.map = mmap_read(f)
 209         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 210         self.fanout = list(struct.unpack('!256I',
 211                                          str(buffer(self.map, 8, 256*4))))
 212         self.fanout.append(0)  # entry "-1"
 213         nsha = self.fanout[255]
 214         self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
 215         self.ofstable = buffer(self.map,
 216                                8 + 256*4 + nsha*20 + nsha*4,
 217                                nsha*4)
 218         self.ofs64table = buffer(self.map,
 219                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 220
 221     def _ofs_from_idx(self, idx):
 222         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 223         if ofs & 0x80000000:
 224             idx64 = ofs & 0x7fffffff
 225             ofs = struct.unpack('!I',
 226                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 227         return ofs
 228
 229     def _idx_to_hash(self, idx):
 230         return str(self.shatable[idx*20:(idx+1)*20])
 231
 232     def __iter__(self):
 233         for i in xrange(self.fanout[255]):
 234             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 235
 236
 237 extract_bits = _helpers.extract_bits
 238
 239
 240 class PackMidx:
 241     """Wrapper which contains data from multiple index files.
 242     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 243     and make it possible for bup to expand Git's indexing capabilities to vast
 244     amounts of files.
 245     """
 246     def __init__(self, filename):
 247         self.name = filename
 248         self.force_keep = False
 249         assert(filename.endswith('.midx'))
 250         self.map = mmap_read(open(filename))
 251         if str(self.map[0:4]) != 'MIDX':
 252             log('Warning: skipping: invalid MIDX header in %r\n' % filename)
 253             self.force_keep = True
 254             return self._init_failed()
 255         ver = struct.unpack('!I', self.map[4:8])[0]
 256         if ver < MIDX_VERSION:
 257             log('Warning: ignoring old-style (v%d) midx %r\n'
 258                 % (ver, filename))
 259             self.force_keep = False  # old stuff is boring
 260             return self._init_failed()
 261         if ver > MIDX_VERSION:
 262             log('Warning: ignoring too-new (v%d) midx %r\n'
 263                 % (ver, filename))
 264             self.force_keep = True  # new stuff is exciting
 265             return self._init_failed()
 266
 267         self.bits = _helpers.firstword(self.map[8:12])
 268         self.entries = 2**self.bits
 269         self.fanout = buffer(self.map, 12, self.entries*4)
 270         shaofs = 12 + self.entries*4
 271         nsha = self._fanget(self.entries-1)
 272         self.shalist = buffer(self.map, shaofs, nsha*20)
 273         self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 274
 275     def _init_failed(self):
 276         self.bits = 0
 277         self.entries = 1
 278         self.fanout = buffer('\0\0\0\0')
 279         self.shalist = buffer('\0'*20)
 280         self.idxnames = []
 281
 282     def _fanget(self, i):
 283         start = i*4
 284         s = self.fanout[start:start+4]
 285         return _helpers.firstword(s)
 286
 287     def _get(self, i):
 288         return str(self.shalist[i*20:(i+1)*20])
 289
 290     def exists(self, hash):
 291         """Return nonempty if the object exists in the index files."""
 292         global _total_searches, _total_steps
 293         _total_searches += 1
 294         want = str(hash)
 295         el = extract_bits(want, self.bits)
 296         if el:
 297             start = self._fanget(el-1)
 298             startv = el << (32-self.bits)
 299         else:
 300             start = 0
 301             startv = 0
 302         end = self._fanget(el)
 303         endv = (el+1) << (32-self.bits)
 304         _total_steps += 1   # lookup table is a step
 305         hashv = _helpers.firstword(hash)
 306         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 307         while start < end:
 308             _total_steps += 1
 309             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 310             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 311             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 312             v = self._get(mid)
 313             #print '    %08x' % self._num(v)
 314             if v < want:
 315                 start = mid+1
 316                 startv = _helpers.firstword(v)
 317             elif v > want:
 318                 end = mid
 319                 endv = _helpers.firstword(v)
 320             else: # got it!
 321                 return True
 322         return None
 323
 324     def __iter__(self):
 325         for i in xrange(self._fanget(self.entries-1)):
 326             yield buffer(self.shalist, i*20, 20)
 327
 328     def __len__(self):
 329         return int(self._fanget(self.entries-1))
 330
 331
 332 _mpi_count = 0
 333 class PackIdxList:
 334     def __init__(self, dir):
 335         global _mpi_count
 336         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
 337         _mpi_count += 1
 338         self.dir = dir
 339         self.also = {}
 340         self.packs = []
 341         self.refresh()
 342
 343     def __del__(self):
 344         global _mpi_count
 345         _mpi_count -= 1
 346         assert(_mpi_count == 0)
 347
 348     def __iter__(self):
 349         return iter(idxmerge(self.packs))
 350
 351     def __len__(self):
 352         return sum(len(pack) for pack in self.packs)
 353
 354     def exists(self, hash):
 355         """Return nonempty if the object exists in the index files."""
 356         global _total_searches
 357         _total_searches += 1
 358         if hash in self.also:
 359             return True
 360         for i in range(len(self.packs)):
 361             p = self.packs[i]
 362             _total_searches -= 1  # will be incremented by sub-pack
 363             if p.exists(hash):
 364                 # reorder so most recently used packs are searched first
 365                 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
 366                 return p.name
 367         return None
 368
 369     def refresh(self, skip_midx = False):
 370         """Refresh the index list.
 371         This method verifies if .midx files were superseded (e.g. all of its
 372         contents are in another, bigger .midx file) and removes the superseded
 373         files.
 374
 375         If skip_midx is True, all work on .midx files will be skipped and .midx
 376         files will be removed from the list.
 377
 378         The module-global variable 'ignore_midx' can force this function to
 379         always act as if skip_midx was True.
 380         """
 381         skip_midx = skip_midx or ignore_midx
 382         d = dict((p.name, p) for p in self.packs
 383                  if not skip_midx or not isinstance(p, PackMidx))
 384         if os.path.exists(self.dir):
 385             if not skip_midx:
 386                 midxl = []
 387                 for ix in self.packs:
 388                     if isinstance(ix, PackMidx):
 389                         for name in ix.idxnames:
 390                             d[os.path.join(self.dir, name)] = ix
 391                 for f in os.listdir(self.dir):
 392                     full = os.path.join(self.dir, f)
 393                     if f.endswith('.midx') and not d.get(full):
 394                         mx = PackMidx(full)
 395                         (mxd, mxf) = os.path.split(mx.name)
 396                         broken = 0
 397                         for n in mx.idxnames:
 398                             if not os.path.exists(os.path.join(mxd, n)):
 399                                 log(('warning: index %s missing\n' +
 400                                     '  used by %s\n') % (n, mxf))
 401                                 broken += 1
 402                         if broken:
 403                             del mx
 404                             unlink(full)
 405                         else:
 406                             midxl.append(mx)
 407                 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
 408                 for ix in midxl:
 409                     any = 0
 410                     for sub in ix.idxnames:
 411                         found = d.get(os.path.join(self.dir, sub))
 412                         if not found or isinstance(found, PackIdx):
 413                             # doesn't exist, or exists but not in a midx
 414                             d[ix.name] = ix
 415                             for name in ix.idxnames:
 416                                 d[os.path.join(self.dir, name)] = ix
 417                             any += 1
 418                             break
 419                     if not any and not ix.force_keep:
 420                         debug1('midx: removing redundant: %s\n'
 421                                % os.path.basename(ix.name))
 422                         unlink(ix.name)
 423             for f in os.listdir(self.dir):
 424                 full = os.path.join(self.dir, f)
 425                 if f.endswith('.idx') and not d.get(full):
 426                     try:
 427                         ix = open_idx(full)
 428                     except GitError, e:
 429                         add_error(e)
 430                         continue
 431                     d[full] = ix
 432             self.packs = list(set(d.values()))
 433         debug1('PackIdxList: using %d index%s.\n'
 434             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 435
 436     def packname_containing(self, hash):
 437         # figure out which pack contains a given hash.
 438         # FIXME: if the midx file format would just *store* this information,
 439         # we could calculate it a lot more efficiently.  But it's not needed
 440         # often, so let's do it like this.
 441         for f in os.listdir(self.dir):
 442             if f.endswith('.idx'):
 443                 full = os.path.join(self.dir, f)
 444                 try:
 445                     ix = open_idx(full)
 446                 except GitError, e:
 447                     add_error(e)
 448                     continue
 449                 if ix.exists(hash):
 450                     return full
 451
 452     def add(self, hash):
 453         """Insert an additional object in the list."""
 454         self.also[hash] = 1
 455
 456     def zap_also(self):
 457         """Remove all additional objects from the list."""
 458         self.also = {}
 459
 460
 461 def calc_hash(type, content):
 462     """Calculate some content's hash in the Git fashion."""
 463     header = '%s %d\0' % (type, len(content))
 464     sum = Sha1(header)
 465     sum.update(content)
 466     return sum.digest()
 467
 468
 469 def _shalist_sort_key(ent):
 470     (mode, name, id) = ent
 471     if stat.S_ISDIR(int(mode, 8)):
 472         return name + '/'
 473     else:
 474         return name
 475
 476
 477 def open_idx(filename):
 478     if filename.endswith('.idx'):
 479         f = open(filename, 'rb')
 480         header = f.read(8)
 481         if header[0:4] == '\377tOc':
 482             version = struct.unpack('!I', header[4:8])[0]
 483             if version == 2:
 484                 return PackIdxV2(filename, f)
 485             else:
 486                 raise GitError('%s: expected idx file version 2, got %d'
 487                                % (filename, version))
 488         elif len(header) == 8 and header[0:4] < '\377tOc':
 489             return PackIdxV1(filename, f)
 490         else:
 491             raise GitError('%s: unrecognized idx file header' % filename)
 492     elif filename.endswith('.midx'):
 493         return PackMidx(filename)
 494     else:
 495         raise GitError('idx filenames must end with .idx or .midx')
 496
 497
 498 def idxmerge(idxlist, final_progress=True):
 499     """Generate a list of all the objects reachable in a PackIdxList."""
 500     total = sum(len(i) for i in idxlist)
 501     iters = (iter(i) for i in idxlist)
 502     heap = [(next(it), it) for it in iters]
 503     heapq.heapify(heap)
 504     count = 0
 505     last = None
 506     while heap:
 507         if (count % 10024) == 0:
 508             progress('Reading indexes: %.2f%% (%d/%d)\r'
 509                      % (count*100.0/total, count, total))
 510         (e, it) = heap[0]
 511         if e != last:
 512             yield e
 513             last = e
 514         count += 1
 515         e = next(it)
 516         if e:
 517             heapq.heapreplace(heap, (e, it))
 518         else:
 519             heapq.heappop(heap)
 520     if final_progress:
 521         log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 522
 523
 524 class PackWriter:
 525     """Writes Git objects insid a pack file."""
 526     def __init__(self, objcache_maker=None):
 527         self.count = 0
 528         self.outbytes = 0
 529         self.filename = None
 530         self.file = None
 531         self.objcache_maker = objcache_maker
 532         self.objcache = None
 533
 534     def __del__(self):
 535         self.close()
 536
 537     def _make_objcache(self):
 538         if self.objcache == None:
 539             if self.objcache_maker:
 540                 self.objcache = self.objcache_maker()
 541             else:
 542                 self.objcache = PackIdxList(repo('objects/pack'))
 543
 544     def _open(self):
 545         if not self.file:
 546             self._make_objcache()
 547             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 548             self.file = os.fdopen(fd, 'w+b')
 549             assert(name.endswith('.pack'))
 550             self.filename = name[:-5]
 551             self.file.write('PACK\0\0\0\2\0\0\0\0')
 552
 553     def _raw_write(self, datalist):
 554         self._open()
 555         f = self.file
 556         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 557         # the file never has a *partial* blob.  So let's make sure it's
 558         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 559         # to our hashsplit algorithm.)  f.write() does its own buffering,
 560         # but that's okay because we'll flush it in _end().
 561         oneblob = ''.join(datalist)
 562         f.write(oneblob)
 563         self.outbytes += len(oneblob)
 564         self.count += 1
 565
 566     def _write(self, bin, type, content):
 567         if verbose:
 568             log('>')
 569         self._raw_write(_encode_packobj(type, content))
 570         return bin
 571
 572     def breakpoint(self):
 573         """Clear byte and object counts and return the last processed id."""
 574         id = self._end()
 575         self.outbytes = self.count = 0
 576         return id
 577
 578     def write(self, type, content):
 579         """Write an object in this pack file."""
 580         return self._write(calc_hash(type, content), type, content)
 581
 582     def exists(self, id):
 583         """Return non-empty if an object is found in the object cache."""
 584         if not self.objcache:
 585             self._make_objcache()
 586         return self.objcache.exists(id)
 587
 588     def maybe_write(self, type, content):
 589         """Write an object to the pack file if not present and return its id."""
 590         bin = calc_hash(type, content)
 591         if not self.exists(bin):
 592             self._write(bin, type, content)
 593             self.objcache.add(bin)
 594         return bin
 595
 596     def new_blob(self, blob):
 597         """Create a blob object in the pack with the supplied content."""
 598         return self.maybe_write('blob', blob)
 599
 600     def new_tree(self, shalist):
 601         """Create a tree object in the pack."""
 602         shalist = sorted(shalist, key = _shalist_sort_key)
 603         l = []
 604         for (mode,name,bin) in shalist:
 605             assert(mode)
 606             assert(mode != '0')
 607             assert(mode[0] != '0')
 608             assert(name)
 609             assert(len(bin) == 20)
 610             l.append('%s %s\0%s' % (mode,name,bin))
 611         return self.maybe_write('tree', ''.join(l))
 612
 613     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 614         l = []
 615         if tree: l.append('tree %s' % tree.encode('hex'))
 616         if parent: l.append('parent %s' % parent.encode('hex'))
 617         if author: l.append('author %s %s' % (author, _git_date(adate)))
 618         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 619         l.append('')
 620         l.append(msg)
 621         return self.maybe_write('commit', '\n'.join(l))
 622
 623     def new_commit(self, parent, tree, date, msg):
 624         """Create a commit object in the pack."""
 625         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 626         commit = self._new_commit(tree, parent,
 627                                   userline, date, userline, date,
 628                                   msg)
 629         return commit
 630
 631     def abort(self):
 632         """Remove the pack file from disk."""
 633         f = self.file
 634         if f:
 635             self.file = None
 636             f.close()
 637             os.unlink(self.filename + '.pack')
 638
 639     def _end(self):
 640         f = self.file
 641         if not f: return None
 642         self.file = None
 643         self.objcache = None
 644
 645         # update object count
 646         f.seek(8)
 647         cp = struct.pack('!i', self.count)
 648         assert(len(cp) == 4)
 649         f.write(cp)
 650
 651         # calculate the pack sha1sum
 652         f.seek(0)
 653         sum = Sha1()
 654         while 1:
 655             b = f.read(65536)
 656             sum.update(b)
 657             if not b: break
 658         f.write(sum.digest())
 659
 660         f.close()
 661
 662         p = subprocess.Popen(['git', 'index-pack', '-v',
 663                               '--index-version=2',
 664                               self.filename + '.pack'],
 665                              preexec_fn = _gitenv,
 666                              stdout = subprocess.PIPE)
 667         out = p.stdout.read().strip()
 668         _git_wait('git index-pack', p)
 669         if not out:
 670             raise GitError('git index-pack produced no output')
 671         nameprefix = repo('objects/pack/%s' % out)
 672         if os.path.exists(self.filename + '.map'):
 673             os.unlink(self.filename + '.map')
 674         os.rename(self.filename + '.pack', nameprefix + '.pack')
 675         os.rename(self.filename + '.idx', nameprefix + '.idx')
 676
 677         auto_midx(repo('objects/pack'))
 678         return nameprefix
 679
 680     def close(self):
 681         """Close the pack file and move it to its definitive path."""
 682         return self._end()
 683
 684
 685 def _git_date(date):
 686     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 687
 688
 689 def _gitenv():
 690     os.environ['GIT_DIR'] = os.path.abspath(repo())
 691
 692
 693 def list_refs(refname = None):
 694     """Generate a list of tuples in the form (refname,hash).
 695     If a ref name is specified, list only this particular ref.
 696     """
 697     argv = ['git', 'show-ref', '--']
 698     if refname:
 699         argv += [refname]
 700     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 701     out = p.stdout.read().strip()
 702     rv = p.wait()  # not fatal
 703     if rv:
 704         assert(not out)
 705     if out:
 706         for d in out.split('\n'):
 707             (sha, name) = d.split(' ', 1)
 708             yield (name, sha.decode('hex'))
 709
 710
 711 def read_ref(refname):
 712     """Get the commit id of the most recent commit made on a given ref."""
 713     l = list(list_refs(refname))
 714     if l:
 715         assert(len(l) == 1)
 716         return l[0][1]
 717     else:
 718         return None
 719
 720
 721 def rev_list(ref, count=None):
 722     """Generate a list of reachable commits in reverse chronological order.
 723
 724     This generator walks through commits, from child to parent, that are
 725     reachable via the specified ref and yields a series of tuples of the form
 726     (date,hash).
 727
 728     If count is a non-zero integer, limit the number of commits to "count"
 729     objects.
 730     """
 731     assert(not ref.startswith('-'))
 732     opts = []
 733     if count:
 734         opts += ['-n', str(atoi(count))]
 735     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 736     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 737     commit = None
 738     for row in p.stdout:
 739         s = row.strip()
 740         if s.startswith('commit '):
 741             commit = s[7:].decode('hex')
 742         else:
 743             date = int(s)
 744             yield (date, commit)
 745     rv = p.wait()  # not fatal
 746     if rv:
 747         raise GitError, 'git rev-list returned error %d' % rv
 748
 749
 750 def rev_get_date(ref):
 751     """Get the date of the latest commit on the specified ref."""
 752     for (date, commit) in rev_list(ref, count=1):
 753         return date
 754     raise GitError, 'no such commit %r' % ref
 755
 756
 757 def rev_parse(committish):
 758     """Resolve the full hash for 'committish', if it exists.
 759
 760     Should be roughly equivalent to 'git rev-parse'.
 761
 762     Returns the hex value of the hash if it is found, None if 'committish' does
 763     not correspond to anything.
 764     """
 765     head = read_ref(committish)
 766     if head:
 767         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 768         return head
 769
 770     pL = PackIdxList(repo('objects/pack'))
 771
 772     if len(committish) == 40:
 773         try:
 774             hash = committish.decode('hex')
 775         except TypeError:
 776             return None
 777
 778         if pL.exists(hash):
 779             return hash
 780
 781     return None
 782
 783
 784 def update_ref(refname, newval, oldval):
 785     """Change the commit pointed to by a branch."""
 786     if not oldval:
 787         oldval = ''
 788     assert(refname.startswith('refs/heads/'))
 789     p = subprocess.Popen(['git', 'update-ref', refname,
 790                           newval.encode('hex'), oldval.encode('hex')],
 791                          preexec_fn = _gitenv)
 792     _git_wait('git update-ref', p)
 793
 794
 795 def guess_repo(path=None):
 796     """Set the path value in the global variable "repodir".
 797     This makes bup look for an existing bup repository, but not fail if a
 798     repository doesn't exist. Usually, if you are interacting with a bup
 799     repository, you would not be calling this function but using
 800     check_repo_or_die().
 801     """
 802     global repodir
 803     if path:
 804         repodir = path
 805     if not repodir:
 806         repodir = os.environ.get('BUP_DIR')
 807         if not repodir:
 808             repodir = os.path.expanduser('~/.bup')
 809
 810
 811 def init_repo(path=None):
 812     """Create the Git bare repository for bup in a given path."""
 813     guess_repo(path)
 814     d = repo()
 815     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 816         raise GitError('"%d" exists but is not a directory\n' % d)
 817     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 818                          preexec_fn = _gitenv)
 819     _git_wait('git init', p)
 820     # Force the index version configuration in order to ensure bup works
 821     # regardless of the version of the installed Git binary.
 822     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 823                          stdout=sys.stderr, preexec_fn = _gitenv)
 824     _git_wait('git config', p)
 825
 826
 827 def check_repo_or_die(path=None):
 828     """Make sure a bup repository exists, and abort if not.
 829     If the path to a particular repository was not specified, this function
 830     initializes the default repository automatically.
 831     """
 832     guess_repo(path)
 833     if not os.path.isdir(repo('objects/pack/.')):
 834         if repodir == home_repodir:
 835             init_repo()
 836         else:
 837             log('error: %r is not a bup/git repository\n' % repo())
 838             sys.exit(15)
 839
 840
 841 def treeparse(buf):
 842     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 843     ofs = 0
 844     while ofs < len(buf):
 845         z = buf[ofs:].find('\0')
 846         assert(z > 0)
 847         spl = buf[ofs:ofs+z].split(' ', 1)
 848         assert(len(spl) == 2)
 849         sha = buf[ofs+z+1:ofs+z+1+20]
 850         ofs += z+1+20
 851         yield (spl[0], spl[1], sha)
 852
 853
 854 _ver = None
 855 def ver():
 856     """Get Git's version and ensure a usable version is installed.
 857
 858     The returned version is formatted as an ordered tuple with each position
 859     representing a digit in the version tag. For example, the following tuple
 860     would represent version 1.6.6.9:
 861
 862         ('1', '6', '6', '9')
 863     """
 864     global _ver
 865     if not _ver:
 866         p = subprocess.Popen(['git', '--version'],
 867                              stdout=subprocess.PIPE)
 868         gvs = p.stdout.read()
 869         _git_wait('git --version', p)
 870         m = re.match(r'git version (\S+.\S+)', gvs)
 871         if not m:
 872             raise GitError('git --version weird output: %r' % gvs)
 873         _ver = tuple(m.group(1).split('.'))
 874     needed = ('1','5', '3', '1')
 875     if _ver < needed:
 876         raise GitError('git version %s or higher is required; you have %s'
 877                        % ('.'.join(needed), '.'.join(_ver)))
 878     return _ver
 879
 880
 881 def _git_wait(cmd, p):
 882     rv = p.wait()
 883     if rv != 0:
 884         raise GitError('%s returned %d' % (cmd, rv))
 885
 886
 887 def _git_capture(argv):
 888     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 889     r = p.stdout.read()
 890     _git_wait(repr(argv), p)
 891     return r
 892
 893
 894 class _AbortableIter:
 895     def __init__(self, it, onabort = None):
 896         self.it = it
 897         self.onabort = onabort
 898         self.done = None
 899
 900     def __iter__(self):
 901         return self
 902
 903     def next(self):
 904         try:
 905             return self.it.next()
 906         except StopIteration, e:
 907             self.done = True
 908             raise
 909         except:
 910             self.abort()
 911             raise
 912
 913     def abort(self):
 914         """Abort iteration and call the abortion callback, if needed."""
 915         if not self.done:
 916             self.done = True
 917             if self.onabort:
 918                 self.onabort()
 919
 920     def __del__(self):
 921         self.abort()
 922
 923
 924 _ver_warned = 0
 925 class CatPipe:
 926     """Link to 'git cat-file' that is used to retrieve blob data."""
 927     def __init__(self):
 928         global _ver_warned
 929         wanted = ('1','5','6')
 930         if ver() < wanted:
 931             if not _ver_warned:
 932                 log('warning: git version < %s; bup will be slow.\n'
 933                     % '.'.join(wanted))
 934                 _ver_warned = 1
 935             self.get = self._slow_get
 936         else:
 937             self.p = self.inprogress = None
 938             self.get = self._fast_get
 939
 940     def _abort(self):
 941         if self.p:
 942             self.p.stdout.close()
 943             self.p.stdin.close()
 944         self.p = None
 945         self.inprogress = None
 946
 947     def _restart(self):
 948         self._abort()
 949         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 950                                   stdin=subprocess.PIPE,
 951                                   stdout=subprocess.PIPE,
 952                                   close_fds = True,
 953                                   preexec_fn = _gitenv)
 954
 955     def _fast_get(self, id):
 956         if not self.p or self.p.poll() != None:
 957             self._restart()
 958         assert(self.p)
 959         assert(self.p.poll() == None)
 960         if self.inprogress:
 961             log('_fast_get: opening %r while %r is open'
 962                 % (id, self.inprogress))
 963         assert(not self.inprogress)
 964         assert(id.find('\n') < 0)
 965         assert(id.find('\r') < 0)
 966         assert(not id.startswith('-'))
 967         self.inprogress = id
 968         self.p.stdin.write('%s\n' % id)
 969         hdr = self.p.stdout.readline()
 970         if hdr.endswith(' missing\n'):
 971             self.inprogress = None
 972             raise KeyError('blob %r is missing' % id)
 973         spl = hdr.split(' ')
 974         if len(spl) != 3 or len(spl[0]) != 40:
 975             raise GitError('expected blob, got %r' % spl)
 976         (hex, type, size) = spl
 977
 978         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 979                            onabort = self._abort)
 980         try:
 981             yield type
 982             for blob in it:
 983                 yield blob
 984             assert(self.p.stdout.readline() == '\n')
 985             self.inprogress = None
 986         except Exception, e:
 987             it.abort()
 988             raise
 989
 990     def _slow_get(self, id):
 991         assert(id.find('\n') < 0)
 992         assert(id.find('\r') < 0)
 993         assert(id[0] != '-')
 994         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 995         yield type
 996
 997         p = subprocess.Popen(['git', 'cat-file', type, id],
 998                              stdout=subprocess.PIPE,
 999                              preexec_fn = _gitenv)
1000         for blob in chunkyreader(p.stdout):
1001             yield blob
1002         _git_wait('git cat-file', p)
1003
1004     def _join(self, it):
1005         type = it.next()
1006         if type == 'blob':
1007             for blob in it:
1008                 yield blob
1009         elif type == 'tree':
1010             treefile = ''.join(it)
1011             for (mode, name, sha) in treeparse(treefile):
1012                 for blob in self.join(sha.encode('hex')):
1013                     yield blob
1014         elif type == 'commit':
1015             treeline = ''.join(it).split('\n')[0]
1016             assert(treeline.startswith('tree '))
1017             for blob in self.join(treeline[5:]):
1018                 yield blob
1019         else:
1020             raise GitError('invalid object type %r: expected blob/tree/commit'
1021                            % type)
1022
1023     def join(self, id):
1024         """Generate a list of the content of all blobs that can be reached
1025         from an object.  The hash given in 'id' must point to a blob, a tree
1026         or a commit. The content of all blobs that can be seen from trees or
1027         commits will be added to the list.
1028         """
1029         try:
1030             for d in self._join(self.get(id)):
1031                 yield d
1032         except StopIteration:
1033             log('booger!\n')