lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, zlib, time, subprocess, struct, stat, re, tempfile
   6 import heapq
   7 from bup.helpers import *
   8 from bup import _helpers
   9
  10 verbose = 0
  11 ignore_midx = 0
  12 home_repodir = os.path.expanduser('~/.bup')
  13 repodir = None
  14
  15 _typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
  16 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
  17
  18 _total_searches = 0
  19 _total_steps = 0
  20
  21
  22 class GitError(Exception):
  23     pass
  24
  25
  26 def repo(sub = ''):
  27     """Get the path to the git repository or one of its subdirectories."""
  28     global repodir
  29     if not repodir:
  30         raise GitError('You should call check_repo_or_die()')
  31
  32     # If there's a .git subdirectory, then the actual repo is in there.
  33     gd = os.path.join(repodir, '.git')
  34     if os.path.exists(gd):
  35         repodir = gd
  36
  37     return os.path.join(repodir, sub)
  38
  39
  40 def mangle_name(name, mode, gitmode):
  41     """Mangle a file name to present an abstract name for segmented files.
  42     Mangled file names will have the ".bup" extension added to them. If a
  43     file's name already ends with ".bup", a ".bupl" extension is added to
  44     disambiguate normal files from semgmented ones.
  45     """
  46     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  47         return name + '.bup'
  48     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  49         return name + '.bupl'
  50     else:
  51         return name
  52
  53
  54 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  55 def demangle_name(name):
  56     """Remove name mangling from a file name, if necessary.
  57
  58     The return value is a tuple (demangled_filename,mode), where mode is one of
  59     the following:
  60
  61     * BUP_NORMAL  : files that should be read as-is from the repository
  62     * BUP_CHUNKED : files that were chunked and need to be assembled
  63
  64     For more information on the name mangling algorythm, see mangle_name()
  65     """
  66     if name.endswith('.bupl'):
  67         return (name[:-5], BUP_NORMAL)
  68     elif name.endswith('.bup'):
  69         return (name[:-4], BUP_CHUNKED)
  70     else:
  71         return (name, BUP_NORMAL)
  72
  73
  74 def _encode_packobj(type, content):
  75     szout = ''
  76     sz = len(content)
  77     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  78     sz >>= 4
  79     while 1:
  80         if sz: szbits |= 0x80
  81         szout += chr(szbits)
  82         if not sz:
  83             break
  84         szbits = sz & 0x7f
  85         sz >>= 7
  86     z = zlib.compressobj(1)
  87     yield szout
  88     yield z.compress(content)
  89     yield z.flush()
  90
  91
  92 def _encode_looseobj(type, content):
  93     z = zlib.compressobj(1)
  94     yield z.compress('%s %d\0' % (type, len(content)))
  95     yield z.compress(content)
  96     yield z.flush()
  97
  98
  99 def _decode_looseobj(buf):
 100     assert(buf);
 101     s = zlib.decompress(buf)
 102     i = s.find('\0')
 103     assert(i > 0)
 104     l = s[:i].split(' ')
 105     type = l[0]
 106     sz = int(l[1])
 107     content = s[i+1:]
 108     assert(type in _typemap)
 109     assert(sz == len(content))
 110     return (type, content)
 111
 112
 113 def _decode_packobj(buf):
 114     assert(buf)
 115     c = ord(buf[0])
 116     type = _typermap[(c & 0x70) >> 4]
 117     sz = c & 0x0f
 118     shift = 4
 119     i = 0
 120     while c & 0x80:
 121         i += 1
 122         c = ord(buf[i])
 123         sz |= (c & 0x7f) << shift
 124         shift += 7
 125         if not (c & 0x80):
 126             break
 127     return (type, zlib.decompress(buf[i+1:]))
 128
 129
 130 class PackIdx:
 131     """Object representation of a Git pack index file."""
 132     def __init__(self, filename):
 133         self.name = filename
 134         self.map = mmap_read(open(filename))
 135         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 136         self.fanout = list(struct.unpack('!256I',
 137                                          str(buffer(self.map, 8, 256*4))))
 138         self.fanout.append(0)  # entry "-1"
 139         nsha = self.fanout[255]
 140         self.ofstable = buffer(self.map,
 141                                8 + 256*4 + nsha*20 + nsha*4,
 142                                nsha*4)
 143         self.ofs64table = buffer(self.map,
 144                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 145
 146     def _ofs_from_idx(self, idx):
 147         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 148         if ofs & 0x80000000:
 149             idx64 = ofs & 0x7fffffff
 150             ofs = struct.unpack('!I',
 151                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 152         return ofs
 153
 154     def _idx_from_hash(self, hash):
 155         global _total_searches, _total_steps
 156         _total_searches += 1
 157         assert(len(hash) == 20)
 158         b1 = ord(hash[0])
 159         start = self.fanout[b1-1] # range -1..254
 160         end = self.fanout[b1] # range 0..255
 161         buf = buffer(self.map, 8 + 256*4, end*20)
 162         want = str(hash)
 163         _total_steps += 1  # lookup table is a step
 164         while start < end:
 165             _total_steps += 1
 166             mid = start + (end-start)/2
 167             v = str(buf[mid*20:(mid+1)*20])
 168             if v < want:
 169                 start = mid+1
 170             elif v > want:
 171                 end = mid
 172             else: # got it!
 173                 return mid
 174         return None
 175
 176     def find_offset(self, hash):
 177         """Get the offset of an object inside the index file."""
 178         idx = self._idx_from_hash(hash)
 179         if idx != None:
 180             return self._ofs_from_idx(idx)
 181         return None
 182
 183     def exists(self, hash):
 184         """Return nonempty if the object exists in this index."""
 185         return hash and (self._idx_from_hash(hash) != None) and True or None
 186
 187     def __iter__(self):
 188         for i in xrange(self.fanout[255]):
 189             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 190
 191     def __len__(self):
 192         return int(self.fanout[255])
 193
 194
 195 extract_bits = _helpers.extract_bits
 196
 197
 198 class PackMidx:
 199     """Wrapper which contains data from multiple index files.
 200     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 201     and make it possible for bup to expand Git's indexing capabilities to vast
 202     amounts of files.
 203     """
 204     def __init__(self, filename):
 205         self.name = filename
 206         assert(filename.endswith('.midx'))
 207         self.map = mmap_read(open(filename))
 208         if str(self.map[0:8]) == 'MIDX\0\0\0\1':
 209             log('Warning: ignoring old-style midx %r\n' % filename)
 210             self.bits = 0
 211             self.entries = 1
 212             self.fanout = buffer('\0\0\0\0')
 213             self.shalist = buffer('\0'*20)
 214             self.idxnames = []
 215         else:
 216             assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
 217             self.bits = _helpers.firstword(self.map[8:12])
 218             self.entries = 2**self.bits
 219             self.fanout = buffer(self.map, 12, self.entries*4)
 220             shaofs = 12 + self.entries*4
 221             nsha = self._fanget(self.entries-1)
 222             self.shalist = buffer(self.map, shaofs, nsha*20)
 223             self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 224
 225     def _fanget(self, i):
 226         start = i*4
 227         s = self.fanout[start:start+4]
 228         return _helpers.firstword(s)
 229
 230     def _get(self, i):
 231         return str(self.shalist[i*20:(i+1)*20])
 232
 233     def exists(self, hash):
 234         """Return nonempty if the object exists in the index files."""
 235         global _total_searches, _total_steps
 236         _total_searches += 1
 237         want = str(hash)
 238         el = extract_bits(want, self.bits)
 239         if el:
 240             start = self._fanget(el-1)
 241             startv = el << (32-self.bits)
 242         else:
 243             start = 0
 244             startv = 0
 245         end = self._fanget(el)
 246         endv = (el+1) << (32-self.bits)
 247         _total_steps += 1   # lookup table is a step
 248         hashv = _helpers.firstword(hash)
 249         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 250         while start < end:
 251             _total_steps += 1
 252             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 253             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 254             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 255             v = self._get(mid)
 256             #print '    %08x' % self._num(v)
 257             if v < want:
 258                 start = mid+1
 259                 startv = _helpers.firstword(v)
 260             elif v > want:
 261                 end = mid
 262                 endv = _helpers.firstword(v)
 263             else: # got it!
 264                 return True
 265         return None
 266
 267     def __iter__(self):
 268         for i in xrange(self._fanget(self.entries-1)):
 269             yield buffer(self.shalist, i*20, 20)
 270
 271     def __len__(self):
 272         return int(self._fanget(self.entries-1))
 273
 274
 275 _mpi_count = 0
 276 class PackIdxList:
 277     def __init__(self, dir):
 278         global _mpi_count
 279         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
 280         _mpi_count += 1
 281         self.dir = dir
 282         self.also = {}
 283         self.packs = []
 284         self.refresh()
 285
 286     def __del__(self):
 287         global _mpi_count
 288         _mpi_count -= 1
 289         assert(_mpi_count == 0)
 290
 291     def __iter__(self):
 292         return iter(idxmerge(self.packs))
 293
 294     def __len__(self):
 295         return sum(len(pack) for pack in self.packs)
 296
 297     def exists(self, hash):
 298         """Return nonempty if the object exists in the index files."""
 299         global _total_searches
 300         _total_searches += 1
 301         if hash in self.also:
 302             return True
 303         for i in range(len(self.packs)):
 304             p = self.packs[i]
 305             _total_searches -= 1  # will be incremented by sub-pack
 306             if p.exists(hash):
 307                 # reorder so most recently used packs are searched first
 308                 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
 309                 return p.name
 310         return None
 311
 312     def refresh(self, skip_midx = False):
 313         """Refresh the index list.
 314         This method verifies if .midx files were superseded (e.g. all of its
 315         contents are in another, bigger .midx file) and removes the superseded
 316         files.
 317
 318         If skip_midx is True, all work on .midx files will be skipped and .midx
 319         files will be removed from the list.
 320
 321         The module-global variable 'ignore_midx' can force this function to
 322         always act as if skip_midx was True.
 323         """
 324         skip_midx = skip_midx or ignore_midx
 325         d = dict((p.name, p) for p in self.packs
 326                  if not skip_midx or not isinstance(p, PackMidx))
 327         if os.path.exists(self.dir):
 328             if not skip_midx:
 329                 midxl = []
 330                 for ix in self.packs:
 331                     if isinstance(ix, PackMidx):
 332                         for name in ix.idxnames:
 333                             d[os.path.join(self.dir, name)] = ix
 334                 for f in os.listdir(self.dir):
 335                     full = os.path.join(self.dir, f)
 336                     if f.endswith('.midx') and not d.get(full):
 337                         mx = PackMidx(full)
 338                         (mxd, mxf) = os.path.split(mx.name)
 339                         broken = 0
 340                         for n in mx.idxnames:
 341                             if not os.path.exists(os.path.join(mxd, n)):
 342                                 log(('warning: index %s missing\n' +
 343                                     '  used by %s\n') % (n, mxf))
 344                                 broken += 1
 345                         if not broken:
 346                             midxl.append(mx)
 347                 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
 348                 for ix in midxl:
 349                     any = 0
 350                     for sub in ix.idxnames:
 351                         found = d.get(os.path.join(self.dir, sub))
 352                         if not found or isinstance(found, PackIdx):
 353                             # doesn't exist, or exists but not in a midx
 354                             d[ix.name] = ix
 355                             for name in ix.idxnames:
 356                                 d[os.path.join(self.dir, name)] = ix
 357                             any += 1
 358                             break
 359                     if not any:
 360                         log('midx: removing redundant: %s\n'
 361                             % os.path.basename(ix.name))
 362                         unlink(ix.name)
 363             for f in os.listdir(self.dir):
 364                 full = os.path.join(self.dir, f)
 365                 if f.endswith('.idx') and not d.get(full):
 366                     ix = PackIdx(full)
 367                     d[full] = ix
 368             self.packs = list(set(d.values()))
 369         log('PackIdxList: using %d index%s.\n'
 370             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 371
 372     def add(self, hash):
 373         """Insert an additional object in the list."""
 374         self.also[hash] = 1
 375
 376     def zap_also(self):
 377         """Remove all additional objects from the list."""
 378         self.also = {}
 379
 380
 381 def calc_hash(type, content):
 382     """Calculate some content's hash in the Git fashion."""
 383     header = '%s %d\0' % (type, len(content))
 384     sum = Sha1(header)
 385     sum.update(content)
 386     return sum.digest()
 387
 388
 389 def _shalist_sort_key(ent):
 390     (mode, name, id) = ent
 391     if stat.S_ISDIR(int(mode, 8)):
 392         return name + '/'
 393     else:
 394         return name
 395
 396
 397 def idxmerge(idxlist):
 398     """Generate a list of all the objects reachable in a PackIdxList."""
 399     total = sum(len(i) for i in idxlist)
 400     iters = (iter(i) for i in idxlist)
 401     heap = [(next(it), it) for it in iters]
 402     heapq.heapify(heap)
 403     count = 0
 404     last = None
 405     while heap:
 406         if (count % 10024) == 0:
 407             progress('Reading indexes: %.2f%% (%d/%d)\r'
 408                      % (count*100.0/total, count, total))
 409         (e, it) = heap[0]
 410         if e != last:
 411             yield e
 412             last = e
 413         count += 1
 414         e = next(it)
 415         if e:
 416             heapq.heapreplace(heap, (e, it))
 417         else:
 418             heapq.heappop(heap)
 419     log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 420
 421
 422 class PackWriter:
 423     """Writes Git objects insid a pack file."""
 424     def __init__(self, objcache_maker=None):
 425         self.count = 0
 426         self.outbytes = 0
 427         self.filename = None
 428         self.file = None
 429         self.objcache_maker = objcache_maker
 430         self.objcache = None
 431
 432     def __del__(self):
 433         self.close()
 434
 435     def _make_objcache(self):
 436         if self.objcache == None:
 437             if self.objcache_maker:
 438                 self.objcache = self.objcache_maker()
 439             else:
 440                 self.objcache = PackIdxList(repo('objects/pack'))
 441
 442     def _open(self):
 443         if not self.file:
 444             self._make_objcache()
 445             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 446             self.file = os.fdopen(fd, 'w+b')
 447             assert(name.endswith('.pack'))
 448             self.filename = name[:-5]
 449             self.file.write('PACK\0\0\0\2\0\0\0\0')
 450
 451     def _raw_write(self, datalist):
 452         self._open()
 453         f = self.file
 454         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 455         # the file never has a *partial* blob.  So let's make sure it's
 456         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 457         # to our hashsplit algorithm.)  f.write() does its own buffering,
 458         # but that's okay because we'll flush it in _end().
 459         oneblob = ''.join(datalist)
 460         f.write(oneblob)
 461         self.outbytes += len(oneblob)
 462         self.count += 1
 463
 464     def _write(self, bin, type, content):
 465         if verbose:
 466             log('>')
 467         self._raw_write(_encode_packobj(type, content))
 468         return bin
 469
 470     def breakpoint(self):
 471         """Clear byte and object counts and return the last processed id."""
 472         id = self._end()
 473         self.outbytes = self.count = 0
 474         return id
 475
 476     def write(self, type, content):
 477         """Write an object in this pack file."""
 478         return self._write(calc_hash(type, content), type, content)
 479
 480     def exists(self, id):
 481         """Return non-empty if an object is found in the object cache."""
 482         if not self.objcache:
 483             self._make_objcache()
 484         return self.objcache.exists(id)
 485
 486     def maybe_write(self, type, content):
 487         """Write an object to the pack file if not present and return its id."""
 488         bin = calc_hash(type, content)
 489         if not self.exists(bin):
 490             self._write(bin, type, content)
 491             self.objcache.add(bin)
 492         return bin
 493
 494     def new_blob(self, blob):
 495         """Create a blob object in the pack with the supplied content."""
 496         return self.maybe_write('blob', blob)
 497
 498     def new_tree(self, shalist):
 499         """Create a tree object in the pack."""
 500         shalist = sorted(shalist, key = _shalist_sort_key)
 501         l = []
 502         for (mode,name,bin) in shalist:
 503             assert(mode)
 504             assert(mode != '0')
 505             assert(mode[0] != '0')
 506             assert(name)
 507             assert(len(bin) == 20)
 508             l.append('%s %s\0%s' % (mode,name,bin))
 509         return self.maybe_write('tree', ''.join(l))
 510
 511     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 512         l = []
 513         if tree: l.append('tree %s' % tree.encode('hex'))
 514         if parent: l.append('parent %s' % parent.encode('hex'))
 515         if author: l.append('author %s %s' % (author, _git_date(adate)))
 516         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 517         l.append('')
 518         l.append(msg)
 519         return self.maybe_write('commit', '\n'.join(l))
 520
 521     def new_commit(self, parent, tree, msg):
 522         """Create a commit object in the pack."""
 523         now = time.time()
 524         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 525         commit = self._new_commit(tree, parent,
 526                                   userline, now, userline, now,
 527                                   msg)
 528         return commit
 529
 530     def abort(self):
 531         """Remove the pack file from disk."""
 532         f = self.file
 533         if f:
 534             self.file = None
 535             f.close()
 536             os.unlink(self.filename + '.pack')
 537
 538     def _end(self):
 539         f = self.file
 540         if not f: return None
 541         self.file = None
 542         self.objcache = None
 543
 544         # update object count
 545         f.seek(8)
 546         cp = struct.pack('!i', self.count)
 547         assert(len(cp) == 4)
 548         f.write(cp)
 549
 550         # calculate the pack sha1sum
 551         f.seek(0)
 552         sum = Sha1()
 553         while 1:
 554             b = f.read(65536)
 555             sum.update(b)
 556             if not b: break
 557         f.write(sum.digest())
 558
 559         f.close()
 560
 561         p = subprocess.Popen(['git', 'index-pack', '-v',
 562                               '--index-version=2',
 563                               self.filename + '.pack'],
 564                              preexec_fn = _gitenv,
 565                              stdout = subprocess.PIPE)
 566         out = p.stdout.read().strip()
 567         _git_wait('git index-pack', p)
 568         if not out:
 569             raise GitError('git index-pack produced no output')
 570         nameprefix = repo('objects/pack/%s' % out)
 571         if os.path.exists(self.filename + '.map'):
 572             os.unlink(self.filename + '.map')
 573         os.rename(self.filename + '.pack', nameprefix + '.pack')
 574         os.rename(self.filename + '.idx', nameprefix + '.idx')
 575         return nameprefix
 576
 577     def close(self):
 578         """Close the pack file and move it to its definitive path."""
 579         return self._end()
 580
 581
 582 def _git_date(date):
 583     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 584
 585
 586 def _gitenv():
 587     os.environ['GIT_DIR'] = os.path.abspath(repo())
 588
 589
 590 def list_refs(refname = None):
 591     """Generate a list of tuples in the form (refname,hash).
 592     If a ref name is specified, list only this particular ref.
 593     """
 594     argv = ['git', 'show-ref', '--']
 595     if refname:
 596         argv += [refname]
 597     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 598     out = p.stdout.read().strip()
 599     rv = p.wait()  # not fatal
 600     if rv:
 601         assert(not out)
 602     if out:
 603         for d in out.split('\n'):
 604             (sha, name) = d.split(' ', 1)
 605             yield (name, sha.decode('hex'))
 606
 607
 608 def read_ref(refname):
 609     """Get the commit id of the most recent commit made on a given ref."""
 610     l = list(list_refs(refname))
 611     if l:
 612         assert(len(l) == 1)
 613         return l[0][1]
 614     else:
 615         return None
 616
 617
 618 def rev_list(ref, count=None):
 619     """Generate a list of reachable commits in reverse chronological order.
 620
 621     This generator walks through commits, from child to parent, that are
 622     reachable via the specified ref and yields a series of tuples of the form
 623     (date,hash).
 624
 625     If count is a non-zero integer, limit the number of commits to "count"
 626     objects.
 627     """
 628     assert(not ref.startswith('-'))
 629     opts = []
 630     if count:
 631         opts += ['-n', str(atoi(count))]
 632     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 633     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 634     commit = None
 635     for row in p.stdout:
 636         s = row.strip()
 637         if s.startswith('commit '):
 638             commit = s[7:].decode('hex')
 639         else:
 640             date = int(s)
 641             yield (date, commit)
 642     rv = p.wait()  # not fatal
 643     if rv:
 644         raise GitError, 'git rev-list returned error %d' % rv
 645
 646
 647 def rev_get_date(ref):
 648     """Get the date of the latest commit on the specified ref."""
 649     for (date, commit) in rev_list(ref, count=1):
 650         return date
 651     raise GitError, 'no such commit %r' % ref
 652
 653
 654 def update_ref(refname, newval, oldval):
 655     """Change the commit pointed to by a branch."""
 656     if not oldval:
 657         oldval = ''
 658     assert(refname.startswith('refs/heads/'))
 659     p = subprocess.Popen(['git', 'update-ref', refname,
 660                           newval.encode('hex'), oldval.encode('hex')],
 661                          preexec_fn = _gitenv)
 662     _git_wait('git update-ref', p)
 663
 664
 665 def guess_repo(path=None):
 666     """Set the path value in the global variable "repodir".
 667     This makes bup look for an existing bup repository, but not fail if a
 668     repository doesn't exist. Usually, if you are interacting with a bup
 669     repository, you would not be calling this function but using
 670     check_repo_or_die().
 671     """
 672     global repodir
 673     if path:
 674         repodir = path
 675     if not repodir:
 676         repodir = os.environ.get('BUP_DIR')
 677         if not repodir:
 678             repodir = os.path.expanduser('~/.bup')
 679
 680
 681 def init_repo(path=None):
 682     """Create the Git bare repository for bup in a given path."""
 683     guess_repo(path)
 684     d = repo()
 685     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 686         raise GitError('"%d" exists but is not a directory\n' % d)
 687     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 688                          preexec_fn = _gitenv)
 689     _git_wait('git init', p)
 690     # Force the index version configuration in order to ensure bup works
 691     # regardless of the version of the installed Git binary.
 692     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 693                          stdout=sys.stderr, preexec_fn = _gitenv)
 694     _git_wait('git config', p)
 695
 696
 697 def check_repo_or_die(path=None):
 698     """Make sure a bup repository exists, and abort if not.
 699     If the path to a particular repository was not specified, this function
 700     initializes the default repository automatically.
 701     """
 702     guess_repo(path)
 703     if not os.path.isdir(repo('objects/pack/.')):
 704         if repodir == home_repodir:
 705             init_repo()
 706         else:
 707             log('error: %r is not a bup/git repository\n' % repo())
 708             sys.exit(15)
 709
 710
 711 def treeparse(buf):
 712     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 713     ofs = 0
 714     while ofs < len(buf):
 715         z = buf[ofs:].find('\0')
 716         assert(z > 0)
 717         spl = buf[ofs:ofs+z].split(' ', 1)
 718         assert(len(spl) == 2)
 719         sha = buf[ofs+z+1:ofs+z+1+20]
 720         ofs += z+1+20
 721         yield (spl[0], spl[1], sha)
 722
 723
 724 _ver = None
 725 def ver():
 726     """Get Git's version and ensure a usable version is installed.
 727
 728     The returned version is formatted as an ordered tuple with each position
 729     representing a digit in the version tag. For example, the following tuple
 730     would represent version 1.6.6.9:
 731
 732         ('1', '6', '6', '9')
 733     """
 734     global _ver
 735     if not _ver:
 736         p = subprocess.Popen(['git', '--version'],
 737                              stdout=subprocess.PIPE)
 738         gvs = p.stdout.read()
 739         _git_wait('git --version', p)
 740         m = re.match(r'git version (\S+.\S+)', gvs)
 741         if not m:
 742             raise GitError('git --version weird output: %r' % gvs)
 743         _ver = tuple(m.group(1).split('.'))
 744     needed = ('1','5', '3', '1')
 745     if _ver < needed:
 746         raise GitError('git version %s or higher is required; you have %s'
 747                        % ('.'.join(needed), '.'.join(_ver)))
 748     return _ver
 749
 750
 751 def _git_wait(cmd, p):
 752     rv = p.wait()
 753     if rv != 0:
 754         raise GitError('%s returned %d' % (cmd, rv))
 755
 756
 757 def _git_capture(argv):
 758     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 759     r = p.stdout.read()
 760     _git_wait(repr(argv), p)
 761     return r
 762
 763
 764 class _AbortableIter:
 765     def __init__(self, it, onabort = None):
 766         self.it = it
 767         self.onabort = onabort
 768         self.done = None
 769
 770     def __iter__(self):
 771         return self
 772
 773     def next(self):
 774         try:
 775             return self.it.next()
 776         except StopIteration, e:
 777             self.done = True
 778             raise
 779         except:
 780             self.abort()
 781             raise
 782
 783     def abort(self):
 784         """Abort iteration and call the abortion callback, if needed."""
 785         if not self.done:
 786             self.done = True
 787             if self.onabort:
 788                 self.onabort()
 789
 790     def __del__(self):
 791         self.abort()
 792
 793
 794 _ver_warned = 0
 795 class CatPipe:
 796     """Link to 'git cat-file' that is used to retrieve blob data."""
 797     def __init__(self):
 798         global _ver_warned
 799         wanted = ('1','5','6')
 800         if ver() < wanted:
 801             if not _ver_warned:
 802                 log('warning: git version < %s; bup will be slow.\n'
 803                     % '.'.join(wanted))
 804                 _ver_warned = 1
 805             self.get = self._slow_get
 806         else:
 807             self.p = self.inprogress = None
 808             self.get = self._fast_get
 809
 810     def _abort(self):
 811         if self.p:
 812             self.p.stdout.close()
 813             self.p.stdin.close()
 814         self.p = None
 815         self.inprogress = None
 816
 817     def _restart(self):
 818         self._abort()
 819         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 820                                   stdin=subprocess.PIPE,
 821                                   stdout=subprocess.PIPE,
 822                                   close_fds = True,
 823                                   preexec_fn = _gitenv)
 824
 825     def _fast_get(self, id):
 826         if not self.p or self.p.poll() != None:
 827             self._restart()
 828         assert(self.p)
 829         assert(self.p.poll() == None)
 830         if self.inprogress:
 831             log('_fast_get: opening %r while %r is open'
 832                 % (id, self.inprogress))
 833         assert(not self.inprogress)
 834         assert(id.find('\n') < 0)
 835         assert(id.find('\r') < 0)
 836         assert(id[0] != '-')
 837         self.inprogress = id
 838         self.p.stdin.write('%s\n' % id)
 839         hdr = self.p.stdout.readline()
 840         if hdr.endswith(' missing\n'):
 841             raise KeyError('blob %r is missing' % id)
 842         spl = hdr.split(' ')
 843         if len(spl) != 3 or len(spl[0]) != 40:
 844             raise GitError('expected blob, got %r' % spl)
 845         (hex, type, size) = spl
 846
 847         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 848                            onabort = self._abort)
 849         try:
 850             yield type
 851             for blob in it:
 852                 yield blob
 853             assert(self.p.stdout.readline() == '\n')
 854             self.inprogress = None
 855         except Exception, e:
 856             it.abort()
 857             raise
 858
 859     def _slow_get(self, id):
 860         assert(id.find('\n') < 0)
 861         assert(id.find('\r') < 0)
 862         assert(id[0] != '-')
 863         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 864         yield type
 865
 866         p = subprocess.Popen(['git', 'cat-file', type, id],
 867                              stdout=subprocess.PIPE,
 868                              preexec_fn = _gitenv)
 869         for blob in chunkyreader(p.stdout):
 870             yield blob
 871         _git_wait('git cat-file', p)
 872
 873     def _join(self, it):
 874         type = it.next()
 875         if type == 'blob':
 876             for blob in it:
 877                 yield blob
 878         elif type == 'tree':
 879             treefile = ''.join(it)
 880             for (mode, name, sha) in treeparse(treefile):
 881                 for blob in self.join(sha.encode('hex')):
 882                     yield blob
 883         elif type == 'commit':
 884             treeline = ''.join(it).split('\n')[0]
 885             assert(treeline.startswith('tree '))
 886             for blob in self.join(treeline[5:]):
 887                 yield blob
 888         else:
 889             raise GitError('invalid object type %r: expected blob/tree/commit'
 890                            % type)
 891
 892     def join(self, id):
 893         """Generate a list of the content of all blobs that can be reached
 894         from an object.  The hash given in 'id' must point to a blob, a tree
 895         or a commit. The content of all blobs that can be seen from trees or
 896         commits will be added to the list.
 897         """
 898         try:
 899             for d in self._join(self.get(id)):
 900                 yield d
 901         except StopIteration:
 902             log('booger!\n')