1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, zlib, time, subprocess, struct, stat, re, tempfile
7 from bup.helpers import *
8 from bup import _helpers
# Default repository location, used when no explicit path/BUP_DIR is given.
# NOTE(review): other module globals referenced later (repodir, ignore_midx,
# MIDX_VERSION, _total_searches, _total_steps, _mpi_count) appear elided
# from this listing.
home_repodir = os.path.expanduser('~/.bup')

# Git pack object type name <-> numeric code mappings (pack format codes).
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
class GitError(Exception):
    """Raised for bup-specific git failures.

    Covers a missing/invalid repository and failed git subcommands; the
    exception message carries the details.
    """
    pass
29 """Get the path to the git repository or one of its subdirectories."""
32 raise GitError('You should call check_repo_or_die()')
34 # If there's a .git subdirectory, then the actual repo is in there.
35 gd = os.path.join(repodir, '.git')
36 if os.path.exists(gd):
39 return os.path.join(repodir, sub)
42 def mangle_name(name, mode, gitmode):
43 """Mangle a file name to present an abstract name for segmented files.
44 Mangled file names will have the ".bup" extension added to them. If a
45 file's name already ends with ".bup", a ".bupl" extension is added to
46 disambiguate normal files from semgmented ones.
48 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
50 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
# File "modes" returned by demangle_name().
(BUP_NORMAL, BUP_CHUNKED) = (0,1)


def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be assembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    else:
        return (name, BUP_NORMAL)
def _encode_packobj(type, content):
    """Yield the pack-format encoding of one object: a variable-length
    size/type header followed by the zlib-compressed content.

    NOTE(review): the size-header loop and the trailing z.flush() were elided
    from this listing and are reconstructed here — without the flush the
    compressed stream is truncated.
    """
    szout = ''
    sz = len(content)
    # Low 4 bits of the size share the first byte with the 3-bit type code.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz:
            szbits |= 0x80  # continuation bit: more size bytes follow
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    z = zlib.compressobj(1)
    yield szout
    yield z.compress(content)
    yield z.flush()
def _encode_looseobj(type, content):
    """Yield the loose-object encoding of 'content': zlib-compressed
    '<type> <size>\\0' header followed by the content.

    The trailing z.flush() is required to terminate the zlib stream; it was
    elided from this listing and is restored here.
    """
    z = zlib.compressobj(1)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()
def _decode_looseobj(buf):
    """Decode a loose object; return (type, content).

    Inverse of _encode_looseobj(). NOTE(review): the header-parsing
    statements were elided from this listing and are reconstructed here.
    """
    assert(buf)
    s = zlib.decompress(buf)
    i = s.find('\0')
    assert(i > 0)
    l = s[:i].split(' ')
    assert(len(l) == 2)
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
def _decode_packobj(buf):
    """Decode a pack-format object; return (type, content).

    Inverse of _encode_packobj(). NOTE(review): the variable-length size
    loop was elided from this listing and is reconstructed here.
    """
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    # Size continues while the continuation bit (0x80) is set.
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
    return (type, zlib.decompress(buf[i+1:]))
133 """Object representation of a Git pack index file."""
134 def __init__(self, filename):
136 self.map = mmap_read(open(filename))
137 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
138 self.fanout = list(struct.unpack('!256I',
139 str(buffer(self.map, 8, 256*4))))
140 self.fanout.append(0) # entry "-1"
141 nsha = self.fanout[255]
142 self.ofstable = buffer(self.map,
143 8 + 256*4 + nsha*20 + nsha*4,
145 self.ofs64table = buffer(self.map,
146 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
148 def _ofs_from_idx(self, idx):
149 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
151 idx64 = ofs & 0x7fffffff
152 ofs = struct.unpack('!I',
153 str(buffer(self.ofs64table, idx64*8, 8)))[0]
156 def _idx_from_hash(self, hash):
157 global _total_searches, _total_steps
159 assert(len(hash) == 20)
161 start = self.fanout[b1-1] # range -1..254
162 end = self.fanout[b1] # range 0..255
163 buf = buffer(self.map, 8 + 256*4, end*20)
165 _total_steps += 1 # lookup table is a step
168 mid = start + (end-start)/2
169 v = str(buf[mid*20:(mid+1)*20])
178 def find_offset(self, hash):
179 """Get the offset of an object inside the index file."""
180 idx = self._idx_from_hash(hash)
182 return self._ofs_from_idx(idx)
185 def exists(self, hash):
186 """Return nonempty if the object exists in this index."""
187 return hash and (self._idx_from_hash(hash) != None) and True or None
190 for i in xrange(self.fanout[255]):
191 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
194 return int(self.fanout[255])
# Use the C implementation from the _helpers extension; used below for
# midx prefix-bit lookups.
extract_bits = _helpers.extract_bits
201 """Wrapper which contains data from multiple index files.
202 Multiple index (.midx) files constitute a wrapper around index (.idx) files
203 and make it possible for bup to expand Git's indexing capabilities to vast
206 def __init__(self, filename):
208 self.force_keep = False
209 assert(filename.endswith('.midx'))
210 self.map = mmap_read(open(filename))
211 if str(self.map[0:4]) != 'MIDX':
212 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
213 self.force_keep = True
214 return self._init_failed()
215 ver = struct.unpack('!I', self.map[4:8])[0]
216 if ver < MIDX_VERSION:
217 log('Warning: ignoring old-style (v%d) midx %r\n'
219 self.force_keep = False # old stuff is boring
220 return self._init_failed()
221 if ver > MIDX_VERSION:
222 log('Warning: ignoring too-new (v%d) midx %r\n'
224 self.force_keep = True # new stuff is exciting
225 return self._init_failed()
227 self.bits = _helpers.firstword(self.map[8:12])
228 self.entries = 2**self.bits
229 self.fanout = buffer(self.map, 12, self.entries*4)
230 shaofs = 12 + self.entries*4
231 nsha = self._fanget(self.entries-1)
232 self.shalist = buffer(self.map, shaofs, nsha*20)
233 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
235 def _init_failed(self):
238 self.fanout = buffer('\0\0\0\0')
239 self.shalist = buffer('\0'*20)
242 def _fanget(self, i):
244 s = self.fanout[start:start+4]
245 return _helpers.firstword(s)
248 return str(self.shalist[i*20:(i+1)*20])
250 def exists(self, hash):
251 """Return nonempty if the object exists in the index files."""
252 global _total_searches, _total_steps
255 el = extract_bits(want, self.bits)
257 start = self._fanget(el-1)
258 startv = el << (32-self.bits)
262 end = self._fanget(el)
263 endv = (el+1) << (32-self.bits)
264 _total_steps += 1 # lookup table is a step
265 hashv = _helpers.firstword(hash)
266 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
269 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
270 mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
271 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
273 #print ' %08x' % self._num(v)
276 startv = _helpers.firstword(v)
279 endv = _helpers.firstword(v)
285 for i in xrange(self._fanget(self.entries-1)):
286 yield buffer(self.shalist, i*20, 20)
289 return int(self._fanget(self.entries-1))
# NOTE(review): the `class PackIdxList:` header and the __del__/__iter__/
# __len__ method headers are elided from this listing; visible lines are
# preserved verbatim with comments only.

    def __init__(self, dir):
        # Only one PackIdxList may be alive at a time (module-global
        # _mpi_count guard; its definition is elided here).
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        # [elided: self.dir/self.also/self.packs initialization, refresh()]

    # [elided: def __del__(self): tail]
        assert(_mpi_count == 0)

    # [elided: def __iter__(self): — merged iteration over all indexes]
        return iter(idxmerge(self.packs))

    # [elided: def __len__(self):]
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        # 'also' holds hashes injected via add(), checked before the packs.
        if hash in self.also:
        for i in range(len(self.packs)):
            # [elided: p = self.packs[i] and the p.exists(hash) check]
            _total_searches -= 1 # will be incremented by sub-pack
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
        # [elided: return values]

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        skip_midx = skip_midx or ignore_midx
        # Keep the already-loaded packs we still want, keyed by file name.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, PackMidx))
        if os.path.exists(self.dir):
            # [elided: `if not skip_midx:` / midxl = [] setup]
            for ix in self.packs:
                if isinstance(ix, PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for f in os.listdir(self.dir):
                full = os.path.join(self.dir, f)
                if f.endswith('.midx') and not d.get(full):
                    # [elided: mx = PackMidx(full)]
                    (mxd, mxf) = os.path.split(mx.name)
                    # Warn about midx files whose source .idx files vanished.
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n' +
                                 '  used by %s\n') % (n, mxf))
            # Biggest midx first, so smaller (redundant) ones get dropped.
            midxl.sort(lambda x,y: -cmp(len(x),len(y)))
            # [elided: loop header `for ix in midxl:` and `any = 0`]
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
                    # [elided: register ix and mark it as used]
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            if not any and not ix.force_keep:
                log('midx: removing redundant: %s\n'
                    % os.path.basename(ix.name))
                # [elided: os.unlink of the redundant midx]
            for f in os.listdir(self.dir):
                full = os.path.join(self.dir, f)
                if f.endswith('.idx') and not d.get(full):
                    # [elided: load PackIdx(full) into d]
        self.packs = list(set(d.values()))
        log('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    # [elided: def add(self, hash): header and body]
        """Insert an additional object in the list."""

    # [elided: def zap_also(self): header and body]
        """Remove all additional objects from the list."""
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion.

    Git hashes '<type> <size>\\0' + content with sha1; returns the 20-byte
    binary digest. NOTE(review): the hashing statements were elided from
    this listing and are reconstructed (Sha1 comes from bup.helpers).
    """
    header = '%s %d\0' % (type, len(content))
    csum = Sha1(header)  # renamed from 'sum' to avoid shadowing the builtin
    csum.update(content)
    return csum.digest()
406 def _shalist_sort_key(ent):
407 (mode, name, id) = ent
408 if stat.S_ISDIR(int(mode, 8)):
def idxmerge(idxlist):
    """Generate a list of all the objects reachable in a PackIdxList.

    Performs a heap-based k-way merge of the (sorted) iterators of every
    index, yielding each distinct sha exactly once, in sorted order.
    NOTE(review): the merge loop was elided from this listing and is
    reconstructed here; requires the module-level 'heapq' import.
    """
    total = sum(len(i) for i in idxlist)
    iters = (iter(i) for i in idxlist)
    heap = [(next(it), it) for it in iters]
    heapq.heapify(heap)
    count = 0
    last = None
    while heap:
        if (count % 10024) == 0:
            progress('Reading indexes: %.2f%% (%d/%d)\r'
                     % (count*100.0/total, count, total))
        (e, it) = heap[0]
        if e != last:   # skip duplicates across indexes
            yield e
            last = e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap)     # this index is exhausted
        else:
            heapq.heapreplace(heap, (e, it))
    log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
440 """Writes Git objects insid a pack file."""
441 def __init__(self, objcache_maker=None):
446 self.objcache_maker = objcache_maker
452 def _make_objcache(self):
453 if self.objcache == None:
454 if self.objcache_maker:
455 self.objcache = self.objcache_maker()
457 self.objcache = PackIdxList(repo('objects/pack'))
461 self._make_objcache()
462 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
463 self.file = os.fdopen(fd, 'w+b')
464 assert(name.endswith('.pack'))
465 self.filename = name[:-5]
466 self.file.write('PACK\0\0\0\2\0\0\0\0')
468 def _raw_write(self, datalist):
471 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
472 # the file never has a *partial* blob. So let's make sure it's
473 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
474 # to our hashsplit algorithm.) f.write() does its own buffering,
475 # but that's okay because we'll flush it in _end().
476 oneblob = ''.join(datalist)
478 self.outbytes += len(oneblob)
481 def _write(self, bin, type, content):
484 self._raw_write(_encode_packobj(type, content))
487 def breakpoint(self):
488 """Clear byte and object counts and return the last processed id."""
490 self.outbytes = self.count = 0
493 def write(self, type, content):
494 """Write an object in this pack file."""
495 return self._write(calc_hash(type, content), type, content)
497 def exists(self, id):
498 """Return non-empty if an object is found in the object cache."""
499 if not self.objcache:
500 self._make_objcache()
501 return self.objcache.exists(id)
503 def maybe_write(self, type, content):
504 """Write an object to the pack file if not present and return its id."""
505 bin = calc_hash(type, content)
506 if not self.exists(bin):
507 self._write(bin, type, content)
508 self.objcache.add(bin)
511 def new_blob(self, blob):
512 """Create a blob object in the pack with the supplied content."""
513 return self.maybe_write('blob', blob)
515 def new_tree(self, shalist):
516 """Create a tree object in the pack."""
517 shalist = sorted(shalist, key = _shalist_sort_key)
519 for (mode,name,bin) in shalist:
522 assert(mode[0] != '0')
524 assert(len(bin) == 20)
525 l.append('%s %s\0%s' % (mode,name,bin))
526 return self.maybe_write('tree', ''.join(l))
528 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
530 if tree: l.append('tree %s' % tree.encode('hex'))
531 if parent: l.append('parent %s' % parent.encode('hex'))
532 if author: l.append('author %s %s' % (author, _git_date(adate)))
533 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
536 return self.maybe_write('commit', '\n'.join(l))
538 def new_commit(self, parent, tree, msg):
539 """Create a commit object in the pack."""
541 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
542 commit = self._new_commit(tree, parent,
543 userline, now, userline, now,
548 """Remove the pack file from disk."""
553 os.unlink(self.filename + '.pack')
557 if not f: return None
561 # update object count
563 cp = struct.pack('!i', self.count)
567 # calculate the pack sha1sum
574 f.write(sum.digest())
578 p = subprocess.Popen(['git', 'index-pack', '-v',
580 self.filename + '.pack'],
581 preexec_fn = _gitenv,
582 stdout = subprocess.PIPE)
583 out = p.stdout.read().strip()
584 _git_wait('git index-pack', p)
586 raise GitError('git index-pack produced no output')
587 nameprefix = repo('objects/pack/%s' % out)
588 if os.path.exists(self.filename + '.map'):
589 os.unlink(self.filename + '.map')
590 os.rename(self.filename + '.pack', nameprefix + '.pack')
591 os.rename(self.filename + '.idx', nameprefix + '.idx')
595 """Close the pack file and move it to its definitive path."""
# NOTE(review): orphaned remnants of two elided helpers.  The first line is
# the body of _git_date(date) — formats a timestamp as git's
# '<epoch> <tzoffset>'.
    return '%d %s' % (date, time.strftime('%z', time.localtime(date)))

# The second is the body of _gitenv(), used as subprocess preexec_fn to
# point git at the bup repository.
    os.environ['GIT_DIR'] = os.path.abspath(repo())
def list_refs(refname = None):
    """Generate a list of tuples in the form (refname,hash).
    If a ref name is specified, list only this particular ref.

    NOTE(review): the refname filter and the return-code handling were
    elided from this listing and are reconstructed here.
    """
    argv = ['git', 'show-ref', '--']
    if refname:
        argv += [refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    if rv:
        assert(not out)
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
def read_ref(refname):
    """Get the commit id of the most recent commit made on a given ref.

    Returns the 20-byte binary sha, or None if the ref does not exist.
    """
    l = list(list_refs(refname))
    if l:
        assert(len(l) == 1)
        return l[0][1]
    else:
        return None
def rev_list(ref, count=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    (date, commit_id).

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    assert(not ref.startswith('-'))  # don't let a ref be parsed as an option
    opts = []
    if count:
        opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    commit = None
    for row in p.stdout:
        s = row.strip()
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
        else:
            # A bare timestamp line produced by --pretty=format:%ct.
            date = int(s)
            yield (date, commit)
    rv = p.wait()  # not fatal
    if rv:
        # Modernized from the legacy `raise GitError, msg` statement form.
        raise GitError('git rev-list returned error %d' % rv)
def rev_get_date(ref):
    """Get the date of the latest commit on the specified ref.

    Raises GitError if the ref yields no commits.
    """
    for (date, commit) in rev_list(ref, count=1):
        return date
    # Modernized from the legacy `raise GitError, msg` statement form.
    raise GitError('no such commit %r' % ref)
def update_ref(refname, newval, oldval):
    """Change the commit pointed to by a branch.

    newval/oldval are 20-byte binary shas; oldval may be falsy for a
    newly-created ref.
    """
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv)
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    global repodir
    if path:
        repodir = path
    if not repodir:
        # Fall back to $BUP_DIR, then to the default ~/.bup location.
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path.

    Raises GitError if the target path exists but is not a directory.
    """
    guess_repo(path)
    d = repo()
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        # BUGFIX: the original used '%d' on a string path, which would
        # raise TypeError instead of the intended GitError message.
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv)
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv)
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    guess_repo(path)
    if not os.path.isdir(repo('objects/pack/.')):
        if repodir == home_repodir:
            # Default location: create it on demand.
            init_repo()
        else:
            log('error: %r is not a bup/git repository\n' % repo())
            sys.exit(15)
729 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
731 while ofs < len(buf):
732 z = buf[ofs:].find('\0')
734 spl = buf[ofs:ofs+z].split(' ', 1)
735 assert(len(spl) == 2)
736 sha = buf[ofs+z+1:ofs+z+1+20]
738 yield (spl[0], spl[1], sha)
743 """Get Git's version and ensure a usable version is installed.
745 The returned version is formatted as an ordered tuple with each position
746 representing a digit in the version tag. For example, the following tuple
747 would represent version 1.6.6.9:
753 p = subprocess.Popen(['git', '--version'],
754 stdout=subprocess.PIPE)
755 gvs = p.stdout.read()
756 _git_wait('git --version', p)
757 m = re.match(r'git version (\S+.\S+)', gvs)
759 raise GitError('git --version weird output: %r' % gvs)
760 _ver = tuple(m.group(1).split('.'))
761 needed = ('1','5', '3', '1')
763 raise GitError('git version %s or higher is required; you have %s'
764 % ('.'.join(needed), '.'.join(_ver)))
768 def _git_wait(cmd, p):
771 raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    """Run 'argv' in the repository environment and return its stdout.

    Raises GitError (via _git_wait) if the command fails.
    """
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r
class _AbortableIter:
    # Iterator wrapper that runs an 'onabort' callback if iteration is
    # abandoned before completion (used by CatPipe to resync the pipe).
    # NOTE(review): several statements are elided from this listing.
    def __init__(self, it, onabort = None):
        # [elided: self.it = it and 'done' flag initialization]
        self.onabort = onabort

    # [elided: def __iter__ / def next(self): headers — py2 iterator protocol]
            return self.it.next()
        except StopIteration, e:
        # [elided: done-flag setting, re-raise, and generic-except abort path]

    # [elided: def abort(self): header]
        """Abort iteration and call the abortion callback, if needed."""
        # [elided: abort body and __del__ delegating to abort()]
813 """Link to 'git cat-file' that is used to retrieve blob data."""
816 wanted = ('1','5','6')
819 log('warning: git version < %s; bup will be slow.\n'
822 self.get = self._slow_get
824 self.p = self.inprogress = None
825 self.get = self._fast_get
829 self.p.stdout.close()
832 self.inprogress = None
836 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
837 stdin=subprocess.PIPE,
838 stdout=subprocess.PIPE,
840 preexec_fn = _gitenv)
842 def _fast_get(self, id):
843 if not self.p or self.p.poll() != None:
846 assert(self.p.poll() == None)
848 log('_fast_get: opening %r while %r is open'
849 % (id, self.inprogress))
850 assert(not self.inprogress)
851 assert(id.find('\n') < 0)
852 assert(id.find('\r') < 0)
855 self.p.stdin.write('%s\n' % id)
856 hdr = self.p.stdout.readline()
857 if hdr.endswith(' missing\n'):
858 raise KeyError('blob %r is missing' % id)
860 if len(spl) != 3 or len(spl[0]) != 40:
861 raise GitError('expected blob, got %r' % spl)
862 (hex, type, size) = spl
864 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
865 onabort = self._abort)
870 assert(self.p.stdout.readline() == '\n')
871 self.inprogress = None
876 def _slow_get(self, id):
877 assert(id.find('\n') < 0)
878 assert(id.find('\r') < 0)
880 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
883 p = subprocess.Popen(['git', 'cat-file', type, id],
884 stdout=subprocess.PIPE,
885 preexec_fn = _gitenv)
886 for blob in chunkyreader(p.stdout):
888 _git_wait('git cat-file', p)
896 treefile = ''.join(it)
897 for (mode, name, sha) in treeparse(treefile):
898 for blob in self.join(sha.encode('hex')):
900 elif type == 'commit':
901 treeline = ''.join(it).split('\n')[0]
902 assert(treeline.startswith('tree '))
903 for blob in self.join(treeline[5:]):
906 raise GitError('invalid object type %r: expected blob/tree/commit'
910 """Generate a list of the content of all blobs that can be reached
911 from an object. The hash given in 'id' must point to a blob, a tree
912 or a commit. The content of all blobs that can be seen from trees or
913 commits will be added to the list.
916 for d in self._join(self.get(id)):
918 except StopIteration: