1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, zlib, time, subprocess, struct, stat, re, tempfile
7 from bup.helpers import *
8 from bup import _helpers
14 home_repodir = os.path.expanduser('~/.bup')
17 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
18 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
class GitError(Exception):
    pass


def repo(sub = ''):
    """Get the path to the git repository or one of its subdirectories."""
    global repodir
    if not repodir:
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repodir, '.git')
    if os.path.exists(gd):
        repodir = gd

    return os.path.join(repodir, sub)

def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    else:
        return name

(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one
    of the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be assembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    else:
        return (name, BUP_NORMAL)

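# Round-trip sketch (illustrative, not part of the original module): a regular
# file that bup stored as a git tree (i.e. it was chunked) gets '.bup'
# appended, while a file whose real name already ends in '.bup' gets '.bupl'
# so it cannot be mistaken for a chunked one:
#
#   mangle_name('foo', stat.S_IFREG | 0644, stat.S_IFDIR | 0755)        => 'foo.bup'
#   demangle_name('foo.bup')                                            => ('foo', BUP_CHUNKED)
#   mangle_name('notes.bup', stat.S_IFREG | 0644, stat.S_IFREG | 0644)  => 'notes.bupl'
#   demangle_name('notes.bupl')                                         => ('notes', BUP_NORMAL)
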
def _encode_packobj(type, content):
    szout = ''
    sz = len(content)
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    z = zlib.compressobj(1)
    yield szout
    yield z.compress(content)
    yield z.flush()

def _encode_looseobj(type, content):
    z = zlib.compressobj(1)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()

def _decode_looseobj(buf):
    assert(buf)
    s = zlib.decompress(buf)
    i = s.find('\0')
    assert(i > 0)
    l = s[:i].split(' ')
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)

def _decode_packobj(buf):
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
        if not (c & 0x80):
            break
    return (type, zlib.decompress(buf[i+1:]))

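# Round-trip sketch (illustrative, not part of the original module): the pack
# encoding is a variable-length header (object type in bits 4-6, size in the
# low 4 bits plus 7 more bits per continuation byte) followed by the
# zlib-compressed content:
#
#   packed = ''.join(_encode_packobj('blob', 'hello world'))
#   _decode_packobj(packed)   => ('blob', 'hello world')
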
133 """Object representation of a Git pack index file."""
134 def __init__(self, filename):
136 self.idxnames = [self.name]
137 self.map = mmap_read(open(filename))
138 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
139 self.fanout = list(struct.unpack('!256I',
140 str(buffer(self.map, 8, 256*4))))
141 self.fanout.append(0) # entry "-1"
142 nsha = self.fanout[255]
143 self.ofstable = buffer(self.map,
144 8 + 256*4 + nsha*20 + nsha*4,
146 self.ofs64table = buffer(self.map,
147 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        if ofs & 0x80000000:
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
        return ofs

    def _idx_from_hash(self, hash):
        global _total_searches, _total_steps
        _total_searches += 1
        assert(len(hash) == 20)
        b1 = ord(hash[0])
        start = self.fanout[b1-1]  # range -1..254
        end = self.fanout[b1]      # range 0..255
        buf = buffer(self.map, 8 + 256*4, end*20)
        want = str(hash)
        _total_steps += 1  # lookup table is a step
        while start < end:
            _total_steps += 1
            mid = start + (end-start)/2
            v = str(buf[mid*20:(mid+1)*20])
            if v < want:
                start = mid+1
            elif v > want:
                end = mid
            else: # got it!
                return mid
        return None

    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        if idx != None:
            return self._ofs_from_idx(idx)
        return None

    def exists(self, hash):
        """Return nonempty if the object exists in this index."""
        return hash and (self._idx_from_hash(hash) != None) and True or None

    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)

    def __len__(self):
        return int(self.fanout[255])

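# A PackIdx answers "is this 20-byte binary sha1 in the pack, and at what
# offset?" using the 256-entry fanout table plus a binary search over the
# sorted sha1 list.  A minimal sketch, assuming a hypothetical .idx path
# (see open_idx() below):
#
#   # pi = open_idx('/path/to/objects/pack/pack-1234abcd.idx')  # hypothetical
#   # if pi.exists(sha):                 # sha is a 20-byte binary digest
#   #     ofs = pi.find_offset(sha)      # byte offset inside the .pack file
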
extract_bits = _helpers.extract_bits

202 """Wrapper which contains data from multiple index files.
203 Multiple index (.midx) files constitute a wrapper around index (.idx) files
204 and make it possible for bup to expand Git's indexing capabilities to vast
207 def __init__(self, filename):
209 self.force_keep = False
210 assert(filename.endswith('.midx'))
211 self.map = mmap_read(open(filename))
212 if str(self.map[0:4]) != 'MIDX':
213 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
214 self.force_keep = True
215 return self._init_failed()
216 ver = struct.unpack('!I', self.map[4:8])[0]
217 if ver < MIDX_VERSION:
218 log('Warning: ignoring old-style (v%d) midx %r\n'
220 self.force_keep = False # old stuff is boring
221 return self._init_failed()
222 if ver > MIDX_VERSION:
223 log('Warning: ignoring too-new (v%d) midx %r\n'
225 self.force_keep = True # new stuff is exciting
226 return self._init_failed()
228 self.bits = _helpers.firstword(self.map[8:12])
229 self.entries = 2**self.bits
230 self.fanout = buffer(self.map, 12, self.entries*4)
231 shaofs = 12 + self.entries*4
232 nsha = self._fanget(self.entries-1)
233 self.shalist = buffer(self.map, shaofs, nsha*20)
234 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
    def _init_failed(self):
        self.bits = 0
        self.entries = 1
        self.fanout = buffer('\0\0\0\0')
        self.shalist = buffer('\0'*20)
        self.idxnames = []

    def _fanget(self, i):
        start = i*4
        s = self.fanout[start:start+4]
        return _helpers.firstword(s)

    def _get(self, i):
        return str(self.shalist[i*20:(i+1)*20])

    def exists(self, hash):
        """Return nonempty if the object exists in the index files."""
        global _total_searches, _total_steps
        _total_searches += 1
        want = str(hash)
        el = extract_bits(want, self.bits)
        if el:
            start = self._fanget(el-1)
            startv = el << (32-self.bits)
        else:
            start = 0
            startv = 0
        end = self._fanget(el)
        endv = (el+1) << (32-self.bits)
        _total_steps += 1  # lookup table is a step
        hashv = _helpers.firstword(hash)
        #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
        while start < end:
            _total_steps += 1
            #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
            mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
            #print '  %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
            v = self._get(mid)
            #print '    %08x' % self._num(v)
            if v < want:
                start = mid+1
                startv = _helpers.firstword(v)
            elif v > want:
                end = mid
                endv = _helpers.firstword(v)
            else: # got it!
                return True
        return None

    def __iter__(self):
        for i in xrange(self._fanget(self.entries-1)):
            yield buffer(self.shalist, i*20, 20)

    def __len__(self):
        return int(self._fanget(self.entries-1))

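# A .midx file merges the sha1 lists of several .idx files into one sorted,
# mmap'd list so a membership test only has to consult a single file.  A
# PackMidx is used through the same interface as a PackIdx; a minimal sketch,
# assuming a hypothetical midx path (see open_idx() below):
#
#   # midx = open_idx('/path/to/objects/pack/bup.midx')   # hypothetical path
#   # midx.exists(some_20_byte_binary_sha)                # True or None
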
_mpi_count = 0
class PackIdxList:
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = {}
        self.packs = []
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))

    def __len__(self):
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        for i in range(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            if p.exists(hash):
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return True
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of their
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                for ix in self.packs:
                    if isinstance(ix, PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for f in os.listdir(self.dir):
                    full = os.path.join(self.dir, f)
                    if f.endswith('.midx') and not d.get(full):
                        mx = PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = 0
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                                broken += 1
                        if not broken:
                            midxl.append(mx)
                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                for ix in midxl:
                    any = 0
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            d[ix.name] = ix
                            for name in ix.idxnames:
                                d[os.path.join(self.dir, name)] = ix
                            any += 1
                            break
                    if not any and not ix.force_keep:
                        log('midx: removing redundant: %s\n'
                            % os.path.basename(ix.name))
                        unlink(ix.name)
            for f in os.listdir(self.dir):
                full = os.path.join(self.dir, f)
                if f.endswith('.idx') and not d.get(full):
                    ix = PackIdx(full)
                    d[full] = ix
            self.packs = list(set(d.values()))
        log('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

391 """Insert an additional object in the list."""
395 """Remove all additional objects from the list."""
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    header = '%s %d\0' % (type, len(content))
    sum = Sha1(header)
    sum.update(content)
    return sum.digest()

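# The hash is the same one 'git hash-object' would compute: the sha1 of
# "<type> <size>\0" followed by the content.  For example (sketch, not part
# of the original module), the well-known empty-blob id:
#
#   calc_hash('blob', '').encode('hex')
#       => 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
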
def _shalist_sort_key(ent):
    (mode, name, id) = ent
    if stat.S_ISDIR(int(mode, 8)):
        return name + '/'
    else:
        return name

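# Git sorts tree entries as if directory names had a trailing '/', so a file
# 'foo.c' sorts before a directory 'foo'.  A minimal sketch (not part of the
# original module):
#
#   sorted([('100644', 'foo.c', 'x'*20), ('40000', 'foo', 'y'*20)],
#          key=_shalist_sort_key)
#       => [('100644', 'foo.c', ...), ('40000', 'foo', ...)]
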
def open_idx(filename):
    if filename.endswith('.idx'):
        return PackIdx(filename)
    elif filename.endswith('.midx'):
        return PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')

def idxmerge(idxlist):
    """Generate a list of all the objects reachable in a PackIdxList."""
    total = sum(len(i) for i in idxlist)
    iters = (iter(i) for i in idxlist)
    heap = [(next(it), it) for it in iters]
    heapq.heapify(heap)
    count = 0
    last = None
    while heap:
        if (count % 10024) == 0:
            progress('Reading indexes: %.2f%% (%d/%d)\r'
                     % (count*100.0/total, count, total))
        (e, it) = heap[0]
        if e != last:
            yield e
            last = e
        count += 1
        e = next(it)
        if e:
            heapq.heapreplace(heap, (e, it))
        else:
            heapq.heappop(heap)
    log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))

450 """Writes Git objects insid a pack file."""
451 def __init__(self, objcache_maker=None):
456 self.objcache_maker = objcache_maker
462 def _make_objcache(self):
463 if self.objcache == None:
464 if self.objcache_maker:
465 self.objcache = self.objcache_maker()
467 self.objcache = PackIdxList(repo('objects/pack'))
471 self._make_objcache()
472 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
473 self.file = os.fdopen(fd, 'w+b')
474 assert(name.endswith('.pack'))
475 self.filename = name[:-5]
476 self.file.write('PACK\0\0\0\2\0\0\0\0')
    def _raw_write(self, datalist):
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob.  So let's make sure it's
        # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.)  f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        f.write(oneblob)
        self.outbytes += len(oneblob)
        self.count += 1

    def _write(self, bin, type, content):
        if verbose:
            log('>')
        self._raw_write(_encode_packobj(type, content))
        return bin

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end()
        self.outbytes = self.count = 0
        return id

    def write(self, type, content):
        """Write an object in this pack file."""
        return self._write(calc_hash(type, content), type, content)

    def exists(self, id):
        """Return non-empty if an object is found in the object cache."""
        if not self.objcache:
            self._make_objcache()
        return self.objcache.exists(id)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        bin = calc_hash(type, content)
        if not self.exists(bin):
            self._write(bin, type, content)
            self.objcache.add(bin)
        return bin

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack."""
        shalist = sorted(shalist, key = _shalist_sort_key)
        l = []
        for (mode,name,bin) in shalist:
            assert(mode)
            assert(mode != '0')
            assert(mode[0] != '0')
            assert(name)
            assert(len(bin) == 20)
            l.append('%s %s\0%s' % (mode,name,bin))
        return self.maybe_write('tree', ''.join(l))

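    # Each tree entry uses git's raw format, "<octal mode> <name>\0" followed
    # by the 20-byte binary sha1, and the entries must already be in git's
    # sort order (see _shalist_sort_key).  A minimal sketch (not part of the
    # original module):
    #
    #   # w = PackWriter()
    #   # blob_id = w.new_blob('hello\n')
    #   # tree_id = w.new_tree([('100644', 'hello.txt', blob_id)])
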
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))

    def new_commit(self, parent, tree, msg):
        """Create a commit object in the pack."""
        now = time.time()
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, now, userline, now,
                                  msg)
        return commit

558 """Remove the pack file from disk."""
563 os.unlink(self.filename + '.pack')
567 if not f: return None
571 # update object count
573 cp = struct.pack('!i', self.count)
577 # calculate the pack sha1sum
584 f.write(sum.digest())
588 p = subprocess.Popen(['git', 'index-pack', '-v',
590 self.filename + '.pack'],
591 preexec_fn = _gitenv,
592 stdout = subprocess.PIPE)
593 out = p.stdout.read().strip()
594 _git_wait('git index-pack', p)
596 raise GitError('git index-pack produced no output')
597 nameprefix = repo('objects/pack/%s' % out)
598 if os.path.exists(self.filename + '.map'):
599 os.unlink(self.filename + '.map')
600 os.rename(self.filename + '.pack', nameprefix + '.pack')
601 os.rename(self.filename + '.idx', nameprefix + '.idx')
605 """Close the pack file and move it to its definitive path."""
def _git_date(date):
    return '%d %s' % (date, time.strftime('%z', time.localtime(date)))


def _gitenv():
    os.environ['GIT_DIR'] = os.path.abspath(repo())

def list_refs(refname = None):
    """Generate a list of tuples in the form (refname,hash).
    If a ref name is specified, list only this particular ref.
    """
    argv = ['git', 'show-ref', '--']
    if refname:
        argv += [refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    if rv:
        assert(not out)
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))

def read_ref(refname):
    """Get the commit id of the most recent commit made on a given ref."""
    l = list(list_refs(refname))
    if l:
        assert(len(l) == 1)
        return l[0][1]
    else:
        return None

def rev_list(ref, count=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    (date,hash).

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    assert(not ref.startswith('-'))
    opts = []
    if count:
        opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    commit = None
    for row in p.stdout:
        s = row.strip()
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
        else:
            date = int(s)
            yield (date, commit)
    rv = p.wait()  # not fatal
    if rv:
        raise GitError('git rev-list returned error %d' % rv)

def rev_get_date(ref):
    """Get the date of the latest commit on the specified ref."""
    for (date, commit) in rev_list(ref, count=1):
        return date
    raise GitError('no such commit %r' % ref)

def update_ref(refname, newval, oldval):
    """Change the commit pointed to by a branch."""
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv)
    _git_wait('git update-ref', p)

def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    global repodir
    if path:
        repodir = path
    if not repodir:
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')

def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    guess_repo(path)
    d = repo()
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv)
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv)
    _git_wait('git config', p)

def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    guess_repo(path)
    if not os.path.isdir(repo('objects/pack/.')):
        if repodir == home_repodir:
            init_repo()
        else:
            log('error: %r is not a bup/git repository\n' % repo())
            sys.exit(15)

739 """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
741 while ofs < len(buf):
742 z = buf[ofs:].find('\0')
744 spl = buf[ofs:ofs+z].split(' ', 1)
745 assert(len(spl) == 2)
746 sha = buf[ofs+z+1:ofs+z+1+20]
748 yield (spl[0], spl[1], sha)
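# 'buf' is the raw (uncompressed) content of a git tree object.  A minimal
# sketch (not part of the original module):
#
#   buf = '100644 hello.txt\0' + '\x01'*20
#   [(mode, name) for (mode, name, sha) in treeparse(buf)]
#       => [('100644', 'hello.txt')]
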
753 """Get Git's version and ensure a usable version is installed.
755 The returned version is formatted as an ordered tuple with each position
756 representing a digit in the version tag. For example, the following tuple
757 would represent version 1.6.6.9:
763 p = subprocess.Popen(['git', '--version'],
764 stdout=subprocess.PIPE)
765 gvs = p.stdout.read()
766 _git_wait('git --version', p)
767 m = re.match(r'git version (\S+.\S+)', gvs)
769 raise GitError('git --version weird output: %r' % gvs)
770 _ver = tuple(m.group(1).split('.'))
771 needed = ('1','5', '3', '1')
773 raise GitError('git version %s or higher is required; you have %s'
774 % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    rv = p.wait()
    if rv != 0:
        raise GitError('%s returned %d' % (cmd, rv))


def _git_capture(argv):
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r

class _AbortableIter:
    def __init__(self, it, onabort = None):
        self.it = it
        self.onabort = onabort
        self.done = None

    def __iter__(self):
        return self

    def next(self):
        try:
            return self.it.next()
        except StopIteration, e:
            self.done = True
            raise
        except:
            self.abort()
            raise

    def abort(self):
        """Abort iteration and call the abort callback (onabort), if needed."""
        if not self.done:
            self.done = True
            if self.onabort:
                self.onabort()

    def __del__(self):
        self.abort()

823 """Link to 'git cat-file' that is used to retrieve blob data."""
826 wanted = ('1','5','6')
829 log('warning: git version < %s; bup will be slow.\n'
832 self.get = self._slow_get
834 self.p = self.inprogress = None
835 self.get = self._fast_get
839 self.p.stdout.close()
842 self.inprogress = None
846 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
847 stdin=subprocess.PIPE,
848 stdout=subprocess.PIPE,
850 preexec_fn = _gitenv)
852 def _fast_get(self, id):
853 if not self.p or self.p.poll() != None:
856 assert(self.p.poll() == None)
858 log('_fast_get: opening %r while %r is open'
859 % (id, self.inprogress))
860 assert(not self.inprogress)
861 assert(id.find('\n') < 0)
862 assert(id.find('\r') < 0)
865 self.p.stdin.write('%s\n' % id)
866 hdr = self.p.stdout.readline()
867 if hdr.endswith(' missing\n'):
868 raise KeyError('blob %r is missing' % id)
870 if len(spl) != 3 or len(spl[0]) != 40:
871 raise GitError('expected blob, got %r' % spl)
872 (hex, type, size) = spl
874 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
875 onabort = self._abort)
880 assert(self.p.stdout.readline() == '\n')
881 self.inprogress = None
    def _slow_get(self, id):
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        yield type

        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv)
        for blob in chunkyreader(p.stdout):
            yield blob
        _git_wait('git cat-file', p)

    def _join(self, it):
        type = it.next()
        if type == 'blob':
            for blob in it:
                yield blob
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in treeparse(treefile):
                for blob in self.join(sha.encode('hex')):
                    yield blob
        elif type == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
                yield blob
        else:
            raise GitError('invalid object type %r: expected blob/tree/commit'
                           % type)

920 """Generate a list of the content of all blobs that can be reached
921 from an object. The hash given in 'id' must point to a blob, a tree
922 or a commit. The content of all blobs that can be seen from trees or
923 commits will be added to the list.
926 for d in self._join(self.get(id)):
928 except StopIteration:
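
# CatPipe.join() streams the content of every blob reachable from the given
# object: a blob yields its own data, while a tree or commit is walked
# recursively via treeparse().  A minimal sketch (not part of the original
# module):
#
#   # cp = CatPipe()
#   # data = ''.join(cp.join(some_hex_object_id))   # hypothetical hex sha1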