lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, zlib, time, subprocess, struct, stat, re, tempfile, heapq
   6 from bup.helpers import *
   7 from bup import _helpers
   8
   9 MIDX_VERSION = 2
  10
  11 verbose = 0
  12 ignore_midx = 0
  13 home_repodir = os.path.expanduser('~/.bup')
  14 repodir = None
  15
  16 _typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
  17 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
  18
  19 _total_searches = 0
  20 _total_steps = 0
  21
  22
  23 class GitError(Exception):
  24     pass
  25
  26
  27 def repo(sub = ''):
  28     """Get the path to the git repository or one of its subdirectories."""
  29     global repodir
  30     if not repodir:
  31         raise GitError('You should call check_repo_or_die()')
  32
  33     # If there's a .git subdirectory, then the actual repo is in there.
  34     gd = os.path.join(repodir, '.git')
  35     if os.path.exists(gd):
  36         repodir = gd
  37
  38     return os.path.join(repodir, sub)
  39
  40
  41 def auto_midx(objdir):
  42     main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
  43     args = [main_exe, 'midx', '--auto', '--dir', objdir]
  44     rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  45     if rv:
  46         add_error('%r: returned %d' % (args, rv))
  47
  48
  49 def mangle_name(name, mode, gitmode):
  50     """Mangle a file name to present an abstract name for segmented files.
  51     Mangled file names will have the ".bup" extension added to them. If a
  52     file's name already ends with ".bup", a ".bupl" extension is added to
  53     disambiguate normal files from semgmented ones.
  54     """
  55     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  56         return name + '.bup'
  57     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  58         return name + '.bupl'
  59     else:
  60         return name
  61
  62
  63 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  64 def demangle_name(name):
  65     """Remove name mangling from a file name, if necessary.
  66
  67     The return value is a tuple (demangled_filename,mode), where mode is one of
  68     the following:
  69
  70     * BUP_NORMAL  : files that should be read as-is from the repository
  71     * BUP_CHUNKED : files that were chunked and need to be assembled
  72
  73     For more information on the name mangling algorythm, see mangle_name()
  74     """
  75     if name.endswith('.bupl'):
  76         return (name[:-5], BUP_NORMAL)
  77     elif name.endswith('.bup'):
  78         return (name[:-4], BUP_CHUNKED)
  79     else:
  80         return (name, BUP_NORMAL)
  81
  82
  83 def _encode_packobj(type, content):
  84     szout = ''
  85     sz = len(content)
  86     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  87     sz >>= 4
  88     while 1:
  89         if sz: szbits |= 0x80
  90         szout += chr(szbits)
  91         if not sz:
  92             break
  93         szbits = sz & 0x7f
  94         sz >>= 7
  95     z = zlib.compressobj(1)
  96     yield szout
  97     yield z.compress(content)
  98     yield z.flush()
  99
 100
 101 def _encode_looseobj(type, content):
 102     z = zlib.compressobj(1)
 103     yield z.compress('%s %d\0' % (type, len(content)))
 104     yield z.compress(content)
 105     yield z.flush()
 106
 107
 108 def _decode_looseobj(buf):
 109     assert(buf);
 110     s = zlib.decompress(buf)
 111     i = s.find('\0')
 112     assert(i > 0)
 113     l = s[:i].split(' ')
 114     type = l[0]
 115     sz = int(l[1])
 116     content = s[i+1:]
 117     assert(type in _typemap)
 118     assert(sz == len(content))
 119     return (type, content)
 120
 121
 122 def _decode_packobj(buf):
 123     assert(buf)
 124     c = ord(buf[0])
 125     type = _typermap[(c & 0x70) >> 4]
 126     sz = c & 0x0f
 127     shift = 4
 128     i = 0
 129     while c & 0x80:
 130         i += 1
 131         c = ord(buf[i])
 132         sz |= (c & 0x7f) << shift
 133         shift += 7
 134         if not (c & 0x80):
 135             break
 136     return (type, zlib.decompress(buf[i+1:]))
 137
 138
 139 class PackIdx:
 140     def __init__(self):
 141         assert(0)
 142
 143     def find_offset(self, hash):
 144         """Get the offset of an object inside the index file."""
 145         idx = self._idx_from_hash(hash)
 146         if idx != None:
 147             return self._ofs_from_idx(idx)
 148         return None
 149
 150     def exists(self, hash):
 151         """Return nonempty if the object exists in this index."""
 152         return hash and (self._idx_from_hash(hash) != None) and True or None
 153
 154     def __len__(self):
 155         return int(self.fanout[255])
 156
 157     def _idx_from_hash(self, hash):
 158         global _total_searches, _total_steps
 159         _total_searches += 1
 160         assert(len(hash) == 20)
 161         b1 = ord(hash[0])
 162         start = self.fanout[b1-1] # range -1..254
 163         end = self.fanout[b1] # range 0..255
 164         want = str(hash)
 165         _total_steps += 1  # lookup table is a step
 166         while start < end:
 167             _total_steps += 1
 168             mid = start + (end-start)/2
 169             v = self._idx_to_hash(mid)
 170             if v < want:
 171                 start = mid+1
 172             elif v > want:
 173                 end = mid
 174             else: # got it!
 175                 return mid
 176         return None
 177
 178
 179 class PackIdxV1(PackIdx):
 180     """Object representation of a Git pack index (version 1) file."""
 181     def __init__(self, filename, f):
 182         self.name = filename
 183         self.idxnames = [self.name]
 184         self.map = mmap_read(f)
 185         self.fanout = list(struct.unpack('!256I',
 186                                          str(buffer(self.map, 0, 256*4))))
 187         self.fanout.append(0)  # entry "-1"
 188         nsha = self.fanout[255]
 189         self.shatable = buffer(self.map, 256*4, nsha*24)
 190
 191     def _ofs_from_idx(self, idx):
 192         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 193
 194     def _idx_to_hash(self, idx):
 195         return str(self.shatable[idx*24+4 : idx*24+24])
 196
 197     def __iter__(self):
 198         for i in xrange(self.fanout[255]):
 199             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 200
 201
 202 class PackIdxV2(PackIdx):
 203     """Object representation of a Git pack index (version 2) file."""
 204     def __init__(self, filename, f):
 205         self.name = filename
 206         self.idxnames = [self.name]
 207         self.map = mmap_read(f)
 208         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 209         self.fanout = list(struct.unpack('!256I',
 210                                          str(buffer(self.map, 8, 256*4))))
 211         self.fanout.append(0)  # entry "-1"
 212         nsha = self.fanout[255]
 213         self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
 214         self.ofstable = buffer(self.map,
 215                                8 + 256*4 + nsha*20 + nsha*4,
 216                                nsha*4)
 217         self.ofs64table = buffer(self.map,
 218                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 219
 220     def _ofs_from_idx(self, idx):
 221         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 222         if ofs & 0x80000000:
 223             idx64 = ofs & 0x7fffffff
 224             ofs = struct.unpack('!Q',
 225                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 226         return ofs
 227
 228     def _idx_to_hash(self, idx):
 229         return str(self.shatable[idx*20:(idx+1)*20])
 230
 231     def __iter__(self):
 232         for i in xrange(self.fanout[255]):
 233             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 234
 235
 236 extract_bits = _helpers.extract_bits
 237
 238
 239 class PackMidx:
 240     """Wrapper which contains data from multiple index files.
 241     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 242     and make it possible for bup to expand Git's indexing capabilities to vast
 243     amounts of files.
 244     """
 245     def __init__(self, filename):
 246         self.name = filename
 247         self.force_keep = False
 248         assert(filename.endswith('.midx'))
 249         self.map = mmap_read(open(filename))
 250         if str(self.map[0:4]) != 'MIDX':
 251             log('Warning: skipping: invalid MIDX header in %r\n' % filename)
 252             self.force_keep = True
 253             return self._init_failed()
 254         ver = struct.unpack('!I', self.map[4:8])[0]
 255         if ver < MIDX_VERSION:
 256             log('Warning: ignoring old-style (v%d) midx %r\n'
 257                 % (ver, filename))
 258             self.force_keep = False  # old stuff is boring
 259             return self._init_failed()
 260         if ver > MIDX_VERSION:
 261             log('Warning: ignoring too-new (v%d) midx %r\n'
 262                 % (ver, filename))
 263             self.force_keep = True  # new stuff is exciting
 264             return self._init_failed()
 265
 266         self.bits = _helpers.firstword(self.map[8:12])
 267         self.entries = 2**self.bits
 268         self.fanout = buffer(self.map, 12, self.entries*4)
 269         shaofs = 12 + self.entries*4
 270         nsha = self._fanget(self.entries-1)
 271         self.shalist = buffer(self.map, shaofs, nsha*20)
 272         self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 273
 274     def _init_failed(self):
 275         self.bits = 0
 276         self.entries = 1
 277         self.fanout = buffer('\0\0\0\0')
 278         self.shalist = buffer('\0'*20)
 279         self.idxnames = []
 280
 281     def _fanget(self, i):
 282         start = i*4
 283         s = self.fanout[start:start+4]
 284         return _helpers.firstword(s)
 285
 286     def _get(self, i):
 287         return str(self.shalist[i*20:(i+1)*20])
 288
 289     def exists(self, hash):
 290         """Return nonempty if the object exists in the index files."""
 291         global _total_searches, _total_steps
 292         _total_searches += 1
 293         want = str(hash)
 294         el = extract_bits(want, self.bits)
 295         if el:
 296             start = self._fanget(el-1)
 297             startv = el << (32-self.bits)
 298         else:
 299             start = 0
 300             startv = 0
 301         end = self._fanget(el)
 302         endv = (el+1) << (32-self.bits)
 303         _total_steps += 1   # lookup table is a step
 304         hashv = _helpers.firstword(hash)
 305         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 306         while start < end:
 307             _total_steps += 1
 308             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 309             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 310             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 311             v = self._get(mid)
 312             #print '    %08x' % self._num(v)
 313             if v < want:
 314                 start = mid+1
 315                 startv = _helpers.firstword(v)
 316             elif v > want:
 317                 end = mid
 318                 endv = _helpers.firstword(v)
 319             else: # got it!
 320                 return True
 321         return None
 322
 323     def __iter__(self):
 324         for i in xrange(self._fanget(self.entries-1)):
 325             yield buffer(self.shalist, i*20, 20)
 326
 327     def __len__(self):
 328         return int(self._fanget(self.entries-1))
 329
 330
 331 _mpi_count = 0
 332 class PackIdxList:
 333     def __init__(self, dir):
 334         global _mpi_count
 335         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
 336         _mpi_count += 1
 337         self.dir = dir
 338         self.also = {}
 339         self.packs = []
 340         self.refresh()
 341
 342     def __del__(self):
 343         global _mpi_count
 344         _mpi_count -= 1
 345         assert(_mpi_count == 0)
 346
 347     def __iter__(self):
 348         return iter(idxmerge(self.packs))
 349
 350     def __len__(self):
 351         return sum(len(pack) for pack in self.packs)
 352
 353     def exists(self, hash):
 354         """Return nonempty if the object exists in the index files."""
 355         global _total_searches
 356         _total_searches += 1
 357         if hash in self.also:
 358             return True
 359         for i in range(len(self.packs)):
 360             p = self.packs[i]
 361             _total_searches -= 1  # will be incremented by sub-pack
 362             if p.exists(hash):
 363                 # reorder so most recently used packs are searched first
 364                 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
 365                 return p.name
 366         return None
 367
 368     def refresh(self, skip_midx = False):
 369         """Refresh the index list.
 370         This method verifies if .midx files were superseded (e.g. all of its
 371         contents are in another, bigger .midx file) and removes the superseded
 372         files.
 373
 374         If skip_midx is True, all work on .midx files will be skipped and .midx
 375         files will be removed from the list.
 376
 377         The module-global variable 'ignore_midx' can force this function to
 378         always act as if skip_midx was True.
 379         """
 380         skip_midx = skip_midx or ignore_midx
 381         d = dict((p.name, p) for p in self.packs
 382                  if not skip_midx or not isinstance(p, PackMidx))
 383         if os.path.exists(self.dir):
 384             if not skip_midx:
 385                 midxl = []
 386                 for ix in self.packs:
 387                     if isinstance(ix, PackMidx):
 388                         for name in ix.idxnames:
 389                             d[os.path.join(self.dir, name)] = ix
 390                 for f in os.listdir(self.dir):
 391                     full = os.path.join(self.dir, f)
 392                     if f.endswith('.midx') and not d.get(full):
 393                         mx = PackMidx(full)
 394                         (mxd, mxf) = os.path.split(mx.name)
 395                         broken = 0
 396                         for n in mx.idxnames:
 397                             if not os.path.exists(os.path.join(mxd, n)):
 398                                 log(('warning: index %s missing\n' +
 399                                     '  used by %s\n') % (n, mxf))
 400                                 broken += 1
 401                         if broken:
 402                             del mx
 403                             unlink(full)
 404                         else:
 405                             midxl.append(mx)
 406                 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
 407                 for ix in midxl:
 408                     any = 0
 409                     for sub in ix.idxnames:
 410                         found = d.get(os.path.join(self.dir, sub))
 411                         if not found or isinstance(found, PackIdx):
 412                             # doesn't exist, or exists but not in a midx
 413                             d[ix.name] = ix
 414                             for name in ix.idxnames:
 415                                 d[os.path.join(self.dir, name)] = ix
 416                             any += 1
 417                             break
 418                     if not any and not ix.force_keep:
 419                         debug1('midx: removing redundant: %s\n'
 420                                % os.path.basename(ix.name))
 421                         unlink(ix.name)
 422             for f in os.listdir(self.dir):
 423                 full = os.path.join(self.dir, f)
 424                 if f.endswith('.idx') and not d.get(full):
 425                     try:
 426                         ix = open_idx(full)
 427                     except GitError, e:
 428                         add_error(e)
 429                         continue
 430                     d[full] = ix
 431             self.packs = list(set(d.values()))
 432         debug1('PackIdxList: using %d index%s.\n'
 433             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 434
 435     def packname_containing(self, hash):
 436         # figure out which pack contains a given hash.
 437         # FIXME: if the midx file format would just *store* this information,
 438         # we could calculate it a lot more efficiently.  But it's not needed
 439         # often, so let's do it like this.
 440         for f in os.listdir(self.dir):
 441             if f.endswith('.idx'):
 442                 full = os.path.join(self.dir, f)
 443                 try:
 444                     ix = open_idx(full)
 445                 except GitError, e:
 446                     add_error(e)
 447                     continue
 448                 if ix.exists(hash):
 449                     return full
 450
 451     def add(self, hash):
 452         """Insert an additional object in the list."""
 453         self.also[hash] = 1
 454
 455     def zap_also(self):
 456         """Remove all additional objects from the list."""
 457         self.also = {}
 458
 459
 460 def calc_hash(type, content):
 461     """Calculate some content's hash in the Git fashion."""
 462     header = '%s %d\0' % (type, len(content))
 463     sum = Sha1(header)
 464     sum.update(content)
 465     return sum.digest()
 466
 467
 468 def _shalist_sort_key(ent):
 469     (mode, name, id) = ent
 470     if stat.S_ISDIR(int(mode, 8)):
 471         return name + '/'
 472     else:
 473         return name
 474
 475
 476 def open_idx(filename):
 477     if filename.endswith('.idx'):
 478         f = open(filename, 'rb')
 479         header = f.read(8)
 480         if header[0:4] == '\377tOc':
 481             version = struct.unpack('!I', header[4:8])[0]
 482             if version == 2:
 483                 return PackIdxV2(filename, f)
 484             else:
 485                 raise GitError('%s: expected idx file version 2, got %d'
 486                                % (filename, version))
 487         elif len(header) == 8 and header[0:4] < '\377tOc':
 488             return PackIdxV1(filename, f)
 489         else:
 490             raise GitError('%s: unrecognized idx file header' % filename)
 491     elif filename.endswith('.midx'):
 492         return PackMidx(filename)
 493     else:
 494         raise GitError('idx filenames must end with .idx or .midx')
 495
 496
 497 def idxmerge(idxlist, final_progress=True):
 498     """Generate a list of all the objects reachable in a PackIdxList."""
 499     total = sum(len(i) for i in idxlist)
 500     iters = (iter(i) for i in idxlist)
 501     heap = [(next(it), it) for it in iters]
 502     heapq.heapify(heap)
 503     count = 0
 504     last = None
 505     while heap:
 506         if (count % 10024) == 0:
 507             progress('Reading indexes: %.2f%% (%d/%d)\r'
 508                      % (count*100.0/total, count, total))
 509         (e, it) = heap[0]
 510         if e != last:
 511             yield e
 512             last = e
 513         count += 1
 514         e = next(it)
 515         if e:
 516             heapq.heapreplace(heap, (e, it))
 517         else:
 518             heapq.heappop(heap)
 519     if final_progress:
 520         log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 521
 522
 523 def _make_objcache():
 524     return PackIdxList(repo('objects/pack'))
 525
 526 class PackWriter:
 527     """Writes Git objects insid a pack file."""
 528     def __init__(self, objcache_maker=_make_objcache):
 529         self.count = 0
 530         self.outbytes = 0
 531         self.filename = None
 532         self.file = None
 533         self.idx = None
 534         self.objcache_maker = objcache_maker
 535         self.objcache = None
 536
 537     def __del__(self):
 538         self.close()
 539
 540     def _open(self):
 541         if not self.file:
 542             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 543             self.file = os.fdopen(fd, 'w+b')
 544             assert(name.endswith('.pack'))
 545             self.filename = name[:-5]
 546             self.file.write('PACK\0\0\0\2\0\0\0\0')
 547             self.idx = list(list() for i in xrange(256))
 548
 549     # the 'sha' parameter is used in client.py's _raw_write(), but not needed
 550     # in this basic version.
 551     def _raw_write(self, datalist, sha):
 552         self._open()
 553         f = self.file
 554         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 555         # the file never has a *partial* blob.  So let's make sure it's
 556         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 557         # to our hashsplit algorithm.)  f.write() does its own buffering,
 558         # but that's okay because we'll flush it in _end().
 559         oneblob = ''.join(datalist)
 560         f.write(oneblob)
 561         nw = len(oneblob)
 562         crc = zlib.crc32(oneblob) & 0xffffffff
 563         self._update_idx(sha, crc, nw)
 564         self.outbytes += nw
 565         self.count += 1
 566         return nw, crc
 567
 568     def _update_idx(self, sha, crc, size):
 569         assert(sha)
 570         if self.idx:
 571             self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
 572
 573     def _write(self, sha, type, content):
 574         if verbose:
 575             log('>')
 576         if not sha:
 577             sha = calc_hash(type, content)
 578         size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
 579         return sha
 580
 581     def breakpoint(self):
 582         """Clear byte and object counts and return the last processed id."""
 583         id = self._end()
 584         self.outbytes = self.count = 0
 585         return id
 586
 587     def write(self, type, content):
 588         """Write an object in this pack file."""
 589         return self._write(calc_hash(type, content), type, content)
 590
 591     def _require_objcache(self):
 592         if self.objcache is None and self.objcache_maker:
 593             self.objcache = self.objcache_maker()
 594         if self.objcache is None:
 595             raise GitError(
 596                     "PackWriter not opened or can't check exists w/o objcache")
 597
 598     def exists(self, id):
 599         """Return non-empty if an object is found in the object cache."""
 600         self._require_objcache()
 601         return self.objcache.exists(id)
 602
 603     def maybe_write(self, type, content):
 604         """Write an object to the pack file if not present and return its id."""
 605         self._require_objcache()
 606         sha = calc_hash(type, content)
 607         if not self.exists(sha):
 608             self._write(sha, type, content)
 609             self.objcache.add(sha)
 610         return sha
 611
 612     def new_blob(self, blob):
 613         """Create a blob object in the pack with the supplied content."""
 614         return self.maybe_write('blob', blob)
 615
 616     def new_tree(self, shalist):
 617         """Create a tree object in the pack."""
 618         shalist = sorted(shalist, key = _shalist_sort_key)
 619         l = []
 620         for (mode,name,bin) in shalist:
 621             assert(mode)
 622             assert(mode != '0')
 623             assert(mode[0] != '0')
 624             assert(name)
 625             assert(len(bin) == 20)
 626             l.append('%s %s\0%s' % (mode,name,bin))
 627         return self.maybe_write('tree', ''.join(l))
 628
 629     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 630         l = []
 631         if tree: l.append('tree %s' % tree.encode('hex'))
 632         if parent: l.append('parent %s' % parent.encode('hex'))
 633         if author: l.append('author %s %s' % (author, _git_date(adate)))
 634         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 635         l.append('')
 636         l.append(msg)
 637         return self.maybe_write('commit', '\n'.join(l))
 638
 639     def new_commit(self, parent, tree, date, msg):
 640         """Create a commit object in the pack."""
 641         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 642         commit = self._new_commit(tree, parent,
 643                                   userline, date, userline, date,
 644                                   msg)
 645         return commit
 646
 647     def abort(self):
 648         """Remove the pack file from disk."""
 649         f = self.file
 650         if f:
 651             self.idx = None
 652             self.file = None
 653             f.close()
 654             os.unlink(self.filename + '.pack')
 655
 656     def _end(self, run_midx=True):
 657         f = self.file
 658         if not f: return None
 659         self.file = None
 660         self.objcache = None
 661         idx = self.idx
 662         self.idx = None
 663
 664         # update object count
 665         f.seek(8)
 666         cp = struct.pack('!i', self.count)
 667         assert(len(cp) == 4)
 668         f.write(cp)
 669
 670         # calculate the pack sha1sum
 671         f.seek(0)
 672         sum = Sha1()
 673         for b in chunkyreader(f):
 674             sum.update(b)
 675         packbin = sum.digest()
 676         f.write(packbin)
 677         f.close()
 678
 679         idx_f = open(self.filename + '.idx', 'wb')
 680         obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
 681         idx_f.close()
 682
 683         nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
 684         if os.path.exists(self.filename + '.map'):
 685             os.unlink(self.filename + '.map')
 686         os.rename(self.filename + '.pack', nameprefix + '.pack')
 687         os.rename(self.filename + '.idx', nameprefix + '.idx')
 688
 689         if run_midx:
 690             auto_midx(repo('objects/pack'))
 691         return nameprefix
 692
 693     def close(self, run_midx=True):
 694         """Close the pack file and move it to its definitive path."""
 695         return self._end(run_midx=run_midx)
 696
 697     def _write_pack_idx_v2(self, file, idx, packbin):
 698         sum = Sha1()
 699
 700         def write(data):
 701             file.write(data)
 702             sum.update(data)
 703
 704         write('\377tOc\0\0\0\2')
 705
 706         n = 0
 707         for part in idx:
 708             n += len(part)
 709             write(struct.pack('!i', n))
 710             part.sort(key=lambda x: x[0])
 711
 712         obj_list_sum = Sha1()
 713         for part in idx:
 714             for entry in part:
 715                 write(entry[0])
 716                 obj_list_sum.update(entry[0])
 717         for part in idx:
 718             for entry in part:
 719                 write(struct.pack('!I', entry[1]))
 720         ofs64_list = []
 721         for part in idx:
 722             for entry in part:
 723                 if entry[2] & 0x80000000:
 724                     write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
 725                     ofs64_list.append(struct.pack('!Q', entry[2]))
 726                 else:
 727                     write(struct.pack('!i', entry[2]))
 728         for ofs64 in ofs64_list:
 729             write(ofs64)
 730
 731         write(packbin)
 732         file.write(sum.digest())
 733         return obj_list_sum.hexdigest()
 734
 735
 736 def _git_date(date):
 737     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 738
 739
 740 def _gitenv():
 741     os.environ['GIT_DIR'] = os.path.abspath(repo())
 742
 743
 744 def list_refs(refname = None):
 745     """Generate a list of tuples in the form (refname,hash).
 746     If a ref name is specified, list only this particular ref.
 747     """
 748     argv = ['git', 'show-ref', '--']
 749     if refname:
 750         argv += [refname]
 751     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 752     out = p.stdout.read().strip()
 753     rv = p.wait()  # not fatal
 754     if rv:
 755         assert(not out)
 756     if out:
 757         for d in out.split('\n'):
 758             (sha, name) = d.split(' ', 1)
 759             yield (name, sha.decode('hex'))
 760
 761
 762 def read_ref(refname):
 763     """Get the commit id of the most recent commit made on a given ref."""
 764     l = list(list_refs(refname))
 765     if l:
 766         assert(len(l) == 1)
 767         return l[0][1]
 768     else:
 769         return None
 770
 771
 772 def rev_list(ref, count=None):
 773     """Generate a list of reachable commits in reverse chronological order.
 774
 775     This generator walks through commits, from child to parent, that are
 776     reachable via the specified ref and yields a series of tuples of the form
 777     (date,hash).
 778
 779     If count is a non-zero integer, limit the number of commits to "count"
 780     objects.
 781     """
 782     assert(not ref.startswith('-'))
 783     opts = []
 784     if count:
 785         opts += ['-n', str(atoi(count))]
 786     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 787     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 788     commit = None
 789     for row in p.stdout:
 790         s = row.strip()
 791         if s.startswith('commit '):
 792             commit = s[7:].decode('hex')
 793         else:
 794             date = int(s)
 795             yield (date, commit)
 796     rv = p.wait()  # not fatal
 797     if rv:
 798         raise GitError, 'git rev-list returned error %d' % rv
 799
 800
 801 def rev_get_date(ref):
 802     """Get the date of the latest commit on the specified ref."""
 803     for (date, commit) in rev_list(ref, count=1):
 804         return date
 805     raise GitError, 'no such commit %r' % ref
 806
 807
 808 def rev_parse(committish):
 809     """Resolve the full hash for 'committish', if it exists.
 810
 811     Should be roughly equivalent to 'git rev-parse'.
 812
 813     Returns the hex value of the hash if it is found, None if 'committish' does
 814     not correspond to anything.
 815     """
 816     head = read_ref(committish)
 817     if head:
 818         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 819         return head
 820
 821     pL = PackIdxList(repo('objects/pack'))
 822
 823     if len(committish) == 40:
 824         try:
 825             hash = committish.decode('hex')
 826         except TypeError:
 827             return None
 828
 829         if pL.exists(hash):
 830             return hash
 831
 832     return None
 833
 834
 835 def update_ref(refname, newval, oldval):
 836     """Change the commit pointed to by a branch."""
 837     if not oldval:
 838         oldval = ''
 839     assert(refname.startswith('refs/heads/'))
 840     p = subprocess.Popen(['git', 'update-ref', refname,
 841                           newval.encode('hex'), oldval.encode('hex')],
 842                          preexec_fn = _gitenv)
 843     _git_wait('git update-ref', p)
 844
 845
 846 def guess_repo(path=None):
 847     """Set the path value in the global variable "repodir".
 848     This makes bup look for an existing bup repository, but not fail if a
 849     repository doesn't exist. Usually, if you are interacting with a bup
 850     repository, you would not be calling this function but using
 851     check_repo_or_die().
 852     """
 853     global repodir
 854     if path:
 855         repodir = path
 856     if not repodir:
 857         repodir = os.environ.get('BUP_DIR')
 858         if not repodir:
 859             repodir = os.path.expanduser('~/.bup')
 860
 861
 862 def init_repo(path=None):
 863     """Create the Git bare repository for bup in a given path."""
 864     guess_repo(path)
 865     d = repo()  # appends a / to the path
 866     parent = os.path.dirname(os.path.dirname(d))
 867     if parent and not os.path.exists(parent):
 868         raise GitError('parent directory "%s" does not exist\n' % parent)
 869     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 870         raise GitError('"%d" exists but is not a directory\n' % d)
 871     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 872                          preexec_fn = _gitenv)
 873     _git_wait('git init', p)
 874     # Force the index version configuration in order to ensure bup works
 875     # regardless of the version of the installed Git binary.
 876     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 877                          stdout=sys.stderr, preexec_fn = _gitenv)
 878     _git_wait('git config', p)
 879
 880
 881 def check_repo_or_die(path=None):
 882     """Make sure a bup repository exists, and abort if not.
 883     If the path to a particular repository was not specified, this function
 884     initializes the default repository automatically.
 885     """
 886     guess_repo(path)
 887     if not os.path.isdir(repo('objects/pack/.')):
 888         if repodir == home_repodir:
 889             init_repo()
 890         else:
 891             log('error: %r is not a bup/git repository\n' % repo())
 892             sys.exit(15)
 893
 894
 895 def treeparse(buf):
 896     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 897     ofs = 0
 898     while ofs < len(buf):
 899         z = buf[ofs:].find('\0')
 900         assert(z > 0)
 901         spl = buf[ofs:ofs+z].split(' ', 1)
 902         assert(len(spl) == 2)
 903         sha = buf[ofs+z+1:ofs+z+1+20]
 904         ofs += z+1+20
 905         yield (spl[0], spl[1], sha)
 906
 907
 908 _ver = None
 909 def ver():
 910     """Get Git's version and ensure a usable version is installed.
 911
 912     The returned version is formatted as an ordered tuple with each position
 913     representing a digit in the version tag. For example, the following tuple
 914     would represent version 1.6.6.9:
 915
 916         ('1', '6', '6', '9')
 917     """
 918     global _ver
 919     if not _ver:
 920         p = subprocess.Popen(['git', '--version'],
 921                              stdout=subprocess.PIPE)
 922         gvs = p.stdout.read()
 923         _git_wait('git --version', p)
 924         m = re.match(r'git version (\S+.\S+)', gvs)
 925         if not m:
 926             raise GitError('git --version weird output: %r' % gvs)
 927         _ver = tuple(m.group(1).split('.'))
 928     needed = ('1','5', '3', '1')
 929     if _ver < needed:
 930         raise GitError('git version %s or higher is required; you have %s'
 931                        % ('.'.join(needed), '.'.join(_ver)))
 932     return _ver
 933
 934
 935 def _git_wait(cmd, p):
 936     rv = p.wait()
 937     if rv != 0:
 938         raise GitError('%s returned %d' % (cmd, rv))
 939
 940
 941 def _git_capture(argv):
 942     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 943     r = p.stdout.read()
 944     _git_wait(repr(argv), p)
 945     return r
 946
 947
 948 class _AbortableIter:
 949     def __init__(self, it, onabort = None):
 950         self.it = it
 951         self.onabort = onabort
 952         self.done = None
 953
 954     def __iter__(self):
 955         return self
 956
 957     def next(self):
 958         try:
 959             return self.it.next()
 960         except StopIteration, e:
 961             self.done = True
 962             raise
 963         except:
 964             self.abort()
 965             raise
 966
 967     def abort(self):
 968         """Abort iteration and call the abortion callback, if needed."""
 969         if not self.done:
 970             self.done = True
 971             if self.onabort:
 972                 self.onabort()
 973
 974     def __del__(self):
 975         self.abort()
 976
 977
 978 _ver_warned = 0
 979 class CatPipe:
 980     """Link to 'git cat-file' that is used to retrieve blob data."""
 981     def __init__(self):
 982         global _ver_warned
 983         wanted = ('1','5','6')
 984         if ver() < wanted:
 985             if not _ver_warned:
 986                 log('warning: git version < %s; bup will be slow.\n'
 987                     % '.'.join(wanted))
 988                 _ver_warned = 1
 989             self.get = self._slow_get
 990         else:
 991             self.p = self.inprogress = None
 992             self.get = self._fast_get
 993
 994     def _abort(self):
 995         if self.p:
 996             self.p.stdout.close()
 997             self.p.stdin.close()
 998         self.p = None
 999         self.inprogress = None
1000
1001     def _restart(self):
1002         self._abort()
1003         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1004                                   stdin=subprocess.PIPE,
1005                                   stdout=subprocess.PIPE,
1006                                   close_fds = True,
1007                                   bufsize = 4096,
1008                                   preexec_fn = _gitenv)
1009
1010     def _fast_get(self, id):
1011         if not self.p or self.p.poll() != None:
1012             self._restart()
1013         assert(self.p)
1014         assert(self.p.poll() == None)
1015         if self.inprogress:
1016             log('_fast_get: opening %r while %r is open'
1017                 % (id, self.inprogress))
1018         assert(not self.inprogress)
1019         assert(id.find('\n') < 0)
1020         assert(id.find('\r') < 0)
1021         assert(not id.startswith('-'))
1022         self.inprogress = id
1023         self.p.stdin.write('%s\n' % id)
1024         self.p.stdin.flush()
1025         hdr = self.p.stdout.readline()
1026         if hdr.endswith(' missing\n'):
1027             self.inprogress = None
1028             raise KeyError('blob %r is missing' % id)
1029         spl = hdr.split(' ')
1030         if len(spl) != 3 or len(spl[0]) != 40:
1031             raise GitError('expected blob, got %r' % spl)
1032         (hex, type, size) = spl
1033
1034         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1035                            onabort = self._abort)
1036         try:
1037             yield type
1038             for blob in it:
1039                 yield blob
1040             assert(self.p.stdout.readline() == '\n')
1041             self.inprogress = None
1042         except Exception, e:
1043             it.abort()
1044             raise
1045
1046     def _slow_get(self, id):
1047         assert(id.find('\n') < 0)
1048         assert(id.find('\r') < 0)
1049         assert(id[0] != '-')
1050         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1051         yield type
1052
1053         p = subprocess.Popen(['git', 'cat-file', type, id],
1054                              stdout=subprocess.PIPE,
1055                              preexec_fn = _gitenv)
1056         for blob in chunkyreader(p.stdout):
1057             yield blob
1058         _git_wait('git cat-file', p)
1059
1060     def _join(self, it):
1061         type = it.next()
1062         if type == 'blob':
1063             for blob in it:
1064                 yield blob
1065         elif type == 'tree':
1066             treefile = ''.join(it)
1067             for (mode, name, sha) in treeparse(treefile):
1068                 for blob in self.join(sha.encode('hex')):
1069                     yield blob
1070         elif type == 'commit':
1071             treeline = ''.join(it).split('\n')[0]
1072             assert(treeline.startswith('tree '))
1073             for blob in self.join(treeline[5:]):
1074                 yield blob
1075         else:
1076             raise GitError('invalid object type %r: expected blob/tree/commit'
1077                            % type)
1078
1079     def join(self, id):
1080         """Generate a list of the content of all blobs that can be reached
1081         from an object.  The hash given in 'id' must point to a blob, a tree
1082         or a commit. The content of all blobs that can be seen from trees or
1083         commits will be added to the list.
1084         """
1085         try:
1086             for d in self._join(self.get(id)):
1087                 yield d
1088         except StopIteration:
1089             log('booger!\n')
1090
def tags():
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.

    Only refs from list_refs() whose name starts with 'refs/tags/' are
    considered; that prefix is stripped from the names that are returned.
    """
    prefix = 'refs/tags/'
    by_target = {}
    for (refname, target) in list_refs():
        if not refname.startswith(prefix):
            continue
        # Several tags may point at the same target, hence the list.
        by_target.setdefault(target, []).append(refname[len(prefix):])
    return by_target