lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, heapq
   6 from bup.helpers import *
   7 from bup import _helpers
   8
   9 MIDX_VERSION = 2
  10
  11 verbose = 0
  12 ignore_midx = 0
  13 home_repodir = os.path.expanduser('~/.bup')
  14 repodir = None
  15
  16 _typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
  17 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
  18
  19 _total_searches = 0
  20 _total_steps = 0
  21
  22
  23 class GitError(Exception):
  24     pass
  25
  26
  27 def repo(sub = ''):
  28     """Get the path to the git repository or one of its subdirectories."""
  29     global repodir
  30     if not repodir:
  31         raise GitError('You should call check_repo_or_die()')
  32
  33     # If there's a .git subdirectory, then the actual repo is in there.
  34     gd = os.path.join(repodir, '.git')
  35     if os.path.exists(gd):
  36         repodir = gd
  37
  38     return os.path.join(repodir, sub)
  39
  40
  41 def auto_midx(objdir):
  42     main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
  43     args = [main_exe, 'midx', '--auto', '--dir', objdir]
  44     rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  45     if rv:
  46         add_error('%r: returned %d' % (args, rv))
  47
  48
  49 def mangle_name(name, mode, gitmode):
  50     """Mangle a file name to present an abstract name for segmented files.
  51     Mangled file names will have the ".bup" extension added to them. If a
  52     file's name already ends with ".bup", a ".bupl" extension is added to
  53     disambiguate normal files from semgmented ones.
  54     """
  55     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  56         return name + '.bup'
  57     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  58         return name + '.bupl'
  59     else:
  60         return name
  61
  62
  63 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  64 def demangle_name(name):
  65     """Remove name mangling from a file name, if necessary.
  66
  67     The return value is a tuple (demangled_filename,mode), where mode is one of
  68     the following:
  69
  70     * BUP_NORMAL  : files that should be read as-is from the repository
  71     * BUP_CHUNKED : files that were chunked and need to be assembled
  72
  73     For more information on the name mangling algorythm, see mangle_name()
  74     """
  75     if name.endswith('.bupl'):
  76         return (name[:-5], BUP_NORMAL)
  77     elif name.endswith('.bup'):
  78         return (name[:-4], BUP_CHUNKED)
  79     else:
  80         return (name, BUP_NORMAL)
  81
  82
  83 def _encode_packobj(type, content):
  84     szout = ''
  85     sz = len(content)
  86     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  87     sz >>= 4
  88     while 1:
  89         if sz: szbits |= 0x80
  90         szout += chr(szbits)
  91         if not sz:
  92             break
  93         szbits = sz & 0x7f
  94         sz >>= 7
  95     z = zlib.compressobj(1)
  96     yield szout
  97     yield z.compress(content)
  98     yield z.flush()
  99
 100
 101 def _encode_looseobj(type, content):
 102     z = zlib.compressobj(1)
 103     yield z.compress('%s %d\0' % (type, len(content)))
 104     yield z.compress(content)
 105     yield z.flush()
 106
 107
 108 def _decode_looseobj(buf):
 109     assert(buf);
 110     s = zlib.decompress(buf)
 111     i = s.find('\0')
 112     assert(i > 0)
 113     l = s[:i].split(' ')
 114     type = l[0]
 115     sz = int(l[1])
 116     content = s[i+1:]
 117     assert(type in _typemap)
 118     assert(sz == len(content))
 119     return (type, content)
 120
 121
 122 def _decode_packobj(buf):
 123     assert(buf)
 124     c = ord(buf[0])
 125     type = _typermap[(c & 0x70) >> 4]
 126     sz = c & 0x0f
 127     shift = 4
 128     i = 0
 129     while c & 0x80:
 130         i += 1
 131         c = ord(buf[i])
 132         sz |= (c & 0x7f) << shift
 133         shift += 7
 134         if not (c & 0x80):
 135             break
 136     return (type, zlib.decompress(buf[i+1:]))
 137
 138
 139 class PackIdx:
 140     def __init__(self):
 141         assert(0)
 142
 143     def find_offset(self, hash):
 144         """Get the offset of an object inside the index file."""
 145         idx = self._idx_from_hash(hash)
 146         if idx != None:
 147             return self._ofs_from_idx(idx)
 148         return None
 149
 150     def exists(self, hash):
 151         """Return nonempty if the object exists in this index."""
 152         return hash and (self._idx_from_hash(hash) != None) and True or None
 153
 154     def __len__(self):
 155         return int(self.fanout[255])
 156
 157     def _idx_from_hash(self, hash):
 158         global _total_searches, _total_steps
 159         _total_searches += 1
 160         assert(len(hash) == 20)
 161         b1 = ord(hash[0])
 162         start = self.fanout[b1-1] # range -1..254
 163         end = self.fanout[b1] # range 0..255
 164         want = str(hash)
 165         _total_steps += 1  # lookup table is a step
 166         while start < end:
 167             _total_steps += 1
 168             mid = start + (end-start)/2
 169             v = self._idx_to_hash(mid)
 170             if v < want:
 171                 start = mid+1
 172             elif v > want:
 173                 end = mid
 174             else: # got it!
 175                 return mid
 176         return None
 177
 178
 179 class PackIdxV1(PackIdx):
 180     """Object representation of a Git pack index (version 1) file."""
 181     def __init__(self, filename, f):
 182         self.name = filename
 183         self.idxnames = [self.name]
 184         self.map = mmap_read(f)
 185         self.fanout = list(struct.unpack('!256I',
 186                                          str(buffer(self.map, 0, 256*4))))
 187         self.fanout.append(0)  # entry "-1"
 188         nsha = self.fanout[255]
 189         self.shatable = buffer(self.map, 256*4, nsha*24)
 190
 191     def _ofs_from_idx(self, idx):
 192         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 193
 194     def _idx_to_hash(self, idx):
 195         return str(self.shatable[idx*24+4 : idx*24+24])
 196
 197     def __iter__(self):
 198         for i in xrange(self.fanout[255]):
 199             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 200
 201
 202 class PackIdxV2(PackIdx):
 203     """Object representation of a Git pack index (version 2) file."""
 204     def __init__(self, filename, f):
 205         self.name = filename
 206         self.idxnames = [self.name]
 207         self.map = mmap_read(f)
 208         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 209         self.fanout = list(struct.unpack('!256I',
 210                                          str(buffer(self.map, 8, 256*4))))
 211         self.fanout.append(0)  # entry "-1"
 212         nsha = self.fanout[255]
 213         self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
 214         self.ofstable = buffer(self.map,
 215                                8 + 256*4 + nsha*20 + nsha*4,
 216                                nsha*4)
 217         self.ofs64table = buffer(self.map,
 218                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 219
 220     def _ofs_from_idx(self, idx):
 221         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 222         if ofs & 0x80000000:
 223             idx64 = ofs & 0x7fffffff
 224             ofs = struct.unpack('!Q',
 225                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 226         return ofs
 227
 228     def _idx_to_hash(self, idx):
 229         return str(self.shatable[idx*20:(idx+1)*20])
 230
 231     def __iter__(self):
 232         for i in xrange(self.fanout[255]):
 233             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 234
 235
 236 extract_bits = _helpers.extract_bits
 237
 238
 239 class PackMidx:
 240     """Wrapper which contains data from multiple index files.
 241     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 242     and make it possible for bup to expand Git's indexing capabilities to vast
 243     amounts of files.
 244     """
 245     def __init__(self, filename):
 246         self.name = filename
 247         self.force_keep = False
 248         assert(filename.endswith('.midx'))
 249         self.map = mmap_read(open(filename))
 250         if str(self.map[0:4]) != 'MIDX':
 251             log('Warning: skipping: invalid MIDX header in %r\n' % filename)
 252             self.force_keep = True
 253             return self._init_failed()
 254         ver = struct.unpack('!I', self.map[4:8])[0]
 255         if ver < MIDX_VERSION:
 256             log('Warning: ignoring old-style (v%d) midx %r\n'
 257                 % (ver, filename))
 258             self.force_keep = False  # old stuff is boring
 259             return self._init_failed()
 260         if ver > MIDX_VERSION:
 261             log('Warning: ignoring too-new (v%d) midx %r\n'
 262                 % (ver, filename))
 263             self.force_keep = True  # new stuff is exciting
 264             return self._init_failed()
 265
 266         self.bits = _helpers.firstword(self.map[8:12])
 267         self.entries = 2**self.bits
 268         self.fanout = buffer(self.map, 12, self.entries*4)
 269         shaofs = 12 + self.entries*4
 270         nsha = self._fanget(self.entries-1)
 271         self.shalist = buffer(self.map, shaofs, nsha*20)
 272         self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 273
 274     def _init_failed(self):
 275         self.bits = 0
 276         self.entries = 1
 277         self.fanout = buffer('\0\0\0\0')
 278         self.shalist = buffer('\0'*20)
 279         self.idxnames = []
 280
 281     def _fanget(self, i):
 282         start = i*4
 283         s = self.fanout[start:start+4]
 284         return _helpers.firstword(s)
 285
 286     def _get(self, i):
 287         return str(self.shalist[i*20:(i+1)*20])
 288
 289     def exists(self, hash):
 290         """Return nonempty if the object exists in the index files."""
 291         global _total_searches, _total_steps
 292         _total_searches += 1
 293         want = str(hash)
 294         el = extract_bits(want, self.bits)
 295         if el:
 296             start = self._fanget(el-1)
 297             startv = el << (32-self.bits)
 298         else:
 299             start = 0
 300             startv = 0
 301         end = self._fanget(el)
 302         endv = (el+1) << (32-self.bits)
 303         _total_steps += 1   # lookup table is a step
 304         hashv = _helpers.firstword(hash)
 305         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 306         while start < end:
 307             _total_steps += 1
 308             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 309             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 310             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 311             v = self._get(mid)
 312             #print '    %08x' % self._num(v)
 313             if v < want:
 314                 start = mid+1
 315                 startv = _helpers.firstword(v)
 316             elif v > want:
 317                 end = mid
 318                 endv = _helpers.firstword(v)
 319             else: # got it!
 320                 return True
 321         return None
 322
 323     def __iter__(self):
 324         for i in xrange(self._fanget(self.entries-1)):
 325             yield buffer(self.shalist, i*20, 20)
 326
 327     def __len__(self):
 328         return int(self._fanget(self.entries-1))
 329
 330
 331 _mpi_count = 0
 332 class PackIdxList:
 333     def __init__(self, dir):
 334         global _mpi_count
 335         assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
 336         _mpi_count += 1
 337         self.dir = dir
 338         self.also = {}
 339         self.packs = []
 340         self.refresh()
 341
 342     def __del__(self):
 343         global _mpi_count
 344         _mpi_count -= 1
 345         assert(_mpi_count == 0)
 346
 347     def __iter__(self):
 348         return iter(idxmerge(self.packs))
 349
 350     def __len__(self):
 351         return sum(len(pack) for pack in self.packs)
 352
 353     def exists(self, hash):
 354         """Return nonempty if the object exists in the index files."""
 355         global _total_searches
 356         _total_searches += 1
 357         if hash in self.also:
 358             return True
 359         for i in range(len(self.packs)):
 360             p = self.packs[i]
 361             _total_searches -= 1  # will be incremented by sub-pack
 362             if p.exists(hash):
 363                 # reorder so most recently used packs are searched first
 364                 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
 365                 return p.name
 366         return None
 367
 368     def refresh(self, skip_midx = False):
 369         """Refresh the index list.
 370         This method verifies if .midx files were superseded (e.g. all of its
 371         contents are in another, bigger .midx file) and removes the superseded
 372         files.
 373
 374         If skip_midx is True, all work on .midx files will be skipped and .midx
 375         files will be removed from the list.
 376
 377         The module-global variable 'ignore_midx' can force this function to
 378         always act as if skip_midx was True.
 379         """
 380         skip_midx = skip_midx or ignore_midx
 381         d = dict((p.name, p) for p in self.packs
 382                  if not skip_midx or not isinstance(p, PackMidx))
 383         if os.path.exists(self.dir):
 384             if not skip_midx:
 385                 midxl = []
 386                 for ix in self.packs:
 387                     if isinstance(ix, PackMidx):
 388                         for name in ix.idxnames:
 389                             d[os.path.join(self.dir, name)] = ix
 390                 for f in os.listdir(self.dir):
 391                     full = os.path.join(self.dir, f)
 392                     if f.endswith('.midx') and not d.get(full):
 393                         mx = PackMidx(full)
 394                         (mxd, mxf) = os.path.split(mx.name)
 395                         broken = 0
 396                         for n in mx.idxnames:
 397                             if not os.path.exists(os.path.join(mxd, n)):
 398                                 log(('warning: index %s missing\n' +
 399                                     '  used by %s\n') % (n, mxf))
 400                                 broken += 1
 401                         if broken:
 402                             del mx
 403                             unlink(full)
 404                         else:
 405                             midxl.append(mx)
 406                 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
 407                 for ix in midxl:
 408                     any = 0
 409                     for sub in ix.idxnames:
 410                         found = d.get(os.path.join(self.dir, sub))
 411                         if not found or isinstance(found, PackIdx):
 412                             # doesn't exist, or exists but not in a midx
 413                             d[ix.name] = ix
 414                             for name in ix.idxnames:
 415                                 d[os.path.join(self.dir, name)] = ix
 416                             any += 1
 417                             break
 418                     if not any and not ix.force_keep:
 419                         debug1('midx: removing redundant: %s\n'
 420                                % os.path.basename(ix.name))
 421                         unlink(ix.name)
 422             for f in os.listdir(self.dir):
 423                 full = os.path.join(self.dir, f)
 424                 if f.endswith('.idx') and not d.get(full):
 425                     try:
 426                         ix = open_idx(full)
 427                     except GitError, e:
 428                         add_error(e)
 429                         continue
 430                     d[full] = ix
 431             self.packs = list(set(d.values()))
 432         debug1('PackIdxList: using %d index%s.\n'
 433             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 434
 435     def packname_containing(self, hash):
 436         # figure out which pack contains a given hash.
 437         # FIXME: if the midx file format would just *store* this information,
 438         # we could calculate it a lot more efficiently.  But it's not needed
 439         # often, so let's do it like this.
 440         for f in os.listdir(self.dir):
 441             if f.endswith('.idx'):
 442                 full = os.path.join(self.dir, f)
 443                 try:
 444                     ix = open_idx(full)
 445                 except GitError, e:
 446                     add_error(e)
 447                     continue
 448                 if ix.exists(hash):
 449                     return full
 450
 451     def add(self, hash):
 452         """Insert an additional object in the list."""
 453         self.also[hash] = 1
 454
 455     def zap_also(self):
 456         """Remove all additional objects from the list."""
 457         self.also = {}
 458
 459
 460 def calc_hash(type, content):
 461     """Calculate some content's hash in the Git fashion."""
 462     header = '%s %d\0' % (type, len(content))
 463     sum = Sha1(header)
 464     sum.update(content)
 465     return sum.digest()
 466
 467
 468 def _shalist_sort_key(ent):
 469     (mode, name, id) = ent
 470     if stat.S_ISDIR(int(mode, 8)):
 471         return name + '/'
 472     else:
 473         return name
 474
 475
 476 def open_idx(filename):
 477     if filename.endswith('.idx'):
 478         f = open(filename, 'rb')
 479         header = f.read(8)
 480         if header[0:4] == '\377tOc':
 481             version = struct.unpack('!I', header[4:8])[0]
 482             if version == 2:
 483                 return PackIdxV2(filename, f)
 484             else:
 485                 raise GitError('%s: expected idx file version 2, got %d'
 486                                % (filename, version))
 487         elif len(header) == 8 and header[0:4] < '\377tOc':
 488             return PackIdxV1(filename, f)
 489         else:
 490             raise GitError('%s: unrecognized idx file header' % filename)
 491     elif filename.endswith('.midx'):
 492         return PackMidx(filename)
 493     else:
 494         raise GitError('idx filenames must end with .idx or .midx')
 495
 496
 497 def idxmerge(idxlist, final_progress=True):
 498     """Generate a list of all the objects reachable in a PackIdxList."""
 499     total = sum(len(i) for i in idxlist)
 500     iters = (iter(i) for i in idxlist)
 501     heap = [(next(it), it) for it in iters]
 502     heapq.heapify(heap)
 503     count = 0
 504     last = None
 505     while heap:
 506         if (count % 10024) == 0:
 507             progress('Reading indexes: %.2f%% (%d/%d)\r'
 508                      % (count*100.0/total, count, total))
 509         (e, it) = heap[0]
 510         if e != last:
 511             yield e
 512             last = e
 513         count += 1
 514         e = next(it)
 515         if e:
 516             heapq.heapreplace(heap, (e, it))
 517         else:
 518             heapq.heappop(heap)
 519     if final_progress:
 520         log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 521
 522
 523 def _make_objcache():
 524     return PackIdxList(repo('objects/pack'))
 525
 526 class PackWriter:
 527     """Writes Git objects insid a pack file."""
 528     def __init__(self, objcache_maker=_make_objcache):
 529         self.count = 0
 530         self.outbytes = 0
 531         self.filename = None
 532         self.file = None
 533         self.idx = None
 534         self.objcache_maker = objcache_maker
 535         self.objcache = None
 536
 537     def __del__(self):
 538         self.close()
 539
 540     def _open(self):
 541         if not self.file:
 542             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 543             self.file = os.fdopen(fd, 'w+b')
 544             assert(name.endswith('.pack'))
 545             self.filename = name[:-5]
 546             self.file.write('PACK\0\0\0\2\0\0\0\0')
 547             self.idx = list(list() for i in xrange(256))
 548
 549     # the 'sha' parameter is used in client.py's _raw_write(), but not needed
 550     # in this basic version.
 551     def _raw_write(self, datalist, sha):
 552         self._open()
 553         f = self.file
 554         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 555         # the file never has a *partial* blob.  So let's make sure it's
 556         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 557         # to our hashsplit algorithm.)  f.write() does its own buffering,
 558         # but that's okay because we'll flush it in _end().
 559         oneblob = ''.join(datalist)
 560         try:
 561             f.write(oneblob)
 562         except IOError, e:
 563             raise GitError, e, sys.exc_info()[2]
 564         nw = len(oneblob)
 565         crc = zlib.crc32(oneblob) & 0xffffffff
 566         self._update_idx(sha, crc, nw)
 567         self.outbytes += nw
 568         self.count += 1
 569         return nw, crc
 570
 571     def _update_idx(self, sha, crc, size):
 572         assert(sha)
 573         if self.idx:
 574             self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
 575
 576     def _write(self, sha, type, content):
 577         if verbose:
 578             log('>')
 579         if not sha:
 580             sha = calc_hash(type, content)
 581         size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
 582         return sha
 583
 584     def breakpoint(self):
 585         """Clear byte and object counts and return the last processed id."""
 586         id = self._end()
 587         self.outbytes = self.count = 0
 588         return id
 589
 590     def write(self, type, content):
 591         """Write an object in this pack file."""
 592         return self._write(calc_hash(type, content), type, content)
 593
 594     def _require_objcache(self):
 595         if self.objcache is None and self.objcache_maker:
 596             self.objcache = self.objcache_maker()
 597         if self.objcache is None:
 598             raise GitError(
 599                     "PackWriter not opened or can't check exists w/o objcache")
 600
 601     def exists(self, id):
 602         """Return non-empty if an object is found in the object cache."""
 603         self._require_objcache()
 604         return self.objcache.exists(id)
 605
 606     def maybe_write(self, type, content):
 607         """Write an object to the pack file if not present and return its id."""
 608         self._require_objcache()
 609         sha = calc_hash(type, content)
 610         if not self.exists(sha):
 611             self._write(sha, type, content)
 612             self.objcache.add(sha)
 613         return sha
 614
 615     def new_blob(self, blob):
 616         """Create a blob object in the pack with the supplied content."""
 617         return self.maybe_write('blob', blob)
 618
 619     def new_tree(self, shalist):
 620         """Create a tree object in the pack."""
 621         shalist = sorted(shalist, key = _shalist_sort_key)
 622         l = []
 623         for (mode,name,bin) in shalist:
 624             assert(mode)
 625             assert(mode != '0')
 626             assert(mode[0] != '0')
 627             assert(name)
 628             assert(len(bin) == 20)
 629             l.append('%s %s\0%s' % (mode,name,bin))
 630         return self.maybe_write('tree', ''.join(l))
 631
 632     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 633         l = []
 634         if tree: l.append('tree %s' % tree.encode('hex'))
 635         if parent: l.append('parent %s' % parent.encode('hex'))
 636         if author: l.append('author %s %s' % (author, _git_date(adate)))
 637         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 638         l.append('')
 639         l.append(msg)
 640         return self.maybe_write('commit', '\n'.join(l))
 641
 642     def new_commit(self, parent, tree, date, msg):
 643         """Create a commit object in the pack."""
 644         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 645         commit = self._new_commit(tree, parent,
 646                                   userline, date, userline, date,
 647                                   msg)
 648         return commit
 649
 650     def abort(self):
 651         """Remove the pack file from disk."""
 652         f = self.file
 653         if f:
 654             self.idx = None
 655             self.file = None
 656             f.close()
 657             os.unlink(self.filename + '.pack')
 658
 659     def _end(self, run_midx=True):
 660         f = self.file
 661         if not f: return None
 662         self.file = None
 663         self.objcache = None
 664         idx = self.idx
 665         self.idx = None
 666
 667         # update object count
 668         f.seek(8)
 669         cp = struct.pack('!i', self.count)
 670         assert(len(cp) == 4)
 671         f.write(cp)
 672
 673         # calculate the pack sha1sum
 674         f.seek(0)
 675         sum = Sha1()
 676         for b in chunkyreader(f):
 677             sum.update(b)
 678         packbin = sum.digest()
 679         f.write(packbin)
 680         f.close()
 681
 682         idx_f = open(self.filename + '.idx', 'wb')
 683         obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
 684         idx_f.close()
 685
 686         nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
 687         if os.path.exists(self.filename + '.map'):
 688             os.unlink(self.filename + '.map')
 689         os.rename(self.filename + '.pack', nameprefix + '.pack')
 690         os.rename(self.filename + '.idx', nameprefix + '.idx')
 691
 692         if run_midx:
 693             auto_midx(repo('objects/pack'))
 694         return nameprefix
 695
 696     def close(self, run_midx=True):
 697         """Close the pack file and move it to its definitive path."""
 698         return self._end(run_midx=run_midx)
 699
 700     def _write_pack_idx_v2(self, file, idx, packbin):
 701         sum = Sha1()
 702
 703         def write(data):
 704             file.write(data)
 705             sum.update(data)
 706
 707         write('\377tOc\0\0\0\2')
 708
 709         n = 0
 710         for part in idx:
 711             n += len(part)
 712             write(struct.pack('!i', n))
 713             part.sort(key=lambda x: x[0])
 714
 715         obj_list_sum = Sha1()
 716         for part in idx:
 717             for entry in part:
 718                 write(entry[0])
 719                 obj_list_sum.update(entry[0])
 720         for part in idx:
 721             for entry in part:
 722                 write(struct.pack('!I', entry[1]))
 723         ofs64_list = []
 724         for part in idx:
 725             for entry in part:
 726                 if entry[2] & 0x80000000:
 727                     write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
 728                     ofs64_list.append(struct.pack('!Q', entry[2]))
 729                 else:
 730                     write(struct.pack('!i', entry[2]))
 731         for ofs64 in ofs64_list:
 732             write(ofs64)
 733
 734         write(packbin)
 735         file.write(sum.digest())
 736         return obj_list_sum.hexdigest()
 737
 738
 739 def _git_date(date):
 740     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 741
 742
 743 def _gitenv():
 744     os.environ['GIT_DIR'] = os.path.abspath(repo())
 745
 746
 747 def list_refs(refname = None):
 748     """Generate a list of tuples in the form (refname,hash).
 749     If a ref name is specified, list only this particular ref.
 750     """
 751     argv = ['git', 'show-ref', '--']
 752     if refname:
 753         argv += [refname]
 754     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 755     out = p.stdout.read().strip()
 756     rv = p.wait()  # not fatal
 757     if rv:
 758         assert(not out)
 759     if out:
 760         for d in out.split('\n'):
 761             (sha, name) = d.split(' ', 1)
 762             yield (name, sha.decode('hex'))
 763
 764
 765 def read_ref(refname):
 766     """Get the commit id of the most recent commit made on a given ref."""
 767     l = list(list_refs(refname))
 768     if l:
 769         assert(len(l) == 1)
 770         return l[0][1]
 771     else:
 772         return None
 773
 774
 775 def rev_list(ref, count=None):
 776     """Generate a list of reachable commits in reverse chronological order.
 777
 778     This generator walks through commits, from child to parent, that are
 779     reachable via the specified ref and yields a series of tuples of the form
 780     (date,hash).
 781
 782     If count is a non-zero integer, limit the number of commits to "count"
 783     objects.
 784     """
 785     assert(not ref.startswith('-'))
 786     opts = []
 787     if count:
 788         opts += ['-n', str(atoi(count))]
 789     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 790     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 791     commit = None
 792     for row in p.stdout:
 793         s = row.strip()
 794         if s.startswith('commit '):
 795             commit = s[7:].decode('hex')
 796         else:
 797             date = int(s)
 798             yield (date, commit)
 799     rv = p.wait()  # not fatal
 800     if rv:
 801         raise GitError, 'git rev-list returned error %d' % rv
 802
 803
 804 def rev_get_date(ref):
 805     """Get the date of the latest commit on the specified ref."""
 806     for (date, commit) in rev_list(ref, count=1):
 807         return date
 808     raise GitError, 'no such commit %r' % ref
 809
 810
 811 def rev_parse(committish):
 812     """Resolve the full hash for 'committish', if it exists.
 813
 814     Should be roughly equivalent to 'git rev-parse'.
 815
 816     Returns the hex value of the hash if it is found, None if 'committish' does
 817     not correspond to anything.
 818     """
 819     head = read_ref(committish)
 820     if head:
 821         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 822         return head
 823
 824     pL = PackIdxList(repo('objects/pack'))
 825
 826     if len(committish) == 40:
 827         try:
 828             hash = committish.decode('hex')
 829         except TypeError:
 830             return None
 831
 832         if pL.exists(hash):
 833             return hash
 834
 835     return None
 836
 837
 838 def update_ref(refname, newval, oldval):
 839     """Change the commit pointed to by a branch."""
 840     if not oldval:
 841         oldval = ''
 842     assert(refname.startswith('refs/heads/'))
 843     p = subprocess.Popen(['git', 'update-ref', refname,
 844                           newval.encode('hex'), oldval.encode('hex')],
 845                          preexec_fn = _gitenv)
 846     _git_wait('git update-ref', p)
 847
 848
 849 def guess_repo(path=None):
 850     """Set the path value in the global variable "repodir".
 851     This makes bup look for an existing bup repository, but not fail if a
 852     repository doesn't exist. Usually, if you are interacting with a bup
 853     repository, you would not be calling this function but using
 854     check_repo_or_die().
 855     """
 856     global repodir
 857     if path:
 858         repodir = path
 859     if not repodir:
 860         repodir = os.environ.get('BUP_DIR')
 861         if not repodir:
 862             repodir = os.path.expanduser('~/.bup')
 863
 864
 865 def init_repo(path=None):
 866     """Create the Git bare repository for bup in a given path."""
 867     guess_repo(path)
 868     d = repo()  # appends a / to the path
 869     parent = os.path.dirname(os.path.dirname(d))
 870     if parent and not os.path.exists(parent):
 871         raise GitError('parent directory "%s" does not exist\n' % parent)
 872     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 873         raise GitError('"%d" exists but is not a directory\n' % d)
 874     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 875                          preexec_fn = _gitenv)
 876     _git_wait('git init', p)
 877     # Force the index version configuration in order to ensure bup works
 878     # regardless of the version of the installed Git binary.
 879     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 880                          stdout=sys.stderr, preexec_fn = _gitenv)
 881     _git_wait('git config', p)
 882
 883
 884 def check_repo_or_die(path=None):
 885     """Make sure a bup repository exists, and abort if not.
 886     If the path to a particular repository was not specified, this function
 887     initializes the default repository automatically.
 888     """
 889     guess_repo(path)
 890     if not os.path.isdir(repo('objects/pack/.')):
 891         if repodir == home_repodir:
 892             init_repo()
 893         else:
 894             log('error: %r is not a bup/git repository\n' % repo())
 895             sys.exit(15)
 896
 897
 898 def treeparse(buf):
 899     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 900     ofs = 0
 901     while ofs < len(buf):
 902         z = buf[ofs:].find('\0')
 903         assert(z > 0)
 904         spl = buf[ofs:ofs+z].split(' ', 1)
 905         assert(len(spl) == 2)
 906         sha = buf[ofs+z+1:ofs+z+1+20]
 907         ofs += z+1+20
 908         yield (spl[0], spl[1], sha)
 909
 910
 911 _ver = None
 912 def ver():
 913     """Get Git's version and ensure a usable version is installed.
 914
 915     The returned version is formatted as an ordered tuple with each position
 916     representing a digit in the version tag. For example, the following tuple
 917     would represent version 1.6.6.9:
 918
 919         ('1', '6', '6', '9')
 920     """
 921     global _ver
 922     if not _ver:
 923         p = subprocess.Popen(['git', '--version'],
 924                              stdout=subprocess.PIPE)
 925         gvs = p.stdout.read()
 926         _git_wait('git --version', p)
 927         m = re.match(r'git version (\S+.\S+)', gvs)
 928         if not m:
 929             raise GitError('git --version weird output: %r' % gvs)
 930         _ver = tuple(m.group(1).split('.'))
 931     needed = ('1','5', '3', '1')
 932     if _ver < needed:
 933         raise GitError('git version %s or higher is required; you have %s'
 934                        % ('.'.join(needed), '.'.join(_ver)))
 935     return _ver
 936
 937
 938 def _git_wait(cmd, p):
 939     rv = p.wait()
 940     if rv != 0:
 941         raise GitError('%s returned %d' % (cmd, rv))
 942
 943
 944 def _git_capture(argv):
 945     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 946     r = p.stdout.read()
 947     _git_wait(repr(argv), p)
 948     return r
 949
 950
 951 class _AbortableIter:
 952     def __init__(self, it, onabort = None):
 953         self.it = it
 954         self.onabort = onabort
 955         self.done = None
 956
 957     def __iter__(self):
 958         return self
 959
 960     def next(self):
 961         try:
 962             return self.it.next()
 963         except StopIteration, e:
 964             self.done = True
 965             raise
 966         except:
 967             self.abort()
 968             raise
 969
 970     def abort(self):
 971         """Abort iteration and call the abortion callback, if needed."""
 972         if not self.done:
 973             self.done = True
 974             if self.onabort:
 975                 self.onabort()
 976
 977     def __del__(self):
 978         self.abort()
 979
 980
 981 _ver_warned = 0
 982 class CatPipe:
 983     """Link to 'git cat-file' that is used to retrieve blob data."""
 984     def __init__(self):
 985         global _ver_warned
 986         wanted = ('1','5','6')
 987         if ver() < wanted:
 988             if not _ver_warned:
 989                 log('warning: git version < %s; bup will be slow.\n'
 990                     % '.'.join(wanted))
 991                 _ver_warned = 1
 992             self.get = self._slow_get
 993         else:
 994             self.p = self.inprogress = None
 995             self.get = self._fast_get
 996
 997     def _abort(self):
 998         if self.p:
 999             self.p.stdout.close()
1000             self.p.stdin.close()
1001         self.p = None
1002         self.inprogress = None
1003
1004     def _restart(self):
1005         self._abort()
1006         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1007                                   stdin=subprocess.PIPE,
1008                                   stdout=subprocess.PIPE,
1009                                   close_fds = True,
1010                                   bufsize = 4096,
1011                                   preexec_fn = _gitenv)
1012
1013     def _fast_get(self, id):
1014         if not self.p or self.p.poll() != None:
1015             self._restart()
1016         assert(self.p)
1017         assert(self.p.poll() == None)
1018         if self.inprogress:
1019             log('_fast_get: opening %r while %r is open'
1020                 % (id, self.inprogress))
1021         assert(not self.inprogress)
1022         assert(id.find('\n') < 0)
1023         assert(id.find('\r') < 0)
1024         assert(not id.startswith('-'))
1025         self.inprogress = id
1026         self.p.stdin.write('%s\n' % id)
1027         self.p.stdin.flush()
1028         hdr = self.p.stdout.readline()
1029         if hdr.endswith(' missing\n'):
1030             self.inprogress = None
1031             raise KeyError('blob %r is missing' % id)
1032         spl = hdr.split(' ')
1033         if len(spl) != 3 or len(spl[0]) != 40:
1034             raise GitError('expected blob, got %r' % spl)
1035         (hex, type, size) = spl
1036
1037         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1038                            onabort = self._abort)
1039         try:
1040             yield type
1041             for blob in it:
1042                 yield blob
1043             assert(self.p.stdout.readline() == '\n')
1044             self.inprogress = None
1045         except Exception, e:
1046             it.abort()
1047             raise
1048
1049     def _slow_get(self, id):
1050         assert(id.find('\n') < 0)
1051         assert(id.find('\r') < 0)
1052         assert(id[0] != '-')
1053         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1054         yield type
1055
1056         p = subprocess.Popen(['git', 'cat-file', type, id],
1057                              stdout=subprocess.PIPE,
1058                              preexec_fn = _gitenv)
1059         for blob in chunkyreader(p.stdout):
1060             yield blob
1061         _git_wait('git cat-file', p)
1062
1063     def _join(self, it):
1064         type = it.next()
1065         if type == 'blob':
1066             for blob in it:
1067                 yield blob
1068         elif type == 'tree':
1069             treefile = ''.join(it)
1070             for (mode, name, sha) in treeparse(treefile):
1071                 for blob in self.join(sha.encode('hex')):
1072                     yield blob
1073         elif type == 'commit':
1074             treeline = ''.join(it).split('\n')[0]
1075             assert(treeline.startswith('tree '))
1076             for blob in self.join(treeline[5:]):
1077                 yield blob
1078         else:
1079             raise GitError('invalid object type %r: expected blob/tree/commit'
1080                            % type)
1081
1082     def join(self, id):
1083         """Generate a list of the content of all blobs that can be reached
1084         from an object.  The hash given in 'id' must point to a blob, a tree
1085         or a commit. The content of all blobs that can be seen from trees or
1086         commits will be added to the list.
1087         """
1088         try:
1089             for d in self._join(self.get(id)):
1090                 yield d
1091         except StopIteration:
1092             log('booger!\n')
1093
def tags():
    """Return a dictionary mapping each tagged hash to the list of tag names
    that point at it: {hash: [tag_names, ...]}.

    Only refs reported by list_refs() whose name starts with 'refs/tags/'
    are considered; that prefix is stripped from the stored names.
    """
    by_hash = {}
    for (refname, target) in list_refs():
        if not refname.startswith('refs/tags/'):
            continue
        # Several tags may point at the same hash, hence the list.
        by_hash.setdefault(target, []).append(refname[10:])
    return by_hash