lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, heapq
   6 from bup.helpers import *
   7 from bup import _helpers
   8
# Version of the .midx file format this module understands; PackMidx
# ignores files whose header carries any other version.
MIDX_VERSION = 2

# Nonzero makes PackWriter._write() log a '>' for every object written.
verbose = 0
# Nonzero forces PackIdxList.refresh() to act as if skip_midx were True.
ignore_midx = 0
# Default repository location; check_repo_or_die() compares repodir to it.
home_repodir = os.path.expanduser('~/.bup')
# Path of the active repository; set by guess_repo(), read by repo().
repodir = None

# Git object type name <-> numeric type code used in pack object headers.
_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Statistics updated by the index lookup code: number of lookups started
# and number of probe steps taken.
_total_searches = 0
_total_steps = 0
  21
  22
  23 class GitError(Exception):
  24     pass
  25
  26
  27 def repo(sub = ''):
  28     """Get the path to the git repository or one of its subdirectories."""
  29     global repodir
  30     if not repodir:
  31         raise GitError('You should call check_repo_or_die()')
  32
  33     # If there's a .git subdirectory, then the actual repo is in there.
  34     gd = os.path.join(repodir, '.git')
  35     if os.path.exists(gd):
  36         repodir = gd
  37
  38     return os.path.join(repodir, sub)
  39
  40
  41 def auto_midx(objdir):
  42     main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
  43     args = [main_exe, 'midx', '--auto', '--dir', objdir]
  44     try:
  45         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  46     except OSError, e:
  47         # make sure 'args' gets printed to help with debugging
  48         add_error('%r: exception: %s' % (args, e))
  49         raise
  50     if rv:
  51         add_error('%r: returned %d' % (args, rv))
  52
  53
  54 def mangle_name(name, mode, gitmode):
  55     """Mangle a file name to present an abstract name for segmented files.
  56     Mangled file names will have the ".bup" extension added to them. If a
  57     file's name already ends with ".bup", a ".bupl" extension is added to
  58     disambiguate normal files from semgmented ones.
  59     """
  60     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  61         return name + '.bup'
  62     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  63         return name + '.bupl'
  64     else:
  65         return name
  66
  67
  68 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  69 def demangle_name(name):
  70     """Remove name mangling from a file name, if necessary.
  71
  72     The return value is a tuple (demangled_filename,mode), where mode is one of
  73     the following:
  74
  75     * BUP_NORMAL  : files that should be read as-is from the repository
  76     * BUP_CHUNKED : files that were chunked and need to be assembled
  77
  78     For more information on the name mangling algorythm, see mangle_name()
  79     """
  80     if name.endswith('.bupl'):
  81         return (name[:-5], BUP_NORMAL)
  82     elif name.endswith('.bup'):
  83         return (name[:-4], BUP_CHUNKED)
  84     else:
  85         return (name, BUP_NORMAL)
  86
  87
  88 def _encode_packobj(type, content):
  89     szout = ''
  90     sz = len(content)
  91     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  92     sz >>= 4
  93     while 1:
  94         if sz: szbits |= 0x80
  95         szout += chr(szbits)
  96         if not sz:
  97             break
  98         szbits = sz & 0x7f
  99         sz >>= 7
 100     z = zlib.compressobj(1)
 101     yield szout
 102     yield z.compress(content)
 103     yield z.flush()
 104
 105
 106 def _encode_looseobj(type, content):
 107     z = zlib.compressobj(1)
 108     yield z.compress('%s %d\0' % (type, len(content)))
 109     yield z.compress(content)
 110     yield z.flush()
 111
 112
 113 def _decode_looseobj(buf):
 114     assert(buf);
 115     s = zlib.decompress(buf)
 116     i = s.find('\0')
 117     assert(i > 0)
 118     l = s[:i].split(' ')
 119     type = l[0]
 120     sz = int(l[1])
 121     content = s[i+1:]
 122     assert(type in _typemap)
 123     assert(sz == len(content))
 124     return (type, content)
 125
 126
 127 def _decode_packobj(buf):
 128     assert(buf)
 129     c = ord(buf[0])
 130     type = _typermap[(c & 0x70) >> 4]
 131     sz = c & 0x0f
 132     shift = 4
 133     i = 0
 134     while c & 0x80:
 135         i += 1
 136         c = ord(buf[i])
 137         sz |= (c & 0x7f) << shift
 138         shift += 7
 139         if not (c & 0x80):
 140             break
 141     return (type, zlib.decompress(buf[i+1:]))
 142
 143
 144 class PackIdx:
 145     def __init__(self):
 146         assert(0)
 147
 148     def find_offset(self, hash):
 149         """Get the offset of an object inside the index file."""
 150         idx = self._idx_from_hash(hash)
 151         if idx != None:
 152             return self._ofs_from_idx(idx)
 153         return None
 154
 155     def exists(self, hash):
 156         """Return nonempty if the object exists in this index."""
 157         return hash and (self._idx_from_hash(hash) != None) and True or None
 158
 159     def __len__(self):
 160         return int(self.fanout[255])
 161
 162     def _idx_from_hash(self, hash):
 163         global _total_searches, _total_steps
 164         _total_searches += 1
 165         assert(len(hash) == 20)
 166         b1 = ord(hash[0])
 167         start = self.fanout[b1-1] # range -1..254
 168         end = self.fanout[b1] # range 0..255
 169         want = str(hash)
 170         _total_steps += 1  # lookup table is a step
 171         while start < end:
 172             _total_steps += 1
 173             mid = start + (end-start)/2
 174             v = self._idx_to_hash(mid)
 175             if v < want:
 176                 start = mid+1
 177             elif v > want:
 178                 end = mid
 179             else: # got it!
 180                 return mid
 181         return None
 182
 183
 184 class PackIdxV1(PackIdx):
 185     """Object representation of a Git pack index (version 1) file."""
 186     def __init__(self, filename, f):
 187         self.name = filename
 188         self.idxnames = [self.name]
 189         self.map = mmap_read(f)
 190         self.fanout = list(struct.unpack('!256I',
 191                                          str(buffer(self.map, 0, 256*4))))
 192         self.fanout.append(0)  # entry "-1"
 193         nsha = self.fanout[255]
 194         self.shatable = buffer(self.map, 256*4, nsha*24)
 195
 196     def _ofs_from_idx(self, idx):
 197         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 198
 199     def _idx_to_hash(self, idx):
 200         return str(self.shatable[idx*24+4 : idx*24+24])
 201
 202     def __iter__(self):
 203         for i in xrange(self.fanout[255]):
 204             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 205
 206
 207 class PackIdxV2(PackIdx):
 208     """Object representation of a Git pack index (version 2) file."""
 209     def __init__(self, filename, f):
 210         self.name = filename
 211         self.idxnames = [self.name]
 212         self.map = mmap_read(f)
 213         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 214         self.fanout = list(struct.unpack('!256I',
 215                                          str(buffer(self.map, 8, 256*4))))
 216         self.fanout.append(0)  # entry "-1"
 217         nsha = self.fanout[255]
 218         self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
 219         self.ofstable = buffer(self.map,
 220                                8 + 256*4 + nsha*20 + nsha*4,
 221                                nsha*4)
 222         self.ofs64table = buffer(self.map,
 223                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 224
 225     def _ofs_from_idx(self, idx):
 226         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 227         if ofs & 0x80000000:
 228             idx64 = ofs & 0x7fffffff
 229             ofs = struct.unpack('!Q',
 230                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 231         return ofs
 232
 233     def _idx_to_hash(self, idx):
 234         return str(self.shatable[idx*20:(idx+1)*20])
 235
 236     def __iter__(self):
 237         for i in xrange(self.fanout[255]):
 238             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 239
 240
# Module-level alias for _helpers.extract_bits; PackMidx.exists() calls it
# as extract_bits(hash, bits) to pick the fanout slot for a hash.
extract_bits = _helpers.extract_bits
 242
 243
class PackMidx:
    """Wrapper which contains data from multiple index files.
    Multiple index (.midx) files constitute a wrapper around index (.idx) files
    and make it possible for bup to expand Git's indexing capabilities to vast
    amounts of files.

    File layout as read by __init__: 4-byte magic 'MIDX', 4-byte
    network-order version, 4 bytes holding the number of fanout bits, a
    fanout table of 2**bits 4-byte entries, the 20-byte hashes, and finally
    the NUL-separated names of the .idx files that were merged.
    """
    def __init__(self, filename):
        """Map filename and parse its header.

        A file with a bad magic or an unsupported version is logged and
        left as an empty index (see _init_failed); force_keep then records
        whether PackIdxList.refresh() may still delete it as redundant.
        """
        self.name = filename
        self.force_keep = False
        assert(filename.endswith('.midx'))
        self.map = mmap_read(open(filename))
        if str(self.map[0:4]) != 'MIDX':
            log('Warning: skipping: invalid MIDX header in %r\n' % filename)
            self.force_keep = True
            return self._init_failed()
        ver = struct.unpack('!I', self.map[4:8])[0]
        if ver < MIDX_VERSION:
            log('Warning: ignoring old-style (v%d) midx %r\n'
                % (ver, filename))
            self.force_keep = False  # old stuff is boring
            return self._init_failed()
        if ver > MIDX_VERSION:
            log('Warning: ignoring too-new (v%d) midx %r\n'
                % (ver, filename))
            self.force_keep = True  # new stuff is exciting
            return self._init_failed()

        # NOTE(review): _helpers.firstword presumably decodes the first
        # four bytes as a big-endian integer -- confirm in _helpers.
        self.bits = _helpers.firstword(self.map[8:12])
        self.entries = 2**self.bits
        self.fanout = buffer(self.map, 12, self.entries*4)
        shaofs = 12 + self.entries*4
        # the last fanout entry is the total number of hashes
        nsha = self._fanget(self.entries-1)
        self.shalist = buffer(self.map, shaofs, nsha*20)
        self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')

    def _init_failed(self):
        """Set the attributes up as an index with a zero fanout entry and
        no idx names, so lookups find nothing."""
        self.bits = 0
        self.entries = 1
        self.fanout = buffer('\0\0\0\0')
        self.shalist = buffer('\0'*20)
        self.idxnames = []

    def _fanget(self, i):
        """Return fanout entry i (the hash count up to and including slot i)."""
        start = i*4
        s = self.fanout[start:start+4]
        return _helpers.firstword(s)

    def _get(self, i):
        """Return the i-th hash of the list as a 20-byte string."""
        return str(self.shalist[i*20:(i+1)*20])

    def exists(self, hash):
        """Return nonempty if the object exists in the index files.

        The fanout table narrows the search to the slot selected by the
        leading self.bits bits of the hash.  Inside that range the probe
        position is estimated by linear interpolation on the first 32-bit
        word of the hash, and the bounds shrink after every comparison.
        Returns True when found and None otherwise.
        """
        global _total_searches, _total_steps
        _total_searches += 1
        want = str(hash)
        el = extract_bits(want, self.bits)
        if el:
            start = self._fanget(el-1)
            startv = el << (32-self.bits)
        else:
            start = 0
            startv = 0
        end = self._fanget(el)
        endv = (el+1) << (32-self.bits)
        _total_steps += 1   # lookup table is a step
        hashv = _helpers.firstword(hash)
        #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
        while start < end:
            _total_steps += 1
            #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
            # interpolate: where would hashv sit between startv and endv?
            mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
            #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
            v = self._get(mid)
            #print '    %08x' % self._num(v)
            if v < want:
                start = mid+1
                startv = _helpers.firstword(v)
            elif v > want:
                end = mid
                endv = _helpers.firstword(v)
            else: # got it!
                return True
        return None

    def __iter__(self):
        """Yield every hash in the list as a 20-byte buffer, in file order."""
        for i in xrange(self._fanget(self.entries-1)):
            yield buffer(self.shalist, i*20, 20)

    def __len__(self):
        """Return the number of hashes, i.e. the last fanout entry."""
        return int(self._fanget(self.entries-1))
 334
 335
# Number of live PackIdxList instances; the class asserts there is only one.
_mpi_count = 0
class PackIdxList:
    """Searchable collection of the .idx/.midx files found in one directory.

    Attributes:
      dir   -- directory that is scanned for index files
      packs -- the open PackIdx/PackMidx objects, most recently hit first
      also  -- extra object ids registered with add() and reported as present
    """
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = {}
        self.packs = []
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        """Iterate over the ids of all packs, merged by idxmerge()."""
        return iter(idxmerge(self.packs))

    def __len__(self):
        """Return the sum of the lengths of all open indexes."""
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash):
        """Return nonempty if the object exists in the index files.

        The result is True for ids registered through add(), the name of
        the index that contains the id otherwise, or None if nothing
        matches.
        """
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        for i in range(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            if p.exists(hash):
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return p.name
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        skip_midx = skip_midx or ignore_midx
        # d maps a full path to the index object that covers it; an .idx
        # path covered by a midx maps to that PackMidx.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                # idx files covered by an already-open midx stay covered
                for ix in self.packs:
                    if isinstance(ix, PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                # open the midx files not seen yet; a midx that refers to
                # a missing idx is deleted from disk
                for f in os.listdir(self.dir):
                    full = os.path.join(self.dir, f)
                    if f.endswith('.midx') and not d.get(full):
                        mx = PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = 0
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                    '  used by %s\n') % (n, mxf))
                                broken += 1
                        if broken:
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                # biggest midx first, so that smaller ones whose contents
                # are already covered can be detected as redundant
                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                for ix in midxl:
                    any = 0
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            d[ix.name] = ix
                            for name in ix.idxnames:
                                d[os.path.join(self.dir, name)] = ix
                            any += 1
                            break
                    if not any and not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        unlink(ix.name)
            # open every .idx that no midx covers; unreadable ones are
            # reported through add_error() and skipped
            for f in os.listdir(self.dir):
                full = os.path.join(self.dir, f)
                if f.endswith('.idx') and not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError, e:
                        add_error(e)
                        continue
                    d[full] = ix
            # several paths may map to the same midx, hence the set()
            self.packs = list(set(d.values()))
        debug1('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def packname_containing(self, hash):
        """Return the full path of the first .idx in self.dir that contains
        hash, or None if no readable .idx does."""
        # figure out which pack contains a given hash.
        # FIXME: if the midx file format would just *store* this information,
        # we could calculate it a lot more efficiently.  But it's not needed
        # often, so let's do it like this.
        for f in os.listdir(self.dir):
            if f.endswith('.idx'):
                full = os.path.join(self.dir, f)
                try:
                    ix = open_idx(full)
                except GitError, e:
                    add_error(e)
                    continue
                if ix.exists(hash):
                    return full

    def add(self, hash):
        """Insert an additional object in the list."""
        self.also[hash] = 1

    def zap_also(self):
        """Remove all additional objects from the list."""
        self.also = {}
 463
 464
 465 def calc_hash(type, content):
 466     """Calculate some content's hash in the Git fashion."""
 467     header = '%s %d\0' % (type, len(content))
 468     sum = Sha1(header)
 469     sum.update(content)
 470     return sum.digest()
 471
 472
 473 def _shalist_sort_key(ent):
 474     (mode, name, id) = ent
 475     if stat.S_ISDIR(int(mode, 8)):
 476         return name + '/'
 477     else:
 478         return name
 479
 480
 481 def open_idx(filename):
 482     if filename.endswith('.idx'):
 483         f = open(filename, 'rb')
 484         header = f.read(8)
 485         if header[0:4] == '\377tOc':
 486             version = struct.unpack('!I', header[4:8])[0]
 487             if version == 2:
 488                 return PackIdxV2(filename, f)
 489             else:
 490                 raise GitError('%s: expected idx file version 2, got %d'
 491                                % (filename, version))
 492         elif len(header) == 8 and header[0:4] < '\377tOc':
 493             return PackIdxV1(filename, f)
 494         else:
 495             raise GitError('%s: unrecognized idx file header' % filename)
 496     elif filename.endswith('.midx'):
 497         return PackMidx(filename)
 498     else:
 499         raise GitError('idx filenames must end with .idx or .midx')
 500
 501
 502 def idxmerge(idxlist, final_progress=True):
 503     """Generate a list of all the objects reachable in a PackIdxList."""
 504     total = sum(len(i) for i in idxlist)
 505     iters = (iter(i) for i in idxlist)
 506     heap = [(next(it), it) for it in iters]
 507     heapq.heapify(heap)
 508     count = 0
 509     last = None
 510     while heap:
 511         if (count % 10024) == 0:
 512             progress('Reading indexes: %.2f%% (%d/%d)\r'
 513                      % (count*100.0/total, count, total))
 514         (e, it) = heap[0]
 515         if e != last:
 516             yield e
 517             last = e
 518         count += 1
 519         e = next(it)
 520         if e:
 521             heapq.heapreplace(heap, (e, it))
 522         else:
 523             heapq.heappop(heap)
 524     if final_progress:
 525         log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 526
 527
 528 def _make_objcache():
 529     return PackIdxList(repo('objects/pack'))
 530
class PackWriter:
    """Writes Git objects inside a pack file.

    Objects are appended to a temporary '.pack' file created under the
    repository's 'objects' directory.  When the pack is finished, a
    version 2 '.idx' file is written and both files are renamed into
    'objects/pack'.

    Attributes:
      count, outbytes -- number of objects / bytes written so far
      filename        -- temporary pack path without the '.pack' suffix
      file            -- open temporary pack file, None when no pack is open
      idx             -- 256 lists of (sha, crc, offset), one per first byte
      objcache        -- object used by exists(), built lazily by
                         objcache_maker
    """
    def __init__(self, objcache_maker=_make_objcache):
        self.count = 0
        self.outbytes = 0
        self.filename = None
        self.file = None
        self.idx = None
        self.objcache_maker = objcache_maker
        self.objcache = None

    def __del__(self):
        self.close()

    def _open(self):
        """Create the temporary pack file unless one is already open."""
        if not self.file:
            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
            self.file = os.fdopen(fd, 'w+b')
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            # header: magic, version 2 and an object count of 0 that
            # _end() overwrites with the real count
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            self.idx = list(list() for i in xrange(256))

    # the 'sha' parameter is used in client.py's _raw_write(), but not needed
    # in this basic version.
    def _raw_write(self, datalist, sha):
        """Append the joined pieces of datalist to the pack as one object.

        Returns (bytes_written, crc32).  An IOError from the write is
        re-raised as GitError with the original traceback.
        """
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob.  So let's make sure it's
        # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.)  f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError, e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        """Record an object that was just written; its offset is the
        current file position minus its size."""
        assert(sha)
        if self.idx:
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        """Encode and append one object; returns its sha, which is computed
        here when the caller passes a false value."""
        if verbose:
            log('>')
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
        return sha

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end()
        self.outbytes = self.count = 0
        return id

    def write(self, type, content):
        """Write an object in this pack file."""
        return self._write(calc_hash(type, content), type, content)

    def _require_objcache(self):
        """Build the objcache on first use; raise GitError if there is no
        objcache and no way to make one."""
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                    "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        self._require_objcache()
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            self.objcache.add(sha)
        return sha

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack.

        shalist holds (mode, name, bin) entries: mode is a non-empty string
        that does not start with '0' and bin is a 20-byte id.  The entries
        are sorted with _shalist_sort_key before being written.
        """
        shalist = sorted(shalist, key = _shalist_sort_key)
        l = []
        for (mode,name,bin) in shalist:
            assert(mode)
            assert(mode != '0')
            assert(mode[0] != '0')
            assert(name)
            assert(len(bin) == 20)
            l.append('%s %s\0%s' % (mode,name,bin))
        return self.maybe_write('tree', ''.join(l))

    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        """Build the text of a commit from the given fields and write it.

        tree and parent are binary ids (hex-encoded here); header lines
        whose value is false are left out.
        """
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))

    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # author and committer are both the current user at 'date'
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
                                  msg)
        return commit

    def abort(self):
        """Remove the pack file from disk."""
        f = self.file
        if f:
            self.idx = None
            self.file = None
            f.close()
            os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        """Finish the open pack, if any, and move it into 'objects/pack'.

        Returns the final path prefix (without '.pack'/'.idx'), or None
        when no pack was open.  Also drops the objcache and, if run_midx is
        true, runs auto_midx() on the pack directory.
        """
        f = self.file
        if not f: return None
        self.file = None
        self.objcache = None
        idx = self.idx
        self.idx = None

        # update object count
        f.seek(8)
        cp = struct.pack('!i', self.count)
        assert(len(cp) == 4)
        f.write(cp)

        # calculate the pack sha1sum
        f.seek(0)
        sum = Sha1()
        for b in chunkyreader(f):
            sum.update(b)
        packbin = sum.digest()
        f.write(packbin)
        f.close()

        idx_f = open(self.filename + '.idx', 'wb')
        obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
        idx_f.close()

        # the final name is derived from the hash of the sorted object ids
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')

        if run_midx:
            auto_midx(repo('objects/pack'))
        return nameprefix

    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        return self._end(run_midx=run_midx)

    def _write_pack_idx_v2(self, file, idx, packbin):
        """Write a version 2 pack index to 'file'.

        idx is the list of 256 per-first-byte lists of (sha, crc, offset)
        and packbin the pack checksum.  The sections are written in this
        order: magic/version, fanout table, ids, crcs, 4-byte offsets,
        8-byte offsets, pack checksum, and the SHA-1 of everything written
        before it.  Returns the hex SHA-1 of the sorted list of ids.
        """
        sum = Sha1()

        def write(data):
            # everything written through here is part of the idx checksum
            file.write(data)
            sum.update(data)

        write('\377tOc\0\0\0\2')

        # fanout table of cumulative counts; each part is sorted in place
        n = 0
        for part in idx:
            n += len(part)
            write(struct.pack('!i', n))
            part.sort(key=lambda x: x[0])

        obj_list_sum = Sha1()
        for part in idx:
            for entry in part:
                write(entry[0])
                obj_list_sum.update(entry[0])
        for part in idx:
            for entry in part:
                write(struct.pack('!I', entry[1]))
        ofs64_list = []
        for part in idx:
            for entry in part:
                if entry[2] & 0x80000000:
                    # offset has bit 31 set: store a flagged index into
                    # the 64-bit offset table instead
                    write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
                    ofs64_list.append(struct.pack('!Q', entry[2]))
                else:
                    write(struct.pack('!i', entry[2]))
        for ofs64 in ofs64_list:
            write(ofs64)

        write(packbin)
        file.write(sum.digest())
        return obj_list_sum.hexdigest()
 742
 743
 744 def _git_date(date):
 745     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 746
 747
 748 def _gitenv():
 749     os.environ['GIT_DIR'] = os.path.abspath(repo())
 750
 751
 752 def list_refs(refname = None):
 753     """Generate a list of tuples in the form (refname,hash).
 754     If a ref name is specified, list only this particular ref.
 755     """
 756     argv = ['git', 'show-ref', '--']
 757     if refname:
 758         argv += [refname]
 759     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 760     out = p.stdout.read().strip()
 761     rv = p.wait()  # not fatal
 762     if rv:
 763         assert(not out)
 764     if out:
 765         for d in out.split('\n'):
 766             (sha, name) = d.split(' ', 1)
 767             yield (name, sha.decode('hex'))
 768
 769
 770 def read_ref(refname):
 771     """Get the commit id of the most recent commit made on a given ref."""
 772     l = list(list_refs(refname))
 773     if l:
 774         assert(len(l) == 1)
 775         return l[0][1]
 776     else:
 777         return None
 778
 779
 780 def rev_list(ref, count=None):
 781     """Generate a list of reachable commits in reverse chronological order.
 782
 783     This generator walks through commits, from child to parent, that are
 784     reachable via the specified ref and yields a series of tuples of the form
 785     (date,hash).
 786
 787     If count is a non-zero integer, limit the number of commits to "count"
 788     objects.
 789     """
 790     assert(not ref.startswith('-'))
 791     opts = []
 792     if count:
 793         opts += ['-n', str(atoi(count))]
 794     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 795     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 796     commit = None
 797     for row in p.stdout:
 798         s = row.strip()
 799         if s.startswith('commit '):
 800             commit = s[7:].decode('hex')
 801         else:
 802             date = int(s)
 803             yield (date, commit)
 804     rv = p.wait()  # not fatal
 805     if rv:
 806         raise GitError, 'git rev-list returned error %d' % rv
 807
 808
 809 def rev_get_date(ref):
 810     """Get the date of the latest commit on the specified ref."""
 811     for (date, commit) in rev_list(ref, count=1):
 812         return date
 813     raise GitError, 'no such commit %r' % ref
 814
 815
 816 def rev_parse(committish):
 817     """Resolve the full hash for 'committish', if it exists.
 818
 819     Should be roughly equivalent to 'git rev-parse'.
 820
 821     Returns the hex value of the hash if it is found, None if 'committish' does
 822     not correspond to anything.
 823     """
 824     head = read_ref(committish)
 825     if head:
 826         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 827         return head
 828
 829     pL = PackIdxList(repo('objects/pack'))
 830
 831     if len(committish) == 40:
 832         try:
 833             hash = committish.decode('hex')
 834         except TypeError:
 835             return None
 836
 837         if pL.exists(hash):
 838             return hash
 839
 840     return None
 841
 842
 843 def update_ref(refname, newval, oldval):
 844     """Change the commit pointed to by a branch."""
 845     if not oldval:
 846         oldval = ''
 847     assert(refname.startswith('refs/heads/'))
 848     p = subprocess.Popen(['git', 'update-ref', refname,
 849                           newval.encode('hex'), oldval.encode('hex')],
 850                          preexec_fn = _gitenv)
 851     _git_wait('git update-ref', p)
 852
 853
 854 def guess_repo(path=None):
 855     """Set the path value in the global variable "repodir".
 856     This makes bup look for an existing bup repository, but not fail if a
 857     repository doesn't exist. Usually, if you are interacting with a bup
 858     repository, you would not be calling this function but using
 859     check_repo_or_die().
 860     """
 861     global repodir
 862     if path:
 863         repodir = path
 864     if not repodir:
 865         repodir = os.environ.get('BUP_DIR')
 866         if not repodir:
 867             repodir = os.path.expanduser('~/.bup')
 868
 869
 870 def init_repo(path=None):
 871     """Create the Git bare repository for bup in a given path."""
 872     guess_repo(path)
 873     d = repo()  # appends a / to the path
 874     parent = os.path.dirname(os.path.dirname(d))
 875     if parent and not os.path.exists(parent):
 876         raise GitError('parent directory "%s" does not exist\n' % parent)
 877     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 878         raise GitError('"%d" exists but is not a directory\n' % d)
 879     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 880                          preexec_fn = _gitenv)
 881     _git_wait('git init', p)
 882     # Force the index version configuration in order to ensure bup works
 883     # regardless of the version of the installed Git binary.
 884     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 885                          stdout=sys.stderr, preexec_fn = _gitenv)
 886     _git_wait('git config', p)
 887
 888
 889 def check_repo_or_die(path=None):
 890     """Make sure a bup repository exists, and abort if not.
 891     If the path to a particular repository was not specified, this function
 892     initializes the default repository automatically.
 893     """
 894     guess_repo(path)
 895     if not os.path.isdir(repo('objects/pack/.')):
 896         if repodir == home_repodir:
 897             init_repo()
 898         else:
 899             log('error: %r is not a bup/git repository\n' % repo())
 900             sys.exit(15)
 901
 902
 903 def treeparse(buf):
 904     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 905     ofs = 0
 906     while ofs < len(buf):
 907         z = buf[ofs:].find('\0')
 908         assert(z > 0)
 909         spl = buf[ofs:ofs+z].split(' ', 1)
 910         assert(len(spl) == 2)
 911         sha = buf[ofs+z+1:ofs+z+1+20]
 912         ofs += z+1+20
 913         yield (spl[0], spl[1], sha)
 914
 915
 916 _ver = None
 917 def ver():
 918     """Get Git's version and ensure a usable version is installed.
 919
 920     The returned version is formatted as an ordered tuple with each position
 921     representing a digit in the version tag. For example, the following tuple
 922     would represent version 1.6.6.9:
 923
 924         ('1', '6', '6', '9')
 925     """
 926     global _ver
 927     if not _ver:
 928         p = subprocess.Popen(['git', '--version'],
 929                              stdout=subprocess.PIPE)
 930         gvs = p.stdout.read()
 931         _git_wait('git --version', p)
 932         m = re.match(r'git version (\S+.\S+)', gvs)
 933         if not m:
 934             raise GitError('git --version weird output: %r' % gvs)
 935         _ver = tuple(m.group(1).split('.'))
 936     needed = ('1','5', '3', '1')
 937     if _ver < needed:
 938         raise GitError('git version %s or higher is required; you have %s'
 939                        % ('.'.join(needed), '.'.join(_ver)))
 940     return _ver
 941
 942
 943 def _git_wait(cmd, p):
 944     rv = p.wait()
 945     if rv != 0:
 946         raise GitError('%s returned %d' % (cmd, rv))
 947
 948
 949 def _git_capture(argv):
 950     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 951     r = p.stdout.read()
 952     _git_wait(repr(argv), p)
 953     return r
 954
 955
 956 class _AbortableIter:
 957     def __init__(self, it, onabort = None):
 958         self.it = it
 959         self.onabort = onabort
 960         self.done = None
 961
 962     def __iter__(self):
 963         return self
 964
 965     def next(self):
 966         try:
 967             return self.it.next()
 968         except StopIteration, e:
 969             self.done = True
 970             raise
 971         except:
 972             self.abort()
 973             raise
 974
 975     def abort(self):
 976         """Abort iteration and call the abortion callback, if needed."""
 977         if not self.done:
 978             self.done = True
 979             if self.onabort:
 980                 self.onabort()
 981
 982     def __del__(self):
 983         self.abort()
 984
 985
 986 _ver_warned = 0
 987 class CatPipe:
 988     """Link to 'git cat-file' that is used to retrieve blob data."""
 989     def __init__(self):
 990         global _ver_warned
 991         wanted = ('1','5','6')
 992         if ver() < wanted:
 993             if not _ver_warned:
 994                 log('warning: git version < %s; bup will be slow.\n'
 995                     % '.'.join(wanted))
 996                 _ver_warned = 1
 997             self.get = self._slow_get
 998         else:
 999             self.p = self.inprogress = None
1000             self.get = self._fast_get
1001
1002     def _abort(self):
1003         if self.p:
1004             self.p.stdout.close()
1005             self.p.stdin.close()
1006         self.p = None
1007         self.inprogress = None
1008
1009     def _restart(self):
1010         self._abort()
1011         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1012                                   stdin=subprocess.PIPE,
1013                                   stdout=subprocess.PIPE,
1014                                   close_fds = True,
1015                                   bufsize = 4096,
1016                                   preexec_fn = _gitenv)
1017
1018     def _fast_get(self, id):
1019         if not self.p or self.p.poll() != None:
1020             self._restart()
1021         assert(self.p)
1022         assert(self.p.poll() == None)
1023         if self.inprogress:
1024             log('_fast_get: opening %r while %r is open'
1025                 % (id, self.inprogress))
1026         assert(not self.inprogress)
1027         assert(id.find('\n') < 0)
1028         assert(id.find('\r') < 0)
1029         assert(not id.startswith('-'))
1030         self.inprogress = id
1031         self.p.stdin.write('%s\n' % id)
1032         self.p.stdin.flush()
1033         hdr = self.p.stdout.readline()
1034         if hdr.endswith(' missing\n'):
1035             self.inprogress = None
1036             raise KeyError('blob %r is missing' % id)
1037         spl = hdr.split(' ')
1038         if len(spl) != 3 or len(spl[0]) != 40:
1039             raise GitError('expected blob, got %r' % spl)
1040         (hex, type, size) = spl
1041
1042         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1043                            onabort = self._abort)
1044         try:
1045             yield type
1046             for blob in it:
1047                 yield blob
1048             assert(self.p.stdout.readline() == '\n')
1049             self.inprogress = None
1050         except Exception, e:
1051             it.abort()
1052             raise
1053
1054     def _slow_get(self, id):
1055         assert(id.find('\n') < 0)
1056         assert(id.find('\r') < 0)
1057         assert(id[0] != '-')
1058         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1059         yield type
1060
1061         p = subprocess.Popen(['git', 'cat-file', type, id],
1062                              stdout=subprocess.PIPE,
1063                              preexec_fn = _gitenv)
1064         for blob in chunkyreader(p.stdout):
1065             yield blob
1066         _git_wait('git cat-file', p)
1067
1068     def _join(self, it):
1069         type = it.next()
1070         if type == 'blob':
1071             for blob in it:
1072                 yield blob
1073         elif type == 'tree':
1074             treefile = ''.join(it)
1075             for (mode, name, sha) in treeparse(treefile):
1076                 for blob in self.join(sha.encode('hex')):
1077                     yield blob
1078         elif type == 'commit':
1079             treeline = ''.join(it).split('\n')[0]
1080             assert(treeline.startswith('tree '))
1081             for blob in self.join(treeline[5:]):
1082                 yield blob
1083         else:
1084             raise GitError('invalid object type %r: expected blob/tree/commit'
1085                            % type)
1086
1087     def join(self, id):
1088         """Generate a list of the content of all blobs that can be reached
1089         from an object.  The hash given in 'id' must point to a blob, a tree
1090         or a commit. The content of all blobs that can be seen from trees or
1091         commits will be added to the list.
1092         """
1093         try:
1094             for d in self._join(self.get(id)):
1095                 yield d
1096         except StopIteration:
1097             log('booger!\n')
1098
def tags():
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.

    Only refs under 'refs/tags/' reported by list_refs() are considered; the
    prefix is stripped from the names stored in the result.
    """
    prefix = 'refs/tags/'
    by_hash = {}
    for (refname, sha) in list_refs():
        if not refname.startswith(prefix):
            continue
        # Several tags may point at the same hash, hence the list.
        by_hash.setdefault(sha, []).append(refname[len(prefix):])
    return by_hash