lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, zlib, time, subprocess, struct, stat, re, tempfile, heapq
   6 from bup.helpers import *
   7 from bup import _helpers
   8
# Version of the .midx file format this module understands; PackMidx ignores
# files whose header carries any other version.
MIDX_VERSION = 2

# When nonzero, PackWriter._write() logs a '>' for every object it writes.
verbose = 0
# When true, PackIdxList.refresh() always acts as if skip_midx was True.
ignore_midx = 0
# Default repository location; check_repo_or_die() initializes it on demand.
home_repodir = os.path.expanduser('~/.bup')
# Path of the repository in use; set by guess_repo() and read through repo().
repodir = None

# Object type name <-> numeric type code used in pack object headers.
_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Lookup statistics, incremented by the index search methods below.
_total_searches = 0
_total_steps = 0
  21
  22
  23 class GitError(Exception):
  24     pass
  25
  26
  27 def repo(sub = ''):
  28     """Get the path to the git repository or one of its subdirectories."""
  29     global repodir
  30     if not repodir:
  31         raise GitError('You should call check_repo_or_die()')
  32
  33     # If there's a .git subdirectory, then the actual repo is in there.
  34     gd = os.path.join(repodir, '.git')
  35     if os.path.exists(gd):
  36         repodir = gd
  37
  38     return os.path.join(repodir, sub)
  39
  40
  41 def auto_midx(objdir):
  42     main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
  43     args = [main_exe, 'midx', '--auto', '--dir', objdir]
  44     rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  45     if rv:
  46         add_error('%r: returned %d' % (args, rv))
  47
  48
  49 def mangle_name(name, mode, gitmode):
  50     """Mangle a file name to present an abstract name for segmented files.
  51     Mangled file names will have the ".bup" extension added to them. If a
  52     file's name already ends with ".bup", a ".bupl" extension is added to
  53     disambiguate normal files from semgmented ones.
  54     """
  55     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  56         return name + '.bup'
  57     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  58         return name + '.bupl'
  59     else:
  60         return name
  61
  62
  63 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
  64 def demangle_name(name):
  65     """Remove name mangling from a file name, if necessary.
  66
  67     The return value is a tuple (demangled_filename,mode), where mode is one of
  68     the following:
  69
  70     * BUP_NORMAL  : files that should be read as-is from the repository
  71     * BUP_CHUNKED : files that were chunked and need to be assembled
  72
  73     For more information on the name mangling algorythm, see mangle_name()
  74     """
  75     if name.endswith('.bupl'):
  76         return (name[:-5], BUP_NORMAL)
  77     elif name.endswith('.bup'):
  78         return (name[:-4], BUP_CHUNKED)
  79     else:
  80         return (name, BUP_NORMAL)
  81
  82
  83 def _encode_packobj(type, content):
  84     szout = ''
  85     sz = len(content)
  86     szbits = (sz & 0x0f) | (_typemap[type]<<4)
  87     sz >>= 4
  88     while 1:
  89         if sz: szbits |= 0x80
  90         szout += chr(szbits)
  91         if not sz:
  92             break
  93         szbits = sz & 0x7f
  94         sz >>= 7
  95     z = zlib.compressobj(1)
  96     yield szout
  97     yield z.compress(content)
  98     yield z.flush()
  99
 100
 101 def _encode_looseobj(type, content):
 102     z = zlib.compressobj(1)
 103     yield z.compress('%s %d\0' % (type, len(content)))
 104     yield z.compress(content)
 105     yield z.flush()
 106
 107
 108 def _decode_looseobj(buf):
 109     assert(buf);
 110     s = zlib.decompress(buf)
 111     i = s.find('\0')
 112     assert(i > 0)
 113     l = s[:i].split(' ')
 114     type = l[0]
 115     sz = int(l[1])
 116     content = s[i+1:]
 117     assert(type in _typemap)
 118     assert(sz == len(content))
 119     return (type, content)
 120
 121
 122 def _decode_packobj(buf):
 123     assert(buf)
 124     c = ord(buf[0])
 125     type = _typermap[(c & 0x70) >> 4]
 126     sz = c & 0x0f
 127     shift = 4
 128     i = 0
 129     while c & 0x80:
 130         i += 1
 131         c = ord(buf[i])
 132         sz |= (c & 0x7f) << shift
 133         shift += 7
 134         if not (c & 0x80):
 135             break
 136     return (type, zlib.decompress(buf[i+1:]))
 137
 138
 139 class PackIdx:
 140     def __init__(self):
 141         assert(0)
 142
 143     def find_offset(self, hash):
 144         """Get the offset of an object inside the index file."""
 145         idx = self._idx_from_hash(hash)
 146         if idx != None:
 147             return self._ofs_from_idx(idx)
 148         return None
 149
 150     def exists(self, hash):
 151         """Return nonempty if the object exists in this index."""
 152         return hash and (self._idx_from_hash(hash) != None) and True or None
 153
 154     def __len__(self):
 155         return int(self.fanout[255])
 156
 157     def _idx_from_hash(self, hash):
 158         global _total_searches, _total_steps
 159         _total_searches += 1
 160         assert(len(hash) == 20)
 161         b1 = ord(hash[0])
 162         start = self.fanout[b1-1] # range -1..254
 163         end = self.fanout[b1] # range 0..255
 164         want = str(hash)
 165         _total_steps += 1  # lookup table is a step
 166         while start < end:
 167             _total_steps += 1
 168             mid = start + (end-start)/2
 169             v = self._idx_to_hash(mid)
 170             if v < want:
 171                 start = mid+1
 172             elif v > want:
 173                 end = mid
 174             else: # got it!
 175                 return mid
 176         return None
 177
 178
 179 class PackIdxV1(PackIdx):
 180     """Object representation of a Git pack index (version 1) file."""
 181     def __init__(self, filename, f):
 182         self.name = filename
 183         self.idxnames = [self.name]
 184         self.map = mmap_read(f)
 185         self.fanout = list(struct.unpack('!256I',
 186                                          str(buffer(self.map, 0, 256*4))))
 187         self.fanout.append(0)  # entry "-1"
 188         nsha = self.fanout[255]
 189         self.shatable = buffer(self.map, 256*4, nsha*24)
 190
 191     def _ofs_from_idx(self, idx):
 192         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 193
 194     def _idx_to_hash(self, idx):
 195         return str(self.shatable[idx*24+4 : idx*24+24])
 196
 197     def __iter__(self):
 198         for i in xrange(self.fanout[255]):
 199             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 200
 201
 202 class PackIdxV2(PackIdx):
 203     """Object representation of a Git pack index (version 2) file."""
 204     def __init__(self, filename, f):
 205         self.name = filename
 206         self.idxnames = [self.name]
 207         self.map = mmap_read(f)
 208         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 209         self.fanout = list(struct.unpack('!256I',
 210                                          str(buffer(self.map, 8, 256*4))))
 211         self.fanout.append(0)  # entry "-1"
 212         nsha = self.fanout[255]
 213         self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
 214         self.ofstable = buffer(self.map,
 215                                8 + 256*4 + nsha*20 + nsha*4,
 216                                nsha*4)
 217         self.ofs64table = buffer(self.map,
 218                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 219
 220     def _ofs_from_idx(self, idx):
 221         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 222         if ofs & 0x80000000:
 223             idx64 = ofs & 0x7fffffff
 224             ofs = struct.unpack('!Q',
 225                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 226         return ofs
 227
 228     def _idx_to_hash(self, idx):
 229         return str(self.shatable[idx*20:(idx+1)*20])
 230
 231     def __iter__(self):
 232         for i in xrange(self.fanout[255]):
 233             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 234
 235
# Module-level alias for _helpers.extract_bits; PackMidx.exists() calls it by
# this name.
extract_bits = _helpers.extract_bits
 237
 238
 239 class PackMidx:
 240     """Wrapper which contains data from multiple index files.
 241     Multiple index (.midx) files constitute a wrapper around index (.idx) files
 242     and make it possible for bup to expand Git's indexing capabilities to vast
 243     amounts of files.
 244     """
 245     def __init__(self, filename):
 246         self.name = filename
 247         self.force_keep = False
 248         assert(filename.endswith('.midx'))
 249         self.map = mmap_read(open(filename))
 250         if str(self.map[0:4]) != 'MIDX':
 251             log('Warning: skipping: invalid MIDX header in %r\n' % filename)
 252             self.force_keep = True
 253             return self._init_failed()
 254         ver = struct.unpack('!I', self.map[4:8])[0]
 255         if ver < MIDX_VERSION:
 256             log('Warning: ignoring old-style (v%d) midx %r\n'
 257                 % (ver, filename))
 258             self.force_keep = False  # old stuff is boring
 259             return self._init_failed()
 260         if ver > MIDX_VERSION:
 261             log('Warning: ignoring too-new (v%d) midx %r\n'
 262                 % (ver, filename))
 263             self.force_keep = True  # new stuff is exciting
 264             return self._init_failed()
 265
 266         self.bits = _helpers.firstword(self.map[8:12])
 267         self.entries = 2**self.bits
 268         self.fanout = buffer(self.map, 12, self.entries*4)
 269         shaofs = 12 + self.entries*4
 270         nsha = self._fanget(self.entries-1)
 271         self.shalist = buffer(self.map, shaofs, nsha*20)
 272         self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
 273
 274     def _init_failed(self):
 275         self.bits = 0
 276         self.entries = 1
 277         self.fanout = buffer('\0\0\0\0')
 278         self.shalist = buffer('\0'*20)
 279         self.idxnames = []
 280
 281     def _fanget(self, i):
 282         start = i*4
 283         s = self.fanout[start:start+4]
 284         return _helpers.firstword(s)
 285
 286     def _get(self, i):
 287         return str(self.shalist[i*20:(i+1)*20])
 288
 289     def exists(self, hash):
 290         """Return nonempty if the object exists in the index files."""
 291         global _total_searches, _total_steps
 292         _total_searches += 1
 293         want = str(hash)
 294         el = extract_bits(want, self.bits)
 295         if el:
 296             start = self._fanget(el-1)
 297             startv = el << (32-self.bits)
 298         else:
 299             start = 0
 300             startv = 0
 301         end = self._fanget(el)
 302         endv = (el+1) << (32-self.bits)
 303         _total_steps += 1   # lookup table is a step
 304         hashv = _helpers.firstword(hash)
 305         #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
 306         while start < end:
 307             _total_steps += 1
 308             #print '! %08x %08x %08x   %d - %d' % (startv, hashv, endv, start, end)
 309             mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
 310             #print '  %08x %08x %08x   %d %d %d' % (startv, hashv, endv, start, mid, end)
 311             v = self._get(mid)
 312             #print '    %08x' % self._num(v)
 313             if v < want:
 314                 start = mid+1
 315                 startv = _helpers.firstword(v)
 316             elif v > want:
 317                 end = mid
 318                 endv = _helpers.firstword(v)
 319             else: # got it!
 320                 return True
 321         return None
 322
 323     def __iter__(self):
 324         for i in xrange(self._fanget(self.entries-1)):
 325             yield buffer(self.shalist, i*20, 20)
 326
 327     def __len__(self):
 328         return int(self._fanget(self.entries-1))
 329
 330
# Number of live PackIdxList instances; the constructor asserts it is zero.
_mpi_count = 0
class PackIdxList:
    """All pack indexes (.idx and .midx) found in one directory, searched as one.

    Only one instance may exist at a time (enforced with assertions on the
    module-global _mpi_count).
    """
    def __init__(self, dir):
        """Load the indexes found in directory 'dir'."""
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        # Extra hashes registered with add(); exists() reports them as found.
        self.also = {}
        # Index objects currently in use, most recently matched first.
        self.packs = []
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        """Iterate over the merged contents of all indexes (see idxmerge())."""
        return iter(idxmerge(self.packs))

    def __len__(self):
        """Return the sum of the lengths of all indexes in use."""
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash):
        """Return nonempty if the object exists in the index files.

        The result is True when the hash was registered with add(), the name
        of the matching index when one contains it, and None otherwise.
        """
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        for i in range(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            if p.exists(hash):
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return p.name
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        skip_midx = skip_midx or ignore_midx
        # d maps an index path to the object that covers it. A midx is entered
        # under its own name and under the path of every idx it lists.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                # Record which idx files the already-loaded midx files cover.
                for ix in self.packs:
                    if isinstance(ix, PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                # Open midx files not seen before. One that refers to a
                # missing idx is deleted from disk.
                for f in os.listdir(self.dir):
                    full = os.path.join(self.dir, f)
                    if f.endswith('.midx') and not d.get(full):
                        mx = PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = 0
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                    '  used by %s\n') % (n, mxf))
                                broken += 1
                        if broken:
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                # Consider the largest midx files first.
                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                for ix in midxl:
                    any = 0
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            d[ix.name] = ix
                            for name in ix.idxnames:
                                d[os.path.join(self.dir, name)] = ix
                            any += 1
                            break
                    # Nothing new in this midx: delete it unless force_keep.
                    if not any and not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        unlink(ix.name)
            # Open the .idx files that are not already covered by an entry.
            for f in os.listdir(self.dir):
                full = os.path.join(self.dir, f)
                if f.endswith('.idx') and not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError, e:
                        add_error(e)
                        continue
                    d[full] = ix
            # One object can appear under several keys, so deduplicate.
            self.packs = list(set(d.values()))
        debug1('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def packname_containing(self, hash):
        """Return the path of the .idx file containing 'hash'.

        Falls off the end (returning None) when no .idx file in the
        directory contains it. Files that fail to open are recorded with
        add_error() and skipped.
        """
        # figure out which pack contains a given hash.
        # FIXME: if the midx file format would just *store* this information,
        # we could calculate it a lot more efficiently.  But it's not needed
        # often, so let's do it like this.
        for f in os.listdir(self.dir):
            if f.endswith('.idx'):
                full = os.path.join(self.dir, f)
                try:
                    ix = open_idx(full)
                except GitError, e:
                    add_error(e)
                    continue
                if ix.exists(hash):
                    return full

    def add(self, hash):
        """Insert an additional object in the list."""
        self.also[hash] = 1

    def zap_also(self):
        """Remove all additional objects from the list."""
        self.also = {}
 457         self.also = {}
 458
 459
 460 def calc_hash(type, content):
 461     """Calculate some content's hash in the Git fashion."""
 462     header = '%s %d\0' % (type, len(content))
 463     sum = Sha1(header)
 464     sum.update(content)
 465     return sum.digest()
 466
 467
 468 def _shalist_sort_key(ent):
 469     (mode, name, id) = ent
 470     if stat.S_ISDIR(int(mode, 8)):
 471         return name + '/'
 472     else:
 473         return name
 474
 475
 476 def open_idx(filename):
 477     if filename.endswith('.idx'):
 478         f = open(filename, 'rb')
 479         header = f.read(8)
 480         if header[0:4] == '\377tOc':
 481             version = struct.unpack('!I', header[4:8])[0]
 482             if version == 2:
 483                 return PackIdxV2(filename, f)
 484             else:
 485                 raise GitError('%s: expected idx file version 2, got %d'
 486                                % (filename, version))
 487         elif len(header) == 8 and header[0:4] < '\377tOc':
 488             return PackIdxV1(filename, f)
 489         else:
 490             raise GitError('%s: unrecognized idx file header' % filename)
 491     elif filename.endswith('.midx'):
 492         return PackMidx(filename)
 493     else:
 494         raise GitError('idx filenames must end with .idx or .midx')
 495
 496
 497 def idxmerge(idxlist, final_progress=True):
 498     """Generate a list of all the objects reachable in a PackIdxList."""
 499     total = sum(len(i) for i in idxlist)
 500     iters = (iter(i) for i in idxlist)
 501     heap = [(next(it), it) for it in iters]
 502     heapq.heapify(heap)
 503     count = 0
 504     last = None
 505     while heap:
 506         if (count % 10024) == 0:
 507             progress('Reading indexes: %.2f%% (%d/%d)\r'
 508                      % (count*100.0/total, count, total))
 509         (e, it) = heap[0]
 510         if e != last:
 511             yield e
 512             last = e
 513         count += 1
 514         e = next(it)
 515         if e:
 516             heapq.heapreplace(heap, (e, it))
 517         else:
 518             heapq.heappop(heap)
 519     if final_progress:
 520         log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
 521
 522
 523 class PackWriter:
 524     """Writes Git objects insid a pack file."""
 525     def __init__(self, objcache_maker=None):
 526         self.count = 0
 527         self.outbytes = 0
 528         self.filename = None
 529         self.file = None
 530         self.idx = None
 531         self.objcache_maker = objcache_maker
 532         self.objcache = None
 533
 534     def __del__(self):
 535         self.close()
 536
 537     def _make_objcache(self):
 538         if self.objcache == None:
 539             if self.objcache_maker:
 540                 self.objcache = self.objcache_maker()
 541             else:
 542                 self.objcache = PackIdxList(repo('objects/pack'))
 543
 544     def _open(self):
 545         if not self.file:
 546             self._make_objcache()
 547             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 548             self.file = os.fdopen(fd, 'w+b')
 549             assert(name.endswith('.pack'))
 550             self.filename = name[:-5]
 551             self.file.write('PACK\0\0\0\2\0\0\0\0')
 552             self.idx = list(list() for i in xrange(256))
 553
 554     # the 'sha' parameter is used in client.py's _raw_write(), but not needed
 555     # in this basic version.
 556     def _raw_write(self, datalist, sha):
 557         self._open()
 558         f = self.file
 559         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 560         # the file never has a *partial* blob.  So let's make sure it's
 561         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 562         # to our hashsplit algorithm.)  f.write() does its own buffering,
 563         # but that's okay because we'll flush it in _end().
 564         oneblob = ''.join(datalist)
 565         f.write(oneblob)
 566         nw = len(oneblob)
 567         crc = zlib.crc32(oneblob) & 0xffffffff
 568         self._update_idx(sha, crc, nw)
 569         self.outbytes += nw
 570         self.count += 1
 571         return nw, crc
 572
 573     def _update_idx(self, sha, crc, size):
 574         assert(sha)
 575         if self.idx:
 576             self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
 577
 578     def _write(self, sha, type, content):
 579         if verbose:
 580             log('>')
 581         if not sha:
 582             sha = calc_hash(type, content)
 583         size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
 584         return sha
 585
 586     def breakpoint(self):
 587         """Clear byte and object counts and return the last processed id."""
 588         id = self._end()
 589         self.outbytes = self.count = 0
 590         return id
 591
 592     def write(self, type, content):
 593         """Write an object in this pack file."""
 594         return self._write(calc_hash(type, content), type, content)
 595
 596     def exists(self, id):
 597         """Return non-empty if an object is found in the object cache."""
 598         if not self.objcache:
 599             self._make_objcache()
 600         return self.objcache.exists(id)
 601
 602     def maybe_write(self, type, content):
 603         """Write an object to the pack file if not present and return its id."""
 604         sha = calc_hash(type, content)
 605         if not self.exists(sha):
 606             self._write(sha, type, content)
 607             self.objcache.add(sha)
 608         return sha
 609
 610     def new_blob(self, blob):
 611         """Create a blob object in the pack with the supplied content."""
 612         return self.maybe_write('blob', blob)
 613
 614     def new_tree(self, shalist):
 615         """Create a tree object in the pack."""
 616         shalist = sorted(shalist, key = _shalist_sort_key)
 617         l = []
 618         for (mode,name,bin) in shalist:
 619             assert(mode)
 620             assert(mode != '0')
 621             assert(mode[0] != '0')
 622             assert(name)
 623             assert(len(bin) == 20)
 624             l.append('%s %s\0%s' % (mode,name,bin))
 625         return self.maybe_write('tree', ''.join(l))
 626
 627     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 628         l = []
 629         if tree: l.append('tree %s' % tree.encode('hex'))
 630         if parent: l.append('parent %s' % parent.encode('hex'))
 631         if author: l.append('author %s %s' % (author, _git_date(adate)))
 632         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 633         l.append('')
 634         l.append(msg)
 635         return self.maybe_write('commit', '\n'.join(l))
 636
 637     def new_commit(self, parent, tree, date, msg):
 638         """Create a commit object in the pack."""
 639         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 640         commit = self._new_commit(tree, parent,
 641                                   userline, date, userline, date,
 642                                   msg)
 643         return commit
 644
 645     def abort(self):
 646         """Remove the pack file from disk."""
 647         f = self.file
 648         if f:
 649             self.idx = None
 650             self.file = None
 651             f.close()
 652             os.unlink(self.filename + '.pack')
 653
 654     def _end(self, run_midx=True):
 655         f = self.file
 656         if not f: return None
 657         self.file = None
 658         self.objcache = None
 659         idx = self.idx
 660         self.idx = None
 661
 662         # update object count
 663         f.seek(8)
 664         cp = struct.pack('!i', self.count)
 665         assert(len(cp) == 4)
 666         f.write(cp)
 667
 668         # calculate the pack sha1sum
 669         f.seek(0)
 670         sum = Sha1()
 671         for b in chunkyreader(f):
 672             sum.update(b)
 673         packbin = sum.digest()
 674         f.write(packbin)
 675         f.close()
 676
 677         idx_f = open(self.filename + '.idx', 'wb')
 678         obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
 679         idx_f.close()
 680
 681         nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
 682         if os.path.exists(self.filename + '.map'):
 683             os.unlink(self.filename + '.map')
 684         os.rename(self.filename + '.pack', nameprefix + '.pack')
 685         os.rename(self.filename + '.idx', nameprefix + '.idx')
 686
 687         if run_midx:
 688             auto_midx(repo('objects/pack'))
 689         return nameprefix
 690
 691     def close(self, run_midx=True):
 692         """Close the pack file and move it to its definitive path."""
 693         return self._end(run_midx=run_midx)
 694
 695     def _write_pack_idx_v2(self, file, idx, packbin):
 696         sum = Sha1()
 697
 698         def write(data):
 699             file.write(data)
 700             sum.update(data)
 701
 702         write('\377tOc\0\0\0\2')
 703
 704         n = 0
 705         for part in idx:
 706             n += len(part)
 707             write(struct.pack('!i', n))
 708             part.sort(key=lambda x: x[0])
 709
 710         obj_list_sum = Sha1()
 711         for part in idx:
 712             for entry in part:
 713                 write(entry[0])
 714                 obj_list_sum.update(entry[0])
 715         for part in idx:
 716             for entry in part:
 717                 write(struct.pack('!I', entry[1]))
 718         ofs64_list = []
 719         for part in idx:
 720             for entry in part:
 721                 if entry[2] & 0x80000000:
 722                     write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
 723                     ofs64_list.append(struct.pack('!Q', entry[2]))
 724                 else:
 725                     write(struct.pack('!i', entry[2]))
 726         for ofs64 in ofs64_list:
 727             write(ofs64)
 728
 729         write(packbin)
 730         file.write(sum.digest())
 731         return obj_list_sum.hexdigest()
 732
 733
 734 def _git_date(date):
 735     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 736
 737
 738 def _gitenv():
 739     os.environ['GIT_DIR'] = os.path.abspath(repo())
 740
 741
 742 def list_refs(refname = None):
 743     """Generate a list of tuples in the form (refname,hash).
 744     If a ref name is specified, list only this particular ref.
 745     """
 746     argv = ['git', 'show-ref', '--']
 747     if refname:
 748         argv += [refname]
 749     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 750     out = p.stdout.read().strip()
 751     rv = p.wait()  # not fatal
 752     if rv:
 753         assert(not out)
 754     if out:
 755         for d in out.split('\n'):
 756             (sha, name) = d.split(' ', 1)
 757             yield (name, sha.decode('hex'))
 758
 759
 760 def read_ref(refname):
 761     """Get the commit id of the most recent commit made on a given ref."""
 762     l = list(list_refs(refname))
 763     if l:
 764         assert(len(l) == 1)
 765         return l[0][1]
 766     else:
 767         return None
 768
 769
 770 def rev_list(ref, count=None):
 771     """Generate a list of reachable commits in reverse chronological order.
 772
 773     This generator walks through commits, from child to parent, that are
 774     reachable via the specified ref and yields a series of tuples of the form
 775     (date,hash).
 776
 777     If count is a non-zero integer, limit the number of commits to "count"
 778     objects.
 779     """
 780     assert(not ref.startswith('-'))
 781     opts = []
 782     if count:
 783         opts += ['-n', str(atoi(count))]
 784     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 785     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 786     commit = None
 787     for row in p.stdout:
 788         s = row.strip()
 789         if s.startswith('commit '):
 790             commit = s[7:].decode('hex')
 791         else:
 792             date = int(s)
 793             yield (date, commit)
 794     rv = p.wait()  # not fatal
 795     if rv:
 796         raise GitError, 'git rev-list returned error %d' % rv
 797
 798
 799 def rev_get_date(ref):
 800     """Get the date of the latest commit on the specified ref."""
 801     for (date, commit) in rev_list(ref, count=1):
 802         return date
 803     raise GitError, 'no such commit %r' % ref
 804
 805
 806 def rev_parse(committish):
 807     """Resolve the full hash for 'committish', if it exists.
 808
 809     Should be roughly equivalent to 'git rev-parse'.
 810
 811     Returns the hex value of the hash if it is found, None if 'committish' does
 812     not correspond to anything.
 813     """
 814     head = read_ref(committish)
 815     if head:
 816         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 817         return head
 818
 819     pL = PackIdxList(repo('objects/pack'))
 820
 821     if len(committish) == 40:
 822         try:
 823             hash = committish.decode('hex')
 824         except TypeError:
 825             return None
 826
 827         if pL.exists(hash):
 828             return hash
 829
 830     return None
 831
 832
 833 def update_ref(refname, newval, oldval):
 834     """Change the commit pointed to by a branch."""
 835     if not oldval:
 836         oldval = ''
 837     assert(refname.startswith('refs/heads/'))
 838     p = subprocess.Popen(['git', 'update-ref', refname,
 839                           newval.encode('hex'), oldval.encode('hex')],
 840                          preexec_fn = _gitenv)
 841     _git_wait('git update-ref', p)
 842
 843
 844 def guess_repo(path=None):
 845     """Set the path value in the global variable "repodir".
 846     This makes bup look for an existing bup repository, but not fail if a
 847     repository doesn't exist. Usually, if you are interacting with a bup
 848     repository, you would not be calling this function but using
 849     check_repo_or_die().
 850     """
 851     global repodir
 852     if path:
 853         repodir = path
 854     if not repodir:
 855         repodir = os.environ.get('BUP_DIR')
 856         if not repodir:
 857             repodir = os.path.expanduser('~/.bup')
 858
 859
 860 def init_repo(path=None):
 861     """Create the Git bare repository for bup in a given path."""
 862     guess_repo(path)
 863     d = repo()
 864     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 865         raise GitError('"%d" exists but is not a directory\n' % d)
 866     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 867                          preexec_fn = _gitenv)
 868     _git_wait('git init', p)
 869     # Force the index version configuration in order to ensure bup works
 870     # regardless of the version of the installed Git binary.
 871     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 872                          stdout=sys.stderr, preexec_fn = _gitenv)
 873     _git_wait('git config', p)
 874
 875
 876 def check_repo_or_die(path=None):
 877     """Make sure a bup repository exists, and abort if not.
 878     If the path to a particular repository was not specified, this function
 879     initializes the default repository automatically.
 880     """
 881     guess_repo(path)
 882     if not os.path.isdir(repo('objects/pack/.')):
 883         if repodir == home_repodir:
 884             init_repo()
 885         else:
 886             log('error: %r is not a bup/git repository\n' % repo())
 887             sys.exit(15)
 888
 889
 890 def treeparse(buf):
 891     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 892     ofs = 0
 893     while ofs < len(buf):
 894         z = buf[ofs:].find('\0')
 895         assert(z > 0)
 896         spl = buf[ofs:ofs+z].split(' ', 1)
 897         assert(len(spl) == 2)
 898         sha = buf[ofs+z+1:ofs+z+1+20]
 899         ofs += z+1+20
 900         yield (spl[0], spl[1], sha)
 901
 902
 903 _ver = None
 904 def ver():
 905     """Get Git's version and ensure a usable version is installed.
 906
 907     The returned version is formatted as an ordered tuple with each position
 908     representing a digit in the version tag. For example, the following tuple
 909     would represent version 1.6.6.9:
 910
 911         ('1', '6', '6', '9')
 912     """
 913     global _ver
 914     if not _ver:
 915         p = subprocess.Popen(['git', '--version'],
 916                              stdout=subprocess.PIPE)
 917         gvs = p.stdout.read()
 918         _git_wait('git --version', p)
 919         m = re.match(r'git version (\S+.\S+)', gvs)
 920         if not m:
 921             raise GitError('git --version weird output: %r' % gvs)
 922         _ver = tuple(m.group(1).split('.'))
 923     needed = ('1','5', '3', '1')
 924     if _ver < needed:
 925         raise GitError('git version %s or higher is required; you have %s'
 926                        % ('.'.join(needed), '.'.join(_ver)))
 927     return _ver
 928
 929
 930 def _git_wait(cmd, p):
 931     rv = p.wait()
 932     if rv != 0:
 933         raise GitError('%s returned %d' % (cmd, rv))
 934
 935
 936 def _git_capture(argv):
 937     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 938     r = p.stdout.read()
 939     _git_wait(repr(argv), p)
 940     return r
 941
 942
 943 class _AbortableIter:
 944     def __init__(self, it, onabort = None):
 945         self.it = it
 946         self.onabort = onabort
 947         self.done = None
 948
 949     def __iter__(self):
 950         return self
 951
 952     def next(self):
 953         try:
 954             return self.it.next()
 955         except StopIteration, e:
 956             self.done = True
 957             raise
 958         except:
 959             self.abort()
 960             raise
 961
 962     def abort(self):
 963         """Abort iteration and call the abortion callback, if needed."""
 964         if not self.done:
 965             self.done = True
 966             if self.onabort:
 967                 self.onabort()
 968
 969     def __del__(self):
 970         self.abort()
 971
 972
 973 _ver_warned = 0
 974 class CatPipe:
 975     """Link to 'git cat-file' that is used to retrieve blob data."""
 976     def __init__(self):
 977         global _ver_warned
 978         wanted = ('1','5','6')
 979         if ver() < wanted:
 980             if not _ver_warned:
 981                 log('warning: git version < %s; bup will be slow.\n'
 982                     % '.'.join(wanted))
 983                 _ver_warned = 1
 984             self.get = self._slow_get
 985         else:
 986             self.p = self.inprogress = None
 987             self.get = self._fast_get
 988
 989     def _abort(self):
 990         if self.p:
 991             self.p.stdout.close()
 992             self.p.stdin.close()
 993         self.p = None
 994         self.inprogress = None
 995
 996     def _restart(self):
 997         self._abort()
 998         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 999                                   stdin=subprocess.PIPE,
1000                                   stdout=subprocess.PIPE,
1001                                   close_fds = True,
1002                                   bufsize = 4096,
1003                                   preexec_fn = _gitenv)
1004
1005     def _fast_get(self, id):
1006         if not self.p or self.p.poll() != None:
1007             self._restart()
1008         assert(self.p)
1009         assert(self.p.poll() == None)
1010         if self.inprogress:
1011             log('_fast_get: opening %r while %r is open'
1012                 % (id, self.inprogress))
1013         assert(not self.inprogress)
1014         assert(id.find('\n') < 0)
1015         assert(id.find('\r') < 0)
1016         assert(not id.startswith('-'))
1017         self.inprogress = id
1018         self.p.stdin.write('%s\n' % id)
1019         self.p.stdin.flush()
1020         hdr = self.p.stdout.readline()
1021         if hdr.endswith(' missing\n'):
1022             self.inprogress = None
1023             raise KeyError('blob %r is missing' % id)
1024         spl = hdr.split(' ')
1025         if len(spl) != 3 or len(spl[0]) != 40:
1026             raise GitError('expected blob, got %r' % spl)
1027         (hex, type, size) = spl
1028
1029         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1030                            onabort = self._abort)
1031         try:
1032             yield type
1033             for blob in it:
1034                 yield blob
1035             assert(self.p.stdout.readline() == '\n')
1036             self.inprogress = None
1037         except Exception, e:
1038             it.abort()
1039             raise
1040
1041     def _slow_get(self, id):
1042         assert(id.find('\n') < 0)
1043         assert(id.find('\r') < 0)
1044         assert(id[0] != '-')
1045         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1046         yield type
1047
1048         p = subprocess.Popen(['git', 'cat-file', type, id],
1049                              stdout=subprocess.PIPE,
1050                              preexec_fn = _gitenv)
1051         for blob in chunkyreader(p.stdout):
1052             yield blob
1053         _git_wait('git cat-file', p)
1054
1055     def _join(self, it):
1056         type = it.next()
1057         if type == 'blob':
1058             for blob in it:
1059                 yield blob
1060         elif type == 'tree':
1061             treefile = ''.join(it)
1062             for (mode, name, sha) in treeparse(treefile):
1063                 for blob in self.join(sha.encode('hex')):
1064                     yield blob
1065         elif type == 'commit':
1066             treeline = ''.join(it).split('\n')[0]
1067             assert(treeline.startswith('tree '))
1068             for blob in self.join(treeline[5:]):
1069                 yield blob
1070         else:
1071             raise GitError('invalid object type %r: expected blob/tree/commit'
1072                            % type)
1073
1074     def join(self, id):
1075         """Generate a list of the content of all blobs that can be reached
1076         from an object.  The hash given in 'id' must point to a blob, a tree
1077         or a commit. The content of all blobs that can be seen from trees or
1078         commits will be added to the list.
1079         """
1080         try:
1081             for d in self._join(self.get(id)):
1082                 yield d
1083         except StopIteration:
1084             log('booger!\n')
1085
def tags():
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.

    Only refs under 'refs/tags/' are considered, and that prefix is removed
    from the names that are returned.
    """
    by_commit = {}
    for (refname, commit) in list_refs():
        if not refname.startswith('refs/tags/'):
            continue
        # Several tags may point at the same commit, hence a list per hash.
        by_commit.setdefault(commit, []).append(refname[10:])
    return by_commit