lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
   6 from bup.helpers import *
   7 from bup import _helpers, path, midx, bloom, xstat
   8
# Limits checked by PackWriter._write(): once either one is reached, the
# current pack is finished and the counters are reset.
max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object

verbose = 0  # when nonzero, PackWriter._write() logs a '>' per object
ignore_midx = 0  # when nonzero, PackIdxList.refresh() always skips .midx files
repodir = None  # repository path; set by guess_repo(), read by repo()

# Object type name <-> numeric type code stored in the pack object header
# (see _encode_packobj() / _decode_packobj()).
_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Statistics counters updated by the index lookup code.
_total_searches = 0
_total_steps = 0
  21
  22
  23 class GitError(Exception):
  24     pass
  25
  26
  27 def repo(sub = ''):
  28     """Get the path to the git repository or one of its subdirectories."""
  29     global repodir
  30     if not repodir:
  31         raise GitError('You should call check_repo_or_die()')
  32
  33     # If there's a .git subdirectory, then the actual repo is in there.
  34     gd = os.path.join(repodir, '.git')
  35     if os.path.exists(gd):
  36         repodir = gd
  37
  38     return os.path.join(repodir, sub)
  39
  40
  41 def shorten_hash(s):
  42     return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
  43                   r'\1\2*\3', s)
  44
  45
  46 def repo_rel(path):
  47     full = os.path.abspath(path)
  48     fullrepo = os.path.abspath(repo(''))
  49     if not fullrepo.endswith('/'):
  50         fullrepo += '/'
  51     if full.startswith(fullrepo):
  52         path = full[len(fullrepo):]
  53     if path.startswith('index-cache/'):
  54         path = path[len('index-cache/'):]
  55     return shorten_hash(path)
  56
  57
  58 def all_packdirs():
  59     paths = [repo('objects/pack')]
  60     paths += glob.glob(repo('index-cache/*/.'))
  61     return paths
  62
  63
  64 def auto_midx(objdir):
  65     args = [path.exe(), 'midx', '--auto', '--dir', objdir]
  66     try:
  67         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  68     except OSError, e:
  69         # make sure 'args' gets printed to help with debugging
  70         add_error('%r: exception: %s' % (args, e))
  71         raise
  72     if rv:
  73         add_error('%r: returned %d' % (args, rv))
  74
  75     args = [path.exe(), 'bloom', '--dir', objdir]
  76     try:
  77         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  78     except OSError, e:
  79         # make sure 'args' gets printed to help with debugging
  80         add_error('%r: exception: %s' % (args, e))
  81         raise
  82     if rv:
  83         add_error('%r: returned %d' % (args, rv))
  84
  85
  86 def mangle_name(name, mode, gitmode):
  87     """Mangle a file name to present an abstract name for segmented files.
  88     Mangled file names will have the ".bup" extension added to them. If a
  89     file's name already ends with ".bup", a ".bupl" extension is added to
  90     disambiguate normal files from semgmented ones.
  91     """
  92     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  93         return name + '.bup'
  94     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  95         return name + '.bupl'
  96     else:
  97         return name
  98
  99
 100 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
 101 def demangle_name(name):
 102     """Remove name mangling from a file name, if necessary.
 103
 104     The return value is a tuple (demangled_filename,mode), where mode is one of
 105     the following:
 106
 107     * BUP_NORMAL  : files that should be read as-is from the repository
 108     * BUP_CHUNKED : files that were chunked and need to be assembled
 109
 110     For more information on the name mangling algorythm, see mangle_name()
 111     """
 112     if name.endswith('.bupl'):
 113         return (name[:-5], BUP_NORMAL)
 114     elif name.endswith('.bup'):
 115         return (name[:-4], BUP_CHUNKED)
 116     else:
 117         return (name, BUP_NORMAL)
 118
 119
 120 def calc_hash(type, content):
 121     """Calculate some content's hash in the Git fashion."""
 122     header = '%s %d\0' % (type, len(content))
 123     sum = Sha1(header)
 124     sum.update(content)
 125     return sum.digest()
 126
 127
 128 def shalist_item_sort_key(ent):
 129     (mode, name, id) = ent
 130     assert(mode+0 == mode)
 131     if stat.S_ISDIR(mode):
 132         return name + '/'
 133     else:
 134         return name
 135
 136
 137 def tree_encode(shalist):
 138     """Generate a git tree object from (mode,name,hash) tuples."""
 139     shalist = sorted(shalist, key = shalist_item_sort_key)
 140     l = []
 141     for (mode,name,bin) in shalist:
 142         assert(mode)
 143         assert(mode+0 == mode)
 144         assert(name)
 145         assert(len(bin) == 20)
 146         s = '%o %s\0%s' % (mode,name,bin)
 147         assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
 148         l.append(s)
 149     return ''.join(l)
 150
 151
 152 def tree_decode(buf):
 153     """Generate a list of (mode,name,hash) from the git tree object in buf."""
 154     ofs = 0
 155     while ofs < len(buf):
 156         z = buf.find('\0', ofs)
 157         assert(z > ofs)
 158         spl = buf[ofs:z].split(' ', 1)
 159         assert(len(spl) == 2)
 160         mode,name = spl
 161         sha = buf[z+1:z+1+20]
 162         ofs = z+1+20
 163         yield (int(mode, 8), name, sha)
 164
 165
 166 def _encode_packobj(type, content, compression_level=1):
 167     szout = ''
 168     sz = len(content)
 169     szbits = (sz & 0x0f) | (_typemap[type]<<4)
 170     sz >>= 4
 171     while 1:
 172         if sz: szbits |= 0x80
 173         szout += chr(szbits)
 174         if not sz:
 175             break
 176         szbits = sz & 0x7f
 177         sz >>= 7
 178     if compression_level > 9:
 179         compression_level = 9
 180     elif compression_level < 0:
 181         compression_level = 0
 182     z = zlib.compressobj(compression_level)
 183     yield szout
 184     yield z.compress(content)
 185     yield z.flush()
 186
 187
 188 def _encode_looseobj(type, content, compression_level=1):
 189     z = zlib.compressobj(compression_level)
 190     yield z.compress('%s %d\0' % (type, len(content)))
 191     yield z.compress(content)
 192     yield z.flush()
 193
 194
 195 def _decode_looseobj(buf):
 196     assert(buf);
 197     s = zlib.decompress(buf)
 198     i = s.find('\0')
 199     assert(i > 0)
 200     l = s[:i].split(' ')
 201     type = l[0]
 202     sz = int(l[1])
 203     content = s[i+1:]
 204     assert(type in _typemap)
 205     assert(sz == len(content))
 206     return (type, content)
 207
 208
 209 def _decode_packobj(buf):
 210     assert(buf)
 211     c = ord(buf[0])
 212     type = _typermap[(c & 0x70) >> 4]
 213     sz = c & 0x0f
 214     shift = 4
 215     i = 0
 216     while c & 0x80:
 217         i += 1
 218         c = ord(buf[i])
 219         sz |= (c & 0x7f) << shift
 220         shift += 7
 221         if not (c & 0x80):
 222             break
 223     return (type, zlib.decompress(buf[i+1:]))
 224
 225
 226 class PackIdx:
 227     def __init__(self):
 228         assert(0)
 229
 230     def find_offset(self, hash):
 231         """Get the offset of an object inside the index file."""
 232         idx = self._idx_from_hash(hash)
 233         if idx != None:
 234             return self._ofs_from_idx(idx)
 235         return None
 236
 237     def exists(self, hash, want_source=False):
 238         """Return nonempty if the object exists in this index."""
 239         if hash and (self._idx_from_hash(hash) != None):
 240             return want_source and os.path.basename(self.name) or True
 241         return None
 242
 243     def __len__(self):
 244         return int(self.fanout[255])
 245
 246     def _idx_from_hash(self, hash):
 247         global _total_searches, _total_steps
 248         _total_searches += 1
 249         assert(len(hash) == 20)
 250         b1 = ord(hash[0])
 251         start = self.fanout[b1-1] # range -1..254
 252         end = self.fanout[b1] # range 0..255
 253         want = str(hash)
 254         _total_steps += 1  # lookup table is a step
 255         while start < end:
 256             _total_steps += 1
 257             mid = start + (end-start)/2
 258             v = self._idx_to_hash(mid)
 259             if v < want:
 260                 start = mid+1
 261             elif v > want:
 262                 end = mid
 263             else: # got it!
 264                 return mid
 265         return None
 266
 267
 268 class PackIdxV1(PackIdx):
 269     """Object representation of a Git pack index (version 1) file."""
 270     def __init__(self, filename, f):
 271         self.name = filename
 272         self.idxnames = [self.name]
 273         self.map = mmap_read(f)
 274         self.fanout = list(struct.unpack('!256I',
 275                                          str(buffer(self.map, 0, 256*4))))
 276         self.fanout.append(0)  # entry "-1"
 277         nsha = self.fanout[255]
 278         self.sha_ofs = 256*4
 279         self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
 280
 281     def _ofs_from_idx(self, idx):
 282         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 283
 284     def _idx_to_hash(self, idx):
 285         return str(self.shatable[idx*24+4 : idx*24+24])
 286
 287     def __iter__(self):
 288         for i in xrange(self.fanout[255]):
 289             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 290
 291
 292 class PackIdxV2(PackIdx):
 293     """Object representation of a Git pack index (version 2) file."""
 294     def __init__(self, filename, f):
 295         self.name = filename
 296         self.idxnames = [self.name]
 297         self.map = mmap_read(f)
 298         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 299         self.fanout = list(struct.unpack('!256I',
 300                                          str(buffer(self.map, 8, 256*4))))
 301         self.fanout.append(0)  # entry "-1"
 302         nsha = self.fanout[255]
 303         self.sha_ofs = 8 + 256*4
 304         self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
 305         self.ofstable = buffer(self.map,
 306                                self.sha_ofs + nsha*20 + nsha*4,
 307                                nsha*4)
 308         self.ofs64table = buffer(self.map,
 309                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 310
 311     def _ofs_from_idx(self, idx):
 312         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 313         if ofs & 0x80000000:
 314             idx64 = ofs & 0x7fffffff
 315             ofs = struct.unpack('!Q',
 316                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 317         return ofs
 318
 319     def _idx_to_hash(self, idx):
 320         return str(self.shatable[idx*20:(idx+1)*20])
 321
 322     def __iter__(self):
 323         for i in xrange(self.fanout[255]):
 324             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 325
 326
# Number of live PackIdxList instances; the constructor asserts it is zero.
_mpi_count = 0
class PackIdxList:
    """Searchable collection of the index files found in one directory.

    self.packs holds the opened index objects (.idx and, unless skipped,
    .midx), self.also holds hashes registered through add(), and self.bloom
    is the optional 'bup.bloom' filter opened by refresh().
    """
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()
        self.packs = []
        self.do_bloom = False
        self.bloom = None
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))

    def __len__(self):
        # total number of entries over all loaded indexes
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                # possible hit: stop consulting the bloom filter until a
                # full search misses again (see the end of this method)
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in xrange(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        # nothing found in any index: re-enable the bloom check
        self.do_bloom = True
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # d maps index path -> index object that covers it
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                # every .idx named by an already-loaded midx is covered by it
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                    '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            # a midx that names a missing .idx is deleted
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                # largest midx first; ties broken by newest mtime first
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        unlink(ix.name)
            # open every .idx that is not already covered by an entry in d
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError, e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            # several paths may map to the same midx object, hence the set()
            self.packs = list(set(d.values()))
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            # only use the bloom filter if it is valid and has at least as
            # many entries as the loaded indexes
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def add(self, hash):
        """Insert an additional object in the list."""
        self.also.add(hash)
 454
 455
 456 def open_idx(filename):
 457     if filename.endswith('.idx'):
 458         f = open(filename, 'rb')
 459         header = f.read(8)
 460         if header[0:4] == '\377tOc':
 461             version = struct.unpack('!I', header[4:8])[0]
 462             if version == 2:
 463                 return PackIdxV2(filename, f)
 464             else:
 465                 raise GitError('%s: expected idx file version 2, got %d'
 466                                % (filename, version))
 467         elif len(header) == 8 and header[0:4] < '\377tOc':
 468             return PackIdxV1(filename, f)
 469         else:
 470             raise GitError('%s: unrecognized idx file header' % filename)
 471     elif filename.endswith('.midx'):
 472         return midx.PackMidx(filename)
 473     else:
 474         raise GitError('idx filenames must end with .idx or .midx')
 475
 476
 477 def idxmerge(idxlist, final_progress=True):
 478     """Generate a list of all the objects reachable in a PackIdxList."""
 479     def pfunc(count, total):
 480         qprogress('Reading indexes: %.2f%% (%d/%d)\r'
 481                   % (count*100.0/total, count, total))
 482     def pfinal(count, total):
 483         if final_progress:
 484             progress('Reading indexes: %.2f%% (%d/%d), done.\n'
 485                      % (100, total, total))
 486     return merge_iter(idxlist, 10024, pfunc, pfinal)
 487
 488
 489 def _make_objcache():
 490     return PackIdxList(repo('objects/pack'))
 491
class PackWriter:
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1):
        """Set up an idle writer; no file is created until the first write.

        objcache_maker is called lazily (see _require_objcache()) to build
        the object used by exists(); compression_level is passed on to
        _encode_packobj().
        """
        self.count = 0
        self.outbytes = 0
        self.filename = None
        self.file = None
        self.idx = None
        self.objcache_maker = objcache_maker
        self.objcache = None
        self.compression_level = compression_level

    def __del__(self):
        self.close()

    def _open(self):
        """Create the temporary pack file, if there isn't one open already."""
        if not self.file:
            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
            self.file = os.fdopen(fd, 'w+b')
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            # the last 4 bytes are a placeholder for the object count,
            # which _end() fills in
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            # one bucket of index entries per possible first hash byte
            self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        """Write the joined datalist to the pack; return (bytes written, crc).

        IOError from the write is re-raised as GitError.
        """
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob.  So let's make sure it's
        # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.)  f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError, e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        """Record (sha, crc, offset) for the object that was just written."""
        assert(sha)
        if self.idx:
            # the object ends at the current file position, so it started
            # 'size' bytes earlier
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        """Encode and write one object; return its sha.

        The sha is computed with calc_hash() when not supplied.  The pack
        is finished via breakpoint() once max_pack_size or max_pack_objects
        is reached.
        """
        if verbose:
            log('>')
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
                                    sha=sha)
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
            self.breakpoint()
        return sha

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end()
        self.outbytes = self.count = 0
        return id

    def _require_objcache(self):
        """Create self.objcache if needed; raise GitError if that fails."""
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                    "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            # _write() may have finished the pack, which drops the objcache
            # (see _end()), so make sure there is one before adding to it
            self._require_objcache()
            self.objcache.add(sha)
        return sha

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack."""
        content = tree_encode(shalist)
        return self.maybe_write('tree', content)

    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        """Build the commit text from the given fields and write it.

        Header lines whose value is empty are left out; tree and parent are
        written hex-encoded.
        """
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))

    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # the same identity and date are used for author and committer
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
                                  msg)
        return commit

    def abort(self):
        """Remove the pack file from disk."""
        f = self.file
        if f:
            self.idx = None
            self.file = None
            f.close()
            os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        """Finish the current pack and move it into objects/pack.

        Returns the final path prefix (without extension) of the pack and
        its index, or None if no pack file was open.
        """
        f = self.file
        if not f: return None
        self.file = None
        self.objcache = None
        idx = self.idx
        self.idx = None

        # update object count
        f.seek(8)
        cp = struct.pack('!i', self.count)
        assert(len(cp) == 4)
        f.write(cp)

        # calculate the pack sha1sum
        f.seek(0)
        sum = Sha1()
        for b in chunkyreader(f):
            sum.update(b)
        packbin = sum.digest()
        f.write(packbin)
        f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        # the final name is derived from the digest of the sorted object list
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')

        if run_midx:
            auto_midx(repo('objects/pack'))
        return nameprefix

    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        return self._end(run_midx=run_midx)

    def _write_pack_idx_v2(self, filename, idx, packbin):
        """Write the version 2 index for the pack to filename.

        idx is the list of 256 buckets built by _update_idx() and packbin
        is the pack's digest.  Returns the hex digest of the hash table
        section of the index.
        """
        # entries whose offset does not fit in 31 bits need a 64-bit slot
        ofs64_count = 0
        for section in idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_map = None
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            idx_map = mmap_readwrite(idx_f, close=False)
            count = _helpers.write_idx(filename, idx_map, idx, self.count)
            assert(count == self.count)
        finally:
            if idx_map: idx_map.close()
            idx_f.close()

        # reopen to append the trailer: the pack digest, then a digest of
        # everything in the index before that final digest
        idx_f = open(filename, 'a+b')
        try:
            idx_f.write(packbin)
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4*256)
            idx_sum.update(b)

            # the hash table feeds both the index digest and the digest
            # that is returned to the caller
            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            return namebase
        finally:
            idx_f.close()
 695
 696
 697 def _git_date(date):
 698     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 699
 700
 701 def _gitenv():
 702     os.environ['GIT_DIR'] = os.path.abspath(repo())
 703
 704
 705 def list_refs(refname = None):
 706     """Generate a list of tuples in the form (refname,hash).
 707     If a ref name is specified, list only this particular ref.
 708     """
 709     argv = ['git', 'show-ref', '--']
 710     if refname:
 711         argv += [refname]
 712     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 713     out = p.stdout.read().strip()
 714     rv = p.wait()  # not fatal
 715     if rv:
 716         assert(not out)
 717     if out:
 718         for d in out.split('\n'):
 719             (sha, name) = d.split(' ', 1)
 720             yield (name, sha.decode('hex'))
 721
 722
 723 def read_ref(refname):
 724     """Get the commit id of the most recent commit made on a given ref."""
 725     l = list(list_refs(refname))
 726     if l:
 727         assert(len(l) == 1)
 728         return l[0][1]
 729     else:
 730         return None
 731
 732
 733 def rev_list(ref, count=None):
 734     """Generate a list of reachable commits in reverse chronological order.
 735
 736     This generator walks through commits, from child to parent, that are
 737     reachable via the specified ref and yields a series of tuples of the form
 738     (date,hash).
 739
 740     If count is a non-zero integer, limit the number of commits to "count"
 741     objects.
 742     """
 743     assert(not ref.startswith('-'))
 744     opts = []
 745     if count:
 746         opts += ['-n', str(atoi(count))]
 747     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 748     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 749     commit = None
 750     for row in p.stdout:
 751         s = row.strip()
 752         if s.startswith('commit '):
 753             commit = s[7:].decode('hex')
 754         else:
 755             date = int(s)
 756             yield (date, commit)
 757     rv = p.wait()  # not fatal
 758     if rv:
 759         raise GitError, 'git rev-list returned error %d' % rv
 760
 761
 762 def rev_get_date(ref):
 763     """Get the date of the latest commit on the specified ref."""
 764     for (date, commit) in rev_list(ref, count=1):
 765         return date
 766     raise GitError, 'no such commit %r' % ref
 767
 768
 769 def rev_parse(committish):
 770     """Resolve the full hash for 'committish', if it exists.
 771
 772     Should be roughly equivalent to 'git rev-parse'.
 773
 774     Returns the hex value of the hash if it is found, None if 'committish' does
 775     not correspond to anything.
 776     """
 777     head = read_ref(committish)
 778     if head:
 779         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 780         return head
 781
 782     pL = PackIdxList(repo('objects/pack'))
 783
 784     if len(committish) == 40:
 785         try:
 786             hash = committish.decode('hex')
 787         except TypeError:
 788             return None
 789
 790         if pL.exists(hash):
 791             return hash
 792
 793     return None
 794
 795
 796 def update_ref(refname, newval, oldval):
 797     """Change the commit pointed to by a branch."""
 798     if not oldval:
 799         oldval = ''
 800     assert(refname.startswith('refs/heads/'))
 801     p = subprocess.Popen(['git', 'update-ref', refname,
 802                           newval.encode('hex'), oldval.encode('hex')],
 803                          preexec_fn = _gitenv)
 804     _git_wait('git update-ref', p)
 805
 806
 807 def guess_repo(path=None):
 808     """Set the path value in the global variable "repodir".
 809     This makes bup look for an existing bup repository, but not fail if a
 810     repository doesn't exist. Usually, if you are interacting with a bup
 811     repository, you would not be calling this function but using
 812     check_repo_or_die().
 813     """
 814     global repodir
 815     if path:
 816         repodir = path
 817     if not repodir:
 818         repodir = os.environ.get('BUP_DIR')
 819         if not repodir:
 820             repodir = os.path.expanduser('~/.bup')
 821
 822
 823 def init_repo(path=None):
 824     """Create the Git bare repository for bup in a given path."""
 825     guess_repo(path)
 826     d = repo()  # appends a / to the path
 827     parent = os.path.dirname(os.path.dirname(d))
 828     if parent and not os.path.exists(parent):
 829         raise GitError('parent directory "%s" does not exist\n' % parent)
 830     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 831         raise GitError('"%s" exists but is not a directory\n' % d)
 832     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 833                          preexec_fn = _gitenv)
 834     _git_wait('git init', p)
 835     # Force the index version configuration in order to ensure bup works
 836     # regardless of the version of the installed Git binary.
 837     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 838                          stdout=sys.stderr, preexec_fn = _gitenv)
 839     _git_wait('git config', p)
 840     # Enable the reflog
 841     p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
 842                          stdout=sys.stderr, preexec_fn = _gitenv)
 843     _git_wait('git config', p)
 844
 845
 846 def check_repo_or_die(path=None):
 847     """Make sure a bup repository exists, and abort if not.
 848     If the path to a particular repository was not specified, this function
 849     initializes the default repository automatically.
 850     """
 851     guess_repo(path)
 852     try:
 853         os.stat(repo('objects/pack/.'))
 854     except OSError, e:
 855         if e.errno == errno.ENOENT:
 856             log('error: %r is not a bup repository; run "bup init"\n'
 857                 % repo())
 858             sys.exit(15)
 859         else:
 860             log('error: %s\n' % e)
 861             sys.exit(14)
 862
 863
 864 _ver = None
 865 def ver():
 866     """Get Git's version and ensure a usable version is installed.
 867
 868     The returned version is formatted as an ordered tuple with each position
 869     representing a digit in the version tag. For example, the following tuple
 870     would represent version 1.6.6.9:
 871
 872         ('1', '6', '6', '9')
 873     """
 874     global _ver
 875     if not _ver:
 876         p = subprocess.Popen(['git', '--version'],
 877                              stdout=subprocess.PIPE)
 878         gvs = p.stdout.read()
 879         _git_wait('git --version', p)
 880         m = re.match(r'git version (\S+.\S+)', gvs)
 881         if not m:
 882             raise GitError('git --version weird output: %r' % gvs)
 883         _ver = tuple(m.group(1).split('.'))
 884     needed = ('1','5', '3', '1')
 885     if _ver < needed:
 886         raise GitError('git version %s or higher is required; you have %s'
 887                        % ('.'.join(needed), '.'.join(_ver)))
 888     return _ver
 889
 890
 891 def _git_wait(cmd, p):
 892     rv = p.wait()
 893     if rv != 0:
 894         raise GitError('%s returned %d' % (cmd, rv))
 895
 896
 897 def _git_capture(argv):
 898     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 899     r = p.stdout.read()
 900     _git_wait(repr(argv), p)
 901     return r
 902
 903
 904 class _AbortableIter:
 905     def __init__(self, it, onabort = None):
 906         self.it = it
 907         self.onabort = onabort
 908         self.done = None
 909
 910     def __iter__(self):
 911         return self
 912
 913     def next(self):
 914         try:
 915             return self.it.next()
 916         except StopIteration, e:
 917             self.done = True
 918             raise
 919         except:
 920             self.abort()
 921             raise
 922
 923     def abort(self):
 924         """Abort iteration and call the abortion callback, if needed."""
 925         if not self.done:
 926             self.done = True
 927             if self.onabort:
 928                 self.onabort()
 929
 930     def __del__(self):
 931         self.abort()
 932
 933
 934 _ver_warned = 0
 935 class CatPipe:
 936     """Link to 'git cat-file' that is used to retrieve blob data."""
 937     def __init__(self):
 938         global _ver_warned
 939         wanted = ('1','5','6')
 940         if ver() < wanted:
 941             if not _ver_warned:
 942                 log('warning: git version < %s; bup will be slow.\n'
 943                     % '.'.join(wanted))
 944                 _ver_warned = 1
 945             self.get = self._slow_get
 946         else:
 947             self.p = self.inprogress = None
 948             self.get = self._fast_get
 949
 950     def _abort(self):
 951         if self.p:
 952             self.p.stdout.close()
 953             self.p.stdin.close()
 954         self.p = None
 955         self.inprogress = None
 956
 957     def _restart(self):
 958         self._abort()
 959         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 960                                   stdin=subprocess.PIPE,
 961                                   stdout=subprocess.PIPE,
 962                                   close_fds = True,
 963                                   bufsize = 4096,
 964                                   preexec_fn = _gitenv)
 965
 966     def _fast_get(self, id):
 967         if not self.p or self.p.poll() != None:
 968             self._restart()
 969         assert(self.p)
 970         poll_result = self.p.poll()
 971         assert(poll_result == None)
 972         if self.inprogress:
 973             log('_fast_get: opening %r while %r is open\n'
 974                 % (id, self.inprogress))
 975         assert(not self.inprogress)
 976         assert(id.find('\n') < 0)
 977         assert(id.find('\r') < 0)
 978         assert(not id.startswith('-'))
 979         self.inprogress = id
 980         self.p.stdin.write('%s\n' % id)
 981         self.p.stdin.flush()
 982         hdr = self.p.stdout.readline()
 983         if hdr.endswith(' missing\n'):
 984             self.inprogress = None
 985             raise KeyError('blob %r is missing' % id)
 986         spl = hdr.split(' ')
 987         if len(spl) != 3 or len(spl[0]) != 40:
 988             raise GitError('expected blob, got %r' % spl)
 989         (hex, type, size) = spl
 990
 991         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 992                            onabort = self._abort)
 993         try:
 994             yield type
 995             for blob in it:
 996                 yield blob
 997             readline_result = self.p.stdout.readline()
 998             assert(readline_result == '\n')
 999             self.inprogress = None
1000         except Exception, e:
1001             it.abort()
1002             raise
1003
1004     def _slow_get(self, id):
1005         assert(id.find('\n') < 0)
1006         assert(id.find('\r') < 0)
1007         assert(id[0] != '-')
1008         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1009         yield type
1010
1011         p = subprocess.Popen(['git', 'cat-file', type, id],
1012                              stdout=subprocess.PIPE,
1013                              preexec_fn = _gitenv)
1014         for blob in chunkyreader(p.stdout):
1015             yield blob
1016         _git_wait('git cat-file', p)
1017
1018     def _join(self, it):
1019         type = it.next()
1020         if type == 'blob':
1021             for blob in it:
1022                 yield blob
1023         elif type == 'tree':
1024             treefile = ''.join(it)
1025             for (mode, name, sha) in tree_decode(treefile):
1026                 for blob in self.join(sha.encode('hex')):
1027                     yield blob
1028         elif type == 'commit':
1029             treeline = ''.join(it).split('\n')[0]
1030             assert(treeline.startswith('tree '))
1031             for blob in self.join(treeline[5:]):
1032                 yield blob
1033         else:
1034             raise GitError('invalid object type %r: expected blob/tree/commit'
1035                            % type)
1036
1037     def join(self, id):
1038         """Generate a list of the content of all blobs that can be reached
1039         from an object.  The hash given in 'id' must point to a blob, a tree
1040         or a commit. The content of all blobs that can be seen from trees or
1041         commits will be added to the list.
1042         """
1043         try:
1044             for d in self._join(self.get(id)):
1045                 yield d
1046         except StopIteration:
1047             log('booger!\n')
1048
def tags():
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.
    Only refs whose name starts with 'refs/tags/' are considered; that
    prefix is stripped from the names that are returned.
    """
    prefix = 'refs/tags/'
    by_hash = {}
    for (refname, sha) in list_refs():
        if not refname.startswith(prefix):
            continue
        # more than one tag can point at the same hash
        by_hash.setdefault(sha, []).append(refname[len(prefix):])
    return by_hash