lib/bup/git.py

   1 """Git interaction library.
   2 bup repositories are in Git format. This library allows us to
   3 interact with the Git data structures.
   4 """
   5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
   6 from bup.helpers import *
   7 from bup import _helpers, path, midx, bloom
   8
# Numeric value of os.SEEK_END, spelled out for old interpreters.
SEEK_END=2  # os.SEEK_END is not defined in python 2.4

# When nonzero, PackWriter._write() logs a '>' for every object written.
verbose = 0
# When true, PackIdxList.refresh() always acts as if skip_midx was True.
ignore_midx = 0
# Default repository; check_repo_or_die() initializes it automatically.
home_repodir = os.path.expanduser('~/.bup')
# Directory of the active repository; set by guess_repo(), read by repo().
repodir = None

# Object type name -> numeric type code stored in pack object headers.
_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
# Inverse of _typemap: numeric type code -> object type name.
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Lookup statistics, updated by PackIdx and PackIdxList searches.
_total_searches = 0
_total_steps = 0
  21
  22
  23 class GitError(Exception):
  24     pass
  25
  26
  27 def repo(sub = ''):
  28     """Get the path to the git repository or one of its subdirectories."""
  29     global repodir
  30     if not repodir:
  31         raise GitError('You should call check_repo_or_die()')
  32
  33     # If there's a .git subdirectory, then the actual repo is in there.
  34     gd = os.path.join(repodir, '.git')
  35     if os.path.exists(gd):
  36         repodir = gd
  37
  38     return os.path.join(repodir, sub)
  39
  40
  41 def shorten_hash(s):
  42     return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
  43                   r'\1\2*\3', s)
  44
  45
  46 def repo_rel(path):
  47     full = os.path.abspath(path)
  48     fullrepo = os.path.abspath(repo(''))
  49     if not fullrepo.endswith('/'):
  50         fullrepo += '/'
  51     if full.startswith(fullrepo):
  52         path = full[len(fullrepo):]
  53     if path.startswith('index-cache/'):
  54         path = path[len('index-cache/'):]
  55     return shorten_hash(path)
  56
  57
  58 def all_packdirs():
  59     paths = [repo('objects/pack')]
  60     paths += glob.glob(repo('index-cache/*/.'))
  61     return paths
  62
  63
  64 def auto_midx(objdir):
  65     args = [path.exe(), 'midx', '--auto', '--dir', objdir]
  66     try:
  67         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  68     except OSError, e:
  69         # make sure 'args' gets printed to help with debugging
  70         add_error('%r: exception: %s' % (args, e))
  71         raise
  72     if rv:
  73         add_error('%r: returned %d' % (args, rv))
  74
  75     args = [path.exe(), 'bloom', '--dir', objdir]
  76     try:
  77         rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
  78     except OSError, e:
  79         # make sure 'args' gets printed to help with debugging
  80         add_error('%r: exception: %s' % (args, e))
  81         raise
  82     if rv:
  83         add_error('%r: returned %d' % (args, rv))
  84
  85
  86 def mangle_name(name, mode, gitmode):
  87     """Mangle a file name to present an abstract name for segmented files.
  88     Mangled file names will have the ".bup" extension added to them. If a
  89     file's name already ends with ".bup", a ".bupl" extension is added to
  90     disambiguate normal files from semgmented ones.
  91     """
  92     if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
  93         return name + '.bup'
  94     elif name.endswith('.bup') or name[:-1].endswith('.bup'):
  95         return name + '.bupl'
  96     else:
  97         return name
  98
  99
 100 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
 101 def demangle_name(name):
 102     """Remove name mangling from a file name, if necessary.
 103
 104     The return value is a tuple (demangled_filename,mode), where mode is one of
 105     the following:
 106
 107     * BUP_NORMAL  : files that should be read as-is from the repository
 108     * BUP_CHUNKED : files that were chunked and need to be assembled
 109
 110     For more information on the name mangling algorythm, see mangle_name()
 111     """
 112     if name.endswith('.bupl'):
 113         return (name[:-5], BUP_NORMAL)
 114     elif name.endswith('.bup'):
 115         return (name[:-4], BUP_CHUNKED)
 116     else:
 117         return (name, BUP_NORMAL)
 118
 119
 120 def _encode_packobj(type, content):
 121     szout = ''
 122     sz = len(content)
 123     szbits = (sz & 0x0f) | (_typemap[type]<<4)
 124     sz >>= 4
 125     while 1:
 126         if sz: szbits |= 0x80
 127         szout += chr(szbits)
 128         if not sz:
 129             break
 130         szbits = sz & 0x7f
 131         sz >>= 7
 132     z = zlib.compressobj(1)
 133     yield szout
 134     yield z.compress(content)
 135     yield z.flush()
 136
 137
 138 def _encode_looseobj(type, content):
 139     z = zlib.compressobj(1)
 140     yield z.compress('%s %d\0' % (type, len(content)))
 141     yield z.compress(content)
 142     yield z.flush()
 143
 144
 145 def _decode_looseobj(buf):
 146     assert(buf);
 147     s = zlib.decompress(buf)
 148     i = s.find('\0')
 149     assert(i > 0)
 150     l = s[:i].split(' ')
 151     type = l[0]
 152     sz = int(l[1])
 153     content = s[i+1:]
 154     assert(type in _typemap)
 155     assert(sz == len(content))
 156     return (type, content)
 157
 158
 159 def _decode_packobj(buf):
 160     assert(buf)
 161     c = ord(buf[0])
 162     type = _typermap[(c & 0x70) >> 4]
 163     sz = c & 0x0f
 164     shift = 4
 165     i = 0
 166     while c & 0x80:
 167         i += 1
 168         c = ord(buf[i])
 169         sz |= (c & 0x7f) << shift
 170         shift += 7
 171         if not (c & 0x80):
 172             break
 173     return (type, zlib.decompress(buf[i+1:]))
 174
 175
 176 class PackIdx:
 177     def __init__(self):
 178         assert(0)
 179
 180     def find_offset(self, hash):
 181         """Get the offset of an object inside the index file."""
 182         idx = self._idx_from_hash(hash)
 183         if idx != None:
 184             return self._ofs_from_idx(idx)
 185         return None
 186
 187     def exists(self, hash, want_source=False):
 188         """Return nonempty if the object exists in this index."""
 189         if hash and (self._idx_from_hash(hash) != None):
 190             return want_source and os.path.basename(self.name) or True
 191         return None
 192
 193     def __len__(self):
 194         return int(self.fanout[255])
 195
 196     def _idx_from_hash(self, hash):
 197         global _total_searches, _total_steps
 198         _total_searches += 1
 199         assert(len(hash) == 20)
 200         b1 = ord(hash[0])
 201         start = self.fanout[b1-1] # range -1..254
 202         end = self.fanout[b1] # range 0..255
 203         want = str(hash)
 204         _total_steps += 1  # lookup table is a step
 205         while start < end:
 206             _total_steps += 1
 207             mid = start + (end-start)/2
 208             v = self._idx_to_hash(mid)
 209             if v < want:
 210                 start = mid+1
 211             elif v > want:
 212                 end = mid
 213             else: # got it!
 214                 return mid
 215         return None
 216
 217
 218 class PackIdxV1(PackIdx):
 219     """Object representation of a Git pack index (version 1) file."""
 220     def __init__(self, filename, f):
 221         self.name = filename
 222         self.idxnames = [self.name]
 223         self.map = mmap_read(f)
 224         self.fanout = list(struct.unpack('!256I',
 225                                          str(buffer(self.map, 0, 256*4))))
 226         self.fanout.append(0)  # entry "-1"
 227         nsha = self.fanout[255]
 228         self.sha_ofs = 256*4
 229         self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
 230
 231     def _ofs_from_idx(self, idx):
 232         return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
 233
 234     def _idx_to_hash(self, idx):
 235         return str(self.shatable[idx*24+4 : idx*24+24])
 236
 237     def __iter__(self):
 238         for i in xrange(self.fanout[255]):
 239             yield buffer(self.map, 256*4 + 24*i + 4, 20)
 240
 241
 242 class PackIdxV2(PackIdx):
 243     """Object representation of a Git pack index (version 2) file."""
 244     def __init__(self, filename, f):
 245         self.name = filename
 246         self.idxnames = [self.name]
 247         self.map = mmap_read(f)
 248         assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
 249         self.fanout = list(struct.unpack('!256I',
 250                                          str(buffer(self.map, 8, 256*4))))
 251         self.fanout.append(0)  # entry "-1"
 252         nsha = self.fanout[255]
 253         self.sha_ofs = 8 + 256*4
 254         self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
 255         self.ofstable = buffer(self.map,
 256                                self.sha_ofs + nsha*20 + nsha*4,
 257                                nsha*4)
 258         self.ofs64table = buffer(self.map,
 259                                  8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
 260
 261     def _ofs_from_idx(self, idx):
 262         ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
 263         if ofs & 0x80000000:
 264             idx64 = ofs & 0x7fffffff
 265             ofs = struct.unpack('!Q',
 266                                 str(buffer(self.ofs64table, idx64*8, 8)))[0]
 267         return ofs
 268
 269     def _idx_to_hash(self, idx):
 270         return str(self.shatable[idx*20:(idx+1)*20])
 271
 272     def __iter__(self):
 273         for i in xrange(self.fanout[255]):
 274             yield buffer(self.map, 8 + 256*4 + 20*i, 20)
 275
 276
# Number of live PackIdxList objects; the constructor asserts it is zero.
_mpi_count = 0
class PackIdxList:
    """Searchable collection of the index files found in one directory.

    Only one instance may exist at a time; this is enforced by assertions
    on the module-level counter _mpi_count.
    """
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()  # extra hashes registered through add()
        self.packs = []  # open index objects, rebuilt by refresh()
        self.do_bloom = False  # whether exists() consults self.bloom first
        self.bloom = None  # bloom filter opened by refresh(), if usable
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))

    def __len__(self):
        # Sum of the lengths of all the indexes currently in use.
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                # The bloom filter cannot rule the object out: search the
                # packs, and stop asking the bloom until a search misses.
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in xrange(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        # Not found in any pack: consult the bloom filter again next time.
        self.do_bloom = True
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # d maps index file paths to the object that serves them; the
        # currently open indexes are kept (minus midx ones when skipping).
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                # Every idx named by an already-open midx is served by it.
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                # Open the midx files not known yet; a midx that names a
                # missing idx file is deleted instead of being used.
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                    '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                # Largest first; among equal sizes, most recently modified.
                midxl.sort(key=lambda ix:
                           (-len(ix), -os.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        unlink(ix.name)
            # Open the .idx files nothing serves yet; one that cannot be
            # opened is reported through add_error() and left out.
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError, e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            # Several paths may map to the same object, so deduplicate.
            self.packs = list(set(d.values()))
            # Biggest indexes first.
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            # The bloom is only used when it reports itself valid and its
            # length is at least the combined length of the indexes.
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def add(self, hash):
        """Insert an additional object in the list."""
        self.also.add(hash)
 404
 405
 406 def calc_hash(type, content):
 407     """Calculate some content's hash in the Git fashion."""
 408     header = '%s %d\0' % (type, len(content))
 409     sum = Sha1(header)
 410     sum.update(content)
 411     return sum.digest()
 412
 413
 414 def _shalist_sort_key(ent):
 415     (mode, name, id) = ent
 416     if stat.S_ISDIR(int(mode, 8)):
 417         return name + '/'
 418     else:
 419         return name
 420
 421
 422 def open_idx(filename):
 423     if filename.endswith('.idx'):
 424         f = open(filename, 'rb')
 425         header = f.read(8)
 426         if header[0:4] == '\377tOc':
 427             version = struct.unpack('!I', header[4:8])[0]
 428             if version == 2:
 429                 return PackIdxV2(filename, f)
 430             else:
 431                 raise GitError('%s: expected idx file version 2, got %d'
 432                                % (filename, version))
 433         elif len(header) == 8 and header[0:4] < '\377tOc':
 434             return PackIdxV1(filename, f)
 435         else:
 436             raise GitError('%s: unrecognized idx file header' % filename)
 437     elif filename.endswith('.midx'):
 438         return midx.PackMidx(filename)
 439     else:
 440         raise GitError('idx filenames must end with .idx or .midx')
 441
 442
 443 def idxmerge(idxlist, final_progress=True):
 444     """Generate a list of all the objects reachable in a PackIdxList."""
 445     def pfunc(count, total):
 446         qprogress('Reading indexes: %.2f%% (%d/%d)\r'
 447                   % (count*100.0/total, count, total))
 448     def pfinal(count, total):
 449         if final_progress:
 450             progress('Reading indexes: %.2f%% (%d/%d), done.\n'
 451                      % (100, total, total))
 452     return merge_iter(idxlist, 10024, pfunc, pfinal)
 453
 454
 455 def _make_objcache():
 456     return PackIdxList(repo('objects/pack'))
 457
 458 class PackWriter:
 459     """Writes Git objects inside a pack file."""
 460     def __init__(self, objcache_maker=_make_objcache):
 461         self.count = 0
 462         self.outbytes = 0
 463         self.filename = None
 464         self.file = None
 465         self.idx = None
 466         self.objcache_maker = objcache_maker
 467         self.objcache = None
 468
 469     def __del__(self):
 470         self.close()
 471
 472     def _open(self):
 473         if not self.file:
 474             (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
 475             self.file = os.fdopen(fd, 'w+b')
 476             assert(name.endswith('.pack'))
 477             self.filename = name[:-5]
 478             self.file.write('PACK\0\0\0\2\0\0\0\0')
 479             self.idx = list(list() for i in xrange(256))
 480
 481     def _raw_write(self, datalist, sha):
 482         self._open()
 483         f = self.file
 484         # in case we get interrupted (eg. KeyboardInterrupt), it's best if
 485         # the file never has a *partial* blob.  So let's make sure it's
 486         # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
 487         # to our hashsplit algorithm.)  f.write() does its own buffering,
 488         # but that's okay because we'll flush it in _end().
 489         oneblob = ''.join(datalist)
 490         try:
 491             f.write(oneblob)
 492         except IOError, e:
 493             raise GitError, e, sys.exc_info()[2]
 494         nw = len(oneblob)
 495         crc = zlib.crc32(oneblob) & 0xffffffff
 496         self._update_idx(sha, crc, nw)
 497         self.outbytes += nw
 498         self.count += 1
 499         return nw, crc
 500
 501     def _update_idx(self, sha, crc, size):
 502         assert(sha)
 503         if self.idx:
 504             self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
 505
 506     def _write(self, sha, type, content):
 507         if verbose:
 508             log('>')
 509         if not sha:
 510             sha = calc_hash(type, content)
 511         size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
 512         return sha
 513
 514     def breakpoint(self):
 515         """Clear byte and object counts and return the last processed id."""
 516         id = self._end()
 517         self.outbytes = self.count = 0
 518         return id
 519
 520     def _require_objcache(self):
 521         if self.objcache is None and self.objcache_maker:
 522             self.objcache = self.objcache_maker()
 523         if self.objcache is None:
 524             raise GitError(
 525                     "PackWriter not opened or can't check exists w/o objcache")
 526
 527     def exists(self, id, want_source=False):
 528         """Return non-empty if an object is found in the object cache."""
 529         self._require_objcache()
 530         return self.objcache.exists(id, want_source=want_source)
 531
 532     def maybe_write(self, type, content):
 533         """Write an object to the pack file if not present and return its id."""
 534         self._require_objcache()
 535         sha = calc_hash(type, content)
 536         if not self.exists(sha):
 537             self._write(sha, type, content)
 538             self.objcache.add(sha)
 539         return sha
 540
 541     def new_blob(self, blob):
 542         """Create a blob object in the pack with the supplied content."""
 543         return self.maybe_write('blob', blob)
 544
 545     def new_tree(self, shalist):
 546         """Create a tree object in the pack."""
 547         shalist = sorted(shalist, key = _shalist_sort_key)
 548         l = []
 549         for (mode,name,bin) in shalist:
 550             assert(mode)
 551             assert(mode != '0')
 552             assert(mode[0] != '0')
 553             assert(name)
 554             assert(len(bin) == 20)
 555             l.append('%s %s\0%s' % (mode,name,bin))
 556         return self.maybe_write('tree', ''.join(l))
 557
 558     def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
 559         l = []
 560         if tree: l.append('tree %s' % tree.encode('hex'))
 561         if parent: l.append('parent %s' % parent.encode('hex'))
 562         if author: l.append('author %s %s' % (author, _git_date(adate)))
 563         if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
 564         l.append('')
 565         l.append(msg)
 566         return self.maybe_write('commit', '\n'.join(l))
 567
 568     def new_commit(self, parent, tree, date, msg):
 569         """Create a commit object in the pack."""
 570         userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
 571         commit = self._new_commit(tree, parent,
 572                                   userline, date, userline, date,
 573                                   msg)
 574         return commit
 575
 576     def abort(self):
 577         """Remove the pack file from disk."""
 578         f = self.file
 579         if f:
 580             self.idx = None
 581             self.file = None
 582             f.close()
 583             os.unlink(self.filename + '.pack')
 584
 585     def _end(self, run_midx=True):
 586         f = self.file
 587         if not f: return None
 588         self.file = None
 589         self.objcache = None
 590         idx = self.idx
 591         self.idx = None
 592
 593         # update object count
 594         f.seek(8)
 595         cp = struct.pack('!i', self.count)
 596         assert(len(cp) == 4)
 597         f.write(cp)
 598
 599         # calculate the pack sha1sum
 600         f.seek(0)
 601         sum = Sha1()
 602         for b in chunkyreader(f):
 603             sum.update(b)
 604         packbin = sum.digest()
 605         f.write(packbin)
 606         f.close()
 607
 608         obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
 609
 610         nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
 611         if os.path.exists(self.filename + '.map'):
 612             os.unlink(self.filename + '.map')
 613         os.rename(self.filename + '.pack', nameprefix + '.pack')
 614         os.rename(self.filename + '.idx', nameprefix + '.idx')
 615
 616         if run_midx:
 617             auto_midx(repo('objects/pack'))
 618         return nameprefix
 619
 620     def close(self, run_midx=True):
 621         """Close the pack file and move it to its definitive path."""
 622         return self._end(run_midx=run_midx)
 623
 624     def _write_pack_idx_v2(self, filename, idx, packbin):
 625         idx_f = open(filename, 'w+b')
 626         idx_f.write('\377tOc\0\0\0\2')
 627
 628         ofs64_ofs = 8 + 4*256 + 28*self.count
 629         idx_f.truncate(ofs64_ofs)
 630         idx_f.seek(0)
 631         idx_map = mmap_readwrite(idx_f, close=False)
 632         idx_f.seek(0, SEEK_END)
 633         count = _helpers.write_idx(idx_f, idx_map, idx, self.count)
 634         assert(count == self.count)
 635         idx_map.close()
 636         idx_f.write(packbin)
 637
 638         idx_f.seek(0)
 639         idx_sum = Sha1()
 640         b = idx_f.read(8 + 4*256)
 641         idx_sum.update(b)
 642
 643         obj_list_sum = Sha1()
 644         for b in chunkyreader(idx_f, 20*self.count):
 645             idx_sum.update(b)
 646             obj_list_sum.update(b)
 647         namebase = obj_list_sum.hexdigest()
 648
 649         for b in chunkyreader(idx_f):
 650             idx_sum.update(b)
 651         idx_f.write(idx_sum.digest())
 652         idx_f.close()
 653
 654         return namebase
 655
 656
 657 def _git_date(date):
 658     return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
 659
 660
 661 def _gitenv():
 662     os.environ['GIT_DIR'] = os.path.abspath(repo())
 663
 664
 665 def list_refs(refname = None):
 666     """Generate a list of tuples in the form (refname,hash).
 667     If a ref name is specified, list only this particular ref.
 668     """
 669     argv = ['git', 'show-ref', '--']
 670     if refname:
 671         argv += [refname]
 672     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 673     out = p.stdout.read().strip()
 674     rv = p.wait()  # not fatal
 675     if rv:
 676         assert(not out)
 677     if out:
 678         for d in out.split('\n'):
 679             (sha, name) = d.split(' ', 1)
 680             yield (name, sha.decode('hex'))
 681
 682
 683 def read_ref(refname):
 684     """Get the commit id of the most recent commit made on a given ref."""
 685     l = list(list_refs(refname))
 686     if l:
 687         assert(len(l) == 1)
 688         return l[0][1]
 689     else:
 690         return None
 691
 692
 693 def rev_list(ref, count=None):
 694     """Generate a list of reachable commits in reverse chronological order.
 695
 696     This generator walks through commits, from child to parent, that are
 697     reachable via the specified ref and yields a series of tuples of the form
 698     (date,hash).
 699
 700     If count is a non-zero integer, limit the number of commits to "count"
 701     objects.
 702     """
 703     assert(not ref.startswith('-'))
 704     opts = []
 705     if count:
 706         opts += ['-n', str(atoi(count))]
 707     argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
 708     p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
 709     commit = None
 710     for row in p.stdout:
 711         s = row.strip()
 712         if s.startswith('commit '):
 713             commit = s[7:].decode('hex')
 714         else:
 715             date = int(s)
 716             yield (date, commit)
 717     rv = p.wait()  # not fatal
 718     if rv:
 719         raise GitError, 'git rev-list returned error %d' % rv
 720
 721
 722 def rev_get_date(ref):
 723     """Get the date of the latest commit on the specified ref."""
 724     for (date, commit) in rev_list(ref, count=1):
 725         return date
 726     raise GitError, 'no such commit %r' % ref
 727
 728
 729 def rev_parse(committish):
 730     """Resolve the full hash for 'committish', if it exists.
 731
 732     Should be roughly equivalent to 'git rev-parse'.
 733
 734     Returns the hex value of the hash if it is found, None if 'committish' does
 735     not correspond to anything.
 736     """
 737     head = read_ref(committish)
 738     if head:
 739         debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
 740         return head
 741
 742     pL = PackIdxList(repo('objects/pack'))
 743
 744     if len(committish) == 40:
 745         try:
 746             hash = committish.decode('hex')
 747         except TypeError:
 748             return None
 749
 750         if pL.exists(hash):
 751             return hash
 752
 753     return None
 754
 755
 756 def update_ref(refname, newval, oldval):
 757     """Change the commit pointed to by a branch."""
 758     if not oldval:
 759         oldval = ''
 760     assert(refname.startswith('refs/heads/'))
 761     p = subprocess.Popen(['git', 'update-ref', refname,
 762                           newval.encode('hex'), oldval.encode('hex')],
 763                          preexec_fn = _gitenv)
 764     _git_wait('git update-ref', p)
 765
 766
 767 def guess_repo(path=None):
 768     """Set the path value in the global variable "repodir".
 769     This makes bup look for an existing bup repository, but not fail if a
 770     repository doesn't exist. Usually, if you are interacting with a bup
 771     repository, you would not be calling this function but using
 772     check_repo_or_die().
 773     """
 774     global repodir
 775     if path:
 776         repodir = path
 777     if not repodir:
 778         repodir = os.environ.get('BUP_DIR')
 779         if not repodir:
 780             repodir = os.path.expanduser('~/.bup')
 781
 782
 783 def init_repo(path=None):
 784     """Create the Git bare repository for bup in a given path."""
 785     guess_repo(path)
 786     d = repo()  # appends a / to the path
 787     parent = os.path.dirname(os.path.dirname(d))
 788     if parent and not os.path.exists(parent):
 789         raise GitError('parent directory "%s" does not exist\n' % parent)
 790     if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
 791         raise GitError('"%d" exists but is not a directory\n' % d)
 792     p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
 793                          preexec_fn = _gitenv)
 794     _git_wait('git init', p)
 795     # Force the index version configuration in order to ensure bup works
 796     # regardless of the version of the installed Git binary.
 797     p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
 798                          stdout=sys.stderr, preexec_fn = _gitenv)
 799     _git_wait('git config', p)
 800
 801
 802 def check_repo_or_die(path=None):
 803     """Make sure a bup repository exists, and abort if not.
 804     If the path to a particular repository was not specified, this function
 805     initializes the default repository automatically.
 806     """
 807     guess_repo(path)
 808     if not os.path.isdir(repo('objects/pack/.')):
 809         if repodir == home_repodir:
 810             init_repo()
 811         else:
 812             log('error: %r is not a bup/git repository\n' % repo())
 813             sys.exit(15)
 814
 815
 816 def treeparse(buf):
 817     """Generate a list of (mode, name, hash) tuples of objects from 'buf'."""
 818     ofs = 0
 819     while ofs < len(buf):
 820         z = buf[ofs:].find('\0')
 821         assert(z > 0)
 822         spl = buf[ofs:ofs+z].split(' ', 1)
 823         assert(len(spl) == 2)
 824         sha = buf[ofs+z+1:ofs+z+1+20]
 825         ofs += z+1+20
 826         yield (spl[0], spl[1], sha)
 827
 828
 829 _ver = None
 830 def ver():
 831     """Get Git's version and ensure a usable version is installed.
 832
 833     The returned version is formatted as an ordered tuple with each position
 834     representing a digit in the version tag. For example, the following tuple
 835     would represent version 1.6.6.9:
 836
 837         ('1', '6', '6', '9')
 838     """
 839     global _ver
 840     if not _ver:
 841         p = subprocess.Popen(['git', '--version'],
 842                              stdout=subprocess.PIPE)
 843         gvs = p.stdout.read()
 844         _git_wait('git --version', p)
 845         m = re.match(r'git version (\S+.\S+)', gvs)
 846         if not m:
 847             raise GitError('git --version weird output: %r' % gvs)
 848         _ver = tuple(m.group(1).split('.'))
 849     needed = ('1','5', '3', '1')
 850     if _ver < needed:
 851         raise GitError('git version %s or higher is required; you have %s'
 852                        % ('.'.join(needed), '.'.join(_ver)))
 853     return _ver
 854
 855
 856 def _git_wait(cmd, p):
 857     rv = p.wait()
 858     if rv != 0:
 859         raise GitError('%s returned %d' % (cmd, rv))
 860
 861
 862 def _git_capture(argv):
 863     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
 864     r = p.stdout.read()
 865     _git_wait(repr(argv), p)
 866     return r
 867
 868
 869 class _AbortableIter:
 870     def __init__(self, it, onabort = None):
 871         self.it = it
 872         self.onabort = onabort
 873         self.done = None
 874
 875     def __iter__(self):
 876         return self
 877
 878     def next(self):
 879         try:
 880             return self.it.next()
 881         except StopIteration, e:
 882             self.done = True
 883             raise
 884         except:
 885             self.abort()
 886             raise
 887
 888     def abort(self):
 889         """Abort iteration and call the abortion callback, if needed."""
 890         if not self.done:
 891             self.done = True
 892             if self.onabort:
 893                 self.onabort()
 894
 895     def __del__(self):
 896         self.abort()
 897
 898
 899 _ver_warned = 0
 900 class CatPipe:
 901     """Link to 'git cat-file' that is used to retrieve blob data."""
 902     def __init__(self):
 903         global _ver_warned
 904         wanted = ('1','5','6')
 905         if ver() < wanted:
 906             if not _ver_warned:
 907                 log('warning: git version < %s; bup will be slow.\n'
 908                     % '.'.join(wanted))
 909                 _ver_warned = 1
 910             self.get = self._slow_get
 911         else:
 912             self.p = self.inprogress = None
 913             self.get = self._fast_get
 914
 915     def _abort(self):
 916         if self.p:
 917             self.p.stdout.close()
 918             self.p.stdin.close()
 919         self.p = None
 920         self.inprogress = None
 921
 922     def _restart(self):
 923         self._abort()
 924         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
 925                                   stdin=subprocess.PIPE,
 926                                   stdout=subprocess.PIPE,
 927                                   close_fds = True,
 928                                   bufsize = 4096,
 929                                   preexec_fn = _gitenv)
 930
 931     def _fast_get(self, id):
 932         if not self.p or self.p.poll() != None:
 933             self._restart()
 934         assert(self.p)
 935         assert(self.p.poll() == None)
 936         if self.inprogress:
 937             log('_fast_get: opening %r while %r is open'
 938                 % (id, self.inprogress))
 939         assert(not self.inprogress)
 940         assert(id.find('\n') < 0)
 941         assert(id.find('\r') < 0)
 942         assert(not id.startswith('-'))
 943         self.inprogress = id
 944         self.p.stdin.write('%s\n' % id)
 945         self.p.stdin.flush()
 946         hdr = self.p.stdout.readline()
 947         if hdr.endswith(' missing\n'):
 948             self.inprogress = None
 949             raise KeyError('blob %r is missing' % id)
 950         spl = hdr.split(' ')
 951         if len(spl) != 3 or len(spl[0]) != 40:
 952             raise GitError('expected blob, got %r' % spl)
 953         (hex, type, size) = spl
 954
 955         it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
 956                            onabort = self._abort)
 957         try:
 958             yield type
 959             for blob in it:
 960                 yield blob
 961             assert(self.p.stdout.readline() == '\n')
 962             self.inprogress = None
 963         except Exception, e:
 964             it.abort()
 965             raise
 966
 967     def _slow_get(self, id):
 968         assert(id.find('\n') < 0)
 969         assert(id.find('\r') < 0)
 970         assert(id[0] != '-')
 971         type = _git_capture(['git', 'cat-file', '-t', id]).strip()
 972         yield type
 973
 974         p = subprocess.Popen(['git', 'cat-file', type, id],
 975                              stdout=subprocess.PIPE,
 976                              preexec_fn = _gitenv)
 977         for blob in chunkyreader(p.stdout):
 978             yield blob
 979         _git_wait('git cat-file', p)
 980
 981     def _join(self, it):
 982         type = it.next()
 983         if type == 'blob':
 984             for blob in it:
 985                 yield blob
 986         elif type == 'tree':
 987             treefile = ''.join(it)
 988             for (mode, name, sha) in treeparse(treefile):
 989                 for blob in self.join(sha.encode('hex')):
 990                     yield blob
 991         elif type == 'commit':
 992             treeline = ''.join(it).split('\n')[0]
 993             assert(treeline.startswith('tree '))
 994             for blob in self.join(treeline[5:]):
 995                 yield blob
 996         else:
 997             raise GitError('invalid object type %r: expected blob/tree/commit'
 998                            % type)
 999
1000     def join(self, id):
1001         """Generate a list of the content of all blobs that can be reached
1002         from an object.  The hash given in 'id' must point to a blob, a tree
1003         or a commit. The content of all blobs that can be seen from trees or
1004         commits will be added to the list.
1005         """
1006         try:
1007             for d in self._join(self.get(id)):
1008                 yield d
1009         except StopIteration:
1010             log('booger!\n')
1011
def tags():
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.

    Every (name, hash) pair produced by list_refs() whose name starts with
    'refs/tags/' contributes its name, minus that prefix, to the list stored
    under its hash.
    """
    prefix = 'refs/tags/'
    by_hash = {}
    for (refname, sha) in list_refs():
        if not refname.startswith(prefix):
            continue
        # Several tags may point at the same hash, hence the list.
        by_hash.setdefault(sha, []).append(refname[len(prefix):])
    return by_hash