Combine and speed up idx->midx and bupindex merge

[bup.git] / lib / bup / git.py
diff --git a/lib/bup/git.py b/lib/bup/git.py

index 66370cacc64d6e6a967fa8220752397175ac7e82..cd1682afc009b2f3b09d46fb6a8cf0b00e0c3cf7 100644 (file)
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -2,10 +2,9 @@
  bup repositories are in Git format. This library allows us to
  interact with the Git data structures.
  """
  bup repositories are in Git format. This library allows us to
  interact with the Git data structures.
  """
-import os, zlib, time, subprocess, struct, stat, re, tempfile
-import heapq
+import os, sys, zlib, time, subprocess, struct, stat, re, tempfile
  from bup.helpers import *
  from bup.helpers import *
-from bup import _helpers
+from bup import _helpers, path
  
  MIDX_VERSION = 2
  
  
  MIDX_VERSION = 2
  
@@ -40,9 +39,13 @@ def repo(sub = ''):
  
  
  def auto_midx(objdir):
  
  
  def auto_midx(objdir):
-    main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
-    args = [main_exe, 'midx', '--auto', '--dir', objdir]
-    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
+    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
+    try:
+        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
+    except OSError, e:
+        # make sure 'args' gets printed to help with debugging
+        add_error('%r: exception: %s' % (args, e))
+        raise
      if rv:
          add_error('%r: returned %d' % (args, rv))
  
      if rv:
          add_error('%r: returned %d' % (args, rv))
  
@@ -138,29 +141,22 @@ def _decode_packobj(buf):
  
  
  class PackIdx:
  
  
  class PackIdx:
-    """Object representation of a Git pack index file."""
-    def __init__(self, filename):
-        self.name = filename
-        self.idxnames = [self.name]
-        self.map = mmap_read(open(filename))
-        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
-        self.fanout = list(struct.unpack('!256I',
-                                         str(buffer(self.map, 8, 256*4))))
-        self.fanout.append(0)  # entry "-1"
-        nsha = self.fanout[255]
-        self.ofstable = buffer(self.map,
-                               8 + 256*4 + nsha*20 + nsha*4,
-                               nsha*4)
-        self.ofs64table = buffer(self.map,
-                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
+    def __init__(self):
+        # PackIdx is only a shared base: PackIdxV1 and PackIdxV2 define their
+        # own __init__(filename, f) plus the _ofs_from_idx()/_idx_to_hash()
+        # helpers the methods below call, so constructing PackIdx directly
+        # is treated as a programming error.
+        assert(0)
  
  
-    def _ofs_from_idx(self, idx):
-        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
-        if ofs & 0x80000000:
-            idx64 = ofs & 0x7fffffff
-            ofs = struct.unpack('!I',
-                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
-        return ofs
+    def find_offset(self, hash):
+        """Look up 'hash' in this index and return the offset recorded for it.
+
+        Returns None when the hash is not present.  The position found by
+        _idx_from_hash() is translated to an offset by the subclass's
+        _ofs_from_idx().
+        """
+        idx = self._idx_from_hash(hash)
+        # compare against None explicitly: position 0 is a valid match
+        if idx != None:
+            return self._ofs_from_idx(idx)
+        return None
+
+    def exists(self, hash):
+        """Return True if 'hash' is in this index, otherwise None.
+
+        An empty/false 'hash' is never looked up and also yields None.
+        """
+        # the trailing "and True or None" collapses every miss to None
+        return hash and (self._idx_from_hash(hash) != None) and True or None
+
+    def __len__(self):
+        """Return the number of hashes in this index.
+
+        This is the last fanout entry, the same value the subclasses use as
+        'nsha' to size their hash tables.
+        """
+        return int(self.fanout[255])
  
      def _idx_from_hash(self, hash):
          global _total_searches, _total_steps
  
      def _idx_from_hash(self, hash):
          global _total_searches, _total_steps
@@ -169,13 +165,12 @@ class PackIdx:
          b1 = ord(hash[0])
          start = self.fanout[b1-1] # range -1..254
          end = self.fanout[b1] # range 0..255
          b1 = ord(hash[0])
          start = self.fanout[b1-1] # range -1..254
          end = self.fanout[b1] # range 0..255
-        buf = buffer(self.map, 8 + 256*4, end*20)
          want = str(hash)
          _total_steps += 1  # lookup table is a step
          while start < end:
              _total_steps += 1
              mid = start + (end-start)/2
          want = str(hash)
          _total_steps += 1  # lookup table is a step
          while start < end:
              _total_steps += 1
              mid = start + (end-start)/2
-            v = str(buf[mid*20:(mid+1)*20])
+            v = self._idx_to_hash(mid)
              if v < want:
                  start = mid+1
              elif v > want:
              if v < want:
                  start = mid+1
              elif v > want:
@@ -184,23 +179,62 @@ class PackIdx:
                  return mid
          return None
  
                  return mid
          return None
  
-    def find_offset(self, hash):
-        """Get the offset of an object inside the index file."""
-        idx = self._idx_from_hash(hash)
-        if idx != None:
-            return self._ofs_from_idx(idx)
-        return None
  
  
-    def exists(self, hash):
-        """Return nonempty if the object exists in this index."""
-        return hash and (self._idx_from_hash(hash) != None) and True or None
+class PackIdxV1(PackIdx):
+    """Object representation of a Git pack index (version 1) file."""
+    def __init__(self, filename, f):
+        # 'f' is the already-open file object for 'filename' (open_idx() has
+        # read the first 8 bytes from it before handing it over).
+        self.name = filename
+        self.idxnames = [self.name]
+        self.map = mmap_read(f)
+        # no header is skipped here: the 256-entry fanout table is read from
+        # offset 0 of the map
+        self.fanout = list(struct.unpack('!256I',
+                                         str(buffer(self.map, 0, 256*4))))
+        # _idx_from_hash() reads fanout[b1-1]; for b1 == 0 that is fanout[-1],
+        # which this extra entry makes 0
+        self.fanout.append(0)  # entry "-1"
+        nsha = self.fanout[255]
+        # one 24-byte record per object: a 4-byte offset followed by the
+        # 20-byte hash (see _ofs_from_idx() and _idx_to_hash())
+        self.shatable = buffer(self.map, 256*4, nsha*24)
+
+    def _ofs_from_idx(self, idx):
+        """Return the offset stored in record number 'idx'.
+
+        The offset is the first 4 bytes of the 24-byte record, unpacked as a
+        big-endian unsigned 32-bit integer.
+        """
+        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
+
+    def _idx_to_hash(self, idx):
+        """Return the 20-byte hash of record number 'idx' as a str.
+
+        The hash occupies bytes 4..23 of the 24-byte record.
+        """
+        return str(self.shatable[idx*24+4 : idx*24+24])
  
      def __iter__(self):
          for i in xrange(self.fanout[255]):
  
      def __iter__(self):
          for i in xrange(self.fanout[255]):
-            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
+            yield buffer(self.map, 256*4 + 24*i + 4, 20)
  
  
-    def __len__(self):
-        return int(self.fanout[255])
+
+class PackIdxV2(PackIdx):
+    """Object representation of a Git pack index (version 2) file.
+
+    Layout as read by __init__: an 8-byte header, a 256-entry fanout table,
+    nsha 20-byte hashes, nsha 4-byte values that this class skips over
+    (PackWriter._write_pack_idx_v2 stores the per-object CRCs there), nsha
+    4-byte offsets, and finally the table of 8-byte offsets.
+    """
+    def __init__(self, filename, f):
+        # 'f' is the already-open file object for 'filename'
+        self.name = filename
+        self.idxnames = [self.name]
+        self.map = mmap_read(f)
+        # magic '\377tOc' followed by version number 2
+        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
+        self.fanout = list(struct.unpack('!256I',
+                                         str(buffer(self.map, 8, 256*4))))
+        # _idx_from_hash() reads fanout[b1-1]; for b1 == 0 that is fanout[-1],
+        # which this extra entry makes 0
+        self.fanout.append(0)  # entry "-1"
+        nsha = self.fanout[255]
+        self.shatable = buffer(self.map, 8 + 256*4, nsha*20)
+        # the nsha*4 bytes between the hash table and the offset table are
+        # not used by this class
+        self.ofstable = buffer(self.map,
+                               8 + 256*4 + nsha*20 + nsha*4,
+                               nsha*4)
+        # everything after the 32-bit offsets; indexed in 8-byte steps by
+        # _ofs_from_idx()
+        self.ofs64table = buffer(self.map,
+                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
+
+    def _ofs_from_idx(self, idx):
+        """Return the offset stored for the object at position 'idx'."""
+        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
+        # high bit set: the low 31 bits are not the offset itself but the
+        # position of an 8-byte entry in ofs64table
+        if ofs & 0x80000000:
+            idx64 = ofs & 0x7fffffff
+            # '!Q' reads all 8 bytes as one unsigned 64-bit value
+            ofs = struct.unpack('!Q',
+                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
+        return ofs
+
+    def _idx_to_hash(self, idx):
+        """Return the 20-byte hash at position 'idx' as a str."""
+        return str(self.shatable[idx*20:(idx+1)*20])
+
+    def __iter__(self):
+        # yields one 20-byte buffer per hash, in the order they are stored
+        for i in xrange(self.fanout[255]):
+            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
  
  
  extract_bits = _helpers.extract_bits
  
  
  extract_bits = _helpers.extract_bits
@@ -368,7 +402,10 @@ class PackIdxList:
                                  log(('warning: index %s missing\n' +
                                      '  used by %s\n') % (n, mxf))
                                  broken += 1
                                  log(('warning: index %s missing\n' +
                                      '  used by %s\n') % (n, mxf))
                                  broken += 1
-                        if not broken:
+                        if broken:
+                            del mx
+                            unlink(full)
+                        else:
                              midxl.append(mx)
                  midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                  for ix in midxl:
                              midxl.append(mx)
                  midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                  for ix in midxl:
@@ -389,12 +426,32 @@ class PackIdxList:
              for f in os.listdir(self.dir):
                  full = os.path.join(self.dir, f)
                  if f.endswith('.idx') and not d.get(full):
              for f in os.listdir(self.dir):
                  full = os.path.join(self.dir, f)
                  if f.endswith('.idx') and not d.get(full):
-                    ix = PackIdx(full)
+                    try:
+                        ix = open_idx(full)
+                    except GitError, e:
+                        add_error(e)
+                        continue
                      d[full] = ix
              self.packs = list(set(d.values()))
          debug1('PackIdxList: using %d index%s.\n'
              % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
  
                      d[full] = ix
              self.packs = list(set(d.values()))
          debug1('PackIdxList: using %d index%s.\n'
              % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
  
+    def packname_containing(self, hash):
+        """Return the path of an .idx file in self.dir that contains 'hash'.
+
+        The .idx files are opened one by one in os.listdir() order and the
+        first match wins.  Indexes that open_idx() rejects with GitError are
+        reported through add_error() and skipped.  Falls off the end (i.e.
+        returns None) when no index contains the hash.
+        """
+        # figure out which pack contains a given hash.
+        # FIXME: if the midx file format would just *store* this information,
+        # we could calculate it a lot more efficiently.  But it's not needed
+        # often, so let's do it like this.
+        for f in os.listdir(self.dir):
+            if f.endswith('.idx'):
+                full = os.path.join(self.dir, f)
+                try:
+                    ix = open_idx(full)
+                except GitError, e:
+                    add_error(e)
+                    continue
+                if ix.exists(hash):
+                    return full
+
      def add(self, hash):
          """Insert an additional object in the list."""
          self.also[hash] = 1
      def add(self, hash):
          """Insert an additional object in the list."""
          self.also[hash] = 1
@@ -422,7 +479,19 @@ def _shalist_sort_key(ent):
  
  def open_idx(filename):
      if filename.endswith('.idx'):
  
  def open_idx(filename):
      if filename.endswith('.idx'):
-        return PackIdx(filename)
+        f = open(filename, 'rb')
+        header = f.read(8)
+        if header[0:4] == '\377tOc':
+            version = struct.unpack('!I', header[4:8])[0]
+            if version == 2:
+                return PackIdxV2(filename, f)
+            else:
+                raise GitError('%s: expected idx file version 2, got %d'
+                               % (filename, version))
+        elif len(header) == 8 and header[0:4] < '\377tOc':
+            return PackIdxV1(filename, f)
+        else:
+            raise GitError('%s: unrecognized idx file header' % filename)
      elif filename.endswith('.midx'):
          return PackMidx(filename)
      else:
      elif filename.endswith('.midx'):
          return PackMidx(filename)
      else:
@@ -431,60 +500,44 @@ def open_idx(filename):
  
  def idxmerge(idxlist, final_progress=True):
      """Generate a list of all the objects reachable in a PackIdxList."""
  
  def idxmerge(idxlist, final_progress=True):
      """Generate a list of all the objects reachable in a PackIdxList."""
-    total = sum(len(i) for i in idxlist)
-    iters = (iter(i) for i in idxlist)
-    heap = [(next(it), it) for it in iters]
-    heapq.heapify(heap)
-    count = 0
-    last = None
-    while heap:
-        if (count % 10024) == 0:
-            progress('Reading indexes: %.2f%% (%d/%d)\r'
-                     % (count*100.0/total, count, total))
-        (e, it) = heap[0]
-        if e != last:
-            yield e
-            last = e
-        count += 1
-        e = next(it)
-        if e:
-            heapq.heapreplace(heap, (e, it))
-        else:
-            heapq.heappop(heap)
-    if final_progress:
-        log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
+    def pfunc(count, total):
+        progress('Reading indexes: %.2f%% (%d/%d)\r'
+                 % (count*100.0/total, count, total))
+    def pfinal(count, total):
+        if final_progress:
+            log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
+    return merge_iter(idxlist, 10024, pfunc, pfinal)
+
  
  
+def _make_objcache():
+    """Build a PackIdxList over the repository's objects/pack directory.
+
+    Used as the default 'objcache_maker' argument of PackWriter.
+    """
+    return PackIdxList(repo('objects/pack'))
  
  class PackWriter:
      """Writes Git objects insid a pack file."""
  
  class PackWriter:
      """Writes Git objects insid a pack file."""
-    def __init__(self, objcache_maker=None):
+    def __init__(self, objcache_maker=_make_objcache):
          self.count = 0
          self.outbytes = 0
          self.filename = None
          self.file = None
          self.count = 0
          self.outbytes = 0
          self.filename = None
          self.file = None
+        self.idx = None
          self.objcache_maker = objcache_maker
          self.objcache = None
  
      def __del__(self):
          self.close()
  
          self.objcache_maker = objcache_maker
          self.objcache = None
  
      def __del__(self):
          self.close()
  
-    def _make_objcache(self):
-        if self.objcache == None:
-            if self.objcache_maker:
-                self.objcache = self.objcache_maker()
-            else:
-                self.objcache = PackIdxList(repo('objects/pack'))
-
      def _open(self):
          if not self.file:
      def _open(self):
          if not self.file:
-            self._make_objcache()
              (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
              self.file = os.fdopen(fd, 'w+b')
              assert(name.endswith('.pack'))
              self.filename = name[:-5]
              self.file.write('PACK\0\0\0\2\0\0\0\0')
              (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
              self.file = os.fdopen(fd, 'w+b')
              assert(name.endswith('.pack'))
              self.filename = name[:-5]
              self.file.write('PACK\0\0\0\2\0\0\0\0')
+            self.idx = list(list() for i in xrange(256))
  
  
-    def _raw_write(self, datalist):
+    # the 'sha' parameter is used in client.py's _raw_write(), but not needed
+    # in this basic version.
+    def _raw_write(self, datalist, sha):
          self._open()
          f = self.file
          # in case we get interrupted (eg. KeyboardInterrupt), it's best if
          self._open()
          f = self.file
          # in case we get interrupted (eg. KeyboardInterrupt), it's best if
@@ -493,15 +546,29 @@ class PackWriter:
          # to our hashsplit algorithm.)  f.write() does its own buffering,
          # but that's okay because we'll flush it in _end().
          oneblob = ''.join(datalist)
          # to our hashsplit algorithm.)  f.write() does its own buffering,
          # but that's okay because we'll flush it in _end().
          oneblob = ''.join(datalist)
-        f.write(oneblob)
-        self.outbytes += len(oneblob)
+        try:
+            f.write(oneblob)
+        except IOError, e:
+            raise GitError, e, sys.exc_info()[2]
+        nw = len(oneblob)
+        crc = zlib.crc32(oneblob) & 0xffffffff
+        self._update_idx(sha, crc, nw)
+        self.outbytes += nw
          self.count += 1
          self.count += 1
+        return nw, crc
+
+    def _update_idx(self, sha, crc, size):
+        """Remember one just-written object for the index built in _end().
+
+        Appends (sha, crc, offset) to the bucket selected by the first byte
+        of 'sha'.  'size' is the number of bytes just written for the
+        object, so the current file position minus 'size' is where the
+        object starts.
+        """
+        assert(sha)
+        # self.idx is None until _open() creates the 256 buckets, and is
+        # reset to None again once the pack is finished or aborted
+        if self.idx:
+            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
  
  
-    def _write(self, bin, type, content):
+    def _write(self, sha, type, content):
          if verbose:
              log('>')
          if verbose:
              log('>')
-        self._raw_write(_encode_packobj(type, content))
-        return bin
+        if not sha:
+            sha = calc_hash(type, content)
+        size, crc = self._raw_write(_encode_packobj(type, content), sha=sha)
+        return sha
  
      def breakpoint(self):
          """Clear byte and object counts and return the last processed id."""
  
      def breakpoint(self):
          """Clear byte and object counts and return the last processed id."""
@@ -513,19 +580,26 @@ class PackWriter:
          """Write an object in this pack file."""
          return self._write(calc_hash(type, content), type, content)
  
          """Write an object in this pack file."""
          return self._write(calc_hash(type, content), type, content)
  
+    def _require_objcache(self):
+        """Make sure self.objcache is available, creating it on first use.
+
+        The cache is built by calling self.objcache_maker() if one was
+        given.  Raises GitError when there is still no cache afterwards
+        (no maker, or the maker returned None).
+        """
+        if self.objcache is None and self.objcache_maker:
+            self.objcache = self.objcache_maker()
+        if self.objcache is None:
+            raise GitError(
+                    "PackWriter not opened or can't check exists w/o objcache")
+
      def exists(self, id):
          """Return non-empty if an object is found in the object cache."""
      def exists(self, id):
          """Return non-empty if an object is found in the object cache."""
-        if not self.objcache:
-            self._make_objcache()
+        self._require_objcache()
          return self.objcache.exists(id)
  
      def maybe_write(self, type, content):
          """Write an object to the pack file if not present and return its id."""
          return self.objcache.exists(id)
  
      def maybe_write(self, type, content):
          """Write an object to the pack file if not present and return its id."""
-        bin = calc_hash(type, content)
-        if not self.exists(bin):
-            self._write(bin, type, content)
-            self.objcache.add(bin)
-        return bin
+        self._require_objcache()
+        sha = calc_hash(type, content)
+        if not self.exists(sha):
+            self._write(sha, type, content)
+            self.objcache.add(sha)
+        return sha
  
      def new_blob(self, blob):
          """Create a blob object in the pack with the supplied content."""
  
      def new_blob(self, blob):
          """Create a blob object in the pack with the supplied content."""
@@ -566,15 +640,18 @@ class PackWriter:
          """Remove the pack file from disk."""
          f = self.file
          if f:
          """Remove the pack file from disk."""
          f = self.file
          if f:
+            self.idx = None
              self.file = None
              f.close()
              os.unlink(self.filename + '.pack')
  
              self.file = None
              f.close()
              os.unlink(self.filename + '.pack')
  
-    def _end(self):
+    def _end(self, run_midx=True):
          f = self.file
          if not f: return None
          self.file = None
          self.objcache = None
          f = self.file
          if not f: return None
          self.file = None
          self.objcache = None
+        idx = self.idx
+        self.idx = None
  
          # update object count
          f.seek(8)
  
          # update object count
          f.seek(8)
@@ -585,35 +662,67 @@ class PackWriter:
          # calculate the pack sha1sum
          f.seek(0)
          sum = Sha1()
          # calculate the pack sha1sum
          f.seek(0)
          sum = Sha1()
-        while 1:
-            b = f.read(65536)
+        for b in chunkyreader(f):
              sum.update(b)
              sum.update(b)
-            if not b: break
-        f.write(sum.digest())
-
+        packbin = sum.digest()
+        f.write(packbin)
          f.close()
  
          f.close()
  
-        p = subprocess.Popen(['git', 'index-pack', '-v',
-                              '--index-version=2',
-                              self.filename + '.pack'],
-                             preexec_fn = _gitenv,
-                             stdout = subprocess.PIPE)
-        out = p.stdout.read().strip()
-        _git_wait('git index-pack', p)
-        if not out:
-            raise GitError('git index-pack produced no output')
-        nameprefix = repo('objects/pack/%s' % out)
+        idx_f = open(self.filename + '.idx', 'wb')
+        obj_list_sha = self._write_pack_idx_v2(idx_f, idx, packbin)
+        idx_f.close()
+
+        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
          if os.path.exists(self.filename + '.map'):
              os.unlink(self.filename + '.map')
          os.rename(self.filename + '.pack', nameprefix + '.pack')
          os.rename(self.filename + '.idx', nameprefix + '.idx')
  
          if os.path.exists(self.filename + '.map'):
              os.unlink(self.filename + '.map')
          os.rename(self.filename + '.pack', nameprefix + '.pack')
          os.rename(self.filename + '.idx', nameprefix + '.idx')
  
-        auto_midx(repo('objects/pack'))
+        if run_midx:
+            auto_midx(repo('objects/pack'))
          return nameprefix
  
          return nameprefix
  
-    def close(self):
+    def close(self, run_midx=True):
          """Close the pack file and move it to its definitive path."""
          """Close the pack file and move it to its definitive path."""
-        return self._end()
+        return self._end(run_midx=run_midx)
+
+    def _write_pack_idx_v2(self, file, idx, packbin):
+        """Write a version-2 pack index for the finished pack to 'file'.
+
+        'idx' is the list of 256 buckets filled by _update_idx(): bucket b
+        holds the (sha, crc, offset) tuples of the objects whose sha starts
+        with byte b.  Each bucket is sorted in place by sha.  'packbin' is
+        the binary SHA-1 of the pack file and is stored near the end of the
+        index, followed by the SHA-1 of everything written before it.
+
+        Returns the hex SHA-1 of the sorted object ids; _end() uses it to
+        build the final pack-<sha> file names.
+        """
+        sum = Sha1()
+
+        def write(data):
+            # every byte that goes to the file also feeds the trailing
+            # checksum of the index itself
+            file.write(data)
+            sum.update(data)
+
+        write('\377tOc\0\0\0\2')
+
+        # fanout table: entry b is the running count of objects whose first
+        # byte is <= b.  Packed unsigned, matching the '!256I' that
+        # PackIdxV1/PackIdxV2 use to read it back.
+        n = 0
+        for part in idx:
+            n += len(part)
+            write(struct.pack('!I', n))
+            part.sort(key=lambda x: x[0])
+
+        # object ids, in sorted order
+        obj_list_sum = Sha1()
+        for part in idx:
+            for entry in part:
+                write(entry[0])
+                obj_list_sum.update(entry[0])
+        # per-object CRC values, in the same order
+        for part in idx:
+            for entry in part:
+                write(struct.pack('!I', entry[1]))
+        # 32-bit offsets; an offset with bit 31 set does not fit, so it is
+        # replaced by 0x80000000 | <position in the 64-bit table> and the
+        # real value is queued for that table
+        ofs64_list = []
+        for part in idx:
+            for entry in part:
+                if entry[2] & 0x80000000:
+                    write(struct.pack('!I', 0x80000000 | len(ofs64_list)))
+                    ofs64_list.append(struct.pack('!Q', entry[2]))
+                else:
+                    # unsigned, like the '!I' used by PackIdxV2._ofs_from_idx
+                    write(struct.pack('!I', entry[2]))
+        for ofs64 in ofs64_list:
+            write(ofs64)
+
+        write(packbin)
+        # the final digest is not fed back into itself
+        file.write(sum.digest())
+        return obj_list_sum.hexdigest()
  
  
  def _git_date(date):
  
  
  def _git_date(date):
@@ -688,6 +797,33 @@ def rev_get_date(ref):
      raise GitError, 'no such commit %r' % ref
  
  
      raise GitError, 'no such commit %r' % ref
  
  
+def rev_parse(committish):
+    """Resolve the full hash for 'committish', if it exists.
+
+    Should be roughly equivalent to 'git rev-parse'.
+
+    Returns the hash in binary (not hex) form if it is found, None if
+    'committish' does not correspond to anything.  'committish' is first
+    tried as a ref name and then, if it is exactly 40 characters long, as
+    the hex id of an object present in the pack indexes.
+    """
+    head = read_ref(committish)
+    if head:
+        # 'head' is binary; it is hex-encoded only for the debug message
+        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
+        return head
+
+    pL = PackIdxList(repo('objects/pack'))
+
+    if len(committish) == 40:
+        try:
+            hash = committish.decode('hex')
+        except TypeError:
+            # 40 characters, but not valid hex
+            return None
+
+        if pL.exists(hash):
+            return hash
+
+    return None
+
+
  def update_ref(refname, newval, oldval):
      """Change the commit pointed to by a branch."""
      if not oldval:
  def update_ref(refname, newval, oldval):
      """Change the commit pointed to by a branch."""
      if not oldval:
@@ -718,7 +854,10 @@ def guess_repo(path=None):
  def init_repo(path=None):
      """Create the Git bare repository for bup in a given path."""
      guess_repo(path)
  def init_repo(path=None):
      """Create the Git bare repository for bup in a given path."""
      guess_repo(path)
-    d = repo()
+    d = repo()  # appends a / to the path
+    parent = os.path.dirname(os.path.dirname(d))
+    if parent and not os.path.exists(parent):
+        raise GitError('parent directory "%s" does not exist\n' % parent)
      if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
          raise GitError('"%d" exists but is not a directory\n' % d)
      p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
      if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
          raise GitError('"%d" exists but is not a directory\n' % d)
      p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
@@ -857,6 +996,7 @@ class CatPipe:
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    close_fds = True,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    close_fds = True,
+                                  bufsize = 4096,
                                    preexec_fn = _gitenv)
  
      def _fast_get(self, id):
                                    preexec_fn = _gitenv)
  
      def _fast_get(self, id):
@@ -870,11 +1010,13 @@ class CatPipe:
          assert(not self.inprogress)
          assert(id.find('\n') < 0)
          assert(id.find('\r') < 0)
          assert(not self.inprogress)
          assert(id.find('\n') < 0)
          assert(id.find('\r') < 0)
-        assert(id[0] != '-')
+        assert(not id.startswith('-'))
          self.inprogress = id
          self.p.stdin.write('%s\n' % id)
          self.inprogress = id
          self.p.stdin.write('%s\n' % id)
+        self.p.stdin.flush()
          hdr = self.p.stdout.readline()
          if hdr.endswith(' missing\n'):
          hdr = self.p.stdout.readline()
          if hdr.endswith(' missing\n'):
+            self.inprogress = None
              raise KeyError('blob %r is missing' % id)
          spl = hdr.split(' ')
          if len(spl) != 3 or len(spl[0]) != 40:
              raise KeyError('blob %r is missing' % id)
          spl = hdr.split(' ')
          if len(spl) != 3 or len(spl[0]) != 40:
@@ -937,3 +1079,16 @@ class CatPipe:
                  yield d
          except StopIteration:
              log('booger!\n')
                  yield d
          except StopIteration:
              log('booger!\n')
+
+def tags():
+    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
+    # NOTE(review): this local dict shadows the function's own name inside
+    # the body; harmless here since the function does not call itself.
+    tags = {}
+    for (n,c) in list_refs():
+        if n.startswith('refs/tags/'):
+            # drop the 10-character 'refs/tags/' prefix
+            name = n[10:]
+            if not c in tags:
+                tags[c] = []
+
+            tags[c].append(name)  # more than one tag can point at 'c'
+
+    return tags