# Module-level aliases for the C-accelerated helpers in _helpers, so the
# hot bloom/midx lookup paths use a single global-name lookup instead of
# an attribute lookup per call.
extract_bits = _helpers.extract_bits

bloom_contains = _helpers.bloom_contains
bloom_add = _helpers.bloom_add
-
class ShaBloom:
    """A bloom filter over the shas of multiple index files.

    Backed by an on-disk '.bloom' file with this layout: 4-byte 'BLOM'
    magic, a big-endian version word, then (bits, k, entries) as '!HHI',
    then the 2**bits-byte bit table, then a NUL-joined list of the names
    of the .idx files the filter covers.
    """
    def __init__(self, filename, f=None, readwrite=False, expected=-1):
        # filename: path to the table; must end with '.bloom'.
        # f: optional already-open file object for filename.
        # readwrite: map the table writable; requires expected > 0.
        # expected: number of entries we expect to add (readwrite only);
        #     used below to choose between the two mmap strategies.
        self.name = filename
        self.rwfile = None
        self.map = None
        assert(filename.endswith('.bloom'))
        if readwrite:
            assert(expected > 0)
            self.rwfile = f = f or open(filename, 'r+b')
            f.seek(0)

            # Decide if we want to mmap() the pages as writable ('immediate'
            # write) or else map them privately for later writing back to
            # the file ('delayed' write). A bloom table's write access
            # pattern is such that we dirty almost all the pages after adding
            # very few entries. But the table is so big that dirtying
            # *all* the pages often exceeds Linux's default
            # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
            # thus causing it to start flushing the table before we're
            # finished... even though there's more than enough space to
            # store the bloom table in RAM.
            #
            # To work around that behaviour, if we calculate that we'll
            # probably end up touching the whole table anyway (at least
            # one bit flipped per memory page), let's use a "private" mmap,
            # which defeats Linux's ability to flush it to disk. Then we'll
            # flush it as one big lump during close().
            pages = os.fstat(f.fileno()).st_size / 4096 * 5 # assume k=5
            self.delaywrite = expected > pages
            debug1('bloom: delaywrite=%r\n' % self.delaywrite)
            if self.delaywrite:
                self.map = mmap_readwrite_private(self.rwfile, close=False)
            else:
                self.map = mmap_readwrite(self.rwfile, close=False)
        else:
            self.rwfile = None
            f = f or open(filename, 'rb')
            self.map = mmap_read(f)
        # Validate the magic and version; any mismatch drops the object
        # into the harmless "failed" state via _init_failed().
        got = str(self.map[0:4])
        if got != 'BLOM':
            log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
            return self._init_failed()
        ver = struct.unpack('!I', self.map[4:8])[0]
        if ver < BLOOM_VERSION:
            log('Warning: ignoring old-style (v%d) bloom %r\n'
                % (ver, filename))
            return self._init_failed()
        if ver > BLOOM_VERSION:
            log('Warning: ignoring too-new (v%d) bloom %r\n'
                % (ver, filename))
            return self._init_failed()

        # Header bytes 8:16: table size exponent, hash count, entry count.
        self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
        # The NUL-joined idx-name list trails the 2**bits-byte table.
        idxnamestr = str(self.map[16 + 2**self.bits:])
        if idxnamestr:
            self.idxnames = idxnamestr.split('\0')
        else:
            self.idxnames = []

    def _init_failed(self):
        # Reset to a state where valid() is false and close() is a no-op.
        if self.map:
            self.map = None
        if self.rwfile:
            self.rwfile.close()
            self.rwfile = None
        self.idxnames = []
        self.bits = self.entries = 0

    def valid(self):
        # Nonempty/nonzero only if the header parsed successfully.
        return self.map and self.bits

    def __del__(self):
        # Best-effort flush if the caller forgot to close() explicitly.
        self.close()

    def close(self):
        # Write back a still-open writable table, then release everything.
        if self.map and self.rwfile:
            debug2("bloom: closing with %d entries\n" % self.entries)
            # Refresh the on-disk entry count (header bytes 12:16).
            self.map[12:16] = struct.pack('!I', self.entries)
            if self.delaywrite:
                # Private map: dump the whole table over the file in one lump.
                self.rwfile.seek(0)
                self.rwfile.write(self.map)
            else:
                # Shared map: just sync the dirty pages.
                self.map.flush()
            # Rewrite the idx-name list that trails the bit table.
            self.rwfile.seek(16 + 2**self.bits)
            if self.idxnames:
                self.rwfile.write('\0'.join(self.idxnames))
        self._init_failed()

    def pfalse_positive(self, additional=0):
        # Expected false-positive rate, as a percentage, once `additional`
        # more entries have been added: 100 * (1 - e**(-k*n/m)) ** k,
        # where m is the table size in bits.
        n = self.entries + additional
        m = 8*2**self.bits
        k = self.k
        return 100*(1-math.exp(-k*float(n)/m))**k

    def add_idx(self, ix):
        """Add all of ix's shas to the filter and record ix's name."""
        if not self.map: raise Exception, "Cannot add to closed bloom"
        self.entries += bloom_add(self.map, ix.shatable, self.bits, self.k)
        self.idxnames.append(os.path.basename(ix.name))

    def exists(self, sha):
        """Return nonempty if the object probably exists in the bloom filter."""
        global _total_searches, _total_steps
        _total_searches += 1
        if not self.map: return None
        found, steps = bloom_contains(self.map, str(sha), self.bits, self.k)
        _total_steps += steps
        return found

    @classmethod
    def create(cls, name, expected, delaywrite=None, f=None, k=None):
        """Create and return a bloom filter for `expected` entries."""
        # Size the table for ~MAX_BITS_EACH bits per expected entry, capped
        # at the largest table the chosen k supports.
        bits = int(math.floor(math.log(expected*MAX_BITS_EACH/8,2)))
        k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
        if bits > MAX_BLOOM_BITS[k]:
            log('bloom: warning, max bits exceeded, non-optimal\n')
            bits = MAX_BLOOM_BITS[k]
        debug1('bloom: using 2^%d bytes and %d hash functions\n' % (bits, k))
        f = f or open(name, 'w+b')
        f.write('BLOM')
        f.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0))
        assert(f.tell() == 16)
        # NOTE: On some systems this will not extend+zerofill, but it does on
        # darwin, linux, bsd and solaris.
        f.truncate(16+2**bits)
        f.seek(0)
        if delaywrite != None and not delaywrite:
            # tell it to expect very few objects, forcing a direct mmap
            expected = 1
        return cls(name, f=f, readwrite=True, expected=expected)

    def __len__(self):
        return int(self.entries)
-
-
class PackMidx:
    """Wrapper which contains data from multiple index files.
    Multiple index (.midx) files constitute a wrapper around index (.idx) files
    and make it possible for bup to expand Git's indexing capabilities to vast
    amounts of files.
    """
    def __init__(self, filename):
        # filename: path to the table; must end with '.midx'.
        self.name = filename
        # force_keep tells callers whether the on-disk file is worth
        # keeping even though we refuse to use it.
        self.force_keep = False
        assert(filename.endswith('.midx'))
        self.map = mmap_read(open(filename))
        # Validate the 4-byte magic and the version word; any mismatch
        # drops the object into the "empty" state via _init_failed().
        if str(self.map[0:4]) != 'MIDX':
            log('Warning: skipping: invalid MIDX header in %r\n' % filename)
            self.force_keep = True
            return self._init_failed()
        ver = struct.unpack('!I', self.map[4:8])[0]
        if ver < MIDX_VERSION:
            log('Warning: ignoring old-style (v%d) midx %r\n'
                % (ver, filename))
            self.force_keep = False # old stuff is boring
            return self._init_failed()
        if ver > MIDX_VERSION:
            log('Warning: ignoring too-new (v%d) midx %r\n'
                % (ver, filename))
            self.force_keep = True # new stuff is exciting
            return self._init_failed()

        # Layout after the version word: a 4-byte bit count, a fanout
        # table of 2**bits 4-byte entries, nsha sorted 20-byte shas, one
        # 4-byte per-sha index into the idx-name list, then the
        # NUL-joined idx names.  buffer() gives zero-copy views into the
        # mmap'd file.
        self.bits = _helpers.firstword(self.map[8:12])
        self.entries = 2**self.bits
        self.fanout = buffer(self.map, 12, self.entries*4)
        self.sha_ofs = 12 + self.entries*4
        self.nsha = nsha = self._fanget(self.entries-1)
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.whichlist = buffer(self.map, self.sha_ofs + nsha*20, nsha*4)
        self.idxname_ofs = self.sha_ofs + 24*nsha
        self.idxnames = str(self.map[self.idxname_ofs:]).split('\0')

    def _init_failed(self):
        # Minimal state so exists()/__iter__/__len__ act as "empty".
        self.bits = 0
        self.entries = 1
        self.fanout = buffer('\0\0\0\0')
        self.shatable = buffer('\0'*20)
        self.idxnames = []

    def _fanget(self, i):
        # Decode fanout entry i; the counts are cumulative, so
        # _fanget(self.entries-1) is the total number of shas.
        start = i*4
        s = self.fanout[start:start+4]
        return _helpers.firstword(s)

    def _get(self, i):
        # The i'th 20-byte binary sha, as a str.
        return str(self.shatable[i*20:(i+1)*20])

    def _get_idx_i(self, i):
        # Index (into self.idxnames) of the idx owning the i'th sha.
        return struct.unpack('!I', self.whichlist[i*4:(i+1)*4])[0]

    def _get_idxname(self, i):
        # Name of the .idx file that contains the i'th sha.
        return self.idxnames[self._get_idx_i(i)]

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches, _total_steps
        _total_searches += 1
        want = str(hash)
        # Use the fanout table to narrow the search to the run of shas
        # sharing `want`'s top self.bits bits, then interpolation-search
        # within that run.  startv/endv track the 32-bit value bounds of
        # the current window for the interpolation step.
        el = extract_bits(want, self.bits)
        if el:
            start = self._fanget(el-1)
            startv = el << (32-self.bits)
        else:
            start = 0
            startv = 0
        end = self._fanget(el)
        endv = (el+1) << (32-self.bits)
        _total_steps += 1 # lookup table is a step
        hashv = _helpers.firstword(hash)
        #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
        while start < end:
            _total_steps += 1
            #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
            # Interpolate where `want` should sit between startv and endv
            # (Python 2 integer division keeps mid in [start, end-1]).
            mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
            #print '  %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
            v = self._get(mid)
            #print '    %08x' % self._num(v)
            if v < want:
                start = mid+1
                startv = _helpers.firstword(v)
            elif v > want:
                end = mid
                endv = _helpers.firstword(v)
            else: # got it!
                return want_source and self._get_idxname(mid) or True
        return None

    def __iter__(self):
        # Yield each sha as a zero-copy 20-byte buffer, in sorted order.
        for i in xrange(self._fanget(self.entries-1)):
            yield buffer(self.shatable, i*20, 20)

    def __len__(self):
        # Total sha count, from the last (cumulative) fanout entry.
        return int(self._fanget(self.entries-1))
-
-