2 from __future__ import absolute_import, print_function
3 import glob, os, struct
5 from bup import _helpers
6 from bup.compat import pending_raise
7 from bup.helpers import log, mmap_read
8 from bup.io import path_msg
13 extract_bits = _helpers.extract_bits
19 """Wrapper which contains data from multiple index files.
20 Multiple index (.midx) files constitute a wrapper around index (.idx) files
21 and make it possible for bup to expand Git's indexing capabilities to vast
24 def __init__(self, filename):
27 self.force_keep = False
29 assert(filename.endswith(b'.midx'))
30 self.map = mmap_read(open(filename))
31 if self.map[0:4] != b'MIDX':
32 log('Warning: skipping: invalid MIDX header in %r\n'
34 self.force_keep = True
37 ver = struct.unpack('!I', self.map[4:8])[0]
38 if ver < MIDX_VERSION:
39 log('Warning: ignoring old-style (v%d) midx %r\n'
40 % (ver, path_msg(filename)))
41 self.force_keep = False # old stuff is boring
44 if ver > MIDX_VERSION:
45 log('Warning: ignoring too-new (v%d) midx %r\n'
46 % (ver, path_msg(filename)))
47 self.force_keep = True # new stuff is exciting
51 self.bits = _helpers.firstword(self.map[8:12])
52 self.entries = 2**self.bits
54 # fanout len is self.entries * 4
55 self.sha_ofs = self.fanout_ofs + self.entries * 4
56 self.nsha = self._fanget(self.entries - 1)
57 # sha table len is self.nsha * 20
58 self.which_ofs = self.sha_ofs + 20 * self.nsha
59 # which len is self.nsha * 4
60 self.idxnames = self.map[self.which_ofs + 4 * self.nsha:].split(b'\0')
65 def __exit__(self, type, value, traceback):
66 with pending_raise(value, rethrow=False):
69 def _init_failed(self):
75 if i >= self.entries * 4 or i < 0:
76 raise IndexError('invalid midx index %d' % i)
77 ofs = self.fanout_ofs + i * 4
78 return _helpers.firstword(self.map[ofs : ofs + 4])
81 if i >= self.nsha or i < 0:
82 raise IndexError('invalid midx index %d' % i)
83 ofs = self.sha_ofs + i * 20
84 return self.map[ofs : ofs + 20]
86 def _get_idx_i(self, i):
87 if i >= self.nsha or i < 0:
88 raise IndexError('invalid midx index %d' % i)
89 ofs = self.which_ofs + i * 4
90 return struct.unpack_from('!I', self.map, offset=ofs)[0]
92 def _get_idxname(self, i):
93 return self.idxnames[self._get_idx_i(i)]
97 if self.map is not None:
98 self.fanout = self.shatable = self.whichlist = self.idxnames = None
105 def exists(self, hash, want_source=False):
106 """Return nonempty if the object exists in the index files."""
107 global _total_searches, _total_steps
110 el = extract_bits(want, self.bits)
112 start = self._fanget(el-1)
113 startv = el << (32-self.bits)
117 end = self._fanget(el)
118 endv = (el+1) << (32-self.bits)
119 _total_steps += 1 # lookup table is a step
120 hashv = _helpers.firstword(hash)
121 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
124 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
125 mid = start + (hashv - startv) * (end - start - 1) // (endv - startv)
126 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
128 #print ' %08x' % self._num(v)
131 startv = _helpers.firstword(v)
134 endv = _helpers.firstword(v)
136 return want_source and self._get_idxname(mid) or True
141 for ofs in range(start, start + self.nsha * 20, 20):
142 yield self.map[ofs : ofs + 20]
145 return int(self.nsha)
148 def clear_midxes(dir=None):
149 for midx in glob.glob(os.path.join(dir, b'*.midx')):