2 import git, glob, mmap, os, struct
4 from bup import _helpers
5 from bup.helpers import log, mmap_read
# Module-level shorthand for _helpers.extract_bits; exists() calls it by this name.
10 extract_bits = _helpers.extract_bits
16 """Wrapper which contains data from multiple index files.
17 Multiple index (.midx) files constitute a wrapper around index (.idx) files
18 and make it possible for bup to expand Git's indexing capabilities to vast
# Map the .midx file `filename` and set up views onto its sections.
# If the magic bytes or the version are not acceptable, a warning is logged,
# force_keep is set, and the object falls back to self._init_failed().
21 def __init__(self, filename):
23 self.force_keep = False
# NOTE(review): assert is stripped under `python -O`, so the suffix check
# disappears in optimized runs.
25 assert(filename.endswith('.midx'))
# NOTE(review): the file object returned by open() is not closed anywhere in
# the visible code -- confirm whether mmap_read takes care of it.
26 self.map = mmap_read(open(filename))
# The first four bytes must be the literal magic 'MIDX'.
27 if str(self.map[0:4]) != 'MIDX':
28 log('Warning: skipping: invalid MIDX header in %r\n' % filename)
29 self.force_keep = True
30 return self._init_failed()
# Bytes 4..8 are unpacked as one network-order (big-endian) unsigned 32-bit
# integer and compared against MIDX_VERSION.
31 ver = struct.unpack('!I', self.map[4:8])[0]
32 if ver < MIDX_VERSION:
33 log('Warning: ignoring old-style (v%d) midx %r\n'
35 self.force_keep = False # old stuff is boring
36 return self._init_failed()
37 if ver > MIDX_VERSION:
38 log('Warning: ignoring too-new (v%d) midx %r\n'
40 self.force_keep = True # new stuff is exciting
41 return self._init_failed()
# Section layout as computed below, after the 12 header bytes:
#   fanout table : self.entries * 4 bytes, starting at offset 12
#   sha table    : nsha * 20 bytes, starting at self.sha_ofs
#   which list   : nsha * 4 bytes, starting at self.which_ofs
#   idx names    : everything after that, split on NUL bytes
# presumably _helpers.firstword decodes the 4 bytes as an integer -- TODO confirm
43 self.bits = _helpers.firstword(self.map[8:12])
44 self.entries = 2**self.bits
45 self.fanout = buffer(self.map, 12, self.entries*4)
46 self.sha_ofs = 12 + self.entries*4
# The last fanout slot is used as the total number of sha entries.
47 self.nsha = nsha = self._fanget(self.entries-1)
48 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
49 self.which_ofs = self.sha_ofs + 20*nsha
50 self.whichlist = buffer(self.map, self.which_ofs, nsha*4)
51 self.idxnames = str(self.map[self.which_ofs + 4*nsha:]).split('\0')
# Fallback used by __init__ when the file is rejected: installs placeholder
# tables in place of views onto the mapped file.
56 def _init_failed(self):
# A single 4-byte all-zero fanout slot.
59 self.fanout = buffer('\0\0\0\0')
# A single 20-byte all-zero sha slot.
60 self.shatable = buffer('\0'*20)
65 s = self.fanout[start:start+4]
66 return _helpers.firstword(s)
69 return str(self.shatable[i*20:(i+1)*20])
71 def _get_idx_i(self, i):
72 return struct.unpack('!I', self.whichlist[i*4:(i+1)*4])[0]
74 def _get_idxname(self, i):
75 return self.idxnames[self._get_idx_i(i)]
78 if self.map is not None:
# Look up `hash` in the sha table. The fanout slots around `el` give the
# initial [start, end) range, and the probe position `mid` is interpolated
# from the first 32-bit word of the hash.
# On a match the result is the idx name when want_source is true, else True.
82 def exists(self, hash, want_source=False):
83 """Return nonempty if the object exists in the index files."""
# Module-level counters; _total_steps is bumped once for the fanout lookup.
84 global _total_searches, _total_steps
# presumably `el` is the leading self.bits bits of the hash, used as the
# fanout slot number -- TODO confirm against _helpers.extract_bits
87 el = extract_bits(want, self.bits)
# Lower bound: the count stored in the previous fanout slot, with startv the
# smallest 32-bit prefix value that falls into slot `el`.
89 start = self._fanget(el-1)
90 startv = el << (32-self.bits)
# Upper bound: the count stored in slot `el`, with endv the first 32-bit
# prefix value belonging to the next slot.
94 end = self._fanget(el)
95 endv = (el+1) << (32-self.bits)
96 _total_steps += 1 # lookup table is a step
97 hashv = _helpers.firstword(hash)
98 #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
101 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
# Interpolated probe: scale hashv's position within [startv, endv) onto
# [start, end-1]. Relies on `/` being integer division on ints (Python 2).
102 mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
103 #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
105 #print ' %08x' % self._num(v)
# After a probe, the value bound on the side that moved is refreshed from
# the probed entry `v`.
108 startv = _helpers.firstword(v)
111 endv = _helpers.firstword(v)
# NOTE(review): with the and/or idiom an empty idx name would yield True
# rather than the name, even when want_source is set.
113 return want_source and self._get_idxname(mid) or True
117 for i in xrange(self._fanget(self.entries-1)):
118 yield buffer(self.shatable, i*20, 20)
121 return int(self._fanget(self.entries-1))
124 def clear_midxes(dir=None):
125 dir = dir or git.repo('objects/pack')
126 for midx in glob.glob(os.path.join(dir, '*.midx')):