X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?p=bup.git;a=blobdiff_plain;f=lib%2Fbup%2Fmidx.py;h=0338906735fba53b3372916a588335677cfc044b;hp=57a478759297228622e32f102976150fe27d7525;hb=bf67f94dd4f4096de4eee07a7dc377d6c889a016;hpb=d95b3a594a72572503021c85d04804c51895b986 diff --git a/lib/bup/midx.py b/lib/bup/midx.py index 57a4787..0338906 100644 --- a/lib/bup/midx.py +++ b/lib/bup/midx.py @@ -1,6 +1,12 @@ -import mmap + +from __future__ import absolute_import, print_function +import glob, mmap, os, struct + from bup import _helpers -from bup.helpers import * +from bup.compat import range +from bup.helpers import log, mmap_read +from bup.io import path_msg + MIDX_VERSION = 4 @@ -19,33 +25,35 @@ class PackMidx: self.name = filename self.force_keep = False self.map = None - assert(filename.endswith('.midx')) + assert(filename.endswith(b'.midx')) self.map = mmap_read(open(filename)) - if str(self.map[0:4]) != 'MIDX': - log('Warning: skipping: invalid MIDX header in %r\n' % filename) + if self.map[0:4] != b'MIDX': + log('Warning: skipping: invalid MIDX header in %r\n' + % path_msg(filename)) self.force_keep = True return self._init_failed() ver = struct.unpack('!I', self.map[4:8])[0] if ver < MIDX_VERSION: log('Warning: ignoring old-style (v%d) midx %r\n' - % (ver, filename)) + % (ver, path_msg(filename))) self.force_keep = False # old stuff is boring return self._init_failed() if ver > MIDX_VERSION: log('Warning: ignoring too-new (v%d) midx %r\n' - % (ver, filename)) + % (ver, path_msg(filename))) self.force_keep = True # new stuff is exciting return self._init_failed() self.bits = _helpers.firstword(self.map[8:12]) self.entries = 2**self.bits - self.fanout = buffer(self.map, 12, self.entries*4) - self.sha_ofs = 12 + self.entries*4 - self.nsha = nsha = self._fanget(self.entries-1) - self.shatable = buffer(self.map, self.sha_ofs, nsha*20) - self.which_ofs = self.sha_ofs + 20*nsha - self.whichlist = buffer(self.map, self.which_ofs, nsha*4) - self.idxnames = str(self.map[self.which_ofs + 4*nsha:]).split('\0') + self.fanout_ofs = 12 + # fanout len is self.entries * 4 + self.sha_ofs = self.fanout_ofs + self.entries * 4 + self.nsha = self._fanget(self.entries - 1) + # sha table len is self.nsha * 20 + self.which_ofs = self.sha_ofs + 20 * self.nsha + # which len is self.nsha * 4 + self.idxnames = self.map[self.which_ofs + 4 * self.nsha:].split(b'\0') def __del__(self): self.close() @@ -53,26 +61,32 @@ class PackMidx: def _init_failed(self): self.bits = 0 self.entries = 1 - self.fanout = buffer('\0\0\0\0') - self.shatable = buffer('\0'*20) self.idxnames = [] def _fanget(self, i): - start = i*4 - s = self.fanout[start:start+4] - return _helpers.firstword(s) + if i >= self.entries * 4 or i < 0: + raise IndexError('invalid midx index %d' % i) + ofs = self.fanout_ofs + i * 4 + return _helpers.firstword(self.map[ofs : ofs + 4]) def _get(self, i): - return str(self.shatable[i*20:(i+1)*20]) + if i >= self.nsha or i < 0: + raise IndexError('invalid midx index %d' % i) + ofs = self.sha_ofs + i * 20 + return self.map[ofs : ofs + 20] def _get_idx_i(self, i): - return struct.unpack('!I', self.whichlist[i*4:(i+1)*4])[0] + if i >= self.nsha or i < 0: + raise IndexError('invalid midx index %d' % i) + ofs = self.which_ofs + i * 4 + return struct.unpack_from('!I', self.map, offset=ofs)[0] def _get_idxname(self, i): return self.idxnames[self._get_idx_i(i)] def close(self): if self.map is not None: + self.fanout = self.shatable = self.whichlist = self.idxnames = None self.map.close() self.map = None @@ -80,7 +94,7 @@ class PackMidx: """Return nonempty if the object exists in the index files.""" global _total_searches, _total_steps _total_searches += 1 - want = str(hash) + want = hash el = extract_bits(want, self.bits) if el: start = self._fanget(el-1) @@ -96,7 +110,7 @@ class PackMidx: while start < end: _total_steps += 1 #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end) - mid = start + (hashv-startv)*(end-start-1)/(endv-startv) + mid = start + (hashv - startv) * (end - start - 1) // (endv - startv) #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end) v = self._get(mid) #print ' %08x' % self._num(v) @@ -111,10 +125,14 @@ class PackMidx: return None def __iter__(self): - for i in xrange(self._fanget(self.entries-1)): - yield buffer(self.shatable, i*20, 20) + start = self.sha_ofs + for ofs in range(start, start + self.nsha * 20, 20): + yield self.map[ofs : ofs + 20] def __len__(self): - return int(self._fanget(self.entries-1)) + return int(self.nsha) +def clear_midxes(dir=None): + for midx in glob.glob(os.path.join(dir, b'*.midx')): + os.unlink(midx)