From: Avery Pennarun Date: Tue, 2 Feb 2010 02:34:56 +0000 (-0500) Subject: midx: the fanout table entries can be 4 bytes, not 8. X-Git-Tag: bup-0.08~2 X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1732e801b41e3c56c65055c13a79d11336d2ed64;p=bup.git midx: the fanout table entries can be 4 bytes, not 8. I was trying to be future-proof, but it was kind of overkill, since a 32-bit fanout entry could handle a total of 4 billion *hashes* per midx. That would be 20*4bil = 80 gigs in a single midx. This corresponds to about 10 terabytes of packs, which isn't inconceivable... but if it happens, you could just use more than one midx. Plus you'd likely run into other weird bup problems before your midx files get anywhere near 80 gigs. --- diff --git a/cmd-midx.py b/cmd-midx.py index c0f8324..d8b08ec 100755 --- a/cmd-midx.py +++ b/cmd-midx.py @@ -46,10 +46,10 @@ def do_midx(outdir, outfilename, infilenames): except OSError: pass f = open(outfilename + '.tmp', 'w+') - f.write('MIDX\0\0\0\1') + f.write('MIDX\0\0\0\2') f.write(struct.pack('!I', bits)) assert(f.tell() == 12) - f.write('\0'*8*entries) + f.write('\0'*4*entries) for e in merge(inp, bits, table): f.write(e) @@ -57,7 +57,7 @@ def do_midx(outdir, outfilename, infilenames): f.write('\0'.join([os.path.basename(p) for p in infilenames])) f.seek(12) - f.write(struct.pack('!%dQ' % entries, *table)) + f.write(struct.pack('!%dI' % entries, *table)) f.close() os.rename(outfilename + '.tmp', outfilename) diff --git a/git.py b/git.py index 3196fd5..43191f5 100644 --- a/git.py +++ b/git.py @@ -141,8 +141,8 @@ class PackIndex: def extract_bits(buf, bits): mask = (1<<bits) - 1 - v = struct.unpack('!Q', buf[0:8])[0] - v = (v >> (64-bits)) & mask + v = struct.unpack('!I', buf[0:4])[0] + v = (v >> (32-bits)) & mask return v @@ -151,19 +151,27 @@ class PackMidx: self.name = filename assert(filename.endswith('.midx')) self.map = mmap_read(open(filename)) - assert(str(self.map[0:8]) == 'MIDX\0\0\0\1') - self.bits = struct.unpack('!I', self.map[8:12])[0] - self.entries 
= 2**self.bits - self.fanout = buffer(self.map, 12, self.entries*8) - shaofs = 12 + self.entries*8 - nsha = self._fanget(self.entries-1) - self.shalist = buffer(self.map, shaofs, nsha*20) - self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0') + if str(self.map[0:8]) == 'MIDX\0\0\0\1': + log('Warning: ignoring old-style midx %r\n' % filename) + self.bits = 0 + self.entries = 1 + self.fanout = buffer('\0\0\0\0') + self.shalist = buffer('\0'*20) + self.idxnames = [] + else: + assert(str(self.map[0:8]) == 'MIDX\0\0\0\2') + self.bits = struct.unpack('!I', self.map[8:12])[0] + self.entries = 2**self.bits + self.fanout = buffer(self.map, 12, self.entries*4) + shaofs = 12 + self.entries*4 + nsha = self._fanget(self.entries-1) + self.shalist = buffer(self.map, shaofs, nsha*20) + self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0') def _fanget(self, i): - start = i*8 - s = self.fanout[start:start+8] - return struct.unpack('!Q', s)[0] + start = i*4 + s = self.fanout[start:start+4] + return struct.unpack('!I', s)[0] def exists(self, hash): want = str(hash) diff --git a/t/test.sh b/t/test.sh index e89a571..fc0612d 100755 --- a/t/test.sh +++ b/t/test.sh @@ -65,6 +65,7 @@ WVPASSEQ "$(cd $D && bup index -s .)" "$(cd $D && bup index -s .)" WVSTART "split" WVPASS bup split --bench -b <t/testfile1 >tags1.tmp WVPASS bup split -vvvv -b t/testfile2 >tags2.tmp +WVPASS bup midx -a WVPASS bup split -t t/testfile2 >tags2t.tmp WVPASS bup split -t t/testfile2 --fanout 3 >tags2tf.tmp WVPASS bup split -r "$BUP_DIR" -c t/testfile2 >tags2c.tmp