#include <assert.h>
#include <stdint.h>
#include <fcntl.h>
+#include <arpa/inet.h>
static PyObject *selftest(PyObject *self, PyObject *args)
{
}
+static PyObject *firstword(PyObject *self, PyObject *args)
+{
+ unsigned char *buf = NULL;
+ int len = 0;
+ uint32_t v;
+
+ if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+ return NULL;
+
+ if (len < 4)
+ return NULL;
+
+ v = ntohl(*(uint32_t *)buf);
+ return Py_BuildValue("I", v);
+}
+
+
+static PyObject *extract_bits(PyObject *self, PyObject *args)
+{
+ unsigned char *buf = NULL;
+ int len = 0, nbits = 0;
+ uint32_t v, mask;
+
+ if (!PyArg_ParseTuple(args, "t#i", &buf, &len, &nbits))
+ return NULL;
+
+ if (len < 4)
+ return NULL;
+
+ mask = (1<<nbits) - 1;
+ v = ntohl(*(uint32_t *)buf);
+ v = (v >> (32-nbits)) & mask;
+ return Py_BuildValue("I", v);
+}
+
+
// I would have made this a lower-level function that just fills in a buffer
// with random values, and then written those values from python. But that's
// about 20% slower in my tests, and since we typically generate random
"Split a list of strings based on a rolling checksum." },
{ "bitmatch", bitmatch, METH_VARARGS,
"Count the number of matching prefix bits between two strings." },
+ { "firstword", firstword, METH_VARARGS,
+ "Return an int corresponding to the first 32 bits of buf." },
+ { "extract_bits", extract_bits, METH_VARARGS,
+ "Take the first 'nbits' bits from 'buf' and return them as an int." },
{ "write_random", write_random, METH_VARARGS,
"Write random bytes to the given file descriptor" },
{ "open_noatime", open_noatime, METH_VARARGS,
import os, zlib, time, subprocess, struct, stat, re, tempfile
import heapq
from bup.helpers import *
+from bup import _helpers
MIDX_VERSION = 2
return int(self.fanout[255])
-def extract_bits(buf, nbits):
- """Take the first 'nbits' bits from 'buf' and return them as an integer."""
- mask = (1<<nbits) - 1
- v = struct.unpack('!I', buf[0:4])[0]
- v = (v >> (32-nbits)) & mask
- return v
+extract_bits = _helpers.extract_bits
class PackMidx:
self.force_keep = True # new stuff is exciting
return self._init_failed()
- self.bits = struct.unpack('!I', self.map[8:12])[0]
+ self.bits = _helpers.firstword(self.map[8:12])
self.entries = 2**self.bits
self.fanout = buffer(self.map, 12, self.entries*4)
shaofs = 12 + self.entries*4
def _fanget(self, i):
start = i*4
s = self.fanout[start:start+4]
- return struct.unpack('!I', s)[0]
+ return _helpers.firstword(s)
+
+ def _get(self, i):
+ return str(self.shalist[i*20:(i+1)*20])
def exists(self, hash):
"""Return nonempty if the object exists in the index files."""
el = extract_bits(want, self.bits)
if el:
start = self._fanget(el-1)
+ startv = el << (32-self.bits)
else:
start = 0
+ startv = 0
end = self._fanget(el)
+ endv = (el+1) << (32-self.bits)
_total_steps += 1 # lookup table is a step
+ hashv = _helpers.firstword(hash)
+ #print '(%08x) %08x %08x %08x' % (extract_bits(want, 32), startv, hashv, endv)
while start < end:
_total_steps += 1
- mid = start + (end-start)/2
- v = str(self.shalist[mid*20:(mid+1)*20])
+ #print '! %08x %08x %08x %d - %d' % (startv, hashv, endv, start, end)
+ mid = start + (hashv-startv)*(end-start-1)/(endv-startv)
+ #print ' %08x %08x %08x %d %d %d' % (startv, hashv, endv, start, mid, end)
+ v = self._get(mid)
+ #print ' %08x' % self._num(v)
if v < want:
start = mid+1
+ startv = _helpers.firstword(v)
elif v > want:
end = mid
+ endv = _helpers.firstword(v)
else: # got it!
return True
return None