6 #define BLOBSIZE (1<<(BLOBBITS-1))
8 #define WINDOWSIZE (1<<(WINDOWBITS-1))
11 // FIXME: replace this with a not-stupid rolling checksum algorithm,
12 // such as the one used in rsync (Adler32?)
13 static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
15 return ((old<<1) | (old>>31)) ^ drop ^ add;
19 static int find_ofs(const unsigned char *buf, int len)
21 unsigned char window[WINDOWSIZE];
24 memset(window, 0, sizeof(window));
26 for (count = 0; count < len; count++)
28 sum = stupidsum_add(sum, window[i], buf[count]);
29 window[i] = buf[count];
30 i = (i + 1) % WINDOWSIZE;
31 if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
38 static PyObject *splitbuf(PyObject *self, PyObject *args)
40 unsigned char *buf = NULL;
43 if (!PyArg_ParseTuple(args, "t#", &buf, &len))
45 out = find_ofs(buf, len);
46 //return Py_BuildValue("i", len);//len>BLOBSIZE ? BLOBSIZE : len);
47 return Py_BuildValue("i", out);
51 static PyObject *bitmatch(PyObject *self, PyObject *args)
53 unsigned char *buf1 = NULL, *buf2 = NULL;
54 int len1 = 0, len2 = 0;
57 if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
61 for (byte = 0; byte < len1 && byte < len2; byte++)
63 int b1 = buf1[byte], b2 = buf2[byte];
66 for (bit = 0; bit < 8; bit++)
67 if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
73 return Py_BuildValue("i", byte*8 + bit);
77 static PyMethodDef hashsplit_methods[] = {
78 { "splitbuf", splitbuf, METH_VARARGS,
79 "Split a list of strings based on a rolling checksum." },
80 { "bitmatch", bitmatch, METH_VARARGS,
81 "Count the number of matching prefix bits between two strings." },
82 { NULL, NULL, 0, NULL }, // sentinel
85 PyMODINIT_FUNC init_hashsplit(void)
87 Py_InitModule("_hashsplit", hashsplit_methods);