// BLOBSIZE is 2^(BLOBBITS-1). splitbuf() uses BLOBSIZE-1 as a bit mask on the
// rolling checksum: a split point is reported when all of those low bits are
// set. BLOBBITS itself is defined outside the lines shown here.
6 #define BLOBSIZE (1<<(BLOBBITS-1))
// WINDOWSIZE is 2^(WINDOWBITS-1). It is the byte length of the circular
// window that splitbuf() keeps, and the modulus used to wrap the window
// index. WINDOWBITS is defined outside the lines shown here.
8 #define WINDOWSIZE (1<<(WINDOWBITS-1))
11 // FIXME: replace this with a not-stupid rolling checksum algorithm,
12 // such as the one used in rsync (Adler32?)
//
// Advance the rolling checksum by one byte.
//
//   old  - checksum value before this step
//   drop - byte value being replaced in the window
//   add  - byte value being written into the window
//
// Returns `old` rotated left by one bit (32-bit rotate: the top bit wraps
// around to bit 0), XORed with both `drop` and `add`.
//
// NOTE(review): because the sum is rotated one bit per step, XORing `drop`
// only cancels that byte's earlier contribution exactly when the number of
// steps between adding and dropping it is a multiple of 32 -- confirm that
// WINDOWSIZE satisfies this.
13 static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
15 return ((old<<1) | (old>>31)) ^ drop ^ add;
// Python-callable: scan a buffer for the first split point chosen by the
// rolling checksum.
//
// Parses one argument into (buf, len), feeds the bytes one at a time through
// stupidsum_add() over a circular window of WINDOWSIZE bytes, and when the
// low bits of the checksum selected by BLOBSIZE-1 are all ones, returns the
// number of bytes consumed so far (count+1) as a Python int.
//
// The declarations of buf, len, count, sum and i, and the code that runs when
// no split point is found, are outside the lines shown here.
19 static PyObject *splitbuf(PyObject *self, PyObject *args)
// "et#" with encoding "utf-8": one argument, delivered as an encoded byte
// buffer plus its length.
// NOTE(review): per the CPython docs, "et#" allocates the output buffer when
// *buf is NULL on entry and the caller must release it with PyMem_Free() --
// confirm every return path of this function does so.
24 if (!PyArg_ParseTuple(args, "et#", "utf-8", &buf, &len))
// Circular buffer holding the most recent WINDOWSIZE input bytes; starts
// zero-filled, so zero bytes are "dropped" until the window has filled.
28 unsigned char window[WINDOWSIZE];
31 memset(window, 0, sizeof(window));
33 for (count = 0; count < len; count++)
// window[i] is the slot about to be overwritten, so its current value is
// the byte leaving the window.
35 sum = stupidsum_add(sum, window[i], buf[count]);
36 window[i] = buf[count];
// Advance to the next slot, wrapping at the end of the window.
37 i = (i + 1) % WINDOWSIZE;
// (~0) & (BLOBSIZE-1) is just BLOBSIZE-1, so this tests whether every bit
// of the checksum under that mask is set.
38 if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
// Report the split offset as a byte count: index of the current byte + 1.
46 return Py_BuildValue("i", count+1);
// Method table for the extension module: exposes splitbuf() to Python under
// the name "splitbuf", called with positional arguments (METH_VARARGS).
// NOTE(review): the docstring below says "a list of strings", but splitbuf()
// parses a single buffer argument -- consider correcting the text.
50 static PyMethodDef hashsplit_methods[] = {
51 { "splitbuf", splitbuf, METH_VARARGS,
52 "Split a list of strings based on a rolling checksum." },
53 { NULL, NULL, 0, NULL }, // sentinel
// Module initialisation entry point: creates the "hashsplit" module and
// registers the functions listed in hashsplit_methods.
// NOTE(review): Py_InitModule() is the Python 2 C API; a Python 3 build would
// need PyModule_Create() and a PyInit_hashsplit entry point instead.
56 PyMODINIT_FUNC inithashsplit()
58 Py_InitModule("hashsplit", hashsplit_methods);