#define WINDOWBITS (7)
#define WINDOWSIZE (1<<(WINDOWBITS-1))
+// According to librsync/rollsum.h:
+// "We should make this something other than zero to improve the
+// checksum algorithm: tridge suggests a prime number."
+// apenwarr: I unscientifically tried 0 and 7919, and they both ended up
+// slightly worse than the librsync value of 31 for my arbitrary test data.
+#define ROLLSUM_CHAR_OFFSET 31
-// FIXME: replace this with a not-stupid rolling checksum algorithm,
-// such as the one used in rsync (Adler32?)
-static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
+typedef struct {
+ unsigned s1, s2;
+ uint8_t window[WINDOWSIZE];
+ int wofs;
+} Rollsum;
+
+
+// These formulas are based on rollsum.h in the librsync project.
+static void rollsum_add(Rollsum *r, uint8_t drop, uint8_t add)
+{
+ r->s1 += add - drop;
+ r->s2 += r->s1 - (WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET));
+}
+
+
+static void rollsum_init(Rollsum *r)
+{
+ r->s1 = WINDOWSIZE * ROLLSUM_CHAR_OFFSET;
+ r->s2 = WINDOWSIZE * (WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET;
+ r->wofs = 0;
+ memset(r->window, 0, WINDOWSIZE);
+}
+
+
+// For some reason, gcc 4.3 (at least) optimizes badly if find_ofs()
+// is static and rollsum_roll is an inline function. Let's use a macro
+// here instead to help out the optimizer.
+#define rollsum_roll(r, ch) do { \
+ rollsum_add((r), (r)->window[(r)->wofs], (ch)); \
+ (r)->window[(r)->wofs] = (ch); \
+ (r)->wofs = ((r)->wofs + 1) % WINDOWSIZE; \
+} while (0)
+
+
+static uint32_t rollsum_digest(Rollsum *r)
+{
+ return (r->s1 << 16) | (r->s2 & 0xffff);
+}
+
+
+static uint32_t rollsum_sum(uint8_t *buf, size_t ofs, size_t len)
+{
+ int count;
+ Rollsum r;
+ rollsum_init(&r);
+ for (count = ofs; count < len; count++)
+ rollsum_roll(&r, buf[count]);
+ return rollsum_digest(&r);
+}
+
+
+static PyObject *selftest(PyObject *self, PyObject *args)
{
- return ((old<<1) | (old>>31)) ^ drop ^ add;
+ uint8_t buf[100000];
+ uint32_t sum1a, sum1b, sum2a, sum2b, sum3a, sum3b;
+ int count;
+
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+
+ srandom(1);
+ for (count = 0; count < sizeof(buf); count++)
+ buf[count] = random();
+
+ sum1a = rollsum_sum(buf, 0, sizeof(buf));
+ sum1b = rollsum_sum(buf, 1, sizeof(buf));
+ sum2a = rollsum_sum(buf, sizeof(buf) - WINDOWSIZE*5/2,
+ sizeof(buf) - WINDOWSIZE);
+ sum2b = rollsum_sum(buf, 0, sizeof(buf) - WINDOWSIZE);
+ sum3a = rollsum_sum(buf, 0, WINDOWSIZE+3);
+ sum3b = rollsum_sum(buf, 3, WINDOWSIZE+3);
+
+ fprintf(stderr, "sum1a = 0x%08x\n", sum1a);
+ fprintf(stderr, "sum1b = 0x%08x\n", sum1b);
+ fprintf(stderr, "sum2a = 0x%08x\n", sum2a);
+ fprintf(stderr, "sum2b = 0x%08x\n", sum2b);
+ fprintf(stderr, "sum3a = 0x%08x\n", sum3a);
+ fprintf(stderr, "sum3b = 0x%08x\n", sum3b);
+
+ return Py_BuildValue("i", sum1a==sum1b && sum2a==sum2b && sum3a==sum3b);
}
static int find_ofs(const unsigned char *buf, int len, int *bits)
{
- unsigned char window[WINDOWSIZE];
- uint32_t sum = 0;
- int i = 0, count;
- memset(window, 0, sizeof(window));
+ Rollsum r;
+ int count;
+ rollsum_init(&r);
for (count = 0; count < len; count++)
{
- sum = stupidsum_add(sum, window[i], buf[count]);
- window[i] = buf[count];
- i = (i + 1) % WINDOWSIZE;
- if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
+ rollsum_roll(&r, buf[count]);
+ if ((r.s2 & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
{
if (bits)
{
+ unsigned rsum = rollsum_digest(&r);
*bits = BLOBBITS;
- sum >>= BLOBBITS;
- for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
+ rsum >>= BLOBBITS;
+ for (*bits = BLOBBITS; (rsum >>= 1) & 1; (*bits)++)
;
}
return count+1;
fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024);
}
+ // handle non-multiples of 1024
+ if (len % 1024)
+ {
+ int i;
+ for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
+ buf[i] = random();
+ ret = write(fd, buf, len % 1024);
+ if (ret < 0)
+ ret = 0;
+ written += ret;
+ }
+
if (kbytes/1024 > 0)
fprintf(stderr, "Random: %lld Mbytes, done.\n", kbytes/1024);
return Py_BuildValue("L", written);
static PyMethodDef hashsplit_methods[] = {
+ { "selftest", selftest, METH_VARARGS,
+ "Check that the rolling checksum rolls correctly (for unit tests)." },
{ "blobbits", blobbits, METH_VARARGS,
"Return the number of bits in the rolling checksum." },
{ "splitbuf", splitbuf, METH_VARARGS,