// Sizes derived from BLOBBITS and WINDOWBITS (both defined on lines not shown
// in this listing).
// BLOBSIZE is 2^BLOBBITS; (BLOBSIZE-1) is used in find_ofs() as a mask over
// the low BLOBBITS bits of the rolling checksum.
7 #define BLOBSIZE (1<<BLOBBITS)
// WINDOWSIZE is 2^(WINDOWBITS-1); find_ofs() uses it as the byte length of
// its rolling window buffer.
9 #define WINDOWSIZE (1<<(WINDOWBITS-1))
12 // FIXME: replace this with a not-stupid rolling checksum algorithm,
13 // such as the one used in rsync (Adler32?)
// Advance the rolling checksum by one byte.
//   old  - checksum value before this step
//   drop - byte leaving the window
//   add  - byte entering the window
// Returns `old` rotated left by one bit (32-bit rotate), XORed with both
// `drop` and `add`.
14 static uint32_t stupidsum_add(uint32_t old, uint8_t drop, uint8_t add)
16 return ((old<<1) | (old>>31)) ^ drop ^ add;
// Scan `buf` (length `len`) with the rolling checksum, looking for a position
// where the low BLOBBITS bits of the checksum are all ones.
//   bits - out-parameter; the loop near the end sets *bits to BLOBBITS and
//          then increments it once per additional set bit found in `sum`.
// NOTE(review): the return statements and the declarations of `i`, `count`
// and `sum` are on lines not shown in this listing; presumably the function
// returns the split offset (or a "not found" value) -- confirm against the
// full source.
20 static int find_ofs(const unsigned char *buf, int len, int *bits)
// Circular buffer holding the most recent WINDOWSIZE input bytes.
22 unsigned char window[WINDOWSIZE];
// Start with an all-zero window so the first WINDOWSIZE "dropped" bytes are 0.
25 memset(window, 0, sizeof(window));
27 for (count = 0; count < len; count++)
// Roll the checksum: window[i] is the byte being overwritten (dropped),
// buf[count] is the byte being added.
29 sum = stupidsum_add(sum, window[i], buf[count]);
30 window[i] = buf[count];
// Wrap the write index around the circular window.
31 i = (i + 1) % WINDOWSIZE;
// Boundary test: true when every one of the low BLOBBITS bits of sum is set.
32 if ((sum & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
// Each iteration shifts sum right by one bit and continues while the new low
// bit is 1, so *bits ends up as BLOBBITS plus the number of consecutive set
// bits seen by this loop.
// NOTE(review): any adjustment of `sum` between the test above and this loop
// is on elided lines.
38 for (*bits = BLOBBITS; (sum >>= 1) & 1; (*bits)++)
// Python-callable: blobbits() -> int
// Takes no arguments and returns the compile-time constant BLOBBITS.
48 static PyObject *blobbits(PyObject *self, PyObject *args)
// An empty format string makes PyArg_ParseTuple reject any arguments.
// NOTE(review): the failure branch is on an elided line (presumably
// `return NULL;`).
50 if (!PyArg_ParseTuple(args, ""))
52 return Py_BuildValue("i", BLOBBITS);
// Python-callable: splitbuf(buf) -> (out, bits)
// Runs find_ofs() over a single read-only buffer and returns its result
// together with the `bits` value it filled in (bits stays -1 if find_ofs()
// never writes to it).
56 static PyObject *splitbuf(PyObject *self, PyObject *args)
58 unsigned char *buf = NULL;
59 int len = 0, out = 0, bits = -1;
// "t#" yields a pointer/length pair for a read-only character buffer.
// NOTE(review): the length is received into an `int`; this is only correct
// when PY_SSIZE_T_CLEAN is not defined -- confirm against the file's includes.
61 if (!PyArg_ParseTuple(args, "t#", &buf, &len))
63 out = find_ofs(buf, len, &bits);
64 return Py_BuildValue("ii", out, bits);
// Python-callable: bitmatch(buf1, buf2) -> int
// Walks both buffers byte by byte, comparing bits from the most significant
// bit down, and returns byte*8 + bit as a Python int.
// NOTE(review): the declarations of `byte`/`bit` and the statements that stop
// the loops on the first differing bit are on elided lines; presumably the
// result is the count of matching leading bits -- confirm against the full
// source.
68 static PyObject *bitmatch(PyObject *self, PyObject *args)
70 unsigned char *buf1 = NULL, *buf2 = NULL;
71 int len1 = 0, len2 = 0;
// Two read-only buffers, each delivered as a pointer/length pair.
74 if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
// Only the common prefix (the shorter of the two lengths) is examined.
78 for (byte = 0; byte < len1 && byte < len2; byte++)
80 int b1 = buf1[byte], b2 = buf2[byte];
83 for (bit = 0; bit < 8; bit++)
// 0x80 >> bit selects bits MSB-first: bit 0 is the top bit of the byte.
84 if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
90 return Py_BuildValue("i", byte*8 + bit);
94 // I would have made this a lower-level function that just fills in a buffer
95 // with random values, and then written those values from python. But that's
96 // about 20% slower in my tests, and since we typically generate random
97 // numbers for benchmarking other parts of bup, any slowness in generating
98 // random bytes will make our benchmarks inaccurate. Plus nobody wants
99 // pseudorandom bytes much except for this anyway.
// Python-callable: write_random(fd, len, seed) -> long long
// Writes data to file descriptor `fd` in 1024-byte chunks, followed by a
// final partial chunk of len % 1024 bytes, printing progress to stderr.
// Returns the value of `written`.
// NOTE(review): the lines that seed the generator, fill `buf`, and update
// `written` are elided from this listing, as are the declarations of `ret`
// and `i`; presumably `written` is the total byte count actually written.
100 static PyObject *write_random(PyObject *self, PyObject *args)
// 1024-byte scratch buffer, addressed as 32-bit words.
102 uint32_t buf[1024/4];
103 int fd = -1, seed = 0;
105 long long len = 0, kbytes = 0, written = 0;
// "iLi": int fd, long long len, int seed.
107 if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
// One iteration per full 1024-byte chunk.
112 for (kbytes = 0; kbytes < len/1024; kbytes++)
// Visit every word of buf (the loop body is on an elided line).
115 for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
117 ret = write(fd, buf, sizeof(buf));
// Short-write check (the action taken is on an elided line).
// NOTE(review): sizeof yields size_t, so if `ret` is a signed type a negative
// value would be converted to a large unsigned one and this test would be
// false; verify that `ret` is clamped to >= 0 before this point.
121 if (ret < sizeof(buf))
// Progress report once per 1024 chunks (i.e. per MiB), skipping the first.
123 if (kbytes/1024 > 0 && !(kbytes%1024))
124 fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024);
127 // handle non-multiples of 1024
131 for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
// Write only the leftover len % 1024 bytes of the buffer.
133 ret = write(fd, buf, len % 1024);
140 fprintf(stderr, "Random: %lld Mbytes, done.\n", kbytes/1024);
141 return Py_BuildValue("L", written);
// Python-callable: open_noatime(filename) -> int
// Opens `filename` with O_NOATIME added to the base flags, retrying without
// O_NOATIME if the first attempt fails with EPERM. Returns the file
// descriptor as a Python int, or raises IOError (with the filename attached)
// built from errno.
// NOTE(review): the initial value of `attrs` and any #ifdef guards around the
// O_LARGEFILE / O_NOATIME lines are on elided lines.
145 static PyObject *open_noatime(PyObject *self, PyObject *args)
147 char *filename = NULL;
148 int attrs, attrs_noatime, fd;
149 if (!PyArg_ParseTuple(args, "s", &filename))
156 attrs |= O_LARGEFILE;
// Keep `attrs` without O_NOATIME so it can be used for the fallback open().
158 attrs_noatime = attrs;
160 attrs_noatime |= O_NOATIME;
162 fd = open(filename, attrs_noatime);
163 if (fd < 0 && errno == EPERM)
165 // older Linux kernels would return EPERM if you used O_NOATIME
166 // and weren't the file's owner. This pointless restriction was
167 // relaxed eventually, but we have to handle it anyway.
168 // (VERY old kernels didn't recognize O_NOATIME, but they would
169 // just harmlessly ignore it, so this branch won't trigger)
170 fd = open(filename, attrs);
// NOTE(review): the condition guarding this error return is on an elided
// line (presumably `if (fd < 0)`).
173 return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
174 return Py_BuildValue("i", fd);
// Python-callable: fadvise_done(fd, ofs) -> None
// When POSIX_FADV_DONTNEED is available at compile time, advises the kernel
// that the byte range [0, ofs) of `fd` is no longer needed. The result of
// posix_fadvise() is ignored. On platforms without POSIX_FADV_DONTNEED the
// posix_fadvise() call is compiled out.
// NOTE(review): the declarations of `fd` and `ofs` and the closing #endif
// are on elided lines.
178 static PyObject *fadvise_done(PyObject *self, PyObject *args)
// "iL": int fd, long long ofs.
182 if (!PyArg_ParseTuple(args, "iL", &fd, &ofs))
184 #ifdef POSIX_FADV_DONTNEED
185 posix_fadvise(fd, 0, ofs, POSIX_FADV_DONTNEED);
// An empty format string makes Py_BuildValue return None.
187 return Py_BuildValue("");
// Method table for the extension module: maps each Python-visible name to its
// C implementation, calling convention and docstring. All entries use
// METH_VARARGS (positional-argument tuple).
// NOTE(review): the "splitbuf" docstring says "a list of strings", but the
// visible splitbuf() parses a single buffer argument -- confirm which is
// intended.
191 static PyMethodDef hashsplit_methods[] = {
192 { "blobbits", blobbits, METH_VARARGS,
193 "Return the number of bits in the rolling checksum." },
194 { "splitbuf", splitbuf, METH_VARARGS,
195 "Split a list of strings based on a rolling checksum." },
196 { "bitmatch", bitmatch, METH_VARARGS,
197 "Count the number of matching prefix bits between two strings." },
198 { "write_random", write_random, METH_VARARGS,
199 "Write random bytes to the given file descriptor" },
200 { "open_noatime", open_noatime, METH_VARARGS,
201 "open() the given filename for read with O_NOATIME if possible" },
202 { "fadvise_done", fadvise_done, METH_VARARGS,
203 "Inform the kernel that we're finished with earlier parts of a file" },
204 { NULL, NULL, 0, NULL }, // sentinel
// Module initialisation entry point: registers the functions listed in
// hashsplit_methods under the module name "_hashsplit".
// Py_InitModule is the Python 2 module-creation API; its return value is not
// checked here.
207 PyMODINIT_FUNC init_hashsplit(void)
209 Py_InitModule("_hashsplit", hashsplit_methods);