lib/bup/_hashsplit.c

   1 #include <Python.h>
   2 #include <assert.h>
   3 #include <stdint.h>
   4 #include <fcntl.h>
   5
   6 #define BLOBBITS (13)
   7 #define BLOBSIZE (1<<BLOBBITS)
   8 #define WINDOWBITS (7)
   9 #define WINDOWSIZE (1<<(WINDOWBITS-1))
  10
  11 // According to librsync/rollsum.h:
  12 // "We should make this something other than zero to improve the
  13 // checksum algorithm: tridge suggests a prime number."
  14 // apenwarr: I unscientifically tried 0 and 7919, and they both ended up
  15 // slightly worse than the librsync value of 31 for my arbitrary test data.
  16 #define ROLLSUM_CHAR_OFFSET 31
  17
  18 typedef struct {
  19     unsigned s1, s2;
  20     uint8_t window[WINDOWSIZE];
  21     int wofs;
  22 } Rollsum;
  23
  24
  25 // These formulas are based on rollsum.h in the librsync project.
  26 static void rollsum_add(Rollsum *r, uint8_t drop, uint8_t add)
  27 {
  28     r->s1 += add - drop;
  29     r->s2 += r->s1 - (WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET));
  30 }
  31
  32
  33 static void rollsum_init(Rollsum *r)
  34 {
  35     r->s1 = WINDOWSIZE * ROLLSUM_CHAR_OFFSET;
  36     r->s2 = WINDOWSIZE * (WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET;
  37     r->wofs = 0;
  38     memset(r->window, 0, WINDOWSIZE);
  39 }
  40
  41
  42 // For some reason, gcc 4.3 (at least) optimizes badly if find_ofs()
  43 // is static and rollsum_roll is an inline function.  Let's use a macro
  44 // here instead to help out the optimizer.
  45 #define rollsum_roll(r, ch) do { \
  46     rollsum_add((r), (r)->window[(r)->wofs], (ch)); \
  47     (r)->window[(r)->wofs] = (ch); \
  48     (r)->wofs = ((r)->wofs + 1) % WINDOWSIZE; \
  49 } while (0)
  50
  51
  52 static uint32_t rollsum_digest(Rollsum *r)
  53 {
  54     return (r->s1 << 16) | (r->s2 & 0xffff);
  55 }
  56
  57
  58 static uint32_t rollsum_sum(uint8_t *buf, size_t ofs, size_t len)
  59 {
  60     int count;
  61     Rollsum r;
  62     rollsum_init(&r);
  63     for (count = ofs; count < len; count++)
  64         rollsum_roll(&r, buf[count]);
  65     return rollsum_digest(&r);
  66 }
  67
  68
  69 static PyObject *selftest(PyObject *self, PyObject *args)
  70 {
  71     uint8_t buf[100000];
  72     uint32_t sum1a, sum1b, sum2a, sum2b, sum3a, sum3b;
  73     int count;
  74
  75     if (!PyArg_ParseTuple(args, ""))
  76         return NULL;
  77
  78     srandom(1);
  79     for (count = 0; count < sizeof(buf); count++)
  80         buf[count] = random();
  81
  82     sum1a = rollsum_sum(buf, 0, sizeof(buf));
  83     sum1b = rollsum_sum(buf, 1, sizeof(buf));
  84     sum2a = rollsum_sum(buf, sizeof(buf) - WINDOWSIZE*5/2,
  85                         sizeof(buf) - WINDOWSIZE);
  86     sum2b = rollsum_sum(buf, 0, sizeof(buf) - WINDOWSIZE);
  87     sum3a = rollsum_sum(buf, 0, WINDOWSIZE+3);
  88     sum3b = rollsum_sum(buf, 3, WINDOWSIZE+3);
  89
  90     fprintf(stderr, "sum1a = 0x%08x\n", sum1a);
  91     fprintf(stderr, "sum1b = 0x%08x\n", sum1b);
  92     fprintf(stderr, "sum2a = 0x%08x\n", sum2a);
  93     fprintf(stderr, "sum2b = 0x%08x\n", sum2b);
  94     fprintf(stderr, "sum3a = 0x%08x\n", sum3a);
  95     fprintf(stderr, "sum3b = 0x%08x\n", sum3b);
  96
  97     return Py_BuildValue("i", sum1a==sum1b && sum2a==sum2b && sum3a==sum3b);
  98 }
  99
 100
 101 static int find_ofs(const unsigned char *buf, int len, int *bits)
 102 {
 103     Rollsum r;
 104     int count;
 105
 106     rollsum_init(&r);
 107     for (count = 0; count < len; count++)
 108     {
 109         rollsum_roll(&r, buf[count]);
 110         if ((r.s2 & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1)))
 111         {
 112             if (bits)
 113             {
 114                 unsigned rsum = rollsum_digest(&r);
 115                 *bits = BLOBBITS;
 116                 rsum >>= BLOBBITS;
 117                 for (*bits = BLOBBITS; (rsum >>= 1) & 1; (*bits)++)
 118                     ;
 119             }
 120             return count+1;
 121         }
 122     }
 123     return 0;
 124 }
 125
 126
 127 static PyObject *blobbits(PyObject *self, PyObject *args)
 128 {
 129     if (!PyArg_ParseTuple(args, ""))
 130         return NULL;
 131     return Py_BuildValue("i", BLOBBITS);
 132 }
 133
 134
 135 static PyObject *splitbuf(PyObject *self, PyObject *args)
 136 {
 137     unsigned char *buf = NULL;
 138     int len = 0, out = 0, bits = -1;
 139
 140     if (!PyArg_ParseTuple(args, "t#", &buf, &len))
 141         return NULL;
 142     out = find_ofs(buf, len, &bits);
 143     return Py_BuildValue("ii", out, bits);
 144 }
 145
 146
 147 static PyObject *bitmatch(PyObject *self, PyObject *args)
 148 {
 149     unsigned char *buf1 = NULL, *buf2 = NULL;
 150     int len1 = 0, len2 = 0;
 151     int byte, bit;
 152
 153     if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
 154         return NULL;
 155
 156     bit = 0;
 157     for (byte = 0; byte < len1 && byte < len2; byte++)
 158     {
 159         int b1 = buf1[byte], b2 = buf2[byte];
 160         if (b1 != b2)
 161         {
 162             for (bit = 0; bit < 8; bit++)
 163                 if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
 164                     break;
 165             break;
 166         }
 167     }
 168
 169     return Py_BuildValue("i", byte*8 + bit);
 170 }
 171
 172
 173 // I would have made this a lower-level function that just fills in a buffer
 174 // with random values, and then written those values from python.  But that's
 175 // about 20% slower in my tests, and since we typically generate random
 176 // numbers for benchmarking other parts of bup, any slowness in generating
 177 // random bytes will make our benchmarks inaccurate.  Plus nobody wants
 178 // pseudorandom bytes much except for this anyway.
 179 static PyObject *write_random(PyObject *self, PyObject *args)
 180 {
 181     uint32_t buf[1024/4];
 182     int fd = -1, seed = 0;
 183     ssize_t ret;
 184     long long len = 0, kbytes = 0, written = 0;
 185
 186     if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
 187         return NULL;
 188
 189     srandom(seed);
 190
 191     for (kbytes = 0; kbytes < len/1024; kbytes++)
 192     {
 193         int i;
 194         for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
 195             buf[i] = random();
 196         ret = write(fd, buf, sizeof(buf));
 197         if (ret < 0)
 198             ret = 0;
 199         written += ret;
 200         if (ret < sizeof(buf))
 201             break;
 202         if (kbytes/1024 > 0 && !(kbytes%1024))
 203             fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024);
 204     }
 205
 206     // handle non-multiples of 1024
 207     if (len % 1024)
 208     {
 209         int i;
 210         for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
 211             buf[i] = random();
 212         ret = write(fd, buf, len % 1024);
 213         if (ret < 0)
 214             ret = 0;
 215         written += ret;
 216     }
 217
 218     if (kbytes/1024 > 0)
 219         fprintf(stderr, "Random: %lld Mbytes, done.\n", kbytes/1024);
 220     return Py_BuildValue("L", written);
 221 }
 222
 223
 224 static PyObject *open_noatime(PyObject *self, PyObject *args)
 225 {
 226     char *filename = NULL;
 227     int attrs, attrs_noatime, fd;
 228     if (!PyArg_ParseTuple(args, "s", &filename))
 229         return NULL;
 230     attrs = O_RDONLY;
 231 #ifdef O_NOFOLLOW
 232     attrs |= O_NOFOLLOW;
 233 #endif
 234 #ifdef O_LARGEFILE
 235     attrs |= O_LARGEFILE;
 236 #endif
 237     attrs_noatime = attrs;
 238 #ifdef O_NOATIME
 239     attrs_noatime |= O_NOATIME;
 240 #endif
 241     fd = open(filename, attrs_noatime);
 242     if (fd < 0 && errno == EPERM)
 243     {
 244         // older Linux kernels would return EPERM if you used O_NOATIME
 245         // and weren't the file's owner.  This pointless restriction was
 246         // relaxed eventually, but we have to handle it anyway.
 247         // (VERY old kernels didn't recognized O_NOATIME, but they would
 248         // just harmlessly ignore it, so this branch won't trigger)
 249         fd = open(filename, attrs);
 250     }
 251     if (fd < 0)
 252         return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
 253     return Py_BuildValue("i", fd);
 254 }
 255
 256
 257 static PyObject *fadvise_done(PyObject *self, PyObject *args)
 258 {
 259     int fd = -1;
 260     long long ofs = 0;
 261     if (!PyArg_ParseTuple(args, "iL", &fd, &ofs))
 262         return NULL;
 263 #ifdef POSIX_FADV_DONTNEED
 264     posix_fadvise(fd, 0, ofs, POSIX_FADV_DONTNEED);
 265 #endif
 266     return Py_BuildValue("");
 267 }
 268
 269
 270 static PyMethodDef hashsplit_methods[] = {
 271     { "selftest", selftest, METH_VARARGS,
 272         "Check that the rolling checksum rolls correctly (for unit tests)." },
 273     { "blobbits", blobbits, METH_VARARGS,
 274         "Return the number of bits in the rolling checksum." },
 275     { "splitbuf", splitbuf, METH_VARARGS,
 276         "Split a list of strings based on a rolling checksum." },
 277     { "bitmatch", bitmatch, METH_VARARGS,
 278         "Count the number of matching prefix bits between two strings." },
 279     { "write_random", write_random, METH_VARARGS,
 280         "Write random bytes to the given file descriptor" },
 281     { "open_noatime", open_noatime, METH_VARARGS,
 282         "open() the given filename for read with O_NOATIME if possible" },
 283     { "fadvise_done", fadvise_done, METH_VARARGS,
 284         "Inform the kernel that we're finished with earlier parts of a file" },
 285     { NULL, NULL, 0, NULL },  // sentinel
 286 };
 287
 288 PyMODINIT_FUNC init_hashsplit(void)
 289 {
 290     Py_InitModule("_hashsplit", hashsplit_methods);
 291 }