lib/bup/_helpers.c

   1 #include "bupsplit.h"
   2 #include <Python.h>
   3 #include <assert.h>
   4 #include <stdint.h>
   5 #include <fcntl.h>
   6
   7 static PyObject *selftest(PyObject *self, PyObject *args)
   8 {
   9     if (!PyArg_ParseTuple(args, ""))
  10         return NULL;
  11
  12     return Py_BuildValue("i", !bupsplit_selftest());
  13 }
  14
  15
  16 static PyObject *blobbits(PyObject *self, PyObject *args)
  17 {
  18     if (!PyArg_ParseTuple(args, ""))
  19         return NULL;
  20     return Py_BuildValue("i", BUP_BLOBBITS);
  21 }
  22
  23
  24 static PyObject *splitbuf(PyObject *self, PyObject *args)
  25 {
  26     unsigned char *buf = NULL;
  27     int len = 0, out = 0, bits = -1;
  28
  29     if (!PyArg_ParseTuple(args, "t#", &buf, &len))
  30         return NULL;
  31     out = bupsplit_find_ofs(buf, len, &bits);
  32     return Py_BuildValue("ii", out, bits);
  33 }
  34
  35
  36 static PyObject *bitmatch(PyObject *self, PyObject *args)
  37 {
  38     unsigned char *buf1 = NULL, *buf2 = NULL;
  39     int len1 = 0, len2 = 0;
  40     int byte, bit;
  41
  42     if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
  43         return NULL;
  44
  45     bit = 0;
  46     for (byte = 0; byte < len1 && byte < len2; byte++)
  47     {
  48         int b1 = buf1[byte], b2 = buf2[byte];
  49         if (b1 != b2)
  50         {
  51             for (bit = 0; bit < 8; bit++)
  52                 if ( (b1 & (0x80 >> bit)) != (b2 & (0x80 >> bit)) )
  53                     break;
  54             break;
  55         }
  56     }
  57
  58     return Py_BuildValue("i", byte*8 + bit);
  59 }
  60
  61
  62 // I would have made this a lower-level function that just fills in a buffer
  63 // with random values, and then written those values from python.  But that's
  64 // about 20% slower in my tests, and since we typically generate random
  65 // numbers for benchmarking other parts of bup, any slowness in generating
  66 // random bytes will make our benchmarks inaccurate.  Plus nobody wants
  67 // pseudorandom bytes much except for this anyway.
  68 static PyObject *write_random(PyObject *self, PyObject *args)
  69 {
  70     uint32_t buf[1024/4];
  71     int fd = -1, seed = 0;
  72     ssize_t ret;
  73     long long len = 0, kbytes = 0, written = 0;
  74
  75     if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
  76         return NULL;
  77
  78     srandom(seed);
  79
  80     for (kbytes = 0; kbytes < len/1024; kbytes++)
  81     {
  82         unsigned i;
  83         for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
  84             buf[i] = random();
  85         ret = write(fd, buf, sizeof(buf));
  86         if (ret < 0)
  87             ret = 0;
  88         written += ret;
  89         if (ret < (int)sizeof(buf))
  90             break;
  91         if (kbytes/1024 > 0 && !(kbytes%1024))
  92             fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024);
  93     }
  94
  95     // handle non-multiples of 1024
  96     if (len % 1024)
  97     {
  98         unsigned i;
  99         for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
 100             buf[i] = random();
 101         ret = write(fd, buf, len % 1024);
 102         if (ret < 0)
 103             ret = 0;
 104         written += ret;
 105     }
 106
 107     if (kbytes/1024 > 0)
 108         fprintf(stderr, "Random: %lld Mbytes, done.\n", kbytes/1024);
 109     return Py_BuildValue("L", written);
 110 }
 111
 112
 113 static PyObject *open_noatime(PyObject *self, PyObject *args)
 114 {
 115     char *filename = NULL;
 116     int attrs, attrs_noatime, fd;
 117     if (!PyArg_ParseTuple(args, "s", &filename))
 118         return NULL;
 119     attrs = O_RDONLY;
 120 #ifdef O_NOFOLLOW
 121     attrs |= O_NOFOLLOW;
 122 #endif
 123 #ifdef O_LARGEFILE
 124     attrs |= O_LARGEFILE;
 125 #endif
 126     attrs_noatime = attrs;
 127 #ifdef O_NOATIME
 128     attrs_noatime |= O_NOATIME;
 129 #endif
 130     fd = open(filename, attrs_noatime);
 131     if (fd < 0 && errno == EPERM)
 132     {
 133         // older Linux kernels would return EPERM if you used O_NOATIME
 134         // and weren't the file's owner.  This pointless restriction was
 135         // relaxed eventually, but we have to handle it anyway.
 136         // (VERY old kernels didn't recognized O_NOATIME, but they would
 137         // just harmlessly ignore it, so this branch won't trigger)
 138         fd = open(filename, attrs);
 139     }
 140     if (fd < 0)
 141         return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
 142     return Py_BuildValue("i", fd);
 143 }
 144
 145
 146 static PyObject *fadvise_done(PyObject *self, PyObject *args)
 147 {
 148     int fd = -1;
 149     long long ofs = 0;
 150     if (!PyArg_ParseTuple(args, "iL", &fd, &ofs))
 151         return NULL;
 152 #ifdef POSIX_FADV_DONTNEED
 153     posix_fadvise(fd, 0, ofs, POSIX_FADV_DONTNEED);
 154 #endif
 155     return Py_BuildValue("");
 156 }
 157
 158
 159 static PyMethodDef faster_methods[] = {
 160     { "selftest", selftest, METH_VARARGS,
 161         "Check that the rolling checksum rolls correctly (for unit tests)." },
 162     { "blobbits", blobbits, METH_VARARGS,
 163         "Return the number of bits in the rolling checksum." },
 164     { "splitbuf", splitbuf, METH_VARARGS,
 165         "Split a list of strings based on a rolling checksum." },
 166     { "bitmatch", bitmatch, METH_VARARGS,
 167         "Count the number of matching prefix bits between two strings." },
 168     { "write_random", write_random, METH_VARARGS,
 169         "Write random bytes to the given file descriptor" },
 170     { "open_noatime", open_noatime, METH_VARARGS,
 171         "open() the given filename for read with O_NOATIME if possible" },
 172     { "fadvise_done", fadvise_done, METH_VARARGS,
 173         "Inform the kernel that we're finished with earlier parts of a file" },
 174     { NULL, NULL, 0, NULL },  // sentinel
 175 };
 176
 177 PyMODINIT_FUNC init_helpers(void)
 178 {
 179     Py_InitModule("_helpers", faster_methods);
 180 }