#include <errno.h>
#include <fcntl.h>
#include <arpa/inet.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
+#include <string.h>
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
#ifdef HAVE_LINUX_FS_H
#include <linux/fs.h>
#define FS_NOCOW_FL 0
#endif
-static int istty2 = 0;
+
+typedef unsigned char byte;
+
+
+typedef struct { // Per-module state; always reached through get_state()
+ int istty2; // Set in setup_module(): nonzero if isatty(2) or BUP_FORCE_TTY has bit 2 set
+} state_t;
+
+#if PY_MAJOR_VERSION < 3
+static state_t state; // Python 2: a single file-scope state_t
+# define get_state(x) (&state) // The argument is ignored in this branch
+# define cstr_argf "s" // PyArg_ParseTuple() format code used for C-string arguments
+#else
+# define get_state(x) ((state_t *) PyModule_GetState(x)) // Python 3: state is stored with module object x
+# define cstr_argf "y" // Same role as the Python 2 definition, but with the "y" format code
+#endif // PY_MAJOR_VERSION >= 3
#ifndef htonll
(((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x))
+
+#if PY_MAJOR_VERSION < 3
static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
const char *name)
{
*x = tmp;
return 1;
}
+#endif
static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name)
{
+#if PY_MAJOR_VERSION < 3
if (PyInt_Check(py))
return bup_ulong_from_pyint(x, py, name);
+#endif
if (!PyLong_Check(py))
{
static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
const char *name)
{
+#if PY_MAJOR_VERSION < 3
if (PyInt_Check(py))
{
unsigned long tmp;
}
return 0;
}
+#endif
if (!PyLong_Check(py))
{
}
+static PyObject *bup_bytescmp(PyObject *self, PyObject *args)
+{
+    // Three-way comparison of two bytes objects (PyString under
+    // Python 2).  The common prefix is compared with memcmp(); when
+    // that ties, the shorter argument sorts first.  Returns a Python
+    // integer that is negative, zero, or positive, or NULL if the
+    // arguments can't be unpacked.
+    PyObject *py_lhs, *py_rhs;
+    if (!PyArg_ParseTuple(args, "SS", &py_lhs, &py_rhs))
+        return NULL;
+
+    char *lhs, *rhs;
+    Py_ssize_t lhs_len, rhs_len;
+    if (PyBytes_AsStringAndSize(py_lhs, &lhs, &lhs_len) == -1
+        || PyBytes_AsStringAndSize(py_rhs, &rhs, &rhs_len) == -1)
+        return NULL;
+
+    const Py_ssize_t shared_len = (lhs_len < rhs_len) ? lhs_len : rhs_len;
+    long order = memcmp(lhs, rhs, shared_len);
+    // Equal prefixes: fall back to comparing the lengths.
+    if (order == 0 && lhs_len != rhs_len)
+        order = (lhs_len < rhs_len) ? -1 : 1;
+    return PyLong_FromLong(order);
+}
+
+
// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
#if __WIN32__ || __CYGWIN__
#endif // not __WIN32__ or __CYGWIN__
-static unsigned long long count_leading_zeros(const unsigned char * const buf,
- unsigned long long len)
-{
- const unsigned char *cur = buf;
- while(len-- && *cur == 0)
- cur++;
- return cur - buf;
-}
-
-
static int write_all(int fd, const void *buf, const size_t count)
{
size_t written = 0;
return 1;
}
+
static PyObject *append_sparse_region(const int fd, unsigned long long n)
{
- while(n)
+ while (n)
{
off_t new_off;
if (!INTEGRAL_ASSIGNMENT_FITS(&new_off, n))
}
+static PyObject *record_sparse_zeros(unsigned long long *new_pending,
+                                     const int fd,
+                                     unsigned long long prev_pending,
+                                     const unsigned long long count)
+{
+    // Account for count more pending sparse zeros.  Normally
+    // *new_pending becomes prev_pending + count.  If uadd() reports
+    // that the sum doesn't fit, the prev_pending zeros are appended
+    // to fd as a sparse region and *new_pending becomes just count.
+    // Returns NULL on success, or whatever non-NULL value
+    // append_sparse_region() returned.
+    if (uadd(new_pending, prev_pending, count))
+        return NULL;
+
+    PyObject * const failure = append_sparse_region(fd, prev_pending);
+    if (failure == NULL)
+        *new_pending = count;
+    return failure;
+}
+
+
+static byte* find_not_zero(const byte * const start, const byte * const end)
+{
+    // Scan [start, end) forward and return the address of the first
+    // byte that isn't zero.  If every byte is zero, or the range is
+    // empty, return end.
+    assert(start <= end);
+    const byte *pos;
+    for (pos = start; pos < end; pos++)
+        if (*pos != 0)
+            break;
+    return (byte *) pos;
+}
+
+
+static byte* find_trailing_zeros(const byte * const start,
+                                 const byte * const end)
+{
+    // Return the address where the run of zeros that finishes
+    // [start, end) begins.  That is end when the last byte is
+    // non-zero or the range is empty, and start when every byte is
+    // zero.  Requires start <= end.
+    assert(start <= end);
+    const byte *run = end;
+    // Walk backward for as long as the byte just before run is zero.
+    while (run > start && run[-1] == 0)
+        run--;
+    return (byte *) run;
+}
+
+
+static byte *find_non_sparse_end(const byte * const start,
+ const byte * const end,
+ const unsigned long long min_len)
+{
+ // Return a pointer to the first run of at least min_len zero bytes
+ // in [start, end) if there is one, otherwise a pointer to the start
+ // of any trailing run of zeros. If there are no trailing zeros,
+ // return end. min_len must be non-zero (asserted below).
+ if (start == end) // Empty range: nothing to scan
+ return (byte *) end;
+ assert(start < end);
+ assert(min_len);
+ // Probe in min_len jumps, searching backward from the jump
+ // destination for a non-zero byte. If such a byte is found, move
+ // just past it and try again.
+ const byte *candidate = start;
+ // End of any run of zeros, starting at candidate, that we've already seen
+ const byte *end_of_known_zeros = candidate;
+ while (end - candidate >= min_len) // Handle all min_len candidate blocks
+ {
+ const byte * const probe_end = candidate + min_len; // One past the min_len window at candidate
+ const byte * const trailing_zeros =
+ find_trailing_zeros(end_of_known_zeros, probe_end);
+ if (trailing_zeros == probe_end) // Window ends in a non-zero byte: restart just past the window
+ end_of_known_zeros = candidate = probe_end;
+ else if (trailing_zeros == end_of_known_zeros) // Unseen part of the window is all zeros: full min_len run
+ {
+ assert(candidate >= start);
+ assert(candidate <= end);
+ assert(*candidate == 0);
+ return (byte *) candidate;
+ }
+ else // Window ends in a shorter run of zeros; that run becomes the next candidate
+ {
+ candidate = trailing_zeros;
+ end_of_known_zeros = probe_end;
+ }
+ }
+
+ if (candidate == end) // The last window reached end exactly and finished on a non-zero byte
+ return (byte *) end;
+
+ // No min_len sparse run found, search backward from end
+ const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros,
+ end);
+
+ if (trailing_zeros == end_of_known_zeros) // Zeros extend from candidate all the way to end
+ {
+ assert(candidate >= start);
+ assert(candidate < end);
+ assert(*candidate == 0);
+ assert(end - candidate < min_len);
+ return (byte *) candidate;
+ }
+
+ if (trailing_zeros == end) // Buffer does not end in zeros
+ {
+ assert(*(end - 1) != 0);
+ return (byte *) end;
+ }
+
+ assert(end - trailing_zeros < min_len);
+ assert(trailing_zeros >= start);
+ assert(trailing_zeros < end);
+ assert(*trailing_zeros == 0);
+ return (byte *) trailing_zeros;
+}
+
+
static PyObject *bup_write_sparsely(PyObject *self, PyObject *args)
{
int fd;
if (!INTEGRAL_ASSIGNMENT_FITS(&buf_len, sbuf_len))
return PyErr_Format(PyExc_OverflowError, "buffer length too large");
- // The value of zeros_read indicates the number of zeros read from
- // buf that haven't been accounted for yet (with respect to cur),
- // while zeros indicates the total number of pending zeros, which
- // could be larger in the first iteration if prev_sparse_len
- // wasn't zero.
- int rc;
- unsigned long long unexamined = buf_len;
- unsigned char *block_start = buf, *cur = buf;
- unsigned long long zeros, zeros_read = count_leading_zeros(cur, unexamined);
- assert(zeros_read <= unexamined);
- unexamined -= zeros_read;
- if (!uadd(&zeros, prev_sparse_len, zeros_read))
+ const byte * block = buf; // Start of pending block
+ const byte * const end = buf + buf_len;
+ unsigned long long zeros = prev_sparse_len;
+ while (1)
{
- PyObject *err = append_sparse_region(fd, prev_sparse_len);
- if (err != NULL)
- return err;
- zeros = zeros_read;
- }
+ assert(block <= end);
+ if (block == end)
+ return PyLong_FromUnsignedLongLong(zeros);
- while(unexamined)
- {
- if (zeros < min_sparse_len)
- cur += zeros_read;
- else
+ if (*block != 0)
{
- rc = write_all(fd, block_start, cur - block_start);
- if (rc)
- return PyErr_SetFromErrno(PyExc_IOError);
+ // Look for the end of block, i.e. the next sparse run of
+ // at least min_sparse_len zeros, or the end of the
+ // buffer.
+ const byte * const probe = find_non_sparse_end(block + 1, end,
+ min_sparse_len);
+ // Either at end of block, or end of non-sparse; write pending data
PyObject *err = append_sparse_region(fd, zeros);
if (err != NULL)
return err;
- cur += zeros_read;
- block_start = cur;
- }
- // Pending zeros have ether been made sparse, or are going to
- // be rolled into the next non-sparse block since we know we
- // now have at least one unexamined non-zero byte.
- assert(unexamined && *cur != 0);
- zeros = zeros_read = 0;
- while (unexamined && *cur != 0)
- {
- cur++; unexamined--;
+ int rc = write_all(fd, block, probe - block);
+ if (rc)
+ return PyErr_SetFromErrno(PyExc_IOError);
+
+ if (end - probe < min_sparse_len)
+ zeros = end - probe;
+ else
+ zeros = min_sparse_len;
+ block = probe + zeros;
}
- if (unexamined)
+ else // *block == 0
{
- zeros_read = count_leading_zeros(cur, unexamined);
- assert(zeros_read <= unexamined);
- unexamined -= zeros_read;
- zeros = zeros_read;
+ // Should be in the first loop iteration, a sparse run of
+ // zeros, or nearly at the end of the block (within
+ // min_sparse_len).
+ const byte * const zeros_end = find_not_zero(block, end);
+ PyObject *err = record_sparse_zeros(&zeros, fd,
+ zeros, zeros_end - block);
+ if (err != NULL)
+ return err;
+ assert(block <= zeros_end);
+ block = zeros_end;
}
}
- rc = write_all(fd, block_start, cur - block_start);
- if (rc)
- return PyErr_SetFromErrno(PyExc_IOError);
- return PyLong_FromUnsignedLongLong(zeros);
}
unsigned char bytes[20];
};
+static inline int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
+{
+    // Bytewise ordering of the two hashes via memcmp(): negative,
+    // zero, or positive as sha1 sorts before, equal to, or after sha2.
+    const int order = memcmp(sha1->bytes, sha2->bytes, sizeof sha1->bytes);
+    return order;
+}
+
struct idx {
unsigned char *map;
int name_base;
};
-
-static int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
-{
- int i;
- for (i = 0; i < sizeof(struct sha); i++)
- if (sha1->bytes[i] != sha2->bytes[i])
- return sha1->bytes[i] - sha2->bytes[i];
- return 0;
-}
-
-
static void _fix_idx_order(struct idx **idxs, int *last_i)
{
struct idx *idx;
{
struct idx *idx;
uint32_t new_prefix;
- if (count % 102424 == 0 && istty2)
+ if (count % 102424 == 0 && get_state(self)->istty2)
fprintf(stderr, "midx: writing %.2f%% (%d/%d)\r",
count*100.0/total, count, total);
idx = idxs[last_i];
// The extents flag can't be removed, so don't (see chattr(1) and chattr.c).
orig_attr = 0; // Handle int/long mismatch (see above)
rc = ioctl(fd, FS_IOC_GETFLAGS, &orig_attr);
- assert(orig_attr <= UINT_MAX); // Kernel type is actually int
if (rc == -1)
{
close(fd);
return PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
}
+ assert(orig_attr <= UINT_MAX); // Kernel type is actually int
attr |= ((unsigned int) orig_attr) & FS_EXTENT_FL;
rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
PyObject *access_py, *modification_py;
struct timespec ts[2];
- if (!PyArg_ParseTuple(args, "is((Ol)(Ol))i",
+ if (!PyArg_ParseTuple(args, "i" cstr_argf "((Ol)(Ol))i",
&fd,
&path,
&access_py, &(ts[0].tv_nsec),
PyObject *access_py, *modification_py;
long long access_us, modification_us; // POSIX guarantees tv_usec is signed.
- if (!PyArg_ParseTuple(args, "s((OL)(OL))",
+ if (!PyArg_ParseTuple(args, cstr_argf "((OL)(OL))",
path,
&access_py, &access_us,
&modification_py, &modification_us))
int rc;
char *filename;
- if (!PyArg_ParseTuple(args, "s", &filename))
+ if (!PyArg_ParseTuple(args, cstr_argf, &filename))
return NULL;
struct stat st;
int rc;
char *filename;
- if (!PyArg_ParseTuple(args, "s", &filename))
+ if (!PyArg_ParseTuple(args, cstr_argf, &filename))
return NULL;
struct stat st;
return PyErr_Format(PyExc_OverflowError, "time value too large");
struct tm tm;
+ tzset();
if(localtime_r(&ttime, &tm) == NULL)
return PyErr_SetFromErrno(PyExc_OSError);
#ifdef BUP_MINCORE_BUF_TYPE
static PyObject *bup_mincore(PyObject *self, PyObject *args)
{
- const char *src;
- Py_ssize_t src_ssize;
- Py_buffer dest;
+ Py_buffer src, dest; // Both are released at clean_and_return
PyObject *py_src_n, *py_src_off, *py_dest_off;
- if (!PyArg_ParseTuple(args, "s#OOw*O",
- &src, &src_ssize, &py_src_n, &py_src_off,
+
+ if (!PyArg_ParseTuple(args, cstr_argf "*OOw*O", // src buffer, src_n, src_off, writable dest buffer, dest_off
+ &src, &py_src_n, &py_src_off,
&dest, &py_dest_off))
return NULL;
- unsigned long long src_size, src_n, src_off, dest_size, dest_off;
+ PyObject *result = NULL; // Value handed back by clean_and_return
+
+ unsigned long long src_n, src_off, dest_off;
if (!(bup_ullong_from_py(&src_n, py_src_n, "src_n")
&& bup_ullong_from_py(&src_off, py_src_off, "src_off")
&& bup_ullong_from_py(&dest_off, py_dest_off, "dest_off")))
- return NULL;
+ goto clean_and_return; // A conversion failed; result is still NULL
- if (!INTEGRAL_ASSIGNMENT_FITS(&src_size, src_ssize))
- return PyErr_Format(PyExc_OverflowError, "invalid src size");
unsigned long long src_region_end;
+ if (!uadd(&src_region_end, src_off, src_n)) { // src_off + src_n must be representable
+ result = PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large");
+ goto clean_and_return;
+ }
+ if (src_region_end > src.len) { // ...and must not extend past the src buffer
+ result = PyErr_Format(PyExc_OverflowError, "region runs off end of src");
+ goto clean_and_return;
+ }
- if (!uadd(&src_region_end, src_off, src_n))
- return PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large");
- if (src_region_end > src_size)
- return PyErr_Format(PyExc_OverflowError, "region runs off end of src");
-
- if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len))
- return PyErr_Format(PyExc_OverflowError, "invalid dest size");
- if (dest_off > dest_size)
- return PyErr_Format(PyExc_OverflowError, "region runs off end of dest");
+ unsigned long long dest_size;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len)) {
+ result = PyErr_Format(PyExc_OverflowError, "invalid dest size");
+ goto clean_and_return;
+ }
+ if (dest_off > dest_size) { // Only the starting offset is checked against dest here
+ result = PyErr_Format(PyExc_OverflowError, "region runs off end of dest");
+ goto clean_and_return;
+ }
size_t length;
- if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n))
- return PyErr_Format(PyExc_OverflowError, "src_n overflows size_t");
- int rc = mincore((void *)(src + src_off), src_n,
+ if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n)) {
+ result = PyErr_Format(PyExc_OverflowError, "src_n overflows size_t");
+ goto clean_and_return;
+ }
+ int rc = mincore((void *)(src.buf + src_off), src_n, // Query src_n bytes starting at src.buf + src_off
(BUP_MINCORE_BUF_TYPE *) (dest.buf + dest_off));
- if (rc != 0)
- return PyErr_SetFromErrno(PyExc_OSError);
- return Py_BuildValue("O", Py_None);
+ if (rc != 0) {
+ result = PyErr_SetFromErrno(PyExc_OSError);
+ goto clean_and_return;
+ }
+ result = Py_BuildValue("O", Py_None);
+
+ clean_and_return: // Single exit after a successful parse: release both buffers, then return result
+ PyBuffer_Release(&src);
+ PyBuffer_Release(&dest);
+ return result;
}
#endif /* def BUP_MINCORE_BUF_TYPE */
{ "localtime", bup_localtime, METH_VARARGS,
"Return struct_time elements plus the timezone offset and name." },
#endif
+ { "bytescmp", bup_bytescmp, METH_VARARGS,
+ "Return a negative value if x < y, zero if equal, positive otherwise."},
#ifdef BUP_MINCORE_BUF_TYPE
{ "mincore", bup_mincore, METH_VARARGS,
"For mincore(src, src_n, src_off, dest, dest_off)"
{ NULL, NULL, 0, NULL }, // sentinel
};
-
-PyMODINIT_FUNC init_helpers(void)
+static int setup_module(PyObject *m)
{
- // FIXME: migrate these tests to configure. Check against the
- // type we're going to use when passing to python. Other stat
- // types are tested at runtime.
+ // FIXME: migrate these tests to configure, or at least don't
+ // possibly crash the whole application. Check against the type
+ // we're going to use when passing to python. Other stat types
+ // are tested at runtime.
assert(sizeof(ino_t) <= sizeof(unsigned PY_LONG_LONG));
assert(sizeof(off_t) <= sizeof(PY_LONG_LONG));
assert(sizeof(blksize_t) <= sizeof(PY_LONG_LONG));
assert(sizeof(PY_LONG_LONG) <= sizeof(long long));
assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long));
- if (sizeof(off_t) < sizeof(int))
+ // Originally required by append_sparse_region()
{
- // Originally required by append_sparse_region().
- fprintf(stderr, "sizeof(off_t) < sizeof(int); please report.\n");
- exit(1);
+ off_t probe;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&probe, INT_MAX))
+ {
+ fprintf(stderr, "off_t can't hold INT_MAX; please report.\n");
+ exit(1);
+ }
}
char *e;
- PyObject *m = Py_InitModule("_helpers", helper_methods);
- if (m == NULL)
- return;
-
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY().
+ {
+ PyObject *value;
+ value = INTEGER_TO_PY(INT_MAX);
+ PyObject_SetAttrString(m, "INT_MAX", value);
+ Py_DECREF(value);
+ value = INTEGER_TO_PY(UINT_MAX);
+ PyObject_SetAttrString(m, "UINT_MAX", value);
+ Py_DECREF(value);
+ }
#ifdef HAVE_UTIMENSAT
{
PyObject *value;
#pragma clang diagnostic pop // ignored "-Wtautological-compare"
e = getenv("BUP_FORCE_TTY");
- istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
+ get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
unpythonize_argv();
+ return 1;
}
+
+
+#if PY_MAJOR_VERSION < 3
+
+PyMODINIT_FUNC init_helpers(void)
+{
+    // Python 2 entry point for the _helpers module: create the module
+    // from helper_methods, then let setup_module() populate it.
+    PyObject *m = Py_InitModule("_helpers", helper_methods);
+    if (m == NULL)
+        return;
+
+    // Py_InitModule() returns a *borrowed* reference, so this
+    // function owns nothing it could release.  A Py_DECREF(m) on the
+    // failure path would drop a reference held elsewhere.  The
+    // function returns void, so a setup failure can only surface
+    // through whatever error state setup_module() left behind.
+    if (!setup_module(m))
+        return;
+}
+
+# else // PY_MAJOR_VERSION >= 3
+
+static struct PyModuleDef helpers_def = { // Python 3 module definition handed to PyModule_Create()
+ PyModuleDef_HEAD_INIT,
+ "_helpers", // Module name
+ NULL, // No module docstring
+ sizeof(state_t), // Size of the per-module state that get_state() retrieves
+ helper_methods, // Method table
+ NULL,
+ NULL, // helpers_traverse,
+ NULL, // helpers_clear,
+ NULL
+};
+
+PyMODINIT_FUNC PyInit__helpers(void)
+{
+    // Python 3 entry point for the _helpers module.  Returns the
+    // new module, or NULL if it couldn't be created or
+    // setup_module() reported failure.
+    PyObject *m = PyModule_Create(&helpers_def);
+    if (m != NULL && !setup_module(m))
+    {
+        // Setup failed: drop the module created above.
+        Py_DECREF(m);
+        m = NULL;
+    }
+    return m;
+}
+
+#endif // PY_MAJOR_VERSION >= 3