#include <errno.h>
#include <fcntl.h>
#include <arpa/inet.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
-#include <sys/mman.h>
+#include <string.h>
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#define FS_NOCOW_FL 0
#endif
-static int istty2 = 0;
+
+typedef unsigned char byte;
+
+
+// Per-module state.  Under Python 3 this lives in the module object
+// and is reached through get_state(); under Python 2 get_state()
+// resolves to a single static instance.
+typedef struct {
+ // Non-zero when stderr is a tty, or when bit 2 of the integer in
+ // the BUP_FORCE_TTY environment variable is set.
+ int istty2;
+} state_t;
+
+#if PY_MAJOR_VERSION < 3
+static state_t state;
+# define get_state(x) (&state)
+# define buf_argf "s"
+#else
+# define get_state(x) ((state_t *) PyModule_GetState(x))
+# define buf_argf "y"
+#endif // PY_MAJOR_VERSION >= 3
#ifndef htonll
(((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x))
+
+#if PY_MAJOR_VERSION < 3
static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
const char *name)
{
*x = tmp;
return 1;
}
+#endif
static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name)
{
+#if PY_MAJOR_VERSION < 3
if (PyInt_Check(py))
return bup_ulong_from_pyint(x, py, name);
+#endif
if (!PyLong_Check(py))
{
static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
const char *name)
{
+#if PY_MAJOR_VERSION < 3
if (PyInt_Check(py))
{
unsigned long tmp;
}
return 0;
}
+#endif
if (!PyLong_Check(py))
{
}
+// bytescmp(x, y): order two bytes objects.  The shared prefix is
+// compared with memcmp(); if that is equal, the shorter argument
+// sorts first.  Returns a Python integer that is negative, zero, or
+// positive, or NULL (with an exception set) on bad arguments.
+static PyObject *bup_bytescmp(PyObject *self, PyObject *args)
+{
+    PyObject *py_lhs, *py_rhs;  // "S" only admits PyBytes/PyString objects
+    if (!PyArg_ParseTuple(args, "SS", &py_lhs, &py_rhs))
+        return NULL;
+
+    char *lhs, *rhs;
+    Py_ssize_t lhs_len, rhs_len;
+    if (PyBytes_AsStringAndSize(py_lhs, &lhs, &lhs_len) == -1
+        || PyBytes_AsStringAndSize(py_rhs, &rhs, &rhs_len) == -1)
+        return NULL;
+
+    Py_ssize_t common = lhs_len;
+    if (rhs_len < common)
+        common = rhs_len;
+
+    long order = memcmp(lhs, rhs, common);
+    // Equal prefixes: fall back to comparing the lengths.
+    if (order == 0 && lhs_len != rhs_len)
+        order = (lhs_len < rhs_len) ? -1 : 1;
+    return PyLong_FromLong(order);
+}
+
+
// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
#if __WIN32__ || __CYGWIN__
#endif // not __WIN32__ or __CYGWIN__
-static unsigned long long count_leading_zeros(const unsigned char * const buf,
- unsigned long long len)
-{
- const unsigned char *cur = buf;
- while(len-- && *cur == 0)
- cur++;
- return cur - buf;
-}
-
-
static int write_all(int fd, const void *buf, const size_t count)
{
size_t written = 0;
return 1;
}
+
static PyObject *append_sparse_region(const int fd, unsigned long long n)
{
- while(n)
+ while (n)
{
off_t new_off;
if (!INTEGRAL_ASSIGNMENT_FITS(&new_off, n))
}
+// Accumulate count more pending sparse zeros on top of prev_pending,
+// leaving the running total in *new_pending.  When uadd() reports
+// that the sum does not fit, the previously pending zeros are
+// appended to fd as a sparse region and only count is left pending.
+// Returns NULL unless append_sparse_region() returns a non-NULL
+// value, which is passed straight back to the caller.
+static PyObject *record_sparse_zeros(unsigned long long *new_pending,
+                                     const int fd,
+                                     unsigned long long prev_pending,
+                                     const unsigned long long count)
+{
+    if (uadd(new_pending, prev_pending, count))
+        return NULL;  // Sum fits; nothing needs to be written yet
+
+    // Flush what was already pending, then start over with count.
+    PyObject * const flush_result = append_sparse_region(fd, prev_pending);
+    if (flush_result != NULL)
+        return flush_result;
+    *new_pending = count;
+    return NULL;
+}
+
+
+// Scan forward over [start, end) and return the address of the first
+// byte that is not zero; return end when every byte is zero (or the
+// range is empty).
+static byte* find_not_zero(const byte * const start, const byte * const end)
+{
+    assert(start <= end);
+    const byte *p;
+    for (p = start; p < end; p++)
+    {
+        if (*p != 0)
+            break;
+    }
+    return (byte *) p;
+}
+
+
+// Scan backward from end and return the address where the run of
+// zeros that finishes the range [start, end) begins.  The result is
+// end when the last byte is non-zero or the range is empty, and start
+// when the whole range is zero.
+static byte* find_trailing_zeros(const byte * const start,
+                                 const byte * const end)
+{
+    assert(start <= end);
+    const byte *run_start = end;
+    // Extend the run toward start for as long as the preceding byte is zero.
+    while (run_start > start && run_start[-1] == 0)
+        run_start--;
+    return (byte *) run_start;
+}
+
+
+// Find where the non-sparse data in [start, end) stops.
+//   start, end: bounds of the region to examine (end is exclusive).
+//   min_len: the shortest run of zeros that counts as sparse; asserted
+//     to be non-zero whenever the region is not empty.
+// The returned pointer always lies within [start, end].
+static byte *find_non_sparse_end(const byte * const start,
+ const byte * const end,
+ const unsigned long long min_len)
+{
+ // Return the first pointer to a min_len sparse block in [start,
+ // end) if there is one, otherwise a pointer to the start of any
+ // trailing run of zeros. If there are no trailing zeros, return
+ // end.
+ if (start == end)
+ return (byte *) end;
+ assert(start < end);
+ assert(min_len);
+ // Probe in min_len jumps, searching backward from the jump
+ // destination for a non-zero byte. If such a byte is found, move
+ // just past it and try again.
+ const byte *candidate = start;
+ // End of any run of zeros, starting at candidate, that we've already seen
+ const byte *end_of_known_zeros = candidate;
+ while (end - candidate >= min_len) // Handle all min_len candidate blocks
+ {
+ const byte * const probe_end = candidate + min_len;
+ // Only the bytes not already known to be zero need to be examined.
+ const byte * const trailing_zeros =
+ find_trailing_zeros(end_of_known_zeros, probe_end);
+ // The byte just before probe_end is non-zero, so no block
+ // starting before probe_end can qualify; restart from there.
+ if (trailing_zeros == probe_end)
+ end_of_known_zeros = candidate = probe_end;
+ // Everything from end_of_known_zeros to probe_end is zero, and
+ // so is everything from candidate to end_of_known_zeros: a full
+ // min_len block of zeros starts at candidate.
+ else if (trailing_zeros == end_of_known_zeros)
+ {
+ assert(candidate >= start);
+ assert(candidate <= end);
+ assert(*candidate == 0);
+ return (byte *) candidate;
+ }
+ // A shorter run of zeros reaches probe_end; make its start the
+ // next candidate and remember that it is zero up to probe_end.
+ else
+ {
+ candidate = trailing_zeros;
+ end_of_known_zeros = probe_end;
+ }
+ }
+
+ if (candidate == end)
+ return (byte *) end;
+
+ // No min_len sparse run found, search backward from end
+ const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros,
+ end);
+
+ // Everything after the known zeros is zero too, so the trailing
+ // run of zeros begins at candidate.
+ if (trailing_zeros == end_of_known_zeros)
+ {
+ assert(candidate >= start);
+ assert(candidate < end);
+ assert(*candidate == 0);
+ assert(end - candidate < min_len);
+ return (byte *) candidate;
+ }
+
+ // The final byte is non-zero: there is no trailing run of zeros.
+ if (trailing_zeros == end)
+ {
+ assert(*(end - 1) != 0);
+ return (byte *) end;
+ }
+
+ // Otherwise a trailing run shorter than min_len starts at trailing_zeros.
+ assert(end - trailing_zeros < min_len);
+ assert(trailing_zeros >= start);
+ assert(trailing_zeros < end);
+ assert(*trailing_zeros == 0);
+ return (byte *) trailing_zeros;
+}
+
+
static PyObject *bup_write_sparsely(PyObject *self, PyObject *args)
{
int fd;
if (!INTEGRAL_ASSIGNMENT_FITS(&buf_len, sbuf_len))
return PyErr_Format(PyExc_OverflowError, "buffer length too large");
- // The value of zeros_read indicates the number of zeros read from
- // buf that haven't been accounted for yet (with respect to cur),
- // while zeros indicates the total number of pending zeros, which
- // could be larger in the first iteration if prev_sparse_len
- // wasn't zero.
- int rc;
- unsigned long long unexamined = buf_len;
- unsigned char *block_start = buf, *cur = buf;
- unsigned long long zeros, zeros_read = count_leading_zeros(cur, unexamined);
- assert(zeros_read <= unexamined);
- unexamined -= zeros_read;
- if (!uadd(&zeros, prev_sparse_len, zeros_read))
+ const byte * block = buf; // Start of pending block
+ const byte * const end = buf + buf_len;
+ unsigned long long zeros = prev_sparse_len;
+ while (1)
{
- PyObject *err = append_sparse_region(fd, prev_sparse_len);
- if (err != NULL)
- return err;
- zeros = zeros_read;
- }
+ assert(block <= end);
+ if (block == end)
+ return PyLong_FromUnsignedLongLong(zeros);
- while(unexamined)
- {
- if (zeros < min_sparse_len)
- cur += zeros_read;
- else
+ if (*block != 0)
{
- rc = write_all(fd, block_start, cur - block_start);
- if (rc)
- return PyErr_SetFromErrno(PyExc_IOError);
+ // Look for the end of block, i.e. the next sparse run of
+ // at least min_sparse_len zeros, or the end of the
+ // buffer.
+ const byte * const probe = find_non_sparse_end(block + 1, end,
+ min_sparse_len);
+ // Either at end of block, or end of non-sparse; write pending data
PyObject *err = append_sparse_region(fd, zeros);
if (err != NULL)
return err;
- cur += zeros_read;
- block_start = cur;
- }
- // Pending zeros have ether been made sparse, or are going to
- // be rolled into the next non-sparse block since we know we
- // now have at least one unexamined non-zero byte.
- assert(unexamined && *cur != 0);
- zeros = zeros_read = 0;
- while (unexamined && *cur != 0)
- {
- cur++; unexamined--;
+ int rc = write_all(fd, block, probe - block);
+ if (rc)
+ return PyErr_SetFromErrno(PyExc_IOError);
+
+ if (end - probe < min_sparse_len)
+ zeros = end - probe;
+ else
+ zeros = min_sparse_len;
+ block = probe + zeros;
}
- if (unexamined)
+ else // *block == 0
{
- zeros_read = count_leading_zeros(cur, unexamined);
- assert(zeros_read <= unexamined);
- unexamined -= zeros_read;
- zeros = zeros_read;
+ // Should be in the first loop iteration, a sparse run of
+ // zeros, or nearly at the end of the block (within
+ // min_sparse_len).
+ const byte * const zeros_end = find_not_zero(block, end);
+ PyObject *err = record_sparse_zeros(&zeros, fd,
+ zeros, zeros_end - block);
+ if (err != NULL)
+ return err;
+ assert(block <= zeros_end);
+ block = zeros_end;
}
}
- rc = write_all(fd, block_start, cur - block_start);
- if (rc)
- return PyErr_SetFromErrno(PyExc_IOError);
- return PyLong_FromUnsignedLongLong(zeros);
}
{
struct idx *idx;
uint32_t new_prefix;
- if (count % 102424 == 0 && istty2)
+ if (count % 102424 == 0 && get_state(self)->istty2)
fprintf(stderr, "midx: writing %.2f%% (%d/%d)\r",
count*100.0/total, count, total);
idx = idxs[last_i];
// The extents flag can't be removed, so don't (see chattr(1) and chattr.c).
orig_attr = 0; // Handle int/long mismatch (see above)
rc = ioctl(fd, FS_IOC_GETFLAGS, &orig_attr);
- assert(orig_attr <= UINT_MAX); // Kernel type is actually int
if (rc == -1)
{
close(fd);
return PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
}
+ assert(orig_attr <= UINT_MAX); // Kernel type is actually int
attr |= ((unsigned int) orig_attr) & FS_EXTENT_FL;
rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
return PyErr_Format(PyExc_OverflowError, "time value too large");
struct tm tm;
+ tzset();
if(localtime_r(&ttime, &tm) == NULL)
return PyErr_SetFromErrno(PyExc_OSError);
#endif /* def HAVE_TM_TM_GMTOFF */
-#ifdef HAVE_MINCORE
-static PyObject *bup_fmincore(PyObject *self, PyObject *args)
+#ifdef BUP_MINCORE_BUF_TYPE
+// mincore(src, src_n, src_off, dest, dest_off): call the system
+// mincore() on the src_n bytes of the buffer src that start at offset
+// src_off, storing the result in the writable buffer dest starting at
+// offset dest_off.
+// Returns None on success.  Sets OverflowError when a region does not
+// fit its buffer or a value is out of range, and OSError (from errno)
+// when mincore() itself fails.  Both buffers are released on every
+// path after argument parsing has succeeded.
+// NOTE(review): dest is only checked against dest_off; nothing here
+// verifies that it has room for one entry per page of the source
+// region, so the caller must size dest accordingly.
+static PyObject *bup_mincore(PyObject *self, PyObject *args)
{
- int fd, rc;
- if (!PyArg_ParseTuple(args, "i", &fd))
+ Py_buffer src, dest;
+ PyObject *py_src_n, *py_src_off, *py_dest_off;
+
+ if (!PyArg_ParseTuple(args, buf_argf "*OOw*O",
+ &src, &py_src_n, &py_src_off,
+ &dest, &py_dest_off))
return NULL;
- struct stat st;
- rc = fstat(fd, &st);
- if (rc != 0)
- return PyErr_SetFromErrno(PyExc_OSError);
+ PyObject *result = NULL;
- if (st.st_size == 0)
- return Py_BuildValue("s", "");
-
- const size_t page_size = (size_t) sysconf (_SC_PAGESIZE);
- const off_t pref_chunk_size = 64 * 1024 * 1024;
- off_t chunk_size = page_size;
- if (page_size < pref_chunk_size)
- chunk_size = page_size * (pref_chunk_size / page_size);
- const off_t pages_per_chunk = chunk_size / page_size;
- const off_t page_count = (st.st_size + page_size - 1) / page_size;
- const off_t chunk_count = page_count / chunk_size > 0 ? page_count / chunk_size : 1;
- unsigned char * const result = malloc(page_count);
- if (result == NULL)
- return PyErr_SetFromErrno(PyExc_OSError);
+ unsigned long long src_n, src_off, dest_off;
+ if (!(bup_ullong_from_py(&src_n, py_src_n, "src_n")
+ && bup_ullong_from_py(&src_off, py_src_off, "src_off")
+ && bup_ullong_from_py(&dest_off, py_dest_off, "dest_off")))
+ goto clean_and_return;
- off_t ci;
- for(ci = 0; ci < chunk_count; ci++)
- {
- const off_t pos = chunk_size * ci;
- const size_t msize = chunk_size < st.st_size - pos ? chunk_size : st.st_size - pos;
- void *m = mmap(NULL, msize, PROT_NONE, MAP_SHARED, fd, pos);
- if (m == MAP_FAILED)
- {
- free(result);
- return PyErr_SetFromErrno(PyExc_OSError);
- }
- rc = mincore(m, msize, &result[ci * pages_per_chunk]);
- if (rc != 0)
- {
- const int errno_stash = errno;
- rc = munmap(m, msize);
- if (rc != 0)
- {
- char buf[512];
- char *msg = strerror_r(errno, buf, 512);
- if (rc != 0)
- fprintf(stderr, "%s:%d: strerror_r failed (%d)\n",
- __FILE__, __LINE__, rc < 0 ? errno : rc);
- else
- fprintf(stderr,
- "%s:%d: munmap failed after mincore failed (%s)\n",
- __FILE__, __LINE__, msg);
- }
- free(result);
- errno = errno_stash;
- return PyErr_SetFromErrno(PyExc_OSError);
- }
- rc = munmap(m, msize);
- if (rc != 0)
- {
- free(result);
- return PyErr_SetFromErrno(PyExc_OSError);
- }
+ // The source region [src_off, src_off + src_n) must lie inside src.
+ unsigned long long src_region_end;
+ if (!uadd(&src_region_end, src_off, src_n)) {
+ result = PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large");
+ goto clean_and_return;
+ }
+ // Make the signed-to-unsigned conversion of the buffer length explicit.
+ if (src_region_end > (unsigned long long) src.len) {
+ result = PyErr_Format(PyExc_OverflowError, "region runs off end of src");
+ goto clean_and_return;
+ }
+
+ unsigned long long dest_size;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len)) {
+ result = PyErr_Format(PyExc_OverflowError, "invalid dest size");
+ goto clean_and_return;
+ }
+ if (dest_off > dest_size) {
+ result = PyErr_Format(PyExc_OverflowError, "region runs off end of dest");
+ goto clean_and_return;
+ }
+
+ size_t length;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n)) {
+ result = PyErr_Format(PyExc_OverflowError, "src_n overflows size_t");
+ goto clean_and_return;
+ }
+ // Pass the size_t that was just range-checked, and do the offset
+ // arithmetic on char pointers: arithmetic on void * is not standard C.
+ int rc = mincore((void *) ((char *) src.buf + src_off), length,
+ (BUP_MINCORE_BUF_TYPE *) ((char *) dest.buf + dest_off));
+ if (rc != 0) {
+ result = PyErr_SetFromErrno(PyExc_OSError);
+ goto clean_and_return;
}
- PyObject *py_result = Py_BuildValue("s#", result, page_count);
- free(result);
- return py_result;
+ result = Py_BuildValue("O", Py_None);
+
+ clean_and_return:
+ PyBuffer_Release(&src);
+ PyBuffer_Release(&dest);
+ return result;
}
-#endif /* def HAVE_MINCORE */
+#endif /* def BUP_MINCORE_BUF_TYPE */
static PyMethodDef helper_methods[] = {
{ "localtime", bup_localtime, METH_VARARGS,
"Return struct_time elements plus the timezone offset and name." },
#endif
-#ifdef HAVE_MINCORE
- { "fmincore", bup_fmincore, METH_VARARGS,
- "Return mincore() information for the provided file descriptor." },
+ { "bytescmp", bup_bytescmp, METH_VARARGS,
+ "Return a negative value if x < y, zero if equal, positive otherwise."},
+#ifdef BUP_MINCORE_BUF_TYPE
+ { "mincore", bup_mincore, METH_VARARGS,
+ "For mincore(src, src_n, src_off, dest, dest_off)"
+ " call the system mincore(src + src_off, src_n, &dest[dest_off])." },
#endif
{ NULL, NULL, 0, NULL }, // sentinel
};
-
-PyMODINIT_FUNC init_helpers(void)
+static int setup_module(PyObject *m)
{
- // FIXME: migrate these tests to configure. Check against the
- // type we're going to use when passing to python. Other stat
- // types are tested at runtime.
+ // FIXME: migrate these tests to configure, or at least don't
+ // possibly crash the whole application. Check against the type
+ // we're going to use when passing to python. Other stat types
+ // are tested at runtime.
assert(sizeof(ino_t) <= sizeof(unsigned PY_LONG_LONG));
assert(sizeof(off_t) <= sizeof(PY_LONG_LONG));
assert(sizeof(blksize_t) <= sizeof(PY_LONG_LONG));
assert(sizeof(PY_LONG_LONG) <= sizeof(long long));
assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long));
- if (sizeof(off_t) < sizeof(int))
+ // Originally required by append_sparse_region()
{
- // Originally required by append_sparse_region().
- fprintf(stderr, "sizeof(off_t) < sizeof(int); please report.\n");
- exit(1);
+ off_t probe;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&probe, INT_MAX))
+ {
+ fprintf(stderr, "off_t can't hold INT_MAX; please report.\n");
+ exit(1);
+ }
}
char *e;
- PyObject *m = Py_InitModule("_helpers", helper_methods);
- if (m == NULL)
- return;
-
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY().
+ {
+ PyObject *value;
+ value = INTEGER_TO_PY(INT_MAX);
+ PyObject_SetAttrString(m, "INT_MAX", value);
+ Py_DECREF(value);
+ value = INTEGER_TO_PY(UINT_MAX);
+ PyObject_SetAttrString(m, "UINT_MAX", value);
+ Py_DECREF(value);
+ }
#ifdef HAVE_UTIMENSAT
{
PyObject *value;
Py_DECREF(value);
}
#endif
+#ifdef BUP_HAVE_MINCORE_INCORE
{
PyObject *value;
- const long arg_max = sysconf(_SC_ARG_MAX);
- if (arg_max == -1)
- {
- fprintf(stderr, "Cannot find SC_ARG_MAX, please report a bug.\n");
- exit(1);
- }
- value = INTEGER_TO_PY(arg_max);
- PyObject_SetAttrString(m, "SC_ARG_MAX", value);
+ value = INTEGER_TO_PY(MINCORE_INCORE);
+ PyObject_SetAttrString(m, "MINCORE_INCORE", value);
Py_DECREF(value);
}
+#endif
#pragma clang diagnostic pop // ignored "-Wtautological-compare"
e = getenv("BUP_FORCE_TTY");
- istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
+ get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
unpythonize_argv();
+ return 1;
+}
+
+
+#if PY_MAJOR_VERSION < 3
+
+// Python 2 entry point: create the _helpers module with
+// Py_InitModule() and populate it via setup_module().
+PyMODINIT_FUNC init_helpers(void)
+{
+    PyObject *m = Py_InitModule("_helpers", helper_methods);
+    if (m == NULL)
+        return;
+
+    // Py_InitModule() hands back a borrowed reference, so the module
+    // must not be released here, even when setup fails.
+    if (!setup_module(m))
+        return;
+}
+
+# else // PY_MAJOR_VERSION >= 3
+
+// Python 3 module definition for _helpers.  m_size reserves room for
+// one state_t per module instance; members not named here stay
+// zero/NULL.
+static struct PyModuleDef helpers_def = {
+    .m_base = PyModuleDef_HEAD_INIT,
+    .m_name = "_helpers",
+    .m_doc = NULL,
+    .m_size = sizeof(state_t),
+    .m_methods = helper_methods,
+    .m_traverse = NULL, // helpers_traverse,
+    .m_clear = NULL, // helpers_clear,
+    .m_free = NULL
+};
+
+// Python 3 entry point: create the module from helpers_def and
+// populate it via setup_module().  Returns the new module, or NULL if
+// creation or setup fails (the module is released in the latter case).
+PyMODINIT_FUNC PyInit__helpers(void)
+{
+    PyObject *module = PyModule_Create(&helpers_def);
+    if (module != NULL && !setup_module(module))
+    {
+        Py_DECREF(module);
+        module = NULL;
+    }
+    return module;
+}
+
+#endif // PY_MAJOR_VERSION >= 3