#include <errno.h>
#include <fcntl.h>
#include <arpa/inet.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
-#include <sys/mman.h>
+#include <string.h>
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
#ifdef HAVE_LINUX_FS_H
#include <linux/fs.h>
#include <sys/ioctl.h>
#endif
+#ifdef HAVE_TM_TM_GMTOFF
+#include <time.h>
+#endif
+
#include "bupsplit.h"
#if defined(FS_IOC_GETFLAGS) && defined(FS_IOC_SETFLAGS)
#define FS_NOCOW_FL 0
#endif
-static int istty2 = 0;
+
+typedef unsigned char byte;
+
+
+// Module state.  istty2 is the flag setup_module() computes from
+// isatty(2) and BUP_FORCE_TTY; the midx progress message tests it
+// before printing to stderr.
+typedef struct {
+ int istty2;
+} state_t;
+
+// Python 2 keeps the state in a single file-level static, so
+// get_state() ignores its argument there; Python 3 fetches it from the
+// module object with PyModule_GetState().  cstr_argf is the
+// PyArg_ParseTuple() format unit used for C-string arguments: "s" on
+// Python 2, "y" on Python 3.
+#if PY_MAJOR_VERSION < 3
+static state_t state;
+# define get_state(x) (&state)
+# define cstr_argf "s"
+#else
+# define get_state(x) ((state_t *) PyModule_GetState(x))
+# define cstr_argf "y"
+#endif // PY_MAJOR_VERSION >= 3
#ifndef htonll
#endif
+// Assign src to *dest and evaluate to true only if the stored value
+// compares equal to src and agrees with src on being < 1, i.e. the
+// assignment neither changed the value nor flipped its sign.  *dest is
+// written even when the result is false, and src is evaluated more
+// than once, so it must not have side effects.  Uses the GNU
+// statement-expression extension ({ ... }).
+#define INTEGRAL_ASSIGNMENT_FITS(dest, src) \
+ ({ \
+ *(dest) = (src); \
+ *(dest) == (src) && (*(dest) < 1) == ((src) < 1); \
+ })
+
+
// At the moment any code that calls INTGER_TO_PY() will have to
// disable -Wtautological-compare for clang. See below.
(((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x))
+
+#if PY_MAJOR_VERSION < 3
static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
const char *name)
{
*x = tmp;
return 1;
}
+#endif
static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name)
{
+#if PY_MAJOR_VERSION < 3
if (PyInt_Check(py))
return bup_ulong_from_pyint(x, py, name);
+#endif
if (!PyLong_Check(py))
{
static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
const char *name)
{
+#if PY_MAJOR_VERSION < 3
if (PyInt_Check(py))
{
unsigned long tmp;
}
return 0;
}
+#endif
if (!PyLong_Check(py))
{
}
+// bytescmp(x, y): compare two bytes objects bytewise, memcmp()-style,
+// extended to operands of unequal length: when one is a prefix of the
+// other, the shorter one orders first.  Returns a Python integer that
+// is negative, zero, or positive, or NULL with an exception set when
+// the arguments are not bytes/str objects.
+static PyObject *bup_bytescmp(PyObject *self, PyObject *args)
+{
+    PyObject *py_lhs, *py_rhs;  // These are really PyBytes/PyString
+    if (!PyArg_ParseTuple(args, "SS", &py_lhs, &py_rhs))
+        return NULL;
+
+    char *lhs, *rhs;
+    Py_ssize_t lhs_len, rhs_len;
+    if (PyBytes_AsStringAndSize(py_lhs, &lhs, &lhs_len) == -1)
+        return NULL;
+    if (PyBytes_AsStringAndSize(py_rhs, &rhs, &rhs_len) == -1)
+        return NULL;
+
+    // The common prefix decides first.
+    Py_ssize_t common_len = lhs_len;
+    if (rhs_len < common_len)
+        common_len = rhs_len;
+    const int prefix_order = memcmp(lhs, rhs, common_len);
+    if (prefix_order != 0)
+        return PyLong_FromLong(prefix_order);
+
+    // Identical prefixes: the lengths decide.
+    long length_order = 0;
+    if (lhs_len < rhs_len)
+        length_order = -1;
+    else if (lhs_len > rhs_len)
+        length_order = 1;
+    return PyLong_FromLong(length_order);
+}
+
+
// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
#if __WIN32__ || __CYGWIN__
#endif // not __WIN32__ or __CYGWIN__
+// Write all count bytes of buf to fd, calling write(2) again after
+// each short write.  Returns 0 once everything has been written, or -1
+// as soon as a write fails, leaving errno as write() set it.
+static int write_all(int fd, const void *buf, const size_t count)
+{
+    // Arithmetic on void * is a GNU extension, so step through the
+    // buffer as bytes instead.
+    const char *pos = buf;
+    size_t remaining = count;
+    while (remaining > 0)
+    {
+        const ssize_t rc = write(fd, pos, remaining);
+        if (rc == -1)
+            return -1;
+        pos += rc;
+        remaining -= (size_t) rc;
+    }
+    return 0;
+}
+
+
+// Overflow-checked unsigned addition: store x + y in *dest and return
+// 1, or return 0 and leave *dest untouched when the sum wraps around.
+static int uadd(unsigned long long *dest,
+                const unsigned long long x,
+                const unsigned long long y)
+{
+    const unsigned long long sum = x + y;
+    // A sum that wrapped is smaller than an operand.
+    const int wrapped = (sum < x) || (sum < y);
+    if (!wrapped)
+        *dest = sum;
+    return !wrapped;
+}
+
+
+// Advance fd's file offset by n bytes with relative lseek() calls,
+// writing no data.  The distance is applied in one step when n fits in
+// off_t, otherwise in INT_MAX-sized steps until n is used up.
+//
+// NOTE(review): returns NULL on success.  On lseek() failure it sets
+// IOError from errno and returns PyErr_SetFromErrno()'s result, which
+// the CPython documentation says is always NULL too, so a caller
+// cannot detect failure by comparing the return value against NULL;
+// PyErr_Occurred() is the reliable test.
+static PyObject *append_sparse_region(const int fd, unsigned long long n)
+{
+ while (n)
+ {
+ off_t new_off;
+ // Fall back to a step size that off_t can represent.
+ if (!INTEGRAL_ASSIGNMENT_FITS(&new_off, n))
+ new_off = INT_MAX;
+ const off_t off = lseek(fd, new_off, SEEK_CUR);
+ if (off == (off_t) -1)
+ return PyErr_SetFromErrno(PyExc_IOError);
+ n -= new_off;
+ }
+ return NULL;
+}
+
+
+// Add count additional sparse zeros to prev_pending and store the
+// result in *new_pending.  If the total won't fit in an unsigned long
+// long, first seek fd past the prev_pending zeros and restart the
+// tally at count.
+//
+// Returns NULL on success.  On failure a Python exception is set,
+// *new_pending is left untouched, and the (NULL) result of
+// append_sparse_region() is returned, so callers must use
+// PyErr_Occurred() to tell the two outcomes apart.
+static PyObject *record_sparse_zeros(unsigned long long *new_pending,
+                                     const int fd,
+                                     unsigned long long prev_pending,
+                                     const unsigned long long count)
+{
+    if (uadd(new_pending, prev_pending, count))
+        return NULL;
+
+    // append_sparse_region() reports failure via PyErr_SetFromErrno(),
+    // which always returns NULL, so its return value alone can't
+    // reveal a failed seek; ask the interpreter as well.  Without this
+    // the pending zeros would be silently dropped after a failed seek.
+    PyObject *err = append_sparse_region(fd, prev_pending);
+    if (err != NULL || PyErr_Occurred())
+        return err;
+    *new_pending = count;
+    return NULL;
+}
+
+
+// Scan [start, end) forward and return a pointer to the first non-zero
+// byte, or end when every byte in the range is zero.
+static byte* find_not_zero(const byte * const start, const byte * const end)
+{
+    assert(start <= end);
+    const unsigned char *pos;
+    for (pos = start; pos < end; pos++)
+    {
+        if (*pos != 0)
+            break;
+    }
+    return (byte *) pos;
+}
+
+
+// Return a pointer to the first byte of the run of zeros that ends at
+// end, or end itself when the last byte of [start, end) is non-zero or
+// the range is empty.
+static byte* find_trailing_zeros(const byte * const start,
+                                 const byte * const end)
+{
+    assert(start <= end);
+    // Back up for as long as the byte just before run_start is zero.
+    const byte *run_start = end;
+    while (run_start > start && run_start[-1] == 0)
+        run_start--;
+    return (byte *) run_start;
+}
+
+
+static byte *find_non_sparse_end(const byte * const start,
+ const byte * const end,
+ const unsigned long long min_len)
+{
+ // Return the first pointer to a min_len sparse block in [start,
+ // end) if there is one, otherwise a pointer to the start of any
+ // trailing run of zeros. If there are no trailing zeros, return
+ // end.
+ //
+ // min_len must be non-zero (asserted below) unless the range is
+ // empty, in which case end is returned before the check.
+ if (start == end)
+ return (byte *) end;
+ assert(start < end);
+ assert(min_len);
+ // Probe in min_len jumps, searching backward from the jump
+ // destination for a non-zero byte. If such a byte is found, move
+ // just past it and try again.
+ const byte *candidate = start;
+ // End of any run of zeros, starting at candidate, that we've already seen
+ const byte *end_of_known_zeros = candidate;
+ while (end - candidate >= min_len) // Handle all min_len candidate blocks
+ {
+ const byte * const probe_end = candidate + min_len;
+ // Only the part of the block not already known to be zero needs
+ // to be scanned.
+ const byte * const trailing_zeros =
+ find_trailing_zeros(end_of_known_zeros, probe_end);
+ // The block's last byte is non-zero, so no zero run can span
+ // it; restart the search just past the block.
+ if (trailing_zeros == probe_end)
+ end_of_known_zeros = candidate = probe_end;
+ // Everything from end_of_known_zeros to probe_end is zero, and
+ // so is everything from candidate up to there: the whole
+ // min_len block starting at candidate is sparse.
+ else if (trailing_zeros == end_of_known_zeros)
+ {
+ assert(candidate >= start);
+ assert(candidate <= end);
+ assert(*candidate == 0);
+ return (byte *) candidate;
+ }
+ // The block ends in a zero run that begins partway through it;
+ // make the start of that run the next candidate and remember
+ // that it is zero up to probe_end.
+ else
+ {
+ candidate = trailing_zeros;
+ end_of_known_zeros = probe_end;
+ }
+ }
+
+ if (candidate == end)
+ return (byte *) end;
+
+ // No min_len sparse run found, search backward from end
+ const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros,
+ end);
+
+ // The unscanned tail is all zeros, so the trailing run starts at
+ // candidate (it is shorter than min_len, as asserted below).
+ if (trailing_zeros == end_of_known_zeros)
+ {
+ assert(candidate >= start);
+ assert(candidate < end);
+ assert(*candidate == 0);
+ assert(end - candidate < min_len);
+ return (byte *) candidate;
+ }
+
+ // The buffer ends in a non-zero byte: there are no trailing zeros.
+ if (trailing_zeros == end)
+ {
+ assert(*(end - 1) != 0);
+ return (byte *) end;
+ }
+
+ // Otherwise a short trailing run starts inside the unscanned tail.
+ assert(end - trailing_zeros < min_len);
+ assert(trailing_zeros >= start);
+ assert(trailing_zeros < end);
+ assert(*trailing_zeros == 0);
+ return (byte *) trailing_zeros;
+}
+
+
+// write_sparsely(fd, buf, min_sparse_len, prev_sparse_len)
+//
+// Write buf to fd, skipping zeros with lseek() instead of writing them
+// wherever the scan below treats them as sparse: leading zeros, runs
+// that find_non_sparse_end() reports (at least min_sparse_len long),
+// and the trailing zeros at the end of buf.  prev_sparse_len is the
+// count of zeros carried in by the caller that have not been seeked
+// over yet.  Pending zeros are normally seeked over only once non-zero
+// data follows them, so the count still pending at the end of buf is
+// returned as a Python integer.  Returns NULL with an exception set on
+// failure (bad arguments, or IOError from lseek()/write()).
+static PyObject *bup_write_sparsely(PyObject *self, PyObject *args)
+{
+    // "t#" only exists in Python 2; Python 3 spells a read-only
+    // bytes-like argument "y#".
+#if PY_MAJOR_VERSION < 3
+    static const char arg_format[] = "it#OO";
+#else
+    static const char arg_format[] = "iy#OO";
+#endif
+    int fd;
+    unsigned char *buf = NULL;
+    Py_ssize_t sbuf_len;
+    PyObject *py_min_sparse_len, *py_prev_sparse_len;
+    if (!PyArg_ParseTuple(args, arg_format,
+                          &fd, &buf, &sbuf_len,
+                          &py_min_sparse_len, &py_prev_sparse_len))
+        return NULL;
+    unsigned long long min_sparse_len, prev_sparse_len, buf_len;
+    if (!bup_ullong_from_py(&min_sparse_len, py_min_sparse_len, "min_sparse_len"))
+        return NULL;
+    if (!bup_ullong_from_py(&prev_sparse_len, py_prev_sparse_len, "prev_sparse_len"))
+        return NULL;
+    if (sbuf_len < 0)
+        return PyErr_Format(PyExc_ValueError, "negative buffer length");
+    if (!INTEGRAL_ASSIGNMENT_FITS(&buf_len, sbuf_len))
+        return PyErr_Format(PyExc_OverflowError, "buffer length too large");
+
+    const byte * block = buf; // Start of pending block
+    const byte * const end = buf + buf_len;
+    unsigned long long zeros = prev_sparse_len;
+    while (1)
+    {
+        assert(block <= end);
+        if (block == end)
+            return PyLong_FromUnsignedLongLong(zeros);
+
+        if (*block != 0)
+        {
+            // Look for the end of block, i.e. the next sparse run of
+            // at least min_sparse_len zeros, or the end of the
+            // buffer.
+            const byte * const probe = find_non_sparse_end(block + 1, end,
+                                                           min_sparse_len);
+            // Either at end of block, or end of non-sparse; write
+            // pending data.  The sparse helpers hand back
+            // PyErr_SetFromErrno()'s result on failure, which is always
+            // NULL, so a failed seek is only visible through
+            // PyErr_Occurred().
+            PyObject *err = append_sparse_region(fd, zeros);
+            if (err != NULL || PyErr_Occurred())
+                return err;
+            int rc = write_all(fd, block, probe - block);
+            if (rc)
+                return PyErr_SetFromErrno(PyExc_IOError);
+
+            // What follows probe is zeros: either a full sparse run,
+            // or a shorter run reaching the end of the buffer.
+            if (end - probe < min_sparse_len)
+                zeros = end - probe;
+            else
+                zeros = min_sparse_len;
+            block = probe + zeros;
+        }
+        else // *block == 0
+        {
+            // Should be in the first loop iteration, a sparse run of
+            // zeros, or nearly at the end of the block (within
+            // min_sparse_len).
+            const byte * const zeros_end = find_not_zero(block, end);
+            PyObject *err = record_sparse_zeros(&zeros, fd,
+                                                zeros, zeros_end - block);
+            if (err != NULL || PyErr_Occurred())
+                return err;
+            assert(block <= zeros_end);
+            block = zeros_end;
+        }
+    }
+}
+
+
static PyObject *selftest(PyObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
unsigned char bytes[20];
};
+// Order two SHAs by comparing their raw bytes with memcmp(); the
+// result is negative, zero, or positive accordingly.
+static inline int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
+{
+    const size_t sha_len = sizeof sha1->bytes;
+    const int order = memcmp(sha1->bytes, sha2->bytes, sha_len);
+    return order;
+}
+
struct idx {
unsigned char *map;
int name_base;
};
-
-static int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
-{
- int i;
- for (i = 0; i < sizeof(struct sha); i++)
- if (sha1->bytes[i] != sha2->bytes[i])
- return sha1->bytes[i] - sha2->bytes[i];
- return 0;
-}
-
-
static void _fix_idx_order(struct idx **idxs, int *last_i)
{
struct idx *idx;
{
struct idx *idx;
uint32_t new_prefix;
- if (count % 102424 == 0 && istty2)
+ if (count % 102424 == 0 && get_state(self)->istty2)
fprintf(stderr, "midx: writing %.2f%% (%d/%d)\r",
count*100.0/total, count, total);
idx = idxs[last_i];
+// fadvise_done(fd, ofs, len): call posix_fadvise(fd, ofs, len,
+// POSIX_FADV_DONTNEED) where that constant is defined; otherwise only
+// the arguments are validated.  Raises OverflowError when ofs or len
+// does not fit in off_t.  Returns None.
static PyObject *fadvise_done(PyObject *self, PyObject *args)
{
int fd = -1;
- long long ofs = 0;
- if (!PyArg_ParseTuple(args, "iL", &fd, &ofs))
+ long long llofs, lllen = 0;
+ if (!PyArg_ParseTuple(args, "iLL", &fd, &llofs, &lllen))
return NULL;
+ // The values arrive as long long; refuse any that do not survive
+ // conversion to off_t rather than passing a truncated value on.
+ off_t ofs, len;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&ofs, llofs))
+ return PyErr_Format(PyExc_OverflowError,
+ "fadvise offset overflows off_t");
+ if (!INTEGRAL_ASSIGNMENT_FITS(&len, lllen))
+ return PyErr_Format(PyExc_OverflowError,
+ "fadvise length overflows off_t");
#ifdef POSIX_FADV_DONTNEED
- posix_fadvise(fd, 0, ofs, POSIX_FADV_DONTNEED);
+ // The return value of posix_fadvise() is not checked.
+ posix_fadvise(fd, ofs, len, POSIX_FADV_DONTNEED);
#endif
return Py_BuildValue("");
}
attr = 0; // Handle int/long mismatch (see above)
rc = ioctl(fd, FS_IOC_GETFLAGS, &attr);
- assert(attr <= UINT_MAX); // Kernel type is actually int
if (rc == -1)
{
close(fd);
return PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
}
-
close(fd);
+ assert(attr <= UINT_MAX); // Kernel type is actually int
return PyLong_FromUnsignedLong(attr);
}
#endif /* def BUP_HAVE_FILE_ATTRS */
// The extents flag can't be removed, so don't (see chattr(1) and chattr.c).
orig_attr = 0; // Handle int/long mismatch (see above)
rc = ioctl(fd, FS_IOC_GETFLAGS, &orig_attr);
- assert(orig_attr <= UINT_MAX); // Kernel type is actually int
if (rc == -1)
{
close(fd);
return PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
}
+ assert(orig_attr <= UINT_MAX); // Kernel type is actually int
attr |= ((unsigned int) orig_attr) & FS_EXTENT_FL;
rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
#endif
#endif
-
-#define INTEGRAL_ASSIGNMENT_FITS(dest, src) \
- ({ \
- *(dest) = (src); \
- *(dest) == (src) && (*(dest) < 1) == ((src) < 1); \
- })
-
-
#define ASSIGN_PYLONG_TO_INTEGRAL(dest, pylong, overflow) \
({ \
int result = 0; \
int rc;
char *filename;
- if (!PyArg_ParseTuple(args, "s", &filename))
+ if (!PyArg_ParseTuple(args, cstr_argf, &filename))
return NULL;
struct stat st;
int rc;
char *filename;
- if (!PyArg_ParseTuple(args, "s", &filename))
+ if (!PyArg_ParseTuple(args, cstr_argf, &filename))
return NULL;
struct stat st;
}
+#ifdef HAVE_TM_TM_GMTOFF
+// localtime(t): convert the integer time t to broken-down local time
+// with localtime_r() and return an 11-element list:
+//   [year, month (1-12), mday, hour, min, sec,
+//    wday (Monday == 0), yday (1-based), isdst, gmtoff, zone]
+// i.e. the Python struct_time fields followed by tm_gmtoff and
+// tm_zone.  Raises OverflowError if t does not fit in time_t and
+// OSError if localtime_r() fails.
+static PyObject *bup_localtime(PyObject *self, PyObject *args)
+{
+    long long lltime;
+    time_t ttime;
+    if (!PyArg_ParseTuple(args, "L", &lltime))
+        return NULL;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&ttime, lltime))
+        return PyErr_Format(PyExc_OverflowError, "time value too large");
+
+    struct tm tm;
+    tzset();
+    if (localtime_r(&ttime, &tm) == NULL)
+        return PyErr_SetFromErrno(PyExc_OSError);
+
+    // Match the Python struct_time values.  C counts tm_wday from
+    // Sunday while struct_time counts from Monday, hence the rotation.
+    // tm_gmtoff is a long, so it needs the "l" format unit rather than
+    // "i" to keep the varargs types in step.
+    return Py_BuildValue("[i,i,i,i,i,i,i,i,i,l,s]",
+                         1900 + tm.tm_year, tm.tm_mon + 1, tm.tm_mday,
+                         tm.tm_hour, tm.tm_min, tm.tm_sec,
+                         (tm.tm_wday + 6) % 7, tm.tm_yday + 1,
+                         tm.tm_isdst, (long) tm.tm_gmtoff, tm.tm_zone);
+}
+#endif /* def HAVE_TM_TM_GMTOFF */
+
+
+#ifdef BUP_MINCORE_BUF_TYPE
+// mincore(src, src_n, src_off, dest, dest_off)
+//
+// Call the system mincore(src + src_off, src_n, &dest[dest_off]).
+// src is a readable buffer and dest a writable one.  The source region
+// must lie inside src, and dest must have room after dest_off for the
+// one byte per page that mincore() stores.  Returns None; raises
+// OverflowError when a region does not fit and OSError when mincore()
+// itself fails.
+static PyObject *bup_mincore(PyObject *self, PyObject *args)
+{
+    Py_buffer src, dest;
+    PyObject *py_src_n, *py_src_off, *py_dest_off;
+
+    if (!PyArg_ParseTuple(args, cstr_argf "*OOw*O",
+                          &src, &py_src_n, &py_src_off,
+                          &dest, &py_dest_off))
+        return NULL;
+
+    PyObject *result = NULL;
+
+    unsigned long long src_n, src_off, dest_off;
+    if (!(bup_ullong_from_py(&src_n, py_src_n, "src_n")
+          && bup_ullong_from_py(&src_off, py_src_off, "src_off")
+          && bup_ullong_from_py(&dest_off, py_dest_off, "dest_off")))
+        goto clean_and_return;
+
+    unsigned long long src_region_end;
+    if (!uadd(&src_region_end, src_off, src_n)) {
+        result = PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large");
+        goto clean_and_return;
+    }
+    if (src_region_end > src.len) {
+        result = PyErr_Format(PyExc_OverflowError, "region runs off end of src");
+        goto clean_and_return;
+    }
+
+    unsigned long long dest_size;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len)) {
+        result = PyErr_Format(PyExc_OverflowError, "invalid dest size");
+        goto clean_and_return;
+    }
+    if (dest_off > dest_size) {
+        result = PyErr_Format(PyExc_OverflowError, "region runs off end of dest");
+        goto clean_and_return;
+    }
+
+    // mincore() stores one byte per page of the source region, so make
+    // sure that many bytes are available after dest_off; otherwise the
+    // kernel would write past the end of dest.
+    const long page_size = sysconf(_SC_PAGESIZE);
+    if (page_size < 1) {
+        result = PyErr_Format(PyExc_OSError, "unable to determine page size");
+        goto clean_and_return;
+    }
+    const unsigned long long upage_size = (unsigned long long) page_size;
+    const unsigned long long vec_len =
+        src_n / upage_size + ((src_n % upage_size) ? 1 : 0);
+    if (vec_len > dest_size - dest_off) {
+        result = PyErr_Format(PyExc_OverflowError,
+                              "dest too small for mincore result");
+        goto clean_and_return;
+    }
+
+    size_t length;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n)) {
+        result = PyErr_Format(PyExc_OverflowError, "src_n overflows size_t");
+        goto clean_and_return;
+    }
+    // Pass the range-checked size_t, and do the pointer arithmetic on
+    // char * (arithmetic on void * is a GNU extension).
+    int rc = mincore((void *) ((char *) src.buf + src_off), length,
+                     (BUP_MINCORE_BUF_TYPE *) ((char *) dest.buf + dest_off));
+    if (rc != 0) {
+        result = PyErr_SetFromErrno(PyExc_OSError);
+        goto clean_and_return;
+    }
+    result = Py_BuildValue("O", Py_None);
+
+ clean_and_return:
+    PyBuffer_Release(&src);
+    PyBuffer_Release(&dest);
+    return result;
+}
+#endif /* def BUP_MINCORE_BUF_TYPE */
+
+
static PyMethodDef helper_methods[] = {
+ { "write_sparsely", bup_write_sparsely, METH_VARARGS,
+ "Write buf excepting zeros at the end. Return trailing zero count." },
{ "selftest", selftest, METH_VARARGS,
"Check that the rolling checksum rolls correctly (for unit tests)." },
{ "blobbits", blobbits, METH_VARARGS,
"Extended version of lstat." },
{ "fstat", bup_fstat, METH_VARARGS,
"Extended version of fstat." },
+#ifdef HAVE_TM_TM_GMTOFF
+ { "localtime", bup_localtime, METH_VARARGS,
+ "Return struct_time elements plus the timezone offset and name." },
+#endif
+ { "bytescmp", bup_bytescmp, METH_VARARGS,
+ "Return a negative value if x < y, zero if equal, positive otherwise."},
+#ifdef BUP_MINCORE_BUF_TYPE
+ { "mincore", bup_mincore, METH_VARARGS,
+ "For mincore(src, src_n, src_off, dest, dest_off)"
+ " call the system mincore(src + src_off, src_n, &dest[dest_off])." },
+#endif
{ NULL, NULL, 0, NULL }, // sentinel
};
-
-PyMODINIT_FUNC init_helpers(void)
+static int setup_module(PyObject *m)
{
- // FIXME: migrate these tests to configure. Check against the
- // type we're going to use when passing to python. Other stat
- // types are tested at runtime.
+ // FIXME: migrate these tests to configure, or at least don't
+ // possibly crash the whole application. Check against the type
+ // we're going to use when passing to python. Other stat types
+ // are tested at runtime.
assert(sizeof(ino_t) <= sizeof(unsigned PY_LONG_LONG));
assert(sizeof(off_t) <= sizeof(PY_LONG_LONG));
assert(sizeof(blksize_t) <= sizeof(PY_LONG_LONG));
assert(sizeof(PY_LONG_LONG) <= sizeof(long long));
assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long));
- char *e;
- PyObject *m = Py_InitModule("_helpers", helper_methods);
- if (m == NULL)
- return;
+ // Originally required by append_sparse_region()
+ {
+ off_t probe;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&probe, INT_MAX))
+ {
+ fprintf(stderr, "off_t can't hold INT_MAX; please report.\n");
+ exit(1);
+ }
+ }
+ char *e;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY().
+ {
+ PyObject *value;
+ value = INTEGER_TO_PY(INT_MAX);
+ PyObject_SetAttrString(m, "INT_MAX", value);
+ Py_DECREF(value);
+ value = INTEGER_TO_PY(UINT_MAX);
+ PyObject_SetAttrString(m, "UINT_MAX", value);
+ Py_DECREF(value);
+ }
#ifdef HAVE_UTIMENSAT
{
PyObject *value;
Py_DECREF(value);
}
#endif
+#ifdef BUP_HAVE_MINCORE_INCORE
{
PyObject *value;
- const long arg_max = sysconf(_SC_ARG_MAX);
- if (arg_max == -1)
- {
- fprintf(stderr, "Cannot find SC_ARG_MAX, please report a bug.\n");
- exit(1);
- }
- value = INTEGER_TO_PY(arg_max);
- PyObject_SetAttrString(m, "SC_ARG_MAX", value);
+ value = INTEGER_TO_PY(MINCORE_INCORE);
+ PyObject_SetAttrString(m, "MINCORE_INCORE", value);
Py_DECREF(value);
}
+#endif
#pragma clang diagnostic pop // ignored "-Wtautological-compare"
e = getenv("BUP_FORCE_TTY");
- istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
+ get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
unpythonize_argv();
+ return 1;
+}
+
+
+#if PY_MAJOR_VERSION < 3
+
+// Python 2 module entry point: create the _helpers module and let
+// setup_module() populate it.
+PyMODINIT_FUNC init_helpers(void)
+{
+    PyObject *m = Py_InitModule("_helpers", helper_methods);
+    if (m == NULL)
+        return;
+
+    // Py_InitModule() returns a borrowed reference, so there is nothing
+    // to release if setup fails; dropping a reference here would
+    // over-release the module.
+    if (!setup_module(m))
+        return;
+}
+
+# else // PY_MAJOR_VERSION >= 3
+
+// Python 3 module definition for _helpers.  The module carries a
+// state_t as per-module state and has no traverse/clear/free hooks.
+static struct PyModuleDef helpers_def = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "_helpers",
+    .m_doc = NULL,
+    .m_size = sizeof(state_t),
+    .m_methods = helper_methods,
+    .m_traverse = NULL, // helpers_traverse,
+    .m_clear = NULL, // helpers_clear,
+    .m_free = NULL
+};
+
+// Python 3 module entry point: create the module from helpers_def and
+// run setup_module() on it.  Returns the module, or NULL if creation
+// fails or if setup fails (after releasing the module).
+PyMODINIT_FUNC PyInit__helpers(void)
+{
+    PyObject *module = PyModule_Create(&helpers_def);
+    if (module != NULL && !setup_module(module))
+    {
+        Py_DECREF(module);
+        module = NULL;
+    }
+    return module;
}
+
+#endif // PY_MAJOR_VERSION >= 3