+#ifdef HAVE_LINUX_FS_H
+#include <linux/fs.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+
+#ifdef HAVE_TM_TM_GMTOFF
+#include <time.h>
+#endif
+
+#include "bupsplit.h"
+
+#if defined(FS_IOC_GETFLAGS) && defined(FS_IOC_SETFLAGS)
+#define BUP_HAVE_FILE_ATTRS 1
+#endif
+
+/*
+ * Check for incomplete UTIMENSAT support (NetBSD 6), and if so,
+ * pretend we don't have it.
+ */
+#if !defined(AT_FDCWD) || !defined(AT_SYMLINK_NOFOLLOW)
+#undef HAVE_UTIMENSAT
+#endif
+
+#ifndef FS_NOCOW_FL
+// Of course, this assumes it's a bitfield value.
+#define FS_NOCOW_FL 0
+#endif
+
+
// Shorthand for raw byte data throughout this module.
typedef unsigned char byte;


// Per-module state.  Python 3 builds store one of these via the module
// state API (see get_state/PyModule_GetState below); Python 2 builds use
// a single static instance.
// NOTE(review): istty2 presumably caches whether fd 2 is a tty — confirm
// where it is initialized (not visible in this chunk).
typedef struct {
    int istty2;
} state_t;
+
// PyArg_ParseTuple() format characters differ between Python 2 (str-based)
// and Python 3 (bytes-based) builds:
//   cstr_argf: for byte vectors without null characters (e.g. paths)
//   rbuf_argf: for read-only byte vectors
//   wbuf_argf: for mutable byte vectors

#if PY_MAJOR_VERSION < 3
// Python 2 has no per-module state API; fall back to one static instance.
static state_t state;
# define get_state(x) (&state)
# define cstr_argf "s"
# define rbuf_argf "s#"
# define wbuf_argf "s*"
#else
// Python 3: fetch the real per-module state from the module object.
# define get_state(x) ((state_t *) PyModule_GetState(x))
# define cstr_argf "y"
# define rbuf_argf "y#"
# define wbuf_argf "y*"
#endif // PY_MAJOR_VERSION >= 3
+
+
+static void *checked_malloc(size_t n, size_t size)
+{
+ size_t total;
+ if (__builtin_mul_overflow(n, size, &total))
+ {
+ PyErr_Format(PyExc_OverflowError,
+ "request to allocate %lu items of size %lu is too large",
+ n, size);
+ return NULL;
+ }
+ void *result = malloc(total);
+ if (!result)
+ return PyErr_NoMemory();
+ return result;
+}
+
// Zero-initialized array allocation with a Python MemoryError set on
// failure.  calloc itself checks n * size for overflow, so no manual
// check is needed here.  Caller owns the result and must free() it.
static void *checked_calloc(size_t n, size_t size)
{
    void *p = calloc(n, size);
    if (p == NULL)
        PyErr_NoMemory();
    return p;
}
+
+
#ifndef htonll
// Convert a 64-bit value from host to network (big-endian) byte order.
// This should technically be macro'd out if it's going to be used more
// than occasionally; as of this writing it'll actually never be called
// in real-world bup scenarios (because our packs are < MAX_INT bytes).
static uint64_t htonll(uint64_t value)
{
    const uint32_t probe = 1;

    if (*(const unsigned char *) &probe)  // first byte is LSB: little-endian
    {
        const uint64_t hi = htonl((uint32_t) (value & 0xFFFFFFFF));
        const uint64_t lo = htonl((uint32_t) (value >> 32));
        return (hi << 32) | lo;
    }
    return value; // big-endian host: already in network byte order
}
#endif
+
+
// Assign src to *dest and evaluate (as a GCC/clang statement expression)
// to non-zero iff the value survived the conversion intact: after the
// assignment the two must compare equal AND agree on sign (the `< 1`
// test distinguishes a negative source from a large unsigned value that
// round-trips to the same bit pattern).  Note that *dest is written even
// when the check fails.
//
// Disabling sign-compare here should be fine since we're explicitly
// checking for a sign mismatch, i.e. if the signs don't match, then
// it doesn't matter what the value comparison says.
// FIXME: ... so should we reverse the order?
#define INTEGRAL_ASSIGNMENT_FITS(dest, src)                             \
    ({                                                                  \
        _Pragma("GCC diagnostic push");                                 \
        _Pragma("GCC diagnostic ignored \"-Wsign-compare\"");           \
        *(dest) = (src);                                                \
        int result = *(dest) == (src) && (*(dest) < 1) == ((src) < 1);  \
        _Pragma("GCC diagnostic pop");                                  \
        result;                                                         \
    })
+
+
// Convert an integral value of unknown signedness to a Python int,
// using the unsigned conversion for non-negative values so the full
// unsigned range is representable.
//
// At the moment any code that calls INTEGER_TO_PY() will have to
// disable -Wtautological-compare for clang (the `(x) >= 0` test is
// always true when x has an unsigned type).  See below.
#define INTEGER_TO_PY(x) \
    (((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x))
+
+
+
+#if PY_MAJOR_VERSION < 3
+static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
+ const char *name)
+{
+ const long tmp = PyInt_AsLong(py);
+ if (tmp == -1 && PyErr_Occurred())
+ {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError))
+ PyErr_Format(PyExc_OverflowError, "%s too big for unsigned long",
+ name);
+ return 0;
+ }
+ if (tmp < 0)
+ {
+ PyErr_Format(PyExc_OverflowError,
+ "negative %s cannot be converted to unsigned long", name);
+ return 0;
+ }
+ *x = tmp;
+ return 1;
+}
+#endif
+
+
+static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name)
+{
+#if PY_MAJOR_VERSION < 3
+ if (PyInt_Check(py))
+ return bup_ulong_from_pyint(x, py, name);
+#endif
+
+ if (!PyLong_Check(py))
+ {
+ PyErr_Format(PyExc_TypeError, "expected integer %s", name);
+ return 0;
+ }
+
+ const unsigned long tmp = PyLong_AsUnsignedLong(py);
+ if (PyErr_Occurred())
+ {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError))
+ PyErr_Format(PyExc_OverflowError, "%s too big for unsigned long",
+ name);
+ return 0;
+ }
+ *x = tmp;
+ return 1;
+}
+
+
+static int bup_uint_from_py(unsigned int *x, PyObject *py, const char *name)
+{
+ unsigned long tmp;
+ if (!bup_ulong_from_py(&tmp, py, name))
+ return 0;
+
+ if (tmp > UINT_MAX)
+ {
+ PyErr_Format(PyExc_OverflowError, "%s too big for unsigned int", name);
+ return 0;
+ }
+ *x = tmp;
+ return 1;
+}
+
+static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
+ const char *name)
+{
+#if PY_MAJOR_VERSION < 3
+ if (PyInt_Check(py))
+ {
+ unsigned long tmp;
+ if (bup_ulong_from_pyint(&tmp, py, name))
+ {
+ *x = tmp;
+ return 1;
+ }
+ return 0;
+ }
+#endif
+
+ if (!PyLong_Check(py))
+ {
+ PyErr_Format(PyExc_TypeError, "integer argument expected for %s", name);
+ return 0;
+ }
+
+ const unsigned PY_LONG_LONG tmp = PyLong_AsUnsignedLongLong(py);
+ if (tmp == (unsigned long long) -1 && PyErr_Occurred())
+ {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError))
+ PyErr_Format(PyExc_OverflowError,
+ "%s too big for unsigned long long", name);
+ return 0;
+ }
+ *x = tmp;
+ return 1;
+}
+
+
+static PyObject *bup_bytescmp(PyObject *self, PyObject *args)
+{
+ PyObject *py_s1, *py_s2; // This is really a PyBytes/PyString
+ if (!PyArg_ParseTuple(args, "SS", &py_s1, &py_s2))
+ return NULL;
+ char *s1, *s2;
+ Py_ssize_t s1_len, s2_len;
+ if (PyBytes_AsStringAndSize(py_s1, &s1, &s1_len) == -1)
+ return NULL;
+ if (PyBytes_AsStringAndSize(py_s2, &s2, &s2_len) == -1)
+ return NULL;
+ const Py_ssize_t n = (s1_len < s2_len) ? s1_len : s2_len;
+ const int cmp = memcmp(s1, s2, n);
+ if (cmp != 0)
+ return PyLong_FromLong(cmp);
+ if (s1_len == s2_len)
+ return PyLong_FromLong(0);;
+ return PyLong_FromLong((s1_len < s2_len) ? -1 : 1);
+}
+
+
// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
#if __WIN32__ || __CYGWIN__

// There's no 'ps' on win32 anyway, and Py_GetArgcArgv() isn't available.
static void unpythonize_argv(void) { }

#else // not __WIN32__

// For some reason this isn't declared in Python.h
extern void Py_GetArgcArgv(int *argc, char ***argv);

// Rewrite the process's argv block in place so that tools reading it
// (e.g. ps) show the script name instead of the python interpreter.
// Relies on the argv strings being laid out contiguously in memory;
// bails out without touching anything if that isn't the case.
static void unpythonize_argv(void)
{
    int argc, i;
    char **argv, *arge;

    Py_GetArgcArgv(&argc, &argv);

    // Verify contiguity: each argument must end (NUL included) exactly
    // where the next one begins.
    for (i = 0; i < argc-1; i++)
    {
        if (argv[i] + strlen(argv[i]) + 1 != argv[i+1])
        {
            // The argv block doesn't work the way we expected; it's unsafe
            // to mess with it.
            return;
        }
    }

    // arge: one past the terminating NUL of the last argument.
    arge = argv[argc-1] + strlen(argv[argc-1]) + 1;

    // Only rewrite when argv[0] looks like a python interpreter and the
    // script path (argv[1]) immediately follows it in memory.
    if (strstr(argv[0], "python") && argv[1] == argv[0] + strlen(argv[0]) + 1)
    {
        char *p;
        size_t len, diff;
        p = strrchr(argv[1], '/');
        if (p)
        {
            p++;
            // Shift everything starting at the script's basename down to
            // the start of the block, then zero the now-unused tail so no
            // stale bytes remain visible.
            diff = p - argv[0];
            len = arge - p;
            memmove(argv[0], p, len);
            memset(arge - diff, 0, diff);
            // Slide the argv pointers down to match; argv[argc] is the
            // NULL terminator, so reading argv[i+1] at i == argc-1 is ok.
            for (i = 0; i < argc; i++)
                argv[i] = argv[i+1] ? argv[i+1]-diff : NULL;
        }
    }
}

#endif // not __WIN32__ or __CYGWIN__
+
+
// Write all count bytes of buf to fd, looping over short writes.
// Returns 0 on success, or -1 (with errno set by write) on error.
static int write_all(int fd, const void *buf, const size_t count)
{
    const unsigned char *cur = buf;
    const unsigned char * const end = cur + count;
    while (cur < end)
    {
        const ssize_t wrote = write(fd, cur, (size_t) (end - cur));
        if (wrote == -1)
            return -1;
        cur += wrote;
    }
    return 0;
}
+
+
// Overflow-checked unsigned addition: store x + y in *dest and return 1,
// or return 0 (leaving *dest untouched) if the sum wraps around.
static int uadd(unsigned long long *dest,
                const unsigned long long x,
                const unsigned long long y)
{
    const unsigned long long sum = x + y;
    if (sum < x || sum < y)
        return 0; // wrapped
    *dest = sum;
    return 1;
}
+
+
+static PyObject *append_sparse_region(const int fd, unsigned long long n)
+{
+ while (n)
+ {
+ off_t new_off;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&new_off, n))
+ new_off = INT_MAX;
+ const off_t off = lseek(fd, new_off, SEEK_CUR);
+ if (off == (off_t) -1)
+ return PyErr_SetFromErrno(PyExc_IOError);
+ n -= new_off;
+ }
+ return NULL;
+}
+
+
+static PyObject *record_sparse_zeros(unsigned long long *new_pending,
+ const int fd,
+ unsigned long long prev_pending,
+ const unsigned long long count)
+{
+ // Add count additional sparse zeros to prev_pending and store the
+ // result in new_pending, or if the total won't fit in
+ // new_pending, write some of the zeros to fd sparsely, and store
+ // the remaining sum in new_pending.
+ if (!uadd(new_pending, prev_pending, count))
+ {
+ PyObject *err = append_sparse_region(fd, prev_pending);
+ if (err != NULL)
+ return err;
+ *new_pending = count;
+ }
+ return NULL;
+}
+
+
// Return a pointer to the first non-zero byte in [start, end), or end
// if every byte in the range is zero.
static byte* find_not_zero(const byte * const start, const byte * const end)
{
    assert(start <= end);
    const byte *p = start;
    for (; p < end; p++)
    {
        if (*p != 0)
            break;
    }
    return (byte *) p;
}
+
+
// Return a pointer to the start of any trailing run of zeros in
// [start, end), or end if the range is empty or its last byte is
// non-zero.
static byte* find_trailing_zeros(const byte * const start,
                                 const byte * const end)
{
    assert(start <= end);
    const byte *p = end;
    // Walk backward while the byte just before p is zero.
    while (p > start && p[-1] == 0)
        p--;
    return (byte *) p;
}
+
+
// Return the first pointer to a min_len sparse block in [start,
// end) if there is one, otherwise a pointer to the start of any
// trailing run of zeros.  If there are no trailing zeros, return
// end.  (A "sparse block" here is a run of at least min_len zero
// bytes; the trailing-zeros result lets the caller carry a partial
// run over into the next buffer.)
static byte *find_non_sparse_end(const byte * const start,
                                 const byte * const end,
                                 const ptrdiff_t min_len)
{
    if (start == end)
        return (byte *) end;
    assert(start < end);
    assert(min_len);
    // Probe in min_len jumps, searching backward from the jump
    // destination for a non-zero byte.  If such a byte is found, move
    // just past it and try again.
    const byte *candidate = start;
    // End of any run of zeros, starting at candidate, that we've already seen
    const byte *end_of_known_zeros = candidate;
    while (end - candidate >= min_len) // Handle all min_len candidate blocks
    {
        const byte * const probe_end = candidate + min_len;
        const byte * const trailing_zeros =
            find_trailing_zeros(end_of_known_zeros, probe_end);
        if (trailing_zeros == probe_end)
            // No zeros trail this window's end; restart just past it.
            end_of_known_zeros = candidate = probe_end;
        else if (trailing_zeros == end_of_known_zeros)
        {
            // Zeros run unbroken from candidate through probe_end:
            // that's a full min_len sparse block starting at candidate.
            assert(candidate >= start);
            assert(candidate <= end);
            assert(*candidate == 0);
            return (byte *) candidate;
        }
        else
        {
            // Partial trailing run: it becomes the next candidate, and
            // everything up to probe_end is known to be zero.
            candidate = trailing_zeros;
            end_of_known_zeros = probe_end;
        }
    }

    if (candidate == end)
        return (byte *) end;

    // No min_len sparse run found, search backward from end
    const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros,
                                                            end);

    if (trailing_zeros == end_of_known_zeros)
    {
        // Zeros run unbroken from candidate to end (shorter than min_len).
        assert(candidate >= start);
        assert(candidate < end);
        assert(*candidate == 0);
        assert(end - candidate < min_len);
        return (byte *) candidate;
    }

    if (trailing_zeros == end)
    {
        // Buffer ends with a non-zero byte: nothing to carry over.
        assert(*(end - 1) != 0);
        return (byte *) end;
    }

    // A short (< min_len) trailing run of zeros begins at trailing_zeros.
    assert(end - trailing_zeros < min_len);
    assert(trailing_zeros >= start);
    assert(trailing_zeros < end);
    assert(*trailing_zeros == 0);
    return (byte *) trailing_zeros;
}
+
+
+static PyObject *bup_write_sparsely(PyObject *self, PyObject *args)
+{
+ int fd;
+ unsigned char *buf = NULL;
+ Py_ssize_t sbuf_len;
+ PyObject *py_min_sparse_len, *py_prev_sparse_len;
+ if (!PyArg_ParseTuple(args, "i" rbuf_argf "OO",
+ &fd, &buf, &sbuf_len,
+ &py_min_sparse_len, &py_prev_sparse_len))
+ return NULL;
+ ptrdiff_t min_sparse_len;
+ unsigned long long prev_sparse_len, buf_len, ul_min_sparse_len;
+ if (!bup_ullong_from_py(&ul_min_sparse_len, py_min_sparse_len, "min_sparse_len"))
+ return NULL;
+ if (!INTEGRAL_ASSIGNMENT_FITS(&min_sparse_len, ul_min_sparse_len))
+ return PyErr_Format(PyExc_OverflowError, "min_sparse_len too large");
+ if (!bup_ullong_from_py(&prev_sparse_len, py_prev_sparse_len, "prev_sparse_len"))
+ return NULL;
+ if (sbuf_len < 0)
+ return PyErr_Format(PyExc_ValueError, "negative bufer length");
+ if (!INTEGRAL_ASSIGNMENT_FITS(&buf_len, sbuf_len))
+ return PyErr_Format(PyExc_OverflowError, "buffer length too large");
+
+ const byte * block = buf; // Start of pending block
+ const byte * const end = buf + buf_len;
+ unsigned long long zeros = prev_sparse_len;
+ while (1)
+ {
+ assert(block <= end);
+ if (block == end)
+ return PyLong_FromUnsignedLongLong(zeros);
+
+ if (*block != 0)
+ {
+ // Look for the end of block, i.e. the next sparse run of
+ // at least min_sparse_len zeros, or the end of the
+ // buffer.
+ const byte * const probe = find_non_sparse_end(block + 1, end,
+ min_sparse_len);
+ // Either at end of block, or end of non-sparse; write pending data
+ PyObject *err = append_sparse_region(fd, zeros);
+ if (err != NULL)
+ return err;
+ int rc = write_all(fd, block, probe - block);
+ if (rc)
+ return PyErr_SetFromErrno(PyExc_IOError);
+
+ if (end - probe < min_sparse_len)
+ zeros = end - probe;
+ else
+ zeros = min_sparse_len;
+ block = probe + zeros;
+ }
+ else // *block == 0
+ {
+ // Should be in the first loop iteration, a sparse run of
+ // zeros, or nearly at the end of the block (within
+ // min_sparse_len).
+ const byte * const zeros_end = find_not_zero(block, end);
+ PyObject *err = record_sparse_zeros(&zeros, fd,
+ zeros, zeros_end - block);
+ if (err != NULL)
+ return err;
+ assert(block <= zeros_end);
+ block = zeros_end;
+ }
+ }
+}
+