+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_LINUX_FS_H
+#include <linux/fs.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+
+#ifdef HAVE_TM_TM_GMTOFF
+#include <time.h>
+#endif
+
+#include "bupsplit.h"
+
+#if defined(FS_IOC_GETFLAGS) && defined(FS_IOC_SETFLAGS)
+#define BUP_HAVE_FILE_ATTRS 1
+#endif
+
+/*
+ * Check for incomplete UTIMENSAT support (NetBSD 6), and if so,
+ * pretend we don't have it.
+ */
+#if !defined(AT_FDCWD) || !defined(AT_SYMLINK_NOFOLLOW)
+#undef HAVE_UTIMENSAT
+#endif
+
+#ifndef FS_NOCOW_FL
+// Of course, this assumes it's a bitfield value.
+#define FS_NOCOW_FL 0
+#endif
+
+
// Convenient alias for raw, unsigned byte data.
typedef unsigned char byte;

// Nonzero when stderr is a terminal; presumably set during module
// initialization elsewhere in this file — TODO confirm.
static int istty2 = 0;
+
+
#ifndef htonll
// Convert a 64-bit value from host byte order to network (big-endian) order.
// This function should technically be macro'd out if it's going to be used
// more than occasionally. As of this writing, it'll actually never be called
// in real world bup scenarios (because our packs are < MAX_INT bytes).
static uint64_t htonll(uint64_t value)
{
    // Run-time endianness probe: on a little-endian host the first byte of
    // the int holds 42.
    static const int endian_test = 42;

    if (*(char *)&endian_test == endian_test) // LSB-MSB
        // Byte-swap each 32-bit half with htonl(), then swap the halves.
        return ((uint64_t)htonl(value & 0xFFFFFFFF) << 32) | htonl(value >> 32);
    return value; // already in network byte order MSB-LSB
}
#endif
+
+
// Assign src to *dest, evaluating to nonzero iff the value survived the
// conversion intact (equal value and same sign side of 1, so a negative
// source assigned to an unsigned destination is rejected).  GCC/Clang
// statement expression; both arguments are evaluated more than once, so
// avoid side effects in dest/src.
#define INTEGRAL_ASSIGNMENT_FITS(dest, src)                             \
    ({                                                                  \
        *(dest) = (src);                                                \
        *(dest) == (src) && (*(dest) < 1) == ((src) < 1);               \
    })
+
+
// Convert an integral value to a Python int: non-negative values go through
// the unsigned conversion so the full unsigned range survives, negative
// values through the signed one.
// At the moment any code that calls INTEGER_TO_PY() will have to
// disable -Wtautological-compare for clang. See below.

#define INTEGER_TO_PY(x) \
    (((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x))
+
+
+static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
+ const char *name)
+{
+ const long tmp = PyInt_AsLong(py);
+ if (tmp == -1 && PyErr_Occurred())
+ {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError))
+ PyErr_Format(PyExc_OverflowError, "%s too big for unsigned long",
+ name);
+ return 0;
+ }
+ if (tmp < 0)
+ {
+ PyErr_Format(PyExc_OverflowError,
+ "negative %s cannot be converted to unsigned long", name);
+ return 0;
+ }
+ *x = tmp;
+ return 1;
+}
+
+
+static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name)
+{
+ if (PyInt_Check(py))
+ return bup_ulong_from_pyint(x, py, name);
+
+ if (!PyLong_Check(py))
+ {
+ PyErr_Format(PyExc_TypeError, "expected integer %s", name);
+ return 0;
+ }
+
+ const unsigned long tmp = PyLong_AsUnsignedLong(py);
+ if (PyErr_Occurred())
+ {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError))
+ PyErr_Format(PyExc_OverflowError, "%s too big for unsigned long",
+ name);
+ return 0;
+ }
+ *x = tmp;
+ return 1;
+}
+
+
+static int bup_uint_from_py(unsigned int *x, PyObject *py, const char *name)
+{
+ unsigned long tmp;
+ if (!bup_ulong_from_py(&tmp, py, name))
+ return 0;
+
+ if (tmp > UINT_MAX)
+ {
+ PyErr_Format(PyExc_OverflowError, "%s too big for unsigned int", name);
+ return 0;
+ }
+ *x = tmp;
+ return 1;
+}
+
// Extract an unsigned long long from any Python integer (int or long).
// Returns 1 on success with the value stored in *x, or 0 with a Python
// exception set.
static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
                              const char *name)
{
    if (PyInt_Check(py))
    {
        // Plain ints always fit in unsigned long long; route through the
        // unsigned long path so negatives are rejected consistently.
        unsigned long tmp;
        if (bup_ulong_from_pyint(&tmp, py, name))
        {
            *x = tmp;
            return 1;
        }
        return 0;
    }

    if (!PyLong_Check(py))
    {
        PyErr_Format(PyExc_TypeError, "integer argument expected for %s", name);
        return 0;
    }

    // (unsigned long long) -1 is the API's error sentinel; PyErr_Occurred()
    // distinguishes it from a legitimate result of the same value.
    const unsigned PY_LONG_LONG tmp = PyLong_AsUnsignedLongLong(py);
    if (tmp == (unsigned long long) -1 && PyErr_Occurred())
    {
        // Replace the generic overflow error with one naming the argument.
        if (PyErr_ExceptionMatches(PyExc_OverflowError))
            PyErr_Format(PyExc_OverflowError,
                         "%s too big for unsigned long long", name);
        return 0;
    }
    *x = tmp;
    return 1;
}
+
+
+// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
#if __WIN32__ || __CYGWIN__

// There's no 'ps' on win32 anyway, and Py_GetArgcArgv() isn't available.
static void unpythonize_argv(void) { }

#else // not __WIN32__

// For some reason this isn't declared in Python.h
extern void Py_GetArgcArgv(int *argc, char ***argv);

// Rewrite the process argv block in place so tools like ps(1) show the real
// command name instead of "python ...".  Only proceeds when the argv strings
// are laid out contiguously, which is verified first.
static void unpythonize_argv(void)
{
    int argc, i;
    char **argv, *arge;

    Py_GetArgcArgv(&argc, &argv);

    if (argc < 1)
        return; // nothing to rewrite; also avoids argv[argc-1] underflow below

    for (i = 0; i < argc-1; i++)
    {
        if (argv[i] + strlen(argv[i]) + 1 != argv[i+1])
        {
            // The argv block doesn't work the way we expected; it's unsafe
            // to mess with it.
            return;
        }
    }

    // One past the terminating NUL of the last argument.
    arge = argv[argc-1] + strlen(argv[argc-1]) + 1;

    if (strstr(argv[0], "python") && argv[1] == argv[0] + strlen(argv[0]) + 1)
    {
        char *p;
        size_t len, diff;
        p = strrchr(argv[1], '/');
        if (p)
        {
            p++;
            diff = p - argv[0];
            len = arge - p;
            // Slide the script's basename (and everything after it) down
            // over "python", then zero the now-unused tail of the block.
            memmove(argv[0], p, len);
            memset(arge - diff, 0, diff);
            // Shift the pointer table to match (argv[argc] is conventionally
            // NULL, which terminates the loop's source values).
            for (i = 0; i < argc; i++)
                argv[i] = argv[i+1] ? argv[i+1]-diff : NULL;
        }
    }
}

#endif // not __WIN32__ or __CYGWIN__
+
+
// Write count bytes from buf to fd, retrying on partial writes.  Returns 0
// on success, or -1 on write() failure with errno left set by write().
static int write_all(int fd, const void *buf, const size_t count)
{
    size_t written = 0;
    while (written < count)
    {
        // Cast for the offset: arithmetic on void * is a GNU extension,
        // not standard C.
        const ssize_t rc = write(fd, (const char *) buf + written,
                                 count - written);
        if (rc == -1)
            return -1;
        written += rc;
    }
    return 0;
}
+
+
// Compute x + y into *dest.  Returns 1 on success, or 0 (leaving *dest
// untouched) if the sum would overflow unsigned long long.
static int uadd(unsigned long long *dest,
                const unsigned long long x,
                const unsigned long long y)
{
    // Unsigned addition wraps, so a sum smaller than either operand means
    // the true result didn't fit.
    const unsigned long long sum = x + y;
    if (sum >= x && sum >= y)
    {
        *dest = sum;
        return 1;
    }
    return 0;
}
+
+
// Advance fd's file offset by n bytes with lseek(SEEK_CUR), creating a
// sparse region of n zero bytes in a file that is being written
// sequentially.  Returns NULL; on lseek failure an IOError is set.
// NOTE(review): PyErr_SetFromErrno() itself returns NULL, so success and
// failure both return NULL here — callers must use PyErr_Occurred() to
// detect failure; confirm they do.
static PyObject *append_sparse_region(const int fd, unsigned long long n)
{
    while (n)
    {
        off_t new_off;
        // Clamp each step so the seek distance fits in off_t.
        if (!INTEGRAL_ASSIGNMENT_FITS(&new_off, n))
            new_off = INT_MAX;
        const off_t off = lseek(fd, new_off, SEEK_CUR);
        if (off == (off_t) -1)
            return PyErr_SetFromErrno(PyExc_IOError);
        n -= new_off;
    }
    return NULL;
}
+
+
+static PyObject *record_sparse_zeros(unsigned long long *new_pending,
+ const int fd,
+ unsigned long long prev_pending,
+ const unsigned long long count)
+{
+ // Add count additional sparse zeros to prev_pending and store the
+ // result in new_pending, or if the total won't fit in
+ // new_pending, write some of the zeros to fd sparsely, and store
+ // the remaining sum in new_pending.
+ if (!uadd(new_pending, prev_pending, count))
+ {
+ PyObject *err = append_sparse_region(fd, prev_pending);
+ if (err != NULL)
+ return err;
+ *new_pending = count;
+ }
+ return NULL;
+}
+
+
+static const byte * find_not_zero(const byte * const start,
+ const byte * const end)
+{
+ // Return a pointer to first non-zero byte between start and end,
+ // or end if there isn't one.
+ assert(start <= end);
+ const unsigned char *cur = start;
+ while (cur < end && *cur == 0)
+ cur++;
+ return cur;
+}
+
+
+static const byte * const find_trailing_zeros(const byte * const start,
+ const byte * const end)
+{
+ // Return a pointer to the start of any trailing run of zeros, or
+ // end if there isn't one.
+ assert(start <= end);
+ if (start == end)
+ return end;
+ const byte * cur = end;
+ while (cur > start && *--cur == 0) {}
+ if (*cur == 0)
+ return cur;
+ else
+ return cur + 1;
+}
+
+
static const byte *find_non_sparse_end(const byte * const start,
                                       const byte * const end,
                                       const unsigned long long min_len)
{
    // Return the first pointer to a min_len sparse block in [start,
    // end) if there is one, otherwise a pointer to the start of any
    // trailing run of zeros. If there are no trailing zeros, return
    // end.
    if (start == end)
        return end;
    assert(start < end);
    assert(min_len);
    // Probe in min_len jumps, searching backward from the jump
    // destination for a non-zero byte. If such a byte is found, move
    // just past it and try again.
    const byte *candidate = start;
    // End of any run of zeros, starting at candidate, that we've already seen
    const byte *end_of_known_zeros = candidate;
    // Note: end - candidate (ptrdiff_t) is promoted to unsigned long long
    // for the comparison; safe because candidate never exceeds end.
    while (end - candidate >= min_len) // Handle all min_len candidate blocks
    {
        const byte * const probe_end = candidate + min_len;
        const byte * const trailing_zeros =
            find_trailing_zeros(end_of_known_zeros, probe_end);
        if (trailing_zeros == probe_end)
            // No trailing zeros: the byte just before probe_end is non-zero,
            // so this block can't be sparse; restart right after it.
            end_of_known_zeros = candidate = probe_end;
        else if (trailing_zeros == end_of_known_zeros)
        {
            // The zeros reach all the way back to what we'd already seen,
            // so [candidate, probe_end) is an all-zero min_len block.
            assert(candidate >= start);
            assert(candidate <= end);
            assert(*candidate == 0);
            return candidate;
        }
        else
        {
            // A non-zero byte interrupts the run; zeros resume at
            // trailing_zeros, which becomes the new candidate.
            candidate = trailing_zeros;
            end_of_known_zeros = probe_end;
        }
    }

    if (candidate == end)
        return end;

    // No min_len sparse run found, search backward from end
    const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros,
                                                            end);

    if (trailing_zeros == end_of_known_zeros)
    {
        // Tail is all zeros back to candidate: a (short) trailing zero run.
        assert(candidate >= start);
        assert(candidate < end);
        assert(*candidate == 0);
        assert(end - candidate < min_len);
        return candidate;
    }

    if (trailing_zeros == end)
    {
        // Buffer ends with a non-zero byte: no trailing zeros at all.
        assert(*(end - 1) != 0);
        return end;
    }

    // Trailing zero run shorter than min_len starting at trailing_zeros.
    assert(end - trailing_zeros < min_len);
    assert(trailing_zeros >= start);
    assert(trailing_zeros < end);
    assert(*trailing_zeros == 0);
    return trailing_zeros;
}
+
+
+static PyObject *bup_write_sparsely(PyObject *self, PyObject *args)
+{
+ int fd;
+ unsigned char *buf = NULL;
+ Py_ssize_t sbuf_len;
+ PyObject *py_min_sparse_len, *py_prev_sparse_len;
+ if (!PyArg_ParseTuple(args, "it#OO",
+ &fd, &buf, &sbuf_len,
+ &py_min_sparse_len, &py_prev_sparse_len))
+ return NULL;
+ unsigned long long min_sparse_len, prev_sparse_len, buf_len;
+ if (!bup_ullong_from_py(&min_sparse_len, py_min_sparse_len, "min_sparse_len"))
+ return NULL;
+ if (!bup_ullong_from_py(&prev_sparse_len, py_prev_sparse_len, "prev_sparse_len"))
+ return NULL;
+ if (sbuf_len < 0)
+ return PyErr_Format(PyExc_ValueError, "negative bufer length");
+ if (!INTEGRAL_ASSIGNMENT_FITS(&buf_len, sbuf_len))
+ return PyErr_Format(PyExc_OverflowError, "buffer length too large");
+
+ const byte * block = buf; // Start of pending block
+ const byte * const end = buf + buf_len;
+ unsigned long long zeros = prev_sparse_len;
+ while (1)
+ {
+ assert(block <= end);
+ if (block == end)
+ return PyLong_FromUnsignedLongLong(zeros);
+
+ if (*block != 0)
+ {
+ // Look for the end of block, i.e. the next sparse run of
+ // at least min_sparse_len zeros, or the end of the
+ // buffer.
+ const byte * const probe = find_non_sparse_end(block + 1, end,
+ min_sparse_len);
+ // Either at end of block, or end of non-sparse; write pending data
+ PyObject *err = append_sparse_region(fd, zeros);
+ if (err != NULL)
+ return err;
+ int rc = write_all(fd, block, probe - block);
+ if (rc)
+ return PyErr_SetFromErrno(PyExc_IOError);
+
+ if (end - probe < min_sparse_len)
+ zeros = end - probe;
+ else
+ zeros = min_sparse_len;
+ block = probe + zeros;
+ }
+ else // *block == 0
+ {
+ // Should be in the first loop iteration, a sparse run of
+ // zeros, or nearly at the end of the block (within
+ // min_sparse_len).
+ const byte * const zeros_end = find_not_zero(block, end);
+ PyObject *err = record_sparse_zeros(&zeros, fd,
+ zeros, zeros_end - block);
+ if (err != NULL)
+ return err;
+ assert(block <= zeros_end);
+ block = zeros_end;
+ }
+ }
+}
+