]> arthur.barton.de Git - bup.git/blobdiff - lib/bup/_helpers.c
INTEGER_TO_PY(): include necessary compiler pragmas
[bup.git] / lib / bup / _helpers.c
index 30c627fa32c3114b4ed51c48807577afca11d9a3..ba8cd6035b6e8ecbf70d66b0d4175a9c3c58a068 100644 (file)
@@ -7,14 +7,17 @@
 //   http://docs.python.org/2/c-api/intro.html#include-files
 #include <Python.h>
 
+#include <arpa/inet.h>
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <arpa/inet.h>
+#include <grp.h>
+#include <pwd.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <string.h>
 
 #ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
@@ -28,6 +31,9 @@
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
 
 #ifdef HAVE_LINUX_FS_H
 #include <linux/fs.h>
 #include <time.h>
 #endif
 
+#if defined(BUP_RL_EXPECTED_XOPEN_SOURCE) \
+    && (!defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < BUP_RL_EXPECTED_XOPEN_SOURCE)
+# warning "_XOPEN_SOURCE version is incorrect for readline"
+#endif
+
+#ifdef BUP_HAVE_READLINE
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wstrict-prototypes"
+# ifdef BUP_READLINE_INCLUDES_IN_SUBDIR
+#   include <readline/readline.h>
+#   include <readline/history.h>
+# else
+#   include <readline.h>
+#   include <history.h>
+# endif
+# pragma GCC diagnostic pop
+#endif
+
 #include "bupsplit.h"
 
 #if defined(FS_IOC_GETFLAGS) && defined(FS_IOC_SETFLAGS)
 
 typedef unsigned char byte;
 
-static int istty2 = 0;
+
+typedef struct {
+    int istty2;  // checked by merge_into() before it prints progress to stderr
+} state_t;
+
+// cstr_argf: for byte vectors without null characters (e.g. paths)
+// rbuf_argf: for read-only byte vectors
+// wbuf_argf: for mutable byte vectors
+
+#if PY_MAJOR_VERSION < 3
+static state_t state;
+#  define get_state(x) (&state)
+#  define cstr_argf "s"
+#  define rbuf_argf "s#"
+#  define wbuf_argf "s*"
+#else
+#  define get_state(x) ((state_t *) PyModule_GetState(x))
+#  define cstr_argf "y"
+#  define rbuf_argf "y#"
+#  define wbuf_argf "y*"
+#endif // PY_MAJOR_VERSION >= 3
+
+
+// Allocate a zero-filled array of n items of the given size.  Returns
+// NULL with a Python MemoryError set if the allocation fails.  A
+// zero-sized request (e.g. from merge_into() with an empty index list)
+// is rounded up to one byte: calloc() is allowed to return NULL for a
+// zero-sized request, and callers treat NULL as an error, so that must
+// not be reported as running out of memory.  Release the result with
+// free().
+static void *checked_calloc(size_t n, size_t size)
+{
+    if (n == 0 || size == 0)
+        n = size = 1;
+    void *result = calloc(n, size);
+    if (!result)
+        PyErr_NoMemory();
+    return result;
+}
+
+#ifndef BUP_HAVE_BUILTIN_MUL_OVERFLOW
+
+#define checked_malloc checked_calloc
+
+#else // defined BUP_HAVE_BUILTIN_MUL_OVERFLOW
+
+// Allocate an uninitialized array of n items of the given size.
+// Returns NULL with a Python exception set on failure: OverflowError
+// if n * size does not fit in a size_t, MemoryError if malloc() fails.
+// A zero-sized request is rounded up to one byte: malloc() is allowed
+// to return NULL for a zero-sized request, and callers treat NULL as
+// an error.  Release the result with free().
+static void *checked_malloc(size_t n, size_t size)
+{
+    size_t total;
+    if (__builtin_mul_overflow(n, size, &total))
+    {
+        PyErr_Format(PyExc_OverflowError,
+                     "request to allocate %zu items of size %zu is too large",
+                     n, size);
+        return NULL;
+    }
+    void *result = malloc(total ? total : 1);
+    if (!result)
+        return PyErr_NoMemory();
+    return result;
+}
+
+#endif // defined BUP_HAVE_BUILTIN_MUL_OVERFLOW
 
 
 #ifndef htonll
@@ -80,20 +157,38 @@ static uint64_t htonll(uint64_t value)
 #endif
 
 
+// Disabling sign-compare here should be fine since we're explicitly
+// checking for a sign mismatch, i.e. if the signs don't match, then
+// it doesn't matter what the value comparison says.
+// FIXME: ... so should we reverse the order?
 #define INTEGRAL_ASSIGNMENT_FITS(dest, src)                             \
     ({                                                                  \
+        _Pragma("GCC diagnostic push");                                 \
+        _Pragma("GCC diagnostic ignored \"-Wsign-compare\"");           \
+        _Pragma("clang diagnostic push");                               \
+        _Pragma("clang diagnostic ignored \"-Wshorten-64-to-32\"");     \
         *(dest) = (src);                                                \
-        *(dest) == (src) && (*(dest) < 1) == ((src) < 1);               \
+        int result = *(dest) == (src) && (*(dest) < 1) == ((src) < 1);  \
+        _Pragma("clang diagnostic pop");                                \
+        _Pragma("GCC diagnostic pop");                                  \
+        result;                                                         \
     })
 
 
-// At the moment any code that calls INTGER_TO_PY() will have to
-// disable -Wtautological-compare for clang.  See below.
-
-#define INTEGER_TO_PY(x) \
-    (((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x))
+// INTEGER_TO_PY(x): convert the C integer expression x to a Python
+// integer object, using PyLong_FromUnsignedLongLong() when x >= 0 and
+// PyLong_FromLongLong() otherwise.  Evaluates to a PyObject *.  For
+// unsigned types the "x >= 0" test is always true, so the warnings
+// that comparison provokes are suppressed inside the macro.  Note that
+// x is evaluated twice, so it must not have side effects.  The
+// temporary has a macro-specific name so that an argument mentioning
+// a caller's own "result" variable is not captured.
+#define INTEGER_TO_PY(x)                                                \
+    ({                                                                  \
+        _Pragma("GCC diagnostic push");                                 \
+        _Pragma("GCC diagnostic ignored \"-Wtype-limits\"");            \
+        _Pragma("clang diagnostic push");                               \
+        _Pragma("clang diagnostic ignored \"-Wtautological-compare\""); \
+        PyObject *integer_to_py_result_ = ((x) >= 0)                    \
+            ? PyLong_FromUnsignedLongLong(x)                            \
+            : PyLong_FromLongLong(x);                                   \
+        _Pragma("clang diagnostic pop");                                \
+        _Pragma("GCC diagnostic pop");                                  \
+        integer_to_py_result_;                                          \
+    })
 
 
+#if PY_MAJOR_VERSION < 3
 static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
                                 const char *name)
 {
@@ -114,12 +209,15 @@ static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
     *x = tmp;
     return 1;
 }
+#endif
 
 
 static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name)
 {
+#if PY_MAJOR_VERSION < 3
     if (PyInt_Check(py))
         return bup_ulong_from_pyint(x, py, name);
+#endif
 
     if (!PyLong_Check(py))
     {
@@ -151,13 +249,14 @@ static int bup_uint_from_py(unsigned int *x, PyObject *py, const char *name)
         PyErr_Format(PyExc_OverflowError, "%s too big for unsigned int", name);
         return 0;
     }
-    *x = tmp;
+    *x = (unsigned int) tmp;
     return 1;
 }
 
 static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
                               const char *name)
 {
+#if PY_MAJOR_VERSION < 3
     if (PyInt_Check(py))
     {
         unsigned long tmp;
@@ -168,6 +267,7 @@ static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
         }
         return 0;
     }
+#endif
 
     if (!PyLong_Check(py))
     {
@@ -188,6 +288,70 @@ static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py,
 }
 
 
+static PyObject *bup_bytescmp(PyObject *self, PyObject *args)
+{
+    // Compare two byte strings: the memcmp() result over their common
+    // prefix decides if it is non-zero; otherwise the shorter argument
+    // orders first (-1), the longer last (1), and equal lengths give 0.
+    PyObject *py_lhs, *py_rhs;  // These are really PyBytes/PyString objects
+    if (!PyArg_ParseTuple(args, "SS", &py_lhs, &py_rhs))
+        return NULL;
+
+    char *lhs, *rhs;
+    Py_ssize_t lhs_len, rhs_len;
+    if (PyBytes_AsStringAndSize(py_lhs, &lhs, &lhs_len) == -1
+        || PyBytes_AsStringAndSize(py_rhs, &rhs, &rhs_len) == -1)
+        return NULL;
+
+    Py_ssize_t common_len = lhs_len;
+    if (rhs_len < common_len)
+        common_len = rhs_len;
+
+    long order = memcmp(lhs, rhs, common_len);
+    if (order == 0 && lhs_len != rhs_len)
+        order = (lhs_len < rhs_len) ? -1 : 1;
+    return PyLong_FromLong(order);
+}
+
+
+static PyObject *bup_cat_bytes(PyObject *self, PyObject *args)
+{
+    // Arguments: (x, x_ofs, x_n, y, y_ofs, y_n).  Returns a new bytes
+    // object holding x_n bytes of x starting at x_ofs, followed by y_n
+    // bytes of y starting at y_ofs.  Raises ValueError for a negative or
+    // out-of-range offset or extent, and OverflowError if the combined
+    // length would exceed PY_SSIZE_T_MAX.
+    unsigned char *x_data = NULL, *y_data = NULL;
+    Py_ssize_t x_len, x_ofs, x_n;
+    Py_ssize_t y_len, y_ofs, y_n;
+    if (!PyArg_ParseTuple(args,
+                          rbuf_argf "nn"
+                          rbuf_argf "nn",
+                          &x_data, &x_len, &x_ofs, &x_n,
+                          &y_data, &y_len, &y_ofs, &y_n))
+        return NULL;
+
+    // Require 0 <= x_ofs <= x_len and 0 <= x_n <= x_len - x_ofs.
+    if (x_ofs < 0)
+        return PyErr_Format(PyExc_ValueError, "negative x offset");
+    if (x_n < 0)
+        return PyErr_Format(PyExc_ValueError, "negative x extent");
+    if (x_len < x_ofs)
+        return PyErr_Format(PyExc_ValueError, "x offset greater than length");
+    if (x_len - x_ofs < x_n)
+        return PyErr_Format(PyExc_ValueError, "x extent past end of buffer");
+
+    // Same requirements for the y slice.
+    if (y_ofs < 0)
+        return PyErr_Format(PyExc_ValueError, "negative y offset");
+    if (y_n < 0)
+        return PyErr_Format(PyExc_ValueError, "negative y extent");
+    if (y_len < y_ofs)
+        return PyErr_Format(PyExc_ValueError, "y offset greater than length");
+    if (y_len - y_ofs < y_n)
+        return PyErr_Format(PyExc_ValueError, "y extent past end of buffer");
+
+    // Both extents are non-negative here, so the subtraction can't wrap.
+    if (y_n > PY_SSIZE_T_MAX - x_n)
+        return PyErr_Format(PyExc_OverflowError, "result length too long");
+
+    PyObject *joined = PyBytes_FromStringAndSize(NULL, x_n + y_n);
+    if (joined == NULL)
+        return PyErr_NoMemory();
+    char *out = PyBytes_AS_STRING(joined);
+    memcpy(out, x_data + x_ofs, x_n);
+    out += x_n;
+    memcpy(out, y_data + y_ofs, y_n);
+    return joined;
+}
+
+
+
 // Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
 #if __WIN32__ || __CYGWIN__
 
@@ -301,8 +465,7 @@ static PyObject *record_sparse_zeros(unsigned long long *new_pending,
 }
 
 
-static const byte * find_not_zero(const byte * const start,
-                                  const byte * const end)
+static byte* find_not_zero(const byte * const start, const byte * const end)
 {
     // Return a pointer to first non-zero byte between start and end,
     // or end if there isn't one.
@@ -310,37 +473,37 @@ static const byte * find_not_zero(const byte * const start,
     const unsigned char *cur = start;
     while (cur < end && *cur == 0)
         cur++;
-    return cur;
+    return (byte *) cur;
 }
 
 
-static const byte * const find_trailing_zeros(const byte * const start,
-                                              const byte * const end)
+static byte* find_trailing_zeros(const byte * const start,
+                                 const byte * const end)
 {
     // Return a pointer to the start of any trailing run of zeros, or
     // end if there isn't one.
     assert(start <= end);
     if (start == end)
-        return end;
+        return (byte *) end;
     const byte * cur = end;
     while (cur > start && *--cur == 0) {}
     if (*cur == 0)
-        return cur;
+        return (byte *) cur;
     else
-        return cur + 1;
+        return (byte *) (cur + 1);
 }
 
 
-static const byte *find_non_sparse_end(const byte * const start,
-                                       const byte * const end,
-                                       const unsigned long long min_len)
+static byte *find_non_sparse_end(const byte * const start,
+                                 const byte * const end,
+                                 const ptrdiff_t min_len)
 {
     // Return the first pointer to a min_len sparse block in [start,
     // end) if there is one, otherwise a pointer to the start of any
     // trailing run of zeros.  If there are no trailing zeros, return
     // end.
     if (start == end)
-        return end;
+        return (byte *) end;
     assert(start < end);
     assert(min_len);
     // Probe in min_len jumps, searching backward from the jump
@@ -361,7 +524,7 @@ static const byte *find_non_sparse_end(const byte * const start,
             assert(candidate >= start);
             assert(candidate <= end);
             assert(*candidate == 0);
-            return candidate;
+            return (byte *) candidate;
         }
         else
         {
@@ -371,7 +534,7 @@ static const byte *find_non_sparse_end(const byte * const start,
     }
 
     if (candidate == end)
-        return end;
+        return (byte *) end;
 
     // No min_len sparse run found, search backward from end
     const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros,
@@ -383,20 +546,20 @@ static const byte *find_non_sparse_end(const byte * const start,
         assert(candidate < end);
         assert(*candidate == 0);
         assert(end - candidate < min_len);
-        return candidate;
+        return (byte *) candidate;
     }
 
     if (trailing_zeros == end)
     {
         assert(*(end - 1) != 0);
-        return end;
+        return (byte *) end;
     }
 
     assert(end - trailing_zeros < min_len);
     assert(trailing_zeros >= start);
     assert(trailing_zeros < end);
     assert(*trailing_zeros == 0);
-    return trailing_zeros;
+    return (byte *) trailing_zeros;
 }
 
 
@@ -406,13 +569,16 @@ static PyObject *bup_write_sparsely(PyObject *self, PyObject *args)
     unsigned char *buf = NULL;
     Py_ssize_t sbuf_len;
     PyObject *py_min_sparse_len, *py_prev_sparse_len;
-    if (!PyArg_ParseTuple(args, "it#OO",
+    if (!PyArg_ParseTuple(args, "i" rbuf_argf "OO",
                           &fd, &buf, &sbuf_len,
                           &py_min_sparse_len, &py_prev_sparse_len))
        return NULL;
-    unsigned long long min_sparse_len, prev_sparse_len, buf_len;
-    if (!bup_ullong_from_py(&min_sparse_len, py_min_sparse_len, "min_sparse_len"))
+    ptrdiff_t min_sparse_len;
+    unsigned long long prev_sparse_len, buf_len, ul_min_sparse_len;
+    if (!bup_ullong_from_py(&ul_min_sparse_len, py_min_sparse_len, "min_sparse_len"))
         return NULL;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&min_sparse_len, ul_min_sparse_len))
+        return PyErr_Format(PyExc_OverflowError, "min_sparse_len too large");
     if (!bup_ullong_from_py(&prev_sparse_len, py_prev_sparse_len, "prev_sparse_len"))
         return NULL;
     if (sbuf_len < 0)
@@ -486,14 +652,29 @@ static PyObject *blobbits(PyObject *self, PyObject *args)
 
 static PyObject *splitbuf(PyObject *self, PyObject *args)
 {
-    unsigned char *buf = NULL;
-    Py_ssize_t len = 0;
+    // We stick to buffers in python 2 because they appear to be
+    // substantially smaller than memoryviews, and because
+    // zlib.compress() in python 2 can't accept a memoryview
+    // (cf. hashsplit.py).
     int out = 0, bits = -1;
-
-    if (!PyArg_ParseTuple(args, "t#", &buf, &len))
-       return NULL;
-    assert(len <= INT_MAX);
-    out = bupsplit_find_ofs(buf, len, &bits);
+    if (PY_MAJOR_VERSION > 2)
+    {
+        Py_buffer buf;
+        if (!PyArg_ParseTuple(args, "y*", &buf))
+            return NULL;
+        assert(buf.len <= INT_MAX);
+        out = bupsplit_find_ofs(buf.buf, buf.len, &bits);
+        PyBuffer_Release(&buf);
+    }
+    else
+    {
+        unsigned char *buf = NULL;
+        Py_ssize_t len = 0;
+        if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+            return NULL;
+        assert(len <= INT_MAX);
+        out = bupsplit_find_ofs(buf, (int) len, &bits);
+    }
     if (out) assert(bits >= BUP_BLOBBITS);
     return Py_BuildValue("ii", out, bits);
 }
@@ -506,7 +687,7 @@ static PyObject *bitmatch(PyObject *self, PyObject *args)
     Py_ssize_t byte;
     int bit;
 
-    if (!PyArg_ParseTuple(args, "t#t#", &buf1, &len1, &buf2, &len2))
+    if (!PyArg_ParseTuple(args, rbuf_argf rbuf_argf, &buf1, &len1, &buf2, &len2))
        return NULL;
     
     bit = 0;
@@ -533,7 +714,7 @@ static PyObject *firstword(PyObject *self, PyObject *args)
     Py_ssize_t len = 0;
     uint32_t v;
 
-    if (!PyArg_ParseTuple(args, "t#", &buf, &len))
+    if (!PyArg_ParseTuple(args, rbuf_argf, &buf, &len))
        return NULL;
     
     if (len < 4)
@@ -602,72 +783,95 @@ BLOOM_GET_BIT(bloom_get_bit5, to_bloom_address_bitmask5, uint32_t)
 
 static PyObject *bloom_add(PyObject *self, PyObject *args)
 {
-    unsigned char *sha = NULL, *bloom = NULL;
-    unsigned char *end;
-    Py_ssize_t len = 0, blen = 0;
+    Py_buffer bloom, sha;
     int nbits = 0, k = 0;
+    if (!PyArg_ParseTuple(args, wbuf_argf wbuf_argf "ii",
+                          &bloom, &sha, &nbits, &k))
+        return NULL;
 
-    if (!PyArg_ParseTuple(args, "w#s#ii", &bloom, &blen, &sha, &len, &nbits, &k))
-       return NULL;
+    PyObject *result = NULL;
 
-    if (blen < 16+(1<<nbits) || len % 20 != 0)
-       return NULL;
+    if (bloom.len < 16+(1<<nbits) || sha.len % 20 != 0)
+        goto clean_and_return;
 
     if (k == 5)
     {
-       if (nbits > 29)
-           return NULL;
-       for (end = sha + len; sha < end; sha += 20/k)
-           bloom_set_bit5(bloom, sha, nbits);
+        if (nbits > 29)
+            goto clean_and_return;
+        unsigned char *cur = sha.buf;
+        unsigned char *end;
+        for (end = cur + sha.len; cur < end; cur += 20/k)
+            bloom_set_bit5(bloom.buf, cur, nbits);
     }
     else if (k == 4)
     {
-       if (nbits > 37)
-           return NULL;
-       for (end = sha + len; sha < end; sha += 20/k)
-           bloom_set_bit4(bloom, sha, nbits);
+        if (nbits > 37)
+            goto clean_and_return;
+        unsigned char *cur = sha.buf;
+        unsigned char *end = cur + sha.len;
+        for (; cur < end; cur += 20/k)
+            bloom_set_bit4(bloom.buf, cur, nbits);
     }
     else
-       return NULL;
+        goto clean_and_return;
 
+    result = Py_BuildValue("n", sha.len / 20);
 
-    return Py_BuildValue("n", len/20);
+ clean_and_return:
+    PyBuffer_Release(&bloom);
+    PyBuffer_Release(&sha);
+    return result;
 }
 
 static PyObject *bloom_contains(PyObject *self, PyObject *args)
 {
-    unsigned char *sha = NULL, *bloom = NULL;
-    Py_ssize_t len = 0, blen = 0;
+    Py_buffer bloom;
+    unsigned char *sha = NULL;
+    Py_ssize_t len = 0;
     int nbits = 0, k = 0;
-    unsigned char *end;
-    int steps;
+    if (!PyArg_ParseTuple(args, wbuf_argf rbuf_argf "ii",
+                          &bloom, &sha, &len, &nbits, &k))
+        return NULL;
 
-    if (!PyArg_ParseTuple(args, "t#s#ii", &bloom, &blen, &sha, &len, &nbits, &k))
-       return NULL;
+    PyObject *result = NULL;
 
     if (len != 20)
-       return NULL;
+        goto clean_and_return;
 
     if (k == 5)
     {
-       if (nbits > 29)
-           return NULL;
-       for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
-           if (!bloom_get_bit5(bloom, sha, nbits))
-               return Py_BuildValue("Oi", Py_None, steps);
+        if (nbits > 29)
+            goto clean_and_return;
+        int steps;
+        unsigned char *end;
+        for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
+            if (!bloom_get_bit5(bloom.buf, sha, nbits))
+            {
+                result = Py_BuildValue("Oi", Py_None, steps);
+                goto clean_and_return;
+            }
     }
     else if (k == 4)
     {
-       if (nbits > 37)
-           return NULL;
-       for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
-           if (!bloom_get_bit4(bloom, sha, nbits))
-               return Py_BuildValue("Oi", Py_None, steps);
+        if (nbits > 37)
+            goto clean_and_return;
+        int steps;
+        unsigned char *end;
+        for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
+            if (!bloom_get_bit4(bloom.buf, sha, nbits))
+            {
+                result = Py_BuildValue("Oi", Py_None, steps);
+                goto clean_and_return;
+            }
     }
     else
-       return NULL;
+        goto clean_and_return;
+
+    result = Py_BuildValue("ii", 1, k);
 
-    return Py_BuildValue("ii", 1, k);
+ clean_and_return:
+    PyBuffer_Release(&bloom);
+    return result;
 }
 
 
@@ -688,7 +892,7 @@ static PyObject *extract_bits(PyObject *self, PyObject *args)
     Py_ssize_t len = 0;
     int nbits = 0;
 
-    if (!PyArg_ParseTuple(args, "t#i", &buf, &len, &nbits))
+    if (!PyArg_ParseTuple(args, rbuf_argf "i", &buf, &len, &nbits))
        return NULL;
     
     if (len < 4)
@@ -702,6 +906,11 @@ struct sha {
     unsigned char bytes[20];
 };
 
+static inline int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
+{
+    // Bytewise ordering of the two digests, with memcmp()'s sign
+    // convention (negative, zero, or positive).
+    const size_t digest_len = sizeof(sha1->bytes);
+    return memcmp(sha1->bytes, sha2->bytes, digest_len);
+}
+
 
 struct idx {
     unsigned char *map;
@@ -712,21 +921,11 @@ struct idx {
     int name_base;
 };
 
-
-static int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
-{
-    int i;
-    for (i = 0; i < sizeof(struct sha); i++)
-       if (sha1->bytes[i] != sha2->bytes[i])
-           return sha1->bytes[i] - sha2->bytes[i];
-    return 0;
-}
-
-
-static void _fix_idx_order(struct idx **idxs, int *last_i)
+static void _fix_idx_order(struct idx **idxs, Py_ssize_t *last_i)
 {
     struct idx *idx;
-    int low, mid, high, c = 0;
+    Py_ssize_t low, mid, high;
+    int c = 0;
 
     idx = idxs[*last_i];
     if (idxs[*last_i]->cur >= idxs[*last_i]->end)
@@ -773,36 +972,51 @@ static uint32_t _get_idx_i(struct idx *idx)
 
 static PyObject *merge_into(PyObject *self, PyObject *args)
 {
-    PyObject *py_total, *ilist = NULL;
-    unsigned char *fmap = NULL;
     struct sha *sha_ptr, *sha_start = NULL;
     uint32_t *table_ptr, *name_ptr, *name_start;
-    struct idx **idxs = NULL;
-    Py_ssize_t flen = 0;
-    int bits = 0, i;
+    int i;
     unsigned int total;
     uint32_t count, prefix;
-    int num_i;
-    int last_i;
 
-    if (!PyArg_ParseTuple(args, "w#iOO",
-                          &fmap, &flen, &bits, &py_total, &ilist))
+
+    Py_buffer fmap;
+    int bits;;
+    PyObject *py_total, *ilist = NULL;
+    if (!PyArg_ParseTuple(args, wbuf_argf "iOO",
+                          &fmap, &bits, &py_total, &ilist))
        return NULL;
 
+    PyObject *result = NULL;
+    struct idx **idxs = NULL;
+    Py_ssize_t num_i = 0;
+    int *idx_buf_init = NULL;
+    Py_buffer *idx_buf = NULL;
+
     if (!bup_uint_from_py(&total, py_total, "total"))
-        return NULL;
+        goto clean_and_return;
 
     num_i = PyList_Size(ilist);
-    idxs = (struct idx **)PyMem_Malloc(num_i * sizeof(struct idx *));
+
+    if (!(idxs = checked_malloc(num_i, sizeof(struct idx *))))
+        goto clean_and_return;
+    if (!(idx_buf_init = checked_calloc(num_i, sizeof(int))))
+        goto clean_and_return;
+    if (!(idx_buf = checked_malloc(num_i, sizeof(Py_buffer))))
+        goto clean_and_return;
 
     for (i = 0; i < num_i; i++)
     {
        long len, sha_ofs, name_map_ofs;
-       idxs[i] = (struct idx *)PyMem_Malloc(sizeof(struct idx));
+       if (!(idxs[i] = checked_malloc(1, sizeof(struct idx))))
+            goto clean_and_return;
        PyObject *itup = PyList_GetItem(ilist, i);
-       if (!PyArg_ParseTuple(itup, "t#llli", &idxs[i]->map, &idxs[i]->bytes,
-                   &len, &sha_ofs, &name_map_ofs, &idxs[i]->name_base))
+       if (!PyArg_ParseTuple(itup, wbuf_argf "llli",
+                              &(idx_buf[i]), &len, &sha_ofs, &name_map_ofs,
+                              &idxs[i]->name_base))
            return NULL;
+        idx_buf_init[i] = 1;
+        idxs[i]->map = idx_buf[i].buf;
+        idxs[i]->bytes = idx_buf[i].len;
        idxs[i]->cur = (struct sha *)&idxs[i]->map[sha_ofs];
        idxs[i]->end = &idxs[i]->cur[len];
        if (name_map_ofs)
@@ -810,18 +1024,18 @@ static PyObject *merge_into(PyObject *self, PyObject *args)
        else
            idxs[i]->cur_name = NULL;
     }
-    table_ptr = (uint32_t *)&fmap[MIDX4_HEADERLEN];
+    table_ptr = (uint32_t *) &((unsigned char *) fmap.buf)[MIDX4_HEADERLEN];
     sha_start = sha_ptr = (struct sha *)&table_ptr[1<<bits];
     name_start = name_ptr = (uint32_t *)&sha_ptr[total];
 
-    last_i = num_i-1;
+    Py_ssize_t last_i = num_i - 1;
     count = 0;
     prefix = 0;
     while (last_i >= 0)
     {
        struct idx *idx;
        uint32_t new_prefix;
-       if (count % 102424 == 0 && istty2)
+       if (count % 102424 == 0 && get_state(self)->istty2)
            fprintf(stderr, "midx: writing %.2f%% (%d/%d)\r",
                    count*100.0/total, count, total);
        idx = idxs[last_i];
@@ -836,15 +1050,32 @@ static PyObject *merge_into(PyObject *self, PyObject *args)
        _fix_idx_order(idxs, &last_i);
        ++count;
     }
-    while (prefix < (1<<bits))
+    while (prefix < ((uint32_t) 1 << bits))
        table_ptr[prefix++] = htonl(count);
     assert(count == total);
-    assert(prefix == (1<<bits));
+    assert(prefix == ((uint32_t) 1 << bits));
     assert(sha_ptr == sha_start+count);
     assert(name_ptr == name_start+count);
 
-    PyMem_Free(idxs);
-    return PyLong_FromUnsignedLong(count);
+    result = PyLong_FromUnsignedLong(count);
+
+ clean_and_return:
+    if (idx_buf_init)
+    {
+        for (i = 0; i < num_i; i++)
+            if (idx_buf_init[i])
+                PyBuffer_Release(&(idx_buf[i]));
+        free(idx_buf_init);
+        free(idx_buf);
+    }
+    if (idxs)
+    {
+        for (i = 0; i < num_i; i++)
+            free(idxs[i]);
+        free(idxs);
+    }
+    PyBuffer_Release(&fmap);
+    return result;
 }
 
 #define FAN_ENTRIES 256
@@ -854,8 +1085,6 @@ static PyObject *write_idx(PyObject *self, PyObject *args)
     char *filename = NULL;
     PyObject *py_total, *idx = NULL;
     PyObject *part;
-    unsigned char *fmap = NULL;
-    Py_ssize_t flen = 0;
     unsigned int total = 0;
     uint32_t count;
     int i, j, ofs64_count;
@@ -863,21 +1092,27 @@ static PyObject *write_idx(PyObject *self, PyObject *args)
     uint64_t *ofs64_ptr;
     struct sha *sha_ptr;
 
-    if (!PyArg_ParseTuple(args, "sw#OO",
-                          &filename, &fmap, &flen, &idx, &py_total))
+    Py_buffer fmap;
+    if (!PyArg_ParseTuple(args, cstr_argf wbuf_argf "OO",
+                          &filename, &fmap, &idx, &py_total))
        return NULL;
 
+    PyObject *result = NULL;
+
     if (!bup_uint_from_py(&total, py_total, "total"))
-        return NULL;
+        goto clean_and_return;
 
     if (PyList_Size (idx) != FAN_ENTRIES) // Check for list of the right length.
-        return PyErr_Format (PyExc_TypeError, "idx must contain %d entries",
-                             FAN_ENTRIES);
+    {
+        result = PyErr_Format (PyExc_TypeError, "idx must contain %d entries",
+                               FAN_ENTRIES);
+        goto clean_and_return;
+    }
 
     const char idx_header[] = "\377tOc\0\0\0\002";
-    memcpy (fmap, idx_header, sizeof(idx_header) - 1);
+    memcpy (fmap.buf, idx_header, sizeof(idx_header) - 1);
 
-    fan_ptr = (uint32_t *)&fmap[sizeof(idx_header) - 1];
+    fan_ptr = (uint32_t *)&((unsigned char *)fmap.buf)[sizeof(idx_header) - 1];
     sha_ptr = (struct sha *)&fan_ptr[FAN_ENTRIES];
     crc_ptr = (uint32_t *)&sha_ptr[total];
     ofs_ptr = (uint32_t *)&crc_ptr[total];
@@ -887,13 +1122,17 @@ static PyObject *write_idx(PyObject *self, PyObject *args)
     ofs64_count = 0;
     for (i = 0; i < FAN_ENTRIES; ++i)
     {
-       int plen;
+       Py_ssize_t plen;
        part = PyList_GET_ITEM(idx, i);
        PyList_Sort(part);
        plen = PyList_GET_SIZE(part);
-       count += plen;
+        if (plen > UINT32_MAX || UINT32_MAX - count < plen) {
+            PyErr_Format(PyExc_OverflowError, "too many objects in index part");
+            goto clean_and_return;
+        }
+        count += (uint32_t) plen;
        *fan_ptr++ = htonl(count);
-       for (j = 0; j < plen; ++j)
+        for (j = 0; j < plen; ++j)
        {
            unsigned char *sha = NULL;
            Py_ssize_t sha_len = 0;
@@ -901,18 +1140,18 @@ static PyObject *write_idx(PyObject *self, PyObject *args)
            unsigned int crc;
             unsigned PY_LONG_LONG ofs_ull;
            uint64_t ofs;
-           if (!PyArg_ParseTuple(PyList_GET_ITEM(part, j), "t#OO",
+           if (!PyArg_ParseTuple(PyList_GET_ITEM(part, j), rbuf_argf "OO",
                                  &sha, &sha_len, &crc_py, &ofs_py))
-               return NULL;
+                goto clean_and_return;
             if(!bup_uint_from_py(&crc, crc_py, "crc"))
-                return NULL;
+                goto clean_and_return;
             if(!bup_ullong_from_py(&ofs_ull, ofs_py, "ofs"))
-                return NULL;
+                goto clean_and_return;
             assert(crc <= UINT32_MAX);
             assert(ofs_ull <= UINT64_MAX);
            ofs = ofs_ull;
            if (sha_len != sizeof(struct sha))
-               return NULL;
+                goto clean_and_return;
            memcpy(sha_ptr++, sha, sizeof(struct sha));
            *crc_ptr++ = htonl(crc);
            if (ofs > 0x7fffffff)
@@ -924,11 +1163,18 @@ static PyObject *write_idx(PyObject *self, PyObject *args)
        }
     }
 
-    int rc = msync(fmap, flen, MS_ASYNC);
+    int rc = msync(fmap.buf, fmap.len, MS_ASYNC);
     if (rc != 0)
-       return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+    {
+        result = PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+        goto clean_and_return;
+    }
+
+    result = PyLong_FromUnsignedLong(count);
 
-    return PyLong_FromUnsignedLong(count);
+ clean_and_return:
+    PyBuffer_Release(&fmap);
+    return result;
 }
 
 
@@ -954,7 +1200,7 @@ static PyObject *write_random(PyObject *self, PyObject *args)
     {
        unsigned i;
        for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
-           buf[i] = random();
+           buf[i] = (uint32_t) random();
        ret = write(fd, buf, sizeof(buf));
        if (ret < 0)
            ret = 0;
@@ -970,7 +1216,7 @@ static PyObject *write_random(PyObject *self, PyObject *args)
     {
        unsigned i;
        for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
-           buf[i] = random();
+           buf[i] = (uint32_t) random();
        ret = write(fd, buf, len % 1024);
        if (ret < 0)
            ret = 0;
@@ -992,7 +1238,7 @@ static PyObject *random_sha(PyObject *self, PyObject *args)
     if (!seeded)
     {
        assert(sizeof(shabuf) == 20);
-       srandom(time(NULL));
+       srandom((unsigned int) time(NULL));
        seeded = 1;
     }
     
@@ -1001,8 +1247,8 @@ static PyObject *random_sha(PyObject *self, PyObject *args)
     
     memset(shabuf, 0, sizeof(shabuf));
     for (i=0; i < 20/4; i++)
-       shabuf[i] = random();
-    return Py_BuildValue("s#", shabuf, 20);
+       shabuf[i] = (uint32_t) random();
+    return Py_BuildValue(rbuf_argf, shabuf, 20);
 }
 
 
@@ -1038,7 +1284,7 @@ static PyObject *open_noatime(PyObject *self, PyObject *args)
 {
     char *filename = NULL;
     int fd;
-    if (!PyArg_ParseTuple(args, "s", &filename))
+    if (!PyArg_ParseTuple(args, cstr_argf, &filename))
        return NULL;
     fd = _open_noatime(filename, 0);
     if (fd < 0)
@@ -1085,7 +1331,7 @@ static PyObject *bup_get_linux_file_attr(PyObject *self, PyObject *args)
     char *path;
     int fd;
 
-    if (!PyArg_ParseTuple(args, "s", &path))
+    if (!PyArg_ParseTuple(args, cstr_argf, &path))
         return NULL;
 
     fd = _open_noatime(path, O_NONBLOCK);
@@ -1117,7 +1363,7 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args)
     PyObject *py_attr;
     int fd;
 
-    if (!PyArg_ParseTuple(args, "sO", &path, &py_attr))
+    if (!PyArg_ParseTuple(args, cstr_argf "O", &path, &py_attr))
         return NULL;
 
     if (!bup_uint_from_py(&attr, py_attr, "attr"))
@@ -1138,12 +1384,12 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args)
     // The extents flag can't be removed, so don't (see chattr(1) and chattr.c).
     orig_attr = 0; // Handle int/long mismatch (see above)
     rc = ioctl(fd, FS_IOC_GETFLAGS, &orig_attr);
-    assert(orig_attr <= UINT_MAX);  // Kernel type is actually int
     if (rc == -1)
     {
         close(fd);
         return PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
     }
+    assert(orig_attr <= UINT_MAX);  // Kernel type is actually int
     attr |= ((unsigned int) orig_attr) & FS_EXTENT_FL;
 
     rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
@@ -1213,7 +1459,7 @@ static PyObject *bup_utimensat(PyObject *self, PyObject *args)
     PyObject *access_py, *modification_py;
     struct timespec ts[2];
 
-    if (!PyArg_ParseTuple(args, "is((Ol)(Ol))i",
+    if (!PyArg_ParseTuple(args, "i" cstr_argf "((Ol)(Ol))i",
                           &fd,
                           &path,
                           &access_py, &(ts[0].tv_nsec),
@@ -1255,7 +1501,7 @@ static int bup_parse_xutimes_args(char **path,
     PyObject *access_py, *modification_py;
     long long access_us, modification_us; // POSIX guarantees tv_usec is signed.
 
-    if (!PyArg_ParseTuple(args, "s((OL)(OL))",
+    if (!PyArg_ParseTuple(args, cstr_argf "((OL)(OL))",
                           path,
                           &access_py, &access_us,
                           &modification_py, &modification_us))
@@ -1336,9 +1582,6 @@ static PyObject *bup_lutimes(PyObject *self, PyObject *args)
 #endif
 
 
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY().
-
 static PyObject *stat_struct_to_py(const struct stat *st,
                                    const char *filename,
                                    int fd)
@@ -1347,7 +1590,7 @@ static PyObject *stat_struct_to_py(const struct stat *st,
     // compile time, but not (easily) the unspecified types, so handle
     // those via INTEGER_TO_PY().  Assumes ns values will fit in a
     // long.
-    return Py_BuildValue("OKOOOOOL(Ol)(Ol)(Ol)",
+    return Py_BuildValue("NKNNNNNL(Nl)(Nl)(Nl)",
                          INTEGER_TO_PY(st->st_mode),
                          (unsigned PY_LONG_LONG) st->st_ino,
                          INTEGER_TO_PY(st->st_dev),
@@ -1364,14 +1607,13 @@ static PyObject *stat_struct_to_py(const struct stat *st,
                          (long) BUP_STAT_CTIME_NS(st));
 }
 
-#pragma clang diagnostic pop  // ignored "-Wtautological-compare"
 
 static PyObject *bup_stat(PyObject *self, PyObject *args)
 {
     int rc;
     char *filename;
 
-    if (!PyArg_ParseTuple(args, "s", &filename))
+    if (!PyArg_ParseTuple(args, cstr_argf, &filename))
         return NULL;
 
     struct stat st;
@@ -1387,7 +1629,7 @@ static PyObject *bup_lstat(PyObject *self, PyObject *args)
     int rc;
     char *filename;
 
-    if (!PyArg_ParseTuple(args, "s", &filename))
+    if (!PyArg_ParseTuple(args, cstr_argf, &filename))
         return NULL;
 
     struct stat st;
@@ -1441,47 +1683,559 @@ static PyObject *bup_localtime(PyObject *self, PyObject *args)
 #ifdef BUP_MINCORE_BUF_TYPE
 static PyObject *bup_mincore(PyObject *self, PyObject *args)
 {
-    const char *src;
-    Py_ssize_t src_ssize;
-    Py_buffer dest;
+    Py_buffer src, dest;
     PyObject *py_src_n, *py_src_off, *py_dest_off;
-    if (!PyArg_ParseTuple(args, "s#OOw*O",
-                          &src, &src_ssize, &py_src_n, &py_src_off,
+
+    if (!PyArg_ParseTuple(args, cstr_argf "*OOw*O",
+                          &src, &py_src_n, &py_src_off,
                           &dest, &py_dest_off))
        return NULL;
 
-    unsigned long long src_size, src_n, src_off, dest_size, dest_off;
+    PyObject *result = NULL;
+
+    unsigned long long src_n, src_off, dest_off;
     if (!(bup_ullong_from_py(&src_n, py_src_n, "src_n")
           && bup_ullong_from_py(&src_off, py_src_off, "src_off")
           && bup_ullong_from_py(&dest_off, py_dest_off, "dest_off")))
-        return NULL;
+        goto clean_and_return;
 
-    if (!INTEGRAL_ASSIGNMENT_FITS(&src_size, src_ssize))
-        return PyErr_Format(PyExc_OverflowError, "invalid src size");
     unsigned long long src_region_end;
+    if (!uadd(&src_region_end, src_off, src_n)) {
+        result = PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large");
+        goto clean_and_return;
+    }
+    assert(src.len >= 0);
+    if (src_region_end > (unsigned long long) src.len) {
+        result = PyErr_Format(PyExc_OverflowError, "region runs off end of src");
+        goto clean_and_return;
+    }
 
-    if (!uadd(&src_region_end, src_off, src_n))
-        return PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large");
-    if (src_region_end > src_size)
-        return PyErr_Format(PyExc_OverflowError, "region runs off end of src");
-
-    if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len))
-        return PyErr_Format(PyExc_OverflowError, "invalid dest size");
-    if (dest_off > dest_size)
-        return PyErr_Format(PyExc_OverflowError, "region runs off end of dest");
+    unsigned long long dest_size;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len)) {
+        result = PyErr_Format(PyExc_OverflowError, "invalid dest size");
+        goto clean_and_return;
+    }
+    if (dest_off > dest_size) {
+        result = PyErr_Format(PyExc_OverflowError, "region runs off end of dest");
+        goto clean_and_return;
+    }
 
     size_t length;
-    if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n))
-        return PyErr_Format(PyExc_OverflowError, "src_n overflows size_t");
-    int rc = mincore((void *)(src + src_off), src_n,
+    if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n)) {
+        result = PyErr_Format(PyExc_OverflowError, "src_n overflows size_t");
+        goto clean_and_return;
+    }
+    int rc = mincore((void *)(src.buf + src_off), length,
                      (BUP_MINCORE_BUF_TYPE *) (dest.buf + dest_off));
-    if (rc != 0)
-        return PyErr_SetFromErrno(PyExc_OSError);
-    return Py_BuildValue("O", Py_None);
+    if (rc != 0) {
+        result = PyErr_SetFromErrno(PyExc_OSError);
+        goto clean_and_return;
+    }
+    result = Py_BuildValue("O", Py_None);
+
+ clean_and_return:
+    PyBuffer_Release(&src);
+    PyBuffer_Release(&dest);
+    return result;
 }
 #endif /* def BUP_MINCORE_BUF_TYPE */
 
 
+// Convert a NULL-terminated array of C strings into a tuple of Python
+// byte strings (one item per array entry, in order).  Returns a new
+// reference, or NULL with a Python exception set.
+static PyObject *tuple_from_cstrs(char **cstrs)
+{
+    // Assumes list is null terminated
+    size_t n = 0;
+    while(cstrs[n] != NULL)
+        n++;
+
+    Py_ssize_t sn;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&sn, n))
+        return PyErr_Format(PyExc_OverflowError, "string array too large");
+
+    PyObject *result = PyTuple_New(sn);
+    if (result == NULL)
+        return NULL;  // Allocation failed; the exception is already set.
+
+    for (Py_ssize_t i = 0; i < sn; i++)
+    {
+        PyObject *gname = Py_BuildValue(cstr_argf, cstrs[i]);
+        if (gname == NULL)
+        {
+            // Dropping the tuple also drops the items stored so far.
+            Py_DECREF(result);
+            return NULL;
+        }
+        // PyTuple_SET_ITEM takes over the reference to gname.
+        PyTuple_SET_ITEM(result, i, gname);
+    }
+    return result;
+}
+
+// Raise the Python exception matching the current errno: MemoryError
+// for ENOMEM, IOError for EIO/EMFILE/ENFILE, OSError for anything
+// else.  Returns whatever the PyErr_* call returns, so callers can
+// "return appropriate_errno_ex();" directly.
+static PyObject *appropriate_errno_ex(void)
+{
+    if (errno == ENOMEM)
+        return PyErr_NoMemory();
+
+    // In 3.3 IOError was merged into OSError.
+    const int wants_ioerror =
+        (errno == EIO) || (errno == EMFILE) || (errno == ENFILE);
+    if (wants_ioerror)
+        return PyErr_SetFromErrno(PyExc_IOError);
+    return PyErr_SetFromErrno(PyExc_OSError);
+}
+
+
+// Convert a struct passwd into a 7-item Python tuple
+// (name, passwd, uid, gid, gecos, dir, shell), with the strings built
+// via cstr_argf.  Returns None when pwd is NULL, and NULL with an
+// exception set if the tuple cannot be built.
+static PyObject *pwd_struct_to_py(const struct passwd *pwd)
+{
+    // We can check the known (via POSIX) signed and unsigned types at
+    // compile time, but not (easily) the unspecified types, so handle
+    // those via INTEGER_TO_PY().
+    if (pwd == NULL)
+        Py_RETURN_NONE;
+    // "N" (not "O") hands the freshly created uid/gid objects over to
+    // the tuple; "O" would add a second reference that nobody drops.
+    return Py_BuildValue(cstr_argf cstr_argf "NN"
+                         cstr_argf cstr_argf cstr_argf,
+                         pwd->pw_name,
+                         pwd->pw_passwd,
+                         INTEGER_TO_PY(pwd->pw_uid),
+                         INTEGER_TO_PY(pwd->pw_gid),
+                         pwd->pw_gecos,
+                         pwd->pw_dir,
+                         pwd->pw_shell);
+}
+
+// getpwuid(uid): look up the password database entry for a numeric
+// user id and return it via pwd_struct_to_py() (None when there is no
+// such user).
+static PyObject *bup_getpwuid(PyObject *self, PyObject *args)
+{
+    unsigned long long requested;
+    if (!PyArg_ParseTuple(args, "K", &requested))
+        return NULL;
+
+    uid_t uid;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&uid, requested))
+        return PyErr_Format(PyExc_OverflowError, "uid too large for uid_t");
+
+    // A NULL result means either "not found" or "error"; errno is
+    // cleared first so the two cases can be told apart afterwards.
+    errno = 0;
+    struct passwd *entry = getpwuid(uid);
+    if (entry == NULL && errno != 0)
+        return appropriate_errno_ex();
+    return pwd_struct_to_py(entry);
+}
+
+// getpwnam(name): look up the password database entry for a user
+// name and return it via pwd_struct_to_py() (None when there is no
+// such user).
+static PyObject *bup_getpwnam(PyObject *self, PyObject *args)
+{
+    PyObject *py_login;
+    if (!PyArg_ParseTuple(args, "S", &py_login))
+        return NULL;
+    const char *login = PyBytes_AS_STRING(py_login);
+
+    // A NULL result means either "not found" or "error"; errno is
+    // cleared first so the two cases can be told apart afterwards.
+    errno = 0;
+    struct passwd *entry = getpwnam(login);
+    if (entry == NULL && errno != 0)
+        return appropriate_errno_ex();
+    return pwd_struct_to_py(entry);
+}
+
+// Convert a struct group into a 4-item Python tuple
+// (name, passwd, gid, members), where members is the tuple built by
+// tuple_from_cstrs().  Returns None when grp is NULL, and NULL with an
+// exception set on failure.
+static PyObject *grp_struct_to_py(const struct group *grp)
+{
+    // We can check the known (via POSIX) signed and unsigned types at
+    // compile time, but not (easily) the unspecified types, so handle
+    // those via INTEGER_TO_PY().
+    if (grp == NULL)
+        Py_RETURN_NONE;
+
+    PyObject *members = tuple_from_cstrs(grp->gr_mem);
+    if (members == NULL)
+        return NULL;
+    // "N" (not "O") transfers our references to the gid object and the
+    // members tuple into the result; "O" would leak both.
+    return Py_BuildValue(cstr_argf cstr_argf "NN",
+                         grp->gr_name,
+                         grp->gr_passwd,
+                         INTEGER_TO_PY(grp->gr_gid),
+                         members);
+}
+
+// getgrgid(gid): look up the group database entry for a numeric group
+// id and return it via grp_struct_to_py() (None when there is no such
+// group).
+static PyObject *bup_getgrgid(PyObject *self, PyObject *args)
+{
+    unsigned long long requested;
+    if (!PyArg_ParseTuple(args, "K", &requested))
+        return NULL;
+
+    gid_t gid;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&gid, requested))
+        return PyErr_Format(PyExc_OverflowError, "gid too large for gid_t");
+
+    // A NULL result means either "not found" or "error"; errno is
+    // cleared first so the two cases can be told apart afterwards.
+    errno = 0;
+    struct group *entry = getgrgid(gid);
+    if (entry == NULL && errno != 0)
+        return appropriate_errno_ex();
+    return grp_struct_to_py(entry);
+}
+
+// getgrnam(name): look up the group database entry for a group name
+// and return it via grp_struct_to_py() (None when there is no such
+// group).
+static PyObject *bup_getgrnam(PyObject *self, PyObject *args)
+{
+    PyObject *py_group;
+    if (!PyArg_ParseTuple(args, "S", &py_group))
+        return NULL;
+    const char *group_name = PyBytes_AS_STRING(py_group);
+
+    // A NULL result means either "not found" or "error"; errno is
+    // cleared first so the two cases can be told apart afterwards.
+    errno = 0;
+    struct group *entry = getgrnam(group_name);
+    if (entry == NULL && errno != 0)
+        return appropriate_errno_ex();
+    return grp_struct_to_py(entry);
+}
+
+
+// gethostname(): return the system host name as a bytes object, or
+// raise IOError if the gethostname() call fails.
+static PyObject *bup_gethostname(PyObject *mod, PyObject *ignore)
+{
+    // The buffer is zero-filled and gethostname() is given one byte
+    // less than its size, so the final byte always stays NUL.
+#ifdef HOST_NAME_MAX
+    char hostname[HOST_NAME_MAX + 1] = {};
+#else
+    /* 'SUSv2 guarantees that "Host names are limited to 255 bytes".' */
+    char hostname[256] = {};
+#endif
+
+    const int rc = gethostname(hostname, sizeof(hostname) - 1);
+    if (rc != 0)
+        return PyErr_SetFromErrno(PyExc_IOError);
+    return PyBytes_FromString(hostname);
+}
+
+
+#ifdef BUP_HAVE_READLINE
+
+// Return a freshly allocated, NUL-terminated copy of the contents of
+// a Python bytes object, or NULL on failure.  The caller owns the
+// result and releases it with free().
+static char *cstr_from_bytes(PyObject *bytes)
+{
+    char *buf;
+    Py_ssize_t length;
+    int rc = PyBytes_AsStringAndSize(bytes, &buf, &length);
+    if (rc == -1)
+        return NULL;
+
+    // Reserve room for the terminator: the result is handed to code
+    // that treats it as a C string.  length is non-negative and
+    // Py_ssize_t is no wider than size_t (asserted at module setup),
+    // so the addition cannot wrap.
+    const size_t size = (size_t) length + 1;
+    char *result = checked_malloc(size, sizeof(char));
+    if (!result)
+        return NULL;
+    memcpy(result, buf, (size_t) length);
+    result[length] = '\0';
+    return result;
+}
+
+// Convert a Python sequence of bytes objects into a newly allocated,
+// NULL-terminated array of newly allocated C strings (one per item,
+// via cstr_from_bytes()).  Returns NULL on failure, in which case
+// nothing allocated here is left behind.  On success the caller owns
+// the array and every string in it.
+static char **cstrs_from_seq(PyObject *seq)
+{
+    char **result = NULL;
+    seq = PySequence_Fast(seq, "Cannot convert sequence items to C strings");
+    if (!seq)
+        return NULL;
+
+    const Py_ssize_t len = PySequence_Fast_GET_SIZE(seq);
+    if (len > PY_SSIZE_T_MAX - 1) {
+        PyErr_Format(PyExc_OverflowError,
+                     "Sequence length %zd too large for conversion to C array",
+                     len);
+        goto finish;
+    }
+    result = checked_malloc(len + 1, sizeof(char *));
+    if (!result)
+        goto finish;
+
+    // Invariant: entries [0, i) hold successfully allocated strings.
+    Py_ssize_t i = 0;
+    for (i = 0; i < len; i++)
+    {
+        PyObject *item = PySequence_Fast_GET_ITEM(seq, i);
+        if (!item)
+            goto abandon_result;
+        result[i] = cstr_from_bytes(item);
+        if (!result[i])
+            goto abandon_result;
+    }
+    result[len] = NULL;
+    goto finish;
+
+ abandon_result:
+    // Release exactly the strings allocated so far (including index
+    // 0), never the slot that failed or was not yet written.
+    while (i > 0)
+        free(result[--i]);
+    free(result);
+    result = NULL;
+
+ finish:
+    Py_DECREF(seq);
+    return result;
+}
+
+// Heap copy of the word break characters most recently installed by
+// bup_set_completer_word_break_characters(), kept so it can be freed
+// when it is replaced.
+static char *our_word_break_chars = NULL;
+
+// set_completer_word_break_characters(chars): point readline's
+// rl_completer_word_break_characters at a private copy of chars and
+// free the copy installed by the previous call, if any.
+static PyObject *
+bup_set_completer_word_break_characters(PyObject *self, PyObject *args)
+{
+    char *requested;
+    if (!PyArg_ParseTuple(args, cstr_argf, &requested))
+        return NULL;
+
+    char *replacement = strdup(requested);
+    if (replacement == NULL)
+        return PyErr_NoMemory();
+
+    // Install the new copy before releasing the old one, so readline
+    // never points at freed memory.
+    char *stale = our_word_break_chars;
+    rl_completer_word_break_characters = replacement;
+    our_word_break_chars = replacement;
+    free(stale);  // free(NULL) is a no-op
+    Py_RETURN_NONE;
+}
+
+// get_completer_word_break_characters(): return readline's
+// rl_completer_word_break_characters as bytes.
+static PyObject *
+bup_get_completer_word_break_characters(PyObject *self, PyObject *args)
+{
+    // This function is registered as METH_NOARGS, in which case
+    // CPython passes args == NULL; PyArg_ParseTuple() must not be
+    // handed a NULL tuple.  Still validate args if one is provided.
+    if (args != NULL && !PyArg_ParseTuple(args, ""))
+        return NULL;
+    // PyBytes_FromString() requires a non-NULL pointer; report an
+    // unset value as an empty byte string.
+    const char *chars = rl_completer_word_break_characters;
+    return PyBytes_FromString(chars ? chars : "");
+}
+
+// get_line_buffer(): return readline's rl_line_buffer as bytes.
+static PyObject *bup_get_line_buffer(PyObject *self, PyObject *args)
+{
+    // This function is registered as METH_NOARGS, in which case
+    // CPython passes args == NULL; PyArg_ParseTuple() must not be
+    // handed a NULL tuple.  Still validate args if one is provided.
+    if (args != NULL && !PyArg_ParseTuple(args, ""))
+        return NULL;
+    // PyBytes_FromString() requires a non-NULL pointer; report a
+    // missing line buffer as an empty byte string.
+    const char *line = rl_line_buffer;
+    return PyBytes_FromString(line ? line : "");
+}
+
+// parse_and_bind(line): pass a copy of line to rl_parse_and_bind()
+// and raise OSError if it reports a non-zero status.
+static PyObject *
+bup_parse_and_bind(PyObject *self, PyObject *args)
+{
+    char *line;
+    if (!PyArg_ParseTuple(args, cstr_argf ":parse_and_bind", &line))
+        return NULL;
+
+    // Work on a scratch copy because rl_parse_and_bind() may modify
+    // its argument.
+    char *scratch = strdup(line);
+    if (scratch == NULL)
+        return PyErr_NoMemory();
+    const int status = rl_parse_and_bind(scratch);
+    free(scratch);
+
+    if (status == 0)
+        Py_RETURN_NONE;
+    return PyErr_Format(PyExc_OSError,
+                        "system rl_parse_and_bind failed (%d)", status);
+}
+
+
+// Python callable installed by bup_set_attempted_completion_function()
+// (NULL when none is set), and the array most recently returned to
+// readline by on_attempted_completion().
+static PyObject *py_on_attempted_completion;
+static char **prev_completions;
+
+// readline rl_attempted_completion_function hook: call the installed
+// Python callable as f(text, start, end) and convert a non-None
+// result into a NULL-terminated array of C strings.  Returns NULL
+// when no callable is set, when the call fails, when it returns None,
+// or when the conversion fails.
+static char **on_attempted_completion(const char *text, int start, int end)
+{
+    PyObject *callback = py_on_attempted_completion;
+    if (callback == NULL)
+        return NULL;
+
+    PyObject *py_matches = PyObject_CallFunction(callback,
+                                                 cstr_argf "ii",
+                                                 text, start, end);
+    if (py_matches == NULL)
+        return NULL;
+
+    char **matches = NULL;
+    if (py_matches != Py_None) {
+        matches = cstrs_from_seq(py_matches);
+        // NOTE(review): only the previous array itself is freed here,
+        // not its strings -- confirm this matches readline's ownership
+        // rules for the returned match list.
+        free(prev_completions);
+        prev_completions = matches;
+    }
+    Py_DECREF(py_matches);
+    return matches;
+}
+
+// set_attempted_completion_function(f): install f as the callable
+// behind rl_attempted_completion_function, or uninstall the hook when
+// f is None.  The module holds one reference to the installed
+// callable and drops the reference to the one it replaces.
+static PyObject *
+bup_set_attempted_completion_function(PyObject *self, PyObject *args)
+{
+    PyObject *completer;
+    if (!PyArg_ParseTuple(args, "O", &completer))
+        return NULL;
+
+    PyObject *replaced = py_on_attempted_completion;
+    if (completer != Py_None) {
+        Py_INCREF(completer);
+        py_on_attempted_completion = completer;
+        rl_attempted_completion_function = on_attempted_completion;
+    } else {
+        py_on_attempted_completion = NULL;
+        rl_attempted_completion_function = NULL;
+    }
+    // Release the old callable only after the new one is in place.
+    Py_XDECREF(replaced);
+    Py_RETURN_NONE;
+}
+
+
+// Python callable installed by bup_set_completion_entry_function()
+// (NULL when none is set).
+static PyObject *py_on_completion_entry;
+
+// readline rl_completion_entry_function hook: call the installed
+// Python callable as f(text, state) and return a malloc'ed copy of
+// its bytes result.  Returns NULL when no callable is set, when the
+// call fails, when it returns None, or when the copy fails.
+static char *on_completion_entry(const char *text, int state)
+{
+    PyObject *callback = py_on_completion_entry;
+    if (callback == NULL)
+        return NULL;
+
+    PyObject *py_match = PyObject_CallFunction(callback,
+                                               cstr_argf "i", text, state);
+    if (py_match == NULL)
+        return NULL;
+
+    char *match = NULL;
+    if (py_match != Py_None)
+        match = cstr_from_bytes(py_match);
+    Py_DECREF(py_match);
+    return match;
+}
+
+// set_completion_entry_function(f): install f as the callable behind
+// rl_completion_entry_function, or uninstall the hook when f is None.
+// The module holds one reference to the installed callable and drops
+// the reference to the one it replaces.
+static PyObject *
+bup_set_completion_entry_function(PyObject *self, PyObject *args)
+{
+    PyObject *completer;
+    if (!PyArg_ParseTuple(args, "O", &completer))
+        return NULL;
+
+    PyObject *replaced = py_on_completion_entry;
+    if (completer != Py_None) {
+        Py_INCREF(completer);
+        py_on_completion_entry = completer;
+        rl_completion_entry_function = on_completion_entry;
+    } else {
+        py_on_completion_entry = NULL;
+        rl_completion_entry_function = NULL;
+    }
+    // Release the old callable only after the new one is in place.
+    Py_XDECREF(replaced);
+    Py_RETURN_NONE;
+}
+
+// readline(prompt): call readline() with the given prompt and return
+// the line as bytes.  Raises EOFError when readline() returns NULL.
+static PyObject *
+bup_readline(PyObject *self, PyObject *args)
+{
+    char *prompt;
+    if (!PyArg_ParseTuple(args, cstr_argf, &prompt))
+        return NULL;
+
+    char *input = readline(prompt);
+    if (input == NULL)
+        return PyErr_Format(PyExc_EOFError, "readline EOF");
+
+    // Copy into a Python object, then release readline's buffer.
+    PyObject *py_line = PyBytes_FromString(input);
+    free(input);
+    return py_line;
+}
+
+#endif // defined BUP_HAVE_READLINE
+
+#if defined(HAVE_SYS_ACL_H) && \
+    defined(HAVE_ACL_LIBACL_H) && \
+    defined(HAVE_ACL_EXTENDED_FILE) && \
+    defined(HAVE_ACL_GET_FILE) && \
+    defined(HAVE_ACL_TO_ANY_TEXT) && \
+    defined(HAVE_ACL_FROM_TEXT) && \
+    defined(HAVE_ACL_SET_FILE)
+#define ACL_SUPPORT 1
+#include <sys/acl.h>
+#include <acl/libacl.h>
+
+// Read the ACL of the given type for name and render it twice: with
+// names (*txt) and with numeric ids (*num).
+//
+// Returns
+//   0 for success; *txt and *num are then owned by the caller and
+//     must each be released with acl_free()
+//  -1 for errors, with python exception set
+//  -2 for ignored errors (not supported)
+// Whenever the result is not 0, *txt and *num are NULL, so the caller
+// can free them unconditionally without risking a double free.
+static int bup_read_acl_to_text(const char *name, acl_type_t type,
+                                char **txt, char **num)
+{
+    *txt = NULL;
+    *num = NULL;
+
+    acl_t acl = acl_get_file(name, type);
+    if (!acl) {
+        if (errno == EOPNOTSUPP || errno == ENOSYS)
+            return -2;
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+
+    *txt = acl_to_any_text(acl, "", '\n', TEXT_ABBREVIATE);
+    if (*txt)
+        *num = acl_to_any_text(acl, "", '\n', TEXT_ABBREVIATE | TEXT_NUMERIC_IDS);
+
+    // The ACL object is no longer needed once both renderings have
+    // been attempted.  Remember errno first, in case releasing the
+    // ACL disturbs it.
+    const int conversion_errno = errno;
+    acl_free(acl);
+
+    if (*txt && *num)
+        return 0;
+
+    if (conversion_errno == ENOMEM)
+        PyErr_NoMemory();
+    else {
+        errno = conversion_errno;
+        PyErr_SetFromErrno(PyExc_IOError);
+    }
+
+    // Release any partial result and clear the out-pointers so the
+    // caller never sees (or frees) a dangling pointer.
+    if (*txt) {
+        acl_free((acl_t)*txt);
+        *txt = NULL;
+    }
+    if (*num) {
+        acl_free((acl_t)*num);
+        *num = NULL;
+    }
+
+    return -1;
+}
+
+// read_acl(name, isdir): return the ACLs of name as a list of text
+// renderings -- [txt, num] when isdir is false, [txt, num, def_txt,
+// def_num] when it is true.  Returns None when acl_extended_file()
+// reports 0 for name or when reading an ACL is not supported, and
+// NULL with an exception set on other errors.
+static PyObject *bup_read_acl(PyObject *self, PyObject *args)
+{
+    char *name;
+    int isdir;
+    if (!PyArg_ParseTuple(args, cstr_argf "i", &name, &isdir))
+        return NULL;
+
+    if (!acl_extended_file(name))
+        Py_RETURN_NONE;
+
+    PyObject *ret = NULL;
+    char *acl_txt = NULL, *acl_num = NULL;
+    int rv = bup_read_acl_to_text(name, ACL_TYPE_ACCESS, &acl_txt, &acl_num);
+
+    if (rv == 0) {
+        if (!isdir) {
+            ret = Py_BuildValue("[" cstr_argf cstr_argf "]",
+                                acl_txt, acl_num);
+        } else {
+            // Directories additionally carry a default ACL.
+            char *def_txt = NULL, *def_num = NULL;
+
+            rv = bup_read_acl_to_text(name, ACL_TYPE_DEFAULT,
+                                      &def_txt, &def_num);
+            if (rv == 0) {
+                ret = Py_BuildValue("[" cstr_argf cstr_argf cstr_argf cstr_argf "]",
+                                    acl_txt, acl_num, def_txt, def_num);
+
+                if (def_txt)
+                    acl_free((acl_t)def_txt);
+                if (def_num)
+                    acl_free((acl_t)def_num);
+            }
+        }
+    }
+
+    if (acl_txt)
+        acl_free((acl_t)acl_txt);
+    if (acl_num)
+        acl_free((acl_t)acl_num);
+
+    // -2 means "not supported", which is reported as "no ACLs".
+    if (rv == -2)
+        Py_RETURN_NONE;
+    return ret;
+}
+
+// Parse the ACL text s and apply it to name as an ACL of the given
+// type (ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT).  Returns 0 on success,
+// or -1 with IOError set if parsing or applying fails.
+static int bup_apply_acl_string(const char *name, acl_type_t type,
+                                const char *s)
+{
+    acl_t acl = acl_from_text(s);
+    int ret = 0;
+
+    if (!acl) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+
+    if (acl_set_file(name, type, acl)) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        ret = -1;
+    }
+
+    acl_free(acl);
+
+    return ret;
+}
+
+// apply_acl(name, acl[, def]): apply the access ACL text acl to name
+// and, when given, the default ACL text def.  Returns None, or NULL
+// with an exception set on failure.
+static PyObject *bup_apply_acl(PyObject *self, PyObject *args)
+{
+    char *name, *acl, *def = NULL;
+
+    if (!PyArg_ParseTuple(args, cstr_argf cstr_argf "|" cstr_argf, &name, &acl, &def))
+       return NULL;
+
+    if (bup_apply_acl_string(name, ACL_TYPE_ACCESS, acl))
+        return NULL;
+
+    // The default ACL must be set as ACL_TYPE_DEFAULT; applying it as
+    // an access ACL would overwrite the access ACL set just above.
+    if (def && bup_apply_acl_string(name, ACL_TYPE_DEFAULT, def))
+        return NULL;
+
+    Py_RETURN_NONE;
+}
+#endif
+
 static PyMethodDef helper_methods[] = {
     { "write_sparsely", bup_write_sparsely, METH_VARARGS,
       "Write buf excepting zeros at the end. Return trailing zero count." },
@@ -1544,20 +2298,97 @@ static PyMethodDef helper_methods[] = {
     { "localtime", bup_localtime, METH_VARARGS,
       "Return struct_time elements plus the timezone offset and name." },
 #endif
+    { "bytescmp", bup_bytescmp, METH_VARARGS,
+      "Return a negative value if x < y, zero if equal, positive otherwise."},
+    { "cat_bytes", bup_cat_bytes, METH_VARARGS,
+      "For (x_bytes, x_ofs, x_n, y_bytes, y_ofs, y_n) arguments, return their concatenation."},
 #ifdef BUP_MINCORE_BUF_TYPE
     { "mincore", bup_mincore, METH_VARARGS,
       "For mincore(src, src_n, src_off, dest, dest_off)"
       " call the system mincore(src + src_off, src_n, &dest[dest_off])." },
 #endif
+    { "getpwuid", bup_getpwuid, METH_VARARGS,
+      "Return the password database entry for the given numeric user id,"
+      " as a tuple with all C strings as bytes(), or None if the user does"
+      " not exist." },
+    { "getpwnam", bup_getpwnam, METH_VARARGS,
+      "Return the password database entry for the given user name,"
+      " as a tuple with all C strings as bytes(), or None if the user does"
+      " not exist." },
+    { "getgrgid", bup_getgrgid, METH_VARARGS,
+      "Return the group database entry for the given numeric group id,"
+      " as a tuple with all C strings as bytes(), or None if the group does"
+      " not exist." },
+    { "getgrnam", bup_getgrnam, METH_VARARGS,
+      "Return the group database entry for the given group name,"
+      " as a tuple with all C strings as bytes(), or None if the group does"
+      " not exist." },
+    { "gethostname", bup_gethostname, METH_NOARGS,
+      "Return the current hostname (as bytes)" },
+#ifdef BUP_HAVE_READLINE
+    { "set_completion_entry_function", bup_set_completion_entry_function, METH_VARARGS,
+      "Set rl_completion_entry_function.  Called as f(text, state)." },
+    { "set_attempted_completion_function", bup_set_attempted_completion_function, METH_VARARGS,
+      "Set rl_attempted_completion_function.  Called as f(text, start, end)." },
+    { "parse_and_bind", bup_parse_and_bind, METH_VARARGS,
+      "Call rl_parse_and_bind." },
+    { "get_line_buffer", bup_get_line_buffer, METH_NOARGS,
+      "Return rl_line_buffer." },
+    { "get_completer_word_break_characters", bup_get_completer_word_break_characters, METH_NOARGS,
+      "Return rl_completer_word_break_characters." },
+    { "set_completer_word_break_characters", bup_set_completer_word_break_characters, METH_VARARGS,
+      "Set rl_completer_word_break_characters." },
+    { "readline", bup_readline, METH_VARARGS,
+      "Call readline(prompt)." },
+#endif // defined BUP_HAVE_READLINE
+#ifdef ACL_SUPPORT
+    { "read_acl", bup_read_acl, METH_VARARGS,
+      "read_acl(name, isdir)\n\n"
+      "Read ACLs for the given file/dirname and return the correctly encoded"
+      " list [txt, num, def_tx, def_num] (the def_* being empty bytestrings"
+      " unless the second argument 'isdir' is True)." },
+    { "apply_acl", bup_apply_acl, METH_VARARGS,
+      "apply_acl(name, acl, def=None)\n\n"
+      "Given a file/dirname (bytes) and the ACLs to restore, do that." },
+#endif /* ACL_SUPPORT */
     { NULL, NULL, 0, NULL },  // sentinel
 };
 
+// Self-test for INTEGRAL_ASSIGNMENT_FITS(): check via assert() that
+// short-sized destinations accept the extremes of their own type and
+// reject out-of-range values from other integer types.
+static void test_integral_assignment_fits(void)
+{
+    // Size relationships the checks below rely on.
+    assert(sizeof(signed short) == sizeof(unsigned short));
+    assert(sizeof(signed short) < sizeof(signed long long));
+    assert(sizeof(signed short) < sizeof(unsigned long long));
+    assert(sizeof(unsigned short) < sizeof(signed long long));
+    assert(sizeof(unsigned short) < sizeof(unsigned long long));
+    assert(sizeof(Py_ssize_t) <= sizeof(size_t));
+
+    // Source values: the extremes of each type involved.
+    signed short short_min = SHRT_MIN, short_max = SHRT_MAX;
+    unsigned short ushort_max = USHRT_MAX;
+    signed long long llong_min = LLONG_MIN, llong_max = LLONG_MAX;
+    unsigned long long ullong_max = ULLONG_MAX;
+
+    // Signed short destination.
+    signed short short_dest;
+    assert(INTEGRAL_ASSIGNMENT_FITS(&short_dest, short_max));
+    assert(INTEGRAL_ASSIGNMENT_FITS(&short_dest, short_min));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&short_dest, ushort_max));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&short_dest, llong_min));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&short_dest, llong_max));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&short_dest, ullong_max));
+
+    // Unsigned short destination.
+    unsigned short ushort_dest;
+    assert(INTEGRAL_ASSIGNMENT_FITS(&ushort_dest, ushort_max));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&ushort_dest, short_min));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&ushort_dest, llong_min));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&ushort_dest, llong_max));
+    assert(!INTEGRAL_ASSIGNMENT_FITS(&ushort_dest, ullong_max));
+}
 
-PyMODINIT_FUNC init_helpers(void)
+static int setup_module(PyObject *m)
 {
-    // FIXME: migrate these tests to configure.  Check against the
-    // type we're going to use when passing to python.  Other stat
-    // types are tested at runtime.
+    // FIXME: migrate these tests to configure, or at least don't
+    // possibly crash the whole application.  Check against the type
+    // we're going to use when passing to python.  Other stat types
+    // are tested at runtime.
     assert(sizeof(ino_t) <= sizeof(unsigned PY_LONG_LONG));
     assert(sizeof(off_t) <= sizeof(PY_LONG_LONG));
     assert(sizeof(blksize_t) <= sizeof(PY_LONG_LONG));
@@ -1566,6 +2397,8 @@ PyMODINIT_FUNC init_helpers(void)
     assert(sizeof(PY_LONG_LONG) <= sizeof(long long));
     assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long));
 
+    test_integral_assignment_fits();
+
     // Originally required by append_sparse_region()
     {
         off_t probe;
@@ -1577,12 +2410,6 @@ PyMODINIT_FUNC init_helpers(void)
     }
 
     char *e;
-    PyObject *m = Py_InitModule("_helpers", helper_methods);
-    if (m == NULL)
-        return;
-
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY().
     {
         PyObject *value;
         value = INTEGER_TO_PY(INT_MAX);
@@ -1614,9 +2441,54 @@ PyMODINIT_FUNC init_helpers(void)
         Py_DECREF(value);
     }
 #endif
-#pragma clang diagnostic pop  // ignored "-Wtautological-compare"
 
     e = getenv("BUP_FORCE_TTY");
-    istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
+    get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
     unpythonize_argv();
+    return 1;
+}
+
+
+#if PY_MAJOR_VERSION < 3
+
+// Python 2 module entry point: create the "_helpers" module from
+// helper_methods and run the shared setup_module() initialisation.
+PyMODINIT_FUNC init_helpers(void)
+{
+    PyObject *module = Py_InitModule("_helpers", helper_methods);
+    if (module == NULL)
+        return;
+
+    // NOTE(review): Py_InitModule() is documented as returning a
+    // borrowed reference -- confirm that dropping it here is intended.
+    if (!setup_module(module))
+        Py_DECREF(module);
+}
+
+# else // PY_MAJOR_VERSION >= 3
+
+// Python 3 module definition for "_helpers": no docstring, per-module
+// state of sizeof(state_t) bytes, methods from helper_methods.  The
+// remaining members (including the traverse/clear/free hooks) are
+// left to default to NULL.
+static struct PyModuleDef helpers_def = {
+    .m_base = PyModuleDef_HEAD_INIT,
+    .m_name = "_helpers",
+    .m_doc = NULL,
+    .m_size = sizeof(state_t),
+    .m_methods = helper_methods,
+};
+
+// Python 3 module entry point: create the module from helpers_def and
+// run the shared setup_module() initialisation.  Returns the module,
+// or NULL if creation or setup fails.
+PyMODINIT_FUNC PyInit__helpers(void)
+{
+    PyObject *module = PyModule_Create(&helpers_def);
+    if (module == NULL)
+        return NULL;
+
+    if (setup_module(module))
+        return module;
+
+    // Setup failed: release the half-initialised module.
+    Py_DECREF(module);
+    return NULL;
+}
+
+#endif // PY_MAJOR_VERSION >= 3