arthur.barton.de Git - bup.git/blobdiff - lib/bup/_helpers.c
bitmatch: check for overflow via intprops
[bup.git] / lib / bup / _helpers.c
index d325578fcace65d16bf554d53b0b682c2ce95ea1..b5a3c799489e72d051e4735e2207c75e91196a11 100644 (file)
@@ -5,12 +5,15 @@
 
 // According to Python, its header has to go first:
 //   http://docs.python.org/2/c-api/intro.html#include-files
+//   http://docs.python.org/3/c-api/intro.html#include-files
 #include <Python.h>
 
+#include <arpa/inet.h>
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <arpa/inet.h>
+#include <grp.h>
+#include <pwd.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <time.h>
 #endif
 
+#if defined(BUP_RL_EXPECTED_XOPEN_SOURCE) \
+    && (!defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < BUP_RL_EXPECTED_XOPEN_SOURCE)
+# warning "_XOPEN_SOURCE version is incorrect for readline"
+#endif
+
+#ifdef BUP_HAVE_READLINE
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wstrict-prototypes"
+# ifdef BUP_READLINE_INCLUDES_IN_SUBDIR
+#   include <readline/readline.h>
+#   include <readline/history.h>
+# else
+#   include <readline.h>
+#   include <history.h>
+# endif
+# pragma GCC diagnostic pop
+#endif
+
 #include "bupsplit.h"
+#include "bup/intprops.h"
 
 #if defined(FS_IOC_GETFLAGS) && defined(FS_IOC_SETFLAGS)
 #define BUP_HAVE_FILE_ATTRS 1
 #endif
 
+#if PY_MAJOR_VERSION > 2
+# define BUP_USE_PYTHON_UTIME 1
+#endif
+
+#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now
 /*
  * Check for incomplete UTIMENSAT support (NetBSD 6), and if so,
  * pretend we don't have it.
@@ -57,6 +84,7 @@
 #if !defined(AT_FDCWD) || !defined(AT_SYMLINK_NOFOLLOW)
 #undef HAVE_UTIMENSAT
 #endif
+#endif // not defined BUP_USE_PYTHON_UTIME
 
 #ifndef FS_NOCOW_FL
 // Of course, this assumes it's a bitfield value.
@@ -89,13 +117,21 @@ static state_t state;
 #endif // PY_MAJOR_VERSION >= 3
 
 
+// Allocate zero-initialized storage for n items of the given size via
+// calloc().  Returns the new block, or NULL after setting a Python
+// MemoryError (PyErr_NoMemory) when calloc() fails.
+static void *checked_calloc(size_t n, size_t size)
+{
+    void *result = calloc(n, size);
+    if (!result)
+        PyErr_NoMemory();
+    return result;
+}
+
 static void *checked_malloc(size_t n, size_t size)
 {
     size_t total;
-    if (__builtin_mul_overflow(n, size, &total))
+    if (!INT_MULTIPLY_OK(n, size, &total))
     {
         PyErr_Format(PyExc_OverflowError,
-                     "request to allocate %lu items of size %lu is too large",
+                     "request to allocate %zu items of size %zu is too large",
                      n, size);
         return NULL;
     }
@@ -105,14 +141,6 @@ static void *checked_malloc(size_t n, size_t size)
     return result;
 }
 
-static void *checked_calloc(size_t n, size_t size)
-{
-    void *result = calloc(n, size);
-    if (!result)
-        PyErr_NoMemory();
-    return result;
-}
-
 
 #ifndef htonll
 // This function should technically be macro'd out if it's going to be used
@@ -128,29 +156,10 @@ static uint64_t htonll(uint64_t value)
 }
 #endif
 
-
-// Disabling sign-compare here should be fine since we're explicitly
-// checking for a sign mismatch, i.e. if the signs don't match, then
-// it doesn't matter what the value comparison says.
-// FIXME: ... so should we reverse the order?
-#define INTEGRAL_ASSIGNMENT_FITS(dest, src)                             \
-    ({                                                                  \
-        _Pragma("GCC diagnostic push");                                 \
-        _Pragma("GCC diagnostic ignored \"-Wsign-compare\"");           \
-        *(dest) = (src);                                                \
-        int result = *(dest) == (src) && (*(dest) < 1) == ((src) < 1);  \
-        _Pragma("GCC diagnostic pop");                                  \
-        result;                                                         \
-    })
-
-
-// At the moment any code that calls INTEGER_TO_PY() will have to
-// disable -Wtautological-compare for clang.  See below.
+#define INTEGRAL_ASSIGNMENT_FITS(dest, src) INT_ADD_OK(src, 0, dest)
 
+// Convert an integral expression to a new Python int, choosing the
+// signed or unsigned constructor via EXPR_SIGNED().  The expansion is
+// fully parenthesized so that it remains a single operand when used
+// inside a larger expression.
 #define INTEGER_TO_PY(x) \
-    (((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x))
-
-
+    (EXPR_SIGNED(x) ? PyLong_FromLongLong(x) : PyLong_FromUnsignedLongLong(x))
 
 #if PY_MAJOR_VERSION < 3
 static int bup_ulong_from_pyint(unsigned long *x, PyObject *py,
@@ -213,7 +222,7 @@ static int bup_uint_from_py(unsigned int *x, PyObject *py, const char *name)
         PyErr_Format(PyExc_OverflowError, "%s too big for unsigned int", name);
         return 0;
     }
-    *x = tmp;
+    *x = (unsigned int) tmp;
     return 1;
 }
 
@@ -273,56 +282,47 @@ static PyObject *bup_bytescmp(PyObject *self, PyObject *args)
 }
 
 
-// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
-#if __WIN32__ || __CYGWIN__
-
-// There's no 'ps' on win32 anyway, and Py_GetArgcArgv() isn't available.
-static void unpythonize_argv(void) { }
-
-#else // not __WIN32__
-
-// For some reason this isn't declared in Python.h
-extern void Py_GetArgcArgv(int *argc, char ***argv);
-
-static void unpythonize_argv(void)
+// Python entry point: given (x, x_ofs, x_n, y, y_ofs, y_n), return a new
+// bytes object holding the x_n bytes of x starting at x_ofs followed by
+// the y_n bytes of y starting at y_ofs.  Raises ValueError when an
+// offset or extent is negative or reaches past the end of its buffer,
+// and OverflowError when the combined length exceeds PY_SSIZE_T_MAX.
+// NOTE(review): rbuf_argf is defined elsewhere in this file; it is
+// presumably the read-buffer format yielding a pointer and a length.
+static PyObject *bup_cat_bytes(PyObject *self, PyObject *args)
 {
-    int argc, i;
-    char **argv, *arge;
-    
-    Py_GetArgcArgv(&argc, &argv);
-    
-    for (i = 0; i < argc-1; i++)
-    {
-       if (argv[i] + strlen(argv[i]) + 1 != argv[i+1])
-       {
-           // The argv block doesn't work the way we expected; it's unsafe
-           // to mess with it.
-           return;
-       }
-    }
-    
-    arge = argv[argc-1] + strlen(argv[argc-1]) + 1;
-    
-    if (strstr(argv[0], "python") && argv[1] == argv[0] + strlen(argv[0]) + 1)
-    {
-       char *p;
-       size_t len, diff;
-       p = strrchr(argv[1], '/');
-       if (p)
-       {
-           p++;
-           diff = p - argv[0];
-           len = arge - p;
-           memmove(argv[0], p, len);
-           memset(arge - diff, 0, diff);
-           for (i = 0; i < argc; i++)
-               argv[i] = argv[i+1] ? argv[i+1]-diff : NULL;
-       }
-    }
+    unsigned char *bufx = NULL, *bufy = NULL;
+    Py_ssize_t bufx_len, bufx_ofs, bufx_n;
+    Py_ssize_t bufy_len, bufy_ofs, bufy_n;
+    if (!PyArg_ParseTuple(args,
+                          rbuf_argf "nn"
+                          rbuf_argf "nn",
+                          &bufx, &bufx_len, &bufx_ofs, &bufx_n,
+                          &bufy, &bufy_len, &bufy_ofs, &bufy_n))
+       return NULL;
+    if (bufx_ofs < 0)
+        return PyErr_Format(PyExc_ValueError, "negative x offset");
+    if (bufx_n < 0)
+        return PyErr_Format(PyExc_ValueError, "negative x extent");
+    if (bufx_ofs > bufx_len)
+        return PyErr_Format(PyExc_ValueError, "x offset greater than length");
+    // Compare against the remaining length so the check itself cannot
+    // overflow (bufx_ofs <= bufx_len was established just above).
+    if (bufx_n > bufx_len - bufx_ofs)
+        return PyErr_Format(PyExc_ValueError, "x extent past end of buffer");
+
+    if (bufy_ofs < 0)
+        return PyErr_Format(PyExc_ValueError, "negative y offset");
+    if (bufy_n < 0)
+        return PyErr_Format(PyExc_ValueError, "negative y extent");
+    if (bufy_ofs > bufy_len)
+        return PyErr_Format(PyExc_ValueError, "y offset greater than length");
+    if (bufy_n > bufy_len - bufy_ofs)
+        return PyErr_Format(PyExc_ValueError, "y extent past end of buffer");
+
+    // Make sure bufx_n + bufy_n below cannot overflow Py_ssize_t.
+    if (bufy_n > PY_SSIZE_T_MAX - bufx_n)
+        return PyErr_Format(PyExc_OverflowError, "result length too long");
+
+    PyObject *result = PyBytes_FromStringAndSize(NULL, bufx_n + bufy_n);
+    if (!result)
+        return PyErr_NoMemory();
+    char *buf = PyBytes_AS_STRING(result);
+    memcpy(buf, bufx + bufx_ofs, bufx_n);
+    memcpy(buf + bufx_n, bufy + bufy_ofs, bufy_n);
+    return result;
 }
 
-#endif // not __WIN32__ or __CYGWIN__
-
 
 static int write_all(int fd, const void *buf, const size_t count)
 {
@@ -338,15 +338,11 @@ static int write_all(int fd, const void *buf, const size_t count)
 }
 
 
-static int uadd(unsigned long long *dest,
-                const unsigned long long x,
-                const unsigned long long y)
+// Add x and y, storing the sum in *dest via INT_ADD_OK() and returning
+// that macro's result.
+// NOTE(review): INT_ADD_OK comes from bup/intprops.h; presumably it is
+// nonzero only when the sum fits in *dest -- confirm against the header.
+static inline int uadd(unsigned long long *dest,
+                       const unsigned long long x,
+                       const unsigned long long y)
 {
-    const unsigned long long result = x + y;
-    if (result < x || result < y)
-        return 0;
-    *dest = result;
-    return 1;
+    return INT_ADD_OK(x, y, dest);
 }
 
 
@@ -594,7 +590,7 @@ static PyObject *splitbuf(PyObject *self, PyObject *args)
         if (!PyArg_ParseTuple(args, "t#", &buf, &len))
             return NULL;
         assert(len <= INT_MAX);
-        out = bupsplit_find_ofs(buf, len, &bits);
+        out = bupsplit_find_ofs(buf, (int) len, &bits);
     }
     if (out) assert(bits >= BUP_BLOBBITS);
     return Py_BuildValue("ii", out, bits);
@@ -624,8 +620,14 @@ static PyObject *bitmatch(PyObject *self, PyObject *args)
        }
     }
     
-    assert(byte <= (INT_MAX >> 3));
-    return Py_BuildValue("i", byte*8 + bit);
+    unsigned long long result;
+    if (!INT_MULTIPLY_OK(byte, 8, &result)
+        || !INT_ADD_OK(result, bit, &result))
+    {
+        PyErr_Format(PyExc_OverflowError, "bitmatch bit count too large");
+        return NULL;
+    }
+    return PyLong_FromUnsignedLongLong(result);
 }
 
 
@@ -845,7 +847,8 @@ struct idx {
 static void _fix_idx_order(struct idx **idxs, Py_ssize_t *last_i)
 {
     struct idx *idx;
-    int low, mid, high, c = 0;
+    Py_ssize_t low, mid, high;
+    int c = 0;
 
     idx = idxs[*last_i];
     if (idxs[*last_i]->cur >= idxs[*last_i]->end)
@@ -1007,7 +1010,7 @@ static PyObject *write_idx(PyObject *self, PyObject *args)
     PyObject *part;
     unsigned int total = 0;
     uint32_t count;
-    int i, j, ofs64_count;
+    int i;
     uint32_t *fan_ptr, *crc_ptr, *ofs_ptr;
     uint64_t *ofs64_ptr;
     struct sha *sha_ptr;
@@ -1039,16 +1042,21 @@ static PyObject *write_idx(PyObject *self, PyObject *args)
     ofs64_ptr = (uint64_t *)&ofs_ptr[total];
 
     count = 0;
-    ofs64_count = 0;
+    uint32_t ofs64_count = 0;
     for (i = 0; i < FAN_ENTRIES; ++i)
     {
-       int plen;
        part = PyList_GET_ITEM(idx, i);
        PyList_Sort(part);
-       plen = PyList_GET_SIZE(part);
-       count += plen;
+        uint32_t plen;
+        if (!INTEGRAL_ASSIGNMENT_FITS(&plen, PyList_GET_SIZE(part))
+            || UINT32_MAX - count < plen) {
+            PyErr_Format(PyExc_OverflowError, "too many objects in index part");
+            goto clean_and_return;
+        }
+        count += plen;
        *fan_ptr++ = htonl(count);
-       for (j = 0; j < plen; ++j)
+        uint32_t j;
+        for (j = 0; j < plen; ++j)
        {
            unsigned char *sha = NULL;
            Py_ssize_t sha_len = 0;
@@ -1116,7 +1124,7 @@ static PyObject *write_random(PyObject *self, PyObject *args)
     {
        unsigned i;
        for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
-           buf[i] = random();
+           buf[i] = (uint32_t) random();
        ret = write(fd, buf, sizeof(buf));
        if (ret < 0)
            ret = 0;
@@ -1132,7 +1140,7 @@ static PyObject *write_random(PyObject *self, PyObject *args)
     {
        unsigned i;
        for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
-           buf[i] = random();
+           buf[i] = (uint32_t) random();
        ret = write(fd, buf, len % 1024);
        if (ret < 0)
            ret = 0;
@@ -1154,7 +1162,7 @@ static PyObject *random_sha(PyObject *self, PyObject *args)
     if (!seeded)
     {
        assert(sizeof(shabuf) == 20);
-       srandom(time(NULL));
+       srandom((unsigned int) time(NULL));
        seeded = 1;
     }
     
@@ -1163,8 +1171,8 @@ static PyObject *random_sha(PyObject *self, PyObject *args)
     
     memset(shabuf, 0, sizeof(shabuf));
     for (i=0; i < 20/4; i++)
-       shabuf[i] = random();
-    return Py_BuildValue("s#", shabuf, 20);
+       shabuf[i] = (uint32_t) random();
+    return Py_BuildValue(rbuf_argf, shabuf, 20);
 }
 
 
@@ -1321,6 +1329,7 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args)
 #endif /* def BUP_HAVE_FILE_ATTRS */
 
 
+#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now
 #ifndef HAVE_UTIMENSAT
 #ifndef HAVE_UTIMES
 #error "cannot find utimensat or utimes()"
@@ -1329,6 +1338,7 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args)
 #error "cannot find utimensat or lutimes()"
 #endif
 #endif
+#endif // not defined BUP_USE_PYTHON_UTIME
 
 #define ASSIGN_PYLONG_TO_INTEGRAL(dest, pylong, overflow) \
     ({                                                     \
@@ -1365,6 +1375,7 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args)
         })
 
 
+#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now
 #ifdef HAVE_UTIMENSAT
 
 static PyObject *bup_utimensat(PyObject *self, PyObject *args)
@@ -1482,6 +1493,8 @@ static PyObject *bup_lutimes(PyObject *self, PyObject *args)
 }
 #endif /* def HAVE_LUTIMES */
 
+#endif // not defined BUP_USE_PYTHON_UTIME
+
 
 #ifdef HAVE_STAT_ST_ATIM
 # define BUP_STAT_ATIME_NS(st) (st)->st_atim.tv_nsec
@@ -1498,9 +1511,6 @@ static PyObject *bup_lutimes(PyObject *self, PyObject *args)
 #endif
 
 
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY().
-
 static PyObject *stat_struct_to_py(const struct stat *st,
                                    const char *filename,
                                    int fd)
@@ -1526,7 +1536,6 @@ static PyObject *stat_struct_to_py(const struct stat *st,
                          (long) BUP_STAT_CTIME_NS(st));
 }
 
-#pragma clang diagnostic pop  // ignored "-Wtautological-compare"
 
 static PyObject *bup_stat(PyObject *self, PyObject *args)
 {
@@ -1645,7 +1654,7 @@ static PyObject *bup_mincore(PyObject *self, PyObject *args)
         result = PyErr_Format(PyExc_OverflowError, "src_n overflows size_t");
         goto clean_and_return;
     }
-    int rc = mincore((void *)(src.buf + src_off), src_n,
+    int rc = mincore((void *)(src.buf + src_off), length,
                      (BUP_MINCORE_BUF_TYPE *) (dest.buf + dest_off));
     if (rc != 0) {
         result = PyErr_SetFromErrno(PyExc_OSError);
@@ -1660,6 +1669,669 @@ static PyObject *bup_mincore(PyObject *self, PyObject *args)
 }
 #endif /* def BUP_MINCORE_BUF_TYPE */
 
+// Encode the non-negative value val into buf as a variable-length
+// unsigned integer: seven payload bits per byte, least significant
+// group first, with 0x80 set on every byte except the last.  Returns
+// the number of bytes written (always at least 1 on success).  For a
+// negative val, sets a Python exception and returns 0.  No bounds
+// checking is done; the caller must supply a large enough buf.
+static unsigned int vuint_encode(long long val, char *buf)
+{
+    unsigned int len = 0;
+
+    if (val < 0) {
+        PyErr_SetString(PyExc_Exception, "vuints must not be negative");
+        return 0;
+    }
+
+    do {
+        buf[len] = val & 0x7f;
+
+        val >>= 7;
+        // More bits remain, so flag this byte as "continued".
+        if (val)
+            buf[len] |= 0x80;
+
+        len++;
+    } while (val);
+
+    return len;
+}
+
+// Encode val into buf as a variable-length signed integer and return
+// the number of bytes written.  The first byte carries the six least
+// significant bits of the magnitude, the sign in 0x40, and 0x80 when
+// more bytes follow; each later byte carries seven more bits, again
+// with 0x80 as the continuation flag.  No bounds checking is done; the
+// caller must supply room for the longest encoding (10 bytes for a
+// 64-bit long long).
+static unsigned int vint_encode(long long val, char *buf)
+{
+    unsigned int len = 1;
+    char sign = 0;
+    // Track the magnitude as an unsigned value: negating LLONG_MIN as a
+    // signed long long overflows (undefined behavior), and a magnitude
+    // that stays negative never shifts down to zero in the loop below.
+    unsigned long long mag = (unsigned long long) val;
+
+    if (val < 0) {
+        sign = 0x40;
+        mag = 0ULL - mag;
+    }
+
+    buf[0] = (mag & 0x3f) | sign;
+    mag >>= 6;
+    if (mag)
+        buf[0] |= 0x80;
+
+    while (mag) {
+        buf[len] = mag & 0x7f;
+        mag >>= 7;
+        if (mag)
+            buf[len] |= 0x80;
+        len++;
+    }
+
+    return len;
+}
+
+// Python entry point: encode one non-negative integer argument as a
+// vuint and return the encoded bytes.  Returns NULL with an exception
+// set when argument parsing fails or vuint_encode() rejects the value.
+static PyObject *bup_vuint_encode(PyObject *self, PyObject *args)
+{
+    long long val;
+    // Each output byte carries at most 7 bits of val, so round the bit
+    // width up to whole 7-bit groups (10 bytes for a 64-bit long long).
+    // A smaller buffer is overrun by large values.
+    char buf[(sizeof(val) * CHAR_BIT + 6) / 7];
+
+    if (!PyArg_ParseTuple(args, "L", &val))
+       return NULL;
+
+    unsigned int len = vuint_encode(val, buf);
+    if (!len)
+        return NULL;
+
+    return PyBytes_FromStringAndSize(buf, len);
+}
+
+// Python entry point: encode one integer argument as a vint and return
+// the encoded bytes.  Returns NULL with an exception set when argument
+// parsing fails.
+static PyObject *bup_vint_encode(PyObject *self, PyObject *args)
+{
+    long long val;
+    // The first byte carries 6 bits and every later byte 7, so rounding
+    // the bit width up to whole 7-bit groups always leaves enough room
+    // (10 bytes for a 64-bit long long).  A smaller buffer is overrun
+    // by values of large magnitude.
+    char buf[(sizeof(val) * CHAR_BIT + 6) / 7];
+
+    if (!PyArg_ParseTuple(args, "L", &val))
+       return NULL;
+
+    return PyBytes_FromStringAndSize(buf, vint_encode(val, buf));
+}
+
+// Build a Python tuple from the NULL-terminated array of C strings
+// cstrs, converting each element via Py_BuildValue(cstr_argf, ...).
+// Returns a new reference, or NULL with an exception set when the
+// array is too long for a Py_ssize_t or an allocation fails.
+static PyObject *tuple_from_cstrs(char **cstrs)
+{
+    // Assumes list is null terminated
+    size_t n = 0;
+    while(cstrs[n] != NULL)
+        n++;
+
+    Py_ssize_t sn;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&sn, n))
+        return PyErr_Format(PyExc_OverflowError, "string array too large");
+
+    PyObject *result = PyTuple_New(sn);
+    // PyTuple_New() has already set the exception; bail out before the
+    // loop below dereferences a NULL tuple.
+    if (result == NULL)
+        return NULL;
+    Py_ssize_t i = 0;
+    for (i = 0; i < sn; i++)
+    {
+        PyObject *gname = Py_BuildValue(cstr_argf, cstrs[i]);
+        if (gname == NULL)
+        {
+            Py_DECREF(result);
+            return NULL;
+        }
+        // The tuple takes over the reference to gname.
+        PyTuple_SET_ITEM(result, i, gname);
+    }
+    return result;
+}
+
+// Raise the Python exception matching the current errno and return
+// NULL (the value returned by the PyErr_* call): MemoryError for
+// ENOMEM, IOError for EIO/EMFILE/ENFILE, and OSError for anything else.
+static PyObject *appropriate_errno_ex(void)
+{
+    switch (errno) {
+    case ENOMEM:
+        return PyErr_NoMemory();
+    case EIO:
+    case EMFILE:
+    case ENFILE:
+        // In 3.3 IOError was merged into OSError.
+        return PyErr_SetFromErrno(PyExc_IOError);
+    default:
+        return PyErr_SetFromErrno(PyExc_OSError);
+    }
+}
+
+
+// Convert a struct passwd to the Python tuple
+// (name, passwd, uid, gid, gecos, dir, shell), or to None when pwd is
+// NULL.  Returns a new reference, or NULL with an exception set.
+static PyObject *pwd_struct_to_py(const struct passwd *pwd)
+{
+    // We can check the known (via POSIX) signed and unsigned types at
+    // compile time, but not (easily) the unspecified types, so handle
+    // those via INTEGER_TO_PY().
+    if (pwd == NULL)
+        Py_RETURN_NONE;
+    // INTEGER_TO_PY() creates new references, so hand them over with
+    // "N"; "O" would add a second reference that is never released.
+    return Py_BuildValue(cstr_argf cstr_argf "NN"
+                         cstr_argf cstr_argf cstr_argf,
+                         pwd->pw_name,
+                         pwd->pw_passwd,
+                         INTEGER_TO_PY(pwd->pw_uid),
+                         INTEGER_TO_PY(pwd->pw_gid),
+                         pwd->pw_gecos,
+                         pwd->pw_dir,
+                         pwd->pw_shell);
+}
+
+// Python entry point: look up the password database entry for a
+// numeric user id.  Returns the tuple built by pwd_struct_to_py(), or
+// None when getpwuid() finds no entry.  Raises OverflowError when the
+// argument does not fit in a uid_t, and an errno-based exception when
+// the lookup itself fails.
+static PyObject *bup_getpwuid(PyObject *self, PyObject *args)
+{
+    unsigned long long py_uid;
+    if (!PyArg_ParseTuple(args, "K", &py_uid))
+       return NULL;
+    uid_t uid;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&uid, py_uid))
+        return PyErr_Format(PyExc_OverflowError, "uid too large for uid_t");
+
+    // Clear errno first so that a NULL result with errno still zero can
+    // be told apart (no such entry) from a real failure.
+    errno = 0;
+    struct passwd *pwd = getpwuid(uid);
+    if (!pwd && errno)
+        return appropriate_errno_ex();
+    return pwd_struct_to_py(pwd);
+}
+
+// Python entry point: look up the password database entry for a user
+// name (one argument accepted by the "S" format).  Returns the tuple
+// built by pwd_struct_to_py(), or None when getpwnam() finds no entry;
+// raises an errno-based exception when the lookup itself fails.
+static PyObject *bup_getpwnam(PyObject *self, PyObject *args)
+{
+    PyObject *py_name;
+    if (!PyArg_ParseTuple(args, "S", &py_name))
+       return NULL;
+
+    char *name = PyBytes_AS_STRING(py_name);
+    // Clear errno first so that a NULL result with errno still zero can
+    // be told apart (no such entry) from a real failure.
+    errno = 0;
+    struct passwd *pwd = getpwnam(name);
+    if (!pwd && errno)
+        return appropriate_errno_ex();
+    return pwd_struct_to_py(pwd);
+}
+
+// Convert a struct group to the Python tuple
+// (name, passwd, gid, members), where members is a tuple of the member
+// names, or to None when grp is NULL.  Returns a new reference, or
+// NULL with an exception set.
+static PyObject *grp_struct_to_py(const struct group *grp)
+{
+    // We can check the known (via POSIX) signed and unsigned types at
+    // compile time, but not (easily) the unspecified types, so handle
+    // those via INTEGER_TO_PY().
+    if (grp == NULL)
+        Py_RETURN_NONE;
+
+    PyObject *members = tuple_from_cstrs(grp->gr_mem);
+    if (members == NULL)
+        return NULL;
+    // Both the gid object and members are new references owned here, so
+    // hand them over with "N"; "O" would add a second reference to each
+    // that is never released.
+    return Py_BuildValue(cstr_argf cstr_argf "NN",
+                         grp->gr_name,
+                         grp->gr_passwd,
+                         INTEGER_TO_PY(grp->gr_gid),
+                         members);
+}
+
+// Python entry point: look up the group database entry for a numeric
+// group id.  Returns the tuple built by grp_struct_to_py(), or None
+// when getgrgid() finds no entry.  Raises OverflowError when the
+// argument does not fit in a gid_t, and an errno-based exception when
+// the lookup itself fails.
+static PyObject *bup_getgrgid(PyObject *self, PyObject *args)
+{
+    unsigned long long py_gid;
+    if (!PyArg_ParseTuple(args, "K", &py_gid))
+       return NULL;
+    gid_t gid;
+    if (!INTEGRAL_ASSIGNMENT_FITS(&gid, py_gid))
+        return PyErr_Format(PyExc_OverflowError, "gid too large for gid_t");
+
+    // Clear errno first so that a NULL result with errno still zero can
+    // be told apart (no such entry) from a real failure.
+    errno = 0;
+    struct group *grp = getgrgid(gid);
+    if (!grp && errno)
+        return appropriate_errno_ex();
+    return grp_struct_to_py(grp);
+}
+
+// Python entry point: look up the group database entry for a group
+// name (one argument accepted by the "S" format).  Returns the tuple
+// built by grp_struct_to_py(), or None when getgrnam() finds no entry;
+// raises an errno-based exception when the lookup itself fails.
+static PyObject *bup_getgrnam(PyObject *self, PyObject *args)
+{
+    PyObject *py_name;
+    if (!PyArg_ParseTuple(args, "S", &py_name))
+       return NULL;
+
+    char *name = PyBytes_AS_STRING(py_name);
+    // Clear errno first so that a NULL result with errno still zero can
+    // be told apart (no such entry) from a real failure.
+    errno = 0;
+    struct group *grp = getgrnam(name);
+    if (!grp && errno)
+        return appropriate_errno_ex();
+    return grp_struct_to_py(grp);
+}
+
+
+// Python entry point: return the result of gethostname() as bytes, or
+// raise IOError from errno when the call fails.
+static PyObject *bup_gethostname(PyObject *mod, PyObject *ignore)
+{
+    // The buffer starts out zeroed and gethostname() is offered one byte
+    // less than its size, so the final byte always stays a terminator.
+#ifdef HOST_NAME_MAX
+    char buf[HOST_NAME_MAX + 1] = {};
+#else
+    /* 'SUSv2 guarantees that "Host names are limited to 255 bytes".' */
+    char buf[256] = {};
+#endif
+
+    if (gethostname(buf, sizeof(buf) - 1))
+        return PyErr_SetFromErrno(PyExc_IOError);
+    return PyBytes_FromString(buf);
+}
+
+
+#ifdef BUP_HAVE_READLINE
+
+// Return a freshly allocated, NUL-terminated copy of the contents of
+// the Python bytes object bytes; the caller owns the result and must
+// free() it.  Returns NULL with an exception set when bytes cannot be
+// read via PyBytes_AsStringAndSize(), when its length plus the
+// terminator does not fit in a size_t, or when allocation fails.
+static char *cstr_from_bytes(PyObject *bytes)
+{
+    char *buf;
+    Py_ssize_t length;
+    int rc = PyBytes_AsStringAndSize(bytes, &buf, &length);
+    if (rc == -1)
+        return NULL;
+    // Room for the content plus the terminating NUL.
+    size_t c_len;
+    if (!INT_ADD_OK(length, 1, &c_len)) {
+        PyErr_Format(PyExc_OverflowError,
+                     "Cannot convert ssize_t sized bytes object (%zd) to C string",
+                     length);
+        return NULL;
+    }
+    char *result = checked_malloc(c_len, sizeof(char));
+    if (!result)
+        return NULL;
+    memcpy(result, buf, length);
+    result[length] = 0;
+    return result;
+}
+
+// Convert a Python sequence of bytes objects into a freshly allocated,
+// NULL-terminated array of freshly allocated C strings (one
+// cstr_from_bytes() copy per item).  The caller owns the array and
+// every string in it.  Returns NULL with an exception set on failure,
+// after releasing anything allocated so far.
+static char **cstrs_from_seq(PyObject *seq)
+{
+    char **result = NULL;
+    seq = PySequence_Fast(seq, "Cannot convert sequence items to C strings");
+    if (!seq)
+        return NULL;
+
+    const Py_ssize_t len = PySequence_Fast_GET_SIZE(seq);
+    if (len > PY_SSIZE_T_MAX - 1) {
+        PyErr_Format(PyExc_OverflowError,
+                     "Sequence length %zd too large for conversion to C array",
+                     len);
+        goto finish;
+    }
+    // One extra slot for the terminating NULL pointer.
+    result = checked_malloc(len + 1, sizeof(char *));
+    if (!result)
+        goto finish;
+    Py_ssize_t i = 0;
+    for (i = 0; i < len; i++)
+    {
+        PyObject *item = PySequence_Fast_GET_ITEM(seq, i);
+        if (!item)
+            goto abandon_result;
+        result[i] = cstr_from_bytes(item);
+        if (!result[i])
+            goto abandon_result;
+    }
+    result[len] = NULL;
+    goto finish;
+
+ abandon_result:
+    // Exactly result[0] .. result[i - 1] were filled in before the
+    // failure; release all of them (including slot 0) and nothing else.
+    while (i > 0)
+        free(result[--i]);
+    free(result);
+    result = NULL;
+ finish:
+    Py_DECREF(seq);
+    return result;
+}
+
+// The copy of the word break characters most recently installed by
+// bup_set_completer_word_break_characters(), kept so that it can be
+// freed when it is replaced.
+static char* our_word_break_chars = NULL;
+
+// Python entry point: install a private copy of the given string as
+// readline's rl_completer_word_break_characters and free the copy made
+// by any earlier call.  Returns None, or raises MemoryError when the
+// copy cannot be allocated (the previous setting then stays in place).
+static PyObject *
+bup_set_completer_word_break_characters(PyObject *self, PyObject *args)
+{
+    char *bytes;
+    if (!PyArg_ParseTuple(args, cstr_argf, &bytes))
+       return NULL;
+    char *prev = our_word_break_chars;
+    char *next = strdup(bytes);
+    if (!next)
+        return PyErr_NoMemory();
+    // Switch readline over to the new copy before releasing the old one.
+    our_word_break_chars = next;
+    rl_completer_word_break_characters = next;
+    if (prev)
+        free(prev);
+    Py_RETURN_NONE;
+}
+
+// Python entry point: return readline's current
+// rl_completer_word_break_characters as bytes.
+// NOTE(review): this passes the readline variable straight to
+// PyBytes_FromString(); confirm it can never be NULL at this point
+// (e.g. before readline has been initialized).
+static PyObject *
+bup_get_completer_word_break_characters(PyObject *self, PyObject *args)
+{
+    return PyBytes_FromString(rl_completer_word_break_characters);
+}
+
+// Python entry point: return the contents of readline's rl_line_buffer
+// as bytes.
+// NOTE(review): this passes the readline variable straight to
+// PyBytes_FromString(); confirm it can never be NULL at this point.
+static PyObject *bup_get_line_buffer(PyObject *self, PyObject *args)
+{
+    return PyBytes_FromString(rl_line_buffer);
+}
+
+// Python entry point: pass one line of readline init-file syntax to
+// rl_parse_and_bind().  Returns None on success; raises MemoryError
+// when the working copy cannot be allocated, or OSError when
+// rl_parse_and_bind() returns nonzero.
+static PyObject *
+bup_parse_and_bind(PyObject *self, PyObject *args)
+{
+    char *bytes;
+    if (!PyArg_ParseTuple(args, cstr_argf ":parse_and_bind", &bytes))
+       return NULL;
+    char *tmp = strdup(bytes); // Because it may modify the arg
+    if (!tmp)
+        return PyErr_NoMemory();
+    int rc = rl_parse_and_bind(tmp);
+    free(tmp);
+    if (rc != 0)
+        return PyErr_Format(PyExc_OSError,
+                            "system rl_parse_and_bind failed (%d)", rc);
+    Py_RETURN_NONE;
+}
+
+
+// The Python callable installed by
+// bup_set_attempted_completion_function(), or NULL when none is set.
+static PyObject *py_on_attempted_completion;
+// The array most recently returned by on_attempted_completion().
+static char **prev_completions;
+
+// C callback installed as rl_attempted_completion_function: call the
+// Python callable with (text, start, end) and convert its result to a
+// NULL-terminated array of C strings via cstrs_from_seq().  Returns
+// NULL when no callable is set, when the call fails (the Python
+// exception is left set), or when the callable returns None.
+// NOTE(review): only the previous array itself is freed here, not the
+// strings it held -- confirm how ownership of the returned array and
+// its strings is split with readline.
+static char **on_attempted_completion(const char *text, int start, int end)
+{
+    if (!py_on_attempted_completion)
+        return NULL;
+
+    char **result = NULL;
+    PyObject *py_result = PyObject_CallFunction(py_on_attempted_completion,
+                                                cstr_argf "ii",
+                                                text, start, end);
+    if (!py_result)
+        return NULL;
+    if (py_result != Py_None) {
+        result = cstrs_from_seq(py_result);
+        free(prev_completions);
+        prev_completions = result;
+    }
+    Py_DECREF(py_result);
+    return result;
+}
+
+// Python entry point: install completer as the callable behind
+// readline's rl_attempted_completion_function, or remove the hook
+// entirely when completer is None.  A new reference to the callable is
+// held while it is installed, and the reference to any previously
+// installed callable is dropped.  Returns None.
+static PyObject *
+bup_set_attempted_completion_function(PyObject *self, PyObject *args)
+{
+    PyObject *completer;
+    if (!PyArg_ParseTuple(args, "O", &completer))
+       return NULL;
+
+    PyObject *prev = py_on_attempted_completion;
+    if (completer == Py_None)
+    {
+        py_on_attempted_completion = NULL;
+        rl_attempted_completion_function = NULL;
+    } else {
+        py_on_attempted_completion = completer;
+        rl_attempted_completion_function = on_attempted_completion;
+        Py_INCREF(completer);
+    }
+    // Release the old callable only after the new one is in place.
+    Py_XDECREF(prev);
+    Py_RETURN_NONE;
+}
+
+
+// The Python callable installed by bup_set_completion_entry_function(),
+// or NULL when none is set.
+static PyObject *py_on_completion_entry;
+
+// C callback installed as rl_completion_entry_function: call the
+// Python callable with (text, state) and return a freshly allocated C
+// string copy of the bytes it returns.  Returns NULL when no callable
+// is set, when the call or the conversion fails (the Python exception
+// is left set), or when the callable returns None.
+static char *on_completion_entry(const char *text, int state)
+{
+    if (!py_on_completion_entry)
+        return NULL;
+
+    PyObject *py_result = PyObject_CallFunction(py_on_completion_entry,
+                                                cstr_argf "i", text, state);
+    if (!py_result)
+        return NULL;
+    char *result = (py_result == Py_None) ? NULL : cstr_from_bytes(py_result);
+    Py_DECREF(py_result);
+    return result;
+}
+
+// Python entry point: install completer as the callable behind
+// readline's rl_completion_entry_function, or remove the hook entirely
+// when completer is None.  A new reference to the callable is held
+// while it is installed, and the reference to any previously installed
+// callable is dropped.  Returns None.
+static PyObject *
+bup_set_completion_entry_function(PyObject *self, PyObject *args)
+{
+    PyObject *completer;
+    if (!PyArg_ParseTuple(args, "O", &completer))
+       return NULL;
+
+    PyObject *prev = py_on_completion_entry;
+    if (completer == Py_None) {
+        py_on_completion_entry = NULL;
+        rl_completion_entry_function = NULL;
+    } else {
+        py_on_completion_entry = completer;
+        rl_completion_entry_function = on_completion_entry;
+        Py_INCREF(completer);
+    }
+    // Release the old callable only after the new one is in place.
+    Py_XDECREF(prev);
+    Py_RETURN_NONE;
+}
+
+// Python entry point: call readline() with the given prompt and return
+// the line it produces as bytes.  Raises EOFError when readline()
+// returns NULL.
+static PyObject *
+bup_readline(PyObject *self, PyObject *args)
+{
+    char *prompt;
+    if (!PyArg_ParseTuple(args, cstr_argf, &prompt))
+       return NULL;
+    char *line = readline(prompt);
+    if (!line)
+        return PyErr_Format(PyExc_EOFError, "readline EOF");
+    // Copy the line into a Python object, then free readline's buffer.
+    PyObject *result = PyBytes_FromString(line);
+    free(line);
+    return result;
+}
+
+#endif // defined BUP_HAVE_READLINE
+
+#if defined(HAVE_SYS_ACL_H) && \
+    defined(HAVE_ACL_LIBACL_H) && \
+    defined(HAVE_ACL_EXTENDED_FILE) && \
+    defined(HAVE_ACL_GET_FILE) && \
+    defined(HAVE_ACL_TO_ANY_TEXT) && \
+    defined(HAVE_ACL_FROM_TEXT) && \
+    defined(HAVE_ACL_SET_FILE)
+#define ACL_SUPPORT 1
+#include <sys/acl.h>
+#include <acl/libacl.h>
+
+// Read the ACL of the given type from the file name and convert it to
+// two text forms: *txt (abbreviated) and *num (abbreviated, with
+// numeric ids).  On success the caller owns both strings and must
+// release each with acl_free().  On failure after the ACL was read,
+// both *txt and *num are left NULL; when the ACL cannot be read at
+// all, they are left untouched.
+// Returns
+//   0 for success
+//  -1 for errors, with python exception set
+//  -2 for ignored errors (not supported)
+static int bup_read_acl_to_text(const char *name, acl_type_t type,
+                                char **txt, char **num)
+{
+    acl_t acl;
+
+    acl = acl_get_file(name, type);
+    if (!acl) {
+        if (errno == EOPNOTSUPP || errno == ENOSYS)
+            return -2;
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+
+    *num = NULL;
+    *txt = acl_to_any_text(acl, "", '\n', TEXT_ABBREVIATE);
+    if (*txt)
+        *num = acl_to_any_text(acl, "", '\n', TEXT_ABBREVIATE | TEXT_NUMERIC_IDS);
+
+    // The ACL object is no longer needed once converted; release it on
+    // every path, keeping the errno left by the conversion so that the
+    // error report below still describes the conversion failure.
+    const int conv_errno = errno;
+    acl_free(acl);
+
+    if (*txt && *num)
+        return 0;
+
+    errno = conv_errno;
+    if (errno == ENOMEM)
+        PyErr_NoMemory();
+    else
+        PyErr_SetFromErrno(PyExc_IOError);
+
+    // Reset the outputs after freeing them, so that a caller which
+    // frees any non-NULL result during its own cleanup cannot free the
+    // same string a second time.
+    if (*txt) {
+        acl_free((acl_t)*txt);
+        *txt = NULL;
+    }
+    if (*num) {
+        acl_free((acl_t)*num);
+        *num = NULL;
+    }
+
+    return -1;
+}
+
+// Python entry point: read the ACLs of the file name (arguments: name,
+// isdir).  Returns None when acl_extended_file() returns 0 for the
+// file or when reading ACLs is not supported (helper result -2).
+// Otherwise returns the list [acl_txt, acl_num], extended to
+// [acl_txt, acl_num, def_txt, def_num] with the default ACL when isdir
+// is true.  Returns NULL with an exception set on other errors.
+static PyObject *bup_read_acl(PyObject *self, PyObject *args)
+{
+    char *name;
+    int isdir, rv;
+    PyObject *ret = NULL;
+    char *acl_txt = NULL, *acl_num = NULL;
+
+    if (!PyArg_ParseTuple(args, cstr_argf "i", &name, &isdir))
+       return NULL;
+
+    if (!acl_extended_file(name))
+        Py_RETURN_NONE;
+
+    rv = bup_read_acl_to_text(name, ACL_TYPE_ACCESS, &acl_txt, &acl_num);
+    if (rv)
+        goto out;
+
+    if (isdir) {
+        char *def_txt = NULL, *def_num = NULL;
+
+        rv = bup_read_acl_to_text(name, ACL_TYPE_DEFAULT, &def_txt, &def_num);
+        if (rv)
+            goto out;
+
+        ret = Py_BuildValue("[" cstr_argf cstr_argf cstr_argf cstr_argf "]",
+                            acl_txt, acl_num, def_txt, def_num);
+
+        if (def_txt)
+            acl_free((acl_t)def_txt);
+        if (def_num)
+            acl_free((acl_t)def_num);
+    } else {
+        ret = Py_BuildValue("[" cstr_argf cstr_argf "]",
+                            acl_txt, acl_num);
+    }
+
+out:
+    // Shared cleanup for the access ACL strings on every path.
+    if (acl_txt)
+        acl_free((acl_t)acl_txt);
+    if (acl_num)
+        acl_free((acl_t)acl_num);
+    // -2 means "not supported", which is reported as None, not an error.
+    if (rv == -2)
+        Py_RETURN_NONE;
+    return ret;
+}
+
+// Parse the ACL text s and apply it to the file name as an ACL of the
+// given type (ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT).  Returns 0 on
+// success, or -1 with an IOError set from errno.
+static int bup_apply_acl_string(const char *name, acl_type_t type,
+                                const char *s)
+{
+    acl_t acl = acl_from_text(s);
+    int ret = 0;
+
+    if (!acl) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+
+    if (acl_set_file(name, type, acl)) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        ret = -1;
+    }
+
+    acl_free(acl);
+
+    return ret;
+}
+
+// Python entry point: apply ACLs to the file name (arguments: name,
+// acl, and optionally def).  acl is applied as the access ACL; def,
+// when given, is applied as the default ACL.  Returns None, or NULL
+// with an IOError set when parsing or applying either ACL fails.
+static PyObject *bup_apply_acl(PyObject *self, PyObject *args)
+{
+    char *name, *acl, *def = NULL;
+
+    if (!PyArg_ParseTuple(args, cstr_argf cstr_argf "|" cstr_argf, &name, &acl, &def))
+       return NULL;
+
+    if (bup_apply_acl_string(name, ACL_TYPE_ACCESS, acl))
+        return NULL;
+
+    // The optional argument mirrors the default ACL that bup_read_acl()
+    // reads with ACL_TYPE_DEFAULT, so it has to be installed as the
+    // default ACL; applying it as ACL_TYPE_ACCESS would overwrite the
+    // access ACL set just above.
+    if (def && bup_apply_acl_string(name, ACL_TYPE_DEFAULT, def))
+        return NULL;
+
+    Py_RETURN_NONE;
+}
+#endif
+
+// Python entry point: pack the items of the tuple packargs into one
+// bytes object according to fmt, one format character per item:
+//   'V'  non-negative integer, encoded via vuint_encode()
+//   'v'  integer, encoded via vint_encode()
+//   's'  bytes, written as a vuint length followed by the raw bytes
+// Returns the packed bytes, or NULL with an exception set.  The
+// working buffer is a fixed 20 bytes per item and is never grown;
+// input that might not fit raises OverflowError instead.
+static PyObject *bup_limited_vint_pack(PyObject *self, PyObject *args)
+{
+    const char *fmt;
+    PyObject *packargs, *result;
+    Py_ssize_t sz, i, bufsz;
+    char *buf, *pos, *end;
+
+    if (!PyArg_ParseTuple(args, "sO", &fmt, &packargs))
+        return NULL;
+
+    if (!PyTuple_Check(packargs))
+        return PyErr_Format(PyExc_Exception, "pack() arg must be tuple");
+
+    sz = PyTuple_GET_SIZE(packargs);
+    const size_t fmt_len = strlen(fmt);
+    if (sz != (Py_ssize_t)fmt_len)
+        return PyErr_Format(PyExc_Exception,
+                            "number of arguments (%zd) does not match format string (%zu)",
+                            sz, fmt_len);
+
+    if (sz > INT_MAX / 20)
+        return PyErr_Format(PyExc_Exception, "format is far too long");
+
+    // estimate no more than 20 bytes for each on average, the maximum
+    // vint/vuint we can encode is anyway 10 bytes, so this gives us
+    // some headroom for a few strings
+    bufsz = sz * 20;
+    // malloc(0) may legitimately return NULL, so always request at
+    // least one byte to keep an empty format from looking like OOM.
+    buf = malloc(bufsz ? bufsz : 1);
+    if (!buf)
+        return PyErr_NoMemory();
+
+    pos = buf;
+    end = buf + bufsz;
+    for (i = 0; i < sz; i++) {
+        PyObject *item = PyTuple_GET_ITEM(packargs, i);
+        const char *bytes;
+
+        switch (fmt[i]) {
+        case 'V': {
+            long long val = PyLong_AsLongLong(item);
+            if (val == -1 && PyErr_Occurred()) {
+                // Leave through the error label so that buf is freed.
+                PyErr_Format(PyExc_OverflowError,
+                             "pack arg %d invalid", (int)i);
+                goto error;
+            }
+            if (end - pos < 10)
+                goto overflow;
+            unsigned int n = vuint_encode(val, pos);
+            // Zero means vuint_encode() rejected a negative value and
+            // has already set the exception.
+            if (!n)
+                goto error;
+            pos += n;
+            break;
+        }
+        case 'v': {
+            long long val = PyLong_AsLongLong(item);
+            if (val == -1 && PyErr_Occurred()) {
+                // Leave through the error label so that buf is freed.
+                PyErr_Format(PyExc_OverflowError,
+                             "pack arg %d invalid", (int)i);
+                goto error;
+            }
+            if (end - pos < 10)
+                goto overflow;
+            pos += vint_encode(val, pos);
+            break;
+        }
+        case 's': {
+            bytes = PyBytes_AsString(item);
+            if (!bytes)
+                goto error;
+            if (end - pos < 10)
+                goto overflow;
+            Py_ssize_t val = PyBytes_GET_SIZE(item);
+            pos += vuint_encode(val, pos);
+            if (end - pos < val)
+                goto overflow;
+            memcpy(pos, bytes, val);
+            pos += val;
+            break;
+        }
+        default:
+            PyErr_Format(PyExc_Exception, "unknown xpack format string item %c",
+                         fmt[i]);
+            goto error;
+        }
+    }
+
+    result = PyBytes_FromStringAndSize(buf, pos - buf);
+    free(buf);
+    return result;
+
+ overflow:
+    PyErr_SetString(PyExc_OverflowError, "buffer (potentially) overflowed");
+ error:
+    free(buf);
+    return NULL;
+}
 
 static PyMethodDef helper_methods[] = {
     { "write_sparsely", bup_write_sparsely, METH_VARARGS,
@@ -1700,6 +2372,8 @@ static PyMethodDef helper_methods[] = {
     { "set_linux_file_attr", bup_set_linux_file_attr, METH_VARARGS,
       "Set the Linux attributes for the given file." },
 #endif
+
+#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now
 #ifdef HAVE_UTIMENSAT
     { "bup_utimensat", bup_utimensat, METH_VARARGS,
       "Change path timestamps with nanosecond precision (POSIX)." },
@@ -1713,6 +2387,8 @@ static PyMethodDef helper_methods[] = {
       "Change path timestamps with microsecond precision;"
       " don't follow symlinks." },
 #endif
+#endif // !defined BUP_USE_PYTHON_UTIME
+
     { "stat", bup_stat, METH_VARARGS,
       "Extended version of stat." },
     { "lstat", bup_lstat, METH_VARARGS,
@@ -1725,11 +2401,61 @@ static PyMethodDef helper_methods[] = {
 #endif
     { "bytescmp", bup_bytescmp, METH_VARARGS,
       "Return a negative value if x < y, zero if equal, positive otherwise."},
+    { "cat_bytes", bup_cat_bytes, METH_VARARGS,
+      "For (x_bytes, x_ofs, x_n, y_bytes, y_ofs, y_n) arguments, return their concatenation."},
 #ifdef BUP_MINCORE_BUF_TYPE
     { "mincore", bup_mincore, METH_VARARGS,
       "For mincore(src, src_n, src_off, dest, dest_off)"
       " call the system mincore(src + src_off, src_n, &dest[dest_off])." },
 #endif
+    { "getpwuid", bup_getpwuid, METH_VARARGS,
+      "Return the password database entry for the given numeric user id,"
+      " as a tuple with all C strings as bytes(), or None if the user does"
+      " not exist." },
+    { "getpwnam", bup_getpwnam, METH_VARARGS,
+      "Return the password database entry for the given user name,"
+      " as a tuple with all C strings as bytes(), or None if the user does"
+      " not exist." },
+    { "getgrgid", bup_getgrgid, METH_VARARGS,
+      "Return the group database entry for the given numeric group id,"
+      " as a tuple with all C strings as bytes(), or None if the group does"
+      " not exist." },
+    { "getgrnam", bup_getgrnam, METH_VARARGS,
+      "Return the group database entry for the given group name,"
+      " as a tuple with all C strings as bytes(), or None if the group does"
+      " not exist." },
+    { "gethostname", bup_gethostname, METH_NOARGS,
+      "Return the current hostname (as bytes)" },
+#ifdef BUP_HAVE_READLINE
+    { "set_completion_entry_function", bup_set_completion_entry_function, METH_VARARGS,
+      "Set rl_completion_entry_function.  Called as f(text, state)." },
+    { "set_attempted_completion_function", bup_set_attempted_completion_function, METH_VARARGS,
+      "Set rl_attempted_completion_function.  Called as f(text, start, end)." },
+    { "parse_and_bind", bup_parse_and_bind, METH_VARARGS,
+      "Call rl_parse_and_bind." },
+    { "get_line_buffer", bup_get_line_buffer, METH_NOARGS,
+      "Return rl_line_buffer." },
+    { "get_completer_word_break_characters", bup_get_completer_word_break_characters, METH_NOARGS,
+      "Return rl_completer_word_break_characters." },
+    { "set_completer_word_break_characters", bup_set_completer_word_break_characters, METH_VARARGS,
+      "Set rl_completer_word_break_characters." },
+    { "readline", bup_readline, METH_VARARGS,
+      "Call readline(prompt)." },
+#endif // defined BUP_HAVE_READLINE
+#ifdef ACL_SUPPORT
+    { "read_acl", bup_read_acl, METH_VARARGS,
+      "read_acl(name, isdir)\n\n"
+      "Read ACLs for the given file/dirname and return the correctly encoded"
+      " list [txt, num, def_tx, def_num] (the def_* being empty bytestrings"
+      " unless the second argument 'isdir' is True)." },
+    { "apply_acl", bup_apply_acl, METH_VARARGS,
+      "apply_acl(name, acl, def=None)\n\n"
+      "Given a file/dirname (bytes) and the ACLs to restore, do that." },
+#endif /* ACL_SUPPORT */
+    { "vuint_encode", bup_vuint_encode, METH_VARARGS, "encode an int to vuint" },
+    { "vint_encode", bup_vint_encode, METH_VARARGS, "encode an int to vint" },
+    { "limited_vint_pack", bup_limited_vint_pack, METH_VARARGS,
+      "Try to pack vint/vuint/str, throwing OverflowError when unable." },
     { NULL, NULL, 0, NULL },  // sentinel
 };
 
@@ -1775,6 +2501,9 @@ static int setup_module(PyObject *m)
     // Just be sure (relevant when passing timestamps back to Python above).
     assert(sizeof(PY_LONG_LONG) <= sizeof(long long));
     assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long));
+    // At least for INTEGER_TO_PY
+    assert(sizeof(intmax_t) <= sizeof(long long));
+    assert(sizeof(uintmax_t) <= sizeof(unsigned long long));
 
     test_integral_assignment_fits();
 
@@ -1789,8 +2518,6 @@ static int setup_module(PyObject *m)
     }
 
     char *e;
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY().
     {
         PyObject *value;
         value = INTEGER_TO_PY(INT_MAX);
@@ -1800,6 +2527,8 @@ static int setup_module(PyObject *m)
         PyObject_SetAttrString(m, "UINT_MAX", value);
         Py_DECREF(value);
     }
+
+#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now
 #ifdef HAVE_UTIMENSAT
     {
         PyObject *value;
@@ -1814,6 +2543,8 @@ static int setup_module(PyObject *m)
         Py_DECREF(value);
     }
 #endif
+#endif // !defined BUP_USE_PYTHON_UTIME
+
 #ifdef BUP_HAVE_MINCORE_INCORE
     {
         PyObject *value;
@@ -1822,11 +2553,9 @@ static int setup_module(PyObject *m)
         Py_DECREF(value);
     }
 #endif
-#pragma clang diagnostic pop  // ignored "-Wtautological-compare"
 
     e = getenv("BUP_FORCE_TTY");
     get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2);
-    unpythonize_argv();
     return 1;
 }
 
@@ -1836,11 +2565,13 @@ static int setup_module(PyObject *m)
 PyMODINIT_FUNC init_helpers(void)
 {
     PyObject *m = Py_InitModule("_helpers", helper_methods);
-    if (m == NULL)
+    if (m == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "bup._helpers init failed");
         return;
-
+    }
     if (!setup_module(m))
     {
+        PyErr_SetString(PyExc_RuntimeError, "bup._helpers set up failed");
         Py_DECREF(m);
         return;
     }