X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2F_helpers.c;h=9265d49324a72b08f82132ae427eb105875263b7;hb=c8af075fbf5fe45ceaf411dc01f7016290e8d794;hp=5f07d446c703444d42edda0376209b3195cfc4e0;hpb=3a2a997771a1af6722059f15589fc15866d4bc54;p=bup.git diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c index 5f07d44..9265d49 100644 --- a/lib/bup/_helpers.c +++ b/lib/bup/_helpers.c @@ -4,7 +4,7 @@ #include "../../config/config.h" // According to Python, its header has to go first: -// http://docs.python.org/2/c-api/intro.html#include-files +// http://docs.python.org/3/c-api/intro.html#include-files #include #include @@ -65,11 +65,17 @@ #endif #include "bupsplit.h" +#include "bup/intprops.h" #if defined(FS_IOC_GETFLAGS) && defined(FS_IOC_SETFLAGS) #define BUP_HAVE_FILE_ATTRS 1 #endif +#if PY_MAJOR_VERSION > 2 +# define BUP_USE_PYTHON_UTIME 1 +#endif + +#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now /* * Check for incomplete UTIMENSAT support (NetBSD 6), and if so, * pretend we don't have it. @@ -77,6 +83,7 @@ #if !defined(AT_FDCWD) || !defined(AT_SYMLINK_NOFOLLOW) #undef HAVE_UTIMENSAT #endif +#endif // defined BUP_USE_PYTHON_UTIME #ifndef FS_NOCOW_FL // Of course, this assumes it's a bitfield value. @@ -95,18 +102,10 @@ typedef struct { // rbuf_argf: for read-only byte vectors // wbuf_argf: for mutable byte vectors -#if PY_MAJOR_VERSION < 3 -static state_t state; -# define get_state(x) (&state) -# define cstr_argf "s" -# define rbuf_argf "s#" -# define wbuf_argf "s*" -#else -# define get_state(x) ((state_t *) PyModule_GetState(x)) -# define cstr_argf "y" -# define rbuf_argf "y#" -# define wbuf_argf "y*" -#endif // PY_MAJOR_VERSION >= 3 +#define get_state(x) ((state_t *) PyModule_GetState(x)) +#define cstr_argf "y" +#define rbuf_argf "y#" +#define wbuf_argf "y*" static void *checked_calloc(size_t n, size_t size) @@ -117,16 +116,10 @@ static void *checked_calloc(size_t n, size_t size) return result; } -#ifndef BUP_HAVE_BUILTIN_MUL_OVERFLOW - -#define checked_malloc checked_calloc - -#else // defined BUP_HAVE_BUILTIN_MUL_OVERFLOW - static void *checked_malloc(size_t n, size_t size) { size_t total; - if (__builtin_mul_overflow(n, size, &total)) + if (!INT_MULTIPLY_OK(n, size, &total)) { PyErr_Format(PyExc_OverflowError, "request to allocate %zu items of size %zu is too large", @@ -139,8 +132,6 @@ static void *checked_malloc(size_t n, size_t size) return result; } -#endif // defined BUP_HAVE_BUILTIN_MUL_OVERFLOW - #ifndef htonll // This function should technically be macro'd out if it's going to be used @@ -156,69 +147,14 @@ static uint64_t htonll(uint64_t value) } #endif +#define INTEGRAL_ASSIGNMENT_FITS(dest, src) INT_ADD_OK(src, 0, dest) -// Disabling sign-compare here should be fine since we're explicitly -// checking for a sign mismatch, i.e. if the signs don't match, then -// it doesn't matter what the value comparison says. -// FIXME: ... so should we reverse the order? -#define INTEGRAL_ASSIGNMENT_FITS(dest, src) \ - ({ \ - _Pragma("GCC diagnostic push"); \ - _Pragma("GCC diagnostic ignored \"-Wsign-compare\""); \ - _Pragma("clang diagnostic push"); \ - _Pragma("clang diagnostic ignored \"-Wshorten-64-to-32\""); \ - *(dest) = (src); \ - int result = *(dest) == (src) && (*(dest) < 1) == ((src) < 1); \ - _Pragma("clang diagnostic pop"); \ - _Pragma("GCC diagnostic pop"); \ - result; \ - }) - - -#define INTEGER_TO_PY(x) \ - ({ \ - _Pragma("GCC diagnostic push"); \ - _Pragma("GCC diagnostic ignored \"-Wtype-limits\""); \ - _Pragma("clang diagnostic push"); \ - _Pragma("clang diagnostic ignored \"-Wtautological-compare\""); \ - PyObject *result = ((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x); \ - _Pragma("clang diagnostic pop"); \ - _Pragma("GCC diagnostic pop"); \ - result; \ - }) - - -#if PY_MAJOR_VERSION < 3 -static int bup_ulong_from_pyint(unsigned long *x, PyObject *py, - const char *name) -{ - const long tmp = PyInt_AsLong(py); - if (tmp == -1 && PyErr_Occurred()) - { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) - PyErr_Format(PyExc_OverflowError, "%s too big for unsigned long", - name); - return 0; - } - if (tmp < 0) - { - PyErr_Format(PyExc_OverflowError, - "negative %s cannot be converted to unsigned long", name); - return 0; - } - *x = tmp; - return 1; -} -#endif +#define INTEGER_TO_PY(x) \ + EXPR_SIGNED(x) ? PyLong_FromLongLong(x) : PyLong_FromUnsignedLongLong(x) static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name) { -#if PY_MAJOR_VERSION < 3 - if (PyInt_Check(py)) - return bup_ulong_from_pyint(x, py, name); -#endif - if (!PyLong_Check(py)) { PyErr_Format(PyExc_TypeError, "expected integer %s", name); @@ -256,19 +192,6 @@ static int bup_uint_from_py(unsigned int *x, PyObject *py, const char *name) static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py, const char *name) { -#if PY_MAJOR_VERSION < 3 - if (PyInt_Check(py)) - { - unsigned long tmp; - if (bup_ulong_from_pyint(&tmp, py, name)) - { - *x = tmp; - return 1; - } - return 0; - } -#endif - if (!PyLong_Check(py)) { PyErr_Format(PyExc_TypeError, "integer argument expected for %s", name); @@ -351,58 +274,6 @@ static PyObject *bup_cat_bytes(PyObject *self, PyObject *args) } - -// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV... -#if __WIN32__ || __CYGWIN__ || PY_VERSION_HEX >= 0x03090000 - -// There's no 'ps' on win32 anyway, and Py_GetArgcArgv() isn't available. -static void unpythonize_argv(void) { } - -#else // not __WIN32__ - -// For some reason this isn't declared in Python.h -extern void Py_GetArgcArgv(int *argc, char ***argv); - -static void unpythonize_argv(void) -{ - int argc, i; - char **argv, *arge; - - Py_GetArgcArgv(&argc, &argv); - - for (i = 0; i < argc-1; i++) - { - if (argv[i] + strlen(argv[i]) + 1 != argv[i+1]) - { - // The argv block doesn't work the way we expected; it's unsafe - // to mess with it. - return; - } - } - - arge = argv[argc-1] + strlen(argv[argc-1]) + 1; - - if (strstr(argv[0], "python") && argv[1] == argv[0] + strlen(argv[0]) + 1) - { - char *p; - size_t len, diff; - p = strrchr(argv[1], '/'); - if (p) - { - p++; - diff = p - argv[0]; - len = arge - p; - memmove(argv[0], p, len); - memset(arge - diff, 0, diff); - for (i = 0; i < argc; i++) - argv[i] = argv[i+1] ? argv[i+1]-diff : NULL; - } - } -} - -#endif // not __WIN32__ or __CYGWIN__ - - static int write_all(int fd, const void *buf, const size_t count) { size_t written = 0; @@ -417,15 +288,11 @@ static int write_all(int fd, const void *buf, const size_t count) } -static int uadd(unsigned long long *dest, - const unsigned long long x, - const unsigned long long y) +static inline int uadd(unsigned long long *dest, + const unsigned long long x, + const unsigned long long y) { - const unsigned long long result = x + y; - if (result < x || result < y) - return 0; - *dest = result; - return 1; + return INT_ADD_OK(x, y, dest); } @@ -652,29 +519,13 @@ static PyObject *blobbits(PyObject *self, PyObject *args) static PyObject *splitbuf(PyObject *self, PyObject *args) { - // We stick to buffers in python 2 because they appear to be - // substantially smaller than memoryviews, and because - // zlib.compress() in python 2 can't accept a memoryview - // (cf. hashsplit.py). int out = 0, bits = -1; - if (PY_MAJOR_VERSION > 2) - { - Py_buffer buf; - if (!PyArg_ParseTuple(args, "y*", &buf)) - return NULL; - assert(buf.len <= INT_MAX); - out = bupsplit_find_ofs(buf.buf, buf.len, &bits); - PyBuffer_Release(&buf); - } - else - { - unsigned char *buf = NULL; - Py_ssize_t len = 0; - if (!PyArg_ParseTuple(args, "t#", &buf, &len)) - return NULL; - assert(len <= INT_MAX); - out = bupsplit_find_ofs(buf, (int) len, &bits); - } + Py_buffer buf; + if (!PyArg_ParseTuple(args, "y*", &buf)) + return NULL; + assert(buf.len <= INT_MAX); + out = bupsplit_find_ofs(buf.buf, buf.len, &bits); + PyBuffer_Release(&buf); if (out) assert(bits >= BUP_BLOBBITS); return Py_BuildValue("ii", out, bits); } @@ -703,8 +554,14 @@ static PyObject *bitmatch(PyObject *self, PyObject *args) } } - assert(byte <= (INT_MAX >> 3)); - return Py_BuildValue("i", byte*8 + bit); + Py_ssize_t result; + if (!INT_MULTIPLY_OK(byte, 8, &result) + || !INT_ADD_OK(result, bit, &result)) + { + PyErr_Format(PyExc_OverflowError, "bitmatch bit count too large"); + return NULL; + } + return PyLong_FromSsize_t(result); } @@ -1406,6 +1263,7 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args) #endif /* def BUP_HAVE_FILE_ATTRS */ +#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now #ifndef HAVE_UTIMENSAT #ifndef HAVE_UTIMES #error "cannot find utimensat or utimes()" @@ -1414,6 +1272,7 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args) #error "cannot find utimensat or lutimes()" #endif #endif +#endif // defined BUP_USE_PYTHON_UTIME #define ASSIGN_PYLONG_TO_INTEGRAL(dest, pylong, overflow) \ ({ \ @@ -1450,6 +1309,7 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args) }) +#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now #ifdef HAVE_UTIMENSAT static PyObject *bup_utimensat(PyObject *self, PyObject *args) @@ -1567,6 +1427,8 @@ static PyObject *bup_lutimes(PyObject *self, PyObject *args) } #endif /* def HAVE_LUTIMES */ +#endif // defined BUP_USE_PYTHON_UTIME + #ifdef HAVE_STAT_ST_ATIM # define BUP_STAT_ATIME_NS(st) (st)->st_atim.tv_nsec @@ -1741,6 +1603,81 @@ static PyObject *bup_mincore(PyObject *self, PyObject *args) } #endif /* def BUP_MINCORE_BUF_TYPE */ +static unsigned int vuint_encode(long long val, char *buf) +{ + unsigned int len = 0; + + if (val < 0) { + PyErr_SetString(PyExc_Exception, "vuints must not be negative"); + return 0; + } + + do { + buf[len] = val & 0x7f; + + val >>= 7; + if (val) + buf[len] |= 0x80; + + len++; + } while (val); + + return len; +} + +static unsigned int vint_encode(long long val, char *buf) +{ + unsigned int len = 1; + char sign = 0; + + if (val < 0) { + sign = 0x40; + val = -val; + } + + buf[0] = (val & 0x3f) | sign; + val >>= 6; + if (val) + buf[0] |= 0x80; + + while (val) { + buf[len] = val & 0x7f; + val >>= 7; + if (val) + buf[len] |= 0x80; + len++; + } + + return len; +} + +static PyObject *bup_vuint_encode(PyObject *self, PyObject *args) +{ + long long val; + // size the buffer appropriately - need 8 bits to encode each 7 + char buf[(sizeof(val) + 1) / 7 * 8]; + + if (!PyArg_ParseTuple(args, "L", &val)) + return NULL; + + unsigned int len = vuint_encode(val, buf); + if (!len) + return NULL; + + return PyBytes_FromStringAndSize(buf, len); +} + +static PyObject *bup_vint_encode(PyObject *self, PyObject *args) +{ + long long val; + // size the buffer appropriately - need 8 bits to encode each 7 + char buf[(sizeof(val) + 1) / 7 * 8]; + + if (!PyArg_ParseTuple(args, "L", &val)) + return NULL; + + return PyBytes_FromStringAndSize(buf, vint_encode(val, buf)); +} static PyObject *tuple_from_cstrs(char **cstrs) { @@ -1892,6 +1829,7 @@ static PyObject *bup_gethostname(PyObject *mod, PyObject *ignore) if (gethostname(buf, sizeof(buf) - 1)) return PyErr_SetFromErrno(PyExc_IOError); + buf[sizeof(buf) - 1] = 0; return PyBytes_FromString(buf); } @@ -1905,10 +1843,18 @@ static char *cstr_from_bytes(PyObject *bytes) int rc = PyBytes_AsStringAndSize(bytes, &buf, &length); if (rc == -1) return NULL; - char *result = checked_malloc(length, sizeof(char)); + size_t c_len; + if (!INT_ADD_OK(length, 1, &c_len)) { + PyErr_Format(PyExc_OverflowError, + "Cannot convert ssize_t sized bytes object (%zd) to C string", + length); + return NULL; + } + char *result = checked_malloc(c_len, sizeof(char)); if (!result) return NULL; memcpy(result, buf, length); + result[length] = 0; return result; } @@ -1978,15 +1924,11 @@ bup_set_completer_word_break_characters(PyObject *self, PyObject *args) static PyObject * bup_get_completer_word_break_characters(PyObject *self, PyObject *args) { - if (!PyArg_ParseTuple(args, "")) - return NULL; return PyBytes_FromString(rl_completer_word_break_characters); } static PyObject *bup_get_line_buffer(PyObject *self, PyObject *args) { - if (!PyArg_ParseTuple(args, "")) - return NULL; return PyBytes_FromString(rl_line_buffer); } @@ -2237,6 +2179,95 @@ static PyObject *bup_apply_acl(PyObject *self, PyObject *args) } #endif +static PyObject *bup_limited_vint_pack(PyObject *self, PyObject *args) +{ + const char *fmt; + PyObject *packargs, *result; + Py_ssize_t sz, i, bufsz; + char *buf, *pos, *end; + + if (!PyArg_ParseTuple(args, "sO", &fmt, &packargs)) + return NULL; + + if (!PyTuple_Check(packargs)) + return PyErr_Format(PyExc_Exception, "pack() arg must be tuple"); + + sz = PyTuple_GET_SIZE(packargs); + if (sz != (Py_ssize_t)strlen(fmt)) + return PyErr_Format(PyExc_Exception, + "number of arguments (%ld) does not match format string (%ld)", + (unsigned long)sz, (unsigned long)strlen(fmt)); + + if (sz > INT_MAX / 20) + return PyErr_Format(PyExc_Exception, "format is far too long"); + + // estimate no more than 20 bytes for each on average, the maximum + // vint/vuint we can encode is anyway 10 bytes, so this gives us + // some headroom for a few strings before we need to realloc ... + bufsz = sz * 20; + buf = malloc(bufsz); + if (!buf) + return PyErr_NoMemory(); + + pos = buf; + end = buf + bufsz; + for (i = 0; i < sz; i++) { + PyObject *item = PyTuple_GET_ITEM(packargs, i); + const char *bytes; + + switch (fmt[i]) { + case 'V': { + long long val = PyLong_AsLongLong(item); + if (val == -1 && PyErr_Occurred()) + return PyErr_Format(PyExc_OverflowError, + "pack arg %d invalid", (int)i); + if (end - pos < 10) + goto overflow; + pos += vuint_encode(val, pos); + break; + } + case 'v': { + long long val = PyLong_AsLongLong(item); + if (val == -1 && PyErr_Occurred()) + return PyErr_Format(PyExc_OverflowError, + "pack arg %d invalid", (int)i); + if (end - pos < 10) + goto overflow; + pos += vint_encode(val, pos); + break; + } + case 's': { + bytes = PyBytes_AsString(item); + if (!bytes) + goto error; + if (end - pos < 10) + goto overflow; + Py_ssize_t val = PyBytes_GET_SIZE(item); + pos += vuint_encode(val, pos); + if (end - pos < val) + goto overflow; + memcpy(pos, bytes, val); + pos += val; + break; + } + default: + PyErr_Format(PyExc_Exception, "unknown xpack format string item %c", + fmt[i]); + goto error; + } + } + + result = PyBytes_FromStringAndSize(buf, pos - buf); + free(buf); + return result; + + overflow: + PyErr_SetString(PyExc_OverflowError, "buffer (potentially) overflowed"); + error: + free(buf); + return NULL; +} + static PyMethodDef helper_methods[] = { { "write_sparsely", bup_write_sparsely, METH_VARARGS, "Write buf excepting zeros at the end. Return trailing zero count." }, @@ -2276,6 +2307,8 @@ static PyMethodDef helper_methods[] = { { "set_linux_file_attr", bup_set_linux_file_attr, METH_VARARGS, "Set the Linux attributes for the given file." }, #endif + +#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now #ifdef HAVE_UTIMENSAT { "bup_utimensat", bup_utimensat, METH_VARARGS, "Change path timestamps with nanosecond precision (POSIX)." }, @@ -2289,6 +2322,8 @@ static PyMethodDef helper_methods[] = { "Change path timestamps with microsecond precision;" " don't follow symlinks." }, #endif +#endif // defined BUP_USE_PYTHON_UTIME + { "stat", bup_stat, METH_VARARGS, "Extended version of stat." }, { "lstat", bup_lstat, METH_VARARGS, @@ -2352,6 +2387,10 @@ static PyMethodDef helper_methods[] = { "apply_acl(name, acl, def=None)\n\n" "Given a file/dirname (bytes) and the ACLs to restore, do that." }, #endif /* HAVE_ACLS */ + { "vuint_encode", bup_vuint_encode, METH_VARARGS, "encode an int to vuint" }, + { "vint_encode", bup_vint_encode, METH_VARARGS, "encode an int to vint" }, + { "limited_vint_pack", bup_limited_vint_pack, METH_VARARGS, + "Try to pack vint/vuint/str, throwing OverflowError when unable." }, { NULL, NULL, 0, NULL }, // sentinel }; @@ -2397,6 +2436,9 @@ static int setup_module(PyObject *m) // Just be sure (relevant when passing timestamps back to Python above). assert(sizeof(PY_LONG_LONG) <= sizeof(long long)); assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long)); + // At least for INTEGER_TO_PY + assert(sizeof(intmax_t) <= sizeof(long long)); + assert(sizeof(uintmax_t) <= sizeof(unsigned long long)); test_integral_assignment_fits(); @@ -2420,6 +2462,8 @@ static int setup_module(PyObject *m) PyObject_SetAttrString(m, "UINT_MAX", value); Py_DECREF(value); } + +#ifndef BUP_USE_PYTHON_UTIME // just for Python 2 now #ifdef HAVE_UTIMENSAT { PyObject *value; @@ -2434,6 +2478,8 @@ static int setup_module(PyObject *m) Py_DECREF(value); } #endif +#endif // defined BUP_USE_PYTHON_UTIME + #ifdef BUP_HAVE_MINCORE_INCORE { PyObject *value; @@ -2445,28 +2491,10 @@ static int setup_module(PyObject *m) e = getenv("BUP_FORCE_TTY"); get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2); - unpythonize_argv(); return 1; } -#if PY_MAJOR_VERSION < 3 - -PyMODINIT_FUNC init_helpers(void) -{ - PyObject *m = Py_InitModule("_helpers", helper_methods); - if (m == NULL) - return; - - if (!setup_module(m)) - { - Py_DECREF(m); - return; - } -} - -# else // PY_MAJOR_VERSION >= 3 - static struct PyModuleDef helpers_def = { PyModuleDef_HEAD_INIT, "_helpers", @@ -2491,5 +2519,3 @@ PyMODINIT_FUNC PyInit__helpers(void) } return module; } - -#endif // PY_MAJOR_VERSION >= 3