From 6178db0ca9a784fa9961d2de5b29090c3c4c3e03 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Fri, 3 Jul 2015 12:34:24 -0500 Subject: [PATCH 1/1] Handle mincore cross-platform differences On the BSDs, mincore is defined to return char values rather than unsigned char values, so test for that and adjust our invocation. Also check for MINCORE_INCORE, which is used on some platforms to test the returned status bytes. When it's found, define helpers.MINCORE_INCORE appropriately. Rework the mincore-related code, moving much of the fmincore C code to Python to avoid dealing with platform-related strerror_r differences, and replace the _helpers fmincore with a more primitive mincore. To accommodate the more complicated ./configure testing, require bash, and use printf -v to (safely) set ac_defined_$name during all calls to AC_DEFINE so that we can use the discovered values to guard tests during configuration. Thanks to Thomas Klausner for reporting the problem. Signed-off-by: Rob Browning Tested-by: Rob Browning --- config/configure | 72 ++++++++++++++++++++- config/configure.inc | 6 +- lib/bup/_helpers.c | 134 +++++++++++++++------------------------- lib/bup/hashsplit.py | 11 ++-- lib/bup/helpers.py | 36 +++++++++++ lib/bup/t/thashsplit.py | 3 +- 6 files changed, 168 insertions(+), 94 deletions(-) diff --git a/config/configure b/config/configure index 89da205..f250d1a 100755 --- a/config/configure +++ b/config/configure @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash bup_find_prog() { @@ -12,6 +12,20 @@ bup_find_prog() echo "$result" } +bup_try_c_code() +{ + local code="$1" tmpdir rc + if test -z "$code"; then + AC_FAIL "No code provided to test compile" + fi + tmpdir="$(mktemp -d "bup-try-c-compile-XXXXXXX")" || exit $? + echo "$code" > "$tmpdir/test.c" || exit $? + $AC_CC -Wall -Werror -c -o "$tmpdir/test" "$tmpdir/test.c" + rc=$? + rm -r "$tmpdir" || exit $? + return $rc +} + TARGET=bup . ./configure.inc @@ -53,8 +67,13 @@ fi # For stat. 
AC_CHECK_HEADERS sys/stat.h AC_CHECK_HEADERS sys/types.h + +# For stat and mincore. AC_CHECK_HEADERS unistd.h +# For mincore. +AC_CHECK_HEADERS sys/mman.h + # For FS_IOC_GETFLAGS and FS_IOC_SETFLAGS. AC_CHECK_HEADERS linux/fs.h AC_CHECK_HEADERS sys/ioctl.h @@ -65,8 +84,59 @@ if [ -z "$OS_GNU_KFREEBSD" ]; then fi AC_CHECK_FUNCS utimes AC_CHECK_FUNCS lutimes + + AC_CHECK_FUNCS mincore +mincore_incore_code=" +#if 0$ac_defined_HAVE_UNISTD_H +#include <unistd.h> +#endif +#if 0$ac_defined_HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif +int main(int argc, char **argv) +{ + if (MINCORE_INCORE) + return 0; +} +" + +mincore_buf_type_code() +{ + local vec_type="$1" + echo " +#include <sys/mman.h> +int main(int argc, char **argv) +{ + void *x = 0; + $vec_type *buf = 0; + return mincore(x, 0, buf); +}" || exit $? +} + +if test "$ac_defined_HAVE_MINCORE"; then + TLOGN "checking for MINCORE_INCORE" + if bup_try_c_code "$mincore_incore_code"; then + AC_DEFINE BUP_HAVE_MINCORE_INCORE 1 + TLOG ' (found)' + else + TLOG ' (not found)' + fi + + TLOGN "checking mincore buf type" + if bup_try_c_code "$(mincore_buf_type_code char)"; then + AC_DEFINE BUP_MINCORE_BUF_TYPE 'char' + TLOG ' (char)' + elif bup_try_c_code "$(mincore_buf_type_code 'unsigned char')"; then + AC_DEFINE BUP_MINCORE_BUF_TYPE 'unsigned char' + TLOG ' (unsigned char)' + else + AC_FAIL "ERROR: unexpected mincore definition; please notify bup-list@googlegroups.com" + fi +fi + + AC_CHECK_FIELD stat st_atim sys/types.h sys/stat.h unistd.h AC_CHECK_FIELD stat st_mtim sys/types.h sys/stat.h unistd.h AC_CHECK_FIELD stat st_ctim sys/types.h sys/stat.h unistd.h diff --git a/config/configure.inc b/config/configure.inc index f813656..c65b8f2 100644 --- a/config/configure.inc +++ b/config/configure.inc @@ -1287,7 +1287,11 @@ AC_MAK() { # # AC_DEFINE adds a #define to config.h AC_DEFINE() { - echo "#define $1 ${2:-1}" >> $__cwd/config.h.tmp + local name="$1" value="${2:-1}" + if ! 
printf -v "ac_defined_$name" '%s' "$value"; then + AC_FATAL 'AC_DEFINE unable to set "ac_defined_$name" to "$value"' + fi + echo "#define $name $value" >> $__cwd/config.h.tmp } # diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c index ee216f4..92d79e3 100644 --- a/lib/bup/_helpers.c +++ b/lib/bup/_helpers.c @@ -14,8 +14,10 @@ #include #include #include -#include <sys/mman.h> +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> #endif @@ -1332,91 +1334,48 @@ static PyObject *bup_localtime(PyObject *self, PyObject *args) #endif /* def HAVE_TM_TM_GMTOFF */ -#ifdef HAVE_MINCORE -static PyObject *bup_fmincore(PyObject *self, PyObject *args) +#ifdef BUP_MINCORE_BUF_TYPE +static PyObject *bup_mincore(PyObject *self, PyObject *args) { - int fd, rc; - if (!PyArg_ParseTuple(args, "i", &fd)) + const char *src; + Py_ssize_t src_ssize; + Py_buffer dest; + PyObject *py_src_n, *py_src_off, *py_dest_off; + if (!PyArg_ParseTuple(args, "s#OOw*O", + &src, &src_ssize, &py_src_n, &py_src_off, + &dest, &py_dest_off)) return NULL; - errno = 0; - const long sc_page_size = sysconf(_SC_PAGESIZE); - if (sc_page_size == -1) // Stymied - mincore works in page_size chunks - { - if (errno) - return PyErr_SetFromErrno(PyExc_OSError); - else - PyErr_Format(PyExc_RuntimeError, - "cannot determine memory page size"); - } - - struct stat st; - rc = fstat(fd, &st); - if (rc != 0) - return PyErr_SetFromErrno(PyExc_OSError); + unsigned long long src_size, src_n, src_off, dest_size, dest_off; + if (!(bup_ullong_from_py(&src_n, py_src_n, "src_n") + && bup_ullong_from_py(&src_off, py_src_off, "src_off") + && bup_ullong_from_py(&dest_off, py_dest_off, "dest_off"))) + return NULL; - if (st.st_size == 0) - return Py_BuildValue("s", ""); - - size_t page_size; - if (!INTEGRAL_ASSIGNMENT_FITS(&page_size, sc_page_size)) - return PyErr_Format(PyExc_OverflowError, "page size too large"); - - const off_t pref_chunk_size = 64 * 1024 * 1024; - off_t chunk_size = page_size; - if (page_size < 
pref_chunk_size) - chunk_size = page_size * (pref_chunk_size / page_size); - const off_t pages_per_chunk = chunk_size / page_size; - const off_t page_count = (st.st_size + page_size - 1) / page_size; - const off_t chunk_count = page_count / chunk_size > 0 ? page_count / chunk_size : 1; - unsigned char * const result = malloc(page_count); - if (result == NULL) + if (!INTEGRAL_ASSIGNMENT_FITS(&src_size, src_ssize)) + return PyErr_Format(PyExc_OverflowError, "invalid src size"); + unsigned long long src_region_end; + + if (!uadd(&src_region_end, src_off, src_n)) + return PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large"); + if (src_region_end > src_size) + return PyErr_Format(PyExc_OverflowError, "region runs off end of src"); + + if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len)) + return PyErr_Format(PyExc_OverflowError, "invalid dest size"); + if (dest_off > dest_size) + return PyErr_Format(PyExc_OverflowError, "region runs off end of dest"); + + size_t length; + if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n)) + return PyErr_Format(PyExc_OverflowError, "src_n overflows size_t"); + int rc = mincore((void *)(src + src_off), src_n, + (BUP_MINCORE_BUF_TYPE *) (dest.buf + dest_off)); + if (rc != 0) return PyErr_SetFromErrno(PyExc_OSError); - - off_t ci; - for(ci = 0; ci < chunk_count; ci++) - { - const off_t pos = chunk_size * ci; - const size_t msize = chunk_size < st.st_size - pos ? chunk_size : st.st_size - pos; - void *m = mmap(NULL, msize, PROT_NONE, MAP_SHARED, fd, pos); - if (m == MAP_FAILED) - { - free(result); - return PyErr_SetFromErrno(PyExc_OSError); - } - rc = mincore(m, msize, &result[ci * pages_per_chunk]); - if (rc != 0) - { - const int errno_stash = errno; - rc = munmap(m, msize); - if (rc != 0) - { - char buf[512]; - char *msg = strerror_r(errno, buf, 512); - if (rc != 0) - fprintf(stderr, "%s:%d: strerror_r failed (%d)\n", - __FILE__, __LINE__, rc < 0 ? 
errno : rc); - else - fprintf(stderr, - "%s:%d: munmap failed after mincore failed (%s)\n", - __FILE__, __LINE__, msg); - } - free(result); - errno = errno_stash; - return PyErr_SetFromErrno(PyExc_OSError); - } - rc = munmap(m, msize); - if (rc != 0) - { - free(result); - return PyErr_SetFromErrno(PyExc_OSError); - } - } - PyObject *py_result = Py_BuildValue("s#", result, page_count); - free(result); - return py_result; + return Py_BuildValue("O", Py_None); } -#endif /* def HAVE_MINCORE */ +#endif /* def BUP_MINCORE_BUF_TYPE */ static PyMethodDef helper_methods[] = { @@ -1481,9 +1440,10 @@ static PyMethodDef helper_methods[] = { { "localtime", bup_localtime, METH_VARARGS, "Return struct_time elements plus the timezone offset and name." }, #endif -#ifdef HAVE_MINCORE - { "fmincore", bup_fmincore, METH_VARARGS, - "Return mincore() information for the provided file descriptor." }, +#ifdef BUP_MINCORE_BUF_TYPE + { "mincore", bup_mincore, METH_VARARGS, + "For mincore(src, src_n, src_off, dest, dest_off)" + " call the system mincore(src + src_off, src_n, &dest[dest_off])." 
}, #endif { NULL, NULL, 0, NULL }, // sentinel }; @@ -1530,6 +1490,14 @@ PyMODINIT_FUNC init_helpers(void) Py_DECREF(value); } #endif +#ifdef BUP_HAVE_MINCORE_INCORE + { + PyObject *value; + value = INTEGER_TO_PY(MINCORE_INCORE); + PyObject_SetAttrString(m, "MINCORE_INCORE", value); + Py_DECREF(value); + } +#endif #pragma clang diagnostic pop // ignored "-Wtautological-compare" e = getenv("BUP_FORCE_TTY"); diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index 5df627f..9631605 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -1,12 +1,9 @@ import math, os -from bup import _helpers +from bup import _helpers, helpers from bup.helpers import sc_page_size -try: - _fmincore = _helpers.fmincore -except AttributeError, e: - _fmincore = None +_fmincore = getattr(helpers, 'fmincore', None) BLOB_MAX = 8192*4 # 8192 is the "typical" blob size for bupsplit BLOB_READ_SIZE = 1024*1024 @@ -64,7 +61,7 @@ def _nonresident_page_regions(status_bytes, max_region_len=None): assert(max_region_len is None or max_region_len > 0) start = None for i, x in enumerate(status_bytes): - in_core = ord(x) & 1 + in_core = x & helpers.MINCORE_INCORE if start is None: if not in_core: start = i @@ -100,7 +97,7 @@ def readfile_iter(files, progress=None): if _fmincore and hasattr(f, 'fileno'): fd = f.fileno() max_chunk = max(1, (8 * 1024 * 1024) / sc_page_size) - rpr = _nonresident_page_regions(_helpers.fmincore(fd), max_chunk) + rpr = _nonresident_page_regions(_fmincore(fd), max_chunk) rstart, rlen = next(rpr, (None, None)) while 1: if progress: diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index 4cbd2dc..811a8f1 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -719,6 +719,42 @@ def mmap_readwrite_private(f, sz = 0, close=True): close) +_mincore = getattr(_helpers, 'mincore', None) +if _mincore: + # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined. 
+ MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1) + + _fmincore_chunk_size = None + def _set_fmincore_chunk_size(): + global _fmincore_chunk_size + pref_chunk_size = 64 * 1024 * 1024 + chunk_size = sc_page_size + if (sc_page_size < pref_chunk_size): + chunk_size = sc_page_size * (pref_chunk_size / sc_page_size) + _fmincore_chunk_size = chunk_size + + def fmincore(fd): + """Return the mincore() data for fd as a bytearray whose values can be + tested via MINCORE_INCORE""" + st = os.fstat(fd) + if (st.st_size == 0): + return bytearray(0) + if not _fmincore_chunk_size: + _set_fmincore_chunk_size() + pages_per_chunk = _fmincore_chunk_size / sc_page_size; + page_count = (st.st_size + sc_page_size - 1) / sc_page_size; + chunk_count = page_count / _fmincore_chunk_size + if chunk_count < 1: + chunk_count = 1 + result = bytearray(page_count) + for ci in xrange(chunk_count): + pos = _fmincore_chunk_size * ci; + msize = min(_fmincore_chunk_size, st.st_size - pos) + m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos) + _mincore(m, msize, 0, result, ci * pages_per_chunk); + return result + + def parse_timestamp(epoch_str): """Return the number of nanoseconds since the epoch that are described by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed, diff --git a/lib/bup/t/thashsplit.py b/lib/bup/t/thashsplit.py index 7ddcf34..1b69cc2 100644 --- a/lib/bup/t/thashsplit.py +++ b/lib/bup/t/thashsplit.py @@ -6,8 +6,7 @@ from bup import hashsplit, _helpers, helpers def nr_regions(x, max_count=None): - return list(hashsplit._nonresident_page_regions(''.join(map(chr, x)), - max_count)) + return list(hashsplit._nonresident_page_regions(bytearray(x), max_count)) @wvtest def test_nonresident_page_regions(): -- 2.39.2