X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2F_helpers.c;h=f31fddb4c65e5e41604e390e35dd92f5cfa8e55a;hb=0fe92820b73bcea573dc4bc3993e7bdfaf044176;hp=cfdbc8afcfbc11171db504ee8dcd0ea6c8320ddf;hpb=b062252a5bca9b64d7b3034b6fd181424641f61e;p=bup.git diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c index cfdbc8a..f31fddb 100644 --- a/lib/bup/_helpers.c +++ b/lib/bup/_helpers.c @@ -11,11 +11,15 @@ #include #include #include +#include #include #include #include -#include +#include +#ifdef HAVE_SYS_MMAN_H +#include +#endif #ifdef HAVE_SYS_TYPES_H #include #endif @@ -56,7 +60,22 @@ #define FS_NOCOW_FL 0 #endif -static int istty2 = 0; + +typedef unsigned char byte; + + +typedef struct { + int istty2; +} state_t; + +#if PY_MAJOR_VERSION < 3 +static state_t state; +# define get_state(x) (&state) +# define buf_argf "s" +#else +# define get_state(x) ((state_t *) PyModule_GetState(x)) +# define buf_argf "y" +#endif // PY_MAJOR_VERSION >= 3 #ifndef htonll @@ -88,6 +107,8 @@ static uint64_t htonll(uint64_t value) (((x) >= 0) ? PyLong_FromUnsignedLongLong(x) : PyLong_FromLongLong(x)) + +#if PY_MAJOR_VERSION < 3 static int bup_ulong_from_pyint(unsigned long *x, PyObject *py, const char *name) { @@ -108,12 +129,15 @@ static int bup_ulong_from_pyint(unsigned long *x, PyObject *py, *x = tmp; return 1; } +#endif static int bup_ulong_from_py(unsigned long *x, PyObject *py, const char *name) { +#if PY_MAJOR_VERSION < 3 if (PyInt_Check(py)) return bup_ulong_from_pyint(x, py, name); +#endif if (!PyLong_Check(py)) { @@ -152,6 +176,7 @@ static int bup_uint_from_py(unsigned int *x, PyObject *py, const char *name) static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py, const char *name) { +#if PY_MAJOR_VERSION < 3 if (PyInt_Check(py)) { unsigned long tmp; @@ -162,6 +187,7 @@ static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py, } return 0; } +#endif if (!PyLong_Check(py)) { @@ -182,6 +208,27 @@ static int bup_ullong_from_py(unsigned PY_LONG_LONG *x, PyObject *py, } +static PyObject *bup_bytescmp(PyObject *self, PyObject *args) +{ + PyObject *py_s1, *py_s2; // This is really a PyBytes/PyString + if (!PyArg_ParseTuple(args, "SS", &py_s1, &py_s2)) + return NULL; + char *s1, *s2; + Py_ssize_t s1_len, s2_len; + if (PyBytes_AsStringAndSize(py_s1, &s1, &s1_len) == -1) + return NULL; + if (PyBytes_AsStringAndSize(py_s2, &s2, &s2_len) == -1) + return NULL; + const Py_ssize_t n = (s1_len < s2_len) ? s1_len : s2_len; + const int cmp = memcmp(s1, s2, n); + if (cmp != 0) + return PyLong_FromLong(cmp); + if (s1_len == s2_len) + return PyLong_FromLong(0);; + return PyLong_FromLong((s1_len < s2_len) ? -1 : 1); +} + + // Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV... #if __WIN32__ || __CYGWIN__ @@ -233,16 +280,6 @@ static void unpythonize_argv(void) #endif // not __WIN32__ or __CYGWIN__ -static unsigned long long count_leading_zeros(const unsigned char * const buf, - unsigned long long len) -{ - const unsigned char *cur = buf; - while(len-- && *cur == 0) - cur++; - return cur - buf; -} - - static int write_all(int fd, const void *buf, const size_t count) { size_t written = 0; @@ -268,9 +305,10 @@ static int uadd(unsigned long long *dest, return 1; } + static PyObject *append_sparse_region(const int fd, unsigned long long n) { - while(n) + while (n) { off_t new_off; if (!INTEGRAL_ASSIGNMENT_FITS(&new_off, n)) @@ -284,6 +322,124 @@ static PyObject *append_sparse_region(const int fd, unsigned long long n) } +static PyObject *record_sparse_zeros(unsigned long long *new_pending, + const int fd, + unsigned long long prev_pending, + const unsigned long long count) +{ + // Add count additional sparse zeros to prev_pending and store the + // result in new_pending, or if the total won't fit in + // new_pending, write some of the zeros to fd sparsely, and store + // the remaining sum in new_pending. + if (!uadd(new_pending, prev_pending, count)) + { + PyObject *err = append_sparse_region(fd, prev_pending); + if (err != NULL) + return err; + *new_pending = count; + } + return NULL; +} + + +static byte* find_not_zero(const byte * const start, const byte * const end) +{ + // Return a pointer to first non-zero byte between start and end, + // or end if there isn't one. + assert(start <= end); + const unsigned char *cur = start; + while (cur < end && *cur == 0) + cur++; + return (byte *) cur; +} + + +static byte* find_trailing_zeros(const byte * const start, + const byte * const end) +{ + // Return a pointer to the start of any trailing run of zeros, or + // end if there isn't one. + assert(start <= end); + if (start == end) + return (byte *) end; + const byte * cur = end; + while (cur > start && *--cur == 0) {} + if (*cur == 0) + return (byte *) cur; + else + return (byte *) (cur + 1); +} + + +static byte *find_non_sparse_end(const byte * const start, + const byte * const end, + const unsigned long long min_len) +{ + // Return the first pointer to a min_len sparse block in [start, + // end) if there is one, otherwise a pointer to the start of any + // trailing run of zeros. If there are no trailing zeros, return + // end. + if (start == end) + return (byte *) end; + assert(start < end); + assert(min_len); + // Probe in min_len jumps, searching backward from the jump + // destination for a non-zero byte. If such a byte is found, move + // just past it and try again. + const byte *candidate = start; + // End of any run of zeros, starting at candidate, that we've already seen + const byte *end_of_known_zeros = candidate; + while (end - candidate >= min_len) // Handle all min_len candidate blocks + { + const byte * const probe_end = candidate + min_len; + const byte * const trailing_zeros = + find_trailing_zeros(end_of_known_zeros, probe_end); + if (trailing_zeros == probe_end) + end_of_known_zeros = candidate = probe_end; + else if (trailing_zeros == end_of_known_zeros) + { + assert(candidate >= start); + assert(candidate <= end); + assert(*candidate == 0); + return (byte *) candidate; + } + else + { + candidate = trailing_zeros; + end_of_known_zeros = probe_end; + } + } + + if (candidate == end) + return (byte *) end; + + // No min_len sparse run found, search backward from end + const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros, + end); + + if (trailing_zeros == end_of_known_zeros) + { + assert(candidate >= start); + assert(candidate < end); + assert(*candidate == 0); + assert(end - candidate < min_len); + return (byte *) candidate; + } + + if (trailing_zeros == end) + { + assert(*(end - 1) != 0); + return (byte *) end; + } + + assert(end - trailing_zeros < min_len); + assert(trailing_zeros >= start); + assert(trailing_zeros < end); + assert(*trailing_zeros == 0); + return (byte *) trailing_zeros; +} + + static PyObject *bup_write_sparsely(PyObject *self, PyObject *args) { int fd; @@ -304,61 +460,50 @@ static PyObject *bup_write_sparsely(PyObject *self, PyObject *args) if (!INTEGRAL_ASSIGNMENT_FITS(&buf_len, sbuf_len)) return PyErr_Format(PyExc_OverflowError, "buffer length too large"); - // The value of zeros_read indicates the number of zeros read from - // buf that haven't been accounted for yet (with respect to cur), - // while zeros indicates the total number of pending zeros, which - // could be larger in the first iteration if prev_sparse_len - // wasn't zero. - int rc; - unsigned long long unexamined = buf_len; - unsigned char *block_start = buf, *cur = buf; - unsigned long long zeros, zeros_read = count_leading_zeros(cur, unexamined); - assert(zeros_read <= unexamined); - unexamined -= zeros_read; - if (!uadd(&zeros, prev_sparse_len, zeros_read)) + const byte * block = buf; // Start of pending block + const byte * const end = buf + buf_len; + unsigned long long zeros = prev_sparse_len; + while (1) { - PyObject *err = append_sparse_region(fd, prev_sparse_len); - if (err != NULL) - return err; - zeros = zeros_read; - } + assert(block <= end); + if (block == end) + return PyLong_FromUnsignedLongLong(zeros); - while(unexamined) - { - if (zeros < min_sparse_len) - cur += zeros_read; - else + if (*block != 0) { - rc = write_all(fd, block_start, cur - block_start); - if (rc) - return PyErr_SetFromErrno(PyExc_IOError); + // Look for the end of block, i.e. the next sparse run of + // at least min_sparse_len zeros, or the end of the + // buffer. + const byte * const probe = find_non_sparse_end(block + 1, end, + min_sparse_len); + // Either at end of block, or end of non-sparse; write pending data PyObject *err = append_sparse_region(fd, zeros); if (err != NULL) return err; - cur += zeros_read; - block_start = cur; - } - // Pending zeros have ether been made sparse, or are going to - // be rolled into the next non-sparse block since we know we - // now have at least one unexamined non-zero byte. - assert(unexamined && *cur != 0); - zeros = zeros_read = 0; - while (unexamined && *cur != 0) - { - cur++; unexamined--; + int rc = write_all(fd, block, probe - block); + if (rc) + return PyErr_SetFromErrno(PyExc_IOError); + + if (end - probe < min_sparse_len) + zeros = end - probe; + else + zeros = min_sparse_len; + block = probe + zeros; } - if (unexamined) + else // *block == 0 { - zeros_read = count_leading_zeros(cur, unexamined); - assert(zeros_read <= unexamined); - unexamined -= zeros_read; - zeros = zeros_read; + // Should be in the first loop iteration, a sparse run of + // zeros, or nearly at the end of the block (within + // min_sparse_len). + const byte * const zeros_end = find_not_zero(block, end); + PyObject *err = record_sparse_zeros(&zeros, fd, + zeros, zeros_end - block); + if (err != NULL) + return err; + assert(block <= zeros_end); + block = zeros_end; } } - rc = write_all(fd, block_start, cur - block_start); - if (rc) - return PyErr_SetFromErrno(PyExc_IOError); - return PyLong_FromUnsignedLongLong(zeros); } @@ -716,7 +861,7 @@ static PyObject *merge_into(PyObject *self, PyObject *args) { struct idx *idx; uint32_t new_prefix; - if (count % 102424 == 0 && istty2) + if (count % 102424 == 0 && get_state(self)->istty2) fprintf(stderr, "midx: writing %.2f%% (%d/%d)\r", count*100.0/total, count, total); idx = idxs[last_i]; @@ -1033,12 +1178,12 @@ static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args) // The extents flag can't be removed, so don't (see chattr(1) and chattr.c). orig_attr = 0; // Handle int/long mismatch (see above) rc = ioctl(fd, FS_IOC_GETFLAGS, &orig_attr); - assert(orig_attr <= UINT_MAX); // Kernel type is actually int if (rc == -1) { close(fd); return PyErr_SetFromErrnoWithFilename(PyExc_OSError, path); } + assert(orig_attr <= UINT_MAX); // Kernel type is actually int attr |= ((unsigned int) orig_attr) & FS_EXTENT_FL; rc = ioctl(fd, FS_IOC_SETFLAGS, &attr); @@ -1319,6 +1464,7 @@ static PyObject *bup_localtime(PyObject *self, PyObject *args) return PyErr_Format(PyExc_OverflowError, "time value too large"); struct tm tm; + tzset(); if(localtime_r(&ttime, &tm) == NULL) return PyErr_SetFromErrno(PyExc_OSError); @@ -1332,77 +1478,64 @@ static PyObject *bup_localtime(PyObject *self, PyObject *args) #endif /* def HAVE_TM_TM_GMTOFF */ -#ifdef HAVE_MINCORE -static PyObject *bup_fmincore(PyObject *self, PyObject *args) +#ifdef BUP_MINCORE_BUF_TYPE +static PyObject *bup_mincore(PyObject *self, PyObject *args) { - int fd, rc; - if (!PyArg_ParseTuple(args, "i", &fd)) + Py_buffer src, dest; + PyObject *py_src_n, *py_src_off, *py_dest_off; + + if (!PyArg_ParseTuple(args, buf_argf "*OOw*O", + &src, &py_src_n, &py_src_off, + &dest, &py_dest_off)) return NULL; - struct stat st; - rc = fstat(fd, &st); - if (rc != 0) - return PyErr_SetFromErrno(PyExc_OSError); + PyObject *result = NULL; - if (st.st_size == 0) - return Py_BuildValue("s", ""); - - const size_t page_size = (size_t) sysconf (_SC_PAGESIZE); - const off_t pref_chunk_size = 64 * 1024 * 1024; - off_t chunk_size = page_size; - if (page_size < pref_chunk_size) - chunk_size = page_size * (pref_chunk_size / page_size); - const off_t pages_per_chunk = chunk_size / page_size; - const off_t page_count = (st.st_size + page_size - 1) / page_size; - const off_t chunk_count = page_count / chunk_size > 0 ? page_count / chunk_size : 1; - unsigned char * const result = malloc(page_count); - if (result == NULL) - return PyErr_SetFromErrno(PyExc_OSError); + unsigned long long src_n, src_off, dest_off; + if (!(bup_ullong_from_py(&src_n, py_src_n, "src_n") + && bup_ullong_from_py(&src_off, py_src_off, "src_off") + && bup_ullong_from_py(&dest_off, py_dest_off, "dest_off"))) + goto clean_and_return; - off_t ci; - for(ci = 0; ci < chunk_count; ci++) - { - const off_t pos = chunk_size * ci; - const size_t msize = chunk_size < st.st_size - pos ? chunk_size : st.st_size - pos; - void *m = mmap(NULL, msize, PROT_NONE, MAP_SHARED, fd, pos); - if (m == MAP_FAILED) - { - free(result); - return PyErr_SetFromErrno(PyExc_OSError); - } - rc = mincore(m, msize, &result[ci * pages_per_chunk]); - if (rc != 0) - { - const int errno_stash = errno; - rc = munmap(m, msize); - if (rc != 0) - { - char buf[512]; - char *msg = strerror_r(errno, buf, 512); - if (rc != 0) - fprintf(stderr, "%s:%d: strerror_r failed (%d)\n", - __FILE__, __LINE__, rc < 0 ? errno : rc); - else - fprintf(stderr, - "%s:%d: munmap failed after mincore failed (%s)\n", - __FILE__, __LINE__, msg); - } - free(result); - errno = errno_stash; - return PyErr_SetFromErrno(PyExc_OSError); - } - rc = munmap(m, msize); - if (rc != 0) - { - free(result); - return PyErr_SetFromErrno(PyExc_OSError); - } + unsigned long long src_region_end; + if (!uadd(&src_region_end, src_off, src_n)) { + result = PyErr_Format(PyExc_OverflowError, "(src_off + src_n) too large"); + goto clean_and_return; + } + if (src_region_end > src.len) { + result = PyErr_Format(PyExc_OverflowError, "region runs off end of src"); + goto clean_and_return; + } + + unsigned long long dest_size; + if (!INTEGRAL_ASSIGNMENT_FITS(&dest_size, dest.len)) { + result = PyErr_Format(PyExc_OverflowError, "invalid dest size"); + goto clean_and_return; + } + if (dest_off > dest_size) { + result = PyErr_Format(PyExc_OverflowError, "region runs off end of dest"); + goto clean_and_return; + } + + size_t length; + if (!INTEGRAL_ASSIGNMENT_FITS(&length, src_n)) { + result = PyErr_Format(PyExc_OverflowError, "src_n overflows size_t"); + goto clean_and_return; + } + int rc = mincore((void *)(src.buf + src_off), src_n, + (BUP_MINCORE_BUF_TYPE *) (dest.buf + dest_off)); + if (rc != 0) { + result = PyErr_SetFromErrno(PyExc_OSError); + goto clean_and_return; } - PyObject *py_result = Py_BuildValue("s#", result, page_count); - free(result); - return py_result; + result = Py_BuildValue("O", Py_None); + + clean_and_return: + PyBuffer_Release(&src); + PyBuffer_Release(&dest); + return result; } -#endif /* def HAVE_MINCORE */ +#endif /* def BUP_MINCORE_BUF_TYPE */ static PyMethodDef helper_methods[] = { @@ -1467,19 +1600,22 @@ static PyMethodDef helper_methods[] = { { "localtime", bup_localtime, METH_VARARGS, "Return struct_time elements plus the timezone offset and name." }, #endif -#ifdef HAVE_MINCORE - { "fmincore", bup_fmincore, METH_VARARGS, - "Return mincore() information for the provided file descriptor." }, + { "bytescmp", bup_bytescmp, METH_VARARGS, + "Return a negative value if x < y, zero if equal, positive otherwise."}, +#ifdef BUP_MINCORE_BUF_TYPE + { "mincore", bup_mincore, METH_VARARGS, + "For mincore(src, src_n, src_off, dest, dest_off)" + " call the system mincore(src + src_off, src_n, &dest[dest_off])." }, #endif { NULL, NULL, 0, NULL }, // sentinel }; - -PyMODINIT_FUNC init_helpers(void) +static int setup_module(PyObject *m) { - // FIXME: migrate these tests to configure. Check against the - // type we're going to use when passing to python. Other stat - // types are tested at runtime. + // FIXME: migrate these tests to configure, or at least don't + // possibly crash the whole application. Check against the type + // we're going to use when passing to python. Other stat types + // are tested at runtime. assert(sizeof(ino_t) <= sizeof(unsigned PY_LONG_LONG)); assert(sizeof(off_t) <= sizeof(PY_LONG_LONG)); assert(sizeof(blksize_t) <= sizeof(PY_LONG_LONG)); @@ -1488,20 +1624,28 @@ PyMODINIT_FUNC init_helpers(void) assert(sizeof(PY_LONG_LONG) <= sizeof(long long)); assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long)); - if (sizeof(off_t) < sizeof(int)) + // Originally required by append_sparse_region() { - // Originally required by append_sparse_region(). - fprintf(stderr, "sizeof(off_t) < sizeof(int); please report.\n"); - exit(1); + off_t probe; + if (!INTEGRAL_ASSIGNMENT_FITS(&probe, INT_MAX)) + { + fprintf(stderr, "off_t can't hold INT_MAX; please report.\n"); + exit(1); + } } char *e; - PyObject *m = Py_InitModule("_helpers", helper_methods); - if (m == NULL) - return; - #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wtautological-compare" // For INTEGER_TO_PY(). + { + PyObject *value; + value = INTEGER_TO_PY(INT_MAX); + PyObject_SetAttrString(m, "INT_MAX", value); + Py_DECREF(value); + value = INTEGER_TO_PY(UINT_MAX); + PyObject_SetAttrString(m, "UINT_MAX", value); + Py_DECREF(value); + } #ifdef HAVE_UTIMENSAT { PyObject *value; @@ -1516,21 +1660,63 @@ PyMODINIT_FUNC init_helpers(void) Py_DECREF(value); } #endif +#ifdef BUP_HAVE_MINCORE_INCORE { PyObject *value; - const long arg_max = sysconf(_SC_ARG_MAX); - if (arg_max == -1) - { - fprintf(stderr, "Cannot find SC_ARG_MAX, please report a bug.\n"); - exit(1); - } - value = INTEGER_TO_PY(arg_max); - PyObject_SetAttrString(m, "SC_ARG_MAX", value); + value = INTEGER_TO_PY(MINCORE_INCORE); + PyObject_SetAttrString(m, "MINCORE_INCORE", value); Py_DECREF(value); } +#endif #pragma clang diagnostic pop // ignored "-Wtautological-compare" e = getenv("BUP_FORCE_TTY"); - istty2 = isatty(2) || (atoi(e ? e : "0") & 2); + get_state(m)->istty2 = isatty(2) || (atoi(e ? e : "0") & 2); unpythonize_argv(); + return 1; +} + + +#if PY_MAJOR_VERSION < 3 + +PyMODINIT_FUNC init_helpers(void) +{ + PyObject *m = Py_InitModule("_helpers", helper_methods); + if (m == NULL) + return; + + if (!setup_module(m)) + { + Py_DECREF(m); + return; + } +} + +# else // PY_MAJOR_VERSION >= 3 + +static struct PyModuleDef helpers_def = { + PyModuleDef_HEAD_INIT, + "_helpers", + NULL, + sizeof(state_t), + helper_methods, + NULL, + NULL, // helpers_traverse, + NULL, // helpers_clear, + NULL +}; + +PyMODINIT_FUNC PyInit__helpers(void) +{ + PyObject *module = PyModule_Create(&helpers_def); + if (module == NULL) + return NULL; + if (!setup_module(module)) + { + Py_DECREF(module); + return NULL; + } + return module; } + +#endif // PY_MAJOR_VERSION >= 3