X-Git-Url: https://arthur.barton.de/gitweb/?p=bup.git;a=blobdiff_plain;f=lib%2Fbup%2F_helpers.c;h=30c627fa32c3114b4ed51c48807577afca11d9a3;hp=808c728642ae2d0669ecebdb256ec85a294f7592;hb=f8e97d2c9b9c0d03cd5cc44d7c245bd0349d70c3;hpb=97213b1d4d53eaac738d6be2d6dad5315b217e7b diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c index 808c728..30c627f 100644 --- a/lib/bup/_helpers.c +++ b/lib/bup/_helpers.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,9 @@ #define FS_NOCOW_FL 0 #endif + +typedef unsigned char byte; + static int istty2 = 0; @@ -235,16 +239,6 @@ static void unpythonize_argv(void) #endif // not __WIN32__ or __CYGWIN__ -static unsigned long long count_leading_zeros(const unsigned char * const buf, - unsigned long long len) -{ - const unsigned char *cur = buf; - while(len-- && *cur == 0) - cur++; - return cur - buf; -} - - static int write_all(int fd, const void *buf, const size_t count) { size_t written = 0; @@ -270,9 +264,10 @@ static int uadd(unsigned long long *dest, return 1; } + static PyObject *append_sparse_region(const int fd, unsigned long long n) { - while(n) + while (n) { off_t new_off; if (!INTEGRAL_ASSIGNMENT_FITS(&new_off, n)) @@ -286,6 +281,125 @@ static PyObject *append_sparse_region(const int fd, unsigned long long n) } +static PyObject *record_sparse_zeros(unsigned long long *new_pending, + const int fd, + unsigned long long prev_pending, + const unsigned long long count) +{ + // Add count additional sparse zeros to prev_pending and store the + // result in new_pending, or if the total won't fit in + // new_pending, write some of the zeros to fd sparsely, and store + // the remaining sum in new_pending. + if (!uadd(new_pending, prev_pending, count)) + { + PyObject *err = append_sparse_region(fd, prev_pending); + if (err != NULL) + return err; + *new_pending = count; + } + return NULL; +} + + +static const byte * find_not_zero(const byte * const start, + const byte * const end) +{ + // Return a pointer to first non-zero byte between start and end, + // or end if there isn't one. + assert(start <= end); + const unsigned char *cur = start; + while (cur < end && *cur == 0) + cur++; + return cur; +} + + +static const byte * const find_trailing_zeros(const byte * const start, + const byte * const end) +{ + // Return a pointer to the start of any trailing run of zeros, or + // end if there isn't one. + assert(start <= end); + if (start == end) + return end; + const byte * cur = end; + while (cur > start && *--cur == 0) {} + if (*cur == 0) + return cur; + else + return cur + 1; +} + + +static const byte *find_non_sparse_end(const byte * const start, + const byte * const end, + const unsigned long long min_len) +{ + // Return the first pointer to a min_len sparse block in [start, + // end) if there is one, otherwise a pointer to the start of any + // trailing run of zeros. If there are no trailing zeros, return + // end. + if (start == end) + return end; + assert(start < end); + assert(min_len); + // Probe in min_len jumps, searching backward from the jump + // destination for a non-zero byte. If such a byte is found, move + // just past it and try again. + const byte *candidate = start; + // End of any run of zeros, starting at candidate, that we've already seen + const byte *end_of_known_zeros = candidate; + while (end - candidate >= min_len) // Handle all min_len candidate blocks + { + const byte * const probe_end = candidate + min_len; + const byte * const trailing_zeros = + find_trailing_zeros(end_of_known_zeros, probe_end); + if (trailing_zeros == probe_end) + end_of_known_zeros = candidate = probe_end; + else if (trailing_zeros == end_of_known_zeros) + { + assert(candidate >= start); + assert(candidate <= end); + assert(*candidate == 0); + return candidate; + } + else + { + candidate = trailing_zeros; + end_of_known_zeros = probe_end; + } + } + + if (candidate == end) + return end; + + // No min_len sparse run found, search backward from end + const byte * const trailing_zeros = find_trailing_zeros(end_of_known_zeros, + end); + + if (trailing_zeros == end_of_known_zeros) + { + assert(candidate >= start); + assert(candidate < end); + assert(*candidate == 0); + assert(end - candidate < min_len); + return candidate; + } + + if (trailing_zeros == end) + { + assert(*(end - 1) != 0); + return end; + } + + assert(end - trailing_zeros < min_len); + assert(trailing_zeros >= start); + assert(trailing_zeros < end); + assert(*trailing_zeros == 0); + return trailing_zeros; +} + + static PyObject *bup_write_sparsely(PyObject *self, PyObject *args) { int fd; @@ -306,61 +420,50 @@ static PyObject *bup_write_sparsely(PyObject *self, PyObject *args) if (!INTEGRAL_ASSIGNMENT_FITS(&buf_len, sbuf_len)) return PyErr_Format(PyExc_OverflowError, "buffer length too large"); - // The value of zeros_read indicates the number of zeros read from - // buf that haven't been accounted for yet (with respect to cur), - // while zeros indicates the total number of pending zeros, which - // could be larger in the first iteration if prev_sparse_len - // wasn't zero. - int rc; - unsigned long long unexamined = buf_len; - unsigned char *block_start = buf, *cur = buf; - unsigned long long zeros, zeros_read = count_leading_zeros(cur, unexamined); - assert(zeros_read <= unexamined); - unexamined -= zeros_read; - if (!uadd(&zeros, prev_sparse_len, zeros_read)) + const byte * block = buf; // Start of pending block + const byte * const end = buf + buf_len; + unsigned long long zeros = prev_sparse_len; + while (1) { - PyObject *err = append_sparse_region(fd, prev_sparse_len); - if (err != NULL) - return err; - zeros = zeros_read; - } + assert(block <= end); + if (block == end) + return PyLong_FromUnsignedLongLong(zeros); - while(unexamined) - { - if (zeros < min_sparse_len) - cur += zeros_read; - else + if (*block != 0) { - rc = write_all(fd, block_start, cur - block_start); - if (rc) - return PyErr_SetFromErrno(PyExc_IOError); + // Look for the end of block, i.e. the next sparse run of + // at least min_sparse_len zeros, or the end of the + // buffer. + const byte * const probe = find_non_sparse_end(block + 1, end, + min_sparse_len); + // Either at end of block, or end of non-sparse; write pending data PyObject *err = append_sparse_region(fd, zeros); if (err != NULL) return err; - cur += zeros_read; - block_start = cur; - } - // Pending zeros have ether been made sparse, or are going to - // be rolled into the next non-sparse block since we know we - // now have at least one unexamined non-zero byte. - assert(unexamined && *cur != 0); - zeros = zeros_read = 0; - while (unexamined && *cur != 0) - { - cur++; unexamined--; + int rc = write_all(fd, block, probe - block); + if (rc) + return PyErr_SetFromErrno(PyExc_IOError); + + if (end - probe < min_sparse_len) + zeros = end - probe; + else + zeros = min_sparse_len; + block = probe + zeros; } - if (unexamined) + else // *block == 0 { - zeros_read = count_leading_zeros(cur, unexamined); - assert(zeros_read <= unexamined); - unexamined -= zeros_read; - zeros = zeros_read; + // Should be in the first loop iteration, a sparse run of + // zeros, or nearly at the end of the block (within + // min_sparse_len). + const byte * const zeros_end = find_not_zero(block, end); + PyObject *err = record_sparse_zeros(&zeros, fd, + zeros, zeros_end - block); + if (err != NULL) + return err; + assert(block <= zeros_end); + block = zeros_end; } } - rc = write_all(fd, block_start, cur - block_start); - if (rc) - return PyErr_SetFromErrno(PyExc_IOError); - return PyLong_FromUnsignedLongLong(zeros); } @@ -1463,11 +1566,14 @@ PyMODINIT_FUNC init_helpers(void) assert(sizeof(PY_LONG_LONG) <= sizeof(long long)); assert(sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long)); - if (sizeof(off_t) < sizeof(int)) + // Originally required by append_sparse_region() { - // Originally required by append_sparse_region(). - fprintf(stderr, "sizeof(off_t) < sizeof(int); please report.\n"); - exit(1); + off_t probe; + if (!INTEGRAL_ASSIGNMENT_FITS(&probe, INT_MAX)) + { + fprintf(stderr, "off_t can't hold INT_MAX; please report.\n"); + exit(1); + } } char *e;