From d254bde8d204a02fb3f675dad7051fb2e5d58381 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 30 Jul 2010 20:23:08 -0400 Subject: [PATCH] Rename _hashsplit.so to _faster.so, and move bupsplit into its own source file. A lot of stuff in _hashsplit.c wasn't actually about hashsplitting; it was just a catch-all for all our C accelerator functions. Now the module name reflects that. Also move the bupsplit functions into their own non-python-dependent C source file so they can be used as part of other projects. Signed-off-by: Avery Pennarun --- DESIGN | 2 +- Makefile | 7 +- cmd/margin-cmd.py | 4 +- cmd/random-cmd.py | 4 +- lib/bup/{_hashsplit.c => _faster.c} | 125 ++-------------------------- lib/bup/bupsplit.c | 122 +++++++++++++++++++++++++++ lib/bup/bupsplit.h | 20 +++++ lib/bup/csetup.py | 8 +- lib/bup/hashsplit.py | 10 +-- lib/bup/t/thashsplit.py | 4 +- 10 files changed, 169 insertions(+), 137 deletions(-) rename lib/bup/{_hashsplit.c => _faster.c} (58%) create mode 100644 lib/bup/bupsplit.c create mode 100644 lib/bup/bupsplit.h diff --git a/DESIGN b/DESIGN index 00a56e4..c0d9fb7 100644 --- a/DESIGN +++ b/DESIGN @@ -144,7 +144,7 @@ we need your help! Avery's out of control! Join our mailing list! Please! Save us! ... oh boy, I sure hope he doesn't read this) In any case, stupidsum, although stupid, seems to do pretty well at its job. -You can find it in _hashsplit.c. Basically, it converts the last 32 bytes +You can find it in bupsplit.c. Basically, it converts the last 32 bytes of the file into a 32-bit integer. What we then do is take the lowest 13 bits of the checksum, and if they're all 1's, we consider that to be the end of a chunk. This happens on average once every 2^13 = 8192 bytes, so the diff --git a/Makefile b/Makefile index a555114..47b03ea 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ default: all all: bup Documentation/all -bup: lib/bup/_version.py lib/bup/_hashsplit$(SOEXT) cmds +bup: lib/bup/_version.py lib/bup/_faster$(SOEXT) cmds Documentation/all: bup @@ -62,10 +62,11 @@ install: all %/clean: $(MAKE) -C $* clean -lib/bup/_hashsplit$(SOEXT): lib/bup/_hashsplit.c lib/bup/csetup.py +lib/bup/_faster$(SOEXT): \ + lib/bup/bupsplit.c lib/bup/_faster.c lib/bup/csetup.py @rm -f $@ cd lib/bup && $(PYTHON) csetup.py build - cp lib/bup/build/*/_hashsplit$(SOEXT) lib/bup/ + cp lib/bup/build/*/_faster$(SOEXT) lib/bup/ .PHONY: lib/bup/_version.py lib/bup/_version.py: diff --git a/cmd/margin-cmd.py b/cmd/margin-cmd.py index 2564027..21f711f 100755 --- a/cmd/margin-cmd.py +++ b/cmd/margin-cmd.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import sys -from bup import options, git, _hashsplit +from bup import options, git, _faster from bup.helpers import * @@ -23,7 +23,7 @@ for i in mi: if i == last: continue #assert(str(i) >= last) - pm = _hashsplit.bitmatch(last, i) + pm = _faster.bitmatch(last, i) longmatch = max(longmatch, pm) last = i print longmatch diff --git a/cmd/random-cmd.py b/cmd/random-cmd.py index 362a19c..46fd4f2 100755 --- a/cmd/random-cmd.py +++ b/cmd/random-cmd.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import sys, mmap -from bup import options, _hashsplit +from bup import options, _faster from bup.helpers import * optspec = """ @@ -21,7 +21,7 @@ handle_ctrl_c() if opt.force or (not os.isatty(1) and not atoi(os.environ.get('BUP_FORCE_TTY')) & 1): - _hashsplit.write_random(sys.stdout.fileno(), total, opt.seed) + _faster.write_random(sys.stdout.fileno(), total, opt.seed) else: log('error: not writing binary data to a terminal. Use -f to force.\n') sys.exit(1) diff --git a/lib/bup/_hashsplit.c b/lib/bup/_faster.c similarity index 58% rename from lib/bup/_hashsplit.c rename to lib/bup/_faster.c index 8f7e4d1..cfab4fb 100644 --- a/lib/bup/_hashsplit.c +++ b/lib/bup/_faster.c @@ -1,126 +1,15 @@ +#include "bupsplit.h" #include #include #include #include -#define BLOBBITS (13) -#define BLOBSIZE (1<s1 += add - drop; - r->s2 += r->s1 - (WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET)); -} - - -static void rollsum_init(Rollsum *r) -{ - r->s1 = WINDOWSIZE * ROLLSUM_CHAR_OFFSET; - r->s2 = WINDOWSIZE * (WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET; - r->wofs = 0; - memset(r->window, 0, WINDOWSIZE); -} - - -// For some reason, gcc 4.3 (at least) optimizes badly if find_ofs() -// is static and rollsum_roll is an inline function. Let's use a macro -// here instead to help out the optimizer. -#define rollsum_roll(r, ch) do { \ - rollsum_add((r), (r)->window[(r)->wofs], (ch)); \ - (r)->window[(r)->wofs] = (ch); \ - (r)->wofs = ((r)->wofs + 1) % WINDOWSIZE; \ -} while (0) - - -static uint32_t rollsum_digest(Rollsum *r) -{ - return (r->s1 << 16) | (r->s2 & 0xffff); -} - - -static uint32_t rollsum_sum(uint8_t *buf, size_t ofs, size_t len) -{ - size_t count; - Rollsum r; - rollsum_init(&r); - for (count = ofs; count < len; count++) - rollsum_roll(&r, buf[count]); - return rollsum_digest(&r); -} - - static PyObject *selftest(PyObject *self, PyObject *args) { - uint8_t buf[100000]; - uint32_t sum1a, sum1b, sum2a, sum2b, sum3a, sum3b; - unsigned count; - if (!PyArg_ParseTuple(args, "")) return NULL; - srandom(1); - for (count = 0; count < sizeof(buf); count++) - buf[count] = random(); - - sum1a = rollsum_sum(buf, 0, sizeof(buf)); - sum1b = rollsum_sum(buf, 1, sizeof(buf)); - sum2a = rollsum_sum(buf, sizeof(buf) - WINDOWSIZE*5/2, - sizeof(buf) - WINDOWSIZE); - sum2b = rollsum_sum(buf, 0, sizeof(buf) - WINDOWSIZE); - sum3a = rollsum_sum(buf, 0, WINDOWSIZE+3); - sum3b = rollsum_sum(buf, 3, WINDOWSIZE+3); - - fprintf(stderr, "sum1a = 0x%08x\n", sum1a); - fprintf(stderr, "sum1b = 0x%08x\n", sum1b); - fprintf(stderr, "sum2a = 0x%08x\n", sum2a); - fprintf(stderr, "sum2b = 0x%08x\n", sum2b); - fprintf(stderr, "sum3a = 0x%08x\n", sum3a); - fprintf(stderr, "sum3b = 0x%08x\n", sum3b); - - return Py_BuildValue("i", sum1a==sum1b && sum2a==sum2b && sum3a==sum3b); -} - - -static int find_ofs(const unsigned char *buf, int len, int *bits) -{ - Rollsum r; - int count; - - rollsum_init(&r); - for (count = 0; count < len; count++) - { - rollsum_roll(&r, buf[count]); - if ((r.s2 & (BLOBSIZE-1)) == ((~0) & (BLOBSIZE-1))) - { - if (bits) - { - unsigned rsum = rollsum_digest(&r); - *bits = BLOBBITS; - rsum >>= BLOBBITS; - for (*bits = BLOBBITS; (rsum >>= 1) & 1; (*bits)++) - ; - } - return count+1; - } - } - return 0; + return Py_BuildValue("i", !bupsplit_selftest()); } @@ -128,7 +17,7 @@ static PyObject *blobbits(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; - return Py_BuildValue("i", BLOBBITS); + return Py_BuildValue("i", BUP_BLOBBITS); } @@ -139,7 +28,7 @@ static PyObject *splitbuf(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "t#", &buf, &len)) return NULL; - out = find_ofs(buf, len, &bits); + out = bupsplit_find_ofs(buf, len, &bits); return Py_BuildValue("ii", out, bits); } @@ -267,7 +156,7 @@ static PyObject *fadvise_done(PyObject *self, PyObject *args) } -static PyMethodDef hashsplit_methods[] = { +static PyMethodDef faster_methods[] = { { "selftest", selftest, METH_VARARGS, "Check that the rolling checksum rolls correctly (for unit tests)." }, { "blobbits", blobbits, METH_VARARGS, @@ -285,7 +174,7 @@ static PyMethodDef hashsplit_methods[] = { { NULL, NULL, 0, NULL }, // sentinel }; -PyMODINIT_FUNC init_hashsplit(void) +PyMODINIT_FUNC init_faster(void) { - Py_InitModule("_hashsplit", hashsplit_methods); + Py_InitModule("_faster", faster_methods); } diff --git a/lib/bup/bupsplit.c b/lib/bup/bupsplit.c new file mode 100644 index 0000000..532619a --- /dev/null +++ b/lib/bup/bupsplit.c @@ -0,0 +1,122 @@ +#include "bupsplit.h" +#include +#include +#include +#include +#include + +// According to librsync/rollsum.h: +// "We should make this something other than zero to improve the +// checksum algorithm: tridge suggests a prime number." +// apenwarr: I unscientifically tried 0 and 7919, and they both ended up +// slightly worse than the librsync value of 31 for my arbitrary test data. +#define ROLLSUM_CHAR_OFFSET 31 + +typedef struct { + unsigned s1, s2; + uint8_t window[BUP_WINDOWSIZE]; + int wofs; +} Rollsum; + + +// These formulas are based on rollsum.h in the librsync project. +static void rollsum_add(Rollsum *r, uint8_t drop, uint8_t add) +{ + r->s1 += add - drop; + r->s2 += r->s1 - (BUP_WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET)); +} + + +static void rollsum_init(Rollsum *r) +{ + r->s1 = BUP_WINDOWSIZE * ROLLSUM_CHAR_OFFSET; + r->s2 = BUP_WINDOWSIZE * (BUP_WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET; + r->wofs = 0; + memset(r->window, 0, BUP_WINDOWSIZE); +} + + +// For some reason, gcc 4.3 (at least) optimizes badly if find_ofs() +// is static and rollsum_roll is an inline function. Let's use a macro +// here instead to help out the optimizer. +#define rollsum_roll(r, ch) do { \ + rollsum_add((r), (r)->window[(r)->wofs], (ch)); \ + (r)->window[(r)->wofs] = (ch); \ + (r)->wofs = ((r)->wofs + 1) % BUP_WINDOWSIZE; \ +} while (0) + + +static uint32_t rollsum_digest(Rollsum *r) +{ + return (r->s1 << 16) | (r->s2 & 0xffff); +} + + +static uint32_t rollsum_sum(uint8_t *buf, size_t ofs, size_t len) +{ + size_t count; + Rollsum r; + rollsum_init(&r); + for (count = ofs; count < len; count++) + rollsum_roll(&r, buf[count]); + return rollsum_digest(&r); +} + + +int bupsplit_find_ofs(const unsigned char *buf, int len, int *bits) +{ + Rollsum r; + int count; + + rollsum_init(&r); + for (count = 0; count < len; count++) + { + rollsum_roll(&r, buf[count]); + if ((r.s2 & (BUP_BLOBSIZE-1)) == ((~0) & (BUP_BLOBSIZE-1))) + { + if (bits) + { + unsigned rsum = rollsum_digest(&r); + *bits = BUP_BLOBBITS; + rsum >>= BUP_BLOBBITS; + for (*bits = BUP_BLOBBITS; (rsum >>= 1) & 1; (*bits)++) + ; + } + return count+1; + } + } + return 0; +} + + +#ifndef BUP_NO_SELFTEST + +int bupsplit_selftest() +{ + uint8_t buf[100000]; + uint32_t sum1a, sum1b, sum2a, sum2b, sum3a, sum3b; + unsigned count; + + srandom(1); + for (count = 0; count < sizeof(buf); count++) + buf[count] = random(); + + sum1a = rollsum_sum(buf, 0, sizeof(buf)); + sum1b = rollsum_sum(buf, 1, sizeof(buf)); + sum2a = rollsum_sum(buf, sizeof(buf) - BUP_WINDOWSIZE*5/2, + sizeof(buf) - BUP_WINDOWSIZE); + sum2b = rollsum_sum(buf, 0, sizeof(buf) - BUP_WINDOWSIZE); + sum3a = rollsum_sum(buf, 0, BUP_WINDOWSIZE+3); + sum3b = rollsum_sum(buf, 3, BUP_WINDOWSIZE+3); + + fprintf(stderr, "sum1a = 0x%08x\n", sum1a); + fprintf(stderr, "sum1b = 0x%08x\n", sum1b); + fprintf(stderr, "sum2a = 0x%08x\n", sum2a); + fprintf(stderr, "sum2b = 0x%08x\n", sum2b); + fprintf(stderr, "sum3a = 0x%08x\n", sum3a); + fprintf(stderr, "sum3b = 0x%08x\n", sum3b); + + return sum1a!=sum1b || sum2a!=sum2b || sum3a!=sum3b; +} + +#endif // !BUP_NO_SELFTEST diff --git a/lib/bup/bupsplit.h b/lib/bup/bupsplit.h new file mode 100644 index 0000000..2fb6eb7 --- /dev/null +++ b/lib/bup/bupsplit.h @@ -0,0 +1,20 @@ +#ifndef __BUPSPLIT_H +#define __BUPSPLIT_H + +#define BUP_BLOBBITS (13) +#define BUP_BLOBSIZE (1<= base_bits) @@ -163,7 +163,7 @@ def split_to_blob_or_tree(w, files): def open_noatime(name): - fd = _hashsplit.open_noatime(name) + fd = _faster.open_noatime(name) try: return os.fdopen(fd, 'rb', 1024*1024) except: @@ -177,4 +177,4 @@ def open_noatime(name): def fadvise_done(f, ofs): assert(ofs >= 0) if ofs > 0: - _hashsplit.fadvise_done(f.fileno(), ofs) + _faster.fadvise_done(f.fileno(), ofs) diff --git a/lib/bup/t/thashsplit.py b/lib/bup/t/thashsplit.py index 9f56ca3..bd8a015 100644 --- a/lib/bup/t/thashsplit.py +++ b/lib/bup/t/thashsplit.py @@ -1,6 +1,6 @@ -from bup import hashsplit, _hashsplit +from bup import hashsplit, _faster from wvtest import * @wvtest def test_rolling_sums(): - WVPASS(_hashsplit.selftest()) + WVPASS(_faster.selftest()) -- 2.39.2