2 * Copyright 2011 Avery Pennarun. All rights reserved.
4 * (This license applies to bupsplit.c and bupsplit.h only.)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
18 * THIS SOFTWARE IS PROVIDED BY AVERY PENNARUN ``AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // According to librsync/rollsum.h:
37 // "We should make this something other than zero to improve the
38 // checksum algorithm: tridge suggests a prime number."
39 // apenwarr: I unscientifically tried 0 and 7919, and they both ended up
40 // slightly worse than the librsync value of 31 for my arbitrary test data.
41 #define ROLLSUM_CHAR_OFFSET 31
45 uint8_t window[BUP_WINDOWSIZE];
50 // These formulas are based on rollsum.h in the librsync project.
51 static void rollsum_add(Rollsum *r, uint8_t drop, uint8_t add)
54 r->s2 += r->s1 - (BUP_WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET));
58 static void rollsum_init(Rollsum *r)
60 r->s1 = BUP_WINDOWSIZE * ROLLSUM_CHAR_OFFSET;
61 r->s2 = BUP_WINDOWSIZE * (BUP_WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET;
63 memset(r->window, 0, BUP_WINDOWSIZE);
67 // For some reason, gcc 4.3 (at least) optimizes badly if find_ofs()
68 // is static and rollsum_roll is an inline function. Let's use a macro
69 // here instead to help out the optimizer.
70 #define rollsum_roll(r, ch) do { \
71 rollsum_add((r), (r)->window[(r)->wofs], (ch)); \
72 (r)->window[(r)->wofs] = (ch); \
73 (r)->wofs = ((r)->wofs + 1) % BUP_WINDOWSIZE; \
77 static uint32_t rollsum_digest(Rollsum *r)
79 return (r->s1 << 16) | (r->s2 & 0xffff);
83 static uint32_t rollsum_sum(uint8_t *buf, size_t ofs, size_t len)
88 for (count = ofs; count < len; count++)
89 rollsum_roll(&r, buf[count]);
90 return rollsum_digest(&r);
94 int bupsplit_find_ofs(const unsigned char *buf, int len, int *bits)
100 for (count = 0; count < len; count++)
102 rollsum_roll(&r, buf[count]);
103 if ((r.s2 & (BUP_BLOBSIZE-1)) == ((~0) & (BUP_BLOBSIZE-1)))
107 unsigned rsum = rollsum_digest(&r);
108 rsum >>= BUP_BLOBBITS;
109 for (*bits = BUP_BLOBBITS; (rsum >>= 1) & 1; (*bits)++)
119 #ifndef BUP_NO_SELFTEST
120 #define BUP_SELFTEST_SIZE 100000
122 int bupsplit_selftest()
124 uint8_t *buf = malloc(BUP_SELFTEST_SIZE);
125 uint32_t sum1a, sum1b, sum2a, sum2b, sum3a, sum3b;
129 for (count = 0; count < BUP_SELFTEST_SIZE; count++)
130 buf[count] = random();
132 sum1a = rollsum_sum(buf, 0, BUP_SELFTEST_SIZE);
133 sum1b = rollsum_sum(buf, 1, BUP_SELFTEST_SIZE);
134 sum2a = rollsum_sum(buf, BUP_SELFTEST_SIZE - BUP_WINDOWSIZE*5/2,
135 BUP_SELFTEST_SIZE - BUP_WINDOWSIZE);
136 sum2b = rollsum_sum(buf, 0, BUP_SELFTEST_SIZE - BUP_WINDOWSIZE);
137 sum3a = rollsum_sum(buf, 0, BUP_WINDOWSIZE+3);
138 sum3b = rollsum_sum(buf, 3, BUP_WINDOWSIZE+3);
140 fprintf(stderr, "sum1a = 0x%08x\n", sum1a);
141 fprintf(stderr, "sum1b = 0x%08x\n", sum1b);
142 fprintf(stderr, "sum2a = 0x%08x\n", sum2a);
143 fprintf(stderr, "sum2b = 0x%08x\n", sum2b);
144 fprintf(stderr, "sum3a = 0x%08x\n", sum3a);
145 fprintf(stderr, "sum3b = 0x%08x\n", sum3b);
148 return sum1a!=sum1b || sum2a!=sum2b || sum3a!=sum3b;
151 #endif // !BUP_NO_SELFTEST