X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Fhashsplit.py;h=048347b2fdd312be787c7cfd3dbf7d6eb9820446;hb=de7ccab9ea2b7fa0937ca1aea5d88d97b06893cd;hp=1819294d1aa7d2029203316d78f9c300bc75b7c8;hpb=9133f733cdde36d7ecd627d339f90d87b7d2b0e6;p=bup.git diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index 1819294..048347b 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -8,6 +8,11 @@ MAX_PER_TREE = 256 progress_callback = None fanout = 16 +GIT_MODE_FILE = 0100644 +GIT_MODE_TREE = 040000 +GIT_MODE_SYMLINK = 0120000 +assert(GIT_MODE_TREE != 40000) # 0xxx should be treated as octal + # The purpose of this type of buffer is to avoid copying on peek(), get(), # and eat(). We do copy the buffer contents on put(), but that should # be ok if we always only put() large amounts of data at a time. @@ -36,6 +41,13 @@ class Buf: return len(self.data) - self.start +def _fadvise_done(f, ofs, len): + assert(ofs >= 0) + assert(len >= 0) + if len > 0 and hasattr(f, 'fileno'): + _helpers.fadvise_done(f.fileno(), ofs, len) + + def readfile_iter(files, progress=None): for filenum,f in enumerate(files): ofs = 0 @@ -43,11 +55,12 @@ def readfile_iter(files, progress=None): while 1: if progress: progress(filenum, len(b)) - fadvise_done(f, max(0, ofs - 1024*1024)) b = f.read(BLOB_READ_SIZE) ofs += len(b) + # Warning: ofs == 0 means 'done with the whole file' + # This will only happen here when the file is empty + _fadvise_done(f, 0, ofs) if not b: - fadvise_done(f, ofs) break yield b @@ -56,11 +69,13 @@ def _splitbuf(buf, basebits, fanbits): while 1: b = buf.peek(buf.used()) (ofs, bits) = _helpers.splitbuf(b) - if ofs > BLOB_MAX: - ofs = BLOB_MAX if ofs: + if ofs > BLOB_MAX: + ofs = BLOB_MAX + level = 0 + else: + level = (bits-basebits)//fanbits # integer division buf.eat(ofs) - level = (bits-basebits)//fanbits # integer division yield buffer(b, 0, ofs), level else: break @@ -115,17 +130,20 @@ def split_to_blobs(makeblob, files, 
keep_boundaries, progress): def _make_shalist(l): ofs = 0 + l = list(l) + total = sum(size for mode,sha,size, in l) + vlen = len('%x' % total) shalist = [] for (mode, sha, size) in l: - shalist.append((mode, '%016x' % ofs, sha)) + shalist.append((mode, '%0*x' % (vlen,ofs), sha)) ofs += size - total = ofs + assert(ofs == total) return (shalist, total) def _squish(maketree, stacks, n): i = 0 - while i<n or len(stacks[i]) > MAX_PER_TREE: + while i < n or len(stacks[i]) >= MAX_PER_TREE: while len(stacks) <= i+1: stacks.append([]) if len(stacks[i]) == 1: @@ -133,7 +151,7 @@ def _squish(maketree, stacks, n): elif stacks[i]: (shalist, size) = _make_shalist(stacks[i]) tree = maketree(shalist) - stacks[i+1].append(('40000', tree, size)) + stacks[i+1].append((GIT_MODE_TREE, tree, size)) stacks[i] = [] i += 1 @@ -145,29 +163,29 @@ def split_to_shalist(makeblob, maketree, files, if not fanout: shal = [] for (sha,size,level) in sl: - shal.append(('100644', sha, size)) + shal.append((GIT_MODE_FILE, sha, size)) return _make_shalist(shal)[0] else: stacks = [[]] for (sha,size,level) in sl: - stacks[0].append(('100644', sha, size)) - if level: - _squish(maketree, stacks, level) + stacks[0].append((GIT_MODE_FILE, sha, size)) + _squish(maketree, stacks, level) #log('stacks: %r\n' % [len(i) for i in stacks]) _squish(maketree, stacks, len(stacks)-1) #log('stacks: %r\n' % [len(i) for i in stacks]) return _make_shalist(stacks[-1])[0] -def split_to_blob_or_tree(makeblob, maketree, files, keep_boundaries): +def split_to_blob_or_tree(makeblob, maketree, files, + keep_boundaries, progress=None): shalist = list(split_to_shalist(makeblob, maketree, - files, keep_boundaries)) + files, keep_boundaries, progress)) if len(shalist) == 1: return (shalist[0][0], shalist[0][2]) elif len(shalist) == 0: - return ('100644', makeblob('')) + return (GIT_MODE_FILE, makeblob('')) else: - return ('40000', maketree(shalist)) + return (GIT_MODE_TREE, maketree(shalist)) def open_noatime(name): @@ -180,9 +198,3 @@ def 
open_noatime(name): except: pass raise - - -def fadvise_done(f, ofs): - assert(ofs >= 0) - if ofs > 0 and hasattr(f, 'fileno'): - _helpers.fadvise_done(f.fileno(), ofs)