X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Fhashsplit.py;h=048347b2fdd312be787c7cfd3dbf7d6eb9820446;hb=de7ccab9ea2b7fa0937ca1aea5d88d97b06893cd;hp=914c2bb5a8e04b532a78e3b860ac2b23216288dc;hpb=2a191b01e6082564f188d1517241d19e4a6c5c7b;p=bup.git diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index 914c2bb..048347b 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -10,6 +10,7 @@ fanout = 16 GIT_MODE_FILE = 0100644 GIT_MODE_TREE = 040000 +GIT_MODE_SYMLINK = 0120000 assert(GIT_MODE_TREE != 40000) # 0xxx should be treated as octal # The purpose of this type of buffer is to avoid copying on peek(), get(), @@ -40,6 +41,13 @@ class Buf: return len(self.data) - self.start +def _fadvise_done(f, ofs, len): + assert(ofs >= 0) + assert(len >= 0) + if len > 0 and hasattr(f, 'fileno'): + _helpers.fadvise_done(f.fileno(), ofs, len) + + def readfile_iter(files, progress=None): for filenum,f in enumerate(files): ofs = 0 @@ -47,11 +55,12 @@ def readfile_iter(files, progress=None): while 1: if progress: progress(filenum, len(b)) - fadvise_done(f, max(0, ofs - 1024*1024)) b = f.read(BLOB_READ_SIZE) ofs += len(b) + # Warning: ofs == 0 means 'done with the whole file' + # This will only happen here when the file is empty + _fadvise_done(f, 0, ofs) if not b: - fadvise_done(f, ofs) break yield b @@ -60,11 +69,13 @@ def _splitbuf(buf, basebits, fanbits): while 1: b = buf.peek(buf.used()) (ofs, bits) = _helpers.splitbuf(b) - if ofs > BLOB_MAX: - ofs = BLOB_MAX if ofs: + if ofs > BLOB_MAX: + ofs = BLOB_MAX + level = 0 + else: + level = (bits-basebits)//fanbits # integer division buf.eat(ofs) - level = (bits-basebits)//fanbits # integer division yield buffer(b, 0, ofs), level else: break @@ -132,7 +143,7 @@ def _make_shalist(l): def _squish(maketree, stacks, n): i = 0 - while i MAX_PER_TREE: + while i < n or len(stacks[i]) >= MAX_PER_TREE: while len(stacks) <= i+1: stacks.append([]) if len(stacks[i]) == 1: @@ -158,17 +169,17 @@ def split_to_shalist(makeblob, maketree, files, stacks = [[]] for (sha,size,level) in sl: stacks[0].append((GIT_MODE_FILE, sha, size)) - if level: - _squish(maketree, stacks, level) + _squish(maketree, stacks, level) #log('stacks: %r\n' % [len(i) for i in stacks]) _squish(maketree, stacks, len(stacks)-1) #log('stacks: %r\n' % [len(i) for i in stacks]) return _make_shalist(stacks[-1])[0] -def split_to_blob_or_tree(makeblob, maketree, files, keep_boundaries): +def split_to_blob_or_tree(makeblob, maketree, files, + keep_boundaries, progress=None): shalist = list(split_to_shalist(makeblob, maketree, - files, keep_boundaries)) + files, keep_boundaries, progress)) if len(shalist) == 1: return (shalist[0][0], shalist[0][2]) elif len(shalist) == 0: @@ -187,9 +198,3 @@ def open_noatime(name): except: pass raise - - -def fadvise_done(f, ofs): - assert(ofs >= 0) - if ofs > 0 and hasattr(f, 'fileno'): - _helpers.fadvise_done(f.fileno(), ofs)