X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=lib%2Fbup%2Fhashsplit.py;h=ff00e54b3cd46b4fda1b30827e03762c7ebce025;hb=5e9fb7e5582d1c30c77265a416a5c1d172fdd501;hp=5df627f7f04bb3a651f3a5039601a336360a6282;hpb=2660592aa67e987fb5ab8351ecea142faedc8e53;p=bup.git diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index 5df627f..ff00e54 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -1,45 +1,56 @@ -import math, os -from bup import _helpers +from __future__ import absolute_import +import io, math, os + +from bup import _helpers, helpers +from bup._helpers import cat_bytes +from bup.compat import buffer from bup.helpers import sc_page_size -try: - _fmincore = _helpers.fmincore -except AttributeError, e: - _fmincore = None + +_fmincore = getattr(helpers, 'fmincore', None) BLOB_MAX = 8192*4 # 8192 is the "typical" blob size for bupsplit -BLOB_READ_SIZE = 1024*1024 +BLOB_READ_SIZE = 8 * 1024 * 1024 MAX_PER_TREE = 256 progress_callback = None fanout = 16 -GIT_MODE_FILE = 0100644 -GIT_MODE_TREE = 040000 -GIT_MODE_SYMLINK = 0120000 -assert(GIT_MODE_TREE != 40000) # 0xxx should be treated as octal +GIT_MODE_FILE = 0o100644 +GIT_MODE_TREE = 0o40000 +GIT_MODE_SYMLINK = 0o120000 # The purpose of this type of buffer is to avoid copying on peek(), get(), # and eat(). We do copy the buffer contents on put(), but that should # be ok if we always only put() large amounts of data at a time. class Buf: def __init__(self): - self.data = '' + self.data = b'' self.start = 0 def put(self, s): - if s: - self.data = buffer(self.data, self.start) + s + if not self.data: + self.data = s + self.start = 0 + elif s: + remaining = len(self.data) - self.start + self.data = cat_bytes(self.data, self.start, remaining, + s, 0, len(s)) self.start = 0 - + def peek(self, count): + if count <= 256: + return self.data[self.start : self.start + count] return buffer(self.data, self.start, count) - + def eat(self, count): self.start += count def get(self, count): - v = buffer(self.data, self.start, count) + if count <= 256: + v = self.data[self.start : self.start + count] + else: + v = buffer(self.data, self.start, count) self.start += count return v @@ -56,7 +67,7 @@ def _fadvise_pages_done(fd, first_page, count): count * sc_page_size) -def _nonresident_page_regions(status_bytes, max_region_len=None): +def _nonresident_page_regions(status_bytes, incore_mask, max_region_len=None): """Return (start_page, count) pairs in ascending start_page order for each contiguous region of nonresident pages indicated by the mincore() status_bytes. Limit the number of pages in each region @@ -64,7 +75,7 @@ def _nonresident_page_regions(status_bytes, max_region_len=None): assert(max_region_len is None or max_region_len > 0) start = None for i, x in enumerate(status_bytes): - in_core = ord(x) & 1 + in_core = x & incore_mask if start is None: if not in_core: start = i @@ -98,10 +109,17 @@ def readfile_iter(files, progress=None): b = '' fd = rpr = rstart = rlen = None if _fmincore and hasattr(f, 'fileno'): - fd = f.fileno() - max_chunk = max(1, (8 * 1024 * 1024) / sc_page_size) - rpr = _nonresident_page_regions(_helpers.fmincore(fd), max_chunk) - rstart, rlen = next(rpr, (None, None)) + try: + fd = f.fileno() + except io.UnsupportedOperation: + pass + if fd: + mcore = _fmincore(fd) + if mcore: + max_chunk = max(1, (8 * 1024 * 1024) / sc_page_size) + rpr = _nonresident_page_regions(mcore, helpers.MINCORE_INCORE, + max_chunk) + rstart, rlen = next(rpr, (None, None)) while 1: if progress: progress(filenum, len(b)) @@ -183,10 +201,10 @@ def _make_shalist(l): ofs = 0 l = list(l) total = sum(size for mode,sha,size, in l) - vlen = len('%x' % total) + vlen = len(b'%x' % total) shalist = [] for (mode, sha, size) in l: - shalist.append((mode, '%0*x' % (vlen,ofs), sha)) + shalist.append((mode, b'%0*x' % (vlen,ofs), sha)) ofs += size assert(ofs == total) return (shalist, total) @@ -234,7 +252,7 @@ def split_to_blob_or_tree(makeblob, maketree, files, if len(shalist) == 1: return (shalist[0][0], shalist[0][2]) elif len(shalist) == 0: - return (GIT_MODE_FILE, makeblob('')) + return (GIT_MODE_FILE, makeblob(b'')) else: return (GIT_MODE_TREE, maketree(shalist))