From: Rob Browning Date: Thu, 28 Nov 2019 08:01:02 +0000 (-0600) Subject: Remove inefficient (or will be) uses of buffer X-Git-Tag: 0.31~256 X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?p=bup.git;a=commitdiff_plain;h=b2f7ccd162e988a8785771a116c83b04f9ea51ce Remove inefficient (or will be) uses of buffer On amd64 sys.getsizeof(b'') is a bit under 40 bytes across python 2/3, while a buffer(b'') adds 64 bytes in python 2, and the memoryview(b'') that replaces it in python 3 adds 200. So just copy the bytes unless the added overhead is worth it. And while we're here, fix a few more python 3 str/bytes compatibility issues, and remove some redundant offset arithmetic by leaning on range() a bit more. (Not that it likely matters, but aside from being simpler, this is apparently more efficient too, because it moves more of the work to C). Signed-off-by: Rob Browning Tested-by: Rob Browning --- diff --git a/lib/bup/git.py b/lib/bup/git.py index b33d964..ff48da7 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -394,22 +394,25 @@ class PackIdxV1(PackIdx): self.name = filename self.idxnames = [self.name] self.map = mmap_read(f) - self.fanout = list(struct.unpack('!256I', - str(buffer(self.map, 0, 256*4)))) + self.fanout = list(struct.unpack('!256I', buffer(self.map, 0, 256 * 4))) self.fanout.append(0) # entry "-1" nsha = self.fanout[255] self.sha_ofs = 256*4 self.shatable = buffer(self.map, self.sha_ofs, nsha*24) def _ofs_from_idx(self, idx): - return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0] + ofs = idx * 24 + return struct.unpack('!I', self.shatable[ofs : ofs + 4])[0] def _idx_to_hash(self, idx): - return str(self.shatable[idx*24+4 : idx*24+24]) + ofs = idx * 24 + 4 + return self.shatable[ofs : ofs + 20] def __iter__(self): - for i in range(self.fanout[255]): - yield buffer(self.map, 256*4 + 24*i + 4, 20) + count = self.fanout[255] + start = 256 * 4 + 4 + for ofs in range(start, start + (24 * count), 24): + yield self.map[ofs : ofs + 20] 
class PackIdxV2(PackIdx): @@ -418,9 +421,9 @@ class PackIdxV2(PackIdx): self.name = filename self.idxnames = [self.name] self.map = mmap_read(f) - assert(str(self.map[0:8]) == '\377tOc\0\0\0\2') + assert self.map[0:8] == b'\377tOc\0\0\0\2' self.fanout = list(struct.unpack('!256I', - str(buffer(self.map, 8, 256*4)))) + buffer(self.map[8 : 8 + 256 * 4]))) self.fanout.append(0) # entry "-1" nsha = self.fanout[255] self.sha_ofs = 8 + 256*4 @@ -432,19 +435,22 @@ class PackIdxV2(PackIdx): 8 + 256*4 + nsha*20 + nsha*4 + nsha*4) def _ofs_from_idx(self, idx): - ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0] + i = idx * 4 + ofs = struct.unpack('!I', self.ofstable[i : i + 4])[0] if ofs & 0x80000000: idx64 = ofs & 0x7fffffff - ofs = struct.unpack('!Q', - str(buffer(self.ofs64table, idx64*8, 8)))[0] + idx64_i = idx64 * 8 + ofs = struct.unpack('!Q', self.ofs64table[idx64_i : idx64_i + 8])[0] return ofs def _idx_to_hash(self, idx): - return str(self.shatable[idx*20:(idx+1)*20]) + return self.shatable[idx * 20 : (idx + 1) * 20] def __iter__(self): - for i in range(self.fanout[255]): - yield buffer(self.map, 8 + 256*4 + 20*i, 20) + count = self.fanout[255] + start = 8 + 256 * 4 + for ofs in range(start, start + (20 * count), 20): + yield self.map[ofs : ofs + 20] _mpi_count = 0 diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index 5c48132..4c903c1 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -39,7 +39,10 @@ class Buf: self.start += count def get(self, count): - v = buffer(self.data, self.start, count) + if count <= 256: + v = self.data[self.start : self.start + count] + else: + v = buffer(self.data, self.start, count) self.start += count return v diff --git a/lib/bup/index.py b/lib/bup/index.py index 4dffd3a..a03a2dd 100644 --- a/lib/bup/index.py +++ b/lib/bup/index.py @@ -165,8 +165,10 @@ def _golevel(level, f, ename, newentry, metastore, tmax): class Entry: def __init__(self, basename, name, meta_ofs, tmax): - self.basename = 
str(basename) - self.name = str(name) + assert basename is None or type(basename) == bytes + assert name is None or type(name) == bytes + self.basename = basename + self.name = name self.meta_ofs = meta_ofs self.tmax = tmax self.children_ofs = 0 @@ -348,7 +350,7 @@ class ExistingEntry(Entry): self.ctime, ctime_ns, self.mtime, mtime_ns, self.atime, atime_ns, self.size, self.mode, self.gitmode, self.sha, self.flags, self.children_ofs, self.children_n, self.meta_ofs - ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN))) + ) = struct.unpack(INDEX_SIG, m[ofs : ofs + ENTLEN]) self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns)) self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns)) self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns)) @@ -389,7 +391,7 @@ class ExistingEntry(Entry): assert(eon >= 0) assert(eon >= ofs) assert(eon > ofs) - basename = str(buffer(self._m, ofs, eon-ofs)) + basename = self._m[ofs : ofs + (eon - ofs)] child = ExistingEntry(self, basename, self.name + basename, self._m, eon+1) if (not dname @@ -431,7 +433,8 @@ class Reader: self.m = mmap_readwrite(f) self.writable = True self.count = struct.unpack(FOOTER_SIG, - str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0] + self.m[st.st_size - FOOTLEN + : st.st_size])[0] def __del__(self): self.close() @@ -446,7 +449,7 @@ class Reader: assert(eon >= 0) assert(eon >= ofs) assert(eon > ofs) - basename = str(buffer(self.m, ofs, eon-ofs)) + basename = self.m[ofs : ofs + (eon - ofs)] yield ExistingEntry(None, basename, basename, self.m, eon+1) ofs = eon + 1 + ENTLEN diff --git a/lib/bup/midx.py b/lib/bup/midx.py index 0b41837..9de3ae8 100644 --- a/lib/bup/midx.py +++ b/lib/bup/midx.py @@ -58,8 +58,8 @@ class PackMidx: def _init_failed(self): self.bits = 0 self.entries = 1 - self.fanout = buffer('\0\0\0\0') - self.shatable = buffer('\0'*20) + self.fanout = b'\0\0\0\0' + self.shatable = b'\0' * 20 self.idxnames = [] def _fanget(self, i): @@ -116,8 +116,9 @@ class PackMidx: return None 
def __iter__(self): - for i in range(self._fanget(self.entries-1)): - yield buffer(self.shatable, i*20, 20) + count = self._fanget(self.entries-1) + for ofs in range(0, count * 20, 20): + yield self.shatable[ofs : ofs + 20] def __len__(self): return int(self._fanget(self.entries-1))