From bd08128284ab3c4444f09071e7deeb3fb0684ce4 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Fri, 3 Jan 2020 14:27:09 -0600 Subject: [PATCH] hashsplit: replace join_bytes with cat_bytes Add a C cat_bytes that can concatenate two bytes objects with offsets and extents. This allows us to have the same implementation for python 2 and 3, to drop another use of buffer(), and may be handy in the future, particularly given the expense of getting a buffer offset in python 3 (i.e. memoryview() adds about 200 bytes). Signed-off-by: Rob Browning Tested-by: Rob Browning --- lib/bup/_helpers.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ lib/bup/compat.py | 15 --------------- lib/bup/hashsplit.py | 9 +++++++-- 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c index 312ecd4..5f13d6a 100644 --- a/lib/bup/_helpers.c +++ b/lib/bup/_helpers.c @@ -273,6 +273,49 @@ static PyObject *bup_bytescmp(PyObject *self, PyObject *args) } +static PyObject *bup_cat_bytes(PyObject *self, PyObject *args) +{ + unsigned char *bufx = NULL, *bufy = NULL; + Py_ssize_t bufx_len, bufx_ofs, bufx_n; + Py_ssize_t bufy_len, bufy_ofs, bufy_n; + if (!PyArg_ParseTuple(args, + rbuf_argf "nn" + rbuf_argf "nn", + &bufx, &bufx_len, &bufx_ofs, &bufx_n, + &bufy, &bufy_len, &bufy_ofs, &bufy_n)) + return NULL; + if (bufx_ofs < 0) + return PyErr_Format(PyExc_ValueError, "negative x offset"); + if (bufx_n < 0) + return PyErr_Format(PyExc_ValueError, "negative x extent"); + if (bufx_ofs > bufx_len) + return PyErr_Format(PyExc_ValueError, "x offset greater than length"); + if (bufx_n > bufx_len - bufx_ofs) + return PyErr_Format(PyExc_ValueError, "x extent past end of buffer"); + + if (bufy_ofs < 0) + return PyErr_Format(PyExc_ValueError, "negative y offset"); + if (bufy_n < 0) + return PyErr_Format(PyExc_ValueError, "negative y extent"); + if (bufy_ofs > bufy_len) + return PyErr_Format(PyExc_ValueError, "y offset greater than length"); + if (bufy_n > bufy_len - 
bufy_ofs) + return PyErr_Format(PyExc_ValueError, "y extent past end of buffer"); + + if (bufy_n > PY_SSIZE_T_MAX - bufx_n) + return PyErr_Format(PyExc_OverflowError, "result length too long"); + + PyObject *result = PyBytes_FromStringAndSize(NULL, bufx_n + bufy_n); + if (!result) + return PyErr_NoMemory(); + char *buf = PyBytes_AS_STRING(result); + memcpy(buf, bufx + bufx_ofs, bufx_n); + memcpy(buf + bufx_n, bufy + bufy_ofs, bufy_n); + return result; +} + + + // Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV... #if __WIN32__ || __CYGWIN__ @@ -1725,6 +1768,8 @@ static PyMethodDef helper_methods[] = { #endif { "bytescmp", bup_bytescmp, METH_VARARGS, "Return a negative value if x < y, zero if equal, positive otherwise."}, + { "cat_bytes", bup_cat_bytes, METH_VARARGS, + "For (x_bytes, x_ofs, x_n, y_bytes, y_ofs, y_n) arguments, return their concatenation."}, #ifdef BUP_MINCORE_BUF_TYPE { "mincore", bup_mincore, METH_VARARGS, "For mincore(src, src_n, src_off, dest, dest_off)" diff --git a/lib/bup/compat.py b/lib/bup/compat.py index 985d8ac..859898d 100644 --- a/lib/bup/compat.py +++ b/lib/bup/compat.py @@ -70,10 +70,6 @@ if py3: return memoryview(object)[offset:] return memoryview(object) - def join_bytes(*items): - """Return the concatenated bytes or memoryview arguments as bytes.""" - return b''.join(items) - def getcwd(): return fsencode(os.getcwd()) @@ -148,17 +144,6 @@ else: # Python 2 buffer = buffer - def join_bytes(x, y): - """Return the concatenated bytes or buffer arguments as bytes.""" - if type(x) == buffer: - assert type(y) in (bytes, buffer) - return x + y - assert type(x) == bytes - if type(y) == bytes: - return b''.join((x, y)) - assert type(y) in (bytes, buffer) - return buffer(x) + y - def restore_lc_env(): # Once we're up and running with iso-8859-1, undo the bup-python diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index dc3a538..0ae6acd 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -3,7 +3,8 @@ 
from __future__ import absolute_import import io, math, os from bup import _helpers, compat, helpers -from bup.compat import buffer, join_bytes +from bup._helpers import cat_bytes +from bup.compat import buffer, py_maj from bup.helpers import sc_page_size @@ -29,10 +30,14 @@ class Buf: def put(self, s): if s: - self.data = join_bytes(buffer(self.data, self.start), s) + remaining = len(self.data) - self.start + self.data = cat_bytes(self.data, self.start, remaining, + s, 0, len(s)) self.start = 0 def peek(self, count): + if count <= 256: + return self.data[self.start : self.start + count] return buffer(self.data, self.start, count) def eat(self, count): -- 2.39.2