]> arthur.barton.de Git - bup.git/commitdiff
vint: implement the typical pack() in C
authorJohannes Berg <johannes@sipsolutions.net>
Sat, 1 Feb 2020 21:47:09 +0000 (22:47 +0100)
committerRob Browning <rlb@defaultvalue.org>
Sat, 15 May 2021 18:56:36 +0000 (13:56 -0500)
This estimates the size, and if we run over it falls back to the
slower python, and similarly if anything happens with the vuints/vints
contained in there, but it still speeds up the typical case
significantly, and improves raw index speed (on cached filesystem
data) by about 14% (~19.7k paths/sec to ~22.5k paths/sec).

Since it doesn't handle corner cases (very long strings or very large
numbers) add a few tests that cover the fallback to python.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Reviewed-by: Rob Browning <rlb@defaultvalue.org>
[rlb@defaultvalue.org: switch to PyMem_Raw*; rename to
 limited_vint_pack; use Py_ssize_t with PyTuple_GET_SIZE]
Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/_helpers.c
lib/bup/vint.py
test/int/test_vint.py

index 95b679921f952068d0a7e275f72496b061cfd197..5ab9a9432f74c3a6be880df3bc8830b90c7dd5eb 100644 (file)
@@ -2268,6 +2268,95 @@ static PyObject *bup_apply_acl(PyObject *self, PyObject *args)
 }
 #endif
 
+static PyObject *bup_limited_vint_pack(PyObject *self, PyObject *args)
+{
+    const char *fmt;
+    PyObject *packargs, *result;
+    Py_ssize_t sz, i, bufsz;
+    char *buf, *pos, *end;
+
+    if (!PyArg_ParseTuple(args, "sO", &fmt, &packargs))
+        return NULL;
+
+    if (!PyTuple_Check(packargs))
+        return PyErr_Format(PyExc_Exception, "pack() arg must be tuple");
+
+    sz = PyTuple_GET_SIZE(packargs);
+    if (sz != (Py_ssize_t)strlen(fmt))
+        return PyErr_Format(PyExc_Exception,
+                            "number of arguments (%ld) does not match format string (%ld)",
+                            (unsigned long)sz, (unsigned long)strlen(fmt));
+
+    if (sz > INT_MAX / 20)
+        return PyErr_Format(PyExc_Exception, "format is far too long");
+
+    // estimate no more than 20 bytes for each on average, the maximum
+    // vint/vuint we can encode is anyway 10 bytes, so this gives us
+    // some headroom for a few strings before we need to realloc ...
+    bufsz = sz * 20;
+    buf = PyMem_RawMalloc(bufsz);
+    if (!buf)
+        return PyErr_NoMemory();
+
+    pos = buf;
+    end = buf + bufsz;
+    for (i = 0; i < sz; i++) {
+        PyObject *item = PyTuple_GET_ITEM(packargs, i);
+        const char *bytes;
+
+        switch (fmt[i]) {
+        case 'V': {
+            long long val = PyLong_AsLongLong(item);
+            if (val == -1 && PyErr_Occurred())
+                return PyErr_Format(PyExc_OverflowError,
+                                    "pack arg %d invalid", (int)i);
+            if (end - pos < 10)
+                goto overflow;
+           pos += vuint_encode(val, pos);
+            break;
+        }
+        case 'v': {
+            long long val = PyLong_AsLongLong(item);
+            if (val == -1 && PyErr_Occurred())
+                return PyErr_Format(PyExc_OverflowError,
+                                    "pack arg %d invalid", (int)i);
+            if (end - pos < 10)
+                goto overflow;
+            pos += vint_encode(val, pos);
+            break;
+        }
+        case 's': {
+            bytes = PyBytes_AsString(item);
+            if (!bytes)
+                goto error;
+            if (end - pos < 10)
+                goto overflow;
+            Py_ssize_t val = PyBytes_GET_SIZE(item);
+            pos += vuint_encode(val, pos);
+            if (end - pos < val)
+                goto overflow;
+            memcpy(pos, bytes, val);
+            pos += val;
+            break;
+        }
+        default:
+            PyErr_Format(PyExc_Exception, "unknown xpack format string item %c",
+                         fmt[i]);
+            goto error;
+        }
+    }
+
+    result = PyBytes_FromStringAndSize(buf, pos - buf);
+    PyMem_RawFree(buf);
+    return result;
+
+ overflow:
+    PyErr_SetString(PyExc_OverflowError, "buffer (potentially) overflowed");
+ error:
+    PyMem_RawFree(buf);
+    return NULL;
+}
+
 static PyMethodDef helper_methods[] = {
     { "write_sparsely", bup_write_sparsely, METH_VARARGS,
       "Write buf excepting zeros at the end. Return trailing zero count." },
@@ -2389,6 +2478,8 @@ static PyMethodDef helper_methods[] = {
 #endif /* HAVE_ACLS */
     { "vuint_encode", bup_vuint_encode, METH_VARARGS, "encode an int to vuint" },
     { "vint_encode", bup_vint_encode, METH_VARARGS, "encode an int to vint" },
+    { "limited_vint_pack", bup_limited_vint_pack, METH_VARARGS,
+      "Try to pack vint/vuint/str, throwing OverflowError when unable." },
     { NULL, NULL, 0, NULL },  // sentinel
 };
 
index a27204b0df268bad66f9cbcaa522ff49bc7b599d..fd8a8f0b369a2da24dc329fe0d76b64fe45bbe27 100644 (file)
@@ -165,19 +165,21 @@ def recv(port, types):
     return result
 
 def pack(types, *args):
-    if len(types) != len(args):
-        raise Exception('number of arguments does not match format string')
-    ret = []
-    for (type, value) in zip(types, args):
-        if type == 'V':
-            ret.append(encode_vuint(value))
-        elif type == 'v':
-            ret.append(encode_vint(value))
-        elif type == 's':
-            ret.append(encode_bvec(value))
-        else:
-            raise Exception('unknown xpack format string item "' + type + '"')
-    return b''.join(ret)
+    try:
+        return _helpers.limited_vint_pack(types, args)
+    except OverflowError:
+        assert len(types) == len(args)
+        ret = []
+        for typ, value in zip(types, args):
+            if typ == 'V':
+                ret.append(encode_vuint(value))
+            elif typ == 'v':
+                ret.append(encode_vint(value))
+            elif typ == 's':
+                ret.append(encode_bvec(value))
+            else:
+                assert False
+        return b''.join(ret)
 
 def unpack(types, data):
     port = BytesIO(data)
index 7ef1d2437e15d1fc1032a57e8715d0e4b957b6d9..e6059a5588bcdc736c06caf5ee2edc609622f17f 100644 (file)
@@ -75,7 +75,12 @@ def test_pack_and_unpack():
              ('v', [0]),
              ('vs', [0, b'foo']),
              ('vV', [0, 1]),
-             ('vv', [0, -1])]
+             ('vv', [0, -1]),
+             ('vv', [0, -1]),
+             # and a few things that aren't done in C
+             ('vv', [10**100, 10**100]),
+             ('s', [b'foo'*10]),
+            ]
     for test in tests:
         (types, values) = test
         WVPASSEQ(pack_and_unpack(types, *values), values)