+#undef NDEBUG
#include "bupsplit.h"
#include <Python.h>
#include <assert.h>
static int istty = 0;
+// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
+#if __WIN32__ || __CYGWIN__
+
+// There's no 'ps' on win32 anyway, and Py_GetArgcArgv() isn't available.
+static void unpythonize_argv(void) { }
+
+#else // not __WIN32__ or __CYGWIN__
+
+// For some reason this isn't declared in Python.h
+extern void Py_GetArgcArgv(int *argc, char ***argv);
+
+// Rewrite the argv block returned by Py_GetArgcArgv() in place so that a
+// command line of the form "...python... /some/dir/cmd args..." reads as
+// "cmd args...".
+//
+// The rewrite only happens when all of these hold:
+//   - there are at least two arguments,
+//   - the argument strings are laid out back to back in one block,
+//   - argv[0] contains "python",
+//   - argv[1] contains a '/'.
+// In every other case the block is left untouched.
+static void unpythonize_argv(void)
+{
+    int argc, i;
+    char **argv, *arge;
+
+    Py_GetArgcArgv(&argc, &argv);
+
+    // Without an interpreter name plus a script path there is nothing to
+    // rewrite, and argv[argc-1] below would not be safe to index.
+    if (!argv || argc < 2)
+        return;
+
+    for (i = 0; i < argc-1; i++)
+    {
+        if (argv[i] + strlen(argv[i]) + 1 != argv[i+1])
+        {
+            // The argv block doesn't work the way we expected; it's unsafe
+            // to mess with it.
+            return;
+        }
+    }
+
+    // One past the terminating NUL of the last argument.
+    arge = argv[argc-1] + strlen(argv[argc-1]) + 1;
+
+    // The loop above already proved argv[1] directly follows argv[0].
+    if (strstr(argv[0], "python"))
+    {
+        char *p = strrchr(argv[1], '/');
+        if (p)
+        {
+            size_t len, diff;
+
+            p++;                    // first character of the basename
+            diff = p - argv[0];     // number of bytes being dropped
+            len = arge - p;         // basename plus all following args
+            memmove(argv[0], p, len);
+            memset(arge - diff, 0, diff);
+
+            // argv[0] still points at the start of the block, which is
+            // exactly where the basename now lives, so it must not move.
+            // Every later slot takes over the (shifted) next argument; the
+            // final slot becomes NULL because one argument disappeared.
+            for (i = 1; i < argc; i++)
+                argv[i] = (i+1 < argc) ? argv[i+1]-diff : NULL;
+        }
+    }
+}
+
+#endif // not __WIN32__ or __CYGWIN__
+
+
static PyObject *selftest(PyObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
if (!PyArg_ParseTuple(args, "t#", &buf, &len))
return NULL;
out = bupsplit_find_ofs(buf, len, &bits);
+ if (out) assert(bits >= BUP_BLOBBITS);
return Py_BuildValue("ii", out, bits);
}
}
+#define BLOOM2_HEADERLEN 16
+
// A 40-bit quantity held as a 32-bit `high` word followed by an 8-bit `low`
// byte.  to_bloom_address_bitmask4() takes a pointer to one of these as its
// input.
// NOTE(review): presumably overlaid on raw bytes of a hash -- confirm the
// struct's packing/alignment where it is declared in the full file.
typedef struct {
uint32_t high;
unsigned char low;
} bits40_t;
-
static void to_bloom_address_bitmask4(const bits40_t *buf,
const int nbits, uint64_t *v, unsigned char *bitmask)
{
*bitmask = 1 << bit;
}
-
#define BLOOM_SET_BIT(name, address, itype, otype) \
static void name(unsigned char *bloom, const void *buf, const int nbits)\
{\
unsigned char bitmask;\
otype v;\
address((itype *)buf, nbits, &v, &bitmask);\
- bloom[16+v] |= bitmask;\
+ bloom[BLOOM2_HEADERLEN+v] |= bitmask;\
}
BLOOM_SET_BIT(bloom_set_bit4, to_bloom_address_bitmask4, bits40_t, uint64_t)
BLOOM_SET_BIT(bloom_set_bit5, to_bloom_address_bitmask5, uint32_t, uint32_t)
unsigned char bitmask;\
otype v;\
address((itype *)buf, nbits, &v, &bitmask);\
- return bloom[16+v] & bitmask;\
+ return bloom[BLOOM2_HEADERLEN+v] & bitmask;\
}
BLOOM_GET_BIT(bloom_get_bit4, to_bloom_address_bitmask4, bits40_t, uint64_t)
BLOOM_GET_BIT(bloom_get_bit5, to_bloom_address_bitmask5, uint32_t, uint32_t)
static int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
{
int i;
- for (i = 0; i < 20; i++)
+ for (i = 0; i < sizeof(struct sha); i++)
if (sha1->bytes[i] != sha2->bytes[i])
return sha1->bytes[i] - sha2->bytes[i];
return 0;
return ntohl(*idx->cur_name) + idx->name_base;
}
+#define MIDX4_HEADERLEN 12
static PyObject *merge_into(PyObject *self, PyObject *args)
{
PyObject *ilist = NULL;
unsigned char *fmap = NULL;
- struct sha *sha_ptr, *last = NULL;
- uint32_t *table_ptr, *name_ptr;
+ struct sha *sha_ptr, *sha_start, *last = NULL;
+ uint32_t *table_ptr, *name_ptr, *name_start;
struct idx **idxs = NULL;
int flen = 0, bits = 0, i;
uint32_t total, count, prefix;
return NULL;
idxs[i]->cur = (struct sha *)&idxs[i]->map[sha_ofs];
idxs[i]->end = &idxs[i]->cur[len];
- idxs[i]->cur_name = (uint32_t *)&idxs[i]->map[name_map_ofs];
+ if (name_map_ofs)
+ idxs[i]->cur_name = (uint32_t *)&idxs[i]->map[name_map_ofs];
+ else
+ idxs[i]->cur_name = NULL;
}
- table_ptr = (uint32_t *)&fmap[12];
- sha_ptr = (struct sha *)&table_ptr[1<<bits];
- name_ptr = (uint32_t *)&sha_ptr[total];
+ table_ptr = (uint32_t *)&fmap[MIDX4_HEADERLEN];
+ sha_start = sha_ptr = (struct sha *)&table_ptr[1<<bits];
+ name_start = name_ptr = (uint32_t *)&sha_ptr[total];
last_i = num_i-1;
count = 0;
new_prefix = _extract_bits((unsigned char *)idx->cur, bits);
while (prefix < new_prefix)
table_ptr[prefix++] = htonl(count);
- if (last == NULL || _cmp_sha(last, idx->cur) != 0)
- {
- memcpy(sha_ptr++, idx->cur, 20);
- *name_ptr++ = htonl(_get_idx_i(idx));
- last = idx->cur;
- }
+ memcpy(sha_ptr++, idx->cur, sizeof(struct sha));
+ *name_ptr++ = htonl(_get_idx_i(idx));
+ last = idx->cur;
++idx->cur;
if (idx->cur_name != NULL)
++idx->cur_name;
_fix_idx_order(idxs, &last_i);
++count;
}
- table_ptr[prefix] = htonl(count);
+ while (prefix < (1<<bits))
+ table_ptr[prefix++] = htonl(count);
+ assert(count == total);
+ assert(prefix == (1<<bits));
+ assert(sha_ptr == sha_start+count);
+ assert(name_ptr == name_start+count);
PyMem_Free(idxs);
return PyLong_FromUnsignedLong(count);
}
+// This function should technically be macro'd out if it's going to be used
+// more than ocasionally. As of this writing, it'll actually never be called
+// in real world bup scenarios (because our packs are < MAX_INT bytes).
+static uint64_t htonll(uint64_t value)
+{
+ static const int endian_test = 42;
+
+ if (*(char *)&endian_test == endian_test) // LSB-MSB
+ return ((uint64_t)htonl(value & 0xFFFFFFFF) << 32) | htonl(value >> 32);
+ return value; // already in network byte order MSB-LSB
+}
+
+#define PACK_IDX_V2_HEADERLEN 8
+#define FAN_ENTRIES 256
+
+// write_idx(file, map, idx, total) -> count
+//
+// Fill in the fanout, sha, crc and 32-bit offset tables of a PackIdxV2
+// file inside the writable buffer `map`, and append any 64-bit offsets to
+// `file` with fwrite().
+//
+//   file  - Python file object; receives the 64-bit offset entries
+//   map   - writable buffer; the tables start PACK_IDX_V2_HEADERLEN bytes in
+//   idx   - list of FAN_ENTRIES lists of (sha, crc, ofs) tuples; each inner
+//           list is sorted in place before being written
+//   total - number of entries the tables in `map` were sized for
+//
+// Returns the number of entries written as a Python long, or NULL with a
+// Python exception set on failure.  A failure part-way through leaves the
+// tables partially written.
+static PyObject *write_idx(PyObject *self, PyObject *args)
+{
+    PyObject *pf = NULL, *idx = NULL;
+    FILE *f;
+    unsigned char *fmap = NULL;
+    int flen = 0;
+    uint32_t total = 0;
+    uint32_t count = 0, ofs64_count = 0;
+    uint64_t needed;
+    int i;
+    uint32_t *fan_ptr, *crc_ptr, *ofs_ptr;
+    struct sha *sha_ptr;
+
+    if (!PyArg_ParseTuple(args, "Ow#OI", &pf, &fmap, &flen, &idx, &total))
+        return NULL;
+
+    // PyList_GET_ITEM/PyList_GET_SIZE do no checking, so validate first.
+    if (!PyList_Check(idx) || PyList_GET_SIZE(idx) != FAN_ENTRIES)
+    {
+        PyErr_Format(PyExc_TypeError,
+                     "idx must be a list of %d lists", FAN_ENTRIES);
+        return NULL;
+    }
+
+    // The map must hold the header, the fanout table and, per entry, one
+    // sha, one crc and one 32-bit offset.
+    needed = PACK_IDX_V2_HEADERLEN
+        + (uint64_t)FAN_ENTRIES * sizeof(uint32_t)
+        + (uint64_t)total * (sizeof(struct sha) + 2 * sizeof(uint32_t));
+    if (flen < 0 || (uint64_t)flen < needed)
+    {
+        PyErr_SetString(PyExc_ValueError,
+                        "idx map is too small for the given total");
+        return NULL;
+    }
+
+    f = PyFile_AsFile(pf);
+    if (!f)
+    {
+        PyErr_SetString(PyExc_TypeError,
+                        "first argument must be an open file");
+        return NULL;
+    }
+
+    fan_ptr = (uint32_t *)&fmap[PACK_IDX_V2_HEADERLEN];
+    sha_ptr = (struct sha *)&fan_ptr[FAN_ENTRIES];
+    crc_ptr = (uint32_t *)&sha_ptr[total];
+    ofs_ptr = (uint32_t *)&crc_ptr[total];
+
+    for (i = 0; i < FAN_ENTRIES; ++i)
+    {
+        Py_ssize_t j, plen;
+        PyObject *part = PyList_GET_ITEM(idx, i);
+
+        if (!PyList_Check(part))
+        {
+            PyErr_Format(PyExc_TypeError, "idx[%d] must be a list", i);
+            return NULL;
+        }
+        if (PyList_Sort(part) < 0)
+            return NULL;  // exception already set by the sort
+
+        plen = PyList_GET_SIZE(part);
+        // Writing more than `total` entries would run each table into
+        // the one that follows it.
+        if ((uint64_t)plen > (uint64_t)(total - count))
+        {
+            PyErr_SetString(PyExc_ValueError,
+                            "idx holds more entries than total");
+            return NULL;
+        }
+        count += (uint32_t)plen;
+        *fan_ptr++ = htonl(count);
+
+        for (j = 0; j < plen; ++j)
+        {
+            unsigned char *sha = NULL;
+            int sha_len = 0;
+            uint32_t crc = 0;
+            uint64_t ofs = 0;
+
+            if (!PyArg_ParseTuple(PyList_GET_ITEM(part, j), "t#IK",
+                                  &sha, &sha_len, &crc, &ofs))
+                return NULL;
+            if (sha_len != sizeof(struct sha))
+            {
+                PyErr_Format(PyExc_ValueError,
+                             "sha must be %d bytes long, not %d",
+                             (int)sizeof(struct sha), sha_len);
+                return NULL;
+            }
+            memcpy(sha_ptr++, sha, sizeof(struct sha));
+            *crc_ptr++ = htonl(crc);
+            if (ofs > 0x7fffffff)
+            {
+                // Too big for 31 bits: store the real offset in the
+                // 64-bit table and leave its index, with the top bit
+                // set, in the 32-bit slot.
+                uint64_t nofs = htonll(ofs);
+                if (fwrite(&nofs, sizeof(uint64_t), 1, f) != 1)
+                    return PyErr_SetFromErrno(PyExc_OSError);
+                ofs = 0x80000000 | ofs64_count++;
+            }
+            *ofs_ptr++ = htonl((uint32_t)ofs);
+        }
+    }
+    return PyLong_FromUnsignedLong(count);
+}
+
// I would have made this a lower-level function that just fills in a buffer
// with random values, and then written those values from python. But that's
"Take the first 'nbits' bits from 'buf' and return them as an int." },
{ "merge_into", merge_into, METH_VARARGS,
"Merges a bunch of idx and midx files into a single midx." },
+ { "write_idx", write_idx, METH_VARARGS,
+ "Write a PackIdxV2 file from an idx list of lists of tuples" },
{ "write_random", write_random, METH_VARARGS,
"Write random bytes to the given file descriptor" },
{ "random_sha", random_sha, METH_VARARGS,
{
Py_InitModule("_helpers", faster_methods);
istty = isatty(2) || getenv("BUP_FORCE_TTY");
+ unpythonize_argv();
}