+#define _LARGEFILE64_SOURCE 1
+#undef NDEBUG
#include "bupsplit.h"
#include <Python.h>
#include <assert.h>
-#include <stdint.h>
+#include <errno.h>
#include <fcntl.h>
#include <arpa/inet.h>
+#include <stdint.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
-static int istty = 0;
+#ifdef linux
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#endif
+
+static int istty2 = 0;
+
+// Probably we should use autoconf or something and set HAVE_PY_GETARGCARGV...
+#if __WIN32__ || __CYGWIN__
+
+// No-op on win32/cygwin: there is no 'ps' there, and Py_GetArgcArgv()
+// isn't available either.
+static void unpythonize_argv(void)
+{
+}
+
+#else // not __WIN32__
// For some reason this isn't declared in Python.h
extern void Py_GetArgcArgv(int *argc, char ***argv);
}
}
+#endif // not __WIN32__ or __CYGWIN__
+
static PyObject *selftest(PyObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, "t#", &buf, &len))
return NULL;
out = bupsplit_find_ofs(buf, len, &bits);
+ if (out) assert(bits >= BUP_BLOBBITS);
return Py_BuildValue("ii", out, bits);
}
}
+#define BLOOM2_HEADERLEN 16
+
// 40 bits of payload: a 32-bit 'high' part followed by an 8-bit 'low' part.
// NOTE(review): presumably overlaid on the leading bytes of a hash by
// to_bloom_address_bitmask4 -- confirm against that function's body.
typedef struct {
uint32_t high;
unsigned char low;
} bits40_t;
-
static void to_bloom_address_bitmask4(const bits40_t *buf,
const int nbits, uint64_t *v, unsigned char *bitmask)
{
*bitmask = 1 << bit;
}
-
// Generates a static function 'name(bloom, buf, nbits)'.  The generated
// function passes buf (viewed as 'itype *') and nbits to 'address', which
// fills in an offset v (of type 'otype') and a one-byte bitmask; the bitmask
// is then ORed into the byte of 'bloom' that lies v bytes past the header.
#define BLOOM_SET_BIT(name, address, itype, otype) \
static void name(unsigned char *bloom, const void *buf, const int nbits)\
{\
unsigned char bitmask;\
otype v;\
address((itype *)buf, nbits, &v, &bitmask);\
-    bloom[16+v] |= bitmask;\
+    bloom[BLOOM2_HEADERLEN+v] |= bitmask;\
}
// One setter per address function: bits40_t input with a 64-bit offset, and
// uint32_t input with a 32-bit offset.
BLOOM_SET_BIT(bloom_set_bit4, to_bloom_address_bitmask4, bits40_t, uint64_t)
BLOOM_SET_BIT(bloom_set_bit5, to_bloom_address_bitmask5, uint32_t, uint32_t)
unsigned char bitmask;\
otype v;\
address((itype *)buf, nbits, &v, &bitmask);\
- return bloom[16+v] & bitmask;\
+ return bloom[BLOOM2_HEADERLEN+v] & bitmask;\
}
BLOOM_GET_BIT(bloom_get_bit4, to_bloom_address_bitmask4, bits40_t, uint64_t)
BLOOM_GET_BIT(bloom_get_bit5, to_bloom_address_bitmask5, uint32_t, uint32_t)
else
return NULL;
- return Py_BuildValue("Oi", Py_True, k);
+ return Py_BuildValue("ii", 1, k);
}
static int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
{
int i;
- for (i = 0; i < 20; i++)
+ for (i = 0; i < sizeof(struct sha); i++)
if (sha1->bytes[i] != sha2->bytes[i])
return sha1->bytes[i] - sha2->bytes[i];
return 0;
return ntohl(*idx->cur_name) + idx->name_base;
}
+#define MIDX4_HEADERLEN 12
static PyObject *merge_into(PyObject *self, PyObject *args)
{
PyObject *ilist = NULL;
unsigned char *fmap = NULL;
- struct sha *sha_ptr, *last = NULL;
- uint32_t *table_ptr, *name_ptr;
+ struct sha *sha_ptr, *sha_start, *last = NULL;
+ uint32_t *table_ptr, *name_ptr, *name_start;
struct idx **idxs = NULL;
int flen = 0, bits = 0, i;
uint32_t total, count, prefix;
else
idxs[i]->cur_name = NULL;
}
- table_ptr = (uint32_t *)&fmap[12];
- sha_ptr = (struct sha *)&table_ptr[1<<bits];
- name_ptr = (uint32_t *)&sha_ptr[total];
+ table_ptr = (uint32_t *)&fmap[MIDX4_HEADERLEN];
+ sha_start = sha_ptr = (struct sha *)&table_ptr[1<<bits];
+ name_start = name_ptr = (uint32_t *)&sha_ptr[total];
last_i = num_i-1;
count = 0;
{
struct idx *idx;
uint32_t new_prefix;
- if (count % 102424 == 0 && istty)
+ if (count % 102424 == 0 && istty2)
fprintf(stderr, "midx: writing %.2f%% (%d/%d)\r",
count*100.0/total, count, total);
idx = idxs[last_i];
new_prefix = _extract_bits((unsigned char *)idx->cur, bits);
while (prefix < new_prefix)
table_ptr[prefix++] = htonl(count);
- if (last == NULL || _cmp_sha(last, idx->cur) != 0)
- {
- memcpy(sha_ptr++, idx->cur, 20);
- *name_ptr++ = htonl(_get_idx_i(idx));
- last = idx->cur;
- }
+ memcpy(sha_ptr++, idx->cur, sizeof(struct sha));
+ *name_ptr++ = htonl(_get_idx_i(idx));
+ last = idx->cur;
++idx->cur;
if (idx->cur_name != NULL)
++idx->cur_name;
_fix_idx_order(idxs, &last_i);
++count;
}
- table_ptr[prefix] = htonl(count);
+ while (prefix < (1<<bits))
+ table_ptr[prefix++] = htonl(count);
+ assert(count == total);
+ assert(prefix == (1<<bits));
+ assert(sha_ptr == sha_start+count);
+ assert(name_ptr == name_start+count);
PyMem_Free(idxs);
return PyLong_FromUnsignedLong(count);
}
+// Convert a 64-bit value from host byte order to network byte order.
+// This is a plain function rather than a macro because it is expected to be
+// called only occasionally; as of the original writing it is never reached
+// in real world bup scenarios (because our packs are < MAX_INT bytes).
+static uint64_t htonll(uint64_t value)
+{
+    static const int endian_test = 42;
+    const char first_byte = *(char *)&endian_test;
+    uint64_t swapped_low, swapped_high;
+
+    // On an MSB-first host the first byte of the probe is 0, not 42, and
+    // the value is already in network byte order.
+    if (first_byte != endian_test)
+        return value;
+
+    // LSB-first host: byte-swap each 32-bit half and exchange the halves.
+    swapped_low = htonl(value & 0xFFFFFFFF);
+    swapped_high = htonl(value >> 32);
+    return (swapped_low << 32) | swapped_high;
+}
+
+#define PACK_IDX_V2_HEADERLEN 8
+#define FAN_ENTRIES 256
+
+// Fill in the tables of a PackIdxV2 index inside the writable buffer 'fmap'.
+//
+// Python arguments: (pf, fmap, idx, total)
+//   pf    - file object; 64-bit offset entries are appended to it via
+//           fwrite() whenever an offset does not fit in 31 bits
+//   fmap  - writable buffer receiving, after PACK_IDX_V2_HEADERLEN bytes,
+//           the fanout table, then 'total' shas, crcs and 32-bit offsets
+//   idx   - list of FAN_ENTRIES lists of (sha, crc, offset) tuples; each
+//           inner list is sorted in place before being written
+//   total - number of entries the tables in fmap were sized for
+//
+// Returns the number of entries written as a Python long.  On failure
+// returns NULL with an exception set; the buffer may then be partly written.
+static PyObject *write_idx(PyObject *self, PyObject *args)
+{
+    PyObject *pf = NULL, *idx = NULL;
+    PyObject *part;
+    FILE *f;
+    unsigned char *fmap = NULL;
+    int flen = 0;
+    uint32_t total = 0;
+    uint32_t count;
+    int i, ofs64_count;
+    Py_ssize_t j, plen;
+    unsigned long long needed;
+    uint32_t *fan_ptr, *crc_ptr, *ofs_ptr;
+    struct sha *sha_ptr;
+
+    if (!PyArg_ParseTuple(args, "Ow#OI", &pf, &fmap, &flen, &idx, &total))
+        return NULL;
+
+    // PyList_GET_ITEM does no checking, so validate the outer list first.
+    if (!PyList_Check(idx) || PyList_GET_SIZE(idx) != FAN_ENTRIES)
+    {
+        PyErr_Format(PyExc_TypeError,
+                     "idx must be a list of %d lists", FAN_ENTRIES);
+        return NULL;
+    }
+
+    // Refuse to write past the end of the caller's buffer.
+    needed = (unsigned long long) PACK_IDX_V2_HEADERLEN
+        + FAN_ENTRIES * sizeof(uint32_t)
+        + (unsigned long long) total
+          * (sizeof(struct sha) + 2 * sizeof(uint32_t));
+    if (flen < 0 || (unsigned long long) flen < needed)
+    {
+        PyErr_SetString(PyExc_ValueError,
+                        "index buffer is too small for 'total' entries");
+        return NULL;
+    }
+
+    fan_ptr = (uint32_t *)&fmap[PACK_IDX_V2_HEADERLEN];
+    sha_ptr = (struct sha *)&fan_ptr[FAN_ENTRIES];
+    crc_ptr = (uint32_t *)&sha_ptr[total];
+    ofs_ptr = (uint32_t *)&crc_ptr[total];
+    // May be NULL if pf is not an open file; only checked when needed so
+    // that callers without any 64-bit offsets are unaffected.
+    f = PyFile_AsFile(pf);
+
+    count = 0;
+    ofs64_count = 0;
+    for (i = 0; i < FAN_ENTRIES; ++i)
+    {
+        part = PyList_GET_ITEM(idx, i);
+        // Also rejects (with an exception) anything that is not a list.
+        if (PyList_Sort(part) != 0)
+            return NULL;
+        plen = PyList_GET_SIZE(part);
+        // The sha/crc/offset tables only have room for 'total' entries.
+        if ((unsigned long long) plen > (unsigned long long) (total - count))
+        {
+            PyErr_SetString(PyExc_ValueError,
+                            "idx contains more than 'total' entries");
+            return NULL;
+        }
+        count += (uint32_t) plen;
+        *fan_ptr++ = htonl(count);
+        for (j = 0; j < plen; ++j)
+        {
+            unsigned char *sha = NULL;
+            int sha_len = 0;
+            uint32_t crc = 0;
+            uint64_t ofs = 0;
+            if (!PyArg_ParseTuple(PyList_GET_ITEM(part, j), "t#IK",
+                                  &sha, &sha_len, &crc, &ofs))
+                return NULL;
+            if (sha_len != sizeof(struct sha))
+            {
+                // Returning NULL without an exception would surface as an
+                // unhelpful SystemError in the caller.
+                PyErr_Format(PyExc_ValueError,
+                             "sha must be %d bytes long, not %d",
+                             (int) sizeof(struct sha), sha_len);
+                return NULL;
+            }
+            memcpy(sha_ptr++, sha, sizeof(struct sha));
+            *crc_ptr++ = htonl(crc);
+            if (ofs > 0x7fffffff)
+            {
+                // Large offsets live in the file; the 32-bit table entry
+                // holds the high bit plus the index of the 64-bit entry.
+                uint64_t nofs = htonll(ofs);
+                if (f == NULL)
+                {
+                    PyErr_SetString(PyExc_TypeError,
+                                    "pf must be an open file object");
+                    return NULL;
+                }
+                if (fwrite(&nofs, sizeof(uint64_t), 1, f) != 1)
+                    return PyErr_SetFromErrno(PyExc_OSError);
+                ofs = 0x80000000 | ofs64_count++;
+            }
+            *ofs_ptr++ = htonl((uint32_t)ofs);
+        }
+    }
+    return PyLong_FromUnsignedLong(count);
+}
+
// I would have made this a lower-level function that just fills in a buffer
// with random values, and then written those values from python. But that's
}
-static PyMethodDef faster_methods[] = {
+#ifdef linux
+// get_linux_file_attr(path): return the flags reported by the
+// FS_IOC_GETFLAGS ioctl for 'path' as a Python integer.  The file is opened
+// read-only with O_NONBLOCK, O_LARGEFILE and O_NOFOLLOW.
+// Raises IOError (with errno and the path) if open() or ioctl() fails.
+static PyObject *bup_get_linux_file_attr(PyObject *self, PyObject *args)
+{
+    int rc;
+    unsigned int attr;
+    char *path;
+    int fd;
+
+    if (!PyArg_ParseTuple(args, "s", &path))
+        return NULL;
+
+    fd = open(path, O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_NOFOLLOW);
+    if (fd == -1)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+
+    // The kernel transfers an int through this pointer even though the
+    // ioctl number is defined in terms of long, so hand it an int-sized
+    // object; a long would only be partly filled in on 64-bit systems.
+    attr = 0;
+    rc = ioctl(fd, FS_IOC_GETFLAGS, &attr);
+    if (rc == -1)
+    {
+        // Build the exception before close() has a chance to change errno.
+        PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+        close(fd);
+        return NULL;
+    }
+
+    close(fd);
+    return Py_BuildValue("k", (unsigned long) attr);
+}
+
+
+// set_linux_file_attr(path, attr): apply the flags in 'attr' to 'path' with
+// the FS_IOC_SETFLAGS ioctl.  The file is opened read-only with O_NONBLOCK,
+// O_LARGEFILE and O_NOFOLLOW.
+// Returns True; raises IOError (with errno and the path) if open() or
+// ioctl() fails.
+static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args)
+{
+    int rc;
+    unsigned long attr_arg;
+    unsigned int attr;
+    char *path;
+    int fd;
+
+    if (!PyArg_ParseTuple(args, "sk", &path, &attr_arg))
+        return NULL;
+
+    // The kernel reads an int through the ioctl argument even though the
+    // ioctl number is defined in terms of long.  Pass an int-sized object so
+    // the flag bits are the ones read regardless of word size or byte order.
+    attr = (unsigned int) attr_arg;
+
+    fd = open(path, O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_NOFOLLOW);
+    if (fd == -1)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+
+    rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
+    if (rc == -1)
+    {
+        // Build the exception before close() has a chance to change errno.
+        PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+        close(fd);
+        return NULL;
+    }
+
+    close(fd);
+    Py_RETURN_TRUE;
+}
+#endif /* def linux */
+
+
+#if defined(_ATFILE_SOURCE) \
+ || _XOPEN_SOURCE >= 700 || _POSIX_C_SOURCE >= 200809L
+#define HAVE_BUP_UTIMENSAT 1
+
+// utimensat(dirfd, path, ((atime_s, atime_ns), (mtime_s, mtime_ns)), flags)
+//
+// Thin wrapper around utimensat(2): the two (seconds, nanoseconds) pairs
+// become ts[0] (access time) and ts[1] (modification time).
+// Returns True; raises IOError (with errno and the path) if the call fails.
+static PyObject *bup_utimensat(PyObject *self, PyObject *args)
+{
+    int rc, dirfd, flags;
+    char *path;
+    long atime_s, atime_ns, mtime_s, mtime_ns;
+    struct timespec ts[2];
+
+    if (!PyArg_ParseTuple(args, "is((ll)(ll))i",
+                          &dirfd,
+                          &path,
+                          &atime_s, &atime_ns,
+                          &mtime_s, &mtime_ns,
+                          &flags))
+        return NULL;
+
+    // All four values are parsed as C longs, which can never be NaN or
+    // infinite, so no floating-point sanity checks are needed here.
+    ts[0].tv_sec = atime_s;
+    ts[0].tv_nsec = atime_ns;
+    ts[1].tv_sec = mtime_s;
+    ts[1].tv_nsec = mtime_ns;
+
+    rc = utimensat(dirfd, path, ts, flags);
+    if (rc != 0)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+
+    Py_RETURN_TRUE;
+}
+
+#endif /* defined(_ATFILE_SOURCE)
+ || _XOPEN_SOURCE >= 700 || _POSIX_C_SOURCE >= 200809L */
+
+
+#ifdef linux /* and likely others */
+
+// Build the tuple returned by stat, lstat and fstat from a struct stat:
+//   (mode, ino, dev, nlink, uid, gid, rdev, size,
+//    (atime, atime_ns), (mtime, mtime_ns), (ctime, ctime_ns))
+// The first eight fields are converted as unsigned long, the timestamp
+// pairs as long.  Returns NULL with an exception set if the tuple cannot
+// be built.
+static PyObject *stat_struct_to_py(const struct stat *st)
+{
+    return Py_BuildValue("kkkkkkkk"
+                         "(ll)"
+                         "(ll)"
+                         "(ll)",
+                         (unsigned long) st->st_mode,
+                         (unsigned long) st->st_ino,
+                         (unsigned long) st->st_dev,
+                         (unsigned long) st->st_nlink,
+                         (unsigned long) st->st_uid,
+                         (unsigned long) st->st_gid,
+                         (unsigned long) st->st_rdev,
+                         (unsigned long) st->st_size,
+                         (long) st->st_atime,
+                         (long) st->st_atim.tv_nsec,
+                         (long) st->st_mtime,
+                         (long) st->st_mtim.tv_nsec,
+                         (long) st->st_ctime,
+                         (long) st->st_ctim.tv_nsec);
+}
+
+
+#define HAVE_BUP_STAT 1
+// stat(filename): call stat(2) and return the tuple described on
+// stat_struct_to_py.  Raises IOError (with errno and the filename) on
+// failure.
+static PyObject *bup_stat(PyObject *self, PyObject *args)
+{
+    int rc;
+    char *filename;
+    struct stat st;
+
+    if (!PyArg_ParseTuple(args, "s", &filename))
+        return NULL;
+
+    rc = stat(filename, &st);
+    if (rc != 0)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+
+    return stat_struct_to_py(&st);
+}
+
+
+#define HAVE_BUP_LSTAT 1
+// lstat(filename): same as stat, but uses lstat(2).
+// Raises IOError (with errno and the filename) on failure.
+static PyObject *bup_lstat(PyObject *self, PyObject *args)
+{
+    int rc;
+    char *filename;
+    struct stat st;
+
+    if (!PyArg_ParseTuple(args, "s", &filename))
+        return NULL;
+
+    rc = lstat(filename, &st);
+    if (rc != 0)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+
+    return stat_struct_to_py(&st);
+}
+
+
+#define HAVE_BUP_FSTAT 1
+// fstat(fd): same as stat, but uses fstat(2) on an open file descriptor.
+// Raises IOError (with errno, no filename) on failure.
+static PyObject *bup_fstat(PyObject *self, PyObject *args)
+{
+    int rc, fd;
+    struct stat st;
+
+    if (!PyArg_ParseTuple(args, "i", &fd))
+        return NULL;
+
+    rc = fstat(fd, &st);
+    if (rc != 0)
+        return PyErr_SetFromErrno(PyExc_IOError);
+
+    return stat_struct_to_py(&st);
+}
+
+#endif /* def linux */
+
+
+static PyMethodDef helper_methods[] = {
{ "selftest", selftest, METH_VARARGS,
"Check that the rolling checksum rolls correctly (for unit tests)." },
{ "blobbits", blobbits, METH_VARARGS,
"Take the first 'nbits' bits from 'buf' and return them as an int." },
{ "merge_into", merge_into, METH_VARARGS,
"Merges a bunch of idx and midx files into a single midx." },
+ { "write_idx", write_idx, METH_VARARGS,
+ "Write a PackIdxV2 file from an idx list of lists of tuples" },
{ "write_random", write_random, METH_VARARGS,
"Write random bytes to the given file descriptor" },
{ "random_sha", random_sha, METH_VARARGS,
"open() the given filename for read with O_NOATIME if possible" },
{ "fadvise_done", fadvise_done, METH_VARARGS,
"Inform the kernel that we're finished with earlier parts of a file" },
+#ifdef linux
+ { "get_linux_file_attr", bup_get_linux_file_attr, METH_VARARGS,
+ "Return the Linux attributes for the given file." },
+ { "set_linux_file_attr", bup_set_linux_file_attr, METH_VARARGS,
+ "Set the Linux attributes for the given file." },
+#endif
+#ifdef HAVE_BUP_UTIMENSAT
+ { "utimensat", bup_utimensat, METH_VARARGS,
+ "Change file timestamps with nanosecond precision." },
+#endif
+#ifdef HAVE_BUP_STAT
+ { "stat", bup_stat, METH_VARARGS,
+ "Extended version of stat." },
+#endif
+#ifdef HAVE_BUP_LSTAT
+ { "lstat", bup_lstat, METH_VARARGS,
+ "Extended version of lstat." },
+#endif
+#ifdef HAVE_BUP_FSTAT
+ { "fstat", bup_fstat, METH_VARARGS,
+ "Extended version of fstat." },
+#endif
{ NULL, NULL, 0, NULL }, // sentinel
};
+
// Module initialiser for "_helpers": registers the method table, exports the
// utimensat constants and the _have_ns_fs_timestamps flag where the matching
// HAVE_BUP_* macros are defined, works out whether stderr should be treated
// as a tty, and finally calls unpythonize_argv().
PyMODINIT_FUNC init_helpers(void)
{
-    Py_InitModule("_helpers", faster_methods);
-    istty = isatty(2) || getenv("BUP_FORCE_TTY");
+    char *e;
+    PyObject *m = Py_InitModule("_helpers", helper_methods);
+    if (m == NULL)
+        return; // module creation failed; nothing more can be set up
+#ifdef HAVE_BUP_UTIMENSAT
+    PyModule_AddObject(m, "AT_FDCWD", Py_BuildValue("i", AT_FDCWD));
+    PyModule_AddObject(m, "AT_SYMLINK_NOFOLLOW",
+                       Py_BuildValue("i", AT_SYMLINK_NOFOLLOW));
+#endif
+#ifdef HAVE_BUP_STAT
+    Py_INCREF(Py_True); // PyModule_AddObject steals a reference
+    PyModule_AddObject(m, "_have_ns_fs_timestamps", Py_True);
+#else
+    Py_INCREF(Py_False); // PyModule_AddObject steals a reference
+    PyModule_AddObject(m, "_have_ns_fs_timestamps", Py_False);
+#endif
+    e = getenv("BUP_FORCE_TTY");
+    istty2 = isatty(2) || (atoi(e ? e : "0") & 2); // fd 2 is a tty, or bit value 2 is set in BUP_FORCE_TTY
    unpythonize_argv();
}