Merge remote branch 'origin/master' into meta

author Rob Browning <rlb@defaultvalue.org>

Thu, 10 Feb 2011 05:01:45 +0000 (23:01 -0600)

committer Rob Browning <rlb@defaultvalue.org>

Thu, 10 Feb 2011 05:01:45 +0000 (23:01 -0600)
author Rob Browning <rlb@defaultvalue.org>
Thu, 10 Feb 2011 05:01:45 +0000 (23:01 -0600)
committer Rob Browning <rlb@defaultvalue.org>
Thu, 10 Feb 2011 05:01:45 +0000 (23:01 -0600)
diff --combined Makefile

index a4ca3abad77d389a6fa6f4ce5e4ed3200430c5e6,d18e2ed889eb780cd48b33c253414623c09d6a55..f95a919492cbbe3952c663a6b83330074eeef1ba
--- 1/Makefile
--- 2/Makefile
+++ b/Makefile
@@@ -83,11 -83,10 +83,11 @@@ lib/bup/_version.py
   runtests: all runtests-python runtests-cmdline
   
   runtests-python:
-       $(PYTHON) wvtest.py $(wildcard t/t*.py lib/*/t/t*.py)
+       $(PYTHON) wvtest.py t/t*.py lib/*/t/t*.py
   
   runtests-cmdline: all
         t/test.sh
+ +      t/test-meta.sh
   
   stupid:
         PATH=/bin:/usr/bin $(MAKE) test
@@@ -104,7 -103,9 +104,9 @@@ bup: main.p
         rm -f $@
         ln -s $< $@
   
- cmds: $(patsubst cmd/%-cmd.py,cmd/bup-%,$(wildcard cmd/*-cmd.py))
+ cmds: \
+     $(patsubst cmd/%-cmd.py,cmd/bup-%,$(wildcard cmd/*-cmd.py)) \
+     $(patsubst cmd/%-cmd.sh,cmd/bup-%,$(wildcard cmd/*-cmd.sh))
   
   cmd/bup-%: cmd/%-cmd.py
         rm -f $@
@@@ -118,8 -119,41 +120,41 @@@ bup-%: cmd-%.s
         rm -f $@
         ln -s $< $@
   
+ # Shell-script subcommands: expose cmd/NAME-cmd.sh as cmd/bup-NAME.
+ # The link target is written as $*-cmd.sh (stem only, without the cmd/ prefix)
+ # because a relative symlink is resolved from the directory that holds the
+ # link, which is cmd/ itself.
+ cmd/bup-%: cmd/%-cmd.sh
+       rm -f $@
+       ln -s $*-cmd.sh $@
+ 
   %.o: %.c
         gcc -c -o $@ $< $(CPPFLAGS) $(CFLAGS)
+       
+ # update the local 'man' and 'html' branches with pregenerated output files, for
+ # people who don't have pandoc (and maybe to aid in google searches or something)
+ #
+ # The two leading update-ref commands seed the local branches from origin only
+ # when they do not exist yet: the empty old-value argument makes update-ref
+ # refuse to touch an existing ref, and that failure is deliberately ignored.
+ # Everything after that runs in one shell with a throwaway index file
+ # (gitindex.tmp) so the real index is left alone: stage the generated files,
+ # write the Documentation subtree as a tree object, and commit it on top of the
+ # branch's current tip.
+ export-docs: Documentation/all
+       git update-ref refs/heads/man origin/man '' 2>/dev/null || true
+       git update-ref refs/heads/html origin/html '' 2>/dev/null || true
+       GIT_INDEX_FILE=gitindex.tmp; export GIT_INDEX_FILE; \
+       rm -f $${GIT_INDEX_FILE} && \
+       git add -f Documentation/*.1 && \
+       git update-ref refs/heads/man \
+               $$(echo "Autogenerated man pages for $$(git describe)" \
+                   | git commit-tree $$(git write-tree --prefix=Documentation) \
+                               -p refs/heads/man) && \
+       rm -f $${GIT_INDEX_FILE} && \
+       git add -f Documentation/*.html && \
+       git update-ref refs/heads/html \
+               $$(echo "Autogenerated html pages for $$(git describe)" \
+                   | git commit-tree $$(git write-tree --prefix=Documentation) \
+                               -p refs/heads/html)
+ 
+ # push the pregenerated doc files to origin/man and origin/html
+ push-docs: export-docs
+       git push origin man html
+ 
+ # import pregenerated doc files from origin/man and origin/html, in case you
+ # don't have pandoc but still want to be able to install the docs.
+ import-docs: Documentation/clean
+       git archive origin/html | (cd Documentation; tar -xvf -)
+       git archive origin/man | (cd Documentation; tar -xvf -)
   
   clean: Documentation/clean
         rm -f *.o lib/*/*.o *.so lib/*/*.so *.dll *.exe \
diff --combined cmd/meta-cmd.py

index f1f5a7b27dd983b09d06022ca8c631ecd3fab002,0000000000000000000000000000000000000000..4f6e013810024b2c5e60e59246dd381084039b5f

mode 100755,000000..100755
--- 1/cmd/meta-cmd.py
--- /dev/null
+++ b/cmd/meta-cmd.py
@@@ -1,148 -1,0 +1,148 @@@
- o = options.Options('bup meta', optspec)
+ +#!/usr/bin/env python
+ +
+ +# Copyright (C) 2010 Rob Browning
+ +#
+ +# This code is covered under the terms of the GNU Library General
+ +# Public License as described in the bup LICENSE file.
+ +
+ +# TODO: Add tar-like -C option.
+ +# TODO: Add tar-like -v support to --list.
+ +
+ +import sys
+ +from bup import metadata
+ +from bup import options
+ +from bup.helpers import handle_ctrl_c, log, saved_errors
+ +
+ +optspec = """
+ +bup meta --create [OPTION ...] <PATH ...>
+ +bup meta --extract [OPTION ...]
+ +bup meta --start-extract [OPTION ...]
+ +bup meta --finish-extract [OPTION ...]
+ +--
+ +c,create       write metadata for PATHs to stdout (or --file)
+ +t,list         display metadata
+ +x,extract      perform --start-extract followed by --finish-extract
+ +start-extract  build tree matching metadata provided on standard input (or --file)
+ +finish-extract finish applying standard input (or --file) metadata to filesystem
+ +f,file=        specify source or destination file
+ +R,recurse      recurse into subdirectories
+ +xdev,one-file-system  don't cross filesystem boundaries
+ +numeric-ids    apply numeric IDs (user, group, etc.), not names, during restore
+ +symlinks       handle symbolic links (default is true)
+ +paths          include paths in metadata (default is true)
+ +v,verbose      increase log output (can be used more than once)
+ +q,quiet        don't show progress meter
+ +"""
+ +
+ +action = None
+ +target_filename = ''
+ +should_recurse = False
+ +restore_numeric_ids = False
+ +include_paths = True
+ +handle_symlinks = True
+ +xdev = False
+ +
+ +handle_ctrl_c()
+ +
++o = options.Options(optspec)
+ +(opt, flags, remainder) = o.parse(sys.argv[1:])
+ +
+ +for flag, value in flags:
+ +    if flag == '--create' or flag == '-c':
+ +        action = 'create'
+ +    elif flag == '--list' or flag == '-t':
+ +        action = 'list'
+ +    elif flag == '--extract' or flag == '-x':
+ +        action = 'extract'
+ +    elif flag == '--start-extract':
+ +        action = 'start-extract'
+ +    elif flag == '--finish-extract':
+ +        action = 'finish-extract'
+ +    elif flag == '--file' or flag == '-f':
+ +        target_filename = value
+ +    elif flag == '--recurse' or flag == '-R':
+ +        should_recurse = True
+ +    elif flag == '--no-recurse':
+ +        should_recurse = False
+ +    elif flag in frozenset(['--xdev', '--one-file-system']):
+ +        xdev = True
+ +    elif flag in frozenset(['--no-xdev', '--no-one-file-system']):
+ +        xdev = False
+ +    elif flag == '--numeric-ids':
+ +        restore_numeric_ids = True
+ +    elif flag == '--no-numeric-ids':
+ +        restore_numeric_ids = False
+ +    elif flag == '--paths':
+ +        include_paths = True
+ +    elif flag == '--no-paths':
+ +        include_paths = False
+ +    elif flag == '--symlinks':
+ +        handle_symlinks = True
+ +    elif flag == '--no-symlinks':
+ +        handle_symlinks = False
+ +    elif flag == '--verbose' or flag == '-v':
+ +        metadata.verbose += 1
+ +    elif flag == '--quiet' or flag == '-q':
+ +        metadata.verbose = 0
+ +
+ +if not action:
+ +    o.fatal("no action specified")
+ +
+ +if action == 'create':
+ +    if len(remainder) < 1:
+ +        o.fatal("no paths specified for create")
+ +    if target_filename != '-':
+ +        output_file = open(target_filename, 'w')
+ +    else:
+ +        output_file = sys.stdout
+ +    metadata.save_tree(output_file,
+ +                       remainder,
+ +                       recurse=should_recurse,
+ +                       write_paths=include_paths,
+ +                       save_symlinks=handle_symlinks,
+ +                       xdev=xdev)
+ +
+ +elif action == 'list':
+ +    if len(remainder) > 0:
+ +        o.fatal("cannot specify paths for --list")
+ +    if target_filename != '-':
+ +        src = open(target_filename, 'r')
+ +    else:
+ +        src = sys.stdin
+ +    metadata.display_archive(src)
+ +
+ +elif action == 'start-extract':
+ +    if len(remainder) > 0:
+ +        o.fatal("cannot specify paths for --start-extract")
+ +    if target_filename != '-':
+ +        src = open(target_filename, 'r')
+ +    else:
+ +        src = sys.stdin
+ +    metadata.start_extract(src, create_symlinks=handle_symlinks)
+ +
+ +elif action == 'finish-extract':
+ +    if len(remainder) > 0:
+ +        o.fatal("cannot specify paths for --finish-extract")
+ +    if target_filename != '-':
+ +        src = open(target_filename, 'r')
+ +    else:
+ +        src = sys.stdin
+ +    num_ids = restore_numeric_ids
+ +    metadata.finish_extract(src, restore_numeric_ids=num_ids)
+ +
+ +elif action == 'extract':
+ +    if len(remainder) > 0:
+ +        o.fatal("cannot specify paths for --extract")
+ +    if target_filename != '-':
+ +        src = open(target_filename, 'r')
+ +    else:
+ +        src = sys.stdin
+ +    metadata.extract(src,
+ +                     restore_numeric_ids=restore_numeric_ids,
+ +                     create_symlinks=handle_symlinks)
+ +
+ +if saved_errors:
+ +    log('WARNING: %d errors encountered.\n' % len(saved_errors))
+ +    sys.exit(1)
+ +else:
+ +    sys.exit(0)
diff --combined cmd/xstat-cmd.py

index b8ee4e808b32bbb73a5e0c9b9ccfc9704032661e,0000000000000000000000000000000000000000..6d60596810f7d8c50a885b3ee8db6dfdc0e707f1

mode 100755,000000..100755
--- 1/cmd/xstat-cmd.py
--- /dev/null
+++ b/cmd/xstat-cmd.py
@@@ -1,132 -1,0 +1,132 @@@
- o = options.Options('bup pathinfo', optspec)
+ +#!/usr/bin/env python
+ +
+ +# Copyright (C) 2010 Rob Browning
+ +#
+ +# This code is covered under the terms of the GNU Library General
+ +# Public License as described in the bup LICENSE file.
+ +
+ +import errno
+ +import posix1e
+ +import stat
+ +import sys
+ +from bup import metadata
+ +from bup import options
+ +from bup import xstat
+ +from bup.helpers import handle_ctrl_c, saved_errors, add_error, log
+ +
+ +
+ +def fstimestr(fstime):
+ +    (s, ns) = fstime.secs_nsecs()
+ +    if ns == 0:
+ +        return '%d' % s
+ +    else:
+ +        return '%d.%09d' % (s, ns)
+ +
+ +
+ +optspec = """
+ +bup pathinfo [OPTION ...] <PATH ...>
+ +--
+ +v,verbose       increase log output (can be used more than once)
+ +q,quiet         don't show progress meter
+ +exclude-fields= exclude comma-separated fields
+ +include-fields= include comma-separated fields (definitive if first)
+ +"""
+ +
+ +target_filename = ''
+ +all_fields = frozenset(['path',
+ +                        'mode',
+ +                        'link-target',
+ +                        'rdev',
+ +                        'uid',
+ +                        'gid',
+ +                        'owner',
+ +                        'group',
+ +                        'atime',
+ +                        'mtime',
+ +                        'ctime',
+ +                        'linux-attr',
+ +                        'linux-xattr',
+ +                        'posix1e-acl'])
+ +active_fields = all_fields
+ +
+ +handle_ctrl_c()
+ +
++o = options.Options(optspec)
+ +(opt, flags, remainder) = o.parse(sys.argv[1:])
+ +
+ +treat_include_fields_as_definitive = True
+ +for flag, value in flags:
+ +    if flag == '--verbose' or flag == '-v':
+ +        metadata.verbose += 1
+ +    elif flag == '--quiet' or flag == '-q':
+ +        metadata.verbose = 0
+ +    elif flag == '--exclude-fields':
+ +        exclude_fields = frozenset(value.split(','))
+ +        for f in exclude_fields:
+ +            if not f in all_fields:
+ +                o.fatal(f + ' is not a valid field name')
+ +        active_fields = active_fields - exclude_fields
+ +        treat_include_fields_as_definitive = False
+ +    elif flag == '--include-fields':
+ +        include_fields = frozenset(value.split(','))
+ +        for f in include_fields:
+ +            if not f in all_fields:
+ +                o.fatal(f + ' is not a valid field name')
+ +        if treat_include_fields_as_definitive:
+ +            active_fields = include_fields
+ +            treat_include_fields_as_definitive = False
+ +        else:
+ +            active_fields = active_fields | include_fields
+ +
+ +for path in remainder:
+ +    try:
+ +        m = metadata.from_path(path, archive_path = path)
+ +    except IOError, e:
+ +        if e.errno == errno.ENOENT:
+ +            add_error(e)
+ +            continue
+ +        else:
+ +            raise
+ +    if 'path' in active_fields:
+ +        print 'path:', m.path
+ +    if 'mode' in active_fields:
+ +        print 'mode:', oct(m.mode)
+ +    if 'link-target' in active_fields and stat.S_ISLNK(m.mode):
+ +        print 'link-target:', m.symlink_target
+ +    if 'rdev' in active_fields:
+ +        print 'rdev:', m.rdev
+ +    if 'uid' in active_fields:
+ +        print 'uid:', m.uid
+ +    if 'gid' in active_fields:
+ +        print 'gid:', m.gid
+ +    if 'owner' in active_fields:
+ +        print 'owner:', m.owner
+ +    if 'group' in active_fields:
+ +        print 'group:', m.group
+ +    if 'atime' in active_fields:
+ +        print 'atime: ' + fstimestr(m.atime)
+ +    if 'mtime' in active_fields:
+ +        print 'mtime: ' + fstimestr(m.mtime)
+ +    if 'ctime' in active_fields:
+ +        print 'ctime: ' + fstimestr(m.ctime)
+ +    if 'linux-attr' in active_fields and m.linux_attr:
+ +        print 'linux-attr:', hex(m.linux_attr)
+ +    if 'linux-xattr' in active_fields and m.linux_xattr:
+ +        for name, value in m.linux_xattr:
+ +            print 'linux-xattr: %s -> %s' % (name, repr(value))
+ +    if 'posix1e-acl' in active_fields and m.posix1e_acl:
+ +        flags = posix1e.TEXT_ABBREVIATE
+ +        if stat.S_ISDIR(m.mode):
+ +            acl = m.posix1e_acl[0]
+ +            default_acl = m.posix1e_acl[2]
+ +            print acl.to_any_text('posix1e-acl: ', '\n', flags)
+ +            print acl.to_any_text('posix1e-acl-default: ', '\n', flags)
+ +        else:
+ +            acl = m.posix1e_acl[0]
+ +            print acl.to_any_text('posix1e-acl: ', '\n', flags)
+ +
+ +if saved_errors:
+ +    log('WARNING: %d errors encountered.\n' % len(saved_errors))
+ +    sys.exit(1)
+ +else:
+ +    sys.exit(0)
diff --combined lib/bup/_helpers.c

index d5de937d92e54804d5aff4fc723bafaa0fe7bc4b,0af94113bd87d0116576f165b93e533b702c067e..dbe64a7a617c4cb6c4609656f9303b1e5a2caca4
--- 1/lib/bup/_helpers.c
--- 2/lib/bup/_helpers.c
+++ b/lib/bup/_helpers.c
@@@ -1,20 -1,14 +1,24 @@@
+ +#define _LARGEFILE64_SOURCE 1
+ +
   #include "bupsplit.h"
   #include <Python.h>
   #include <assert.h>
- -#include <stdint.h>
+ +#include <errno.h>
   #include <fcntl.h>
   #include <arpa/inet.h>
+ +#include <stdint.h>
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <stdio.h>
   
+ +#ifdef linux
+ +#include <linux/fs.h>
+ +#include <sys/ioctl.h>
+ +#include <sys/stat.h>
+ +#include <sys/time.h>
+ +#endif
+ +
+ static int istty = 0;
   
   static PyObject *selftest(PyObject *self, PyObject *args)
   {
@@@ -84,15 -78,149 +88,149 @@@ static PyObject *firstword(PyObject *se
         return NULL;
       
       v = ntohl(*(uint32_t *)buf);
-     return Py_BuildValue("I", v);
+     return PyLong_FromUnsignedLong(v);
   }
   
   
+ /* One 40-bit bloom key: 32 bits in network byte order plus one more byte. */
+ typedef struct {
+     uint32_t high;
+     unsigned char low;
+ } bits40_t;
+ 
+ 
+ /*
+  * Split a 40-bit bloom key into a byte address and a bit mask.
+  *
+  * The key is buf->high (converted from network order) followed by buf->low.
+  * Its top nbits select the byte (*v) and the next three bits select the bit
+  * inside that byte (*bitmask).
+  *
+  * nbits may be larger than 31 here (the callers accept up to 37), so the
+  * mask is built with a 64-bit shift; shifting a plain int by that much is
+  * undefined and produced a truncated mask.
+  */
+ static void to_bloom_address_bitmask4(const bits40_t *buf,
+       const int nbits, uint64_t *v, unsigned char *bitmask)
+ {
+     int bit;
+     uint64_t raw, mask;
+ 
+     mask = ((uint64_t)1 << nbits) - 1;
+     raw = (((uint64_t)ntohl(buf->high)) << 8) | buf->low;
+     bit = (raw >> (37-nbits)) & 0x7;
+     *v = (raw >> (40-nbits)) & mask;
+     *bitmask = 1 << bit;
+ }
+ 
+ /*
+  * Split a 32-bit bloom key into a byte address and a bit mask.
+  *
+  * *buf is read in network byte order.  Its top nbits become the byte
+  * address (*v) and the following three bits pick the bit inside that byte
+  * (*bitmask).  The callers in this file reject nbits > 29 before getting
+  * here, which keeps the 29-nbits shift non-negative.
+  */
+ static void to_bloom_address_bitmask5(const uint32_t *buf,
+       const int nbits, uint32_t *v, unsigned char *bitmask)
+ {
+     int bit;
+     uint32_t raw, mask;
+ 
+     mask = (1<<nbits) - 1;
+     raw = ntohl(*buf);
+     bit = (raw >> (29-nbits)) & 0x7;
+     *v = (raw >> (32-nbits)) & mask;
+     *bitmask = 1 << bit;
+ }
+ 
+ 
+ /*
+  * Generate `name(bloom, buf, nbits)`, which decodes the key at buf with
+  * `address` (reading it as `itype`, producing an `otype` byte address) and
+  * sets the selected bit.  The byte address is offset by 16, so the first
+  * 16 bytes of the bloom buffer are never touched (presumably a header --
+  * confirm against the file format).
+  */
+ #define BLOOM_SET_BIT(name, address, itype, otype) \
+ static void name(unsigned char *bloom, const void *buf, const int nbits)\
+ {\
+     unsigned char bitmask;\
+     otype v;\
+     address((itype *)buf, nbits, &v, &bitmask);\
+     bloom[16+v] |= bitmask;\
+ }
+ BLOOM_SET_BIT(bloom_set_bit4, to_bloom_address_bitmask4, bits40_t, uint64_t)
+ BLOOM_SET_BIT(bloom_set_bit5, to_bloom_address_bitmask5, uint32_t, uint32_t)
+ 
+ 
+ /*
+  * Generate `name(bloom, buf, nbits)`, the read-only counterpart of the
+  * setters above: it returns non-zero when the bit selected by the key at
+  * buf is set, using the same 16-byte offset.
+  */
+ #define BLOOM_GET_BIT(name, address, itype, otype) \
+ static int name(const unsigned char *bloom, const void *buf, const int nbits)\
+ {\
+     unsigned char bitmask;\
+     otype v;\
+     address((itype *)buf, nbits, &v, &bitmask);\
+     return bloom[16+v] & bitmask;\
+ }
+ BLOOM_GET_BIT(bloom_get_bit4, to_bloom_address_bitmask4, bits40_t, uint64_t)
+ BLOOM_GET_BIT(bloom_get_bit5, to_bloom_address_bitmask5, uint32_t, uint32_t)
+ 
+ 
+ /*
+  * bloom_add(bloom, shas, nbits, k) -> number of shas added
+  *
+  * bloom is a writable buffer, shas a string of concatenated 20-byte hashes.
+  * Each hash is cut into k keys of 20/k bytes and the bit selected by every
+  * key is set.  Only k == 4 (nbits <= 37) and k == 5 (nbits <= 29) are
+  * supported.
+  *
+  * Invalid arguments raise ValueError.  Returning NULL without setting an
+  * exception, as this function used to, makes the interpreter report an
+  * unhelpful SystemError instead.
+  */
+ static PyObject *bloom_add(PyObject *self, PyObject *args)
+ {
+     unsigned char *sha = NULL, *bloom = NULL;
+     unsigned char *end;
+     int len = 0, blen = 0, nbits = 0, k = 0;
+     int max_nbits;
+ 
+     if (!PyArg_ParseTuple(args, "w#s#ii", &bloom, &blen, &sha, &len, &nbits, &k))
+         return NULL;
+ 
+     if (k == 5)
+         max_nbits = 29;
+     else if (k == 4)
+         max_nbits = 37;
+     else
+     {
+         PyErr_SetString(PyExc_ValueError, "bloom_add: k must be 4 or 5");
+         return NULL;
+     }
+ 
+     /* Validate nbits before it is used as a shift count below. */
+     if (nbits < 0 || nbits > max_nbits)
+     {
+         PyErr_SetString(PyExc_ValueError, "bloom_add: nbits out of range");
+         return NULL;
+     }
+     if (len % 20 != 0)
+     {
+         PyErr_SetString(PyExc_ValueError,
+                         "bloom_add: sha data must be a multiple of 20 bytes");
+         return NULL;
+     }
+     /* 64-bit arithmetic: nbits can exceed the width of int. */
+     if ((uint64_t)blen < 16 + ((uint64_t)1 << nbits))
+     {
+         PyErr_SetString(PyExc_ValueError,
+                         "bloom_add: bloom buffer too small for nbits");
+         return NULL;
+     }
+ 
+     if (k == 5)
+     {
+         for (end = sha + len; sha < end; sha += 20/k)
+             bloom_set_bit5(bloom, sha, nbits);
+     }
+     else
+     {
+         for (end = sha + len; sha < end; sha += 20/k)
+             bloom_set_bit4(bloom, sha, nbits);
+     }
+ 
+     return Py_BuildValue("i", len/20);
+ }
+ 
+ /*
+  * bloom_contains(bloom, sha, nbits, k) -> (True, k) or (None, steps)
+  *
+  * Tests the k keys of a single 20-byte sha against the filter.  The first
+  * clear bit ends the search and yields (None, steps), where steps is the
+  * number of keys examined; if every bit is set the result is (True, k).
+  *
+  * Invalid arguments raise ValueError.  The bloom buffer length is now
+  * checked against nbits as bloom_add does; without that check a short
+  * buffer was read out of bounds.
+  */
+ static PyObject *bloom_contains(PyObject *self, PyObject *args)
+ {
+     unsigned char *sha = NULL, *bloom = NULL;
+     int len = 0, blen = 0, nbits = 0, k = 0;
+     unsigned char *end;
+     int steps;
+     int max_nbits;
+ 
+     if (!PyArg_ParseTuple(args, "t#s#ii", &bloom, &blen, &sha, &len, &nbits, &k))
+         return NULL;
+ 
+     if (len != 20)
+     {
+         PyErr_SetString(PyExc_ValueError,
+                         "bloom_contains: sha must be exactly 20 bytes");
+         return NULL;
+     }
+ 
+     if (k == 5)
+         max_nbits = 29;
+     else if (k == 4)
+         max_nbits = 37;
+     else
+     {
+         PyErr_SetString(PyExc_ValueError, "bloom_contains: k must be 4 or 5");
+         return NULL;
+     }
+ 
+     /* Validate nbits before it is used as a shift count below. */
+     if (nbits < 0 || nbits > max_nbits)
+     {
+         PyErr_SetString(PyExc_ValueError, "bloom_contains: nbits out of range");
+         return NULL;
+     }
+     if ((uint64_t)blen < 16 + ((uint64_t)1 << nbits))
+     {
+         PyErr_SetString(PyExc_ValueError,
+                         "bloom_contains: bloom buffer too small for nbits");
+         return NULL;
+     }
+ 
+     if (k == 5)
+     {
+         for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
+             if (!bloom_get_bit5(bloom, sha, nbits))
+                 return Py_BuildValue("Oi", Py_None, steps);
+     }
+     else
+     {
+         for (steps = 1, end = sha + 20; sha < end; sha += 20/k, steps++)
+             if (!bloom_get_bit4(bloom, sha, nbits))
+                 return Py_BuildValue("Oi", Py_None, steps);
+     }
+ 
+     return Py_BuildValue("Oi", Py_True, k);
+ }
+ 
+ 
+ /*
+  * Return the top nbits of the big-endian 32-bit word stored at buf.
+  *
+  * nbits <= 0 yields 0 and nbits >= 32 yields the whole word; the previous
+  * shift-and-mask form was undefined for both (shift counts of 32 or more).
+  * For 1..31 the result is unchanged: after shifting right by 32-nbits the
+  * value already fits in nbits, so no mask is needed.
+  */
+ static uint32_t _extract_bits(unsigned char *buf, int nbits)
+ {
+     uint32_t v;
+ 
+     memcpy(&v, buf, sizeof(v));    /* buf need not be 4-byte aligned */
+     v = ntohl(v);
+     if (nbits <= 0)
+         return 0;
+     if (nbits >= 32)
+         return v;
+     return v >> (32-nbits);
+ }
   static PyObject *extract_bits(PyObject *self, PyObject *args)
   {
       unsigned char *buf = NULL;
       int len = 0, nbits = 0;
-     uint32_t v, mask;
   
       if (!PyArg_ParseTuple(args, "t#i", &buf, &len, &nbits))
         return NULL;
@@@ -100,10 -228,147 +238,147 @@@
       if (len < 4)
         return NULL;
       
-     mask = (1<<nbits) - 1;
-     v = ntohl(*(uint32_t *)buf);
-     v = (v >> (32-nbits)) & mask;
-     return Py_BuildValue("I", v);
+     return PyLong_FromUnsignedLong(_extract_bits(buf, nbits));
+ }
+ 
+ 
+ /* One 20-byte object id, so pointer arithmetic steps a whole sha at a time. */
+ struct sha {
+     unsigned char bytes[20];
+ };
+ /*
+  * Cursor over one input index being merged by merge_into():
+  *   map       - start of the index buffer supplied by the caller
+  *   cur, end  - next sha to emit and one past the last sha
+  *   cur_name  - per-sha name entry running in step with cur, or NULL when
+  *               the input has no name table
+  *   bytes     - length of the map buffer
+  *   name_base - value added to every name index taken from this input
+  */
+ struct idx {
+     unsigned char *map;
+     struct sha *cur;
+     struct sha *end;
+     uint32_t *cur_name;
+     long bytes;
+     int name_base;
+ };
+ 
+ 
+ /*
+  * Compare two shas byte by byte as unsigned values.
+  * Returns the difference of the first differing pair (negative when sha1
+  * sorts first, positive when sha2 does) or 0 when all 20 bytes match.
+  */
+ static int _cmp_sha(const struct sha *sha1, const struct sha *sha2)
+ {
+     int i;
+     for (i = 0; i < 20; i++)
+       if (sha1->bytes[i] != sha2->bytes[i])
+           return sha1->bytes[i] - sha2->bytes[i];
+     return 0;
+ }
+ 
+ 
+ /*
+  * Restore the ordering of idxs[0..*last_i] after the cursor of the last
+  * entry has been advanced.
+  *
+  * If that entry is exhausted it is freed and *last_i is decremented.
+  * Otherwise a binary search over idxs[0..*last_i-1] finds where its new
+  * current sha belongs and the entry is moved there.  The comparisons treat
+  * the array as sorted by descending current sha (smaller shas live at
+  * higher indexes), which is why `low` starts at the high index and `high`
+  * at 0: they are named after sha values, not array positions.
+  */
+ static void _fix_idx_order(struct idx **idxs, int *last_i)
+ {
+     struct idx *idx;
+     int low, mid, high, c = 0;
+ 
+     idx = idxs[*last_i];
+     if (idxs[*last_i]->cur >= idxs[*last_i]->end)
+     {
+       idxs[*last_i] = NULL;
+       PyMem_Free(idx);
+       --*last_i;
+       return;
+     }
+     /* A single remaining entry is trivially in order. */
+     if (*last_i == 0)
+       return;
+ 
+     low = *last_i-1;
+     mid = *last_i;
+     high = 0;
+     while (low >= high)
+     {
+       mid = (low + high) / 2;
+       c = _cmp_sha(idx->cur, idxs[mid]->cur);
+       if (c < 0)
+           high = mid + 1;
+       else if (c > 0)
+           low = mid - 1;
+       else
+           break;
+     }
+     /* idx sorts below idxs[mid], so it belongs just after it. */
+     if (c < 0)
+       ++mid;
+     /* Already in the right slot: nothing to move. */
+     if (mid == *last_i)
+       return;
+     /* Shift idxs[mid..*last_i-1] up by one and drop idx into the gap. */
+     memmove(&idxs[mid+1], &idxs[mid], (*last_i-mid)*sizeof(struct idx *));
+     idxs[mid] = idx;
+ }
+ 
+ 
+ /*
+  * Return the name index to record for idx's current sha: name_base alone
+  * when the input has no name table, otherwise the current name-table entry
+  * (stored in network byte order) plus name_base.
+  */
+ static uint32_t _get_idx_i(struct idx *idx)
+ {
+     if (idx->cur_name == NULL)
+       return idx->name_base;
+     return ntohl(*idx->cur_name) + idx->name_base;
+ }
+ 
+ 
+ /*
+  * merge_into(fmap, bits, total, idxlist) -> number of entries consumed
+  *
+  * Merges the shas of every input in idxlist into the writable buffer fmap,
+  * which is laid out as: 12 bytes that are left alone, a fanout table of
+  * 2^bits uint32 entries, `total` 20-byte shas, then `total` uint32 name
+  * indexes.  All integers are written in network byte order.
+  *
+  * Each idxlist item is a tuple (map, len, sha_ofs, name_map_ofs, name_base).
+  * The inputs are consumed from the end of the array, smallest current sha
+  * first, with _fix_idx_order() re-sorting after every step; the caller is
+  * expected to pass them already ordered that way (TODO confirm).
+  *
+  * Changes from the previous version:
+  *  - allocation failures and a bad tuple no longer leak the idx array;
+  *  - idxlist must be a list, bits must be 0..31, and fmap must be large
+  *    enough for the layout above, otherwise an exception is raised;
+  *  - the "t#" length is parsed into an int (the width that format writes
+  *    without PY_SSIZE_T_CLEAN) rather than straight into a long field;
+  *  - every fanout entry after the last prefix seen is set to the final
+  *    count, not just the first one.
+  */
+ static PyObject *merge_into(PyObject *self, PyObject *args)
+ {
+     PyObject *ilist = NULL;
+     unsigned char *fmap = NULL;
+     struct sha *sha_ptr, *last = NULL;
+     uint32_t *table_ptr, *name_ptr;
+     struct idx **idxs = NULL;
+     int flen = 0, bits = 0, i;
+     uint32_t total, count, prefix;
+     int num_i;
+     int last_i;
+ 
+     if (!PyArg_ParseTuple(args, "w#iIO", &fmap, &flen, &bits, &total, &ilist))
+         return NULL;
+ 
+     if (!PyList_Check(ilist))
+     {
+         PyErr_SetString(PyExc_TypeError, "merge_into: idx list must be a list");
+         return NULL;
+     }
+     if (bits < 0 || bits > 31)
+     {
+         PyErr_SetString(PyExc_ValueError, "merge_into: bits out of range");
+         return NULL;
+     }
+     if ((uint64_t)flen < 12 + ((uint64_t)1 << bits) * sizeof(uint32_t)
+             + (uint64_t)total * (sizeof(struct sha) + sizeof(uint32_t)))
+     {
+         PyErr_SetString(PyExc_ValueError,
+                         "merge_into: output buffer too small");
+         return NULL;
+     }
+ 
+     num_i = PyList_Size(ilist);
+     idxs = (struct idx **)PyMem_Malloc(num_i * sizeof(struct idx *));
+     if (idxs == NULL)
+         return PyErr_NoMemory();
+ 
+     for (i = 0; i < num_i; i++)
+     {
+         long len, sha_ofs, name_map_ofs;
+         int map_len = 0;
+         PyObject *itup;
+ 
+         idxs[i] = (struct idx *)PyMem_Malloc(sizeof(struct idx));
+         if (idxs[i] == NULL)
+         {
+             PyErr_NoMemory();
+             goto fail;
+         }
+         itup = PyList_GetItem(ilist, i);
+         if (!PyArg_ParseTuple(itup, "t#llli", &idxs[i]->map, &map_len,
+                     &len, &sha_ofs, &name_map_ofs, &idxs[i]->name_base))
+             goto fail;
+         idxs[i]->bytes = map_len;
+         idxs[i]->cur = (struct sha *)&idxs[i]->map[sha_ofs];
+         idxs[i]->end = &idxs[i]->cur[len];
+         /* A zero offset means this input has no name table. */
+         if (name_map_ofs)
+             idxs[i]->cur_name = (uint32_t *)&idxs[i]->map[name_map_ofs];
+         else
+             idxs[i]->cur_name = NULL;
+     }
+     table_ptr = (uint32_t *)&fmap[12];
+     sha_ptr = (struct sha *)&table_ptr[1<<bits];
+     name_ptr = (uint32_t *)&sha_ptr[total];
+ 
+     last_i = num_i-1;
+     count = 0;
+     prefix = 0;
+     while (last_i >= 0)
+     {
+         struct idx *idx;
+         uint32_t new_prefix;
+         if (count % 102424 == 0 && istty)
+             fprintf(stderr, "midx: writing %.2f%% (%u/%u)\r",
+                     count*100.0/total, count, total);
+         idx = idxs[last_i];
+         new_prefix = _extract_bits((unsigned char *)idx->cur, bits);
+         /* Every prefix skipped over ends at the current running count. */
+         while (prefix < new_prefix)
+             table_ptr[prefix++] = htonl(count);
+         /* Emit a sha only once even if several inputs contain it. */
+         if (last == NULL || _cmp_sha(last, idx->cur) != 0)
+         {
+             memcpy(sha_ptr++, idx->cur, 20);
+             *name_ptr++ = htonl(_get_idx_i(idx));
+             last = idx->cur;
+         }
+         ++idx->cur;
+         if (idx->cur_name != NULL)
+             ++idx->cur_name;
+         /* Frees idx and decrements last_i once idx is exhausted. */
+         _fix_idx_order(idxs, &last_i);
+         ++count;
+     }
+     /* Close the fanout table: all remaining prefixes end at count. */
+     while (prefix < ((uint32_t)1 << bits))
+         table_ptr[prefix++] = htonl(count);
+ 
+     PyMem_Free(idxs);
+     return PyLong_FromUnsignedLong(count);
+ 
+ fail:
+     /* idxs[0..i] were allocated (idxs[i] may be NULL; freeing NULL is ok). */
+     for (; i >= 0; i--)
+         PyMem_Free(idxs[i]);
+     PyMem_Free(idxs);
+     return NULL;
+ }
   
   
@@@ -116,11 -381,11 +391,11 @@@
   static PyObject *write_random(PyObject *self, PyObject *args)
   {
       uint32_t buf[1024/4];
-     int fd = -1, seed = 0;
+     int fd = -1, seed = 0, verbose = 0;
       ssize_t ret;
       long long len = 0, kbytes = 0, written = 0;
   
-     if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+     if (!PyArg_ParseTuple(args, "iLii", &fd, &len, &seed, &verbose))
         return NULL;
       
       srandom(seed);
@@@ -136,7 -401,7 +411,7 @@@
         written += ret;
         if (ret < (int)sizeof(buf))
             break;
-       if (kbytes/1024 > 0 && !(kbytes%1024))
+       if (verbose && kbytes/1024 > 0 && !(kbytes%1024))
             fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024);
       }
       
@@@ -158,6 -423,29 +433,29 @@@
   }
   
   
+ static PyObject *random_sha(PyObject *self, PyObject *args)
+ {
+     static int seeded = 0;
+     uint32_t shabuf[20/4];
+     int i;
+     
+     if (!seeded)
+     {
+       assert(sizeof(shabuf) == 20);
+       srandom(time(NULL));
+       seeded = 1;
+     }
+     
+     if (!PyArg_ParseTuple(args, ""))
+       return NULL;
+     
+     memset(shabuf, 0, sizeof(shabuf));
+     for (i=0; i < 20/4; i++)
+       shabuf[i] = random();
+     return Py_BuildValue("s#", shabuf, 20);
+ }
+ 
+ 
   static PyObject *open_noatime(PyObject *self, PyObject *args)
   {
       char *filename = NULL;
@@@ -204,247 -492,7 +502,247 @@@ static PyObject *fadvise_done(PyObject 
   }
   
   
- static PyMethodDef helper_methods[] = {
+ +#ifdef linux
+ +/*
+ + * get_linux_file_attr(path) -> int
+ + *
+ + * Opens path read-only (non-blocking, without following a final symlink)
+ + * and returns the flag word reported by the FS_IOC_GETFLAGS ioctl.
+ + * Raises IOError, carrying errno and the path, if the open or the ioctl
+ + * fails.
+ + */
+ +static PyObject *bup_get_linux_file_attr(PyObject *self, PyObject *args)
+ +{
+ +    int rc;
+ +    unsigned long attr;
+ +    char *path;
+ +    int fd;
+ +
+ +    if (!PyArg_ParseTuple(args, "s", &path))
+ +        return NULL;
+ +
+ +    fd = open(path, O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_NOFOLLOW);
+ +    if (fd == -1)
+ +        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+ +
+ +    attr = 0;
+ +    rc = ioctl(fd, FS_IOC_GETFLAGS, &attr);
+ +    if (rc == -1)
+ +    {
+ +        /* close() may overwrite errno; report the ioctl's failure. */
+ +        int saved_errno = errno;
+ +        close(fd);
+ +        errno = saved_errno;
+ +        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+ +    }
+ +
+ +    close(fd);
+ +    return Py_BuildValue("k", attr);
+ +}
+ +
+ +
+ +/*
+ + * set_linux_file_attr(path, attr) -> True
+ + *
+ + * Opens path read-only (non-blocking, without following a final symlink)
+ + * and applies attr with the FS_IOC_SETFLAGS ioctl.
+ + * Raises IOError, carrying errno and the path, if the open or the ioctl
+ + * fails.
+ + */
+ +static PyObject *bup_set_linux_file_attr(PyObject *self, PyObject *args)
+ +{
+ +    int rc;
+ +    unsigned long attr;
+ +    char *path;
+ +    int fd;
+ +
+ +    if (!PyArg_ParseTuple(args, "sk", &path, &attr))
+ +        return NULL;
+ +
+ +    fd = open(path, O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_NOFOLLOW);
+ +    if (fd == -1)
+ +        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+ +
+ +    rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
+ +    if (rc == -1)
+ +    {
+ +        /* close() may overwrite errno; report the ioctl's failure. */
+ +        int saved_errno = errno;
+ +        close(fd);
+ +        errno = saved_errno;
+ +        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+ +    }
+ +
+ +    close(fd);
+ +    Py_RETURN_TRUE;
+ +}
+ +#endif /* def linux */
+ +
+ +
+ +#if defined(_ATFILE_SOURCE) \
+ +  || _XOPEN_SOURCE >= 700 || _POSIX_C_SOURCE >= 200809L
+ +#define HAVE_BUP_UTIMENSAT 1
+ +
+ +/*
+ + * utimensat(dirfd, path, ((atime_s, atime_ns), (mtime_s, mtime_ns)), flags)
+ + *     -> True
+ + *
+ + * Thin wrapper around utimensat(2).  All four time components are parsed
+ + * as C longs, so they can never be NaN or infinite; the floating-point
+ + * checks that used to follow the parse were unreachable and are gone.
+ + * Out-of-range values are left for utimensat() itself to reject.
+ + * Raises IOError, carrying errno and the path, when the call fails.
+ + */
+ +static PyObject *bup_utimensat(PyObject *self, PyObject *args)
+ +{
+ +    int rc, dirfd, flags;
+ +    char *path;
+ +    long access, access_ns, modification, modification_ns;
+ +    struct timespec ts[2];
+ +
+ +    if (!PyArg_ParseTuple(args, "is((ll)(ll))i",
+ +                          &dirfd,
+ +                          &path,
+ +                          &access, &access_ns,
+ +                          &modification, &modification_ns,
+ +                          &flags))
+ +        return NULL;
+ +
+ +    /* ts[0] is the access time, ts[1] the modification time. */
+ +    ts[0].tv_sec = access;
+ +    ts[0].tv_nsec = access_ns;
+ +    ts[1].tv_sec = modification;
+ +    ts[1].tv_nsec = modification_ns;
+ +
+ +    rc = utimensat(dirfd, path, ts, flags);
+ +    if (rc != 0)
+ +        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+ +
+ +    Py_RETURN_TRUE;
+ +}
+ +
+ +#endif /* defined(_ATFILE_SOURCE)
+ +          || _XOPEN_SOURCE >= 700 || _POSIX_C_SOURCE >= 200809L */
+ +
+ +
+ +#ifdef linux /* and likely others */
+ +
+ +/*
+ + * Build the tuple shared by stat(), lstat() and fstat():
+ + *   (mode, ino, dev, nlink, uid, gid, rdev, size,
+ + *    (atime, atime_ns), (mtime, mtime_ns), (ctime, ctime_ns))
+ + *
+ + * ino, dev, rdev and size go through unsigned long long so that values
+ + * wider than unsigned long are not truncated on 32-bit builds; the other
+ + * fields keep their previous conversions.
+ + */
+ +static PyObject *bup_stat_struct_to_py(const struct stat *st)
+ +{
+ +    return Py_BuildValue("kKKkkkKK"
+ +                         "(ll)"
+ +                         "(ll)"
+ +                         "(ll)",
+ +                         (unsigned long) st->st_mode,
+ +                         (unsigned long long) st->st_ino,
+ +                         (unsigned long long) st->st_dev,
+ +                         (unsigned long) st->st_nlink,
+ +                         (unsigned long) st->st_uid,
+ +                         (unsigned long) st->st_gid,
+ +                         (unsigned long long) st->st_rdev,
+ +                         (unsigned long long) st->st_size,
+ +                         (long) st->st_atime,
+ +                         (long) st->st_atim.tv_nsec,
+ +                         (long) st->st_mtime,
+ +                         (long) st->st_mtim.tv_nsec,
+ +                         (long) st->st_ctime,
+ +                         (long) st->st_ctim.tv_nsec);
+ +}
+ +
+ +
+ +#define HAVE_BUP_STAT 1
+ +/* stat(path) -> tuple; follows symlinks.  Raises IOError on failure. */
+ +static PyObject *bup_stat(PyObject *self, PyObject *args)
+ +{
+ +    char *filename;
+ +    struct stat st;
+ +
+ +    if (!PyArg_ParseTuple(args, "s", &filename))
+ +        return NULL;
+ +
+ +    if (stat(filename, &st) != 0)
+ +        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+ +
+ +    return bup_stat_struct_to_py(&st);
+ +}
+ +
+ +
+ +#define HAVE_BUP_LSTAT 1
+ +/* lstat(path) -> tuple; describes a symlink itself.  Raises IOError. */
+ +static PyObject *bup_lstat(PyObject *self, PyObject *args)
+ +{
+ +    char *filename;
+ +    struct stat st;
+ +
+ +    if (!PyArg_ParseTuple(args, "s", &filename))
+ +        return NULL;
+ +
+ +    if (lstat(filename, &st) != 0)
+ +        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+ +
+ +    return bup_stat_struct_to_py(&st);
+ +}
+ +
+ +
+ +#define HAVE_BUP_FSTAT 1
+ +/* fstat(fd) -> tuple for an open descriptor.  Raises IOError on failure. */
+ +static PyObject *bup_fstat(PyObject *self, PyObject *args)
+ +{
+ +    int fd;
+ +    struct stat st;
+ +
+ +    if (!PyArg_ParseTuple(args, "i", &fd))
+ +        return NULL;
+ +
+ +    if (fstat(fd, &st) != 0)
+ +        return PyErr_SetFromErrno(PyExc_IOError);
+ +
+ +    return bup_stat_struct_to_py(&st);
+ +}
+ +
+ +#endif /* def linux */
+ +
+ +
+ static PyMethodDef faster_methods[] = {
       { "selftest", selftest, METH_VARARGS,
         "Check that the rolling checksum rolls correctly (for unit tests)." },
       { "blobbits", blobbits, METH_VARARGS,
@@@ -455,55 -503,27 +753,64 @@@
         "Count the number of matching prefix bits between two strings." },
       { "firstword", firstword, METH_VARARGS,
           "Return an int corresponding to the first 32 bits of buf." },
+     { "bloom_contains", bloom_contains, METH_VARARGS,
+       "Check if a bloom filter of 2^nbits bytes contains an object" },
+     { "bloom_add", bloom_add, METH_VARARGS,
+       "Add an object to a bloom filter of 2^nbits bytes" },
       { "extract_bits", extract_bits, METH_VARARGS,
         "Take the first 'nbits' bits from 'buf' and return them as an int." },
+     { "merge_into", merge_into, METH_VARARGS,
+       "Merges a bunch of idx and midx files into a single midx." },
       { "write_random", write_random, METH_VARARGS,
         "Write random bytes to the given file descriptor" },
+     { "random_sha", random_sha, METH_VARARGS,
+         "Return a random 20-byte string" },
       { "open_noatime", open_noatime, METH_VARARGS,
         "open() the given filename for read with O_NOATIME if possible" },
       { "fadvise_done", fadvise_done, METH_VARARGS,
         "Inform the kernel that we're finished with earlier parts of a file" },
+ +#ifdef linux
+ +    { "get_linux_file_attr", bup_get_linux_file_attr, METH_VARARGS,
+ +      "Return the Linux attributes for the given file." },
+ +    { "set_linux_file_attr", bup_set_linux_file_attr, METH_VARARGS,
+ +      "Set the Linux attributes for the given file." },
+ +#endif
+ +#ifdef HAVE_BUP_UTIMENSAT
+ +    { "utimensat", bup_utimensat, METH_VARARGS,
+ +      "Change file timestamps with nanosecond precision." },
+ +#endif
+ +#ifdef HAVE_BUP_STAT
+ +    { "stat", bup_stat, METH_VARARGS,
+ +      "Extended version of stat." },
+ +#endif
+ +#ifdef HAVE_BUP_LSTAT
+ +    { "lstat", bup_lstat, METH_VARARGS,
+ +      "Extended version of lstat." },
+ +#endif
+ +#ifdef HAVE_BUP_FSTAT
+ +    { "fstat", bup_fstat, METH_VARARGS,
+ +      "Extended version of fstat." },
+ +#endif
       { NULL, NULL, 0, NULL },  // sentinel
   };
   
+ +
   PyMODINIT_FUNC init_helpers(void)
   {
-     PyObject *m = Py_InitModule("_helpers", helper_methods);
- -    Py_InitModule("_helpers", faster_methods);
++    PyObject *m = Py_InitModule("_helpers", faster_methods);
+ +    if (m == NULL)
+ +        return;
+ +#ifdef HAVE_BUP_UTIMENSAT
+ +    PyModule_AddObject(m, "AT_FDCWD", Py_BuildValue("i", AT_FDCWD));
+ +    PyModule_AddObject(m, "AT_SYMLINK_NOFOLLOW",
+ +                       Py_BuildValue("i", AT_SYMLINK_NOFOLLOW));
+ +#endif
+ +#ifdef HAVE_BUP_STAT
+ +    Py_INCREF(Py_True);
+ +    PyModule_AddObject(m, "_have_ns_fs_timestamps", Py_True);
+ +#else
+ +    Py_INCREF(Py_False);
+ +    PyModule_AddObject(m, "_have_ns_fs_timestamps", Py_False);
+ +#endif
+     istty = isatty(2) || getenv("BUP_FORCE_TTY");
   }
diff --combined lib/bup/drecurse.py

index 129679a2219c5e84f8f1833af6b7ef44a81c05f4,4196dec0faabb6377db279892c956cf0cb437826..2dbe50c7934420f5e11d7b525d63f3f1626749d0
--- 1/lib/bup/drecurse.py
--- 2/lib/bup/drecurse.py
+++ b/lib/bup/drecurse.py
@@@ -1,11 -1,14 +1,15 @@@
- import stat
+ import stat, os
   from bup.helpers import *
+ +import bup.xstat as xstat
   
   try:
       O_LARGEFILE = os.O_LARGEFILE
   except AttributeError:
       O_LARGEFILE = 0
+ try:
+     O_NOFOLLOW = os.O_NOFOLLOW
+ except AttributeError:
+     O_NOFOLLOW = 0
   
   
   # the use of fchdir() and lstat() is for two reasons:
@@@ -14,8 -17,7 +18,7 @@@
   class OsFile:
       def __init__(self, path):
           self.fd = None
-         self.fd = os.open(path, 
-                           os.O_RDONLY|O_LARGEFILE|os.O_NOFOLLOW|os.O_NDELAY)
+         self.fd = os.open(path, os.O_RDONLY|O_LARGEFILE|O_NOFOLLOW|os.O_NDELAY)
           
       def __del__(self):
           if self.fd:
@@@ -27,7 -29,7 +30,7 @@@
           os.fchdir(self.fd)
   
       def stat(self):
- -        return os.fstat(self.fd)
+ +        return xstat.fstat(self.fd)
   
   
   _IFMT = stat.S_IFMT(0xffffffff)  # avoid function call in inner loop
@@@ -35,7 -37,7 +38,7 @@@ def _dirlist()
       l = []
       for n in os.listdir('.'):
           try:
- -            st = os.lstat(n)
+ +            st = xstat.lstat(n)
           except OSError, e:
               add_error(Exception('%s: %s' % (realpath(n), str(e))))
               continue
@@@ -46,30 -48,40 +49,40 @@@
       return l
   
   
- def _recursive_dirlist(prepend, xdev):
+ def _recursive_dirlist(prepend, xdev, bup_dir=None, excluded_paths=None):
       for (name,pst) in _dirlist():
           if name.endswith('/'):
               if xdev != None and pst.st_dev != xdev:
                   log('Skipping %r: different filesystem.\n' % (prepend+name))
                   continue
+             if bup_dir != None:
+                 if os.path.normpath(prepend+name) == bup_dir:
+                     log('Skipping BUP_DIR.\n')
+                     continue
+             if excluded_paths:
+                 if os.path.normpath(prepend+name) in excluded_paths:
+                     log('Skipping %r: excluded.\n' % (prepend+name))
+                     continue
               try:
                   OsFile(name).fchdir()
               except OSError, e:
                   add_error('%s: %s' % (prepend, e))
               else:
-                 for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev):
+                 for i in _recursive_dirlist(prepend=prepend+name, xdev=xdev,
+                                             bup_dir=bup_dir,
+                                             excluded_paths=excluded_paths):
                       yield i
                   os.chdir('..')
           yield (prepend + name, pst)
   
   
- def recursive_dirlist(paths, xdev):
+ def recursive_dirlist(paths, xdev, bup_dir=None, excluded_paths=None):
       startdir = OsFile('.')
       try:
           assert(type(paths) != type(''))
           for path in paths:
               try:
- -                pst = os.lstat(path)
+ +                pst = xstat.lstat(path)
                   if stat.S_ISLNK(pst.st_mode):
                       yield (path, pst)
                       continue
@@@ -89,7 -101,9 +102,9 @@@
               if stat.S_ISDIR(pst.st_mode):
                   pfile.fchdir()
                   prepend = os.path.join(path, '')
-                 for i in _recursive_dirlist(prepend=prepend, xdev=xdev):
+                 for i in _recursive_dirlist(prepend=prepend, xdev=xdev,
+                                             bup_dir=bup_dir,
+                                             excluded_paths=excluded_paths):
                       yield i
                   startdir.fchdir()
               else:
@@@ -101,3 -115,25 +116,25 @@@
           except:
               pass
           raise
+ 
+ def parse_excludes(flags):
+     excluded_paths = []
+ 
+     for flag in flags:
+         (option, parameter) = flag
+         if option == '--exclude':
+             excluded_paths.append(realpath(parameter))
+ 
+         if option == '--exclude-from':
+             try:
+                 try:
+                     f = open(realpath(parameter))
+                     for exclude_path in f.readlines():
+                         excluded_paths.append(realpath(exclude_path.strip()))
+                 except Error, e:
+                     log("warning: couldn't read %s" % parameter)
+             finally:
+                 f.close()
+ 
+     return excluded_paths
+ 
diff --combined lib/bup/helpers.py

index 2e9d6f1f16d909e4537beabb60a474fb506df4c4,da27edb44b29330e6a9998e7862a9cb1747ba248..566343d2b0e4518b90e1ccacb9c656d4688c070a
--- 1/lib/bup/helpers.py
--- 2/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@@ -1,7 -1,8 +1,9 @@@
   """Helper functions and classes for bup."""
- import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
+ 
+ import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
+ import heapq, operator
   from bup import _version
+ +import bup._helpers as _helpers
   
   # This function should really be in helpers, not in bup.options.  But we
   # want options.py to be standalone so people can include it in other projects.
@@@ -87,6 -88,36 +89,36 @@@ def next(it)
           return None
   
   
+ def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
+     """Merge several iterables through a heap, dropping repeated entries.
+ 
+     iters  -- sequence of iterables; len() is called on each one to
+               compute the total reported to the progress callbacks.
+     pfreq  -- pfunc is called whenever the running count is a multiple
+               of this value.
+     pfunc  -- progress callback, called as pfunc(count, total).
+     pfinal -- called once as pfinal(count, total) after the last entry.
+     key    -- optional attribute name; when given, an entry is treated as
+               a repeat of the previously yielded one if that attribute
+               compares equal, otherwise the entries themselves are
+               compared with operator.eq.
+ 
+     Yields entries in the order the heap hands them out, skipping any
+     entry that is a repeat of the one yielded just before it.  'count'
+     includes the skipped repeats.
+     """
+     if key:
+         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
+     else:
+         samekey = operator.eq
+     count = 0
+     total = sum(len(it) for it in iters)
+     iters = (iter(it) for it in iters)
+     # NOTE(review): next() is presumably the helper defined earlier in this
+     # module that returns None for an exhausted iterator; the 'if e' filter
+     # below relies on getting a false value for empty inputs -- confirm.
+     heap = ((next(it),it) for it in iters)
+     heap = [(e,it) for e,it in heap if e]
+ 
+     heapq.heapify(heap)
+     pe = None  # previously yielded entry
+     while heap:
+         if not count % pfreq:
+             pfunc(count, total)
+         e, it = heap[0]
+         if not samekey(e, pe):
+             pe = e
+             yield e
+         count += 1
+         try:
+             e = it.next() # Don't use next() function, it's too expensive
+         except StopIteration:
+             heapq.heappop(heap) # remove current
+         else:
+             heapq.heapreplace(heap, (e, it)) # shift current to new location
+     pfinal(count, total)
+ 
+ 
   def unlink(f):
       """Delete a file at path 'f' if it currently exists.
   
@@@ -129,11 -160,6 +161,11 @@@ def realpath(p)
       return out
   
   
+ +def detect_fakeroot():
+ +    "Return True if we appear to be running under fakeroot."
+ +    return os.getenv("FAKEROOTKEY") != None
+ +
+ +
   _username = None
   def username():
       """Get the user's login name."""
@@@ -176,24 -202,27 +208,27 @@@ def resource_path(subdir='')
           _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
       return os.path.join(_resource_path, subdir)
   
+ 
   class NotOk(Exception):
       pass
   
- class Conn:
-     """A helper class for bup's client-server protocol."""
-     def __init__(self, inp, outp):
-         self.inp = inp
+ 
+ class BaseConn:
+     def __init__(self, outp):
           self.outp = outp
   
+     def close(self):
+         while self._read(65536): pass
+ 
       def read(self, size):
           """Read 'size' bytes from input stream."""
           self.outp.flush()
-         return self.inp.read(size)
+         return self._read(size)
   
       def readline(self):
           """Read from input stream until a newline is found."""
           self.outp.flush()
-         return self.inp.readline()
+         return self._readline()
   
       def write(self, data):
           """Write 'data' to output stream."""
@@@ -202,12 -231,7 +237,7 @@@
   
       def has_input(self):
           """Return true if input stream is readable."""
-         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
-         if rl:
-             assert(rl[0] == self.inp.fileno())
-             return True
-         else:
-             return None
+         raise NotImplemented("Subclasses must implement has_input")
   
       def ok(self):
           """Indicate end of output from last sent command."""
@@@ -221,7 -245,7 +251,7 @@@
       def _check_ok(self, onempty):
           self.outp.flush()
           rl = ''
-         for rl in linereader(self.inp):
+         for rl in linereader(self):
               #log('%d got line: %r\n' % (os.getpid(), rl))
               if not rl:  # empty line
                   continue
@@@ -247,6 -271,146 +277,146 @@@
           return self._check_ok(onempty)
   
   
+ class Conn(BaseConn):
+     def __init__(self, inp, outp):
+         BaseConn.__init__(self, outp)
+         self.inp = inp
+ 
+     def _read(self, size):
+         return self.inp.read(size)
+ 
+     def _readline(self):
+         return self.inp.readline()
+ 
+     def has_input(self):
+         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
+         if rl:
+             assert(rl[0] == self.inp.fileno())
+             return True
+         else:
+             return None
+ 
+ 
+ def checked_reader(fd, n):
+     """Generate the chunks read from 'fd' until 'n' bytes have been yielded.
+ 
+     Waits in select() for 'fd' to become readable before every os.read().
+     Raises Exception if os.read() returns no data before all 'n' bytes
+     have arrived.
+     """
+     while n > 0:
+         rl, _, _ = select.select([fd], [], [])
+         assert(rl[0] == fd)
+         buf = os.read(fd, n)
+         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
+         yield buf
+         n -= len(buf)
+ 
+ 
+ MAX_PACKET = 128 * 1024
+ def mux(p, outfd, outr, errr):
+     """Multiplex the fds 'outr' and 'errr' onto 'outfd' as packets.
+ 
+     Loops for as long as p.poll() returns None.  Every chunk read is
+     written to 'outfd' as a struct '!IB' header (payload length, channel
+     number) followed by the payload: channel 1 carries data from 'outr'
+     (at most MAX_PACKET bytes per packet), channel 2 carries data from
+     'errr' (at most 1024 bytes per packet).  On the way out, including
+     when an exception propagates, a zero-length channel 3 packet is
+     written.
+     """
+     try:
+         fds = [outr, errr]
+         while p.poll() is None:
+             rl, _, _ = select.select(fds, [], [])
+             for fd in rl:
+                 if fd == outr:
+                     buf = os.read(outr, MAX_PACKET)
+                     # NOTE(review): this break (and the one below) only
+                     # leaves the 'for' loop; the 'while' keeps selecting on
+                     # the same fds -- confirm EOF here cannot spin.
+                     if not buf: break
+                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
+                 elif fd == errr:
+                     buf = os.read(errr, 1024)
+                     if not buf: break
+                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
+     finally:
+         os.write(outfd, struct.pack('!IB', 0, 3))
+ 
+ 
+ class DemuxConn(BaseConn):
+     """A helper class for bup's client-server protocol.
+ 
+     Reads a packet stream from the raw fd 'infd'.  Each packet starts with
+     a 5-byte struct '!IB' header (payload length, channel): channel 1
+     payloads are handed out through _read()/_readline(), channel 2
+     payloads are copied to sys.stderr, and a channel 3 packet marks the
+     connection as closed.
+     """
+     def __init__(self, infd, outp):
+         BaseConn.__init__(self, outp)
+         # Anything that comes through before the sync string was not
+         # multiplexed and can be assumed to be debug/log before mux init.
+         tail = ''
+         while tail != 'BUPMUX':
+             # Read whatever is still missing from a 6-byte window, then one
+             # byte at a time once the window is full.
+             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
+             if not b:
+                 raise IOError('demux: unexpected EOF during initialization')
+             tail += b
+             sys.stderr.write(tail[:-6])  # pre-mux log messages
+             tail = tail[-6:]
+         self.infd = infd
+         self.reader = None   # generator over the current channel 1 payload
+         self.buf = None      # chunk already read but not yet consumed
+         self.closed = False  # set once a channel 3 packet has been seen
+ 
+     def write(self, data):
+         """Write 'data' after a non-blocking attempt to load pending input."""
+         self._load_buf(0)
+         BaseConn.write(self, data)
+ 
+     def _next_packet(self, timeout):
+         """Read one packet header and dispatch on its channel.
+ 
+         Returns False if the connection is already closed or select() on
+         the input fd times out after 'timeout'; returns True once a
+         header has been processed.
+         """
+         if self.closed: return False
+         rl, wl, xl = select.select([self.infd], [], [], timeout)
+         if not rl: return False
+         assert(rl[0] == self.infd)
+         ns = ''.join(checked_reader(self.infd, 5))
+         n, fdw = struct.unpack('!IB', ns)
+         assert(n <= MAX_PACKET)
+         if fdw == 1:
+             # Payload is consumed lazily through self.reader.
+             self.reader = checked_reader(self.infd, n)
+         elif fdw == 2:
+             for buf in checked_reader(self.infd, n):
+                 sys.stderr.write(buf)
+         elif fdw == 3:
+             self.closed = True
+             debug2("DemuxConn: marked closed\n")
+         return True
+ 
+     def _load_buf(self, timeout):
+         """Make sure self.buf holds a chunk of channel 1 data.
+ 
+         Returns True when self.buf is filled (or already was), False when
+         _next_packet() gave up or the connection has been closed.
+         """
+         if self.buf is not None:
+             return True
+         while not self.closed:
+             while not self.reader:
+                 if not self._next_packet(timeout):
+                     return False
+             try:
+                 self.buf = self.reader.next()
+                 return True
+             except StopIteration:
+                 # Current payload is used up; go fetch the next packet.
+                 self.reader = None
+         return False
+ 
+     def _read_parts(self, ix_fn):
+         """Generate pieces of buffered data until ix_fn() finds a cut point.
+ 
+         ix_fn(buf) returns the index at which to cut the current buffer,
+         or None to take the whole buffer and keep going.  Data after the
+         cut point stays in self.buf.
+         """
+         while self._load_buf(None):
+             assert(self.buf is not None)
+             i = ix_fn(self.buf)
+             if i is None or i == len(self.buf):
+                 yv = self.buf
+                 self.buf = None
+             else:
+                 yv = self.buf[:i]
+                 self.buf = self.buf[i:]
+             yield yv
+             if i is not None:
+                 break
+ 
+     def _readline(self):
+         """Return data up to and including the next newline."""
+         def find_eol(buf):
+             try:
+                 return buf.index('\n')+1
+             except ValueError:
+                 return None
+         return ''.join(self._read_parts(find_eol))
+ 
+     def _read(self, size):
+         """Return up to 'size' bytes of channel 1 data."""
+         csize = [size]  # remaining byte count, boxed so the closure can update it
+         def until_size(buf): # Closes on csize
+             if len(buf) < csize[0]:
+                 csize[0] -= len(buf)
+                 return None
+             else:
+                 return csize[0]
+         return ''.join(self._read_parts(until_size))
+ 
+     def has_input(self):
+         """Return whether data can be loaded without waiting (timeout 0)."""
+         return self._load_buf(0)
+ 
+ 
   def linereader(f):
       """Generate a list of input lines from 'f' without terminating newlines."""
       while 1:
@@@ -286,29 -450,44 +456,44 @@@ def slashappend(s)
           return s
   
   
- def _mmap_do(f, sz, flags, prot):
+ def _mmap_do(f, sz, flags, prot, close):
       if not sz:
           st = os.fstat(f.fileno())
           sz = st.st_size
+     if not sz:
+         # trying to open a zero-length map gives an error, but an empty
+         # string has all the same behaviour of a zero-length map, ie. it has
+         # no elements :)
+         return ''
       map = mmap.mmap(f.fileno(), sz, flags, prot)
-     f.close()  # map will persist beyond file close
+     if close:
+         f.close()  # map will persist beyond file close
       return map
   
   
- def mmap_read(f, sz = 0):
+ def mmap_read(f, sz = 0, close=True):
       """Create a read-only memory mapped region on file 'f'.
- 
       If sz is 0, the region will cover the entire file.
       """
-     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ)
+     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
   
   
- def mmap_readwrite(f, sz = 0):
+ def mmap_readwrite(f, sz = 0, close=True):
       """Create a read-write memory mapped region on file 'f'.
+     If sz is 0, the region will cover the entire file.
+     """
+     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
+                     close)
   
+ 
+ def mmap_readwrite_private(f, sz = 0, close=True):
+     """Create a read-write memory mapped region on file 'f'.
       If sz is 0, the region will cover the entire file.
+     The map is private, which means the changes are never flushed back to the
+     file.
       """
-     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
+     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
+                     close)
   
   
   def parse_num(s):
@@@ -356,11 -535,6 +541,11 @@@ def add_error(e)
       log('%-70s\n' % e)
   
   
+ +def clear_errors():
+ +    """Rebind the module-level saved_errors to a new empty list."""
+ +    global saved_errors
+ +    saved_errors = []
+ +
+ +
   istty = os.isatty(2) or atoi(os.environ.get('BUP_FORCE_TTY'))
   def progress(s):
       """Calls log(s) if stderr is a TTY.  Does nothing otherwise."""
@@@ -408,6 -582,7 +593,7 @@@ def columnate(l, prefix)
           out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
       return out
   
+ 
   def parse_date_or_fatal(str, fatal):
       """Parses the given date or calls Option.fatal().
       For now we expect a string that contains a float."""
@@@ -419,6 -594,52 +605,52 @@@
           return date
   
   
+ def strip_path(prefix, path):
+     """Strips a given prefix from a path.
+ 
+     First both paths are normalized.
+ 
+     Raises an Exception if no prefix is given.
+     """
+     if prefix == None:
+         raise Exception('no path given')
+ 
+     normalized_prefix = os.path.realpath(prefix)
+     debug2("normalized_prefix: %s\n" % normalized_prefix)
+     normalized_path = os.path.realpath(path)
+     debug2("normalized_path: %s\n" % normalized_path)
+     if normalized_path.startswith(normalized_prefix):
+         return normalized_path[len(normalized_prefix):]
+     else:
+         return path
+ 
+ 
+ def strip_base_path(path, base_paths):
+     """Strips the base path from a given path.
+ 
+ 
+     Determines the base path for the given string and then strips it
+     using strip_path().
+     Iterates over all base_paths from long to short, to prevent that
+     a too short base_path is removed.
+     """
+     normalized_path = os.path.realpath(path)
+     sorted_base_paths = sorted(base_paths, key=len, reverse=True)
+     for bp in sorted_base_paths:
+         if normalized_path.startswith(os.path.realpath(bp)):
+             return strip_path(bp, normalized_path)
+     return path
+ 
+ 
+ def graft_path(graft_points, path):
+     normalized_path = os.path.realpath(path)
+     for graft_point in graft_points:
+         old_prefix, new_prefix = graft_point
+         if normalized_path.startswith(old_prefix):
+             return re.sub(r'^' + old_prefix, new_prefix, normalized_path)
+     return normalized_path
+ 
+ 
   # hashlib is only available in python 2.5 or higher, but the 'sha' module
   # produces a DeprecationWarning in python 2.6 or higher.  We want to support
   # python 2.4 and above without any stupid warnings, so let's try using hashlib
@@@ -436,10 -657,12 +668,12 @@@ def version_date()
       """Format bup's version date string for output."""
       return _version.DATE.split(' ')[0]
   
+ 
   def version_commit():
       """Get the commit hash of bup's current version."""
       return _version.COMMIT
   
+ 
   def version_tag():
       """Format bup's version tag (the official version number).
   
diff --combined lib/bup/index.py

index 2c53d9eb64def5b7677c643367e50dc2c12b6959,72a7296ea4fda43e299662873fbc77aa8947ff89..637d685e22ac1ce8959bd6f61f16b8e9833f012d
--- 1/lib/bup/index.py
--- 2/lib/bup/index.py
+++ b/lib/bup/index.py
@@@ -95,19 -95,17 +95,19 @@@ class Entry
       def from_stat(self, st, tstart):
           old = (self.dev, self.ctime, self.mtime,
                  self.uid, self.gid, self.size, self.flags & IX_EXISTS)
- -        new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
+ +        new = (st.st_dev,
+ +               int(st.st_ctime.approx_secs()),
+ +               int(st.st_mtime.approx_secs()),
                  st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
           self.dev = st.st_dev
- -        self.ctime = int(st.st_ctime)
- -        self.mtime = int(st.st_mtime)
+ +        self.ctime = int(st.st_ctime.approx_secs())
+ +        self.mtime = int(st.st_mtime.approx_secs())
           self.uid = st.st_uid
           self.gid = st.st_gid
           self.size = st.st_size
           self.mode = st.st_mode
           self.flags |= IX_EXISTS
- -        if int(st.st_ctime) >= tstart or old != new \
+ +        if int(st.st_ctime.approx_secs()) >= tstart or old != new \
                 or self.sha == EMPTY_SHA or not self.gitmode:
               self.invalidate()
           self._fixup()
@@@ -162,9 -160,9 +162,9 @@@
           return not self.ctime
   
       def __cmp__(a, b):
-         return (cmp(a.name, b.name)
-                 or -cmp(a.is_valid(), b.is_valid())
-                 or -cmp(a.is_fake(), b.is_fake()))
+         return (cmp(b.name, a.name)
+                 or cmp(a.is_valid(), b.is_valid())
+                 or cmp(a.is_fake(), b.is_fake()))
   
       def write(self, f):
           f.write(self.basename + '\0' + self.packed())
@@@ -408,10 -406,8 +408,10 @@@ class Writer
           if st:
               isdir = stat.S_ISDIR(st.st_mode)
               assert(isdir == endswith)
- -            e = NewEntry(basename, name, st.st_dev, int(st.st_ctime),
- -                         int(st.st_mtime), st.st_uid, st.st_gid,
+ +            e = NewEntry(basename, name, st.st_dev,
+ +                         int(st.st_ctime.approx_secs()),
+ +                         int(st.st_mtime.approx_secs()),
+ +                         st.st_uid, st.st_gid,
                            st.st_size, st.st_mode, gitmode, sha, flags,
                            0, 0)
           else:
@@@ -456,36 -452,9 +456,9 @@@ def reduce_paths(paths)
       paths.sort(reverse=True)
       return paths
   
- 
- class MergeIter:
-     def __init__(self, iters):
-         self.iters = iters
- 
-     def __len__(self):
-         # FIXME: doesn't remove duplicated entries between iters.
-         # That only happens for parent directories, but will mean the
-         # actual iteration returns fewer entries than this function counts.
-         return sum(len(it) for it in self.iters)
- 
-     def __iter__(self):
-         total = len(self)
-         l = [iter(it) for it in self.iters]
-         l = [(next(it),it) for it in l]
-         l = filter(lambda x: x[0], l)
-         count = 0
-         lastname = None
-         while l:
-             if not (count % 1024):
-                 progress('bup: merging indexes (%d/%d)\r' % (count, total))
-             l.sort()
-             (e,it) = l.pop()
-             if not e:
-                 continue
-             if e.name != lastname:
-                 yield e
-                 lastname = e.name
-             n = next(it)
-             if n:
-                 l.append((n,it))
-             count += 1
+ def merge(*iters):
+     """Merge the given iterables with merge_iter(), keyed on 'name'.
+ 
+     Progress is reported through progress() every 1024 entries and a
+     final summary line is written with log().
+     """
+     def pfunc(count, total):
+         progress('bup: merging indexes (%d/%d)\r' % (count, total))
+     def pfinal(count, total):
           log('bup: merging indexes (%d/%d), done.\n' % (count, total))
+     return merge_iter(iters, 1024, pfunc, pfinal, key='name')
diff --combined lib/bup/t/thelpers.py

index 18f5e890bf413edfd8d97e9ef9f03fed46cab6a0,89cccdaebad7214472c319e6206eb6854b209385..31ecbb9513c15a63b691ef80ab661b8d9859d53c
--- 1/lib/bup/t/thelpers.py
--- 2/lib/bup/t/thelpers.py
+++ b/lib/bup/t/thelpers.py
@@@ -1,6 -1,4 +1,6 @@@
- import os, math
++import math
+ import os
+ +import bup._helpers as _helpers
- 
   from bup.helpers import *
   from wvtest import *
   
@@@ -14,10 -12,64 +14,71 @@@ def test_parse_num()
       WVPASSEQ(pn('1e+9 k'), 1000000000 * 1024)
       WVPASSEQ(pn('-3e-3mb'), int(-0.003 * 1024 * 1024))
   
- 
+ +@wvtest
+ +def test_detect_fakeroot():
+ +    """detect_fakeroot() must agree with whether FAKEROOTKEY is set."""
+ +    if os.getenv('FAKEROOTKEY'):
+ +        WVPASS(detect_fakeroot())
+ +    else:
+ +        WVPASS(not detect_fakeroot())
++
+ @wvtest
+ def test_strip_path():
+     """Check strip_path() with a matching, an empty, a non-matching and a
+     None prefix (the last one must raise)."""
+     prefix = "/var/backup/daily.0/localhost"
+     empty_prefix = ""
+     non_matching_prefix = "/home"
+     path = "/var/backup/daily.0/localhost/etc/"
+ 
+     WVPASSEQ(strip_path(prefix, path), '/etc')
+     WVPASSEQ(strip_path(empty_prefix, path), path)
+     WVPASSEQ(strip_path(non_matching_prefix, path), path)
+     WVEXCEPT(Exception, strip_path, None, path)
+ 
+ @wvtest
+ def test_strip_base_path():
+     """With nested base paths, the longest one must be the one stripped."""
+     path = "/var/backup/daily.0/localhost/etc/"
+     base_paths = ["/var", "/var/backup", "/var/backup/daily.0/localhost"]
+     WVPASSEQ(strip_base_path(path, base_paths), '/etc')
+ 
+ @wvtest
+ def test_strip_symlinked_base_path():
+     tmpdir = os.path.join(os.getcwd(),"test_strip_symlinked_base_path.tmp")
+     symlink_src = os.path.join(tmpdir, "private", "var")
+     symlink_dst = os.path.join(tmpdir, "var")
+     path = os.path.join(symlink_dst, "a")
+ 
+     os.mkdir(tmpdir)
+     os.mkdir(os.path.join(tmpdir, "private"))
+     os.mkdir(symlink_src)
+     os.symlink(symlink_src, symlink_dst)
+ 
+     result = strip_base_path(path, [symlink_dst])
+ 
+     os.remove(symlink_dst)
+     os.rmdir(symlink_src)
+     os.rmdir(os.path.join(tmpdir, "private"))
+     os.rmdir(tmpdir)
+ 
+     WVPASSEQ(result, "/a")
+ 
+ @wvtest
+ def test_graft_path():
+     """Check graft_path() against old prefixes that match at the start of
+     the path, only in its middle, not at all, or as the whole path."""
+     middle_matching_old_path = "/user"
+     non_matching_old_path = "/usr"
+     matching_old_path = "/home"
+     matching_full_path = "/home/user"
+     new_path = "/opt"
+ 
+     all_graft_points = [(middle_matching_old_path, new_path),
+                         (non_matching_old_path, new_path),
+                         (matching_old_path, new_path)]
+ 
+     path = "/home/user/"
+ 
+     # A prefix that only occurs in the middle of the path must not graft.
+     WVPASSEQ(graft_path([(middle_matching_old_path, new_path)], path),
+                         "/home/user")
+     WVPASSEQ(graft_path([(non_matching_old_path, new_path)], path),
+                         "/home/user")
+     WVPASSEQ(graft_path([(matching_old_path, new_path)], path), "/opt/user")
+     WVPASSEQ(graft_path(all_graft_points, path), "/opt/user")
+     WVPASSEQ(graft_path([(matching_full_path, new_path)], path),
+                         "/opt")
diff --combined lib/bup/t/tindex.py

index e6ba44b6facaa2edd84667ab089c9758a309fc2b,4dd1411084be6bda2899a10dd92975cb00772941..4b9e16ab2da7c3bd06db481c3fb25dda62729e3e
--- 1/lib/bup/t/tindex.py
--- 2/lib/bup/t/tindex.py
+++ b/lib/bup/t/tindex.py
@@@ -1,7 -1,6 +1,7 @@@
   import os
   from bup import index
   from bup.helpers import *
+ +import bup.xstat as xstat
   from wvtest import *
   
   @wvtest
@@@ -18,8 -17,8 +18,8 @@@ def index_basic()
   @wvtest
   def index_writer():
       unlink('index.tmp')
- -    ds = os.stat('.')
- -    fs = os.stat('tindex.py')
+ +    ds = xstat.stat('.')
+ +    fs = xstat.stat('tindex.py')
       w = index.Writer('index.tmp')
       w.add('/var/tmp/sporky', fs)
       w.add('/etc/passwd', fs)
@@@ -50,8 -49,8 +50,8 @@@ def eget(l, ename)
   def index_dirty():
       unlink('index.tmp')
       unlink('index2.tmp')
- -    ds = os.stat('.')
- -    fs = os.stat('tindex.py')
+ +    ds = xstat.stat('.')
+ +    fs = xstat.stat('tindex.py')
       
       w1 = index.Writer('index.tmp')
       w1.add('/a/b/x', fs)
@@@ -85,8 -84,7 +85,7 @@@
       r3all = [e.name for e in r3]
       WVPASSEQ(r3all,
                ['/a/c/n/3', '/a/c/n/', '/a/c/', '/a/', '/'])
-     m = index.MergeIter([r2,r1,r3])
-     all = [e.name for e in m]
+     all = [e.name for e in index.merge(r2, r1, r3)]
       WVPASSEQ(all,
                ['/a/c/n/3', '/a/c/n/', '/a/c/',
                 '/a/b/x', '/a/b/n/2', '/a/b/n/', '/a/b/c',
@@@ -97,27 -95,27 +96,27 @@@
       print [hex(e.flags) for e in r1]
       WVPASSEQ([e.name for e in r1 if e.is_valid()], r1all)
       WVPASSEQ([e.name for e in r1 if not e.is_valid()], [])
-     WVPASSEQ([e.name for e in m if not e.is_valid()],
+     WVPASSEQ([e.name for e in index.merge(r2, r1, r3) if not e.is_valid()],
                ['/a/c/n/3', '/a/c/n/', '/a/c/',
                 '/a/b/n/2', '/a/b/n/', '/a/b/', '/a/', '/'])
   
       expect_invalid = ['/'] + r2all + r3all
       expect_real = (set(r1all) - set(r2all) - set(r3all)) \
                       | set(['/a/b/n/2', '/a/c/n/3'])
-     dump(m)
-     for e in m:
+     dump(index.merge(r2, r1, r3))
+     for e in index.merge(r2, r1, r3):
           print e.name, hex(e.flags), e.ctime
           eiv = e.name in expect_invalid
           er  = e.name in expect_real
           WVPASSEQ(eiv, not e.is_valid())
           WVPASSEQ(er, e.is_real())
       fake_validate(r2, r3)
-     dump(m)
-     WVPASSEQ([e.name for e in m if not e.is_valid()], [])
+     dump(index.merge(r2, r1, r3))
+     WVPASSEQ([e.name for e in index.merge(r2, r1, r3) if not e.is_valid()], [])
       
-     e = eget(m, '/a/b/c')
+     e = eget(index.merge(r2, r1, r3), '/a/b/c')
       e.invalidate()
       e.repack()
-     dump(m)
-     WVPASSEQ([e.name for e in m if not e.is_valid()],
+     dump(index.merge(r2, r1, r3))
+     WVPASSEQ([e.name for e in index.merge(r2, r1, r3) if not e.is_valid()],
                ['/a/b/c', '/a/b/', '/a/', '/'])
author	Rob Browning <rlb@defaultvalue.org>
	Thu, 10 Feb 2011 05:01:45 +0000 (23:01 -0600)
committer	Rob Browning <rlb@defaultvalue.org>
	Thu, 10 Feb 2011 05:01:45 +0000 (23:01 -0600)
		1	2
Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
cmd/meta-cmd.py	patch \|	diff1 \|	\|	blob \| history
cmd/xstat-cmd.py	patch \|	diff1 \|	\|	blob \| history
lib/bup/_helpers.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/bup/drecurse.py	patch \|	diff1 \|	diff2 \|	blob \| history
lib/bup/helpers.py	patch \|	diff1 \|	diff2 \|	blob \| history
lib/bup/index.py	patch \|	diff1 \|	diff2 \|	blob \| history
lib/bup/t/thelpers.py	patch \|	diff1 \|	diff2 \|	blob \| history
lib/bup/t/tindex.py	patch \|	diff1 \|	diff2 \|	blob \| history