]> arthur.barton.de Git - bup.git/commitdiff
Add initial support for metadata archives.
authorRob Browning <rlb@defaultvalue.org>
Wed, 25 Aug 2010 02:32:10 +0000 (21:32 -0500)
committerAvery Pennarun <apenwarr@gmail.com>
Wed, 22 Sep 2010 02:24:19 +0000 (19:24 -0700)
Add a bup meta command based on the new lib/bup/metadata.py.  At the
moment, this command can record the metadata for a set of paths in a
file and then extract that metadata to reproduce the original tree
(minus any file contents).  See "bup meta --help" for more
information.

Add get_linux_file_attr and set_linux_file_attr functions to the
_helper module to handle Linux filesystem attributes -- c.f. chattr(1)
and lsattr(1).

Add a lutimes function to the _helper module to support setting
symlink times.

Add a vint module to support the metadata binary encoding.

Add some very preliminary tests for bup meta and metadata.py.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Documentation/bup-meta.1.md [new file with mode: 0644]
cmd/meta-cmd.py [new file with mode: 0755]
lib/bup/_helpers.c
lib/bup/metadata.py [new file with mode: 0644]
lib/bup/t/tmetadata.py [new file with mode: 0644]
lib/bup/vint.py [new file with mode: 0644]
t/test.sh

diff --git a/Documentation/bup-meta.1.md b/Documentation/bup-meta.1.md
new file mode 100644 (file)
index 0000000..ac1940b
--- /dev/null
@@ -0,0 +1,116 @@
+% bup-meta(1) Bup %BUP_VERSION%
+% Rob Browning <rlb@defaultvalue.org>
+% %BUP_DATE%
+
+# NAME
+
+bup-meta - create or extract a metadata archive
+
+# SYNOPSIS
+
+bup meta \-\-create
+  ~ [-R] [-v] [-q] [\-\-no-symlinks] [\-\-no-paths] [-f *file*] \<*paths*...\>
+  
+bup meta \-\-list
+  ~ [-v] [-q] [-f *file*]
+  
+bup meta \-\-extract
+  ~ [-v] [-q] [\-\-numeric-ids] [\-\-no-symlinks] [-f *file*]
+  
+bup meta \-\-start-extract
+  ~ [-v] [-q] [\-\-numeric-ids] [\-\-no-symlinks] [-f *file*]
+  
+bup meta \-\-finish-extract
+  ~ [-v] [-q] [\-\-numeric-ids] [-f *file*]
+
+# DESCRIPTION
+
+`bup meta` either creates or extracts a metadata archive.  A metadata
+archive contains the metadata information (timestamps, ownership,
+access permissions, etc.) for a set of filesystem paths.
+
+# OPTIONS
+
+-c, \-\-create
+:   Create a metadata archive for the specified *path*s.  Write the
+    archive to standard output unless **\-\-file** is specified.
+
+-t, \-\-list
+:   Display information about the metadata in an archive.  Read the
+    archive from standard input unless **\-\-file** is specified.
+
+-x, \-\-extract
+:   Extract a metadata archive.  Conceptually, perform **\-\-start-extract**
+    followed by **\-\-finish-extract**.  Read the archive from standard input
+    unless **\-\-file** is specified.
+
+\-\-start-extract
+:   Build a filesystem tree matching the paths stored in a metadata
+    archive.  By itself, this command does not produce a full
+    restoration of the metadata.  For a full restoration, this command
+    must be followed by a call to **\-\-finish-extract**.  Once this
+    command has finished, all of the normal files described by the
+    metadata will exist and be empty.  Restoring the data in those
+    files, and then calling **\-\-finish-extract** should restore the
+    original tree.  The archive will be read from standard input
+    unless **\-\-file** is specified.
+
+\-\-finish-extract
+:   Finish applying the metadata stored in an archive to the
+    filesystem.  Normally, this command should follow a call to
+    **\-\-start-extract**.  The archive will be read from standard input
+    unless **\-\-file** is specified.
+
+-f, \-\-file=*filename*
+:   Read the metadata archive from *filename* or write it to
+    *filename* as appropriate.  If *filename* is "-", then read from
+    standard input or write to standard output.
+
+-R, \-\-recurse
+:   Recursively descend into subdirectories during **\-\-create**.
+
+\-\-numeric-ids
+:   Apply numeric user and group IDs (rather than text IDs) during
+    **\-\-extract** or **\-\-finish-extract**.
+
+\-\-symlinks
+:   Record symbolic link targets when creating an archive, or restore
+    symbolic links when extracting an archive (during **\-\-extract**
+    or **\-\-start-extract**).  This option is enabled by default.
+    Specify **\-\-no-symlinks** to disable it.
+
+\-\-paths
+:   Record pathnames when creating an archive.  This option is enabled
+    by default.  Specify **\-\-no-paths** to disable it.
+
+-v, \-\-verbose
+:   Be more verbose (can be used more than once).
+
+-q, \-\-quiet
+:   Be quiet.
+
+# EXAMPLES
+
+    # Create a metadata archive for /etc.
+    $ bup meta -cRf etc.meta /etc
+    bup: removing leading "/" from "/etc"
+
+    # Extract the etc.meta archive (files will be empty).
+    $ mkdir tmp && cd tmp
+    $ bup meta -xf ../etc.meta
+    $ ls
+    etc
+
+    # Restore /etc completely.
+    $ mkdir tmp && cd tmp
+    $ bup meta --start-extract -f ../etc.meta
+    ...fill in all regular file contents using some other tool...
+    $ bup meta --finish-extract -f ../etc.meta
+
+# BUGS
+
+Hard links are not handled yet.
+
+# BUP
+
+Part of the `bup`(1) suite.
diff --git a/cmd/meta-cmd.py b/cmd/meta-cmd.py
new file mode 100755 (executable)
index 0000000..41ff2d4
--- /dev/null
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2010 Rob Browning
+#
+# This code is covered under the terms of the GNU Library General
+# Public License as described in the bup LICENSE file.
+
+# TODO: Add tar-like -C option.
+# TODO: Add tar-like -v support to --list.
+
+import sys
+from bup import metadata
+from bup import options
+from bup.helpers import handle_ctrl_c, log, saved_errors
+
+optspec = """
+bup meta --create [OPTION ...] <PATH ...>
+bup meta --list [OPTION ...]
+bup meta --extract [OPTION ...]
+bup meta --start-extract [OPTION ...]
+bup meta --finish-extract [OPTION ...]
+--
+c,create       write metadata for PATHs to stdout (or --file)
+t,list         display metadata
+x,extract      perform --start-extract followed by --finish-extract
+start-extract  build tree matching metadata provided on standard input (or --file)
+finish-extract finish applying standard input (or --file) metadata to filesystem
+f,file=        specify source or destination file
+R,recurse      recurse into subdirectories
+numeric-ids    apply numeric IDs (user, group, etc.), not names, during restore
+symlinks       handle symbolic links (default is true)
+paths          include paths in metadata (default is true)
+v,verbose      increase log output (can be used more than once)
+q,quiet        don't show progress meter
+"""
+
+
+def open_input(name):
+    """Return the archive source: stdin for '' or '-', else the named file."""
+    if not name or name == '-':
+        return sys.stdin
+    # The archive is a binary encoding, so avoid text mode.
+    return open(name, 'rb')
+
+
+def open_output(name):
+    """Return the archive sink: stdout for '' or '-', else the named file."""
+    if not name or name == '-':
+        return sys.stdout
+    return open(name, 'wb')
+
+
+action = None
+# An empty name means "no --file given", i.e. use stdin/stdout.
+target_filename = ''
+should_recurse = False
+restore_numeric_ids = False
+include_paths = True
+handle_symlinks = True
+
+handle_ctrl_c()
+
+o = options.Options('bup meta', optspec)
+(opt, flags, remainder) = o.parse(sys.argv[1:])
+
+# Walk the flags in command-line order so that later flags override
+# earlier ones (e.g. "--symlinks --no-symlinks").
+for flag, value in flags:
+    if flag == '--create' or flag == '-c':
+        action = 'create'
+    elif flag == '--list' or flag == '-t':
+        action = 'list'
+    elif flag == '--extract' or flag == '-x':
+        action = 'extract'
+    elif flag == '--start-extract':
+        action = 'start-extract'
+    elif flag == '--finish-extract':
+        action = 'finish-extract'
+    elif flag == '--file' or flag == '-f':
+        target_filename = value
+    elif flag == '--recurse' or flag == '-R':
+        should_recurse = True
+    elif flag == '--no-recurse':
+        should_recurse = False
+    elif flag == '--numeric-ids':
+        restore_numeric_ids = True
+    elif flag == '--no-numeric-ids':
+        restore_numeric_ids = False
+    elif flag == '--paths':
+        include_paths = True
+    elif flag == '--no-paths':
+        include_paths = False
+    elif flag == '--symlinks':
+        handle_symlinks = True
+    elif flag == '--no-symlinks':
+        handle_symlinks = False
+    elif flag == '--verbose' or flag == '-v':
+        metadata.verbose += 1
+    elif flag == '--quiet' or flag == '-q':
+        metadata.verbose = 0
+
+if not action:
+    o.fatal("no action specified")
+
+if action == 'create':
+    if len(remainder) < 1:
+        o.fatal("no paths specified for create")
+    metadata.save_tree(open_output(target_filename),
+                       remainder,
+                       recurse=should_recurse,
+                       write_paths=include_paths,
+                       save_symlinks=handle_symlinks)
+else:
+    # Every other action reads an archive and takes no path arguments.
+    if len(remainder) > 0:
+        o.fatal("cannot specify paths for --%s" % action)
+    src = open_input(target_filename)
+    if action == 'list':
+        metadata.display_archive(src)
+    elif action == 'start-extract':
+        metadata.start_extract(src, create_symlinks=handle_symlinks)
+    elif action == 'finish-extract':
+        metadata.finish_extract(src, restore_numeric_ids=restore_numeric_ids)
+    elif action == 'extract':
+        metadata.extract(src,
+                         restore_numeric_ids=restore_numeric_ids,
+                         create_symlinks=handle_symlinks)
+
+if saved_errors:
+    log('WARNING: %d errors encountered.\n' % len(saved_errors))
+    sys.exit(1)
+else:
+    sys.exit(0)
index 75d26030545fc9b59041937c5e35a46401bdd0cc..1d9d7defb3b32b58a834d6d0fd1845c69a946d4a 100644 (file)
@@ -1,9 +1,16 @@
+#define _LARGEFILE64_SOURCE 1
+
 #include "bupsplit.h"
 #include <Python.h>
 #include <assert.h>
-#include <stdint.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <arpa/inet.h>
+#include <linux/fs.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
 
 static PyObject *selftest(PyObject *self, PyObject *args)
 {
@@ -193,7 +200,116 @@ static PyObject *fadvise_done(PyObject *self, PyObject *args)
 }
 
 
-static PyMethodDef faster_methods[] = {
+// Set the Linux file attribute flags (see chattr(1)) of path to attr.
+// Returns 0 on success, or -1 with errno set on failure.
+static int set_linux_file_attr(const char *path, unsigned long attr)
+{
+    int fd;
+    int rc;
+    int saved_errno;
+
+    fd = open(path, O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_NOFOLLOW);
+    if (fd == -1)
+        return -1;
+
+    rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
+    // close() may overwrite errno, so keep the ioctl's value for the caller.
+    saved_errno = errno;
+    close(fd);
+    errno = saved_errno;
+    return (rc == -1) ? -1 : 0;
+}
+
+
+// get_linux_file_attr(path) -> int
+// Return the FS_IOC_GETFLAGS attribute word for path (see lsattr(1)).
+// Raises IOError (with the filename) if the open or the ioctl fails.
+static PyObject *py_get_linux_file_attr(PyObject *self, PyObject *args)
+{
+    int rc;
+    int saved_errno;
+    unsigned long attr;
+    char *path;
+    int fd;
+
+    if (!PyArg_ParseTuple(args, "s", &path))
+        return NULL;
+
+    fd = open(path, O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_NOFOLLOW);
+    if (fd == -1)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+
+    // Zeroed first so that any bytes the ioctl leaves untouched are defined.
+    attr = 0;
+    rc = ioctl(fd, FS_IOC_GETFLAGS, &attr);
+    // close() may overwrite errno; remember the ioctl's value first.
+    saved_errno = errno;
+    close(fd);
+    if (rc == -1)
+    {
+        errno = saved_errno;
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+    }
+
+    return Py_BuildValue("k", attr);
+}
+
+
+// set_linux_file_attr(path, attr) -> True
+// Apply the attribute word attr to path via FS_IOC_SETFLAGS (see chattr(1)).
+// Raises IOError (with the filename) if the open or the ioctl fails.
+static PyObject *py_set_linux_file_attr(PyObject *self, PyObject *args)
+{
+    int rc;
+    int saved_errno;
+    unsigned long attr;
+    char *path;
+    int fd;
+
+    if (!PyArg_ParseTuple(args, "sk", &path, &attr))
+        return NULL;
+
+    fd = open(path, O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_NOFOLLOW);
+    if (fd == -1)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+
+    rc = ioctl(fd, FS_IOC_SETFLAGS, &attr);
+    // close() may overwrite errno; remember the ioctl's value first.
+    saved_errno = errno;
+    close(fd);
+    if (rc == -1)
+    {
+        errno = saved_errno;
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
+    }
+
+    // Py_RETURN_TRUE takes the reference the caller will own; returning
+    // Py_True directly would hand back a borrowed reference.
+    Py_RETURN_TRUE;
+}
+
+
+// lutimes(filename, (access, modification)) -> True
+// Set the access and modification times of filename without following a
+// final symlink.  Both times are floating point seconds.  Raises
+// ValueError for NaN or infinite times and IOError if lutimes() fails.
+static PyObject *py_lutimes(PyObject *self, PyObject *args)
+{
+    int rc;
+    char *filename;
+    double access, modification;
+
+    if (!PyArg_ParseTuple(args, "s(dd)", &filename, &access, &modification))
+        return NULL;
+
+    if (isnan(access))
+    {
+        PyErr_SetString(PyExc_ValueError, "access time is NaN");
+        return NULL;
+    }
+    else if (isinf(access))
+    {
+        PyErr_SetString(PyExc_ValueError, "access time is infinite");
+        return NULL;
+    }
+    else if (isnan(modification))
+    {
+        PyErr_SetString(PyExc_ValueError, "modification time is NaN");
+        return NULL;
+    }
+    else if (isinf(modification))
+    {
+        PyErr_SetString(PyExc_ValueError, "modification time is infinite");
+        return NULL;
+    }
+
+    struct timeval tv[2];
+
+    double integral_part;
+    double fractional_part;
+
+    // Split each time into whole seconds and microseconds.
+    fractional_part = modf(access, &integral_part);
+    tv[0].tv_sec = integral_part;
+    tv[0].tv_usec = fractional_part * 1000000;
+
+    // Same split for the modification time; use the modf() results rather
+    // than the raw value so tv_usec really holds the sub-second part.
+    fractional_part = modf(modification, &integral_part);
+    tv[1].tv_sec = integral_part;
+    tv[1].tv_usec = fractional_part * 1000000;
+
+    rc = lutimes(filename, tv);
+    if (rc != 0)
+        return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+
+    // Py_RETURN_TRUE takes the reference the caller will own.
+    Py_RETURN_TRUE;
+}
+
+
+static PyMethodDef helper_methods[] = {
     { "selftest", selftest, METH_VARARGS,
        "Check that the rolling checksum rolls correctly (for unit tests)." },
     { "blobbits", blobbits, METH_VARARGS,
@@ -212,10 +328,16 @@ static PyMethodDef faster_methods[] = {
        "open() the given filename for read with O_NOATIME if possible" },
     { "fadvise_done", fadvise_done, METH_VARARGS,
        "Inform the kernel that we're finished with earlier parts of a file" },
+    { "get_linux_file_attr", py_get_linux_file_attr, METH_VARARGS,
+      "Return the Linux attributes for the given file." },
+    { "set_linux_file_attr", py_set_linux_file_attr, METH_VARARGS,
+      "Set the Linux attributes for the given file." },
+    { "lutimes", py_lutimes, METH_VARARGS,
+      "Set the access and modification times for the given file or symlink." },
     { NULL, NULL, 0, NULL },  // sentinel
 };
 
 PyMODINIT_FUNC init_helpers(void)
 {
-    Py_InitModule("_helpers", faster_methods);
+    Py_InitModule("_helpers", helper_methods);
 }
diff --git a/lib/bup/metadata.py b/lib/bup/metadata.py
new file mode 100644 (file)
index 0000000..7c17aa5
--- /dev/null
@@ -0,0 +1,599 @@
+"""Metadata read/write support for bup."""
+
+# Copyright (C) 2010 Rob Browning
+#
+# This code is covered under the terms of the GNU Library General
+# Public License as described in the bup LICENSE file.
+
+import errno, os, sys, stat, pwd, grp, struct, xattr, posix1e, re
+
+from cStringIO import StringIO
+from bup import vint
+from bup.helpers import mkdirp, log
+from bup._helpers import get_linux_file_attr, set_linux_file_attr, lutimes
+
+# WARNING: the metadata encoding is *not* stable yet.  Caveat emptor!
+
+# Q: Consider hardlink support?
+# Q: Is it OK to store raw linux attr (chattr) flags?
+# Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
+# Q: Is the application of posix1e has_extended() correct?
+# Q: Is one global --numeric-ids argument sufficient?
+# Q: Do nfsv4 acls trump posix1e acls? (seems likely)
+# Q: Add support for crtime -- ntfs, and (only internally?) ext*?
+
+# FIXME: Fix relative/abs path detection/stripping wrt other platforms.
+# FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
+# FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
+# FIXME: Consider pack('vvvvsss', ...) optimization.
+# FIXME: Consider caching users/groups.
+
+## FS notes:
+#
+# osx (varies between hfs and hfs+):
+#   type - regular dir char block fifo socket ...
+#   perms - rwxrwxrwxsgt
+#   times - ctime atime mtime
+#   uid
+#   gid
+#   hard-link-info (hfs+ only)
+#   link-target
+#   device-major/minor
+#   attributes-osx see chflags
+#   content-type
+#   content-creator
+#   forks
+#
+# ntfs
+#   type - regular dir ...
+#   times - creation, modification, posix change, access
+#   hard-link-info
+#   link-target
+#   attributes - see attrib
+#   ACLs
+#   forks (alternate data streams)
+#   crtime?
+#
+# fat
+#   type - regular dir ...
+#   perms - rwxrwxrwx (maybe - see wikipedia)
+#   times - creation, modification, access
+#   attributes - see attrib
+
+verbose = 0
+
+_have_lchmod = hasattr(os, 'lchmod')
+
+
+def _clean_up_path_for_archive(p):
+    """Return p rewritten as a relative path to store in an archive.
+
+    Everything up to and including the last '/../' is dropped, as are a
+    leading '../', leading '/'s, '/./' sequences, repeated '/'s, a
+    leading './' and a trailing '/.'.  If nothing usable is left, or
+    the result would still refer to a parent directory ('..' or
+    '.../..'), '.' is returned instead.
+    """
+    # Not the most efficient approach.
+    result = p
+
+    # Take everything after any '/../'.
+    pos = result.rfind('/../')
+    if pos != -1:
+        result = result[pos + 4:]
+
+    # Take everything after any remaining '../'.
+    if result.startswith('../'):
+        result = result[3:]
+
+    # Remove any '/./' sequences, one at a time so that overlapping
+    # runs such as '/././' are fully collapsed.
+    while '/./' in result:
+        result = result.replace('/./', '/', 1)
+
+    # Remove any leading '/'s.
+    result = result.lstrip('/')
+
+    # Replace '//' with '/' everywhere.
+    while '//' in result:
+        result = result.replace('//', '/', 1)
+
+    # Take everything after any remaining './'.
+    if result.startswith('./'):
+        result = result[2:]
+
+    # Take everything before any remaining '/.'.
+    if result.endswith('/.'):
+        result = result[:-2]
+
+    # Never hand back an empty path or one that still names a parent.
+    if result in ('', '..') or result.endswith('/..'):
+        result = '.'
+
+    return result
+
+
+def _risky_path(p):
+    """Return True if p is absolute or has a '..' path component.
+
+    Such paths could reach outside the directory an archive is being
+    extracted into.
+    """
+    if p.startswith('/'):
+        return True
+    # Covers '../x', 'x/../y', 'x/..' and also a bare '..'.
+    return '..' in p.split('/')
+
+
+def _clean_up_extract_path(p):
+    """Return the path to extract p to, or None if p must be skipped.
+
+    Leading '/'s are removed; an empty remainder becomes '.', and a
+    remainder that _risky_path() rejects yields None.
+    """
+    stripped = p.lstrip('/')
+    if not stripped:
+        return '.'
+    if _risky_path(stripped):
+        return None
+    return stripped
+
+
+# These tags are currently conceptually private to Metadata, and they
+# must be unique, and must *never* be changed.
+_rec_tag_end = 0
+_rec_tag_path = 1
+_rec_tag_common = 2           # times, user, group, type, perms, etc.
+_rec_tag_symlink_target = 3
+_rec_tag_posix1e_acl = 4      # getfacl(1), setfacl(1), etc.
+_rec_tag_nfsv4_acl = 5        # intended to supplant posix1e acls?
+_rec_tag_linux_attr = 6       # lsattr(1) chattr(1)
+_rec_tag_linux_xattr = 7      # getfattr(1) setfattr(1)
+
+
+class Metadata:
+    """Metadata for one filesystem path, stored as tagged binary records.
+
+    An instance is populated either from the filesystem by the _add_*
+    methods or from an archive by read().  write() emits the records,
+    create_path() creates an empty object of the recorded type, and
+    apply_to_path() applies the recorded metadata to an existing path.
+    """
+    # Metadata is stored as a sequence of tagged binary records.  Each
+    # record will have some subset of add, encode, load, create, and
+    # apply methods, i.e. _add_foo...
+
+    ## Common records
+
+    def _add_common(self, path, st):
+        """Record mode, ids, names, times and rdev from stat result st."""
+        self.mode = st.st_mode
+        self.uid = st.st_uid
+        self.gid = st.st_gid
+        self.atime = st.st_atime
+        self.mtime = st.st_mtime
+        self.ctime = st.st_ctime
+        self.rdev = st.st_rdev
+        # An id without a passwd/group entry is not an error: record an
+        # empty name and rely on the numeric id.
+        try:
+            self.user = pwd.getpwuid(st.st_uid)[0]
+        except KeyError:
+            self.user = ''
+        try:
+            self.group = grp.getgrgid(st.st_gid)[0]
+        except KeyError:
+            self.group = ''
+
+    def _encode_common(self):
+        """Return the packed common record.
+
+        Each time is stored as whole seconds plus a separate
+        nanoseconds field.
+        """
+        result = vint.pack('VVsVsVVVVVVV',
+                           self.mode,
+                           self.uid,
+                           self.user,
+                           self.gid,
+                           self.group,
+                           int(self.atime),
+                           int(self.mtime),
+                           int(self.ctime),
+                           int(self.atime * 1e9) % 1000000000,
+                           int(self.mtime * 1e9) % 1000000000,
+                           int(self.ctime * 1e9) % 1000000000,
+                           self.rdev)
+        return result
+
+    def _load_common_rec(self, port):
+        """Read a common record from port and set the matching attributes."""
+        data = vint.read_bvec(port)
+        (self.mode,
+         self.uid,
+         self.user,
+         self.gid,
+         self.group,
+         atime_s,
+         mtime_s,
+         ctime_s,
+         atime_ns,
+         mtime_ns,
+         ctime_ns,
+         self.rdev) = vint.unpack('VVsVsVVVVVVV', data)
+        self.atime = atime_s + (atime_ns / 1e9)
+        self.mtime = mtime_s + (mtime_ns / 1e9)
+        self.ctime = ctime_s + (ctime_ns / 1e9)
+
+    def _create_via_common_rec(self, path, create_symlinks=True):
+        """Create an empty object of the recorded type at path.
+
+        Objects are created with owner-only permissions; the recorded
+        permissions are applied later by _apply_common_rec().
+        """
+        if stat.S_ISREG(self.mode):
+            os.mknod(path, 0600 | stat.S_IFREG)
+        elif stat.S_ISDIR(self.mode):
+            os.mkdir(path, 0700)
+        elif stat.S_ISCHR(self.mode):
+            os.mknod(path, 0600 | stat.S_IFCHR, self.rdev)
+        elif stat.S_ISBLK(self.mode):
+            os.mknod(path, 0600 | stat.S_IFBLK, self.rdev)
+        elif stat.S_ISFIFO(self.mode):
+            os.mknod(path, 0600 | stat.S_IFIFO)
+        elif stat.S_ISLNK(self.mode):
+            if self.symlink_target and create_symlinks:
+                os.symlink(self.symlink_target, path)
+        # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
+        # Otherwise, do nothing.
+
+    def _apply_common_rec(self, path, restore_numeric_ids=False):
+        """Apply the recorded times, ownership and permissions to path.
+
+        With restore_numeric_ids the recorded uid/gid are used as is;
+        otherwise the recorded user/group names are looked up locally,
+        falling back to the recorded numeric id for unknown names.
+        """
+        # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
+        if stat.S_ISLNK(self.mode):
+            lutimes(path, (self.atime, self.mtime))
+        else:
+            os.utime(path, (self.atime, self.mtime))
+        if stat.S_ISREG(self.mode) \
+                or stat.S_ISDIR(self.mode) \
+                or stat.S_ISCHR(self.mode) \
+                or stat.S_ISBLK(self.mode) \
+                or stat.S_ISLNK(self.mode) \
+                or stat.S_ISFIFO(self.mode):
+            # Be safe: drop all permissions while the owner is changed.
+            if _have_lchmod:
+                os.lchmod(path, 0)
+            elif not stat.S_ISLNK(self.mode):
+                os.chmod(path, 0)
+
+            uid = self.uid
+            gid = self.gid
+            if not restore_numeric_ids:
+                try:
+                    uid = pwd.getpwnam(self.user)[2]
+                except KeyError:
+                    pass  # Unknown user name: keep the recorded uid.
+                try:
+                    gid = grp.getgrnam(self.group)[2]
+                except KeyError:
+                    pass  # Unknown group name: keep the recorded gid.
+            os.lchown(path, uid, gid)
+
+            if _have_lchmod:
+                os.lchmod(path, stat.S_IMODE(self.mode))
+            elif not stat.S_ISLNK(self.mode):
+                os.chmod(path, stat.S_IMODE(self.mode))
+
+
+    ## Path records
+
+    def _encode_path(self):
+        """Return the packed path record, or None if no path is set."""
+        if self.path:
+            return vint.pack('s', self.path)
+        else:
+            return None
+
+    def _load_path_rec(self, port):
+        """Read a path record from port into self.path."""
+        self.path = vint.unpack('s', vint.read_bvec(port))[0]
+
+
+    ## Symlink targets
+
+    def _add_symlink_target(self, path, st):
+        """Record the link target if st describes a symbolic link."""
+        if stat.S_ISLNK(st.st_mode):
+            self.symlink_target = os.readlink(path)
+
+    def _encode_symlink_target(self):
+        """Return the symlink target (None when there is none)."""
+        return self.symlink_target
+
+    def _load_symlink_target_rec(self, port):
+        """Read a symlink target record from port."""
+        self.symlink_target = vint.read_bvec(port)
+
+
+    ## POSIX1e ACL records
+
+    # Recorded as a list:
+    #   [txt_id_acl, num_id_acl]
+    # or, if a directory:
+    #   [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
+    # The numeric/text distinction only matters when reading/restoring
+    # a stored record.
+    def _add_posix1e_acl(self, path, st):
+        """Record the POSIX1e ACLs of path, if it has extended ACLs.
+
+        Symlinks are skipped, and EOPNOTSUPP is ignored.
+        """
+        if not stat.S_ISLNK(st.st_mode):
+            try:
+                if posix1e.has_extended(path):
+                    acl = posix1e.ACL(file=path)
+                    self.posix1e_acl = [acl, acl] # txt and num are the same
+                    if stat.S_ISDIR(st.st_mode):
+                        acl = posix1e.ACL(filedef=path)
+                        self.posix1e_acl.extend([acl, acl])
+            except EnvironmentError, e:
+                if e.errno != errno.EOPNOTSUPP:
+                    raise
+
+    def _encode_posix1e_acl(self):
+        """Return the packed ACL record, or None if no ACL was recorded."""
+        # Encode as two strings (w/default ACL string possibly empty).
+        if self.posix1e_acl:
+            acls = self.posix1e_acl
+            txt_flags = posix1e.TEXT_ABBREVIATE
+            num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
+            acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
+                        acls[1].to_any_text('', '\n', num_flags)]
+            if len(acls) < 3:
+                acl_reps += ['', '']
+            else:
+                acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
+                acl_reps.append(acls[3].to_any_text('', '\n', num_flags))
+            return vint.pack('ssss',
+                             acl_reps[0], acl_reps[1], acl_reps[2], acl_reps[3])
+        else:
+            return None
+
+    def _load_posix1e_acl_rec(self, port):
+        """Read an ACL record from port into self.posix1e_acl."""
+        data = vint.read_bvec(port)
+        acl_reps = vint.unpack('ssss', data)
+        # An empty third string means no default ACLs were stored.
+        if acl_reps[2] == '':
+            acl_reps = acl_reps[:2]
+        self.posix1e_acl = [posix1e.ACL(x) for x in acl_reps]
+
+    def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
+        """Apply the recorded ACLs (default ACL first, if any) to path."""
+        if self.posix1e_acl:
+            acls = self.posix1e_acl
+            if len(acls) > 2:
+                if restore_numeric_ids:
+                    acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
+                else:
+                    acls[2].applyto(path, posix1e.ACL_TYPE_DEFAULT)
+            if restore_numeric_ids:
+                acls[1].applyto(path, posix1e.ACL_TYPE_ACCESS)
+            else:
+                acls[0].applyto(path, posix1e.ACL_TYPE_ACCESS)
+
+
+    ## Linux attributes (lsattr(1), chattr(1))
+
+    def _add_linux_attr(self, path, st):
+        """Record non-zero Linux attrs of a regular file or directory."""
+        if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
+            attr = get_linux_file_attr(path)
+            if attr != 0:
+                self.linux_attr = attr
+
+    def _encode_linux_attr(self):
+        """Return the packed attr record, or None if nothing was recorded."""
+        if self.linux_attr:
+            return vint.pack('V', self.linux_attr)
+        else:
+            return None
+
+    def _load_linux_attr_rec(self, port):
+        """Read a Linux attr record from port into self.linux_attr."""
+        data = vint.read_bvec(port)
+        self.linux_attr = vint.unpack('V', data)[0]
+
+    def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
+        """Apply the recorded Linux attrs, if any, to path."""
+        if self.linux_attr:
+            set_linux_file_attr(path, self.linux_attr)
+
+
+    ## Linux extended attributes (getfattr(1), setfattr(1))
+
+    def _add_linux_xattr(self, path, st):
+        """Record all extended attributes of path (EOPNOTSUPP is ignored)."""
+        try:
+            self.linux_xattr = xattr.get_all(path, nofollow=True)
+        except EnvironmentError, e:
+            if e.errno != errno.EOPNOTSUPP:
+                raise
+
+    def _encode_linux_xattr(self):
+        """Return the packed xattr record, or None if there are no xattrs.
+
+        The record is a count followed by that many name/value pairs.
+        """
+        if self.linux_xattr:
+            result = vint.pack('V', len(self.linux_xattr))
+            for name, value in self.linux_xattr:
+                result += vint.pack('ss', name, value)
+            return result
+        else:
+            return None
+
+    def _load_linux_xattr_rec(self, file):
+        """Read an xattr record from file into self.linux_xattr."""
+        data = vint.read_bvec(file)
+        memfile = StringIO(data)
+        result = []
+        for i in range(vint.read_vuint(memfile)):
+            key = vint.read_bvec(memfile)
+            value = vint.read_bvec(memfile)
+            result.append((key, value))
+        self.linux_xattr = result
+
+    def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
+        """Set every recorded extended attribute on path."""
+        if self.linux_xattr:
+            for k, v in self.linux_xattr:
+                xattr.set(path, k, v, nofollow=True)
+
+    def __init__(self):
+        # optional members
+        self.path = None
+        self.symlink_target = None
+        self.linux_attr = None
+        self.linux_xattr = None
+        self.posix1e_acl = None
+        self.posix1e_acl_default = None
+
+    def write(self, port, include_path=True):
+        """Write this object's records to port, ending with the end tag.
+
+        Records whose encoder returns nothing are omitted.
+        """
+        records = [(_rec_tag_path, self._encode_path())] if include_path else []
+        records.extend([(_rec_tag_common, self._encode_common()),
+                        (_rec_tag_symlink_target, self._encode_symlink_target()),
+                        (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
+                        (_rec_tag_linux_attr, self._encode_linux_attr()),
+                        (_rec_tag_linux_xattr, self._encode_linux_xattr())])
+        for tag, data in records:
+            if data:
+                vint.write_vuint(port, tag)
+                vint.write_bvec(port, data)
+        vint.write_vuint(port, _rec_tag_end)
+
+    @staticmethod
+    def read(port):
+        """Read and return the next Metadata object from port."""
+        # This method should either: return a valid Metadata object;
+        # throw EOFError if there was nothing at all to read; throw an
+        # Exception if a valid object could not be read completely.
+        tag = vint.read_vuint(port)
+        try: # From here on, EOF is an error.
+            result = Metadata()
+            while True: # only exit is error (exception) or _rec_tag_end
+                if tag == _rec_tag_path:
+                    result._load_path_rec(port)
+                elif tag == _rec_tag_common:
+                    result._load_common_rec(port)
+                elif tag == _rec_tag_symlink_target:
+                    result._load_symlink_target_rec(port)
+                elif tag == _rec_tag_posix1e_acl:
+                    result._load_posix1e_acl_rec(port)
+                elif tag == _rec_tag_linux_attr:
+                    result._load_linux_attr_rec(port)
+                elif tag == _rec_tag_linux_xattr:
+                    result._load_linux_xattr_rec(port)
+                elif tag == _rec_tag_end:
+                    return result
+                else:
+                    # Unknown record, or one with no loader yet (e.g.
+                    # _rec_tag_nfsv4_acl): skip over its payload.
+                    vint.skip_bvec(port)
+                tag = vint.read_vuint(port)
+        except EOFError:
+            raise Exception("EOF while reading Metadata")
+
+    def isdir(self):
+        """Return true if the recorded mode describes a directory."""
+        return stat.S_ISDIR(self.mode)
+
+    def create_path(self, path, create_symlinks=True):
+        """Create an empty filesystem object of the recorded type at path."""
+        self._create_via_common_rec(path, create_symlinks=create_symlinks)
+
+    def apply_to_path(self, path=None, restore_numeric_ids=False):
+        """Apply all recorded metadata to path (default: self.path).
+
+        The path must already exist.  Raises Exception if neither path
+        nor self.path is set.
+        """
+        # apply metadata to path -- file must exist
+        if not path:
+            path = self.path
+        if not path:
+            raise Exception('Metadata.apply_to_path() called with no path')
+        num_ids = restore_numeric_ids
+        self._apply_common_rec(path, restore_numeric_ids=num_ids)
+        self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
+        self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
+        self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
+
+
+def from_path(path, archive_path=None, save_symlinks=True):
+    """Return a Metadata object describing the filesystem object at path.
+
+    archive_path is stored as the object's path.  The path is examined
+    with lstat(), and the symlink target is only recorded when
+    save_symlinks is true.
+    """
+    meta = Metadata()
+    meta.path = archive_path
+    st = os.lstat(path)
+    meta._add_common(path, st)
+    if save_symlinks:
+        meta._add_symlink_target(path, st)
+    for add_record in (meta._add_posix1e_acl,
+                       meta._add_linux_attr,
+                       meta._add_linux_xattr):
+        add_record(path, st)
+    return meta
+
+
+def save_tree(output_file, paths,
+              recurse=False,
+              write_paths=True,
+              save_symlinks=True):
+    """Write the metadata for each of paths to output_file.
+
+    Each path is stored under its cleaned-up archive name.  With
+    recurse, everything below a directory is written as well; errors
+    raised while walking the tree are propagated.
+    """
+    def _reraise(err):
+        raise err
+
+    def _record(fs_path, archive_path):
+        # Capture one entry and append it to the archive.
+        entry = from_path(fs_path,
+                          archive_path=archive_path,
+                          save_symlinks=save_symlinks)
+        if verbose:
+            print >> sys.stderr, entry.path
+        entry.write(output_file, include_path=write_paths)
+
+    for top in paths:
+        archived_as = _clean_up_path_for_archive(top)
+        if archived_as != top:
+            log('bup: archiving "%s" as "%s"\n' % (top, archived_as))
+
+        # Handle path itself.
+        _record(top, archived_as)
+
+        if not (recurse and os.path.isdir(top)):
+            continue
+        for root, dirs, files in os.walk(top, onerror=_reraise):
+            for name in files + dirs:
+                child = os.path.join(root, name)
+                _record(child, _clean_up_path_for_archive(child))
+
+
+def _set_up_path(meta, create_symlinks=True):
+    """Create the filesystem object described by meta at meta.path.
+
+    Missing parent directories are created first with mode 0700.
+    """
+    # Allow directories to exist as a special case -- might have
+    # been created by an earlier longer path.
+    if meta.isdir():
+        mkdirp(meta.path, 0700)
+    else:
+        parent = os.path.dirname(meta.path)
+        if parent:
+            mkdirp(parent, 0700)
+        # Create the entry whether or not it has a parent component;
+        # a top-level name such as 'foo' has an empty dirname.
+        meta.create_path(meta.path, create_symlinks=create_symlinks)
+
+
+class _ArchiveIterator:
+    """Iterate over the Metadata objects stored in an archive file."""
+
+    def __init__(self, file):
+        self._file = file
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        # Metadata.read() signals a clean end of archive with EOFError.
+        try:
+            item = Metadata.read(self._file)
+        except EOFError:
+            raise StopIteration()
+        return item
+
+
+def display_archive(file):
+    """Print the path of every entry in the archive to stderr."""
+    # FIXME: verbose mode currently prints exactly the same thing.
+    for entry in _ArchiveIterator(file):
+        print >> sys.stderr, entry.path
+
+
+def start_extract(file, create_symlinks=True):
+    """Create the (empty) tree described by the archive in file.
+
+    Entries whose paths are considered risky are skipped and reported
+    through add_error().
+    """
+    # Imported here because the module-level import does not provide it.
+    from bup.helpers import add_error
+    for meta in _ArchiveIterator(file):
+        if verbose:
+            print >> sys.stderr, meta.path
+        xpath = _clean_up_extract_path(meta.path)
+        if not xpath:
+            add_error(Exception('skipping risky path "%s"' % meta.path))
+        else:
+            meta.path = xpath
+            _set_up_path(meta, create_symlinks=create_symlinks)
+
+
+def finish_extract(file, restore_numeric_ids=False):
+    """Apply the metadata in the archive to an already created tree.
+
+    Non-directories are handled as they are read; directories are
+    deferred and handled last, longest path first.  Entries whose paths
+    are considered risky are skipped and reported through add_error().
+    """
+    # Imported here because the module-level import does not provide it.
+    from bup.helpers import add_error
+    all_dirs = []
+    for meta in _ArchiveIterator(file):
+        xpath = _clean_up_extract_path(meta.path)
+        if not xpath:
+            add_error(Exception('skipping risky path "%s"' % meta.path))
+        else:
+            # From here on only the cleaned path is used, so the test
+            # below looks at the same path that will be modified.
+            meta.path = xpath
+            if os.path.isdir(meta.path):
+                all_dirs.append(meta)
+            else:
+                if verbose:
+                    print >> sys.stderr, meta.path
+                meta.apply_to_path(path=xpath,
+                                   restore_numeric_ids=restore_numeric_ids)
+
+    all_dirs.sort(key = lambda x : len(x.path), reverse=True)
+    for dir_meta in all_dirs:
+        # Don't need to check the path -- won't be in all_dirs if not OK.
+        if verbose:
+            print >> sys.stderr, dir_meta.path
+        dir_meta.apply_to_path(path=dir_meta.path,
+                               restore_numeric_ids=restore_numeric_ids)
+
+
+def extract(file, restore_numeric_ids=False, create_symlinks=True):
+    """Create the tree described by the archive and apply its metadata.
+
+    Entries whose paths are considered risky are skipped and reported
+    through add_error().
+    """
+    # Imported here because the module-level import does not provide it.
+    from bup.helpers import add_error
+    # For now, just store all the directories and handle them last,
+    # longest first.
+    all_dirs = []
+    for meta in _ArchiveIterator(file):
+        xpath = _clean_up_extract_path(meta.path)
+        if not xpath:
+            add_error(Exception('skipping risky path "%s"' % meta.path))
+        else:
+            meta.path = xpath
+            if verbose:
+                print >> sys.stderr, '+', meta.path
+            _set_up_path(meta, create_symlinks=create_symlinks)
+            if os.path.isdir(meta.path):
+                all_dirs.append(meta)
+            else:
+                if verbose:
+                    print >> sys.stderr, '=', meta.path
+                meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
+    all_dirs.sort(key = lambda x : len(x.path), reverse=True)
+    for dir_meta in all_dirs:
+        # Shouldn't have to check for risky paths here (omitted above);
+        # the stored path was already replaced by its cleaned form.
+        if verbose:
+            print >> sys.stderr, '=', dir_meta.path
+        dir_meta.apply_to_path(path=dir_meta.path,
+                               restore_numeric_ids=restore_numeric_ids)
diff --git a/lib/bup/t/tmetadata.py b/lib/bup/t/tmetadata.py
new file mode 100644 (file)
index 0000000..d6cd056
--- /dev/null
@@ -0,0 +1,68 @@
+from bup import metadata
+from wvtest import *
+
+
+# Checks metadata._clean_up_path_for_archive against literal inputs
+# and the exact cleaned path expected for each.
+@wvtest
+def test_clean_up_archive_path():
+    cleanup = metadata._clean_up_path_for_archive
+    # Leading slashes are dropped, so the result is a relative path.
+    WVPASSEQ(cleanup('foo'), 'foo')
+    WVPASSEQ(cleanup('/foo'), 'foo')
+    WVPASSEQ(cleanup('///foo'), 'foo')
+    WVPASSEQ(cleanup('/foo/bar'), 'foo/bar')
+    # '.' components and repeated slashes are collapsed.
+    WVPASSEQ(cleanup('foo/./bar'), 'foo/bar')
+    WVPASSEQ(cleanup('/foo/./bar'), 'foo/bar')
+    WVPASSEQ(cleanup('/foo/./bar/././baz'), 'foo/bar/baz')
+    WVPASSEQ(cleanup('/foo/./bar///././baz'), 'foo/bar/baz')
+    # Here the input ends in slashes and the expected result keeps one
+    # trailing slash.
+    WVPASSEQ(cleanup('//./foo/./bar///././baz/.///'), 'foo/bar/baz/')
+    WVPASSEQ(cleanup('./foo/./.bar'), 'foo/.bar')
+    WVPASSEQ(cleanup('./foo/.'), 'foo')
+    # '..' components are resolved; '...' is an ordinary name.  Inputs
+    # that reduce to nothing (including the empty string) give '.'.
+    WVPASSEQ(cleanup('./foo/..'), '.')
+    WVPASSEQ(cleanup('//./..//.../..//.'), '.')
+    WVPASSEQ(cleanup('//./..//..././/.'), '...')
+    WVPASSEQ(cleanup('/////.'), '.')
+    WVPASSEQ(cleanup('/../'), '.')
+    WVPASSEQ(cleanup(''), '.')
+
+
+# Checks which literal paths metadata._risky_path reports as risky.
+@wvtest
+def test_risky_path():
+    risky = metadata._risky_path
+    # Expected to be risky: paths that start with '/' and paths that
+    # contain a '..' component anywhere.
+    WVPASS(risky('/foo'))
+    WVPASS(risky('///foo'))
+    WVPASS(risky('/../foo'))
+    WVPASS(risky('../foo'))
+    WVPASS(risky('foo/..'))
+    WVPASS(risky('foo/../'))
+    WVPASS(risky('foo/../bar'))
+    # Expected not to be risky: relative paths without '..', including
+    # ones with '.' components or trailing slashes.
+    WVFAIL(risky('foo'))
+    WVFAIL(risky('foo/'))
+    WVFAIL(risky('foo///'))
+    WVFAIL(risky('./foo'))
+    WVFAIL(risky('foo/.'))
+    WVFAIL(risky('./foo/.'))
+    WVFAIL(risky('foo/bar'))
+    WVFAIL(risky('foo/./bar'))
+
+# Checks metadata._clean_up_extract_path against literal inputs: the
+# exact path expected back, or a false value for rejected paths.
+@wvtest
+def test_clean_up_extract_path():
+    cleanup = metadata._clean_up_extract_path
+    # Leading slashes are stripped.
+    WVPASSEQ(cleanup('/foo'), 'foo')
+    WVPASSEQ(cleanup('///foo'), 'foo')
+    # Paths containing a '..' component yield a false value.
+    WVFAIL(cleanup('/../foo'))
+    WVFAIL(cleanup('../foo'))
+    WVFAIL(cleanup('foo/..'))
+    WVFAIL(cleanup('foo/../'))
+    WVFAIL(cleanup('foo/../bar'))
+    # Other relative paths come back unchanged: '.' components and
+    # trailing slashes are expected to be left as they are.
+    WVPASSEQ(cleanup('foo'), 'foo')
+    WVPASSEQ(cleanup('foo/'), 'foo/')
+    WVPASSEQ(cleanup('foo///'), 'foo///')
+    WVPASSEQ(cleanup('./foo'), './foo')
+    WVPASSEQ(cleanup('foo/.'), 'foo/.')
+    WVPASSEQ(cleanup('./foo/.'), './foo/.')
+    WVPASSEQ(cleanup('foo/bar'), 'foo/bar')
+    WVPASSEQ(cleanup('foo/./bar'), 'foo/./bar')
+    # A path consisting only of '/' is expected to become '.'.
+    WVPASSEQ(cleanup('/'), '.')
+    WVPASSEQ(cleanup('./'), './')
+    # NOTE(review): the next two assertions are identical -- possibly
+    # one of them was meant to exercise a different input; confirm.
+    WVPASSEQ(cleanup('///foo/bar'), 'foo/bar')
+    WVPASSEQ(cleanup('///foo/bar'), 'foo/bar')
diff --git a/lib/bup/vint.py b/lib/bup/vint.py
new file mode 100644 (file)
index 0000000..31e2dfa
--- /dev/null
@@ -0,0 +1,83 @@
+"""Binary encodings for bup."""
+
+# Copyright (C) 2010 Rob Browning
+#
+# This code is covered under the terms of the GNU Library General
+# Public License as described in the bup LICENSE file.
+
+from cStringIO import StringIO
+
+# Variable length integers are encoded as vints -- see jakarta lucene.
+
+def write_vuint(port, x):
+    """Write the non-negative integer x to port as a vuint.
+
+    The value is emitted seven bits at a time, least significant
+    group first, one byte per group.  Every byte except the last has
+    its high bit (0x80) set.  Zero is written as a single NUL byte.
+
+    Raises Exception if x is negative.
+    """
+    if x < 0:
+        raise Exception("vuints must not be negative")
+    if x == 0:
+        port.write('\0')
+        return
+    while True:
+        low_bits = x & 0x7f
+        x >>= 7
+        if not x:
+            # Last group: leave the continuation bit clear.
+            port.write(chr(low_bits))
+            return
+        port.write(chr(0x80 | low_bits))
+
+
+def read_vuint(port):
+    """Read one vuint from port and return it as an integer.
+
+    Bytes are consumed one at a time.  The low seven bits of each
+    byte supply the next, more significant, group of the result; a
+    byte whose high bit (0x80) is clear ends the value.
+
+    Raises EOFError if port runs out of data before that final byte
+    is seen.  This covers both the case where nothing could be read
+    at all and the case where the input stops part way through a
+    multi-byte value, so a truncated encoding is never returned as
+    though it were complete.
+    """
+    result = 0
+    offset = 0
+    while True:
+        c = port.read(1)
+        if not c:
+            raise EOFError('encountered EOF while reading vuint')
+        b = ord(c)
+        result |= ((b & 0x7f) << offset)
+        if not b & 0x80:
+            return result
+        offset += 7
+
+
+def write_bvec(port, x):
+    """Write x to port as a bvec: its length as a vuint, then x itself."""
+    length = len(x)
+    write_vuint(port, length)
+    port.write(x)
+
+
+def read_bvec(port):
+    """Read a bvec from port: a vuint length, then that many bytes.
+
+    Returns whatever port.read() yields for that length.
+    """
+    return port.read(read_vuint(port))
+
+
+def skip_bvec(port):
+    """Consume one bvec from port and discard its contents."""
+    length = read_vuint(port)
+    port.read(length)
+
+
+def pack(types, *args):
+    """Encode args according to the format string types.
+
+    Each item of types describes the argument in the same position:
+    'V' writes it as a vuint and 's' writes it as a bvec.  Returns
+    the accumulated contents of the StringIO buffer.
+
+    Raises Exception if the number of arguments differs from
+    len(types), or if a format item is not recognized.
+    """
+    if len(args) != len(types):
+        raise Exception('number of arguments does not match format string')
+    out = StringIO()
+    for code, value in zip(types, args):
+        if code == 'V':
+            write_vuint(out, value)
+            continue
+        if code != 's':
+            raise Exception('unknown xpack format string item "' + code + '"')
+        write_bvec(out, value)
+    return out.getvalue()
+
+
+def unpack(types, data):
+    """Decode data according to the format string types; return a list.
+
+    Each item of types selects how the next value is read from data:
+    'V' reads a vuint and 's' reads a bvec.  The values are returned
+    in the order they were read.
+
+    Raises Exception if a format item is not recognized.
+    """
+    values = []
+    source = StringIO(data)
+    for code in types:
+        if code == 'V':
+            reader = read_vuint
+        elif code == 's':
+            reader = read_bvec
+        else:
+            raise Exception('unknown xunpack format string item "' + code + '"')
+        values.append(reader(source))
+    return values
index acfdda5093a3ab3a961a57bd8476381637f692d8..0c326aba8657da30f357aa404306f836e7ef0643 100755 (executable)
--- a/t/test.sh
+++ b/t/test.sh
@@ -219,3 +219,60 @@ if bup fsck --par2-ok; then
 else
     WVFAIL bup fsck --quick -r # still fails because par2 was missing
 fi
+
+# Very simple metadata tests -- "make install" to a temp directory,
+# then check that bup meta can reproduce the metadata correctly
+# (according to coreutils stat) via create, extract, start-extract,
+# and finish-extract.  The current use of stat is crude, and this does
+# not test devices, varying users/groups, acls, attrs, etc.
+WVSTART "meta"
+# Create a test tree and collect its info via stat(1).
+(
+  set -e
+  rm -rf "${TOP}/bupmeta.tmp"
+  mkdir -p "${TOP}/bupmeta.tmp"
+  make DESTDIR="${TOP}/bupmeta.tmp/src" install
+  mkdir "${TOP}/bupmeta.tmp/src/misc"
+  cp -a cmd/bup-* "${TOP}/bupmeta.tmp/src/misc/"
+  cd "${TOP}/bupmeta.tmp/src"
+  # Record stat output for every path, minus the inode numbers, the
+  # "Size:" lines, and the "Change:" lines (presumably because those
+  # are not expected to match in a restored tree).
+  find . | sort | xargs stat \
+    | sed 's/Inode: [0-9]\+//' \
+    | sed '/^ \+Size: /d' \
+    | sed '/^Change: /d' \
+    > ../src-stat
+) || exit 1
+# Use the test tree to check bup meta.
+(
+  cd "${TOP}/bupmeta.tmp" || exit 1
+  WVPASS bup meta --create --recurse --file src.meta src
+  mkdir src-restore || exit 1
+  cd src-restore || exit 1
+  # First pass: single-step --extract, then compare the filtered stat
+  # output of the restored tree with that of the original.
+  WVPASS bup meta --extract --file ../src.meta
+  WVPASS test -d src
+  (
+    set -e
+    cd src
+    find . | sort | xargs stat \
+      | sed 's/Inode: [0-9]\+//' \
+      | sed '/^ \+Size: /d' \
+      | sed '/^Change: /d' \
+      > ../../src-restore-stat
+  ) || exit 1
+  WVPASS diff -u ../src-stat ../src-restore-stat
+  # Second pass: remove the restored tree and repeat the comparison
+  # using --start-extract followed by --finish-extract.
+  rm -rf src
+  WVPASS bup meta --start-extract --file ../src.meta
+  WVPASS test -d src
+  WVPASS bup meta --finish-extract --file ../src.meta
+  (
+    set -e
+    cd src
+    find . | sort | xargs stat \
+      | sed 's/Inode: [0-9]\+//' \
+      | sed '/^ \+Size: /d' \
+      | sed '/^Change: /d' \
+      > ../../src-restore-stat
+  ) || exit 1
+  WVPASS diff -u ../src-stat ../src-restore-stat
+)
+
+exit 0