arthur.barton.de Git - bup.git/blobdiff - lib/bup/metadata.py
Call add_error() with one arg on readlink failure
[bup.git] / lib / bup / metadata.py
index 118688a42ca6d6c5721f439e01f169e077d8ec58..8a84f11747e1ca6725275683f8b736501eebc8af 100644 (file)
@@ -4,18 +4,57 @@
 #
 # This code is covered under the terms of the GNU Library General
 # Public License as described in the bup LICENSE file.
-
-import errno, os, sys, stat, pwd, grp, struct, xattr, posix1e, re
-
+import errno, os, sys, stat, time, pwd, grp, socket, struct
 from cStringIO import StringIO
-from bup import vint
+from bup import vint, xstat
 from bup.drecurse import recursive_dirlist
-from bup.helpers import add_error, mkdirp, log
-from bup.xstat import utime, lutime, lstat, FSTime
-import bup._helpers as _helpers
+from bup.helpers import add_error, mkdirp, log, is_superuser, format_filesize
+from bup.helpers import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
+from bup.xstat import utime, lutime
 
-if _helpers.get_linux_file_attr:
+xattr = None
+if sys.platform.startswith('linux'):
+    try:
+        import xattr
+    except ImportError:
+        log('Warning: Linux xattr support missing; install python-pyxattr.\n')
+    if xattr:
+        try:
+            xattr.get_all
+        except AttributeError:
+            log('Warning: python-xattr module is too old; '
+                'install python-pyxattr instead.\n')
+            xattr = None
+
+posix1e = None
+if not (sys.platform.startswith('cygwin') \
+        or sys.platform.startswith('darwin') \
+        or sys.platform.startswith('netbsd')):
+    try:
+        import posix1e
+    except ImportError:
+        log('Warning: POSIX ACL support missing; install python-pylibacl.\n')
+
+try:
     from bup._helpers import get_linux_file_attr, set_linux_file_attr
+except ImportError:
+    # No need for a warning here; the only reason they won't exist is that we're
+    # not on Linux, in which case files don't have any linux attrs anyway, so
+    # lacking the functions isn't a problem.
+    get_linux_file_attr = set_linux_file_attr = None
+
+
+_suppress_linux_file_attr = \
+    sys.byteorder == 'big' and struct.calcsize('=l') > struct.calcsize('=i')
+
+def check_linux_file_attr_api():
+    """Disable and warn about the Linux attr API when it is suppressed."""
+    global get_linux_file_attr, set_linux_file_attr
+    have_api = get_linux_file_attr or set_linux_file_attr
+    if have_api and _suppress_linux_file_attr:
+        log('Warning: Linux attr support disabled (see "bup help index").\n')
+        get_linux_file_attr = set_linux_file_attr = None
+
 
 # WARNING: the metadata encoding is *not* stable yet.  Caveat emptor!
 
@@ -31,7 +70,6 @@ if _helpers.get_linux_file_attr:
 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
 # FIXME: Consider pack('vvvvsss', ...) optimization.
-# FIXME: Consider caching users/groups.
 
 ## FS notes:
 #
@@ -76,7 +114,7 @@ def _clean_up_path_for_archive(p):
 
     # Take everything after any '/../'.
     pos = result.rfind('/../')
-    if(pos != -1):
+    if pos != -1:
         result = result[result.rfind('/../') + 4:]
 
     # Take everything after any remaining '../'.
@@ -138,24 +176,17 @@ def _clean_up_extract_path(p):
 # must be unique, and must *never* be changed.
 _rec_tag_end = 0
 _rec_tag_path = 1
-_rec_tag_common = 2           # times, owner, group, type, perms, etc.
+_rec_tag_common = 2 # times, user, group, type, perms, etc. (legacy/broken)
 _rec_tag_symlink_target = 3
 _rec_tag_posix1e_acl = 4      # getfacl(1), setfacl(1), etc.
-_rec_tag_nfsv4_acl = 5        # intended to supplant posix1e acls?
+_rec_tag_nfsv4_acl = 5        # intended to supplant posix1e? (unimplemented)
 _rec_tag_linux_attr = 6       # lsattr(1) chattr(1)
 _rec_tag_linux_xattr = 7      # getfattr(1) setfattr(1)
+_rec_tag_hardlink_target = 8 # hard link target path
+_rec_tag_common_v2 = 9 # times, user, group, type, perms, etc. (current)
 
 
-class MetadataError(Exception):
-    pass
-
-
-class MetadataAcquireError(MetadataError):
-    # Thrown when unable to extract any given bit of metadata from a path.
-    pass
-
-
-class MetadataApplyError(MetadataError):
+class ApplyError(Exception):
     # Thrown when unable to apply any given bit of metadata to a path.
     pass
 
@@ -165,30 +196,63 @@ class Metadata:
     # record will have some subset of add, encode, load, create, and
     # apply methods, i.e. _add_foo...
 
+    # We do allow an "empty" object as a special case, i.e. no
+    # records.  One can be created by trying to write Metadata(), and
+    # for such an object, read() will return None.  This is used by
+    # "bup save", for example, as a placeholder in cases where
+    # from_path() fails.
+
+    # NOTE: if any relevant fields are added or removed, be sure to
+    # update same_file() below.
+
     ## Common records
 
     # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00, ns
     # must be non-negative and < 10**9.
 
     def _add_common(self, path, st):
-        self.mode = st.st_mode
         self.uid = st.st_uid
         self.gid = st.st_gid
-        self.rdev = st.st_rdev
         self.atime = st.st_atime
         self.mtime = st.st_mtime
         self.ctime = st.st_ctime
-        self.owner = pwd.getpwuid(st.st_uid)[0]
-        self.group = grp.getgrgid(st.st_gid)[0]
+        self.user = self.group = '' # kept when a lookup below finds nothing
+        entry = pwd_from_uid(st.st_uid)
+        if entry:
+            self.user = entry.pw_name
+        entry = grp_from_gid(st.st_gid)
+        if entry:
+            self.group = entry.gr_name
+        self.mode = st.st_mode
+        # Only collect st_rdev if we might need it for a mknod()
+        # during restore.  On some platforms (e.g. kFreeBSD), it isn't
+        # stable for other file types.  For example "cp -a" will
+        # change it for a plain file.
+        if stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
+            self.rdev = st.st_rdev
+        else:
+            self.rdev = 0
+
+    def _same_common(self, other):
+        """Return whether self and other match on every common field
+        compared for hardlink purposes (mode and atime are not checked)."""
+        fields = ('uid', 'gid', 'rdev', 'mtime', 'ctime', 'user', 'group')
+        # Checked in this order; the first mismatch decides the result.
+        for name in fields:
+            if getattr(self, name) != getattr(other, name):
+                return False
+        return True
 
     def _encode_common(self):
-        atime = self.atime.to_timespec()
-        mtime = self.mtime.to_timespec()
-        ctime = self.ctime.to_timespec()
-        result = vint.pack('VVsVsVvVvVvV',
+        if not self.mode:
+            return None
+        atime = xstat.nsecs_to_timespec(self.atime)
+        mtime = xstat.nsecs_to_timespec(self.mtime)
+        ctime = xstat.nsecs_to_timespec(self.ctime)
+        result = vint.pack('vvsvsvvVvVvV',
                            self.mode,
                            self.uid,
-                           self.owner,
+                           self.user,
                            self.gid,
                            self.group,
                            self.rdev,
@@ -200,11 +264,14 @@ class Metadata:
                            ctime[1])
         return result
 
-    def _load_common_rec(self, port):
+    def _load_common_rec(self, port, legacy_format=False):
+        unpack_fmt = 'vvsvsvvVvVvV'
+        if legacy_format:
+            unpack_fmt = 'VVsVsVvVvVvV'
         data = vint.read_bvec(port)
         (self.mode,
          self.uid,
-         self.owner,
+         self.user,
          self.gid,
          self.group,
          self.rdev,
@@ -213,18 +280,30 @@ class Metadata:
          self.mtime,
          mtime_ns,
          self.ctime,
-         ctime_ns) = vint.unpack('VVsVsVvVvVvV', data)
-        self.atime = FSTime.from_timespec((self.atime, atime_ns))
-        self.mtime = FSTime.from_timespec((self.mtime, mtime_ns))
-        self.ctime = FSTime.from_timespec((self.ctime, ctime_ns))
+         ctime_ns) = vint.unpack(unpack_fmt, data)
+        self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
+        self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
+        self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
+
+    def _recognized_file_type(self):
+        """Return true if self.mode is one of the file types tested below."""
+        for is_type in (stat.S_ISREG, stat.S_ISDIR, stat.S_ISCHR,
+                        stat.S_ISBLK, stat.S_ISFIFO, stat.S_ISSOCK,
+                        stat.S_ISLNK):
+            if is_type(self.mode):
+                return True
+        return False
 
     def _create_via_common_rec(self, path, create_symlinks=True):
+        if not self.mode:
+            raise ApplyError('no metadata - cannot create path ' + path)
+
         # If the path already exists and is a dir, try rmdir.
         # If the path already exists and is anything else, try unlink.
         st = None
         try:
-            st = lstat(path)
-        except IOError, e:
+            st = xstat.lstat(path)
+        except OSError, e:
             if e.errno != errno.ENOENT:
                 raise
         if st:
@@ -232,74 +311,120 @@ class Metadata:
                 try:
                     os.rmdir(path)
                 except OSError, e:
-                    if e.errno == errno.ENOTEMPTY:
-                        msg = 'refusing to overwrite non-empty dir' + path
+                    if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
+                        msg = 'refusing to overwrite non-empty dir ' + path
                         raise Exception(msg)
                     raise
             else:
                 os.unlink(path)
 
         if stat.S_ISREG(self.mode):
-            os.mknod(path, 0600 | stat.S_IFREG)
+            assert(self._recognized_file_type())
+            fd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL, 0600)
+            os.close(fd)
         elif stat.S_ISDIR(self.mode):
+            assert(self._recognized_file_type())
             os.mkdir(path, 0700)
         elif stat.S_ISCHR(self.mode):
+            assert(self._recognized_file_type())
             os.mknod(path, 0600 | stat.S_IFCHR, self.rdev)
         elif stat.S_ISBLK(self.mode):
+            assert(self._recognized_file_type())
             os.mknod(path, 0600 | stat.S_IFBLK, self.rdev)
         elif stat.S_ISFIFO(self.mode):
+            assert(self._recognized_file_type())
             os.mknod(path, 0600 | stat.S_IFIFO)
+        elif stat.S_ISSOCK(self.mode):
+            try:
+                os.mknod(path, 0600 | stat.S_IFSOCK)
+            except OSError, e:
+                if e.errno in (errno.EINVAL, errno.EPERM):
+                    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+                    s.bind(path)
+                else:
+                    raise
         elif stat.S_ISLNK(self.mode):
-            if(self.symlink_target and create_symlinks):
-                os.symlink(self.symlink_target, path)
+            assert(self._recognized_file_type())
+            if self.symlink_target and create_symlinks:
+                # on MacOS, symlink() permissions depend on umask, and there's
+                # no way to chown a symlink after creating it, so we have to
+                # be careful here!
+                oldumask = os.umask((self.mode & 0777) ^ 0777)
+                try:
+                    os.symlink(self.symlink_target, path)
+                finally:
+                    os.umask(oldumask)
         # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
-        # Otherwise, do nothing.
+        else:
+            assert(not self._recognized_file_type())
+            add_error('not creating "%s" with unrecognized mode "0x%x"\n'
+                      % (path, self.mode))
 
     def _apply_common_rec(self, path, restore_numeric_ids=False):
+        if not self.mode:
+            raise ApplyError('no metadata - cannot apply to ' + path)
+
         # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
-        if stat.S_ISLNK(self.mode):
-            lutime(path, (self.atime, self.mtime))
+        # EACCES errors at this stage are fatal for the current path.
+        if lutime and stat.S_ISLNK(self.mode):
+            try:
+                lutime(path, (self.atime, self.mtime))
+            except OSError, e:
+                if e.errno == errno.EACCES:
+                    raise ApplyError('lutime: %s' % e)
+                else:
+                    raise
         else:
-            utime(path, (self.atime, self.mtime))
-        if stat.S_ISREG(self.mode) \
-                | stat.S_ISDIR(self.mode) \
-                | stat.S_ISCHR(self.mode) \
-                | stat.S_ISBLK(self.mode) \
-                | stat.S_ISLNK(self.mode) \
-                | stat.S_ISFIFO(self.mode):
-            # Be safe.
-            if _have_lchmod:
-                os.lchmod(path, 0)
-            elif not stat.S_ISLNK(self.mode):
-                os.chmod(path, 0)
-
-            # Don't try to restore owner unless we're root, and even
-            # if asked, don't try to restore the owner or group if
-            # it doesn't exist in the system db.
+            try:
+                utime(path, (self.atime, self.mtime))
+            except OSError, e:
+                if e.errno == errno.EACCES:
+                    raise ApplyError('utime: %s' % e)
+                else:
+                    raise
+
+        uid = gid = -1 # By default, do nothing.
+        if is_superuser():
             uid = self.uid
             gid = self.gid
             if not restore_numeric_ids:
-                if os.geteuid() == 0:
-                    try:
-                        uid = pwd.getpwnam(self.owner)[2]
-                    except KeyError:
-                        uid = -1
-                        log('bup: ignoring unknown owner %s for "%s"\n'
-                            % (self.owner, path))
+                if self.uid != 0 and self.user:
+                    entry = pwd_from_name(self.user)
+                    if entry:
+                        uid = entry.pw_uid
+                if self.gid != 0 and self.group:
+                    entry = grp_from_name(self.group)
+                    if entry:
+                        gid = entry.gr_gid
+        else: # not superuser - only consider changing the group/gid
+            user_gids = os.getgroups()
+            if self.gid in user_gids:
+                gid = self.gid
+            if not restore_numeric_ids and self.gid != 0:
+                # The grp might not exist on the local system.
+                grps = filter(None, [grp_from_gid(x) for x in user_gids])
+                if self.group in [x.gr_name for x in grps]:
+                    g = grp_from_name(self.group)
+                    if g:
+                        gid = g.gr_gid
+
+        if uid != -1 or gid != -1:
+            try:
+                os.lchown(path, uid, gid)
+            except OSError, e:
+                if e.errno == errno.EPERM:
+                    add_error('lchown: %s' %  e)
+                elif sys.platform.startswith('cygwin') \
+                   and e.errno == errno.EINVAL:
+                    add_error('lchown: unknown uid/gid (%d/%d) for %s'
+                              %  (uid, gid, path))
                 else:
-                    uid = -1 # Not root; assume we can't change owner.
-                try:
-                    gid = grp.getgrnam(self.group)[2]
-                except KeyError:
-                    gid = -1
-                    log('bup: ignoring unknown group %s for "%s"\n'
-                        % (self.group, path))
-            os.lchown(path, uid, gid)
+                    raise
 
-            if _have_lchmod:
-                os.lchmod(path, stat.S_IMODE(self.mode))
-            elif not stat.S_ISLNK(self.mode):
-                os.chmod(path, stat.S_IMODE(self.mode))
+        if _have_lchmod:
+            os.lchmod(path, stat.S_IMODE(self.mode))
+        elif not stat.S_ISLNK(self.mode):
+            os.chmod(path, stat.S_IMODE(self.mode))
 
 
     ## Path records
@@ -317,8 +442,11 @@ class Metadata:
     ## Symlink targets
 
     def _add_symlink_target(self, path, st):
-        if(stat.S_ISLNK(st.st_mode)):
-            self.symlink_target = os.readlink(path)
+        try:
+            if stat.S_ISLNK(st.st_mode):
+                self.symlink_target = os.readlink(path)
+        except OSError, e:
+            add_error('readlink: %s' % e)
 
     def _encode_symlink_target(self):
         return self.symlink_target
@@ -327,6 +455,22 @@ class Metadata:
         self.symlink_target = vint.read_bvec(port)
 
 
+    ## Hardlink targets
+
+    def _add_hardlink_target(self, target):
+        self.hardlink_target = target
+
+    def _same_hardlink_target(self, other):
+        """Return true or false to indicate similarity in the hardlink sense."""
+        return self.hardlink_target == other.hardlink_target
+
+    def _encode_hardlink_target(self):
+        return self.hardlink_target
+
+    def _load_hardlink_target_rec(self, port):
+        self.hardlink_target = vint.read_bvec(port)
+
+
     ## POSIX1e ACL records
 
     # Recorded as a list:
@@ -336,64 +480,111 @@ class Metadata:
     # The numeric/text distinction only matters when reading/restoring
     # a stored record.
     def _add_posix1e_acl(self, path, st):
+        if not posix1e: return
         if not stat.S_ISLNK(st.st_mode):
+            acls = None
+            def_acls = None
             try:
                 if posix1e.has_extended(path):
                     acl = posix1e.ACL(file=path)
-                    self.posix1e_acl = [acl, acl] # txt and num are the same
+                    acls = [acl, acl] # txt and num are the same
                     if stat.S_ISDIR(st.st_mode):
-                        acl = posix1e.ACL(filedef=path)
-                        self.posix1e_acl.extend([acl, acl])
+                        def_acl = posix1e.ACL(filedef=path)
+                        def_acls = [def_acl, def_acl]
             except EnvironmentError, e:
-                if e.errno != errno.EOPNOTSUPP:
+                if e.errno not in (errno.EOPNOTSUPP, errno.ENOSYS):
                     raise
+            if acls:
+                txt_flags = posix1e.TEXT_ABBREVIATE
+                num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
+                acl_rep = [acls[0].to_any_text('', '\n', txt_flags),
+                           acls[1].to_any_text('', '\n', num_flags)]
+                if def_acls:
+                    acl_rep.append(def_acls[0].to_any_text('', '\n', txt_flags))
+                    acl_rep.append(def_acls[1].to_any_text('', '\n', num_flags))
+                self.posix1e_acl = acl_rep
+
+    def _same_posix1e_acl(self, other):
+        """Return true or false to indicate similarity in the hardlink sense."""
+        return self.posix1e_acl == other.posix1e_acl
 
     def _encode_posix1e_acl(self):
         # Encode as two strings (w/default ACL string possibly empty).
         if self.posix1e_acl:
             acls = self.posix1e_acl
-            txt_flags = posix1e.TEXT_ABBREVIATE
-            num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
-            acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
-                        acls[1].to_any_text('', '\n', num_flags)]
-            if(len(acls) < 3):
-                acl_reps += ['', '']
-            else:
-                acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
-                acl_reps.append(acls[3].to_any_text('', '\n', num_flags))
-            return vint.pack('ssss',
-                             acl_reps[0], acl_reps[1], acl_reps[2], acl_reps[3])
+            if len(acls) == 2:
+                acls.extend(['', ''])
+            return vint.pack('ssss', acls[0], acls[1], acls[2], acls[3])
         else:
             return None
 
     def _load_posix1e_acl_rec(self, port):
-        data = vint.read_bvec(port)
-        acl_reps = vint.unpack('ssss', data)
-        if(acl_reps[2] == ''):
-            acl_reps = acl_reps[:2]
-        self.posix1e_acl = [posix1e.ACL(x) for x in acl_reps]
+        acl_rep = vint.unpack('ssss', vint.read_bvec(port))
+        if acl_rep[2] == '':
+            acl_rep = acl_rep[:2]
+        self.posix1e_acl = acl_rep
 
     def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
-        if(self.posix1e_acl):
+        def apply_acl(acl_rep, kind):
+            try:
+                acl = posix1e.ACL(text = acl_rep)
+            except IOError, e:
+                if e.errno == 0:
+                    # pylibacl appears to return an IOError with errno
+                    # set to 0 if a group referred to by the ACL rep
+                    # doesn't exist on the current system.
+                    raise ApplyError("POSIX1e ACL: can't create %r for %r"
+                                     % (acl_rep, path))
+                else:
+                    raise
+            try:
+                acl.applyto(path, kind)
+            except IOError, e:
+                if e.errno == errno.EPERM or e.errno == errno.EOPNOTSUPP:
+                    raise ApplyError('POSIX1e ACL applyto: %s' % e)
+                else:
+                    raise
+
+        if not posix1e:
+            if self.posix1e_acl:
+                add_error("%s: can't restore ACLs; posix1e support missing.\n"
+                          % path)
+            return
+        if self.posix1e_acl:
             acls = self.posix1e_acl
-            if(len(acls) > 2):
+            if len(acls) > 2:
                 if restore_numeric_ids:
-                    acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
+                    apply_acl(acls[3], posix1e.ACL_TYPE_DEFAULT)
                 else:
-                    acls[2].applyto(path, posix1e.ACL_TYPE_DEFAULT)
+                    apply_acl(acls[2], posix1e.ACL_TYPE_DEFAULT)
             if restore_numeric_ids:
-                acls[1].applyto(path, posix1e.ACL_TYPE_ACCESS)
+                apply_acl(acls[1], posix1e.ACL_TYPE_ACCESS)
             else:
-                acls[0].applyto(path, posix1e.ACL_TYPE_ACCESS)
+                apply_acl(acls[0], posix1e.ACL_TYPE_ACCESS)
 
 
     ## Linux attributes (lsattr(1), chattr(1))
 
     def _add_linux_attr(self, path, st):
+        check_linux_file_attr_api()
+        if not get_linux_file_attr: return
         if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
-            attr = get_linux_file_attr(path)
-            if(attr != 0):
-                self.linux_attr = get_linux_file_attr(path)
+            try:
+                attr = get_linux_file_attr(path)
+                if attr != 0:
+                    self.linux_attr = attr
+            except OSError, e:
+                if e.errno == errno.EACCES:
+                    add_error('read Linux attr: %s' % e)
+                elif e.errno in (errno.ENOTTY, errno.ENOSYS, errno.EOPNOTSUPP):
+                    # Assume filesystem doesn't support attrs.
+                    return
+                else:
+                    raise
+
+    def _same_linux_attr(self, other):
+        """Return true or false to indicate similarity in the hardlink sense."""
+        return self.linux_attr == other.linux_attr
 
     def _encode_linux_attr(self):
         if self.linux_attr:
@@ -406,19 +597,37 @@ class Metadata:
         self.linux_attr = vint.unpack('V', data)[0]
 
     def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
-        if(self.linux_attr):
-            set_linux_file_attr(path, self.linux_attr)
+        if self.linux_attr:
+            check_linux_file_attr_api()
+            if not set_linux_file_attr:
+                add_error("%s: can't restore linuxattrs: "
+                          "linuxattr support missing.\n" % path)
+                return
+            try:
+                set_linux_file_attr(path, self.linux_attr)
+            except OSError, e:
+                if e.errno in (errno.ENOTTY, errno.EOPNOTSUPP, errno.ENOSYS,
+                               errno.EACCES):
+                    raise ApplyError('Linux chattr: %s (0x%s)'
+                                     % (e, hex(self.linux_attr)))
+                else:
+                    raise
 
 
     ## Linux extended attributes (getfattr(1), setfattr(1))
 
     def _add_linux_xattr(self, path, st):
+        if not xattr: return
         try:
             self.linux_xattr = xattr.get_all(path, nofollow=True)
         except EnvironmentError, e:
             if e.errno != errno.EOPNOTSUPP:
                 raise
 
+    def _same_linux_xattr(self, other):
+        """Return true or false to indicate similarity in the hardlink sense."""
+        return self.linux_xattr == other.linux_xattr
+
     def _encode_linux_xattr(self):
         if self.linux_xattr:
             result = vint.pack('V', len(self.linux_xattr))
@@ -439,23 +648,87 @@ class Metadata:
         self.linux_xattr = result
 
     def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
-        if(self.linux_xattr):
-            for k, v in self.linux_xattr:
-                xattr.set(path, k, v, nofollow=True)
+        if not xattr:
+            if self.linux_xattr:
+                add_error("%s: can't restore xattr; xattr support missing.\n"
+                          % path)
+            return
+        if not self.linux_xattr:
+            return
+        try:
+            existing_xattrs = set(xattr.list(path, nofollow=True))
+        except IOError, e:
+            if e.errno == errno.EACCES:
+                raise ApplyError('xattr.set %r: %s' % (path, e))
+            else:
+                raise
+        for k, v in self.linux_xattr:
+            if k not in existing_xattrs \
+                    or v != xattr.get(path, k, nofollow=True):
+                try:
+                    xattr.set(path, k, v, nofollow=True)
+                except IOError, e:
+                    if e.errno == errno.EPERM \
+                            or e.errno == errno.EOPNOTSUPP:
+                        raise ApplyError('xattr.set %r: %s' % (path, e))
+                    else:
+                        raise
+            existing_xattrs -= frozenset([k])
+        for k in existing_xattrs:
+            try:
+                xattr.remove(path, k, nofollow=True)
+            except IOError, e:
+                if e.errno == errno.EPERM:
+                    raise ApplyError('xattr.remove %r: %s' % (path, e))
+                else:
+                    raise
 
     def __init__(self):
+        self.mode = self.uid = self.gid = self.user = self.group = None
+        self.atime = self.mtime = self.ctime = None
         # optional members
         self.path = None
+        self.size = None
         self.symlink_target = None
+        self.hardlink_target = None
         self.linux_attr = None
         self.linux_xattr = None
         self.posix1e_acl = None
-        self.posix1e_acl_default = None
+
+    def __repr__(self):
+        """Return a one-line debugging summary of the populated fields."""
+        result = ['<%s instance at %s' % (self.__class__, hex(id(self)))]
+        if self.path:
+            result.append(' path:' + repr(self.path))
+        if self.mode:
+            result.append(' mode:' + repr(xstat.mode_str(self.mode)
+                                          + '(%s)' % hex(self.mode)))
+        if self.uid:
+            result.append(' uid:' + str(self.uid))
+        if self.gid:
+            result.append(' gid:' + str(self.gid))
+        if self.user:
+            result.append(' user:' + repr(self.user))
+        if self.group:
+            result.append(' group:' + repr(self.group))
+        if self.size:
+            result.append(' size:' + repr(self.size))
+        for name, val in (('atime', self.atime),
+                          ('mtime', self.mtime),
+                          ('ctime', self.ctime)):
+            if val is not None: # Unset on an "empty" Metadata object.
+                secs = xstat.fstime_floor_secs(val)
+                stamp = time.strftime('%Y-%m-%d %H:%M %z', time.gmtime(secs))
+                result.append(' %s:%r' % (name, stamp))
+        return ''.join(result + ['>'])
 
     def write(self, port, include_path=True):
-        records = [(_rec_tag_path, self._encode_path())] if include_path else []
-        records.extend([(_rec_tag_common, self._encode_common()),
-                        (_rec_tag_symlink_target, self._encode_symlink_target()),
+        records = include_path and [(_rec_tag_path, self._encode_path())] or []
+        records.extend([(_rec_tag_common_v2, self._encode_common()),
+                        (_rec_tag_symlink_target,
+                         self._encode_symlink_target()),
+                        (_rec_tag_hardlink_target,
+                         self._encode_hardlink_target()),
                         (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
                         (_rec_tag_linux_attr, self._encode_linux_attr()),
                         (_rec_tag_linux_xattr, self._encode_linux_xattr())])
@@ -465,31 +738,42 @@ class Metadata:
                 vint.write_bvec(port, data)
         vint.write_vuint(port, _rec_tag_end)
 
+    def encode(self, include_path=True):
+        port = StringIO()
+        self.write(port, include_path)
+        return port.getvalue()
+
     @staticmethod
     def read(port):
-        # This method should either: return a valid Metadata object;
-        # throw EOFError if there was nothing at all to read; throw an
-        # Exception if a valid object could not be read completely.
+        # This method should either return a valid Metadata object,
+        # return None if there was no information at all (just a
+        # _rec_tag_end), throw EOFError if there was nothing at all to
+        # read, or throw an Exception if a valid object could not be
+        # read completely.
         tag = vint.read_vuint(port)
+        if tag == _rec_tag_end:
+            return None
         try: # From here on, EOF is an error.
             result = Metadata()
-            while(True): # only exit is error (exception) or _rec_tag_end
+            while True: # only exit is error (exception) or _rec_tag_end
                 if tag == _rec_tag_path:
                     result._load_path_rec(port)
-                elif tag == _rec_tag_common:
+                elif tag == _rec_tag_common_v2:
                     result._load_common_rec(port)
                 elif tag == _rec_tag_symlink_target:
                     result._load_symlink_target_rec(port)
+                elif tag == _rec_tag_hardlink_target:
+                    result._load_hardlink_target_rec(port)
                 elif tag == _rec_tag_posix1e_acl:
-                    result._load_posix1e_acl(port)
-                elif tag ==_rec_tag_nfsv4_acl:
-                    result._load_nfsv4_acl_rec(port)
+                    result._load_posix1e_acl_rec(port)
                 elif tag == _rec_tag_linux_attr:
                     result._load_linux_attr_rec(port)
                 elif tag == _rec_tag_linux_xattr:
                     result._load_linux_xattr_rec(port)
                 elif tag == _rec_tag_end:
                     return result
+                elif tag == _rec_tag_common: # Should be very rare.
+                    result._load_common_rec(port, legacy_format = True)
                 else: # unknown record
                     vint.skip_bvec(port)
                 tag = vint.read_vuint(port)
@@ -507,30 +791,46 @@ class Metadata:
         if not path:
             path = self.path
         if not path:
-            raise Exception('Metadata.apply_to_path() called with no path');
+            raise Exception('Metadata.apply_to_path() called with no path')
+        if not self._recognized_file_type():
+            add_error('not applying metadata to "%s"' % path
+                      + ' with unrecognized mode "0x%x"\n' % self.mode)
+            return
         num_ids = restore_numeric_ids
-        try: # Later we may want to push this down and make it finer grained.
-            self._apply_common_rec(path, restore_numeric_ids=num_ids)
-            self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
-            self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
-            self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
-        except Exception, e:
-            raise MetadataApplyError(e)
+        for apply_metadata in (self._apply_common_rec,
+                               self._apply_posix1e_acl_rec,
+                               self._apply_linux_attr_rec,
+                               self._apply_linux_xattr_rec):
+            try:
+                apply_metadata(path, restore_numeric_ids=num_ids)
+            except ApplyError, e:
+                add_error(e)
+
+    def same_file(self, other):
+        """Return a true value when self and other could describe the
+        same on-disk file in the hardlink sense.  Both are assumed to
+        be regular files; the checks below run in order and stop at
+        the first one that fails."""
+        return (self._same_common(other)
+                and self._same_hardlink_target(other)
+                and self._same_posix1e_acl(other)
+                and self._same_linux_attr(other)
+                and self._same_linux_xattr(other))
 
 
-def from_path(path, archive_path=None, save_symlinks=True):
+def from_path(path, statinfo=None, archive_path=None,
+              save_symlinks=True, hardlink_target=None):
     result = Metadata()
     result.path = archive_path
-    st = lstat(path)
-    try: # Later we may want to push this down and make it finer grained.
-        result._add_common(path, st)
-        if(save_symlinks):
-            result._add_symlink_target(path, st)
-        result._add_posix1e_acl(path, st)
-        result._add_linux_attr(path, st)
-        result._add_linux_xattr(path, st)
-    except Exception, e:
-        raise MetadataAcquireError(e)
+    st = statinfo or xstat.lstat(path)  # lstat only if no stat was passed
+    result.size = st.st_size  # size is taken straight from the stat result
+    result._add_common(path, st)  # no try/except: errors reach the caller
+    if save_symlinks:
+        result._add_symlink_target(path, st)
+    result._add_hardlink_target(hardlink_target)  # passed through as given
+    result._add_posix1e_acl(path, st)
+    result._add_linux_attr(path, st)
+    result._add_linux_xattr(path, st)
     return result
 
 
@@ -543,39 +843,179 @@ def save_tree(output_file, paths,
     # Issue top-level rewrite warnings.
     for path in paths:
         safe_path = _clean_up_path_for_archive(path)
-        if(safe_path != path):
-            log('bup: archiving "%s" as "%s"\n' % (path, safe_path))
+        if safe_path != path:
+            log('archiving "%s" as "%s"\n' % (path, safe_path))
 
-    start_dir = os.getcwd()
-    try:
-        for (p, st) in recursive_dirlist(paths, xdev=xdev):
-            dirlist_dir = os.getcwd()
-            os.chdir(start_dir)
+    if not recurse:
+        for p in paths:
             safe_path = _clean_up_path_for_archive(p)
-            try:
-                m = from_path(p, archive_path=safe_path,
-                              save_symlinks=save_symlinks)
-            except MetadataAcquireError, e:
-                add_error(e)
-
+            st = xstat.lstat(p)
+            if stat.S_ISDIR(st.st_mode):
+                safe_path += '/'
+            m = from_path(p, statinfo=st, archive_path=safe_path,
+                          save_symlinks=save_symlinks)
             if verbose:
                 print >> sys.stderr, m.path
             m.write(output_file, include_path=write_paths)
-            os.chdir(dirlist_dir)
-    finally:
-        os.chdir(start_dir)
+    else:
+        start_dir = os.getcwd()
+        try:
+            for (p, st) in recursive_dirlist(paths, xdev=xdev):
+                dirlist_dir = os.getcwd()
+                os.chdir(start_dir)
+                safe_path = _clean_up_path_for_archive(p)
+                m = from_path(p, statinfo=st, archive_path=safe_path,
+                              save_symlinks=save_symlinks)
+                if verbose:
+                    print >> sys.stderr, m.path
+                m.write(output_file, include_path=write_paths)
+                os.chdir(dirlist_dir)
+        finally:
+            os.chdir(start_dir)
 
 
 def _set_up_path(meta, create_symlinks=True):
     # Allow directories to exist as a special case -- might have
     # been created by an earlier longer path.
     if meta.isdir():
-        mkdirp(meta.path, 0700)
+        mkdirp(meta.path)  # no explicit mode is passed to mkdirp any more
     else:
         parent = os.path.dirname(meta.path)
         if parent:
-            mkdirp(parent, 0700)
-            meta.create_path(meta.path, create_symlinks=create_symlinks)
+            mkdirp(parent)  # create_path below runs with or without a parent
+        meta.create_path(meta.path, create_symlinks=create_symlinks)
+
+
+# Names of the metadata fields that detailed_str() can report; it falls
+# back to this whole set when the caller does not select any fields.
+all_fields = frozenset((
+    'path',
+    'mode',
+    'link-target',
+    'rdev',
+    'size',
+    'uid', 'gid',
+    'user', 'group',
+    'atime', 'mtime', 'ctime',
+    'linux-attr',
+    'linux-xattr',
+    'posix1e-acl',
+))
+
+
+def summary_str(meta, numeric_ids = False, classification = None,
+                human_readable = False):
+    """Return a string containing the "ls -l" style listing for meta.
+    Classification may be "all", "type", or None.  When meta is false
+    the columns hold '?' placeholders and the name starts out empty."""
+    user_str = group_str = size_or_dev_str = '?'
+    name = symlink_target = None  # name is read even when meta is false
+    if meta:
+        name = meta.path
+        mode_str = xstat.mode_str(meta.mode)
+        symlink_target = meta.symlink_target
+        mtime_secs = xstat.fstime_floor_secs(meta.mtime)
+        mtime_str = time.strftime('%Y-%m-%d %H:%M', time.localtime(mtime_secs))
+        if meta.user and not numeric_ids:
+            user_str = meta.user
+        elif meta.uid is not None:
+            user_str = str(meta.uid)
+        if meta.group and not numeric_ids:
+            group_str = meta.group
+        elif meta.gid is not None:
+            group_str = str(meta.gid)
+        if stat.S_ISCHR(meta.mode) or stat.S_ISBLK(meta.mode):
+            if meta.rdev:  # otherwise the '?' default is kept
+                size_or_dev_str = '%d,%d' % (os.major(meta.rdev),
+                                             os.minor(meta.rdev))
+        elif meta.size is not None:
+            if human_readable:
+                size_or_dev_str = format_filesize(meta.size)
+            else:
+                size_or_dev_str = str(meta.size)
+        else:
+            size_or_dev_str = '-'
+        if classification:
+            classification_str = \
+                xstat.classification_str(meta.mode, classification == 'all')
+    else:
+        mode_str = '?' * 10
+        mtime_str = '????-??-?? ??:??'
+        classification_str = '?'
+
+    name = name or ''
+    if classification:
+        name += classification_str
+    if symlink_target:
+        name += ' -> ' + symlink_target
+
+    return '%-10s %-11s %11s %16s %s' % (mode_str,
+                                         user_str + "/" + group_str,
+                                         size_or_dev_str,
+                                         mtime_str,
+                                         name)
+
+
+def detailed_str(meta, fields = None):
+    """Return "name: value" lines (newline-joined) describing meta for
+    each name in fields; a false fields means all of all_fields."""
+    # FIXME: should optional fields be omitted, or empty i.e. "rdev:
+    # 0", "link-target:", etc.
+    wanted = fields or all_fields
+    lines = []
+    emit = lines.append
+    def times_reportable():
+        # Without xstat.lutime, times can only be set via utime(), which
+        # cannot touch a symlink; a symlink's atime/mtime are then
+        # meaningless, so callers report 0 (keeps comparisons quiet).
+        return xstat.lutime or not stat.S_ISLNK(meta.mode)
+
+    if 'path' in wanted:
+        emit('path: ' + (meta.path or ''))
+    if 'mode' in wanted:
+        emit('mode: %s (%s)' % (oct(meta.mode),
+                                xstat.mode_str(meta.mode)))
+    if 'link-target' in wanted and stat.S_ISLNK(meta.mode):
+        emit('link-target: ' + meta.symlink_target)
+    if 'rdev' in wanted:
+        if not meta.rdev:
+            emit('rdev: 0')
+        else:
+            emit('rdev: %d,%d' % (os.major(meta.rdev),
+                                  os.minor(meta.rdev)))
+    if 'size' in wanted and meta.size:
+        emit('size: ' + str(meta.size))
+    if 'uid' in wanted:
+        emit('uid: ' + str(meta.uid))
+    if 'gid' in wanted:
+        emit('gid: ' + str(meta.gid))
+    if 'user' in wanted:
+        emit('user: ' + meta.user)
+    if 'group' in wanted:
+        emit('group: ' + meta.group)
+    if 'atime' in wanted:
+        if times_reportable():
+            emit('atime: ' + xstat.fstime_to_sec_str(meta.atime))
+        else:
+            emit('atime: 0')
+    if 'mtime' in wanted:
+        if times_reportable():
+            emit('mtime: ' + xstat.fstime_to_sec_str(meta.mtime))
+        else:
+            emit('mtime: 0')
+    if 'ctime' in wanted:
+        emit('ctime: ' + xstat.fstime_to_sec_str(meta.ctime))
+    if 'linux-attr' in wanted and meta.linux_attr:
+        emit('linux-attr: ' + hex(meta.linux_attr))
+    if 'linux-xattr' in wanted and meta.linux_xattr:
+        for xname, xvalue in meta.linux_xattr:
+            emit('linux-xattr: %s -> %s' % (xname, repr(xvalue)))
+    if 'posix1e-acl' in wanted and meta.posix1e_acl:
+        acls = meta.posix1e_acl
+        emit('posix1e-acl: ' + acls[0] + '\n')
+        if stat.S_ISDIR(meta.mode):
+            emit('posix1e-acl-default: ' + acls[2] + '\n')
+    return '\n'.join(lines)
 
 
 class _ArchiveIterator:
@@ -593,15 +1033,30 @@ class _ArchiveIterator:
 
 
 def display_archive(file):
-    for meta in _ArchiveIterator(file):
-        if verbose:
-            print meta.path # FIXME
-        else:
+    if verbose > 1:  # detailed_str() dump, blank line between entries
+        first_item = True
+        for meta in _ArchiveIterator(file):
+            if not first_item:
+                print  # separator before every entry except the first
+            print detailed_str(meta)
+            first_item = False
+    elif verbose > 0:  # one "ls -l" style summary line per entry
+        for meta in _ArchiveIterator(file):
+            print summary_str(meta)
+    elif verbose == 0:  # paths only; a negative verbose prints nothing
+        for meta in _ArchiveIterator(file):
+            if not meta.path:
+                print >> sys.stderr, \
+                    'bup: no metadata path, but asked to only display path', \
+                    '(increase verbosity?)'
+                sys.exit(1)
             print meta.path
 
 
 def start_extract(file, create_symlinks=True):
     for meta in _ArchiveIterator(file):
+        if not meta: # Hit end record.
+            break
         if verbose:
             print >> sys.stderr, meta.path
         xpath = _clean_up_extract_path(meta.path)
@@ -615,6 +1070,8 @@ def start_extract(file, create_symlinks=True):
 def finish_extract(file, restore_numeric_ids=False):
     all_dirs = []
     for meta in _ArchiveIterator(file):
+        if not meta: # Hit end record.
+            break
         xpath = _clean_up_extract_path(meta.path)
         if not xpath:
             add_error(Exception('skipping risky path "%s"' % dir.path))
@@ -624,23 +1081,15 @@ def finish_extract(file, restore_numeric_ids=False):
             else:
                 if verbose:
                     print >> sys.stderr, meta.path
-                try:
-                    meta.apply_to_path(path=xpath,
-                                       restore_numeric_ids=restore_numeric_ids)
-                except MetadataApplyError, e:
-                    add_error(e)
-
+                meta.apply_to_path(path=xpath,
+                                   restore_numeric_ids=restore_numeric_ids)
     all_dirs.sort(key = lambda x : len(x.path), reverse=True)
     for dir in all_dirs:
         # Don't need to check xpath -- won't be in all_dirs if not OK.
         xpath = _clean_up_extract_path(dir.path)
         if verbose:
             print >> sys.stderr, dir.path
-        try:
-            dir.apply_to_path(path=xpath,
-                              restore_numeric_ids=restore_numeric_ids)
-        except MetadataApplyError, e:
-            add_error(e)
+        dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids)
 
 
 def extract(file, restore_numeric_ids=False, create_symlinks=True):
@@ -648,6 +1097,8 @@ def extract(file, restore_numeric_ids=False, create_symlinks=True):
     # longest first.
     all_dirs = []
     for meta in _ArchiveIterator(file):
+        if not meta: # Hit end record.
+            break
         xpath = _clean_up_extract_path(meta.path)
         if not xpath:
             add_error(Exception('skipping risky path "%s"' % meta.path))
@@ -661,19 +1112,13 @@ def extract(file, restore_numeric_ids=False, create_symlinks=True):
             else:
                 if verbose:
                     print >> sys.stderr, '=', meta.path
-                try:
-                    meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
-                except MetadataApplyError, e:
-                    add_error(e)
+                meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
     all_dirs.sort(key = lambda x : len(x.path), reverse=True)
     for dir in all_dirs:
         # Don't need to check xpath -- won't be in all_dirs if not OK.
-        xpath = _clean_up_extract_path(meta.path)
+        xpath = _clean_up_extract_path(dir.path)
         if verbose:
-            print >> sys.stderr, '=', meta.path
+            print >> sys.stderr, '=', xpath
         # Shouldn't have to check for risky paths here (omitted above).
-        try:
-            dir.apply_to_path(path=dir.path,
-                              restore_numeric_ids=restore_numeric_ids)
-        except MetadataApplyError, e:
-            add_error(e)
+        dir.apply_to_path(path=dir.path,
+                          restore_numeric_ids=restore_numeric_ids)