arthur.barton.de Git - bup.git/blobdiff - lib/bup/metadata.py
metadata.py: be careful with the umask() when restoring symlinks.
[bup.git] / lib / bup / metadata.py
index c598701e70d3a0b07ba36afa42715399ed3a2bea..6dae38b83bc08404b5952ce87e2d5c85c150ecb8 100644 (file)
@@ -4,16 +4,39 @@
 #
 # This code is covered under the terms of the GNU Library General
 # Public License as described in the bup LICENSE file.
-
-import errno, os, sys, stat, pwd, grp, struct, xattr, posix1e, re
-
+import errno, os, sys, stat, pwd, grp, struct, re
 from cStringIO import StringIO
 from bup import vint
-from bup.helpers import add_error, mkdirp, log, utime, lutime, lstat
+from bup.drecurse import recursive_dirlist
+from bup.helpers import add_error, mkdirp, log
+from bup.xstat import utime, lutime, lstat, FSTime
 import bup._helpers as _helpers
 
-if _helpers.get_linux_file_attr:
+try:
+    import xattr
+except ImportError:
+    log('Warning: Linux xattr support missing; install python-pyxattr.\n')
+    xattr = None
+if xattr:
+    try:
+        xattr.get_all
+    except AttributeError:
+        log('Warning: python-xattr module is too old; '
+            'install python-pyxattr instead.\n')
+        xattr = None
+try:
+    import posix1e
+except ImportError:
+    log('Warning: POSIX ACL support missing; install python-pylibacl.\n')
+    posix1e = None
+try:
     from bup._helpers import get_linux_file_attr, set_linux_file_attr
+except ImportError:
+    # No need for a warning here; the only reason they won't exist is that we're
+    # not on Linux, in which case files don't have any linux attrs anyway, so
+    # lacking the functions isn't a problem.
+    get_linux_file_attr = set_linux_file_attr = None
+    
 
 # WARNING: the metadata encoding is *not* stable yet.  Caveat emptor!
 
@@ -74,7 +97,7 @@ def _clean_up_path_for_archive(p):
 
     # Take everything after any '/../'.
     pos = result.rfind('/../')
-    if(pos != -1):
+    if pos != -1:
         result = result[result.rfind('/../') + 4:]
 
     # Take everything after any remaining '../'.
@@ -132,18 +155,6 @@ def _clean_up_extract_path(p):
         return result
 
 
-def _normalize_ts(stamp):
-    # For the purposes of normalization, t = s + ns.
-    s = stamp[0]
-    ns = stamp[1]
-    if ns < 0 or ns >= 10**9:
-        t = (s * 10**9) + ns
-        if t == 0:
-            return (0, 0)
-        return ((t / 10**9), t % 10**9)
-    return stamp
-
-
 # These tags are currently conceptually private to Metadata, and they
 # must be unique, and must *never* be changed.
 _rec_tag_end = 0
@@ -156,12 +167,7 @@ _rec_tag_linux_attr = 6       # lsattr(1) chattr(1)
 _rec_tag_linux_xattr = 7      # getfattr(1) setfattr(1)
 
 
-class MetadataAcquisitionError(Exception):
-    # Thrown when unable to extract any given bit of metadata from a path.
-    pass
-
-
-class MetadataApplicationError(Exception):
+class ApplyError(Exception):
     # Thrown when unable to apply any given bit of metadata to a path.
     pass
 
@@ -184,13 +190,20 @@ class Metadata:
         self.atime = st.st_atime
         self.mtime = st.st_mtime
         self.ctime = st.st_ctime
-        self.owner = pwd.getpwuid(st.st_uid)[0]
-        self.group = grp.getgrgid(st.st_gid)[0]
+        self.owner = self.group = ''
+        try:
+            self.owner = pwd.getpwuid(st.st_uid)[0]
+        except KeyError, e:
+            add_error("no user name for id %s '%s'" % (st.st_uid, path))
+        try:
+            self.group = grp.getgrgid(st.st_gid)[0]
+        except KeyError, e:
+            add_error("no group name for id %s '%s'" % (st.st_gid, path))
 
     def _encode_common(self):
-        atime = _normalize_ts(self.atime)
-        mtime = _normalize_ts(self.mtime)
-        ctime = _normalize_ts(self.ctime)
+        atime = self.atime.to_timespec()
+        mtime = self.mtime.to_timespec()
+        ctime = self.ctime.to_timespec()
         result = vint.pack('VVsVsVvVvVvV',
                            self.mode,
                            self.uid,
@@ -220,21 +233,9 @@ class Metadata:
          mtime_ns,
          self.ctime,
          ctime_ns) = vint.unpack('VVsVsVvVvVvV', data)
-        self.atime = (self.atime, atime_ns)
-        self.mtime = (self.mtime, mtime_ns)
-        self.ctime = (self.ctime, ctime_ns)
-        if self.atime[1] >= 10**9:
-            path = ' for ' + self.path if self.path else ''
-            log('bup: warning - normalizing bad atime%s\n' % (path))
-            self.atime = _normalize_ts(self.atime)
-        if self.mtime[1] >= 10**9:
-            path = ' for ' + self.path if self.path else ''
-            log('bup: warning - normalizing bad mtime%s\n' % (path))
-            self.mtime = _normalize_ts(self.mtime)
-        if self.ctime[1] >= 10**9:
-            path = ' for ' + self.path if self.path else ''
-            log('bup: warning - normalizing bad ctime%s\n' % (path))
-            self.ctime = _normalize_ts(self.ctime)
+        self.atime = FSTime.from_timespec((self.atime, atime_ns))
+        self.mtime = FSTime.from_timespec((self.mtime, mtime_ns))
+        self.ctime = FSTime.from_timespec((self.ctime, ctime_ns))
 
     def _create_via_common_rec(self, path, create_symlinks=True):
         # If the path already exists and is a dir, try rmdir.
@@ -242,7 +243,7 @@ class Metadata:
         st = None
         try:
             st = lstat(path)
-        except IOError, e:
+        except OSError, e:
             if e.errno != errno.ENOENT:
                 raise
         if st:
@@ -258,7 +259,8 @@ class Metadata:
                 os.unlink(path)
 
         if stat.S_ISREG(self.mode):
-            os.mknod(path, 0600 | stat.S_IFREG)
+            fd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL, 0600)
+            os.close(fd)
         elif stat.S_ISDIR(self.mode):
             os.mkdir(path, 0700)
         elif stat.S_ISCHR(self.mode):
@@ -268,56 +270,80 @@ class Metadata:
         elif stat.S_ISFIFO(self.mode):
             os.mknod(path, 0600 | stat.S_IFIFO)
         elif stat.S_ISLNK(self.mode):
-            if(self.symlink_target and create_symlinks):
-                os.symlink(self.symlink_target, path)
+            if self.symlink_target and create_symlinks:
+                # on MacOS, symlink() permissions depend on umask, and there's no
+                # way to chown a symlink after creating it, so we have to
+                # be careful here!
+                oldumask = os.umask((self.mode & 0777) ^ 0777)
+                try:
+                    os.symlink(self.symlink_target, path)
+                finally:
+                    os.umask(oldumask)
         # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
         # Otherwise, do nothing.
 
     def _apply_common_rec(self, path, restore_numeric_ids=False):
         # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
+        # EACCES errors at this stage are fatal for the current path.
         if stat.S_ISLNK(self.mode):
-            lutime(path, (self.atime, self.mtime))
+            try:
+                lutime(path, (self.atime, self.mtime))
+            except OSError, e:
+                if e.errno == errno.EACCES:
+                    raise ApplyError('lutime: %s' % e)
+                else:
+                    raise
         else:
-            utime(path, (self.atime, self.mtime))
-        if stat.S_ISREG(self.mode) \
-                | stat.S_ISDIR(self.mode) \
-                | stat.S_ISCHR(self.mode) \
-                | stat.S_ISBLK(self.mode) \
-                | stat.S_ISLNK(self.mode) \
-                | stat.S_ISFIFO(self.mode):
-            # Be safe.
-            if _have_lchmod:
-                os.lchmod(path, 0)
-            elif not stat.S_ISLNK(self.mode):
-                os.chmod(path, 0)
-
-            # Don't try to restore owner unless we're root, and even
-            # if asked, don't try to restore the owner or group if
-            # it doesn't exist in the system db.
-            uid = self.uid
-            gid = self.gid
-            if not restore_numeric_ids:
-                if os.geteuid() == 0:
+            try:
+                utime(path, (self.atime, self.mtime))
+            except OSError, e:
+                if e.errno == errno.EACCES:
+                    raise ApplyError('utime: %s' % e)
+                else:
+                    raise
+
+        # Don't try to restore owner unless we're root, and even
+        # if asked, don't try to restore the owner or group if
+        # it doesn't exist in the system db.
+        uid = self.uid
+        gid = self.gid
+        if not restore_numeric_ids:
+            if not self.owner:
+                uid = -1
+                add_error('ignoring missing owner for "%s"\n' % path)
+            else:
+                if os.geteuid() != 0:
+                    uid = -1 # Not root; assume we can't change owner.
+                else:
                     try:
                         uid = pwd.getpwnam(self.owner)[2]
                     except KeyError:
                         uid = -1
-                        log('bup: ignoring unknown owner %s for "%s"\n'
-                            % (self.owner, path))
-                else:
-                    uid = -1 # Not root; assume we can't change owner.
+                        fmt = 'ignoring unknown owner %s for "%s"\n'
+                        add_error(fmt % (self.owner, path))
+            if not self.group:
+                gid = -1
+                add_error('ignoring missing group for "%s"\n' % path)
+            else:
                 try:
                     gid = grp.getgrnam(self.group)[2]
                 except KeyError:
                     gid = -1
-                    log('bup: ignoring unknown group %s for "%s"\n'
-                        % (self.group, path))
+                    add_error('ignoring unknown group %s for "%s"\n'
+                              % (self.group, path))
+
+        try:
             os.lchown(path, uid, gid)
+        except OSError, e:
+            if e.errno == errno.EPERM:
+                add_error('lchown: %s' %  e)
+            else:
+                raise
 
-            if _have_lchmod:
-                os.lchmod(path, stat.S_IMODE(self.mode))
-            elif not stat.S_ISLNK(self.mode):
-                os.chmod(path, stat.S_IMODE(self.mode))
+        if _have_lchmod:
+            os.lchmod(path, stat.S_IMODE(self.mode))
+        elif not stat.S_ISLNK(self.mode):
+            os.chmod(path, stat.S_IMODE(self.mode))
 
 
     ## Path records
@@ -335,8 +361,11 @@ class Metadata:
     ## Symlink targets
 
     def _add_symlink_target(self, path, st):
-        if(stat.S_ISLNK(st.st_mode)):
-            self.symlink_target = os.readlink(path)
+        try:
+            if stat.S_ISLNK(st.st_mode):
+                self.symlink_target = os.readlink(path)
+        except OSError, e:
+            add_error('readlink: %s' % e)
 
     def _encode_symlink_target(self):
         return self.symlink_target
@@ -354,6 +383,7 @@ class Metadata:
     # The numeric/text distinction only matters when reading/restoring
     # a stored record.
     def _add_posix1e_acl(self, path, st):
+        if not posix1e: return
         if not stat.S_ISLNK(st.st_mode):
             try:
                 if posix1e.has_extended(path):
@@ -374,7 +404,7 @@ class Metadata:
             num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
             acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
                         acls[1].to_any_text('', '\n', num_flags)]
-            if(len(acls) < 3):
+            if len(acls) < 3:
                 acl_reps += ['', '']
             else:
                 acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
@@ -387,14 +417,19 @@ class Metadata:
     def _load_posix1e_acl_rec(self, port):
         data = vint.read_bvec(port)
         acl_reps = vint.unpack('ssss', data)
-        if(acl_reps[2] == ''):
+        if acl_reps[2] == '':
             acl_reps = acl_reps[:2]
-        self.posix1e_acl = [posix1e.ACL(x) for x in acl_reps]
+        self.posix1e_acl = [posix1e.ACL(text=x) for x in acl_reps]
 
     def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
-        if(self.posix1e_acl):
+        if not posix1e:
+            if self.posix1e_acl:
+                add_error("%s: can't restore ACLs; posix1e support missing.\n"
+                          % path)
+            return
+        if self.posix1e_acl:
             acls = self.posix1e_acl
-            if(len(acls) > 2):
+            if len(acls) > 2:
                 if restore_numeric_ids:
                     acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
                 else:
@@ -408,10 +443,19 @@ class Metadata:
     ## Linux attributes (lsattr(1), chattr(1))
 
     def _add_linux_attr(self, path, st):
+        if not get_linux_file_attr: return
         if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
-            attr = get_linux_file_attr(path)
-            if(attr != 0):
-                self.linux_attr = get_linux_file_attr(path)
+            try:
+                attr = get_linux_file_attr(path)
+                if attr != 0:
+                    self.linux_attr = attr
+            except OSError, e:
+                if e.errno == errno.EACCES:
+                    add_error('read Linux attr: %s' % e)
+                elif e.errno == errno.ENOTTY: # Inappropriate ioctl for device.
+                    add_error('read Linux attr: %s' % e)
+                else:
+                    raise
 
     def _encode_linux_attr(self):
         if self.linux_attr:
@@ -424,13 +468,18 @@ class Metadata:
         self.linux_attr = vint.unpack('V', data)[0]
 
     def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
-        if(self.linux_attr):
+        if self.linux_attr:
+            if not set_linux_file_attr:
+                add_error("%s: can't restore linuxattrs: "
+                          "linuxattr support missing.\n" % path)
+                return
             set_linux_file_attr(path, self.linux_attr)
 
 
     ## Linux extended attributes (getfattr(1), setfattr(1))
 
     def _add_linux_xattr(self, path, st):
+        if not xattr: return
         try:
             self.linux_xattr = xattr.get_all(path, nofollow=True)
         except EnvironmentError, e:
@@ -439,8 +488,8 @@ class Metadata:
 
     def _encode_linux_xattr(self):
         if self.linux_xattr:
-            result = vint.pack('V', len(items))
-            for name, value in self.attrs:
+            result = vint.pack('V', len(self.linux_xattr))
+            for name, value in self.linux_xattr:
                 result += vint.pack('ss', name, value)
             return result
         else:
@@ -457,9 +506,32 @@ class Metadata:
         self.linux_xattr = result
 
     def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
-        if(self.linux_xattr):
+        if not xattr:
+            if self.linux_xattr:
+                add_error("%s: can't restore xattr; xattr support missing.\n"
+                          % path)
+            return
+        existing_xattrs = set(xattr.list(path, nofollow=True))
+        if self.linux_xattr:
             for k, v in self.linux_xattr:
-                xattr.set(path, k, v, nofollow=True)
+                if k not in existing_xattrs \
+                        or v != xattr.get(path, k, nofollow=True):
+                    try:
+                        xattr.set(path, k, v, nofollow=True)
+                    except IOError, e:
+                        if e.errno == errno.EPERM:
+                            raise ApplyError('xattr.set: %s' % e)
+                        else:
+                            raise
+                existing_xattrs -= frozenset([k])
+            for k in existing_xattrs:
+                try:
+                    xattr.remove(path, k, nofollow=True)
+                except IOError, e:
+                    if e.errno == errno.EPERM:
+                        raise ApplyError('xattr.remove: %s' % e)
+                    else:
+                        raise
 
     def __init__(self):
         # optional members
@@ -471,7 +543,7 @@ class Metadata:
         self.posix1e_acl_default = None
 
     def write(self, port, include_path=True):
-        records = [(_rec_tag_path, self._encode_path())] if include_path else []
+        records = include_path and [(_rec_tag_path, self._encode_path())] or []
         records.extend([(_rec_tag_common, self._encode_common()),
                         (_rec_tag_symlink_target, self._encode_symlink_target()),
                         (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
@@ -491,7 +563,7 @@ class Metadata:
         tag = vint.read_vuint(port)
         try: # From here on, EOF is an error.
             result = Metadata()
-            while(True): # only exit is error (exception) or _rec_tag_end
+            while True: # only exit is error (exception) or _rec_tag_end
                 if tag == _rec_tag_path:
                     result._load_path_rec(port)
                 elif tag == _rec_tag_common:
@@ -499,7 +571,7 @@ class Metadata:
                 elif tag == _rec_tag_symlink_target:
                     result._load_symlink_target_rec(port)
                 elif tag == _rec_tag_posix1e_acl:
-                    result._load_posix1e_acl(port)
+                    result._load_posix1e_acl_rec(port)
                 elif tag ==_rec_tag_nfsv4_acl:
                     result._load_nfsv4_acl_rec(port)
                 elif tag == _rec_tag_linux_attr:
@@ -527,77 +599,65 @@ class Metadata:
         if not path:
             raise Exception('Metadata.apply_to_path() called with no path');
         num_ids = restore_numeric_ids
-        try: # Later we may want to push this down and make it finer grained.
+        try:
             self._apply_common_rec(path, restore_numeric_ids=num_ids)
             self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
             self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
             self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
-        except Exception, e:
-            raise MetadataApplicationError(str(e))
+        except ApplyError, e:
+            add_error(e)
 
 
-def from_path(path, archive_path=None, save_symlinks=True):
+def from_path(path, statinfo=None, archive_path=None, save_symlinks=True):
     result = Metadata()
     result.path = archive_path
-    st = lstat(path)
-    try: # Later we may want to push this down and make it finer grained.
-        result._add_common(path, st)
-        if(save_symlinks):
-            result._add_symlink_target(path, st)
-        result._add_posix1e_acl(path, st)
-        result._add_linux_attr(path, st)
-        result._add_linux_xattr(path, st)
-    except Exception, e:
-        raise MetadataAcquisitionError(str(e))
+    st = statinfo or lstat(path)
+    result._add_common(path, st)
+    if save_symlinks:
+        result._add_symlink_target(path, st)
+    result._add_posix1e_acl(path, st)
+    result._add_linux_attr(path, st)
+    result._add_linux_xattr(path, st)
     return result
 
 
 def save_tree(output_file, paths,
               recurse=False,
               write_paths=True,
-              save_symlinks=True):
-    for p in paths:
-        safe_path = _clean_up_path_for_archive(p)
-        if(safe_path != p):
-            log('bup: archiving "%s" as "%s"\n' % (p, safe_path))
-
-        # Handle path itself.
-        try:
-            m = from_path(p, archive_path=safe_path,
+              save_symlinks=True,
+              xdev=False):
+
+    # Issue top-level rewrite warnings.
+    for path in paths:
+        safe_path = _clean_up_path_for_archive(path)
+        if safe_path != path:
+            log('archiving "%s" as "%s"\n' % (path, safe_path))
+
+    start_dir = os.getcwd()
+    try:
+        for (p, st) in recursive_dirlist(paths, xdev=xdev):
+            dirlist_dir = os.getcwd()
+            os.chdir(start_dir)
+            safe_path = _clean_up_path_for_archive(p)
+            m = from_path(p, statinfo=st, archive_path=safe_path,
                           save_symlinks=save_symlinks)
-        except MetadataAcquisitionError, e:
-            add_error(e)
-
-        if verbose:
-            print >> sys.stderr, m.path
-        m.write(output_file, include_path=write_paths)
-
-        if recurse and os.path.isdir(p):
-            for root, dirs, files in os.walk(p, onerror=add_error):
-                items = files + dirs
-                for sub_path in items:
-                    full_path = os.path.join(root, sub_path)
-                    safe_path = _clean_up_path_for_archive(full_path)
-                    try:
-                        m = from_path(full_path,
-                                      archive_path=safe_path,
-                                      save_symlinks=save_symlinks)
-                    except MetadataAcquisitionError, e:
-                        add_error(e)
-                    if verbose:
-                        print >> sys.stderr, m.path
-                    m.write(output_file, include_path=write_paths)
+            if verbose:
+                print >> sys.stderr, m.path
+            m.write(output_file, include_path=write_paths)
+            os.chdir(dirlist_dir)
+    finally:
+        os.chdir(start_dir)
 
 
 def _set_up_path(meta, create_symlinks=True):
     # Allow directories to exist as a special case -- might have
     # been created by an earlier longer path.
     if meta.isdir():
-        mkdirp(meta.path, 0700)
+        mkdirp(meta.path)
     else:
         parent = os.path.dirname(meta.path)
         if parent:
-            mkdirp(parent, 0700)
+            mkdirp(parent)
             meta.create_path(meta.path, create_symlinks=create_symlinks)
 
 
@@ -647,23 +707,15 @@ def finish_extract(file, restore_numeric_ids=False):
             else:
                 if verbose:
                     print >> sys.stderr, meta.path
-                try:
-                    meta.apply_to_path(path=xpath,
-                                       restore_numeric_ids=restore_numeric_ids)
-                except MetadataApplicationError, e:
-                    add_error(e)
-
+                meta.apply_to_path(path=xpath,
+                                   restore_numeric_ids=restore_numeric_ids)
     all_dirs.sort(key = lambda x : len(x.path), reverse=True)
     for dir in all_dirs:
         # Don't need to check xpath -- won't be in all_dirs if not OK.
         xpath = _clean_up_extract_path(dir.path)
         if verbose:
             print >> sys.stderr, dir.path
-        try:
-            dir.apply_to_path(path=xpath,
-                              restore_numeric_ids=restore_numeric_ids)
-        except MetadataApplicationError, e:
-            add_error(e)
+        dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids)
 
 
 def extract(file, restore_numeric_ids=False, create_symlinks=True):
@@ -684,19 +736,13 @@ def extract(file, restore_numeric_ids=False, create_symlinks=True):
             else:
                 if verbose:
                     print >> sys.stderr, '=', meta.path
-                try:
-                    meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
-                except MetadataApplicationError, e:
-                    add_error(e)
+                meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
     all_dirs.sort(key = lambda x : len(x.path), reverse=True)
     for dir in all_dirs:
         # Don't need to check xpath -- won't be in all_dirs if not OK.
-        xpath = _clean_up_extract_path(meta.path)
+        xpath = _clean_up_extract_path(dir.path)
         if verbose:
-            print >> sys.stderr, '=', meta.path
+            print >> sys.stderr, '=', xpath
         # Shouldn't have to check for risky paths here (omitted above).
-        try:
-            dir.apply_to_path(path=dir.path,
-                              restore_numeric_ids=restore_numeric_ids)
-        except MetadataApplicationError, e:
-            add_error(e)
+        dir.apply_to_path(path=dir.path,
+                          restore_numeric_ids=restore_numeric_ids)