arthur.barton.de Git - bup.git/blobdiff - lib/bup/metadata.py
metadata: use python-xattr pyxattr compat code
[bup.git] / lib / bup / metadata.py
index f77edbe3aef09f1d7d91fa1a9b89d0747b3a0e77..fe1d5f378e5750da4d52c707241c0d7787f4cc19 100644 (file)
@@ -5,27 +5,33 @@
 # This code is covered under the terms of the GNU Library General
 # Public License as described in the bup LICENSE file.
 
+from __future__ import absolute_import, print_function
+from copy import deepcopy
+from errno import EACCES, EINVAL, ENOTTY, ENOSYS, EOPNOTSUPP
 from io import BytesIO
+from time import gmtime, strftime
 import errno, os, sys, stat, time, pwd, grp, socket, struct
 
 from bup import vint, xstat
 from bup.drecurse import recursive_dirlist
 from bup.helpers import add_error, mkdirp, log, is_superuser, format_filesize
-from bup.helpers import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
+from bup.pwdgrp import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
 from bup.xstat import utime, lutime
 
 xattr = None
 if sys.platform.startswith('linux'):
+    # prefer python-pyxattr (it's a lot faster), but fall back to python-xattr
+    # as the two are incompatible and only one can be installed on a system
     try:
         import xattr
     except ImportError:
         log('Warning: Linux xattr support missing; install python-pyxattr.\n')
-    if xattr:
+    if xattr and getattr(xattr, 'get_all', None) is None:
         try:
-            xattr.get_all
-        except AttributeError:
+            from xattr import pyxattr_compat as xattr
+        except ImportError:
             log('Warning: python-xattr module is too old; '
-                'install python-pyxattr instead.\n')
+                'upgrade or install python-pyxattr instead.\n')
             xattr = None
 
 posix1e = None
@@ -179,7 +185,7 @@ def _clean_up_extract_path(p):
 # must be unique, and must *never* be changed.
 _rec_tag_end = 0
 _rec_tag_path = 1
-_rec_tag_common = 2 # times, user, group, type, perms, etc. (legacy/broken)
+_rec_tag_common_v1 = 2 # times, user, group, type, perms, etc. (legacy/broken)
 _rec_tag_symlink_target = 3
 _rec_tag_posix1e_acl = 4      # getfacl(1), setfacl(1), etc.
 _rec_tag_nfsv4_acl = 5        # intended to supplant posix1e? (unimplemented)
@@ -187,6 +193,9 @@ _rec_tag_linux_attr = 6       # lsattr(1) chattr(1)
 _rec_tag_linux_xattr = 7      # getfattr(1) setfattr(1)
 _rec_tag_hardlink_target = 8 # hard link target path
 _rec_tag_common_v2 = 9 # times, user, group, type, perms, etc. (current)
+_rec_tag_common_v3 = 10  # adds optional size to v2
+
+_warned_about_attr_einval = None
 
 
 class ApplyError(Exception):
@@ -216,6 +225,7 @@ class Metadata:
     def _add_common(self, path, st):
         assert(st.st_uid >= 0)
         assert(st.st_gid >= 0)
+        self.size = st.st_size
         self.uid = st.st_uid
         self.gid = st.st_gid
         self.atime = st.st_atime
@@ -246,7 +256,8 @@ class Metadata:
             and self.mtime == other.mtime \
             and self.ctime == other.ctime \
             and self.user == other.user \
-            and self.group == other.group
+            and self.group == other.group \
+            and self.size == other.size
 
     def _encode_common(self):
         if not self.mode:
@@ -254,7 +265,7 @@ class Metadata:
         atime = xstat.nsecs_to_timespec(self.atime)
         mtime = xstat.nsecs_to_timespec(self.mtime)
         ctime = xstat.nsecs_to_timespec(self.ctime)
-        result = vint.pack('vvsvsvvVvVvV',
+        result = vint.pack('vvsvsvvVvVvVv',
                            self.mode,
                            self.uid,
                            self.user,
@@ -266,26 +277,36 @@ class Metadata:
                            mtime[0],
                            mtime[1],
                            ctime[0],
-                           ctime[1])
+                           ctime[1],
+                           self.size if self.size is not None else -1)
         return result
 
-    def _load_common_rec(self, port, legacy_format=False):
-        unpack_fmt = 'vvsvsvvVvVvV'
-        if legacy_format:
+    def _load_common_rec(self, port, version=3):
+        if version == 3:
+            # Added trailing size to v2, negative when None.
+            unpack_fmt = 'vvsvsvvVvVvVv'
+        elif version == 2:
+            unpack_fmt = 'vvsvsvvVvVvV'
+        elif version == 1:
             unpack_fmt = 'VVsVsVvVvVvV'
+        else:
+            raise Exception('unexpected common_rec version %d' % version)
         data = vint.read_bvec(port)
-        (self.mode,
-         self.uid,
-         self.user,
-         self.gid,
-         self.group,
-         self.rdev,
-         self.atime,
-         atime_ns,
-         self.mtime,
-         mtime_ns,
-         self.ctime,
-         ctime_ns) = vint.unpack(unpack_fmt, data)
+        values = vint.unpack(unpack_fmt, data)
+        if version == 3:
+            (self.mode, self.uid, self.user, self.gid, self.group,
+             self.rdev,
+             self.atime, atime_ns,
+             self.mtime, mtime_ns,
+             self.ctime, ctime_ns, size) = values
+            if size >= 0:
+                self.size = size
+        else:
+            (self.mode, self.uid, self.user, self.gid, self.group,
+             self.rdev,
+             self.atime, atime_ns,
+             self.mtime, mtime_ns,
+             self.ctime, ctime_ns) = values
         self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
         self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
         self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
@@ -338,7 +359,7 @@ class Metadata:
             os.mknod(path, 0o600 | stat.S_IFBLK, self.rdev)
         elif stat.S_ISFIFO(self.mode):
             assert(self._recognized_file_type())
-            os.mknod(path, 0o600 | stat.S_IFIFO)
+            os.mkfifo(path, 0o600 | stat.S_IFIFO)
         elif stat.S_ISSOCK(self.mode):
             try:
                 os.mknod(path, 0o600 | stat.S_IFSOCK)
@@ -460,7 +481,12 @@ class Metadata:
         return self.symlink_target
 
     def _load_symlink_target_rec(self, port):
-        self.symlink_target = vint.read_bvec(port)
+        target = vint.read_bvec(port)
+        self.symlink_target = target
+        if self.size is None:
+            self.size = len(target)
+        else:
+            assert(self.size == len(target))
 
 
     ## Hardlink targets
@@ -585,9 +611,17 @@ class Metadata:
             except OSError as e:
                 if e.errno == errno.EACCES:
                     add_error('read Linux attr: %s' % e)
-                elif e.errno in (errno.ENOTTY, errno.ENOSYS, errno.EOPNOTSUPP):
+                elif e.errno in (ENOTTY, ENOSYS, EOPNOTSUPP):
                     # Assume filesystem doesn't support attrs.
                     return
+                elif e.errno == EINVAL:
+                    global _warned_about_attr_einval
+                    if not _warned_about_attr_einval:
+                        log("Ignoring attr EINVAL;"
+                            + " if you're not using ntfs-3g, please report: "
+                            + repr(path) + '\n')
+                        _warned_about_attr_einval = True
+                    return
                 else:
                     raise
 
@@ -615,10 +649,13 @@ class Metadata:
             try:
                 set_linux_file_attr(path, self.linux_attr)
             except OSError as e:
-                if e.errno in (errno.ENOTTY, errno.EOPNOTSUPP, errno.ENOSYS,
-                               errno.EACCES):
+                if e.errno in (EACCES, ENOTTY, EOPNOTSUPP, ENOSYS):
                     raise ApplyError('Linux chattr: %s (0x%s)'
                                      % (e, hex(self.linux_attr)))
+                elif e.errno == EINVAL:
+                    msg = "if you're not using ntfs-3g, please report"
+                    raise ApplyError('Linux chattr: %s (0x%s) (%s)'
+                                     % (e, hex(self.linux_attr), msg))
                 else:
                     raise
 
@@ -704,36 +741,75 @@ class Metadata:
         self.linux_xattr = None
         self.posix1e_acl = None
 
+    def __eq__(self, other):
+        if not isinstance(other, Metadata): return False
+        if self.mode != other.mode: return False
+        if self.mtime != other.mtime: return False
+        if self.ctime != other.ctime: return False
+        if self.atime != other.atime: return False
+        if self.path != other.path: return False
+        if self.uid != other.uid: return False
+        if self.gid != other.gid: return False
+        if self.size != other.size: return False
+        if self.user != other.user: return False
+        if self.group != other.group: return False
+        if self.symlink_target != other.symlink_target: return False
+        if self.hardlink_target != other.hardlink_target: return False
+        if self.linux_attr != other.linux_attr: return False
+        if self.posix1e_acl != other.posix1e_acl: return False
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return hash((self.mode,
+                     self.mtime,
+                     self.ctime,
+                     self.atime,
+                     self.path,
+                     self.uid,
+                     self.gid,
+                     self.size,
+                     self.user,
+                     self.group,
+                     self.symlink_target,
+                     self.hardlink_target,
+                     self.linux_attr,
+                     self.posix1e_acl))
+
     def __repr__(self):
         result = ['<%s instance at %s' % (self.__class__, hex(id(self)))]
-        if self.path:
+        if self.path is not None:
             result += ' path:' + repr(self.path)
-        if self.mode:
+        if self.mode is not None:
             result += ' mode:' + repr(xstat.mode_str(self.mode)
-                                      + '(%s)' % hex(self.mode))
-        if self.uid:
+                                      + '(%s)' % oct(self.mode))
+        if self.uid is not None:
             result += ' uid:' + str(self.uid)
-        if self.gid:
+        if self.gid is not None:
             result += ' gid:' + str(self.gid)
-        if self.user:
+        if self.user is not None:
             result += ' user:' + repr(self.user)
-        if self.group:
+        if self.group is not None:
             result += ' group:' + repr(self.group)
-        if self.size:
+        if self.size is not None:
             result += ' size:' + repr(self.size)
         for name, val in (('atime', self.atime),
                           ('mtime', self.mtime),
                           ('ctime', self.ctime)):
-            result += ' %s:%r' \
-                % (name,
-                   time.strftime('%Y-%m-%d %H:%M %z',
-                                 time.gmtime(xstat.fstime_floor_secs(val))))
+            if val is not None:
+                result += ' %s:%r (%d)' \
+                          % (name,
+                             strftime('%Y-%m-%d %H:%M %z',
+                                      gmtime(xstat.fstime_floor_secs(val))),
+                             val)
         result += '>'
         return ''.join(result)
 
     def write(self, port, include_path=True):
         records = include_path and [(_rec_tag_path, self._encode_path())] or []
-        records.extend([(_rec_tag_common_v2, self._encode_common()),
+        records.extend([(_rec_tag_common_v3, self._encode_common()),
                         (_rec_tag_symlink_target,
                          self._encode_symlink_target()),
                         (_rec_tag_hardlink_target,
@@ -752,6 +828,9 @@ class Metadata:
         self.write(port, include_path)
         return port.getvalue()
 
+    def copy(self):
+        return deepcopy(self)
+
     @staticmethod
     def read(port):
         # This method should either return a valid Metadata object,
@@ -767,8 +846,10 @@ class Metadata:
             while True: # only exit is error (exception) or _rec_tag_end
                 if tag == _rec_tag_path:
                     result._load_path_rec(port)
+                elif tag == _rec_tag_common_v3:
+                    result._load_common_rec(port, version=3)
                 elif tag == _rec_tag_common_v2:
-                    result._load_common_rec(port)
+                    result._load_common_rec(port, version=2)
                 elif tag == _rec_tag_symlink_target:
                     result._load_symlink_target_rec(port)
                 elif tag == _rec_tag_hardlink_target:
@@ -781,8 +862,8 @@ class Metadata:
                     result._load_linux_xattr_rec(port)
                 elif tag == _rec_tag_end:
                     return result
-                elif tag == _rec_tag_common: # Should be very rare.
-                    result._load_common_rec(port, legacy_format = True)
+                elif tag == _rec_tag_common_v1: # Should be very rare.
+                    result._load_common_rec(port, version=1)
                 else: # unknown record
                     vint.skip_bvec(port)
                 tag = vint.read_vuint(port)
@@ -828,11 +909,14 @@ class Metadata:
 
 
 def from_path(path, statinfo=None, archive_path=None,
-              save_symlinks=True, hardlink_target=None):
+              save_symlinks=True, hardlink_target=None,
+              normalized=False):
+    """Return the metadata associated with the path.  When normalized is
+    true, return the metadata appropriate for a typical save, which
+    may or may not be all of it."""
     result = Metadata()
     result.path = archive_path
     st = statinfo or xstat.lstat(path)
-    result.size = st.st_size
     result._add_common(path, st)
     if save_symlinks:
         result._add_symlink_target(path, st)
@@ -840,6 +924,10 @@ def from_path(path, statinfo=None, archive_path=None,
     result._add_posix1e_acl(path, st)
     result._add_linux_attr(path, st)
     result._add_linux_xattr(path, st)
+    if normalized:
+        # Only store sizes for regular files and symlinks for now.
+        if not (stat.S_ISREG(result.mode) or stat.S_ISLNK(result.mode)):
+            result.size = None
     return result
 
 
@@ -864,7 +952,7 @@ def save_tree(output_file, paths,
             m = from_path(p, statinfo=st, archive_path=safe_path,
                           save_symlinks=save_symlinks)
             if verbose:
-                print >> sys.stderr, m.path
+                print(m.path, file=sys.stderr)
             m.write(output_file, include_path=write_paths)
     else:
         start_dir = os.getcwd()
@@ -876,7 +964,7 @@ def save_tree(output_file, paths,
                 m = from_path(p, statinfo=st, archive_path=safe_path,
                               save_symlinks=save_symlinks)
                 if verbose:
-                    print >> sys.stderr, m.path
+                    print(m.path, file=sys.stderr)
                 m.write(output_file, include_path=write_paths)
                 os.chdir(dirlist_dir)
         finally:
@@ -924,7 +1012,7 @@ def summary_str(meta, numeric_ids = False, classification = None,
         mode_str = xstat.mode_str(meta.mode)
         symlink_target = meta.symlink_target
         mtime_secs = xstat.fstime_floor_secs(meta.mtime)
-        mtime_str = time.strftime('%Y-%m-%d %H:%M', time.localtime(mtime_secs))
+        mtime_str = strftime('%Y-%m-%d %H:%M', time.localtime(mtime_secs))
         if meta.user and not numeric_ids:
             user_str = meta.user
         elif meta.uid != None:
@@ -986,7 +1074,7 @@ def detailed_str(meta, fields = None):
                                            os.minor(meta.rdev)))
         else:
             result.append('rdev: 0')
-    if 'size' in fields and meta.size:
+    if 'size' in fields and meta.size is not None:
         result.append('size: ' + str(meta.size))
     if 'uid' in fields:
         result.append('uid: ' + str(meta.uid))
@@ -1046,20 +1134,19 @@ def display_archive(file):
         first_item = True
         for meta in _ArchiveIterator(file):
             if not first_item:
-                print
-            print detailed_str(meta)
+                print()
+            print(detailed_str(meta))
             first_item = False
     elif verbose > 0:
         for meta in _ArchiveIterator(file):
-            print summary_str(meta)
+            print(summary_str(meta))
     elif verbose == 0:
         for meta in _ArchiveIterator(file):
             if not meta.path:
-                print >> sys.stderr, \
-                    'bup: no metadata path, but asked to only display path', \
-                    '(increase verbosity?)'
+                print('bup: no metadata path, but asked to only display path'
+                     '(increase verbosity?)')
                 sys.exit(1)
-            print meta.path
+            print(meta.path)
 
 
 def start_extract(file, create_symlinks=True):
@@ -1067,7 +1154,7 @@ def start_extract(file, create_symlinks=True):
         if not meta: # Hit end record.
             break
         if verbose:
-            print >> sys.stderr, meta.path
+            print(meta.path, file=sys.stderr)
         xpath = _clean_up_extract_path(meta.path)
         if not xpath:
             add_error(Exception('skipping risky path "%s"' % meta.path))
@@ -1089,7 +1176,7 @@ def finish_extract(file, restore_numeric_ids=False):
                 all_dirs.append(meta)
             else:
                 if verbose:
-                    print >> sys.stderr, meta.path
+                    print(meta.path, file=sys.stderr)
                 meta.apply_to_path(path=xpath,
                                    restore_numeric_ids=restore_numeric_ids)
     all_dirs.sort(key = lambda x : len(x.path), reverse=True)
@@ -1097,7 +1184,7 @@ def finish_extract(file, restore_numeric_ids=False):
         # Don't need to check xpath -- won't be in all_dirs if not OK.
         xpath = _clean_up_extract_path(dir.path)
         if verbose:
-            print >> sys.stderr, dir.path
+            print(dir.path, file=sys.stderr)
         dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids)
 
 
@@ -1114,20 +1201,20 @@ def extract(file, restore_numeric_ids=False, create_symlinks=True):
         else:
             meta.path = xpath
             if verbose:
-                print >> sys.stderr, '+', meta.path
+                print('+', meta.path, file=sys.stderr)
             _set_up_path(meta, create_symlinks=create_symlinks)
             if os.path.isdir(meta.path):
                 all_dirs.append(meta)
             else:
                 if verbose:
-                    print >> sys.stderr, '=', meta.path
+                    print('=', meta.path, file=sys.stderr)
                 meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
     all_dirs.sort(key = lambda x : len(x.path), reverse=True)
     for dir in all_dirs:
         # Don't need to check xpath -- won't be in all_dirs if not OK.
         xpath = _clean_up_extract_path(dir.path)
         if verbose:
-            print >> sys.stderr, '=', xpath
+            print('=', xpath, file=sys.stderr)
         # Shouldn't have to check for risky paths here (omitted above).
         dir.apply_to_path(path=dir.path,
                           restore_numeric_ids=restore_numeric_ids)