X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Fmetadata.py;h=fe1d5f378e5750da4d52c707241c0d7787f4cc19;hb=2e45bac53d5f57404458d93ac5649348826bdd1a;hp=9c6d46a987516f3522df5896bb64b156959215c1;hpb=6546e8d046570c9d3d13c793d927b4f27fa99eb9;p=bup.git diff --git a/lib/bup/metadata.py b/lib/bup/metadata.py index 9c6d46a..fe1d5f3 100644 --- a/lib/bup/metadata.py +++ b/lib/bup/metadata.py @@ -4,26 +4,34 @@ # # This code is covered under the terms of the GNU Library General # Public License as described in the bup LICENSE file. + +from __future__ import absolute_import, print_function +from copy import deepcopy +from errno import EACCES, EINVAL, ENOTTY, ENOSYS, EOPNOTSUPP +from io import BytesIO +from time import gmtime, strftime import errno, os, sys, stat, time, pwd, grp, socket, struct -from cStringIO import StringIO + from bup import vint, xstat from bup.drecurse import recursive_dirlist from bup.helpers import add_error, mkdirp, log, is_superuser, format_filesize -from bup.helpers import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name +from bup.pwdgrp import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name from bup.xstat import utime, lutime xattr = None if sys.platform.startswith('linux'): + # prefer python-pyxattr (it's a lot faster), but fall back to python-xattr + # as the two are incompatible and only one can be installed on a system try: import xattr except ImportError: log('Warning: Linux xattr support missing; install python-pyxattr.\n') - if xattr: + if xattr and getattr(xattr, 'get_all', None) is None: try: - xattr.get_all - except AttributeError: + from xattr import pyxattr_compat as xattr + except ImportError: log('Warning: python-xattr module is too old; ' - 'install python-pyxattr instead.\n') + 'upgrade or install python-pyxattr instead.\n') xattr = None posix1e = None @@ -177,7 +185,7 @@ def _clean_up_extract_path(p): # must be unique, and must *never* be changed. _rec_tag_end = 0 _rec_tag_path = 1 -_rec_tag_common = 2 # times, user, group, type, perms, etc. (legacy/broken) +_rec_tag_common_v1 = 2 # times, user, group, type, perms, etc. (legacy/broken) _rec_tag_symlink_target = 3 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc. _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e? (unimplemented) @@ -185,6 +193,9 @@ _rec_tag_linux_attr = 6 # lsattr(1) chattr(1) _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1) _rec_tag_hardlink_target = 8 # hard link target path _rec_tag_common_v2 = 9 # times, user, group, type, perms, etc. (current) +_rec_tag_common_v3 = 10 # adds optional size to v2 + +_warned_about_attr_einval = None class ApplyError(Exception): @@ -214,6 +225,7 @@ class Metadata: def _add_common(self, path, st): assert(st.st_uid >= 0) assert(st.st_gid >= 0) + self.size = st.st_size self.uid = st.st_uid self.gid = st.st_gid self.atime = st.st_atime @@ -244,7 +256,8 @@ class Metadata: and self.mtime == other.mtime \ and self.ctime == other.ctime \ and self.user == other.user \ - and self.group == other.group + and self.group == other.group \ + and self.size == other.size def _encode_common(self): if not self.mode: @@ -252,7 +265,7 @@ class Metadata: atime = xstat.nsecs_to_timespec(self.atime) mtime = xstat.nsecs_to_timespec(self.mtime) ctime = xstat.nsecs_to_timespec(self.ctime) - result = vint.pack('vvsvsvvVvVvV', + result = vint.pack('vvsvsvvVvVvVv', self.mode, self.uid, self.user, @@ -264,26 +277,36 @@ class Metadata: mtime[0], mtime[1], ctime[0], - ctime[1]) + ctime[1], + self.size if self.size is not None else -1) return result - def _load_common_rec(self, port, legacy_format=False): - unpack_fmt = 'vvsvsvvVvVvV' - if legacy_format: + def _load_common_rec(self, port, version=3): + if version == 3: + # Added trailing size to v2, negative when None. + unpack_fmt = 'vvsvsvvVvVvVv' + elif version == 2: + unpack_fmt = 'vvsvsvvVvVvV' + elif version == 1: unpack_fmt = 'VVsVsVvVvVvV' + else: + raise Exception('unexpected common_rec version %d' % version) data = vint.read_bvec(port) - (self.mode, - self.uid, - self.user, - self.gid, - self.group, - self.rdev, - self.atime, - atime_ns, - self.mtime, - mtime_ns, - self.ctime, - ctime_ns) = vint.unpack(unpack_fmt, data) + values = vint.unpack(unpack_fmt, data) + if version == 3: + (self.mode, self.uid, self.user, self.gid, self.group, + self.rdev, + self.atime, atime_ns, + self.mtime, mtime_ns, + self.ctime, ctime_ns, size) = values + if size >= 0: + self.size = size + else: + (self.mode, self.uid, self.user, self.gid, self.group, + self.rdev, + self.atime, atime_ns, + self.mtime, mtime_ns, + self.ctime, ctime_ns) = values self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns)) self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns)) self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns)) @@ -306,14 +329,14 @@ class Metadata: st = None try: st = xstat.lstat(path) - except OSError, e: + except OSError as e: if e.errno != errno.ENOENT: raise if st: if stat.S_ISDIR(st.st_mode): try: os.rmdir(path) - except OSError, e: + except OSError as e: if e.errno in (errno.ENOTEMPTY, errno.EEXIST): msg = 'refusing to overwrite non-empty dir ' + path raise Exception(msg) @@ -323,24 +346,24 @@ class Metadata: if stat.S_ISREG(self.mode): assert(self._recognized_file_type()) - fd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL, 0600) + fd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL, 0o600) os.close(fd) elif stat.S_ISDIR(self.mode): assert(self._recognized_file_type()) - os.mkdir(path, 0700) + os.mkdir(path, 0o700) elif stat.S_ISCHR(self.mode): assert(self._recognized_file_type()) - os.mknod(path, 0600 | stat.S_IFCHR, self.rdev) + os.mknod(path, 0o600 | stat.S_IFCHR, self.rdev) elif stat.S_ISBLK(self.mode): assert(self._recognized_file_type()) - os.mknod(path, 0600 | stat.S_IFBLK, self.rdev) + os.mknod(path, 0o600 | stat.S_IFBLK, self.rdev) elif stat.S_ISFIFO(self.mode): assert(self._recognized_file_type()) - os.mknod(path, 0600 | stat.S_IFIFO) + os.mkfifo(path, 0o600 | stat.S_IFIFO) elif stat.S_ISSOCK(self.mode): try: - os.mknod(path, 0600 | stat.S_IFSOCK) - except OSError, e: + os.mknod(path, 0o600 | stat.S_IFSOCK) + except OSError as e: if e.errno in (errno.EINVAL, errno.EPERM): s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) s.bind(path) @@ -352,7 +375,7 @@ class Metadata: # on MacOS, symlink() permissions depend on umask, and there's # no way to chown a symlink after creating it, so we have to # be careful here! - oldumask = os.umask((self.mode & 0777) ^ 0777) + oldumask = os.umask((self.mode & 0o777) ^ 0o777) try: os.symlink(self.symlink_target, path) finally: @@ -372,7 +395,7 @@ class Metadata: if lutime and stat.S_ISLNK(self.mode): try: lutime(path, (self.atime, self.mtime)) - except OSError, e: + except OSError as e: if e.errno == errno.EACCES: raise ApplyError('lutime: %s' % e) else: @@ -380,7 +403,7 @@ class Metadata: else: try: utime(path, (self.atime, self.mtime)) - except OSError, e: + except OSError as e: if e.errno == errno.EACCES: raise ApplyError('utime: %s' % e) else: @@ -414,7 +437,7 @@ class Metadata: if uid != -1 or gid != -1: try: os.lchown(path, uid, gid) - except OSError, e: + except OSError as e: if e.errno == errno.EPERM: add_error('lchown: %s' % e) elif sys.platform.startswith('cygwin') \ @@ -451,14 +474,19 @@ class Metadata: try: if stat.S_ISLNK(st.st_mode): self.symlink_target = os.readlink(path) - except OSError, e: + except OSError as e: add_error('readlink: %s' % e) def _encode_symlink_target(self): return self.symlink_target def _load_symlink_target_rec(self, port): - self.symlink_target = vint.read_bvec(port) + target = vint.read_bvec(port) + self.symlink_target = target + if self.size is None: + self.size = len(target) + else: + assert(self.size == len(target)) ## Hardlink targets @@ -486,7 +514,8 @@ class Metadata: # The numeric/text distinction only matters when reading/restoring # a stored record. def _add_posix1e_acl(self, path, st): - if not posix1e: return + if not posix1e or not posix1e.HAS_EXTENDED_CHECK: + return if not stat.S_ISLNK(st.st_mode): acls = None def_acls = None @@ -497,7 +526,7 @@ class Metadata: if stat.S_ISDIR(st.st_mode): def_acl = posix1e.ACL(filedef=path) def_acls = [def_acl, def_acl] - except EnvironmentError, e: + except EnvironmentError as e: if e.errno not in (errno.EOPNOTSUPP, errno.ENOSYS): raise if acls: @@ -534,7 +563,7 @@ class Metadata: def apply_acl(acl_rep, kind): try: acl = posix1e.ACL(text = acl_rep) - except IOError, e: + except IOError as e: if e.errno == 0: # pylibacl appears to return an IOError with errno # set to 0 if a group referred to by the ACL rep @@ -545,7 +574,7 @@ class Metadata: raise try: acl.applyto(path, kind) - except IOError, e: + except IOError as e: if e.errno == errno.EPERM or e.errno == errno.EOPNOTSUPP: raise ApplyError('POSIX1e ACL applyto: %s' % e) else: @@ -579,12 +608,20 @@ class Metadata: attr = get_linux_file_attr(path) if attr != 0: self.linux_attr = attr - except OSError, e: + except OSError as e: if e.errno == errno.EACCES: add_error('read Linux attr: %s' % e) - elif e.errno in (errno.ENOTTY, errno.ENOSYS, errno.EOPNOTSUPP): + elif e.errno in (ENOTTY, ENOSYS, EOPNOTSUPP): # Assume filesystem doesn't support attrs. return + elif e.errno == EINVAL: + global _warned_about_attr_einval + if not _warned_about_attr_einval: + log("Ignoring attr EINVAL;" + + " if you're not using ntfs-3g, please report: " + + repr(path) + '\n') + _warned_about_attr_einval = True + return else: raise @@ -611,11 +648,14 @@ class Metadata: return try: set_linux_file_attr(path, self.linux_attr) - except OSError, e: - if e.errno in (errno.ENOTTY, errno.EOPNOTSUPP, errno.ENOSYS, - errno.EACCES): + except OSError as e: + if e.errno in (EACCES, ENOTTY, EOPNOTSUPP, ENOSYS): raise ApplyError('Linux chattr: %s (0x%s)' % (e, hex(self.linux_attr))) + elif e.errno == EINVAL: + msg = "if you're not using ntfs-3g, please report" + raise ApplyError('Linux chattr: %s (0x%s) (%s)' + % (e, hex(self.linux_attr), msg)) else: raise @@ -626,7 +666,7 @@ class Metadata: if not xattr: return try: self.linux_xattr = xattr.get_all(path, nofollow=True) - except EnvironmentError, e: + except EnvironmentError as e: if e.errno != errno.EOPNOTSUPP: raise @@ -645,7 +685,7 @@ class Metadata: def _load_linux_xattr_rec(self, file): data = vint.read_bvec(file) - memfile = StringIO(data) + memfile = BytesIO(data) result = [] for i in range(vint.read_vuint(memfile)): key = vint.read_bvec(memfile) @@ -663,7 +703,7 @@ class Metadata: return try: existing_xattrs = set(xattr.list(path, nofollow=True)) - except IOError, e: + except IOError as e: if e.errno == errno.EACCES: raise ApplyError('xattr.set %r: %s' % (path, e)) else: @@ -673,7 +713,7 @@ class Metadata: or v != xattr.get(path, k, nofollow=True): try: xattr.set(path, k, v, nofollow=True) - except IOError, e: + except IOError as e: if e.errno == errno.EPERM \ or e.errno == errno.EOPNOTSUPP: raise ApplyError('xattr.set %r: %s' % (path, e)) @@ -683,8 +723,8 @@ class Metadata: for k in existing_xattrs: try: xattr.remove(path, k, nofollow=True) - except IOError, e: - if e.errno == errno.EPERM: + except IOError as e: + if e.errno in (errno.EPERM, errno.EACCES): raise ApplyError('xattr.remove %r: %s' % (path, e)) else: raise @@ -701,36 +741,75 @@ class Metadata: self.linux_xattr = None self.posix1e_acl = None + def __eq__(self, other): + if not isinstance(other, Metadata): return False + if self.mode != other.mode: return False + if self.mtime != other.mtime: return False + if self.ctime != other.ctime: return False + if self.atime != other.atime: return False + if self.path != other.path: return False + if self.uid != other.uid: return False + if self.gid != other.gid: return False + if self.size != other.size: return False + if self.user != other.user: return False + if self.group != other.group: return False + if self.symlink_target != other.symlink_target: return False + if self.hardlink_target != other.hardlink_target: return False + if self.linux_attr != other.linux_attr: return False + if self.posix1e_acl != other.posix1e_acl: return False + return True + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self.mode, + self.mtime, + self.ctime, + self.atime, + self.path, + self.uid, + self.gid, + self.size, + self.user, + self.group, + self.symlink_target, + self.hardlink_target, + self.linux_attr, + self.posix1e_acl)) + def __repr__(self): result = ['<%s instance at %s' % (self.__class__, hex(id(self)))] - if self.path: + if self.path is not None: result += ' path:' + repr(self.path) - if self.mode: + if self.mode is not None: result += ' mode:' + repr(xstat.mode_str(self.mode) - + '(%s)' % hex(self.mode)) - if self.uid: + + '(%s)' % oct(self.mode)) + if self.uid is not None: result += ' uid:' + str(self.uid) - if self.gid: + if self.gid is not None: result += ' gid:' + str(self.gid) - if self.user: + if self.user is not None: result += ' user:' + repr(self.user) - if self.group: + if self.group is not None: result += ' group:' + repr(self.group) - if self.size: + if self.size is not None: result += ' size:' + repr(self.size) for name, val in (('atime', self.atime), ('mtime', self.mtime), ('ctime', self.ctime)): - result += ' %s:%r' \ - % (name, - time.strftime('%Y-%m-%d %H:%M %z', - time.gmtime(xstat.fstime_floor_secs(val)))) + if val is not None: + result += ' %s:%r (%d)' \ + % (name, + strftime('%Y-%m-%d %H:%M %z', + gmtime(xstat.fstime_floor_secs(val))), + val) result += '>' return ''.join(result) def write(self, port, include_path=True): records = include_path and [(_rec_tag_path, self._encode_path())] or [] - records.extend([(_rec_tag_common_v2, self._encode_common()), + records.extend([(_rec_tag_common_v3, self._encode_common()), (_rec_tag_symlink_target, self._encode_symlink_target()), (_rec_tag_hardlink_target, @@ -745,10 +824,13 @@ class Metadata: vint.write_vuint(port, _rec_tag_end) def encode(self, include_path=True): - port = StringIO() + port = BytesIO() self.write(port, include_path) return port.getvalue() + def copy(self): + return deepcopy(self) + @staticmethod def read(port): # This method should either return a valid Metadata object, @@ -764,8 +846,10 @@ class Metadata: while True: # only exit is error (exception) or _rec_tag_end if tag == _rec_tag_path: result._load_path_rec(port) + elif tag == _rec_tag_common_v3: + result._load_common_rec(port, version=3) elif tag == _rec_tag_common_v2: - result._load_common_rec(port) + result._load_common_rec(port, version=2) elif tag == _rec_tag_symlink_target: result._load_symlink_target_rec(port) elif tag == _rec_tag_hardlink_target: @@ -778,8 +862,8 @@ class Metadata: result._load_linux_xattr_rec(port) elif tag == _rec_tag_end: return result - elif tag == _rec_tag_common: # Should be very rare. - result._load_common_rec(port, legacy_format = True) + elif tag == _rec_tag_common_v1: # Should be very rare. + result._load_common_rec(port, version=1) else: # unknown record vint.skip_bvec(port) tag = vint.read_vuint(port) @@ -809,7 +893,7 @@ class Metadata: self._apply_linux_xattr_rec): try: apply_metadata(path, restore_numeric_ids=num_ids) - except ApplyError, e: + except ApplyError as e: add_error(e) def same_file(self, other): @@ -825,11 +909,14 @@ class Metadata: def from_path(path, statinfo=None, archive_path=None, - save_symlinks=True, hardlink_target=None): + save_symlinks=True, hardlink_target=None, + normalized=False): + """Return the metadata associated with the path. When normalized is + true, return the metadata appropriate for a typical save, which + may or may not be all of it.""" result = Metadata() result.path = archive_path st = statinfo or xstat.lstat(path) - result.size = st.st_size result._add_common(path, st) if save_symlinks: result._add_symlink_target(path, st) @@ -837,6 +924,10 @@ def from_path(path, statinfo=None, archive_path=None, result._add_posix1e_acl(path, st) result._add_linux_attr(path, st) result._add_linux_xattr(path, st) + if normalized: + # Only store sizes for regular files and symlinks for now. + if not (stat.S_ISREG(result.mode) or stat.S_ISLNK(result.mode)): + result.size = None return result @@ -861,7 +952,7 @@ def save_tree(output_file, paths, m = from_path(p, statinfo=st, archive_path=safe_path, save_symlinks=save_symlinks) if verbose: - print >> sys.stderr, m.path + print(m.path, file=sys.stderr) m.write(output_file, include_path=write_paths) else: start_dir = os.getcwd() @@ -873,7 +964,7 @@ def save_tree(output_file, paths, m = from_path(p, statinfo=st, archive_path=safe_path, save_symlinks=save_symlinks) if verbose: - print >> sys.stderr, m.path + print(m.path, file=sys.stderr) m.write(output_file, include_path=write_paths) os.chdir(dirlist_dir) finally: @@ -921,7 +1012,7 @@ def summary_str(meta, numeric_ids = False, classification = None, mode_str = xstat.mode_str(meta.mode) symlink_target = meta.symlink_target mtime_secs = xstat.fstime_floor_secs(meta.mtime) - mtime_str = time.strftime('%Y-%m-%d %H:%M', time.localtime(mtime_secs)) + mtime_str = strftime('%Y-%m-%d %H:%M', time.localtime(mtime_secs)) if meta.user and not numeric_ids: user_str = meta.user elif meta.uid != None: @@ -983,7 +1074,7 @@ def detailed_str(meta, fields = None): os.minor(meta.rdev))) else: result.append('rdev: 0') - if 'size' in fields and meta.size: + if 'size' in fields and meta.size is not None: result.append('size: ' + str(meta.size)) if 'uid' in fields: result.append('uid: ' + str(meta.uid)) @@ -1043,20 +1134,19 @@ def display_archive(file): first_item = True for meta in _ArchiveIterator(file): if not first_item: - print - print detailed_str(meta) + print() + print(detailed_str(meta)) first_item = False elif verbose > 0: for meta in _ArchiveIterator(file): - print summary_str(meta) + print(summary_str(meta)) elif verbose == 0: for meta in _ArchiveIterator(file): if not meta.path: - print >> sys.stderr, \ - 'bup: no metadata path, but asked to only display path', \ - '(increase verbosity?)' + print('bup: no metadata path, but asked to only display path' + '(increase verbosity?)') sys.exit(1) - print meta.path + print(meta.path) def start_extract(file, create_symlinks=True): @@ -1064,7 +1154,7 @@ def start_extract(file, create_symlinks=True): if not meta: # Hit end record. break if verbose: - print >> sys.stderr, meta.path + print(meta.path, file=sys.stderr) xpath = _clean_up_extract_path(meta.path) if not xpath: add_error(Exception('skipping risky path "%s"' % meta.path)) @@ -1086,7 +1176,7 @@ def finish_extract(file, restore_numeric_ids=False): all_dirs.append(meta) else: if verbose: - print >> sys.stderr, meta.path + print(meta.path, file=sys.stderr) meta.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids) all_dirs.sort(key = lambda x : len(x.path), reverse=True) @@ -1094,7 +1184,7 @@ def finish_extract(file, restore_numeric_ids=False): # Don't need to check xpath -- won't be in all_dirs if not OK. xpath = _clean_up_extract_path(dir.path) if verbose: - print >> sys.stderr, dir.path + print(dir.path, file=sys.stderr) dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids) @@ -1111,20 +1201,20 @@ def extract(file, restore_numeric_ids=False, create_symlinks=True): else: meta.path = xpath if verbose: - print >> sys.stderr, '+', meta.path + print('+', meta.path, file=sys.stderr) _set_up_path(meta, create_symlinks=create_symlinks) if os.path.isdir(meta.path): all_dirs.append(meta) else: if verbose: - print >> sys.stderr, '=', meta.path + print('=', meta.path, file=sys.stderr) meta.apply_to_path(restore_numeric_ids=restore_numeric_ids) all_dirs.sort(key = lambda x : len(x.path), reverse=True) for dir in all_dirs: # Don't need to check xpath -- won't be in all_dirs if not OK. xpath = _clean_up_extract_path(dir.path) if verbose: - print >> sys.stderr, '=', xpath + print('=', xpath, file=sys.stderr) # Shouldn't have to check for risky paths here (omitted above). dir.apply_to_path(path=dir.path, restore_numeric_ids=restore_numeric_ids)