1 """Metadata read/write support for bup."""
3 # Copyright (C) 2010 Rob Browning
5 # This code is covered under the terms of the GNU Library General
6 # Public License as described in the bup LICENSE file.
8 from __future__ import absolute_import, print_function
9 from binascii import hexlify
10 from copy import deepcopy
11 from errno import EACCES, EINVAL, ENOTTY, ENOSYS, EOPNOTSUPP
12 from io import BytesIO
13 from time import gmtime, strftime
14 import errno, os, sys, stat, time, pwd, grp, socket, struct
16 from bup import compat, vint, xstat
17 from bup.compat import py_maj
18 from bup.drecurse import recursive_dirlist
19 from bup.helpers import add_error, mkdirp, log, is_superuser, format_filesize
20 from bup.io import path_msg
21 from bup.pwdgrp import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
22 from bup.xstat import utime, lutime
25 if sys.platform.startswith('linux'):
26 # prefer python-pyxattr (it's a lot faster), but fall back to python-xattr
27 # as the two are incompatible and only one can be installed on a system
31 log('Warning: Linux xattr support missing; install python-pyxattr.\n')
32 if xattr and getattr(xattr, 'get_all', None) is None:
34 from xattr import pyxattr_compat as xattr
35 if not isinstance(xattr.NS_USER, bytes):
40 log('Warning: python-xattr module is too old; '
41 'upgrade or install python-pyxattr instead.\n')
44 if not (sys.platform.startswith('cygwin') \
45 or sys.platform.startswith('darwin') \
46 or sys.platform.startswith('netbsd')):
50 log('Warning: POSIX ACL support missing; install python-pylibacl.\n')
53 from bup._helpers import get_linux_file_attr, set_linux_file_attr
55 # No need for a warning here; the only reason they won't exist is that we're
56 # not on Linux, in which case files don't have any linux attrs anyway, so
57 # lacking the functions isn't a problem.
58 get_linux_file_attr = set_linux_file_attr = None
61 # See the bup_get_linux_file_attr() comments.
62 _suppress_linux_file_attr = \
63 sys.byteorder == 'big' and struct.calcsize('@l') > struct.calcsize('@i')
def check_linux_file_attr_api():
    """Disable the Linux file attr helpers when they must be suppressed.

    Nothing happens when neither helper is available.  Otherwise, if
    _suppress_linux_file_attr is set, a warning is logged and both
    module-level helpers are rebound to None.
    """
    global get_linux_file_attr, set_linux_file_attr
    api_present = get_linux_file_attr or set_linux_file_attr
    if api_present and _suppress_linux_file_attr:
        log('Warning: Linux attr support disabled (see "bup help index").\n')
        get_linux_file_attr = set_linux_file_attr = None
74 # WARNING: the metadata encoding is *not* stable yet. Caveat emptor!
76 # Q: Consider hardlink support?
77 # Q: Is it OK to store raw linux attr (chattr) flags?
78 # Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
79 # Q: Is the application of posix1e has_extended() correct?
80 # Q: Is one global --numeric-ids argument sufficient?
81 # Q: Do nfsv4 acls trump posix1e acls? (seems likely)
82 # Q: Add support for crtime -- ntfs, and (only internally?) ext*?
84 # FIXME: Fix relative/abs path detection/stripping wrt other platforms.
85 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
86 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
87 # FIXME: Consider pack('vvvvsss', ...) optimization.
91 # osx (varies between hfs and hfs+):
92 # type - regular dir char block fifo socket ...
93 # perms - rwxrwxrwxsgt
94 # times - ctime atime mtime
97 # hard-link-info (hfs+ only)
100 # attributes-osx see chflags
106 # type - regular dir ...
107 # times - creation, modification, posix change, access
110 # attributes - see attrib
112 # forks (alternate data streams)
116 # type - regular dir ...
117 # perms - rwxrwxrwx (maybe - see wikipedia)
118 # times - creation, modification, access
119 # attributes - see attrib
123 _have_lchmod = hasattr(os, 'lchmod')
def _clean_up_path_for_archive(p):
    """Return the bytes path p rewritten as a relative archive path.

    Parent-directory prefixes, leading slashes, and redundant '/./'
    and '//' sequences are stripped.  A path that ends up empty, or
    that still ends in '/..', becomes b'.'.
    """
    # Not the most efficient approach.
    cleaned = p

    # Keep only what follows the last '/../'.
    cut = cleaned.rfind(b'/../')
    if cut != -1:
        cleaned = cleaned[cut + 4:]

    # Drop one remaining leading '../'.
    if cleaned.startswith(b'../'):
        cleaned = cleaned[3:]

    # Collapse '/./' into '/'.  This is done one occurrence at a time
    # because a replacement can expose a new, overlapping occurrence.
    while True:
        idx = cleaned.find(b'/./')
        if idx == -1:
            break
        cleaned = cleaned[:idx] + b'/' + cleaned[idx + 3:]

    # Strip all leading '/'s.
    cleaned = cleaned.lstrip(b'/')

    # Collapse every run of '/' into a single '/'.
    while True:
        idx = cleaned.find(b'//')
        if idx == -1:
            break
        cleaned = cleaned[:idx] + b'/' + cleaned[idx + 2:]

    # Drop one remaining leading './'.
    if cleaned.startswith(b'./'):
        cleaned = cleaned[2:]

    # Drop a trailing '/.'.
    if cleaned.endswith(b'/.'):
        cleaned = cleaned[:-2]

    if cleaned == b'' or cleaned.endswith(b'/..'):
        return b'.'
    return cleaned
169 if p.startswith(b'/'):
171 if p.find(b'/../') != -1:
173 if p.startswith(b'../'):
175 if p.endswith(b'/..'):
180 def _clean_up_extract_path(p):
181 result = p.lstrip(b'/')
184 elif _risky_path(result):
190 # These tags are currently conceptually private to Metadata, and they
191 # must be unique, and must *never* be changed.
194 _rec_tag_common_v1 = 2 # times, user, group, type, perms, etc. (legacy/broken)
195 _rec_tag_symlink_target = 3
196 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc.
197 _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e? (unimplemented)
198 _rec_tag_linux_attr = 6 # lsattr(1) chattr(1)
199 _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1)
200 _rec_tag_hardlink_target = 8 # hard link target path
201 _rec_tag_common_v2 = 9 # times, user, group, type, perms, etc. (current)
202 _rec_tag_common_v3 = 10 # adds optional size to v2
204 _warned_about_attr_einval = None
class ApplyError(Exception):
    """Thrown when unable to apply any given bit of metadata to a path."""
213 # Metadata is stored as a sequence of tagged binary records. Each
214 # record will have some subset of add, encode, load, create, and
215 # apply methods, i.e. _add_foo...
217 # We do allow an "empty" object as a special case, i.e. no
218 # records. One can be created by trying to write Metadata(), and
219 # for such an object, read() will return None. This is used by
220 # "bup save", for example, as a placeholder in cases where
223 # NOTE: if any relevant fields are added or removed, be sure to
224 # update same_file() below.
228 # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00, ns
229 # must be non-negative and < 10**9.
231 def _add_common(self, path, st):
232 assert(st.st_uid >= 0)
233 assert(st.st_gid >= 0)
234 self.size = st.st_size
237 self.atime = st.st_atime
238 self.mtime = st.st_mtime
239 self.ctime = st.st_ctime
240 self.user = self.group = b''
241 entry = pwd_from_uid(st.st_uid)
243 self.user = entry.pw_name
244 entry = grp_from_gid(st.st_gid)
246 self.group = entry.gr_name
247 self.mode = st.st_mode
248 # Only collect st_rdev if we might need it for a mknod()
249 # during restore. On some platforms (i.e. kFreeBSD), it isn't
250 # stable for other file types. For example "cp -a" will
251 # change it for a plain file.
252 if stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
253 self.rdev = st.st_rdev
def _same_common(self, other):
    """Return true or false to indicate similarity in the hardlink sense.

    Ownership (numeric and by name), rdev, mtime, ctime and size must
    all match; mode and atime are not compared.
    """
    compared = ('uid', 'gid', 'rdev', 'mtime', 'ctime',
                'user', 'group', 'size')
    return all(getattr(self, name) == getattr(other, name)
               for name in compared)
268 def _encode_common(self):
271 atime = xstat.nsecs_to_timespec(self.atime)
272 mtime = xstat.nsecs_to_timespec(self.mtime)
273 ctime = xstat.nsecs_to_timespec(self.ctime)
274 result = vint.pack('vvsvsvvVvVvVv',
287 self.size if self.size is not None else -1)
290 def _load_common_rec(self, port, version=3):
292 # Added trailing size to v2, negative when None.
293 unpack_fmt = 'vvsvsvvVvVvVv'
295 unpack_fmt = 'vvsvsvvVvVvV'
297 unpack_fmt = 'VVsVsVvVvVvV'
299 raise Exception('unexpected common_rec version %d' % version)
300 data = vint.read_bvec(port)
301 values = vint.unpack(unpack_fmt, data)
303 (self.mode, self.uid, self.user, self.gid, self.group,
305 self.atime, atime_ns,
306 self.mtime, mtime_ns,
307 self.ctime, ctime_ns, size) = values
311 (self.mode, self.uid, self.user, self.gid, self.group,
313 self.atime, atime_ns,
314 self.mtime, mtime_ns,
315 self.ctime, ctime_ns) = values
316 self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
317 self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
318 self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
def _recognized_file_type(self):
    """Return true if self.mode describes a file type handled here.

    The handled types are regular file, directory, character device,
    block device, FIFO, socket and symlink.
    """
    mode = self.mode
    type_checks = (stat.S_ISREG, stat.S_ISDIR, stat.S_ISCHR, stat.S_ISBLK,
                   stat.S_ISFIFO, stat.S_ISSOCK, stat.S_ISLNK)
    return any(check(mode) for check in type_checks)
329 def _create_via_common_rec(self, path, create_symlinks=True):
331 raise ApplyError('no metadata - cannot create path '
334 # If the path already exists and is a dir, try rmdir.
335 # If the path already exists and is anything else, try unlink.
338 st = xstat.lstat(path)
340 if e.errno != errno.ENOENT:
343 if stat.S_ISDIR(st.st_mode):
347 if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
348 raise Exception('refusing to overwrite non-empty dir '
354 if stat.S_ISREG(self.mode):
355 assert(self._recognized_file_type())
356 fd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL, 0o600)
358 elif stat.S_ISDIR(self.mode):
359 assert(self._recognized_file_type())
360 os.mkdir(path, 0o700)
361 elif stat.S_ISCHR(self.mode):
362 assert(self._recognized_file_type())
363 os.mknod(path, 0o600 | stat.S_IFCHR, self.rdev)
364 elif stat.S_ISBLK(self.mode):
365 assert(self._recognized_file_type())
366 os.mknod(path, 0o600 | stat.S_IFBLK, self.rdev)
367 elif stat.S_ISFIFO(self.mode):
368 assert(self._recognized_file_type())
369 os.mkfifo(path, 0o600 | stat.S_IFIFO)
370 elif stat.S_ISSOCK(self.mode):
372 os.mknod(path, 0o600 | stat.S_IFSOCK)
374 if e.errno in (errno.EINVAL, errno.EPERM):
375 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
379 elif stat.S_ISLNK(self.mode):
380 assert(self._recognized_file_type())
381 if self.symlink_target and create_symlinks:
382 # on MacOS, symlink() permissions depend on umask, and there's
383 # no way to chown a symlink after creating it, so we have to
385 oldumask = os.umask((self.mode & 0o777) ^ 0o777)
387 os.symlink(self.symlink_target, path)
390 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
392 assert(not self._recognized_file_type())
393 add_error('not creating "%s" with unrecognized mode "0x%x"\n'
394 % (path_msg(path), self.mode))
def _apply_common_rec(self, path, restore_numeric_ids=False):
    """Apply the common record (times, ownership, permissions) to path.

    path must already exist.  Unless restore_numeric_ids is true, the
    saved user and group names are preferred over the saved numeric
    ids whenever they resolve on this system.

    Raises ApplyError when there is no metadata to apply or when
    setting the timestamps fails with EACCES.  An EPERM from lchown
    (and EINVAL on cygwin) is recorded via add_error() instead of
    being raised.  Other OS errors propagate unchanged.
    """
    if not self.mode:
        raise ApplyError('no metadata - cannot apply to ' + path_msg(path))

    # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
    # EACCES errors at this stage are fatal for the current path.
    if lutime and stat.S_ISLNK(self.mode):
        try:
            lutime(path, (self.atime, self.mtime))
        except OSError as e:
            if e.errno == errno.EACCES:
                raise ApplyError('lutime: %s' % e)
            else:
                raise
    else:
        try:
            utime(path, (self.atime, self.mtime))
        except OSError as e:
            if e.errno == errno.EACCES:
                raise ApplyError('utime: %s' % e)
            else:
                raise

    uid = gid = -1 # By default, do nothing.
    if is_superuser():
        if self.uid is not None:
            uid = self.uid
        if self.gid is not None:
            gid = self.gid
        if not restore_numeric_ids:
            # Prefer the ids the saved names map to locally.  Names are
            # never consulted for uid/gid 0.
            if self.uid != 0 and self.user:
                entry = pwd_from_name(self.user)
                if entry:
                    uid = entry.pw_uid
            if self.gid != 0 and self.group:
                entry = grp_from_name(self.group)
                if entry:
                    gid = entry.gr_gid
    else: # not superuser - only consider changing the group/gid
        user_gids = os.getgroups()
        if self.gid in user_gids:
            gid = self.gid
        if not restore_numeric_ids and self.gid != 0:
            # The grp might not exist on the local system.
            grps = filter(None, [grp_from_gid(x) for x in user_gids])
            if self.group in [x.gr_name for x in grps]:
                g = grp_from_name(self.group)
                if g:
                    gid = g.gr_gid

    if uid != -1 or gid != -1:
        try:
            os.lchown(path, uid, gid)
        except OSError as e:
            if e.errno == errno.EPERM:
                add_error('lchown: %s' % e)
            elif sys.platform.startswith('cygwin') \
                 and e.errno == errno.EINVAL:
                add_error('lchown: unknown uid/gid (%d/%d) for %s'
                          % (uid, gid, path_msg(path)))
            else:
                raise

    if _have_lchmod:
        try:
            os.lchmod(path, stat.S_IMODE(self.mode))
        except OSError as e:
            # errno.ENOSYS is an int, not an exception class, so it
            # cannot be named in an except clause; catch OSError and
            # tolerate only "Function not implemented".
            if e.errno != errno.ENOSYS:
                raise
    elif not stat.S_ISLNK(self.mode):
        os.chmod(path, stat.S_IMODE(self.mode))
470 def _encode_path(self):
472 return vint.pack('s', self.path)
def _load_path_rec(self, port):
    """Read the path record from port and store it in self.path."""
    fields = vint.unpack('s', vint.read_bvec(port))
    self.path = fields[0]
482 def _add_symlink_target(self, path, st):
484 if stat.S_ISLNK(st.st_mode):
485 self.symlink_target = os.readlink(path)
487 add_error('readlink: %s' % e)
def _encode_symlink_target(self):
    """Return the symlink target record data (self.symlink_target as is)."""
    target = self.symlink_target
    return target
def _load_symlink_target_rec(self, port):
    """Read the symlink target record from port.

    The target is stored in self.symlink_target.  If no size has been
    set yet, the size becomes the target's length; otherwise the two
    are asserted to agree.
    """
    link = vint.read_bvec(port)
    self.symlink_target = link
    link_len = len(link)
    if self.size is not None:
        assert(self.size == link_len)
    else:
        self.size = link_len
def _add_hardlink_target(self, target):
    """Record target as this object's hardlink target path."""
    setattr(self, 'hardlink_target', target)
def _same_hardlink_target(self, other):
    """Return true or false to indicate similarity in the hardlink sense."""
    mine, theirs = self.hardlink_target, other.hardlink_target
    return mine == theirs
def _encode_hardlink_target(self):
    """Return the hardlink target record data (self.hardlink_target as is)."""
    target = self.hardlink_target
    return target
def _load_hardlink_target_rec(self, port):
    """Read the hardlink target record from port into self.hardlink_target."""
    target = vint.read_bvec(port)
    self.hardlink_target = target
517 ## POSIX1e ACL records
519 # Recorded as a list:
520 # [txt_id_acl, num_id_acl]
521 # or, if a directory:
522 # [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
523 # The numeric/text distinction only matters when reading/restoring
525 def _add_posix1e_acl(self, path, st):
526 if not posix1e or not posix1e.HAS_EXTENDED_CHECK:
528 if not stat.S_ISLNK(st.st_mode):
532 if posix1e.has_extended(path):
533 acl = posix1e.ACL(file=path)
534 acls = [acl, acl] # txt and num are the same
535 if stat.S_ISDIR(st.st_mode):
536 def_acl = posix1e.ACL(filedef=(path if py_maj < 3
537 else path.decode('iso-8859-1')))
538 def_acls = [def_acl, def_acl]
539 except EnvironmentError as e:
540 if e.errno not in (errno.EOPNOTSUPP, errno.ENOSYS):
543 txt_flags = posix1e.TEXT_ABBREVIATE
544 num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
545 acl_rep = [acls[0].to_any_text('', b'\n', txt_flags),
546 acls[1].to_any_text('', b'\n', num_flags)]
548 acl_rep.append(def_acls[0].to_any_text('', b'\n', txt_flags))
549 acl_rep.append(def_acls[1].to_any_text('', b'\n', num_flags))
550 self.posix1e_acl = acl_rep
def _same_posix1e_acl(self, other):
    """Return true or false to indicate similarity in the hardlink sense."""
    mine, theirs = self.posix1e_acl, other.posix1e_acl
    return mine == theirs
def _encode_posix1e_acl(self):
    """Return the packed POSIX1e ACL record, or None if there are no ACLs.

    Four strings are always encoded: the text and numeric access ACLs
    followed by the text and numeric default ACLs.  The last two are
    empty when only access ACLs were recorded.
    """
    # Encode as two strings (w/default ACL string possibly empty).
    if not self.posix1e_acl:
        return None
    # Pad a copy.  Extending self.posix1e_acl in place would leave two
    # empty default-ACL entries on the object after encoding, which
    # changes later __eq__ / _same_posix1e_acl comparisons.
    acls = list(self.posix1e_acl)
    if len(acls) == 2:
        acls.extend([b'', b''])
    return vint.pack('ssss', acls[0], acls[1], acls[2], acls[3])
def _load_posix1e_acl_rec(self, port):
    """Read the POSIX1e ACL record from port into self.posix1e_acl.

    The record holds four strings.  When the third (the text default
    ACL) is empty, only the first two (the access ACLs) are kept.
    """
    fields = vint.unpack('ssss', vint.read_bvec(port))
    has_default = not (fields[2] == b'')
    self.posix1e_acl = fields if has_default else fields[:2]
572 def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
573 def apply_acl(acl_rep, kind):
575 acl = posix1e.ACL(text=acl_rep.decode('ascii'))
578 # pylibacl appears to return an IOError with errno
579 # set to 0 if a group referred to by the ACL rep
580 # doesn't exist on the current system.
581 raise ApplyError("POSIX1e ACL: can't create %r for %r"
582 % (acl_rep, path_msg(path)))
586 acl.applyto(path, kind)
588 if e.errno == errno.EPERM or e.errno == errno.EOPNOTSUPP:
589 raise ApplyError('POSIX1e ACL applyto: %s' % e)
595 add_error("%s: can't restore ACLs; posix1e support missing.\n"
599 acls = self.posix1e_acl
601 if restore_numeric_ids:
602 apply_acl(acls[3], posix1e.ACL_TYPE_DEFAULT)
604 apply_acl(acls[2], posix1e.ACL_TYPE_DEFAULT)
605 if restore_numeric_ids:
606 apply_acl(acls[1], posix1e.ACL_TYPE_ACCESS)
608 apply_acl(acls[0], posix1e.ACL_TYPE_ACCESS)
611 ## Linux attributes (lsattr(1), chattr(1))
613 def _add_linux_attr(self, path, st):
614 check_linux_file_attr_api()
615 if not get_linux_file_attr: return
616 if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
618 attr = get_linux_file_attr(path)
620 self.linux_attr = attr
622 if e.errno == errno.EACCES:
623 add_error('read Linux attr: %s' % e)
624 elif e.errno in (ENOTTY, ENOSYS, EOPNOTSUPP):
625 # Assume filesystem doesn't support attrs.
627 elif e.errno == EINVAL:
628 global _warned_about_attr_einval
629 if not _warned_about_attr_einval:
630 log("Ignoring attr EINVAL;"
631 + " if you're not using ntfs-3g, please report: "
632 + path_msg(path) + '\n')
633 _warned_about_attr_einval = True
def _same_linux_attr(self, other):
    """Return true or false to indicate similarity in the hardlink sense."""
    mine, theirs = self.linux_attr, other.linux_attr
    return mine == theirs
642 def _encode_linux_attr(self):
644 return vint.pack('V', self.linux_attr)
def _load_linux_attr_rec(self, port):
    """Read the Linux attr record from port into self.linux_attr."""
    self.linux_attr = vint.unpack('V', vint.read_bvec(port))[0]
652 def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
654 check_linux_file_attr_api()
655 if not set_linux_file_attr:
656 add_error("%s: can't restore linuxattrs: "
657 "linuxattr support missing.\n" % path_msg(path))
660 set_linux_file_attr(path, self.linux_attr)
662 if e.errno in (EACCES, ENOTTY, EOPNOTSUPP, ENOSYS):
663 raise ApplyError('Linux chattr: %s (0x%s)'
664 % (e, hex(self.linux_attr)))
665 elif e.errno == EINVAL:
666 msg = "if you're not using ntfs-3g, please report"
667 raise ApplyError('Linux chattr: %s (0x%s) (%s)'
668 % (e, hex(self.linux_attr), msg))
673 ## Linux extended attributes (getfattr(1), setfattr(1))
675 def _add_linux_xattr(self, path, st):
678 self.linux_xattr = xattr.get_all(path, nofollow=True)
679 except EnvironmentError as e:
680 if e.errno != errno.EOPNOTSUPP:
def _same_linux_xattr(self, other):
    """Return true or false to indicate similarity in the hardlink sense."""
    mine, theirs = self.linux_xattr, other.linux_xattr
    return mine == theirs
687 def _encode_linux_xattr(self):
689 result = vint.pack('V', len(self.linux_xattr))
690 for name, value in self.linux_xattr:
691 result += vint.pack('ss', name, value)
def _load_linux_xattr_rec(self, file):
    """Read the Linux xattr record from file into self.linux_xattr.

    The record is a count followed by that many (name, value) pairs.
    The result is stored as a list of (name, value) tuples in record
    order.
    """
    buf = BytesIO(vint.read_bvec(file))
    count = vint.read_vuint(buf)
    # Tuple elements are evaluated left to right, so each name is read
    # before its value.
    pairs = [(vint.read_bvec(buf), vint.read_bvec(buf))
             for _ in range(count)]
    self.linux_xattr = pairs
706 def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
709 add_error("%s: can't restore xattr; xattr support missing.\n"
712 if not self.linux_xattr:
715 existing_xattrs = set(xattr.list(path, nofollow=True))
717 if e.errno == errno.EACCES:
718 raise ApplyError('xattr.set %r: %s' % (path_msg(path), e))
721 for k, v in self.linux_xattr:
722 if k not in existing_xattrs \
723 or v != xattr.get(path, k, nofollow=True):
725 xattr.set(path, k, v, nofollow=True)
727 if e.errno == errno.EPERM \
728 or e.errno == errno.EOPNOTSUPP:
729 raise ApplyError('xattr.set %r: %s' % (path_msg(path), e))
732 existing_xattrs -= frozenset([k])
733 for k in existing_xattrs:
735 xattr.remove(path, k, nofollow=True)
737 if e.errno in (errno.EPERM, errno.EACCES):
738 raise ApplyError('xattr.remove %r: %s' % (path_msg(path), e))
743 self.mode = self.uid = self.gid = self.user = self.group = None
744 self.atime = self.mtime = self.ctime = None
748 self.symlink_target = None
749 self.hardlink_target = None
750 self.linux_attr = None
751 self.linux_xattr = None
752 self.posix1e_acl = None
def __eq__(self, other):
    """Return True if other is a Metadata with matching fields.

    The fields compared are listed below, in order.  Note that rdev
    and linux_xattr are not among them.
    """
    if not isinstance(other, Metadata):
        return False
    compared = ('mode', 'mtime', 'ctime', 'atime', 'path',
                'uid', 'gid', 'size', 'user', 'group',
                'symlink_target', 'hardlink_target',
                'linux_attr', 'posix1e_acl')
    return not any(getattr(self, name) != getattr(other, name)
                   for name in compared)
def __ne__(self, other):
    """Return the negation of self.__eq__(other)."""
    equal = self.__eq__(other)
    return not equal
776 return hash((self.mode,
787 self.hardlink_target,
792 result = ['<%s instance at %s' % (self.__class__, hex(id(self)))]
793 if self.path is not None:
794 result += ' path:' + repr(self.path)
795 if self.mode is not None:
796 result += ' mode: %o (%s)' % (self.mode, xstat.mode_str(self.mode))
797 if self.uid is not None:
798 result += ' uid:' + str(self.uid)
799 if self.gid is not None:
800 result += ' gid:' + str(self.gid)
801 if self.user is not None:
802 result += ' user:' + repr(self.user)
803 if self.group is not None:
804 result += ' group:' + repr(self.group)
805 if self.size is not None:
806 result += ' size:' + repr(self.size)
807 for name, val in (('atime', self.atime),
808 ('mtime', self.mtime),
809 ('ctime', self.ctime)):
811 result += ' %s:%r (%d)' \
813 strftime('%Y-%m-%d %H:%M %z',
814 gmtime(xstat.fstime_floor_secs(val))),
817 return ''.join(result)
819 def write(self, port, include_path=True):
820 records = include_path and [(_rec_tag_path, self._encode_path())] or []
821 records.extend([(_rec_tag_common_v3, self._encode_common()),
822 (_rec_tag_symlink_target,
823 self._encode_symlink_target()),
824 (_rec_tag_hardlink_target,
825 self._encode_hardlink_target()),
826 (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
827 (_rec_tag_linux_attr, self._encode_linux_attr()),
828 (_rec_tag_linux_xattr, self._encode_linux_xattr())])
829 for tag, data in records:
831 vint.write_vuint(port, tag)
832 vint.write_bvec(port, data)
833 vint.write_vuint(port, _rec_tag_end)
def encode(self, include_path=True):
    """Return the bytes that write() emits for this object.

    include_path is passed through to write().
    """
    buf = BytesIO()
    self.write(buf, include_path)
    return buf.getvalue()
841 return deepcopy(self)
845 # This method should either return a valid Metadata object,
846 # return None if there was no information at all (just a
847 # _rec_tag_end), throw EOFError if there was nothing at all to
848 # read, or throw an Exception if a valid object could not be
850 tag = vint.read_vuint(port)
851 if tag == _rec_tag_end:
853 try: # From here on, EOF is an error.
855 while True: # only exit is error (exception) or _rec_tag_end
856 if tag == _rec_tag_path:
857 result._load_path_rec(port)
858 elif tag == _rec_tag_common_v3:
859 result._load_common_rec(port, version=3)
860 elif tag == _rec_tag_common_v2:
861 result._load_common_rec(port, version=2)
862 elif tag == _rec_tag_symlink_target:
863 result._load_symlink_target_rec(port)
864 elif tag == _rec_tag_hardlink_target:
865 result._load_hardlink_target_rec(port)
866 elif tag == _rec_tag_posix1e_acl:
867 result._load_posix1e_acl_rec(port)
868 elif tag == _rec_tag_linux_attr:
869 result._load_linux_attr_rec(port)
870 elif tag == _rec_tag_linux_xattr:
871 result._load_linux_xattr_rec(port)
872 elif tag == _rec_tag_end:
874 elif tag == _rec_tag_common_v1: # Should be very rare.
875 result._load_common_rec(port, version=1)
876 else: # unknown record
878 tag = vint.read_vuint(port)
880 raise Exception("EOF while reading Metadata")
883 return stat.S_ISDIR(self.mode)
def create_path(self, path, create_symlinks=True):
    """Create path via _create_via_common_rec(), passing create_symlinks on."""
    make = self._create_via_common_rec
    make(path, create_symlinks=create_symlinks)
888 def apply_to_path(self, path=None, restore_numeric_ids=False):
889 # apply metadata to path -- file must exist
893 raise Exception('Metadata.apply_to_path() called with no path')
894 if not self._recognized_file_type():
895 add_error('not applying metadata to "%s"' % path_msg(path)
896 + ' with unrecognized mode "0x%x"\n' % self.mode)
898 num_ids = restore_numeric_ids
899 for apply_metadata in (self._apply_common_rec,
900 self._apply_posix1e_acl_rec,
901 self._apply_linux_attr_rec,
902 self._apply_linux_xattr_rec):
904 apply_metadata(path, restore_numeric_ids=num_ids)
905 except ApplyError as e:
def same_file(self, other):
    """Compare this to other for equivalency.

    Return true if their information implies they could represent the
    same file on disk, in the hardlink sense: the common fields, the
    hardlink target, the POSIX1e ACLs, the Linux attrs and the Linux
    xattrs must all match.  No file type check is made here.
    """
    checks = (self._same_common,
              self._same_hardlink_target,
              self._same_posix1e_acl,
              self._same_linux_attr,
              self._same_linux_xattr)
    return all(check(other) for check in checks)
920 def from_path(path, statinfo=None, archive_path=None,
921 save_symlinks=True, hardlink_target=None,
923 """Return the metadata associated with the path. When normalized is
924 true, return the metadata appropriate for a typical save, which
925 may or may not be all of it."""
927 result.path = archive_path
928 st = statinfo or xstat.lstat(path)
929 result._add_common(path, st)
931 result._add_symlink_target(path, st)
932 result._add_hardlink_target(hardlink_target)
933 result._add_posix1e_acl(path, st)
934 result._add_linux_attr(path, st)
935 result._add_linux_xattr(path, st)
937 # Only store sizes for regular files and symlinks for now.
938 if not (stat.S_ISREG(result.mode) or stat.S_ISLNK(result.mode)):
943 def save_tree(output_file, paths,
949 # Issue top-level rewrite warnings.
951 safe_path = _clean_up_path_for_archive(path)
952 if safe_path != path:
953 log('archiving "%s" as "%s"\n'
954 % (path_msg(path), path_msg(safe_path)))
958 safe_path = _clean_up_path_for_archive(p)
960 if stat.S_ISDIR(st.st_mode):
962 m = from_path(p, statinfo=st, archive_path=safe_path,
963 save_symlinks=save_symlinks)
965 print(m.path, file=sys.stderr)
966 m.write(output_file, include_path=write_paths)
968 start_dir = os.getcwd()
970 for (p, st) in recursive_dirlist(paths, xdev=xdev):
971 dirlist_dir = os.getcwd()
973 safe_path = _clean_up_path_for_archive(p)
974 m = from_path(p, statinfo=st, archive_path=safe_path,
975 save_symlinks=save_symlinks)
977 print(m.path, file=sys.stderr)
978 m.write(output_file, include_path=write_paths)
979 os.chdir(dirlist_dir)
984 def _set_up_path(meta, create_symlinks=True):
985 # Allow directories to exist as a special case -- might have
986 # been created by an earlier longer path.
990 parent = os.path.dirname(meta.path)
993 meta.create_path(meta.path, create_symlinks=create_symlinks)
996 all_fields = frozenset(['path',
1013 def summary_bytes(meta, numeric_ids = False, classification = None,
1014 human_readable = False):
1015 """Return bytes containing the "ls -l" style listing for meta.
1016 Classification may be "all", "type", or None."""
1017 user_str = group_str = size_or_dev_str = b'?'
1018 symlink_target = None
1021 mode_str = xstat.mode_str(meta.mode).encode('ascii')
1022 symlink_target = meta.symlink_target
1023 mtime_secs = xstat.fstime_floor_secs(meta.mtime)
1024 mtime_str = strftime('%Y-%m-%d %H:%M',
1025 time.localtime(mtime_secs)).encode('ascii')
1026 if meta.user and not numeric_ids:
1027 user_str = meta.user
1028 elif meta.uid != None:
1029 user_str = str(meta.uid).encode()
1030 if meta.group and not numeric_ids:
1031 group_str = meta.group
1032 elif meta.gid != None:
1033 group_str = str(meta.gid).encode()
1034 if stat.S_ISCHR(meta.mode) or stat.S_ISBLK(meta.mode):
1036 size_or_dev_str = ('%d,%d' % (os.major(meta.rdev),
1037 os.minor(meta.rdev))).encode()
1038 elif meta.size != None:
1040 size_or_dev_str = format_filesize(meta.size).encode()
1042 size_or_dev_str = str(meta.size).encode()
1044 size_or_dev_str = b'-'
1046 classification_str = \
1047 xstat.classification_str(meta.mode,
1048 classification == 'all').encode()
1050 mode_str = b'?' * 10
1051 mtime_str = b'????-??-?? ??:??'
1052 classification_str = b'?'
1056 name += classification_str
1058 name += b' -> ' + meta.symlink_target
1060 return b'%-10s %-11s %11s %16s %s' % (mode_str,
1061 user_str + b'/' + group_str,
1067 def detailed_bytes(meta, fields = None):
1068 # FIXME: should optional fields be omitted, or empty i.e. "rdev:
1069 # 0", "link-target:", etc.
1074 if 'path' in fields:
1075 path = meta.path or b''
1076 result.append(b'path: ' + path)
1077 if 'mode' in fields:
1078 result.append(b'mode: %o (%s)'
1079 % (meta.mode, xstat.mode_str(meta.mode).encode('ascii')))
1080 if 'link-target' in fields and stat.S_ISLNK(meta.mode):
1081 result.append(b'link-target: ' + meta.symlink_target)
1082 if 'rdev' in fields:
1084 result.append(b'rdev: %d,%d' % (os.major(meta.rdev),
1085 os.minor(meta.rdev)))
1087 result.append(b'rdev: 0')
1088 if 'size' in fields and meta.size is not None:
1089 result.append(b'size: %d' % meta.size)
1091 result.append(b'uid: %d' % meta.uid)
1093 result.append(b'gid: %d' % meta.gid)
1094 if 'user' in fields:
1095 result.append(b'user: ' + meta.user)
1096 if 'group' in fields:
1097 result.append(b'group: ' + meta.group)
1098 if 'atime' in fields:
1099 # If we don't have xstat.lutime, that means we have to use
1100 # utime(), and utime() has no way to set the mtime/atime of a
1101 # symlink. Thus, the mtime/atime of a symlink is meaningless,
1102 # so let's not report it. (That way scripts comparing
1103 # before/after won't trigger.)
1104 if xstat.lutime or not stat.S_ISLNK(meta.mode):
1105 result.append(b'atime: ' + xstat.fstime_to_sec_bytes(meta.atime))
1107 result.append(b'atime: 0')
1108 if 'mtime' in fields:
1109 if xstat.lutime or not stat.S_ISLNK(meta.mode):
1110 result.append(b'mtime: ' + xstat.fstime_to_sec_bytes(meta.mtime))
1112 result.append(b'mtime: 0')
1113 if 'ctime' in fields:
1114 result.append(b'ctime: ' + xstat.fstime_to_sec_bytes(meta.ctime))
1115 if 'linux-attr' in fields and meta.linux_attr:
1116 result.append(b'linux-attr: %x' % meta.linux_attr)
1117 if 'linux-xattr' in fields and meta.linux_xattr:
1118 for name, value in meta.linux_xattr:
1119 result.append(b'linux-xattr: %s -> %s' % (name, value))
1120 if 'posix1e-acl' in fields and meta.posix1e_acl:
1121 acl = meta.posix1e_acl[0]
1122 result.append(b'posix1e-acl: ' + acl + b'\n')
1123 if stat.S_ISDIR(meta.mode):
1124 def_acl = meta.posix1e_acl[2]
1125 result.append(b'posix1e-acl-default: ' + def_acl + b'\n')
1126 return b'\n'.join(result)
1129 class _ArchiveIterator:
1132 return Metadata.read(self._file)
1134 raise StopIteration()
1141 def __init__(self, file):
1145 def display_archive(file, out):
1148 for meta in _ArchiveIterator(file):
1151 out.write(detailed_bytes(meta))
1155 for meta in _ArchiveIterator(file):
1156 out.write(summary_bytes(meta))
1159 for meta in _ArchiveIterator(file):
1161 log('bup: no metadata path, but asked to only display path'
1162 ' (increase verbosity?)')
1164 out.write(meta.path)
1168 def start_extract(file, create_symlinks=True):
1169 for meta in _ArchiveIterator(file):
1170 if not meta: # Hit end record.
1173 print(path_msg(meta.path), file=sys.stderr)
1174 xpath = _clean_up_extract_path(meta.path)
1176 add_error(Exception('skipping risky path "%s"'
1177 % path_msg(meta.path)))
1180 _set_up_path(meta, create_symlinks=create_symlinks)
1183 def finish_extract(file, restore_numeric_ids=False):
1185 for meta in _ArchiveIterator(file):
1186 if not meta: # Hit end record.
1188 xpath = _clean_up_extract_path(meta.path)
1190 add_error(Exception('skipping risky path "%s"'
1191 % path_msg(meta.path)))
1193 if os.path.isdir(meta.path):
1194 all_dirs.append(meta)
1197 print(path_msg(meta.path), file=sys.stderr)
1198 meta.apply_to_path(path=xpath,
1199 restore_numeric_ids=restore_numeric_ids)
1200 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
1201 for dir in all_dirs:
1202 # Don't need to check xpath -- won't be in all_dirs if not OK.
1203 xpath = _clean_up_extract_path(dir.path)
1205 print(path_msg(dir.path), file=sys.stderr)
1206 dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids)
1209 def extract(file, restore_numeric_ids=False, create_symlinks=True):
1210 # For now, just store all the directories and handle them last,
1213 for meta in _ArchiveIterator(file):
1214 if not meta: # Hit end record.
1216 xpath = _clean_up_extract_path(meta.path)
1218 add_error(Exception('skipping risky path "%s"'
1219 % path_msg(meta.path)))
1223 print('+', path_msg(meta.path), file=sys.stderr)
1224 _set_up_path(meta, create_symlinks=create_symlinks)
1225 if os.path.isdir(meta.path):
1226 all_dirs.append(meta)
1229 print('=', path_msg(meta.path), file=sys.stderr)
1230 meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
1231 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
1232 for dir in all_dirs:
1233 # Don't need to check xpath -- won't be in all_dirs if not OK.
1234 xpath = _clean_up_extract_path(dir.path)
1236 print('=', path_msg(xpath), file=sys.stderr)
1237 # Shouldn't have to check for risky paths here (omitted above).
1238 dir.apply_to_path(path=dir.path,
1239 restore_numeric_ids=restore_numeric_ids)