1 """Metadata read/write support for bup."""
3 # Copyright (C) 2010 Rob Browning
5 # This code is covered under the terms of the GNU Library General
6 # Public License as described in the bup LICENSE file.
8 from __future__ import absolute_import, print_function
9 from binascii import hexlify
10 from copy import deepcopy
11 from errno import EACCES, EINVAL, ENOTTY, ENOSYS, EOPNOTSUPP
12 from io import BytesIO
13 from time import gmtime, strftime
14 import errno, os, sys, stat, time, pwd, grp, socket, struct
16 from bup import compat, vint, xstat
17 from bup.compat import py_maj
18 from bup.drecurse import recursive_dirlist
19 from bup.helpers import add_error, mkdirp, log, is_superuser, format_filesize
20 from bup.io import path_msg
21 from bup.pwdgrp import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
22 from bup.xstat import utime, lutime
25 if sys.platform.startswith('linux'):
26 # prefer python-pyxattr (it's a lot faster), but fall back to python-xattr
27 # as the two are incompatible and only one can be installed on a system
31 log('Warning: Linux xattr support missing; install python-pyxattr.\n')
32 if xattr and getattr(xattr, 'get_all', None) is None:
34 from xattr import pyxattr_compat as xattr
35 if not isinstance(xattr.NS_USER, bytes):
40 log('Warning: python-xattr module is too old; '
41 'upgrade or install python-pyxattr instead.\n')
44 if not (sys.platform.startswith('cygwin') \
45 or sys.platform.startswith('darwin') \
46 or sys.platform.startswith('netbsd')):
50 log('Warning: POSIX ACL support missing; install python-pylibacl.\n')
53 from bup._helpers import get_linux_file_attr, set_linux_file_attr
55 # No need for a warning here; the only reason they won't exist is that we're
56 # not on Linux, in which case files don't have any linux attrs anyway, so
57 # lacking the functions isn't a problem.
58 get_linux_file_attr = set_linux_file_attr = None
61 # See the bup_get_linux_file_attr() comments.
62 _suppress_linux_file_attr = \
63 sys.byteorder == 'big' and struct.calcsize('@l') > struct.calcsize('@i')
65 def check_linux_file_attr_api():
66 global get_linux_file_attr, set_linux_file_attr
67 if not (get_linux_file_attr or set_linux_file_attr):
69 if _suppress_linux_file_attr:
70 log('Warning: Linux attr support disabled (see "bup help index").\n')
71 get_linux_file_attr = set_linux_file_attr = None
74 # WARNING: the metadata encoding is *not* stable yet. Caveat emptor!
76 # Q: Consider hardlink support?
77 # Q: Is it OK to store raw linux attr (chattr) flags?
78 # Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
79 # Q: Is the application of posix1e has_extended() correct?
80 # Q: Is one global --numeric-ids argument sufficient?
81 # Q: Do nfsv4 acls trump posix1e acls? (seems likely)
82 # Q: Add support for crtime -- ntfs, and (only internally?) ext*?
84 # FIXME: Fix relative/abs path detection/stripping wrt other platforms.
85 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
86 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
87 # FIXME: Consider pack('vvvvsss', ...) optimization.
91 # osx (varies between hfs and hfs+):
92 # type - regular dir char block fifo socket ...
93 # perms - rwxrwxrwxsgt
94 # times - ctime atime mtime
97 # hard-link-info (hfs+ only)
100 # attributes-osx see chflags
106 # type - regular dir ...
107 # times - creation, modification, posix change, access
110 # attributes - see attrib
112 # forks (alternate data streams)
116 # type - regular dir ...
117 # perms - rwxrwxrwx (maybe - see wikipedia)
118 # times - creation, modification, access
119 # attributes - see attrib
123 _have_lchmod = hasattr(os, 'lchmod')
126 def _clean_up_path_for_archive(p):
127 # Not the most efficient approach.
130 # Take everything after any '/../'.
131 pos = result.rfind(b'/../')
133 result = result[result.rfind(b'/../') + 4:]
135 # Take everything after any remaining '../'.
136 if result.startswith(b"../"):
139 # Remove any '/./' sequences.
140 pos = result.find(b'/./')
142 result = result[0:pos] + b'/' + result[pos + 3:]
143 pos = result.find(b'/./')
145 # Remove any leading '/'s.
146 result = result.lstrip(b'/')
148 # Replace '//' with '/' everywhere.
149 pos = result.find(b'//')
151 result = result[0:pos] + b'/' + result[pos + 2:]
152 pos = result.find(b'//')
154 # Take everything after any remaining './'.
155 if result.startswith(b'./'):
158 # Take everything before any remaining '/.'.
159 if result.endswith(b'/.'):
162 if result == b'' or result.endswith(b'/..'):
169 if p.startswith(b'/'):
171 if p.find(b'/../') != -1:
173 if p.startswith(b'../'):
175 if p.endswith(b'/..'):
180 def _clean_up_extract_path(p):
181 result = p.lstrip(b'/')
184 elif _risky_path(result):
190 # These tags are currently conceptually private to Metadata, and they
191 # must be unique, and must *never* be changed.
194 _rec_tag_common_v1 = 2 # times, user, group, type, perms, etc. (legacy/broken)
195 _rec_tag_symlink_target = 3
196 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc.
197 _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e? (unimplemented)
198 _rec_tag_linux_attr = 6 # lsattr(1) chattr(1)
199 _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1)
200 _rec_tag_hardlink_target = 8 # hard link target path
201 _rec_tag_common_v2 = 9 # times, user, group, type, perms, etc. (current)
202 _rec_tag_common_v3 = 10 # adds optional size to v2
204 _warned_about_attr_einval = None
207 class ApplyError(Exception):
208 # Thrown when unable to apply any given bit of metadata to a path.
213 # Metadata is stored as a sequence of tagged binary records. Each
214 # record will have some subset of add, encode, load, create, and
215 # apply methods, i.e. _add_foo...
217 # We do allow an "empty" object as a special case, i.e. no
218 # records. One can be created by trying to write Metadata(), and
219 # for such an object, read() will return None. This is used by
220 # "bup save", for example, as a placeholder in cases where
223 # NOTE: if any relevant fields are added or removed, be sure to
224 # update same_file() below.
228 # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00, ns
229 # must be non-negative and < 10**9.
231 def _add_common(self, path, st):
232 assert(st.st_uid >= 0)
233 assert(st.st_gid >= 0)
234 self.size = st.st_size
237 self.atime = st.st_atime
238 self.mtime = st.st_mtime
239 self.ctime = st.st_ctime
240 self.user = self.group = b''
241 entry = pwd_from_uid(st.st_uid)
243 self.user = entry.pw_name
244 entry = grp_from_gid(st.st_gid)
246 self.group = entry.gr_name
247 self.mode = st.st_mode
248 # Only collect st_rdev if we might need it for a mknod()
249 # during restore. On some platforms (i.e. kFreeBSD), it isn't
250 # stable for other file types. For example "cp -a" will
251 # change it for a plain file.
252 if stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
253 self.rdev = st.st_rdev
257 def _same_common(self, other):
258 """Return true or false to indicate similarity in the hardlink sense."""
259 return self.uid == other.uid \
260 and self.gid == other.gid \
261 and self.rdev == other.rdev \
262 and self.mtime == other.mtime \
263 and self.ctime == other.ctime \
264 and self.user == other.user \
265 and self.group == other.group \
266 and self.size == other.size
268 def _encode_common(self):
271 atime = xstat.nsecs_to_timespec(self.atime)
272 mtime = xstat.nsecs_to_timespec(self.mtime)
273 ctime = xstat.nsecs_to_timespec(self.ctime)
274 result = vint.pack('vvsvsvvVvVvVv',
287 self.size if self.size is not None else -1)
290 def _load_common_rec(self, port, version=3):
292 # Added trailing size to v2, negative when None.
293 unpack_fmt = 'vvsvsvvVvVvVv'
295 unpack_fmt = 'vvsvsvvVvVvV'
297 unpack_fmt = 'VVsVsVvVvVvV'
299 raise Exception('unexpected common_rec version %d' % version)
300 data = vint.read_bvec(port)
301 values = vint.unpack(unpack_fmt, data)
303 (self.mode, self.uid, self.user, self.gid, self.group,
305 self.atime, atime_ns,
306 self.mtime, mtime_ns,
307 self.ctime, ctime_ns, size) = values
311 (self.mode, self.uid, self.user, self.gid, self.group,
313 self.atime, atime_ns,
314 self.mtime, mtime_ns,
315 self.ctime, ctime_ns) = values
316 self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
317 self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
318 self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
320 def _recognized_file_type(self):
321 return stat.S_ISREG(self.mode) \
322 or stat.S_ISDIR(self.mode) \
323 or stat.S_ISCHR(self.mode) \
324 or stat.S_ISBLK(self.mode) \
325 or stat.S_ISFIFO(self.mode) \
326 or stat.S_ISSOCK(self.mode) \
327 or stat.S_ISLNK(self.mode)
329 def _create_via_common_rec(self, path, create_symlinks=True):
331 raise ApplyError('no metadata - cannot create path '
334 # If the path already exists and is a dir, try rmdir.
335 # If the path already exists and is anything else, try unlink.
338 st = xstat.lstat(path)
340 if e.errno != errno.ENOENT:
343 if stat.S_ISDIR(st.st_mode):
347 if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
348 raise Exception('refusing to overwrite non-empty dir '
354 if stat.S_ISREG(self.mode):
355 assert(self._recognized_file_type())
356 fd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL, 0o600)
358 elif stat.S_ISDIR(self.mode):
359 assert(self._recognized_file_type())
360 os.mkdir(path, 0o700)
361 elif stat.S_ISCHR(self.mode):
362 assert(self._recognized_file_type())
363 os.mknod(path, 0o600 | stat.S_IFCHR, self.rdev)
364 elif stat.S_ISBLK(self.mode):
365 assert(self._recognized_file_type())
366 os.mknod(path, 0o600 | stat.S_IFBLK, self.rdev)
367 elif stat.S_ISFIFO(self.mode):
368 assert(self._recognized_file_type())
369 os.mkfifo(path, 0o600 | stat.S_IFIFO)
370 elif stat.S_ISSOCK(self.mode):
372 os.mknod(path, 0o600 | stat.S_IFSOCK)
374 if e.errno in (errno.EINVAL, errno.EPERM):
375 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
379 elif stat.S_ISLNK(self.mode):
380 assert(self._recognized_file_type())
381 if self.symlink_target and create_symlinks:
382 # on MacOS, symlink() permissions depend on umask, and there's
383 # no way to chown a symlink after creating it, so we have to
385 oldumask = os.umask((self.mode & 0o777) ^ 0o777)
387 os.symlink(self.symlink_target, path)
390 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
392 assert(not self._recognized_file_type())
393 add_error('not creating "%s" with unrecognized mode "0x%x"\n'
394 % (path_msg(path), self.mode))
396 def _apply_common_rec(self, path, restore_numeric_ids=False):
398 raise ApplyError('no metadata - cannot apply to ' + path_msg(path))
400 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
401 # EACCES errors at this stage are fatal for the current path.
402 if lutime and stat.S_ISLNK(self.mode):
404 lutime(path, (self.atime, self.mtime))
406 if e.errno == errno.EACCES:
407 raise ApplyError('lutime: %s' % e)
412 utime(path, (self.atime, self.mtime))
414 if e.errno == errno.EACCES:
415 raise ApplyError('utime: %s' % e)
419 uid = gid = -1 # By default, do nothing.
423 if not restore_numeric_ids:
424 if self.uid != 0 and self.user:
425 entry = pwd_from_name(self.user)
428 if self.gid != 0 and self.group:
429 entry = grp_from_name(self.group)
432 else: # not superuser - only consider changing the group/gid
433 user_gids = os.getgroups()
434 if self.gid in user_gids:
436 if not restore_numeric_ids and self.gid != 0:
437 # The grp might not exist on the local system.
438 grps = filter(None, [grp_from_gid(x) for x in user_gids])
439 if self.group in [x.gr_name for x in grps]:
440 g = grp_from_name(self.group)
444 if uid != -1 or gid != -1:
446 os.lchown(path, uid, gid)
448 if e.errno == errno.EPERM:
449 add_error('lchown: %s' % e)
450 elif sys.platform.startswith('cygwin') \
451 and e.errno == errno.EINVAL:
452 add_error('lchown: unknown uid/gid (%d/%d) for %s'
453 % (uid, gid, path_msg(path)))
459 os.lchmod(path, stat.S_IMODE(self.mode))
460 except errno.ENOSYS: # Function not implemented
462 elif not stat.S_ISLNK(self.mode):
463 os.chmod(path, stat.S_IMODE(self.mode))
468 def _encode_path(self):
470 return vint.pack('s', self.path)
def _load_path_rec(self, port):
    """Read one length-prefixed record from port and store the single
    's' field it contains as self.path."""
    record = vint.read_bvec(port)
    fields = vint.unpack('s', record)
    self.path = fields[0]
480 def _add_symlink_target(self, path, st):
482 if stat.S_ISLNK(st.st_mode):
483 self.symlink_target = os.readlink(path)
485 add_error('readlink: %s' % e)
487 def _encode_symlink_target(self):
488 return self.symlink_target
490 def _load_symlink_target_rec(self, port):
491 target = vint.read_bvec(port)
492 self.symlink_target = target
493 if self.size is None:
494 self.size = len(target)
496 assert(self.size == len(target))
501 def _add_hardlink_target(self, target):
502 self.hardlink_target = target
504 def _same_hardlink_target(self, other):
505 """Return true or false to indicate similarity in the hardlink sense."""
506 return self.hardlink_target == other.hardlink_target
508 def _encode_hardlink_target(self):
509 return self.hardlink_target
def _load_hardlink_target_rec(self, port):
    """Read one length-prefixed record from port and store it, as-is,
    as self.hardlink_target."""
    target = vint.read_bvec(port)
    self.hardlink_target = target
515 ## POSIX1e ACL records
517 # Recorded as a list:
518 # [txt_id_acl, num_id_acl]
519 # or, if a directory:
520 # [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
521 # The numeric/text distinction only matters when reading/restoring
523 def _add_posix1e_acl(self, path, st):
524 if not posix1e or not posix1e.HAS_EXTENDED_CHECK:
526 if not stat.S_ISLNK(st.st_mode):
530 if posix1e.has_extended(path):
531 acl = posix1e.ACL(file=path)
532 acls = [acl, acl] # txt and num are the same
533 if stat.S_ISDIR(st.st_mode):
534 def_acl = posix1e.ACL(filedef=(path if py_maj < 3
535 else path.decode('iso-8859-1')))
536 def_acls = [def_acl, def_acl]
537 except EnvironmentError as e:
538 if e.errno not in (errno.EOPNOTSUPP, errno.ENOSYS):
541 txt_flags = posix1e.TEXT_ABBREVIATE
542 num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
543 acl_rep = [acls[0].to_any_text('', b'\n', txt_flags),
544 acls[1].to_any_text('', b'\n', num_flags)]
546 acl_rep.append(def_acls[0].to_any_text('', b'\n', txt_flags))
547 acl_rep.append(def_acls[1].to_any_text('', b'\n', num_flags))
548 self.posix1e_acl = acl_rep
550 def _same_posix1e_acl(self, other):
551 """Return true or false to indicate similarity in the hardlink sense."""
552 return self.posix1e_acl == other.posix1e_acl
554 def _encode_posix1e_acl(self):
555 # Encode as two strings (w/default ACL string possibly empty).
557 acls = self.posix1e_acl
559 acls.extend([b'', b''])
560 return vint.pack('ssss', acls[0], acls[1], acls[2], acls[3])
def _load_posix1e_acl_rec(self, port):
    """Read a POSIX1e ACL record from port into self.posix1e_acl.

    The record always carries four 's' fields.  When the third field
    is empty, only the first two fields are kept; otherwise all four
    are stored.
    """
    record = vint.read_bvec(port)
    fields = vint.unpack('ssss', record)
    # An empty third field means the trailing pair carries nothing.
    self.posix1e_acl = fields[:2] if fields[2] == b'' else fields
570 def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
571 def apply_acl(acl_rep, kind):
573 acl = posix1e.ACL(text=acl_rep.decode('ascii'))
576 # pylibacl appears to return an IOError with errno
577 # set to 0 if a group referred to by the ACL rep
578 # doesn't exist on the current system.
579 raise ApplyError("POSIX1e ACL: can't create %r for %r"
580 % (acl_rep, path_msg(path)))
584 acl.applyto(path, kind)
586 if e.errno == errno.EPERM or e.errno == errno.EOPNOTSUPP:
587 raise ApplyError('POSIX1e ACL applyto: %s' % e)
593 add_error("%s: can't restore ACLs; posix1e support missing.\n"
597 acls = self.posix1e_acl
599 if restore_numeric_ids:
600 apply_acl(acls[3], posix1e.ACL_TYPE_DEFAULT)
602 apply_acl(acls[2], posix1e.ACL_TYPE_DEFAULT)
603 if restore_numeric_ids:
604 apply_acl(acls[1], posix1e.ACL_TYPE_ACCESS)
606 apply_acl(acls[0], posix1e.ACL_TYPE_ACCESS)
609 ## Linux attributes (lsattr(1), chattr(1))
611 def _add_linux_attr(self, path, st):
612 check_linux_file_attr_api()
613 if not get_linux_file_attr: return
614 if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
616 attr = get_linux_file_attr(path)
618 self.linux_attr = attr
620 if e.errno == errno.EACCES:
621 add_error('read Linux attr: %s' % e)
622 elif e.errno in (ENOTTY, ENOSYS, EOPNOTSUPP):
623 # Assume filesystem doesn't support attrs.
625 elif e.errno == EINVAL:
626 global _warned_about_attr_einval
627 if not _warned_about_attr_einval:
628 log("Ignoring attr EINVAL;"
629 + " if you're not using ntfs-3g, please report: "
630 + path_msg(path) + '\n')
631 _warned_about_attr_einval = True
636 def _same_linux_attr(self, other):
637 """Return true or false to indicate similarity in the hardlink sense."""
638 return self.linux_attr == other.linux_attr
640 def _encode_linux_attr(self):
642 return vint.pack('V', self.linux_attr)
def _load_linux_attr_rec(self, port):
    """Read one length-prefixed record from port and store its single
    'V' field as self.linux_attr."""
    self.linux_attr = vint.unpack('V', vint.read_bvec(port))[0]
650 def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
652 check_linux_file_attr_api()
653 if not set_linux_file_attr:
654 add_error("%s: can't restore linuxattrs: "
655 "linuxattr support missing.\n" % path_msg(path))
658 set_linux_file_attr(path, self.linux_attr)
660 if e.errno in (EACCES, ENOTTY, EOPNOTSUPP, ENOSYS):
661 raise ApplyError('Linux chattr: %s (0x%s)'
662 % (e, hex(self.linux_attr)))
663 elif e.errno == EINVAL:
664 msg = "if you're not using ntfs-3g, please report"
665 raise ApplyError('Linux chattr: %s (0x%s) (%s)'
666 % (e, hex(self.linux_attr), msg))
671 ## Linux extended attributes (getfattr(1), setfattr(1))
673 def _add_linux_xattr(self, path, st):
676 self.linux_xattr = xattr.get_all(path, nofollow=True)
677 except EnvironmentError as e:
678 if e.errno != errno.EOPNOTSUPP:
681 def _same_linux_xattr(self, other):
682 """Return true or false to indicate similarity in the hardlink sense."""
683 return self.linux_xattr == other.linux_xattr
685 def _encode_linux_xattr(self):
687 result = vint.pack('V', len(self.linux_xattr))
688 for name, value in self.linux_xattr:
689 result += vint.pack('ss', name, value)
694 def _load_linux_xattr_rec(self, file):
695 data = vint.read_bvec(file)
696 memfile = BytesIO(data)
698 for i in range(vint.read_vuint(memfile)):
699 key = vint.read_bvec(memfile)
700 value = vint.read_bvec(memfile)
701 result.append((key, value))
702 self.linux_xattr = result
704 def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
707 add_error("%s: can't restore xattr; xattr support missing.\n"
710 if not self.linux_xattr:
713 existing_xattrs = set(xattr.list(path, nofollow=True))
715 if e.errno == errno.EACCES:
716 raise ApplyError('xattr.set %r: %s' % (path_msg(path), e))
719 for k, v in self.linux_xattr:
720 if k not in existing_xattrs \
721 or v != xattr.get(path, k, nofollow=True):
723 xattr.set(path, k, v, nofollow=True)
725 if e.errno == errno.EPERM \
726 or e.errno == errno.EOPNOTSUPP:
727 raise ApplyError('xattr.set %r: %s' % (path_msg(path), e))
730 existing_xattrs -= frozenset([k])
731 for k in existing_xattrs:
733 xattr.remove(path, k, nofollow=True)
735 if e.errno in (errno.EPERM, errno.EACCES):
736 raise ApplyError('xattr.remove %r: %s' % (path_msg(path), e))
741 self.mode = self.uid = self.gid = self.user = self.group = None
742 self.atime = self.mtime = self.ctime = None
746 self.symlink_target = None
747 self.hardlink_target = None
748 self.linux_attr = None
749 self.linux_xattr = None
750 self.posix1e_acl = None
752 def __eq__(self, other):
753 if not isinstance(other, Metadata): return False
754 if self.mode != other.mode: return False
755 if self.mtime != other.mtime: return False
756 if self.ctime != other.ctime: return False
757 if self.atime != other.atime: return False
758 if self.path != other.path: return False
759 if self.uid != other.uid: return False
760 if self.gid != other.gid: return False
761 if self.size != other.size: return False
762 if self.user != other.user: return False
763 if self.group != other.group: return False
764 if self.symlink_target != other.symlink_target: return False
765 if self.hardlink_target != other.hardlink_target: return False
766 if self.linux_attr != other.linux_attr: return False
767 if self.posix1e_acl != other.posix1e_acl: return False
770 def __ne__(self, other):
771 return not self.__eq__(other)
774 return hash((self.mode,
785 self.hardlink_target,
790 result = ['<%s instance at %s' % (self.__class__, hex(id(self)))]
791 if self.path is not None:
792 result += ' path:' + repr(self.path)
793 if self.mode is not None:
794 result += ' mode: %o (%s)' % (self.mode, xstat.mode_str(self.mode))
795 if self.uid is not None:
796 result += ' uid:' + str(self.uid)
797 if self.gid is not None:
798 result += ' gid:' + str(self.gid)
799 if self.user is not None:
800 result += ' user:' + repr(self.user)
801 if self.group is not None:
802 result += ' group:' + repr(self.group)
803 if self.size is not None:
804 result += ' size:' + repr(self.size)
805 for name, val in (('atime', self.atime),
806 ('mtime', self.mtime),
807 ('ctime', self.ctime)):
809 result += ' %s:%r (%d)' \
811 strftime('%Y-%m-%d %H:%M %z',
812 gmtime(xstat.fstime_floor_secs(val))),
815 return ''.join(result)
817 def write(self, port, include_path=True):
818 records = include_path and [(_rec_tag_path, self._encode_path())] or []
819 records.extend([(_rec_tag_common_v3, self._encode_common()),
820 (_rec_tag_symlink_target,
821 self._encode_symlink_target()),
822 (_rec_tag_hardlink_target,
823 self._encode_hardlink_target()),
824 (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
825 (_rec_tag_linux_attr, self._encode_linux_attr()),
826 (_rec_tag_linux_xattr, self._encode_linux_xattr())])
827 for tag, data in records:
829 vint.write_vuint(port, tag)
830 vint.write_bvec(port, data)
831 vint.write_vuint(port, _rec_tag_end)
833 def encode(self, include_path=True):
835 self.write(port, include_path)
836 return port.getvalue()
839 return deepcopy(self)
843 # This method should either return a valid Metadata object,
844 # return None if there was no information at all (just a
845 # _rec_tag_end), throw EOFError if there was nothing at all to
846 # read, or throw an Exception if a valid object could not be
848 tag = vint.read_vuint(port)
849 if tag == _rec_tag_end:
851 try: # From here on, EOF is an error.
853 while True: # only exit is error (exception) or _rec_tag_end
854 if tag == _rec_tag_path:
855 result._load_path_rec(port)
856 elif tag == _rec_tag_common_v3:
857 result._load_common_rec(port, version=3)
858 elif tag == _rec_tag_common_v2:
859 result._load_common_rec(port, version=2)
860 elif tag == _rec_tag_symlink_target:
861 result._load_symlink_target_rec(port)
862 elif tag == _rec_tag_hardlink_target:
863 result._load_hardlink_target_rec(port)
864 elif tag == _rec_tag_posix1e_acl:
865 result._load_posix1e_acl_rec(port)
866 elif tag == _rec_tag_linux_attr:
867 result._load_linux_attr_rec(port)
868 elif tag == _rec_tag_linux_xattr:
869 result._load_linux_xattr_rec(port)
870 elif tag == _rec_tag_end:
872 elif tag == _rec_tag_common_v1: # Should be very rare.
873 result._load_common_rec(port, version=1)
874 else: # unknown record
876 tag = vint.read_vuint(port)
878 raise Exception("EOF while reading Metadata")
881 return stat.S_ISDIR(self.mode)
def create_path(self, path, create_symlinks=True):
    """Create path by delegating to _create_via_common_rec(), passing
    create_symlinks through by keyword."""
    make = self._create_via_common_rec
    make(path, create_symlinks=create_symlinks)
886 def apply_to_path(self, path=None, restore_numeric_ids=False):
887 # apply metadata to path -- file must exist
891 raise Exception('Metadata.apply_to_path() called with no path')
892 if not self._recognized_file_type():
893 add_error('not applying metadata to "%s"' % path_msg(path)
894 + ' with unrecognized mode "0x%x"\n' % self.mode)
896 num_ids = restore_numeric_ids
897 for apply_metadata in (self._apply_common_rec,
898 self._apply_posix1e_acl_rec,
899 self._apply_linux_attr_rec,
900 self._apply_linux_xattr_rec):
902 apply_metadata(path, restore_numeric_ids=num_ids)
903 except ApplyError as e:
906 def same_file(self, other):
907 """Compare this to other for equivalency. Return true if
908 their information implies they could represent the same file
909 on disk, in the hardlink sense. Assume they're both regular
911 return self._same_common(other) \
912 and self._same_hardlink_target(other) \
913 and self._same_posix1e_acl(other) \
914 and self._same_linux_attr(other) \
915 and self._same_linux_xattr(other)
918 def from_path(path, statinfo=None, archive_path=None,
919 save_symlinks=True, hardlink_target=None,
921 """Return the metadata associated with the path. When normalized is
922 true, return the metadata appropriate for a typical save, which
923 may or may not be all of it."""
925 result.path = archive_path
926 st = statinfo or xstat.lstat(path)
927 result._add_common(path, st)
929 result._add_symlink_target(path, st)
930 result._add_hardlink_target(hardlink_target)
931 result._add_posix1e_acl(path, st)
932 result._add_linux_attr(path, st)
933 result._add_linux_xattr(path, st)
935 # Only store sizes for regular files and symlinks for now.
936 if not (stat.S_ISREG(result.mode) or stat.S_ISLNK(result.mode)):
941 def save_tree(output_file, paths,
947 # Issue top-level rewrite warnings.
949 safe_path = _clean_up_path_for_archive(path)
950 if safe_path != path:
951 log('archiving "%s" as "%s"\n'
952 % (path_msg(path), path_msg(safe_path)))
956 safe_path = _clean_up_path_for_archive(p)
958 if stat.S_ISDIR(st.st_mode):
960 m = from_path(p, statinfo=st, archive_path=safe_path,
961 save_symlinks=save_symlinks)
963 print(m.path, file=sys.stderr)
964 m.write(output_file, include_path=write_paths)
966 start_dir = os.getcwd()
968 for (p, st) in recursive_dirlist(paths, xdev=xdev):
969 dirlist_dir = os.getcwd()
971 safe_path = _clean_up_path_for_archive(p)
972 m = from_path(p, statinfo=st, archive_path=safe_path,
973 save_symlinks=save_symlinks)
975 print(m.path, file=sys.stderr)
976 m.write(output_file, include_path=write_paths)
977 os.chdir(dirlist_dir)
982 def _set_up_path(meta, create_symlinks=True):
983 # Allow directories to exist as a special case -- might have
984 # been created by an earlier longer path.
988 parent = os.path.dirname(meta.path)
991 meta.create_path(meta.path, create_symlinks=create_symlinks)
994 all_fields = frozenset(['path',
1011 def summary_bytes(meta, numeric_ids = False, classification = None,
1012 human_readable = False):
1013 """Return bytes containing the "ls -l" style listing for meta.
1014 Classification may be "all", "type", or None."""
1015 user_str = group_str = size_or_dev_str = '?'
1016 symlink_target = None
1019 mode_str = xstat.mode_str(meta.mode).encode('ascii')
1020 symlink_target = meta.symlink_target
1021 mtime_secs = xstat.fstime_floor_secs(meta.mtime)
1022 mtime_str = strftime('%Y-%m-%d %H:%M',
1023 time.localtime(mtime_secs)).encode('ascii')
1024 if meta.user and not numeric_ids:
1025 user_str = meta.user
1026 elif meta.uid != None:
1027 user_str = str(meta.uid).encode()
1028 if meta.group and not numeric_ids:
1029 group_str = meta.group
1030 elif meta.gid != None:
1031 group_str = str(meta.gid).encode()
1032 if stat.S_ISCHR(meta.mode) or stat.S_ISBLK(meta.mode):
1034 size_or_dev_str = ('%d,%d' % (os.major(meta.rdev),
1035 os.minor(meta.rdev))).encode()
1036 elif meta.size != None:
1038 size_or_dev_str = format_filesize(meta.size).encode()
1040 size_or_dev_str = str(meta.size).encode()
1042 size_or_dev_str = b'-'
1044 classification_str = \
1045 xstat.classification_str(meta.mode,
1046 classification == 'all').encode()
1048 mode_str = b'?' * 10
1049 mtime_str = b'????-??-?? ??:??'
1050 classification_str = b'?'
1054 name += classification_str
1056 name += b' -> ' + meta.symlink_target
1058 return b'%-10s %-11s %11s %16s %s' % (mode_str,
1059 user_str + b'/' + group_str,
1065 def detailed_bytes(meta, fields = None):
1066 # FIXME: should optional fields be omitted, or empty i.e. "rdev:
1067 # 0", "link-target:", etc.
1072 if 'path' in fields:
1073 path = meta.path or b''
1074 result.append(b'path: ' + path)
1075 if 'mode' in fields:
1076 result.append(b'mode: %o (%s)'
1077 % (meta.mode, xstat.mode_str(meta.mode).encode('ascii')))
1078 if 'link-target' in fields and stat.S_ISLNK(meta.mode):
1079 result.append(b'link-target: ' + meta.symlink_target)
1080 if 'rdev' in fields:
1082 result.append(b'rdev: %d,%d' % (os.major(meta.rdev),
1083 os.minor(meta.rdev)))
1085 result.append(b'rdev: 0')
1086 if 'size' in fields and meta.size is not None:
1087 result.append(b'size: %d' % meta.size)
1089 result.append(b'uid: %d' % meta.uid)
1091 result.append(b'gid: %d' % meta.gid)
1092 if 'user' in fields:
1093 result.append(b'user: ' + meta.user)
1094 if 'group' in fields:
1095 result.append(b'group: ' + meta.group)
1096 if 'atime' in fields:
1097 # If we don't have xstat.lutime, that means we have to use
1098 # utime(), and utime() has no way to set the mtime/atime of a
1099 # symlink. Thus, the mtime/atime of a symlink is meaningless,
1100 # so let's not report it. (That way scripts comparing
1101 # before/after won't trigger.)
1102 if xstat.lutime or not stat.S_ISLNK(meta.mode):
1103 result.append(b'atime: ' + xstat.fstime_to_sec_bytes(meta.atime))
1105 result.append(b'atime: 0')
1106 if 'mtime' in fields:
1107 if xstat.lutime or not stat.S_ISLNK(meta.mode):
1108 result.append(b'mtime: ' + xstat.fstime_to_sec_bytes(meta.mtime))
1110 result.append(b'mtime: 0')
1111 if 'ctime' in fields:
1112 result.append(b'ctime: ' + xstat.fstime_to_sec_bytes(meta.ctime))
1113 if 'linux-attr' in fields and meta.linux_attr:
1114 result.append(b'linux-attr: %x' % meta.linux_attr)
1115 if 'linux-xattr' in fields and meta.linux_xattr:
1116 for name, value in meta.linux_xattr:
1117 result.append(b'linux-xattr: %s -> %s' % (name, value))
1118 if 'posix1e-acl' in fields and meta.posix1e_acl:
1119 acl = meta.posix1e_acl[0]
1120 result.append(b'posix1e-acl: ' + acl + b'\n')
1121 if stat.S_ISDIR(meta.mode):
1122 def_acl = meta.posix1e_acl[2]
1123 result.append(b'posix1e-acl-default: ' + def_acl + b'\n')
1124 return b'\n'.join(result)
1127 class _ArchiveIterator:
1130 return Metadata.read(self._file)
1132 raise StopIteration()
1139 def __init__(self, file):
1143 def display_archive(file, out):
1146 for meta in _ArchiveIterator(file):
1149 out.write(detailed_bytes(meta))
1153 for meta in _ArchiveIterator(file):
1154 out.write(summary_bytes(meta))
1157 for meta in _ArchiveIterator(file):
1159 log('bup: no metadata path, but asked to only display path'
1160 ' (increase verbosity?)')
1162 out.write(meta.path)
1166 def start_extract(file, create_symlinks=True):
1167 for meta in _ArchiveIterator(file):
1168 if not meta: # Hit end record.
1171 print(path_msg(meta.path), file=sys.stderr)
1172 xpath = _clean_up_extract_path(meta.path)
1174 add_error(Exception('skipping risky path "%s"'
1175 % path_msg(meta.path)))
1178 _set_up_path(meta, create_symlinks=create_symlinks)
1181 def finish_extract(file, restore_numeric_ids=False):
1183 for meta in _ArchiveIterator(file):
1184 if not meta: # Hit end record.
1186 xpath = _clean_up_extract_path(meta.path)
1188 add_error(Exception('skipping risky path "%s"'
1189 % path_msg(dir.path)))
1191 if os.path.isdir(meta.path):
1192 all_dirs.append(meta)
1195 print(path_msg(meta.path), file=sys.stderr)
1196 meta.apply_to_path(path=xpath,
1197 restore_numeric_ids=restore_numeric_ids)
1198 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
1199 for dir in all_dirs:
1200 # Don't need to check xpath -- won't be in all_dirs if not OK.
1201 xpath = _clean_up_extract_path(dir.path)
1203 print(path_msg(dir.path), file=sys.stderr)
1204 dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids)
1207 def extract(file, restore_numeric_ids=False, create_symlinks=True):
1208 # For now, just store all the directories and handle them last,
1211 for meta in _ArchiveIterator(file):
1212 if not meta: # Hit end record.
1214 xpath = _clean_up_extract_path(meta.path)
1216 add_error(Exception('skipping risky path "%s"'
1217 % path_msg(meta.path)))
1221 print('+', path_msg(meta.path), file=sys.stderr)
1222 _set_up_path(meta, create_symlinks=create_symlinks)
1223 if os.path.isdir(meta.path):
1224 all_dirs.append(meta)
1227 print('=', path_msg(meta.path), file=sys.stderr)
1228 meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
1229 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
1230 for dir in all_dirs:
1231 # Don't need to check xpath -- won't be in all_dirs if not OK.
1232 xpath = _clean_up_extract_path(dir.path)
1234 print('=', path_msg(xpath), file=sys.stderr)
1235 # Shouldn't have to check for risky paths here (omitted above).
1236 dir.apply_to_path(path=dir.path,
1237 restore_numeric_ids=restore_numeric_ids)