1 """Metadata read/write support for bup."""
3 # Copyright (C) 2010 Rob Browning
5 # This code is covered under the terms of the GNU Library General
6 # Public License as described in the bup LICENSE file.
7 import errno, os, sys, stat, pwd, grp, struct, re
8 from cStringIO import StringIO
9 from bup import vint, xstat
10 from bup.drecurse import recursive_dirlist
11 from bup.helpers import add_error, mkdirp, log, is_superuser
12 from bup.xstat import utime, lutime, lstat
13 import bup._helpers as _helpers
18 log('Warning: Linux xattr support missing; install python-pyxattr.\n')
23 except AttributeError:
24 log('Warning: python-xattr module is too old; '
25 'install python-pyxattr instead.\n')
30 log('Warning: POSIX ACL support missing; install python-pylibacl.\n')
33 from bup._helpers import get_linux_file_attr, set_linux_file_attr
35 # No need for a warning here; the only reason they won't exist is that we're
36 # not on Linux, in which case files don't have any linux attrs anyway, so
37 # lacking the functions isn't a problem.
38 get_linux_file_attr = set_linux_file_attr = None
41 # WARNING: the metadata encoding is *not* stable yet. Caveat emptor!
43 # Q: Consider hardlink support?
44 # Q: Is it OK to store raw linux attr (chattr) flags?
45 # Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
46 # Q: Is the application of posix1e has_extended() correct?
47 # Q: Is one global --numeric-ids argument sufficient?
48 # Q: Do nfsv4 acls trump posix1e acls? (seems likely)
49 # Q: Add support for crtime -- ntfs, and (only internally?) ext*?
51 # FIXME: Fix relative/abs path detection/stripping wrt other platforms.
52 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
53 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
54 # FIXME: Consider pack('vvvvsss', ...) optimization.
55 # FIXME: Consider caching users/groups.
59 # osx (varies between hfs and hfs+):
60 # type - regular dir char block fifo socket ...
61 # perms - rwxrwxrwxsgt
62 # times - ctime atime mtime
65 # hard-link-info (hfs+ only)
68 # attributes-osx see chflags
74 # type - regular dir ...
75 # times - creation, modification, posix change, access
78 # attributes - see attrib
80 # forks (alternate data streams)
84 # type - regular dir ...
85 # perms - rwxrwxrwx (maybe - see wikipedia)
86 # times - creation, modification, access
87 # attributes - see attrib
91 _have_lchmod = hasattr(os, 'lchmod')
94 def _clean_up_path_for_archive(p):
95 # Not the most efficient approach.
98 # Take everything after any '/../'.
99 pos = result.rfind('/../')
101 result = result[result.rfind('/../') + 4:]
103 # Take everything after any remaining '../'.
104 if result.startswith("../"):
107 # Remove any '/./' sequences.
108 pos = result.find('/./')
110 result = result[0:pos] + '/' + result[pos + 3:]
111 pos = result.find('/./')
113 # Remove any leading '/'s.
114 result = result.lstrip('/')
116 # Replace '//' with '/' everywhere.
117 pos = result.find('//')
119 result = result[0:pos] + '/' + result[pos + 2:]
120 pos = result.find('//')
122 # Take everything after any remaining './'.
123 if result.startswith('./'):
126 # Take everything before any remaining '/.'.
127 if result.endswith('/.'):
130 if result == '' or result.endswith('/..'):
137 if p.startswith('/'):
139 if p.find('/../') != -1:
141 if p.startswith('../'):
143 if p.endswith('/..'):
148 def _clean_up_extract_path(p):
149 result = p.lstrip('/')
152 elif _risky_path(result):
158 # These tags are currently conceptually private to Metadata, and they
159 # must be unique, and must *never* be changed.
162 _rec_tag_common = 2 # times, owner, group, type, perms, etc.
163 _rec_tag_symlink_target = 3
164 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc.
165 _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e acls?
166 _rec_tag_linux_attr = 6 # lsattr(1) chattr(1)
167 _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1)
170 class ApplyError(Exception):
171 # Thrown when unable to apply any given bit of metadata to a path.
176 # Metadata is stored as a sequence of tagged binary records. Each
177 # record will have some subset of add, encode, load, create, and
178 # apply methods, i.e. _add_foo...
182 # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00, ns
183 # must be non-negative and < 10**9.
185 def _add_common(self, path, st):
186 self.mode = st.st_mode
189 self.rdev = st.st_rdev
190 self.atime = st.st_atime
191 self.mtime = st.st_mtime
192 self.ctime = st.st_ctime
193 self.owner = self.group = ''
195 self.owner = pwd.getpwuid(st.st_uid)[0]
# Report the uid whose name lookup failed; the pwd lookup uses st_uid,
# while st_gid belongs to the separate group lookup.
add_error("no user name for id %s '%s'" % (st.st_uid, path))
199 self.group = grp.getgrgid(st.st_gid)[0]
201 add_error("no group name for id %s '%s'" % (st.st_gid, path))
203 def _encode_common(self):
204 atime = xstat.nsecs_to_timespec(self.atime)
205 mtime = xstat.nsecs_to_timespec(self.mtime)
206 ctime = xstat.nsecs_to_timespec(self.ctime)
207 result = vint.pack('VVsVsVvVvVvV',
222 def _load_common_rec(self, port):
223 data = vint.read_bvec(port)
235 ctime_ns) = vint.unpack('VVsVsVvVvVvV', data)
236 self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
237 self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
238 self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
def _recognized_file_type(self):
    """Report whether self.mode names a file type this module handles.

    Returns True when self.mode is classified by the stat module as a
    regular file, directory, character device, block device, FIFO,
    socket, or symlink; returns False for any other mode.
    """
    known_type_tests = (stat.S_ISREG, stat.S_ISDIR, stat.S_ISCHR,
                        stat.S_ISBLK, stat.S_ISFIFO, stat.S_ISSOCK,
                        stat.S_ISLNK)
    for is_type in known_type_tests:
        if is_type(self.mode):
            return True
    return False
249 def _create_via_common_rec(self, path, create_symlinks=True):
250 # If the path already exists and is a dir, try rmdir.
251 # If the path already exists and is anything else, try unlink.
254 st = xstat.lstat(path)
256 if e.errno != errno.ENOENT:
259 if stat.S_ISDIR(st.st_mode):
263 if e.errno == errno.ENOTEMPTY:
264 msg = 'refusing to overwrite non-empty dir ' + path
270 if stat.S_ISREG(self.mode):
271 assert(self._recognized_file_type())
272 fd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL, 0600)
274 elif stat.S_ISDIR(self.mode):
275 assert(self._recognized_file_type())
277 elif stat.S_ISCHR(self.mode):
278 assert(self._recognized_file_type())
279 os.mknod(path, 0600 | stat.S_IFCHR, self.rdev)
280 elif stat.S_ISBLK(self.mode):
281 assert(self._recognized_file_type())
282 os.mknod(path, 0600 | stat.S_IFBLK, self.rdev)
283 elif stat.S_ISFIFO(self.mode):
284 assert(self._recognized_file_type())
285 os.mknod(path, 0600 | stat.S_IFIFO)
286 elif stat.S_ISSOCK(self.mode):
287 os.mknod(path, 0600 | stat.S_IFSOCK)
288 elif stat.S_ISLNK(self.mode):
289 assert(self._recognized_file_type())
290 if self.symlink_target and create_symlinks:
291 # on MacOS, symlink() permissions depend on umask, and there's
292 # no way to chown a symlink after creating it, so we have to
294 oldumask = os.umask((self.mode & 0777) ^ 0777)
296 os.symlink(self.symlink_target, path)
299 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
301 assert(not self._recognized_file_type())
302 add_error('not creating "%s" with unrecognized mode "0x%x"\n'
305 def _apply_common_rec(self, path, restore_numeric_ids=False):
306 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
307 # EACCES errors at this stage are fatal for the current path.
308 if lutime and stat.S_ISLNK(self.mode):
310 lutime(path, (self.atime, self.mtime))
312 if e.errno == errno.EACCES:
313 raise ApplyError('lutime: %s' % e)
318 utime(path, (self.atime, self.mtime))
320 if e.errno == errno.EACCES:
321 raise ApplyError('utime: %s' % e)
325 # Don't try to restore owner unless we're root, and even
326 # if asked, don't try to restore the owner or group if
327 # it doesn't exist in the system db.
330 if not restore_numeric_ids:
333 add_error('ignoring missing owner for "%s"\n' % path)
335 if not is_superuser():
336 uid = -1 # Not root; assume we can't change owner.
339 uid = pwd.getpwnam(self.owner)[2]
342 fmt = 'ignoring unknown owner %s for "%s"\n'
343 add_error(fmt % (self.owner, path))
346 add_error('ignoring missing group for "%s"\n' % path)
349 gid = grp.getgrnam(self.group)[2]
352 add_error('ignoring unknown group %s for "%s"\n'
353 % (self.group, path))
356 os.lchown(path, uid, gid)
358 if e.errno == errno.EPERM:
359 add_error('lchown: %s' % e)
364 os.lchmod(path, stat.S_IMODE(self.mode))
365 elif not stat.S_ISLNK(self.mode):
366 os.chmod(path, stat.S_IMODE(self.mode))
371 def _encode_path(self):
373 return vint.pack('s', self.path)
def _load_path_rec(self, port):
    """Load a path record from port into self.path.

    Reads one bvec from port with vint.read_bvec, unpacks it with the
    vint format 's', and stores the first unpacked field as self.path.
    """
    record = vint.read_bvec(port)
    fields = vint.unpack('s', record)
    self.path = fields[0]
383 def _add_symlink_target(self, path, st):
385 if stat.S_ISLNK(st.st_mode):
386 self.symlink_target = os.readlink(path)
# Format the exception into the message; add_error is given a single
# value everywhere else in this file, not a format string plus args.
add_error('readlink: %s' % e)
def _encode_symlink_target(self):
    """Return the payload for the symlink target record.

    The payload is self.symlink_target itself, returned unchanged.
    """
    target = self.symlink_target
    return target
def _load_symlink_target_rec(self, port):
    """Load a symlink target record from port.

    Reads one bvec from port with vint.read_bvec and stores it,
    unmodified, as self.symlink_target.
    """
    target = vint.read_bvec(port)
    self.symlink_target = target
397 ## POSIX1e ACL records
399 # Recorded as a list:
400 # [txt_id_acl, num_id_acl]
401 # or, if a directory:
402 # [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
403 # The numeric/text distinction only matters when reading/restoring
405 def _add_posix1e_acl(self, path, st):
406 if not posix1e: return
407 if not stat.S_ISLNK(st.st_mode):
409 if posix1e.has_extended(path):
410 acl = posix1e.ACL(file=path)
411 self.posix1e_acl = [acl, acl] # txt and num are the same
412 if stat.S_ISDIR(st.st_mode):
413 acl = posix1e.ACL(filedef=path)
414 self.posix1e_acl.extend([acl, acl])
415 except EnvironmentError, e:
416 if e.errno != errno.EOPNOTSUPP:
419 def _encode_posix1e_acl(self):
420 # Encode as two strings (w/default ACL string possibly empty).
422 acls = self.posix1e_acl
423 txt_flags = posix1e.TEXT_ABBREVIATE
424 num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
425 acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
426 acls[1].to_any_text('', '\n', num_flags)]
430 acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
431 acl_reps.append(acls[3].to_any_text('', '\n', num_flags))
432 return vint.pack('ssss',
433 acl_reps[0], acl_reps[1], acl_reps[2], acl_reps[3])
def _load_posix1e_acl_rec(self, port):
    """Load a POSIX1e ACL record from port into self.posix1e_acl.

    The record is one bvec that unpacks (vint format 'ssss') into four
    ACL text strings.  When the third string is empty only the first
    two are kept.  Each kept string is turned into a posix1e.ACL via
    its text= constructor, and the resulting list is stored as
    self.posix1e_acl.
    """
    # NOTE(review): this uses posix1e unconditionally; confirm callers
    # never reach here when the posix1e module could not be imported.
    texts = vint.unpack('ssss', vint.read_bvec(port))
    if texts[2] == '':
        texts = texts[:2]
    loaded = []
    for text in texts:
        loaded.append(posix1e.ACL(text=text))
    self.posix1e_acl = loaded
444 def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
447 add_error("%s: can't restore ACLs; posix1e support missing.\n"
451 acls = self.posix1e_acl
453 if restore_numeric_ids:
454 acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
456 acls[2].applyto(path, posix1e.ACL_TYPE_DEFAULT)
457 if restore_numeric_ids:
458 acls[1].applyto(path, posix1e.ACL_TYPE_ACCESS)
460 acls[0].applyto(path, posix1e.ACL_TYPE_ACCESS)
463 ## Linux attributes (lsattr(1), chattr(1))
465 def _add_linux_attr(self, path, st):
466 if not get_linux_file_attr: return
467 if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
469 attr = get_linux_file_attr(path)
471 self.linux_attr = attr
473 if e.errno == errno.EACCES:
474 add_error('read Linux attr: %s' % e)
475 elif e.errno == errno.ENOTTY or e.errno == errno.ENOSYS:
# ENOTTY: Inappropriate ioctl for device.
# ENOSYS: Function not implemented.
478 # Assume filesystem doesn't support attrs.
483 def _encode_linux_attr(self):
485 return vint.pack('V', self.linux_attr)
def _load_linux_attr_rec(self, port):
    """Load a Linux attribute record from port into self.linux_attr.

    Reads one bvec from port, unpacks it with the vint format 'V', and
    stores the first unpacked field as self.linux_attr.
    """
    fields = vint.unpack('V', vint.read_bvec(port))
    self.linux_attr = fields[0]
493 def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
495 if not set_linux_file_attr:
496 add_error("%s: can't restore linuxattrs: "
497 "linuxattr support missing.\n" % path)
499 set_linux_file_attr(path, self.linux_attr)
502 ## Linux extended attributes (getfattr(1), setfattr(1))
504 def _add_linux_xattr(self, path, st):
507 self.linux_xattr = xattr.get_all(path, nofollow=True)
508 except EnvironmentError, e:
509 if e.errno != errno.EOPNOTSUPP:
512 def _encode_linux_xattr(self):
514 result = vint.pack('V', len(self.linux_xattr))
515 for name, value in self.linux_xattr:
516 result += vint.pack('ss', name, value)
521 def _load_linux_xattr_rec(self, file):
522 data = vint.read_bvec(file)
523 memfile = StringIO(data)
525 for i in range(vint.read_vuint(memfile)):
526 key = vint.read_bvec(memfile)
527 value = vint.read_bvec(memfile)
528 result.append((key, value))
529 self.linux_xattr = result
531 def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
534 add_error("%s: can't restore xattr; xattr support missing.\n"
537 existing_xattrs = set(xattr.list(path, nofollow=True))
539 for k, v in self.linux_xattr:
540 if k not in existing_xattrs \
541 or v != xattr.get(path, k, nofollow=True):
543 xattr.set(path, k, v, nofollow=True)
545 if e.errno == errno.EPERM:
546 raise ApplyError('xattr.set: %s' % e)
549 existing_xattrs -= frozenset([k])
550 for k in existing_xattrs:
552 xattr.remove(path, k, nofollow=True)
554 if e.errno == errno.EPERM:
555 raise ApplyError('xattr.remove: %s' % e)
563 self.symlink_target = None
564 self.linux_attr = None
565 self.linux_xattr = None
566 self.posix1e_acl = None
567 self.posix1e_acl_default = None
569 def write(self, port, include_path=True):
570 records = include_path and [(_rec_tag_path, self._encode_path())] or []
571 records.extend([(_rec_tag_common, self._encode_common()),
572 (_rec_tag_symlink_target, self._encode_symlink_target()),
573 (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
574 (_rec_tag_linux_attr, self._encode_linux_attr()),
575 (_rec_tag_linux_xattr, self._encode_linux_xattr())])
576 for tag, data in records:
578 vint.write_vuint(port, tag)
579 vint.write_bvec(port, data)
580 vint.write_vuint(port, _rec_tag_end)
584 # This method should either: return a valid Metadata object;
585 # throw EOFError if there was nothing at all to read; throw an
586 # Exception if a valid object could not be read completely.
587 tag = vint.read_vuint(port)
588 try: # From here on, EOF is an error.
590 while True: # only exit is error (exception) or _rec_tag_end
591 if tag == _rec_tag_path:
592 result._load_path_rec(port)
593 elif tag == _rec_tag_common:
594 result._load_common_rec(port)
595 elif tag == _rec_tag_symlink_target:
596 result._load_symlink_target_rec(port)
597 elif tag == _rec_tag_posix1e_acl:
598 result._load_posix1e_acl_rec(port)
599 elif tag ==_rec_tag_nfsv4_acl:
600 result._load_nfsv4_acl_rec(port)
601 elif tag == _rec_tag_linux_attr:
602 result._load_linux_attr_rec(port)
603 elif tag == _rec_tag_linux_xattr:
604 result._load_linux_xattr_rec(port)
605 elif tag == _rec_tag_end:
607 else: # unknown record
609 tag = vint.read_vuint(port)
611 raise Exception("EOF while reading Metadata")
614 return stat.S_ISDIR(self.mode)
def create_path(self, path, create_symlinks=True):
    """Create the filesystem object described by this metadata at path.

    Delegates to self._create_via_common_rec, forwarding path and the
    create_symlinks flag.  Returns None.
    """
    make = self._create_via_common_rec
    make(path, create_symlinks=create_symlinks)
619 def apply_to_path(self, path=None, restore_numeric_ids=False):
620 # apply metadata to path -- file must exist
624 raise Exception('Metadata.apply_to_path() called with no path');
625 if not self._recognized_file_type():
626 add_error('not applying metadata to "%s"' % path
627 + ' with unrecognized mode "0x%x"\n' % self.mode)
629 num_ids = restore_numeric_ids
631 self._apply_common_rec(path, restore_numeric_ids=num_ids)
632 self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
633 self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
634 self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
635 except ApplyError, e:
639 def from_path(path, statinfo=None, archive_path=None, save_symlinks=True):
641 result.path = archive_path
642 st = statinfo or xstat.lstat(path)
643 result.size = st.st_size
644 result._add_common(path, st)
646 result._add_symlink_target(path, st)
647 result._add_posix1e_acl(path, st)
648 result._add_linux_attr(path, st)
649 result._add_linux_xattr(path, st)
653 def save_tree(output_file, paths,
659 # Issue top-level rewrite warnings.
661 safe_path = _clean_up_path_for_archive(path)
662 if safe_path != path:
663 log('archiving "%s" as "%s"\n' % (path, safe_path))
665 start_dir = os.getcwd()
667 for (p, st) in recursive_dirlist(paths, xdev=xdev):
668 dirlist_dir = os.getcwd()
670 safe_path = _clean_up_path_for_archive(p)
671 m = from_path(p, statinfo=st, archive_path=safe_path,
672 save_symlinks=save_symlinks)
674 print >> sys.stderr, m.path
675 m.write(output_file, include_path=write_paths)
676 os.chdir(dirlist_dir)
681 def _set_up_path(meta, create_symlinks=True):
682 # Allow directories to exist as a special case -- might have
683 # been created by an earlier longer path.
687 parent = os.path.dirname(meta.path)
690 meta.create_path(meta.path, create_symlinks=create_symlinks)
693 class _ArchiveIterator:
696 return Metadata.read(self._file)
698 raise StopIteration()
703 def __init__(self, file):
707 def display_archive(file):
708 for meta in _ArchiveIterator(file):
710 print meta.path # FIXME
715 def start_extract(file, create_symlinks=True):
716 for meta in _ArchiveIterator(file):
718 print >> sys.stderr, meta.path
719 xpath = _clean_up_extract_path(meta.path)
721 add_error(Exception('skipping risky path "%s"' % meta.path))
724 _set_up_path(meta, create_symlinks=create_symlinks)
727 def finish_extract(file, restore_numeric_ids=False):
729 for meta in _ArchiveIterator(file):
730 xpath = _clean_up_extract_path(meta.path)
# Name the entry being skipped: the loop variable here is meta, and
# dir is only bound by the later loop over all_dirs.
add_error(Exception('skipping risky path "%s"' % meta.path))
734 if os.path.isdir(meta.path):
735 all_dirs.append(meta)
738 print >> sys.stderr, meta.path
739 meta.apply_to_path(path=xpath,
740 restore_numeric_ids=restore_numeric_ids)
741 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
743 # Don't need to check xpath -- won't be in all_dirs if not OK.
744 xpath = _clean_up_extract_path(dir.path)
746 print >> sys.stderr, dir.path
747 dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids)
750 def extract(file, restore_numeric_ids=False, create_symlinks=True):
751 # For now, just store all the directories and handle them last,
754 for meta in _ArchiveIterator(file):
755 xpath = _clean_up_extract_path(meta.path)
757 add_error(Exception('skipping risky path "%s"' % meta.path))
761 print >> sys.stderr, '+', meta.path
762 _set_up_path(meta, create_symlinks=create_symlinks)
763 if os.path.isdir(meta.path):
764 all_dirs.append(meta)
767 print >> sys.stderr, '=', meta.path
768 meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
769 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
771 # Don't need to check xpath -- won't be in all_dirs if not OK.
772 xpath = _clean_up_extract_path(dir.path)
774 print >> sys.stderr, '=', xpath
775 # Shouldn't have to check for risky paths here (omitted above).
776 dir.apply_to_path(path=dir.path,
777 restore_numeric_ids=restore_numeric_ids)