1 """Metadata read/write support for bup."""
3 # Copyright (C) 2010 Rob Browning
5 # This code is covered under the terms of the GNU Library General
6 # Public License as described in the bup LICENSE file.
7 import errno, os, sys, stat, pwd, grp, struct, re
8 from cStringIO import StringIO
10 from bup.drecurse import recursive_dirlist
11 from bup.helpers import add_error, mkdirp, log
12 from bup.xstat import utime, lutime, lstat, FSTime
13 import bup._helpers as _helpers
18 log('Warning: Linux xattr support missing; install python-pyxattr.\n')
23 log('Warning: POSIX ACL support missing; install python-pylibacl.\n')
25 if _helpers.get_linux_file_attr:
26 from bup._helpers import get_linux_file_attr, set_linux_file_attr
28 # WARNING: the metadata encoding is *not* stable yet. Caveat emptor!
30 # Q: Consider hardlink support?
31 # Q: Is it OK to store raw linux attr (chattr) flags?
32 # Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
33 # Q: Is the application of posix1e has_extended() correct?
34 # Q: Is one global --numeric-ids argument sufficient?
35 # Q: Do nfsv4 acls trump posix1e acls? (seems likely)
36 # Q: Add support for crtime -- ntfs, and (only internally?) ext*?
38 # FIXME: Fix relative/abs path detection/stripping wrt other platforms.
39 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
40 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
41 # FIXME: Consider pack('vvvvsss', ...) optimization.
42 # FIXME: Consider caching users/groups.
46 # osx (varies between hfs and hfs+):
47 # type - regular dir char block fifo socket ...
48 # perms - rwxrwxrwxsgt
49 # times - ctime atime mtime
52 # hard-link-info (hfs+ only)
55 # attributes-osx see chflags
61 # type - regular dir ...
62 # times - creation, modification, posix change, access
65 # attributes - see attrib
67 # forks (alternate data streams)
71 # type - regular dir ...
72 # perms - rwxrwxrwx (maybe - see wikipedia)
73 # times - creation, modification, access
74 # attributes - see attrib
78 _have_lchmod = hasattr(os, 'lchmod')
81 def _clean_up_path_for_archive(p):
82 # Not the most efficient approach.
85 # Take everything after any '/../'.
86 pos = result.rfind('/../')
88 result = result[result.rfind('/../') + 4:]
90 # Take everything after any remaining '../'.
91 if result.startswith("../"):
94 # Remove any '/./' sequences.
95 pos = result.find('/./')
97 result = result[0:pos] + '/' + result[pos + 3:]
98 pos = result.find('/./')
100 # Remove any leading '/'s.
101 result = result.lstrip('/')
103 # Replace '//' with '/' everywhere.
104 pos = result.find('//')
106 result = result[0:pos] + '/' + result[pos + 2:]
107 pos = result.find('//')
109 # Take everything after any remaining './'.
110 if result.startswith('./'):
113 # Take everything before any remaining '/.'.
114 if result.endswith('/.'):
117 if result == '' or result.endswith('/..'):
124 if p.startswith('/'):
126 if p.find('/../') != -1:
128 if p.startswith('../'):
130 if p.endswith('/..'):
135 def _clean_up_extract_path(p):
136 result = p.lstrip('/')
139 elif _risky_path(result):
145 # These tags are currently conceptually private to Metadata, and they
146 # must be unique, and must *never* be changed.
149 _rec_tag_common = 2 # times, owner, group, type, perms, etc.
150 _rec_tag_symlink_target = 3
151 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc.
152 _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e acls?
153 _rec_tag_linux_attr = 6 # lsattr(1) chattr(1)
154 _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1)
157 class ApplyError(Exception):
158 # Thrown when unable to apply any given bit of metadata to a path.
163 # Metadata is stored as a sequence of tagged binary records. Each
164 # record will have some subset of add, encode, load, create, and
165 # apply methods, e.g. _add_foo...
169 # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00; ns
170 # must be non-negative and < 10**9.
172 def _add_common(self, path, st):
173 self.mode = st.st_mode
176 self.rdev = st.st_rdev
177 self.atime = st.st_atime
178 self.mtime = st.st_mtime
179 self.ctime = st.st_ctime
180 self.owner = self.group = ''
182 self.owner = pwd.getpwuid(st.st_uid)[0]
184 add_error("no user name for id %s '%s'" % (st.st_gid, path))
186 self.group = grp.getgrgid(st.st_gid)[0]
188 add_error("no group name for id %s '%s'" % (st.st_gid, path))
190 def _encode_common(self):
191 atime = self.atime.to_timespec()
192 mtime = self.mtime.to_timespec()
193 ctime = self.ctime.to_timespec()
194 result = vint.pack('VVsVsVvVvVvV',
209 def _load_common_rec(self, port):
210 data = vint.read_bvec(port)
222 ctime_ns) = vint.unpack('VVsVsVvVvVvV', data)
223 self.atime = FSTime.from_timespec((self.atime, atime_ns))
224 self.mtime = FSTime.from_timespec((self.mtime, mtime_ns))
225 self.ctime = FSTime.from_timespec((self.ctime, ctime_ns))
227 def _create_via_common_rec(self, path, create_symlinks=True):
228 # If the path already exists and is a dir, try rmdir.
229 # If the path already exists and is anything else, try unlink.
234 if e.errno != errno.ENOENT:
237 if stat.S_ISDIR(st.st_mode):
241 if e.errno == errno.ENOTEMPTY:
242 msg = 'refusing to overwrite non-empty dir' + path
248 if stat.S_ISREG(self.mode):
249 os.mknod(path, 0600 | stat.S_IFREG)
250 elif stat.S_ISDIR(self.mode):
252 elif stat.S_ISCHR(self.mode):
253 os.mknod(path, 0600 | stat.S_IFCHR, self.rdev)
254 elif stat.S_ISBLK(self.mode):
255 os.mknod(path, 0600 | stat.S_IFBLK, self.rdev)
256 elif stat.S_ISFIFO(self.mode):
257 os.mknod(path, 0600 | stat.S_IFIFO)
258 elif stat.S_ISLNK(self.mode):
259 if self.symlink_target and create_symlinks:
260 os.symlink(self.symlink_target, path)
261 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
262 # Otherwise, do nothing.
264 def _apply_common_rec(self, path, restore_numeric_ids=False):
265 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
266 # EACCES errors at this stage are fatal for the current path.
267 if stat.S_ISLNK(self.mode):
269 lutime(path, (self.atime, self.mtime))
271 if e.errno == errno.EACCES:
272 raise ApplyError('lutime: %s' % e)
277 utime(path, (self.atime, self.mtime))
279 if e.errno == errno.EACCES:
280 raise ApplyError('utime: %s' % e)
284 # Don't try to restore owner unless we're root, and even
285 # if asked, don't try to restore the owner or group if
286 # it doesn't exist in the system db.
289 if not restore_numeric_ids:
292 add_error('ignoring missing owner for "%s"\n' % path)
294 if os.geteuid() != 0:
295 uid = -1 # Not root; assume we can't change owner.
298 uid = pwd.getpwnam(self.owner)[2]
301 fmt = 'ignoring unknown owner %s for "%s"\n'
302 add_error(fmt % (self.owner, path))
305 add_error('ignoring missing group for "%s"\n' % path)
308 gid = grp.getgrnam(self.group)[2]
311 add_error('ignoring unknown group %s for "%s"\n'
312 % (self.group, path))
315 os.lchown(path, uid, gid)
317 if e.errno == errno.EPERM:
318 add_error('lchown: %s' % e)
323 os.lchmod(path, stat.S_IMODE(self.mode))
324 elif not stat.S_ISLNK(self.mode):
325 os.chmod(path, stat.S_IMODE(self.mode))
330 def _encode_path(self):
332 return vint.pack('s', self.path)
def _load_path_rec(self, port):
    """Load a path record from port into self.path.

    The record body is fetched with vint.read_bvec(port) and decoded
    with the 's' format; the first decoded field becomes self.path.
    """
    record = vint.read_bvec(port)
    fields = vint.unpack('s', record)
    self.path = fields[0]
342 def _add_symlink_target(self, path, st):
344 if stat.S_ISLNK(st.st_mode):
345 self.symlink_target = os.readlink(path)
347 add_error('readlink: %s', e)
def _encode_symlink_target(self):
    """Return the body of the symlink target record.

    No transformation is applied: whatever self.symlink_target
    currently holds (None included) is handed back unchanged.
    """
    target = self.symlink_target
    return target
def _load_symlink_target_rec(self, port):
    """Load a symlink target record from port into self.symlink_target.

    The value returned by vint.read_bvec(port) is stored as-is.
    """
    target = vint.read_bvec(port)
    self.symlink_target = target
356 ## POSIX1e ACL records
358 # Recorded as a list:
359 # [txt_id_acl, num_id_acl]
360 # or, if a directory:
361 # [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
362 # The numeric/text distinction only matters when reading/restoring
364 def _add_posix1e_acl(self, path, st):
365 if not posix1e: return
366 if not stat.S_ISLNK(st.st_mode):
368 if posix1e.has_extended(path):
369 acl = posix1e.ACL(file=path)
370 self.posix1e_acl = [acl, acl] # txt and num are the same
371 if stat.S_ISDIR(st.st_mode):
372 acl = posix1e.ACL(filedef=path)
373 self.posix1e_acl.extend([acl, acl])
374 except EnvironmentError, e:
375 if e.errno != errno.EOPNOTSUPP:
378 def _encode_posix1e_acl(self):
379 # Encode as two strings (w/default ACL string possibly empty).
381 acls = self.posix1e_acl
382 txt_flags = posix1e.TEXT_ABBREVIATE
383 num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
384 acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
385 acls[1].to_any_text('', '\n', num_flags)]
389 acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
390 acl_reps.append(acls[3].to_any_text('', '\n', num_flags))
391 return vint.pack('ssss',
392 acl_reps[0], acl_reps[1], acl_reps[2], acl_reps[3])
def _load_posix1e_acl_rec(self, port):
    """Load a POSIX1e ACL record from port into self.posix1e_acl.

    The record body decodes (format 'ssss') to four text fields.  When
    the third field is empty only the first two are kept, so the
    resulting list has either two or four posix1e.ACL objects, each
    built from its text form.
    """
    # NOTE(review): posix1e is used unconditionally here, whereas
    # _add_posix1e_acl() checks "if not posix1e" first -- confirm what
    # should happen when the posix1e module is unavailable.
    packed = vint.read_bvec(port)
    acl_texts = vint.unpack('ssss', packed)
    if acl_texts[2] == '':
        # Nothing in the third slot: drop the trailing pair.
        acl_texts = acl_texts[:2]
    loaded = []
    for acl_text in acl_texts:
        loaded.append(posix1e.ACL(text=acl_text))
    self.posix1e_acl = loaded
403 def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
406 add_error("%s: can't restore ACLs; posix1e support missing.\n"
410 acls = self.posix1e_acl
412 if restore_numeric_ids:
413 acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
415 acls[2].applyto(path, posix1e.ACL_TYPE_DEFAULT)
416 if restore_numeric_ids:
417 acls[1].applyto(path, posix1e.ACL_TYPE_ACCESS)
419 acls[0].applyto(path, posix1e.ACL_TYPE_ACCESS)
422 ## Linux attributes (lsattr(1), chattr(1))
424 def _add_linux_attr(self, path, st):
425 if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
427 attr = get_linux_file_attr(path)
429 self.linux_attr = attr
431 if e.errno == errno.EACCES:
432 add_error('read Linux attr: %s' % e)
433 elif e.errno == errno.ENOTTY: # Inappropriate ioctl for device.
434 add_error('read Linux attr: %s' % e)
438 def _encode_linux_attr(self):
440 return vint.pack('V', self.linux_attr)
def _load_linux_attr_rec(self, port):
    """Load a Linux attr record from port into self.linux_attr.

    The record body is fetched with vint.read_bvec(port) and decoded
    with the 'V' format; the first decoded field becomes
    self.linux_attr.
    """
    record = vint.read_bvec(port)
    decoded = vint.unpack('V', record)
    self.linux_attr = decoded[0]
448 def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
450 set_linux_file_attr(path, self.linux_attr)
453 ## Linux extended attributes (getfattr(1), setfattr(1))
455 def _add_linux_xattr(self, path, st):
458 self.linux_xattr = xattr.get_all(path, nofollow=True)
459 except EnvironmentError, e:
460 if e.errno != errno.EOPNOTSUPP:
463 def _encode_linux_xattr(self):
465 result = vint.pack('V', len(self.linux_xattr))
466 for name, value in self.linux_xattr:
467 result += vint.pack('ss', name, value)
472 def _load_linux_xattr_rec(self, file):
473 data = vint.read_bvec(file)
474 memfile = StringIO(data)
476 for i in range(vint.read_vuint(memfile)):
477 key = vint.read_bvec(memfile)
478 value = vint.read_bvec(memfile)
479 result.append((key, value))
480 self.linux_xattr = result
482 def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
485 add_error("%s: can't restore xattr; xattr support missing.\n"
488 existing_xattrs = set(xattr.list(path, nofollow=True))
490 for k, v in self.linux_xattr:
491 if k not in existing_xattrs \
492 or v != xattr.get(path, k, nofollow=True):
494 xattr.set(path, k, v, nofollow=True)
496 if e.errno == errno.EPERM:
497 raise ApplyError('xattr.set: %s' % e)
500 existing_xattrs -= frozenset([k])
501 for k in existing_xattrs:
503 xattr.remove(path, k, nofollow=True)
505 if e.errno == errno.EPERM:
506 raise ApplyError('xattr.remove: %s' % e)
513 self.symlink_target = None
514 self.linux_attr = None
515 self.linux_xattr = None
516 self.posix1e_acl = None
517 self.posix1e_acl_default = None
519 def write(self, port, include_path=True):
520 records = [(_rec_tag_path, self._encode_path())] if include_path else []
521 records.extend([(_rec_tag_common, self._encode_common()),
522 (_rec_tag_symlink_target, self._encode_symlink_target()),
523 (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
524 (_rec_tag_linux_attr, self._encode_linux_attr()),
525 (_rec_tag_linux_xattr, self._encode_linux_xattr())])
526 for tag, data in records:
528 vint.write_vuint(port, tag)
529 vint.write_bvec(port, data)
530 vint.write_vuint(port, _rec_tag_end)
534 # This method should either: return a valid Metadata object;
535 # throw EOFError if there was nothing at all to read; throw an
536 # Exception if a valid object could not be read completely.
537 tag = vint.read_vuint(port)
538 try: # From here on, EOF is an error.
540 while True: # only exit is error (exception) or _rec_tag_end
541 if tag == _rec_tag_path:
542 result._load_path_rec(port)
543 elif tag == _rec_tag_common:
544 result._load_common_rec(port)
545 elif tag == _rec_tag_symlink_target:
546 result._load_symlink_target_rec(port)
547 elif tag == _rec_tag_posix1e_acl:
548 result._load_posix1e_acl_rec(port)
549 elif tag ==_rec_tag_nfsv4_acl:
550 result._load_nfsv4_acl_rec(port)
551 elif tag == _rec_tag_linux_attr:
552 result._load_linux_attr_rec(port)
553 elif tag == _rec_tag_linux_xattr:
554 result._load_linux_xattr_rec(port)
555 elif tag == _rec_tag_end:
557 else: # unknown record
559 tag = vint.read_vuint(port)
561 raise Exception("EOF while reading Metadata")
564 return stat.S_ISDIR(self.mode)
def create_path(self, path, create_symlinks=True):
    """Create the filesystem entry described by this object at path.

    This is a thin wrapper: the work is done by
    _create_via_common_rec(), and create_symlinks is passed through
    to it unchanged as a keyword argument.
    """
    make_entry = self._create_via_common_rec
    make_entry(path, create_symlinks=create_symlinks)
569 def apply_to_path(self, path=None, restore_numeric_ids=False):
570 # apply metadata to path -- file must exist
574 raise Exception('Metadata.apply_to_path() called with no path');
575 num_ids = restore_numeric_ids
577 self._apply_common_rec(path, restore_numeric_ids=num_ids)
578 self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
579 self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
580 self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
581 except ApplyError, e:
585 def from_path(path, statinfo=None, archive_path=None, save_symlinks=True):
587 result.path = archive_path
588 st = statinfo if statinfo else lstat(path)
589 result._add_common(path, st)
591 result._add_symlink_target(path, st)
592 result._add_posix1e_acl(path, st)
593 result._add_linux_attr(path, st)
594 result._add_linux_xattr(path, st)
598 def save_tree(output_file, paths,
604 # Issue top-level rewrite warnings.
606 safe_path = _clean_up_path_for_archive(path)
607 if safe_path != path:
608 log('archiving "%s" as "%s"\n' % (path, safe_path))
610 start_dir = os.getcwd()
612 for (p, st) in recursive_dirlist(paths, xdev=xdev):
613 dirlist_dir = os.getcwd()
615 safe_path = _clean_up_path_for_archive(p)
616 m = from_path(p, statinfo=st, archive_path=safe_path,
617 save_symlinks=save_symlinks)
619 print >> sys.stderr, m.path
620 m.write(output_file, include_path=write_paths)
621 os.chdir(dirlist_dir)
626 def _set_up_path(meta, create_symlinks=True):
627 # Allow directories to exist as a special case -- might have
628 # been created by an earlier longer path.
632 parent = os.path.dirname(meta.path)
635 meta.create_path(meta.path, create_symlinks=create_symlinks)
638 class _ArchiveIterator:
641 return Metadata.read(self._file)
643 raise StopIteration()
648 def __init__(self, file):
652 def display_archive(file):
653 for meta in _ArchiveIterator(file):
655 print meta.path # FIXME
660 def start_extract(file, create_symlinks=True):
661 for meta in _ArchiveIterator(file):
663 print >> sys.stderr, meta.path
664 xpath = _clean_up_extract_path(meta.path)
666 add_error(Exception('skipping risky path "%s"' % meta.path))
669 _set_up_path(meta, create_symlinks=create_symlinks)
672 def finish_extract(file, restore_numeric_ids=False):
674 for meta in _ArchiveIterator(file):
675 xpath = _clean_up_extract_path(meta.path)
677 add_error(Exception('skipping risky path "%s"' % dir.path))
679 if os.path.isdir(meta.path):
680 all_dirs.append(meta)
683 print >> sys.stderr, meta.path
684 meta.apply_to_path(path=xpath,
685 restore_numeric_ids=restore_numeric_ids)
686 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
688 # Don't need to check xpath -- won't be in all_dirs if not OK.
689 xpath = _clean_up_extract_path(dir.path)
691 print >> sys.stderr, dir.path
692 dir.apply_to_path(path=xpath, restore_numeric_ids=restore_numeric_ids)
695 def extract(file, restore_numeric_ids=False, create_symlinks=True):
696 # For now, just store all the directories and handle them last,
699 for meta in _ArchiveIterator(file):
700 xpath = _clean_up_extract_path(meta.path)
702 add_error(Exception('skipping risky path "%s"' % meta.path))
706 print >> sys.stderr, '+', meta.path
707 _set_up_path(meta, create_symlinks=create_symlinks)
708 if os.path.isdir(meta.path):
709 all_dirs.append(meta)
712 print >> sys.stderr, '=', meta.path
713 meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
714 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
716 # Don't need to check xpath -- won't be in all_dirs if not OK.
717 xpath = _clean_up_extract_path(dir.path)
719 print >> sys.stderr, '=', xpath
720 # Shouldn't have to check for risky paths here (omitted above).
721 dir.apply_to_path(path=dir.path,
722 restore_numeric_ids=restore_numeric_ids)