1 """Metadata read/write support for bup."""
3 # Copyright (C) 2010 Rob Browning
5 # This code is covered under the terms of the GNU Library General
6 # Public License as described in the bup LICENSE file.
8 import errno, os, sys, stat, pwd, grp, struct, xattr, posix1e, re
10 from cStringIO import StringIO
12 from bup.drecurse import recursive_dirlist
13 from bup.helpers import add_error, mkdirp, log
14 from bup.xstat import utime, lutime, lstat, FSTime
15 import bup._helpers as _helpers
17 if _helpers.get_linux_file_attr:
18 from bup._helpers import get_linux_file_attr, set_linux_file_attr
20 # WARNING: the metadata encoding is *not* stable yet. Caveat emptor!
22 # Q: Consider hardlink support?
23 # Q: Is it OK to store raw linux attr (chattr) flags?
24 # Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
25 # Q: Is the application of posix1e has_extended() correct?
26 # Q: Is one global --numeric-ids argument sufficient?
27 # Q: Do nfsv4 acls trump posix1e acls? (seems likely)
28 # Q: Add support for crtime -- ntfs, and (only internally?) ext*?
30 # FIXME: Fix relative/abs path detection/stripping wrt other platforms.
31 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
32 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
33 # FIXME: Consider pack('vvvvsss', ...) optimization.
34 # FIXME: Consider caching users/groups.
38 # osx (varies between hfs and hfs+):
39 # type - regular dir char block fifo socket ...
40 # perms - rwxrwxrwxsgt
41 # times - ctime atime mtime
44 # hard-link-info (hfs+ only)
47 # attributes-osx see chflags
53 # type - regular dir ...
54 # times - creation, modification, posix change, access
57 # attributes - see attrib
59 # forks (alternate data streams)
63 # type - regular dir ...
64 # perms - rwxrwxrwx (maybe - see wikipedia)
65 # times - creation, modification, access
66 # attributes - see attrib
# os.lchmod() is not available on every platform; remember whether
# this interpreter's os module provides it.
_have_lchmod = hasattr(os, 'lchmod')
73 def _clean_up_path_for_archive(p):
74 # Not the most efficient approach.
77 # Take everything after any '/../'.
78 pos = result.rfind('/../')
80 result = result[result.rfind('/../') + 4:]
82 # Take everything after any remaining '../'.
83 if result.startswith("../"):
86 # Remove any '/./' sequences.
87 pos = result.find('/./')
89 result = result[0:pos] + '/' + result[pos + 3:]
90 pos = result.find('/./')
92 # Remove any leading '/'s.
93 result = result.lstrip('/')
95 # Replace '//' with '/' everywhere.
96 pos = result.find('//')
98 result = result[0:pos] + '/' + result[pos + 2:]
99 pos = result.find('//')
101 # Take everything after any remaining './'.
102 if result.startswith('./'):
105 # Take everything before any remaining '/.'.
106 if result.endswith('/.'):
109 if result == '' or result.endswith('/..'):
116 if p.startswith('/'):
118 if p.find('/../') != -1:
120 if p.startswith('../'):
122 if p.endswith('/..'):
127 def _clean_up_extract_path(p):
128 result = p.lstrip('/')
131 elif _risky_path(result):
137 # These tags are currently conceptually private to Metadata, and they
138 # must be unique, and must *never* be changed.
_rec_tag_common = 2 # times, owner, group, type, perms, etc.
_rec_tag_symlink_target = 3 # readlink(2) result for a symlink
_rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc.
_rec_tag_nfsv4_acl = 5 # intended to supplant posix1e acls?
_rec_tag_linux_attr = 6 # lsattr(1) chattr(1)
_rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1)
149 class MetadataError(Exception):
153 class MetadataAcquireError(MetadataError):
154 # Thrown when unable to extract any given bit of metadata from a path.
158 class MetadataApplyError(MetadataError):
159 # Thrown when unable to apply any given bit of metadata to a path.
164 # Metadata is stored as a sequence of tagged binary records. Each
165 # record will have some subset of add, encode, load, create, and
166 # apply methods, i.e. _add_foo...
170 # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00, ns
171 # must be non-negative and < 10**9.
173 def _add_common(self, path, st):
174 self.mode = st.st_mode
177 self.rdev = st.st_rdev
178 self.atime = st.st_atime
179 self.mtime = st.st_mtime
180 self.ctime = st.st_ctime
181 self.owner = self.group = ''
183 self.owner = pwd.getpwuid(st.st_uid)[0]
187 self.group = grp.getgrgid(st.st_gid)[0]
191 def _encode_common(self):
192 atime = self.atime.to_timespec()
193 mtime = self.mtime.to_timespec()
194 ctime = self.ctime.to_timespec()
195 result = vint.pack('VVsVsVvVvVvV',
210 def _load_common_rec(self, port):
211 data = vint.read_bvec(port)
223 ctime_ns) = vint.unpack('VVsVsVvVvVvV', data)
224 self.atime = FSTime.from_timespec((self.atime, atime_ns))
225 self.mtime = FSTime.from_timespec((self.mtime, mtime_ns))
226 self.ctime = FSTime.from_timespec((self.ctime, ctime_ns))
228 def _create_via_common_rec(self, path, create_symlinks=True):
229 # If the path already exists and is a dir, try rmdir.
230 # If the path already exists and is anything else, try unlink.
235 if e.errno != errno.ENOENT:
238 if stat.S_ISDIR(st.st_mode):
242 if e.errno == errno.ENOTEMPTY:
243 msg = 'refusing to overwrite non-empty dir' + path
249 if stat.S_ISREG(self.mode):
250 os.mknod(path, 0600 | stat.S_IFREG)
251 elif stat.S_ISDIR(self.mode):
253 elif stat.S_ISCHR(self.mode):
254 os.mknod(path, 0600 | stat.S_IFCHR, self.rdev)
255 elif stat.S_ISBLK(self.mode):
256 os.mknod(path, 0600 | stat.S_IFBLK, self.rdev)
257 elif stat.S_ISFIFO(self.mode):
258 os.mknod(path, 0600 | stat.S_IFIFO)
259 elif stat.S_ISLNK(self.mode):
260 if(self.symlink_target and create_symlinks):
261 os.symlink(self.symlink_target, path)
262 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
263 # Otherwise, do nothing.
265 def _apply_common_rec(self, path, restore_numeric_ids=False):
266 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
267 if stat.S_ISLNK(self.mode):
268 lutime(path, (self.atime, self.mtime))
270 utime(path, (self.atime, self.mtime))
271 if stat.S_ISREG(self.mode) \
272 | stat.S_ISDIR(self.mode) \
273 | stat.S_ISCHR(self.mode) \
274 | stat.S_ISBLK(self.mode) \
275 | stat.S_ISLNK(self.mode) \
276 | stat.S_ISFIFO(self.mode):
280 elif not stat.S_ISLNK(self.mode):
283 # Don't try to restore owner unless we're root, and even
284 # if asked, don't try to restore the owner or group if
285 # it doesn't exist in the system db.
288 if not restore_numeric_ids:
291 add_error('bup: ignoring missing owner for "%s"\n' % path)
293 if os.geteuid() != 0:
294 uid = -1 # Not root; assume we can't change owner.
297 uid = pwd.getpwnam(self.owner)[2]
300 fmt = 'bup: ignoring unknown owner %s for "%s"\n'
301 add_error(fmt % (self.owner, path))
304 add_error('bup: ignoring missing group for "%s"\n' % path)
307 gid = grp.getgrnam(self.group)[2]
310 add_error('bup: ignoring unknown group %s for "%s"\n'
311 % (self.group, path))
312 os.lchown(path, uid, gid)
315 os.lchmod(path, stat.S_IMODE(self.mode))
316 elif not stat.S_ISLNK(self.mode):
317 os.chmod(path, stat.S_IMODE(self.mode))
322 def _encode_path(self):
324 return vint.pack('s', self.path)
def _load_path_rec(self, port):
    """Read the next record from port and store its path in self.path.

    The record is fetched with vint.read_bvec() and decoded with the
    's' format; only the first decoded field is kept.
    """
    payload = vint.read_bvec(port)
    fields = vint.unpack('s', payload)
    self.path = fields[0]
334 def _add_symlink_target(self, path, st):
335 if(stat.S_ISLNK(st.st_mode)):
336 self.symlink_target = os.readlink(path)
338 def _encode_symlink_target(self):
339 return self.symlink_target
def _load_symlink_target_rec(self, port):
    """Read the next record from port and keep it as self.symlink_target."""
    target = vint.read_bvec(port)
    self.symlink_target = target
345 ## POSIX1e ACL records
347 # Recorded as a list:
348 # [txt_id_acl, num_id_acl]
349 # or, if a directory:
350 # [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
351 # The numeric/text distinction only matters when reading/restoring
353 def _add_posix1e_acl(self, path, st):
354 if not stat.S_ISLNK(st.st_mode):
356 if posix1e.has_extended(path):
357 acl = posix1e.ACL(file=path)
358 self.posix1e_acl = [acl, acl] # txt and num are the same
359 if stat.S_ISDIR(st.st_mode):
360 acl = posix1e.ACL(filedef=path)
361 self.posix1e_acl.extend([acl, acl])
362 except EnvironmentError, e:
363 if e.errno != errno.EOPNOTSUPP:
366 def _encode_posix1e_acl(self):
367 # Encode as two strings (w/default ACL string possibly empty).
369 acls = self.posix1e_acl
370 txt_flags = posix1e.TEXT_ABBREVIATE
371 num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
372 acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
373 acls[1].to_any_text('', '\n', num_flags)]
377 acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
378 acl_reps.append(acls[3].to_any_text('', '\n', num_flags))
379 return vint.pack('ssss',
380 acl_reps[0], acl_reps[1], acl_reps[2], acl_reps[3])
def _load_posix1e_acl_rec(self, port):
    """Load a POSIX.1e ACL record from port into self.posix1e_acl.

    The record always unpacks to four text fields (format 'ssss'):
    the text-id and numeric-id access ACLs, followed by the text-id
    and numeric-id default ACLs.  An empty third field means no
    default ACLs were stored, so only the first two fields are kept.
    """
    data = vint.read_bvec(port)
    acl_reps = vint.unpack('ssss', data)
    if acl_reps[2] == '':
        acl_reps = acl_reps[:2]
    # The unpacked strings are ACL text, so they have to be handed to
    # the constructor as text=; posix1e.ACL picks its source by keyword
    # (compare the file= / filedef= uses when ACLs are read from disk).
    self.posix1e_acl = [posix1e.ACL(text=x) for x in acl_reps]
391 def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
392 if(self.posix1e_acl):
393 acls = self.posix1e_acl
395 if restore_numeric_ids:
396 acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
398 acls[2].applyto(path, posix1e.ACL_TYPE_DEFAULT)
399 if restore_numeric_ids:
400 acls[1].applyto(path, posix1e.ACL_TYPE_ACCESS)
402 acls[0].applyto(path, posix1e.ACL_TYPE_ACCESS)
405 ## Linux attributes (lsattr(1), chattr(1))
407 def _add_linux_attr(self, path, st):
408 if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
409 attr = get_linux_file_attr(path)
411 self.linux_attr = get_linux_file_attr(path)
413 def _encode_linux_attr(self):
415 return vint.pack('V', self.linux_attr)
def _load_linux_attr_rec(self, port):
    """Read the next record from port and store it in self.linux_attr.

    The record is decoded with the 'V' format; only the first decoded
    field is kept.
    """
    payload = vint.read_bvec(port)
    fields = vint.unpack('V', payload)
    self.linux_attr = fields[0]
423 def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
425 set_linux_file_attr(path, self.linux_attr)
428 ## Linux extended attributes (getfattr(1), setfattr(1))
430 def _add_linux_xattr(self, path, st):
432 self.linux_xattr = xattr.get_all(path, nofollow=True)
433 except EnvironmentError, e:
434 if e.errno != errno.EOPNOTSUPP:
437 def _encode_linux_xattr(self):
439 result = vint.pack('V', len(self.linux_xattr))
440 for name, value in self.linux_xattr:
441 result += vint.pack('ss', name, value)
446 def _load_linux_xattr_rec(self, file):
447 data = vint.read_bvec(file)
448 memfile = StringIO(data)
450 for i in range(vint.read_vuint(memfile)):
451 key = vint.read_bvec(memfile)
452 value = vint.read_bvec(memfile)
453 result.append((key, value))
454 self.linux_xattr = result
456 def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
457 if(self.linux_xattr):
458 for k, v in self.linux_xattr:
459 xattr.set(path, k, v, nofollow=True)
464 self.symlink_target = None
465 self.linux_attr = None
466 self.linux_xattr = None
467 self.posix1e_acl = None
468 self.posix1e_acl_default = None
470 def write(self, port, include_path=True):
471 records = [(_rec_tag_path, self._encode_path())] if include_path else []
472 records.extend([(_rec_tag_common, self._encode_common()),
473 (_rec_tag_symlink_target, self._encode_symlink_target()),
474 (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
475 (_rec_tag_linux_attr, self._encode_linux_attr()),
476 (_rec_tag_linux_xattr, self._encode_linux_xattr())])
477 for tag, data in records:
479 vint.write_vuint(port, tag)
480 vint.write_bvec(port, data)
481 vint.write_vuint(port, _rec_tag_end)
485 # This method should either: return a valid Metadata object;
486 # throw EOFError if there was nothing at all to read; throw an
487 # Exception if a valid object could not be read completely.
488 tag = vint.read_vuint(port)
489 try: # From here on, EOF is an error.
491 while(True): # only exit is error (exception) or _rec_tag_end
492 if tag == _rec_tag_path:
493 result._load_path_rec(port)
494 elif tag == _rec_tag_common:
495 result._load_common_rec(port)
496 elif tag == _rec_tag_symlink_target:
497 result._load_symlink_target_rec(port)
498 elif tag == _rec_tag_posix1e_acl:
499 result._load_posix1e_acl(port)
500 elif tag ==_rec_tag_nfsv4_acl:
501 result._load_nfsv4_acl_rec(port)
502 elif tag == _rec_tag_linux_attr:
503 result._load_linux_attr_rec(port)
504 elif tag == _rec_tag_linux_xattr:
505 result._load_linux_xattr_rec(port)
506 elif tag == _rec_tag_end:
508 else: # unknown record
510 tag = vint.read_vuint(port)
512 raise Exception("EOF while reading Metadata")
515 return stat.S_ISDIR(self.mode)
def create_path(self, path, create_symlinks=True):
    """Create the filesystem object this metadata describes at path.

    Thin public wrapper around _create_via_common_rec();
    create_symlinks is forwarded unchanged and nothing is returned.
    """
    make = self._create_via_common_rec
    make(path, create_symlinks=create_symlinks)
520 def apply_to_path(self, path=None, restore_numeric_ids=False):
521 # apply metadata to path -- file must exist
525 raise Exception('Metadata.apply_to_path() called with no path');
526 num_ids = restore_numeric_ids
527 try: # Later we may want to push this down and make it finer grained.
528 self._apply_common_rec(path, restore_numeric_ids=num_ids)
529 self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
530 self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
531 self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
533 raise MetadataApplyError(e), None, sys.exc_info()[2]
536 def from_path(path, archive_path=None, save_symlinks=True):
538 result.path = archive_path
540 try: # Later we may want to push this down and make it finer grained.
541 result._add_common(path, st)
543 result._add_symlink_target(path, st)
544 result._add_posix1e_acl(path, st)
545 result._add_linux_attr(path, st)
546 result._add_linux_xattr(path, st)
548 raise MetadataAcquireError(e), None, sys.exc_info()[2]
552 def save_tree(output_file, paths,
558 # Issue top-level rewrite warnings.
560 safe_path = _clean_up_path_for_archive(path)
561 if(safe_path != path):
562 log('bup: archiving "%s" as "%s"\n' % (path, safe_path))
564 start_dir = os.getcwd()
566 for (p, st) in recursive_dirlist(paths, xdev=xdev):
567 dirlist_dir = os.getcwd()
569 safe_path = _clean_up_path_for_archive(p)
571 m = from_path(p, archive_path=safe_path,
572 save_symlinks=save_symlinks)
573 except MetadataAcquireError, e:
577 print >> sys.stderr, m.path
578 m.write(output_file, include_path=write_paths)
579 os.chdir(dirlist_dir)
584 def _set_up_path(meta, create_symlinks=True):
585 # Allow directories to exist as a special case -- might have
586 # been created by an earlier longer path.
588 mkdirp(meta.path, 0700)
590 parent = os.path.dirname(meta.path)
593 meta.create_path(meta.path, create_symlinks=create_symlinks)
596 class _ArchiveIterator:
599 return Metadata.read(self._file)
601 raise StopIteration()
606 def __init__(self, file):
610 def display_archive(file):
611 for meta in _ArchiveIterator(file):
613 print meta.path # FIXME
618 def start_extract(file, create_symlinks=True):
619 for meta in _ArchiveIterator(file):
621 print >> sys.stderr, meta.path
622 xpath = _clean_up_extract_path(meta.path)
624 add_error(Exception('skipping risky path "%s"' % meta.path))
627 _set_up_path(meta, create_symlinks=create_symlinks)
630 def finish_extract(file, restore_numeric_ids=False):
632 for meta in _ArchiveIterator(file):
633 xpath = _clean_up_extract_path(meta.path)
635 add_error(Exception('skipping risky path "%s"' % dir.path))
637 if os.path.isdir(meta.path):
638 all_dirs.append(meta)
641 print >> sys.stderr, meta.path
643 meta.apply_to_path(path=xpath,
644 restore_numeric_ids=restore_numeric_ids)
645 except MetadataApplyError, e:
648 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
650 # Don't need to check xpath -- won't be in all_dirs if not OK.
651 xpath = _clean_up_extract_path(dir.path)
653 print >> sys.stderr, dir.path
655 dir.apply_to_path(path=xpath,
656 restore_numeric_ids=restore_numeric_ids)
657 except MetadataApplyError, e:
661 def extract(file, restore_numeric_ids=False, create_symlinks=True):
662 # For now, just store all the directories and handle them last,
665 for meta in _ArchiveIterator(file):
666 xpath = _clean_up_extract_path(meta.path)
668 add_error(Exception('skipping risky path "%s"' % meta.path))
672 print >> sys.stderr, '+', meta.path
673 _set_up_path(meta, create_symlinks=create_symlinks)
674 if os.path.isdir(meta.path):
675 all_dirs.append(meta)
678 print >> sys.stderr, '=', meta.path
680 meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
681 except MetadataApplyError, e:
683 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
685 # Don't need to check xpath -- won't be in all_dirs if not OK.
686 xpath = _clean_up_extract_path(meta.path)
688 print >> sys.stderr, '=', meta.path
689 # Shouldn't have to check for risky paths here (omitted above).
691 dir.apply_to_path(path=dir.path,
692 restore_numeric_ids=restore_numeric_ids)
693 except MetadataApplyError, e: