1 """Metadata read/write support for bup."""
3 # Copyright (C) 2010 Rob Browning
5 # This code is covered under the terms of the GNU Library General
6 # Public License as described in the bup LICENSE file.
8 import errno, os, sys, stat, pwd, grp, struct, xattr, posix1e, re
10 from cStringIO import StringIO
12 from bup.helpers import add_error, mkdirp, log, utime, lutime, lstat, FSTime
13 import bup._helpers as _helpers
15 if _helpers.get_linux_file_attr:
16 from bup._helpers import get_linux_file_attr, set_linux_file_attr
18 # WARNING: the metadata encoding is *not* stable yet. Caveat emptor!
20 # Q: Consider hardlink support?
21 # Q: Is it OK to store raw linux attr (chattr) flags?
22 # Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
23 # Q: Is the application of posix1e has_extended() correct?
24 # Q: Is one global --numeric-ids argument sufficient?
25 # Q: Do nfsv4 acls trump posix1e acls? (seems likely)
26 # Q: Add support for crtime -- ntfs, and (only internally?) ext*?
28 # FIXME: Fix relative/abs path detection/stripping wrt other platforms.
29 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
30 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
31 # FIXME: Consider pack('vvvvsss', ...) optimization.
32 # FIXME: Consider caching users/groups.
36 # osx (varies between hfs and hfs+):
37 # type - regular dir char block fifo socket ...
38 # perms - rwxrwxrwxsgt
39 # times - ctime atime mtime
42 # hard-link-info (hfs+ only)
45 # attributes-osx see chflags
51 # type - regular dir ...
52 # times - creation, modification, posix change, access
55 # attributes - see attrib
57 # forks (alternate data streams)
61 # type - regular dir ...
62 # perms - rwxrwxrwx (maybe - see wikipedia)
63 # times - creation, modification, access
64 # attributes - see attrib
68 _have_lchmod = hasattr(os, 'lchmod')
71 def _clean_up_path_for_archive(p):
72 # Not the most efficient approach.
75 # Take everything after any '/../'.
76 pos = result.rfind('/../')
78 result = result[result.rfind('/../') + 4:]
80 # Take everything after any remaining '../'.
81 if result.startswith("../"):
84 # Remove any '/./' sequences.
85 pos = result.find('/./')
87 result = result[0:pos] + '/' + result[pos + 3:]
88 pos = result.find('/./')
90 # Remove any leading '/'s.
91 result = result.lstrip('/')
93 # Replace '//' with '/' everywhere.
94 pos = result.find('//')
96 result = result[0:pos] + '/' + result[pos + 2:]
97 pos = result.find('//')
99 # Take everything after any remaining './'.
100 if result.startswith('./'):
103 # Take everything before any remaining '/.'.
104 if result.endswith('/.'):
107 if result == '' or result.endswith('/..'):
114 if p.startswith('/'):
116 if p.find('/../') != -1:
118 if p.startswith('../'):
120 if p.endswith('/..'):
125 def _clean_up_extract_path(p):
126 result = p.lstrip('/')
129 elif _risky_path(result):
135 # These tags are currently conceptually private to Metadata, and they
136 # must be unique, and must *never* be changed.
139 _rec_tag_common = 2 # times, owner, group, type, perms, etc.
140 _rec_tag_symlink_target = 3
141 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc.
142 _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e acls?
143 _rec_tag_linux_attr = 6 # lsattr(1) chattr(1)
144 _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1)
class MetadataAcquisitionError(Exception):
    """Raised when some piece of metadata cannot be extracted from a path."""
class MetadataApplicationError(Exception):
    """Raised when some piece of metadata cannot be applied to a path."""
158 # Metadata is stored as a sequence of tagged binary records. Each
159 # record will have some subset of add, encode, load, create, and
160 # apply methods, i.e. _add_foo...
164 # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00, ns
165 # must be non-negative and < 10**9.
167 def _add_common(self, path, st):
168 self.mode = st.st_mode
171 self.rdev = st.st_rdev
172 self.atime = st.st_atime
173 self.mtime = st.st_mtime
174 self.ctime = st.st_ctime
175 self.owner = pwd.getpwuid(st.st_uid)[0]
176 self.group = grp.getgrgid(st.st_gid)[0]
178 def _encode_common(self):
179 atime = self.atime.to_timespec()
180 mtime = self.mtime.to_timespec()
181 ctime = self.ctime.to_timespec()
182 result = vint.pack('VVsVsVvVvVvV',
197 def _load_common_rec(self, port):
198 data = vint.read_bvec(port)
210 ctime_ns) = vint.unpack('VVsVsVvVvVvV', data)
211 self.atime = FSTime.from_timespec((self.atime, atime_ns))
212 self.mtime = FSTime.from_timespec((self.mtime, mtime_ns))
213 self.ctime = FSTime.from_timespec((self.ctime, ctime_ns))
215 def _create_via_common_rec(self, path, create_symlinks=True):
216 # If the path already exists and is a dir, try rmdir.
217 # If the path already exists and is anything else, try unlink.
222 if e.errno != errno.ENOENT:
225 if stat.S_ISDIR(st.st_mode):
229 if e.errno == errno.ENOTEMPTY:
230 msg = 'refusing to overwrite non-empty dir' + path
236 if stat.S_ISREG(self.mode):
237 os.mknod(path, 0600 | stat.S_IFREG)
238 elif stat.S_ISDIR(self.mode):
240 elif stat.S_ISCHR(self.mode):
241 os.mknod(path, 0600 | stat.S_IFCHR, self.rdev)
242 elif stat.S_ISBLK(self.mode):
243 os.mknod(path, 0600 | stat.S_IFBLK, self.rdev)
244 elif stat.S_ISFIFO(self.mode):
245 os.mknod(path, 0600 | stat.S_IFIFO)
246 elif stat.S_ISLNK(self.mode):
247 if(self.symlink_target and create_symlinks):
248 os.symlink(self.symlink_target, path)
249 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
250 # Otherwise, do nothing.
252 def _apply_common_rec(self, path, restore_numeric_ids=False):
253 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
254 if stat.S_ISLNK(self.mode):
255 lutime(path, (self.atime, self.mtime))
257 utime(path, (self.atime, self.mtime))
258 if stat.S_ISREG(self.mode) \
259 | stat.S_ISDIR(self.mode) \
260 | stat.S_ISCHR(self.mode) \
261 | stat.S_ISBLK(self.mode) \
262 | stat.S_ISLNK(self.mode) \
263 | stat.S_ISFIFO(self.mode):
267 elif not stat.S_ISLNK(self.mode):
270 # Don't try to restore owner unless we're root, and even
271 # if asked, don't try to restore the owner or group if
272 # it doesn't exist in the system db.
275 if not restore_numeric_ids:
276 if os.geteuid() == 0:
278 uid = pwd.getpwnam(self.owner)[2]
281 log('bup: ignoring unknown owner %s for "%s"\n'
282 % (self.owner, path))
284 uid = -1 # Not root; assume we can't change owner.
286 gid = grp.getgrnam(self.group)[2]
289 log('bup: ignoring unknown group %s for "%s"\n'
290 % (self.group, path))
291 os.lchown(path, uid, gid)
294 os.lchmod(path, stat.S_IMODE(self.mode))
295 elif not stat.S_ISLNK(self.mode):
296 os.chmod(path, stat.S_IMODE(self.mode))
301 def _encode_path(self):
303 return vint.pack('s', self.path)
def _load_path_rec(self, port):
    """Read a path record from port and store the path in self.path.

    The payload is one byte vector packing a single string ('s').
    """
    payload = vint.read_bvec(port)
    fields = vint.unpack('s', payload)
    self.path = fields[0]
def _add_symlink_target(self, path, st):
    """Record where path points when st describes a symbolic link.

    self.symlink_target is left untouched for every other file type.
    """
    if not stat.S_ISLNK(st.st_mode):
        return
    self.symlink_target = os.readlink(path)
def _encode_symlink_target(self):
    """Return the symlink-target record payload.

    The payload is the stored target itself, which is None when no
    target has been recorded.
    """
    target = self.symlink_target
    return target
def _load_symlink_target_rec(self, port):
    """Read a symlink-target record from port into self.symlink_target.

    The payload is a single byte vector, stored as read.
    """
    target = vint.read_bvec(port)
    self.symlink_target = target
324 ## POSIX1e ACL records
326 # Recorded as a list:
327 # [txt_id_acl, num_id_acl]
328 # or, if a directory:
329 # [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
330 # The numeric/text distinction only matters when reading/restoring
332 def _add_posix1e_acl(self, path, st):
333 if not stat.S_ISLNK(st.st_mode):
335 if posix1e.has_extended(path):
336 acl = posix1e.ACL(file=path)
337 self.posix1e_acl = [acl, acl] # txt and num are the same
338 if stat.S_ISDIR(st.st_mode):
339 acl = posix1e.ACL(filedef=path)
340 self.posix1e_acl.extend([acl, acl])
341 except EnvironmentError, e:
342 if e.errno != errno.EOPNOTSUPP:
345 def _encode_posix1e_acl(self):
346 # Encode as two strings (w/default ACL string possibly empty).
348 acls = self.posix1e_acl
349 txt_flags = posix1e.TEXT_ABBREVIATE
350 num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
351 acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
352 acls[1].to_any_text('', '\n', num_flags)]
356 acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
357 acl_reps.append(acls[3].to_any_text('', '\n', num_flags))
358 return vint.pack('ssss',
359 acl_reps[0], acl_reps[1], acl_reps[2], acl_reps[3])
def _load_posix1e_acl_rec(self, port):
    """Read a POSIX1e ACL record from port into self.posix1e_acl.

    The payload is a byte vector packing four strings: the text-id and
    numeric-id access ACLs, then the text-id and numeric-id default
    ACLs.  An empty third string means no default ACLs were recorded,
    so only the two access ACLs are kept.
    """
    data = vint.read_bvec(port)
    acl_reps = vint.unpack('ssss', data)
    if acl_reps[2] == '':
        acl_reps = acl_reps[:2]
    # posix1e.ACL() takes keyword arguments only (compare the file= and
    # filedef= uses in _add_posix1e_acl), so the textual representation
    # must be passed as text=.
    self.posix1e_acl = [posix1e.ACL(text=x) for x in acl_reps]
370 def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
371 if(self.posix1e_acl):
372 acls = self.posix1e_acl
374 if restore_numeric_ids:
375 acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
377 acls[2].applyto(path, posix1e.ACL_TYPE_DEFAULT)
378 if restore_numeric_ids:
379 acls[1].applyto(path, posix1e.ACL_TYPE_ACCESS)
381 acls[0].applyto(path, posix1e.ACL_TYPE_ACCESS)
384 ## Linux attributes (lsattr(1), chattr(1))
386 def _add_linux_attr(self, path, st):
387 if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
388 attr = get_linux_file_attr(path)
390 self.linux_attr = get_linux_file_attr(path)
392 def _encode_linux_attr(self):
394 return vint.pack('V', self.linux_attr)
def _load_linux_attr_rec(self, port):
    """Read a Linux attribute record from port into self.linux_attr.

    The payload is a byte vector packing a single 'V' value.
    """
    fields = vint.unpack('V', vint.read_bvec(port))
    self.linux_attr = fields[0]
402 def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
404 set_linux_file_attr(path, self.linux_attr)
407 ## Linux extended attributes (getfattr(1), setfattr(1))
409 def _add_linux_xattr(self, path, st):
411 self.linux_xattr = xattr.get_all(path, nofollow=True)
412 except EnvironmentError, e:
413 if e.errno != errno.EOPNOTSUPP:
def _encode_linux_xattr(self):
    """Return the encoded xattr record, or None when there are no xattrs.

    Layout: a 'V' count followed by one (name, value) string pair per
    attribute; this is intended to be what _load_linux_xattr_rec()
    reads back.
    """
    xattrs = self.linux_xattr
    if not xattrs:
        return None
    # Both the count and the pairs come from self.linux_xattr; the
    # previously referenced 'items' and 'self.attrs' were never defined.
    result = vint.pack('V', len(xattrs))
    for name, value in xattrs:
        result += vint.pack('ss', name, value)
    return result
def _load_linux_xattr_rec(self, file):
    """Read an xattr record from file into self.linux_xattr.

    The payload is a byte vector holding a vuint count followed by that
    many (key, value) byte-vector pairs.  The result is stored as a
    list of (key, value) tuples in stored order.
    """
    memfile = StringIO(vint.read_bvec(file))
    # The accumulator must exist before the loop appends to it.
    result = []
    for _ in range(vint.read_vuint(memfile)):
        key = vint.read_bvec(memfile)
        value = vint.read_bvec(memfile)
        result.append((key, value))
    self.linux_xattr = result
def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
    """Set every recorded extended attribute on path.

    Does nothing when no xattrs were recorded.  restore_numeric_ids is
    accepted but not used here.  Each attribute is set with
    nofollow=True.
    """
    if not self.linux_xattr:
        return
    for name, value in self.linux_xattr:
        xattr.set(path, name, value, nofollow=True)
443 self.symlink_target = None
444 self.linux_attr = None
445 self.linux_xattr = None
446 self.posix1e_acl = None
447 self.posix1e_acl_default = None
449 def write(self, port, include_path=True):
450 records = [(_rec_tag_path, self._encode_path())] if include_path else []
451 records.extend([(_rec_tag_common, self._encode_common()),
452 (_rec_tag_symlink_target, self._encode_symlink_target()),
453 (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
454 (_rec_tag_linux_attr, self._encode_linux_attr()),
455 (_rec_tag_linux_xattr, self._encode_linux_xattr())])
456 for tag, data in records:
458 vint.write_vuint(port, tag)
459 vint.write_bvec(port, data)
460 vint.write_vuint(port, _rec_tag_end)
464 # This method should either: return a valid Metadata object;
465 # throw EOFError if there was nothing at all to read; throw an
466 # Exception if a valid object could not be read completely.
467 tag = vint.read_vuint(port)
468 try: # From here on, EOF is an error.
470 while(True): # only exit is error (exception) or _rec_tag_end
471 if tag == _rec_tag_path:
472 result._load_path_rec(port)
473 elif tag == _rec_tag_common:
474 result._load_common_rec(port)
475 elif tag == _rec_tag_symlink_target:
476 result._load_symlink_target_rec(port)
477 elif tag == _rec_tag_posix1e_acl:
478 result._load_posix1e_acl(port)
479 elif tag ==_rec_tag_nfsv4_acl:
480 result._load_nfsv4_acl_rec(port)
481 elif tag == _rec_tag_linux_attr:
482 result._load_linux_attr_rec(port)
483 elif tag == _rec_tag_linux_xattr:
484 result._load_linux_xattr_rec(port)
485 elif tag == _rec_tag_end:
487 else: # unknown record
489 tag = vint.read_vuint(port)
491 raise Exception("EOF while reading Metadata")
494 return stat.S_ISDIR(self.mode)
def create_path(self, path, create_symlinks=True):
    """Create the filesystem object this metadata describes at path.

    Delegates to _create_via_common_rec(), passing create_symlinks
    through unchanged.
    """
    make = self._create_via_common_rec
    make(path, create_symlinks=create_symlinks)
499 def apply_to_path(self, path=None, restore_numeric_ids=False):
500 # apply metadata to path -- file must exist
504 raise Exception('Metadata.apply_to_path() called with no path');
505 num_ids = restore_numeric_ids
506 try: # Later we may want to push this down and make it finer grained.
507 self._apply_common_rec(path, restore_numeric_ids=num_ids)
508 self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
509 self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
510 self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
512 raise MetadataApplicationError(str(e))
515 def from_path(path, archive_path=None, save_symlinks=True):
517 result.path = archive_path
519 try: # Later we may want to push this down and make it finer grained.
520 result._add_common(path, st)
522 result._add_symlink_target(path, st)
523 result._add_posix1e_acl(path, st)
524 result._add_linux_attr(path, st)
525 result._add_linux_xattr(path, st)
527 raise MetadataAcquisitionError(str(e))
531 def save_tree(output_file, paths,
536 safe_path = _clean_up_path_for_archive(p)
538 log('bup: archiving "%s" as "%s"\n' % (p, safe_path))
540 # Handle path itself.
542 m = from_path(p, archive_path=safe_path,
543 save_symlinks=save_symlinks)
544 except MetadataAcquisitionError, e:
548 print >> sys.stderr, m.path
549 m.write(output_file, include_path=write_paths)
551 if recurse and os.path.isdir(p):
552 for root, dirs, files in os.walk(p, onerror=add_error):
554 for sub_path in items:
555 full_path = os.path.join(root, sub_path)
556 safe_path = _clean_up_path_for_archive(full_path)
558 m = from_path(full_path,
559 archive_path=safe_path,
560 save_symlinks=save_symlinks)
561 except MetadataAcquisitionError, e:
564 print >> sys.stderr, m.path
565 m.write(output_file, include_path=write_paths)
568 def _set_up_path(meta, create_symlinks=True):
569 # Allow directories to exist as a special case -- might have
570 # been created by an earlier longer path.
572 mkdirp(meta.path, 0700)
574 parent = os.path.dirname(meta.path)
577 meta.create_path(meta.path, create_symlinks=create_symlinks)
580 class _ArchiveIterator:
583 return Metadata.read(self._file)
585 raise StopIteration()
590 def __init__(self, file):
594 def display_archive(file):
595 for meta in _ArchiveIterator(file):
597 print meta.path # FIXME
602 def start_extract(file, create_symlinks=True):
603 for meta in _ArchiveIterator(file):
605 print >> sys.stderr, meta.path
606 xpath = _clean_up_extract_path(meta.path)
608 add_error(Exception('skipping risky path "%s"' % meta.path))
611 _set_up_path(meta, create_symlinks=create_symlinks)
614 def finish_extract(file, restore_numeric_ids=False):
616 for meta in _ArchiveIterator(file):
617 xpath = _clean_up_extract_path(meta.path)
619 add_error(Exception('skipping risky path "%s"' % dir.path))
621 if os.path.isdir(meta.path):
622 all_dirs.append(meta)
625 print >> sys.stderr, meta.path
627 meta.apply_to_path(path=xpath,
628 restore_numeric_ids=restore_numeric_ids)
629 except MetadataApplicationError, e:
632 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
634 # Don't need to check xpath -- won't be in all_dirs if not OK.
635 xpath = _clean_up_extract_path(dir.path)
637 print >> sys.stderr, dir.path
639 dir.apply_to_path(path=xpath,
640 restore_numeric_ids=restore_numeric_ids)
641 except MetadataApplicationError, e:
645 def extract(file, restore_numeric_ids=False, create_symlinks=True):
646 # For now, just store all the directories and handle them last,
649 for meta in _ArchiveIterator(file):
650 xpath = _clean_up_extract_path(meta.path)
652 add_error(Exception('skipping risky path "%s"' % meta.path))
656 print >> sys.stderr, '+', meta.path
657 _set_up_path(meta, create_symlinks=create_symlinks)
658 if os.path.isdir(meta.path):
659 all_dirs.append(meta)
662 print >> sys.stderr, '=', meta.path
664 meta.apply_to_path(restore_numeric_ids=restore_numeric_ids)
665 except MetadataApplicationError, e:
667 all_dirs.sort(key = lambda x : len(x.path), reverse=True)
669 # Don't need to check xpath -- won't be in all_dirs if not OK.
670 xpath = _clean_up_extract_path(meta.path)
672 print >> sys.stderr, '=', meta.path
673 # Shouldn't have to check for risky paths here (omitted above).
675 dir.apply_to_path(path=dir.path,
676 restore_numeric_ids=restore_numeric_ids)
677 except MetadataApplicationError, e: