1 """Metadata read/write support for bup."""
3 # Copyright (C) 2010 Rob Browning
5 # This code is covered under the terms of the GNU Library General
6 # Public License as described in the bup LICENSE file.
8 import errno, os, sys, stat, pwd, grp, struct, xattr, posix1e, re
10 from cStringIO import StringIO
12 from bup.helpers import add_error, mkdirp, log, utime, lutime, lstat
13 from bup._helpers import get_linux_file_attr, set_linux_file_attr
15 # WARNING: the metadata encoding is *not* stable yet. Caveat emptor!
17 # Q: Consider hardlink support?
18 # Q: Is it OK to store raw linux attr (chattr) flags?
19 # Q: Can anything other than S_ISREG(x) or S_ISDIR(x) support posix1e ACLs?
20 # Q: Is the application of posix1e has_extended() correct?
21 # Q: Is one global --numeric-ids argument sufficient?
22 # Q: Do nfsv4 acls trump posix1e acls? (seems likely)
23 # Q: Add support for crtime -- ntfs, and (only internally?) ext*?
25 # FIXME: Fix relative/abs path detection/stripping wrt other platforms.
26 # FIXME: Add nfsv4 acl handling - see nfs4-acl-tools.
27 # FIXME: Consider other entries mentioned in stat(2) (S_IFDOOR, etc.).
28 # FIXME: Consider pack('vvvvsss', ...) optimization.
29 # FIXME: Consider caching users/groups.
33 # osx (varies between hfs and hfs+):
34 # type - regular dir char block fifo socket ...
35 # perms - rwxrwxrwxsgt
36 # times - ctime atime mtime
39 # hard-link-info (hfs+ only)
42 # attributes-osx see chflags
48 # type - regular dir ...
49 # times - creation, modification, posix change, access
52 # attributes - see attrib
54 # forks (alternate data streams)
58 # type - regular dir ...
59 # perms - rwxrwxrwx (maybe - see wikipedia)
60 # times - creation, modification, access
61 # attributes - see attrib
65 _have_lchmod = hasattr(os, 'lchmod')
def _clean_up_path_for_archive(p):
    """Return p reduced to a relative path suitable for the archive.

    The steps run in a fixed order: keep what follows the last '/../',
    drop one leading '../', collapse '/./' and '//' to '/', strip
    leading '/'s, and trim one leading './' and one trailing '/.'.
    An empty result, or one that still ends in '/..', becomes '.'.
    """
    # Not the most efficient approach.
    cleaned = p

    # Keep only what follows the last '/../'.
    cut = cleaned.rfind('/../')
    if cut != -1:
        cleaned = cleaned[cut + 4:]

    # Drop one remaining leading '../'.
    if cleaned.startswith('../'):
        cleaned = cleaned[3:]

    # Collapse '/./' sequences, leftmost first, until none remain.
    while '/./' in cleaned:
        cleaned = cleaned.replace('/./', '/', 1)

    # Strip every leading '/'.
    cleaned = cleaned.lstrip('/')

    # Collapse '//' into '/', leftmost first, until none remain.
    while '//' in cleaned:
        cleaned = cleaned.replace('//', '/', 1)

    # Trim one leading './' and one trailing '/.'.
    if cleaned.startswith('./'):
        cleaned = cleaned[2:]
    if cleaned.endswith('/.'):
        cleaned = cleaned[:-2]

    if cleaned == '' or cleaned.endswith('/..'):
        return '.'
    return cleaned
def _risky_path(p):
    """Return True if p is absolute or contains a parent-directory hop.

    A path is risky when it starts with '/' or '../', contains '/../',
    or ends with '/..'.
    """
    return (p.startswith(('/', '../'))
            or '/../' in p
            or p.endswith('/..'))
def _clean_up_extract_path(p):
    """Return the path to extract p to, or None if p must be skipped.

    Leading '/'s are stripped.  If nothing is left the result is '.';
    if what is left is flagged by _risky_path() the result is None.
    """
    stripped = p.lstrip('/')
    if stripped == '':
        return '.'
    if _risky_path(stripped):
        return None
    return stripped
def _normalize_ts(stamp):
    """Return stamp as a (sec, ns) pair with 0 <= ns < 10**9.

    stamp is indexable as (sec, ns).  A stamp whose ns part is already
    in range is returned unchanged; otherwise the pair is folded into
    a single nanosecond count and split again, carrying any excess
    (or deficit) in ns over into the seconds.
    """
    s, ns = stamp[0], stamp[1]
    if 0 <= ns < 10**9:
        return stamp
    # For the purposes of normalization, t = s + ns.
    # divmod() floors, so the seconds stay integral regardless of how
    # the '/' operator is defined for ints, and ns is never negative.
    return divmod(s * 10**9 + ns, 10**9)
144 # These tags are currently conceptually private to Metadata, and they
145 # must be unique, and must *never* be changed.
148 _rec_tag_common = 2 # times, owner, group, type, perms, etc.
149 _rec_tag_symlink_target = 3
150 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc.
151 _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e acls?
152 _rec_tag_linux_attr = 6 # lsattr(1) chattr(1)
153 _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1)
class MetadataAcquisitionError(Exception):
    """Raised when some piece of metadata cannot be read from a path."""
class MetadataApplicationError(Exception):
    """Raised when some piece of metadata cannot be applied to a path."""
167 # Metadata is stored as a sequence of tagged binary records. Each
168 # record will have some subset of add, encode, load, create, and
169 # apply methods, i.e. _add_foo...
173 # Timestamps are (sec, ns), relative to 1970-01-01 00:00:00, ns
174 # must be non-negative and < 10**9.
176 def _add_common(self, path, st):
177 self.mode = st.st_mode
180 self.rdev = st.st_rdev
181 self.atime = st.st_atime
182 self.mtime = st.st_mtime
183 self.ctime = st.st_ctime
184 self.owner = pwd.getpwuid(st.st_uid)[0]
185 self.group = grp.getgrgid(st.st_gid)[0]
187 def _encode_common(self):
188 atime = _normalize_ts(self.atime)
189 mtime = _normalize_ts(self.mtime)
190 ctime = _normalize_ts(self.ctime)
191 result = vint.pack('VVsVsVvVvVvV',
206 def _load_common_rec(self, port):
207 data = vint.read_bvec(port)
219 ctime_ns) = vint.unpack('VVsVsVvVvVvV', data)
220 self.atime = (self.atime, atime_ns)
221 self.mtime = (self.mtime, mtime_ns)
222 self.ctime = (self.ctime, ctime_ns)
223 if self.atime[1] >= 10**9:
224 path = ' for ' + self.path if self.path else ''
225 log('bup: warning - normalizing bad atime%s\n' % (path))
226 self.atime = _normalize_ts(self.atime)
227 if self.mtime[1] >= 10**9:
228 path = ' for ' + self.path if self.path else ''
229 log('bup: warning - normalizing bad mtime%s\n' % (path))
230 self.mtime = _normalize_ts(self.mtime)
231 if self.ctime[1] >= 10**9:
232 path = ' for ' + self.path if self.path else ''
233 log('bup: warning - normalizing bad ctime%s\n' % (path))
234 self.ctime = _normalize_ts(self.ctime)
236 def _create_via_common_rec(self, path, create_symlinks=True):
237 if stat.S_ISREG(self.mode):
238 os.mknod(path, 0600 | stat.S_IFREG)
239 elif stat.S_ISDIR(self.mode):
241 elif stat.S_ISCHR(self.mode):
242 os.mknod(path, 0600 | stat.S_IFCHR, self.rdev)
243 elif stat.S_ISBLK(self.mode):
244 os.mknod(path, 0600 | stat.S_IFBLK, self.rdev)
245 elif stat.S_ISFIFO(self.mode):
246 os.mknod(path, 0600 | stat.S_IFIFO)
247 elif stat.S_ISLNK(self.mode):
248 if(self.symlink_target and create_symlinks):
249 os.symlink(self.symlink_target, path)
250 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
251 # Otherwise, do nothing.
253 def _apply_common_rec(self, path, restore_numeric_ids=False):
254 # FIXME: S_ISDOOR, S_IFMPB, S_IFCMP, S_IFNWK, ... see stat(2).
255 if stat.S_ISLNK(self.mode):
256 lutime(path, (self.atime, self.mtime))
258 utime(path, (self.atime, self.mtime))
259 if stat.S_ISREG(self.mode) \
260 | stat.S_ISDIR(self.mode) \
261 | stat.S_ISCHR(self.mode) \
262 | stat.S_ISBLK(self.mode) \
263 | stat.S_ISLNK(self.mode) \
264 | stat.S_ISFIFO(self.mode):
268 elif not stat.S_ISLNK(self.mode):
273 if not restore_numeric_ids:
274 uid = pwd.getpwnam(self.owner)[2]
275 gid = grp.getgrnam(self.group)[2]
276 os.lchown(path, uid, gid)
279 os.lchmod(path, stat.S_IMODE(self.mode))
280 elif not stat.S_ISLNK(self.mode):
281 os.chmod(path, stat.S_IMODE(self.mode))
def _encode_path(self):
    """Return the packed path record, or None when no path is set."""
    if not self.path:
        return None
    return vint.pack('s', self.path)
def _load_path_rec(self, port):
    """Read a path record from port and store the path in self.path."""
    record = vint.read_bvec(port)
    fields = vint.unpack('s', record)
    self.path = fields[0]
def _add_symlink_target(self, path, st):
    """Record what path points to when st describes a symbolic link.

    For anything that is not a symlink, self.symlink_target is left
    untouched.
    """
    if not stat.S_ISLNK(st.st_mode):
        return
    self.symlink_target = os.readlink(path)
def _encode_symlink_target(self):
    """Return the symlink target as the record payload.

    The target is stored as-is, with no further packing.
    """
    target = self.symlink_target
    return target
def _load_symlink_target_rec(self, port):
    """Read a symlink-target record from port into self.symlink_target."""
    target = vint.read_bvec(port)
    self.symlink_target = target
309 ## POSIX1e ACL records
311 # Recorded as a list:
312 # [txt_id_acl, num_id_acl]
313 # or, if a directory:
314 # [txt_id_acl, num_id_acl, txt_id_default_acl, num_id_default_acl]
315 # The numeric/text distinction only matters when reading/restoring
def _add_posix1e_acl(self, path, st):
    """Capture the POSIX.1e ACLs of path when it has extended ACLs.

    Symbolic links are skipped.  self.posix1e_acl becomes
    [access, access], extended with [default, default] for
    directories; the text-id and numeric-id slots hold the same ACL
    object.  EOPNOTSUPP from the ACL calls is ignored; any other
    EnvironmentError propagates.
    """
    if stat.S_ISLNK(st.st_mode):
        return
    try:
        if not posix1e.has_extended(path):
            return
        access_acl = posix1e.ACL(file=path)
        self.posix1e_acl = [access_acl, access_acl]  # txt and num are the same
        if stat.S_ISDIR(st.st_mode):
            default_acl = posix1e.ACL(filedef=path)
            self.posix1e_acl.extend([default_acl, default_acl])
    except EnvironmentError:
        # sys.exc_info() fetches the active exception without needing
        # version-specific "except ... , e" / "as e" syntax.
        if sys.exc_info()[1].errno != errno.EOPNOTSUPP:
            raise
330 def _encode_posix1e_acl(self):
331 # Encode as two strings (w/default ACL string possibly empty).
333 acls = self.posix1e_acl
334 txt_flags = posix1e.TEXT_ABBREVIATE
335 num_flags = posix1e.TEXT_ABBREVIATE | posix1e.TEXT_NUMERIC_IDS
336 acl_reps = [acls[0].to_any_text('', '\n', txt_flags),
337 acls[1].to_any_text('', '\n', num_flags)]
341 acl_reps.append(acls[2].to_any_text('', '\n', txt_flags))
342 acl_reps.append(acls[3].to_any_text('', '\n', num_flags))
343 return vint.pack('ssss',
344 acl_reps[0], acl_reps[1], acl_reps[2], acl_reps[3])
def _load_posix1e_acl_rec(self, port):
    """Read a POSIX.1e ACL record from port into self.posix1e_acl.

    The record unpacks to four strings.  When the third is empty
    (no default ACL was stored) only the first two are kept, so the
    resulting list has two ACLs for non-directories and four for
    directories.
    """
    acl_reps = vint.unpack('ssss', vint.read_bvec(port))
    if acl_reps[2] == '':
        acl_reps = acl_reps[:2]
    # Pass the text by keyword, the same way the file=/filedef= forms
    # are used when the ACLs are captured; the constructor does not
    # take its source positionally.
    self.posix1e_acl = [posix1e.ACL(text=rep) for rep in acl_reps]
355 def _apply_posix1e_acl_rec(self, path, restore_numeric_ids=False):
356 if(self.posix1e_acl):
357 acls = self.posix1e_acl
359 if restore_numeric_ids:
360 acls[3].applyto(path, posix1e.ACL_TYPE_DEFAULT)
362 acls[2].applyto(path, posix1e.ACL_TYPE_DEFAULT)
363 if restore_numeric_ids:
364 acls[1].applyto(path, posix1e.ACL_TYPE_ACCESS)
366 acls[0].applyto(path, posix1e.ACL_TYPE_ACCESS)
369 ## Linux attributes (lsattr(1), chattr(1))
371 def _add_linux_attr(self, path, st):
372 if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
373 attr = get_linux_file_attr(path)
375 self.linux_attr = get_linux_file_attr(path)
def _encode_linux_attr(self):
    """Return the packed Linux attr record, or None when there is none."""
    if not self.linux_attr:
        return None
    return vint.pack('V', self.linux_attr)
def _load_linux_attr_rec(self, port):
    """Read a Linux attr record from port into self.linux_attr."""
    fields = vint.unpack('V', vint.read_bvec(port))
    self.linux_attr = fields[0]
def _apply_linux_attr_rec(self, path, restore_numeric_ids=False):
    """Set the stored Linux file attributes on path, if there are any.

    restore_numeric_ids is accepted for signature parity with the
    other _apply_*_rec methods and is not used here.
    """
    if not self.linux_attr:
        return
    set_linux_file_attr(path, self.linux_attr)
392 ## Linux extended attributes (getfattr(1), setfattr(1))
def _add_linux_xattr(self, path, st):
    """Capture the extended attributes of path into self.linux_xattr.

    The attributes are read without following symlinks.  EOPNOTSUPP
    is ignored; any other EnvironmentError propagates.  st is not
    used.
    """
    try:
        self.linux_xattr = xattr.get_all(path, nofollow=True)
    except EnvironmentError:
        # sys.exc_info() fetches the active exception without needing
        # version-specific "except ... , e" / "as e" syntax.
        if sys.exc_info()[1].errno != errno.EOPNOTSUPP:
            raise
def _encode_linux_xattr(self):
    """Return the packed xattr record, or None when there are no xattrs.

    The record is the attribute count packed as 'V', followed by each
    (name, value) pair packed as 'ss' -- the sequence that
    _load_linux_xattr_rec reads back.
    """
    if not self.linux_xattr:
        return None
    # Encode from self.linux_xattr, the attribute the add/load/apply
    # methods all use.
    result = vint.pack('V', len(self.linux_xattr))
    for name, value in self.linux_xattr:
        result += vint.pack('ss', name, value)
    return result
def _load_linux_xattr_rec(self, file):
    """Read an xattr record from file into self.linux_xattr.

    The record body is a count followed by that many key/value
    entries; the result is stored as a list of (key, value) tuples.
    """
    stream = StringIO(vint.read_bvec(file))
    count = vint.read_vuint(stream)
    pairs = []
    for _ in range(count):
        key = vint.read_bvec(stream)
        value = vint.read_bvec(stream)
        pairs.append((key, value))
    self.linux_xattr = pairs
def _apply_linux_xattr_rec(self, path, restore_numeric_ids=False):
    """Set every stored extended attribute on path, if there are any.

    Attributes are set without following symlinks.
    restore_numeric_ids is accepted for signature parity with the
    other _apply_*_rec methods and is not used here.
    """
    if not self.linux_xattr:
        return
    for name, value in self.linux_xattr:
        xattr.set(path, name, value, nofollow=True)
428 self.symlink_target = None
429 self.linux_attr = None
430 self.linux_xattr = None
431 self.posix1e_acl = None
432 self.posix1e_acl_default = None
434 def write(self, port, include_path=True):
435 records = [(_rec_tag_path, self._encode_path())] if include_path else []
436 records.extend([(_rec_tag_common, self._encode_common()),
437 (_rec_tag_symlink_target, self._encode_symlink_target()),
438 (_rec_tag_posix1e_acl, self._encode_posix1e_acl()),
439 (_rec_tag_linux_attr, self._encode_linux_attr()),
440 (_rec_tag_linux_xattr, self._encode_linux_xattr())])
441 for tag, data in records:
443 vint.write_vuint(port, tag)
444 vint.write_bvec(port, data)
445 vint.write_vuint(port, _rec_tag_end)
@staticmethod
def read(port):
    """Read one Metadata object from port.

    This method should either: return a valid Metadata object; raise
    EOFError if there was nothing at all to read; raise an Exception
    if a valid object could not be read completely.

    Records whose tag has no loader here (including NFSv4 ACL records)
    are read and discarded.
    """
    tag = vint.read_vuint(port)
    try:  # From here on, EOF is an error.
        result = Metadata()
        loaders = {
            _rec_tag_path: result._load_path_rec,
            _rec_tag_common: result._load_common_rec,
            _rec_tag_symlink_target: result._load_symlink_target_rec,
            _rec_tag_posix1e_acl: result._load_posix1e_acl_rec,
            _rec_tag_linux_attr: result._load_linux_attr_rec,
            _rec_tag_linux_xattr: result._load_linux_xattr_rec,
        }
        # Only exit is an error (exception) or _rec_tag_end.
        while tag != _rec_tag_end:
            load = loaders.get(tag)
            if load is not None:
                load(port)
            else:
                # Unknown record: consume its payload so that the next
                # read starts at the following tag.
                vint.read_bvec(port)
            tag = vint.read_vuint(port)
        return result
    except EOFError:
        raise Exception("EOF while reading Metadata")
479 return stat.S_ISDIR(self.mode)
def create_path(self, path, create_symlinks=True):
    """Create the filesystem object this metadata describes at path.

    Delegates to _create_via_common_rec(), passing create_symlinks
    through unchanged.
    """
    make = self._create_via_common_rec
    make(path, create_symlinks=create_symlinks)
484 def apply_to_path(self, path=None, restore_numeric_ids=False):
485 # apply metadata to path -- file must exist
489 raise Exception('Metadata.apply_to_path() called with no path');
490 num_ids = restore_numeric_ids
491 try: # Later we may want to push this down and make it finer grained.
492 self._apply_common_rec(path, restore_numeric_ids=num_ids)
493 self._apply_posix1e_acl_rec(path, restore_numeric_ids=num_ids)
494 self._apply_linux_attr_rec(path, restore_numeric_ids=num_ids)
495 self._apply_linux_xattr_rec(path, restore_numeric_ids=num_ids)
497 raise MetadataApplicationError(str(e))
500 def from_path(path, archive_path=None, save_symlinks=True):
502 result.path = archive_path
504 try: # Later we may want to push this down and make it finer grained.
505 result._add_common(path, st)
507 result._add_symlink_target(path, st)
508 result._add_posix1e_acl(path, st)
509 result._add_linux_attr(path, st)
510 result._add_linux_xattr(path, st)
512 raise MetadataAcquisitionError(str(e))
516 def save_tree(output_file, paths,
521 safe_path = _clean_up_path_for_archive(p)
523 log('bup: archiving "%s" as "%s"\n' % (p, safe_path))
525 # Handle path itself.
527 m = from_path(p, archive_path=safe_path,
528 save_symlinks=save_symlinks)
529 except MetadataAcquisitionError, e:
533 print >> sys.stderr, m.path
534 m.write(output_file, include_path=write_paths)
536 if recurse and os.path.isdir(p):
537 for root, dirs, files in os.walk(p, onerror=add_error):
539 for sub_path in items:
540 full_path = os.path.join(root, sub_path)
541 safe_path = _clean_up_path_for_archive(full_path)
543 m = from_path(full_path,
544 archive_path=safe_path,
545 save_symlinks=save_symlinks)
546 except MetadataAcquisitionError, e:
549 print >> sys.stderr, m.path
550 m.write(output_file, include_path=write_paths)
553 def _set_up_path(meta, create_symlinks=True):
554 # Allow directories to exist as a special case -- might have
555 # been created by an earlier longer path.
557 mkdirp(meta.path, 0700)
559 parent = os.path.dirname(meta.path)
562 meta.create_path(meta.path, create_symlinks=create_symlinks)
class _ArchiveIterator:
    """Iterate over the Metadata records readable from a file object.

    Each step returns the result of Metadata.read(); iteration stops
    when that call raises EOFError.
    """

    def __init__(self, file):
        self._file = file

    def __iter__(self):
        return self

    def next(self):
        try:
            return Metadata.read(self._file)
        except EOFError:
            raise StopIteration()

    # The same method under the name newer interpreters look for.
    __next__ = next
579 def display_archive(file):
580 for meta in _ArchiveIterator(file):
582 print meta.path # FIXME
587 def start_extract(file, create_symlinks=True):
588 for meta in _ArchiveIterator(file):
590 print >> sys.stderr, meta.path
591 xpath = _clean_up_extract_path(meta.path)
593 add_error(Exception('skipping risky path "%s"' % meta.path))
596 _set_up_path(meta, create_symlinks=create_symlinks)
def finish_extract(file, restore_numeric_ids=False):
    """Apply the metadata stored in file to paths that already exist.

    Every record's path is sanitised with _clean_up_extract_path();
    risky paths are reported through add_error() and skipped.
    Metadata for non-directories is applied immediately.  Directories
    are collected and handled afterwards, longest path first.
    MetadataApplicationError is reported through add_error() rather
    than aborting the run.
    """
    all_dirs = []
    for meta in _ArchiveIterator(file):
        xpath = _clean_up_extract_path(meta.path)
        if not xpath:
            add_error(Exception('skipping risky path "%s"' % meta.path))
            continue
        # Test the sanitised path: that is where the metadata will be
        # applied.
        if os.path.isdir(xpath):
            all_dirs.append(meta)
            continue
        if verbose:
            sys.stderr.write('%s\n' % meta.path)
        try:
            meta.apply_to_path(path=xpath,
                               restore_numeric_ids=restore_numeric_ids)
        except MetadataApplicationError:
            add_error(sys.exc_info()[1])

    all_dirs.sort(key=lambda m: len(m.path), reverse=True)
    for dir_meta in all_dirs:
        # Don't need to check xpath -- won't be in all_dirs if not OK.
        xpath = _clean_up_extract_path(dir_meta.path)
        if verbose:
            sys.stderr.write('%s\n' % dir_meta.path)
        try:
            dir_meta.apply_to_path(path=xpath,
                                   restore_numeric_ids=restore_numeric_ids)
        except MetadataApplicationError:
            add_error(sys.exc_info()[1])
def extract(file, restore_numeric_ids=False, create_symlinks=True):
    """Create every path recorded in file and apply its metadata.

    Each record's path is sanitised with _clean_up_extract_path();
    risky paths are reported through add_error() and skipped.  The
    path is created with _set_up_path(); metadata for non-directories
    is applied immediately, while directories are handled afterwards.
    MetadataApplicationError is reported through add_error() rather
    than aborting the run.
    """
    # For now, just store all the directories and handle them last,
    # longest path first.
    all_dirs = []
    for meta in _ArchiveIterator(file):
        xpath = _clean_up_extract_path(meta.path)
        if not xpath:
            add_error(Exception('skipping risky path "%s"' % meta.path))
            continue
        # From here on, work only with the sanitised path.
        meta.path = xpath
        if verbose:
            sys.stderr.write('+ %s\n' % meta.path)
        _set_up_path(meta, create_symlinks=create_symlinks)
        if os.path.isdir(meta.path):
            all_dirs.append(meta)
            continue
        if verbose:
            sys.stderr.write('= %s\n' % meta.path)
        try:
            meta.apply_to_path(path=meta.path,
                               restore_numeric_ids=restore_numeric_ids)
        except MetadataApplicationError:
            add_error(sys.exc_info()[1])

    all_dirs.sort(key=lambda m: len(m.path), reverse=True)
    for dir_meta in all_dirs:
        # Shouldn't have to check for risky paths here (omitted above);
        # dir_meta.path was already replaced by its sanitised form.
        if verbose:
            sys.stderr.write('= %s\n' % dir_meta.path)
        try:
            dir_meta.apply_to_path(path=dir_meta.path,
                                   restore_numeric_ids=restore_numeric_ids)
        except MetadataApplicationError:
            add_error(sys.exc_info()[1])