From b8bb5f94f2fcd98e09eaeec8451495e3ec363ff8 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Sat, 20 Oct 2018 18:04:27 -0500 Subject: [PATCH] metadata: always add/store/retrieve size for links and normal files This simplifies cases where we need to transmit Metadata objects (i.e. bup-get's repo.resolve()), and it means that for trees created using this new v3 format, retrieving the sizes of chunked files should be notably less expensive, since they'll be directly available in the directory's .bupm file. Without that, we have to seek around in the chunked tree to find the last byte (cf. vfs._normal_or_chunked_file_size). Only store the sizes for symlinks and regular files (which might be chunked) until it's clear that other st_sizes are useful. Signed-off-by: Rob Browning Tested-by: Rob Browning --- cmd/save-cmd.py | 7 ++-- lib/bup/metadata.py | 79 +++++++++++++++++++++++++++++---------------- lib/bup/t/tvfs.py | 2 +- 3 files changed, 57 insertions(+), 31 deletions(-) diff --git a/cmd/save-cmd.py b/cmd/save-cmd.py index bf2877b..e78796b 100755 --- a/cmd/save-cmd.py +++ b/cmd/save-cmd.py @@ -107,7 +107,6 @@ def eatslash(dir): else: return dir - # Metadata is stored in a file named .bupm in each directory. The # first metadata entry will be the metadata for the current directory. # The remaining entries will be for each of the other directory @@ -348,7 +347,8 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): dir_name, fs_path = path_component # Not indexed, so just grab the FS metadata or use empty metadata. try: - meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() + meta = metadata.from_path(fs_path, normalized=True) \ + if fs_path else metadata.Metadata() except (OSError, IOError) as e: add_error(e) lastskip_name = dir_name @@ -425,7 +425,8 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): sort_key = git.shalist_item_sort_key((ent.mode, file, id)) hlink = find_hardlink_target(hlink_db, ent) try: - meta = metadata.from_path(ent.name, hardlink_target=hlink) + meta = metadata.from_path(ent.name, hardlink_target=hlink, + normalized=True) except (OSError, IOError) as e: add_error(e) lastskip_name = ent.name diff --git a/lib/bup/metadata.py b/lib/bup/metadata.py index 58e3afa..e092380 100644 --- a/lib/bup/metadata.py +++ b/lib/bup/metadata.py @@ -183,7 +183,7 @@ def _clean_up_extract_path(p): # must be unique, and must *never* be changed. _rec_tag_end = 0 _rec_tag_path = 1 -_rec_tag_common = 2 # times, user, group, type, perms, etc. (legacy/broken) +_rec_tag_common_v1 = 2 # times, user, group, type, perms, etc. (legacy/broken) _rec_tag_symlink_target = 3 _rec_tag_posix1e_acl = 4 # getfacl(1), setfacl(1), etc. _rec_tag_nfsv4_acl = 5 # intended to supplant posix1e? (unimplemented) @@ -191,6 +191,7 @@ _rec_tag_linux_attr = 6 # lsattr(1) chattr(1) _rec_tag_linux_xattr = 7 # getfattr(1) setfattr(1) _rec_tag_hardlink_target = 8 # hard link target path _rec_tag_common_v2 = 9 # times, user, group, type, perms, etc. (current) +_rec_tag_common_v3 = 10 # adds optional size to v2 _warned_about_attr_einval = None @@ -222,6 +223,7 @@ class Metadata: def _add_common(self, path, st): assert(st.st_uid >= 0) assert(st.st_gid >= 0) + self.size = st.st_size self.uid = st.st_uid self.gid = st.st_gid self.atime = st.st_atime @@ -252,7 +254,8 @@ class Metadata: and self.mtime == other.mtime \ and self.ctime == other.ctime \ and self.user == other.user \ - and self.group == other.group + and self.group == other.group \ + and self.size == other.size def _encode_common(self): if not self.mode: @@ -260,7 +263,7 @@ class Metadata: atime = xstat.nsecs_to_timespec(self.atime) mtime = xstat.nsecs_to_timespec(self.mtime) ctime = xstat.nsecs_to_timespec(self.ctime) - result = vint.pack('vvsvsvvVvVvV', + result = vint.pack('vvsvsvvVvVvVv', self.mode, self.uid, self.user, @@ -272,26 +275,36 @@ class Metadata: mtime[0], mtime[1], ctime[0], - ctime[1]) + ctime[1], + self.size if self.size is not None else -1) return result - def _load_common_rec(self, port, legacy_format=False): - unpack_fmt = 'vvsvsvvVvVvV' - if legacy_format: + def _load_common_rec(self, port, version=3): + if version == 3: + # Added trailing size to v2, negative when None. + unpack_fmt = 'vvsvsvvVvVvVv' + elif version == 2: + unpack_fmt = 'vvsvsvvVvVvV' + elif version == 1: unpack_fmt = 'VVsVsVvVvVvV' + else: + raise Exception('unexpected common_rec version %d' % version) data = vint.read_bvec(port) - (self.mode, - self.uid, - self.user, - self.gid, - self.group, - self.rdev, - self.atime, - atime_ns, - self.mtime, - mtime_ns, - self.ctime, - ctime_ns) = vint.unpack(unpack_fmt, data) + values = vint.unpack(unpack_fmt, data) + if version == 3: + (self.mode, self.uid, self.user, self.gid, self.group, + self.rdev, + self.atime, atime_ns, + self.mtime, mtime_ns, + self.ctime, ctime_ns, size) = values + if size >= 0: + self.size = size + else: + (self.mode, self.uid, self.user, self.gid, self.group, + self.rdev, + self.atime, atime_ns, + self.mtime, mtime_ns, + self.ctime, ctime_ns) = values self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns)) self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns)) self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns)) @@ -468,7 +481,10 @@ class Metadata: def _load_symlink_target_rec(self, port): target = vint.read_bvec(port) self.symlink_target = target - self.size = len(target) + if self.size is None: + self.size = len(target) + else: + assert(self.size == len(target)) ## Hardlink targets @@ -791,7 +807,7 @@ class Metadata: def write(self, port, include_path=True): records = include_path and [(_rec_tag_path, self._encode_path())] or [] - records.extend([(_rec_tag_common_v2, self._encode_common()), + records.extend([(_rec_tag_common_v3, self._encode_common()), (_rec_tag_symlink_target, self._encode_symlink_target()), (_rec_tag_hardlink_target, @@ -828,8 +844,10 @@ class Metadata: while True: # only exit is error (exception) or _rec_tag_end if tag == _rec_tag_path: result._load_path_rec(port) + elif tag == _rec_tag_common_v3: + result._load_common_rec(port, version=3) elif tag == _rec_tag_common_v2: - result._load_common_rec(port) + result._load_common_rec(port, version=2) elif tag == _rec_tag_symlink_target: result._load_symlink_target_rec(port) elif tag == _rec_tag_hardlink_target: @@ -842,8 +860,8 @@ class Metadata: result._load_linux_xattr_rec(port) elif tag == _rec_tag_end: return result - elif tag == _rec_tag_common: # Should be very rare. - result._load_common_rec(port, legacy_format = True) + elif tag == _rec_tag_common_v1: # Should be very rare. + result._load_common_rec(port, version=1) else: # unknown record vint.skip_bvec(port) tag = vint.read_vuint(port) @@ -889,11 +907,14 @@ class Metadata: def from_path(path, statinfo=None, archive_path=None, - save_symlinks=True, hardlink_target=None): + save_symlinks=True, hardlink_target=None, + normalized=False): + """Return the metadata associated with the path. When normalized is + true, return the metadata appropriate for a typical save, which + may or may not be all of it.""" result = Metadata() result.path = archive_path st = statinfo or xstat.lstat(path) - result.size = st.st_size result._add_common(path, st) if save_symlinks: result._add_symlink_target(path, st) @@ -901,6 +922,10 @@ def from_path(path, statinfo=None, archive_path=None, result._add_posix1e_acl(path, st) result._add_linux_attr(path, st) result._add_linux_xattr(path, st) + if normalized: + # Only store sizes for regular files and symlinks for now. + if not (stat.S_ISREG(result.mode) or stat.S_ISLNK(result.mode)): + result.size = None return result @@ -1047,7 +1072,7 @@ def detailed_str(meta, fields = None): os.minor(meta.rdev))) else: result.append('rdev: 0') - if 'size' in fields and meta.size: + if 'size' in fields and meta.size is not None: result.append('size: ' + str(meta.size)) if 'uid' in fields: result.append('uid: ' + str(meta.uid)) diff --git a/lib/bup/t/tvfs.py b/lib/bup/t/tvfs.py index 127acb6..6689291 100644 --- a/lib/bup/t/tvfs.py +++ b/lib/bup/t/tvfs.py @@ -89,7 +89,7 @@ def tree_items(repo, oid): try: maybe_meta = lambda : Metadata.read(bupm) if bupm else None m = maybe_meta() - if m: + if m and m.size is None: m.size = 0 yield TreeDictValue(name='.', oid=oid, meta=m) tree_ents = vfs.ordered_tree_entries(tree_data, bupm=True) -- 2.39.2