X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=lib%2Fbup%2Findex.py;h=b9a4013be032bfd75d68e350250a43d4b9571d85;hb=f26a6a9ec9d7179c440c2486fe6fd7459c43bda3;hp=1f3c777c8ed9e804aa011b5b8773bc76f0b33334;hpb=1eafa883ab462f73d3edbb3ce10d71706935a29c;p=bup.git diff --git a/lib/bup/index.py b/lib/bup/index.py index 1f3c777..b9a4013 100644 --- a/lib/bup/index.py +++ b/lib/bup/index.py @@ -1,11 +1,17 @@ -import metadata, os, stat, struct, tempfile -from bup import xstat -from bup.helpers import * -EMPTY_SHA = '\0'*20 -FAKE_SHA = '\x01'*20 +from __future__ import absolute_import, print_function +import errno, os, stat, struct, tempfile -INDEX_HDR = 'BUPI\0\0\0\5' +from bup import metadata, xstat +from bup._helpers import UINT_MAX, bytescmp +from bup.compat import pending_raise, range +from bup.helpers import (add_error, log, merge_iter, mmap_readwrite, + progress, qprogress, resolve_parent, slashappend) + +EMPTY_SHA = b'\0' * 20 +FAKE_SHA = b'\x01' * 20 + +INDEX_HDR = b'BUPI\0\0\0\7' # Time values are handled as integer nanoseconds since the epoch in # memory, but are written as xstat/metadata timespecs. This behavior @@ -14,7 +20,21 @@ INDEX_HDR = 'BUPI\0\0\0\5' # Record times (mtime, ctime, atime) as xstat/metadata timespecs, and # store all of the times in the index so they won't interfere with the # forthcoming metadata cache. -INDEX_SIG = '!QQQqQqQqQIIQII20sHIIQ' +INDEX_SIG = ('!' + 'Q' # dev + 'Q' # ino + 'Q' # nlink + 'qQ' # ctime_s, ctime_ns + 'qQ' # mtime_s, mtime_ns + 'qQ' # atime_s, atime_ns + 'Q' # size + 'I' # mode + 'I' # gitmode + '20s' # sha + 'H' # flags + 'Q' # children_ofs + 'I' # children_n + 'Q') # meta_ofs ENTLEN = struct.calcsize(INDEX_SIG) FOOTER_SIG = '!Q' @@ -30,6 +50,7 @@ class Error(Exception): class MetaStoreReader: def __init__(self, filename): + self._file = None self._file = open(filename, 'rb') def close(self): @@ -37,8 +58,12 @@ class MetaStoreReader: self._file.close() self._file = None - def __del__(self): - self.close() + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + with pending_raise(value, rethrow=True): + self.close() def metadata_at(self, ofs): self._file.seek(ofs) @@ -53,20 +78,23 @@ class MetaStoreWriter: # Map metadata hashes to bupindex.meta offsets. self._offsets = {} self._filename = filename + self._file = None # FIXME: see how slow this is; does it matter? m_file = open(filename, 'ab+') try: m_file.seek(0) try: + m_off = m_file.tell() m = metadata.Metadata.read(m_file) while m: m_encoded = m.encode() - self._offsets[m_encoded] = m_file.tell() - len(m_encoded) + self._offsets[m_encoded] = m_off + m_off = m_file.tell() m = metadata.Metadata.read(m_file) except EOFError: pass except: - log('index metadata in %r appears to be corrupt' % filename) + log('index metadata in %r appears to be corrupt\n' % filename) raise finally: m_file.close() @@ -77,9 +105,12 @@ class MetaStoreWriter: self._file.close() self._file = None - def __del__(self): - # Be optimistic. 
- self.close() + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + with pending_raise(value, rethrow=False): + self.close() def store(self, metadata): meta_encoded = metadata.encode(include_path=False) @@ -103,7 +134,7 @@ class Level: (ofs,n) = (f.tell(), len(self.list)) if self.list: count = len(self.list) - #log('popping %r with %d entries\n' + #log('popping %r with %d entries\n' # % (''.join(self.ename), count)) for e in self.list: e.write(f) @@ -141,17 +172,19 @@ def _golevel(level, f, ename, newentry, metastore, tmax): class Entry: def __init__(self, basename, name, meta_ofs, tmax): - self.basename = str(basename) - self.name = str(name) + assert basename is None or isinstance(basename, bytes) + assert name is None or isinstance(name, bytes) + self.basename = basename + self.name = name self.meta_ofs = meta_ofs self.tmax = tmax self.children_ofs = 0 self.children_n = 0 def __repr__(self): - return ("(%s,0x%04x,%d,%d,%d,%d,%d,%d,%d,%d,%s/%s,0x%04x,%d,0x%08x/%d)" + return ("(%r,0x%04x,%d,%d,%d,%d,%d,%d,%s/%s,0x%04x,%d,0x%08x/%d)" % (self.name, self.dev, self.ino, self.nlink, - self.ctime, self.mtime, self.atime, self.uid, self.gid, + self.ctime, self.mtime, self.atime, self.size, self.mode, self.gitmode, self.flags, self.meta_ofs, self.children_ofs, self.children_n)) @@ -166,47 +199,51 @@ class Entry: ctime[0], ctime[1], mtime[0], mtime[1], atime[0], atime[1], - self.uid, self.gid, self.size, self.mode, + self.size, self.mode, self.gitmode, self.sha, self.flags, self.children_ofs, self.children_n, self.meta_ofs) - except (DeprecationWarning, struct.error), e: + except (DeprecationWarning, struct.error) as e: log('pack error: %s (%r)\n' % (e, self)) raise - def from_stat(self, st, meta_ofs, tstart, check_device=True): - old = (self.dev if check_device else 0, - self.ino, self.nlink, self.ctime, self.mtime, - self.uid, self.gid, self.size, self.flags & IX_EXISTS) - new = (st.st_dev if check_device else 0, - st.st_ino, st.st_nlink, st.st_ctime, st.st_mtime, - st.st_uid, st.st_gid, st.st_size, IX_EXISTS) + def stale(self, st, check_device=True): + if self.size != st.st_size: + return True + if self.mtime != st.st_mtime: + return True + if self.sha == EMPTY_SHA: + return True + if not self.gitmode: + return True + if self.ctime != st.st_ctime: + return True + if self.ino != st.st_ino: + return True + if self.nlink != st.st_nlink: + return True + if not (self.flags & IX_EXISTS): + return True + if check_device and (self.dev != st.st_dev): + return True + return False + + def update_from_stat(self, st, meta_ofs): + # Should only be called when the entry is stale(), and + # invalidate() should almost certainly be called afterward. self.dev = st.st_dev self.ino = st.st_ino self.nlink = st.st_nlink self.ctime = st.st_ctime self.mtime = st.st_mtime self.atime = st.st_atime - self.uid = st.st_uid - self.gid = st.st_gid self.size = st.st_size self.mode = st.st_mode self.flags |= IX_EXISTS self.meta_ofs = meta_ofs - # Check that the ctime's "second" is at or after tstart's. 
-        ctime_sec_in_ns = xstat.fstime_floor_secs(st.st_ctime) * 10**9
-        if ctime_sec_in_ns >= tstart or old != new \
-           or self.sha == EMPTY_SHA or not self.gitmode:
-            self.invalidate()
         self._fixup()
-    
+
     def _fixup(self):
-        if self.uid < 0:
-            self.uid += 0x100000000
-        if self.gid < 0:
-            self.gid += 0x100000000
-        assert(self.uid >= 0)
-        assert(self.gid >= 0)
         self.mtime = self._fixup_time(self.mtime)
         self.ctime = self._fixup_time(self.ctime)
@@ -250,25 +287,51 @@ class Entry:
     def is_fake(self):
         return not self.ctime
 
-    def __cmp__(a, b):
-        return (cmp(b.name, a.name)
-                or cmp(a.is_valid(), b.is_valid())
-                or cmp(a.is_fake(), b.is_fake()))
+    def _cmp(self, other):
+        # Note reversed name ordering
+        bc = bytescmp(other.name, self.name)
+        if bc != 0:
+            return bc
+        vc = self.is_valid() - other.is_valid()
+        if vc != 0:
+            return vc
+        fc = self.is_fake() - other.is_fake()
+        if fc != 0:
+            return fc
+        return 0
+
+    def __eq__(self, other):
+        return self._cmp(other) == 0
+
+    def __ne__(self, other):
+        return self._cmp(other) != 0
+
+    def __lt__(self, other):
+        return self._cmp(other) < 0
+
+    def __gt__(self, other):
+        return self._cmp(other) > 0
+
+    def __le__(self, other):
+        return self._cmp(other) <= 0
+
+    def __ge__(self, other):
+        return self._cmp(other) >= 0
 
     def write(self, f):
-        f.write(self.basename + '\0' + self.packed())
+        f.write(self.basename + b'\0' + self.packed())
 
 
 class NewEntry(Entry):
     def __init__(self, basename, name, tmax,
                  dev, ino, nlink, ctime, mtime, atime,
-                 uid, gid, size, mode, gitmode, sha, flags, meta_ofs,
+                 size, mode, gitmode, sha, flags, meta_ofs,
                  children_ofs, children_n):
         Entry.__init__(self, basename, name, meta_ofs, tmax)
         (self.dev, self.ino, self.nlink, self.ctime, self.mtime, self.atime,
-         self.uid, self.gid, self.size, self.mode, self.gitmode, self.sha,
+         self.size, self.mode, self.gitmode, self.sha,
          self.flags, self.children_ofs, self.children_n
-         ) = (dev, ino, nlink, ctime, mtime, atime, uid, gid,
+         ) = (dev, ino, nlink, ctime, mtime, atime,
              size, mode, gitmode, sha, flags, children_ofs, children_n)
         self._fixup()
 
@@ -276,7 +339,7 @@ class NewEntry(Entry):
 class BlankNewEntry(NewEntry):
     def __init__(self, basename, meta_ofs, tmax):
         NewEntry.__init__(self, basename, basename, tmax,
-                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                          0, 0, 0, 0, 0, 0, 0, 0, 0,
                           EMPTY_SHA, 0, meta_ofs, 0, 0)
 
 
@@ -288,9 +351,9 @@ class ExistingEntry(Entry):
         self._ofs = ofs
         (self.dev, self.ino, self.nlink,
          self.ctime, ctime_ns, self.mtime, mtime_ns, self.atime, atime_ns,
-         self.uid, self.gid, self.size, self.mode, self.gitmode, self.sha,
+         self.size, self.mode, self.gitmode, self.sha,
          self.flags, self.children_ofs, self.children_n, self.meta_ofs
-         ) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN)))
+         ) = struct.unpack(INDEX_SIG, m[ofs : ofs + ENTLEN])
         self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
         self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
         self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
@@ -321,22 +384,22 @@ class ExistingEntry(Entry):
 
     def iter(self, name=None, wantrecurse=None):
         dname = name
-        if dname and not dname.endswith('/'):
-            dname += '/'
+        if dname and not dname.endswith(b'/'):
+            dname += b'/'
         ofs = self.children_ofs
         assert(ofs <= len(self._m))
-        assert(self.children_n < 1000000)
-        for i in xrange(self.children_n):
-            eon = self._m.find('\0', ofs)
+        assert(self.children_n <= UINT_MAX)  # i.e. python struct 'I'
+        for i in range(self.children_n):
+            eon = self._m.find(b'\0', ofs)
             assert(eon >= 0)
             assert(eon >= ofs)
             assert(eon > ofs)
-            basename = str(buffer(self._m, ofs, eon-ofs))
+            basename = self._m[ofs : ofs + (eon - ofs)]
             child = ExistingEntry(self, basename, self.name + basename,
                                   self._m, eon+1)
             if (not dname
                  or child.name.startswith(dname)
-                 or child.name.endswith('/') and dname.startswith(child.name)):
+                 or child.name.endswith(b'/') and dname.startswith(child.name)):
                 if not wantrecurse or wantrecurse(child):
                     for e in child.iter(name=name, wantrecurse=wantrecurse):
                         yield e
@@ -346,18 +409,18 @@ class ExistingEntry(Entry):
     def __iter__(self):
         return self.iter()
 
-    
+
 class Reader:
     def __init__(self, filename):
         self.filename = filename
-        self.m = ''
+        self.m = b''
         self.writable = False
         self.count = 0
         f = None
         try:
-            f = open(filename, 'r+')
-        except IOError, e:
+            f = open(filename, 'rb+')
+        except IOError as e:
             if e.errno == errno.ENOENT:
                 pass
             else:
@@ -373,10 +436,15 @@ class Reader:
             self.m = mmap_readwrite(f)
             self.writable = True
             self.count = struct.unpack(FOOTER_SIG,
-                  str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
+                                       self.m[st.st_size - FOOTLEN
+                                              : st.st_size])[0]
+
+    def __enter__(self):
+        return self
 
-    def __del__(self):
-        self.close()
+    def __exit__(self, type, value, traceback):
+        with pending_raise(value, rethrow=False):
+            self.close()
 
     def __len__(self):
         return int(self.count)
@@ -384,20 +452,20 @@ class Reader:
     def forward_iter(self):
         ofs = len(INDEX_HDR)
         while ofs+ENTLEN <= len(self.m)-FOOTLEN:
-            eon = self.m.find('\0', ofs)
+            eon = self.m.find(b'\0', ofs)
             assert(eon >= 0)
             assert(eon >= ofs)
             assert(eon > ofs)
-            basename = str(buffer(self.m, ofs, eon-ofs))
+            basename = self.m[ofs : ofs + (eon - ofs)]
             yield ExistingEntry(None, basename, basename, self.m, eon+1)
             ofs = eon + 1 + ENTLEN
 
     def iter(self, name=None, wantrecurse=None):
         if len(self.m) > len(INDEX_HDR)+ENTLEN:
             dname = name
-            if dname and not dname.endswith('/'):
-                dname += '/'
-            root = ExistingEntry(None, '/', '/',
+            if dname and not dname.endswith(b'/'):
+                dname += b'/'
+            root = ExistingEntry(None, b'/', b'/',
                                  self.m, len(self.m)-FOOTLEN-ENTLEN)
             for sub in root.iter(name=name, wantrecurse=wantrecurse):
                 yield sub
@@ -407,6 +475,11 @@ class Reader:
     def __iter__(self):
         return self.iter()
 
+    def find(self, name):
+        return next((e for e in self.iter(name, wantrecurse=lambda x : True)
+                     if e.name == name),
+                    None)
+
     def exists(self):
         return self.m
 
@@ -423,19 +496,28 @@ class Reader:
 
     def filter(self, prefixes, wantrecurse=None):
         for (rp, path) in reduce_paths(prefixes):
+            any_entries = False
             for e in self.iter(rp, wantrecurse=wantrecurse):
+                any_entries = True
                 assert(e.name.startswith(rp))
                 name = path + e.name[len(rp):]
                 yield (name, e)
-    
+            if not any_entries:
+                # Always return at least the top for each prefix.
+                # Otherwise something like "save x/y" will produce
+                # nothing if x is up to date.
+                pe = self.find(rp)
+                assert(pe)
+                name = path + pe.name[len(rp):]
+                yield (name, pe)
 
 # FIXME: this function isn't very generic, because it splits the filename
 # in an odd way and depends on a terminating '/' to indicate directories.
def pathsplit(p): """Split a path into a list of elements of the file system hierarchy.""" - l = p.split('/') - l = [i+'/' for i in l[:-1]] + l[-1:] - if l[-1] == '': + l = p.split(b'/') + l = [i + b'/' for i in l[:-1]] + l[-1:] + if l[-1] == b'': l.pop() # extra blank caused by terminating '/' return l @@ -447,16 +529,20 @@ class Writer: self.count = 0 self.lastfile = None self.filename = None - self.filename = filename = realpath(filename) + self.filename = filename = resolve_parent(filename) self.metastore = metastore self.tmax = tmax (dir,name) = os.path.split(filename) - (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir) + ffd, self.tmpname = tempfile.mkstemp(b'.tmp', filename, dir) self.f = os.fdopen(ffd, 'wb', 65536) self.f.write(INDEX_HDR) - def __del__(self): - self.abort() + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + with pending_raise(value, rethrow=False): + self.abort() def abort(self): f = self.f @@ -486,14 +572,14 @@ class Writer: def _add(self, ename, entry): if self.lastfile and self.lastfile <= ename: - raise Error('%r must come before %r' - % (''.join(e.name), ''.join(self.lastfile))) - self.lastfile = e.name + raise Error('%r must come before %r' + % (''.join(ename), ''.join(self.lastfile))) + self.lastfile = ename self.level = _golevel(self.level, self.f, ename, entry, self.metastore, self.tmax) def add(self, name, st, meta_ofs, hashgen = None): - endswith = name.endswith('/') + endswith = name.endswith(b'/') ename = pathsplit(name) basename = ename[-1] #log('add: %r %r\n' % (basename, name)) @@ -510,13 +596,12 @@ class Writer: e = NewEntry(basename, name, self.tmax, st.st_dev, st.st_ino, st.st_nlink, st.st_ctime, st.st_mtime, st.st_atime, - st.st_uid, st.st_gid, st.st_size, st.st_mode, gitmode, sha, flags, meta_ofs, 0, 0) else: assert(endswith) meta_ofs = self.metastore.store(metadata.Metadata()) - e = BlankNewEntry(basename, meta_ofs, tmax) + e = BlankNewEntry(basename, meta_ofs, self.tmax) e.gitmode = gitmode e.sha = sha e.flags = flags @@ -531,31 +616,48 @@ class Writer: return Reader(self.tmpname) +def _slashappend_or_add_error(p, caller): + """Return p, after ensuring it has a single trailing slash if it names + a directory, unless there's an OSError, in which case, call + add_error() and return None.""" + try: + st = os.lstat(p) + except OSError as e: + add_error('%s: %s' % (caller, e)) + return None + else: + if stat.S_ISDIR(st.st_mode): + return slashappend(p) + return p + + +def unique_resolved_paths(paths): + "Return a collection of unique resolved paths." 
+ rps = (_slashappend_or_add_error(resolve_parent(p), 'unique_resolved_paths') + for p in paths) + return frozenset((x for x in rps if x is not None)) + + def reduce_paths(paths): xpaths = [] for p in paths: - rp = realpath(p) - try: - st = os.lstat(rp) - if stat.S_ISDIR(st.st_mode): - rp = slashappend(rp) - p = slashappend(p) - xpaths.append((rp, p)) - except OSError, e: - add_error('reduce_paths: %s' % e) + rp = _slashappend_or_add_error(resolve_parent(p), 'reduce_paths') + if rp: + xpaths.append((rp, slashappend(p) if rp.endswith(b'/') else p)) xpaths.sort() paths = [] prev = None for (rp, p) in xpaths: - if prev and (prev == rp - or (prev.endswith('/') and rp.startswith(prev))): + if prev and (prev == rp + or (prev.endswith(b'/') and rp.startswith(prev))): continue # already superceded by previous path paths.append((rp, p)) prev = rp paths.sort(reverse=True) return paths + def merge(*iters): def pfunc(count, total): qprogress('bup: merging indexes (%d/%d)\r' % (count, total))