X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Findex.py;h=d6d44accb7eb0b320526283cd174513d3f1a84ed;hb=c40b3dd5fd74e72024fbaad3daf5a958aefa1c54;hp=31f6c115ff3dc5a2561c291bef69794df450f2d2;hpb=d7c834cf9c91795093ff319bce54dbc506dfb1ec;p=bup.git diff --git a/lib/bup/index.py b/lib/bup/index.py index 31f6c11..d6d44ac 100644 --- a/lib/bup/index.py +++ b/lib/bup/index.py @@ -1,11 +1,16 @@ -import metadata, os, stat, struct, tempfile -from bup import xstat -from bup.helpers import * + +from __future__ import absolute_import +import errno, os, stat, struct, tempfile + +from bup import metadata, xstat +from bup._helpers import UINT_MAX, bytescmp +from bup.helpers import (add_error, log, merge_iter, mmap_readwrite, + progress, qprogress, resolve_parent, slashappend) EMPTY_SHA = '\0'*20 FAKE_SHA = '\x01'*20 -INDEX_HDR = 'BUPI\0\0\0\6' +INDEX_HDR = 'BUPI\0\0\0\7' # Time values are handled as integer nanoseconds since the epoch in # memory, but are written as xstat/metadata timespecs. This behavior @@ -14,7 +19,21 @@ INDEX_HDR = 'BUPI\0\0\0\6' # Record times (mtime, ctime, atime) as xstat/metadata timespecs, and # store all of the times in the index so they won't interfere with the # forthcoming metadata cache. -INDEX_SIG = '!QQQqQqQqQQII20sHIIQ' +INDEX_SIG = ('!' + 'Q' # dev + 'Q' # ino + 'Q' # nlink + 'qQ' # ctime_s, ctime_ns + 'qQ' # mtime_s, mtime_ns + 'qQ' # atime_s, atime_ns + 'Q' # size + 'I' # mode + 'I' # gitmode + '20s' # sha + 'H' # flags + 'Q' # children_ofs + 'I' # children_n + 'Q') # meta_ofs ENTLEN = struct.calcsize(INDEX_SIG) FOOTER_SIG = '!Q' @@ -174,17 +193,38 @@ class Entry: self.gitmode, self.sha, self.flags, self.children_ofs, self.children_n, self.meta_ofs) - except (DeprecationWarning, struct.error), e: + except (DeprecationWarning, struct.error) as e: log('pack error: %s (%r)\n' % (e, self)) raise - def from_stat(self, st, meta_ofs, tstart, check_device=True): - old = (self.dev if check_device else 0, - self.ino, self.nlink, self.ctime, self.mtime, - self.size, self.flags & IX_EXISTS) - new = (st.st_dev if check_device else 0, - st.st_ino, st.st_nlink, st.st_ctime, st.st_mtime, - st.st_size, IX_EXISTS) + def stale(self, st, tstart, check_device=True): + if self.size != st.st_size: + return True + if self.mtime != st.st_mtime: + return True + if self.sha == EMPTY_SHA: + return True + if not self.gitmode: + return True + if self.ctime != st.st_ctime: + return True + if self.ino != st.st_ino: + return True + if self.nlink != st.st_nlink: + return True + if not (self.flags & IX_EXISTS): + return True + if check_device and (self.dev != st.st_dev): + return True + # Check that the ctime's "second" is at or after tstart's. + ctime_sec_in_ns = xstat.fstime_floor_secs(st.st_ctime) * 10**9 + if ctime_sec_in_ns >= tstart: + return True + return False + + def update_from_stat(self, st, meta_ofs): + # Should only be called when the entry is stale(), and + # invalidate() should almost certainly be called afterward. self.dev = st.st_dev self.ino = st.st_ino self.nlink = st.st_nlink @@ -195,13 +235,8 @@ class Entry: self.mode = st.st_mode self.flags |= IX_EXISTS self.meta_ofs = meta_ofs - # Check that the ctime's "second" is at or after tstart's. 
-        ctime_sec_in_ns = xstat.fstime_floor_secs(st.st_ctime) * 10**9
-        if ctime_sec_in_ns >= tstart or old != new \
-               or self.sha == EMPTY_SHA or not self.gitmode:
-            self.invalidate()
         self._fixup()
-        
+
     def _fixup(self):
         self.mtime = self._fixup_time(self.mtime)
         self.ctime = self._fixup_time(self.ctime)
@@ -246,10 +281,36 @@ class Entry:
     def is_fake(self):
         return not self.ctime
 
-    def __cmp__(a, b):
-        return (cmp(b.name, a.name)
-                or cmp(a.is_valid(), b.is_valid())
-                or cmp(a.is_fake(), b.is_fake()))
+    def _cmp(self, other):
+        # Note reversed name ordering
+        bc = bytescmp(other.name, self.name)
+        if bc != 0:
+            return bc
+        vc = self.is_valid() - other.is_valid()
+        if vc != 0:
+            return vc
+        fc = self.is_fake() - other.is_fake()
+        if fc != 0:
+            return fc
+        return 0
+
+    def __eq__(self, other):
+        return self._cmp(other) == 0
+
+    def __ne__(self, other):
+        return self._cmp(other) != 0
+
+    def __lt__(self, other):
+        return self._cmp(other) < 0
+
+    def __gt__(self, other):
+        return self._cmp(other) > 0
+
+    def __le__(self, other):
+        return self._cmp(other) <= 0
+
+    def __ge__(self, other):
+        return self._cmp(other) >= 0
 
     def write(self, f):
         f.write(self.basename + '\0' + self.packed())
@@ -321,7 +382,7 @@ class ExistingEntry(Entry):
             dname += '/'
         ofs = self.children_ofs
         assert(ofs <= len(self._m))
-        assert(self.children_n < 1000000)
+        assert(self.children_n <= UINT_MAX)  # i.e. python struct 'I'
         for i in xrange(self.children_n):
             eon = self._m.find('\0', ofs)
             assert(eon >= 0)
@@ -353,7 +414,7 @@ class Reader:
         f = None
         try:
             f = open(filename, 'r+')
-        except IOError, e:
+        except IOError as e:
             if e.errno == errno.ENOENT:
                 pass
             else:
@@ -403,6 +464,11 @@ class Reader:
     def __iter__(self):
         return self.iter()
 
+    def find(self, name):
+        return next((e for e in self.iter(name, wantrecurse=lambda x : True)
+                     if e.name == name),
+                    None)
+
     def exists(self):
         return self.m
 
@@ -419,11 +485,20 @@ class Reader:
 
     def filter(self, prefixes, wantrecurse=None):
         for (rp, path) in reduce_paths(prefixes):
+            any_entries = False
             for e in self.iter(rp, wantrecurse=wantrecurse):
+                any_entries = True
                 assert(e.name.startswith(rp))
                 name = path + e.name[len(rp):]
                 yield (name, e)
-
+            if not any_entries:
+                # Always return at least the top for each prefix.
+                # Otherwise something like "save x/y" will produce
+                # nothing if x is up to date.
+                pe = self.find(rp)
+                assert(pe)
+                name = path + pe.name[len(rp):]
+                yield (name, pe)
 
 # FIXME: this function isn't very generic, because it splits the filename
 # in an odd way and depends on a terminating '/' to indicate directories.
@@ -443,7 +518,7 @@ class Writer:
         self.count = 0
         self.lastfile = None
         self.filename = None
-        self.filename = filename = realpath(filename)
+        self.filename = filename = resolve_parent(filename)
         self.metastore = metastore
         self.tmax = tmax
         (dir,name) = os.path.split(filename)
@@ -526,18 +601,34 @@ class Writer:
         return Reader(self.tmpname)
 
 
+def _slashappend_or_add_error(p, caller):
+    """Return p, after ensuring it has a single trailing slash if it names
+    a directory, unless there's an OSError, in which case, call
+    add_error() and return None."""
+    try:
+        st = os.lstat(p)
+    except OSError as e:
+        add_error('%s: %s' % (caller, e))
+        return None
+    else:
+        if stat.S_ISDIR(st.st_mode):
+            return slashappend(p)
+        return p
+
+
+def unique_resolved_paths(paths):
+    "Return a collection of unique resolved paths."
+ rps = (_slashappend_or_add_error(resolve_parent(p), 'unique_resolved_paths') + for p in paths) + return frozenset((x for x in rps if x is not None)) + + def reduce_paths(paths): xpaths = [] for p in paths: - rp = realpath(p) - try: - st = os.lstat(rp) - if stat.S_ISDIR(st.st_mode): - rp = slashappend(rp) - p = slashappend(p) - xpaths.append((rp, p)) - except OSError, e: - add_error('reduce_paths: %s' % e) + rp = _slashappend_or_add_error(resolve_parent(p), 'reduce_paths') + if rp: + xpaths.append((rp, slashappend(p) if rp.endswith('/') else p)) xpaths.sort() paths = [] @@ -551,6 +642,7 @@ def reduce_paths(paths): paths.sort(reverse=True) return paths + def merge(*iters): def pfunc(count, total): qprogress('bup: merging indexes (%d/%d)\r' % (count, total))
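
For readers decoding the new on-disk format: the expanded INDEX_SIG above describes the fixed-width portion of one index entry (the variable-length basename is written separately), and ENTLEN is its packed size. A minimal standalone sketch (plain Python, not part of bup; every field value below is invented) of how that format string packs:

    from __future__ import absolute_import, print_function
    import struct

    INDEX_SIG = ('!'
                 'Q'                # dev
                 'Q'                # ino
                 'Q'                # nlink
                 'qQ'               # ctime_s, ctime_ns
                 'qQ'               # mtime_s, mtime_ns
                 'qQ'               # atime_s, atime_ns
                 'Q'                # size
                 'I'                # mode
                 'I'                # gitmode
                 '20s'              # sha
                 'H'                # flags
                 'Q'                # children_ofs
                 'I'                # children_n
                 'Q')               # meta_ofs

    ENTLEN = struct.calcsize(INDEX_SIG)  # bytes in one packed entry

    packed = struct.pack(INDEX_SIG,
                         2049, 131072, 1,       # dev, ino, nlink
                         1500000000, 0,         # ctime_s, ctime_ns
                         1500000000, 0,         # mtime_s, mtime_ns
                         1500000000, 0,         # atime_s, atime_ns
                         4096,                  # size
                         0o100644, 0o100644,    # mode, gitmode
                         b'\0' * 20,            # sha (EMPTY_SHA)
                         0,                     # flags
                         0, 0, 0)               # children_ofs, children_n, meta_ofs
    assert len(packed) == ENTLEN
    print('one fixed-width index entry is %d bytes' % ENTLEN)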
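
The __cmp__ removal swaps the old three-way comparison for the rich comparison operators that Python 3 understands, built on _cmp() and the bup._helpers.bytescmp helper imported above. A standalone sketch of the ordering they implement (reverse-sorted by name, with validity and fakeness as tiebreakers); DemoEntry and the pure-Python _bytescmp stand-in are illustrative only, not bup code:

    from __future__ import absolute_import

    def _bytescmp(a, b):
        # cmp()-style result: negative, zero, or positive.
        return (a > b) - (a < b)

    class DemoEntry(object):
        def __init__(self, name, valid=True, fake=False):
            self.name, self._valid, self._fake = name, valid, fake
        def is_valid(self):
            return self._valid
        def is_fake(self):
            return self._fake
        def _cmp(self, other):
            # Note reversed name ordering, as in Entry._cmp() above.
            bc = _bytescmp(other.name, self.name)
            if bc != 0:
                return bc
            vc = self.is_valid() - other.is_valid()
            if vc != 0:
                return vc
            return self.is_fake() - other.is_fake()
        def __lt__(self, other):
            return self._cmp(other) < 0
        def __eq__(self, other):
            return self._cmp(other) == 0

    entries = [DemoEntry(b'/etc/'), DemoEntry(b'/home/x'), DemoEntry(b'/home/')]
    entries.sort()
    # Reverse name ordering: later names come first.
    assert [e.name for e in entries] == [b'/home/x', b'/home/', b'/etc/']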