2 from __future__ import absolute_import, print_function
3 import errno, os, stat, struct, tempfile
5 from bup import metadata, xstat
6 from bup._helpers import UINT_MAX, bytescmp
7 from bup.compat import pending_raise, range
8 from bup.helpers import (add_error, log, merge_iter, mmap_readwrite,
9 progress, qprogress, resolve_parent, slashappend)
11 EMPTY_SHA = b'\0' * 20
12 FAKE_SHA = b'\x01' * 20
14 INDEX_HDR = b'BUPI\0\0\0\7'
16 # Time values are handled as integer nanoseconds since the epoch in
17 # memory, but are written as xstat/metadata timespecs. This behavior
18 # matches the existing metadata/xstat/.bupm code.
20 # Record times (mtime, ctime, atime) as xstat/metadata timespecs, and
21 # store all of the times in the index so they won't interfere with the
22 # forthcoming metadata cache.
27 'qQ' # ctime_s, ctime_ns
28 'qQ' # mtime_s, mtime_ns
29 'qQ' # atime_s, atime_ns
39 ENTLEN = struct.calcsize(INDEX_SIG)
41 FOOTLEN = struct.calcsize(FOOTER_SIG)
43 IX_EXISTS = 0x8000 # file exists on filesystem
44 IX_HASHVALID = 0x4000 # the stored sha1 matches the filesystem
45 IX_SHAMISSING = 0x2000 # the stored sha1 object doesn't seem to exist
47 class Error(Exception):
51 class MetaStoreReader:
52 def __init__(self, filename):
54 self._file = open(filename, 'rb')
64 def __exit__(self, type, value, traceback):
65 with pending_raise(value, rethrow=True):
68 def metadata_at(self, ofs):
70 return metadata.Metadata.read(self._file)
73 class MetaStoreWriter:
74 # For now, we just append to the file, and try to handle any
75 # truncation or corruption somewhat sensibly.
77 def __init__(self, filename):
78 # Map metadata hashes to bupindex.meta offsets.
80 self._filename = filename
82 # FIXME: see how slow this is; does it matter?
83 m_file = open(filename, 'ab+')
88 m = metadata.Metadata.read(m_file)
90 m_encoded = m.encode()
91 self._offsets[m_encoded] = m_off
93 m = metadata.Metadata.read(m_file)
97 log('index metadata in %r appears to be corrupt\n' % filename)
101 self._file = open(filename, 'ab')
111 def __exit__(self, type, value, traceback):
112 with pending_raise(value, rethrow=False):
115 def store(self, metadata):
116 meta_encoded = metadata.encode(include_path=False)
117 ofs = self._offsets.get(meta_encoded)
120 ofs = self._file.tell()
121 self._file.write(meta_encoded)
122 self._offsets[meta_encoded] = ofs
127 def __init__(self, ename, parent):
134 (ofs,n) = (f.tell(), len(self.list))
136 count = len(self.list)
137 #log('popping %r with %d entries\n'
138 # % (''.join(self.ename), count))
142 self.parent.count += count + self.count
146 def _golevel(level, f, ename, newentry, metastore, tmax):
147 # close nodes back up the tree
149 default_meta_ofs = metastore.store(metadata.Metadata())
150 while ename[:len(level.ename)] != level.ename:
151 n = BlankNewEntry(level.ename[-1], default_meta_ofs, tmax)
153 (n.children_ofs,n.children_n) = level.write(f)
154 level.parent.list.append(n)
157 # create nodes down the tree
158 while len(level.ename) < len(ename):
159 level = Level(ename[:len(level.ename)+1], level)
161 # are we in precisely the right place?
162 assert(ename == level.ename)
164 BlankNewEntry(ename and level.ename[-1] or None, default_meta_ofs, tmax)
165 (n.children_ofs,n.children_n) = level.write(f)
167 level.parent.list.append(n)
174 def __init__(self, basename, name, meta_ofs, tmax):
175 assert basename is None or isinstance(basename, bytes)
176 assert name is None or isinstance(name, bytes)
177 self.basename = basename
179 self.meta_ofs = meta_ofs
181 self.children_ofs = 0
185 return ("(%r,0x%04x,%d,%d,%d,%d,%d,%d,%s/%s,0x%04x,%d,0x%08x/%d)"
186 % (self.name, self.dev, self.ino, self.nlink,
187 self.ctime, self.mtime, self.atime,
188 self.size, self.mode, self.gitmode,
189 self.flags, self.meta_ofs,
190 self.children_ofs, self.children_n))
194 ctime = xstat.nsecs_to_timespec(self.ctime)
195 mtime = xstat.nsecs_to_timespec(self.mtime)
196 atime = xstat.nsecs_to_timespec(self.atime)
197 return struct.pack(INDEX_SIG,
198 self.dev, self.ino, self.nlink,
202 self.size, self.mode,
203 self.gitmode, self.sha, self.flags,
204 self.children_ofs, self.children_n,
206 except (DeprecationWarning, struct.error) as e:
207 log('pack error: %s (%r)\n' % (e, self))
210 def stale(self, st, check_device=True):
211 if self.size != st.st_size:
213 if self.mtime != st.st_mtime:
215 if self.sha == EMPTY_SHA:
219 if self.ctime != st.st_ctime:
221 if self.ino != st.st_ino:
223 if self.nlink != st.st_nlink:
225 if not (self.flags & IX_EXISTS):
227 if check_device and (self.dev != st.st_dev):
231 def update_from_stat(self, st, meta_ofs):
232 # Should only be called when the entry is stale(), and
233 # invalidate() should almost certainly be called afterward.
236 self.nlink = st.st_nlink
237 self.ctime = st.st_ctime
238 self.mtime = st.st_mtime
239 self.atime = st.st_atime
240 self.size = st.st_size
241 self.mode = st.st_mode
242 self.flags |= IX_EXISTS
243 self.meta_ofs = meta_ofs
247 self.mtime = self._fixup_time(self.mtime)
248 self.ctime = self._fixup_time(self.ctime)
250 def _fixup_time(self, t):
251 if self.tmax != None and t > self.tmax:
257 f = IX_HASHVALID|IX_EXISTS
258 return (self.flags & f) == f
260 def invalidate(self):
261 self.flags &= ~IX_HASHVALID
263 def validate(self, gitmode, sha):
266 assert(gitmode+0 == gitmode)
267 self.gitmode = gitmode
269 self.flags |= IX_HASHVALID|IX_EXISTS
272 return not self.is_deleted()
274 def sha_missing(self):
275 return (self.flags & IX_SHAMISSING) or not (self.flags & IX_HASHVALID)
277 def is_deleted(self):
278 return (self.flags & IX_EXISTS) == 0
280 def set_deleted(self):
281 if self.flags & IX_EXISTS:
282 self.flags &= ~(IX_EXISTS | IX_HASHVALID)
285 return not self.is_fake()
288 return not self.ctime
290 def _cmp(self, other):
291 # Note reversed name ordering
292 bc = bytescmp(other.name, self.name)
295 vc = self.is_valid() - other.is_valid()
298 fc = self.is_fake() - other.is_fake()
303 def __eq__(self, other):
304 return self._cmp(other) == 0
306 def __ne__(self, other):
307 return self._cmp(other) != 0
309 def __lt__(self, other):
310 return self._cmp(other) < 0
312 def __gt__(self, other):
313 return self._cmp(other) > 0
315 def __le__(self, other):
316 return self._cmp(other) <= 0
318 def __ge__(self, other):
319 return self._cmp(other) >= 0
322 f.write(self.basename + b'\0' + self.packed())
325 class NewEntry(Entry):
326 def __init__(self, basename, name, tmax, dev, ino, nlink,
328 size, mode, gitmode, sha, flags, meta_ofs,
329 children_ofs, children_n):
330 Entry.__init__(self, basename, name, meta_ofs, tmax)
331 (self.dev, self.ino, self.nlink, self.ctime, self.mtime, self.atime,
332 self.size, self.mode, self.gitmode, self.sha,
333 self.flags, self.children_ofs, self.children_n
334 ) = (dev, ino, nlink, ctime, mtime, atime,
335 size, mode, gitmode, sha, flags, children_ofs, children_n)
339 class BlankNewEntry(NewEntry):
340 def __init__(self, basename, meta_ofs, tmax):
341 NewEntry.__init__(self, basename, basename, tmax,
342 0, 0, 0, 0, 0, 0, 0, 0,
343 0, EMPTY_SHA, 0, meta_ofs, 0, 0)
346 class ExistingEntry(Entry):
347 def __init__(self, parent, basename, name, m, ofs):
348 Entry.__init__(self, basename, name, None, None)
352 (self.dev, self.ino, self.nlink,
353 self.ctime, ctime_ns, self.mtime, mtime_ns, self.atime, atime_ns,
354 self.size, self.mode, self.gitmode, self.sha,
355 self.flags, self.children_ofs, self.children_n, self.meta_ofs
356 ) = struct.unpack(INDEX_SIG, m[ofs : ofs + ENTLEN])
357 self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
358 self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
359 self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
361 # effectively, we don't bother messing with IX_SHAMISSING if
362 # not IX_HASHVALID, since it's redundant, and repacking is more
363 # expensive than not repacking.
364 # This is implemented by having sha_missing() check IX_HASHVALID too.
365 def set_sha_missing(self, val):
367 oldval = self.sha_missing() and 1 or 0
369 flag = val and IX_SHAMISSING or 0
370 newflags = (self.flags & (~IX_SHAMISSING)) | flag
371 self.flags = newflags
374 def unset_sha_missing(self, flag):
375 if self.flags & IX_SHAMISSING:
376 self.flags &= ~IX_SHAMISSING
380 self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
381 if self.parent and not self.is_valid():
382 self.parent.invalidate()
385 def iter(self, name=None, wantrecurse=None):
387 if dname and not dname.endswith(b'/'):
389 ofs = self.children_ofs
390 assert(ofs <= len(self._m))
391 assert(self.children_n <= UINT_MAX) # i.e. python struct 'I'
392 for i in range(self.children_n):
393 eon = self._m.find(b'\0', ofs)
397 basename = self._m[ofs : ofs + (eon - ofs)]
398 child = ExistingEntry(self, basename, self.name + basename,
401 or child.name.startswith(dname)
402 or child.name.endswith(b'/') and dname.startswith(child.name)):
403 if not wantrecurse or wantrecurse(child):
404 for e in child.iter(name=name, wantrecurse=wantrecurse):
406 if not name or child.name == name or child.name.startswith(dname):
408 ofs = eon + 1 + ENTLEN
415 def __init__(self, filename):
416 self.filename = filename
418 self.writable = False
422 f = open(filename, 'rb+')
424 if e.errno == errno.ENOENT:
429 b = f.read(len(INDEX_HDR))
431 log('warning: %s: header: expected %r, got %r\n'
432 % (filename, INDEX_HDR, b))
434 st = os.fstat(f.fileno())
436 self.m = mmap_readwrite(f)
438 self.count = struct.unpack(FOOTER_SIG,
439 self.m[st.st_size - FOOTLEN
445 def __exit__(self, type, value, traceback):
446 with pending_raise(value, rethrow=False):
450 return int(self.count)
452 def forward_iter(self):
454 while ofs+ENTLEN <= len(self.m)-FOOTLEN:
455 eon = self.m.find(b'\0', ofs)
459 basename = self.m[ofs : ofs + (eon - ofs)]
460 yield ExistingEntry(None, basename, basename, self.m, eon+1)
461 ofs = eon + 1 + ENTLEN
463 def iter(self, name=None, wantrecurse=None):
464 if len(self.m) > len(INDEX_HDR)+ENTLEN:
466 if dname and not dname.endswith(b'/'):
468 root = ExistingEntry(None, b'/', b'/',
469 self.m, len(self.m)-FOOTLEN-ENTLEN)
470 for sub in root.iter(name=name, wantrecurse=wantrecurse):
472 if not dname or dname == root.name:
478 def find(self, name):
479 return next((e for e in self.iter(name, wantrecurse=lambda x : True)
487 if self.writable and self.m:
492 if self.writable and self.m:
495 self.writable = False
497 def filter(self, prefixes, wantrecurse=None):
498 for (rp, path) in reduce_paths(prefixes):
500 for e in self.iter(rp, wantrecurse=wantrecurse):
502 assert(e.name.startswith(rp))
503 name = path + e.name[len(rp):]
506 # Always return at least the top for each prefix.
507 # Otherwise something like "save x/y" will produce
508 # nothing if x is up to date.
511 name = path + pe.name[len(rp):]
514 # FIXME: this function isn't very generic, because it splits the filename
515 # in an odd way and depends on a terminating '/' to indicate directories.
517 """Split a path into a list of elements of the file system hierarchy."""
519 l = [i + b'/' for i in l[:-1]] + l[-1:]
521 l.pop() # extra blank caused by terminating '/'
526 def __init__(self, filename, metastore, tmax):
527 self.rootlevel = self.level = Level([], None)
532 self.filename = filename = resolve_parent(filename)
533 self.metastore = metastore
535 (dir,name) = os.path.split(filename)
536 ffd, self.tmpname = tempfile.mkstemp(b'.tmp', filename, dir)
537 self.f = os.fdopen(ffd, 'wb', 65536)
538 self.f.write(INDEX_HDR)
548 os.unlink(self.tmpname)
552 self.level = _golevel(self.level, self.f, [], None,
553 self.metastore, self.tmax)
554 self.count = self.rootlevel.count
557 self.f.write(struct.pack(FOOTER_SIG, self.count))
559 assert(self.level == None)
567 os.rename(self.tmpname, self.filename)
569 def _add(self, ename, entry):
570 if self.lastfile and self.lastfile <= ename:
571 raise Error('%r must come before %r'
572 % (''.join(ename), ''.join(self.lastfile)))
573 self.lastfile = ename
574 self.level = _golevel(self.level, self.f, ename, entry,
575 self.metastore, self.tmax)
577 def add(self, name, st, meta_ofs, hashgen = None):
578 endswith = name.endswith(b'/')
579 ename = pathsplit(name)
581 #log('add: %r %r\n' % (basename, name))
585 (gitmode, sha) = hashgen(name)
586 flags |= IX_HASHVALID
588 (gitmode, sha) = (0, EMPTY_SHA)
590 isdir = stat.S_ISDIR(st.st_mode)
591 assert(isdir == endswith)
592 e = NewEntry(basename, name, self.tmax,
593 st.st_dev, st.st_ino, st.st_nlink,
594 st.st_ctime, st.st_mtime, st.st_atime,
595 st.st_size, st.st_mode, gitmode, sha, flags,
599 meta_ofs = self.metastore.store(metadata.Metadata())
600 e = BlankNewEntry(basename, meta_ofs, self.tmax)
606 def add_ixentry(self, e):
607 e.children_ofs = e.children_n = 0
608 self._add(pathsplit(e.name), e)
610 def new_reader(self):
612 return Reader(self.tmpname)
615 def _slashappend_or_add_error(p, caller):
616 """Return p, after ensuring it has a single trailing slash if it names
617 a directory, unless there's an OSError, in which case, call
618 add_error() and return None."""
622 add_error('%s: %s' % (caller, e))
625 if stat.S_ISDIR(st.st_mode):
626 return slashappend(p)
630 def unique_resolved_paths(paths):
631 "Return a collection of unique resolved paths."
632 rps = (_slashappend_or_add_error(resolve_parent(p), 'unique_resolved_paths')
634 return frozenset((x for x in rps if x is not None))
637 def reduce_paths(paths):
640 rp = _slashappend_or_add_error(resolve_parent(p), 'reduce_paths')
642 xpaths.append((rp, slashappend(p) if rp.endswith(b'/') else p))
647 for (rp, p) in xpaths:
648 if prev and (prev == rp
649 or (prev.endswith(b'/') and rp.startswith(prev))):
650 continue # already superceded by previous path
651 paths.append((rp, p))
653 paths.sort(reverse=True)
658 def pfunc(count, total):
659 qprogress('bup: merging indexes (%d/%d)\r' % (count, total))
660 def pfinal(count, total):
661 progress('bup: merging indexes (%d/%d), done.\n' % (count, total))
662 return merge_iter(iters, 1024, pfunc, pfinal, key='name')