From: Avery Pennarun Date: Sun, 10 Jan 2010 06:13:10 +0000 (-0500) Subject: This adds the long-awaited indexfile feature, so you no longer have to feed X-Git-Tag: bup-0.04~1 X-Git-Url: https://arthur.barton.de/gitweb/?a=commitdiff_plain;h=84c3d04310d40b0ae0ccbdf13a57543ae88b342d;hp=3198fba54521cd7f313a02d237f9198dd58f6a90;p=bup.git This adds the long-awaited indexfile feature, so you no longer have to feed your backups through tar. Okay, 'bup save' is still a bit weak... but it could be much worse. Merge branch 'indexfile' * indexfile: Minor fix for python 2.4.4 compatibility. cmd-save: completely reimplement using the indexfile. Moved some reusable index-handling code from cmd-index.py to index.py. A bunch of wvtests for the 'bup index' command. Start using wvtest.sh for shell-based tests in test-sh. cmd-index: default indexfile path is ~/.bup/bupindex, not $PWD/index cmd-index: skip merging the index if nothing was written to the new one. cmd-index: only update if -u is given; print only given file/dirnames. cmd-index: correct reporting of deleted vs. added vs. modified status. Generalize the multi-index-walking code. cmd-index: indexfiles should start with a well-known header. cmd-index: eliminate redundant paths from index update command. cmd-index: some handy options. index: add --xdev (--one-file-system) option. Fix some bugs with indexing '/' cmd-index: basic index reader/writer/merger. 
--- diff --git a/Makefile b/Makefile index 6ca9447..7c24fa1 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,8 @@ endif default: all -all: bup-split bup-join bup-save bup-init bup-server bup randomgen chashsplit.so +all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \ + bup randomgen chashsplit.so randomgen: randomgen.o $(CC) $(CFLAGS) -o $@ $< @@ -20,16 +21,18 @@ randomgen: randomgen.o chashsplit.so: chashsplitmodule.o $(CC) $(CFLAGS) $(SHARED) -o $@ $< $(PYLIB) -runtests: all +runtests: all runtests-python runtests-cmdline + +runtests-python: ./wvtest.py $(wildcard t/t*.py) runtests-cmdline: all - ./test-sh + t/test.sh stupid: PATH=/bin:/usr/bin $(MAKE) test -test: all runtests-cmdline +test: all ./wvtestrun $(MAKE) runtests %: %.o diff --git a/cmd-index.py b/cmd-index.py new file mode 100755 index 0000000..ef596b7 --- /dev/null +++ b/cmd-index.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python2.5 +import os, sys, stat +import options, git, index +from helpers import * + +class OsFile: + def __init__(self, path): + self.fd = None + self.fd = os.open(path, os.O_RDONLY|os.O_LARGEFILE|os.O_NOFOLLOW) + #self.st = os.fstat(self.fd) + + def __del__(self): + if self.fd: + fd = self.fd + self.fd = None + os.close(fd) + + def fchdir(self): + os.fchdir(self.fd) + + +saved_errors = [] +def add_error(e): + saved_errors.append(e) + log('\n%s\n' % e) + + +# the use of fchdir() and lstat() are for two reasons: +# - help out the kernel by not making it repeatedly look up the absolute path +# - avoid race conditions caused by doing listdir() on a changing symlink +def handle_path(ri, wi, dir, name, pst, xdev, can_delete_siblings): + hashgen = None + if opt.fake_valid: + def hashgen(name): + return (0, index.FAKE_SHA) + + dirty = 0 + path = dir + name + #log('handle_path(%r,%r)\n' % (dir, name)) + if stat.S_ISDIR(pst.st_mode): + if opt.verbose == 1: # log dirs only + sys.stdout.write('%s\n' % path) + sys.stdout.flush() + try: + OsFile(name).fchdir() + except 
OSError, e: + add_error(Exception('in %s: %s' % (dir, str(e)))) + return 0 + try: + try: + ld = os.listdir('.') + #log('* %r: %r\n' % (name, ld)) + except OSError, e: + add_error(Exception('in %s: %s' % (path, str(e)))) + return 0 + lds = [] + for p in ld: + try: + st = os.lstat(p) + except OSError, e: + add_error(Exception('in %s: %s' % (path, str(e)))) + continue + if xdev != None and st.st_dev != xdev: + log('Skipping %r: different filesystem.\n' + % index.realpath(p)) + continue + if stat.S_ISDIR(st.st_mode): + p = slashappend(p) + lds.append((p, st)) + for p,st in reversed(sorted(lds)): + dirty += handle_path(ri, wi, path, p, st, xdev, + can_delete_siblings = True) + finally: + os.chdir('..') + #log('endloop: ri.cur:%r path:%r\n' % (ri.cur.name, path)) + while ri.cur and ri.cur.name > path: + #log('ricur:%r path:%r\n' % (ri.cur, path)) + if can_delete_siblings and dir and ri.cur.name.startswith(dir): + #log(' --- deleting\n') + ri.cur.flags &= ~(index.IX_EXISTS | index.IX_HASHVALID) + ri.cur.repack() + dirty += 1 + ri.next() + if ri.cur and ri.cur.name == path: + dirty += ri.cur.from_stat(pst) + if dirty or not (ri.cur.flags & index.IX_HASHVALID): + #log(' --- updating %r\n' % path) + if hashgen: + (ri.cur.gitmode, ri.cur.sha) = hashgen(name) + ri.cur.flags |= index.IX_HASHVALID + ri.cur.repack() + ri.next() + else: + wi.add(path, pst, hashgen = hashgen) + dirty += 1 + if opt.verbose > 1: # all files, not just dirs + sys.stdout.write('%s\n' % path) + sys.stdout.flush() + return dirty + + +def merge_indexes(out, r1, r2): + log('bup: merging indexes.\n') + for e in index._last_writer_wins_iter([r1, r2]): + #if e.flags & index.IX_EXISTS: + out.add_ixentry(e) + + +class MergeGetter: + def __init__(self, l): + self.i = iter(l) + self.cur = None + self.next() + + def next(self): + try: + self.cur = self.i.next() + except StopIteration: + self.cur = None + return self.cur + + +def update_index(path): + ri = index.Reader(indexfile) + wi = index.Writer(indexfile) + rig 
= MergeGetter(ri) + + rpath = index.realpath(path) + st = os.lstat(rpath) + if opt.xdev: + xdev = st.st_dev + else: + xdev = None + f = OsFile('.') + if rpath[-1] == '/': + rpath = rpath[:-1] + (dir, name) = os.path.split(rpath) + dir = slashappend(dir) + if stat.S_ISDIR(st.st_mode) and (not rpath or rpath[-1] != '/'): + name += '/' + can_delete_siblings = True + else: + can_delete_siblings = False + OsFile(dir or '/').fchdir() + dirty = handle_path(rig, wi, dir, name, st, xdev, can_delete_siblings) + + # make sure all the parents of the updated path exist and are invalidated + # if appropriate. + while 1: + (rpath, junk) = os.path.split(rpath) + if not rpath: + break + elif rpath == '/': + p = rpath + else: + p = rpath + '/' + while rig.cur and rig.cur.name > p: + #log('FINISHING: %r path=%r d=%r\n' % (rig.cur.name, p, dirty)) + rig.next() + if rig.cur and rig.cur.name == p: + if dirty: + rig.cur.flags &= ~index.IX_HASHVALID + rig.cur.repack() + else: + wi.add(p, os.lstat(p)) + if p == '/': + break + + f.fchdir() + ri.save() + if wi.count: + mi = index.Writer(indexfile) + merge_indexes(mi, ri, wi.new_reader()) + mi.close() + wi.abort() + + +optspec = """ +bup index <-p|s|m|u> [options...] 
+-- +p,print print the index entries for the given names (also works with -u) +m,modified print only added/deleted/modified files (implies -p) +s,status print each filename with a status char (A/M/D) (implies -p) +H,hash print the hash for each object next to its name (implies -p) +u,update (recursively) update the index entries for the given filenames +x,xdev,one-file-system don't cross filesystem boundaries +fake-valid mark all index entries as up-to-date even if they aren't +f,indexfile= the name of the index file (default 'index') +v,verbose increase log output (can be used more than once) +""" +o = options.Options('bup index', optspec) +(opt, flags, extra) = o.parse(sys.argv[1:]) + +if not (opt.modified or opt['print'] or opt.status or opt.update): + log('bup index: you must supply one or more of -p, -s, -m, or -u\n') + o.usage() +if opt.fake_valid and not opt.update: + log('bup index: --fake-valid is meaningless without -u\n') + o.usage() + +git.check_repo_or_die() +indexfile = opt.indexfile or git.repo('bupindex') + +paths = index.reduce_paths(extra) + +if opt.update: + if not paths: + log('bup index: update (-u) requested but no paths given\n') + o.usage() + for (rp, path) in paths: + update_index(rp) + +if opt['print'] or opt.status or opt.modified: + for (name, ent) in index.Reader(indexfile).filter(extra or ['']): + if opt.modified and ent.flags & index.IX_HASHVALID: + continue + line = '' + if opt.status: + if not ent.flags & index.IX_EXISTS: + line += 'D ' + elif not ent.flags & index.IX_HASHVALID: + if ent.sha == index.EMPTY_SHA: + line += 'A ' + else: + line += 'M ' + else: + line += ' ' + if opt.hash: + line += ent.sha.encode('hex') + ' ' + print line + (name or './') + #print repr(ent) + +if saved_errors: + log('WARNING: %d errors encountered.\n' % len(saved_errors)) + exit(1) diff --git a/cmd-join.py b/cmd-join.py index b87319a..5378629 100755 --- a/cmd-join.py +++ b/cmd-join.py @@ -26,5 +26,6 @@ if opt.remote: cli.close() else: for id in extra: + 
#log('id=%r\n' % id) for blob in git.cat(id): sys.stdout.write(blob) diff --git a/cmd-save.py b/cmd-save.py index 7da0429..67cb30c 100755 --- a/cmd-save.py +++ b/cmd-save.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2.5 import sys, re, errno, stat, client -import hashsplit, git, options +import hashsplit, git, options, index from helpers import * @@ -10,90 +10,6 @@ def add_error(e): log('\n%s\n' % e) -def _direxpand(name): - st = os.lstat(name) - try: - if stat.S_ISDIR(st.st_mode): - for sub in os.listdir(name): - subfull = os.path.join(name, sub) - for fn_st in _direxpand(subfull): - yield fn_st - else: - yield (name,st) - except OSError, e: - if e.errno in [errno.ENOENT, errno.EPERM, errno.EACCES]: - add_error(e) - else: - raise - - -def direxpand(names): - for n in names: - for fn_st in _direxpand(n): - yield fn_st - - -def _normpath(dir): - p = os.path.normpath(dir) - return (p != '.') and p or '' - - -class Tree: - def __init__(self, parent, name): - assert(name != '.') - assert(not (parent and not name)) - self.parent = parent - self.name = name - self.sha = None - self.children = {} - if self.parent: - self.parent.children[self.name] = self - - def fullpath(self): - if self.parent: - return os.path.join(self.parent.fullpath(), self.name) - else: - return self.name - - def gettop(self): - p = self - while p.parent: - p = p.parent - return p - - def getdir(self, dir): - # FIXME: deal with '..' 
somehow (look at how tar does it) - dir = _normpath(dir) - if dir.startswith('/'): - dir = dir[1:] - top = self.gettop() - if not dir: - return top - for part in dir.split('/'): - sub = top.children.get(part) - if not sub: - sub = top.children[part] = Tree(top, part) - top = sub - return top - - def addfile(self, mode, fullname, id): - (dir, name) = os.path.split(fullname) - self.getdir(dir).children[name] = (mode, name, id) - - def shalist(self, w): - for c in self.children.values(): - if isinstance(c, tuple): # sha1 entry for a file - yield c - else: # tree - t = ('40000', c.name, c.gen_tree(w)) - yield t - - def gen_tree(self, w): - if not self.sha: - self.sha = w.new_tree(self.shalist(w)) - return self.sha - - optspec = """ bup save [-tc] [-n name] -- @@ -110,6 +26,9 @@ git.check_repo_or_die() if not (opt.tree or opt.commit or opt.name): log("bup save: use one or more of -t, -c, -n\n") o.usage() +if not extra: + log("bup save: no filenames given.\n") + o.usage() if opt.verbose >= 2: git.verbose = opt.verbose - 1 @@ -124,26 +43,94 @@ else: cli = None oldref = refname and git.read_ref(refname) or None w = git.PackWriter() - -root = Tree(None, '') -for (fn,st) in direxpand(extra): + + +def eatslash(dir): + if dir.endswith('/'): + return dir[:-1] + else: + return dir + + +parts = [''] +shalists = [[]] + +def _push(part): + parts.append(part) + shalists.append([]) + +def _pop(): + assert(len(parts) > 1) + part = parts.pop() + shalist = shalists.pop() + tree = w.new_tree(shalist) + shalists[-1].append(('40000', part, tree)) + + +for (transname,ent) in index.Reader(git.repo('bupindex')).filter(extra): + (dir, file) = os.path.split(ent.name) + exists = (ent.flags & index.IX_EXISTS) + hashvalid = (ent.flags & index.IX_HASHVALID) and w.exists(ent.sha) if opt.verbose: - log('\n%s ' % fn) - try: - if stat.S_ISREG(st.st_mode): # regular file - f = open(fn) - (mode, id) = hashsplit.split_to_blob_or_tree(w, [f]) - elif stat.S_ISLNK(st.st_mode): # symlink - (mode, id) = 
('120000', w.new_blob(os.readlink(fn))) + if not exists: + status = 'D' + elif not hashvalid: + if ent.sha == index.EMPTY_SHA: + status = 'A' + else: + status = 'M' else: - add_error(Exception('skipping special file "%s"' % fn)) - except IOError, e: - add_error(e) - except OSError, e: - add_error(e) + status = ' ' + log('\n%s %s ' % (status, ent.name)) + + if not exists: + continue + + assert(dir.startswith('/')) + dirp = dir.split('/') + while parts > dirp: + _pop() + for part in dirp[len(parts):]: + _push(part) + + if not file: + # directory already handled. + # FIXME: not using the indexed tree sha1's for anything, which is + # a waste. That's a potential optimization... + continue + + id = None + if hashvalid: + mode = '%o' % ent.mode + id = ent.sha + shalists[-1].append((mode, file, id)) else: - root.addfile(mode, fn, id) -tree = root.gen_tree(w) + try: + if stat.S_ISREG(ent.mode): + f = open(ent.name) + (mode, id) = hashsplit.split_to_blob_or_tree(w, [f]) + elif stat.S_ISDIR(ent.mode): + assert(0) # handled above + elif stat.S_ISLNK(ent.mode): + (mode, id) = ('120000', w.new_blob(os.readlink(ent.name))) + else: + add_error(Exception('skipping special file "%s"' % ent.name)) + except IOError, e: + add_error(e) + except OSError, e: + add_error(e) + if id: + ent.validate(id) + ent.repack() + shalists[-1].append((mode, file, id)) +#log('parts out: %r\n' % parts) +#log('stk out: %r\n' % shalists) +while len(parts) > 1: + _pop() +#log('parts out: %r\n' % parts) +#log('stk out: %r\n' % shalists) +assert(len(shalists) == 1) +tree = w.new_tree(shalists[-1]) if opt.verbose: log('\n') if opt.tree: diff --git a/cmd-tick.py b/cmd-tick.py new file mode 100755 index 0000000..da1d003 --- /dev/null +++ b/cmd-tick.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python2.5 +import sys, time +import options + +optspec = """ +bup tick +""" +o = options.Options('bup tick', optspec) +(opt, flags, extra) = o.parse(sys.argv[1:]) + +if extra: + log("bup tick: no arguments expected\n") + o.usage() 
+ +t = time.time() +tleft = 1 - (t - int(t)) +time.sleep(tleft) diff --git a/git.py b/git.py index ccafa5c..5aa7933 100644 --- a/git.py +++ b/git.py @@ -183,11 +183,14 @@ class PackWriter: def write(self, type, content): return self._write(calc_hash(type, content), type, content) - def maybe_write(self, type, content): - bin = calc_hash(type, content) + def exists(self, id): if not self.objcache: self._make_objcache() - if not self.objcache.exists(bin): + return self.objcache.exists(id) + + def maybe_write(self, type, content): + bin = calc_hash(type, content) + if not self.exists(bin): self._write(bin, type, content) self.objcache.add(bin) return bin @@ -398,9 +401,11 @@ class CatPipe: assert(id[0] != '-') self.p.stdin.write('%s\n' % id) hdr = self.p.stdout.readline() + if hdr.endswith(' missing\n'): + raise GitError('blob %r is missing' % id) spl = hdr.split(' ') - assert(len(spl) == 3) - assert(len(spl[0]) == 40) + if len(spl) != 3 or len(spl[0]) != 40: + raise GitError('expected blob, got %r' % spl) (hex, type, size) = spl yield type for blob in chunkyreader(self.p.stdout, int(spl[2])): @@ -437,7 +442,8 @@ class CatPipe: for blob in self.join(treeline[5:]): yield blob else: - raise GitError('unknown object type %r' % type) + raise GitError('invalid object type %r: expected blob/tree/commit' + % type) def join(self, id): for d in self._join(self.get(id)): diff --git a/helpers.py b/helpers.py index b478d61..b0b054d 100644 --- a/helpers.py +++ b/helpers.py @@ -112,3 +112,11 @@ def chunkyreader(f, count = None): b = f.read(65536) if not b: break yield b + + +def slashappend(s): + if s and not s.endswith('/'): + return s + '/' + else: + return s + diff --git a/index.py b/index.py new file mode 100644 index 0000000..9a746eb --- /dev/null +++ b/index.py @@ -0,0 +1,268 @@ +import os, stat, time, struct, tempfile, mmap +from helpers import * + +EMPTY_SHA = '\0'*20 +FAKE_SHA = '\x01'*20 +INDEX_HDR = 'BUPI\0\0\0\1' +INDEX_SIG = '!IIIIIQII20sH' +ENTLEN = 
struct.calcsize(INDEX_SIG) + +IX_EXISTS = 0x8000 +IX_HASHVALID = 0x4000 + +class Error(Exception): + pass + + +class Entry: + def __init__(self, name, m, ofs, tstart): + self._m = m + self._ofs = ofs + self.name = str(name) + self.tstart = tstart + (self.dev, self.ctime, self.mtime, self.uid, self.gid, + self.size, self.mode, self.gitmode, self.sha, + self.flags) = struct.unpack(INDEX_SIG, str(buffer(m, ofs, ENTLEN))) + + def __repr__(self): + return ("(%s,0x%04x,%d,%d,%d,%d,%d,0x%04x)" + % (self.name, self.dev, + self.ctime, self.mtime, self.uid, self.gid, + self.size, self.flags)) + + def packed(self): + return struct.pack(INDEX_SIG, + self.dev, self.ctime, self.mtime, + self.uid, self.gid, self.size, self.mode, + self.gitmode, self.sha, self.flags) + + def repack(self): + self._m[self._ofs:self._ofs+ENTLEN] = self.packed() + + def from_stat(self, st): + old = (self.dev, self.ctime, self.mtime, + self.uid, self.gid, self.size, self.flags & IX_EXISTS) + new = (st.st_dev, int(st.st_ctime), int(st.st_mtime), + st.st_uid, st.st_gid, st.st_size, IX_EXISTS) + self.dev = st.st_dev + self.ctime = int(st.st_ctime) + self.mtime = int(st.st_mtime) + self.uid = st.st_uid + self.gid = st.st_gid + self.size = st.st_size + self.mode = st.st_mode + self.flags |= IX_EXISTS + if int(st.st_ctime) >= self.tstart or old != new: + self.flags &= ~IX_HASHVALID + return 1 # dirty + else: + return 0 # not dirty + + def validate(self, sha): + assert(sha) + self.sha = sha + self.flags |= IX_HASHVALID + + def __cmp__(a, b): + return cmp(a.name, b.name) + + +class Reader: + def __init__(self, filename): + self.filename = filename + self.m = '' + self.writable = False + f = None + try: + f = open(filename, 'r+') + except IOError, e: + if e.errno == errno.ENOENT: + pass + else: + raise + if f: + b = f.read(len(INDEX_HDR)) + if b != INDEX_HDR: + raise Error('%s: header: expected %r, got %r' + % (filename, INDEX_HDR, b)) + st = os.fstat(f.fileno()) + if st.st_size: + self.m = 
mmap.mmap(f.fileno(), 0, + mmap.MAP_SHARED, + mmap.PROT_READ|mmap.PROT_WRITE) + f.close() # map will persist beyond file close + self.writable = True + + def __del__(self): + self.save() + + def __iter__(self): + tstart = int(time.time()) + ofs = len(INDEX_HDR) + while ofs < len(self.m): + eon = self.m.find('\0', ofs) + assert(eon >= 0) + yield Entry(buffer(self.m, ofs, eon-ofs), + self.m, eon+1, tstart = tstart) + ofs = eon + 1 + ENTLEN + + def save(self): + if self.writable: + self.m.flush() + + def filter(self, prefixes): + #log("filtering %r\n" % prefixes) + paths = reduce_paths(prefixes) + #log("filtering %r\n" % paths) + pi = iter(paths) + (rpin, pin) = pi.next() + for ent in self: + #log('checking %r vs %r\n' % (ent.name, rpin)) + while ent.name < rpin: + try: + (rpin, pin) = pi.next() + except StopIteration: + return # no more files can possibly match + if not ent.name.startswith(rpin): + continue # not interested + else: + name = pin + ent.name[len(rpin):] + yield (name, ent) + + +# Read all the iters in order; when more than one iter has the same entry, +# the *later* iter in the list wins. (ie. 
more recent iter entries replace +# older ones) +def _last_writer_wins_iter(iters): + l = [] + for e in iters: + it = iter(e) + try: + l.append([it.next(), it]) + except StopIteration: + pass + del iters # to avoid accidents + while l: + l.sort() + mv = l[0][0] + mi = [] + for (i,(v,it)) in enumerate(l): + #log('(%d) considering %d: %r\n' % (len(l), i, v)) + if v > mv: + mv = v + mi = [i] + elif v == mv: + mi.append(i) + yield mv + for i in mi: + try: + l[i][0] = l[i][1].next() + except StopIteration: + l[i] = None + l = filter(None, l) + + +class Writer: + def __init__(self, filename): + self.f = None + self.count = 0 + self.lastfile = None + self.filename = None + self.filename = filename = realpath(filename) + (dir,name) = os.path.split(filename) + (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir) + self.f = os.fdopen(ffd, 'wb', 65536) + self.f.write(INDEX_HDR) + + def __del__(self): + self.abort() + + def abort(self): + f = self.f + self.f = None + if f: + f.close() + os.unlink(self.tmpname) + + def close(self): + f = self.f + self.f = None + if f: + f.close() + os.rename(self.tmpname, self.filename) + + def _write(self, data): + self.f.write(data) + self.count += 1 + + def add(self, name, st, hashgen=None): + #log('ADDING %r\n' % name) + if self.lastfile: + assert(cmp(self.lastfile, name) > 0) # reverse order only + self.lastfile = name + flags = IX_EXISTS + sha = None + if hashgen: + (gitmode, sha) = hashgen(name) + if sha: + flags |= IX_HASHVALID + else: + (gitmode, sha) = (0, EMPTY_SHA) + data = name + '\0' + \ + struct.pack(INDEX_SIG, st.st_dev, int(st.st_ctime), + int(st.st_mtime), st.st_uid, st.st_gid, + st.st_size, st.st_mode, gitmode, sha, flags) + self._write(data) + + def add_ixentry(self, e): + if self.lastfile and self.lastfile <= e.name: + raise Error('%r must come before %r' + % (e.name, self.lastfile)) + self.lastfile = e.name + data = e.name + '\0' + e.packed() + self._write(data) + + def new_reader(self): + self.f.flush() + return 
Reader(self.tmpname) + + +# like os.path.realpath, but doesn't follow a symlink for the last element. +# (i.e. if 'p' is itself a symlink, this one won't follow it) +def realpath(p): + try: + st = os.lstat(p) + except OSError: + st = None + if st and stat.S_ISLNK(st.st_mode): + (dir, name) = os.path.split(p) + dir = os.path.realpath(dir) + out = os.path.join(dir, name) + else: + out = os.path.realpath(p) + #log('realpathing:%r,%r\n' % (p, out)) + return out + + +def reduce_paths(paths): + xpaths = [] + for p in paths: + rp = realpath(p) + st = os.lstat(rp) + if stat.S_ISDIR(st.st_mode): + rp = slashappend(rp) + p = slashappend(p) + xpaths.append((rp, p)) + xpaths.sort() + + paths = [] + prev = None + for (rp, p) in xpaths: + if prev and (prev == rp + or (prev.endswith('/') and rp.startswith(prev))): + continue # already superseded by previous path + paths.append((rp, p)) + prev = rp + paths.sort(reverse=True) + return paths + diff --git a/t/test.sh b/t/test.sh new file mode 100755 index 0000000..7e994c1 --- /dev/null +++ b/t/test.sh @@ -0,0 +1,103 @@ +#!/bin/bash +. 
wvtest.sh +#set -e + +TOP="$(pwd)" +export BUP_DIR="$TOP/buptest.tmp" + +bup() +{ + "$TOP/bup" "$@" +} + +WVSTART "init" + +#set -x +rm -rf "$BUP_DIR" +WVPASS bup init + +WVSTART "index" +D=bupdata.tmp +rm -rf $D +mkdir $D +WVPASSEQ "$(bup index -p)" "" +WVPASSEQ "$(bup index -p $D)" "" +WVFAIL [ -e $D.fake ] +WVFAIL bup index -u $D.fake +WVPASS bup index -u $D +WVPASSEQ "$(bup index -p $D)" "$D/" +touch $D/a $D/b +mkdir $D/d $D/d/e +WVPASSEQ "$(bup index -s $D/)" "A $D/" +WVPASSEQ "$(bup index -s $D/b)" "" +bup tick +WVPASSEQ "$(bup index -us $D/b)" "A $D/b" +WVPASSEQ "$(bup index -us $D)" \ +"A $D/d/e/ +A $D/d/ +A $D/b +A $D/a +A $D/" +WVPASSEQ "$(bup index -us $D/a $D/b --fake-valid)" \ +" $D/b + $D/a" +WVPASSEQ "$(bup index -us $D/a)" " $D/a" # stays unmodified +touch $D/a +WVPASS bup index -u $D/a # becomes modified +WVPASSEQ "$(bup index -s $D/a $D $D/b)" \ +"A $D/d/e/ +A $D/d/ + $D/b +M $D/a +A $D/" +WVPASSEQ "$(cd $D && bup index -m .)" \ +"./d/e/ +./d/ +./a +./" +WVPASSEQ "$(cd $D && bup index -m)" \ +"d/e/ +d/ +a +./" +WVPASSEQ "$(cd $D && bup index -s .)" "$(cd $D && bup index -s .)" + + +WVSTART "split" +WVPASS bup split --bench -b tags1.tmp +WVPASS bup split -vvvv -b testfile2 >tags2.tmp +WVPASS bup split -t testfile2 >tags2t.tmp +WVPASS bup split -t testfile2 --fanout 3 >tags2tf.tmp +WVPASS bup split -r "$BUP_DIR" -c testfile2 >tags2c.tmp +WVPASS ls -lR \ + | WVPASS bup split -r "$BUP_DIR" -c --fanout 3 --max-pack-objects 3 -n lslr +WVFAIL diff -u tags1.tmp tags2.tmp + +# fanout must be different from non-fanout +WVFAIL diff -q tags2t.tmp tags2tf.tmp +wc -c testfile1 testfile2 +wc -l tags1.tmp tags2.tmp + +WVSTART "join" +WVPASS bup join $(cat tags1.tmp) >out1.tmp +WVPASS bup join out2.tmp +WVPASS bup join out2t.tmp +WVPASS bup join -r "$BUP_DIR" out2c.tmp +WVPASS diff -u testfile1 out1.tmp +WVPASS diff -u testfile2 out2.tmp +WVPASS diff -u testfile2 out2t.tmp +WVPASS diff -u testfile2 out2c.tmp + +WVSTART "save/fsck" +( + set -e + cd "$BUP_DIR" || 
exit 1 + #git repack -Ad + #git prune + (cd "$TOP/t/sampledata" && WVPASS bup save -vvn master .) || WVFAIL + n=$(git fsck --full --strict 2>&1 | + egrep -v 'dangling (commit|tree)' | + tee -a /dev/stderr | + wc -l) + WVPASS [ "$n" -eq 0 ] +) || exit 1 diff --git a/t/tindex.py b/t/tindex.py new file mode 100644 index 0000000..9922b8f --- /dev/null +++ b/t/tindex.py @@ -0,0 +1,13 @@ +import os +import index +from wvtest import * + +@wvtest +def testbasic(): + cd = os.path.realpath('') + WVPASS(cd) + sd = os.path.realpath('t/sampledata') + WVPASSEQ(index.realpath('t/sampledata'), cd + '/t/sampledata') + WVPASSEQ(os.path.realpath('t/sampledata/x'), sd + '/x') + WVPASSEQ(os.path.realpath('t/sampledata/etc'), '/etc') + WVPASSEQ(index.realpath('t/sampledata/etc'), sd + '/etc') diff --git a/test-sh b/test-sh deleted file mode 100755 index f48be99..0000000 --- a/test-sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -set -e -echo "Testing \"integration\" in $0:" - -TOP="$(pwd)" -export BUP_DIR="$TOP/buptest.tmp" - -bup() -{ - "$TOP/bup" "$@" -} - -set -x -rm -rf "$BUP_DIR" -bup init -bup split --bench -b tags1.tmp -bup split -vvvv -b testfile2 >tags2.tmp -bup split -t testfile2 >tags2t.tmp -bup split -t testfile2 --fanout 3 >tags2tf.tmp -bup split -r "$BUP_DIR" -c testfile2 >tags2c.tmp -ls -lR | bup split -r "$BUP_DIR" -c --fanout 3 --max-pack-objects 3 -n lslr -diff -u tags1.tmp tags2.tmp || true -if diff -q tags2t.tmp tags2tf.tmp; then - echo "fanout tree same as non-fanout tree!?" - false -fi -wc -c testfile1 testfile2 -wc -l tags1.tmp tags2.tmp -bup join $(cat tags1.tmp) >out1.tmp -bup join out2.tmp -bup join out2t.tmp -bup join -r "$BUP_DIR" out2c.tmp -diff -u testfile1 out1.tmp -diff -u testfile2 out2.tmp -diff -u testfile2 out2t.tmp -diff -u testfile2 out2c.tmp - -( - set -e - cd "$BUP_DIR" || exit 1 - #git repack -Ad - #git prune - (cd "$TOP/t/sampledata" && bup save -vvn master .) 
|| exit 1 - n=$(git fsck --full --strict 2>&1 | - egrep -v 'dangling (commit|tree)' | - tee -a /dev/stderr | - wc -l) - if [ "$n" -ne 0 ]; then - echo "git fsck error." - exit 5 - fi -) || exit 1 diff --git a/wvtest.sh b/wvtest.sh new file mode 100644 index 0000000..90bdc90 --- /dev/null +++ b/wvtest.sh @@ -0,0 +1,89 @@ +# we don't quote $TEXT in case it contains newlines; newlines +# aren't allowed in test output. However, we set -f so that +# at least shell glob characters aren't processed. +_textclean() +{ + ( set -f; echo $* ) +} + +_wvcheck() +{ + CODE="$1" + TEXT=$(_textclean "$2") + OK=ok + if [ "$CODE" -ne 0 ]; then + OK=FAILED + fi + echo "! ${BASH_SOURCE[2]}:${BASH_LINENO[1]} $TEXT $OK" >&2 + if [ "$CODE" -ne 0 ]; then + exit $CODE + else + return 0 + fi +} + + +WVPASS() +{ + TEXT="$*" + + if "$@"; then + _wvcheck 0 "$TEXT" + return 0 + else + _wvcheck 1 "$TEXT" + # NOTREACHED + return 1 + fi +} + + +WVFAIL() +{ + TEXT="$*" + + if "$@"; then + _wvcheck 1 "NOT($TEXT)" + # NOTREACHED + return 1 + else + _wvcheck 0 "NOT($TEXT)" + return 0 + fi +} + + +_wvgetrv() +{ + ( "$@" >&2 ) + echo -n $? +} + + +WVPASSEQ() +{ + WVPASS [ "$#" -eq 2 ] + echo "Comparing:" >&2 + echo "$1" >&2 + echo "--" >&2 + echo "$2" >&2 + _wvcheck $(_wvgetrv [ "$1" = "$2" ]) "'$1' = '$2'" +} + + +WVPASSNE() +{ + WVPASS [ "$#" -eq 2 ] + echo "Comparing:" >&2 + echo "$1" >&2 + echo "--" >&2 + echo "$2" >&2 + _wvcheck $(_wvgetrv [ "$1" != "$2" ]) "'$1' != '$2'" +} + + +WVSTART() +{ + echo >&2 + echo "Testing \"$*\" in ${BASH_SOURCE[1]}:" >&2 +}