X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Findex-cmd.py;h=539e89ef5019839d67669096593e783d532366e0;hb=c40b3dd5fd74e72024fbaad3daf5a958aefa1c54;hp=8195674390758c4fad0f7f655601c31b4cd85b88;hpb=38208840b8f669598e0077ac291080da0d075d94;p=bup.git diff --git a/cmd/index-cmd.py b/cmd/index-cmd.py index 8195674..539e89e 100755 --- a/cmd/index-cmd.py +++ b/cmd/index-cmd.py @@ -1,8 +1,19 @@ -#!/usr/bin/env python -import sys, stat, time, os -from bup import options, git, index, drecurse -from bup.helpers import * +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import +import sys, stat, time, os, errno, re + +from bup import metadata, options, git, index, drecurse, hlinkdb +from bup.drecurse import recursive_dirlist from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE +from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes, + progress, qprogress, saved_errors) + class IterHelper: def __init__(self, l): @@ -11,10 +22,7 @@ class IterHelper: self.next() def next(self): - try: - self.cur = self.i.next() - except StopIteration: - self.cur = None + self.cur = next(self.i, None) return self.cur @@ -48,50 +56,124 @@ def check_index(reader): log('check: passed.\n') -def update_index(top, excluded_paths): - tmax = time.time() - 1 +def clear_index(indexfile): + indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink'] + for indexfile in indexfiles: + path = git.repo(indexfile) + try: + os.remove(path) + if opt.verbose: + log('clear: removed %s\n' % path) + except OSError as e: + if e.errno != errno.ENOENT: + raise + + +def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions): + # tmax and start must be epoch nanoseconds. + tmax = (time.time() - 1) * 10**9 ri = index.Reader(indexfile) - wi = index.Writer(indexfile, tmax) + msw = index.MetaStoreWriter(indexfile + '.meta') + wi = index.Writer(indexfile, msw, tmax) rig = IterHelper(ri.iter(name=top)) - tstart = int(time.time()) + tstart = int(time.time()) * 10**9 - hashgen = None + hlinks = hlinkdb.HLinkDB(indexfile + '.hlink') + + fake_hash = None if opt.fake_valid: - def hashgen(name): + def fake_hash(name): return (GIT_MODE_FILE, index.FAKE_SHA) total = 0 bup_dir = os.path.abspath(git.repo()) - for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev, - bup_dir=bup_dir, - excluded_paths=excluded_paths): + index_start = time.time() + for path, pst in recursive_dirlist([top], + xdev=opt.xdev, + bup_dir=bup_dir, + excluded_paths=excluded_paths, + exclude_rxs=exclude_rxs, + xdev_exceptions=xdev_exceptions): if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)): sys.stdout.write('%s\n' % path) sys.stdout.flush() - qprogress('Indexing: %d\r' % total) + elapsed = time.time() - index_start + paths_per_sec = total / elapsed if elapsed else 0 + qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec)) elif not (total % 128): - qprogress('Indexing: %d\r' % total) + elapsed = time.time() - index_start + paths_per_sec = total / elapsed if elapsed else 0 + qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec)) total += 1 + while rig.cur and rig.cur.name > path: # deleted paths if rig.cur.exists(): rig.cur.set_deleted() rig.cur.repack() + if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode): + hlinks.del_path(rig.cur.name) rig.next() + if rig.cur and rig.cur.name == path: # paths that already existed - if pst: - rig.cur.from_stat(pst, tstart) + need_repack = False + if(rig.cur.stale(pst, tstart, check_device=opt.check_device)): + try: + meta = metadata.from_path(path, statinfo=pst) + except (OSError, IOError) as e: + add_error(e) + rig.next() + continue + if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1: + hlinks.del_path(rig.cur.name) + if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1: + hlinks.add_path(path, pst.st_dev, pst.st_ino) + # Clear these so they don't bloat the store -- they're + # already in the index (since they vary a lot and they're + # fixed length). If you've noticed "tmax", you might + # wonder why it's OK to do this, since that code may + # adjust (mangle) the index mtime and ctime -- producing + # fake values which must not end up in a .bupm. However, + # it looks like that shouldn't be possible: (1) When + # "save" validates the index entry, it always reads the + # metadata from the filesytem. (2) Metadata is only + # read/used from the index if hashvalid is true. (3) + # "faked" entries will be stale(), and so we'll invalidate + # them below. + meta.ctime = meta.mtime = meta.atime = 0 + meta_ofs = msw.store(meta) + rig.cur.update_from_stat(pst, meta_ofs) + rig.cur.invalidate() + need_repack = True if not (rig.cur.flags & index.IX_HASHVALID): - if hashgen: - (rig.cur.gitmode, rig.cur.sha) = hashgen(path) + if fake_hash: + rig.cur.gitmode, rig.cur.sha = fake_hash(path) rig.cur.flags |= index.IX_HASHVALID + need_repack = True if opt.fake_invalid: rig.cur.invalidate() - rig.cur.repack() + need_repack = True + if need_repack: + rig.cur.repack() rig.next() else: # new paths - wi.add(path, pst, hashgen = hashgen) - progress('Indexing: %d, done.\n' % total) - + try: + meta = metadata.from_path(path, statinfo=pst) + except (OSError, IOError) as e: + add_error(e) + continue + # See same assignment to 0, above, for rationale. + meta.atime = meta.mtime = meta.ctime = 0 + meta_ofs = msw.store(meta) + wi.add(path, pst, meta_ofs, hashgen=fake_hash) + if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1: + hlinks.add_path(path, pst.st_dev, pst.st_ino) + + elapsed = time.time() - index_start + paths_per_sec = total / elapsed if elapsed else 0 + progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec)) + + hlinks.prepare_save() + if ri.exists(): ri.save() wi.flush() @@ -102,7 +184,7 @@ def update_index(top, excluded_paths): check_index(ri) log('check: before merging: newfile\n') check_index(wr) - mi = index.Writer(indexfile, tmax) + mi = index.Writer(indexfile, msw, tmax) for e in index.merge(ri, wr): # FIXME: shouldn't we remove deleted entries eventually? When? @@ -115,9 +197,12 @@ def update_index(top, excluded_paths): else: wi.close() + msw.close() + hlinks.commit_save() + optspec = """ -bup index <-p|m|s|u> [options...] +bup index <-p|-m|-s|-u|--clear|--check> [options...] -- Modes: p,print print the index entries for the given names (also works with -u) @@ -125,26 +210,44 @@ m,modified print only added/deleted/modified files (implies -p) s,status print each filename with a status char (A/M/D) (implies -p) u,update recursively update the index entries for the given file/dir names (default if no mode is specified) check carefully check index file integrity +clear clear the default index Options: H,hash print the hash for each object next to its name l,long print more information about each file +no-check-device don't invalidate an entry if the containing device changes fake-valid mark all index entries as up-to-date even if they aren't fake-invalid mark all index entries as invalid f,indexfile= the name of the index file (normally BUP_DIR/bupindex) -exclude= a path to exclude from the backup (can be used more than once) -exclude-from= a file that contains exclude paths (can be used more than once) +exclude= a path to exclude from the backup (may be repeated) +exclude-from= skip --exclude paths in file (may be repeated) +exclude-rx= skip paths matching the unanchored regex (may be repeated) +exclude-rx-from= skip --exclude-rx patterns in file (may be repeated) v,verbose increase log output (can be used more than once) x,xdev,one-file-system don't cross filesystem boundaries """ o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) -if not (opt.modified or opt['print'] or opt.status or opt.update or opt.check): +if not (opt.modified or \ + opt['print'] or \ + opt.status or \ + opt.update or \ + opt.check or \ + opt.clear): opt.update = 1 if (opt.fake_valid or opt.fake_invalid) and not opt.update: o.fatal('--fake-{in,}valid are meaningless without -u') if opt.fake_valid and opt.fake_invalid: o.fatal('--fake-valid is incompatible with --fake-invalid') +if opt.clear and opt.indexfile: + o.fatal('cannot clear an external index (via -f)') + +# FIXME: remove this once we account for timestamp races, i.e. index; +# touch new-file; index. It's possible for this to happen quickly +# enough that new-file ends up with the same timestamp as the first +# index, and then bup will ignore it. +tick_start = time.time() +time.sleep(1 - (tick_start - int(tick_start))) git.check_repo_or_die() indexfile = opt.indexfile or git.repo('bupindex') @@ -155,15 +258,18 @@ if opt.check: log('check: starting initial check.\n') check_index(index.Reader(indexfile)) -excluded_paths = drecurse.parse_excludes(flags) - -paths = index.reduce_paths(extra) +if opt.clear: + log('clear: clearing index.\n') + clear_index(indexfile) if opt.update: if not extra: o.fatal('update mode (-u) requested but no paths given') - for (rp,path) in paths: - update_index(rp, excluded_paths) + excluded_paths = parse_excludes(flags, o.fatal) + exclude_rxs = parse_rx_excludes(flags, o.fatal) + xexcept = index.unique_resolved_paths(extra) + for rp, path in index.reduce_paths(extra): + update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept) if opt['print'] or opt.status or opt.modified: for (name, ent) in index.Reader(indexfile).filter(extra or ['']):