X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Findex-cmd.py;h=c2a22f913aa64bd2fe83f4a6b68812ed5837613b;hb=bf67f94dd4f4096de4eee07a7dc377d6c889a016;hp=6f2adf4e581c1dc288babe4bee3525c2f61ea004;hpb=c7139a3fae51633f0f290ebf73f0cf5e4864e547;p=bup.git diff --git a/cmd/index-cmd.py b/cmd/index-cmd.py index 6f2adf4..c2a22f9 100755 --- a/cmd/index-cmd.py +++ b/cmd/index-cmd.py @@ -1,9 +1,22 @@ -#!/usr/bin/env python +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble +from __future__ import absolute_import, print_function +from binascii import hexlify import sys, stat, time, os, errno, re + from bup import metadata, options, git, index, drecurse, hlinkdb -from bup.helpers import * +from bup.compat import argv_bytes +from bup.drecurse import recursive_dirlist from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE +from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes, + progress, qprogress, saved_errors) +from bup.io import byte_stream, path_msg + class IterHelper: def __init__(self, l): @@ -11,13 +24,11 @@ class IterHelper: self.cur = None self.next() - def next(self): - try: - self.cur = self.i.next() - except StopIteration: - self.cur = None + def __next__(self): + self.cur = next(self.i, None) return self.cur + next = __next__ def check_index(reader): try: @@ -28,15 +39,15 @@ def check_index(reader): if e.children_n: if opt.verbose: log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n, - e.name)) + path_msg(e.name))) assert(e.children_ofs) - assert(e.name.endswith('/')) + assert e.name.endswith(b'/') assert(not d.get(e.children_ofs)) d[e.children_ofs] = 1 if e.flags & index.IX_HASHVALID: assert(e.sha != index.EMPTY_SHA) assert(e.gitmode) - assert(not e or e.name == '/') # last entry is *always* / + assert not e or bytes(e.name) == b'/' # last entry is *always* / log('check: checking normal iteration...\n') last = None for e in reader: @@ -50,44 +61,46 @@ def check_index(reader): def clear_index(indexfile): - indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink'] + indexfiles = [indexfile, indexfile + b'.meta', indexfile + b'.hlink'] for indexfile in indexfiles: path = git.repo(indexfile) try: os.remove(path) if opt.verbose: - log('clear: removed %s\n' % path) - except OSError, e: + log('clear: removed %s\n' % path_msg(path)) + except OSError as e: if e.errno != errno.ENOENT: raise -def update_index(top, excluded_paths, exclude_rxs): +def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions, out=None): # tmax and start must be epoch nanoseconds. tmax = (time.time() - 1) * 10**9 ri = index.Reader(indexfile) - msw = index.MetaStoreWriter(indexfile + '.meta') + msw = index.MetaStoreWriter(indexfile + b'.meta') wi = index.Writer(indexfile, msw, tmax) rig = IterHelper(ri.iter(name=top)) tstart = int(time.time()) * 10**9 - hlinks = hlinkdb.HLinkDB(indexfile + '.hlink') + hlinks = hlinkdb.HLinkDB(indexfile + b'.hlink') - hashgen = None + fake_hash = None if opt.fake_valid: - def hashgen(name): + def fake_hash(name): return (GIT_MODE_FILE, index.FAKE_SHA) total = 0 bup_dir = os.path.abspath(git.repo()) index_start = time.time() - for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev, - bup_dir=bup_dir, - excluded_paths=excluded_paths, - exclude_rxs=exclude_rxs): + for path, pst in recursive_dirlist([top], + xdev=opt.xdev, + bup_dir=bup_dir, + excluded_paths=excluded_paths, + exclude_rxs=exclude_rxs, + xdev_exceptions=xdev_exceptions): if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)): - sys.stdout.write('%s\n' % path) - sys.stdout.flush() + out.write(b'%s\n' % path) + out.flush() elapsed = time.time() - index_start paths_per_sec = total / elapsed if elapsed else 0 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec)) @@ -96,6 +109,7 @@ def update_index(top, excluded_paths, exclude_rxs): paths_per_sec = total / elapsed if elapsed else 0 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec)) total += 1 + while rig.cur and rig.cur.name > path: # deleted paths if rig.cur.exists(): rig.cur.set_deleted() @@ -103,51 +117,58 @@ def update_index(top, excluded_paths, exclude_rxs): if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode): hlinks.del_path(rig.cur.name) rig.next() + if rig.cur and rig.cur.name == path: # paths that already existed - try: - meta = metadata.from_path(path, statinfo=pst) - except (OSError, IOError), e: - add_error(e) - rig.next() - continue - if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1: - hlinks.del_path(rig.cur.name) - if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1: - hlinks.add_path(path, pst.st_dev, pst.st_ino) - # Clear these so they don't bloat the store -- they're - # already in the index (since they vary a lot and they're - # fixed length). If you've noticed "tmax", you might - # wonder why it's OK to do this, since that code may - # adjust (mangle) the index mtime and ctime -- producing - # fake values which must not end up in a .bupm. However, - # it looks like that shouldn't be possible: (1) When - # "save" validates the index entry, it always reads the - # metadata from the filesytem. (2) Metadata is only - # read/used from the index if hashvalid is true. (3) index - # always invalidates "faked" entries, because "old != new" - # in from_stat(). - meta.ctime = meta.mtime = meta.atime = 0 - meta_ofs = msw.store(meta) - rig.cur.from_stat(pst, meta_ofs, tstart, - check_device=opt.check_device) + need_repack = False + if(rig.cur.stale(pst, tstart, check_device=opt.check_device)): + try: + meta = metadata.from_path(path, statinfo=pst) + except (OSError, IOError) as e: + add_error(e) + rig.next() + continue + if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1: + hlinks.del_path(rig.cur.name) + if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1: + hlinks.add_path(path, pst.st_dev, pst.st_ino) + # Clear these so they don't bloat the store -- they're + # already in the index (since they vary a lot and they're + # fixed length). If you've noticed "tmax", you might + # wonder why it's OK to do this, since that code may + # adjust (mangle) the index mtime and ctime -- producing + # fake values which must not end up in a .bupm. However, + # it looks like that shouldn't be possible: (1) When + # "save" validates the index entry, it always reads the + # metadata from the filesytem. (2) Metadata is only + # read/used from the index if hashvalid is true. (3) + # "faked" entries will be stale(), and so we'll invalidate + # them below. + meta.ctime = meta.mtime = meta.atime = 0 + meta_ofs = msw.store(meta) + rig.cur.update_from_stat(pst, meta_ofs) + rig.cur.invalidate() + need_repack = True if not (rig.cur.flags & index.IX_HASHVALID): - if hashgen: - (rig.cur.gitmode, rig.cur.sha) = hashgen(path) + if fake_hash: + rig.cur.gitmode, rig.cur.sha = fake_hash(path) rig.cur.flags |= index.IX_HASHVALID + need_repack = True if opt.fake_invalid: rig.cur.invalidate() - rig.cur.repack() + need_repack = True + if need_repack: + rig.cur.repack() rig.next() else: # new paths try: meta = metadata.from_path(path, statinfo=pst) - except (OSError, IOError), e: + except (OSError, IOError) as e: add_error(e) continue # See same assignment to 0, above, for rationale. meta.atime = meta.mtime = meta.ctime = 0 meta_ofs = msw.store(meta) - wi.add(path, pst, meta_ofs, hashgen = hashgen) + wi.add(path, pst, meta_ofs, hashgen=fake_hash) if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1: hlinks.add_path(path, pst.st_dev, pst.st_ino) @@ -233,10 +254,17 @@ tick_start = time.time() time.sleep(1 - (tick_start - int(tick_start))) git.check_repo_or_die() -indexfile = opt.indexfile or git.repo('bupindex') handle_ctrl_c() +if opt.verbose is None: + opt.verbose = 0 + +if opt.indexfile: + indexfile = argv_bytes(opt.indexfile) +else: + indexfile = git.repo(b'bupindex') + if opt.check: log('check: starting initial check.\n') check_index(index.Reader(indexfile)) @@ -245,37 +273,42 @@ if opt.clear: log('clear: clearing index.\n') clear_index(indexfile) -excluded_paths = parse_excludes(flags, o.fatal) -exclude_rxs = parse_rx_excludes(flags, o.fatal) -paths = index.reduce_paths(extra) +sys.stdout.flush() +out = byte_stream(sys.stdout) if opt.update: if not extra: o.fatal('update mode (-u) requested but no paths given') - for (rp,path) in paths: - update_index(rp, excluded_paths, exclude_rxs) + extra = [argv_bytes(x) for x in extra] + excluded_paths = parse_excludes(flags, o.fatal) + exclude_rxs = parse_rx_excludes(flags, o.fatal) + xexcept = index.unique_resolved_paths(extra) + for rp, path in index.reduce_paths(extra): + update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept, + out=out) if opt['print'] or opt.status or opt.modified: - for (name, ent) in index.Reader(indexfile).filter(extra or ['']): + extra = [argv_bytes(x) for x in extra] + for name, ent in index.Reader(indexfile).filter(extra or [b'']): if (opt.modified and (ent.is_valid() or ent.is_deleted() or not ent.mode)): continue - line = '' + line = b'' if opt.status: if ent.is_deleted(): - line += 'D ' + line += b'D ' elif not ent.is_valid(): if ent.sha == index.EMPTY_SHA: - line += 'A ' + line += b'A ' else: - line += 'M ' + line += b'M ' else: - line += ' ' + line += b' ' if opt.hash: - line += ent.sha.encode('hex') + ' ' + line += hexlify(ent) + b' ' if opt.long: - line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode)) - print line + (name or './') + line += b'%7s %7s ' % (oct(ent.mode), oct(ent.gitmode)) + out.write(line + (name or b'./') + b'\n') if opt.check and (opt['print'] or opt.status or opt.modified or opt.update): log('check: starting final check.\n')