-#!/usr/bin/env python
-import sys, stat, time, os
-from bup import options, git, index, drecurse, hlinkdb
-from bup.helpers import *
+#!/bin/sh
+"""": # -*-python-*-
+bup_python="$(dirname "$0")/bup-python" || exit $?
+exec "$bup_python" "$0" ${1+"$@"}
+"""
+# end of bup preamble
+
+import sys, stat, time, os, errno, re
+
+from bup import metadata, options, git, index, drecurse, hlinkdb
+from bup.drecurse import recursive_dirlist
from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
+from bup.helpers import (handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
+ progress, qprogress, saved_errors)
+
class IterHelper:
def __init__(self, l):
log('check: passed.\n')
-def update_index(top, excluded_paths):
+def clear_index(indexfile):
+    """Remove the index plus its .meta and .hlink companion files.
+
+    Paths are resolved relative to the bup repository via git.repo().
+    Missing files are silently ignored (ENOENT); any other OSError is
+    re-raised.  Logs each removal when the global opt.verbose is set.
+    """
+    indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
+    # NOTE(review): the loop variable shadows the indexfile parameter;
+    # harmless here because the list is built first, but confusing.
+    for indexfile in indexfiles:
+        path = git.repo(indexfile)  # resolve against the repo directory
+        try:
+            os.remove(path)
+            if opt.verbose:
+                log('clear: removed %s\n' % path)
+        except OSError as e:
+            # Best effort: a file that never existed is already "clear".
+            if e.errno != errno.ENOENT:
+                raise
+
+
+def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
# tmax and start must be epoch nanoseconds.
tmax = (time.time() - 1) * 10**9
ri = index.Reader(indexfile)
- wi = index.Writer(indexfile, tmax)
+ msw = index.MetaStoreWriter(indexfile + '.meta')
+ wi = index.Writer(indexfile, msw, tmax)
rig = IterHelper(ri.iter(name=top))
tstart = int(time.time()) * 10**9
total = 0
bup_dir = os.path.abspath(git.repo())
- for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
- bup_dir=bup_dir,
- excluded_paths=excluded_paths):
+ index_start = time.time()
+ for path, pst in recursive_dirlist([top],
+ xdev=opt.xdev,
+ bup_dir=bup_dir,
+ excluded_paths=excluded_paths,
+ exclude_rxs=exclude_rxs,
+ xdev_exceptions=xdev_exceptions):
if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
sys.stdout.write('%s\n' % path)
sys.stdout.flush()
- qprogress('Indexing: %d\r' % total)
+ elapsed = time.time() - index_start
+ paths_per_sec = total / elapsed if elapsed else 0
+ qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
elif not (total % 128):
- qprogress('Indexing: %d\r' % total)
+ elapsed = time.time() - index_start
+ paths_per_sec = total / elapsed if elapsed else 0
+ qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
total += 1
while rig.cur and rig.cur.name > path: # deleted paths
if rig.cur.exists():
hlinks.del_path(rig.cur.name)
rig.next()
if rig.cur and rig.cur.name == path: # paths that already existed
+ try:
+ meta = metadata.from_path(path, statinfo=pst)
+ except (OSError, IOError) as e:
+ add_error(e)
+ rig.next()
+ continue
if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
hlinks.del_path(rig.cur.name)
if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
hlinks.add_path(path, pst.st_dev, pst.st_ino)
- rig.cur.from_stat(pst, tstart)
+ # Clear these so they don't bloat the store -- they're
+ # already in the index (since they vary a lot and they're
+ # fixed length). If you've noticed "tmax", you might
+ # wonder why it's OK to do this, since that code may
+ # adjust (mangle) the index mtime and ctime -- producing
+ # fake values which must not end up in a .bupm. However,
+ # it looks like that shouldn't be possible: (1) When
+ # "save" validates the index entry, it always reads the
+ # metadata from the filesytem. (2) Metadata is only
+ # read/used from the index if hashvalid is true. (3) index
+ # always invalidates "faked" entries, because "old != new"
+ # in from_stat().
+ meta.ctime = meta.mtime = meta.atime = 0
+ meta_ofs = msw.store(meta)
+ rig.cur.from_stat(pst, meta_ofs, tstart,
+ check_device=opt.check_device)
if not (rig.cur.flags & index.IX_HASHVALID):
if hashgen:
(rig.cur.gitmode, rig.cur.sha) = hashgen(path)
rig.cur.repack()
rig.next()
else: # new paths
- wi.add(path, pst, hashgen = hashgen)
+ try:
+ meta = metadata.from_path(path, statinfo=pst)
+ except (OSError, IOError) as e:
+ add_error(e)
+ continue
+ # See same assignment to 0, above, for rationale.
+ meta.atime = meta.mtime = meta.ctime = 0
+ meta_ofs = msw.store(meta)
+ wi.add(path, pst, meta_ofs, hashgen = hashgen)
if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
hlinks.add_path(path, pst.st_dev, pst.st_ino)
- progress('Indexing: %d, done.\n' % total)
-
+ elapsed = time.time() - index_start
+ paths_per_sec = total / elapsed if elapsed else 0
+ progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
+
hlinks.prepare_save()
if ri.exists():
check_index(ri)
log('check: before merging: newfile\n')
check_index(wr)
- mi = index.Writer(indexfile, tmax)
+ mi = index.Writer(indexfile, msw, tmax)
for e in index.merge(ri, wr):
# FIXME: shouldn't we remove deleted entries eventually? When?
else:
wi.close()
+ msw.close()
hlinks.commit_save()
optspec = """
-bup index <-p|m|s|u> [options...] <filenames...>
+bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
--
Modes:
p,print print the index entries for the given names (also works with -u)
s,status print each filename with a status char (A/M/D) (implies -p)
u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
check carefully check index file integrity
+clear clear the default index
Options:
H,hash print the hash for each object next to its name
l,long print more information about each file
+no-check-device don't invalidate an entry if the containing device changes
fake-valid mark all index entries as up-to-date even if they aren't
fake-invalid mark all index entries as invalid
f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
-exclude= a path to exclude from the backup (can be used more than once)
-exclude-from= a file that contains exclude paths (can be used more than once)
+exclude= a path to exclude from the backup (may be repeated)
+exclude-from= skip --exclude paths in file (may be repeated)
+exclude-rx= skip paths matching the unanchored regex (may be repeated)
+exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
v,verbose increase log output (can be used more than once)
x,xdev,one-file-system don't cross filesystem boundaries
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])
-if not (opt.modified or opt['print'] or opt.status or opt.update or opt.check):
+if not (opt.modified or \
+ opt['print'] or \
+ opt.status or \
+ opt.update or \
+ opt.check or \
+ opt.clear):
opt.update = 1
if (opt.fake_valid or opt.fake_invalid) and not opt.update:
o.fatal('--fake-{in,}valid are meaningless without -u')
if opt.fake_valid and opt.fake_invalid:
o.fatal('--fake-valid is incompatible with --fake-invalid')
+if opt.clear and opt.indexfile:
+ o.fatal('cannot clear an external index (via -f)')
# FIXME: remove this once we account for timestamp races, i.e. index;
# touch new-file; index. It's possible for this to happen quickly
log('check: starting initial check.\n')
check_index(index.Reader(indexfile))
-excluded_paths = drecurse.parse_excludes(flags)
-
-paths = index.reduce_paths(extra)
+if opt.clear:
+ log('clear: clearing index.\n')
+ clear_index(indexfile)
if opt.update:
if not extra:
o.fatal('update mode (-u) requested but no paths given')
- for (rp,path) in paths:
- update_index(rp, excluded_paths)
+ excluded_paths = parse_excludes(flags, o.fatal)
+ exclude_rxs = parse_rx_excludes(flags, o.fatal)
+ xexcept = index.unique_resolved_paths(extra)
+ for rp, path in index.reduce_paths(extra):
+ update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept)
if opt['print'] or opt.status or opt.modified:
for (name, ent) in index.Reader(indexfile).filter(extra or ['']):