-#!/usr/bin/env python
-import sys, stat, time
-from bup import options, git, index, drecurse
-from bup.helpers import *
+#!/bin/sh
+"""": # -*-python-*-
+bup_python="$(dirname "$0")/bup-python" || exit $?
+exec "$bup_python" "$0" ${1+"$@"}
+"""
+# end of bup preamble
+from __future__ import absolute_import, print_function
+import sys, stat, time, os, errno, re
-def merge_indexes(out, r1, r2):
- for e in index.MergeIter([r1, r2]):
- # FIXME: shouldn't we remove deleted entries eventually? When?
- out.add_ixentry(e)
+from bup import metadata, options, git, index, drecurse, hlinkdb
+from bup.drecurse import recursive_dirlist
+from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
+from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
+ progress, qprogress, saved_errors)
class IterHelper:
self.next()
def next(self):
- try:
- self.cur = self.i.next()
- except StopIteration:
- self.cur = None
+ self.cur = next(self.i, None)
return self.cur
log('check: passed.\n')
-def update_index(top):
+def clear_index(indexfile):
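+    # Remove the index along with its companion .meta and .hlink files,
+    # ignoring any that don't exist.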
+ indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
+ for indexfile in indexfiles:
+ path = git.repo(indexfile)
+ try:
+ os.remove(path)
+ if opt.verbose:
+ log('clear: removed %s\n' % path)
+ except OSError as e:
+ if e.errno != errno.ENOENT:
+ raise
+
+
+def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
+    # tmax and tstart must be epoch nanoseconds.
+ tmax = (time.time() - 1) * 10**9
ri = index.Reader(indexfile)
- wi = index.Writer(indexfile)
+ msw = index.MetaStoreWriter(indexfile + '.meta')
+ wi = index.Writer(indexfile, msw, tmax)
rig = IterHelper(ri.iter(name=top))
- tstart = int(time.time())
+ tstart = int(time.time()) * 10**9
+
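+    # Hard link database: records each indexed path with st_nlink > 1 under
+    # its (dev, ino) pair so hard link groups can be reconstructed later.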
+ hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')
- hashgen = None
+ fake_hash = None
if opt.fake_valid:
- def hashgen(name):
- return (0100644, index.FAKE_SHA)
+ def fake_hash(name):
+ return (GIT_MODE_FILE, index.FAKE_SHA)
total = 0
- for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev):
+ bup_dir = os.path.abspath(git.repo())
+ index_start = time.time()
+ for path, pst in recursive_dirlist([top],
+ xdev=opt.xdev,
+ bup_dir=bup_dir,
+ excluded_paths=excluded_paths,
+ exclude_rxs=exclude_rxs,
+ xdev_exceptions=xdev_exceptions):
if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
sys.stdout.write('%s\n' % path)
sys.stdout.flush()
- progress('Indexing: %d\r' % total)
+ elapsed = time.time() - index_start
+ paths_per_sec = total / elapsed if elapsed else 0
+ qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
elif not (total % 128):
- progress('Indexing: %d\r' % total)
+ elapsed = time.time() - index_start
+ paths_per_sec = total / elapsed if elapsed else 0
+ qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
total += 1
+
while rig.cur and rig.cur.name > path: # deleted paths
if rig.cur.exists():
rig.cur.set_deleted()
rig.cur.repack()
+ if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
+ hlinks.del_path(rig.cur.name)
rig.next()
+
if rig.cur and rig.cur.name == path: # paths that already existed
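+            # Rewrite (repack) the existing entry only if something below
+            # actually changes it.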
- if pst:
- rig.cur.from_stat(pst, tstart)
+ need_repack = False
+            if rig.cur.stale(pst, tstart, check_device=opt.check_device):
+ try:
+ meta = metadata.from_path(path, statinfo=pst)
+ except (OSError, IOError) as e:
+ add_error(e)
+ rig.next()
+ continue
+ if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
+ hlinks.del_path(rig.cur.name)
+ if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
+ hlinks.add_path(path, pst.st_dev, pst.st_ino)
+ # Clear these so they don't bloat the store -- they're
+ # already in the index (since they vary a lot and they're
+ # fixed length). If you've noticed "tmax", you might
+ # wonder why it's OK to do this, since that code may
+ # adjust (mangle) the index mtime and ctime -- producing
+ # fake values which must not end up in a .bupm. However,
+ # it looks like that shouldn't be possible: (1) When
+ # "save" validates the index entry, it always reads the
+                # metadata from the filesystem.  (2) Metadata is only
+ # read/used from the index if hashvalid is true. (3)
+ # "faked" entries will be stale(), and so we'll invalidate
+ # them below.
+ meta.ctime = meta.mtime = meta.atime = 0
+ meta_ofs = msw.store(meta)
+ rig.cur.update_from_stat(pst, meta_ofs)
+ rig.cur.invalidate()
+ need_repack = True
if not (rig.cur.flags & index.IX_HASHVALID):
- if hashgen:
- (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
+ if fake_hash:
+ rig.cur.gitmode, rig.cur.sha = fake_hash(path)
rig.cur.flags |= index.IX_HASHVALID
+ need_repack = True
if opt.fake_invalid:
rig.cur.invalidate()
- rig.cur.repack()
+ need_repack = True
+ if need_repack:
+ rig.cur.repack()
rig.next()
else: # new paths
- wi.add(path, pst, hashgen = hashgen)
- progress('Indexing: %d, done.\n' % total)
-
+ try:
+ meta = metadata.from_path(path, statinfo=pst)
+ except (OSError, IOError) as e:
+ add_error(e)
+ continue
+ # See same assignment to 0, above, for rationale.
+ meta.atime = meta.mtime = meta.ctime = 0
+ meta_ofs = msw.store(meta)
+ wi.add(path, pst, meta_ofs, hashgen=fake_hash)
+ if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
+ hlinks.add_path(path, pst.st_dev, pst.st_ino)
+
+ elapsed = time.time() - index_start
+ paths_per_sec = total / elapsed if elapsed else 0
+ progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
+
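+    # First phase of the hard link db save: write it to a temporary file now;
+    # commit_save() renames it into place once the index has been written.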
+ hlinks.prepare_save()
+
if ri.exists():
ri.save()
wi.flush()
check_index(ri)
log('check: before merging: newfile\n')
check_index(wr)
- mi = index.Writer(indexfile)
- merge_indexes(mi, ri, wr)
+ mi = index.Writer(indexfile, msw, tmax)
+
+ for e in index.merge(ri, wr):
+ # FIXME: shouldn't we remove deleted entries eventually? When?
+ mi.add_ixentry(e)
+
ri.close()
mi.close()
wr.close()
else:
wi.close()
+ msw.close()
+ hlinks.commit_save()
+
optspec = """
-bup index <-p|m|u> [options...] <filenames...>
+bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
--
+ Modes:
p,print print the index entries for the given names (also works with -u)
m,modified print only added/deleted/modified files (implies -p)
s,status print each filename with a status char (A/M/D) (implies -p)
-H,hash print the hash for each object next to its name (implies -p)
+u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
+check carefully check index file integrity
+clear clear the default index
+ Options:
+H,hash print the hash for each object next to its name
l,long print more information about each file
-u,update (recursively) update the index entries for the given filenames
-x,xdev,one-file-system don't cross filesystem boundaries
+no-check-device don't invalidate an entry if the containing device changes
fake-valid mark all index entries as up-to-date even if they aren't
fake-invalid mark all index entries as invalid
-check carefully check index file integrity
-f,indexfile= the name of the index file (default 'index')
+f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
+exclude= a path to exclude from the backup (may be repeated)
+exclude-from= skip --exclude paths in file (may be repeated)
+exclude-rx= skip paths matching the unanchored regex (may be repeated)
+exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
v,verbose increase log output (can be used more than once)
+x,xdev,one-file-system don't cross filesystem boundaries
"""
-o = options.Options('bup index', optspec)
+o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])
-if not (opt.modified or opt['print'] or opt.status or opt.update or opt.check):
- o.fatal('supply one or more of -p, -s, -m, -u, or --check')
+if not (opt.modified or opt['print'] or opt.status or
+        opt.update or opt.check or opt.clear):
+ opt.update = 1
if (opt.fake_valid or opt.fake_invalid) and not opt.update:
o.fatal('--fake-{in,}valid are meaningless without -u')
if opt.fake_valid and opt.fake_invalid:
o.fatal('--fake-valid is incompatible with --fake-invalid')
+if opt.clear and opt.indexfile:
+ o.fatal('cannot clear an external index (via -f)')
+
+# FIXME: remove this once we account for timestamp races, i.e. index;
+# touch new-file; index. It's possible for this to happen quickly
+# enough that new-file ends up with the same timestamp as the first
+# index, and then bup will ignore it.
+tick_start = time.time()
+time.sleep(1 - (tick_start - int(tick_start)))
git.check_repo_or_die()
indexfile = opt.indexfile or git.repo('bupindex')
log('check: starting initial check.\n')
check_index(index.Reader(indexfile))
-paths = index.reduce_paths(extra)
+if opt.clear:
+ log('clear: clearing index.\n')
+ clear_index(indexfile)
if opt.update:
if not extra:
- o.fatal('update (-u) requested but no paths given')
- for (rp,path) in paths:
- update_index(rp)
+ o.fatal('update mode (-u) requested but no paths given')
+ excluded_paths = parse_excludes(flags, o.fatal)
+ exclude_rxs = parse_rx_excludes(flags, o.fatal)
+ xexcept = index.unique_resolved_paths(extra)
+ for rp, path in index.reduce_paths(extra):
+ update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept)
if opt['print'] or opt.status or opt.modified:
for (name, ent) in index.Reader(indexfile).filter(extra or ['']):
line += ent.sha.encode('hex') + ' '
if opt.long:
line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode))
- print line + (name or './')
+ print(line + (name or './'))
if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
log('check: starting final check.\n')