# Launcher preamble (shell): locate the bup-python executable in the same
# directory as this script, exit with the failing status if that lookup
# fails, then re-exec this very file under it with all arguments forwarded.
3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 import sys, stat, time, os, errno, re
11 from bup import metadata, options, git, index, drecurse, hlinkdb
12 from bup.drecurse import recursive_dirlist
13 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
14 from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
15 progress, qprogress, saved_errors)
# Constructor of a small iterator helper (used below as IterHelper); the
# class header and the lines between these two statements are not shown.
19 def __init__(self, l):
# Advance the wrapped iterator self.i; cur becomes None once it is exhausted.
# NOTE(review): with the intervening lines omitted, this assignment may
# belong to a separate method rather than to __init__ -- confirm.
25 self.cur = next(self.i, None)
# check_index: iterate over `reader` and assert invariants of the index
# entries, reporting progress through log().
# NOTE(review): several lines of this function are omitted from this
# listing, so the comments below describe only the visible statements.
29 def check_index(reader):
31 log('check: checking forward iteration...\n')
# First pass: every entry produced by reader.forward_iter().
34 for e in reader.forward_iter():
37 log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
# Visible checks: the children offset is set, the name ends with '/', and
# that offset has not been recorded in `d` yet (`d` and any guarding
# condition are on lines not shown).
39 assert(e.children_ofs)
40 assert(e.name.endswith('/'))
41 assert(not d.get(e.children_ofs))
# An entry flagged IX_HASHVALID must not still carry the EMPTY_SHA value.
43 if e.flags & index.IX_HASHVALID:
44 assert(e.sha != index.EMPTY_SHA)
46 assert(not e or e.name == '/') # last entry is *always* /
# Second pass over the reader ("normal iteration"); the loop itself is not
# shown in this listing.
47 log('check: checking normal iteration...\n')
# Logs the entry being examined; presumably inside an error handler whose
# surrounding lines are not shown -- confirm.
54 log('index error! at %r\n' % e)
56 log('check: passed.\n')
# clear_index: process the index file together with its '.meta' and '.hlink'
# companions, logging each path as removed.
# NOTE(review): the removal call and the try/except lines are not shown in
# this listing.
59 def clear_index(indexfile):
60 indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
# Note: the loop variable rebinds the `indexfile` parameter.
61 for indexfile in indexfiles:
# Each name is resolved through git.repo() before use.
62 path = git.repo(indexfile)
66 log('clear: removed %s\n' % path)
# ENOENT (file already absent) is treated differently from other errors;
# what happens for other errno values is on a line not shown -- presumably
# the exception is re-raised.
68 if e.errno != errno.ENOENT:
# update_index: bring the index up to date for the tree rooted at `top`.
# excluded_paths, exclude_rxs and xdev_exceptions are passed straight through
# to recursive_dirlist().  The function also reads `indexfile` and `opt`,
# which are not parameters; both are assigned at module level in this file.
# NOTE(review): this listing omits a number of the function's lines (for
# example the `try:` lines paired with the `except` clauses below), so the
# comments here describe only the statements that are visible.
72 def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
73 # tmax and start must be epoch nanoseconds.
# One second before now, scaled to nanoseconds; handed to both index.Writer
# instances created in this function.
74 tmax = (time.time() - 1) * 10**9
# ri reads the existing index, msw writes the companion '.meta' store, and
# wi receives the entries passed to wi.add() further down.
75 ri = index.Reader(indexfile)
76 msw = index.MetaStoreWriter(indexfile + '.meta')
77 wi = index.Writer(indexfile, msw, tmax)
# rig steps through the existing entries selected by ri.iter(name=top);
# rig.cur is the existing entry currently being compared with the walk.
78 rig = IterHelper(ri.iter(name=top))
# Current time truncated to whole seconds, in nanoseconds; passed to the
# stale() test below.
79 tstart = int(time.time()) * 10**9
# Hard-link database stored next to the index as '<indexfile>.hlink'.
81 hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')
# NOTE(review): the def enclosing this return is not shown; presumably it is
# the fake_hash helper referenced further down -- confirm.
86 return (GIT_MODE_FILE, index.FAKE_SHA)
# Absolute path of the repository directory.
89 bup_dir = os.path.abspath(git.repo())
# Wall-clock start, used only for the paths-per-second figures.
90 index_start = time.time()
# Walk the filesystem below `top`, honouring the exclusion settings.
91 for path, pst in recursive_dirlist([top],
94 excluded_paths=excluded_paths,
95 exclude_rxs=exclude_rxs,
96 xdev_exceptions=xdev_exceptions):
# Verbosity 2 or more prints every path to stdout; verbosity 1 prints
# directories only.
97 if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
98 sys.stdout.write('%s\n' % path)
# Rate is reported as 0 when no time has elapsed, avoiding a division by zero.
100 elapsed = time.time() - index_start
101 paths_per_sec = total / elapsed if elapsed else 0
102 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
# Otherwise refresh the progress line only when total is a multiple of 128.
103 elif not (total % 128):
104 elapsed = time.time() - index_start
105 paths_per_sec = total / elapsed if elapsed else 0
106 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
# Existing entries whose name compares greater than the current path are
# marked deleted; hard-linked non-directories are also dropped from hlinks.
109 while rig.cur and rig.cur.name > path: # deleted paths
111 rig.cur.set_deleted()
113 if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
114 hlinks.del_path(rig.cur.name)
117 if rig.cur and rig.cur.name == path: # paths that already existed
# Only entries that stale() reports as out of date are re-read.
119 if(rig.cur.stale(pst, tstart, check_device=opt.check_device)):
121 meta = metadata.from_path(path, statinfo=pst)
# The handler body for metadata read failures is not shown in this listing.
122 except (OSError, IOError) as e:
# Replace the hard-link record: remove the old entry's registration (based
# on its stored mode/nlink) and add one for the current stat result.
126 if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
127 hlinks.del_path(rig.cur.name)
128 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
129 hlinks.add_path(path, pst.st_dev, pst.st_ino)
130 # Clear these so they don't bloat the store -- they're
131 # already in the index (since they vary a lot and they're
132 # fixed length). If you've noticed "tmax", you might
133 # wonder why it's OK to do this, since that code may
134 # adjust (mangle) the index mtime and ctime -- producing
135 # fake values which must not end up in a .bupm. However,
136 # it looks like that shouldn't be possible: (1) When
137 # "save" validates the index entry, it always reads the
138 # metadata from the filesystem. (2) Metadata is only
139 # read/used from the index if hashvalid is true. (3)
140 # "faked" entries will be stale(), and so we'll invalidate
142 meta.ctime = meta.mtime = meta.atime = 0
# Store the time-stripped metadata and point the entry at its offset.
143 meta_ofs = msw.store(meta)
144 rig.cur.update_from_stat(pst, meta_ofs)
# For entries not yet hash-valid: fill in a fake gitmode/sha when the sha is
# still EMPTY_SHA, then set IX_HASHVALID.  NOTE(review): an intervening
# condition on a line not shown may gate this -- confirm.
147 if not (rig.cur.flags & index.IX_HASHVALID):
149 if rig.cur.sha == index.EMPTY_SHA:
150 rig.cur.gitmode, rig.cur.sha = fake_hash(path)
151 rig.cur.flags |= index.IX_HASHVALID
# Second metadata read; from the wi.add() call below this appears to be the
# branch for paths not yet in the index (the branch line is not shown).
161 meta = metadata.from_path(path, statinfo=pst)
162 except (OSError, IOError) as e:
165 # See same assignment to 0, above, for rationale.
166 meta.atime = meta.mtime = meta.ctime = 0
167 meta_ofs = msw.store(meta)
168 wi.add(path, pst, meta_ofs, hashgen=fake_hash)
169 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
170 hlinks.add_path(path, pst.st_dev, pst.st_ino)
# Final progress line with the overall rate.
172 elapsed = time.time() - index_start
173 paths_per_sec = total / elapsed if elapsed else 0
174 progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
176 hlinks.prepare_save()
# Log markers for the pre-merge checks of the old and new index data (the
# check calls themselves are on lines not shown).
184 log('check: before merging: oldfile\n')
186 log('check: before merging: newfile\n')
# mi writes the merged result of the existing index (ri) and `wr`; `wr` is
# assigned on a line not shown in this listing.
188 mi = index.Writer(indexfile, msw, tmax)
190 for e in index.merge(ri, wr):
191 # FIXME: shouldn't we remove deleted entries eventually? When?
206 bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
209 p,print print the index entries for the given names (also works with -u)
210 m,modified print only added/deleted/modified files (implies -p)
211 s,status print each filename with a status char (A/M/D) (implies -p)
212 u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
213 check carefully check index file integrity
214 clear clear the default index
216 H,hash print the hash for each object next to its name
217 l,long print more information about each file
218 no-check-device don't invalidate an entry if the containing device changes
219 fake-valid mark all index entries as up-to-date even if they aren't
220 fake-invalid mark all index entries as invalid
221 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
222 exclude= a path to exclude from the backup (may be repeated)
223 exclude-from= skip --exclude paths in file (may be repeated)
224 exclude-rx= skip paths matching the unanchored regex (may be repeated)
225 exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
226 v,verbose increase log output (can be used more than once)
227 x,xdev,one-file-system don't cross filesystem boundaries
229 o = options.Options(optspec)
# Parse the command line.  `flags` feeds the exclude parsers and `extra`
# carries the positional arguments used as paths to update and/or print.
230 (opt, flags, extra) = o.parse(sys.argv[1:])
# Mode selection followed by option sanity checks; each o.fatal() message
# states the rule being enforced.  NOTE(review): the remainder of the
# condition that starts on the next line, and its body, are not shown.
232 if not (opt.modified or \
239 if (opt.fake_valid or opt.fake_invalid) and not opt.update:
240 o.fatal('--fake-{in,}valid are meaningless without -u')
241 if opt.fake_valid and opt.fake_invalid:
242 o.fatal('--fake-valid is incompatible with --fake-invalid')
243 if opt.clear and opt.indexfile:
244 o.fatal('cannot clear an external index (via -f)')
246 # FIXME: remove this once we account for timestamp races, i.e. index;
247 # touch new-file; index. It's possible for this to happen quickly
248 # enough that new-file ends up with the same timestamp as the first
249 # index, and then bup will ignore it.
# Sleep for the remainder of the current second (1 minus the fractional
# part of the current time).
250 tick_start = time.time()
251 time.sleep(1 - (tick_start - int(tick_start)))
# Use the index file given with -f, otherwise 'bupindex' resolved through
# git.repo().
253 git.check_repo_or_die()
254 indexfile = opt.indexfile or git.repo('bupindex')
# Initial integrity check (its guarding condition is on a line not shown).
259 log('check: starting initial check.\n')
260 check_index(index.Reader(indexfile))
# Clear mode (its guarding condition is on a line not shown).
263 log('clear: clearing index.\n')
264 clear_index(indexfile)
# Update mode: an empty path list is rejected (the test is on a line not
# shown); then each path produced by index.reduce_paths(extra) is indexed
# with the parsed exclusion rules.  xexcept, built from the requested paths,
# is passed along as xdev_exceptions.
268 o.fatal('update mode (-u) requested but no paths given')
269 excluded_paths = parse_excludes(flags, o.fatal)
270 exclude_rxs = parse_rx_excludes(flags, o.fatal)
271 xexcept = index.unique_resolved_paths(extra)
272 for rp, path in index.reduce_paths(extra):
273 update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept)
# Print mode: list the index entries matching the given names, or [''] when
# no names were given.
275 if opt['print'] or opt.status or opt.modified:
276 for (name, ent) in index.Reader(indexfile).filter(extra or ['']):
278 and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
# Status classification; the status characters themselves are assigned on
# lines not shown.
284 elif not ent.is_valid():
285 if ent.sha == index.EMPTY_SHA:
# NOTE(review): .encode('hex') is a Python 2-only idiom; it does not work on
# Python 3 bytes/str -- confirm the intended interpreter.
292 line += ent.sha.encode('hex') + ' '
# Modes are rendered in octal.
294 line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode))
# An empty name is printed as './'.
295 print(line + (name or './'))
# The final check runs only when --check was combined with a printing or
# update mode.
297 if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
298 log('check: starting final check.\n')
299 check_index(index.Reader(indexfile))
# Report how many errors were collected in saved_errors (the guarding
# condition is on a line not shown).
302 log('WARNING: %d errors encountered.\n' % len(saved_errors))