1 from __future__ import absolute_import, print_function
3 from binascii import hexlify
4 import errno, os, stat, sys, time
6 from bup import metadata, options, git, index, hlinkdb
7 from bup.compat import argv_bytes
8 from bup.drecurse import recursive_dirlist
9 from bup.hashsplit import GIT_MODE_FILE
10 from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
11 progress, qprogress, saved_errors)
12 from bup.io import byte_stream, path_msg
16 def __init__(self, l):
22 self.cur = next(self.i, None)
# Sanity-check the index exposed by `reader`, logging each phase as it runs.
# The checks are plain assertions, so a violated invariant raises
# AssertionError.
# NOTE(review): only part of this function is visible in this excerpt; the
# comments below describe the visible lines only.
27 def check_index(reader, verbose):
29 log('check: checking forward iteration...\n')
32 for e in reader.forward_iter():
35 log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
# The next three assertions demand a truthy children_ofs, a name ending in
# b'/', and that `d` holds no truthy value for that offset yet.  The
# condition selecting which entries are checked is not visible here.
37 assert(e.children_ofs)
38 assert e.name.endswith(b'/')
39 assert(not d.get(e.children_ofs))
# An entry flagged IX_HASHVALID must carry a sha other than EMPTY_SHA.
41 if e.flags & index.IX_HASHVALID:
42 assert(e.sha != index.EMPTY_SHA)
44 assert not e or bytes(e.name) == b'/' # last entry is *always* /
45 log('check: checking normal iteration...\n')
52 log('index error! at %r\n' % e)
54 log('check: passed.\n')
# Clear the index: operates on the index file plus its b'.meta' and
# b'.hlink' companions, logging 'clear: removed ...' per file.
# NOTE(review): the removal call itself is not visible in this excerpt.
57 def clear_index(indexfile, verbose):
58 indexfiles = [indexfile, indexfile + b'.meta', indexfile + b'.hlink']
# Note: the loop variable rebinds the `indexfile` parameter.
59 for indexfile in indexfiles:
63 log('clear: removed %s\n' % path_msg(indexfile))
# ENOENT is distinguished from every other errno -- presumably so that an
# already-missing file is not treated as an error (the handling itself is
# not visible here; TODO confirm).
65 if e.errno != errno.ENOENT:
# Bring the index at `indexfile` up to date for the tree rooted at `top`.
# Paths produced by recursive_dirlist() are matched against the existing
# index entries (through `rig`): entries already present are refreshed when
# stale, other paths are added through the index.Writer `wi`, and hard-link
# bookkeeping is kept in the b'.hlink' HLinkDB.  Metadata records go to the
# b'.meta' MetaStoreWriter.
# NOTE(review): parts of this function are not visible in this excerpt; the
# comments below describe the visible lines only.
69 def update_index(top, excluded_paths, exclude_rxs, indexfile,
70 check=False, check_device=True,
71 xdev=False, xdev_exceptions=frozenset(),
72 fake_valid=False, fake_invalid=False,
74 # tmax must be epoch nanoseconds.
# time.time() is in seconds: step back one second, then scale to ns.
75 tmax = (time.time() - 1) * 10**9
76 ri = index.Reader(indexfile)
77 msw = index.MetaStoreWriter(indexfile + b'.meta')
78 wi = index.Writer(indexfile, msw, tmax)
79 rig = IterHelper(ri.iter(name=top))
81 hlinks = hlinkdb.HLinkDB(indexfile + b'.hlink')
# NOTE(review): presumably the body of the fake_hash helper used further
# down -- it returns a fixed (GIT_MODE_FILE, FAKE_SHA) pair.
86 return (GIT_MODE_FILE, index.FAKE_SHA)
89 bup_dir = os.path.abspath(git.repo())
90 index_start = time.time()
91 for path, pst in recursive_dirlist([top],
94 excluded_paths=excluded_paths,
95 exclude_rxs=exclude_rxs,
96 xdev_exceptions=xdev_exceptions):
# verbose >= 2 echoes every path to `out`; verbose == 1 only directories.
97 if verbose>=2 or (verbose == 1 and stat.S_ISDIR(pst.st_mode)):
98 out.write(b'%s\n' % path)
# Progress rate is total / elapsed, guarded against a zero elapsed time.
100 elapsed = time.time() - index_start
101 paths_per_sec = total / elapsed if elapsed else 0
102 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
# elif branch: refresh progress only when total is a multiple of 128.
103 elif not (total % 128):
104 elapsed = time.time() - index_start
105 paths_per_sec = total / elapsed if elapsed else 0
106 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
109 while rig.cur and rig.cur.name > path: # deleted paths
111 rig.cur.set_deleted()
# A deleted, multiply-linked non-directory is also dropped from the
# hard-link db.
113 if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
114 hlinks.del_path(rig.cur.name)
117 if rig.cur and rig.cur.name == path: # paths that already existed
119 if(rig.cur.stale(pst, check_device=check_device)):
121 meta = metadata.from_path(path, statinfo=pst)
122 except (OSError, IOError) as e:
# Hard-link bookkeeping: forget the old entry's path if it was a
# multiply-linked non-directory, then record the path if it is one now.
126 if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
127 hlinks.del_path(rig.cur.name)
128 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
129 hlinks.add_path(path, pst.st_dev, pst.st_ino)
130 # Clear these so they don't bloat the store -- they're
131 # already in the index (since they vary a lot and they're
132 # fixed length). If you've noticed "tmax", you might
133 # wonder why it's OK to do this, since that code may
134 # adjust (mangle) the index mtime and ctime -- producing
135 # fake values which must not end up in a .bupm. However,
136 # it looks like that shouldn't be possible: (1) When
137 # "save" validates the index entry, it always reads the
138 # metadata from the filesystem. (2) Metadata is only
139 # read/used from the index if hashvalid is true. (3)
140 # "faked" entries will be stale(), and so we'll invalidate
142 meta.ctime = meta.mtime = meta.atime = 0
143 meta_ofs = msw.store(meta)
144 rig.cur.update_from_stat(pst, meta_ofs)
# Entry not marked IX_HASHVALID: an EMPTY_SHA entry takes fake_hash()'s
# (gitmode, sha), and IX_HASHVALID is set.
# NOTE(review): the guarding condition and exact nesting are not visible
# here -- presumably the fake_valid option; TODO confirm.
147 if not (rig.cur.flags & index.IX_HASHVALID):
149 if rig.cur.sha == index.EMPTY_SHA:
150 rig.cur.gitmode, rig.cur.sha = fake_hash(path)
151 rig.cur.flags |= index.IX_HASHVALID
# NOTE(review): presumably the branch for paths not yet in the index --
# metadata is stored with zeroed timestamps and the path is added via `wi`.
161 meta = metadata.from_path(path, statinfo=pst)
162 except (OSError, IOError) as e:
165 # See same assignment to 0, above, for rationale.
166 meta.atime = meta.mtime = meta.ctime = 0
167 meta_ofs = msw.store(meta)
168 wi.add(path, pst, meta_ofs, hashgen=fake_hash)
169 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
170 hlinks.add_path(path, pst.st_dev, pst.st_ino)
# Final progress line with the overall rate.
172 elapsed = time.time() - index_start
173 paths_per_sec = total / elapsed if elapsed else 0
174 progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
176 hlinks.prepare_save()
# Both the existing index (`ri`) and the new one (`wr`, defined on a line
# not visible here) are checked before index.merge() combines them into a
# fresh Writer.
184 log('check: before merging: oldfile\n')
185 check_index(ri, verbose)
186 log('check: before merging: newfile\n')
187 check_index(wr, verbose)
188 mi = index.Writer(indexfile, msw, tmax)
190 for e in index.merge(ri, wr):
191 # FIXME: shouldn't we remove deleted entries eventually? When?
206 bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
209 p,print print the index entries for the given names (also works with -u)
210 m,modified print only added/deleted/modified files (implies -p)
211 s,status print each filename with a status char (A/M/D) (implies -p)
212 u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
213 check carefully check index file integrity
214 clear clear the default index
216 H,hash print the hash for each object next to its name
217 l,long print more information about each file
218 no-check-device don't invalidate an entry if the containing device changes
219 fake-valid mark all index entries as up-to-date even if they aren't
220 fake-invalid mark all index entries as invalid
221 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
222 exclude= a path to exclude from the backup (may be repeated)
223 exclude-from= skip --exclude paths in file (may be repeated)
224 exclude-rx= skip paths matching the unanchored regex (may be repeated)
225 exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
226 v,verbose increase log output (can be used more than once)
227 x,xdev,one-file-system don't cross filesystem boundaries
# Command driver: parse options, optionally check and/or clear the index,
# update it for the given paths, print entries, and run a final check.
# NOTE(review): the enclosing definition's header and several branch
# headers are not visible in this excerpt; comments cover visible lines only.
231 o = options.Options(optspec)
232 opt, flags, extra = o.parse_bytes(argv[1:])
234 if not (opt.modified or \
241 if (opt.fake_valid or opt.fake_invalid) and not opt.update:
242 o.fatal('--fake-{in,}valid are meaningless without -u')
243 if opt.fake_valid and opt.fake_invalid:
244 o.fatal('--fake-valid is incompatible with --fake-invalid')
245 if opt.clear and opt.indexfile:
246 o.fatal('cannot clear an external index (via -f)')
248 # FIXME: remove this once we account for timestamp races, i.e. index;
249 # touch new-file; index. It's possible for this to happen quickly
250 # enough that new-file ends up with the same timestamp as the first
251 # index, and then bup will ignore it.
# Sleep for the remainder of the current wall-clock second, so execution
# resumes at (about) the next whole-second boundary.
252 tick_start = time.time()
253 time.sleep(1 - (tick_start - int(tick_start)))
255 git.check_repo_or_die()
259 if opt.verbose is None:
# An explicitly given index file name is converted with argv_bytes(); the
# other visible assignment uses 'bupindex' inside the repository.
263 indexfile = argv_bytes(opt.indexfile)
265 indexfile = git.repo(b'bupindex')
268 log('check: starting initial check.\n')
269 check_index(index.Reader(indexfile), opt.verbose)
272 log('clear: clearing index.\n')
273 clear_index(indexfile, opt.verbose)
276 out = byte_stream(sys.stdout)
280 o.fatal('update mode (-u) requested but no paths given')
# Update mode: each reduced path is indexed in turn; the resolved set of
# all requested paths is handed over as the xdev exceptions.
281 extra = [argv_bytes(x) for x in extra]
282 excluded_paths = parse_excludes(flags, o.fatal)
283 exclude_rxs = parse_rx_excludes(flags, o.fatal)
284 xexcept = index.unique_resolved_paths(extra)
285 for rp, path in index.reduce_paths(extra):
286 update_index(rp, excluded_paths, exclude_rxs, indexfile,
287 check=opt.check, check_device=opt.check_device,
288 xdev=opt.xdev, xdev_exceptions=xexcept,
289 fake_valid=opt.fake_valid,
290 fake_invalid=opt.fake_invalid,
291 out=out, verbose=opt.verbose)
# Print modes: iterate over the index entries matching the given names,
# falling back to [b''] when no names were given.
293 if opt['print'] or opt.status or opt.modified:
294 extra = [argv_bytes(x) for x in extra]
295 for name, ent in index.Reader(indexfile).filter(extra or [b'']):
297 and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
303 elif not ent.is_valid():
304 if ent.sha == index.EMPTY_SHA:
# The output line gains the hex sha, then mode and gitmode in octal.
311 line += hexlify(ent.sha) + b' '
313 line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
314 oct(ent.gitmode).encode('ascii'))
# An empty name is printed as b'./'.
315 out.write(line + (name or b'./') + b'\n')
# The final integrity check runs only when check was combined with a
# print/status/modified/update mode.
317 if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
318 log('check: starting final check.\n')
319 check_index(index.Reader(indexfile), opt.verbose)
322 log('WARNING: %d errors encountered.\n' % len(saved_errors))