1 from __future__ import absolute_import, print_function
3 from binascii import hexlify
4 import errno, os, stat, sys, time
6 from bup import metadata, options, git, index, hlinkdb
7 from bup.compat import argv_bytes
8 from bup.drecurse import recursive_dirlist
9 from bup.hashsplit import GIT_MODE_FILE
10 from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
11 progress, qprogress, saved_errors)
12 from bup.io import byte_stream, path_msg
16 def __init__(self, l):
22 self.cur = next(self.i, None)
27 def check_index(reader, verbose):
29 log('check: checking forward iteration...\n')
32 for e in reader.forward_iter():
35 log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
37 assert(e.children_ofs)
38 assert e.name.endswith(b'/')
39 assert(not d.get(e.children_ofs))
41 if e.flags & index.IX_HASHVALID:
42 assert(e.sha != index.EMPTY_SHA)
44 assert not e or bytes(e.name) == b'/' # last entry is *always* /
45 log('check: checking normal iteration...\n')
52 log('index error! at %r\n' % e)
54 log('check: passed.\n')
57 def clear_index(indexfile, verbose):
58 indexfiles = [indexfile, indexfile + b'.meta', indexfile + b'.hlink']
59 for indexfile in indexfiles:
63 log('clear: removed %s\n' % path_msg(indexfile))
65 if e.errno != errno.ENOENT:
69 def update_index(top, excluded_paths, exclude_rxs, indexfile,
70 check=False, check_device=True,
71 xdev=False, xdev_exceptions=frozenset(),
72 fake_valid=False, fake_invalid=False,
74 # tmax must be epoch nanoseconds.
75 tmax = (time.time() - 1) * 10**9
77 with index.MetaStoreWriter(indexfile + b'.meta') as msw, \
78 hlinkdb.HLinkDB(indexfile + b'.hlink') as hlinks, \
79 index.Writer(indexfile, msw, tmax) as wi, \
80 index.Reader(indexfile) as ri:
82 rig = IterHelper(ri.iter(name=top))
87 return (GIT_MODE_FILE, index.FAKE_SHA)
90 bup_dir = os.path.abspath(git.repo())
91 index_start = time.time()
92 for path, pst in recursive_dirlist([top],
95 excluded_paths=excluded_paths,
96 exclude_rxs=exclude_rxs,
97 xdev_exceptions=xdev_exceptions):
98 if verbose>=2 or (verbose == 1 and stat.S_ISDIR(pst.st_mode)):
99 out.write(b'%s\n' % path)
101 elapsed = time.time() - index_start
102 paths_per_sec = total / elapsed if elapsed else 0
103 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
104 elif not (total % 128):
105 elapsed = time.time() - index_start
106 paths_per_sec = total / elapsed if elapsed else 0
107 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
110 while rig.cur and rig.cur.name > path: # deleted paths
112 rig.cur.set_deleted()
114 if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
115 hlinks.del_path(rig.cur.name)
118 if rig.cur and rig.cur.name == path: # paths that already existed
120 if(rig.cur.stale(pst, check_device=check_device)):
122 meta = metadata.from_path(path, statinfo=pst)
123 except (OSError, IOError) as e:
127 if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
128 hlinks.del_path(rig.cur.name)
129 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
130 hlinks.add_path(path, pst.st_dev, pst.st_ino)
131 # Clear these so they don't bloat the store -- they're
132 # already in the index (since they vary a lot and they're
133 # fixed length). If you've noticed "tmax", you might
134 # wonder why it's OK to do this, since that code may
135 # adjust (mangle) the index mtime and ctime -- producing
136 # fake values which must not end up in a .bupm. However,
137 # it looks like that shouldn't be possible: (1) When
138 # "save" validates the index entry, it always reads the
139 # metadata from the filesystem. (2) Metadata is only
140 # read/used from the index if hashvalid is true. (3)
141 # "faked" entries will be stale(), and so we'll invalidate
143 meta.ctime = meta.mtime = meta.atime = 0
144 meta_ofs = msw.store(meta)
145 rig.cur.update_from_stat(pst, meta_ofs)
148 if not (rig.cur.flags & index.IX_HASHVALID):
150 if rig.cur.sha == index.EMPTY_SHA:
151 rig.cur.gitmode, rig.cur.sha = fake_hash(path)
152 rig.cur.flags |= index.IX_HASHVALID
162 meta = metadata.from_path(path, statinfo=pst)
163 except (OSError, IOError) as e:
166 # See same assignment to 0, above, for rationale.
167 meta.atime = meta.mtime = meta.ctime = 0
168 meta_ofs = msw.store(meta)
169 wi.add(path, pst, meta_ofs, hashgen=fake_hash)
170 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
171 hlinks.add_path(path, pst.st_dev, pst.st_ino)
173 elapsed = time.time() - index_start
174 paths_per_sec = total / elapsed if elapsed else 0
175 progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
177 hlinks.prepare_save()
185 with wi.new_reader() as wr:
187 log('check: before merging: oldfile\n')
188 check_index(ri, verbose)
189 log('check: before merging: newfile\n')
190 check_index(wr, verbose)
191 with index.Writer(indexfile, msw, tmax) as mi:
192 for e in index.merge(ri, wr):
193 # FIXME: shouldn't we remove deleted entries
202 bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
205 p,print print the index entries for the given names (also works with -u)
206 m,modified print only added/deleted/modified files (implies -p)
207 s,status print each filename with a status char (A/M/D) (implies -p)
208 u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
209 check carefully check index file integrity
210 clear clear the default index
212 H,hash print the hash for each object next to its name
213 l,long print more information about each file
214 no-check-device don't invalidate an entry if the containing device changes
215 fake-valid mark all index entries as up-to-date even if they aren't
216 fake-invalid mark all index entries as invalid
217 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
218 exclude= a path to exclude from the backup (may be repeated)
219 exclude-from= skip --exclude paths in file (may be repeated)
220 exclude-rx= skip paths matching the unanchored regex (may be repeated)
221 exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
222 v,verbose increase log output (can be used more than once)
223 x,xdev,one-file-system don't cross filesystem boundaries
227 o = options.Options(optspec)
228 opt, flags, extra = o.parse_bytes(argv[1:])
230 if not (opt.modified or \
237 if (opt.fake_valid or opt.fake_invalid) and not opt.update:
238 o.fatal('--fake-{in,}valid are meaningless without -u')
239 if opt.fake_valid and opt.fake_invalid:
240 o.fatal('--fake-valid is incompatible with --fake-invalid')
241 if opt.clear and opt.indexfile:
242 o.fatal('cannot clear an external index (via -f)')
244 # FIXME: remove this once we account for timestamp races, i.e. index;
245 # touch new-file; index. It's possible for this to happen quickly
246 # enough that new-file ends up with the same timestamp as the first
247 # index, and then bup will ignore it.
248 tick_start = time.time()
249 time.sleep(1 - (tick_start - int(tick_start)))
251 git.check_repo_or_die()
255 if opt.verbose is None:
259 indexfile = argv_bytes(opt.indexfile)
261 indexfile = git.repo(b'bupindex')
264 log('check: starting initial check.\n')
265 with index.Reader(indexfile) as reader:
266 check_index(reader, opt.verbose)
269 log('clear: clearing index.\n')
270 clear_index(indexfile, opt.verbose)
273 out = byte_stream(sys.stdout)
277 o.fatal('update mode (-u) requested but no paths given')
278 extra = [argv_bytes(x) for x in extra]
279 excluded_paths = parse_excludes(flags, o.fatal)
280 exclude_rxs = parse_rx_excludes(flags, o.fatal)
281 xexcept = index.unique_resolved_paths(extra)
282 for rp, path in index.reduce_paths(extra):
283 update_index(rp, excluded_paths, exclude_rxs, indexfile,
284 check=opt.check, check_device=opt.check_device,
285 xdev=opt.xdev, xdev_exceptions=xexcept,
286 fake_valid=opt.fake_valid,
287 fake_invalid=opt.fake_invalid,
288 out=out, verbose=opt.verbose)
290 if opt['print'] or opt.status or opt.modified:
291 extra = [argv_bytes(x) for x in extra]
292 with index.Reader(indexfile) as reader:
293 for name, ent in reader.filter(extra or [b'']):
295 and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
301 elif not ent.is_valid():
302 if ent.sha == index.EMPTY_SHA:
309 line += hexlify(ent.sha) + b' '
311 line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
312 oct(ent.gitmode).encode('ascii'))
313 out.write(line + (name or b'./') + b'\n')
315 if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
316 log('check: starting final check.\n')
317 with index.Reader(indexfile) as reader:
318 check_index(reader, opt.verbose)
321 log('WARNING: %d errors encountered.\n' % len(saved_errors))