3 import sys, stat, time, os, errno, re
4 from bup import metadata, options, git, index, drecurse, hlinkdb
5 from bup.helpers import *
6 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
16 self.cur = self.i.next()
22 def check_index(reader):
24 log('check: checking forward iteration...\n')
27 for e in reader.forward_iter():
30 log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
32 assert(e.children_ofs)
33 assert(e.name.endswith('/'))
34 assert(not d.get(e.children_ofs))
36 if e.flags & index.IX_HASHVALID:
37 assert(e.sha != index.EMPTY_SHA)
39 assert(not e or e.name == '/') # last entry is *always* /
40 log('check: checking normal iteration...\n')
47 log('index error! at %r\n' % e)
49 log('check: passed.\n')
52 def clear_index(indexfile):
53 indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
54 for indexfile in indexfiles:
55 path = git.repo(indexfile)
59 log('clear: removed %s\n' % path)
61 if e.errno != errno.ENOENT:
65 def update_index(top, excluded_paths, exclude_rxs):
66 # tmax and start must be epoch nanoseconds.
67 tmax = (time.time() - 1) * 10**9
68 ri = index.Reader(indexfile)
69 msw = index.MetaStoreWriter(indexfile + '.meta')
70 wi = index.Writer(indexfile, msw, tmax)
71 rig = IterHelper(ri.iter(name=top))
72 tstart = int(time.time()) * 10**9
74 hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')
79 return (GIT_MODE_FILE, index.FAKE_SHA)
82 bup_dir = os.path.abspath(git.repo())
83 index_start = time.time()
84 for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
86 excluded_paths=excluded_paths,
87 exclude_rxs=exclude_rxs):
88 if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
89 sys.stdout.write('%s\n' % path)
91 paths_per_sec = total / (time.time() - index_start)
92 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
93 elif not (total % 128):
94 paths_per_sec = total / (time.time() - index_start)
95 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
97 while rig.cur and rig.cur.name > path: # deleted paths
101 if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
102 hlinks.del_path(rig.cur.name)
104 if rig.cur and rig.cur.name == path: # paths that already existed
106 meta = metadata.from_path(path, statinfo=pst)
107 except (OSError, IOError), e:
111 if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
112 hlinks.del_path(rig.cur.name)
113 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
114 hlinks.add_path(path, pst.st_dev, pst.st_ino)
115 # Clear these so they don't bloat the store -- they're
116 # already in the index (since they vary a lot and they're
117 # fixed length). If you've noticed "tmax", you might
118 # wonder why it's OK to do this, since that code may
119 # adjust (mangle) the index mtime and ctime -- producing
120 # fake values which must not end up in a .bupm. However,
121 # it looks like that shouldn't be possible: (1) When
122 # "save" validates the index entry, it always reads the
123 # metadata from the filesystem. (2) Metadata is only
124 # read/used from the index if hashvalid is true. (3) index
125 # always invalidates "faked" entries, because "old != new"
127 meta.ctime = meta.mtime = meta.atime = 0
128 meta_ofs = msw.store(meta)
129 rig.cur.from_stat(pst, meta_ofs, tstart,
130 check_device=opt.check_device)
131 if not (rig.cur.flags & index.IX_HASHVALID):
133 (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
134 rig.cur.flags |= index.IX_HASHVALID
141 meta = metadata.from_path(path, statinfo=pst)
142 except (OSError, IOError), e:
145 # See same assignment to 0, above, for rationale.
146 meta.atime = meta.mtime = meta.ctime = 0
147 meta_ofs = msw.store(meta)
148 wi.add(path, pst, meta_ofs, hashgen = hashgen)
149 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
150 hlinks.add_path(path, pst.st_dev, pst.st_ino)
152 paths_per_sec = total / (time.time() - index_start)
153 progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
155 hlinks.prepare_save()
163 log('check: before merging: oldfile\n')
165 log('check: before merging: newfile\n')
167 mi = index.Writer(indexfile, msw, tmax)
169 for e in index.merge(ri, wr):
170 # FIXME: shouldn't we remove deleted entries eventually? When?
185 bup index <-p|m|s|u> [options...] <filenames...>
188 p,print print the index entries for the given names (also works with -u)
189 m,modified print only added/deleted/modified files (implies -p)
190 s,status print each filename with a status char (A/M/D) (implies -p)
191 u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
192 check carefully check index file integrity
193 clear clear the default index
195 H,hash print the hash for each object next to its name
196 l,long print more information about each file
197 no-check-device don't invalidate an entry if the containing device changes
198 fake-valid mark all index entries as up-to-date even if they aren't
199 fake-invalid mark all index entries as invalid
200 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
201 exclude= a path to exclude from the backup (may be repeated)
202 exclude-from= skip --exclude paths in file (may be repeated)
203 exclude-rx= skip paths matching the unanchored regex (may be repeated)
204 exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
205 v,verbose increase log output (can be used more than once)
206 x,xdev,one-file-system don't cross filesystem boundaries
208 o = options.Options(optspec)
209 (opt, flags, extra) = o.parse(sys.argv[1:])
211 if not (opt.modified or \
218 if (opt.fake_valid or opt.fake_invalid) and not opt.update:
219 o.fatal('--fake-{in,}valid are meaningless without -u')
220 if opt.fake_valid and opt.fake_invalid:
221 o.fatal('--fake-valid is incompatible with --fake-invalid')
222 if opt.clear and opt.indexfile:
223 o.fatal('cannot clear an external index (via -f)')
225 # FIXME: remove this once we account for timestamp races, i.e. index;
226 # touch new-file; index. It's possible for this to happen quickly
227 # enough that new-file ends up with the same timestamp as the first
228 # index, and then bup will ignore it.
229 tick_start = time.time()
230 time.sleep(1 - (tick_start - int(tick_start)))
232 git.check_repo_or_die()
233 indexfile = opt.indexfile or git.repo('bupindex')
238 log('check: starting initial check.\n')
239 check_index(index.Reader(indexfile))
242 log('clear: clearing index.\n')
243 clear_index(indexfile)
245 excluded_paths = parse_excludes(flags, o.fatal)
246 exclude_rxs = parse_rx_excludes(flags, o.fatal)
247 paths = index.reduce_paths(extra)
251 o.fatal('update mode (-u) requested but no paths given')
252 for (rp,path) in paths:
253 update_index(rp, excluded_paths, exclude_rxs)
255 if opt['print'] or opt.status or opt.modified:
256 for (name, ent) in index.Reader(indexfile).filter(extra or ['']):
258 and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
264 elif not ent.is_valid():
265 if ent.sha == index.EMPTY_SHA:
272 line += ent.sha.encode('hex') + ' '
274 line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode))
275 print line + (name or './')
277 if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
278 log('check: starting final check.\n')
279 check_index(index.Reader(indexfile))
282 log('WARNING: %d errors encountered.\n' % len(saved_errors))