3 import sys, stat, time, os, errno, re
4 from bup import metadata, options, git, index, drecurse, hlinkdb
5 from bup.helpers import *
6 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
# NOTE(review): isolated fragment of the IterHelper iterator wrapper --
# the enclosing class and def lines are elided in this excerpt.
# Python 2 iterator protocol: .next() method, not the py3 next() builtin.
self.cur = self.i.next()
def check_index(reader):
    """Paranoid self-check of a bupindex via the given index.Reader.

    Walks the index twice -- forward (tree) iteration, then normal
    iteration -- asserting structural invariants, and logs progress.
    Failures surface as AssertionError; the (elided) exception handler
    logs the offending entry before propagating.

    NOTE(review): several lines are elided in this excerpt (the
    try/except wrapper, the duplicate-children_ofs dict `d` setup, and
    the ordering check of the second pass); indentation below is a
    best-effort reconstruction -- verify against the full file.
    """
    log('check: checking forward iteration...\n')
    for e in reader.forward_iter():
        # NOTE(review): continuation of this log() call is elided.
        log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
        # An entry with children must be a directory ('/'-terminated
        # name), have a nonzero children offset, and that offset must
        # not have been seen before (d presumably maps ofs -> seen).
        assert(e.children_ofs)
        assert(e.name.endswith('/'))
        assert(not d.get(e.children_ofs))
        if e.flags & index.IX_HASHVALID:
            # A hash flagged valid must be a real hash, not the
            # all-zero placeholder.
            assert(e.sha != index.EMPTY_SHA)
    assert(not e or e.name == '/') # last entry is *always* /
    log('check: checking normal iteration...\n')
    # NOTE(review): elided except-clause body -- logs the entry that
    # tripped the check, then (presumably) re-raises.
    log('index error! at %r\n' % e)
    log('check: passed.\n')
def clear_index(indexfile):
    """Delete the index file plus its '.meta' and '.hlink' companions.

    A missing file is tolerated (ENOENT is swallowed by the elided
    except clause); any other OSError presumably propagates.

    NOTE(review): the try/os.remove/except lines are elided in this
    excerpt; indentation is reconstructed.
    """
    indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
    # NOTE(review): the loop variable deliberately (?) shadows the
    # `indexfile` parameter -- confirm no later use of the original.
    for indexfile in indexfiles:
        path = git.repo(indexfile)
        log('clear: removed %s\n' % path)
        # Ignore "file not found"; anything else is a real error.
        if e.errno != errno.ENOENT:
def update_index(top, excluded_paths, exclude_rxs):
    """Rescan the filesystem under `top` and refresh the bupindex.

    Merges the existing on-disk index (via index.Reader/IterHelper)
    with a fresh drecurse walk of `top`, writing an updated index,
    metadata store (indexfile + '.meta') and hardlink db
    (indexfile + '.hlink').  Relies on the module-level `indexfile`
    and the parsed `opt` values.

    NOTE(review): many lines are elided in this excerpt (the hashgen
    def and `total` counter setup, rig.next()/repack() calls, the
    `else:  # new paths` branch header, and the final merge/close
    sequence); indentation is a best-effort reconstruction -- verify
    against the full file.
    """
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    # NOTE(review): body of an elided nested `def hashgen(name):`,
    # presumably installed only under --fake-valid: pretend every file
    # is already saved.
        return (GIT_MODE_FILE, index.FAKE_SHA)

    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    # NOTE(review): an elided `bup_dir=bup_dir,` keyword presumably
    # sits between these continuation lines.
    for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
                                                 excluded_paths=excluded_paths,
                                                 exclude_rxs=exclude_rxs):
        # Verbose: echo every path (-vv) or just directories (-v).
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            # Refresh the progress line only every 128 paths to keep
            # the progress overhead negligible.
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))

        # Old-index entries sorting after `path` were not seen by this
        # walk: mark deleted and drop their hardlink-db records.
        while rig.cur and rig.cur.name > path: # deleted paths
            rig.cur.set_deleted()
            if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                hlinks.del_path(rig.cur.name)

        if rig.cur and rig.cur.name == path: # paths that already existed
            # NOTE(review): the enclosing try: and the except body
            # (presumably add_error + skip) are elided here.
            meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError), e:
            # Refresh hardlink-db membership from the new stat info.
            if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                hlinks.del_path(rig.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Clear these so they don't bloat the store -- they're
            # already in the index (since they vary a lot and they're
            # fixed length).  If you've noticed "tmax", you might
            # wonder why it's OK to do this, since that code may
            # adjust (mangle) the index mtime and ctime -- producing
            # fake values which must not end up in a .bupm.  However,
            # it looks like that shouldn't be possible:  (1) When
            # "save" validates the index entry, it always reads the
            # metadata from the filesystem.  (2) Metadata is only
            # read/used from the index if hashvalid is true.  (3) index
            # always invalidates "faked" entries, because "old != new"
            # will be true if either timestamp is actually 0 or negative.
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = msw.store(meta)
            rig.cur.from_stat(pst, meta_ofs, tstart,
                              check_device=opt.check_device)
            if not (rig.cur.flags & index.IX_HASHVALID):
                # NOTE(review): presumably guarded by an elided
                # `if hashgen:` -- only --fake-valid provides hashes here.
                (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                rig.cur.flags |= index.IX_HASHVALID
            # NOTE(review): elided here -- fake-invalid handling,
            # rig.cur.repack(), rig.next(), and the `else:` header of
            # the new-paths branch below.
            meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError), e:
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen = hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    # Final (stderr) progress summary; guard against elapsed == 0.
    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    # NOTE(review): the merge tail is heavily elided here (ri.save(),
    # wi.flush(), wr = wi.new_reader(), the opt.check guard around the
    # two pre-merge check_index() calls, and the close/commit calls).
    log('check: before merging: oldfile\n')
    log('check: before merging: newfile\n')
    mi = index.Writer(indexfile, msw, tmax)

    for e in index.merge(ri, wr):
        # FIXME: shouldn't we remove deleted entries eventually? When?
# NOTE(review): interior of the module-level docopt-style `optspec`
# string; its `optspec = """` opening, the `--` section separator, and
# the closing quotes are elided in this excerpt.  This text is runtime
# data: options.Options parses it to build the flag table and --help
# output, so wording/spelling must not be changed casually.
bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
p,print    print the index entries for the given names (also works with -u)
m,modified print only added/deleted/modified files (implies -p)
s,status   print each filename with a status char (A/M/D) (implies -p)
u,update   recursively update the index entries for the given file/dir names (default if no mode is specified)
check      carefully check index file integrity
clear      clear the default index
H,hash     print the hash for each object next to its name
l,long     print more information about each file
no-check-device don't invalidate an entry if the containing device changes
fake-valid mark all index entries as up-to-date even if they aren't
fake-invalid mark all index entries as invalid
f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
exclude= a path to exclude from the backup (may be repeated)
exclude-from= skip --exclude paths in file (may be repeated)
exclude-rx= skip paths matching the unanchored regex (may be repeated)
exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
v,verbose  increase log output (can be used more than once)
x,xdev,one-file-system don't cross filesystem boundaries
# ---- top-level command body: parse options, validate, dispatch ----
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

# Default to update mode when no explicit mode flag was given.
# NOTE(review): the remaining or-clauses of this condition and the
# `opt.update = 1` assignment are elided in this excerpt.
if not (opt.modified or \
# Mutually-exclusive / dependent flag validation.
if (opt.fake_valid or opt.fake_invalid) and not opt.update:
    o.fatal('--fake-{in,}valid are meaningless without -u')
if opt.fake_valid and opt.fake_invalid:
    o.fatal('--fake-valid is incompatible with --fake-invalid')
if opt.clear and opt.indexfile:
    o.fatal('cannot clear an external index (via -f)')

# FIXME: remove this once we account for timestamp races, i.e. index;
# touch new-file; index.  It's possible for this to happen quickly
# enough that new-file ends up with the same timestamp as the first
# index, and then bup will ignore it.
tick_start = time.time()
time.sleep(1 - (tick_start - int(tick_start)))

git.check_repo_or_die()
indexfile = opt.indexfile or git.repo('bupindex')

# NOTE(review): presumably guarded by an elided `if opt.check:`.
log('check: starting initial check.\n')
check_index(index.Reader(indexfile))

# NOTE(review): presumably guarded by an elided `if opt.clear:`.
log('clear: clearing index.\n')
clear_index(indexfile)

excluded_paths = parse_excludes(flags, o.fatal)
exclude_rxs = parse_rx_excludes(flags, o.fatal)
paths = index.reduce_paths(extra)

# NOTE(review): elided `if opt.update:` / `if not extra:` guards above
# this fatal; update runs once per reduced (realpath, path) pair.
    o.fatal('update mode (-u) requested but no paths given')
for (rp,path) in paths:
    update_index(rp, excluded_paths, exclude_rxs)

if opt['print'] or opt.status or opt.modified:
    for (name, ent) in index.Reader(indexfile).filter(extra or ['']):
        # NOTE(review): heavily elided print loop -- the -m skip
        # condition's first line, `line = ''`, the opt.status /
        # opt.hash / opt.long guards, and the D/A/M branch bodies are
        # missing from this excerpt.
        and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
        elif not ent.is_valid():
            if ent.sha == index.EMPTY_SHA:
        line += ent.sha.encode('hex') + ' '
        line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode))
        print line + (name or './')

if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
    log('check: starting final check.\n')
    check_index(index.Reader(indexfile))

# NOTE(review): presumably guarded by an elided `if saved_errors:`,
# followed by a nonzero sys.exit().
log('WARNING: %d errors encountered.\n' % len(saved_errors))