3 import sys, stat, time, os, errno, re
4 from bup import metadata, options, git, index, drecurse, hlinkdb
5 from bup.helpers import *
6 from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
16 self.cur = self.i.next()
22 def check_index(reader):
24 log('check: checking forward iteration...\n')
27 for e in reader.forward_iter():
30 log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
32 assert(e.children_ofs)
33 assert(e.name.endswith('/'))
34 assert(not d.get(e.children_ofs))
36 if e.flags & index.IX_HASHVALID:
37 assert(e.sha != index.EMPTY_SHA)
39 assert(not e or e.name == '/') # last entry is *always* /
40 log('check: checking normal iteration...\n')
47 log('index error! at %r\n' % e)
49 log('check: passed.\n')
52 def clear_index(indexfile):
53 indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
54 for indexfile in indexfiles:
55 path = git.repo(indexfile)
59 log('clear: removed %s\n' % path)
61 if e.errno != errno.ENOENT:
65 def update_index(top, excluded_paths):
66 # tmax and tstart must be epoch nanoseconds.
67 tmax = (time.time() - 1) * 10**9
68 ri = index.Reader(indexfile)
69 msw = index.MetaStoreWriter(indexfile + '.meta')
70 wi = index.Writer(indexfile, msw, tmax)
71 rig = IterHelper(ri.iter(name=top))
72 tstart = int(time.time()) * 10**9
74 hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')
79 return (GIT_MODE_FILE, index.FAKE_SHA)
82 bup_dir = os.path.abspath(git.repo())
83 for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
85 excluded_paths=excluded_paths,
86 exclude_rxs=exclude_rxs):
87 if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
88 sys.stdout.write('%s\n' % path)
90 qprogress('Indexing: %d\r' % total)
91 elif not (total % 128):
92 qprogress('Indexing: %d\r' % total)
94 while rig.cur and rig.cur.name > path: # deleted paths
98 if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
99 hlinks.del_path(rig.cur.name)
101 if rig.cur and rig.cur.name == path: # paths that already existed
102 if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
103 hlinks.del_path(rig.cur.name)
104 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
105 hlinks.add_path(path, pst.st_dev, pst.st_ino)
106 meta = metadata.from_path(path, statinfo=pst)
107 # Clear these so they don't bloat the store -- they're
108 # already in the index (since they vary a lot and they're
109 # fixed length). If you've noticed "tmax", you might
110 # wonder why it's OK to do this, since that code may
111 # adjust (mangle) the index mtime and ctime -- producing
112 # fake values which must not end up in a .bupm. However,
113 # it looks like that shouldn't be possible: (1) When
114 # "save" validates the index entry, it always reads the
115 # metadata from the filesystem. (2) Metadata is only
116 # read/used from the index if hashvalid is true. (3) index
117 # always invalidates "faked" entries, because "old != new"
119 meta.ctime = meta.mtime = meta.atime = 0
120 meta_ofs = msw.store(meta)
121 rig.cur.from_stat(pst, meta_ofs, tstart,
122 check_device=opt.check_device)
123 if not (rig.cur.flags & index.IX_HASHVALID):
125 (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
126 rig.cur.flags |= index.IX_HASHVALID
132 meta = metadata.from_path(path, statinfo=pst)
133 # See same assignment to 0, above, for rationale.
134 meta.atime = meta.mtime = meta.ctime = 0
135 meta_ofs = msw.store(meta)
136 wi.add(path, pst, meta_ofs, hashgen = hashgen)
137 if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
138 hlinks.add_path(path, pst.st_dev, pst.st_ino)
140 progress('Indexing: %d, done.\n' % total)
142 hlinks.prepare_save()
150 log('check: before merging: oldfile\n')
152 log('check: before merging: newfile\n')
154 mi = index.Writer(indexfile, msw, tmax)
156 for e in index.merge(ri, wr):
157 # FIXME: shouldn't we remove deleted entries eventually? When?
172 bup index <-p|m|s|u> [options...] <filenames...>
175 p,print print the index entries for the given names (also works with -u)
176 m,modified print only added/deleted/modified files (implies -p)
177 s,status print each filename with a status char (A/M/D) (implies -p)
178 u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
179 check carefully check index file integrity
180 clear clear the index
182 H,hash print the hash for each object next to its name
183 l,long print more information about each file
184 no-check-device don't invalidate an entry if the containing device changes
185 fake-valid mark all index entries as up-to-date even if they aren't
186 fake-invalid mark all index entries as invalid
187 f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
188 exclude= a path to exclude from the backup (can be used more than once)
189 exclude-from= a file that contains exclude paths (can be used more than once)
190 exclude-rx= skip paths that match the unanchored regular expression
191 v,verbose increase log output (can be used more than once)
192 x,xdev,one-file-system don't cross filesystem boundaries
194 o = options.Options(optspec)
195 (opt, flags, extra) = o.parse(sys.argv[1:])
197 if not (opt.modified or \
204 if (opt.fake_valid or opt.fake_invalid) and not opt.update:
205 o.fatal('--fake-{in,}valid are meaningless without -u')
206 if opt.fake_valid and opt.fake_invalid:
207 o.fatal('--fake-valid is incompatible with --fake-invalid')
209 # FIXME: remove this once we account for timestamp races, i.e. index;
210 # touch new-file; index. It's possible for this to happen quickly
211 # enough that new-file ends up with the same timestamp as the first
212 # index, and then bup will ignore it.
213 tick_start = time.time()
214 time.sleep(1 - (tick_start - int(tick_start)))
216 git.check_repo_or_die()
217 indexfile = opt.indexfile or git.repo('bupindex')
222 log('check: starting initial check.\n')
223 check_index(index.Reader(indexfile))
226 log('clear: clearing index.\n')
227 clear_index(indexfile)
229 excluded_paths = parse_excludes(flags, o.fatal)
230 exclude_rxs = parse_rx_excludes(flags, o.fatal)
231 paths = index.reduce_paths(extra)
235 o.fatal('update mode (-u) requested but no paths given')
236 for (rp,path) in paths:
237 update_index(rp, excluded_paths)
239 if opt['print'] or opt.status or opt.modified:
240 for (name, ent) in index.Reader(indexfile).filter(extra or ['']):
242 and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
248 elif not ent.is_valid():
249 if ent.sha == index.EMPTY_SHA:
256 line += ent.sha.encode('hex') + ' '
258 line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode))
259 print line + (name or './')
261 if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
262 log('check: starting final check.\n')
263 check_index(index.Reader(indexfile))
266 log('WARNING: %d errors encountered.\n' % len(saved_errors))