1 #!/usr/bin/env python2.5
2 import sys, re, errno, stat, tempfile, struct, mmap, time
# Magic bytes that open every index file.  The writer emits them first and
# the reader reads back len(INDEX_HDR) bytes, reporting a "header: expected
# ..., got ..." IndexError when they do not match.
8 INDEX_HDR = 'BUPI\0\0\0\1'
# struct format of one fixed-size index record, in network byte order with
# no padding: five 32-bit unsigned ints (dev, ctime, mtime, uid, gid), one
# 64-bit unsigned int (size), a 20-byte string (sha) and one 16-bit unsigned
# int (flags).
9 INDEX_SIG = '!IIIIIQ20sH'
# Size in bytes of one packed record (50 for the format above); each entry on
# disk is a NUL-terminated name followed by ENTLEN bytes of packed fields.
10 ENTLEN = struct.calcsize(INDEX_SIG)
16 class IndexError(Exception):
21 def __init__(self, path):
23 self.fd = os.open(path, os.O_RDONLY|os.O_LARGEFILE|os.O_NOFOLLOW)
24 #self.st = os.fstat(self.fd)
37 def __init__(self, name, m, ofs, tstart):
42 (self.dev, self.ctime, self.mtime, self.uid, self.gid,
44 self.flags) = struct.unpack(INDEX_SIG, buffer(m, ofs, ENTLEN))
47 return ("(%s,0x%04x,%d,%d,%d,%d,%d,0x%04x)"
48 % (self.name, self.dev,
49 self.ctime, self.mtime, self.uid, self.gid,
50 self.size, self.flags))
53 return struct.pack(INDEX_SIG, self.dev, self.ctime, self.mtime,
54 self.uid, self.gid, self.size, self.sha,
58 self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
60 def from_stat(self, st):
61 old = (self.dev, self.ctime, self.mtime,
62 self.uid, self.gid, self.size, self.flags & IX_EXISTS)
63 new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
64 st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
66 self.ctime = int(st.st_ctime)
67 self.mtime = int(st.st_mtime)
70 self.size = st.st_size
71 self.flags |= IX_EXISTS
72 if int(st.st_ctime) >= self.tstart or old != new:
73 self.flags &= ~IX_HASHVALID
79 return cmp(a.name, b.name)
83 def __init__(self, filename):
84 self.filename = filename
89 f = open(filename, 'r+')
91 if e.errno == errno.ENOENT:
96 b = f.read(len(INDEX_HDR))
98 raise IndexError('%s: header: expected %r, got %r'
99 % (filename, INDEX_HDR, b))
100 st = os.fstat(f.fileno())
102 self.m = mmap.mmap(f.fileno(), 0,
104 mmap.PROT_READ|mmap.PROT_WRITE)
105 f.close() # map will persist beyond file close
112 tstart = int(time.time())
114 while ofs < len(self.m):
115 eon = self.m.find('\0', ofs)
117 yield IxEntry(buffer(self.m, ofs, eon-ofs),
118 self.m, eon+1, tstart = tstart)
119 ofs = eon + 1 + ENTLEN
126 # Read all the iters in order; when more than one iter has the same entry,
127 # the *later* iter in the list wins. (ie. more recent iter entries replace
129 def _last_writer_wins_iter(iters):
134 l.append([it.next(), it])
135 except StopIteration:
137 del iters # to avoid accidents
142 for (i,(v,it)) in enumerate(l):
143 #log('(%d) considering %d: %r\n' % (len(l), i, v))
152 l[i][0] = l[i][1].next()
153 except StopIteration:
def ix_encode(st, sha, flags):
    """Serialize one index record into its fixed-size packed form.

    `st` is a stat-like object supplying st_dev, st_ctime, st_mtime,
    st_uid, st_gid and st_size.  The two timestamps are truncated to whole
    seconds with int() before packing; `sha` and `flags` are stored as
    given.  Returns the result of struct.pack() using the INDEX_SIG layout.
    """
    # Field order must match INDEX_SIG exactly.
    fields = (
        st.st_dev,
        int(st.st_ctime),
        int(st.st_mtime),
        st.st_uid,
        st.st_gid,
        st.st_size,
        sha,
        flags,
    )
    return struct.pack(INDEX_SIG, *fields)
165 def __init__(self, filename):
170 self.filename = filename = os.path.realpath(filename)
171 (dir,name) = os.path.split(filename)
172 (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
173 self.f = os.fdopen(ffd, 'wb', 65536)
174 self.f.write(INDEX_HDR)
184 os.unlink(self.tmpname)
191 os.rename(self.tmpname, self.filename)
193 def _write(self, data):
197 def add(self, name, st, hashgen=None):
198 #log('ADDING %r\n' % name)
200 assert(cmp(self.lastfile, name) > 0) # reverse order only
207 flags |= IX_HASHVALID
210 data = name + '\0' + ix_encode(st, sha, flags)
213 def add_ixentry(self, e):
214 if self.lastfile and self.lastfile <= e.name:
215 raise IndexError('%r must come before %r'
216 % (e.name, self.lastfile))
217 self.lastfile = e.name
218 data = e.name + '\0' + e.packed()
221 def new_reader(self):
223 return IndexReader(self.tmpname)
228 saved_errors.append(e)
232 # fchdir() and lstat() are used for two reasons:
233 # - help out the kernel by not making it repeatedly look up the absolute path
234 # - avoid race conditions caused by doing listdir() on a changing symlink
235 def handle_path(ri, wi, dir, name, pst, xdev, can_delete_siblings):
243 #log('handle_path(%r,%r)\n' % (dir, name))
244 if stat.S_ISDIR(pst.st_mode):
245 if opt.verbose == 1: # log dirs only
246 sys.stdout.write('%s\n' % path)
249 OsFile(name).fchdir()
251 add_error(Exception('in %s: %s' % (dir, str(e))))
256 #log('* %r: %r\n' % (name, ld))
258 add_error(Exception('in %s: %s' % (path, str(e))))
265 add_error(Exception('in %s: %s' % (path, str(e))))
267 if xdev != None and st.st_dev != xdev:
268 log('Skipping %r: different filesystem.\n'
269 % os.path.realpath(p))
271 if stat.S_ISDIR(st.st_mode):
274 for p,st in reversed(sorted(lds)):
275 dirty += handle_path(ri, wi, path, p, st, xdev,
276 can_delete_siblings = True)
279 #log('endloop: ri.cur:%r path:%r\n' % (ri.cur.name, path))
280 while ri.cur and ri.cur.name > path:
281 #log('ricur:%r path:%r\n' % (ri.cur, path))
282 if can_delete_siblings and dir and ri.cur.name.startswith(dir):
283 #log(' --- deleting\n')
284 ri.cur.flags &= ~(IX_EXISTS | IX_HASHVALID)
288 if ri.cur and ri.cur.name == path:
289 dirty += ri.cur.from_stat(pst)
290 if dirty or not (ri.cur.flags & IX_HASHVALID):
291 #log(' --- updating %r\n' % path)
293 ri.cur.sha = hashgen(name)
294 ri.cur.flags |= IX_HASHVALID
298 wi.add(path, pst, hashgen = hashgen)
300 if opt.verbose > 1: # all files, not just dirs
301 sys.stdout.write('%s\n' % path)
306 def merge_indexes(out, r1, r2):
307 log('bup: merging indexes.\n')
308 for e in _last_writer_wins_iter([r1, r2]):
309 #if e.flags & IX_EXISTS:
314 def __init__(self, l):
321 self.cur = self.i.next()
322 except StopIteration:
328 if s and not s.endswith('/'):
333 def update_index(path):
334 ri = IndexReader(indexfile)
335 wi = IndexWriter(indexfile)
336 rig = MergeGetter(ri)
338 rpath = os.path.realpath(path)
347 (dir, name) = os.path.split(rpath)
348 dir = _slashappend(dir)
349 if stat.S_ISDIR(st.st_mode) and (not rpath or rpath[-1] != '/'):
351 can_delete_siblings = True
353 can_delete_siblings = False
354 OsFile(dir or '/').fchdir()
355 dirty = handle_path(rig, wi, dir, name, st, xdev, can_delete_siblings)
357 # make sure all the parents of the updated path exist and are invalidated
360 (rpath, junk) = os.path.split(rpath)
367 while rig.cur and rig.cur.name > p:
368 #log('FINISHING: %r path=%r d=%r\n' % (rig.cur.name, p, dirty))
370 if rig.cur and rig.cur.name == p:
372 rig.cur.flags &= ~IX_HASHVALID
375 wi.add(p, os.lstat(p))
382 mi = IndexWriter(indexfile)
383 merge_indexes(mi, ri, wi.new_reader())
389 bup index <-p|s|m|u> [options...] <filenames...>
391 p,print print the index entries for the given names (also works with -u)
392 m,modified print only added/deleted/modified files (implies -p)
393 s,status print each filename with a status char (A/M/D) (implies -p)
394 u,update (recursively) update the index entries for the given filenames
395 x,xdev,one-file-system don't cross filesystem boundaries
396 fake-valid mark all index entries as up-to-date even if they aren't
397 f,indexfile= the name of the index file (default 'index')
398 v,verbose increase log output (can be used more than once)
400 o = options.Options('bup index', optspec)
401 (opt, flags, extra) = o.parse(sys.argv[1:])
403 if not (opt.modified or opt['print'] or opt.status or opt.update):
404 log('bup index: you must supply one or more of -p, -s, -m, or -u\n')
406 if opt.fake_valid and not opt.update:
407 log('bup index: --fake-valid is meaningless without -u\n')
410 git.check_repo_or_die()
411 indexfile = opt.indexfile or git.repo('bupindex')
415 rp = os.path.realpath(path)
417 if stat.S_ISDIR(st.st_mode):
418 rp = _slashappend(rp)
419 path = _slashappend(path)
420 xpaths.append((rp, path))
423 for (rp, path) in reversed(sorted(xpaths)):
424 if paths and rp.endswith('/') and paths[-1][0].startswith(rp):
425 paths[-1] = (rp, path)
427 paths.append((rp, path))
431 log('bup index: update (-u) requested but no paths given\n')
433 for (rp, path) in paths:
436 if opt['print'] or opt.status or opt.modified:
437 pi = iter(paths or [(_slashappend(os.path.realpath('.')), '')])
438 (rpin, pin) = pi.next()
439 for ent in IndexReader(indexfile):
442 (rpin, pin) = pi.next()
443 except StopIteration:
444 break # no more files can possibly match
445 elif not ent.name.startswith(rpin):
446 continue # not interested
447 if opt.modified and ent.flags & IX_HASHVALID:
449 name = pin + ent.name[len(rpin):]
453 if not ent.flags & IX_EXISTS:
455 elif not ent.flags & IX_HASHVALID:
456 if ent.sha == EMPTY_SHA:
467 log('WARNING: %d errors encountered.\n' % len(saved_errors))