1 #!/usr/bin/env python2.5
2 import sys, re, errno, stat, tempfile, struct, mmap
# Magic/version bytes found at the very start of an index file; the writer
# emits them first and the reader rejects files whose prefix differs.
8 INDEX_HDR = 'BUPI\0\0\0\1'
# struct layout of the fixed-size record that follows each NUL-terminated
# entry name.  '!' selects network byte order with standard sizes; the
# fields, in order, are: dev, ctime, mtime, uid, gid (five 32-bit unsigned
# ints), size (64-bit unsigned), sha (20-byte string), flags (16-bit unsigned).
9 INDEX_SIG = '!IIIIIQ20sH'
# Size in bytes of one packed INDEX_SIG record; used to step from one entry
# to the next when walking the mapped index.
10 ENTLEN = struct.calcsize(INDEX_SIG)
16 class IndexError(Exception):
21 def __init__(self, path):
23 self.fd = os.open(path, os.O_RDONLY|os.O_LARGEFILE|os.O_NOFOLLOW)
24 #self.st = os.fstat(self.fd)
37 def __init__(self, name, m, ofs):
41 (self.dev, self.ctime, self.mtime, self.uid, self.gid,
43 self.flags) = struct.unpack(INDEX_SIG, buffer(m, ofs, ENTLEN))
46 return ("(%s,0x%04x,%d,%d,%d,%d,%d,0x%04x)"
47 % (self.name, self.dev,
48 self.ctime, self.mtime, self.uid, self.gid,
49 self.size, self.flags))
52 return struct.pack(INDEX_SIG, self.dev, self.ctime, self.mtime,
53 self.uid, self.gid, self.size, self.sha,
57 self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
59 def from_stat(self, st):
60 old = (self.dev, self.ctime, self.mtime,
61 self.uid, self.gid, self.size, self.flags & IX_EXISTS)
62 new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
63 st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
65 self.ctime = int(st.st_ctime)
66 self.mtime = int(st.st_mtime)
69 self.size = st.st_size
70 self.flags |= IX_EXISTS
72 self.flags &= ~IX_HASHVALID
78 return cmp(a.name, b.name)
82 def __init__(self, filename):
83 self.filename = filename
88 f = open(filename, 'r+')
90 if e.errno == errno.ENOENT:
95 b = f.read(len(INDEX_HDR))
97 raise IndexError('%s: header: expected %r, got %r'
98 % (filename, INDEX_HDR, b))
99 st = os.fstat(f.fileno())
101 self.m = mmap.mmap(f.fileno(), 0,
103 mmap.PROT_READ|mmap.PROT_WRITE)
104 f.close() # map will persist beyond file close
112 while ofs < len(self.m):
113 eon = self.m.find('\0', ofs)
115 yield IxEntry(buffer(self.m, ofs, eon-ofs),
117 ofs = eon + 1 + ENTLEN
124 # Read all the iters in order; when more than one iter has the same entry,
125 # the *later* iter in the list wins. (i.e. more recent iter entries replace
127 def _last_writer_wins_iter(iters):
132 l.append([it.next(), it])
133 except StopIteration:
135 del iters # to avoid accidents
140 for (i,(v,it)) in enumerate(l):
141 #log('(%d) considering %d: %r\n' % (len(l), i, v))
150 l[i][0] = l[i][1].next()
151 except StopIteration:
def ix_encode(st, sha, flags):
    """Return the packed fixed-size index record for one entry.

    st    -- object exposing st_dev, st_ctime, st_mtime, st_uid, st_gid and
             st_size attributes (e.g. an os.lstat() result); the two
             timestamps are truncated to whole seconds with int().
    sha   -- value stored in the 20-byte hash field of INDEX_SIG.
    flags -- value stored in the trailing flags field of INDEX_SIG.
    """
    # Field order must match INDEX_SIG exactly.
    fields = (
        st.st_dev,
        int(st.st_ctime),
        int(st.st_mtime),
        st.st_uid,
        st.st_gid,
        st.st_size,
        sha,
        flags,
    )
    return struct.pack(INDEX_SIG, *fields)
163 def __init__(self, filename):
167 self.filename = filename = os.path.realpath(filename)
168 (dir,name) = os.path.split(filename)
169 (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
170 self.f = os.fdopen(ffd, 'wb', 65536)
171 self.f.write(INDEX_HDR)
181 os.unlink(self.tmpname)
188 os.rename(self.tmpname, self.filename)
190 def add(self, name, st, hashgen=None):
191 #log('ADDING %r\n' % name)
193 assert(cmp(self.lastfile, name) > 0) # reverse order only
200 flags |= IX_HASHVALID
203 data = name + '\0' + ix_encode(st, sha, flags)
206 def add_ixentry(self, e):
207 if self.lastfile and self.lastfile <= e.name:
208 raise IndexError('%r must come before %r'
209 % (e.name, self.lastfile))
210 self.lastfile = e.name
211 data = e.name + '\0' + e.packed()
214 def new_reader(self):
216 return IndexReader(self.tmpname)
221 saved_errors.append(e)
225 # fchdir() and lstat() are used for two reasons:
226 # - help out the kernel by not making it repeatedly look up the absolute path
227 # - avoid race conditions caused by doing listdir() on a changing symlink
228 def handle_path(ri, wi, dir, name, pst, xdev, can_delete_siblings):
236 #log('handle_path(%r,%r)\n' % (dir, name))
237 if stat.S_ISDIR(pst.st_mode):
238 if opt.verbose == 1: # log dirs only
239 sys.stdout.write('%s\n' % path)
242 OsFile(name).fchdir()
244 add_error(Exception('in %s: %s' % (dir, str(e))))
249 #log('* %r: %r\n' % (name, ld))
251 add_error(Exception('in %s: %s' % (path, str(e))))
258 add_error(Exception('in %s: %s' % (path, str(e))))
260 if xdev != None and st.st_dev != xdev:
261 log('Skipping %r: different filesystem.\n'
262 % os.path.realpath(p))
264 if stat.S_ISDIR(st.st_mode):
267 for p,st in reversed(sorted(lds)):
268 dirty += handle_path(ri, wi, path, p, st, xdev,
269 can_delete_siblings = True)
272 #log('endloop: ri.cur:%r path:%r\n' % (ri.cur.name, path))
273 while ri.cur and ri.cur.name > path:
274 #log('ricur:%r path:%r\n' % (ri.cur, path))
275 if can_delete_siblings and dir and ri.cur.name.startswith(dir):
276 #log(' --- deleting\n')
277 ri.cur.flags &= ~(IX_EXISTS | IX_HASHVALID)
281 if ri.cur and ri.cur.name == path:
282 dirty += ri.cur.from_stat(pst)
283 if dirty or not (ri.cur.flags & IX_HASHVALID):
284 #log(' --- updating %r\n' % path)
286 ri.cur.sha = hashgen(name)
287 ri.cur.flags |= IX_HASHVALID
291 wi.add(path, pst, hashgen = hashgen)
293 if opt.verbose > 1: # all files, not just dirs
294 sys.stdout.write('%s\n' % path)
299 def merge_indexes(out, r1, r2):
300 log('Merging indexes.\n')
301 for e in _last_writer_wins_iter([r1, r2]):
302 #if e.flags & IX_EXISTS:
307 def __init__(self, l):
314 self.cur = self.i.next()
315 except StopIteration:
320 def update_index(path):
321 ri = IndexReader(indexfile)
322 wi = IndexWriter(indexfile)
323 rig = MergeGetter(ri)
325 rpath = os.path.realpath(path)
334 (dir, name) = os.path.split(rpath)
335 if dir and dir[-1] != '/':
337 if stat.S_ISDIR(st.st_mode) and (not rpath or rpath[-1] != '/'):
339 can_delete_siblings = True
341 can_delete_siblings = False
342 OsFile(dir or '/').fchdir()
343 dirty = handle_path(rig, wi, dir, name, st, xdev, can_delete_siblings)
345 # make sure all the parents of the updated path exist and are invalidated
348 (rpath, junk) = os.path.split(rpath)
355 while rig.cur and rig.cur.name > p:
356 #log('FINISHING: %r path=%r d=%r\n' % (rig.cur.name, p, dirty))
358 if rig.cur and rig.cur.name == p:
360 rig.cur.flags &= ~IX_HASHVALID
363 wi.add(p, os.lstat(p))
369 mi = IndexWriter(indexfile)
370 merge_indexes(mi, ri, wi.new_reader())
376 bup index [options...] <filenames...>
378 p,print print index after updating
379 m,modified print only modified files (implies -p)
380 x,xdev,one-file-system don't cross filesystem boundaries
381 fake-valid mark all index entries as up-to-date even if they aren't
382 f,indexfile= the name of the index file (default 'index')
383 s,status print each filename with a status char (A/M/D) (implies -p)
384 v,verbose increase log output (can be used more than once)
386 o = options.Options('bup index', optspec)
387 (opt, flags, extra) = o.parse(sys.argv[1:])
389 indexfile = opt.indexfile or 'index'
393 rp = os.path.realpath(path)
395 if stat.S_ISDIR(st.st_mode) and not rp.endswith('/'):
400 for path in reversed(sorted(xpaths)):
401 if paths and path.endswith('/') and paths[-1].startswith(path):
409 if opt.fake_valid and not extra:
410 mi = IndexWriter(indexfile)
411 merge_indexes(mi, IndexReader(indexfile),
412 IndexWriter(indexfile).new_reader())
415 if opt['print'] or opt.status or opt.modified:
416 for ent in IndexReader(indexfile):
417 if opt.modified and ent.flags & IX_HASHVALID:
420 if not ent.flags & IX_EXISTS:
421 print 'D ' + ent.name
422 elif not ent.flags & IX_HASHVALID:
423 if ent.sha == EMPTY_SHA:
424 print 'A ' + ent.name
426 print 'M ' + ent.name
434 log('WARNING: %d errors encountered.\n' % len(saved_errors))