1 #!/usr/bin/env python2.5
2 import sys, re, errno, stat, tempfile, struct, mmap
8 INDEX_HDR = 'BUPI\0\0\0\1'
9 INDEX_SIG = '!IIIIIQ20sH'
10 ENTLEN = struct.calcsize(INDEX_SIG)
16 class IndexError(Exception):
21 def __init__(self, path):
23 self.fd = os.open(path, os.O_RDONLY|os.O_LARGEFILE|os.O_NOFOLLOW)
24 #self.st = os.fstat(self.fd)
37 def __init__(self, name, m, ofs):
41 (self.dev, self.ctime, self.mtime, self.uid, self.gid,
43 self.flags) = struct.unpack(INDEX_SIG, buffer(m, ofs, ENTLEN))
46 return ("(%s,0x%04x,%d,%d,%d,%d,%d,0x%04x)"
47 % (self.name, self.dev,
48 self.ctime, self.mtime, self.uid, self.gid,
49 self.size, self.flags))
52 return struct.pack(INDEX_SIG, self.dev, self.ctime, self.mtime,
53 self.uid, self.gid, self.size, self.sha,
57 self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
59 def from_stat(self, st):
60 old = (self.dev, self.ctime, self.mtime,
61 self.uid, self.gid, self.size, self.flags & IX_EXISTS)
62 new = (st.st_dev, int(st.st_ctime), int(st.st_mtime),
63 st.st_uid, st.st_gid, st.st_size, IX_EXISTS)
65 self.ctime = int(st.st_ctime)
66 self.mtime = int(st.st_mtime)
69 self.size = st.st_size
70 self.flags |= IX_EXISTS
72 self.flags &= ~IX_HASHVALID
78 return cmp(a.name, b.name)
82 def __init__(self, filename):
83 self.filename = filename
88 f = open(filename, 'r+')
90 if e.errno == errno.ENOENT:
95 b = f.read(len(INDEX_HDR))
97 raise IndexError('%s: header: expected %r, got %r'
98 % (filename, INDEX_HDR, b))
99 st = os.fstat(f.fileno())
101 self.m = mmap.mmap(f.fileno(), 0,
103 mmap.PROT_READ|mmap.PROT_WRITE)
104 f.close() # map will persist beyond file close
112 while ofs < len(self.m):
113 eon = self.m.find('\0', ofs)
115 yield IxEntry(buffer(self.m, ofs, eon-ofs),
117 ofs = eon + 1 + ENTLEN
124 # Read all the iters in order; when more than one iter has the same entry,
125 # the *later* iter in the list wins. (ie. more recent iter entries replace earlier ones.)
127 def _last_writer_wins_iter(iters):
132 l.append([it.next(), it])
133 except StopIteration:
135 del iters # to avoid accidents
140 for (i,(v,it)) in enumerate(l):
141 #log('(%d) considering %d: %r\n' % (len(l), i, v))
150 l[i][0] = l[i][1].next()
151 except StopIteration:
156 def ix_encode(st, sha, flags):
157 return struct.pack(INDEX_SIG, st.st_dev, int(st.st_ctime),
158 int(st.st_mtime), st.st_uid, st.st_gid,
159 st.st_size, sha, flags)
163 def __init__(self, filename):
168 self.filename = filename = os.path.realpath(filename)
169 (dir,name) = os.path.split(filename)
170 (ffd,self.tmpname) = tempfile.mkstemp('.tmp', filename, dir)
171 self.f = os.fdopen(ffd, 'wb', 65536)
172 self.f.write(INDEX_HDR)
182 os.unlink(self.tmpname)
189 os.rename(self.tmpname, self.filename)
191 def _write(self, data):
195 def add(self, name, st, hashgen=None):
196 #log('ADDING %r\n' % name)
198 assert(cmp(self.lastfile, name) > 0) # reverse order only
205 flags |= IX_HASHVALID
208 data = name + '\0' + ix_encode(st, sha, flags)
211 def add_ixentry(self, e):
212 if self.lastfile and self.lastfile <= e.name:
213 raise IndexError('%r must come before %r'
214 % (e.name, self.lastfile))
215 self.lastfile = e.name
216 data = e.name + '\0' + e.packed()
219 def new_reader(self):
221 return IndexReader(self.tmpname)
226 saved_errors.append(e)
230 # fchdir() and lstat() are used for two reasons:
231 # - help out the kernel by not making it repeatedly look up the absolute path
232 # - avoid race conditions caused by doing listdir() on a changing symlink
233 def handle_path(ri, wi, dir, name, pst, xdev, can_delete_siblings):
241 #log('handle_path(%r,%r)\n' % (dir, name))
242 if stat.S_ISDIR(pst.st_mode):
243 if opt.verbose == 1: # log dirs only
244 sys.stdout.write('%s\n' % path)
247 OsFile(name).fchdir()
249 add_error(Exception('in %s: %s' % (dir, str(e))))
254 #log('* %r: %r\n' % (name, ld))
256 add_error(Exception('in %s: %s' % (path, str(e))))
263 add_error(Exception('in %s: %s' % (path, str(e))))
265 if xdev != None and st.st_dev != xdev:
266 log('Skipping %r: different filesystem.\n'
267 % os.path.realpath(p))
269 if stat.S_ISDIR(st.st_mode):
272 for p,st in reversed(sorted(lds)):
273 dirty += handle_path(ri, wi, path, p, st, xdev,
274 can_delete_siblings = True)
277 #log('endloop: ri.cur:%r path:%r\n' % (ri.cur.name, path))
278 while ri.cur and ri.cur.name > path:
279 #log('ricur:%r path:%r\n' % (ri.cur, path))
280 if can_delete_siblings and dir and ri.cur.name.startswith(dir):
281 #log(' --- deleting\n')
282 ri.cur.flags &= ~(IX_EXISTS | IX_HASHVALID)
286 if ri.cur and ri.cur.name == path:
287 dirty += ri.cur.from_stat(pst)
288 if dirty or not (ri.cur.flags & IX_HASHVALID):
289 #log(' --- updating %r\n' % path)
291 ri.cur.sha = hashgen(name)
292 ri.cur.flags |= IX_HASHVALID
296 wi.add(path, pst, hashgen = hashgen)
298 if opt.verbose > 1: # all files, not just dirs
299 sys.stdout.write('%s\n' % path)
304 def merge_indexes(out, r1, r2):
305 log('bup: merging indexes.\n')
306 for e in _last_writer_wins_iter([r1, r2]):
307 #if e.flags & IX_EXISTS:
312 def __init__(self, l):
319 self.cur = self.i.next()
320 except StopIteration:
325 def update_index(path):
326 ri = IndexReader(indexfile)
327 wi = IndexWriter(indexfile)
328 rig = MergeGetter(ri)
330 rpath = os.path.realpath(path)
339 (dir, name) = os.path.split(rpath)
340 if dir and dir[-1] != '/':
342 if stat.S_ISDIR(st.st_mode) and (not rpath or rpath[-1] != '/'):
344 can_delete_siblings = True
346 can_delete_siblings = False
347 OsFile(dir or '/').fchdir()
348 dirty = handle_path(rig, wi, dir, name, st, xdev, can_delete_siblings)
350 # make sure all the parents of the updated path exist and are invalidated
353 (rpath, junk) = os.path.split(rpath)
360 while rig.cur and rig.cur.name > p:
361 #log('FINISHING: %r path=%r d=%r\n' % (rig.cur.name, p, dirty))
363 if rig.cur and rig.cur.name == p:
365 rig.cur.flags &= ~IX_HASHVALID
368 wi.add(p, os.lstat(p))
375 mi = IndexWriter(indexfile)
376 merge_indexes(mi, ri, wi.new_reader())
382 bup index <-p|s|m|u> [options...] <filenames...>
384 p,print print the index entries for the given names (also works with -u)
385 m,modified print only added/deleted/modified files (implies -p)
386 s,status print each filename with a status char (A/M/D) (implies -p)
387 u,update (recursively) update the index entries for the given filenames
388 x,xdev,one-file-system don't cross filesystem boundaries
389 fake-valid mark all index entries as up-to-date even if they aren't
390 f,indexfile= the name of the index file (default 'index')
391 v,verbose increase log output (can be used more than once)
393 o = options.Options('bup index', optspec)
394 (opt, flags, extra) = o.parse(sys.argv[1:])
396 if not (opt.modified or opt['print'] or opt.status or opt.update):
397 log('bup index: you must supply one or more of -p, -s, -m, or -u\n')
399 if opt.fake_valid and not opt.update:
400 log('bup index: --fake-valid is meaningless without -u\n')
403 git.check_repo_or_die()
404 indexfile = opt.indexfile or git.repo('bupindex')
408 rp = os.path.realpath(path)
410 if stat.S_ISDIR(st.st_mode) and not rp.endswith('/'):
413 xpaths.append((rp, path))
416 for (rp, path) in reversed(sorted(xpaths)):
417 if paths and rp.endswith('/') and paths[-1][0].startswith(rp):
418 paths[-1] = (rp, path)
420 paths.append((rp, path))
424 log('bup index: update (-u) requested but no paths given\n')
426 for (rp, path) in paths:
429 if opt['print'] or opt.status or opt.modified:
430 pi = iter(paths or [('/', '/')])
431 (rpin, pin) = pi.next()
432 for ent in IndexReader(indexfile):
435 (rpin, pin) = pi.next()
436 except StopIteration:
437 break # no more files can possibly match
438 elif not ent.name.startswith(rpin):
439 continue # not interested
440 if opt.modified and ent.flags & IX_HASHVALID:
442 name = pin + ent.name[len(rpin):]
444 if not ent.flags & IX_EXISTS:
446 elif not ent.flags & IX_HASHVALID:
447 if ent.sha == EMPTY_SHA:
458 log('WARNING: %d errors encountered.\n' % len(saved_errors))