2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 * Please read the file COPYING, README and AUTHORS for more information.
8 * I/O abstraction interface.
9 * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
/* Revision identifier string embedded for this file. UNUSED is presumably a
 * project macro that silences the unused-variable warning -- TODO confirm. */
15 static char UNUSED id[] = "$Id: io.c,v 1.16 2006/07/23 23:11:44 alex Exp $";
21 #include <sys/types.h>
29 /* Enables extra debug messages in event add/delete/callback code. */
30 /* #define DEBUG_IO */
/* Member of the per-descriptor event record: the handler that is invoked with
 * the file descriptor and a short event mask. The remaining members of the
 * record are not visible in this excerpt. */
33 void (*callback)(int, short);
/* Initializer for an event record that is not in use. It has four fields;
 * NOTE(review): which field is which is not visible here -- presumably the
 * -1 is the descriptor and the trailing NULL the callback; confirm against
 * the struct definition. */
37 #define INIT_IOEVENT { NULL, -1, 0, NULL }
/* Backend selection. epoll is chosen when HAVE_EPOLL_CREATE is defined.
 * NOTE(review): the conditionals that pick kqueue or fall back to select are
 * not visible in this excerpt; presumably exactly one IO_USE_* macro ends up
 * defined -- confirm. */
40 #ifdef HAVE_EPOLL_CREATE
41 #define IO_USE_EPOLL 1
44 #define IO_USE_KQUEUE 1
46 #define IO_USE_SELECT 1
/* Set to true once a backend was initialized successfully and reset to false
 * again when the library is shut down. */
50 static bool library_initialized;
/* --- epoll backend state and helpers --- */
53 #include <sys/epoll.h>
/* Descriptor returned by epoll_create(). */
55 static int io_masterfd;
56 static bool io_event_change_epoll(int fd, short what, const int action);
57 static int io_dispatch_epoll(struct timeval *tv);
/* --- kqueue backend state and helpers --- */
61 #include <sys/types.h>
62 #include <sys/event.h>
/* Queue of struct kevent change records that have not been handed to
 * kevent() yet. */
63 static array io_evcache;
/* Descriptor returned by kqueue(). */
64 static int io_masterfd;
66 static int io_dispatch_kqueue(struct timeval *tv);
67 static bool io_event_change_kqueue(int, short, const int action);
/* --- select backend state and helpers --- */
71 #include "defines.h" /* for conn.h */
72 #include "conn.h" /* for CONN_IDX (needed by resolve.h) */
73 #include "resolve.h" /* for RES_STAT (needed by conf.h) */
74 #include "conf.h" /* for Conf_MaxConnections */
/* Descriptor sets watched for readability / writability. */
76 static fd_set readers;
77 static fd_set writers;
/* Highest descriptor tracked; select() is called with this value plus one. */
78 static int select_maxfd; /* the select() interface sucks badly */
79 static int io_dispatch_select(struct timeval *tv);
/* Table of io_event records, indexed by file descriptor (all backends). */
82 static array io_events;
/* Invokes the callback registered for a descriptor; defined at the end of
 * this file. */
84 static void io_docallback PARAMS((int fd, short what));
/* Fetch the io_event record stored at index fd in the io_events table.
 * NOTE(review): only this line of the lookup helper is visible; callers test
 * the helper's result for NULL, so array_get() presumably yields NULL for an
 * index that is not allocated -- confirm. */
93 i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
/* Initialize the I/O subsystem: pre-size the event table and set up the
 * backend that was selected at compile time.
 *
 * eventsize: initial number of io_event slots to allocate (0 skips the
 *            up-front allocation).
 *
 * NOTE(review): several lines of this function (return type, braces,
 * preprocessor branches, return statements) are not visible in this excerpt. */
102 io_library_init(unsigned int eventsize)
104 #if defined(IO_USE_EPOLL) || defined(IO_USE_KQUEUE)
/* epoll: size hint derived from eventsize; the replacement value used for a
 * non-positive hint is not visible here. */
108 int ecreate_hint = (int)eventsize;
109 if (ecreate_hint <= 0)
/* Guard against a second initialization (the action taken is not visible). */
112 if (library_initialized)
/* Clamp the initial table size so it stays below FD_SETSIZE. */
117 if (eventsize >= FD_SETSIZE)
118 eventsize = FD_SETSIZE - 1;
/* Allocate the event table up front; the failure branch is not visible. */
121 if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
/* epoll backend: create the epoll instance; success means a non-negative
 * master descriptor. */
125 io_masterfd = epoll_create(ecreate_hint);
127 "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
128 ecreate_hint, eventsize, io_masterfd);
129 ret = io_masterfd >= 0;
130 if (ret) library_initialized = true;
/* select backend: nothing to create, just report and enforce the
 * FD_SETSIZE limit on the configured connection count. */
135 Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
140 if (Conf_MaxConnections >= (int)FD_SETSIZE) {
142 "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
143 Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
145 Conf_MaxConnections = FD_SETSIZE - 1;
149 "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
150 #endif /* FD_SETSIZE */
151 library_initialized = true;
/* kqueue backend: create the kernel queue; success means a non-negative
 * master descriptor. */
155 io_masterfd = kqueue();
158 "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
159 eventsize, io_masterfd);
160 ret = io_masterfd >= 0;
161 if (ret) library_initialized = true;
/* Tear down the I/O subsystem: close the backend's master descriptor,
 * release the kqueue change cache and mark the library as uninitialized.
 * NOTE(review): the conditionals around these statements are not visible in
 * this excerpt; presumably each one is compiled only for its backend. */
169 io_library_shutdown(void)
175 close(io_masterfd); /* kqueue, epoll */
179 array_free(&io_evcache);
181 library_initialized = false;
/* Replace the callback registered for a descriptor.
 *
 * fd:     descriptor whose event record is updated.
 * cbfunc: new handler, called with the descriptor and an event mask.
 *
 * NOTE(review): the handling of a failed lookup (NULL record) is not visible
 * in this excerpt -- confirm it precedes the assignment. */
186 io_event_setcb(int fd, void (*cbfunc) (int, short))
188 io_event *i = io_event_get(fd);
192 i->callback = cbfunc;
/* Register a descriptor with the I/O subsystem.
 *
 * fd:     descriptor to watch.
 * what:   event mask to watch for (IO_WANTREAD and/or IO_WANTWRITE).
 * cbfunc: handler invoked when an event fires.
 *
 * NOTE(review): the return type, the return statements and the preprocessor
 * branches selecting one backend call are not visible in this excerpt. */
198 io_event_create(int fd, short what, void (*cbfunc) (int, short))
/* select backend: descriptors at or above FD_SETSIZE cannot be put in an
 * fd_set, so they are rejected with a log message. */
207 if (fd >= FD_SETSIZE) {
209 "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
213 #endif /* FD_SETSIZE */
214 #endif /* IO_USE_SELECT */
/* Make sure the event table has a slot at index fd and obtain it. */
216 i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
219 "array_alloc failed: could not allocate space for %d io_event structures",
224 i->callback = cbfunc;
/* Hand the descriptor to the active backend. */
227 ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
230 ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
233 ret = io_event_add(fd, what);
/* Remember the registered mask only if the backend accepted it. */
235 if (ret) i->what = what;
/* Apply an epoll_ctl() operation for a descriptor.
 *
 * fd:     descriptor to add, modify or remove.
 * what:   IO_WANTREAD / IO_WANTWRITE mask translated to epoll event bits.
 * action: epoll_ctl() operation (callers pass EPOLL_CTL_ADD, _MOD or _DEL).
 *
 * Evaluates to true when epoll_ctl() returns 0.
 * NOTE(review): the line that stores fd in ev.data is not visible in this
 * excerpt; the dispatcher reads data.fd, so confirm it is set here. */
242 io_event_change_epoll(int fd, short what, const int action)
244 struct epoll_event ev = { 0, {0} };
/* Read interest maps to normal and priority input. */
247 if (what & IO_WANTREAD)
248 ev.events = EPOLLIN | EPOLLPRI;
/* Write interest is OR-ed in so both may be requested together. */
249 if (what & IO_WANTWRITE)
250 ev.events |= EPOLLOUT;
252 return epoll_ctl(io_masterfd, action, fd, &ev) == 0;
/* Submit all queued kevent change records to the kernel and empty the cache.
 * NOTE(review): the return statements and the condition guarding the
 * array_free() call are not visible in this excerpt. */
258 io_event_kqueue_commit_cache(void)
260 struct kevent *events;
/* Number of complete struct kevent entries currently queued. */
262 int len = (int) array_length(&io_evcache, sizeof (struct kevent));
264 if (!len) /* nothing to do */
270 array_free(&io_evcache);
274 events = array_start(&io_evcache);
276 assert(events != NULL);
/* Changes only: no output buffer and no timeout are passed. */
278 ret = kevent(io_masterfd, events, len, NULL, 0, NULL) == 0;
/* Drop the submitted entries from the cache. */
280 array_trunc(&io_evcache);
/* Queue (or directly apply) a kqueue change for a descriptor.
 *
 * fd:     descriptor the change applies to.
 * what:   IO_WANTREAD / IO_WANTWRITE mask; one change record per set bit.
 * action: kevent flags (callers pass EV_ADD|EV_ENABLE, EV_DISABLE, EV_DELETE).
 *
 * NOTE(review): the conditions in front of the direct kevent() calls are not
 * visible in this excerpt; presumably they are the fallback for a failed
 * cache append -- confirm. */
286 io_event_change_kqueue(int fd, short what, const int action)
291 if (what & IO_WANTREAD) {
292 EV_SET(&kev, fd, EVFILT_READ, action, 0, 0, 0);
293 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
295 ret = kevent(io_masterfd, &kev,1, NULL, 0, NULL) == 0;
/* The write filter is only attempted if the read part did not fail. */
298 if (ret && (what & IO_WANTWRITE)) {
299 EV_SET(&kev, fd, EVFILT_WRITE, action, 0, 0, 0);
300 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
302 ret = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
/* Flush once 100 or more change records have been queued. */
305 if (array_length(&io_evcache, sizeof kev) >= 100)
306 io_event_kqueue_commit_cache();
/* Start watching additional event types on an already registered descriptor.
 *
 * fd:   registered descriptor.
 * what: IO_WANTREAD / IO_WANTWRITE bits to enable.
 *
 * Yields false when no event record exists for fd, and true right away when
 * the stored mask already equals what.
 * NOTE(review): the epoll branch passes i->what, but the statement that
 * merges what into i->what is not visible in this excerpt -- confirm it
 * precedes the epoll call. */
313 io_event_add(int fd, short what)
315 io_event *i = io_event_get(fd);
317 if (!i) return false;
318 if (i->what == what) return true;
320 Log(LOG_DEBUG, "io_event_add(): fd %d (arg: %d), what %d.", i->fd, fd, what);
/* epoll: re-register the descriptor with the record's mask. */
324 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
/* kqueue: add/enable just the requested filters. */
328 return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
/* select: track a new highest descriptor (the assignment itself is not
 * visible here) and set the requested bits. */
332 if (fd > select_maxfd)
335 if (what & IO_WANTREAD)
336 FD_SET(fd, &readers);
337 if (what & IO_WANTWRITE)
338 FD_SET(fd, &writers);
/* Switch a descriptor to non-blocking mode.
 *
 * fd: descriptor to modify.
 *
 * Evaluates to true when the F_SETFL call returns 0.
 * NOTE(review): the error check on F_GETFL and the statement that adds
 * O_NONBLOCK to flags are not visible in this excerpt. */
346 io_setnonblock(int fd)
348 int flags = fcntl(fd, F_GETFL);
/* Fallback name for platforms that lack O_NONBLOCK (the surrounding
 * #ifndef is not visible here). */
353 #define O_NONBLOCK O_NDELAY
357 return fcntl(fd, F_SETFL, flags) == 0;
/* Body of the descriptor-close routine: unregister fd from the active
 * backend, then close it. Evaluates to true when close() returns 0.
 * NOTE(review): the signature is not visible in this excerpt; a comment in
 * the callback dispatcher refers to io_close, presumably this function. */
/* select: stop watching the descriptor in both sets. */
366 FD_CLR(fd, &writers);
367 FD_CLR(fd, &readers);
/* If the highest tracked descriptor goes away, walk downwards until a
 * descriptor with a registered callback is found (or 0 is reached). */
369 if (fd == select_maxfd) {
370 while (select_maxfd>0) {
371 --select_maxfd; /* find largest fd */
372 i = io_event_get(select_maxfd);
373 if (i && i->callback) break;
377 i = io_event_get(fd);
/* kqueue: push out queued changes first so the delete below is not
 * reordered behind them. */
379 if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
380 io_event_kqueue_commit_cache();
382 /* both kqueue and epoll remove fd from all sets automatically on the last close
383 * of the descriptor. since we don't know if this is the last close we'll have
384 * to remove the set explicitly. */
/* NOTE(review): i is dereferenced here; a NULL check is not visible in this
 * excerpt -- confirm one guards this call. */
386 io_event_change_kqueue(fd, i->what, EV_DELETE);
387 io_event_kqueue_commit_cache();
/* epoll: remove the descriptor from the epoll set. */
391 io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
397 return close(fd) == 0;
/* Stop watching the given event types on a registered descriptor.
 *
 * fd:   registered descriptor.
 * what: IO_WANTREAD / IO_WANTWRITE bits to disable.
 *
 * Yields false when no event record exists for fd.
 * NOTE(review): the epoll branch passes i->what, but the statement that
 * clears what from i->what is not visible in this excerpt -- confirm it
 * precedes the epoll call. */
402 io_event_del(int fd, short what)
404 io_event *i = io_event_get(fd);
406 Log(LOG_DEBUG, "io_event_del(): trying to delete eventtype %d on fd %d", what, fd);
408 if (!i) return false;
/* epoll: re-register the descriptor with the record's mask. */
413 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
/* kqueue: disable (not delete) the requested filters. */
417 return io_event_change_kqueue(fd, what, EV_DISABLE);
/* select: clear the requested bits. */
420 if (what & IO_WANTWRITE)
421 FD_CLR(fd, &writers);
423 if (what & IO_WANTREAD)
424 FD_CLR(fd, &readers);
/* Wait for events with select() and run the callbacks of ready descriptors.
 *
 * tv: timeout handed to select().
 *
 * NOTE(review): error handling, the bookkeeping of handled descriptors and
 * the return value are not visible in this excerpt. */
433 io_dispatch_select(struct timeval *tv)
/* Work on copies: select() overwrites the sets it is given. */
435 fd_set readers_tmp = readers;
436 fd_set writers_tmp = writers;
440 ret = select(select_maxfd + 1, &readers_tmp, &writers_tmp, NULL, tv);
/* Scan every descriptor up to the highest tracked one and build its
 * event mask from the result sets. */
446 for (i = 0; i <= select_maxfd; i++) {
448 if (FD_ISSET(i, &readers_tmp)) {
453 if (FD_ISSET(i, &writers_tmp)) {
454 what |= IO_WANTWRITE;
458 io_docallback(i, what);
/* Wait for events with epoll_wait() and run the callbacks of ready
 * descriptors.
 *
 * tv: timeout; converted to the single timeout argument of epoll_wait().
 *
 * NOTE(review): error handling, the accounting in total and the return
 * value are not visible in this excerpt. */
470 io_dispatch_epoll(struct timeval *tv)
/* Seconds scaled to milliseconds. */
472 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): tv_usec is in microseconds but is added unscaled to a
 * millisecond value, and epoll_wait() takes milliseconds -- looks like a
 * unit mismatch (tv_usec / 1000 expected); harmless only while callers pass
 * tv_usec == 0. Confirm and fix. */
473 int i, total = 0, ret, timeout = tv->tv_usec + sec;
474 struct epoll_event epoll_ev[100];
481 ret = epoll_wait(io_masterfd, epoll_ev, 100, timeout);
/* Translate each reported epoll event into the IO_* mask for the callback. */
486 for (i = 0; i < ret; i++) {
488 if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
491 if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
494 if (epoll_ev[i].events & EPOLLOUT)
495 type |= IO_WANTWRITE;
497 io_docallback(epoll_ev[i].data.fd, type);
/* A completely filled buffer may mean more events are pending: poll again. */
501 } while (ret == 100);
/* Submit queued changes, wait for events with kevent() and run the callbacks
 * of ready descriptors.
 *
 * tv: timeout; converted to a struct timespec for kevent().
 *
 * NOTE(review): error handling, the accounting in total, the case labels of
 * the switch and the return value are not visible in this excerpt. */
510 io_dispatch_kqueue(struct timeval *tv)
512 int i, total = 0, ret;
513 struct kevent kev[100];
514 struct kevent *newevents;
/* timeval (microseconds) to timespec (nanoseconds). */
517 ts.tv_sec = tv->tv_sec;
518 ts.tv_nsec = tv->tv_usec * 1000;
/* Pending change records are passed along with the wait call. */
521 newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
522 newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
523 assert(newevents_len >= 0);
524 if (newevents_len < 0)
528 assert(newevents != NULL);
530 ret = kevent(io_masterfd, newevents, newevents_len, kev,
/* The cache is emptied only if kevent() did not fail. */
532 if ((newevents_len>0) && ret != -1)
533 array_trunc(&io_evcache);
539 for (i = 0; i < ret; i++) {
/* EV_EOF is reported to the callback as an error. */
540 if (kev[i].flags & EV_EOF) {
/* NOTE(review): the format string has no conversion specifiers although two
 * arguments are passed -- the values are never printed; confirm intent. */
542 LogDebug("kev.flag has EV_EOF set, setting IO_ERROR",
543 kev[i].filter, kev[i].ident);
545 io_docallback((int)kev[i].ident, IO_ERROR);
/* Map the kqueue filter to the matching IO_* event. */
549 switch (kev[i].filter) {
551 io_docallback((int)kev[i].ident, IO_WANTREAD);
554 io_docallback((int)kev[i].ident, IO_WANTWRITE);
/* NOTE(review): kev[i].ident is printed with %d without a cast, unlike the
 * io_docallback() calls; confirm its type matches int on target platforms. */
558 LogDebug("Unknown kev.filter number %d for fd %d",
559 kev[i].filter, kev[i].ident); /* Fall through */
562 io_docallback((int)kev[i].ident, IO_ERROR);
/* A completely filled buffer may mean more events are pending: poll again. */
568 } while (ret == 100);
/* Public entry point: wait for I/O events and run the matching callbacks by
 * delegating to the backend dispatcher.
 *
 * tv: timeout passed through unchanged.
 *
 * NOTE(review): the preprocessor branches are not visible in this excerpt;
 * presumably exactly one of these returns is compiled in. */
576 io_dispatch(struct timeval *tv)
579 return io_dispatch_select(tv);
582 return io_dispatch_kqueue(tv);
585 return io_dispatch_epoll(tv);
590 /* call the callback function inside the struct matching fd */
/* fd:   descriptor whose event record is looked up.
 * what: event mask reported by the backend (may contain IO_ERROR). */
592 io_docallback(int fd, short what)
596 Log(LOG_DEBUG, "doing callback for fd %d, what %d", fd, what);
/* NOTE(review): i is dereferenced below; a NULL check or assert is not
 * visible in this excerpt -- confirm one sits between lookup and use. */
598 i = io_event_get(fd);
600 if (i->callback) { /* callback might be NULL if a previous callback function
601 called io_close on this fd */
/* On error the callback gets the registered mask instead of what. */
602 i->callback(fd, (what & IO_ERROR) ? i->what : what);
604 /* if error indicator is set, we return the event(s) that were registered */