2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 * Please read the file COPYING, README and AUTHORS for more information.
8 * I/O abstraction interface.
9 * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
15 static char UNUSED id[] = "$Id: io.c,v 1.19 2006/09/16 16:47:27 fw Exp $";
21 #include <sys/types.h>
29 /* Enables extra debug messages in event add/delete/callback code. */
30 /* #define DEBUG_IO */
/* Callback member of the per-descriptor event record (the remaining members
 * are not visible in this excerpt). Within this file it is invoked with the
 * file descriptor and an event mask. */
33 void (*callback)(int, short);
/* Static initializer for an event record: four fields, the second one -1.
 * NOTE(review): the field order is not visible here -- confirm it matches
 * the struct declaration. */
37 #define INIT_IOEVENT { NULL, -1, 0, NULL }
/* Compile-time selection of the event backend. Only the epoll, kqueue and
 * select definitions are visible in this excerpt; judging by the #endif
 * comments the tests are nested epoll -> kqueue -> poll, with select
 * presumably being the fallback (confirm against the full file). */
40 #ifdef HAVE_EPOLL_CREATE
41 #define IO_USE_EPOLL 1
44 #define IO_USE_KQUEUE 1
49 #define IO_USE_SELECT 1
50 # endif /* HAVE_POLL */
51 # endif /* HAVE_KQUEUE */
52 #endif /* HAVE_EPOLL_CREATE */
/* Set to true once a backend was set up successfully; reset to false by
 * io_library_shutdown(). */
54 static bool library_initialized;
/* epoll backend: master descriptor plus change/dispatch helpers.
 * NOTE(review): io_masterfd is declared twice in this excerpt; the two
 * declarations are presumably in mutually exclusive IO_USE_* sections. */
57 #include <sys/epoll.h>
59 static int io_masterfd;
60 static bool io_event_change_epoll(int fd, short what, const int action);
61 static int io_dispatch_epoll(struct timeval *tv);
/* kqueue backend: io_evcache collects struct kevent change records that
 * are handed to kevent() in batches. */
65 #include <sys/types.h>
66 #include <sys/event.h>
67 static array io_evcache;
68 static int io_masterfd;
70 static int io_dispatch_kqueue(struct timeval *tv);
71 static bool io_event_change_kqueue(int, short, const int action);
/* poll backend: poll_maxfd + 1 is the entry count passed to poll(). */
77 static int poll_maxfd;
79 static bool io_event_change_poll(int fd, short what);
/* select backend: the configuration headers are pulled in only to reach
 * Conf_MaxConnections. */
83 #include "defines.h" /* for conn.h */
84 #include "conn.h" /* for CONN_IDX (needed by resolve.h) */
85 #include "resolve.h" /* for RES_STAT (needed by conf.h) */
86 #include "conf.h" /* for Conf_MaxConnections */
88 static fd_set readers;
89 static fd_set writers;
90 static int select_maxfd; /* the select() interface sucks badly */
91 static int io_dispatch_select(struct timeval *tv);
/* Event records shared by all backends; the file descriptor number is used
 * as the array index. */
94 static array io_events;
96 static void io_docallback PARAMS((int fd, short what));
/* Look up the event record for fd; the descriptor number itself is the
 * index into io_events. (Only this line of the lookup helper is visible.) */
105 i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
/* Set up the poll() backend.
 * eventsize: initial number of descriptors, used for the log message, the
 * array_alloc() request on pollfds and the bound of the init loop.
 * The loop body and the handling of a failed array_alloc() are not visible
 * in this excerpt. Ends by marking the library as initialized. */
115 io_library_init_poll(unsigned int eventsize)
118 array_init(&pollfds);
120 Log(LOG_INFO, "IO subsystem: poll (initial maxfd %u).",
122 p = array_alloc(&pollfds, sizeof(struct pollfd), eventsize);
125 p = array_start(&pollfds);
126 for (i = 0; i < eventsize; i++)
129 library_initialized = true;
/* Set up the select() backend.
 * When FD_SETSIZE is known and Conf_MaxConnections reaches it, the setting
 * is logged and lowered to FD_SETSIZE - 1. Ends by marking the library as
 * initialized.
 * NOTE(review): FD_SETSIZE is printed with %u; confirm the argument type
 * matches on the target platforms. */
137 io_library_init_select(unsigned int eventsize)
139 Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
144 if (Conf_MaxConnections >= (int)FD_SETSIZE) {
146 "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
147 Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
149 Conf_MaxConnections = FD_SETSIZE - 1;
151 #endif /* FD_SETSIZE */
152 library_initialized = true;
/* Set up the epoll backend.
 * eventsize is converted to int and used as the size hint for
 * epoll_create(); a non-positive hint is replaced (replacement value not
 * visible in this excerpt). The library is marked initialized only when
 * epoll_create() returned a valid descriptor. */
159 io_library_init_epoll(unsigned int eventsize)
161 int ecreate_hint = (int)eventsize;
162 if (ecreate_hint <= 0)
164 io_masterfd = epoll_create(ecreate_hint);
166 "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
167 ecreate_hint, eventsize, io_masterfd);
/* a negative descriptor means epoll_create() failed */
168 if (io_masterfd >= 0)
169 library_initialized = true;
/* Set up the kqueue backend: create the kernel queue, log the result and
 * mark the library initialized only when kqueue() returned a valid
 * descriptor. eventsize is used for the log message in the visible lines. */
176 io_library_init_kqueue(unsigned int eventsize)
178 io_masterfd = kqueue();
181 "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
182 eventsize, io_masterfd);
/* a negative descriptor means kqueue() failed */
183 if (io_masterfd >= 0)
184 library_initialized = true;
/* Public entry point: initialise the I/O layer.
 * eventsize: expected number of descriptors. With the select backend it is
 * capped at FD_SETSIZE - 1 when FD_SETSIZE is defined. If non-zero, space
 * for that many io_event records is requested up front.
 * Returns library_initialized, i.e. whether a backend came up.
 * NOTE(review): the backend init calls below are presumably each inside
 * their own IO_USE_* conditional; the guards and the action taken when the
 * library is already initialized are not visible in this excerpt. */
190 io_library_init(unsigned int eventsize)
192 if (library_initialized)
197 "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
199 if (eventsize >= FD_SETSIZE)
200 eventsize = FD_SETSIZE - 1;
201 #endif /* FD_SETSIZE */
202 #endif /* IO_USE_SELECT */
203 if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
206 io_library_init_epoll(eventsize);
209 io_library_init_kqueue(eventsize);
212 io_library_init_poll(eventsize);
215 io_library_init_select(eventsize);
217 return library_initialized;
/* Tear down the I/O layer. The visible lines release the kqueue change
 * cache and clear the initialized flag; other cleanup steps are not
 * visible in this excerpt. */
222 io_library_shutdown(void)
235 array_free(&io_evcache);
237 library_initialized = false;
/* Replace the callback stored for fd.
 * fd: descriptor whose event record is looked up.
 * cbfunc: new callback, taking (int, short).
 * NOTE(review): a check for a NULL lookup result is not visible between
 * these lines -- confirm it exists in the full file. */
242 io_event_setcb(int fd, void (*cbfunc) (int, short))
244 io_event *i = io_event_get(fd);
248 i->callback = cbfunc;
/* Register fd with the active backend.
 * fd: descriptor to watch.
 * what: event mask (IO_WANTREAD / IO_WANTWRITE bits are tested elsewhere in
 * this file).
 * cbfunc: callback stored in the event record.
 * An io_event slot at index fd is requested from io_events, the callback is
 * stored, and the backend-specific registration is called; the requested
 * mask is recorded only if that registration succeeded.
 * NOTE(review): the guard below is true when only IO_USE_SELECT is defined,
 * yet the guarded code uses FD_SETSIZE unconditionally; it also applies the
 * FD_SETSIZE limit to non-select backends whenever FD_SETSIZE is defined.
 * Confirm that both are intended. */
254 io_event_create(int fd, short what, void (*cbfunc) (int, short))
260 #if defined(IO_USE_SELECT) || defined(FD_SETSIZE)
261 if (fd >= FD_SETSIZE) {
263 "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
268 i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
271 "array_alloc failed: could not allocate space for %d io_event structures",
276 i->callback = cbfunc;
/* one of the following registrations is used, depending on the backend */
279 ret = io_event_change_poll(fd, what);
282 ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
285 ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
288 ret = io_event_add(fd, what);
/* remember the mask only when the backend accepted it */
290 if (ret) i->what = what;
/* poll backend: translate the event mask for fd into poll() event bits and
 * fetch the pollfd slot at index fd.
 * IO_WANTREAD maps to POLLIN | POLLPRI; the mapping for IO_WANTWRITE and
 * the code that fills in the slot are not visible in this excerpt. */
297 io_event_change_poll(int fd, short what)
302 if (what & IO_WANTREAD)
303 events = POLLIN | POLLPRI;
304 if (what & IO_WANTWRITE)
307 p = array_alloc(&pollfds, sizeof *p, fd);
/* epoll backend: apply `action` (an EPOLL_CTL_* operation) for fd.
 * what: IO_WANTREAD selects EPOLLIN | EPOLLPRI, IO_WANTWRITE adds EPOLLOUT.
 * Returns true when epoll_ctl() succeeded.
 * NOTE(review): the assignment of ev.data.fd is not visible here, although
 * the dispatcher reads data.fd -- confirm it is set in the full file. */
320 io_event_change_epoll(int fd, short what, const int action)
322 struct epoll_event ev = { 0, {0} };
325 if (what & IO_WANTREAD)
326 ev.events = EPOLLIN | EPOLLPRI;
327 if (what & IO_WANTWRITE)
328 ev.events |= EPOLLOUT;
330 return epoll_ctl(io_masterfd, action, fd, &ev) == 0;
/* kqueue backend: submit all change records queued in io_evcache with a
 * single kevent() call that requests no events back, then truncate the
 * cache. Does nothing when the cache is empty. The condition under which
 * the cache is freed instead is not visible in this excerpt. */
336 io_event_kqueue_commit_cache(void)
338 struct kevent *events;
340 int len = (int) array_length(&io_evcache, sizeof (struct kevent));
342 if (!len) /* nothing to do */
348 array_free(&io_evcache);
352 events = array_start(&io_evcache);
354 assert(events != NULL);
/* changelist only: no event buffer, no timeout */
356 ret = kevent(io_masterfd, events, len, NULL, 0, NULL) == 0;
358 array_trunc(&io_evcache);
/* kqueue backend: build the change record(s) for fd.
 * what: IO_WANTREAD yields an EVFILT_READ record, IO_WANTWRITE an
 * EVFILT_WRITE record; the write record is only attempted when the read
 * step succeeded.
 * action: kevent flags such as EV_ADD | EV_ENABLE, EV_DISABLE or EV_DELETE.
 * Each record is either appended to io_evcache or passed to kevent()
 * directly; which of the two applies is decided by code not visible in
 * this excerpt. The cache is flushed once it holds 100 or more records. */
364 io_event_change_kqueue(int fd, short what, const int action)
369 if (what & IO_WANTREAD) {
370 EV_SET(&kev, fd, EVFILT_READ, action, 0, 0, 0);
371 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
373 ret = kevent(io_masterfd, &kev,1, NULL, 0, NULL) == 0;
376 if (ret && (what & IO_WANTWRITE)) {
377 EV_SET(&kev, fd, EVFILT_WRITE, action, 0, 0, 0);
378 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
380 ret = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
/* keep the pending change list bounded */
383 if (array_length(&io_evcache, sizeof kev) >= 100)
384 io_event_kqueue_commit_cache();
/* Start watching additional event types on an already registered fd.
 * Returns false when fd has no event record, and true right away when the
 * recorded mask already equals `what`.
 * Backend handling as visible here: epoll and poll are re-armed with the
 * record's mask (i->what), kqueue is given `what` with EV_ADD | EV_ENABLE,
 * select raises select_maxfd if needed and sets fd in the reader/writer
 * sets.
 * NOTE(review): the statement that merges `what` into i->what is not
 * visible in this excerpt -- confirm it precedes the backend calls. */
391 io_event_add(int fd, short what)
393 io_event *i = io_event_get(fd);
395 if (!i) return false;
396 if (i->what == what) return true;
398 Log(LOG_DEBUG, "io_event_add(): fd %d (arg: %d), what %d.", i->fd, fd, what);
402 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
406 return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
409 return io_event_change_poll(fd, i->what);
412 if (fd > select_maxfd)
415 if (what & IO_WANTREAD)
416 FD_SET(fd, &readers);
417 if (what & IO_WANTWRITE)
418 FD_SET(fd, &writers);
/* Change the file status flags of fd: read them with F_GETFL and write the
 * updated value back with F_SETFL. O_NONBLOCK falls back to O_NDELAY where
 * the former is missing. The flag update itself and the check of the
 * F_GETFL result are not visible in this excerpt.
 * Returns true when the F_SETFL call succeeded. */
426 io_setnonblock(int fd)
428 int flags = fcntl(fd, F_GETFL);
433 #define O_NONBLOCK O_NDELAY
437 return fcntl(fd, F_SETFL, flags) == 0;
/* poll backend: clean-up for a descriptor that is being closed. Fetches the
 * pollfd slot of fd and, if fd was the highest tracked descriptor, walks
 * poll_maxfd downwards inspecting the slots (the termination test and the
 * slot reset are not visible in this excerpt). */
443 io_close_poll(int fd)
446 p = array_get(&pollfds, sizeof *p, fd);
450 if (fd == poll_maxfd) {
451 while (poll_maxfd > 0) {
453 p = array_get(&pollfds, sizeof *p, poll_maxfd);
/* empty stand-in, presumably used when the poll backend is not compiled in */
460 static inline void io_close_poll(int UNUSED x) { /* NOTHING */ }
/* select backend: clean-up for a descriptor that is being closed. Removes
 * fd from both fd sets; if fd was the highest tracked descriptor,
 * select_maxfd is lowered until a descriptor with a callback is found or
 * zero is reached. */
466 io_close_select(int fd)
469 FD_CLR(fd, &writers);
470 FD_CLR(fd, &readers);
472 i = io_event_get(fd);
475 if (fd == select_maxfd) {
476 while (select_maxfd>0) {
477 --select_maxfd; /* find largest fd */
478 i = io_event_get(select_maxfd);
/* a record with a callback is treated as still in use */
479 if (i && i->callback) break;
/* empty stand-in, presumably used when the select backend is not compiled in */
484 static inline void io_close_select(int UNUSED x) { /* NOTHING */ }
/* Body of the close routine (its header is not visible in this excerpt;
 * presumably io_close()). Deregisters fd from the kqueue or epoll backend
 * and then closes it; returns true when close() succeeded.
 * For kqueue, pending cached changes are committed first, then an
 * EV_DELETE for the recorded mask is queued and committed immediately.
 * NOTE(review): i is dereferenced below; a NULL check is not visible in
 * this excerpt. */
493 i = io_event_get(fd);
495 if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
496 io_event_kqueue_commit_cache();
498 /* both kqueue and epoll remove fd from all sets automatically on the last close
499 * of the descriptor. since we don't know if this is the last close we'll have
500 * to remove the set explicitly. */
502 io_event_change_kqueue(fd, i->what, EV_DELETE);
503 io_event_kqueue_commit_cache();
511 io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
517 return close(fd) == 0;
/* Stop watching the event types in `what` on fd.
 * Returns false when fd has no event record.
 * Backend handling as visible here: poll and epoll are re-armed with the
 * record's mask (i->what), kqueue disables the filters named by `what`,
 * select clears fd from the matching fd sets.
 * NOTE(review): the statement that removes `what` from i->what is not
 * visible in this excerpt -- confirm it precedes the backend calls. */
522 io_event_del(int fd, short what)
524 io_event *i = io_event_get(fd);
526 Log(LOG_DEBUG, "io_event_del(): trying to delete eventtype %d on fd %d", what, fd);
528 if (!i) return false;
533 return io_event_change_poll(fd, i->what);
536 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
540 return io_event_change_kqueue(fd, what, EV_DISABLE);
543 if (what & IO_WANTWRITE)
544 FD_CLR(fd, &writers);
546 if (what & IO_WANTREAD)
547 FD_CLR(fd, &readers);
/* select backend: wait for events and run the callbacks.
 * tv: timeout handed to select().
 * select() is called on copies of the two fd sets, so the registered sets
 * stay intact. Afterwards every descriptor up to select_maxfd is checked
 * and io_docallback() is called with the collected mask (the condition
 * guarding that call is not visible in this excerpt). */
556 io_dispatch_select(struct timeval *tv)
558 fd_set readers_tmp = readers;
559 fd_set writers_tmp = writers;
563 ret = select(select_maxfd + 1, &readers_tmp, &writers_tmp, NULL, tv);
569 for (i = 0; i <= select_maxfd; i++) {
571 if (FD_ISSET(i, &readers_tmp)) {
576 if (FD_ISSET(i, &writers_tmp)) {
577 what |= IO_WANTWRITE;
581 io_docallback(i, what);
/* poll backend: wait for events and run the callbacks.
 * tv: timeout; converted to the int timeout argument of poll().
 * After poll() returns, slots 0..poll_maxfd are scanned; POLLOUT maps to
 * IO_WANTWRITE, and a slot with revents set but no mapped bit is handled
 * as an error case (handling not visible in this excerpt).
 * NOTE(review): the timeout is tv_sec * 1000 plus the raw tv_usec value.
 * poll() expects milliseconds while tv_usec holds microseconds, so the
 * microsecond part probably needs to be divided by 1000 -- confirm. */
593 io_dispatch_poll(struct timeval *tv)
595 time_t sec = tv->tv_sec * 1000;
596 int i, ret, timeout = tv->tv_usec + sec;
599 struct pollfd *p = array_start(&pollfds);
604 ret = poll(p, poll_maxfd + 1, timeout);
609 for (i=0; i <= poll_maxfd; i++) {
611 if (p[i].revents & (POLLIN|POLLPRI))
614 if (p[i].revents & POLLOUT)
615 what |= IO_WANTWRITE;
617 if (p[i].revents && !what) {
618 /* other flag is set, probably POLLERR */
623 io_docallback(i, what);
/* epoll backend: wait for events and run the callbacks.
 * tv: timeout; converted to the int timeout argument of epoll_wait().
 * Events are fetched in batches of up to 100; the loop repeats while a
 * batch came back completely full. EPOLLOUT maps to IO_WANTWRITE; the
 * mappings for the error and read bits are not visible in this excerpt.
 * NOTE(review): the timeout is tv_sec * 1000 plus the raw tv_usec value.
 * epoll_wait() expects milliseconds while tv_usec holds microseconds, so
 * the microsecond part probably needs to be divided by 1000 -- confirm. */
636 io_dispatch_epoll(struct timeval *tv)
638 time_t sec = tv->tv_sec * 1000;
639 int i, total = 0, ret, timeout = tv->tv_usec + sec;
640 struct epoll_event epoll_ev[100];
647 ret = epoll_wait(io_masterfd, epoll_ev, 100, timeout);
652 for (i = 0; i < ret; i++) {
654 if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
657 if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
660 if (epoll_ev[i].events & EPOLLOUT)
661 type |= IO_WANTWRITE;
663 io_docallback(epoll_ev[i].data.fd, type);
/* a full batch may mean more events are pending */
667 } while (ret == 100);
/* kqueue backend: submit cached changes, wait for events and run the
 * callbacks.
 * tv: timeout; converted to a timespec (microseconds * 1000 = nanoseconds).
 * The pending change list from io_evcache is passed to the same kevent()
 * call that fetches events, and the cache is truncated when that call did
 * not return -1. Events carrying EV_EOF are reported as IO_ERROR; otherwise
 * the filter decides between IO_WANTREAD, IO_WANTWRITE and, for unknown
 * filters, IO_ERROR. The loop repeats while a batch of 100 came back full.
 * NOTE(review): the first LogDebug() below passes two arguments but its
 * format string has no conversion specifiers; the second prints kev.ident
 * with %d -- confirm both against the type of ident. */
676 io_dispatch_kqueue(struct timeval *tv)
678 int i, total = 0, ret;
679 struct kevent kev[100];
680 struct kevent *newevents;
683 ts.tv_sec = tv->tv_sec;
684 ts.tv_nsec = tv->tv_usec * 1000;
687 newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
688 newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
689 assert(newevents_len >= 0);
690 if (newevents_len < 0)
694 assert(newevents != NULL);
696 ret = kevent(io_masterfd, newevents, newevents_len, kev,
/* the queued changes were handed over, so drop them from the cache */
698 if ((newevents_len>0) && ret != -1)
699 array_trunc(&io_evcache);
705 for (i = 0; i < ret; i++) {
706 if (kev[i].flags & EV_EOF) {
708 LogDebug("kev.flag has EV_EOF set, setting IO_ERROR",
709 kev[i].filter, kev[i].ident);
711 io_docallback((int)kev[i].ident, IO_ERROR);
715 switch (kev[i].filter) {
717 io_docallback((int)kev[i].ident, IO_WANTREAD);
720 io_docallback((int)kev[i].ident, IO_WANTWRITE);
724 LogDebug("Unknown kev.filter number %d for fd %d",
725 kev[i].filter, kev[i].ident); /* Fall through */
728 io_docallback((int)kev[i].ident, IO_ERROR);
/* a full batch may mean more events are pending */
734 } while (ret == 100);
/* Public entry point: wait for I/O events for at most the time given in tv
 * and run the registered callbacks. Forwards to the dispatcher of the
 * backend in use and returns its result; the conditionals choosing between
 * the four calls are not visible in this excerpt. */
742 io_dispatch(struct timeval *tv)
745 return io_dispatch_select(tv);
748 return io_dispatch_kqueue(tv);
751 return io_dispatch_poll(tv);
754 return io_dispatch_epoll(tv);
759 /* call the callback function inside the struct matching fd */
/* fd: descriptor the event belongs to.
 * what: event mask reported by the backend. When it contains IO_ERROR the
 * callback receives the mask recorded for fd (i->what) instead.
 * NOTE(review): the result of io_event_get() is dereferenced without a
 * visible NULL check -- confirm fd always has a record at this point. */
761 io_docallback(int fd, short what)
765 Log(LOG_DEBUG, "doing callback for fd %d, what %d", fd, what);
767 i = io_event_get(fd);
769 if (i->callback) { /* callback might be NULL if a previous callback function
770 called io_close on this fd */
771 i->callback(fd, (what & IO_ERROR) ? i->what : what);
773 /* if error indicator is set, we return the event(s) that were registered */