2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 * Please read the file COPYING, README and AUTHORS for more information.
8 * I/O abstraction interface.
9 * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
/* Revision-control identification string ("$Id: ...$" keyword) for this file;
 * marked UNUSED because nothing in the code reads it. */
15 static char UNUSED id[] = "$Id: io.c,v 1.25 2007/01/19 13:52:54 fw Exp $";
21 #include <sys/types.h>
29 /* Enables extra debug messages in event add/delete/callback code. */
30 /* #define DEBUG_IO */
/* Handler stored per descriptor; it is invoked as callback(fd, what) by
 * io_docallback(). The enclosing struct declaration is not visible here;
 * presumably this is a member of io_event -- confirm. */
33 void (*callback)(int, short);
/* Static initializer for an event record.
 * NOTE(review): this lists four values; confirm that it matches the number
 * and order of the members of the struct it is used with. */
37 #define INIT_IOEVENT { NULL, -1, 0, NULL }
/* Compile-time selection of the I/O multiplexing backend from the HAVE_...
 * feature macros. Judging by the nesting of the #endif comments below, the
 * order of preference is epoll, kqueue, /dev/poll, poll, select; the build
 * fails with #error when none of them is available. */
40 #ifdef HAVE_EPOLL_CREATE
41 # define IO_USE_EPOLL 1
/* select() is enabled in the epoll branch as well; io_library_init() logs a
 * fallback to select() when epoll cannot be initialized. The condition
 * guarding this define is not visible here. */
43 # define IO_USE_SELECT 1
47 # define IO_USE_KQUEUE 1
49 # ifdef HAVE_SYS_DEVPOLL_H
50 # define IO_USE_DEVPOLL 1
53 # define IO_USE_POLL 1
56 # define IO_USE_SELECT 1
58 # error "no IO API available!?"
59 # endif /* HAVE_SELECT */
60 # endif /* HAVE_POLL */
61 # endif /* HAVE_SYS_DEVPOLL_H */
62 # endif /* HAVE_KQUEUE */
63 #endif /* HAVE_EPOLL_CREATE */
/* Set to true by a backend init function once it succeeded; reset to false
 * by io_library_shutdown(). */
65 static bool library_initialized = false;
68 #include <sys/epoll.h>
/* epoll backend: descriptor returned by epoll_create(). Several functions in
 * this file test "io_masterfd >= 0" to decide whether epoll is in use. */
70 static int io_masterfd = -1;
71 static bool io_event_change_epoll(int fd, short what, const int action);
72 static int io_dispatch_epoll(struct timeval *tv);
76 #include <sys/types.h>
77 #include <sys/event.h>
/* kqueue backend: struct kevent change records that have been queued but not
 * yet handed to kevent(). */
78 static array io_evcache;
/* kqueue backend: descriptor returned by kqueue(). */
79 static int io_masterfd;
81 static int io_dispatch_kqueue(struct timeval *tv);
82 static bool io_event_change_kqueue(int, short, const int action);
/* poll backend: highest descriptor in use; poll() is called with
 * poll_maxfd + 1 entries. */
89 static int poll_maxfd;
91 static bool io_event_change_poll(int fd, short what);
95 #include <sys/devpoll.h>
/* /dev/poll backend: descriptor of the opened /dev/poll device. */
96 static int io_masterfd;
98 static bool io_event_change_devpoll(int fd, short what);
102 #include "defines.h" /* for conn.h */
103 #include "conn.h" /* for CONN_IDX (needed by resolve.h) */
104 #include "resolve.h" /* for RES_STAT (needed by conf.h) */
105 #include "conf.h" /* for Conf_MaxConnections */
/* select backend: the registered read and write descriptor sets. The
 * dispatcher passes copies of them to select(). */
107 static fd_set readers;
108 static fd_set writers;
109 static int select_maxfd; /* the select() interface sucks badly */
110 static int io_dispatch_select(struct timeval *tv);
/* NOTE(review): the condition guarding this define is not visible here.
 * Presumably it applies to builds without epoll, so that the
 * "io_masterfd >= 0" tests in the select code paths are constant false --
 * confirm. */
113 #define io_masterfd -1
115 #endif /* IO_USE_SELECT */
/* Table of io_event records, indexed by file descriptor. */
117 static array io_events;
119 static void io_docallback PARAMS((int fd, short what));
/* Fetch the io_event slot for descriptor fd from io_events; fd itself is the
 * array index. (The enclosing function header is not visible here.) */
128 i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
136 #ifdef IO_USE_DEVPOLL
/* Initialize the /dev/poll backend: open the device read/write and mark the
 * library as initialized when the open succeeded. In the lines visible here,
 * eventsize is only used for the log message. */
138 io_library_init_devpoll(unsigned int eventsize)
140 io_masterfd = open("/dev/poll", O_RDWR);
141 if (io_masterfd >= 0)
142 library_initialized = true;
/* NOTE(review): the "if" above has no braces, so this message is also
 * logged (with masterfd -1) when open() failed -- confirm that is intended. */
143 Log(LOG_INFO, "IO subsystem: /dev/poll (initial maxfd %u, masterfd %d).",
144 eventsize, io_masterfd);
/* Initialize the poll backend: set up the pollfds array, size it with
 * array_alloc() using eventsize, walk over the first eventsize entries and
 * finally mark the library as initialized. */
151 io_library_init_poll(unsigned int eventsize)
154 array_init(&pollfds);
156 Log(LOG_INFO, "IO subsystem: poll (initial maxfd %u).",
158 p = array_alloc(&pollfds, sizeof(struct pollfd), eventsize);
161 p = array_start(&pollfds);
/* The loop body is not visible here; presumably it marks every entry as
 * unused -- confirm. */
162 for (i = 0; i < eventsize; i++)
165 library_initialized = true;
/* Initialize the select backend. When FD_SETSIZE is defined, the configured
 * connection limit is reduced to FD_SETSIZE - 1 if it is too large; the
 * library is then marked as initialized. */
173 io_library_init_select(unsigned int eventsize)
175 Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
180 if (Conf_MaxConnections >= (int)FD_SETSIZE) {
/* NOTE(review): FD_SETSIZE is printed with %u; confirm that its type is
 * unsigned on all supported platforms or add a cast. */
182 "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
183 Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
185 Conf_MaxConnections = FD_SETSIZE - 1;
187 #endif /* FD_SETSIZE */
188 library_initialized = true;
/* Initialize the epoll backend: create the epoll instance, using eventsize
 * as the size hint, and mark the library as initialized on success. */
195 io_library_init_epoll(unsigned int eventsize)
197 int ecreate_hint = (int)eventsize;
/* A hint that is zero or negative after the cast to int is replaced (the
 * replacement statement is not visible here). */
198 if (ecreate_hint <= 0)
200 io_masterfd = epoll_create(ecreate_hint);
201 if (io_masterfd >= 0) {
202 library_initialized = true;
204 "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
205 ecreate_hint, eventsize, io_masterfd);
/* Initialize the kqueue backend: create the kernel queue and mark the
 * library as initialized when a valid descriptor was returned. */
213 io_library_init_kqueue(unsigned int eventsize)
215 io_masterfd = kqueue();
/* This message is formatted before the result of kqueue() is checked, so it
 * can report masterfd -1 (any guard around the log call is not visible). */
218 "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
219 eventsize, io_masterfd);
220 if (io_masterfd >= 0)
221 library_initialized = true;
/* Initialize the I/O subsystem.
 *
 * eventsize: initial number of io_event slots to allocate.
 * Returns library_initialized, i.e. true when one of the backends could be
 * set up.
 *
 * The preprocessor conditionals that choose between the backend calls below
 * are mostly not visible in this excerpt. */
227 io_library_init(unsigned int eventsize)
/* Guards against repeated initialization (the statement executed in that
 * case is not visible here). */
229 if (library_initialized)
234 "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
/* With select(), descriptors must stay below FD_SETSIZE. */
236 if (eventsize >= FD_SETSIZE)
237 eventsize = FD_SETSIZE - 1;
238 #endif /* FD_SETSIZE */
239 #endif /* IO_USE_SELECT */
/* Pre-allocate the event table; an allocation failure is detected here. */
240 if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
243 io_library_init_epoll(eventsize);
246 Log(LOG_INFO, "Can't initialize epoll() IO interface, falling back to select() ...");
250 io_library_init_kqueue(eventsize);
252 #ifdef IO_USE_DEVPOLL
253 io_library_init_devpoll(eventsize);
256 io_library_init_poll(eventsize);
/* select() is only set up when no other backend has been initialized. */
259 if (! library_initialized)
260 io_library_init_select(eventsize);
262 return library_initialized;
/* Shut down the I/O subsystem and reset the initialization flag, so that
 * io_library_init() can run again. */
267 io_library_shutdown(void)
/* Only a valid master descriptor is handled (the statement under this
 * condition is not visible here; presumably it closes it -- confirm). */
274 if (io_masterfd >= 0)
/* kqueue: release the queue of pending change records. */
281 array_free(&io_evcache);
283 library_initialized = false;
/* Replace the callback function registered for descriptor fd.
 *
 * fd:     descriptor whose event record is changed.
 * cbfunc: new handler, later called as cbfunc(fd, what). */
288 io_event_setcb(int fd, void (*cbfunc) (int, short))
290 io_event *i = io_event_get(fd);
/* Lines between the lookup and this assignment are not visible here;
 * presumably they reject a NULL record -- confirm. */
294 i->callback = cbfunc;
/* Register descriptor fd with the I/O subsystem.
 *
 * fd:     descriptor to watch.
 * what:   event mask to watch for (IO_WANTREAD and/or IO_WANTWRITE).
 * cbfunc: handler to call when an event occurs.
 *
 * The event record for fd is allocated in io_events, the callback is stored
 * and the descriptor is handed to the active backend. The requested mask is
 * remembered in the record only when the backend call succeeded. */
300 io_event_create(int fd, short what, void (*cbfunc) (int, short))
306 #if defined(IO_USE_SELECT) && defined(FD_SETSIZE)
/* select() cannot handle descriptors at or above FD_SETSIZE. */
307 if (fd >= FD_SETSIZE) {
309 "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
/* Make sure that index fd exists in the event table. */
314 i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
317 "array_alloc failed: could not allocate space for %d io_event structures",
322 i->callback = cbfunc;
324 #ifdef IO_USE_DEVPOLL
325 ret = io_event_change_devpoll(fd, what);
328 ret = io_event_change_poll(fd, what);
331 ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
334 ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
338 ret = io_event_add(fd, what);
340 if (ret) i->what = what;
345 #ifdef IO_USE_DEVPOLL
/* Tell /dev/poll which events to watch for fd by writing one struct pollfd
 * to the device.
 * Returns true only when the whole structure was written. */
347 io_event_change_devpoll(int fd, short what)
353 if (what & IO_WANTREAD)
/* NOTE(review): plain assignment; this relies on p.events having been
 * initialized in lines not visible here when IO_WANTREAD is not set. */
354 p.events = POLLIN | POLLPRI;
355 if (what & IO_WANTWRITE)
359 return write(io_masterfd, &p, sizeof p) == (ssize_t)sizeof p;
/* Update the poll() event mask for fd: translate the IO_WANT... bits in
 * "what" into poll event flags and obtain the pollfd slot at index fd
 * (how the slot is filled in is not visible here). */
367 io_event_change_poll(int fd, short what)
372 if (what & IO_WANTREAD)
373 events = POLLIN | POLLPRI;
374 if (what & IO_WANTWRITE)
/* array_alloc() is used for the lookup, so the slot for fd is requested
 * from the array rather than assumed to exist. */
377 p = array_alloc(&pollfds, sizeof *p, fd);
/* Apply an epoll_ctl() operation for fd.
 *
 * fd:     descriptor to change.
 * what:   IO_WANTREAD / IO_WANTWRITE bits, translated to EPOLLIN|EPOLLPRI
 *         and EPOLLOUT.
 * action: epoll_ctl() operation (callers in this file pass EPOLL_CTL_ADD,
 *         EPOLL_CTL_MOD or EPOLL_CTL_DEL).
 * Returns true when epoll_ctl() returned 0. */
390 io_event_change_epoll(int fd, short what, const int action)
/* Start from an all-zero event, so that no flags are set when "what" is 0. */
392 struct epoll_event ev = { 0, {0} };
395 if (what & IO_WANTREAD)
396 ev.events = EPOLLIN | EPOLLPRI;
397 if (what & IO_WANTWRITE)
398 ev.events |= EPOLLOUT;
400 return epoll_ctl(io_masterfd, action, fd, &ev) == 0;
/* Submit all change records queued in io_evcache to the kernel with a single
 * kevent() call that does not fetch any events. */
406 io_event_kqueue_commit_cache(void)
408 struct kevent *events;
/* Number of queued struct kevent records. */
410 int len = (int) array_length(&io_evcache, sizeof (struct kevent));
412 if (!len) /* nothing to do */
/* The condition under which the cache is freed here is not visible. */
418 array_free(&io_evcache);
422 events = array_start(&io_evcache);
424 assert(events != NULL);
/* No event list is passed (NULL, 0): changes are only registered. ret is
 * true when kevent() returned 0. */
426 ret = kevent(io_masterfd, events, len, NULL, 0, NULL) == 0;
/* Empty the cache (any condition guarding this is not visible here). */
428 array_trunc(&io_evcache);
/* Queue kqueue filter changes for fd.
 *
 * fd:     descriptor to change.
 * what:   IO_WANTREAD selects EVFILT_READ, IO_WANTWRITE selects EVFILT_WRITE.
 * action: kevent flags (callers in this file pass EV_ADD|EV_ENABLE,
 *         EV_DISABLE or EV_DELETE).
 *
 * Each change is appended to io_evcache; the cache is flushed once it holds
 * 100 or more records. */
434 io_event_change_kqueue(int fd, short what, const int action)
439 if (what & IO_WANTREAD) {
440 EV_SET(&kev, fd, EVFILT_READ, action, 0, 0, 0);
441 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
/* Direct submission of the single change; the condition is not visible
 * here, presumably it is the fallback when queueing failed -- confirm. */
443 ret = kevent(io_masterfd, &kev,1, NULL, 0, NULL) == 0;
/* The write filter is only handled when the read step did not fail. */
446 if (ret && (what & IO_WANTWRITE)) {
447 EV_SET(&kev, fd, EVFILT_WRITE, action, 0, 0, 0);
448 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
450 ret = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
453 if (array_length(&io_evcache, sizeof kev) >= 100)
454 io_event_kqueue_commit_cache();
/* Add event type(s) "what" to the set watched for fd.
 *
 * Returns false when fd has no event record; otherwise the result of the
 * backend-specific change. The preprocessor conditionals selecting between
 * the backend branches are mostly not visible in this excerpt. */
461 io_event_add(int fd, short what)
463 io_event *i = io_event_get(fd);
465 if (!i) return false;
467 if ((i->what & what) == what) /* event type is already registered */
470 Log(LOG_DEBUG, "io_event_add(): fd %d, what %d.", fd, what);
/* epoll, /dev/poll and poll receive the complete mask i->what, whereas
 * kqueue receives only the newly requested bits. */
474 if (io_masterfd >= 0)
475 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
479 return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
481 #ifdef IO_USE_DEVPOLL
482 return io_event_change_devpoll(fd, i->what);
485 return io_event_change_poll(fd, i->what);
/* select: keep track of the highest descriptor and update the sets. */
488 if (fd > select_maxfd)
491 if (what & IO_WANTREAD)
492 FD_SET(fd, &readers);
493 if (what & IO_WANTWRITE)
494 FD_SET(fd, &writers);
/* Switch fd to non-blocking mode: read the current file status flags with
 * fcntl(F_GETFL) and write them back with fcntl(F_SETFL).
 * Returns true when the F_SETFL call returned 0. */
502 io_setnonblock(int fd)
504 int flags = fcntl(fd, F_GETFL);
/* Maps O_NONBLOCK to O_NDELAY; the guard is not visible here, presumably
 * it covers platforms that lack O_NONBLOCK -- confirm. */
509 #define O_NONBLOCK O_NDELAY
513 return fcntl(fd, F_SETFL, flags) == 0;
517 #ifdef IO_USE_DEVPOLL
/* Remove fd from the /dev/poll set by writing a pollfd with POLLREMOVE. */
519 io_close_devpoll(int fd)
522 p.events = POLLREMOVE;
/* NOTE(review): the result of write() is ignored, so a failed removal goes
 * unnoticed. */
524 write(io_masterfd, &p, sizeof p);
/* Empty stand-in, so that callers need no #ifdef of their own. */
527 static inline void io_close_devpoll(int UNUSED x) { /* NOTHING */ }
/* Remove fd from the poll() bookkeeping; if fd was the highest descriptor,
 * poll_maxfd is searched downwards for a new value. */
534 io_close_poll(int fd)
537 p = array_get(&pollfds, sizeof *p, fd);
541 if (fd == poll_maxfd) {
/* The loop's decrement and exit test are not visible here. */
542 while (poll_maxfd > 0) {
544 p = array_get(&pollfds, sizeof *p, poll_maxfd);
/* Empty stand-in, so that callers need no #ifdef of their own. */
551 static inline void io_close_poll(int UNUSED x) { /* NOTHING */ }
/* Remove fd from the select() sets; if fd was the highest descriptor,
 * select_maxfd is lowered to the next descriptor that still has a callback
 * registered. */
557 io_close_select(int fd)
561 if (io_masterfd >= 0) /* Are we using epoll()? */
564 FD_CLR(fd, &writers);
565 FD_CLR(fd, &readers);
567 i = io_event_get(fd);
570 if (fd == select_maxfd) {
571 while (select_maxfd>0) {
572 --select_maxfd; /* find largest fd */
573 i = io_event_get(select_maxfd);
/* A record with a callback counts as a descriptor that is still in use. */
574 if (i && i->callback) break;
/* Empty stand-in, so that callers need no #ifdef of their own. */
579 static inline void io_close_select(int UNUSED x) { /* NOTHING */ }
/* Unregister fd from the active backend and close it. (The function header
 * is not visible in this excerpt; a comment in io_docallback() refers to it
 * as io_close.) Returns true when close() returned 0. */
588 i = io_event_get(fd);
590 if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
591 io_event_kqueue_commit_cache();
593 /* both kqueue and epoll remove fd from all sets automatically on the last close
594 * of the descriptor. since we don't know if this is the last close we'll have
595 * to remove the set explicitly. */
/* kqueue: queue the deletion and flush it right away, so that it reaches the
 * kernel before the descriptor is closed. */
597 io_event_change_kqueue(fd, i->what, EV_DELETE);
598 io_event_kqueue_commit_cache();
602 io_close_devpoll(fd);
607 io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
613 return close(fd) == 0;
/* Stop watching event type(s) "what" on fd.
 *
 * Returns false when fd has no event record; otherwise the result of the
 * backend-specific change. The preprocessor conditionals selecting between
 * the backend branches are mostly not visible in this excerpt. */
618 io_event_del(int fd, short what)
620 io_event *i = io_event_get(fd);
622 Log(LOG_DEBUG, "io_event_del(): trying to delete eventtype %d on fd %d", what, fd);
624 if (!i) return false;
/* /dev/poll, poll and epoll are given the mask stored in i->what; the
 * statement that updates i->what is not visible here (presumably it clears
 * the bits in "what" -- confirm). kqueue instead disables the filters named
 * in "what". */
628 #ifdef IO_USE_DEVPOLL
629 return io_event_change_devpoll(fd, i->what);
632 return io_event_change_poll(fd, i->what);
635 if (io_masterfd >= 0)
636 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
640 return io_event_change_kqueue(fd, what, EV_DISABLE);
/* select: drop fd from the corresponding sets. */
643 if (what & IO_WANTWRITE)
644 FD_CLR(fd, &writers);
646 if (what & IO_WANTREAD)
647 FD_CLR(fd, &readers);
/* Wait for events with select() and run the callbacks of all descriptors
 * that are ready.
 *
 * tv: timeout, passed to select() unchanged. */
656 io_dispatch_select(struct timeval *tv)
/* select() overwrites the sets it is given, so it works on copies and the
 * registered sets stay intact. */
658 fd_set readers_tmp = readers;
659 fd_set writers_tmp = writers;
663 ret = select(select_maxfd + 1, &readers_tmp, &writers_tmp, NULL, tv);
/* Check every descriptor up to and including select_maxfd. */
669 for (i = 0; i <= select_maxfd; i++) {
671 if (FD_ISSET(i, &readers_tmp)) {
676 if (FD_ISSET(i, &writers_tmp)) {
677 what |= IO_WANTWRITE;
681 io_docallback(i, what);
691 #ifdef IO_USE_DEVPOLL
/* Wait for events through /dev/poll (DP_POLL ioctl) and run the callbacks.
 * Results are fetched in batches of up to 100 entries; the loop repeats as
 * long as a full batch was returned. */
693 io_dispatch_devpoll(struct timeval *tv)
/* Seconds converted to milliseconds. */
696 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): tv_usec is in microseconds but is added unscaled to the
 * millisecond value "sec"; the sum is then used as dp_timeout. This looks
 * like a unit mix-up (tv_usec / 1000 expected) -- confirm and fix. */
697 int i, total, ret, timeout = tv->tv_usec + sec;
699 struct pollfd p[100];
706 dvp.dp_timeout = timeout;
709 ret = ioctl(io_masterfd, DP_POLL, &dvp);
713 for (i=0; i < ret ; i++) {
715 if (p[i].revents & (POLLIN|POLLPRI))
718 if (p[i].revents & POLLOUT)
719 what |= IO_WANTWRITE;
/* revents is set but matches neither the read nor the write flags. */
721 if (p[i].revents && !what) {
722 /* other flag is set, probably POLLERR */
725 io_docallback(p[i].fd, what);
727 } while (ret == 100);
/* Wait for events with poll() and run the callbacks of all descriptors that
 * reported something. */
736 io_dispatch_poll(struct timeval *tv)
/* Seconds converted to milliseconds. */
738 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): poll() expects its timeout in milliseconds, but tv_usec
 * (microseconds) is added unscaled to the millisecond value "sec". This
 * looks like a unit mix-up (tv_usec / 1000 expected) -- confirm and fix. */
739 int i, ret, timeout = tv->tv_usec + sec;
742 struct pollfd *p = array_start(&pollfds);
/* The pollfd array is indexed by descriptor, hence poll_maxfd + 1 entries. */
747 ret = poll(p, poll_maxfd + 1, timeout);
752 for (i=0; i <= poll_maxfd; i++) {
754 if (p[i].revents & (POLLIN|POLLPRI))
757 if (p[i].revents & POLLOUT)
758 what |= IO_WANTWRITE;
/* revents is set but matches neither the read nor the write flags. */
760 if (p[i].revents && !what) {
761 /* other flag is set, probably POLLERR */
766 io_docallback(i, what);
/* Wait for events with epoll_wait() and run the callbacks. Events are
 * fetched in batches of up to 100; the loop repeats as long as a full batch
 * was returned. */
779 io_dispatch_epoll(struct timeval *tv)
/* Seconds converted to milliseconds. */
781 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): epoll_wait() expects its timeout in milliseconds, but
 * tv_usec (microseconds) is added unscaled to the millisecond value "sec".
 * This looks like a unit mix-up (tv_usec / 1000 expected) -- confirm and
 * fix. */
782 int i, total = 0, ret, timeout = tv->tv_usec + sec;
783 struct epoll_event epoll_ev[100];
790 ret = epoll_wait(io_masterfd, epoll_ev, 100, timeout);
795 for (i = 0; i < ret; i++) {
/* Translate the epoll event flags into the IO_... mask for the callback. */
797 if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
800 if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
803 if (epoll_ev[i].events & EPOLLOUT)
804 type |= IO_WANTWRITE;
806 io_docallback(epoll_ev[i].data.fd, type);
810 } while (ret == 100);
/* Wait for events with kevent() and run the callbacks. Queued change records
 * from io_evcache are submitted in the same kevent() call. Events are
 * fetched in batches of up to 100; the loop repeats as long as a full batch
 * was returned. */
819 io_dispatch_kqueue(struct timeval *tv)
821 int i, total = 0, ret;
822 struct kevent kev[100];
823 struct kevent *newevents;
/* Convert the timeval into the timespec that kevent() takes
 * (microseconds to nanoseconds). */
826 ts.tv_sec = tv->tv_sec;
827 ts.tv_nsec = tv->tv_usec * 1000;
/* Pending change list: NULL when nothing has been queued. */
830 newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
831 newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
832 assert(newevents_len >= 0);
833 if (newevents_len < 0)
/* Presumably only asserted when newevents_len > 0 (guard not visible). */
837 assert(newevents != NULL);
839 ret = kevent(io_masterfd, newevents, newevents_len, kev,
/* The queued changes are dropped only when kevent() did not fail. */
841 if ((newevents_len>0) && ret != -1)
842 array_trunc(&io_evcache);
848 for (i = 0; i < ret; i++) {
/* EV_EOF is reported to the callback as IO_ERROR. */
849 if (kev[i].flags & EV_EOF) {
/* NOTE(review): the format string has no conversion specifications, yet
 * two arguments are passed; they are never printed. */
851 LogDebug("kev.flag has EV_EOF set, setting IO_ERROR",
852 kev[i].filter, kev[i].ident);
854 io_docallback((int)kev[i].ident, IO_ERROR);
/* Dispatch on the filter that fired (case labels are not visible here). */
858 switch (kev[i].filter) {
860 io_docallback((int)kev[i].ident, IO_WANTREAD);
863 io_docallback((int)kev[i].ident, IO_WANTWRITE);
/* NOTE(review): kev[i].ident is printed with %d without a cast, unlike in
 * the io_docallback() calls -- confirm that the types match. */
867 LogDebug("Unknown kev.filter number %d for fd %d",
868 kev[i].filter, kev[i].ident); /* Fall through */
871 io_docallback((int)kev[i].ident, IO_ERROR);
877 } while (ret == 100);
/* Wait for I/O events and run the registered callbacks by delegating to the
 * dispatcher of the backend in use.
 *
 * tv: maximum time to wait.
 * Returns whatever the backend dispatcher returns. The preprocessor
 * conditionals selecting between the branches are mostly not visible in this
 * excerpt. */
885 io_dispatch(struct timeval *tv)
/* epoll is used only if its master descriptor is valid; otherwise select()
 * serves as the fallback. */
888 if (io_masterfd >= 0)
889 return io_dispatch_epoll(tv);
892 return io_dispatch_select(tv);
895 return io_dispatch_kqueue(tv);
897 #ifdef IO_USE_DEVPOLL
898 return io_dispatch_devpoll(tv);
901 return io_dispatch_poll(tv);
906 /* call the callback function inside the struct matching fd */
/* fd:   descriptor the event occurred on.
 * what: IO_... event mask reported by the backend dispatcher. */
908 io_docallback(int fd, short what)
912 Log(LOG_DEBUG, "doing callback for fd %d, what %d", fd, what);
914 i = io_event_get(fd);
/* NOTE(review): no NULL check on "i" is visible between the lookup and the
 * dereference below -- confirm that one exists in the omitted line. */
916 if (i->callback) { /* callback might be NULL if a previous callback function
917 called io_close on this fd */
918 i->callback(fd, (what & IO_ERROR) ? i->what : what);
920 /* if error indicator is set, we return the event(s) that were registered */