2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 * Please read the file COPYING, README and AUTHORS for more information.
8 * I/O abstraction interface.
9 * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
/* Version-control Id keyword string recording this file's name and revision. */
15 static char UNUSED id[] = "$Id: io.c,v 1.20 2006/09/17 10:41:07 fw Exp $";
21 #include <sys/types.h>
29 /* Enables extra debug messages in event add/delete/callback code. */
30 /* #define DEBUG_IO */
/* Event handler stored per descriptor; io_docallback() invokes it as
 * callback(fd, what). */
33 void (*callback)(int, short);
/* Aggregate initializer with four members.
 * NOTE(review): presumably the "empty slot" value for an io_event; the
 * member order must match the struct definition - confirm. */
37 #define INIT_IOEVENT { NULL, -1, 0, NULL }
/* Compile-time choice of the I/O multiplexing backend, driven by the
 * configure-time feature macros. The conditionals are nested (see the
 * #endif comments): epoll is tried first, then kqueue, then /dev/poll,
 * then poll.
 * NOTE(review): select appears to be the fallback when none of the others
 * is available - confirm against the full conditional chain. */
40 #ifdef HAVE_EPOLL_CREATE
41 #define IO_USE_EPOLL 1
44 #define IO_USE_KQUEUE 1
46 # ifdef HAVE_SYS_DEVPOLL_H
47 #define IO_USE_DEVPOLL 1
52 #define IO_USE_SELECT 1
53 # endif /* HAVE_POLL */
54 # endif /* HAVE_SYS_DEVPOLL_H */
55 # endif /* HAVE_KQUEUE */
56 #endif /* HAVE_EPOLL_CREATE */
/* Set to true by the backend init routines once their setup succeeded;
 * reset to false by io_library_shutdown(). */
58 static bool library_initialized;
/* ---- epoll backend ---- */
61 #include <sys/epoll.h>
/* Descriptor returned by epoll_create(). */
63 static int io_masterfd;
64 static bool io_event_change_epoll(int fd, short what, const int action);
65 static int io_dispatch_epoll(struct timeval *tv);
/* ---- kqueue backend ---- */
69 #include <sys/types.h>
70 #include <sys/event.h>
/* Pending struct kevent change records; they are appended by
 * io_event_change_kqueue() and handed to kevent() in batches. */
71 static array io_evcache;
/* Descriptor returned by kqueue(). */
72 static int io_masterfd;
74 static int io_dispatch_kqueue(struct timeval *tv);
75 static bool io_event_change_kqueue(int, short, const int action);
/* ---- poll backend ---- */
/* Highest index of the pollfds array that is handed to poll()
 * (poll_maxfd + 1 entries are passed). */
82 static int poll_maxfd;
84 static bool io_event_change_poll(int fd, short what);
/* ---- /dev/poll backend ---- */
88 #include <sys/devpoll.h>
/* Descriptor obtained by opening /dev/poll. */
89 static int io_masterfd;
91 static bool io_event_change_devpoll(int fd, short what);
/* ---- select backend ---- */
95 #include "defines.h" /* for conn.h */
96 #include "conn.h" /* for CONN_IDX (needed by resolve.h) */
97 #include "resolve.h" /* for RES_STAT (needed by conf.h) */
98 #include "conf.h" /* for Conf_MaxConnections */
/* Descriptor sets watched for readability / writability;
 * io_dispatch_select() passes copies of them to select(). */
100 static fd_set readers;
101 static fd_set writers;
102 static int select_maxfd; /* highest fd in use; select() is called with select_maxfd + 1 */
103 static int io_dispatch_select(struct timeval *tv);
/* ---- shared by all backends ---- */
/* Table of io_event entries, indexed by file descriptor number. */
106 static array io_events;
/* Invokes the callback registered for fd, passing the event mask. */
108 static void io_docallback PARAMS((int fd, short what));
/* Fetch the io_event entry for fd; io_events is indexed by the descriptor
 * number. */
117 i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
125 #ifdef IO_USE_DEVPOLL
/* Initialise the /dev/poll backend: open the /dev/poll device and mark the
 * library as initialised when a valid descriptor was obtained.
 * eventsize is only reported in the log message below. */
127 io_library_init_devpoll(unsigned int eventsize)
129 io_masterfd = open("/dev/poll", O_RDWR);
130 if (io_masterfd >= 0)
131 library_initialized = true;
/* NOTE(review): this info message is emitted unconditionally, i.e. also
 * when open() failed and io_masterfd is negative. */
132 Log(LOG_INFO, "IO subsystem: /dev/poll (initial maxfd %u, masterfd %d).",
133 eventsize, io_masterfd);
/* Initialise the poll() backend: set up the pollfds array with room for
 * eventsize struct pollfd entries and walk over all of them once.
 * NOTE(review): the loop presumably puts every entry into an "unused"
 * state, and the flag is presumably only set after a successful
 * allocation - confirm. */
140 io_library_init_poll(unsigned int eventsize)
143 array_init(&pollfds);
145 Log(LOG_INFO, "IO subsystem: poll (initial maxfd %u).",
147 p = array_alloc(&pollfds, sizeof(struct pollfd), eventsize);
150 p = array_start(&pollfds);
151 for (i = 0; i < eventsize; i++)
154 library_initialized = true;
/* Initialise the select() backend. When FD_SETSIZE is known,
 * Conf_MaxConnections is lowered to FD_SETSIZE - 1 if it is at or above
 * that limit, because select() cannot handle descriptors >= FD_SETSIZE. */
162 io_library_init_select(unsigned int eventsize)
164 Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
169 if (Conf_MaxConnections >= (int)FD_SETSIZE) {
/* NOTE(review): FD_SETSIZE is printed with %u; its type is
 * implementation-defined (commonly int) - confirm the format matches. */
171 "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
172 Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
174 Conf_MaxConnections = FD_SETSIZE - 1;
176 #endif /* FD_SETSIZE */
177 library_initialized = true;
/* Initialise the epoll backend: create the epoll instance, using eventsize
 * as the size hint, and mark the library as initialised when a valid
 * descriptor was returned. */
184 io_library_init_epoll(unsigned int eventsize)
/* The hint is converted to int; a result <= 0 (eventsize of 0, or a value
 * that does not fit into an int) is special-cased.
 * NOTE(review): presumably replaced by a positive default, since
 * epoll_create() rejects non-positive sizes - confirm. */
186 int ecreate_hint = (int)eventsize;
187 if (ecreate_hint <= 0)
189 io_masterfd = epoll_create(ecreate_hint);
191 "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
192 ecreate_hint, eventsize, io_masterfd);
193 if (io_masterfd >= 0)
194 library_initialized = true;
/* Initialise the kqueue backend: create the kernel event queue and mark
 * the library as initialised when kqueue() returned a valid descriptor.
 * eventsize is only reported in the log message. */
201 io_library_init_kqueue(unsigned int eventsize)
203 io_masterfd = kqueue();
206 "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
207 eventsize, io_masterfd);
208 if (io_masterfd >= 0)
209 library_initialized = true;
/* Initialise the I/O subsystem for roughly eventsize descriptors:
 * pre-allocate the io_events table and run the init routine of the
 * compiled-in backend.
 * Returns the value of library_initialized, i.e. true when the backend
 * reported a successful setup. */
215 io_library_init(unsigned int eventsize)
/* Guard against repeated initialisation. */
217 if (library_initialized)
222 "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
/* select backend only: never plan for more descriptors than an fd_set
 * can hold. */
224 if (eventsize >= FD_SETSIZE)
225 eventsize = FD_SETSIZE - 1;
226 #endif /* FD_SETSIZE */
227 #endif /* IO_USE_SELECT */
/* An eventsize of 0 skips the pre-allocation. */
228 if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
231 io_library_init_epoll(eventsize);
234 io_library_init_kqueue(eventsize);
236 #ifdef IO_USE_DEVPOLL
237 io_library_init_devpoll(eventsize);
240 io_library_init_poll(eventsize);
243 io_library_init_select(eventsize);
245 return library_initialized;
/* Shut the I/O subsystem down: the kqueue change cache is released and
 * library_initialized is cleared. */
250 io_library_shutdown(void)
263 array_free(&io_evcache);
265 library_initialized = false;
/* Replace the callback registered for fd with cbfunc.
 * fd:     descriptor whose io_event entry is looked up.
 * cbfunc: new handler; it is stored in the entry's callback member. */
270 io_event_setcb(int fd, void (*cbfunc) (int, short))
272 io_event *i = io_event_get(fd);
276 i->callback = cbfunc;
/* Register fd with the I/O layer.
 * fd:     descriptor to watch; also the index of its io_events entry.
 * what:   event mask (IO_WANTREAD / IO_WANTWRITE) to watch for.
 * cbfunc: handler to call when an event occurs.
 * The entry is allocated, cbfunc is stored, and the descriptor is handed
 * to the compiled-in backend. */
282 io_event_create(int fd, short what, void (*cbfunc) (int, short))
/* NOTE(review): this guard is also true when only one of the two macros is
 * defined. With IO_USE_SELECT but without FD_SETSIZE the comparison below
 * cannot compile; with FD_SETSIZE and another backend the select() limit
 * is applied although select() is not used. "&&" looks intended - confirm. */
288 #if defined(IO_USE_SELECT) || defined(FD_SETSIZE)
289 if (fd >= FD_SETSIZE) {
291 "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
/* Make sure io_events has an entry at index fd. */
296 i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
299 "array_alloc failed: could not allocate space for %d io_event structures",
304 i->callback = cbfunc;
306 #ifdef IO_USE_DEVPOLL
307 ret = io_event_change_devpoll(fd, what);
310 ret = io_event_change_poll(fd, what);
313 ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
316 ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
319 ret = io_event_add(fd, what);
/* The mask is only recorded when the backend accepted the registration. */
321 if (ret) i->what = what;
326 #ifdef IO_USE_DEVPOLL
/* Submit the event mask for fd to /dev/poll.
 * IO_WANTREAD is translated to POLLIN | POLLPRI; IO_WANTWRITE is handled
 * by the branch below. The record p is then written to the /dev/poll
 * descriptor.
 * Returns true only if the complete record was written. */
328 io_event_change_devpoll(int fd, short what)
334 if (what & IO_WANTREAD)
335 p.events = POLLIN | POLLPRI;
336 if (what & IO_WANTWRITE)
340 return write(io_masterfd, &p, sizeof p) == (ssize_t)sizeof p;
/* Set the event mask for fd in the poll() backend.
 * IO_WANTREAD is translated to POLLIN | POLLPRI; IO_WANTWRITE is handled
 * by the branch below. The pollfds array is indexed by fd, and array_alloc
 * is asked for the entry at that index. */
348 io_event_change_poll(int fd, short what)
353 if (what & IO_WANTREAD)
354 events = POLLIN | POLLPRI;
355 if (what & IO_WANTWRITE)
358 p = array_alloc(&pollfds, sizeof *p, fd);
/* Apply an epoll_ctl() operation for fd.
 * what:   IO_WANTREAD maps to EPOLLIN | EPOLLPRI, IO_WANTWRITE adds
 *         EPOLLOUT; a mask of 0 leaves the event set empty.
 * action: epoll_ctl() operation; callers in this file pass EPOLL_CTL_ADD,
 *         EPOLL_CTL_MOD or EPOLL_CTL_DEL.
 * Returns true when epoll_ctl() succeeded. */
371 io_event_change_epoll(int fd, short what, const int action)
373 struct epoll_event ev = { 0, {0} };
376 if (what & IO_WANTREAD)
377 ev.events = EPOLLIN | EPOLLPRI;
378 if (what & IO_WANTWRITE)
379 ev.events |= EPOLLOUT;
381 return epoll_ctl(io_masterfd, action, fd, &ev) == 0;
/* Flush the queued kqueue change records: all entries of io_evcache are
 * submitted with a single kevent() call that does not fetch any events,
 * after which the cache is emptied with array_trunc(). */
387 io_event_kqueue_commit_cache(void)
389 struct kevent *events;
/* Number of complete struct kevent records in the cache. */
391 int len = (int) array_length(&io_evcache, sizeof (struct kevent));
393 if (!len) /* nothing to do */
399 array_free(&io_evcache);
403 events = array_start(&io_evcache);
405 assert(events != NULL);
/* ret is true when kevent() returned 0. */
407 ret = kevent(io_masterfd, events, len, NULL, 0, NULL) == 0;
409 array_trunc(&io_evcache);
/* Queue kqueue changes for fd.
 * what:   IO_WANTREAD selects EVFILT_READ, IO_WANTWRITE selects
 *         EVFILT_WRITE; one change record is built per requested filter.
 * action: kevent flags; callers in this file pass EV_ADD | EV_ENABLE,
 *         EV_DISABLE or EV_DELETE.
 * Each record is appended to io_evcache.
 * NOTE(review): the direct kevent() calls are presumably the fallback for
 * a failed append - confirm. */
415 io_event_change_kqueue(int fd, short what, const int action)
420 if (what & IO_WANTREAD) {
421 EV_SET(&kev, fd, EVFILT_READ, action, 0, 0, 0);
422 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
424 ret = kevent(io_masterfd, &kev,1, NULL, 0, NULL) == 0;
/* The write filter is only processed if the read part did not fail. */
427 if (ret && (what & IO_WANTWRITE)) {
428 EV_SET(&kev, fd, EVFILT_WRITE, action, 0, 0, 0);
429 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
431 ret = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
/* Flush once 100 or more change records are pending. */
434 if (array_length(&io_evcache, sizeof kev) >= 100)
435 io_event_kqueue_commit_cache();
/* Start watching fd for the event types in what.
 * Returns false when fd has no io_event entry and true when the recorded
 * mask already equals what; otherwise the result of the backend call.
 * epoll, /dev/poll and poll are handed the mask stored in the entry
 * (i->what); kqueue is handed the requested mask (what); select sets the
 * descriptor in the reader and/or writer set. */
442 io_event_add(int fd, short what)
444 io_event *i = io_event_get(fd);
446 if (!i) return false;
/* Only an exact match returns early; a request for a subset of the events
 * that are already registered still goes on to the backend. */
447 if (i->what == what) return true;
449 Log(LOG_DEBUG, "io_event_add(): fd %d (arg: %d), what %d.", i->fd, fd, what);
453 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
457 return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
459 #ifdef IO_USE_DEVPOLL
460 return io_event_change_devpoll(fd, i->what);
463 return io_event_change_poll(fd, i->what);
/* NOTE(review): presumably raises select_maxfd to fd - confirm. */
466 if (fd > select_maxfd)
469 if (what & IO_WANTREAD)
470 FD_SET(fd, &readers);
471 if (what & IO_WANTWRITE)
472 FD_SET(fd, &writers);
/* Put fd into non-blocking mode: the current file status flags are read
 * with F_GETFL and written back with F_SETFL.
 * Returns true when the F_SETFL call returned 0. */
480 io_setnonblock(int fd)
482 int flags = fcntl(fd, F_GETFL);
/* O_NDELAY serves as O_NONBLOCK.
 * NOTE(review): presumably only on platforms that lack O_NONBLOCK -
 * confirm the enclosing conditional. */
487 #define O_NONBLOCK O_NDELAY
491 return fcntl(fd, F_SETFL, flags) == 0;
495 #ifdef IO_USE_DEVPOLL
/* Remove fd from the /dev/poll interest set by writing a record with
 * POLLREMOVE to the /dev/poll descriptor. */
497 io_close_devpoll(int fd)
500 p.events = POLLREMOVE;
/* NOTE(review): the result of write() is ignored, so a failed removal
 * goes unnoticed. */
502 write(io_masterfd, &p, sizeof p);
/* No-op variant.
 * NOTE(review): presumably used when /dev/poll is not the selected
 * backend - confirm. */
505 static inline void io_close_devpoll(int UNUSED x) { /* NOTHING */ }
/* Release the pollfds entry of fd. If fd was the highest descriptor in use
 * (poll_maxfd), the entries at lower indices are inspected while
 * poll_maxfd is above 0.
 * NOTE(review): presumably this searches for the next entry still in use -
 * confirm. */
512 io_close_poll(int fd)
515 p = array_get(&pollfds, sizeof *p, fd);
519 if (fd == poll_maxfd) {
520 while (poll_maxfd > 0) {
522 p = array_get(&pollfds, sizeof *p, poll_maxfd);
/* No-op variant.
 * NOTE(review): presumably used when poll is not the selected backend -
 * confirm. */
528 static inline void io_close_poll(int UNUSED x) { /* NOTHING */ }
/* Remove fd from both select() descriptor sets. If fd was the highest
 * descriptor in use, select_maxfd is lowered step by step until a
 * descriptor with a registered callback is found or 0 is reached. */
534 io_close_select(int fd)
537 FD_CLR(fd, &writers);
538 FD_CLR(fd, &readers);
540 i = io_event_get(fd);
543 if (fd == select_maxfd) {
544 while (select_maxfd>0) {
545 --select_maxfd; /* find largest fd */
546 i = io_event_get(select_maxfd);
/* An entry with a callback counts as still in use. */
547 if (i && i->callback) break;
/* No-op variant.
 * NOTE(review): presumably used when select is not the selected backend -
 * confirm. */
552 static inline void io_close_select(int UNUSED x) { /* NOTHING */ }
/* Detach fd from the backend, then close it. */
561 i = io_event_get(fd);
/* Submit queued kqueue changes first, so that the deletion below is not
 * mixed with older pending records for this fd. */
563 if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
564 io_event_kqueue_commit_cache();
566 /* Both kqueue and epoll drop an fd from all sets automatically on the last
567 * close of the descriptor. Since we cannot know whether this is the last
568 * close, the fd has to be removed from the set explicitly. */
570 io_event_change_kqueue(fd, i->what, EV_DELETE);
571 io_event_kqueue_commit_cache();
575 io_close_devpoll(fd);
580 io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
/* The result is true when close() succeeded. */
586 return close(fd) == 0;
/* Stop watching fd for the event types in what.
 * Returns false when fd has no io_event entry; otherwise the result of the
 * backend call.
 * /dev/poll, poll and epoll are handed the mask stored in the entry
 * (i->what); kqueue disables the filters named in what (EV_DISABLE);
 * select clears the descriptor from the writer and/or reader set. */
591 io_event_del(int fd, short what)
593 io_event *i = io_event_get(fd);
595 Log(LOG_DEBUG, "io_event_del(): trying to delete eventtype %d on fd %d", what, fd);
597 if (!i) return false;
601 #ifdef IO_USE_DEVPOLL
602 return io_event_change_devpoll(fd, i->what);
605 return io_event_change_poll(fd, i->what);
608 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
612 return io_event_change_kqueue(fd, what, EV_DISABLE);
615 if (what & IO_WANTWRITE)
616 FD_CLR(fd, &writers);
618 if (what & IO_WANTREAD)
619 FD_CLR(fd, &readers);
/* Wait for events with select() and run the callbacks of the descriptors
 * that became ready.
 * tv: timeout, passed to select() unchanged. */
628 io_dispatch_select(struct timeval *tv)
/* select() overwrites the sets it is given, so it works on copies and the
 * registered sets stay intact. */
630 fd_set readers_tmp = readers;
631 fd_set writers_tmp = writers;
635 ret = select(select_maxfd + 1, &readers_tmp, &writers_tmp, NULL, tv);
/* Check every descriptor up to the highest one in use. */
641 for (i = 0; i <= select_maxfd; i++) {
643 if (FD_ISSET(i, &readers_tmp)) {
648 if (FD_ISSET(i, &writers_tmp)) {
649 what |= IO_WANTWRITE;
653 io_docallback(i, what);
663 #ifdef IO_USE_DEVPOLL
/* Wait for events via the DP_POLL ioctl on /dev/poll and run the callbacks
 * of the reported descriptors. Results are fetched in batches of up to
 * 100 entries; a full batch triggers another round. */
665 io_dispatch_devpoll(struct timeval *tv)
/* NOTE(review): tv_sec is scaled to milliseconds, but tv_usec
 * (microseconds) is added unscaled. If dp_timeout is in milliseconds the
 * sub-second part is 1000 times too large; tv_usec / 1000 looks intended -
 * confirm. */
668 time_t sec = tv->tv_sec * 1000;
669 int i, total, ret, timeout = tv->tv_usec + sec;
671 struct pollfd p[100];
678 dvp.dp_timeout = timeout;
681 ret = ioctl(io_masterfd, DP_POLL, &dvp);
685 for (i=0; i < ret ; i++) {
687 if (p[i].revents & (POLLIN|POLLPRI))
690 if (p[i].revents & POLLOUT)
691 what |= IO_WANTWRITE;
693 if (p[i].revents && !what) {
694 /* other flag is set, probably POLLERR */
697 io_docallback(p[i].fd, what);
699 } while (ret == 100);
/* Wait for events with poll() and run the callbacks of the descriptors
 * that reported something. The pollfds array is indexed by descriptor
 * number, so the array index is passed to the callback as the fd. */
708 io_dispatch_poll(struct timeval *tv)
/* NOTE(review): poll() takes its timeout in milliseconds. tv_sec is scaled
 * accordingly, but tv_usec (microseconds) is added unscaled, which makes
 * the sub-second part 1000 times too large; tv_usec / 1000 looks intended -
 * confirm. */
710 time_t sec = tv->tv_sec * 1000;
711 int i, ret, timeout = tv->tv_usec + sec;
714 struct pollfd *p = array_start(&pollfds);
719 ret = poll(p, poll_maxfd + 1, timeout);
724 for (i=0; i <= poll_maxfd; i++) {
726 if (p[i].revents & (POLLIN|POLLPRI))
729 if (p[i].revents & POLLOUT)
730 what |= IO_WANTWRITE;
732 if (p[i].revents && !what) {
733 /* other flag is set, probably POLLERR */
738 io_docallback(i, what);
/* Wait for events with epoll_wait() and run the callbacks of the reported
 * descriptors. Events are fetched in batches of up to 100; a full batch
 * triggers another round. */
751 io_dispatch_epoll(struct timeval *tv)
/* NOTE(review): epoll_wait() takes its timeout in milliseconds. tv_sec is
 * scaled accordingly, but tv_usec (microseconds) is added unscaled, which
 * makes the sub-second part 1000 times too large; tv_usec / 1000 looks
 * intended - confirm. */
753 time_t sec = tv->tv_sec * 1000;
754 int i, total = 0, ret, timeout = tv->tv_usec + sec;
755 struct epoll_event epoll_ev[100];
762 ret = epoll_wait(io_masterfd, epoll_ev, 100, timeout);
767 for (i = 0; i < ret; i++) {
/* Error and hang-up conditions are checked before readiness. */
769 if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
772 if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
775 if (epoll_ev[i].events & EPOLLOUT)
776 type |= IO_WANTWRITE;
778 io_docallback(epoll_ev[i].data.fd, type);
782 } while (ret == 100);
/* Wait for events with kevent() and run the callbacks of the reported
 * descriptors. Pending change records from io_evcache are submitted in
 * the same kevent() call. Events are fetched in batches of up to 100; a
 * full batch triggers another round. */
791 io_dispatch_kqueue(struct timeval *tv)
793 int i, total = 0, ret;
794 struct kevent kev[100];
795 struct kevent *newevents;
/* Convert the timeout: seconds are copied, microseconds become
 * nanoseconds. */
798 ts.tv_sec = tv->tv_sec;
799 ts.tv_nsec = tv->tv_usec * 1000;
802 newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
803 newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
804 assert(newevents_len >= 0);
805 if (newevents_len < 0)
809 assert(newevents != NULL);
811 ret = kevent(io_masterfd, newevents, newevents_len, kev,
/* The cache is only emptied when the call did not fail, so the change
 * records are kept after an error. */
813 if ((newevents_len>0) && ret != -1)
814 array_trunc(&io_evcache);
820 for (i = 0; i < ret; i++) {
/* EV_EOF is reported to the callback as IO_ERROR. */
821 if (kev[i].flags & EV_EOF) {
/* NOTE(review): the format string has no conversion specifiers, so the
 * two arguments that follow it are never printed. */
823 LogDebug("kev.flag has EV_EOF set, setting IO_ERROR",
824 kev[i].filter, kev[i].ident);
826 io_docallback((int)kev[i].ident, IO_ERROR);
830 switch (kev[i].filter) {
832 io_docallback((int)kev[i].ident, IO_WANTREAD);
835 io_docallback((int)kev[i].ident, IO_WANTWRITE);
839 LogDebug("Unknown kev.filter number %d for fd %d",
840 kev[i].filter, kev[i].ident); /* Fall through */
843 io_docallback((int)kev[i].ident, IO_ERROR);
849 } while (ret == 100);
/* Wait for I/O events and run the registered callbacks by forwarding to
 * the dispatch routine of the compiled-in backend.
 * tv: maximum time to wait; handed on unchanged.
 * Returns whatever the backend's dispatch routine returns. */
857 io_dispatch(struct timeval *tv)
860 return io_dispatch_select(tv);
863 return io_dispatch_kqueue(tv);
865 #ifdef IO_USE_DEVPOLL
866 return io_dispatch_devpoll(tv);
869 return io_dispatch_poll(tv);
872 return io_dispatch_epoll(tv);
877 /* Call the callback function stored in the io_event entry matching fd. */
879 io_docallback(int fd, short what)
883 Log(LOG_DEBUG, "doing callback for fd %d, what %d", fd, what);
885 i = io_event_get(fd);
/* NOTE(review): no NULL check on i is visible before the dereference
 * below, although other callers of io_event_get() test its result -
 * confirm. */
887 if (i->callback) { /* callback might be NULL if a previous callback function
888 called io_close on this fd */
889 i->callback(fd, (what & IO_ERROR) ? i->what : what);
891 /* If the error indicator is set, the callback is passed the event(s) that were registered for fd instead. */