2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 * Please read the file COPYING, README and AUTHORS for more information.
8 * I/O abstraction interface.
9 * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
15 static char UNUSED id[] = "$Id: io.c,v 1.29 2008/03/27 15:47:21 fw Exp $";
21 #include <sys/types.h>
29 /* Enables extra debug messages in event add/delete/callback code. */
30 /* #define DEBUG_IO */
/* Per-descriptor handler; io_docallback() invokes it as callback(fd, what). */
34 void (*callback)(int, short);
/* Initializer for an unused io_event record.
 * NOTE(review): only the callback member is visible here - confirm that the
 * four values match the member order of the struct. */
41 #define INIT_IOEVENT { NULL, -1, 0, NULL }
/* Compile-time backend selection.  Judging by the #endif trailers below, the
 * feature macros are nested in the order epoll, kqueue, /dev/poll, poll,
 * select, and the build stops with #error if none of them is available.
 * IO_USE_SELECT is additionally defined right after IO_USE_EPOLL (original
 * line 47); NOTE(review): presumably so that select() can act as the runtime
 * fallback for epoll - the guarding lines are not visible here. */
44 #ifdef HAVE_EPOLL_CREATE
45 # define IO_USE_EPOLL 1
47 # define IO_USE_SELECT 1
51 # define IO_USE_KQUEUE 1
53 # ifdef HAVE_SYS_DEVPOLL_H
54 # define IO_USE_DEVPOLL 1
57 # define IO_USE_POLL 1
60 # define IO_USE_SELECT 1
62 # error "no IO API available!?"
63 # endif /* HAVE_SELECT */
64 # endif /* HAVE_POLL */
65 # endif /* HAVE_SYS_DEVPOLL_H */
66 # endif /* HAVE_KQUEUE */
67 #endif /* HAVE_EPOLL_CREATE */
/* Set to true by the backend init routines once one of them succeeded;
 * reset to false by io_library_shutdown(). */
69 static bool library_initialized = false;
72 #include <sys/epoll.h>
/* epoll backend: descriptor returned by epoll_create(); starts out as -1. */
74 static int io_masterfd = -1;
75 static bool io_event_change_epoll(int fd, short what, const int action);
76 static int io_dispatch_epoll(struct timeval *tv);
80 #include <sys/types.h>
81 #include <sys/event.h>
/* kqueue backend: io_evcache collects struct kevent change records until they
 * are handed to kevent(); io_masterfd is the descriptor returned by kqueue(). */
82 static array io_evcache;
83 static int io_masterfd;
85 static int io_dispatch_kqueue(struct timeval *tv);
86 static bool io_event_change_kqueue(int, short, const int action);
/* poll backend: upper bound of the pollfd slots handed to poll(). */
93 static int poll_maxfd;
95 static bool io_event_change_poll PARAMS((int fd, short what));
99 #include <sys/devpoll.h>
/* /dev/poll backend: descriptor obtained by opening "/dev/poll". */
100 static int io_masterfd;
102 static bool io_event_change_devpoll(int fd, short what);
106 #include "defines.h" /* for conn.h */
107 #include "conn.h" /* for CONN_IDX (needed by resolve.h) */
108 #include "resolve.h" /* for RES_STAT (needed by conf.h) */
109 #include "conf.h" /* for Conf_MaxConnections */
/* select backend: persistent read/write interest sets plus the highest
 * descriptor number passed to select(). */
111 static fd_set readers;
112 static fd_set writers;
113 static int select_maxfd; /* the select() interface sucks badly */
114 static int io_dispatch_select(struct timeval *tv);
/* Constant stand-in that makes every "io_masterfd >= 0" test false.
 * NOTE(review): presumably only defined when no real io_masterfd variable
 * exists - the guarding conditional is not visible here. */
117 #define io_masterfd -1
119 #endif /* IO_USE_SELECT */
/* Storage for the io_event records; lookups pass the descriptor as position. */
121 static array io_events;
/* Forward declaration; the definition is at the end of the file. */
123 static void io_docallback PARAMS((int fd, short what));
/* Debug trace helper: logs the message s followed by fd and the event mask
 * at LOG_DEBUG level.
 * NOTE(review): presumably compiled only when DEBUG_IO is defined - the
 * surrounding conditional is not visible here. */
126 static void io_debug(const char *s, int fd, int what)
128 Log(LOG_DEBUG, "%s: %d, %d\n", s, fd, what);
/* No-op variant with the same name and parameter types; all parameters are
 * marked UNUSED. */
131 static inline void io_debug(const char UNUSED *s,int UNUSED a, int UNUSED b) {/*NOTHING*/}
/* Fetch the io_event record for fd from io_events, using fd as the position.
 * NOTE(review): presumably the body of io_event_get(); the function header
 * and the handling of the result are not visible here. */
141 i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
149 #ifdef IO_USE_DEVPOLL
/* /dev/poll dispatcher: waits for events with the DP_POLL ioctl on
 * io_masterfd and passes every returned descriptor to io_docallback().
 * Results are read in batches of up to 100 pollfd entries; the loop runs
 * again as long as a batch comes back completely full.
 *
 * tv: maximum time to wait. */
151 io_dispatch_devpoll(struct timeval *tv)
/* Seconds converted to milliseconds (despite the variable name). */
154 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): tv_usec (microseconds) is added unscaled to the millisecond
 * value above - confirm whether it should be divided by 1000 first. */
155 int i, total, ret, timeout = tv->tv_usec + sec;
157 struct pollfd p[100];
164 dvp.dp_timeout = timeout;
167 ret = ioctl(io_masterfd, DP_POLL, &dvp);
/* ret is used as the number of valid entries in p[]. */
171 for (i=0; i < ret ; i++) {
/* Map the poll result bits onto the IO_WANT* flags used by the callbacks. */
173 if (p[i].revents & (POLLIN|POLLPRI))
176 if (p[i].revents & POLLOUT)
177 what |= IO_WANTWRITE;
179 if (p[i].revents && !what) {
180 /* other flag is set, probably POLLERR */
183 io_docallback(p[i].fd, what);
/* A full batch may mean more events are pending, so poll again. */
185 } while (ret == 100);
/* Set the event mask for fd on the /dev/poll backend by writing a pollfd
 * record to io_masterfd.
 *
 * what: combination of IO_WANTREAD / IO_WANTWRITE.
 * Returns true only if the complete record was written. */
192 io_event_change_devpoll(int fd, short what)
/* Translate the IO_WANT* flags into poll event bits. */
198 if (what & IO_WANTREAD)
199 p.events = POLLIN | POLLPRI;
200 if (what & IO_WANTWRITE)
204 return write(io_masterfd, &p, sizeof p) == (ssize_t)sizeof p;
/* Remove fd from the /dev/poll interest set by writing a pollfd record whose
 * events field is POLLREMOVE. */
208 io_close_devpoll(int fd)
211 p.events = POLLREMOVE;
/* NOTE(review): the result of write() is ignored, so a failed removal goes
 * unnoticed. */
213 write(io_masterfd, &p, sizeof p);
/* Initialize the /dev/poll backend: open "/dev/poll" and keep the descriptor
 * in io_masterfd.  The library is marked initialized only if open() succeeded.
 *
 * eventsize: initial number of descriptors; only used for the log message in
 * the lines visible here. */
217 io_library_init_devpoll(unsigned int eventsize)
219 io_masterfd = open("/dev/poll", O_RDWR);
220 if (io_masterfd >= 0)
221 library_initialized = true;
/* NOTE(review): the if above has no braces, so this message is logged
 * unconditionally - also when open() failed and io_masterfd is negative. */
222 Log(LOG_INFO, "IO subsystem: /dev/poll (initial maxfd %u, masterfd %d).",
223 eventsize, io_masterfd);
/* Empty stand-ins with the same names as the /dev/poll routines.
 * NOTE(review): presumably compiled when IO_USE_DEVPOLL is not defined so
 * that callers need no conditional - the #else itself is not visible here. */
226 static inline void io_close_devpoll(int UNUSED x) {/* NOTHING */}
227 static inline void io_library_init_devpoll(unsigned int UNUSED ev) {/*NOTHING*/}
/* poll() dispatcher: polls slots 0..poll_maxfd of the pollfds array and calls
 * io_docallback() for the slots that report events.
 *
 * tv: maximum time to wait. */
233 io_dispatch_poll(struct timeval *tv)
/* Seconds converted to milliseconds (despite the variable name). */
235 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): tv_usec (microseconds) is added unscaled to the millisecond
 * value above, while poll() takes its timeout in milliseconds - confirm
 * whether it should be divided by 1000 first. */
236 int i, ret, timeout = tv->tv_usec + sec;
239 struct pollfd *p = array_start(&pollfds);
244 ret = poll(p, poll_maxfd + 1, timeout);
249 for (i=0; i <= poll_maxfd; i++) {
/* Map the poll result bits onto the IO_WANT* flags used by the callbacks. */
251 if (p[i].revents & (POLLIN|POLLPRI))
254 if (p[i].revents & POLLOUT)
255 what |= IO_WANTWRITE;
257 if (p[i].revents && !what) {
258 /* other flag is set, probably POLLERR */
/* The slot index doubles as the descriptor number passed to the callback. */
263 io_docallback(i, what);
/* Set the event mask for fd on the poll backend.
 *
 * what: combination of IO_WANTREAD / IO_WANTWRITE. */
273 io_event_change_poll(int fd, short what)
/* Translate the IO_WANT* flags into poll event bits. */
278 if (what & IO_WANTREAD)
279 events = POLLIN | POLLPRI;
280 if (what & IO_WANTWRITE)
/* The pollfds array is addressed by descriptor number; array_alloc() is asked
 * for the slot at position fd. */
283 p = array_alloc(&pollfds, sizeof *p, fd);
/* Poll-backend part of closing fd: looks up the slot of fd in pollfds and,
 * if fd was the current poll_maxfd, searches the slots below it.
 * NOTE(review): the statements that clear the slot and lower poll_maxfd are
 * not visible here. */
294 io_close_poll(int fd)
297 p = array_get(&pollfds, sizeof *p, fd);
301 if (fd == poll_maxfd) {
302 while (poll_maxfd > 0) {
304 p = array_get(&pollfds, sizeof *p, poll_maxfd);
/* Initialize the poll backend: set up the pollfds array, request room for
 * eventsize entries, walk all eventsize slots (the loop body is not visible
 * here) and mark the library as initialized.
 *
 * eventsize: initial number of descriptors to reserve room for. */
312 io_library_init_poll(unsigned int eventsize)
315 array_init(&pollfds);
317 Log(LOG_INFO, "IO subsystem: poll (initial maxfd %u).",
319 p = array_alloc(&pollfds, sizeof(struct pollfd), eventsize);
322 p = array_start(&pollfds);
323 for (i = 0; i < eventsize; i++)
326 library_initialized = true;
/* Empty stand-ins with the same names as the poll routines.
 * NOTE(review): presumably compiled when IO_USE_POLL is not defined - the
 * conditional itself is not visible here. */
330 static inline void io_close_poll(int UNUSED x) {/* NOTHING */}
331 static inline void io_library_init_poll(unsigned int UNUSED ev) {/*NOTHING*/}
/* select() dispatcher: waits on copies of the readers/writers sets and calls
 * io_docallback() for descriptors in the range 0..select_maxfd.
 *
 * tv: timeout, passed to select() unchanged. */
337 io_dispatch_select(struct timeval *tv)
/* Work on copies: select() overwrites the sets it is given, while readers
 * and writers must keep the registered interest. */
339 fd_set readers_tmp = readers;
340 fd_set writers_tmp = writers;
344 ret = select(select_maxfd + 1, &readers_tmp, &writers_tmp, NULL, tv);
350 for (i = 0; i <= select_maxfd; i++) {
352 if (FD_ISSET(i, &readers_tmp)) {
357 if (FD_ISSET(i, &writers_tmp)) {
358 what |= IO_WANTWRITE;
362 io_docallback(i, what);
/* Initialize the select backend and mark the library as initialized.
 * When FD_SETSIZE is known, Conf_MaxConnections is reduced to FD_SETSIZE - 1
 * if it is not already below FD_SETSIZE.
 * NOTE(review): the statement controlled by the library_initialized check is
 * not visible here; presumably it returns early when another backend is
 * already active.
 *
 * eventsize: initial number of descriptors, used for the log message. */
371 io_library_init_select(unsigned int eventsize)
373 if (library_initialized)
375 Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
380 if (Conf_MaxConnections >= (int)FD_SETSIZE) {
382 "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
383 Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
385 Conf_MaxConnections = FD_SETSIZE - 1;
387 #endif /* FD_SETSIZE */
388 library_initialized = true;
/* Select-backend part of closing fd: removes fd from the readers and writers
 * sets and, if fd was the highest registered descriptor, lowers select_maxfd
 * to the next descriptor that still has a callback.
 * NOTE(review): the statement controlled by the io_masterfd check is not
 * visible here; presumably nothing is done when epoll is in use. */
392 io_close_select(int fd)
396 if (io_masterfd >= 0) /* Are we using epoll()? */
399 FD_CLR(fd, &writers);
400 FD_CLR(fd, &readers);
402 i = io_event_get(fd);
405 if (fd == select_maxfd) {
406 while (select_maxfd>0) {
407 --select_maxfd; /* find largest fd */
408 i = io_event_get(select_maxfd);
/* Stop at the first descriptor that still has a callback set. */
409 if (i && i->callback) break;
/* Empty stand-ins with the same names as the select routines.
 * NOTE(review): presumably compiled when IO_USE_SELECT is not defined - the
 * conditional itself is not visible here.  The init stub takes int, whereas
 * the real io_library_init_select() and the other init stubs take
 * unsigned int. */
414 static inline void io_library_init_select(int UNUSED x) {/* NOTHING */}
415 static inline void io_close_select(int UNUSED x) {/* NOTHING */}
/* Apply an epoll_ctl() operation for fd on io_masterfd.
 *
 * what:   combination of IO_WANTREAD / IO_WANTWRITE to translate into epoll
 *         event bits.
 * action: epoll_ctl() operation; callers in this file pass EPOLL_CTL_ADD,
 *         EPOLL_CTL_MOD or EPOLL_CTL_DEL.
 * Returns true if epoll_ctl() returned 0. */
421 io_event_change_epoll(int fd, short what, const int action)
/* Start from a fully zeroed event structure. */
423 struct epoll_event ev = { 0, {0} };
426 if (what & IO_WANTREAD)
427 ev.events = EPOLLIN | EPOLLPRI;
428 if (what & IO_WANTWRITE)
429 ev.events |= EPOLLOUT;
431 return epoll_ctl(io_masterfd, action, fd, &ev) == 0;
/* epoll dispatcher: collects up to 100 events per epoll_wait() call on
 * io_masterfd, passes each one to io_docallback() and repeats as long as a
 * call returns a full batch of 100.
 *
 * tv: maximum time to wait. */
435 io_dispatch_epoll(struct timeval *tv)
/* Seconds converted to milliseconds (despite the variable name). */
437 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): tv_usec (microseconds) is added unscaled to the millisecond
 * value above, while epoll_wait() takes its timeout in milliseconds -
 * confirm whether it should be divided by 1000 first. */
438 int i, total = 0, ret, timeout = tv->tv_usec + sec;
439 struct epoll_event epoll_ev[100];
446 ret = epoll_wait(io_masterfd, epoll_ev, 100, timeout);
451 for (i = 0; i < ret; i++) {
/* Translate the epoll event bits into the IO_* flags used by the callbacks. */
453 if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
456 if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
459 if (epoll_ev[i].events & EPOLLOUT)
460 type |= IO_WANTWRITE;
462 io_docallback(epoll_ev[i].data.fd, type);
/* A full batch may mean more events are pending, so wait again. */
466 } while (ret == 100);
/* Initialize the epoll backend: create the epoll descriptor, store it in
 * io_masterfd and, on success, mark the library as initialized.
 *
 * eventsize: initial number of descriptors; converted to int and passed to
 * epoll_create() as the size hint. */
472 io_library_init_epoll(unsigned int eventsize)
474 int ecreate_hint = (int)eventsize;
/* Guard against a non-positive hint (the replacement value is assigned on a
 * line not visible here). */
475 if (ecreate_hint <= 0)
477 io_masterfd = epoll_create(ecreate_hint);
478 if (io_masterfd >= 0) {
479 library_initialized = true;
481 "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
482 ecreate_hint, eventsize, io_masterfd);
/* NOTE(review): presumably only reached when epoll_create() failed - the
 * lines between the success branch and this message are not visible here. */
486 Log(LOG_INFO, "Can't initialize epoll() IO interface, falling back to select() ...");
/* Empty stand-in for builds without the epoll backend (this is the last
 * definition before the closing #endif of the IO_USE_EPOLL conditional). */
490 static inline void io_library_init_epoll(unsigned int UNUSED ev) {/* NOTHING */}
491 #endif /* IO_USE_EPOLL */
/* Submit all change records queued in io_evcache to the kernel with a single
 * kevent() call and empty the cache afterwards.  The kevent() call requests
 * no result events (NULL event list, count 0). */
496 io_event_kqueue_commit_cache(void)
498 struct kevent *events;
/* Number of complete struct kevent records currently in the cache. */
500 int len = (int) array_length(&io_evcache, sizeof (struct kevent));
502 if (!len) /* nothing to do */
/* NOTE(review): the condition under which the cache is freed instead of
 * submitted is not visible here. */
508 array_free(&io_evcache);
512 events = array_start(&io_evcache);
514 assert(events != NULL);
/* ret records whether kevent() returned 0. */
516 ret = kevent(io_masterfd, events, len, NULL, 0, NULL) == 0;
518 array_trunc(&io_evcache);
/* Queue kqueue change records for fd: one EVFILT_READ record if what contains
 * IO_WANTREAD and one EVFILT_WRITE record if it contains IO_WANTWRITE.
 *
 * action: kevent flags for the records; callers in this file pass
 *         EV_ADD|EV_ENABLE, EV_DISABLE or EV_DELETE.
 * NOTE(review): the direct kevent() calls below are presumably fallbacks for
 * the case that appending to io_evcache fails - the conditions guarding them
 * are not visible here. */
523 io_event_change_kqueue(int fd, short what, const int action)
528 if (what & IO_WANTREAD) {
529 EV_SET(&kev, fd, EVFILT_READ, action, 0, 0, 0);
530 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
532 ret = kevent(io_masterfd, &kev,1, NULL, 0, NULL) == 0;
/* The write filter is only handled if the read part did not fail. */
535 if (ret && (what & IO_WANTWRITE)) {
536 EV_SET(&kev, fd, EVFILT_WRITE, action, 0, 0, 0);
537 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
539 ret = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
/* Flush the cache once it holds 100 or more records. */
542 if (array_length(&io_evcache, sizeof kev) >= 100)
543 io_event_kqueue_commit_cache();
/* kqueue dispatcher: hands any cached change records to kevent(), collects
 * up to 100 events per call, routes each one to io_docallback() and repeats
 * as long as a call returns a full batch of 100.
 *
 * tv: maximum time to wait. */
548 io_dispatch_kqueue(struct timeval *tv)
550 int i, total = 0, ret;
551 struct kevent kev[100];
552 struct kevent *newevents;
/* Convert the struct timeval into a timespec (microseconds to nanoseconds). */
555 ts.tv_sec = tv->tv_sec;
556 ts.tv_nsec = tv->tv_usec * 1000;
/* Pending change records are passed along with the wait itself. */
559 newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
560 newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
561 assert(newevents_len >= 0);
563 ret = kevent(io_masterfd, newevents, newevents_len, kev, 100, &ts);
/* The cached changes were handed to kevent(); drop them unless it failed. */
564 if (newevents && ret != -1)
565 array_trunc(&io_evcache);
571 for (i = 0; i < ret; i++) {
572 io_debug("dispatch_kqueue: fd, kev.flags", (int)kev[i].ident, kev[i].flags);
/* EOF or error on the descriptor is reported as IO_ERROR; for EV_ERROR the
 * value in kev.data is logged via strerror(). */
573 if (kev[i].flags & (EV_EOF|EV_ERROR)) {
574 if (kev[i].flags & EV_ERROR)
575 Log(LOG_ERR, "kevent fd %d: EV_ERROR (%s)",
576 (int)kev[i].ident, strerror((int)kev[i].data));
577 io_docallback((int)kev[i].ident, IO_ERROR);
581 switch (kev[i].filter) {
583 io_docallback((int)kev[i].ident, IO_WANTREAD);
586 io_docallback((int)kev[i].ident, IO_WANTWRITE);
/* NOTE(review): kev[i].ident is passed to %d without the (int) cast used
 * everywhere else in this function - confirm format and argument match. */
589 LogDebug("Unknown kev.filter number %d for fd %d",
590 kev[i].filter, kev[i].ident);
593 io_docallback((int)kev[i].ident, IO_ERROR);
/* A full batch may mean more events are pending, so ask again. */
599 } while (ret == 100);
/* Initialize the kqueue backend: create the kernel queue, store its
 * descriptor in io_masterfd and mark the library as initialized if the
 * descriptor is valid.
 *
 * eventsize: initial number of descriptors, used for the log message. */
605 io_library_init_kqueue(unsigned int eventsize)
607 io_masterfd = kqueue();
610 "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
611 eventsize, io_masterfd);
612 if (io_masterfd >= 0)
613 library_initialized = true;
/* Empty stand-in with the same name as the kqueue init routine.
 * NOTE(review): presumably compiled when IO_USE_KQUEUE is not defined - the
 * conditional itself is not visible here. */
616 static inline void io_library_init_kqueue(unsigned int UNUSED ev) {/* NOTHING */}
/* Public entry point: initialize the I/O layer.
 * With the select backend compiled in and FD_SETSIZE known, eventsize is
 * capped at FD_SETSIZE - 1.  Room for eventsize io_event records is then
 * requested and every backend init routine is called in turn; backends that
 * are not compiled in are empty stubs.
 *
 * eventsize: expected number of descriptors.
 * Returns the value of library_initialized, i.e. whether a backend came up. */
621 io_library_init(unsigned int eventsize)
623 if (library_initialized)
628 "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
630 if (eventsize >= FD_SETSIZE)
631 eventsize = FD_SETSIZE - 1;
632 #endif /* FD_SETSIZE */
633 #endif /* IO_USE_SELECT */
/* A zero eventsize skips the up-front allocation. */
634 if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
637 io_library_init_epoll(eventsize);
638 io_library_init_kqueue(eventsize);
639 io_library_init_devpoll(eventsize);
640 io_library_init_poll(eventsize);
/* Called last; it checks library_initialized itself. */
641 io_library_init_select(eventsize);
643 return library_initialized;
/* Public entry point: shut the I/O layer down and reset library_initialized
 * so that io_library_init() can run again.
 * NOTE(review): the statement controlled by the io_masterfd check and the
 * release of the other arrays are not visible here. */
648 io_library_shutdown(void)
/* Only the backends that own a kernel descriptor need this part. */
654 #if defined(IO_USE_EPOLL) || defined(IO_USE_KQUEUE) || defined(IO_USE_DEVPOLL)
655 if (io_masterfd >= 0)
/* Release the cache of pending kqueue change records. */
660 array_free(&io_evcache);
662 library_initialized = false;
/* Replace the callback registered for fd.
 *
 * cbfunc: new handler, later invoked as cbfunc(fd, events).
 * NOTE(review): the handling of a missing io_event record and the return
 * statements are not visible here. */
667 io_event_setcb(int fd, void (*cbfunc) (int, short))
669 io_event *i = io_event_get(fd);
673 i->callback = cbfunc;
/* Register a new descriptor with the compiled-in backend(s).
 * Each call below belongs to one backend; the preprocessor conditionals that
 * choose between them are mostly not visible here.
 *
 * what: combination of IO_WANTREAD / IO_WANTWRITE. */
679 backend_create_ev(int fd, short what)
682 #ifdef IO_USE_DEVPOLL
683 ret = io_event_change_devpoll(fd, what);
686 ret = io_event_change_poll(fd, what);
689 ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
692 ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
/* This path goes through the generic io_event_add(). */
696 ret = io_event_add(fd, what);
/* Public entry point: start watching fd.
 * Obtains the io_event record for fd (growing io_events if necessary),
 * stores the callback and registers the descriptor through
 * backend_create_ev().
 *
 * what:   combination of IO_WANTREAD / IO_WANTWRITE.
 * cbfunc: handler, later invoked as cbfunc(fd, events). */
703 io_event_create(int fd, short what, void (*cbfunc) (int, short))
/* select() cannot handle descriptors at or above FD_SETSIZE. */
709 #if defined(IO_USE_SELECT) && defined(FD_SETSIZE)
710 if (fd >= FD_SETSIZE) {
712 "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
717 i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
720 "array_alloc failed: could not allocate space for %d io_event structures",
725 i->callback = cbfunc;
727 ret = backend_create_ev(fd, what);
/* Public entry point: enable additional event type(s) for a descriptor that
 * is already known to the library.
 *
 * what: combination of IO_WANTREAD / IO_WANTWRITE to enable.
 * Returns false if fd has no io_event record; otherwise the result comes from
 * the active backend (return statements partly not visible here). */
735 io_event_add(int fd, short what)
737 io_event *i = io_event_get(fd);
739 if (!i) return false;
741 if ((i->what & what) == what) /* event type is already registered */
744 io_debug("io_event_add: fd, what", fd, what);
/* epoll is given the complete mask from the io_event record. */
748 if (io_masterfd >= 0)
749 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
/* kqueue only gets the newly requested bits (one record per filter). */
752 return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
754 #ifdef IO_USE_DEVPOLL
755 return io_event_change_devpoll(fd, i->what);
758 return io_event_change_poll(fd, i->what);
/* select: compare fd against the current maximum and set the requested bits
 * in the persistent fd sets. */
761 if (fd > select_maxfd)
764 if (what & IO_WANTREAD)
765 FD_SET(fd, &readers);
766 if (what & IO_WANTWRITE)
767 FD_SET(fd, &writers);
/* Public entry point: change the file status flags of fd via fcntl().
 * Reads the current flags with F_GETFL and writes flags back with F_SETFL;
 * the line that modifies flags is not visible here (presumably it adds
 * O_NONBLOCK).
 *
 * Returns true if the F_SETFL call returned 0. */
776 io_setnonblock(int fd)
778 int flags = fcntl(fd, F_GETFL);
/* Alias O_NONBLOCK to O_NDELAY.
 * NOTE(review): presumably guarded by an #ifndef O_NONBLOCK that is not
 * visible here. */
782 #define O_NONBLOCK O_NDELAY
786 return fcntl(fd, F_SETFL, flags) == 0;
/* Detach fd from the backends and close it.
 * NOTE(review): presumably the body of io_close(); the function header and
 * the conditionals between the backend sections are not visible here.
 * Returns true if close() returned 0. */
795 i = io_event_get(fd);
/* Flush queued kqueue changes first. */
797 if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
798 io_event_kqueue_commit_cache();
800 /* both kqueue and epoll remove fd from all sets automatically on the last close
801 * of the descriptor. since we don't know if this is the last close we'll have
802 * to remove the set explicitly. */
/* NOTE(review): i is dereferenced here; confirm that a NULL result of
 * io_event_get() is handled on one of the lines not visible. */
804 io_event_change_kqueue(fd, i->what, EV_DELETE);
805 io_event_kqueue_commit_cache();
808 io_close_devpoll(fd);
812 io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
818 return close(fd) == 0;
/* Public entry point: disable event type(s) for a descriptor.
 *
 * what: combination of IO_WANTREAD / IO_WANTWRITE to disable.
 * Returns false if fd has no io_event record; otherwise the result comes from
 * the active backend (return statements partly not visible here). */
823 io_event_del(int fd, short what)
825 io_event *i = io_event_get(fd);
827 io_debug("io_event_del: trying to delete eventtype; fd, what", fd, what);
828 if (!i) return false;
830 if (!(i->what & what)) /* event is already disabled */
/* devpoll, poll and epoll are given the mask from the io_event record. */
834 #ifdef IO_USE_DEVPOLL
835 return io_event_change_devpoll(fd, i->what);
838 return io_event_change_poll(fd, i->what);
841 if (io_masterfd >= 0)
842 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
/* kqueue disables only the filters named in what. */
845 return io_event_change_kqueue(fd, what, EV_DISABLE);
/* select: clear the corresponding bits in the persistent fd sets. */
848 if (what & IO_WANTWRITE)
849 FD_CLR(fd, &writers);
851 if (what & IO_WANTREAD)
852 FD_CLR(fd, &readers);
/* Public entry point: wait for events and run the callbacks by delegating to
 * the dispatcher of the active backend.  epoll is used when io_masterfd is
 * valid; the preprocessor conditionals that choose between the other
 * dispatchers are mostly not visible here.
 *
 * tv: maximum time to wait, passed on to the backend dispatcher. */
860 io_dispatch(struct timeval *tv)
863 if (io_masterfd >= 0)
864 return io_dispatch_epoll(tv);
867 return io_dispatch_select(tv);
870 return io_dispatch_kqueue(tv);
872 #ifdef IO_USE_DEVPOLL
873 return io_dispatch_devpoll(tv);
876 return io_dispatch_poll(tv);
882 /* call the callback function inside the struct matching fd */
/* fd:   descriptor that reported an event.
 * what: IO_* flags describing the event, as computed by the dispatcher.
 * NOTE(review): i is dereferenced below; confirm that io_event_get() cannot
 * return NULL here or that a check sits on a line not visible. */
884 io_docallback(int fd, short what)
886 io_event *i = io_event_get(fd);
888 io_debug("io_docallback; fd, what", fd, what);
890 if (i->callback) { /* callback might be NULL if a previous callback function
891 called io_close on this fd */
/* On IO_ERROR the handler gets the registered mask (i->what) rather than the
 * error flag. */
892 i->callback(fd, (what & IO_ERROR) ? i->what : what);
894 /* if error indicator is set, we return the event(s) that were registered */