2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 * Please read the file COPYING, README and AUTHORS for more information.
8 * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
15 * I/O abstraction interface.
22 #include <sys/types.h>
30 /* Enables extra debug messages in event add/delete/callback code. */
31 /* #define DEBUG_IO */
/* Event callback stored per descriptor; io_docallback() calls it as
 * callback(fd, event_mask). */
35 void (*callback)(int, short);
/* Static initializer for an io_event.
 * NOTE(review): the full field list of io_event is not visible here; confirm
 * that these four values still match the struct's member count and order. */
42 #define INIT_IOEVENT { NULL, -1, 0, NULL }
/* Compile-time choice of the I/O backend, driven by the HAVE_* feature
 * macros. The visible defines cover epoll, kqueue, /dev/poll, poll and
 * select; the build stops with #error when none is available.
 * NOTE(review): IO_USE_SELECT is defined twice in this cascade; presumably
 * the first occurrence enables select() alongside epoll so it can act as a
 * runtime fallback -- confirm against the #if/#else lines not shown here. */
45 #ifdef HAVE_EPOLL_CREATE
46 # define IO_USE_EPOLL 1
48 # define IO_USE_SELECT 1
52 # define IO_USE_KQUEUE 1
54 # ifdef HAVE_SYS_DEVPOLL_H
55 # define IO_USE_DEVPOLL 1
58 # define IO_USE_POLL 1
61 # define IO_USE_SELECT 1
63 # error "no IO API available!?"
64 # endif /* HAVE_SELECT */
65 # endif /* HAVE_POLL */
66 # endif /* HAVE_SYS_DEVPOLL_H */
67 # endif /* HAVE_KQUEUE */
68 #endif /* HAVE_EPOLL_CREATE */
/* Set to true by the backend init routine that succeeds and reset to false
 * by io_library_shutdown(). */
70 static bool library_initialized = false;
/* epoll backend state and helpers. io_masterfd receives the descriptor
 * returned by epoll_create() and stays -1 until then. */
73 #include <sys/epoll.h>
75 static int io_masterfd = -1;
76 static bool io_event_change_epoll(int fd, short what, const int action);
77 static int io_dispatch_epoll(struct timeval *tv);
/* kqueue backend state and helpers.
 * io_evcache queues struct kevent change records until they are submitted by
 * io_event_kqueue_commit_cache() or passed to kevent() in
 * io_dispatch_kqueue(); io_masterfd receives the descriptor from kqueue(). */
81 #include <sys/types.h>
82 #include <sys/event.h>
83 static array io_evcache;
84 static int io_masterfd;
86 static int io_dispatch_kqueue(struct timeval *tv);
87 static bool io_event_change_kqueue(int, short, const int action);
/* poll backend state: io_dispatch_poll() scans pollfd slots 0..poll_maxfd
 * and passes poll_maxfd + 1 as the entry count to poll(). */
94 static int poll_maxfd;
96 static bool io_event_change_poll PARAMS((int fd, short what));
/* /dev/poll backend state: io_masterfd is the descriptor obtained by opening
 * "/dev/poll" in io_library_init_devpoll(). */
100 #include <sys/devpoll.h>
101 static int io_masterfd;
103 static bool io_event_change_devpoll(int fd, short what);
/* select backend: project headers needed for Conf_MaxConnections, followed
 * by the master descriptor sets. io_dispatch_select() works on copies of
 * these sets, so the registered interest survives each select() call. */
107 #include "defines.h" /* for conn.h */
108 #include "proc.h" /* for PROC_STAT (needed by conf.h) */
109 #include "conn.h" /* for CONN_ID (needed by conf.h) */
110 #include "conf.h" /* for Conf_MaxConnections */
112 static fd_set readers;
113 static fd_set writers;
115 * the largest fd currently registered; select() is called with
116 * this value plus one as its first argument.
118 static int select_maxfd;
119 static int io_dispatch_select PARAMS((struct timeval *tv));
/* With this define active, io_masterfd is the constant -1, so every
 * "io_masterfd >= 0" / "io_masterfd < 0" test in this file is decided at
 * compile time.
 * NOTE(review): the guarding condition is not visible; presumably this
 * applies only when epoll is not compiled in -- confirm. */
122 #define io_masterfd -1
124 #endif /* IO_USE_SELECT */
/* Table of io_event records; lookups and allocations in this file index it
 * by file descriptor. */
126 static array io_events;
/* Invokes the callback registered for fd (defined at the end of the file). */
128 static void io_docallback PARAMS((int fd, short what));
/* Debug variant: logs the message s together with fd and the event mask at
 * LOG_DEBUG level.
 * NOTE(review): presumably compiled only when DEBUG_IO is defined -- the
 * guarding #ifdef is not visible here. */
132 io_debug(const char *s, int fd, int what)
134 Log(LOG_DEBUG, "%s: %d, %d\n", s, fd, what);
/* Non-debug variant: all parameters are marked UNUSED. */
138 io_debug(const char UNUSED *s,int UNUSED a, int UNUSED b)
/* Fetch the io_event record stored at index fd in io_events. */
149 i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
157 #ifdef IO_USE_DEVPOLL
/* /dev/poll dispatcher: waits for events via ioctl(DP_POLL), translates the
 * returned revents bits into IO_WANT* flags and calls io_docallback() for
 * each reported descriptor. The result buffer holds 100 entries and the
 * wait is repeated as long as a call returns exactly 100 events. */
159 io_dispatch_devpoll(struct timeval *tv)
162 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): tv_sec is scaled to milliseconds above, but tv_usec
 * (microseconds) is added unscaled. If dp_timeout is in milliseconds this
 * waits too long; tv_usec / 1000 looks intended -- confirm. */
163 int i, total, ret, timeout = tv->tv_usec + sec;
165 struct pollfd p[100];
172 dvp.dp_timeout = timeout;
175 ret = ioctl(io_masterfd, DP_POLL, &dvp);
179 for (i=0; i < ret ; i++) {
181 if (p[i].revents & (POLLIN|POLLPRI))
184 if (p[i].revents & POLLOUT)
185 what |= IO_WANTWRITE;
187 if (p[i].revents && !what) {
188 /* other flag is set, probably POLLERR */
191 io_docallback(p[i].fd, what);
193 } while (ret == 100);
/* Register the requested event types for fd with /dev/poll by writing the
 * record p to io_masterfd. IO_WANTREAD maps to POLLIN | POLLPRI.
 * Returns true only when write() reports that the whole record was written. */
200 io_event_change_devpoll(int fd, short what)
206 if (what & IO_WANTREAD)
207 p.events = POLLIN | POLLPRI;
208 if (what & IO_WANTWRITE)
212 return write(io_masterfd, &p, sizeof p) == (ssize_t)sizeof p;
/* Remove a descriptor from the /dev/poll set by writing a record whose
 * events field is POLLREMOVE. The result of write() is not checked. */
216 io_close_devpoll(int fd)
219 p.events = POLLREMOVE;
221 write(io_masterfd, &p, sizeof p);
/* Open "/dev/poll" and mark the library initialized when that succeeds.
 * The log line is emitted unconditionally, so a failed open is logged with
 * a negative masterfd. */
225 io_library_init_devpoll(unsigned int eventsize)
227 io_masterfd = open("/dev/poll", O_RDWR);
228 if (io_masterfd >= 0)
229 library_initialized = true;
230 Log(LOG_INFO, "IO subsystem: /dev/poll (initial maxfd %u, masterfd %d).",
231 eventsize, io_masterfd);
/* Stand-ins with UNUSED parameters.
 * NOTE(review): presumably the variants built when IO_USE_DEVPOLL is not
 * defined; their bodies and the enclosing #else are not visible here. */
235 io_close_devpoll(int UNUSED x)
238 io_library_init_devpoll(unsigned int UNUSED ev)
/* poll() dispatcher: polls slots 0..poll_maxfd of the pollfds array,
 * translates revents bits into IO_WANT* flags and calls io_docallback().
 * The slot index i is passed as the descriptor, i.e. the array is indexed
 * by fd. */
245 io_dispatch_poll(struct timeval *tv)
247 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): poll() takes its timeout in milliseconds; tv_sec is scaled
 * accordingly above, but tv_usec (microseconds) is added unscaled.
 * tv_usec / 1000 looks intended -- confirm. */
248 int i, ret, timeout = tv->tv_usec + sec;
251 struct pollfd *p = array_start(&pollfds);
256 ret = poll(p, poll_maxfd + 1, timeout);
261 for (i=0; i <= poll_maxfd; i++) {
263 if (p[i].revents & (POLLIN|POLLPRI))
266 if (p[i].revents & POLLOUT)
267 what |= IO_WANTWRITE;
269 if (p[i].revents && !what) {
270 /* other flag is set, probably POLLERR */
275 io_docallback(i, what);
/* Set the poll event mask for fd: IO_WANTREAD maps to POLLIN | POLLPRI.
 * The pollfd slot is obtained with array_alloc() at index fd, which makes
 * the pollfds array descriptor-indexed. */
285 io_event_change_poll(int fd, short what)
290 if (what & IO_WANTREAD)
291 events = POLLIN | POLLPRI;
292 if (what & IO_WANTWRITE)
295 p = array_alloc(&pollfds, sizeof *p, fd);
/* Release the pollfd slot of fd. When fd was the highest tracked
 * descriptor, lower slots are inspected to establish a new poll_maxfd.
 * NOTE(review): the slot reset and the loop's decrement/exit test are not
 * visible in this excerpt. */
306 io_close_poll(int fd)
309 p = array_get(&pollfds, sizeof *p, fd);
313 if (fd == poll_maxfd) {
314 while (poll_maxfd > 0) {
316 p = array_get(&pollfds, sizeof *p, poll_maxfd);
/* Initialize the poll backend: sets up the pollfds array, reserves room for
 * eventsize entries, walks the first eventsize slots and marks the library
 * initialized.
 * NOTE(review): the per-slot initialization inside the loop and the handling
 * of an array_alloc() failure are not visible here. */
324 io_library_init_poll(unsigned int eventsize)
327 array_init(&pollfds);
329 Log(LOG_INFO, "IO subsystem: poll (initial maxfd %u).",
331 p = array_alloc(&pollfds, sizeof(struct pollfd), eventsize);
334 p = array_start(&pollfds);
335 for (i = 0; i < eventsize; i++)
338 library_initialized = true;
/* Stand-ins with UNUSED parameters.
 * NOTE(review): presumably the variants built when IO_USE_POLL is not
 * defined; their bodies and the enclosing #else are not visible here. */
343 io_close_poll(int UNUSED x)
346 io_library_init_poll(unsigned int UNUSED ev)
/* select() dispatcher: copies the master reader/writer sets, waits in
 * select() with select_maxfd + 1 as the descriptor count, then scans
 * descriptors 0..select_maxfd and calls io_docallback() with the IO_WANT*
 * flags derived from the returned sets. */
353 io_dispatch_select(struct timeval *tv)
361 readers_tmp = readers;
362 writers_tmp = writers;
364 ret = select(select_maxfd + 1, &readers_tmp, &writers_tmp, NULL, tv);
370 for (i = 0; i <= select_maxfd; i++) {
372 if (FD_ISSET(i, &readers_tmp)) {
377 if (FD_ISSET(i, &writers_tmp)) {
378 what |= IO_WANTWRITE;
382 io_docallback(i, what);
/* Initialize the select backend. Checks library_initialized first (the
 * action taken is not visible; presumably an early return because another
 * backend is already active). When FD_SETSIZE is known and
 * Conf_MaxConnections is not below it, the limit is lowered to
 * FD_SETSIZE - 1; otherwise a message about the unknown descriptor limit is
 * prepared. Finally marks the library initialized. */
391 io_library_init_select(unsigned int eventsize)
393 if (library_initialized)
395 Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
400 if (Conf_MaxConnections >= (int)FD_SETSIZE) {
402 "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
403 Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
405 Conf_MaxConnections = FD_SETSIZE - 1;
409 "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
410 #endif /* FD_SETSIZE */
411 library_initialized = true;
/* Remove fd from both select() sets. If fd was the highest tracked
 * descriptor, select_maxfd is walked downwards until a descriptor with a
 * registered callback is found (or 0 is reached).
 * NOTE(review): the statement guarded by the io_masterfd test is not
 * visible; presumably an early return when epoll is in use. */
415 io_close_select(int fd)
419 if (io_masterfd >= 0) /* Are we using epoll()? */
422 FD_CLR(fd, &writers);
423 FD_CLR(fd, &readers);
425 i = io_event_get(fd);
428 if (fd == select_maxfd) {
429 while (select_maxfd>0) {
430 --select_maxfd; /* find largest fd */
431 i = io_event_get(select_maxfd);
432 if (i && i->callback) break;
/* Stand-ins with UNUSED parameters.
 * NOTE(review): this io_library_init_select() variant declares its parameter
 * as int, while the real implementation and the other init stubs take
 * unsigned int -- worth aligning. Bodies and the enclosing #else are not
 * visible here. */
438 io_library_init_select(int UNUSED x)
441 io_close_select(int UNUSED x)
/* Apply an epoll_ctl() operation (action) for fd. IO_WANTREAD maps to
 * EPOLLIN | EPOLLPRI and IO_WANTWRITE adds EPOLLOUT. Returns true when
 * epoll_ctl() returns 0.
 * NOTE(review): the assignment of ev.data.fd is not visible here, although
 * io_dispatch_epoll() reads data.fd from the returned events -- confirm. */
448 io_event_change_epoll(int fd, short what, const int action)
450 struct epoll_event ev = { 0, {0} };
453 if (what & IO_WANTREAD)
454 ev.events = EPOLLIN | EPOLLPRI;
455 if (what & IO_WANTWRITE)
456 ev.events |= EPOLLOUT;
458 return epoll_ctl(io_masterfd, action, fd, &ev) == 0;
/* epoll dispatcher: fetches up to 100 events per epoll_wait() call, derives
 * the event type from the reported bits and calls io_docallback() with the
 * descriptor stored in data.fd. The wait is repeated as long as a call
 * returns exactly 100 events. */
462 io_dispatch_epoll(struct timeval *tv)
464 time_t sec = tv->tv_sec * 1000;
/* NOTE(review): epoll_wait() takes its timeout in milliseconds; tv_sec is
 * scaled accordingly above, but tv_usec (microseconds) is added unscaled.
 * tv_usec / 1000 looks intended -- confirm. */
465 int i, total = 0, ret, timeout = tv->tv_usec + sec;
466 struct epoll_event epoll_ev[100];
473 ret = epoll_wait(io_masterfd, epoll_ev, 100, timeout);
478 for (i = 0; i < ret; i++) {
480 if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
483 if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
486 if (epoll_ev[i].events & EPOLLOUT)
487 type |= IO_WANTWRITE;
489 io_docallback(epoll_ev[i].data.fd, type);
493 } while (ret == 100);
/* Create the epoll instance, using eventsize (converted to int) as the size
 * hint; a non-positive hint is replaced (replacement value not visible
 * here). On success the library is marked initialized and the parameters
 * are logged; the other visible log line announces the fallback to
 * select(). */
499 io_library_init_epoll(unsigned int eventsize)
501 int ecreate_hint = (int)eventsize;
502 if (ecreate_hint <= 0)
504 io_masterfd = epoll_create(ecreate_hint);
505 if (io_masterfd >= 0) {
506 library_initialized = true;
508 "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
509 ecreate_hint, eventsize, io_masterfd);
513 Log(LOG_INFO, "Can't initialize epoll() IO interface, falling back to select() ...");
/* Stand-in with an UNUSED parameter; closes the IO_USE_EPOLL conditional.
 * NOTE(review): body and the enclosing #else are not visible here. */
518 io_library_init_epoll(unsigned int UNUSED ev)
520 #endif /* IO_USE_EPOLL */
/* Submit every change record queued in io_evcache to the kernel with a
 * single kevent() call that requests no events back. ret is true when
 * kevent() returns 0.
 * NOTE(review): the conditions guarding array_free() and array_trunc() and
 * the return statements are not visible in this excerpt. */
525 io_event_kqueue_commit_cache(void)
527 struct kevent *events;
529 int len = (int) array_length(&io_evcache, sizeof (struct kevent));
531 if (!len) /* nothing to do */
537 array_free(&io_evcache);
541 events = array_start(&io_evcache);
543 assert(events != NULL);
545 ret = kevent(io_masterfd, events, len, NULL, 0, NULL) == 0;
547 array_trunc(&io_evcache);
/* Queue kqueue changes for fd: one EVFILT_READ record when IO_WANTREAD is
 * requested and, if that went well, one EVFILT_WRITE record when
 * IO_WANTWRITE is requested, both carrying the caller's action flags.
 * Records are appended to io_evcache; the cache is committed once it holds
 * 100 or more entries.
 * NOTE(review): the direct kevent() calls are presumably the fallback for a
 * failed array_catb(); the guarding conditions are not visible here. */
552 io_event_change_kqueue(int fd, short what, const int action)
557 if (what & IO_WANTREAD) {
558 EV_SET(&kev, fd, EVFILT_READ, action, 0, 0, 0);
559 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
561 ret = kevent(io_masterfd, &kev,1, NULL, 0, NULL) == 0;
564 if (ret && (what & IO_WANTWRITE)) {
565 EV_SET(&kev, fd, EVFILT_WRITE, action, 0, 0, 0);
566 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
568 ret = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
571 if (array_length(&io_evcache, sizeof kev) >= 100)
572 io_event_kqueue_commit_cache();
/* kqueue dispatcher: converts the timeval into a timespec, hands any queued
 * change records to kevent() together with the wait, and receives up to 100
 * events per call. Events flagged EV_EOF or EV_ERROR are reported to the
 * callback as IO_ERROR (EV_ERROR is logged first); other events are
 * dispatched by filter type. The wait is repeated as long as a call returns
 * exactly 100 events. */
577 io_dispatch_kqueue(struct timeval *tv)
579 int i, total = 0, ret;
580 struct kevent kev[100];
581 struct kevent *newevents;
584 ts.tv_sec = tv->tv_sec;
585 ts.tv_nsec = tv->tv_usec * 1000;
588 newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
589 newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
590 assert(newevents_len >= 0);
592 ret = kevent(io_masterfd, newevents, newevents_len, kev, 100, &ts);
/* The queued changes are dropped only when kevent() did not fail, so they
 * stay in the cache after an error. */
593 if (newevents && ret != -1)
594 array_trunc(&io_evcache);
600 for (i = 0; i < ret; i++) {
601 io_debug("dispatch_kqueue: fd, kev.flags", (int)kev[i].ident, kev[i].flags);
602 if (kev[i].flags & (EV_EOF|EV_ERROR)) {
603 if (kev[i].flags & EV_ERROR)
604 Log(LOG_ERR, "kevent fd %d: EV_ERROR (%s)",
605 (int)kev[i].ident, strerror((int)kev[i].data));
606 io_docallback((int)kev[i].ident, IO_ERROR);
610 switch (kev[i].filter) {
612 io_docallback((int)kev[i].ident, IO_WANTREAD);
615 io_docallback((int)kev[i].ident, IO_WANTWRITE);
/* NOTE(review): kev[i].ident is passed without the (int) cast used at the
 * other call sites although the format uses %d -- confirm the types match. */
618 LogDebug("Unknown kev.filter number %d for fd %d",
619 kev[i].filter, kev[i].ident);
622 io_docallback((int)kev[i].ident, IO_ERROR);
628 } while (ret == 100);
/* Create the kqueue descriptor, log the parameters and mark the library
 * initialized when kqueue() returned a valid descriptor. */
634 io_library_init_kqueue(unsigned int eventsize)
636 io_masterfd = kqueue();
639 "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
640 eventsize, io_masterfd);
641 if (io_masterfd >= 0)
642 library_initialized = true;
/* Stand-in with an UNUSED parameter.
 * NOTE(review): presumably built when IO_USE_KQUEUE is not defined; body and
 * the enclosing #else are not visible here. */
646 io_library_init_kqueue(unsigned int UNUSED ev)
/* Public entry point: initialize the I/O layer for roughly eventsize
 * descriptors. Checks library_initialized first, pre-allocates io_events
 * when eventsize is non-zero, then calls every backend init routine in the
 * order epoll, kqueue, /dev/poll, poll, select. Returns library_initialized,
 * i.e. true when one of them succeeded.
 * NOTE(review): the values returned on the two early-exit paths are not
 * visible in this excerpt. */
652 io_library_init(unsigned int eventsize)
654 if (library_initialized)
657 if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
660 io_library_init_epoll(eventsize);
661 io_library_init_kqueue(eventsize);
662 io_library_init_devpoll(eventsize);
663 io_library_init_poll(eventsize);
664 io_library_init_select(eventsize);
666 return library_initialized;
/* Tear down the I/O layer: releases the kqueue change cache and marks the
 * library uninitialized.
 * NOTE(review): the action taken when io_masterfd >= 0 is not visible
 * (presumably closing it), nor is the release of the other arrays. */
671 io_library_shutdown(void)
677 #if defined(IO_USE_EPOLL) || defined(IO_USE_KQUEUE) || defined(IO_USE_DEVPOLL)
678 if (io_masterfd >= 0)
683 array_free(&io_evcache);
685 library_initialized = false;
/* Replace the callback registered for fd with cbfunc.
 * NOTE(review): the handling of a missing io_event record and the return
 * value are not visible here. */
690 io_event_setcb(int fd, void (*cbfunc) (int, short))
692 io_event *i = io_event_get(fd);
696 i->callback = cbfunc;
/* Register a new descriptor with the compiled-in backend: /dev/poll, poll,
 * epoll (EPOLL_CTL_ADD) or kqueue (EV_ADD|EV_ENABLE); the remaining path
 * goes through io_event_add().
 * NOTE(review): most of the #ifdef lines selecting between these calls are
 * not visible here. */
702 backend_create_ev(int fd, short what)
705 #ifdef IO_USE_DEVPOLL
706 ret = io_event_change_devpoll(fd, what);
709 ret = io_event_change_poll(fd, what);
712 ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
715 ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
719 ret = io_event_add(fd, what);
/* Public entry point: start watching fd for the event types in what and
 * deliver them to cbfunc. When select() is the active mechanism
 * (io_masterfd < 0), descriptors at or above FD_SETSIZE are rejected with a
 * log message. The io_event slot for fd is allocated in io_events, the
 * callback is stored and the descriptor is handed to backend_create_ev().
 * NOTE(review): return statements and the remaining slot initialization are
 * not visible here. */
726 io_event_create(int fd, short what, void (*cbfunc) (int, short))
732 #if defined(IO_USE_SELECT) && defined(FD_SETSIZE)
733 if (io_masterfd < 0 && fd >= FD_SETSIZE) {
735 "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
740 i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
743 "array_alloc failed: could not allocate space for %d io_event structures",
748 i->callback = cbfunc;
750 ret = backend_create_ev(fd, what);
/* Public entry point: additionally watch fd for the event types in what.
 * Returns false when fd has no io_event record; nothing needs changing when
 * all requested types are already registered. Otherwise the active backend
 * is updated. Note that kqueue receives only the newly requested bits
 * (what), while epoll, /dev/poll and poll receive the record's mask
 * (i->what); for select the descriptor is added to the reader/writer sets.
 * NOTE(review): the update of i->what and of select_maxfd is not visible in
 * this excerpt. */
758 io_event_add(int fd, short what)
760 io_event *i = io_event_get(fd);
762 if (!i) return false;
764 if ((i->what & what) == what) /* event type is already registered */
767 io_debug("io_event_add: fd, what", fd, what);
771 if (io_masterfd >= 0)
772 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
775 return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
777 #ifdef IO_USE_DEVPOLL
778 return io_event_change_devpoll(fd, i->what);
781 return io_event_change_poll(fd, i->what);
784 if (fd > select_maxfd)
787 if (what & IO_WANTREAD)
788 FD_SET(fd, &readers);
789 if (what & IO_WANTWRITE)
790 FD_SET(fd, &writers);
/* Read the file status flags of fd with F_GETFL and write them back with
 * F_SETFL; returns true when the F_SETFL call returns 0. O_NDELAY serves as
 * a substitute definition for O_NONBLOCK.
 * NOTE(review): the error check on F_GETFL, the line that ORs O_NONBLOCK
 * into flags and the #ifndef around the define are not visible here. */
799 io_setnonblock(int fd)
801 int flags = fcntl(fd, F_GETFL);
805 #define O_NONBLOCK O_NDELAY
809 return fcntl(fd, F_SETFL, flags) == 0;
/* Read the descriptor flags of fd with F_GETFD and write them back with
 * F_SETFD; returns true when the F_SETFD call returns 0.
 * NOTE(review): the line that modifies flags (presumably adding FD_CLOEXEC)
 * is not visible here. */
813 io_setcloexec(int fd)
815 int flags = fcntl(fd, F_GETFD);
822 return fcntl(fd, F_SETFD, flags) == 0;
/* Body of the descriptor close routine (its header is not visible in this
 * excerpt): pending kqueue changes are committed, the descriptor is removed
 * from the active backend's set, and the result of close(fd) decides the
 * return value. */
830 i = io_event_get(fd);
832 if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
833 io_event_kqueue_commit_cache();
835 /* both kqueue and epoll remove fd from all sets automatically on the last close
836 * of the descriptor. since we don't know if this is the last close we'll have
837 * to remove the set explicitly. */
839 io_event_change_kqueue(fd, i->what, EV_DELETE);
840 io_event_kqueue_commit_cache();
843 io_close_devpoll(fd);
847 io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
853 return close(fd) == 0;
/* Public entry point: stop watching fd for the event types in what.
 * Returns false when fd has no io_event record; nothing needs changing when
 * none of the given types is currently enabled. Otherwise the active backend
 * is updated. As in io_event_add(), kqueue receives only the affected bits
 * (what, with EV_DISABLE) while the other backends receive i->what; for
 * select the descriptor is cleared from the reader/writer sets.
 * NOTE(review): the line clearing the bits in i->what is not visible in
 * this excerpt. */
858 io_event_del(int fd, short what)
860 io_event *i = io_event_get(fd);
862 io_debug("io_event_del: trying to delete eventtype; fd, what", fd, what);
863 if (!i) return false;
865 if (!(i->what & what)) /* event is already disabled */
869 #ifdef IO_USE_DEVPOLL
870 return io_event_change_devpoll(fd, i->what);
873 return io_event_change_poll(fd, i->what);
876 if (io_masterfd >= 0)
877 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
880 return io_event_change_kqueue(fd, what, EV_DISABLE);
883 if (what & IO_WANTWRITE)
884 FD_CLR(fd, &writers);
886 if (what & IO_WANTREAD)
887 FD_CLR(fd, &readers);
/* Public entry point: wait for events (bounded by tv) using the compiled-in
 * backend and return that dispatcher's result. epoll is used only when
 * io_masterfd is valid; the select dispatcher follows it.
 * NOTE(review): most of the #ifdef lines selecting between these calls are
 * not visible here. */
895 io_dispatch(struct timeval *tv)
898 if (io_masterfd >= 0)
899 return io_dispatch_epoll(tv);
902 return io_dispatch_select(tv);
905 return io_dispatch_kqueue(tv);
907 #ifdef IO_USE_DEVPOLL
908 return io_dispatch_devpoll(tv);
911 return io_dispatch_poll(tv);
917 /* call the callback function inside the struct matching fd */
919 io_docallback(int fd, short what)
921 io_event *i = io_event_get(fd);
923 io_debug("io_docallback; fd, what", fd, what);
/* NOTE(review): no NULL check on i is visible before the dereference below,
 * although other callers of io_event_get() test the result -- confirm that
 * fd always has a record at this point. */
925 if (i->callback) { /* callback might be NULL if a previous callback function
926 called io_close on this fd */
927 i->callback(fd, (what & IO_ERROR) ? i->what : what);
929 /* if error indicator is set, we return the event(s) that were registered */