2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 * Please read the file COPYING, README and AUTHORS for more information.
8 * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
15 * I/O abstraction interface.
22 #include <sys/types.h>
30 /* Enables extra debug messages in event add/delete/callback code. */
31 /* #define DEBUG_IO */
35 void (*callback)(int, short);
42 #define INIT_IOEVENT { NULL, -1, 0, NULL }
44 #define MAX_EVENTS 100
/* Compile-time selection of the I/O backend. The IO_USE_* macros defined
 * here are tested further down (e.g. "#ifdef IO_USE_DEVPOLL") to enable the
 * matching implementation. */
46 #ifdef HAVE_EPOLL_CREATE
47 # define IO_USE_EPOLL 1
/* select is enabled together with epoll: the epoll init code logs a
 * fallback to select() when epoll cannot be initialized. */
49 # define IO_USE_SELECT 1
53 # define IO_USE_KQUEUE 1
55 # ifdef HAVE_SYS_DEVPOLL_H
56 # define IO_USE_DEVPOLL 1
58 # if defined(HAVE_POLL) && defined(HAVE_POLL_H)
59 # define IO_USE_POLL 1
62 # define IO_USE_SELECT 1
/* no supported API detected: abort the build */
64 # error "no IO API available!?"
65 # endif /* HAVE_SELECT */
66 # endif /* HAVE_POLL */
67 # endif /* HAVE_SYS_DEVPOLL_H */
68 # endif /* HAVE_KQUEUE */
69 #endif /* HAVE_EPOLL_CREATE */
71 static bool library_initialized = false;
74 #include <sys/epoll.h>
76 static int io_masterfd = -1;
77 static bool io_event_change_epoll(int fd, short what, const int action);
78 static int io_dispatch_epoll(struct timeval *tv);
82 #include <sys/types.h>
83 #include <sys/event.h>
84 static array io_evcache;
85 static int io_masterfd;
87 static int io_dispatch_kqueue(struct timeval *tv);
88 static bool io_event_change_kqueue(int, short, const int action);
95 static int poll_maxfd;
97 static bool io_event_change_poll PARAMS((int fd, short what));
100 #ifdef IO_USE_DEVPOLL
101 #include <sys/devpoll.h>
102 static int io_masterfd;
104 static bool io_event_change_devpoll(int fd, short what);
108 #include "defines.h" /* for conn.h */
109 #include "proc.h" /* for PROC_STAT (needed by conf.h) */
110 #include "conn.h" /* for CONN_ID (needed by conf.h) */
111 #include "conf.h" /* for Conf_MaxConnections */
113 static fd_set readers;
114 static fd_set writers;
116 * the largest fd registered with select(); the first argument
117 * passed to select() is this value plus one.
119 static int select_maxfd;
120 static int io_dispatch_select PARAMS((struct timeval *tv));
123 #define io_masterfd -1
125 #endif /* IO_USE_SELECT */
127 static array io_events;
129 static void io_docallback PARAMS((int fd, short what));
/* Debug helper: logs the label s together with fd and what at LOG_DEBUG
 * level. */
133 io_debug(const char *s, int fd, int what)
135 Log(LOG_DEBUG, "%s: %d, %d\n", s, fd, what);
/* Second definition with every parameter marked UNUSED; presumably the
 * no-op variant used when DEBUG_IO is not defined - TODO confirm, the
 * surrounding #ifdef is not visible here. */
139 io_debug(const char UNUSED *s,int UNUSED a, int UNUSED b)
150 i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
158 #ifdef IO_USE_DEVPOLL
/*
 * Wait for events on the /dev/poll master descriptor using the DP_POLL
 * ioctl and pass each returned descriptor to io_docallback().
 *
 * tv: maximum time to wait.
 */
160 io_dispatch_devpoll(struct timeval *tv)
/* NOTE(review): sec is tv_sec scaled to milliseconds, but tv_usec
 * (microseconds) is added unscaled on the next line. This looks like a unit
 * mismatch unless callers always pass tv_usec == 0 - confirm. */
163 time_t sec = tv->tv_sec * 1000;
164 int i, ret, timeout = tv->tv_usec + sec;
/* result buffer: at most MAX_EVENTS entries are requested per call */
166 struct pollfd p[MAX_EVENTS];
171 dvp.dp_timeout = timeout;
172 dvp.dp_nfds = MAX_EVENTS;
174 ret = ioctl(io_masterfd, DP_POLL, &dvp);
/* ret is used as the number of filled entries in p[] */
176 for (i=0; i < ret ; i++) {
/* translate poll flags into IO_WANT* bits */
178 if (p[i].revents & (POLLIN|POLLPRI))
181 if (p[i].revents & POLLOUT)
182 what |= IO_WANTWRITE;
184 if (p[i].revents && !what) {
185 /* other flag is set, probably POLLERR */
188 io_docallback(p[i].fd, what);
/*
 * Register or update the event mask of fd with /dev/poll by writing a
 * pollfd record to the master descriptor.
 *
 * what: IO_WANTREAD / IO_WANTWRITE bits.
 * Returns true when the complete record was written.
 */
196 io_event_change_devpoll(int fd, short what)
/* map IO_WANT* bits to poll event flags */
202 if (what & IO_WANTREAD)
203 p.events = POLLIN | POLLPRI;
204 if (what & IO_WANTWRITE)
/* a short write counts as failure */
208 return write(io_masterfd, &p, sizeof p) == (ssize_t)sizeof p;
/*
 * Write a pollfd record with POLLREMOVE to the /dev/poll master descriptor;
 * presumably this removes fd from the monitored set (the assignment of
 * p.fd is not visible here).
 */
212 io_close_devpoll(int fd)
215 p.events = POLLREMOVE;
/* NOTE(review): the result of write() is ignored here, whereas
 * io_event_change_devpoll() checks it. */
217 write(io_masterfd, &p, sizeof p);
/*
 * Open /dev/poll as the master descriptor; the library is marked
 * initialized only when open() succeeds.
 *
 * eventsize: only used in the log message in the lines visible here.
 */
221 io_library_init_devpoll(unsigned int eventsize)
223 io_masterfd = open("/dev/poll", O_RDWR);
224 if (io_masterfd >= 0)
225 library_initialized = true;
/* NOTE(review): the message below is logged whether or not open()
 * succeeded (masterfd is negative on failure). */
226 Log(LOG_INFO, "IO subsystem: /dev/poll (initial maxfd %u, masterfd %d).",
227 eventsize, io_masterfd);
231 io_close_devpoll(int UNUSED x)
234 io_library_init_devpoll(unsigned int UNUSED ev)
/*
 * Wait for events with poll() on the pollfds array and call
 * io_docallback() with the translated event bits.
 *
 * tv: maximum time to wait.
 */
241 io_dispatch_poll(struct timeval *tv)
/* NOTE(review): sec is tv_sec scaled to milliseconds, but tv_usec
 * (microseconds) is added unscaled on the next line. This looks like a unit
 * mismatch unless callers always pass tv_usec == 0 - confirm. */
243 time_t sec = tv->tv_sec * 1000;
244 int i, ret, timeout = tv->tv_usec + sec;
247 struct pollfd *p = array_start(&pollfds);
/* entries 0..poll_maxfd are polled; the loop index is passed to
 * io_docallback() as the descriptor, i.e. the array is indexed by fd */
252 ret = poll(p, poll_maxfd + 1, timeout);
257 for (i=0; i <= poll_maxfd; i++) {
/* translate poll flags into IO_WANT* bits */
259 if (p[i].revents & (POLLIN|POLLPRI))
262 if (p[i].revents & POLLOUT)
263 what |= IO_WANTWRITE;
265 if (p[i].revents && !what) {
266 /* other flag is set, probably POLLERR */
271 io_docallback(i, what);
/*
 * Compute the poll() event mask for fd from the IO_WANT* bits in what and
 * obtain the pollfd slot for fd.
 */
281 io_event_change_poll(int fd, short what)
/* map IO_WANT* bits to poll event flags */
286 if (what & IO_WANTREAD)
287 events = POLLIN | POLLPRI;
288 if (what & IO_WANTWRITE)
/* fd is used as the index; presumably array_alloc() grows the array as
 * needed - TODO confirm against the array implementation */
291 p = array_alloc(&pollfds, sizeof *p, fd);
/*
 * Look up the pollfd slot of fd; when fd was the highest descriptor
 * (poll_maxfd), scan the array downward to determine the new poll_maxfd.
 * What is written to the slot itself is not visible here.
 */
302 io_close_poll(int fd)
305 p = array_get(&pollfds, sizeof *p, fd);
309 if (fd == poll_maxfd) {
310 while (poll_maxfd > 0) {
312 p = array_get(&pollfds, sizeof *p, poll_maxfd);
/*
 * Initialize the poll() backend: set up the pollfds array, request room
 * for eventsize entries, iterate over all of them (loop body not visible
 * here) and mark the library initialized.
 */
320 io_library_init_poll(unsigned int eventsize)
323 array_init(&pollfds);
325 Log(LOG_INFO, "IO subsystem: poll (initial maxfd %u).",
327 p = array_alloc(&pollfds, sizeof(struct pollfd), eventsize);
330 p = array_start(&pollfds);
331 for (i = 0; i < eventsize; i++)
334 library_initialized = true;
339 io_close_poll(int UNUSED x)
342 io_library_init_poll(unsigned int UNUSED ev)
/*
 * Wait for events with select() and call io_docallback() for descriptors
 * in the range 0..select_maxfd.
 *
 * tv: timeout handed directly to select().
 */
349 io_dispatch_select(struct timeval *tv)
/* work on copies: select() overwrites the sets it is given, the master
 * sets must keep the registered descriptors */
357 readers_tmp = readers;
358 writers_tmp = writers;
360 ret = select(select_maxfd + 1, &readers_tmp, &writers_tmp, NULL, tv);
366 for (i = 0; i <= select_maxfd; i++) {
368 if (FD_ISSET(i, &readers_tmp)) {
373 if (FD_ISSET(i, &writers_tmp)) {
374 what |= IO_WANTWRITE;
378 io_docallback(i, what);
/*
 * Initialize the select() backend and mark the library initialized.
 * When FD_SETSIZE is known, Conf_MaxConnections is limited to
 * FD_SETSIZE - 1.
 */
387 io_library_init_select(unsigned int eventsize)
/* presumably returns early when another backend is already active - the
 * statement guarded by this condition is not visible here */
389 if (library_initialized)
391 Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
396 if (Conf_MaxConnections >= (int)FD_SETSIZE) {
398 "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
399 Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
401 Conf_MaxConnections = FD_SETSIZE - 1;
405 "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
406 #endif /* FD_SETSIZE */
407 library_initialized = true;
/*
 * Remove fd from the select() reader and writer sets. If fd was the
 * highest registered descriptor, select_maxfd is decremented until a
 * descriptor with a callback is found or 0 is reached.
 */
411 io_close_select(int fd)
415 if (io_masterfd >= 0) /* Are we using epoll()? */
418 FD_CLR(fd, &writers);
419 FD_CLR(fd, &readers);
421 i = io_event_get(fd);
424 if (fd == select_maxfd) {
425 while (select_maxfd>0) {
426 --select_maxfd; /* find largest fd */
427 i = io_event_get(select_maxfd);
428 if (i && i->callback) break;
434 io_library_init_select(int UNUSED x)
437 io_close_select(int UNUSED x)
/*
 * Apply an epoll_ctl() operation for fd on the epoll master descriptor.
 *
 * what:   IO_WANTREAD / IO_WANTWRITE bits, translated to EPOLLIN|EPOLLPRI
 *         and EPOLLOUT.
 * action: epoll_ctl() operation; callers in this file pass EPOLL_CTL_ADD,
 *         EPOLL_CTL_MOD or EPOLL_CTL_DEL.
 * Returns true when epoll_ctl() returns 0.
 */
444 io_event_change_epoll(int fd, short what, const int action)
446 struct epoll_event ev = { 0, {0} };
449 if (what & IO_WANTREAD)
450 ev.events = EPOLLIN | EPOLLPRI;
451 if (what & IO_WANTWRITE)
452 ev.events |= EPOLLOUT;
454 return epoll_ctl(io_masterfd, action, fd, &ev) == 0;
/*
 * Wait for events with epoll_wait() and call io_docallback() for every
 * event returned.
 *
 * tv: maximum time to wait.
 */
458 io_dispatch_epoll(struct timeval *tv)
/* NOTE(review): sec is tv_sec scaled to milliseconds, but tv_usec
 * (microseconds) is added unscaled on the next line. This looks like a unit
 * mismatch unless callers always pass tv_usec == 0 - confirm. */
460 time_t sec = tv->tv_sec * 1000;
461 int i, ret, timeout = tv->tv_usec + sec;
/* at most MAX_EVENTS events are fetched per call */
462 struct epoll_event epoll_ev[MAX_EVENTS];
468 ret = epoll_wait(io_masterfd, epoll_ev, MAX_EVENTS, timeout);
470 for (i = 0; i < ret; i++) {
/* translate epoll flags into the IO_* bits passed to the callback */
472 if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
475 if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
478 if (epoll_ev[i].events & EPOLLOUT)
479 type |= IO_WANTWRITE;
481 io_docallback(epoll_ev[i].data.fd, type);
/*
 * Create the epoll master descriptor, using eventsize as the size hint
 * for epoll_create(). On success the library is marked initialized;
 * otherwise a message about falling back to select() is logged.
 */
488 io_library_init_epoll(unsigned int eventsize)
490 int ecreate_hint = (int)eventsize;
/* a non-positive hint is replaced; the replacement value is not visible
 * here */
491 if (ecreate_hint <= 0)
493 io_masterfd = epoll_create(ecreate_hint);
494 if (io_masterfd >= 0) {
495 library_initialized = true;
497 "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
498 ecreate_hint, eventsize, io_masterfd);
502 Log(LOG_INFO, "Can't initialize epoll() IO interface, falling back to select() ...");
507 io_library_init_epoll(unsigned int UNUSED ev)
509 #endif /* IO_USE_EPOLL */
/*
 * Submit the kevent changes queued in io_evcache to the kernel with a
 * single kevent() call and then truncate the cache. ret is true when
 * kevent() returned 0.
 */
514 io_event_kqueue_commit_cache(void)
516 struct kevent *events;
/* number of queued struct kevent entries */
518 int len = (int) array_length(&io_evcache, sizeof (struct kevent));
520 if (!len) /* nothing to do */
526 array_free(&io_evcache);
530 events = array_start(&io_evcache);
532 assert(events != NULL);
/* changelist only: no event list is requested (NULL, 0) and no timeout
 * is given */
534 ret = kevent(io_masterfd, events, len, NULL, 0, NULL) == 0;
536 array_trunc(&io_evcache);
/*
 * Queue kqueue filter changes for fd: one EVFILT_READ change when
 * IO_WANTREAD is set in what and one EVFILT_WRITE change when IO_WANTWRITE
 * is set. Changes are appended to io_evcache.
 *
 * action: kevent flags; callers in this file pass EV_ADD|EV_ENABLE,
 *         EV_DISABLE or EV_DELETE.
 */
541 io_event_change_kqueue(int fd, short what, const int action)
546 if (what & IO_WANTREAD) {
547 EV_SET(&kev, fd, EVFILT_READ, action, 0, 0, 0);
548 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
/* direct kevent() submission; presumably the fallback used when appending
 * to the cache failed - the guarding condition is not visible here */
550 ret = kevent(io_masterfd, &kev,1, NULL, 0, NULL) == 0;
/* the write filter is only handled if the read part succeeded */
553 if (ret && (what & IO_WANTWRITE)) {
554 EV_SET(&kev, fd, EVFILT_WRITE, action, 0, 0, 0);
555 ret = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
557 ret = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
/* flush once the cache holds 100 or more entries.
 * NOTE(review): the literal 100 equals MAX_EVENTS; confirm whether the
 * macro was intended. */
560 if (array_length(&io_evcache, sizeof kev) >= 100)
561 io_event_kqueue_commit_cache();
/*
 * Submit the cached changes and wait for events in one kevent() call, then
 * call io_docallback() for each returned event.
 *
 * tv: maximum time to wait; converted to a struct timespec.
 */
566 io_dispatch_kqueue(struct timeval *tv)
/* at most MAX_EVENTS events are fetched per call */
569 struct kevent kev[MAX_EVENTS];
570 struct kevent *newevents;
/* microseconds -> nanoseconds */
573 ts.tv_sec = tv->tv_sec;
574 ts.tv_nsec = tv->tv_usec * 1000;
/* pending changes from io_evcache are passed as the changelist */
576 newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
577 newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
578 assert(newevents_len >= 0);
580 ret = kevent(io_masterfd, newevents, newevents_len, kev, MAX_EVENTS, &ts);
/* the cache is emptied only if kevent() did not fail, so the changes
 * stay queued after an error */
581 if (newevents && ret != -1)
582 array_trunc(&io_evcache);
584 for (i = 0; i < ret; i++) {
585 io_debug("dispatch_kqueue: fd, kev.flags", (int)kev[i].ident, kev[i].flags);
/* EOF and error conditions are both reported to the callback as IO_ERROR */
586 if (kev[i].flags & (EV_EOF|EV_ERROR)) {
587 if (kev[i].flags & EV_ERROR)
588 Log(LOG_ERR, "kevent fd %d: EV_ERROR (%s)",
589 (int)kev[i].ident, strerror((int)kev[i].data));
590 io_docallback((int)kev[i].ident, IO_ERROR);
594 switch (kev[i].filter) {
596 io_docallback((int)kev[i].ident, IO_WANTREAD);
599 io_docallback((int)kev[i].ident, IO_WANTWRITE);
/* NOTE(review): kev[i].ident is passed to %d without the (int) cast that
 * every other use in this function applies - confirm the argument type
 * matches the format. */
602 LogDebug("Unknown kev.filter number %d for fd %d",
603 kev[i].filter, kev[i].ident);
606 io_docallback((int)kev[i].ident, IO_ERROR);
/*
 * Create the kqueue master descriptor; the library is marked initialized
 * only when kqueue() returns a valid descriptor.
 *
 * eventsize: only used in the log message in the lines visible here.
 */
615 io_library_init_kqueue(unsigned int eventsize)
617 io_masterfd = kqueue();
620 "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
621 eventsize, io_masterfd);
622 if (io_masterfd >= 0)
623 library_initialized = true;
627 io_library_init_kqueue(unsigned int UNUSED ev)
/*
 * Initialize the I/O library.
 *
 * eventsize: initial number of io_event slots to allocate in io_events;
 *            also passed on to every backend init function.
 * Returns the value of library_initialized after all backend init
 * functions have been called.
 */
633 io_library_init(unsigned int eventsize)
635 if (library_initialized)
/* pre-allocate the event table when a size was given */
638 if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
/* every backend init function is called in turn; presumably the ones not
 * compiled in are empty stubs - TODO confirm. The select variant checks
 * library_initialized itself. */
641 io_library_init_epoll(eventsize);
642 io_library_init_kqueue(eventsize);
643 io_library_init_devpoll(eventsize);
644 io_library_init_poll(eventsize);
645 io_library_init_select(eventsize);
647 return library_initialized;
/*
 * Shut the I/O library down: release the kqueue change cache and reset
 * library_initialized. For the backends with a master descriptor
 * (epoll, kqueue, /dev/poll) the descriptor is handled when it is valid;
 * the statement guarded by that check is not visible here.
 */
652 io_library_shutdown(void)
658 #if defined(IO_USE_EPOLL) || defined(IO_USE_KQUEUE) || defined(IO_USE_DEVPOLL)
659 if (io_masterfd >= 0)
664 array_free(&io_evcache);
666 library_initialized = false;
/*
 * Replace the callback stored for fd.
 *
 * cbfunc: function called as cbfunc(fd, events) by io_docallback().
 */
671 io_event_setcb(int fd, void (*cbfunc) (int, short))
673 io_event *i = io_event_get(fd);
677 i->callback = cbfunc;
/*
 * Register a new descriptor with the compiled-in backend. Each assignment
 * below belongs to one backend; the preprocessor and runtime conditions
 * that select between them are mostly not visible here.
 */
683 backend_create_ev(int fd, short what)
686 #ifdef IO_USE_DEVPOLL
687 ret = io_event_change_devpoll(fd, what);
690 ret = io_event_change_poll(fd, what);
693 ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
696 ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
/* presumably the select() path: io_event_add() contains the FD_SET
 * handling - TODO confirm the guarding condition */
700 ret = io_event_add(fd, what);
/*
 * Create the io_event entry for fd, store its callback and register the
 * descriptor with the backend.
 *
 * what:   IO_WANTREAD / IO_WANTWRITE bits to watch for.
 * cbfunc: callback invoked as cbfunc(fd, events).
 */
707 io_event_create(int fd, short what, void (*cbfunc) (int, short))
/* select() cannot handle descriptors >= FD_SETSIZE; the check applies
 * when no master descriptor is in use (io_masterfd < 0) */
713 #if defined(IO_USE_SELECT) && defined(FD_SETSIZE)
714 if (io_masterfd < 0 && fd >= FD_SETSIZE) {
716 "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
/* io_events is indexed by the descriptor number */
721 i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
724 "array_alloc failed: could not allocate space for %d io_event structures",
729 i->callback = cbfunc;
731 ret = backend_create_ev(fd, what);
/*
 * Enable additional event types for an already created descriptor.
 *
 * what: IO_WANTREAD / IO_WANTWRITE bits to add.
 * Returns false when fd has no io_event entry.
 */
739 io_event_add(int fd, short what)
741 io_event *i = io_event_get(fd);
743 if (!i) return false;
745 if ((i->what & what) == what) /* event type is already registered */
748 io_debug("io_event_add: fd, what", fd, what);
/* epoll, /dev/poll and poll are given i->what, the event mask stored for
 * fd, whereas kqueue only receives the bits in what */
752 if (io_masterfd >= 0)
753 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
756 return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
758 #ifdef IO_USE_DEVPOLL
759 return io_event_change_devpoll(fd, i->what);
762 return io_event_change_poll(fd, i->what);
/* select(): the statement guarded by this comparison is not visible here;
 * presumably it raises select_maxfd to fd */
765 if (fd > select_maxfd)
768 if (what & IO_WANTREAD)
769 FD_SET(fd, &readers);
770 if (what & IO_WANTWRITE)
771 FD_SET(fd, &writers);
/*
 * Read the file status flags of fd with F_GETFL and write the modified
 * flags back with F_SETFL; presumably O_NONBLOCK is added in between (the
 * modification itself is not visible here).
 *
 * Returns true when the F_SETFL call returns 0.
 */
780 io_setnonblock(int fd)
782 int flags = fcntl(fd, F_GETFL);
/* O_NDELAY substitutes for O_NONBLOCK; presumably only when O_NONBLOCK is
 * not defined - the #ifndef is not visible here */
786 #define O_NONBLOCK O_NDELAY
790 return fcntl(fd, F_SETFL, flags) == 0;
/*
 * Read the descriptor flags of fd with F_GETFD and write the modified
 * flags back with F_SETFD; presumably FD_CLOEXEC is added in between (the
 * modification itself is not visible here).
 *
 * Returns true when the F_SETFD call returns 0.
 */
794 io_setcloexec(int fd)
796 int flags = fcntl(fd, F_GETFD);
803 return fcntl(fd, F_SETFD, flags) == 0;
/* Body of the descriptor close routine. The signature is not visible here;
 * presumably this is io_close(), which the comment in io_docallback()
 * mentions - TODO confirm. It unregisters fd from the backends and returns
 * true when close() returns 0. */
811 i = io_event_get(fd);
813 if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
814 io_event_kqueue_commit_cache();
816 /* both kqueue and epoll remove fd from all sets automatically on the last close
817 * of the descriptor. since we don't know if this is the last close we'll have
818 * to remove the set explicitly. */
/* NOTE(review): i->what is dereferenced below and no NULL check on i is
 * visible in this fragment - confirm one exists in the elided lines. */
820 io_event_change_kqueue(fd, i->what, EV_DELETE);
/* flush right away so the delete reaches the kernel before close() */
821 io_event_kqueue_commit_cache();
824 io_close_devpoll(fd);
828 io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
834 return close(fd) == 0;
/*
 * Disable event types for a descriptor.
 *
 * what: IO_WANTREAD / IO_WANTWRITE bits to remove.
 * Returns false when fd has no io_event entry.
 */
839 io_event_del(int fd, short what)
841 io_event *i = io_event_get(fd);
843 io_debug("io_event_del: trying to delete eventtype; fd, what", fd, what);
844 if (!i) return false;
846 if (!(i->what & what)) /* event is already disabled */
/* /dev/poll, poll and epoll are given i->what, the event mask stored for
 * fd, whereas kqueue only receives the bits in what, with EV_DISABLE */
850 #ifdef IO_USE_DEVPOLL
851 return io_event_change_devpoll(fd, i->what);
854 return io_event_change_poll(fd, i->what);
857 if (io_masterfd >= 0)
858 return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
861 return io_event_change_kqueue(fd, what, EV_DISABLE);
/* select(): clear the descriptor from the affected sets */
864 if (what & IO_WANTWRITE)
865 FD_CLR(fd, &writers);
867 if (what & IO_WANTREAD)
868 FD_CLR(fd, &readers);
/*
 * Wait for I/O events using the active backend and run the callbacks.
 *
 * tv: maximum time to wait; passed through to the backend dispatcher.
 * Returns the value returned by the backend dispatcher.
 */
876 io_dispatch(struct timeval *tv)
/* epoll is used when a master descriptor exists; the select() call that
 * follows is presumably the fallback when it does not */
879 if (io_masterfd >= 0)
880 return io_dispatch_epoll(tv);
883 return io_dispatch_select(tv);
886 return io_dispatch_kqueue(tv);
888 #ifdef IO_USE_DEVPOLL
889 return io_dispatch_devpoll(tv);
892 return io_dispatch_poll(tv);
898 /* call the callback function inside the struct matching fd */
/* what: event bits reported by the backend dispatcher; if IO_ERROR is set
 * the callback is called with the mask stored in the entry (i->what). */
900 io_docallback(int fd, short what)
902 io_event *i = io_event_get(fd);
904 io_debug("io_docallback; fd, what", fd, what);
/* NOTE(review): i is dereferenced without a visible NULL check, although
 * io_event_add() and io_event_del() check the result of io_event_get() -
 * confirm that fd always has an entry here. */
906 if (i->callback) { /* callback might be NULL if a previous callback function
907 called io_close on this fd */
908 i->callback(fd, (what & IO_ERROR) ? i->what : what);
910 /* if error indicator is set, we return the event(s) that were registered */