2 * Copyright (c) 2007-2010 Niels Provos and Nick Mathewson
3 * Copyright (c) 2002-2006 Niels Provos <provos@citi.umich.edu>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/types.h>
31 #include "event2/event-config.h"
33 #ifdef _EVENT_HAVE_SYS_TIME_H
41 #ifdef _EVENT_HAVE_STDARG_H
44 #ifdef _EVENT_HAVE_UNISTD_H
53 #ifdef _EVENT_HAVE_SYS_SOCKET_H
54 #include <sys/socket.h>
56 #ifdef _EVENT_HAVE_NETINET_IN_H
57 #include <netinet/in.h>
59 #ifdef _EVENT_HAVE_NETINET_IN6_H
60 #include <netinet/in6.h>
63 #include "event2/util.h"
64 #include "event2/bufferevent.h"
65 #include "event2/buffer.h"
66 #include "event2/bufferevent_struct.h"
67 #include "event2/bufferevent_compat.h"
68 #include "event2/event.h"
69 #include "log-internal.h"
70 #include "mm-internal.h"
71 #include "bufferevent-internal.h"
72 #include "util-internal.h"
74 #include "iocp-internal.h"
/* Forward declarations for the static socket-backend handlers that are
 * defined further down in this file. */
78 static int be_socket_enable(struct bufferevent *, short);
79 static int be_socket_disable(struct bufferevent *, short);
80 static void be_socket_destruct(struct bufferevent *);
81 static int be_socket_adj_timeouts(struct bufferevent *);
82 static int be_socket_flush(struct bufferevent *, short, enum bufferevent_flush_mode);
83 static int be_socket_ctrl(struct bufferevent *, enum bufferevent_ctrl_op, union bufferevent_ctrl_data *);
85 static void be_socket_setfd(struct bufferevent *, evutil_socket_t);
/* Operations table for socket-based bufferevents. The entries visible here
 * are the offset of the public bev member inside struct bufferevent_private
 * and the be_socket_adj_timeouts handler.
 * NOTE(review): the remaining entries and the closing brace are not visible
 * in this listing -- confirm against the full file. */
87 const struct bufferevent_ops bufferevent_ops_socket = {
89 evutil_offsetof(struct bufferevent_private, bev),
93 be_socket_adj_timeouts,
/* Thin alias: add event ev with timeout t through _bufferevent_add_event.
 * Both macro arguments are parenthesized in the expansion. */
98 #define be_socket_add(ev, t) \
99 _bufferevent_add_event((ev), (t))
/* Callback registered on the output evbuffer (see the evbuffer_add_cb call
 * in bufferevent_socket_new); arg is the owning bufferevent.
 * When data was added (cbinfo->n_added is nonzero), writing is enabled, the
 * write event is not already pending and writes are not suspended, the write
 * event is added with the write timeout.
 * NOTE(review): the return type, the final parameter line and the braces are
 * not visible in this listing. */
102 bufferevent_socket_outbuf_cb(struct evbuffer *buf,
103 const struct evbuffer_cb_info *cbinfo,
106 struct bufferevent *bufev = arg;
107 struct bufferevent_private *bufev_p =
108 EVUTIL_UPCAST(bufev, struct bufferevent_private, bev);
110 if (cbinfo->n_added &&
111 (bufev->enabled & EV_WRITE) &&
112 !event_pending(&bufev->ev_write, EV_WRITE, NULL) &&
113 !bufev_p->write_suspended) {
114 /* Somebody added data to the buffer, and we would like to
115 * write, and we were not writing. So, start writing. */
116 be_socket_add(&bufev->ev_write, &bufev->timeout_write);
117 /* XXXX handle failure from be_socket_add */
/* Read-event callback for a socket bufferevent; arg is the bufferevent.
 * Visible flow:
 *  - take a reference and the lock (_bufferevent_incref_and_lock);
 *  - add BEV_EVENT_TIMEOUT to the flags in what when event is EV_TIMEOUT;
 *  - when a high read watermark is set, compute the remaining room as
 *    wm_read.high minus the current input length, and suspend reading via
 *    bufferevent_wm_suspend_read on the no-room path;
 *  - compare that amount with _bufferevent_get_read_max, where a negative
 *    howmuch stands for unlimited;
 *  - unfreeze the input buffer only around evbuffer_read from fd;
 *  - flag BEV_EVENT_ERROR or BEV_EVENT_EOF (res == 0) as appropriate;
 *  - subtract res from the read buckets and run the read callback once the
 *    input length reaches wm_read.low;
 *  - otherwise disable EV_READ and pass the flags in what to the event
 *    callback;
 *  - release the reference and lock before returning.
 * NOTE(review): several condition, goto/label and brace lines are not
 * visible in this listing, so the exact branch structure must be confirmed
 * against the full file. */
122 bufferevent_readcb(evutil_socket_t fd, short event, void *arg)
124 struct bufferevent *bufev = arg;
125 struct bufferevent_private *bufev_p =
126 EVUTIL_UPCAST(bufev, struct bufferevent_private, bev);
127 struct evbuffer *input;
129 short what = BEV_EVENT_READING;
130 ev_ssize_t howmuch = -1, readmax=-1;
132 _bufferevent_incref_and_lock(bufev);
134 if (event == EV_TIMEOUT) {
135 what |= BEV_EVENT_TIMEOUT;
139 input = bufev->input;
142 * If we have a high watermark configured then we don't want to
143 * read more data than would make us reach the watermark.
145 if (bufev->wm_read.high != 0) {
146 howmuch = bufev->wm_read.high - evbuffer_get_length(input);
147 /* we somehow lowered the watermark, stop reading */
149 bufferevent_wm_suspend_read(bufev);
153 readmax = _bufferevent_get_read_max(bufev_p);
154 if (howmuch < 0 || howmuch > readmax) /* The use of -1 for "unlimited"
155 * uglifies this code. XXXX */
157 if (bufev_p->read_suspended)
160 evbuffer_unfreeze(input, 0);
161 res = evbuffer_read(input, fd, (int)howmuch); /* XXXX evbuffer_read would do better to take and return ev_ssize_t */
162 evbuffer_freeze(input, 0);
165 int err = evutil_socket_geterror(fd);
166 if (EVUTIL_ERR_RW_RETRIABLE(err))
169 what |= BEV_EVENT_ERROR;
170 } else if (res == 0) {
172 what |= BEV_EVENT_EOF;
178 _bufferevent_decrement_read_buckets(bufev_p, res);
180 /* Invoke the user callback - must always be called last */
181 if (evbuffer_get_length(input) >= bufev->wm_read.low)
182 _bufferevent_run_readcb(bufev);
190 bufferevent_disable(bufev, EV_READ);
191 _bufferevent_run_eventcb(bufev, what);
194 _bufferevent_decref_and_unlock(bufev);
/* Write-event callback for a socket bufferevent; arg is the bufferevent.
 * Two roles are visible here:
 *  - While bufev_p->connecting is set it finishes a nonblocking connect:
 *    it queries evutil_socket_finished_connecting, honours a previously
 *    recorded connection_refused flag, clears connecting, and reports either
 *    BEV_EVENT_ERROR (after deleting both events) or BEV_EVENT_CONNECTED.
 *    For async (IOCP) bufferevents the write event is deleted and
 *    bufferevent_async_set_connected is called before reporting.
 *  - It also drains the output buffer: up to _bufferevent_get_write_max
 *    bytes are written to fd with the output buffer unfrozen only around the
 *    write, the write buckets are decremented by res, the write event is
 *    deleted once the buffer is empty, and the write callback runs when the
 *    buffer length is at or below wm_write.low.
 * A failed write adds BEV_EVENT_ERROR to what and res == 0 adds
 * BEV_EVENT_EOF; the function then disables EV_WRITE and reports what
 * through the event callback. The reference and lock taken at entry are
 * released at the end.
 * NOTE(review): several condition, goto/label and brace lines are not
 * visible in this listing, so the exact branch structure must be confirmed
 * against the full file. */
198 bufferevent_writecb(evutil_socket_t fd, short event, void *arg)
200 struct bufferevent *bufev = arg;
201 struct bufferevent_private *bufev_p =
202 EVUTIL_UPCAST(bufev, struct bufferevent_private, bev);
204 short what = BEV_EVENT_WRITING;
206 ev_ssize_t atmost = -1;
208 _bufferevent_incref_and_lock(bufev);
210 if (event == EV_TIMEOUT) {
211 what |= BEV_EVENT_TIMEOUT;
214 if (bufev_p->connecting) {
215 int c = evutil_socket_finished_connecting(fd);
216 /* we need to fake the error if the connection was refused
217 * immediately - usually connection to localhost on BSD */
218 if (bufev_p->connection_refused) {
219 bufev_p->connection_refused = 0;
226 bufev_p->connecting = 0;
228 event_del(&bufev->ev_write);
229 event_del(&bufev->ev_read);
230 _bufferevent_run_eventcb(bufev, BEV_EVENT_ERROR);
235 if (BEV_IS_ASYNC(bufev)) {
236 event_del(&bufev->ev_write);
237 bufferevent_async_set_connected(bufev);
238 _bufferevent_run_eventcb(bufev,
239 BEV_EVENT_CONNECTED);
243 _bufferevent_run_eventcb(bufev,
244 BEV_EVENT_CONNECTED);
245 if (!(bufev->enabled & EV_WRITE) ||
246 bufev_p->write_suspended) {
247 event_del(&bufev->ev_write);
253 atmost = _bufferevent_get_write_max(bufev_p);
255 if (bufev_p->write_suspended)
258 if (evbuffer_get_length(bufev->output)) {
259 evbuffer_unfreeze(bufev->output, 1);
260 res = evbuffer_write_atmost(bufev->output, fd, atmost);
261 evbuffer_freeze(bufev->output, 1);
263 int err = evutil_socket_geterror(fd);
264 if (EVUTIL_ERR_RW_RETRIABLE(err))
266 what |= BEV_EVENT_ERROR;
267 } else if (res == 0) {
269 XXXX Actually, a 0 on write doesn't indicate
270 an EOF. An ECONNRESET might be more typical.
272 what |= BEV_EVENT_EOF;
277 _bufferevent_decrement_write_buckets(bufev_p, res);
280 if (evbuffer_get_length(bufev->output) == 0) {
281 event_del(&bufev->ev_write);
285 * Invoke the user callback if our buffer is drained or below the
288 if ((res || !connected) &&
289 evbuffer_get_length(bufev->output) <= bufev->wm_write.low) {
290 _bufferevent_run_writecb(bufev);
296 if (evbuffer_get_length(bufev->output) == 0) {
297 event_del(&bufev->ev_write);
302 bufferevent_disable(bufev, EV_WRITE);
303 _bufferevent_run_eventcb(bufev, what);
306 _bufferevent_decref_and_unlock(bufev);
/* Create a socket-based bufferevent on base for descriptor fd.
 * If base is non-NULL and has an IOCP port (event_base_get_iocp), the
 * request is delegated to bufferevent_async_new with the same arguments.
 * Otherwise a zeroed struct bufferevent_private is allocated with mm_calloc
 * and initialised through bufferevent_init_common with
 * bufferevent_ops_socket. The read and write events are then assigned to fd
 * as persistent events using bufferevent_readcb and bufferevent_writecb,
 * bufferevent_socket_outbuf_cb is registered on the output buffer, and both
 * buffers are frozen (input with argument 0, output with argument 1).
 * NOTE(review): the options parameter declaration, the failure returns and
 * the final return are not visible in this listing. */
310 bufferevent_socket_new(struct event_base *base, evutil_socket_t fd,
313 struct bufferevent_private *bufev_p;
314 struct bufferevent *bufev;
317 if (base && event_base_get_iocp(base))
318 return bufferevent_async_new(base, fd, options);
321 if ((bufev_p = mm_calloc(1, sizeof(struct bufferevent_private)))== NULL)
324 if (bufferevent_init_common(bufev_p, base, &bufferevent_ops_socket,
329 bufev = &bufev_p->bev;
331 event_assign(&bufev->ev_read, bufev->ev_base, fd,
332 EV_READ|EV_PERSIST, bufferevent_readcb, bufev);
333 event_assign(&bufev->ev_write, bufev->ev_base, fd,
334 EV_WRITE|EV_PERSIST, bufferevent_writecb, bufev);
336 evbuffer_add_cb(bufev->output, bufferevent_socket_outbuf_cb, bufev);
338 evbuffer_freeze(bufev->input, 0);
339 evbuffer_freeze(bufev->output, 1);
/* Start connecting bev to the address sa, which is socklen bytes long.
 * Visible steps:
 *  - hold a reference and the lock for the duration of the call;
 *  - take the descriptor from bufferevent_getfd, or create a SOCK_STREAM
 *    socket of family sa->sa_family and make it nonblocking;
 *  - when bufferevent_async_can_connect allows it, install the descriptor
 *    and use bufferevent_async_connect; otherwise call
 *    evutil_socket_connect;
 *  - for async bufferevents falling back to a plain connect, re-assign the
 *    write event so that bufferevent_writecb handles completion;
 *  - set bufev_p->connecting so the write callback finishes the connect; an
 *    immediate success or an immediate refusal (connection_refused) is
 *    delivered by activating the write event by hand;
 *  - on the failure path, report BEV_EVENT_ERROR through the event callback
 *    and close the descriptor with evutil_closesocket.
 * NOTE(review): the conditions guarding these steps, the return type and
 * the returned value are not visible in this listing -- in particular,
 * confirm whether the close applies only to a socket created here. */
345 bufferevent_socket_connect(struct bufferevent *bev,
346 struct sockaddr *sa, int socklen)
348 struct bufferevent_private *bufev_p =
349 EVUTIL_UPCAST(bev, struct bufferevent_private, bev);
356 _bufferevent_incref_and_lock(bev);
361 fd = bufferevent_getfd(bev);
365 fd = socket(sa->sa_family, SOCK_STREAM, 0);
368 if (evutil_make_socket_nonblocking(fd)<0)
374 if (bufferevent_async_can_connect(bev)) {
375 bufferevent_setfd(bev, fd);
376 r = bufferevent_async_connect(bev, fd, sa, socklen);
379 bufev_p->connecting = 1;
384 r = evutil_socket_connect(&fd, sa, socklen);
389 /* ConnectEx() isn't always around, even when IOCP is enabled.
390 * Here, we borrow the socket object's write handler to fall back
391 * on a non-blocking connect() when ConnectEx() is unavailable. */
392 if (BEV_IS_ASYNC(bev)) {
393 event_assign(&bev->ev_write, bev->ev_base, fd,
394 EV_WRITE|EV_PERSIST, bufferevent_writecb, bev);
397 bufferevent_setfd(bev, fd);
399 if (! be_socket_enable(bev, EV_WRITE)) {
400 bufev_p->connecting = 1;
405 /* The connect succeeded already. How very BSD of it. */
407 bufev_p->connecting = 1;
408 event_active(&bev->ev_write, EV_WRITE, 1);
410 /* The connect failed already. How very BSD of it. */
411 bufev_p->connection_refused = 1;
412 bufev_p->connecting = 1;
414 event_active(&bev->ev_write, EV_WRITE, 1);
420 _bufferevent_run_eventcb(bev, BEV_EVENT_ERROR);
422 evutil_closesocket(fd);
423 /* do something about the error? */
425 _bufferevent_decref_and_unlock(bev);
/* Completion callback passed to evutil_getaddrinfo_async by
 * bufferevent_socket_connect_hostname; arg is the bufferevent, result is the
 * lookup status and ai the resulting address list.
 * It lifts the BEV_SUSPEND_LOOKUP suspensions on write and read, and then
 * takes one of two visible paths:
 *  - error path: store result in dns_error and report BEV_EVENT_ERROR;
 *  - success path: connect to the first address in ai with
 *    bufferevent_socket_connect (the result r is currently unused).
 * Both paths call _bufferevent_decref_and_unlock and free ai.
 * NOTE(review): the condition selecting the error path and any locking call
 * that pairs with the unlock are not visible in this listing. */
430 bufferevent_connect_getaddrinfo_cb(int result, struct evutil_addrinfo *ai,
433 struct bufferevent *bev = arg;
434 struct bufferevent_private *bev_p =
435 EVUTIL_UPCAST(bev, struct bufferevent_private, bev);
439 bufferevent_unsuspend_write(bev, BEV_SUSPEND_LOOKUP);
440 bufferevent_unsuspend_read(bev, BEV_SUSPEND_LOOKUP);
443 bev_p->dns_error = result;
444 _bufferevent_run_eventcb(bev, BEV_EVENT_ERROR);
445 _bufferevent_decref_and_unlock(bev);
447 evutil_freeaddrinfo(ai);
451 /* XXX use the other addrinfos? */
452 /* XXX use this return value */
453 r = bufferevent_socket_connect(bev, ai->ai_addr, (int)ai->ai_addrlen);
454 _bufferevent_decref_and_unlock(bev);
455 evutil_freeaddrinfo(ai);
/* Resolve hostname asynchronously and then connect bev to it on port.
 * family is checked against AF_INET, AF_INET6 and AF_UNSPEC, and port
 * against the range 1..65535. dns_error is cleared, the port is formatted
 * in decimal into portbuf, and a zeroed hint requesting family, IPPROTO_TCP
 * and SOCK_STREAM is handed to evutil_getaddrinfo_async together with
 * bufferevent_connect_getaddrinfo_cb as the completion callback.
 * Before the lookup starts, reads and writes are suspended with
 * BEV_SUSPEND_LOOKUP and a reference is taken with bufferevent_incref.
 * The trailing unsuspend calls undo the suspension.
 * NOTE(review): the results of the failed checks, the condition guarding the
 * trailing unsuspend calls and the return value are not visible in this
 * listing -- presumably that path handles a failed lookup start; confirm. */
459 bufferevent_socket_connect_hostname(struct bufferevent *bev,
460 struct evdns_base *evdns_base, int family, const char *hostname, int port)
463 struct evutil_addrinfo hint;
465 struct bufferevent_private *bev_p =
466 EVUTIL_UPCAST(bev, struct bufferevent_private, bev);
468 if (family != AF_INET && family != AF_INET6 && family != AF_UNSPEC)
470 if (port < 1 || port > 65535)
474 bev_p->dns_error = 0;
477 evutil_snprintf(portbuf, sizeof(portbuf), "%d", port);
479 memset(&hint, 0, sizeof(hint));
480 hint.ai_family = family;
481 hint.ai_protocol = IPPROTO_TCP;
482 hint.ai_socktype = SOCK_STREAM;
484 bufferevent_suspend_write(bev, BEV_SUSPEND_LOOKUP);
485 bufferevent_suspend_read(bev, BEV_SUSPEND_LOOKUP);
487 bufferevent_incref(bev);
488 err = evutil_getaddrinfo_async(evdns_base, hostname, portbuf,
489 &hint, bufferevent_connect_getaddrinfo_cb, bev);
494 bufferevent_unsuspend_write(bev, BEV_SUSPEND_LOOKUP);
495 bufferevent_unsuspend_read(bev, BEV_SUSPEND_LOOKUP);
/* Reads the dns_error field from the private part of bev into rv.
 * NOTE(review): the declaration of rv, any locking and the return statement
 * are not visible in this listing; presumably rv is returned -- confirm. */
501 bufferevent_socket_get_dns_error(struct bufferevent *bev)
504 struct bufferevent_private *bev_p =
505 EVUTIL_UPCAST(bev, struct bufferevent_private, bev);
508 rv = bev_p->dns_error;
515 * Create a new buffered event object.
517 * The read callback is invoked whenever we read new data.
518 * The write callback is invoked whenever the output buffer is drained.
519 * The error callback is invoked on a write/read error or on EOF.
521 * Both the read and write callbacks may be NULL. The error callback is not
522 * allowed to be NULL and always has to be provided.
/* Build a socket bufferevent for fd by calling bufferevent_socket_new with
 * a NULL base and options 0, then install readcb, writecb, eventcb and
 * cbarg with bufferevent_setcb.
 * NOTE(review): the failure return and the final return are not visible in
 * this listing. */
526 bufferevent_new(evutil_socket_t fd,
527 bufferevent_data_cb readcb, bufferevent_data_cb writecb,
528 bufferevent_event_cb eventcb, void *cbarg)
530 struct bufferevent *bufev;
532 if (!(bufev = bufferevent_socket_new(NULL, fd, 0)))
535 bufferevent_setcb(bufev, readcb, writecb, eventcb, cbarg);
/* Enable handler of the socket backend: for each of EV_READ and EV_WRITE
 * present in event, add the matching event with its timeout through
 * be_socket_add, checking for a result of -1.
 * NOTE(review): the return statements are not visible in this listing;
 * bufferevent_socket_connect treats a zero result as success. */
542 be_socket_enable(struct bufferevent *bufev, short event)
544 if (event & EV_READ) {
545 if (be_socket_add(&bufev->ev_read,&bufev->timeout_read) == -1)
548 if (event & EV_WRITE) {
549 if (be_socket_add(&bufev->ev_write,&bufev->timeout_write) == -1)
/* Disable handler of the socket backend: delete the read event when EV_READ
 * is requested, and delete the write event when EV_WRITE is requested and no
 * connect is in progress. The write event is kept during a connect because
 * bufferevent_writecb is what completes it. Each event_del result is checked
 * against -1.
 * NOTE(review): the return statements are not visible in this listing. */
556 be_socket_disable(struct bufferevent *bufev, short event)
558 struct bufferevent_private *bufev_p =
559 EVUTIL_UPCAST(bufev, struct bufferevent_private, bev);
560 if (event & EV_READ) {
561 if (event_del(&bufev->ev_read) == -1)
564 /* Don't actually disable the write if we are trying to connect. */
565 if ((event & EV_WRITE) && ! bufev_p->connecting) {
566 if (event_del(&bufev->ev_write) == -1)
/* Destruct handler of the socket backend. Asserts that bufev really uses
 * bufferevent_ops_socket, fetches the descriptor from the read event,
 * deletes both events, and closes the descriptor only when the
 * BEV_OPT_CLOSE_ON_FREE option is set and fd is not negative. */
573 be_socket_destruct(struct bufferevent *bufev)
575 struct bufferevent_private *bufev_p =
576 EVUTIL_UPCAST(bufev, struct bufferevent_private, bev);
578 EVUTIL_ASSERT(bufev->be_ops == &bufferevent_ops_socket);
580 fd = event_get_fd(&bufev->ev_read);
582 event_del(&bufev->ev_read);
583 event_del(&bufev->ev_write);
585 if ((bufev_p->options & BEV_OPT_CLOSE_ON_FREE) && fd >= 0)
586 EVUTIL_CLOSESOCKET(fd);
/* Timeout-adjustment handler of the socket backend: every event that is
 * currently pending (read and/or write) is added again through
 * be_socket_add with the timeout stored in the bufferevent. A negative
 * result from be_socket_add is checked for.
 * NOTE(review): the statements run on failure and the return value are not
 * visible in this listing. */
590 be_socket_adj_timeouts(struct bufferevent *bufev)
593 if (event_pending(&bufev->ev_read, EV_READ, NULL))
594 if (be_socket_add(&bufev->ev_read, &bufev->timeout_read) < 0)
596 if (event_pending(&bufev->ev_write, EV_WRITE, NULL)) {
597 if (be_socket_add(&bufev->ev_write, &bufev->timeout_write) < 0)
/* Flush handler of the socket backend (declared static near the top of the
 * file).
 * NOTE(review): only the signature is visible in this listing; the body and
 * its return value must be checked against the full file. */
604 be_socket_flush(struct bufferevent *bev, short iotype,
605 enum bufferevent_flush_mode mode)
/* Point a socket bufferevent at a new descriptor fd. Asserts that bufev uses
 * bufferevent_ops_socket, deletes both events, re-assigns them to fd with
 * the same persistent read/write flags and callbacks used by
 * bufferevent_socket_new, and then calls bufferevent_enable with the
 * currently enabled event mask.
 * NOTE(review): any condition guarding the final bufferevent_enable call is
 * not visible in this listing. */
612 be_socket_setfd(struct bufferevent *bufev, evutil_socket_t fd)
615 EVUTIL_ASSERT(bufev->be_ops == &bufferevent_ops_socket);
617 event_del(&bufev->ev_read);
618 event_del(&bufev->ev_write);
620 event_assign(&bufev->ev_read, bufev->ev_base, fd,
621 EV_READ|EV_PERSIST, bufferevent_readcb, bufev);
622 event_assign(&bufev->ev_write, bufev->ev_base, fd,
623 EV_WRITE|EV_PERSIST, bufferevent_writecb, bufev);
626 bufferevent_enable(bufev, bufev->enabled);
631 /* XXXX Should non-socket bufferevents support this? */
/* Apply priority to both the read and the write event of bufev through
 * event_priority_set. Bufferevents whose be_ops is not
 * bufferevent_ops_socket take a separate path, and each event_priority_set
 * result is checked against -1.
 * NOTE(review): the statements on those paths, any locking and the return
 * value are not visible in this listing. */
633 bufferevent_priority_set(struct bufferevent *bufev, int priority)
638 if (bufev->be_ops != &bufferevent_ops_socket)
641 if (event_priority_set(&bufev->ev_read, priority) == -1)
643 if (event_priority_set(&bufev->ev_write, priority) == -1)
652 /* XXXX Should non-socket bufferevents support this? */
/* Move bufev to the event base given by base: store base in bufev->ev_base
 * and call event_base_set for the read event and then for the write event.
 * Bufferevents whose be_ops is not bufferevent_ops_socket take a separate
 * path.
 * NOTE(review): the handling of res between the two event_base_set calls,
 * any locking and the return value are not visible in this listing. */
654 bufferevent_base_set(struct event_base *base, struct bufferevent *bufev)
659 if (bufev->be_ops != &bufferevent_ops_socket)
662 bufev->ev_base = base;
664 res = event_base_set(base, &bufev->ev_read);
668 res = event_base_set(base, &bufev->ev_write);
675 be_socket_ctrl(struct bufferevent *bev, enum bufferevent_ctrl_op op,
676 union bufferevent_ctrl_data *data)
679 case BEV_CTRL_SET_FD:
680 be_socket_setfd(bev, data->fd);
682 case BEV_CTRL_GET_FD:
683 data->fd = event_get_fd(&bev->ev_read);
685 case BEV_CTRL_GET_UNDERLYING: