]> arthur.barton.de Git - netatalk.git/blob - libevent/evdns.c
Add libevent
[netatalk.git] / libevent / evdns.c
1 /* $Id: evdns.c 6979 2006-08-04 18:31:13Z nickm $ */
2
3 /* The original version of this module was written by Adam Langley; for
4  * a history of modifications, check out the subversion logs.
5  *
6  * When editing this module, try to keep it re-mergeable by Adam.  Don't
7  * reformat the whitespace, add Tor dependencies, or so on.
8  *
9  * TODO:
10  *   - Support IPv6 and PTR records.
11  *   - Replace all externally visible magic numbers with #defined constants.
12  *   - Write documentation for APIs of all external functions.
13  */
14
15 /* Async DNS Library
16  * Adam Langley <agl@imperialviolet.org>
17  * http://www.imperialviolet.org/eventdns.html
18  * Public Domain code
19  *
20  * This software is Public Domain. To view a copy of the public domain dedication,
21  * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
22  * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
23  *
24  * I ask and expect, but do not require, that all derivative works contain an
25  * attribution similar to:
26  *      Parts developed by Adam Langley <agl@imperialviolet.org>
27  *
28  * You may wish to replace the word "Parts" with something else depending on
29  * the amount of original code.
30  *
31  * (Derivative works does not include programs which link against, run or include
32  * the source verbatim in their source distributions)
33  *
34  * Version: 0.1b
35  */
36
37 #include <sys/types.h>
38 #include "event2/event-config.h"
39
40 #ifndef _FORTIFY_SOURCE
41 #define _FORTIFY_SOURCE 3
42 #endif
43
44 #include <string.h>
45 #include <fcntl.h>
46 #ifdef _EVENT_HAVE_SYS_TIME_H
47 #include <sys/time.h>
48 #endif
49 #ifdef _EVENT_HAVE_STDINT_H
50 #include <stdint.h>
51 #endif
52 #include <stdlib.h>
53 #include <string.h>
54 #include <errno.h>
55 #ifdef _EVENT_HAVE_UNISTD_H
56 #include <unistd.h>
57 #endif
58 #include <limits.h>
59 #include <sys/stat.h>
60 #include <stdio.h>
61 #include <stdarg.h>
62 #ifdef WIN32
63 #include <winsock2.h>
64 #include <ws2tcpip.h>
65 #ifndef _WIN32_IE
66 #define _WIN32_IE 0x400
67 #endif
68 #include <shlobj.h>
69 #endif
70
71 #include "event2/dns.h"
72 #include "event2/dns_struct.h"
73 #include "event2/dns_compat.h"
74 #include "event2/util.h"
75 #include "event2/event.h"
76 #include "event2/event_struct.h"
77 #include "event2/thread.h"
78
79 #include "event2/bufferevent.h"
80 #include "event2/bufferevent_struct.h"
81 #include "bufferevent-internal.h"
82
83 #include "defer-internal.h"
84 #include "log-internal.h"
85 #include "mm-internal.h"
86 #include "strlcpy-internal.h"
87 #include "ipv6-internal.h"
88 #include "util-internal.h"
89 #include "evthread-internal.h"
90 #ifdef WIN32
91 #include <ctype.h>
92 #include <winsock2.h>
93 #include <windows.h>
94 #include <iphlpapi.h>
95 #include <io.h>
96 #else
97 #include <sys/socket.h>
98 #include <netinet/in.h>
99 #include <arpa/inet.h>
100 #endif
101
102 #ifdef _EVENT_HAVE_NETINET_IN6_H
103 #include <netinet/in6.h>
104 #endif
105
/* Severity levels passed as the first argument of log() / _evdns_log(). */
#define EVDNS_LOG_DEBUG 0
#define EVDNS_LOG_WARN 1
#define EVDNS_LOG_MSG 2

/* Fallback for platforms whose headers do not define HOST_NAME_MAX. */
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 255
#endif
113
114 #include <stdio.h>
115
/* Smaller of two values.  Each argument may be evaluated twice, so do not */
/* pass expressions with side effects. */
#undef MIN
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Sanity check: a request must have a handle, and that handle must point */
/* back at the request as its current request. */
#define ASSERT_VALID_REQUEST(req) \
        EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))

/* Short aliases for libevent's fixed-width unsigned integer types. */
#define u64 ev_uint64_t
#define u32 ev_uint32_t
#define u16 ev_uint16_t
#define u8  ev_uint8_t

/* maximum number of addresses from a single packet */
/* that we bother recording */
#define MAX_V4_ADDRS 32
#define MAX_V6_ADDRS 32


/* Local names for resource record types.  TYPE_CNAME is given as a literal */
/* here (5 is the CNAME type code in RFC 1035) rather than an EVDNS_TYPE_* */
/* constant. */
#define TYPE_A         EVDNS_TYPE_A
#define TYPE_CNAME     5
#define TYPE_PTR       EVDNS_TYPE_PTR
#define TYPE_AAAA      EVDNS_TYPE_AAAA

/* Local name for the Internet record class. */
#define CLASS_INET     EVDNS_CLASS_INET
139
/* Persistent handle.  We keep this separate from 'struct request' since we
 * need some object to last for as long as an evdns_request is outstanding so
 * that it can be canceled, whereas a search request can lead to multiple
 * 'struct request' instances being created over its lifetime. */
struct evdns_request {
        /* The 'struct request' currently acting for this handle.
         * ASSERT_VALID_REQUEST expects current_req->handle to point back
         * here; request_finished() resets this to NULL. */
        struct request *current_req;
        struct evdns_base *base;

        int pending_cb; /* Waiting for its callback to be invoked; not
                         * owned by event base any more. */

        /* elements used by the searching code */
        int search_index;
        struct search_state *search_state;
        char *search_origname;  /* needs to be free()ed */
        int search_flags;
};
157
/* A single DNS query packet and the bookkeeping needed to send it, retry it */
/* and deliver its result to the user. */
struct request {
        u8 *request;  /* the dns packet data */
        u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
        unsigned int request_len;
        int reissue_count; /* incremented each time request_reissue() moves us to another server */
        int tx_count;  /* the number of times that this packet has been sent */
        void *user_pointer;  /* the pointer given to us for this request */
        evdns_callback_type user_callback;
        struct nameserver *ns;  /* the server which we last sent it */

        /* these objects are kept in a circular list */
        struct request *next, *prev;

        struct event timeout_event;

        u16 trans_id;  /* the transaction id */
        unsigned request_appended :1;   /* true if the request pointer is data which follows this struct */
        unsigned transmit_me :1;  /* needs to be transmitted */

        /* XXXX This is a horrible hack. */
        char **put_cname_in_ptr; /* store the cname here if we get one. */

        struct evdns_base *base;

        /* Persistent handle that owns this request; see struct evdns_request. */
        struct evdns_request *handle;
};
184
/* Answer data extracted from a reply packet.  Which member of 'data' is */
/* meaningful depends on the record type: reply_run_callback() reads 'a' for */
/* TYPE_A, 'ptr' for TYPE_PTR and 'aaaa' for TYPE_AAAA. */
struct reply {
        unsigned int type;
        /* Nonzero when an answer was recorded; reply_handle() treats a reply */
        /* without this flag as an error. */
        unsigned int have_answer : 1;
        union {
                struct {
                        u32 addrcount; /* number of entries used in addresses[] */
                        u32 addresses[MAX_V4_ADDRS];
                } a;
                struct {
                        u32 addrcount; /* number of entries used in addresses[] */
                        struct in6_addr addresses[MAX_V6_ADDRS];
                } aaaa;
                struct {
                        char name[HOST_NAME_MAX];
                } ptr;
        } data;
};
202
/* State kept for one upstream nameserver that queries can be sent to. */
struct nameserver {
        evutil_socket_t socket;  /* a connected UDP socket */
        struct sockaddr_storage address;
        ev_socklen_t addrlen;
        /* number of times which we have given this server a chance; */
        /* nameserver_probe_failed() uses it to scale the next probe delay */
        int failed_times;
        int timedout;  /* number of times in a row a request has timed out */
        struct event event;
        /* these objects are kept in a circular list */
        struct nameserver *next, *prev;
        struct event timeout_event;  /* used to keep the timeout for */
                                     /* when we next probe this server. */
                                     /* Valid if state == 0 */
        /* Outstanding probe request for this nameserver, if any */
        struct evdns_request *probe_request;
        /* zero if we think that this server is down; nameserver_up() sets */
        /* it to 1 and nameserver_failed() sets it to 0 */
        char state;
        char choked;  /* true if we have an EAGAIN from this server's socket */
        char write_waiting;  /* true if we are waiting for EV_WRITE events */
        struct evdns_base *base;
};
222
223
/* Represents a local port where we're listening for DNS requests. Right now, */
/* only UDP is supported. */
struct evdns_server_port {
        evutil_socket_t socket; /* socket we use to read queries and write replies. */
        int refcnt; /* reference count. */
        char choked; /* Are we currently blocked from writing? */
        char closing; /* Are we trying to close this port, pending writes? */
        evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
        void *user_data; /* Opaque pointer passed to user_callback */
        struct event event; /* Read/write event */
        /* circular list of replies that we want to write. */
        struct server_request *pending_replies;
        struct event_base *event_base;

        /* Only present when libevent is built with thread support. */
#ifndef _EVENT_DISABLE_THREAD_SUPPORT
        void *lock;
#endif
};
242
/* Represents part of a reply being built.      (That is, a single RR.) */
struct server_reply_item {
        struct server_reply_item *next; /* next item in sequence. */
        char *name; /* name part of the RR */
        u16 type; /* The RR type */
        u16 class; /* The RR class (usually CLASS_INET) */
        u32 ttl; /* The RR TTL */
        char is_name; /* True iff data is a label */
        /* NOTE(review): datalen is an unsigned 16-bit field, so the "-1" */
        /* mentioned below would be stored as 0xffff -- confirm against the */
        /* code that fills it in. */
        u16 datalen; /* Length of data; -1 if data is a label */
        void *data; /* The contents of the RR */
};
254
/* Represents a request that we've received as a DNS server, and holds */
/* the components of the reply as we're constructing it. */
struct server_request {
        /* Pointers to the next and previous entries on the list of replies */
        /* that we're waiting to write.  Only set if we have tried to respond */
        /* and gotten EAGAIN. */
        struct server_request *next_pending;
        struct server_request *prev_pending;

        u16 trans_id; /* Transaction id. */
        struct evdns_server_port *port; /* Which port received this request on? */
        struct sockaddr_storage addr; /* Where to send the response */
        ev_socklen_t addrlen; /* length of addr */

        int n_answer; /* how many answer RRs have been set? */
        int n_authority; /* how many authority RRs have been set? */
        int n_additional; /* how many additional RRs have been set? */

        struct server_reply_item *answer; /* linked list of answer RRs */
        struct server_reply_item *authority; /* linked list of authority RRs */
        struct server_reply_item *additional; /* linked list of additional RRs */

        /* Constructed response.  Only set once we're ready to send a reply. */
        /* Once this is set, the RR fields are cleared, and no more should be set. */
        char *response;
        size_t response_len;

        /* Caller-visible fields: flags, questions. */
        /* TO_SERVER_REQUEST() maps a pointer to this member back to the */
        /* enclosing server_request. */
        struct evdns_server_request base;
};
285
/* All resolver state: the inflight and waiting request queues, the list of */
/* nameservers, and the tunable limits and timeouts that govern them. */
struct evdns_base {
        /* An array of n_req_heads circular lists for inflight requests.
         * Each inflight request req is in req_heads[req->trans_id % n_req_heads].
         */
        struct request **req_heads;
        /* A circular list of requests that we're waiting to send, but haven't
         * sent yet because there are too many requests inflight */
        struct request *req_waiting_head;
        /* A circular list of nameservers. */
        struct nameserver *server_head;
        int n_req_heads;

        struct event_base *event_base;

        /* The number of good nameservers that we have */
        int global_good_nameservers;

        /* inflight requests are contained in the req_head list */
        /* and are actually going out across the network */
        int global_requests_inflight;
        /* requests which aren't inflight are in the waiting list */
        /* and are counted here */
        int global_requests_waiting;

        /* Upper bound on global_requests_inflight; requests beyond it stay */
        /* on the waiting list until a slot frees up. */
        int global_max_requests_inflight;

        struct timeval global_timeout;  /* 5 seconds by default */
        int global_max_reissues;  /* a reissue occurs when we get some errors from the server */
        int global_max_retransmits;  /* number of times we'll retransmit a request which timed out */
        /* number of timeouts in a row before we consider this server to be down */
        int global_max_nameserver_timeout;
        /* true iff we will use the 0x20 hack to prevent poisoning attacks. */
        int global_randomize_case;

        /* The first time that a nameserver fails, how long do we wait before
         * probing to see if it has returned?  */
        struct timeval global_nameserver_probe_initial_timeout;

        /** Port to bind to for outgoing DNS packets. */
        struct sockaddr_storage global_outgoing_address;
        /** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
        ev_socklen_t global_outgoing_addrlen;

        struct timeval global_getaddrinfo_allow_skew;

        int getaddrinfo_ipv4_timeouts;
        int getaddrinfo_ipv6_timeouts;
        int getaddrinfo_ipv4_answered;
        int getaddrinfo_ipv6_answered;

        struct search_state *global_search_state;

        /* List of struct hosts_entry records. */
        TAILQ_HEAD(hosts_list, hosts_entry) hostsdb;

        /* Lock taken by EVDNS_LOCK()/EVDNS_UNLOCK(); only present when */
        /* libevent is built with thread support. */
#ifndef _EVENT_DISABLE_THREAD_SUPPORT
        void *lock;
#endif
};
344
/* One entry of an evdns_base's hostsdb list: a hostname and the socket */
/* address associated with it. */
struct hosts_entry {
        TAILQ_ENTRY(hosts_entry) next;
        /* The address, viewable generically or as IPv4 / IPv6. */
        union {
                struct sockaddr sa;
                struct sockaddr_in sin;
                struct sockaddr_in6 sin6;
        } addr;
        int addrlen;
        /* NOTE(review): declared with one element; presumably the entry is */
        /* over-allocated so the full name fits here -- confirm at the */
        /* allocation site before changing this declaration. */
        char hostname[1];
};
355
/* The process-wide default resolver base; NULL until one is installed. */
static struct evdns_base *current_base = NULL;

/* Return the process-wide default evdns_base, or NULL if there is none. */
struct evdns_base *
evdns_get_global_base(void)
{
        struct evdns_base *const global_base = current_base;

        return global_base;
}
363
/* Given a pointer to the evdns_server_request embedded as the 'base' member */
/* of a struct server_request, recover a pointer to the enclosing */
/* server_request. */
#define TO_SERVER_REQUEST(base_ptr)                                     \
        ((struct server_request*)                                       \
          (((char*)(base_ptr) - evutil_offsetof(struct server_request, base))))

/* Lvalue naming the inflight-request list that holds transaction id 'id', */
/* i.e. req_heads[id % n_req_heads].  'id' is parenthesized so that an */
/* expression argument such as 'x + 1' is reduced modulo n_req_heads as a */
/* whole rather than only its last operand. */
#define REQ_HEAD(base, id) ((base)->req_heads[(id) % (base)->n_req_heads])
371
/* Forward declarations for the resolver (client) side. */
static struct nameserver *nameserver_pick(struct evdns_base *base);
static void evdns_request_insert(struct request *req, struct request **head);
static void evdns_request_remove(struct request *req, struct request **head);
static void nameserver_ready_callback(evutil_socket_t fd, short events, void *arg);
static int evdns_transmit(struct evdns_base *base);
static int evdns_request_transmit(struct request *req);
static void nameserver_send_probe(struct nameserver *const ns);
static void search_request_finished(struct evdns_request *const);
static int search_try_next(struct evdns_request *const req);
static struct request *search_request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
static u16 transaction_id_pick(struct evdns_base *base);
static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
static void request_submit(struct request *const req);

/* Forward declarations for the DNS server side and for configuration. */
static int server_request_free(struct server_request *req);
static void server_request_free_answers(struct server_request *req);
static void server_port_free(struct evdns_server_port *port);
static void server_port_ready_callback(evutil_socket_t fd, short events, void *arg);
static int evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename);
static int evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags);
static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);

static int strtoint(const char *const str);
397
/* Locking helpers for a base's 'lock' member.  When thread support is */
/* compiled out they expand to empty statements; otherwise they acquire, */
/* release, or assert ownership of (base)->lock. */
#ifdef _EVENT_DISABLE_THREAD_SUPPORT
#define EVDNS_LOCK(base)  _EVUTIL_NIL_STMT
#define EVDNS_UNLOCK(base) _EVUTIL_NIL_STMT
#define ASSERT_LOCKED(base) _EVUTIL_NIL_STMT
#else
#define EVDNS_LOCK(base)                        \
        EVLOCK_LOCK((base)->lock, 0)
#define EVDNS_UNLOCK(base)                      \
        EVLOCK_UNLOCK((base)->lock, 0)
#define ASSERT_LOCKED(base)                     \
        EVLOCK_ASSERT_LOCKED((base)->lock)
#endif
410
411 static void
412 default_evdns_log_fn(int warning, const char *buf)
413 {
414         if (warning == EVDNS_LOG_WARN)
415                 event_warnx("[evdns] %s", buf);
416         else if (warning == EVDNS_LOG_MSG)
417                 event_msgx("[evdns] %s", buf);
418         else
419                 event_debug(("[evdns] %s", buf));
420 }
421
/* User-installed log callback; NULL until evdns_set_log_fn() supplies one. */
static evdns_debug_log_fn_type evdns_log_fn = NULL;

/* Install 'fn' as the callback that receives evdns log messages.  While the */
/* callback is NULL, _evdns_log() returns without emitting anything. */
void
evdns_set_log_fn(evdns_debug_log_fn_type fn)
{
        evdns_log_fn = fn;
}
429
430 #ifdef __GNUC__
431 #define EVDNS_LOG_CHECK  __attribute__ ((format(printf, 2, 3)))
432 #else
433 #define EVDNS_LOG_CHECK
434 #endif
435
436 static void _evdns_log(int warn, const char *fmt, ...) EVDNS_LOG_CHECK;
437 static void
438 _evdns_log(int warn, const char *fmt, ...)
439 {
440         va_list args;
441         char buf[512];
442         if (!evdns_log_fn)
443                 return;
444         va_start(args,fmt);
445         evutil_vsnprintf(buf, sizeof(buf), fmt, args);
446         va_end(args);
447         if (evdns_log_fn) {
448                 if (warn == EVDNS_LOG_MSG)
449                         warn = EVDNS_LOG_WARN;
450                 evdns_log_fn(warn, buf);
451         } else {
452                 default_evdns_log_fn(warn, buf);
453         }
454
455 }
456
457 #define log _evdns_log
458
459 /* This walks the list of inflight requests to find the */
460 /* one with a matching transaction id. Returns NULL on */
461 /* failure */
462 static struct request *
463 request_find_from_trans_id(struct evdns_base *base, u16 trans_id) {
464         struct request *req = REQ_HEAD(base, trans_id);
465         struct request *const started_at = req;
466
467         ASSERT_LOCKED(base);
468
469         if (req) {
470                 do {
471                         if (req->trans_id == trans_id) return req;
472                         req = req->next;
473                 } while (req != started_at);
474         }
475
476         return NULL;
477 }
478
479 /* a libevent callback function which is called when a nameserver */
480 /* has gone down and we want to test if it has came back to life yet */
481 static void
482 nameserver_prod_callback(evutil_socket_t fd, short events, void *arg) {
483         struct nameserver *const ns = (struct nameserver *) arg;
484         (void)fd;
485         (void)events;
486
487         EVDNS_LOCK(ns->base);
488         nameserver_send_probe(ns);
489         EVDNS_UNLOCK(ns->base);
490 }
491
492 /* a libevent callback which is called when a nameserver probe (to see if */
493 /* it has come back to life) times out. We increment the count of failed_times */
494 /* and wait longer to send the next probe packet. */
495 static void
496 nameserver_probe_failed(struct nameserver *const ns) {
497         struct timeval timeout;
498         int i;
499
500         ASSERT_LOCKED(ns->base);
501         (void) evtimer_del(&ns->timeout_event);
502         if (ns->state == 1) {
503                 /* This can happen if the nameserver acts in a way which makes us mark */
504                 /* it as bad and then starts sending good replies. */
505                 return;
506         }
507
508 #define MAX_PROBE_TIMEOUT 3600
509 #define TIMEOUT_BACKOFF_FACTOR 3
510
511         memcpy(&timeout, &ns->base->global_nameserver_probe_initial_timeout,
512             sizeof(struct timeval));
513         for (i=ns->failed_times; i > 0 && timeout.tv_sec < MAX_PROBE_TIMEOUT; --i) {
514                 timeout.tv_sec *= TIMEOUT_BACKOFF_FACTOR;
515                 timeout.tv_usec *= TIMEOUT_BACKOFF_FACTOR;
516                 if (timeout.tv_usec > 1000000) {
517                         timeout.tv_sec += timeout.tv_usec / 1000000;
518                         timeout.tv_usec %= 1000000;
519                 }
520         }
521         if (timeout.tv_sec > MAX_PROBE_TIMEOUT) {
522                 timeout.tv_sec = MAX_PROBE_TIMEOUT;
523                 timeout.tv_usec = 0;
524         }
525
526         ns->failed_times++;
527
528         if (evtimer_add(&ns->timeout_event, &timeout) < 0) {
529                 char addrbuf[128];
530                 log(EVDNS_LOG_WARN,
531                     "Error from libevent when adding timer event for %s",
532                     evutil_format_sockaddr_port(
533                             (struct sockaddr *)&ns->address,
534                             addrbuf, sizeof(addrbuf)));
535         }
536 }
537
538 /* called when a nameserver has been deemed to have failed. For example, too */
539 /* many packets have timed out etc */
540 static void
541 nameserver_failed(struct nameserver *const ns, const char *msg) {
542         struct request *req, *started_at;
543         struct evdns_base *base = ns->base;
544         int i;
545         char addrbuf[128];
546
547         ASSERT_LOCKED(base);
548         /* if this nameserver has already been marked as failed */
549         /* then don't do anything */
550         if (!ns->state) return;
551
552         log(EVDNS_LOG_MSG, "Nameserver %s has failed: %s",
553             evutil_format_sockaddr_port(
554                     (struct sockaddr *)&ns->address,
555                     addrbuf, sizeof(addrbuf)),
556             msg);
557
558         base->global_good_nameservers--;
559         EVUTIL_ASSERT(base->global_good_nameservers >= 0);
560         if (base->global_good_nameservers == 0) {
561                 log(EVDNS_LOG_MSG, "All nameservers have failed");
562         }
563
564         ns->state = 0;
565         ns->failed_times = 1;
566
567         if (evtimer_add(&ns->timeout_event,
568                 &base->global_nameserver_probe_initial_timeout) < 0) {
569                 log(EVDNS_LOG_WARN,
570                     "Error from libevent when adding timer event for %s",
571                     evutil_format_sockaddr_port(
572                             (struct sockaddr *)&ns->address,
573                             addrbuf, sizeof(addrbuf)));
574                 /* ???? Do more? */
575         }
576
577         /* walk the list of inflight requests to see if any can be reassigned to */
578         /* a different server. Requests in the waiting queue don't have a */
579         /* nameserver assigned yet */
580
581         /* if we don't have *any* good nameservers then there's no point */
582         /* trying to reassign requests to one */
583         if (!base->global_good_nameservers) return;
584
585         for (i = 0; i < base->n_req_heads; ++i) {
586                 req = started_at = base->req_heads[i];
587                 if (req) {
588                         do {
589                                 if (req->tx_count == 0 && req->ns == ns) {
590                                         /* still waiting to go out, can be moved */
591                                         /* to another server */
592                                         req->ns = nameserver_pick(base);
593                                 }
594                                 req = req->next;
595                         } while (req != started_at);
596                 }
597         }
598 }
599
600 static void
601 nameserver_up(struct nameserver *const ns)
602 {
603         char addrbuf[128];
604         ASSERT_LOCKED(ns->base);
605         if (ns->state) return;
606         log(EVDNS_LOG_MSG, "Nameserver %s is back up",
607             evutil_format_sockaddr_port(
608                     (struct sockaddr *)&ns->address,
609                     addrbuf, sizeof(addrbuf)));
610         evtimer_del(&ns->timeout_event);
611         if (ns->probe_request) {
612                 evdns_cancel_request(ns->base, ns->probe_request);
613                 ns->probe_request = NULL;
614         }
615         ns->state = 1;
616         ns->failed_times = 0;
617         ns->timedout = 0;
618         ns->base->global_good_nameservers++;
619 }
620
621 static void
622 request_trans_id_set(struct request *const req, const u16 trans_id) {
623         req->trans_id = trans_id;
624         *((u16 *) req->request) = htons(trans_id);
625 }
626
/* Called to remove a request from a list and dealloc it. */
/* head is a pointer to the head of the list it should be */
/* removed from or NULL if the request isn't in a list. */
/* when free_handle is one, free the handle as well. */
/* Takes the base lock itself, and before releasing it tries to promote */
/* waiting requests into the slot that was just freed. */
static void
request_finished(struct request *const req, struct request **head, int free_handle) {
        struct evdns_base *base = req->base;
        /* Any head other than the waiting list counts as inflight. */
        /* NOTE(review): that includes head == NULL -- confirm callers */
        /* only pass NULL for requests that were counted as inflight. */
        int was_inflight = (head != &base->req_waiting_head);
        EVDNS_LOCK(base);
        ASSERT_VALID_REQUEST(req);

        if (head)
                evdns_request_remove(req, head);

        log(EVDNS_LOG_DEBUG, "Removing timeout for request %p", req);
        /* Keep the inflight / waiting counters in step with the lists. */
        if (was_inflight) {
                evtimer_del(&req->timeout_event);
                base->global_requests_inflight--;
        } else {
                base->global_requests_waiting--;
        }

        if (!req->request_appended) {
                /* need to free the request data on its own */
                mm_free(req->request);
        } else {
                /* the request data is appended onto the header */
                /* so everything gets free()ed when we: */
        }

        if (req->handle) {
                EVUTIL_ASSERT(req->handle->current_req == req);

                if (free_handle) {
                        search_request_finished(req->handle);
                        req->handle->current_req = NULL;
                        if (! req->handle->pending_cb) {
                                /* If we're planning to run the callback,
                                 * don't free the handle until later. */
                                mm_free(req->handle);
                        }
                        req->handle = NULL; /* If we have a bug, let's crash
                                             * early */
                } else {
                        /* The handle lives on; just detach this request. */
                        req->handle->current_req = NULL;
                }
        }

        mm_free(req);

        /* A slot may have opened up for a waiting request. */
        evdns_requests_pump_waiting_queue(base);
        EVDNS_UNLOCK(base);
}
680
681 /* This is called when a server returns a funny error code. */
682 /* We try the request again with another server. */
683 /* */
684 /* return: */
685 /*   0 ok */
686 /*   1 failed/reissue is pointless */
687 static int
688 request_reissue(struct request *req) {
689         const struct nameserver *const last_ns = req->ns;
690         ASSERT_LOCKED(req->base);
691         ASSERT_VALID_REQUEST(req);
692         /* the last nameserver should have been marked as failing */
693         /* by the caller of this function, therefore pick will try */
694         /* not to return it */
695         req->ns = nameserver_pick(req->base);
696         if (req->ns == last_ns) {
697                 /* ... but pick did return it */
698                 /* not a lot of point in trying again with the */
699                 /* same server */
700                 return 1;
701         }
702
703         req->reissue_count++;
704         req->tx_count = 0;
705         req->transmit_me = 1;
706
707         return 0;
708 }
709
710 /* this function looks for space on the inflight queue and promotes */
711 /* requests from the waiting queue if it can. */
712 static void
713 evdns_requests_pump_waiting_queue(struct evdns_base *base) {
714         ASSERT_LOCKED(base);
715         while (base->global_requests_inflight < base->global_max_requests_inflight &&
716                    base->global_requests_waiting) {
717                 struct request *req;
718                 /* move a request from the waiting queue to the inflight queue */
719                 EVUTIL_ASSERT(base->req_waiting_head);
720                 req = base->req_waiting_head;
721                 evdns_request_remove(req, &base->req_waiting_head);
722
723                 base->global_requests_waiting--;
724                 base->global_requests_inflight++;
725
726                 req->ns = nameserver_pick(base);
727                 request_trans_id_set(req, transaction_id_pick(base));
728
729                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
730                 evdns_request_transmit(req);
731                 evdns_transmit(base);
732         }
733 }
734
/* Everything needed to invoke a user's callback later from the deferred */
/* callback queue: filled in by reply_schedule_callback() and consumed (and */
/* freed) by reply_run_callback(). */
struct deferred_reply_callback {
        struct deferred_cb deferred; /* queue entry; upcast back to this struct when run */
        struct evdns_request *handle; /* handle of the originating request, if it had one */
        u8 request_type; /* TYPE_A, TYPE_PTR or TYPE_AAAA */
        u8 have_reply; /* nonzero iff 'reply' below was filled in */
        u32 ttl; /* TTL passed to the user callback along with a reply */
        u32 err; /* error code passed to the user callback when there is no reply */
        evdns_callback_type user_callback;
        struct reply reply; /* copy of the reply data; valid iff have_reply */
};
746
747 static void
748 reply_run_callback(struct deferred_cb *d, void *user_pointer)
749 {
750         struct deferred_reply_callback *cb =
751             EVUTIL_UPCAST(d, struct deferred_reply_callback, deferred);
752
753         switch (cb->request_type) {
754         case TYPE_A:
755                 if (cb->have_reply)
756                         cb->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
757                             cb->reply.data.a.addrcount, cb->ttl,
758                             cb->reply.data.a.addresses,
759                             user_pointer);
760                 else
761                         cb->user_callback(cb->err, 0, 0, 0, NULL, user_pointer);
762                 break;
763         case TYPE_PTR:
764                 if (cb->have_reply) {
765                         char *name = cb->reply.data.ptr.name;
766                         cb->user_callback(DNS_ERR_NONE, DNS_PTR, 1, cb->ttl,
767                             &name, user_pointer);
768                 } else {
769                         cb->user_callback(cb->err, 0, 0, 0, NULL, user_pointer);
770                 }
771                 break;
772         case TYPE_AAAA:
773                 if (cb->have_reply)
774                         cb->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
775                             cb->reply.data.aaaa.addrcount, cb->ttl,
776                             cb->reply.data.aaaa.addresses,
777                             user_pointer);
778                 else
779                         cb->user_callback(cb->err, 0, 0, 0, NULL, user_pointer);
780                 break;
781         default:
782                 EVUTIL_ASSERT(0);
783         }
784
785         if (cb->handle && cb->handle->pending_cb) {
786                 mm_free(cb->handle);
787         }
788
789         mm_free(cb);
790 }
791
792 static void
793 reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply)
794 {
795         struct deferred_reply_callback *d = mm_calloc(1, sizeof(*d));
796
797         ASSERT_LOCKED(req->base);
798
799         d->request_type = req->request_type;
800         d->user_callback = req->user_callback;
801         d->ttl = ttl;
802         d->err = err;
803         if (reply) {
804                 d->have_reply = 1;
805                 memcpy(&d->reply, reply, sizeof(struct reply));
806         }
807
808         if (req->handle) {
809                 req->handle->pending_cb = 1;
810                 d->handle = req->handle;
811         }
812
813         event_deferred_cb_init(&d->deferred, reply_run_callback,
814             req->user_pointer);
815         event_deferred_cb_schedule(
816                 event_base_get_deferred_cb_queue(req->base->event_base),
817                 &d->deferred);
818 }
819
820 /* this processes a parsed reply packet */
821 static void
822 reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
823         int error;
824         char addrbuf[128];
825         static const int error_codes[] = {
826                 DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
827                 DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
828         };
829
830         ASSERT_LOCKED(req->base);
831         ASSERT_VALID_REQUEST(req);
832
833         if (flags & 0x020f || !reply || !reply->have_answer) {
834                 /* there was an error */
835                 if (flags & 0x0200) {
836                         error = DNS_ERR_TRUNCATED;
837                 } else {
838                         u16 error_code = (flags & 0x000f) - 1;
839                         if (error_code > 4) {
840                                 error = DNS_ERR_UNKNOWN;
841                         } else {
842                                 error = error_codes[error_code];
843                         }
844                 }
845
846                 switch (error) {
847                 case DNS_ERR_NOTIMPL:
848                 case DNS_ERR_REFUSED:
849                         /* we regard these errors as marking a bad nameserver */
850                         if (req->reissue_count < req->base->global_max_reissues) {
851                                 char msg[64];
852                                 evutil_snprintf(msg, sizeof(msg), "Bad response %d (%s)",
853                                          error, evdns_err_to_string(error));
854                                 nameserver_failed(req->ns, msg);
855                                 if (!request_reissue(req)) return;
856                         }
857                         break;
858                 case DNS_ERR_SERVERFAILED:
859                         /* rcode 2 (servfailed) sometimes means "we
860                          * are broken" and sometimes (with some binds)
861                          * means "that request was very confusing."
862                          * Treat this as a timeout, not a failure.
863                          */
864                         log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver"
865                                 "at %s; will allow the request to time out.",
866                             evutil_format_sockaddr_port(
867                                     (struct sockaddr *)&req->ns->address,
868                                     addrbuf, sizeof(addrbuf)));
869                         break;
870                 default:
871                         /* we got a good reply from the nameserver */
872                         nameserver_up(req->ns);
873                 }
874
875                 if (req->handle->search_state &&
876                     req->request_type != TYPE_PTR) {
877                         /* if we have a list of domains to search in,
878                          * try the next one */
879                         if (!search_try_next(req->handle)) {
880                                 /* a new request was issued so this
881                                  * request is finished and */
882                                 /* the user callback will be made when
883                                  * that request (or a */
884                                 /* child of it) finishes. */
885                                 return;
886                         }
887                 }
888
889                 /* all else failed. Pass the failure up */
890                 reply_schedule_callback(req, 0, error, NULL);
891                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
892         } else {
893                 /* all ok, tell the user */
894                 reply_schedule_callback(req, ttl, 0, reply);
895                 if (req->handle == req->ns->probe_request)
896                         req->ns->probe_request = NULL; /* Avoid double-free */
897                 nameserver_up(req->ns);
898                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
899         }
900 }
901
902 static int
903 name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
904         int name_end = -1;
905         int j = *idx;
906         int ptr_count = 0;
907 #define GET32(x) do { if (j + 4 > length) goto err; memcpy(&_t32, packet + j, 4); j += 4; x = ntohl(_t32); } while (0)
908 #define GET16(x) do { if (j + 2 > length) goto err; memcpy(&_t, packet + j, 2); j += 2; x = ntohs(_t); } while (0)
909 #define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while (0)
910
911         char *cp = name_out;
912         const char *const end = name_out + name_out_len;
913
914         /* Normally, names are a series of length prefixed strings terminated */
915         /* with a length of 0 (the lengths are u8's < 63). */
916         /* However, the length can start with a pair of 1 bits and that */
917         /* means that the next 14 bits are a pointer within the current */
918         /* packet. */
919
920         for (;;) {
921                 u8 label_len;
922                 if (j >= length) return -1;
923                 GET8(label_len);
924                 if (!label_len) break;
925                 if (label_len & 0xc0) {
926                         u8 ptr_low;
927                         GET8(ptr_low);
928                         if (name_end < 0) name_end = j;
929                         j = (((int)label_len & 0x3f) << 8) + ptr_low;
930                         /* Make sure that the target offset is in-bounds. */
931                         if (j < 0 || j >= length) return -1;
932                         /* If we've jumped more times than there are characters in the
933                          * message, we must have a loop. */
934                         if (++ptr_count > length) return -1;
935                         continue;
936                 }
937                 if (label_len > 63) return -1;
938                 if (cp != name_out) {
939                         if (cp + 1 >= end) return -1;
940                         *cp++ = '.';
941                 }
942                 if (cp + label_len >= end) return -1;
943                 memcpy(cp, packet + j, label_len);
944                 cp += label_len;
945                 j += label_len;
946         }
947         if (cp >= end) return -1;
948         *cp = '\0';
949         if (name_end < 0)
950                 *idx = j;
951         else
952                 *idx = name_end;
953         return 0;
954  err:
955         return -1;
956 }
957
958 /* parses a raw request from a nameserver */
959 static int
960 reply_parse(struct evdns_base *base, u8 *packet, int length) {
961         int j = 0, k = 0;  /* index into packet */
962         u16 _t;  /* used by the macros */
963         u32 _t32;  /* used by the macros */
964         char tmp_name[256], cmp_name[256]; /* used by the macros */
965         int name_matches = 0;
966
967         u16 trans_id, questions, answers, authority, additional, datalength;
968         u16 flags = 0;
969         u32 ttl, ttl_r = 0xffffffff;
970         struct reply reply;
971         struct request *req = NULL;
972         unsigned int i;
973
974         ASSERT_LOCKED(base);
975
976         GET16(trans_id);
977         GET16(flags);
978         GET16(questions);
979         GET16(answers);
980         GET16(authority);
981         GET16(additional);
982         (void) authority; /* suppress "unused variable" warnings. */
983         (void) additional; /* suppress "unused variable" warnings. */
984
985         req = request_find_from_trans_id(base, trans_id);
986         if (!req) return -1;
987         EVUTIL_ASSERT(req->base == base);
988
989         memset(&reply, 0, sizeof(reply));
990
991         /* If it's not an answer, it doesn't correspond to any request. */
992         if (!(flags & 0x8000)) return -1;  /* must be an answer */
993         if (flags & 0x020f) {
994                 /* there was an error */
995                 goto err;
996         }
997         /* if (!answers) return; */  /* must have an answer of some form */
998
999         /* This macro skips a name in the DNS reply. */
1000 #define SKIP_NAME                                               \
1001         do { tmp_name[0] = '\0';                                \
1002                 if (name_parse(packet, length, &j, tmp_name,    \
1003                         sizeof(tmp_name))<0)                    \
1004                         goto err;                               \
1005         } while (0)
1006 #define TEST_NAME                                                       \
1007         do { tmp_name[0] = '\0';                                        \
1008                 cmp_name[0] = '\0';                                     \
1009                 k = j;                                                  \
1010                 if (name_parse(packet, length, &j, tmp_name,            \
1011                         sizeof(tmp_name))<0)                            \
1012                         goto err;                                       \
1013                 if (name_parse(req->request, req->request_len, &k,      \
1014                         cmp_name, sizeof(cmp_name))<0)                  \
1015                         goto err;                                       \
1016                 if (base->global_randomize_case) {                      \
1017                         if (strcmp(tmp_name, cmp_name) == 0)            \
1018                                 name_matches = 1;                       \
1019                 } else {                                                \
1020                         if (evutil_ascii_strcasecmp(tmp_name, cmp_name) == 0) \
1021                                 name_matches = 1;                       \
1022                 }                                                       \
1023         } while (0)
1024
1025         reply.type = req->request_type;
1026
1027         /* skip over each question in the reply */
1028         for (i = 0; i < questions; ++i) {
1029                 /* the question looks like
1030                  *   <label:name><u16:type><u16:class>
1031                  */
1032                 TEST_NAME;
1033                 j += 4;
1034                 if (j >= length) goto err;
1035         }
1036
1037         if (!name_matches)
1038                 goto err;
1039
1040         /* now we have the answer section which looks like
1041          * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
1042          */
1043
1044         for (i = 0; i < answers; ++i) {
1045                 u16 type, class;
1046
1047                 SKIP_NAME;
1048                 GET16(type);
1049                 GET16(class);
1050                 GET32(ttl);
1051                 GET16(datalength);
1052
1053                 if (type == TYPE_A && class == CLASS_INET) {
1054                         int addrcount, addrtocopy;
1055                         if (req->request_type != TYPE_A) {
1056                                 j += datalength; continue;
1057                         }
1058                         if ((datalength & 3) != 0) /* not an even number of As. */
1059                             goto err;
1060                         addrcount = datalength >> 2;
1061                         addrtocopy = MIN(MAX_V4_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
1062
1063                         ttl_r = MIN(ttl_r, ttl);
1064                         /* we only bother with the first four addresses. */
1065                         if (j + 4*addrtocopy > length) goto err;
1066                         memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
1067                                    packet + j, 4*addrtocopy);
1068                         j += 4*addrtocopy;
1069                         reply.data.a.addrcount += addrtocopy;
1070                         reply.have_answer = 1;
1071                         if (reply.data.a.addrcount == MAX_V4_ADDRS) break;
1072                 } else if (type == TYPE_PTR && class == CLASS_INET) {
1073                         if (req->request_type != TYPE_PTR) {
1074                                 j += datalength; continue;
1075                         }
1076                         if (name_parse(packet, length, &j, reply.data.ptr.name,
1077                                                    sizeof(reply.data.ptr.name))<0)
1078                                 goto err;
1079                         ttl_r = MIN(ttl_r, ttl);
1080                         reply.have_answer = 1;
1081                         break;
1082                 } else if (type == TYPE_CNAME) {
1083                         char cname[HOST_NAME_MAX];
1084                         if (!req->put_cname_in_ptr || *req->put_cname_in_ptr) {
1085                                 j += datalength; continue;
1086                         }
1087                         if (name_parse(packet, length, &j, cname,
1088                                 sizeof(cname))<0)
1089                                 goto err;
1090                         *req->put_cname_in_ptr = mm_strdup(cname);
1091                 } else if (type == TYPE_AAAA && class == CLASS_INET) {
1092                         int addrcount, addrtocopy;
1093                         if (req->request_type != TYPE_AAAA) {
1094                                 j += datalength; continue;
1095                         }
1096                         if ((datalength & 15) != 0) /* not an even number of AAAAs. */
1097                                 goto err;
1098                         addrcount = datalength >> 4;  /* each address is 16 bytes long */
1099                         addrtocopy = MIN(MAX_V6_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
1100                         ttl_r = MIN(ttl_r, ttl);
1101
1102                         /* we only bother with the first four addresses. */
1103                         if (j + 16*addrtocopy > length) goto err;
1104                         memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
1105                                    packet + j, 16*addrtocopy);
1106                         reply.data.aaaa.addrcount += addrtocopy;
1107                         j += 16*addrtocopy;
1108                         reply.have_answer = 1;
1109                         if (reply.data.aaaa.addrcount == MAX_V6_ADDRS) break;
1110                 } else {
1111                         /* skip over any other type of resource */
1112                         j += datalength;
1113                 }
1114         }
1115
1116         reply_handle(req, flags, ttl_r, &reply);
1117         return 0;
1118  err:
1119         if (req)
1120                 reply_handle(req, flags, 0, NULL);
1121         return -1;
1122 }
1123
1124 /* Parse a raw request (packet,length) sent to a nameserver port (port) from */
1125 /* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
1126 /* callback. */
1127 static int
1128 request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, ev_socklen_t addrlen)
1129 {
1130         int j = 0;      /* index into packet */
1131         u16 _t;  /* used by the macros */
1132         char tmp_name[256]; /* used by the macros */
1133
1134         int i;
1135         u16 trans_id, flags, questions, answers, authority, additional;
1136         struct server_request *server_req = NULL;
1137
1138         ASSERT_LOCKED(port);
1139
1140         /* Get the header fields */
1141         GET16(trans_id);
1142         GET16(flags);
1143         GET16(questions);
1144         GET16(answers);
1145         GET16(authority);
1146         GET16(additional);
1147
1148         if (flags & 0x8000) return -1; /* Must not be an answer. */
1149         flags &= 0x0110; /* Only RD and CD get preserved. */
1150
1151         server_req = mm_malloc(sizeof(struct server_request));
1152         if (server_req == NULL) return -1;
1153         memset(server_req, 0, sizeof(struct server_request));
1154
1155         server_req->trans_id = trans_id;
1156         memcpy(&server_req->addr, addr, addrlen);
1157         server_req->addrlen = addrlen;
1158
1159         server_req->base.flags = flags;
1160         server_req->base.nquestions = 0;
1161         server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
1162         if (server_req->base.questions == NULL)
1163                 goto err;
1164
1165         for (i = 0; i < questions; ++i) {
1166                 u16 type, class;
1167                 struct evdns_server_question *q;
1168                 int namelen;
1169                 if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
1170                         goto err;
1171                 GET16(type);
1172                 GET16(class);
1173                 namelen = (int)strlen(tmp_name);
1174                 q = mm_malloc(sizeof(struct evdns_server_question) + namelen);
1175                 if (!q)
1176                         goto err;
1177                 q->type = type;
1178                 q->dns_question_class = class;
1179                 memcpy(q->name, tmp_name, namelen+1);
1180                 server_req->base.questions[server_req->base.nquestions++] = q;
1181         }
1182
1183         /* Ignore answers, authority, and additional. */
1184
1185         server_req->port = port;
1186         port->refcnt++;
1187
1188         /* Only standard queries are supported. */
1189         if (flags & 0x7800) {
1190                 evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
1191                 return -1;
1192         }
1193
1194         port->user_callback(&(server_req->base), port->user_data);
1195
1196         return 0;
1197 err:
1198         if (server_req) {
1199                 if (server_req->base.questions) {
1200                         for (i = 0; i < server_req->base.nquestions; ++i)
1201                                 mm_free(server_req->base.questions[i]);
1202                         mm_free(server_req->base.questions);
1203                 }
1204                 mm_free(server_req);
1205         }
1206         return -1;
1207
1208 #undef SKIP_NAME
1209 #undef GET32
1210 #undef GET16
1211 #undef GET8
1212 }
1213
1214
1215 void
1216 evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void))
1217 {
1218 }
1219
/* Accepts a random-bytes generator but does nothing with it: the argument
 * is ignored and no state is changed. */
void
evdns_set_random_bytes_fn(void (*fn)(char *, size_t))
{
	(void) fn;
}
1224
1225 /* Try to choose a strong transaction id which isn't already in flight */
1226 static u16
1227 transaction_id_pick(struct evdns_base *base) {
1228         ASSERT_LOCKED(base);
1229         for (;;) {
1230                 u16 trans_id;
1231                 evutil_secure_rng_get_bytes(&trans_id, sizeof(trans_id));
1232
1233                 if (trans_id == 0xffff) continue;
1234                 /* now check to see if that id is already inflight */
1235                 if (request_find_from_trans_id(base, trans_id) == NULL)
1236                         return trans_id;
1237         }
1238 }
1239
1240 /* choose a namesever to use. This function will try to ignore */
1241 /* nameservers which we think are down and load balance across the rest */
1242 /* by updating the server_head global each time. */
1243 static struct nameserver *
1244 nameserver_pick(struct evdns_base *base) {
1245         struct nameserver *started_at = base->server_head, *picked;
1246         ASSERT_LOCKED(base);
1247         if (!base->server_head) return NULL;
1248
1249         /* if we don't have any good nameservers then there's no */
1250         /* point in trying to find one. */
1251         if (!base->global_good_nameservers) {
1252                 base->server_head = base->server_head->next;
1253                 return base->server_head;
1254         }
1255
1256         /* remember that nameservers are in a circular list */
1257         for (;;) {
1258                 if (base->server_head->state) {
1259                         /* we think this server is currently good */
1260                         picked = base->server_head;
1261                         base->server_head = base->server_head->next;
1262                         return picked;
1263                 }
1264
1265                 base->server_head = base->server_head->next;
1266                 if (base->server_head == started_at) {
1267                         /* all the nameservers seem to be down */
1268                         /* so we just return this one and hope for the */
1269                         /* best */
1270                         EVUTIL_ASSERT(base->global_good_nameservers == 0);
1271                         picked = base->server_head;
1272                         base->server_head = base->server_head->next;
1273                         return picked;
1274                 }
1275         }
1276 }
1277
1278 /* this is called when a namesever socket is ready for reading */
1279 static void
1280 nameserver_read(struct nameserver *ns) {
1281         struct sockaddr_storage ss;
1282         ev_socklen_t addrlen = sizeof(ss);
1283         u8 packet[1500];
1284         char addrbuf[128];
1285         ASSERT_LOCKED(ns->base);
1286
1287         for (;;) {
1288                 const int r = recvfrom(ns->socket, (void*)packet,
1289                     sizeof(packet), 0,
1290                     (struct sockaddr*)&ss, &addrlen);
1291                 if (r < 0) {
1292                         int err = evutil_socket_geterror(ns->socket);
1293                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1294                                 return;
1295                         nameserver_failed(ns,
1296                             evutil_socket_error_to_string(err));
1297                         return;
1298                 }
1299                 if (evutil_sockaddr_cmp((struct sockaddr*)&ss,
1300                         (struct sockaddr*)&ns->address, 0)) {
1301                         log(EVDNS_LOG_WARN, "Address mismatch on received "
1302                             "DNS packet.  Apparent source was %s",
1303                             evutil_format_sockaddr_port(
1304                                     (struct sockaddr *)&ss,
1305                                     addrbuf, sizeof(addrbuf)));
1306                         return;
1307                 }
1308
1309                 ns->timedout = 0;
1310                 reply_parse(ns->base, packet, r);
1311         }
1312 }
1313
1314 /* Read a packet from a DNS client on a server port s, parse it, and */
1315 /* act accordingly. */
1316 static void
1317 server_port_read(struct evdns_server_port *s) {
1318         u8 packet[1500];
1319         struct sockaddr_storage addr;
1320         ev_socklen_t addrlen;
1321         int r;
1322         ASSERT_LOCKED(s);
1323
1324         for (;;) {
1325                 addrlen = sizeof(struct sockaddr_storage);
1326                 r = recvfrom(s->socket, (void*)packet, sizeof(packet), 0,
1327                                          (struct sockaddr*) &addr, &addrlen);
1328                 if (r < 0) {
1329                         int err = evutil_socket_geterror(s->socket);
1330                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1331                                 return;
1332                         log(EVDNS_LOG_WARN,
1333                             "Error %s (%d) while reading request.",
1334                             evutil_socket_error_to_string(err), err);
1335                         return;
1336                 }
1337                 request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
1338         }
1339 }
1340
1341 /* Try to write all pending replies on a given DNS server port. */
1342 static void
1343 server_port_flush(struct evdns_server_port *port)
1344 {
1345         struct server_request *req = port->pending_replies;
1346         ASSERT_LOCKED(port);
1347         while (req) {
1348                 int r = sendto(port->socket, req->response, (int)req->response_len, 0,
1349                            (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1350                 if (r < 0) {
1351                         int err = evutil_socket_geterror(port->socket);
1352                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1353                                 return;
1354                         log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", evutil_socket_error_to_string(err), err);
1355                 }
1356                 if (server_request_free(req)) {
1357                         /* we released the last reference to req->port. */
1358                         return;
1359                 } else {
1360                         EVUTIL_ASSERT(req != port->pending_replies);
1361                         req = port->pending_replies;
1362                 }
1363         }
1364
1365         /* We have no more pending requests; stop listening for 'writeable' events. */
1366         (void) event_del(&port->event);
1367         event_assign(&port->event, port->event_base,
1368                                  port->socket, EV_READ | EV_PERSIST,
1369                                  server_port_ready_callback, port);
1370
1371         if (event_add(&port->event, NULL) < 0) {
1372                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
1373                 /* ???? Do more? */
1374         }
1375 }
1376
1377 /* set if we are waiting for the ability to write to this server. */
1378 /* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
1379 /* we stop these events. */
1380 static void
1381 nameserver_write_waiting(struct nameserver *ns, char waiting) {
1382         ASSERT_LOCKED(ns->base);
1383         if (ns->write_waiting == waiting) return;
1384
1385         ns->write_waiting = waiting;
1386         (void) event_del(&ns->event);
1387         event_assign(&ns->event, ns->base->event_base,
1388             ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
1389             nameserver_ready_callback, ns);
1390         if (event_add(&ns->event, NULL) < 0) {
1391                 char addrbuf[128];
1392                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
1393                     evutil_format_sockaddr_port(
1394                             (struct sockaddr *)&ns->address,
1395                             addrbuf, sizeof(addrbuf)));
1396                 /* ???? Do more? */
1397         }
1398 }
1399
1400 /* a callback function. Called by libevent when the kernel says that */
1401 /* a nameserver socket is ready for writing or reading */
1402 static void
1403 nameserver_ready_callback(evutil_socket_t fd, short events, void *arg) {
1404         struct nameserver *ns = (struct nameserver *) arg;
1405         (void)fd;
1406
1407         EVDNS_LOCK(ns->base);
1408         if (events & EV_WRITE) {
1409                 ns->choked = 0;
1410                 if (!evdns_transmit(ns->base)) {
1411                         nameserver_write_waiting(ns, 0);
1412                 }
1413         }
1414         if (events & EV_READ) {
1415                 nameserver_read(ns);
1416         }
1417         EVDNS_UNLOCK(ns->base);
1418 }
1419
1420 /* a callback function. Called by libevent when the kernel says that */
1421 /* a server socket is ready for writing or reading. */
1422 static void
1423 server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
1424         struct evdns_server_port *port = (struct evdns_server_port *) arg;
1425         (void) fd;
1426
1427         EVDNS_LOCK(port);
1428         if (events & EV_WRITE) {
1429                 port->choked = 0;
1430                 server_port_flush(port);
1431         }
1432         if (events & EV_READ) {
1433                 server_port_read(port);
1434         }
1435         EVDNS_UNLOCK(port);
1436 }
1437
/* This is an inefficient representation; only use it via the dnslabel_table_*
 * functions, so that it can be safely replaced with something smarter later. */
/* Capacity of a dnslabel_table. */
#define MAX_LABELS 128

/* Structures used to implement name compression */

/* One remembered name and the position in the message where it was
 * written. */
struct dnslabel_entry {
	char *v;	/* the name; freed by dnslabel_clear() */
	off_t pos;	/* position of the name in the message */
};

struct dnslabel_table {
	int n_labels; /* number of current entries */
	/* map from name to position in message */
	struct dnslabel_entry labels[MAX_LABELS];
};
1448
1449 /* Initialize dnslabel_table. */
1450 static void
1451 dnslabel_table_init(struct dnslabel_table *table)
1452 {
1453         table->n_labels = 0;
1454 }
1455
1456 /* Free all storage held by table, but not the table itself. */
1457 static void
1458 dnslabel_clear(struct dnslabel_table *table)
1459 {
1460         int i;
1461         for (i = 0; i < table->n_labels; ++i)
1462                 mm_free(table->labels[i].v);
1463         table->n_labels = 0;
1464 }
1465
1466 /* return the position of the label in the current message, or -1 if the label */
1467 /* hasn't been used yet. */
1468 static int
1469 dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
1470 {
1471         int i;
1472         for (i = 0; i < table->n_labels; ++i) {
1473                 if (!strcmp(label, table->labels[i].v))
1474                         return table->labels[i].pos;
1475         }
1476         return -1;
1477 }
1478
1479 /* remember that we've used the label at position pos */
1480 static int
1481 dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
1482 {
1483         char *v;
1484         int p;
1485         if (table->n_labels == MAX_LABELS)
1486                 return (-1);
1487         v = mm_strdup(label);
1488         if (v == NULL)
1489                 return (-1);
1490         p = table->n_labels++;
1491         table->labels[p].v = v;
1492         table->labels[p].pos = pos;
1493
1494         return (0);
1495 }
1496
1497 /* Converts a string to a length-prefixed set of DNS labels, starting */
1498 /* at buf[j]. name and buf must not overlap. name_len should be the length */
1499 /* of name.      table is optional, and is used for compression. */
1500 /* */
1501 /* Input: abc.def */
1502 /* Output: <3>abc<3>def<0> */
1503 /* */
1504 /* Returns the first index after the encoded name, or negative on error. */
1505 /*       -1      label was > 63 bytes */
1506 /*       -2      name too long to fit in buffer. */
1507 /* */
1508 static off_t
1509 dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
1510                                   const char *name, const size_t name_len,
1511                                   struct dnslabel_table *table) {
1512         const char *end = name + name_len;
1513         int ref = 0;
1514         u16 _t;
1515
1516 #define APPEND16(x) do {                                                \
1517                 if (j + 2 > (off_t)buf_len)                             \
1518                         goto overflow;                                  \
1519                 _t = htons(x);                                          \
1520                 memcpy(buf + j, &_t, 2);                                \
1521                 j += 2;                                                 \
1522         } while (0)
1523 #define APPEND32(x) do {                                                \
1524                 if (j + 4 > (off_t)buf_len)                             \
1525                         goto overflow;                                  \
1526                 _t32 = htonl(x);                                        \
1527                 memcpy(buf + j, &_t32, 4);                              \
1528                 j += 4;                                                 \
1529         } while (0)
1530
1531         if (name_len > 255) return -2;
1532
1533         for (;;) {
1534                 const char *const start = name;
1535                 if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
1536                         APPEND16(ref | 0xc000);
1537                         return j;
1538                 }
1539                 name = strchr(name, '.');
1540                 if (!name) {
1541                         const size_t label_len = end - start;
1542                         if (label_len > 63) return -1;
1543                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1544                         if (table) dnslabel_table_add(table, start, j);
1545                         buf[j++] = (ev_uint8_t)label_len;
1546
1547                         memcpy(buf + j, start, label_len);
1548                         j += (int) label_len;
1549                         break;
1550                 } else {
1551                         /* append length of the label. */
1552                         const size_t label_len = name - start;
1553                         if (label_len > 63) return -1;
1554                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1555                         if (table) dnslabel_table_add(table, start, j);
1556                         buf[j++] = (ev_uint8_t)label_len;
1557
1558                         memcpy(buf + j, start, label_len);
1559                         j += (int) label_len;
1560                         /* hop over the '.' */
1561                         name++;
1562                 }
1563         }
1564
1565         /* the labels must be terminated by a 0. */
1566         /* It's possible that the name ended in a . */
1567         /* in which case the zero is already there */
1568         if (!j || buf[j-1]) buf[j++] = 0;
1569         return j;
1570  overflow:
1571         return (-2);
1572 }
1573
/* Size of the buffer to allocate for a DNS request whose name is */
/* name_len bytes long. This is an upper bound: the request that is */
/* actually built may be smaller. */
static size_t
evdns_request_len(const size_t name_len) {
	const size_t header_len = 96;	/* length of the DNS standard header */
	const size_t name_space = name_len + 2;
	const size_t type_space = 4;	/* space for the resource type */

	return header_len + name_space + type_space;
}
1583
1584 /* build a dns request packet into buf. buf should be at least as long */
1585 /* as evdns_request_len told you it should be. */
1586 /* */
1587 /* Returns the amount of space used. Negative on error. */
1588 static int
1589 evdns_request_data_build(const char *const name, const size_t name_len,
1590     const u16 trans_id, const u16 type, const u16 class,
1591     u8 *const buf, size_t buf_len) {
1592         off_t j = 0;  /* current offset into buf */
1593         u16 _t;  /* used by the macros */
1594
1595         APPEND16(trans_id);
1596         APPEND16(0x0100);  /* standard query, recusion needed */
1597         APPEND16(1);  /* one question */
1598         APPEND16(0);  /* no answers */
1599         APPEND16(0);  /* no authority */
1600         APPEND16(0);  /* no additional */
1601
1602         j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
1603         if (j < 0) {
1604                 return (int)j;
1605         }
1606
1607         APPEND16(type);
1608         APPEND16(class);
1609
1610         return (int)j;
1611  overflow:
1612         return (-1);
1613 }
1614
1615 /* exported function */
1616 struct evdns_server_port *
1617 evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1618 {
1619         struct evdns_server_port *port;
1620         if (flags)
1621                 return NULL; /* flags not yet implemented */
1622         if (!(port = mm_malloc(sizeof(struct evdns_server_port))))
1623                 return NULL;
1624         memset(port, 0, sizeof(struct evdns_server_port));
1625
1626
1627         port->socket = socket;
1628         port->refcnt = 1;
1629         port->choked = 0;
1630         port->closing = 0;
1631         port->user_callback = cb;
1632         port->user_data = user_data;
1633         port->pending_replies = NULL;
1634         port->event_base = base;
1635
1636         event_assign(&port->event, port->event_base,
1637                                  port->socket, EV_READ | EV_PERSIST,
1638                                  server_port_ready_callback, port);
1639         if (event_add(&port->event, NULL) < 0) {
1640                 mm_free(port);
1641                 return NULL;
1642         }
1643         EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
1644         return port;
1645 }
1646
1647 struct evdns_server_port *
1648 evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1649 {
1650         return evdns_add_server_port_with_base(NULL, socket, flags, cb, user_data);
1651 }
1652
1653 /* exported function */
1654 void
1655 evdns_close_server_port(struct evdns_server_port *port)
1656 {
1657         EVDNS_LOCK(port);
1658         if (--port->refcnt == 0) {
1659                 EVDNS_UNLOCK(port);
1660                 server_port_free(port);
1661         } else {
1662                 port->closing = 1;
1663         }
1664 }
1665
1666 /* exported function */
1667 int
1668 evdns_server_request_add_reply(struct evdns_server_request *_req, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
1669 {
1670         struct server_request *req = TO_SERVER_REQUEST(_req);
1671         struct server_reply_item **itemp, *item;
1672         int *countp;
1673         int result = -1;
1674
1675         EVDNS_LOCK(req->port);
1676         if (req->response) /* have we already answered? */
1677                 goto done;
1678
1679         switch (section) {
1680         case EVDNS_ANSWER_SECTION:
1681                 itemp = &req->answer;
1682                 countp = &req->n_answer;
1683                 break;
1684         case EVDNS_AUTHORITY_SECTION:
1685                 itemp = &req->authority;
1686                 countp = &req->n_authority;
1687                 break;
1688         case EVDNS_ADDITIONAL_SECTION:
1689                 itemp = &req->additional;
1690                 countp = &req->n_additional;
1691                 break;
1692         default:
1693                 goto done;
1694         }
1695         while (*itemp) {
1696                 itemp = &((*itemp)->next);
1697         }
1698         item = mm_malloc(sizeof(struct server_reply_item));
1699         if (!item)
1700                 goto done;
1701         item->next = NULL;
1702         if (!(item->name = mm_strdup(name))) {
1703                 mm_free(item);
1704                 goto done;
1705         }
1706         item->type = type;
1707         item->dns_question_class = class;
1708         item->ttl = ttl;
1709         item->is_name = is_name != 0;
1710         item->datalen = 0;
1711         item->data = NULL;
1712         if (data) {
1713                 if (item->is_name) {
1714                         if (!(item->data = mm_strdup(data))) {
1715                                 mm_free(item->name);
1716                                 mm_free(item);
1717                                 goto done;
1718                         }
1719                         item->datalen = (u16)-1;
1720                 } else {
1721                         if (!(item->data = mm_malloc(datalen))) {
1722                                 mm_free(item->name);
1723                                 mm_free(item);
1724                                 goto done;
1725                         }
1726                         item->datalen = datalen;
1727                         memcpy(item->data, data, datalen);
1728                 }
1729         }
1730
1731         *itemp = item;
1732         ++(*countp);
1733         result = 0;
1734 done:
1735         EVDNS_UNLOCK(req->port);
1736         return result;
1737 }
1738
1739 /* exported function */
1740 int
1741 evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1742 {
1743         return evdns_server_request_add_reply(
1744                   req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
1745                   ttl, n*4, 0, addrs);
1746 }
1747
1748 /* exported function */
1749 int
1750 evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1751 {
1752         return evdns_server_request_add_reply(
1753                   req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
1754                   ttl, n*16, 0, addrs);
1755 }
1756
1757 /* exported function */
1758 int
1759 evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
1760 {
1761         u32 a;
1762         char buf[32];
1763         if (in && inaddr_name)
1764                 return -1;
1765         else if (!in && !inaddr_name)
1766                 return -1;
1767         if (in) {
1768                 a = ntohl(in->s_addr);
1769                 evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
1770                                 (int)(u8)((a    )&0xff),
1771                                 (int)(u8)((a>>8 )&0xff),
1772                                 (int)(u8)((a>>16)&0xff),
1773                                 (int)(u8)((a>>24)&0xff));
1774                 inaddr_name = buf;
1775         }
1776         return evdns_server_request_add_reply(
1777                   req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
1778                   ttl, -1, 1, hostname);
1779 }
1780
1781 /* exported function */
1782 int
1783 evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
1784 {
1785         return evdns_server_request_add_reply(
1786                   req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
1787                   ttl, -1, 1, cname);
1788 }
1789
1790 /* exported function */
1791 void
1792 evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
1793 {
1794         struct server_request *req = TO_SERVER_REQUEST(exreq);
1795         req->base.flags &= ~(EVDNS_FLAGS_AA|EVDNS_FLAGS_RD);
1796         req->base.flags |= flags;
1797 }
1798
1799 static int
1800 evdns_server_request_format_response(struct server_request *req, int err)
1801 {
1802         unsigned char buf[1500];
1803         size_t buf_len = sizeof(buf);
1804         off_t j = 0, r;
1805         u16 _t;
1806         u32 _t32;
1807         int i;
1808         u16 flags;
1809         struct dnslabel_table table;
1810
1811         if (err < 0 || err > 15) return -1;
1812
1813         /* Set response bit and error code; copy OPCODE and RD fields from
1814          * question; copy RA and AA if set by caller. */
1815         flags = req->base.flags;
1816         flags |= (0x8000 | err);
1817
1818         dnslabel_table_init(&table);
1819         APPEND16(req->trans_id);
1820         APPEND16(flags);
1821         APPEND16(req->base.nquestions);
1822         APPEND16(req->n_answer);
1823         APPEND16(req->n_authority);
1824         APPEND16(req->n_additional);
1825
1826         /* Add questions. */
1827         for (i=0; i < req->base.nquestions; ++i) {
1828                 const char *s = req->base.questions[i]->name;
1829                 j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
1830                 if (j < 0) {
1831                         dnslabel_clear(&table);
1832                         return (int) j;
1833                 }
1834                 APPEND16(req->base.questions[i]->type);
1835                 APPEND16(req->base.questions[i]->dns_question_class);
1836         }
1837
1838         /* Add answer, authority, and additional sections. */
1839         for (i=0; i<3; ++i) {
1840                 struct server_reply_item *item;
1841                 if (i==0)
1842                         item = req->answer;
1843                 else if (i==1)
1844                         item = req->authority;
1845                 else
1846                         item = req->additional;
1847                 while (item) {
1848                         r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
1849                         if (r < 0)
1850                                 goto overflow;
1851                         j = r;
1852
1853                         APPEND16(item->type);
1854                         APPEND16(item->dns_question_class);
1855                         APPEND32(item->ttl);
1856                         if (item->is_name) {
1857                                 off_t len_idx = j, name_start;
1858                                 j += 2;
1859                                 name_start = j;
1860                                 r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
1861                                 if (r < 0)
1862                                         goto overflow;
1863                                 j = r;
1864                                 _t = htons( (short) (j-name_start) );
1865                                 memcpy(buf+len_idx, &_t, 2);
1866                         } else {
1867                                 APPEND16(item->datalen);
1868                                 if (j+item->datalen > (off_t)buf_len)
1869                                         goto overflow;
1870                                 memcpy(buf+j, item->data, item->datalen);
1871                                 j += item->datalen;
1872                         }
1873                         item = item->next;
1874                 }
1875         }
1876
1877         if (j > 512) {
1878 overflow:
1879                 j = 512;
1880                 buf[2] |= 0x02; /* set the truncated bit. */
1881         }
1882
1883         req->response_len = j;
1884
1885         if (!(req->response = mm_malloc(req->response_len))) {
1886                 server_request_free_answers(req);
1887                 dnslabel_clear(&table);
1888                 return (-1);
1889         }
1890         memcpy(req->response, buf, req->response_len);
1891         server_request_free_answers(req);
1892         dnslabel_clear(&table);
1893         return (0);
1894 }
1895
1896 /* exported function */
1897 int
1898 evdns_server_request_respond(struct evdns_server_request *_req, int err)
1899 {
1900         struct server_request *req = TO_SERVER_REQUEST(_req);
1901         struct evdns_server_port *port = req->port;
1902         int r = -1;
1903
1904         EVDNS_LOCK(port);
1905         if (!req->response) {
1906                 if ((r = evdns_server_request_format_response(req, err))<0)
1907                         goto done;
1908         }
1909
1910         r = sendto(port->socket, req->response, (int)req->response_len, 0,
1911                            (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1912         if (r<0) {
1913                 int sock_err = evutil_socket_geterror(port->socket);
1914                 if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
1915                         goto done;
1916
1917                 if (port->pending_replies) {
1918                         req->prev_pending = port->pending_replies->prev_pending;
1919                         req->next_pending = port->pending_replies;
1920                         req->prev_pending->next_pending =
1921                                 req->next_pending->prev_pending = req;
1922                 } else {
1923                         req->prev_pending = req->next_pending = req;
1924                         port->pending_replies = req;
1925                         port->choked = 1;
1926
1927                         (void) event_del(&port->event);
1928                         event_assign(&port->event, port->event_base, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);
1929
1930                         if (event_add(&port->event, NULL) < 0) {
1931                                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
1932                         }
1933
1934                 }
1935
1936                 r = 1;
1937                 goto done;
1938         }
1939         if (server_request_free(req)) {
1940                 r = 0;
1941                 goto done;
1942         }
1943
1944         if (port->pending_replies)
1945                 server_port_flush(port);
1946
1947         r = 0;
1948 done:
1949         EVDNS_UNLOCK(port);
1950         return r;
1951 }
1952
1953 /* Free all storage held by RRs in req. */
1954 static void
1955 server_request_free_answers(struct server_request *req)
1956 {
1957         struct server_reply_item *victim, *next, **list;
1958         int i;
1959         for (i = 0; i < 3; ++i) {
1960                 if (i==0)
1961                         list = &req->answer;
1962                 else if (i==1)
1963                         list = &req->authority;
1964                 else
1965                         list = &req->additional;
1966
1967                 victim = *list;
1968                 while (victim) {
1969                         next = victim->next;
1970                         mm_free(victim->name);
1971                         if (victim->data)
1972                                 mm_free(victim->data);
1973                         mm_free(victim);
1974                         victim = next;
1975                 }
1976                 *list = NULL;
1977         }
1978 }
1979
1980 /* Free all storage held by req, and remove links to it. */
1981 /* return true iff we just wound up freeing the server_port. */
1982 static int
1983 server_request_free(struct server_request *req)
1984 {
1985         int i, rc=1, lock=0;
1986         if (req->base.questions) {
1987                 for (i = 0; i < req->base.nquestions; ++i)
1988                         mm_free(req->base.questions[i]);
1989                 mm_free(req->base.questions);
1990         }
1991
1992         if (req->port) {
1993                 EVDNS_LOCK(req->port);
1994                 lock=1;
1995                 if (req->port->pending_replies == req) {
1996                         if (req->next_pending)
1997                                 req->port->pending_replies = req->next_pending;
1998                         else
1999                                 req->port->pending_replies = NULL;
2000                 }
2001                 rc = --req->port->refcnt;
2002         }
2003
2004         if (req->response) {
2005                 mm_free(req->response);
2006         }
2007
2008         server_request_free_answers(req);
2009
2010         if (req->next_pending && req->next_pending != req) {
2011                 req->next_pending->prev_pending = req->prev_pending;
2012                 req->prev_pending->next_pending = req->next_pending;
2013         }
2014
2015         if (rc == 0) {
2016                 EVDNS_UNLOCK(req->port); /* ????? nickm */
2017                 server_port_free(req->port);
2018                 mm_free(req);
2019                 return (1);
2020         }
2021         if (lock)
2022                 EVDNS_UNLOCK(req->port);
2023         mm_free(req);
2024         return (0);
2025 }
2026
2027 /* Free all storage held by an evdns_server_port.  Only called when  */
2028 static void
2029 server_port_free(struct evdns_server_port *port)
2030 {
2031         EVUTIL_ASSERT(port);
2032         EVUTIL_ASSERT(!port->refcnt);
2033         EVUTIL_ASSERT(!port->pending_replies);
2034         if (port->socket > 0) {
2035                 evutil_closesocket(port->socket);
2036                 port->socket = -1;
2037         }
2038         (void) event_del(&port->event);
2039         event_debug_unassign(&port->event);
2040         EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2041         mm_free(port);
2042 }
2043
/* exported function */
/* Release a request without sending anything for it.  Always returns 0;
 * the result of server_request_free() is not reported. */
int
evdns_server_request_drop(struct evdns_server_request *_req)
{
	struct server_request *const victim = TO_SERVER_REQUEST(_req);

	(void) server_request_free(victim);
	return 0;
}
2052
2053 /* exported function */
2054 int
2055 evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len)
2056 {
2057         struct server_request *req = TO_SERVER_REQUEST(_req);
2058         if (addr_len < (int)req->addrlen)
2059                 return -1;
2060         memcpy(sa, &(req->addr), req->addrlen);
2061         return req->addrlen;
2062 }
2063
2064 #undef APPEND16
2065 #undef APPEND32
2066
2067 /* this is a libevent callback function which is called when a request */
2068 /* has timed out. */
2069 static void
2070 evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
2071         struct request *const req = (struct request *) arg;
2072 #ifndef _EVENT_DISABLE_THREAD_SUPPORT
2073         struct evdns_base *base = req->base;
2074 #endif
2075         (void) fd;
2076         (void) events;
2077
2078         log(EVDNS_LOG_DEBUG, "Request %p timed out", arg);
2079         EVDNS_LOCK(base);
2080
2081         req->ns->timedout++;
2082         if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
2083                 req->ns->timedout = 0;
2084                 nameserver_failed(req->ns, "request timed out.");
2085         }
2086
2087         if (req->tx_count >= req->base->global_max_retransmits) {
2088                 /* this request has failed */
2089                 reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2090                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2091         } else {
2092                 /* retransmit it */
2093                 (void) evtimer_del(&req->timeout_event);
2094                 evdns_request_transmit(req);
2095         }
2096         EVDNS_UNLOCK(base);
2097 }
2098
2099 /* try to send a request to a given server. */
2100 /* */
2101 /* return: */
2102 /*   0 ok */
2103 /*   1 temporary failure */
2104 /*   2 other failure */
2105 static int
2106 evdns_request_transmit_to(struct request *req, struct nameserver *server) {
2107         int r;
2108         ASSERT_LOCKED(req->base);
2109         ASSERT_VALID_REQUEST(req);
2110         r = sendto(server->socket, (void*)req->request, req->request_len, 0,
2111             (struct sockaddr *)&server->address, server->addrlen);
2112         if (r < 0) {
2113                 int err = evutil_socket_geterror(server->socket);
2114                 if (EVUTIL_ERR_RW_RETRIABLE(err))
2115                         return 1;
2116                 nameserver_failed(req->ns, evutil_socket_error_to_string(err));
2117                 return 2;
2118         } else if (r != (int)req->request_len) {
2119                 return 1;  /* short write */
2120         } else {
2121                 return 0;
2122         }
2123 }
2124
2125 /* try to send a request, updating the fields of the request */
2126 /* as needed */
2127 /* */
2128 /* return: */
2129 /*   0 ok */
2130 /*   1 failed */
2131 static int
2132 evdns_request_transmit(struct request *req) {
2133         int retcode = 0, r;
2134
2135         ASSERT_LOCKED(req->base);
2136         ASSERT_VALID_REQUEST(req);
2137         /* if we fail to send this packet then this flag marks it */
2138         /* for evdns_transmit */
2139         req->transmit_me = 1;
2140         EVUTIL_ASSERT(req->trans_id != 0xffff);
2141
2142         if (req->ns->choked) {
2143                 /* don't bother trying to write to a socket */
2144                 /* which we have had EAGAIN from */
2145                 return 1;
2146         }
2147
2148         r = evdns_request_transmit_to(req, req->ns);
2149         switch (r) {
2150         case 1:
2151                 /* temp failure */
2152                 req->ns->choked = 1;
2153                 nameserver_write_waiting(req->ns, 1);
2154                 return 1;
2155         case 2:
2156                 /* failed to transmit the request entirely. */
2157                 retcode = 1;
2158                 /* fall through: we'll set a timeout, which will time out,
2159                  * and make us retransmit the request anyway. */
2160         default:
2161                 /* all ok */
2162                 log(EVDNS_LOG_DEBUG,
2163                     "Setting timeout for request %p", req);
2164                 if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
2165                         log(EVDNS_LOG_WARN,
2166                       "Error from libevent when adding timer for request %p",
2167                             req);
2168                         /* ???? Do more? */
2169                 }
2170                 req->tx_count++;
2171                 req->transmit_me = 0;
2172                 return retcode;
2173         }
2174 }
2175
2176 static void
2177 nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
2178         struct nameserver *const ns = (struct nameserver *) arg;
2179         (void) type;
2180         (void) count;
2181         (void) ttl;
2182         (void) addresses;
2183
2184         EVDNS_LOCK(ns->base);
2185         ns->probe_request = NULL;
2186         if (result == DNS_ERR_CANCEL) {
2187                 /* We canceled this request because the nameserver came up
2188                  * for some other reason.  Do not change our opinion about
2189                  * the nameserver. */
2190         } else if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
2191                 /* this is a good reply */
2192                 nameserver_up(ns);
2193         } else {
2194                 nameserver_probe_failed(ns);
2195         }
2196         EVDNS_UNLOCK(ns->base);
2197 }
2198
2199 static void
2200 nameserver_send_probe(struct nameserver *const ns) {
2201         struct evdns_request *handle;
2202         struct request *req;
2203         char addrbuf[128];
2204         /* here we need to send a probe to a given nameserver */
2205         /* in the hope that it is up now. */
2206
2207         ASSERT_LOCKED(ns->base);
2208         log(EVDNS_LOG_DEBUG, "Sending probe to %s",
2209             evutil_format_sockaddr_port(
2210                     (struct sockaddr *)&ns->address,
2211                     addrbuf, sizeof(addrbuf)));
2212         handle = mm_calloc(1, sizeof(*handle));
2213         if (!handle) return;
2214         req = request_new(ns->base, handle, TYPE_A, "google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
2215         if (!req) return;
2216         ns->probe_request = handle;
2217         /* we force this into the inflight queue no matter what */
2218         request_trans_id_set(req, transaction_id_pick(ns->base));
2219         req->ns = ns;
2220         request_submit(req);
2221 }
2222
2223 /* returns: */
2224 /*   0 didn't try to transmit anything */
2225 /*   1 tried to transmit something */
2226 static int
2227 evdns_transmit(struct evdns_base *base) {
2228         char did_try_to_transmit = 0;
2229         int i;
2230
2231         ASSERT_LOCKED(base);
2232         for (i = 0; i < base->n_req_heads; ++i) {
2233                 if (base->req_heads[i]) {
2234                         struct request *const started_at = base->req_heads[i], *req = started_at;
2235                         /* first transmit all the requests which are currently waiting */
2236                         do {
2237                                 if (req->transmit_me) {
2238                                         did_try_to_transmit = 1;
2239                                         evdns_request_transmit(req);
2240                                 }
2241
2242                                 req = req->next;
2243                         } while (req != started_at);
2244                 }
2245         }
2246
2247         return did_try_to_transmit;
2248 }
2249
2250 /* exported function */
2251 int
2252 evdns_base_count_nameservers(struct evdns_base *base)
2253 {
2254         const struct nameserver *server;
2255         int n = 0;
2256
2257         EVDNS_LOCK(base);
2258         server = base->server_head;
2259         if (!server)
2260                 goto done;
2261         do {
2262                 ++n;
2263                 server = server->next;
2264         } while (server != base->server_head);
2265 done:
2266         EVDNS_UNLOCK(base);
2267         return n;
2268 }
2269
2270 int
2271 evdns_count_nameservers(void)
2272 {
2273         return evdns_base_count_nameservers(current_base);
2274 }
2275
2276 /* exported function */
2277 int
2278 evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
2279 {
2280         struct nameserver *server, *started_at;
2281         int i;
2282
2283         EVDNS_LOCK(base);
2284         server = base->server_head;
2285         started_at = base->server_head;
2286         if (!server) {
2287                 EVDNS_UNLOCK(base);
2288                 return 0;
2289         }
2290         while (1) {
2291                 struct nameserver *next = server->next;
2292                 (void) event_del(&server->event);
2293                 if (evtimer_initialized(&server->timeout_event))
2294                         (void) evtimer_del(&server->timeout_event);
2295                 if (server->socket >= 0)
2296                         evutil_closesocket(server->socket);
2297                 mm_free(server);
2298                 if (next == started_at)
2299                         break;
2300                 server = next;
2301         }
2302         base->server_head = NULL;
2303         base->global_good_nameservers = 0;
2304
2305         for (i = 0; i < base->n_req_heads; ++i) {
2306                 struct request *req, *req_started_at;
2307                 req = req_started_at = base->req_heads[i];
2308                 while (req) {
2309                         struct request *next = req->next;
2310                         req->tx_count = req->reissue_count = 0;
2311                         req->ns = NULL;
2312                         /* ???? What to do about searches? */
2313                         (void) evtimer_del(&req->timeout_event);
2314                         req->trans_id = 0;
2315                         req->transmit_me = 0;
2316
2317                         base->global_requests_waiting++;
2318                         evdns_request_insert(req, &base->req_waiting_head);
2319                         /* We want to insert these suspended elements at the front of
2320                          * the waiting queue, since they were pending before any of
2321                          * the waiting entries were added.  This is a circular list,
2322                          * so we can just shift the start back by one.*/
2323                         base->req_waiting_head = base->req_waiting_head->prev;
2324
2325                         if (next == req_started_at)
2326                                 break;
2327                         req = next;
2328                 }
2329                 base->req_heads[i] = NULL;
2330         }
2331
2332         base->global_requests_inflight = 0;
2333
2334         EVDNS_UNLOCK(base);
2335         return 0;
2336 }
2337
2338 int
2339 evdns_clear_nameservers_and_suspend(void)
2340 {
2341         return evdns_base_clear_nameservers_and_suspend(current_base);
2342 }
2343
2344
/* exported function */
/* Pump base's waiting queue with the base locked.  Always returns 0. */
int
evdns_base_resume(struct evdns_base *base)
{
	const int status = 0;

	EVDNS_LOCK(base);
	evdns_requests_pump_waiting_queue(base);
	EVDNS_UNLOCK(base);

	return status;
}
2354
2355 int
2356 evdns_resume(void)
2357 {
2358         return evdns_base_resume(current_base);
2359 }
2360
2361 static int
2362 _evdns_nameserver_add_impl(struct evdns_base *base, const struct sockaddr *address, int addrlen) {
2363         /* first check to see if we already have this nameserver */
2364
2365         const struct nameserver *server = base->server_head, *const started_at = base->server_head;
2366         struct nameserver *ns;
2367         int err = 0;
2368         char addrbuf[128];
2369
2370         ASSERT_LOCKED(base);
2371         if (server) {
2372                 do {
2373                         if (!evutil_sockaddr_cmp((struct sockaddr*)&server->address, address, 1)) return 3;
2374                         server = server->next;
2375                 } while (server != started_at);
2376         }
2377         if (addrlen > (int)sizeof(ns->address)) {
2378                 log(EVDNS_LOG_DEBUG, "Addrlen %d too long.", (int)addrlen);
2379                 return 2;
2380         }
2381
2382         ns = (struct nameserver *) mm_malloc(sizeof(struct nameserver));
2383         if (!ns) return -1;
2384
2385         memset(ns, 0, sizeof(struct nameserver));
2386         ns->base = base;
2387
2388         evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns);
2389
2390         ns->socket = socket(PF_INET, SOCK_DGRAM, 0);
2391         if (ns->socket < 0) { err = 1; goto out1; }
2392         evutil_make_socket_closeonexec(ns->socket);
2393         evutil_make_socket_nonblocking(ns->socket);
2394
2395         if (base->global_outgoing_addrlen &&
2396             !evutil_sockaddr_is_loopback(address)) {
2397                 if (bind(ns->socket,
2398                         (struct sockaddr*)&base->global_outgoing_address,
2399                         base->global_outgoing_addrlen) < 0) {
2400                         log(EVDNS_LOG_WARN,"Couldn't bind to outgoing address");
2401                         err = 2;
2402                         goto out2;
2403                 }
2404         }
2405
2406         memcpy(&ns->address, address, addrlen);
2407         ns->addrlen = addrlen;
2408         ns->state = 1;
2409         event_assign(&ns->event, ns->base->event_base, ns->socket, EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
2410         if (event_add(&ns->event, NULL) < 0) {
2411                 err = 2;
2412                 goto out2;
2413         }
2414
2415         log(EVDNS_LOG_DEBUG, "Added nameserver %s",
2416             evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)));
2417
2418         /* insert this nameserver into the list of them */
2419         if (!base->server_head) {
2420                 ns->next = ns->prev = ns;
2421                 base->server_head = ns;
2422         } else {
2423                 ns->next = base->server_head->next;
2424                 ns->prev = base->server_head;
2425                 base->server_head->next = ns;
2426                 if (base->server_head->prev == base->server_head) {
2427                         base->server_head->prev = ns;
2428                 }
2429         }
2430
2431         base->global_good_nameservers++;
2432
2433         return 0;
2434
2435 out2:
2436         evutil_closesocket(ns->socket);
2437 out1:
2438         event_debug_unassign(&ns->event);
2439         mm_free(ns);
2440         log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d",
2441             evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)), err);
2442         return err;
2443 }
2444
/* exported function */
/* Add an IPv4 nameserver on port 53 to `base`.  `address` is stored into
 * sin_addr.s_addr without conversion, so it must already be in network byte
 * order.  Returns whatever _evdns_nameserver_add_impl() returns (0 on
 * success). */
int
evdns_base_nameserver_add(struct evdns_base *base, unsigned long int address)
{
	struct sockaddr_in sin;
	int res;
	/* Zero the whole structure first: its padding bytes are copied into
	 * the stored nameserver address along with the real fields, so they
	 * must not be left as stack garbage. */
	memset(&sin, 0, sizeof(sin));
	sin.sin_addr.s_addr = address;
	sin.sin_port = htons(53);
	sin.sin_family = AF_INET;
	EVDNS_LOCK(base);
	res = _evdns_nameserver_add_impl(base, (struct sockaddr*)&sin, sizeof(sin));
	EVDNS_UNLOCK(base);
	return res;
}
2459
2460 int
2461 evdns_nameserver_add(unsigned long int address) {
2462         if (!current_base)
2463                 current_base = evdns_base_new(NULL, 0);
2464         return evdns_base_nameserver_add(current_base, address);
2465 }
2466
2467 static void
2468 sockaddr_setport(struct sockaddr *sa, ev_uint16_t port)
2469 {
2470         if (sa->sa_family == AF_INET) {
2471                 ((struct sockaddr_in *)sa)->sin_port = htons(port);
2472         } else if (sa->sa_family == AF_INET6) {
2473                 ((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
2474         }
2475 }
2476
2477 static ev_uint16_t
2478 sockaddr_getport(struct sockaddr *sa)
2479 {
2480         if (sa->sa_family == AF_INET) {
2481                 return ntohs(((struct sockaddr_in *)sa)->sin_port);
2482         } else if (sa->sa_family == AF_INET6) {
2483                 return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
2484         } else {
2485                 return 0;
2486         }
2487 }
2488
2489 /* exported function */
2490 int
2491 evdns_base_nameserver_ip_add(struct evdns_base *base, const char *ip_as_string) {
2492         struct sockaddr_storage ss;
2493         struct sockaddr *sa;
2494         int len = sizeof(ss);
2495         int res;
2496         if (evutil_parse_sockaddr_port(ip_as_string, (struct sockaddr *)&ss,
2497                 &len)) {
2498                 log(EVDNS_LOG_WARN, "Unable to parse nameserver address %s",
2499                         ip_as_string);
2500                 return 4;
2501         }
2502         sa = (struct sockaddr *) &ss;
2503         if (sockaddr_getport(sa) == 0)
2504                 sockaddr_setport(sa, 53);
2505
2506         EVDNS_LOCK(base);
2507         res = _evdns_nameserver_add_impl(base, sa, len);
2508         EVDNS_UNLOCK(base);
2509         return res;
2510 }
2511
2512 int
2513 evdns_nameserver_ip_add(const char *ip_as_string) {
2514         if (!current_base)
2515                 current_base = evdns_base_new(NULL, 0);
2516         return evdns_base_nameserver_ip_add(current_base, ip_as_string);
2517 }
2518
2519 int
2520 evdns_base_nameserver_sockaddr_add(struct evdns_base *base,
2521     const struct sockaddr *sa, ev_socklen_t len, unsigned flags)
2522 {
2523         int res;
2524         EVUTIL_ASSERT(base);
2525         EVDNS_LOCK(base);
2526         res = _evdns_nameserver_add_impl(base, sa, len);
2527         EVDNS_UNLOCK(base);
2528         return res;
2529 }
2530
2531 /* remove from the queue */
2532 static void
2533 evdns_request_remove(struct request *req, struct request **head)
2534 {
2535         ASSERT_LOCKED(req->base);
2536         ASSERT_VALID_REQUEST(req);
2537
2538 #if 0
2539         {
2540                 struct request *ptr;
2541                 int found = 0;
2542                 EVUTIL_ASSERT(*head != NULL);
2543
2544                 ptr = *head;
2545                 do {
2546                         if (ptr == req) {
2547                                 found = 1;
2548                                 break;
2549                         }
2550                         ptr = ptr->next;
2551                 } while (ptr != *head);
2552                 EVUTIL_ASSERT(found);
2553
2554                 EVUTIL_ASSERT(req->next);
2555         }
2556 #endif
2557
2558         if (req->next == req) {
2559                 /* only item in the list */
2560                 *head = NULL;
2561         } else {
2562                 req->next->prev = req->prev;
2563                 req->prev->next = req->next;
2564                 if (*head == req) *head = req->next;
2565         }
2566         req->next = req->prev = NULL;
2567 }
2568
2569 /* insert into the tail of the queue */
2570 static void
2571 evdns_request_insert(struct request *req, struct request **head) {
2572         ASSERT_LOCKED(req->base);
2573         ASSERT_VALID_REQUEST(req);
2574         if (!*head) {
2575                 *head = req;
2576                 req->next = req->prev = req;
2577                 return;
2578         }
2579
2580         req->prev = (*head)->prev;
2581         req->prev->next = req;
2582         req->next = *head;
2583         (*head)->prev = req;
2584 }
2585
/* Return the number of '.' characters in the NUL-terminated string `s`. */
static int
string_num_dots(const char *s) {
	int dots = 0;
	const char *p;

	for (p = strchr(s, '.'); p != NULL; p = strchr(p + 1, '.'))
		++dots;

	return dots;
}
2595
2596 static struct request *
2597 request_new(struct evdns_base *base, struct evdns_request *handle, int type,
2598             const char *name, int flags, evdns_callback_type callback,
2599             void *user_ptr) {
2600
2601         const char issuing_now =
2602             (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
2603
2604         const size_t name_len = strlen(name);
2605         const size_t request_max_len = evdns_request_len(name_len);
2606         const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
2607         /* the request data is alloced in a single block with the header */
2608         struct request *const req =
2609             mm_malloc(sizeof(struct request) + request_max_len);
2610         int rlen;
2611         char namebuf[256];
2612         (void) flags;
2613
2614         ASSERT_LOCKED(base);
2615
2616         if (!req) return NULL;
2617
2618         if (name_len >= sizeof(namebuf)) {
2619                 mm_free(req);
2620                 return NULL;
2621         }
2622
2623         memset(req, 0, sizeof(struct request));
2624         req->base = base;
2625
2626         evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
2627
2628         if (base->global_randomize_case) {
2629                 unsigned i;
2630                 char randbits[(sizeof(namebuf)+7)/8];
2631                 strlcpy(namebuf, name, sizeof(namebuf));
2632                 evutil_secure_rng_get_bytes(randbits, (name_len+7)/8);
2633                 for (i = 0; i < name_len; ++i) {
2634                         if (EVUTIL_ISALPHA(namebuf[i])) {
2635                                 if ((randbits[i >> 3] & (1<<(i & 7))))
2636                                         namebuf[i] |= 0x20;
2637                                 else
2638                                         namebuf[i] &= ~0x20;
2639                         }
2640                 }
2641                 name = namebuf;
2642         }
2643
2644         /* request data lives just after the header */
2645         req->request = ((u8 *) req) + sizeof(struct request);
2646         /* denotes that the request data shouldn't be free()ed */
2647         req->request_appended = 1;
2648         rlen = evdns_request_data_build(name, name_len, trans_id,
2649             type, CLASS_INET, req->request, request_max_len);
2650         if (rlen < 0)
2651                 goto err1;
2652
2653         req->request_len = rlen;
2654         req->trans_id = trans_id;
2655         req->tx_count = 0;
2656         req->request_type = type;
2657         req->user_pointer = user_ptr;
2658         req->user_callback = callback;
2659         req->ns = issuing_now ? nameserver_pick(base) : NULL;
2660         req->next = req->prev = NULL;
2661         req->handle = handle;
2662         if (handle) {
2663                 handle->current_req = req;
2664                 handle->base = base;
2665         }
2666
2667         return req;
2668 err1:
2669         mm_free(req);
2670         return NULL;
2671 }
2672
2673 static void
2674 request_submit(struct request *const req) {
2675         struct evdns_base *base = req->base;
2676         ASSERT_LOCKED(base);
2677         ASSERT_VALID_REQUEST(req);
2678         if (req->ns) {
2679                 /* if it has a nameserver assigned then this is going */
2680                 /* straight into the inflight queue */
2681                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
2682                 base->global_requests_inflight++;
2683                 evdns_request_transmit(req);
2684         } else {
2685                 evdns_request_insert(req, &base->req_waiting_head);
2686                 base->global_requests_waiting++;
2687         }
2688 }
2689
2690 /* exported function */
2691 void
2692 evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
2693 {
2694         struct request *req;
2695
2696         if (!base) {
2697                 /* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
2698                 base = handle->base;
2699                 if (!base && handle->current_req)
2700                         base = handle->current_req->base;
2701         }
2702
2703         EVDNS_LOCK(base);
2704         if (handle->pending_cb) {
2705                 EVDNS_UNLOCK(base);
2706                 return;
2707         }
2708
2709         req = handle->current_req;
2710         ASSERT_VALID_REQUEST(req);
2711
2712         reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
2713         if (req->ns) {
2714                 /* remove from inflight queue */
2715                 request_finished(req, &REQ_HEAD(base, req->trans_id), 1);
2716         } else {
2717                 /* remove from global_waiting head */
2718                 request_finished(req, &base->req_waiting_head, 1);
2719         }
2720         EVDNS_UNLOCK(base);
2721 }
2722
2723 /* exported function */
2724 struct evdns_request *
2725 evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
2726     evdns_callback_type callback, void *ptr) {
2727         struct evdns_request *handle;
2728         struct request *req;
2729         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2730         handle = mm_calloc(1, sizeof(*handle));
2731         if (handle == NULL)
2732                 return NULL;
2733         EVDNS_LOCK(base);
2734         if (flags & DNS_QUERY_NO_SEARCH) {
2735                 req =
2736                         request_new(base, handle, TYPE_A, name, flags,
2737                                     callback, ptr);
2738                 if (req)
2739                         request_submit(req);
2740         } else {
2741                 search_request_new(base, handle, TYPE_A, name, flags,
2742                     callback, ptr);
2743         }
2744         if (handle->current_req == NULL) {
2745                 mm_free(handle);
2746                 handle = NULL;
2747         }
2748         EVDNS_UNLOCK(base);
2749         return handle;
2750 }
2751
2752 int evdns_resolve_ipv4(const char *name, int flags,
2753                                            evdns_callback_type callback, void *ptr)
2754 {
2755         return evdns_base_resolve_ipv4(current_base, name, flags, callback, ptr)
2756                 ? 0 : -1;
2757 }
2758
2759
2760 /* exported function */
2761 struct evdns_request *
2762 evdns_base_resolve_ipv6(struct evdns_base *base,
2763     const char *name, int flags,
2764     evdns_callback_type callback, void *ptr)
2765 {
2766         struct evdns_request *handle;
2767         struct request *req;
2768         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2769         handle = mm_calloc(1, sizeof(*handle));
2770         if (handle == NULL)
2771                 return NULL;
2772         EVDNS_LOCK(base);
2773         if (flags & DNS_QUERY_NO_SEARCH) {
2774                 req = request_new(base, handle, TYPE_AAAA, name, flags,
2775                                   callback, ptr);
2776                 if (req)
2777                         request_submit(req);
2778         } else {
2779                 search_request_new(base, handle, TYPE_AAAA, name, flags,
2780                     callback, ptr);
2781         }
2782         if (handle->current_req == NULL) {
2783                 mm_free(handle);
2784                 handle = NULL;
2785         }
2786         EVDNS_UNLOCK(base);
2787         return handle;
2788 }
2789
2790 int evdns_resolve_ipv6(const char *name, int flags,
2791     evdns_callback_type callback, void *ptr) {
2792         return evdns_base_resolve_ipv6(current_base, name, flags, callback, ptr)
2793                 ? 0 : -1;
2794 }
2795
2796 struct evdns_request *
2797 evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2798         char buf[32];
2799         struct evdns_request *handle;
2800         struct request *req;
2801         u32 a;
2802         EVUTIL_ASSERT(in);
2803         a = ntohl(in->s_addr);
2804         evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
2805                         (int)(u8)((a    )&0xff),
2806                         (int)(u8)((a>>8 )&0xff),
2807                         (int)(u8)((a>>16)&0xff),
2808                         (int)(u8)((a>>24)&0xff));
2809         handle = mm_calloc(1, sizeof(*handle));
2810         if (handle == NULL)
2811                 return NULL;
2812         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2813         EVDNS_LOCK(base);
2814         req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2815         if (req)
2816                 request_submit(req);
2817         if (handle->current_req == NULL) {
2818                 mm_free(handle);
2819                 handle = NULL;
2820         }
2821         EVDNS_UNLOCK(base);
2822         return (handle);
2823 }
2824
2825 int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2826         return evdns_base_resolve_reverse(current_base, in, flags, callback, ptr)
2827                 ? 0 : -1;
2828 }
2829
2830 struct evdns_request *
2831 evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2832         /* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
2833         char buf[73];
2834         char *cp;
2835         struct evdns_request *handle;
2836         struct request *req;
2837         int i;
2838         EVUTIL_ASSERT(in);
2839         cp = buf;
2840         for (i=15; i >= 0; --i) {
2841                 u8 byte = in->s6_addr[i];
2842                 *cp++ = "0123456789abcdef"[byte & 0x0f];
2843                 *cp++ = '.';
2844                 *cp++ = "0123456789abcdef"[byte >> 4];
2845                 *cp++ = '.';
2846         }
2847         EVUTIL_ASSERT(cp + strlen("ip6.arpa") < buf+sizeof(buf));
2848         memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
2849         handle = mm_calloc(1, sizeof(*handle));
2850         if (handle == NULL)
2851                 return NULL;
2852         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2853         EVDNS_LOCK(base);
2854         req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2855         if (req)
2856                 request_submit(req);
2857         if (handle->current_req == NULL) {
2858                 mm_free(handle);
2859                 handle = NULL;
2860         }
2861         EVDNS_UNLOCK(base);
2862         return (handle);
2863 }
2864
2865 int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2866         return evdns_base_resolve_reverse_ipv6(current_base, in, flags, callback, ptr)
2867                 ? 0 : -1;
2868 }
2869
2870 /* ================================================================= */
2871 /* Search support */
2872 /* */
2873 /* the libc resolver has support for searching a number of domains */
2874 /* to find a name. If nothing else then it takes the single domain */
2875 /* from the gethostname() call. */
2876 /* */
2877 /* It can also be configured via the domain and search options in a */
2878 /* resolv.conf. */
2879 /* */
2880 /* The ndots option controls how many dots it takes for the resolver */
2881 /* to decide that a name is non-local and so try a raw lookup first. */
2882
/* One entry of a search list.  The domain text is not a named member: it is
 * stored, without a terminating NUL, in the bytes that directly follow this
 * structure in the same allocation. */
struct search_domain {
	int len;			/* byte count of the trailing domain text */
	struct search_domain *next;	/* following entry, NULL at the end */
	/* the text string is appended to this structure */
};

/* A reference-counted search configuration: the singly linked list of
 * search domains together with the ndots threshold. */
struct search_state {
	int refcount;			/* the state is freed when this drops to 0 */
	int ndots;			/* set to 1 by search_state_new() */
	int num_domains;		/* number of entries on the head list */
	struct search_domain *head;	/* first search domain, or NULL */
};
2895
2896 static void
2897 search_state_decref(struct search_state *const state) {
2898         if (!state) return;
2899         state->refcount--;
2900         if (!state->refcount) {
2901                 struct search_domain *next, *dom;
2902                 for (dom = state->head; dom; dom = next) {
2903                         next = dom->next;
2904                         mm_free(dom);
2905                 }
2906                 mm_free(state);
2907         }
2908 }
2909
2910 static struct search_state *
2911 search_state_new(void) {
2912         struct search_state *state = (struct search_state *) mm_malloc(sizeof(struct search_state));
2913         if (!state) return NULL;
2914         memset(state, 0, sizeof(struct search_state));
2915         state->refcount = 1;
2916         state->ndots = 1;
2917
2918         return state;
2919 }
2920
2921 static void
2922 search_postfix_clear(struct evdns_base *base) {
2923         search_state_decref(base->global_search_state);
2924
2925         base->global_search_state = search_state_new();
2926 }
2927
/* exported function */
/* Replace the search state of `base` with a fresh one, holding the base
 * lock while doing so. */
void
evdns_base_search_clear(struct evdns_base *base)
{
	struct evdns_base *const target = base;

	EVDNS_LOCK(target);
	search_postfix_clear(target);
	EVDNS_UNLOCK(target);
}
2936
2937 void
2938 evdns_search_clear(void) {
2939         evdns_base_search_clear(current_base);
2940 }
2941
2942 static void
2943 search_postfix_add(struct evdns_base *base, const char *domain) {
2944         size_t domain_len;
2945         struct search_domain *sdomain;
2946         while (domain[0] == '.') domain++;
2947         domain_len = strlen(domain);
2948
2949         ASSERT_LOCKED(base);
2950         if (!base->global_search_state) base->global_search_state = search_state_new();
2951         if (!base->global_search_state) return;
2952         base->global_search_state->num_domains++;
2953
2954         sdomain = (struct search_domain *) mm_malloc(sizeof(struct search_domain) + domain_len);
2955         if (!sdomain) return;
2956         memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
2957         sdomain->next = base->global_search_state->head;
2958         sdomain->len = (int) domain_len;
2959
2960         base->global_search_state->head = sdomain;
2961 }
2962
2963 /* reverse the order of members in the postfix list. This is needed because, */
2964 /* when parsing resolv.conf we push elements in the wrong order */
2965 static void
2966 search_reverse(struct evdns_base *base) {
2967         struct search_domain *cur, *prev = NULL, *next;
2968         ASSERT_LOCKED(base);
2969         cur = base->global_search_state->head;
2970         while (cur) {
2971                 next = cur->next;
2972                 cur->next = prev;
2973                 prev = cur;
2974                 cur = next;
2975         }
2976
2977         base->global_search_state->head = prev;
2978 }
2979
/* exported function */
/* Add `domain` to the search list of `base`, holding the base lock around
 * the insertion. */
void
evdns_base_search_add(struct evdns_base *base, const char *domain) {
	struct evdns_base *const target = base;

	EVDNS_LOCK(target);
	search_postfix_add(target, domain);
	EVDNS_UNLOCK(target);
}
2987 void
2988 evdns_search_add(const char *domain) {
2989         evdns_base_search_add(current_base, domain);
2990 }
2991
2992 /* exported function */
2993 void
2994 evdns_base_search_ndots_set(struct evdns_base *base, const int ndots) {
2995         EVDNS_LOCK(base);
2996         if (!base->global_search_state) base->global_search_state = search_state_new();
2997         if (base->global_search_state)
2998                 base->global_search_state->ndots = ndots;
2999         EVDNS_UNLOCK(base);
3000 }
3001 void
3002 evdns_search_ndots_set(const int ndots) {
3003         evdns_base_search_ndots_set(current_base, ndots);
3004 }
3005
3006 static void
3007 search_set_from_hostname(struct evdns_base *base) {
3008         char hostname[HOST_NAME_MAX + 1], *domainname;
3009
3010         ASSERT_LOCKED(base);
3011         search_postfix_clear(base);
3012         if (gethostname(hostname, sizeof(hostname))) return;
3013         domainname = strchr(hostname, '.');
3014         if (!domainname) return;
3015         search_postfix_add(base, domainname);
3016 }
3017
3018 /* warning: returns malloced string */
3019 static char *
3020 search_make_new(const struct search_state *const state, int n, const char *const base_name) {
3021         const size_t base_len = strlen(base_name);
3022         const char need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
3023         struct search_domain *dom;
3024
3025         for (dom = state->head; dom; dom = dom->next) {
3026                 if (!n--) {
3027                         /* this is the postfix we want */
3028                         /* the actual postfix string is kept at the end of the structure */
3029                         const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
3030                         const int postfix_len = dom->len;
3031                         char *const newname = (char *) mm_malloc(base_len + need_to_append_dot + postfix_len + 1);
3032                         if (!newname) return NULL;
3033                         memcpy(newname, base_name, base_len);
3034                         if (need_to_append_dot) newname[base_len] = '.';
3035                         memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
3036                         newname[base_len + need_to_append_dot + postfix_len] = 0;
3037                         return newname;
3038                 }
3039         }
3040
3041         /* we ran off the end of the list and still didn't find the requested string */
3042         EVUTIL_ASSERT(0);
3043         return NULL; /* unreachable; stops warnings in some compilers. */
3044 }
3045
3046 static struct request *
3047 search_request_new(struct evdns_base *base, struct evdns_request *handle,
3048                    int type, const char *const name, int flags,
3049                    evdns_callback_type user_callback, void *user_arg) {
3050         ASSERT_LOCKED(base);
3051         EVUTIL_ASSERT(type == TYPE_A || type == TYPE_AAAA);
3052         EVUTIL_ASSERT(handle->current_req == NULL);
3053         if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
3054              base->global_search_state &&
3055                  base->global_search_state->num_domains) {
3056                 /* we have some domains to search */
3057                 struct request *req;
3058                 if (string_num_dots(name) >= base->global_search_state->ndots) {
3059                         req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3060                         if (!req) return NULL;
3061                         handle->search_index = -1;
3062                 } else {
3063                         char *const new_name = search_make_new(base->global_search_state, 0, name);
3064                         if (!new_name) return NULL;
3065                         req = request_new(base, handle, type, new_name, flags, user_callback, user_arg);
3066                         mm_free(new_name);
3067                         if (!req) return NULL;
3068                         handle->search_index = 0;
3069                 }
3070                 EVUTIL_ASSERT(handle->search_origname == NULL);
3071                 handle->search_origname = mm_strdup(name);
3072                 handle->search_state = base->global_search_state;
3073                 handle->search_flags = flags;
3074                 base->global_search_state->refcount++;
3075                 request_submit(req);
3076                 return req;
3077         } else {
3078                 struct request *const req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3079                 if (!req) return NULL;
3080                 request_submit(req);
3081                 return req;
3082         }
3083 }
3084
3085 /* this is called when a request has failed to find a name. We need to check */
3086 /* if it is part of a search and, if so, try the next name in the list */
3087 /* returns: */
3088 /*   0 another request has been submitted */
3089 /*   1 no more requests needed */
3090 static int
3091 search_try_next(struct evdns_request *const handle) {
3092         struct request *req = handle->current_req;
3093         struct evdns_base *base = req->base;
3094         struct request *newreq;
3095         ASSERT_LOCKED(base);
3096         if (handle->search_state) {
3097                 /* it is part of a search */
3098                 char *new_name;
3099                 handle->search_index++;
3100                 if (handle->search_index >= handle->search_state->num_domains) {
3101                         /* no more postfixes to try, however we may need to try */
3102                         /* this name without a postfix */
3103                         if (string_num_dots(handle->search_origname) < handle->search_state->ndots) {
3104                                 /* yep, we need to try it raw */
3105                                 newreq = request_new(base, NULL, req->request_type, handle->search_origname, handle->search_flags, req->user_callback, req->user_pointer);
3106                                 log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", handle->search_origname);
3107                                 if (newreq) {
3108                                         search_request_finished(handle);
3109                                         goto submit_next;
3110                                 }
3111                         }
3112                         return 1;
3113                 }
3114
3115                 new_name = search_make_new(handle->search_state, handle->search_index, handle->search_origname);
3116                 if (!new_name) return 1;
3117                 log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, handle->search_index);
3118                 newreq = request_new(base, NULL, req->request_type, new_name, handle->search_flags, req->user_callback, req->user_pointer);
3119                 mm_free(new_name);
3120                 if (!newreq) return 1;
3121                 goto submit_next;
3122         }
3123         return 1;
3124
3125 submit_next:
3126         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
3127         handle->current_req = newreq;
3128         newreq->handle = handle;
3129         request_submit(newreq);
3130         return 0;
3131 }
3132
3133 static void
3134 search_request_finished(struct evdns_request *const handle) {
3135         ASSERT_LOCKED(handle->current_req->base);
3136         if (handle->search_state) {
3137                 search_state_decref(handle->search_state);
3138                 handle->search_state = NULL;
3139         }
3140         if (handle->search_origname) {
3141                 mm_free(handle->search_origname);
3142                 handle->search_origname = NULL;
3143         }
3144 }
3145
3146 /* ================================================================= */
3147 /* Parsing resolv.conf files */
3148
3149 static void
3150 evdns_resolv_set_defaults(struct evdns_base *base, int flags) {
3151         /* if the file isn't found then we assume a local resolver */
3152         ASSERT_LOCKED(base);
3153         if (flags & DNS_OPTION_SEARCH) search_set_from_hostname(base);
3154         if (flags & DNS_OPTION_NAMESERVERS) evdns_base_nameserver_ip_add(base,"127.0.0.1");
3155 }
3156
#ifndef _EVENT_HAVE_STRTOK_R
/* Fallback tokenizer for builds where _EVENT_HAVE_STRTOK_R is not defined.
 *
 * Scanning starts at `s`, or at `*state` when `s` is NULL, and stops at the
 * first character found in `delim`.  That character is overwritten with NUL
 * and `*state` is set to the character after it.  If the end of the string
 * is reached first, the remainder is returned and `*state` is set to NULL;
 * an empty remainder yields NULL and leaves `*state` unchanged.  Leading
 * delimiters are not skipped, so a token may be the empty string. */
static char *
strtok_r(char *s, const char *delim, char **state) {
	char *const token = s ? s : *state;
	char *scan;

	if (token == NULL)
		return NULL;

	for (scan = token; *scan != '\0'; ++scan) {
		if (strchr(delim, *scan)) {
			*scan = '\0';
			*state = scan + 1;
			return token;
		}
	}

	/* ran into the end of the string without meeting a delimiter */
	if (scan == token)
		return NULL;
	*state = NULL;
	return token;
}
#endif
3178
/* helper version of atoi which returns -1 on error */
/* An error is: no digits at all, trailing characters after the number, or */
/* a value that does not fit in an int. Note that a valid input of "-1" */
/* cannot be told apart from an error. */
static int
strtoint(const char *const str)
{
	char *endptr;
	long r;

	errno = 0;
	r = strtol(str, &endptr, 10);
	/* endptr == str means strtol() consumed nothing (e.g. empty input) */
	if (endptr == str || *endptr) return -1;
	/* reject values strtol() clamped, and values too wide for an int */
	if (errno == ERANGE || r > INT_MAX || r < INT_MIN) return -1;
	return (int) r;
}
3188
/* Parse a number of seconds into a timeval; return -1 on error.
 *
 * 'str' must consist entirely of a non-negative decimal number (fractions
 * allowed).  Values below one millisecond, values too large to store in an
 * int, and non-finite values (NaN, infinity) are rejected.  On success
 * returns 0 and fills in 'out'. */
static int
strtotimeval(const char *const str, struct timeval *out)
{
	double d;
	char *endptr;
	int whole;

	d = strtod(str, &endptr);
	if (*endptr) return -1;
	/* Written as a negated comparison so that NaN is rejected as well. */
	if (!(d >= 0)) return -1;
	/* Converting an out-of-range double to int is undefined behavior. */
	if (d > INT_MAX) return -1;
	whole = (int) d;
	out->tv_sec = whole;
	out->tv_usec = (int) ((d - whole)*1000000);
	if (out->tv_sec == 0 && out->tv_usec < 1000) /* less than 1 msec */
		return -1;
	return 0;
}
3204
/* Like strtoint(), but a successfully parsed value is forced into the
 * inclusive range [min, max].  A result of -1 from strtoint() is passed
 * through unchanged as the error indication. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int parsed = strtoint(str);

	if (parsed == -1)
		return -1;
	if (parsed < min)
		return min;
	return parsed > max ? max : parsed;
}
3219
3220 static int
3221 evdns_base_set_max_requests_inflight(struct evdns_base *base, int maxinflight)
3222 {
3223         int old_n_heads = base->n_req_heads, n_heads;
3224         struct request **old_heads = base->req_heads, **new_heads, *req;
3225         int i;
3226
3227         ASSERT_LOCKED(base);
3228         if (maxinflight < 1)
3229                 maxinflight = 1;
3230         n_heads = (maxinflight+4) / 5;
3231         EVUTIL_ASSERT(n_heads > 0);
3232         new_heads = mm_calloc(n_heads, sizeof(struct request*));
3233         if (!new_heads)
3234                 return (-1);
3235         if (old_heads) {
3236                 for (i = 0; i < old_n_heads; ++i) {
3237                         while (old_heads[i]) {
3238                                 req = old_heads[i];
3239                                 evdns_request_remove(req, &old_heads[i]);
3240                                 evdns_request_insert(req, &new_heads[req->trans_id % n_heads]);
3241                         }
3242                 }
3243                 mm_free(old_heads);
3244         }
3245         base->req_heads = new_heads;
3246         base->n_req_heads = n_heads;
3247         base->global_max_requests_inflight = maxinflight;
3248         return (0);
3249 }
3250
3251 /* exported function */
3252 int
3253 evdns_base_set_option(struct evdns_base *base,
3254     const char *option, const char *val)
3255 {
3256         int res;
3257         EVDNS_LOCK(base);
3258         res = evdns_base_set_option_impl(base, option, val, DNS_OPTIONS_ALL);
3259         EVDNS_UNLOCK(base);
3260         return res;
3261 }
3262
/* Return nonzero iff 's1' names the option 'optionname'.
 *
 * 'optionname' is written with a trailing colon ("option:").  's1' matches
 * when it is exactly the option name, the name without its last character
 * (i.e. without the colon), or the full name followed by arbitrary text
 * ("option:value", as produced by the resolv.conf parser). */
static inline int
str_matches_option(const char *s1, const char *optionname)
{
	const size_t name_len = strlen(optionname);
	const size_t given_len = strlen(s1);
	size_t cmp_len;

	if (given_len > name_len)
		cmp_len = name_len;
	else if (given_len == name_len || given_len == name_len - 1)
		cmp_len = given_len;
	else
		return 0;

	return strncmp(s1, optionname, cmp_len) == 0;
}
3278
/* Apply a single configuration option to 'base'.
 *
 * 'option' is matched with str_matches_option(), so both "name" and
 * "name:anything" are accepted; 'val' is the textual value.  'flags' is a
 * mask of DNS_OPTION_* categories: an option whose category is not enabled
 * is validated but otherwise ignored.  Unrecognized options are ignored.
 *
 * Returns 0 on success (including "ignored") and -1 if the value cannot be
 * parsed or the option cannot be applied.  The base lock must be held. */
static int
evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags)
{
	ASSERT_LOCKED(base);
	if (str_matches_option(option, "ndots:")) {
		const int ndots = strtoint(val);
		if (ndots == -1) return -1;
		if (!(flags & DNS_OPTION_SEARCH)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
		if (!base->global_search_state) base->global_search_state = search_state_new();
		if (!base->global_search_state) return -1;
		base->global_search_state->ndots = ndots;
	} else if (str_matches_option(option, "timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting timeout to %s", val);
		memcpy(&base->global_timeout, &tv, sizeof(struct timeval));
	} else if (str_matches_option(option, "getaddrinfo-allow-skew:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting getaddrinfo-allow-skew to %s",
		    val);
		memcpy(&base->global_getaddrinfo_allow_skew, &tv,
		    sizeof(struct timeval));
	} else if (str_matches_option(option, "max-timeouts:")) {
		const int maxtimeout = strtoint_clipped(val, 1, 255);
		if (maxtimeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
			maxtimeout);
		base->global_max_nameserver_timeout = maxtimeout;
	} else if (str_matches_option(option, "max-inflight:")) {
		const int maxinflight = strtoint_clipped(val, 1, 65000);
		if (maxinflight == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
			maxinflight);
		/* Resizing allocates a new table and can fail; report that
		 * instead of claiming the option was applied. */
		if (evdns_base_set_max_requests_inflight(base, maxinflight) < 0)
			return -1;
	} else if (str_matches_option(option, "attempts:")) {
		int retries = strtoint(val);
		if (retries == -1) return -1;
		if (retries > 255) retries = 255;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
		base->global_max_retransmits = retries;
	} else if (str_matches_option(option, "randomize-case:")) {
		int randcase = strtoint(val);
		/* Reject unparseable values, as the other integer options do. */
		if (randcase == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		base->global_randomize_case = randcase;
	} else if (str_matches_option(option, "bind-to:")) {
		/* XXX This only applies to successive nameservers, not
		 * to already-configured ones.  We might want to fix that. */
		int len = sizeof(base->global_outgoing_address);
		if (!(flags & DNS_OPTION_NAMESERVERS)) return 0;
		if (evutil_parse_sockaddr_port(val,
			(struct sockaddr*)&base->global_outgoing_address, &len))
			return -1;
		base->global_outgoing_addrlen = len;
	} else if (str_matches_option(option, "initial-probe-timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		/* Cap the seconds part at one hour. */
		if (tv.tv_sec > 3600)
			tv.tv_sec = 3600;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
		    val);
		memcpy(&base->global_nameserver_probe_initial_timeout, &tv,
		    sizeof(tv));
	}
	return 0;
}
3353
3354 int
3355 evdns_set_option(const char *option, const char *val, int flags)
3356 {
3357         if (!current_base)
3358                 current_base = evdns_base_new(NULL, 0);
3359         return evdns_base_set_option(current_base, option, val);
3360 }
3361
3362 static void
3363 resolv_conf_parse_line(struct evdns_base *base, char *const start, int flags) {
3364         char *strtok_state;
3365         static const char *const delims = " \t";
3366 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
3367
3368
3369         char *const first_token = strtok_r(start, delims, &strtok_state);
3370         ASSERT_LOCKED(base);
3371         if (!first_token) return;
3372
3373         if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
3374                 const char *const nameserver = NEXT_TOKEN;
3375
3376                 if (nameserver)
3377                         evdns_base_nameserver_ip_add(base, nameserver);
3378         } else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
3379                 const char *const domain = NEXT_TOKEN;
3380                 if (domain) {
3381                         search_postfix_clear(base);
3382                         search_postfix_add(base, domain);
3383                 }
3384         } else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
3385                 const char *domain;
3386                 search_postfix_clear(base);
3387
3388                 while ((domain = NEXT_TOKEN)) {
3389                         search_postfix_add(base, domain);
3390                 }
3391                 search_reverse(base);
3392         } else if (!strcmp(first_token, "options")) {
3393                 const char *option;
3394                 while ((option = NEXT_TOKEN)) {
3395                         const char *val = strchr(option, ':');
3396                         evdns_base_set_option_impl(base, option, val ? val+1 : "", flags);
3397                 }
3398         }
3399 #undef NEXT_TOKEN
3400 }
3401
/* exported function */
/* Parse the resolv.conf-style file 'filename' into 'base' while holding */
/* the base lock.  The result is that of */
/* evdns_base_resolv_conf_parse_impl(): */
/*   0 no errors */
/*   1 the file could not be found; defaults were applied instead */
/*   2 the file could not be read for some other reason */
/*   6 no nameservers were configured; 127.0.0.1 was added */
int
evdns_base_resolv_conf_parse(struct evdns_base *base, int flags, const char *const filename) {
	int rv;

	EVDNS_LOCK(base);
	rv = evdns_base_resolv_conf_parse_impl(base, flags, filename);
	EVDNS_UNLOCK(base);

	return rv;
}
3418
/* Return a newly allocated string holding the path of the system hosts
 * file, or NULL on failure.  The caller must release it with mm_free(). */
static char *
evdns_get_default_hosts_filename(void)
{
#ifdef WIN32
	/* Windows is a little coy about where it puts its configuration
	 * files.  Sure, they're _usually_ in C:\windows\system32, but
	 * there's no reason in principle they couldn't be in
	 * W:\hoboken chicken emergency\
	 */
	char path[MAX_PATH+1];
	static const char hostfile[] = "\\drivers\\etc\\hosts";
	char *path_out;
	size_t len_out;

	if (! SHGetSpecialFolderPathA(NULL, path, CSIDL_SYSTEM, 0))
		return NULL;
	/* Buffer size, including the terminating NUL.  The formatter must
	 * be told the full size or it will drop the final character. */
	len_out = strlen(path)+strlen(hostfile)+1;
	path_out = mm_malloc(len_out);
	if (!path_out)
		return NULL;
	evutil_snprintf(path_out, len_out, "%s%s", path, hostfile);
	return path_out;
#else
	return mm_strdup("/etc/hosts");
#endif
}
3443
3444 static int
3445 evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename) {
3446         size_t n;
3447         char *resolv;
3448         char *start;
3449         int err = 0;
3450
3451         log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);
3452
3453         if (flags & DNS_OPTION_HOSTSFILE) {
3454                 char *fname = evdns_get_default_hosts_filename();
3455                 evdns_base_load_hosts(base, fname);
3456                 if (fname)
3457                         mm_free(fname);
3458         }
3459
3460         if ((err = evutil_read_file(filename, &resolv, &n, 0)) < 0) {
3461                 if (err == -1) {
3462                         /* No file. */
3463                         evdns_resolv_set_defaults(base, flags);
3464                         return 1;
3465                 } else {
3466                         return 2;
3467                 }
3468         }
3469
3470         start = resolv;
3471         for (;;) {
3472                 char *const newline = strchr(start, '\n');
3473                 if (!newline) {
3474                         resolv_conf_parse_line(base, start, flags);
3475                         break;
3476                 } else {
3477                         *newline = 0;
3478                         resolv_conf_parse_line(base, start, flags);
3479                         start = newline + 1;
3480                 }
3481         }
3482
3483         if (!base->server_head && (flags & DNS_OPTION_NAMESERVERS)) {
3484                 /* no nameservers were configured. */
3485                 evdns_base_nameserver_ip_add(base, "127.0.0.1");
3486                 err = 6;
3487         }
3488         if (flags & DNS_OPTION_SEARCH && (!base->global_search_state || base->global_search_state->num_domains == 0)) {
3489                 search_set_from_hostname(base);
3490         }
3491
3492         mm_free(resolv);
3493         return err;
3494 }
3495
3496 int
3497 evdns_resolv_conf_parse(int flags, const char *const filename) {
3498         if (!current_base)
3499                 current_base = evdns_base_new(NULL, 0);
3500         return evdns_base_resolv_conf_parse(current_base, flags, filename);
3501 }
3502
3503
3504 #ifdef WIN32
/* Add multiple nameservers from a space-or-comma-separated list.
 *
 * Each run of digits, '.', ':', '[' and ']' in 'ips' is passed to
 * evdns_base_nameserver_ip_add().  Returns 0 if every address was added,
 * 4 if memory could not be allocated, or the first nonzero result of
 * evdns_base_nameserver_ip_add().  Separators at the end of the list are
 * ignored.  The base lock must be held. */
static int
evdns_nameserver_ip_add_line(struct evdns_base *base, const char *ips) {
	const char *addr;
	char *buf;
	int r;
	ASSERT_LOCKED(base);
	while (*ips) {
		/* The ctype functions require an unsigned char value. */
		while (isspace((unsigned char)*ips) || *ips == ',' || *ips == '\t')
			++ips;
		/* Only separators were left: we are done, not in error. */
		if (!*ips)
			break;
		addr = ips;
		while (isdigit((unsigned char)*ips) || *ips == '.' || *ips == ':' ||
		    *ips=='[' || *ips==']')
			++ips;
		buf = mm_malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_base_nameserver_ip_add(base, buf);
		mm_free(buf);
		if (r) return r;
	}
	return 0;
}
3529
3530 typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);
3531
3532 /* Use the windows GetNetworkParams interface in iphlpapi.dll to */
3533 /* figure out what our nameservers are. */
3534 static int
3535 load_nameservers_with_getnetworkparams(struct evdns_base *base)
3536 {
3537         /* Based on MSDN examples and inspection of  c-ares code. */
3538         FIXED_INFO *fixed;
3539         HMODULE handle = 0;
3540         ULONG size = sizeof(FIXED_INFO);
3541         void *buf = NULL;
3542         int status = 0, r, added_any;
3543         IP_ADDR_STRING *ns;
3544         GetNetworkParams_fn_t fn;
3545
3546         ASSERT_LOCKED(base);
3547         if (!(handle = evutil_load_windows_system_library(
3548                         TEXT("iphlpapi.dll")))) {
3549                 log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
3550                 status = -1;
3551                 goto done;
3552         }
3553         if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
3554                 log(EVDNS_LOG_WARN, "Could not get address of function.");
3555                 status = -1;
3556                 goto done;
3557         }
3558
3559         buf = mm_malloc(size);
3560         if (!buf) { status = 4; goto done; }
3561         fixed = buf;
3562         r = fn(fixed, &size);
3563         if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
3564                 status = -1;
3565                 goto done;
3566         }
3567         if (r != ERROR_SUCCESS) {
3568                 mm_free(buf);
3569                 buf = mm_malloc(size);
3570                 if (!buf) { status = 4; goto done; }
3571                 fixed = buf;
3572                 r = fn(fixed, &size);
3573                 if (r != ERROR_SUCCESS) {
3574                         log(EVDNS_LOG_DEBUG, "fn() failed.");
3575                         status = -1;
3576                         goto done;
3577                 }
3578         }
3579
3580         EVUTIL_ASSERT(fixed);
3581         added_any = 0;
3582         ns = &(fixed->DnsServerList);
3583         while (ns) {
3584                 r = evdns_nameserver_ip_add_line(base, ns->IpAddress.String);
3585                 if (r) {
3586                         log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
3587                                 (ns->IpAddress.String),(int)GetLastError());
3588                         status = r;
3589                 } else {
3590                         ++added_any;
3591                         log(EVDNS_LOG_DEBUG,"Successfully added %s as nameserver",ns->IpAddress.String);
3592                 }
3593
3594                 ns = ns->Next;
3595         }
3596
3597         if (!added_any) {
3598                 log(EVDNS_LOG_DEBUG, "No nameservers added.");
3599                 if (status == 0)
3600                         status = -1;
3601         } else {
3602                 status = 0;
3603         }
3604
3605  done:
3606         if (buf)
3607                 mm_free(buf);
3608         if (handle)
3609                 FreeLibrary(handle);
3610         return status;
3611 }
3612
3613 static int
3614 config_nameserver_from_reg_key(struct evdns_base *base, HKEY key, const TCHAR *subkey)
3615 {
3616         char *buf;
3617         DWORD bufsz = 0, type = 0;
3618         int status = 0;
3619
3620         ASSERT_LOCKED(base);
3621         if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
3622             != ERROR_MORE_DATA)
3623                 return -1;
3624         if (!(buf = mm_malloc(bufsz)))
3625                 return -1;
3626
3627         if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
3628             == ERROR_SUCCESS && bufsz > 1) {
3629                 status = evdns_nameserver_ip_add_line(base,buf);
3630         }
3631
3632         mm_free(buf);
3633         return status;
3634 }
3635
3636 #define SERVICES_KEY TEXT("System\\CurrentControlSet\\Services\\")
3637 #define WIN_NS_9X_KEY  SERVICES_KEY TEXT("VxD\\MSTCP")
3638 #define WIN_NS_NT_KEY  SERVICES_KEY TEXT("Tcpip\\Parameters")
3639
3640 static int
3641 load_nameservers_from_registry(struct evdns_base *base)
3642 {
3643         int found = 0;
3644         int r;
3645 #define TRY(k, name) \
3646         if (!found && config_nameserver_from_reg_key(base,k,TEXT(name)) == 0) { \
3647                 log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
3648                 found = 1;                                              \
3649         } else if (!found) {                                            \
3650                 log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
3651                     #k,#name);                                          \
3652         }
3653
3654         ASSERT_LOCKED(base);
3655
3656         if (((int)GetVersion()) > 0) { /* NT */
3657                 HKEY nt_key = 0, interfaces_key = 0;
3658
3659                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
3660                                  KEY_READ, &nt_key) != ERROR_SUCCESS) {
3661                         log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
3662                         return -1;
3663                 }
3664                 r = RegOpenKeyEx(nt_key, TEXT("Interfaces"), 0,
3665                              KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
3666                              &interfaces_key);
3667                 if (r != ERROR_SUCCESS) {
3668                         log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
3669                         return -1;
3670                 }
3671                 TRY(nt_key, "NameServer");
3672                 TRY(nt_key, "DhcpNameServer");
3673                 TRY(interfaces_key, "NameServer");
3674                 TRY(interfaces_key, "DhcpNameServer");
3675                 RegCloseKey(interfaces_key);
3676                 RegCloseKey(nt_key);
3677         } else {
3678                 HKEY win_key = 0;
3679                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
3680                                  KEY_READ, &win_key) != ERROR_SUCCESS) {
3681                         log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
3682                         return -1;
3683                 }
3684                 TRY(win_key, "NameServer");
3685                 RegCloseKey(win_key);
3686         }
3687
3688         if (found == 0) {
3689                 log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
3690         }
3691
3692         return found ? 0 : -1;
3693 #undef TRY
3694 }
3695
3696 int
3697 evdns_base_config_windows_nameservers(struct evdns_base *base)
3698 {
3699         int r;
3700         char *fname;
3701         if (base == NULL)
3702                 base = current_base;
3703         if (base == NULL)
3704                 return -1;
3705         EVDNS_LOCK(base);
3706         if (load_nameservers_with_getnetworkparams(base) == 0) {
3707                 EVDNS_UNLOCK(base);
3708                 return 0;
3709         }
3710         r = load_nameservers_from_registry(base);
3711
3712         fname = evdns_get_default_hosts_filename();
3713         evdns_base_load_hosts(base, fname);
3714         if (fname)
3715                 mm_free(fname);
3716
3717         EVDNS_UNLOCK(base);
3718         return r;
3719 }
3720
3721 int
3722 evdns_config_windows_nameservers(void)
3723 {
3724         if (!current_base) {
3725                 current_base = evdns_base_new(NULL, 1);
3726                 return current_base == NULL ? -1 : 0;
3727         } else {
3728                 return evdns_base_config_windows_nameservers(current_base);
3729         }
3730 }
3731 #endif
3732
/* Allocate and initialize a new evdns base that uses 'event_base'.
 *
 * If 'initialize_nameservers' is nonzero, the base is configured from the
 * system settings (the Windows configuration on WIN32, /etc/resolv.conf
 * elsewhere).  Returns the new base, or NULL if the random number
 * generator cannot be seeded, memory cannot be allocated, or the system
 * configuration step reports -1. */
struct evdns_base *
evdns_base_new(struct event_base *event_base, int initialize_nameservers)
{
	struct evdns_base *base;

	if (evutil_secure_rng_init() < 0) {
		log(EVDNS_LOG_WARN, "Unable to seed random number generator; "
		    "DNS can't run.");
		return NULL;
	}

	/* Give the evutil library a hook into its evdns-enabled
	 * functionality.  We can't just call evdns_getaddrinfo directly or
	 * else libevent-core will depend on libevent-extras. */
	evutil_set_evdns_getaddrinfo_fn(evdns_getaddrinfo);

	base = mm_malloc(sizeof(struct evdns_base));
	if (base == NULL)
		return (NULL);
	memset(base, 0, sizeof(struct evdns_base));
	base->req_waiting_head = NULL;

	EVTHREAD_ALLOC_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
	EVDNS_LOCK(base);

	/* Set max requests inflight and allocate req_heads. */
	base->req_heads = NULL;

	if (evdns_base_set_max_requests_inflight(base, 64) < 0) {
		/* Without the request table the base is unusable.  Nothing
		 * else has been allocated yet, so undo the lock and the
		 * base itself and give up. */
		EVDNS_UNLOCK(base);
		EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
		mm_free(base);
		return NULL;
	}

	base->server_head = NULL;
	base->event_base = event_base;
	base->global_good_nameservers = base->global_requests_inflight =
		base->global_requests_waiting = 0;

	base->global_timeout.tv_sec = 5;
	base->global_timeout.tv_usec = 0;
	base->global_max_reissues = 1;
	base->global_max_retransmits = 3;
	base->global_max_nameserver_timeout = 3;
	base->global_search_state = NULL;
	base->global_randomize_case = 1;
	base->global_getaddrinfo_allow_skew.tv_sec = 3;
	base->global_getaddrinfo_allow_skew.tv_usec = 0;
	base->global_nameserver_probe_initial_timeout.tv_sec = 10;
	base->global_nameserver_probe_initial_timeout.tv_usec = 0;

	TAILQ_INIT(&base->hostsdb);

	if (initialize_nameservers) {
		int r;
#ifdef WIN32
		r = evdns_base_config_windows_nameservers(base);
#else
		r = evdns_base_resolv_conf_parse(base, DNS_OPTIONS_ALL, "/etc/resolv.conf");
#endif
		if (r == -1) {
			evdns_base_free_and_unlock(base, 0);
			return NULL;
		}
	}
	EVDNS_UNLOCK(base);
	return base;
}
3797
3798 int
3799 evdns_init(void)
3800 {
3801         struct evdns_base *base = evdns_base_new(NULL, 1);
3802         if (base) {
3803                 current_base = base;
3804                 return 0;
3805         } else {
3806                 return -1;
3807         }
3808 }
3809
3810 const char *
3811 evdns_err_to_string(int err)
3812 {
3813     switch (err) {
3814         case DNS_ERR_NONE: return "no error";
3815         case DNS_ERR_FORMAT: return "misformatted query";
3816         case DNS_ERR_SERVERFAILED: return "server failed";
3817         case DNS_ERR_NOTEXIST: return "name does not exist";
3818         case DNS_ERR_NOTIMPL: return "query not implemented";
3819         case DNS_ERR_REFUSED: return "refused";
3820
3821         case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
3822         case DNS_ERR_UNKNOWN: return "unknown";
3823         case DNS_ERR_TIMEOUT: return "request timed out";
3824         case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
3825         case DNS_ERR_CANCEL: return "dns request canceled";
3826         default: return "[Unknown error code]";
3827     }
3828 }
3829
3830 static void
3831 evdns_nameserver_free(struct nameserver *server)
3832 {
3833         if (server->socket >= 0)
3834         evutil_closesocket(server->socket);
3835         (void) event_del(&server->event);
3836         event_debug_unassign(&server->event);
3837         if (server->state == 0)
3838                 (void) event_del(&server->timeout_event);
3839         event_debug_unassign(&server->timeout_event);
3840         mm_free(server);
3841 }
3842
3843 static void
3844 evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests)
3845 {
3846         struct nameserver *server, *server_next;
3847         struct search_domain *dom, *dom_next;
3848         int i;
3849
3850         /* Requires that we hold the lock. */
3851
3852         /* TODO(nickm) we might need to refcount here. */
3853
3854         for (i = 0; i < base->n_req_heads; ++i) {
3855                 while (base->req_heads[i]) {
3856                         if (fail_requests)
3857                                 reply_schedule_callback(base->req_heads[i], 0, DNS_ERR_SHUTDOWN, NULL);
3858                         request_finished(base->req_heads[i], &REQ_HEAD(base, base->req_heads[i]->trans_id), 1);
3859                 }
3860         }
3861         while (base->req_waiting_head) {
3862                 if (fail_requests)
3863                         reply_schedule_callback(base->req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
3864                 request_finished(base->req_waiting_head, &base->req_waiting_head, 1);
3865         }
3866         base->global_requests_inflight = base->global_requests_waiting = 0;
3867
3868         for (server = base->server_head; server; server = server_next) {
3869                 server_next = server->next;
3870                 evdns_nameserver_free(server);
3871                 if (server_next == base->server_head)
3872                         break;
3873         }
3874         base->server_head = NULL;
3875         base->global_good_nameservers = 0;
3876
3877         if (base->global_search_state) {
3878                 for (dom = base->global_search_state->head; dom; dom = dom_next) {
3879                         dom_next = dom->next;
3880                         mm_free(dom);
3881                 }
3882                 mm_free(base->global_search_state);
3883                 base->global_search_state = NULL;
3884         }
3885
3886         {
3887                 struct hosts_entry *victim;
3888                 while ((victim = TAILQ_FIRST(&base->hostsdb))) {
3889                         TAILQ_REMOVE(&base->hostsdb, victim, next);
3890                         mm_free(victim);
3891                 }
3892         }
3893
3894         mm_free(base->req_heads);
3895
3896         EVDNS_UNLOCK(base);
3897         EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
3898
3899         mm_free(base);
3900 }
3901
/* Destroy 'base'.  Acquires the base lock and hands over to
 * evdns_base_free_and_unlock(), which releases the lock and frees the
 * base.  'fail_requests' is passed through unchanged. */
void
evdns_base_free(struct evdns_base *base, int fail_requests)
{
	EVDNS_LOCK(base);

	/* The lock is released (and destroyed) by the callee. */
	evdns_base_free_and_unlock(base, fail_requests);
}
3908
3909 void
3910 evdns_shutdown(int fail_requests)
3911 {
3912         if (current_base) {
3913                 struct evdns_base *b = current_base;
3914                 current_base = NULL;
3915                 evdns_base_free(b, fail_requests);
3916         }
3917         evdns_log_fn = NULL;
3918 }
3919
3920 static int
3921 evdns_base_parse_hosts_line(struct evdns_base *base, char *line)
3922 {
3923         char *strtok_state;
3924         static const char *const delims = " \t";
3925         char *const addr = strtok_r(line, delims, &strtok_state);
3926         char *hostname, *hash;
3927         struct sockaddr_storage ss;
3928         int socklen = sizeof(ss);
3929         ASSERT_LOCKED(base);
3930
3931 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
3932
3933         if (!addr || *addr == '#')
3934                 return 0;
3935
3936         memset(&ss, 0, sizeof(ss));
3937         if (evutil_parse_sockaddr_port(addr, (struct sockaddr*)&ss, &socklen)<0)
3938                 return -1;
3939         if (socklen > (int)sizeof(struct sockaddr_in6))
3940                 return -1;
3941
3942         if (sockaddr_getport((struct sockaddr*)&ss))
3943                 return -1;
3944
3945         while ((hostname = NEXT_TOKEN)) {
3946                 struct hosts_entry *he;
3947                 size_t namelen;
3948                 if ((hash = strchr(hostname, '#'))) {
3949                         if (hash == hostname)
3950                                 return 0;
3951                         *hash = '\0';
3952                 }
3953
3954                 namelen = strlen(hostname);
3955
3956                 he = mm_calloc(1, sizeof(struct hosts_entry)+namelen);
3957                 if (!he)
3958                         return -1;
3959                 EVUTIL_ASSERT(socklen <= (int)sizeof(he->addr));
3960                 memcpy(&he->addr, &ss, socklen);
3961                 memcpy(he->hostname, hostname, namelen+1);
3962                 he->addrlen = socklen;
3963
3964                 TAILQ_INSERT_TAIL(&base->hostsdb, he, next);
3965
3966                 if (hash)
3967                         return 0;
3968         }
3969
3970         return 0;
3971 #undef NEXT_TOKEN
3972 }
3973
/* Load the hosts file 'hosts_fname' into the hosts database of 'base'.
 *
 * When 'hosts_fname' is NULL or the file cannot be read, entries mapping
 * "localhost" to 127.0.0.1 and ::1 are added instead.  Returns 0 on
 * success and when 'hosts_fname' is NULL, and -1 if the file could not be
 * read.  Errors on individual lines are ignored.  The base lock must be
 * held. */
static int
evdns_base_load_hosts_impl(struct evdns_base *base, const char *hosts_fname)
{
	char *contents = NULL, *line, *eol;
	size_t len;
	int err = 0;

	ASSERT_LOCKED(base);

	if (hosts_fname != NULL)
		err = evutil_read_file(hosts_fname, &contents, &len, 0);

	if (hosts_fname == NULL || err < 0) {
		/* The line parser modifies its input, so each default is
		 * copied into a scratch buffer first. */
		char tmp[64];
		strlcpy(tmp, "127.0.0.1   localhost", sizeof(tmp));
		evdns_base_parse_hosts_line(base, tmp);
		strlcpy(tmp, "::1   localhost", sizeof(tmp));
		evdns_base_parse_hosts_line(base, tmp);
		return err ? -1 : 0;
	}

	/* This will break early if there is a NUL in the hosts file.
	 * Probably not a problem.*/
	for (line = contents; ; line = eol+1) {
		eol = strchr(line, '\n');
		if (eol)
			*eol = '\0';
		evdns_base_parse_hosts_line(base, line);
		if (!eol)
			break;
	}

	mm_free(contents);
	return 0;
}
4012
4013 int
4014 evdns_base_load_hosts(struct evdns_base *base, const char *hosts_fname)
4015 {
4016         int res;
4017         if (!base)
4018                 base = current_base;
4019         EVDNS_LOCK(base);
4020         res = evdns_base_load_hosts_impl(base, hosts_fname);
4021         EVDNS_UNLOCK(base);
4022         return res;
4023 }
4024
4025 /* A single request for a getaddrinfo, either v4 or v6. */
4026 struct getaddrinfo_subrequest {
4027         struct evdns_request *r;
4028         ev_uint32_t type;
4029 };
4030
/* Bookkeeping for a getaddrinfo lookup that is still in progress. */
struct evdns_getaddrinfo_request {
	/* The evdns_base the lookup runs on. */
	struct evdns_base *evdns_base;
	/* Our own (adjusted) copy of the caller's hints, used when building
	 * the answer list. */
	struct evutil_addrinfo hints;
	/* Function to call once the lookup has finished. */
	evdns_getaddrinfo_cb user_cb;
	/* Opaque pointer handed back to user_cb. */
	void *user_data;
	/* Port number to store in every sockaddr we build. */
	ev_uint16_t port;
	/* Subrequest for the A record, if one was launched. */
	struct getaddrinfo_subrequest ipv4_request;
	/* Subrequest for the AAAA record, if one was launched. */
	struct getaddrinfo_subrequest ipv6_request;

	/* Canonical name reported to us, if any. */
	char *cname_result;

	/* When one subrequest has been answered while the other is still
	 * in flight, the first answer is parked here... */
	struct evutil_addrinfo *pending_result;
	/* ...and this timer bounds how long we keep waiting for the second
	 * subrequest before canceling it. */
	struct event timeout;

	/* Error code from the first subrequest, when it failed. */
	int pending_error;
	/* Set once the user has canceled this lookup. */
	unsigned user_canceled : 1;
	/* Set once the user may no longer cancel the lookup; all that
	 * remains is to free it. */
	unsigned request_done : 1;
};
4066
/* Map an evdns error code onto the corresponding getaddrinfo error code.
 * DNS_ERR_NONE becomes 0, DNS_ERR_NOTEXIST becomes EVUTIL_EAI_NONAME, and
 * every other code collapses to EVUTIL_EAI_FAIL. */
static int
evdns_err_to_getaddrinfo_err(int e1)
{
	/* XXX Do this better! */
	switch (e1) {
	case DNS_ERR_NONE:
		return 0;
	case DNS_ERR_NOTEXIST:
		return EVUTIL_EAI_NONAME;
	default:
		return EVUTIL_EAI_FAIL;
	}
}
4079
/* Pick the more informative of two getaddrinfo error codes: e1 wins
 * whenever it is nonzero, otherwise e2 is returned. */
static int
getaddrinfo_merge_err(int e1, int e2)
{
	/* XXXX be cleverer here. */
	return (e1 != 0) ? e1 : e2;
}
4090
/* Release a getaddrinfo request and everything it still owns: any parked
 * result list, any stored cname, and its timeout event.
 *
 * DO NOT CALL this while either subrequest is still pending.  It is only
 * safe once both callbacks have been invoked. */
static void
free_getaddrinfo_request(struct evdns_getaddrinfo_request *data)
{
	if (data->pending_result != NULL)
		evutil_freeaddrinfo(data->pending_result);

	if (data->cname_result != NULL)
		mm_free(data->cname_result);

	event_del(&data->timeout);
	mm_free(data);
}
4104
/* Hand the stored canonical name (if there is one) over to the reply 'ai'.
 * Ownership of the string moves to ai->ai_canonname, and the request's own
 * pointer is cleared.  Does nothing when there is no cname or no reply. */
static void
add_cname_to_reply(struct evdns_getaddrinfo_request *data,
    struct evutil_addrinfo *ai)
{
	if (data->cname_result == NULL || ai == NULL)
		return;

	ai->ai_canonname = data->cname_result;
	data->cname_result = NULL;
}
4114
/* Timer callback for a mixed A/AAAA getaddrinfo lookup: one subrequest has
 * finished, but the other took too long.  Cancel whatever is still pending,
 * count the timeout, and deliver the outcome we already have to the user.
 * 'ptr' is the struct evdns_getaddrinfo_request; 'fd' and 'what' are unused.
 */
static void
evdns_getaddrinfo_timeout_cb(evutil_socket_t fd, short what, void *ptr)
{
	struct evdns_getaddrinfo_request *data = ptr;
	int canceled_v4 = 0;
	int canceled_v6 = 0;

	/* Cancel each subrequest that is still pending, remembering which. */
	if (data->ipv4_request.r != NULL) {
		evdns_cancel_request(NULL, data->ipv4_request.r);
		canceled_v4 = 1;
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv4_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}
	if (data->ipv6_request.r != NULL) {
		evdns_cancel_request(NULL, data->ipv6_request.r);
		canceled_v6 = 1;
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv6_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}

	/* This timer is only armed after one address family has answered,
	 * so at most one subrequest can have timed out. */
	EVUTIL_ASSERT(!v4_timedout_placeholder_unused(canceled_v4, canceled_v6));

	/* Deliver the outcome of the subrequest that did finish. */
	if (data->pending_result != NULL) {
		add_cname_to_reply(data, data->pending_result);
		data->user_cb(0, data->pending_result, data->user_data);
		data->pending_result = NULL;
	} else {
		int e = data->pending_error ? data->pending_error
		    : EVUTIL_EAI_AGAIN;
		data->user_cb(e, NULL, data->user_data);
	}

	if (!(canceled_v4 || canceled_v6)) {
		/* should be impossible? XXXX */
		free_getaddrinfo_request(data);
	}
}
4162
/* Arm the request's timeout event with the base's
 * global_getaddrinfo_allow_skew interval.  Returns the result of
 * event_add(). */
static int
evdns_getaddrinfo_set_timeout(struct evdns_base *evdns_base,
    struct evdns_getaddrinfo_request *data)
{
	int rv;

	rv = event_add(&data->timeout,
	    &evdns_base->global_getaddrinfo_allow_skew);
	return rv;
}
4169
4170 static inline int
4171 evdns_result_is_answer(int result)
4172 {
4173         return (result != DNS_ERR_NOTIMPL && result != DNS_ERR_REFUSED &&
4174             result != DNS_ERR_SERVERFAILED && result != DNS_ERR_CANCEL);
4175 }
4176
/* DNS callback for one subrequest (A or AAAA) of a getaddrinfo lookup.
 *
 * 'arg' points at the getaddrinfo_subrequest embedded in the owning
 * evdns_getaddrinfo_request.  'result' is the evdns status, 'type' the record
 * type of the answers, 'count' the number of addresses in 'addresses'
 * (packed 4 bytes each for A, 16 bytes each for AAAA).  'ttl' is unused.
 *
 * Depending on the state of the sibling subrequest this either parks the
 * outcome (result or error) and arms the skew timeout, or reports the
 * combined outcome to the user callback and frees the request.
 */
static void
evdns_getaddrinfo_gotresolve(int result, char type, int count,
    int ttl, void *addresses, void *arg)
{
	int i;
	struct getaddrinfo_subrequest *req = arg;
	struct getaddrinfo_subrequest *other_req;
	struct evdns_getaddrinfo_request *data;

	struct evutil_addrinfo *res;

	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr *sa;
	int socklen, addrlen;
	void *addrp;
	int err;
	int user_canceled;

	/* Recover the enclosing request from the embedded subrequest, and
	 * find the sibling subrequest for the other address family. */
	EVUTIL_ASSERT(req->type == DNS_IPv4_A || req->type == DNS_IPv6_AAAA);
	if (req->type == DNS_IPv4_A) {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv4_request);
		other_req = &data->ipv6_request;
	} else {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv6_request);
		other_req = &data->ipv4_request;
	}

	EVDNS_LOCK(data->evdns_base);
	if (evdns_result_is_answer(result)) {
		if (req->type == DNS_IPv4_A)
			++data->evdns_base->getaddrinfo_ipv4_answered;
		else
			++data->evdns_base->getaddrinfo_ipv6_answered;
	}
	user_canceled = data->user_canceled;
	/* Once the sibling is no longer pending, the user may not cancel. */
	if (other_req->r == NULL)
		data->request_done = 1;
	EVDNS_UNLOCK(data->evdns_base);

	/* This subrequest is finished. */
	req->r = NULL;

	if (result == DNS_ERR_CANCEL && ! user_canceled) {
		/* Internal cancel request from timeout or internal error.
		 * we already answered the user. */
		if (other_req->r == NULL)
			free_getaddrinfo_request(data);
		return;
	}

	if (result == DNS_ERR_NONE) {
		if (count == 0)
			err = EVUTIL_EAI_NODATA;
		else
			err = 0;
	} else {
		err = evdns_err_to_getaddrinfo_err(result);
	}

	if (err) {
		/* Looks like we got an error. */
		if (other_req->r) {
			/* The other request is still working; maybe it will
			 * succeed. */
			/* XXXX handle failure from set_timeout */
			evdns_getaddrinfo_set_timeout(data->evdns_base, data);
			data->pending_error = err;
			return;
		}

		if (user_canceled) {
			data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		} else if (data->pending_result) {
			/* If we have an answer waiting, and we weren't
			 * canceled, ignore this error. */
			add_cname_to_reply(data, data->pending_result);
			data->user_cb(0, data->pending_result, data->user_data);
			data->pending_result = NULL;
		} else {
			if (data->pending_error)
				err = getaddrinfo_merge_err(err,
				    data->pending_error);
			data->user_cb(err, NULL, data->user_data);
		}
		free_getaddrinfo_request(data);
		return;
	} else if (user_canceled) {
		if (other_req->r) {
			/* The other request is still working; let it hit this
			 * callback with EVUTIL_EAI_CANCEL callback and report
			 * the failure. */
			return;
		}
		data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		free_getaddrinfo_request(data);
		return;
	}

	/* Looks like we got some answers. We should turn them into addrinfos
	 * and then either queue those or return them all. */
	EVUTIL_ASSERT(type == DNS_IPv4_A || type == DNS_IPv6_AAAA);

	/* Prepare a template sockaddr; only the address bytes (at addrp)
	 * change from one answer to the next. */
	if (type == DNS_IPv4_A) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_port = htons(data->port);

		sa = (struct sockaddr *)&sin;
		socklen = sizeof(sin);
		addrlen = 4;
		addrp = &sin.sin_addr.s_addr;
	} else {
		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_port = htons(data->port);

		sa = (struct sockaddr *)&sin6;
		socklen = sizeof(sin6);
		addrlen = 16;
		addrp = &sin6.sin6_addr.s6_addr;
	}

	res = NULL;
	for (i=0; i < count; ++i) {
		struct evutil_addrinfo *ai;
		memcpy(addrp, ((char*)addresses)+i*addrlen, addrlen);
		ai = evutil_new_addrinfo(sa, socklen, &data->hints);
		if (!ai) {
			/* Out of memory: abandon the sibling, report the
			 * failure, and drop whatever we built so far. */
			if (other_req->r) {
				evdns_cancel_request(NULL, other_req->r);
			}
			data->user_cb(EVUTIL_EAI_MEMORY, NULL, data->user_data);
			/* res is still NULL when the very first allocation
			 * failed; guard the free like the other call sites
			 * in this file do. */
			if (res)
				evutil_freeaddrinfo(res);

			if (other_req->r == NULL)
				free_getaddrinfo_request(data);
			return;
		}
		res = evutil_addrinfo_append(res, ai);
	}

	if (other_req->r) {
		/* The other request is still in progress; wait for it */
		/* XXXX handle failure from set_timeout */
		evdns_getaddrinfo_set_timeout(data->evdns_base, data);
		data->pending_result = res;
		return;
	} else {
		/* The other request is done or never started; append its
		 * results (if any) and return them. */
		if (data->pending_result) {
			/* Keep IPv4 answers ahead of IPv6 answers in the
			 * combined list, whichever arrived first. */
			if (req->type == DNS_IPv4_A)
				res = evutil_addrinfo_append(res,
				    data->pending_result);
			else
				res = evutil_addrinfo_append(
				    data->pending_result, res);
			data->pending_result = NULL;
		}

		/* Call the user callback. */
		add_cname_to_reply(data, res);
		data->user_cb(0, res, data->user_data);

		/* Free data. */
		free_getaddrinfo_request(data);
	}
}
4345
4346 static struct hosts_entry *
4347 find_hosts_entry(struct evdns_base *base, const char *hostname,
4348     struct hosts_entry *find_after)
4349 {
4350         struct hosts_entry *e;
4351
4352         if (find_after)
4353                 e = TAILQ_NEXT(find_after, next);
4354         else
4355                 e = TAILQ_FIRST(&base->hostsdb);
4356
4357         for (; e; e = TAILQ_NEXT(e, next)) {
4358                 if (!evutil_ascii_strcasecmp(e->hostname, hostname))
4359                         return e;
4360         }
4361         return NULL;
4362 }
4363
4364 static int
4365 evdns_getaddrinfo_fromhosts(struct evdns_base *base,
4366     const char *nodename, struct evutil_addrinfo *hints, ev_uint16_t port,
4367     struct evutil_addrinfo **res)
4368 {
4369         int n_found = 0;
4370         struct hosts_entry *e;
4371         struct evutil_addrinfo *ai=NULL;
4372         int f = hints->ai_family;
4373
4374         EVDNS_LOCK(base);
4375         for (e = find_hosts_entry(base, nodename, NULL); e;
4376             e = find_hosts_entry(base, nodename, e)) {
4377                 struct evutil_addrinfo *ai_new;
4378                 ++n_found;
4379                 if ((e->addr.sa.sa_family == AF_INET && f == PF_INET6) ||
4380                     (e->addr.sa.sa_family == AF_INET6 && f == PF_INET))
4381                         continue;
4382                 ai_new = evutil_new_addrinfo(&e->addr.sa, e->addrlen, hints);
4383                 if (!ai_new) {
4384                         n_found = 0;
4385                         goto out;
4386                 }
4387                 sockaddr_setport(ai_new->ai_addr, port);
4388                 ai = evutil_addrinfo_append(ai, ai_new);
4389         }
4390         EVDNS_UNLOCK(base);
4391 out:
4392         if (n_found) {
4393                 /* Note that we return an empty answer if we found entries for
4394                  * this hostname but none were of the right address type. */
4395                 *res = ai;
4396                 return 0;
4397         } else {
4398                 if (ai)
4399                         evutil_freeaddrinfo(ai);
4400                 return -1;
4401         }
4402 }
4403
/* Asynchronous getaddrinfo() built on evdns.
 *
 * dns_base may be NULL, in which case current_base is used.  nodename,
 * servname and hints_in have the usual getaddrinfo meaning.  'cb' is called
 * with 'arg' when the lookup finishes.
 *
 * If the query can be answered without a DNS request (numeric host, an
 * answer from evutil_getaddrinfo_common(), a hosts-file match) or fails
 * before any request is launched, 'cb' is invoked before this function
 * returns and NULL is returned.  Otherwise a request handle is returned,
 * which may be passed to evdns_getaddrinfo_cancel().
 */
struct evdns_getaddrinfo_request *
evdns_getaddrinfo(struct evdns_base *dns_base,
    const char *nodename, const char *servname,
    const struct evutil_addrinfo *hints_in,
    evdns_getaddrinfo_cb cb, void *arg)
{
	struct evdns_getaddrinfo_request *data;
	struct evutil_addrinfo hints;
	struct evutil_addrinfo *res = NULL;
	int err;
	int port = 0;
	int want_cname = 0;

	if (!dns_base) {
		dns_base = current_base;
		if (!dns_base) {
			log(EVDNS_LOG_WARN,
			    "Call to getaddrinfo_async with no "
			    "evdns_base configured.");
			cb(EVUTIL_EAI_FAIL, NULL, arg); /* ??? better error? */
			return NULL;
		}
	}

	/* If we _must_ answer this immediately, do so. */
	if ((hints_in && (hints_in->ai_flags & EVUTIL_AI_NUMERICHOST))) {
		res = NULL;
		err = evutil_getaddrinfo(nodename, servname, hints_in, &res);
		cb(err, res, arg);
		return NULL;
	}

	if (hints_in) {
		memcpy(&hints, hints_in, sizeof(hints));
	} else {
		memset(&hints, 0, sizeof(hints));
		hints.ai_family = PF_UNSPEC;
	}

	evutil_adjust_hints_for_addrconfig(&hints);

	/* Now try to see if we _can_ answer immediately. */
	/* (It would be nice to do this by calling getaddrinfo directly, with
	 * AI_NUMERICHOST, on platforms that have it, but we can't: there isn't
	 * a reliable way to distinguish the "that wasn't a numeric host!" case
	 * from any other EAI_NONAME cases.) */
	err = evutil_getaddrinfo_common(nodename, servname, &hints, &res, &port);
	if (err != EVUTIL_EAI_NEED_RESOLVE) {
		cb(err, res, arg);
		return NULL;
	}

	/* If there is an entry in the hosts file, we should give it now. */
	if (!evdns_getaddrinfo_fromhosts(dns_base, nodename, &hints, port, &res)) {
		cb(0, res, arg);
		return NULL;
	}

	/* Okay, things are serious now. We're going to need to actually
	 * launch a request.
	 */
	data = mm_calloc(1,sizeof(struct evdns_getaddrinfo_request));
	if (!data) {
		cb(EVUTIL_EAI_MEMORY, NULL, arg);
		return NULL;
	}

	memcpy(&data->hints, &hints, sizeof(data->hints));
	data->port = (ev_uint16_t)port;
	data->ipv4_request.type = DNS_IPv4_A;
	data->ipv6_request.type = DNS_IPv6_AAAA;
	data->user_cb = cb;
	data->user_data = arg;
	data->evdns_base = dns_base;

	want_cname = (hints.ai_flags & EVUTIL_AI_CANONNAME);

	/* If we are asked for a PF_UNSPEC address, we launch two requests in
	 * parallel: one for an A address and one for an AAAA address.  We
	 * can't send just one request, since many servers only answer one
	 * question per DNS request.
	 *
	 * Once we have the answer to one request, we allow for a short
	 * timeout before we report it, to see if the other one arrives.  If
	 * they both show up in time, then we report both the answers.
	 *
	 * If too many addresses of one type time out or fail, we should stop
	 * launching those requests. (XXX we don't do that yet.)
	 */

	if (hints.ai_family != PF_INET6) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv4 as %p",
		    nodename, (void *)&data->ipv4_request);

		data->ipv4_request.r = evdns_base_resolve_ipv4(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv4_request);
		/* The resolve call may return NULL; the check at the bottom
		 * of this function handles that, so don't dereference the
		 * handle unless it exists. */
		if (want_cname && data->ipv4_request.r)
			data->ipv4_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}
	if (hints.ai_family != PF_INET) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv6 as %p",
		    nodename, (void *)&data->ipv6_request);

		data->ipv6_request.r = evdns_base_resolve_ipv6(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv6_request);
		if (want_cname && data->ipv6_request.r)
			data->ipv6_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}

	evtimer_assign(&data->timeout, dns_base->event_base,
	    evdns_getaddrinfo_timeout_cb, data);

	if (data->ipv4_request.r || data->ipv6_request.r) {
		return data;
	} else {
		/* Neither request could be launched. */
		mm_free(data);
		cb(EVUTIL_EAI_FAIL, NULL, arg);
		return NULL;
	}
}
4528
/* Cancel an in-progress getaddrinfo lookup at the user's request.
 *
 * Has no effect once the request is marked done.  Otherwise the timeout is
 * removed, the request is flagged as user-canceled, and every subrequest
 * that is still pending is canceled.  All of this runs under the base lock.
 */
void
evdns_getaddrinfo_cancel(struct evdns_getaddrinfo_request *data)
{
	EVDNS_LOCK(data->evdns_base);
	if (!data->request_done) {
		event_del(&data->timeout);
		data->user_canceled = 1;
		if (data->ipv4_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv4_request.r);
		if (data->ipv6_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv6_request.r);
	}
	EVDNS_UNLOCK(data->evdns_base);
}