]> arthur.barton.de Git - netatalk.git/blob - libevent/evdns.c
Update libevent to 2.0.12
[netatalk.git] / libevent / evdns.c
1 /* $Id: evdns.c 6979 2006-08-04 18:31:13Z nickm $ */
2
3 /* The original version of this module was written by Adam Langley; for
4  * a history of modifications, check out the subversion logs.
5  *
6  * When editing this module, try to keep it re-mergeable by Adam.  Don't
7  * reformat the whitespace, add Tor dependencies, or so on.
8  *
9  * TODO:
10  *   - Support IPv6 and PTR records.
11  *   - Replace all externally visible magic numbers with #defined constants.
12  *   - Write documentation for APIs of all external functions.
13  */
14
15 /* Async DNS Library
16  * Adam Langley <agl@imperialviolet.org>
17  * http://www.imperialviolet.org/eventdns.html
18  * Public Domain code
19  *
20  * This software is Public Domain. To view a copy of the public domain dedication,
21  * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
22  * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
23  *
24  * I ask and expect, but do not require, that all derivative works contain an
25  * attribution similar to:
26  *      Parts developed by Adam Langley <agl@imperialviolet.org>
27  *
28  * You may wish to replace the word "Parts" with something else depending on
29  * the amount of original code.
30  *
31  * (Derivative works does not include programs which link against, run or include
32  * the source verbatim in their source distributions)
33  *
34  * Version: 0.1b
35  */
36
37 #include <sys/types.h>
38 #include "event2/event-config.h"
39
40 #ifndef _FORTIFY_SOURCE
41 #define _FORTIFY_SOURCE 3
42 #endif
43
44 #include <string.h>
45 #include <fcntl.h>
46 #ifdef _EVENT_HAVE_SYS_TIME_H
47 #include <sys/time.h>
48 #endif
49 #ifdef _EVENT_HAVE_STDINT_H
50 #include <stdint.h>
51 #endif
52 #include <stdlib.h>
53 #include <string.h>
54 #include <errno.h>
55 #ifdef _EVENT_HAVE_UNISTD_H
56 #include <unistd.h>
57 #endif
58 #include <limits.h>
59 #include <sys/stat.h>
60 #include <stdio.h>
61 #include <stdarg.h>
62 #ifdef WIN32
63 #include <winsock2.h>
64 #include <ws2tcpip.h>
65 #ifndef _WIN32_IE
66 #define _WIN32_IE 0x400
67 #endif
68 #include <shlobj.h>
69 #endif
70
71 #include "event2/dns.h"
72 #include "event2/dns_struct.h"
73 #include "event2/dns_compat.h"
74 #include "event2/util.h"
75 #include "event2/event.h"
76 #include "event2/event_struct.h"
77 #include "event2/thread.h"
78
79 #include "event2/bufferevent.h"
80 #include "event2/bufferevent_struct.h"
81 #include "bufferevent-internal.h"
82
83 #include "defer-internal.h"
84 #include "log-internal.h"
85 #include "mm-internal.h"
86 #include "strlcpy-internal.h"
87 #include "ipv6-internal.h"
88 #include "util-internal.h"
89 #include "evthread-internal.h"
90 #ifdef WIN32
91 #include <ctype.h>
92 #include <winsock2.h>
93 #include <windows.h>
94 #include <iphlpapi.h>
95 #include <io.h>
96 #else
97 #include <sys/socket.h>
98 #include <netinet/in.h>
99 #include <arpa/inet.h>
100 #endif
101
102 #ifdef _EVENT_HAVE_NETINET_IN6_H
103 #include <netinet/in6.h>
104 #endif
105
106 #define EVDNS_LOG_DEBUG 0
107 #define EVDNS_LOG_WARN 1
108 #define EVDNS_LOG_MSG 2
109
110 #ifndef HOST_NAME_MAX
111 #define HOST_NAME_MAX 255
112 #endif
113
114 #include <stdio.h>
115
116 #undef MIN
117 #define MIN(a,b) ((a)<(b)?(a):(b))
118
119 #define ASSERT_VALID_REQUEST(req) \
120         EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))
121
122 #define u64 ev_uint64_t
123 #define u32 ev_uint32_t
124 #define u16 ev_uint16_t
125 #define u8  ev_uint8_t
126
127 /* maximum number of addresses from a single packet */
128 /* that we bother recording */
129 #define MAX_V4_ADDRS 32
130 #define MAX_V6_ADDRS 32
131
132
133 #define TYPE_A         EVDNS_TYPE_A
134 #define TYPE_CNAME     5
135 #define TYPE_PTR       EVDNS_TYPE_PTR
136 #define TYPE_AAAA      EVDNS_TYPE_AAAA
137
138 #define CLASS_INET     EVDNS_CLASS_INET
139
140 /* Persistent handle.  We keep this separate from 'struct request' since we
141  * need some object to last for as long as an evdns_request is outstanding so
142  * that it can be canceled, whereas a search request can lead to multiple
143  * 'struct request' instances being created over its lifetime. */
struct evdns_request {
        /* The 'struct request' currently attached to this handle.
         * request_finished() resets it to NULL, and ASSERT_VALID_REQUEST()
         * checks that it points back at the request being handled. */
        struct request *current_req;
        /* NOTE(review): presumably the base the request was issued on --
         * confirm at the creation site. */
        struct evdns_base *base;

        int pending_cb; /* Waiting for its callback to be invoked; not
                         * owned by event base any more.  Set by
                         * reply_schedule_callback(); while it is set,
                         * request_finished() leaves freeing the handle to
                         * reply_run_callback(). */

        /* elements used by the searching code */
        int search_index;
        struct search_state *search_state;
        char *search_origname;  /* needs to be free()ed */
        int search_flags;
};
157
/* One outgoing DNS query.  Requests are linked into circular lists: either
 * one of the inflight buckets in evdns_base.req_heads or the waiting list
 * evdns_base.req_waiting_head. */
struct request {
        u8 *request;  /* the dns packet data */
        u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
        unsigned int request_len;
        int reissue_count; /* incremented by request_reissue() */
        int tx_count;  /* the number of times that this packet has been sent */
        void *user_pointer;  /* the pointer given to us for this request */
        evdns_callback_type user_callback;
        struct nameserver *ns;  /* the server which we last sent it */

        /* these objects are kept in a circular list */
        struct request *next, *prev;

        /* Deleted by request_finished() when the request was inflight. */
        struct event timeout_event;

        u16 trans_id;  /* the transaction id */
        unsigned request_appended :1;   /* true if the request pointer is data which follows this struct */
        unsigned transmit_me :1;  /* needs to be transmitted */

        /* XXXX This is a horrible hack. */
        char **put_cname_in_ptr; /* store the cname here if we get one. */

        /* The base whose lock and counters this request uses. */
        struct evdns_base *base;

        /* Persistent handle for this request; its current_req points back
         * here (see ASSERT_VALID_REQUEST). */
        struct evdns_request *handle;
};
184
/* Parsed contents of a reply packet.  The union holds the answer in one of
 * three shapes (A, AAAA or PTR).
 * NOTE(review): presumably 'type' says which union member is valid --
 * confirm in the reply parser. */
struct reply {
        unsigned int type;
        /* reply_handle() treats a reply without this bit as an error. */
        unsigned int have_answer : 1;
        union {
                struct {
                        u32 addrcount;
                        u32 addresses[MAX_V4_ADDRS];
                } a;
                struct {
                        u32 addrcount;
                        struct in6_addr addresses[MAX_V6_ADDRS];
                } aaaa;
                struct {
                        char name[HOST_NAME_MAX];
                } ptr;
        } data;
};
202
/* State kept for one upstream nameserver.  Nameservers are linked into the
 * circular list headed by evdns_base.server_head. */
struct nameserver {
        evutil_socket_t socket;  /* a connected UDP socket */
        struct sockaddr_storage address;
        ev_socklen_t addrlen;
        int failed_times;  /* number of times which we have given this server a chance */
        int timedout;  /* number of times in a row a request has timed out */
        struct event event;
        /* these objects are kept in a circular list */
        struct nameserver *next, *prev;
        struct event timeout_event;  /* used to keep the timeout for */
                                     /* when we next probe this server. */
                                     /* Valid if state == 0 */
        /* Outstanding probe request for this nameserver, if any */
        /* (cancelled and cleared by nameserver_up()). */
        struct evdns_request *probe_request;
        char state;  /* zero if we think that this server is down */
                     /* (set to 0 by nameserver_failed(), 1 by nameserver_up()) */
        char choked;  /* true if we have an EAGAIN from this server's socket */
        char write_waiting;  /* true if we are waiting for EV_WRITE events */
        struct evdns_base *base; /* base whose lock guards this nameserver */
};
222
223
224 /* Represents a local port where we're listening for DNS requests. Right now, */
225 /* only UDP is supported. */
struct evdns_server_port {
        evutil_socket_t socket; /* socket we use to read queries and write replies. */
        int refcnt; /* reference count. */
        char choked; /* Are we currently blocked from writing? */
        char closing; /* Are we trying to close this port, pending writes? */
        evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
        void *user_data; /* Opaque pointer passed to user_callback */
        struct event event; /* Read/write event */
        /* circular list of replies that we want to write. */
        struct server_request *pending_replies;
        /* NOTE(review): presumably the event_base that 'event' is
         * registered with -- confirm where the port is created. */
        struct event_base *event_base;

#ifndef _EVENT_DISABLE_THREAD_SUPPORT
        /* Only present when thread support is compiled in. */
        void *lock;
#endif
};
242
243 /* Represents part of a reply being built.      (That is, a single RR.) */
struct server_reply_item {
        struct server_reply_item *next; /* next item in sequence. */
        char *name; /* name part of the RR */
        u16 type; /* The RR type */
        u16 class; /* The RR class (usually CLASS_INET) */
        u32 ttl; /* The RR TTL */
        char is_name; /* True iff data is a label */
        u16 datalen; /* Length of data; -1 if data is a label */
                     /* (the field is a u16, so -1 is stored as 0xffff) */
        void *data; /* The contents of the RR */
};
254
255 /* Represents a request that we've received as a DNS server, and holds */
256 /* the components of the reply as we're constructing it. */
struct server_request {
        /* Pointers to the next and previous entries on the list of replies */
        /* that we're waiting to write.  Only set if we have tried to respond */
        /* and gotten EAGAIN. */
        struct server_request *next_pending;
        struct server_request *prev_pending;

        u16 trans_id; /* Transaction id. */
        struct evdns_server_port *port; /* Which port received this request on? */
        struct sockaddr_storage addr; /* Where to send the response */
        ev_socklen_t addrlen; /* length of addr */

        int n_answer; /* how many answer RRs have been set? */
        int n_authority; /* how many authority RRs have been set? */
        int n_additional; /* how many additional RRs have been set? */

        struct server_reply_item *answer; /* linked list of answer RRs */
        struct server_reply_item *authority; /* linked list of authority RRs */
        struct server_reply_item *additional; /* linked list of additional RRs */

        /* Constructed response.  Only set once we're ready to send a reply. */
        /* Once this is set, the RR fields are cleared, and no more should be set. */
        char *response;
        size_t response_len;

        /* Caller-visible fields: flags, questions. */
        /* TO_SERVER_REQUEST() recovers the enclosing server_request from a */
        /* pointer to this member. */
        struct evdns_server_request base;
};
285
/* Resolver state: request queues, the nameserver list, tunable limits and
 * timeouts, and (when thread support is enabled) the lock used by
 * EVDNS_LOCK()/EVDNS_UNLOCK(). */
struct evdns_base {
        /* An array of n_req_heads circular lists for inflight requests.
         * Each inflight request req is in req_heads[req->trans_id % n_req_heads].
         */
        struct request **req_heads;
        /* A circular list of requests that we're waiting to send, but haven't
         * sent yet because there are too many requests inflight */
        struct request *req_waiting_head;
        /* A circular list of nameservers. */
        struct nameserver *server_head;
        int n_req_heads;

        struct event_base *event_base;

        /* The number of good nameservers that we have */
        int global_good_nameservers;

        /* inflight requests are contained in the req_head list */
        /* and are actually going out across the network */
        int global_requests_inflight;
        /* requests which aren't inflight are in the waiting list */
        /* and are counted here */
        int global_requests_waiting;

        /* Upper bound on global_requests_inflight; checked by
         * evdns_requests_pump_waiting_queue(). */
        int global_max_requests_inflight;

        struct timeval global_timeout;  /* 5 seconds by default */
        int global_max_reissues;  /* a reissue occurs when we get some errors from the server */
        int global_max_retransmits;  /* number of times we'll retransmit a request which timed out */
        /* number of timeouts in a row before we consider this server to be down */
        int global_max_nameserver_timeout;
        /* true iff we will use the 0x20 hack to prevent poisoning attacks. */
        int global_randomize_case;

        /* The first time that a nameserver fails, how long do we wait before
         * probing to see if it has returned?  */
        struct timeval global_nameserver_probe_initial_timeout;

        /** Port to bind to for outgoing DNS packets. */
        struct sockaddr_storage global_outgoing_address;
        /** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
        ev_socklen_t global_outgoing_addrlen;

        struct timeval global_getaddrinfo_allow_skew;

        /* NOTE(review): these look like per-family getaddrinfo statistics;
         * their use is not visible in this part of the file. */
        int getaddrinfo_ipv4_timeouts;
        int getaddrinfo_ipv6_timeouts;
        int getaddrinfo_ipv4_answered;
        int getaddrinfo_ipv6_answered;

        struct search_state *global_search_state;

        /* Tail queue of struct hosts_entry. */
        TAILQ_HEAD(hosts_list, hosts_entry) hostsdb;

#ifndef _EVENT_DISABLE_THREAD_SUPPORT
        /* Lock taken by EVDNS_LOCK() and checked by ASSERT_LOCKED(). */
        void *lock;
#endif
};
344
/* One element of evdns_base.hostsdb: an address together with a hostname. */
struct hosts_entry {
        TAILQ_ENTRY(hosts_entry) next;
        union {
                struct sockaddr sa;
                struct sockaddr_in sin;
                struct sockaddr_in6 sin6;
        } addr;
        int addrlen;
        /* NOTE(review): declared with one byte; presumably the entry is
         * over-allocated so the full name fits -- confirm at the
         * allocation site. */
        char hostname[1];
};
355
/* File-scope base returned by evdns_get_global_base(); NULL until some
 * other code in this file assigns it. */
static struct evdns_base *current_base = NULL;

/* Return the current value of 'current_base' (may be NULL). */
struct evdns_base *
evdns_get_global_base(void)
{
        return current_base;
}
363
/* Given a pointer to an evdns_server_request, get the corresponding */
/* server_request.  This subtracts the offset of the 'base' member, so */
/* base_ptr must point at the 'base' field of a struct server_request. */
#define TO_SERVER_REQUEST(base_ptr)                                     \
        ((struct server_request*)                                       \
          (((char*)(base_ptr) - evutil_offsetof(struct server_request, base))))
369
/* Head of the inflight-request list that holds transaction id 'id', i.e.
 * req_heads[id % n_req_heads].  Expands to an lvalue.  'id' is
 * parenthesized so that expression arguments such as (a + b) are reduced
 * as a whole instead of having only their last operand taken modulo. */
#define REQ_HEAD(base, id) ((base)->req_heads[(id) % (base)->n_req_heads])
371
372 static struct nameserver *nameserver_pick(struct evdns_base *base);
373 static void evdns_request_insert(struct request *req, struct request **head);
374 static void evdns_request_remove(struct request *req, struct request **head);
375 static void nameserver_ready_callback(evutil_socket_t fd, short events, void *arg);
376 static int evdns_transmit(struct evdns_base *base);
377 static int evdns_request_transmit(struct request *req);
378 static void nameserver_send_probe(struct nameserver *const ns);
379 static void search_request_finished(struct evdns_request *const);
380 static int search_try_next(struct evdns_request *const req);
381 static struct request *search_request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
382 static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
383 static u16 transaction_id_pick(struct evdns_base *base);
384 static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
385 static void request_submit(struct request *const req);
386
387 static int server_request_free(struct server_request *req);
388 static void server_request_free_answers(struct server_request *req);
389 static void server_port_free(struct evdns_server_port *port);
390 static void server_port_ready_callback(evutil_socket_t fd, short events, void *arg);
391 static int evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename);
392 static int evdns_base_set_option_impl(struct evdns_base *base,
393     const char *option, const char *val, int flags);
394 static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);
395
396 static int strtoint(const char *const str);
397
/* Locking helpers for an evdns_base.  Without thread support they expand
 * to empty statements; otherwise they operate on (base)->lock. */
#ifdef _EVENT_DISABLE_THREAD_SUPPORT
#define EVDNS_LOCK(base)  _EVUTIL_NIL_STMT
#define EVDNS_UNLOCK(base) _EVUTIL_NIL_STMT
#define ASSERT_LOCKED(base) _EVUTIL_NIL_STMT
#else
#define EVDNS_LOCK(base)                        \
        EVLOCK_LOCK((base)->lock, 0)
#define EVDNS_UNLOCK(base)                      \
        EVLOCK_UNLOCK((base)->lock, 0)
#define ASSERT_LOCKED(base)                     \
        EVLOCK_ASSERT_LOCKED((base)->lock)
#endif
410
411 static void
412 default_evdns_log_fn(int warning, const char *buf)
413 {
414         if (warning == EVDNS_LOG_WARN)
415                 event_warnx("[evdns] %s", buf);
416         else if (warning == EVDNS_LOG_MSG)
417                 event_msgx("[evdns] %s", buf);
418         else
419                 event_debug(("[evdns] %s", buf));
420 }
421
/* User-installed log callback; NULL when none has been set. */
static evdns_debug_log_fn_type evdns_log_fn = NULL;

/* Install 'fn' as the log callback consulted by _evdns_log().  Passing
 * NULL removes a previously installed callback. */
void
evdns_set_log_fn(evdns_debug_log_fn_type fn)
{
        evdns_log_fn = fn;
}
429
430 #ifdef __GNUC__
431 #define EVDNS_LOG_CHECK  __attribute__ ((format(printf, 2, 3)))
432 #else
433 #define EVDNS_LOG_CHECK
434 #endif
435
436 static void _evdns_log(int warn, const char *fmt, ...) EVDNS_LOG_CHECK;
437 static void
438 _evdns_log(int warn, const char *fmt, ...)
439 {
440         va_list args;
441         char buf[512];
442         if (!evdns_log_fn)
443                 return;
444         va_start(args,fmt);
445         evutil_vsnprintf(buf, sizeof(buf), fmt, args);
446         va_end(args);
447         if (evdns_log_fn) {
448                 if (warn == EVDNS_LOG_MSG)
449                         warn = EVDNS_LOG_WARN;
450                 evdns_log_fn(warn, buf);
451         } else {
452                 default_evdns_log_fn(warn, buf);
453         }
454
455 }
456
457 #define log _evdns_log
458
459 /* This walks the list of inflight requests to find the */
460 /* one with a matching transaction id. Returns NULL on */
461 /* failure */
462 static struct request *
463 request_find_from_trans_id(struct evdns_base *base, u16 trans_id) {
464         struct request *req = REQ_HEAD(base, trans_id);
465         struct request *const started_at = req;
466
467         ASSERT_LOCKED(base);
468
469         if (req) {
470                 do {
471                         if (req->trans_id == trans_id) return req;
472                         req = req->next;
473                 } while (req != started_at);
474         }
475
476         return NULL;
477 }
478
479 /* a libevent callback function which is called when a nameserver */
480 /* has gone down and we want to test if it has came back to life yet */
481 static void
482 nameserver_prod_callback(evutil_socket_t fd, short events, void *arg) {
483         struct nameserver *const ns = (struct nameserver *) arg;
484         (void)fd;
485         (void)events;
486
487         EVDNS_LOCK(ns->base);
488         nameserver_send_probe(ns);
489         EVDNS_UNLOCK(ns->base);
490 }
491
492 /* a libevent callback which is called when a nameserver probe (to see if */
493 /* it has come back to life) times out. We increment the count of failed_times */
494 /* and wait longer to send the next probe packet. */
495 static void
496 nameserver_probe_failed(struct nameserver *const ns) {
497         struct timeval timeout;
498         int i;
499
500         ASSERT_LOCKED(ns->base);
501         (void) evtimer_del(&ns->timeout_event);
502         if (ns->state == 1) {
503                 /* This can happen if the nameserver acts in a way which makes us mark */
504                 /* it as bad and then starts sending good replies. */
505                 return;
506         }
507
508 #define MAX_PROBE_TIMEOUT 3600
509 #define TIMEOUT_BACKOFF_FACTOR 3
510
511         memcpy(&timeout, &ns->base->global_nameserver_probe_initial_timeout,
512             sizeof(struct timeval));
513         for (i=ns->failed_times; i > 0 && timeout.tv_sec < MAX_PROBE_TIMEOUT; --i) {
514                 timeout.tv_sec *= TIMEOUT_BACKOFF_FACTOR;
515                 timeout.tv_usec *= TIMEOUT_BACKOFF_FACTOR;
516                 if (timeout.tv_usec > 1000000) {
517                         timeout.tv_sec += timeout.tv_usec / 1000000;
518                         timeout.tv_usec %= 1000000;
519                 }
520         }
521         if (timeout.tv_sec > MAX_PROBE_TIMEOUT) {
522                 timeout.tv_sec = MAX_PROBE_TIMEOUT;
523                 timeout.tv_usec = 0;
524         }
525
526         ns->failed_times++;
527
528         if (evtimer_add(&ns->timeout_event, &timeout) < 0) {
529                 char addrbuf[128];
530                 log(EVDNS_LOG_WARN,
531                     "Error from libevent when adding timer event for %s",
532                     evutil_format_sockaddr_port(
533                             (struct sockaddr *)&ns->address,
534                             addrbuf, sizeof(addrbuf)));
535         }
536 }
537
538 /* called when a nameserver has been deemed to have failed. For example, too */
539 /* many packets have timed out etc */
540 static void
541 nameserver_failed(struct nameserver *const ns, const char *msg) {
542         struct request *req, *started_at;
543         struct evdns_base *base = ns->base;
544         int i;
545         char addrbuf[128];
546
547         ASSERT_LOCKED(base);
548         /* if this nameserver has already been marked as failed */
549         /* then don't do anything */
550         if (!ns->state) return;
551
552         log(EVDNS_LOG_MSG, "Nameserver %s has failed: %s",
553             evutil_format_sockaddr_port(
554                     (struct sockaddr *)&ns->address,
555                     addrbuf, sizeof(addrbuf)),
556             msg);
557
558         base->global_good_nameservers--;
559         EVUTIL_ASSERT(base->global_good_nameservers >= 0);
560         if (base->global_good_nameservers == 0) {
561                 log(EVDNS_LOG_MSG, "All nameservers have failed");
562         }
563
564         ns->state = 0;
565         ns->failed_times = 1;
566
567         if (evtimer_add(&ns->timeout_event,
568                 &base->global_nameserver_probe_initial_timeout) < 0) {
569                 log(EVDNS_LOG_WARN,
570                     "Error from libevent when adding timer event for %s",
571                     evutil_format_sockaddr_port(
572                             (struct sockaddr *)&ns->address,
573                             addrbuf, sizeof(addrbuf)));
574                 /* ???? Do more? */
575         }
576
577         /* walk the list of inflight requests to see if any can be reassigned to */
578         /* a different server. Requests in the waiting queue don't have a */
579         /* nameserver assigned yet */
580
581         /* if we don't have *any* good nameservers then there's no point */
582         /* trying to reassign requests to one */
583         if (!base->global_good_nameservers) return;
584
585         for (i = 0; i < base->n_req_heads; ++i) {
586                 req = started_at = base->req_heads[i];
587                 if (req) {
588                         do {
589                                 if (req->tx_count == 0 && req->ns == ns) {
590                                         /* still waiting to go out, can be moved */
591                                         /* to another server */
592                                         req->ns = nameserver_pick(base);
593                                 }
594                                 req = req->next;
595                         } while (req != started_at);
596                 }
597         }
598 }
599
600 static void
601 nameserver_up(struct nameserver *const ns)
602 {
603         char addrbuf[128];
604         ASSERT_LOCKED(ns->base);
605         if (ns->state) return;
606         log(EVDNS_LOG_MSG, "Nameserver %s is back up",
607             evutil_format_sockaddr_port(
608                     (struct sockaddr *)&ns->address,
609                     addrbuf, sizeof(addrbuf)));
610         evtimer_del(&ns->timeout_event);
611         if (ns->probe_request) {
612                 evdns_cancel_request(ns->base, ns->probe_request);
613                 ns->probe_request = NULL;
614         }
615         ns->state = 1;
616         ns->failed_times = 0;
617         ns->timedout = 0;
618         ns->base->global_good_nameservers++;
619 }
620
621 static void
622 request_trans_id_set(struct request *const req, const u16 trans_id) {
623         req->trans_id = trans_id;
624         *((u16 *) req->request) = htons(trans_id);
625 }
626
627 /* Called to remove a request from a list and dealloc it. */
628 /* head is a pointer to the head of the list it should be */
629 /* removed from or NULL if the request isn't in a list. */
630 /* when free_handle is one, free the handle as well. */
631 static void
632 request_finished(struct request *const req, struct request **head, int free_handle) {
633         struct evdns_base *base = req->base;
634         int was_inflight = (head != &base->req_waiting_head);
635         EVDNS_LOCK(base);
636         ASSERT_VALID_REQUEST(req);
637
638         if (head)
639                 evdns_request_remove(req, head);
640
641         log(EVDNS_LOG_DEBUG, "Removing timeout for request %p", req);
642         if (was_inflight) {
643                 evtimer_del(&req->timeout_event);
644                 base->global_requests_inflight--;
645         } else {
646                 base->global_requests_waiting--;
647         }
648
649         if (!req->request_appended) {
650                 /* need to free the request data on it's own */
651                 mm_free(req->request);
652         } else {
653                 /* the request data is appended onto the header */
654                 /* so everything gets free()ed when we: */
655         }
656
657         if (req->handle) {
658                 EVUTIL_ASSERT(req->handle->current_req == req);
659
660                 if (free_handle) {
661                         search_request_finished(req->handle);
662                         req->handle->current_req = NULL;
663                         if (! req->handle->pending_cb) {
664                                 /* If we're planning to run the callback,
665                                  * don't free the handle until later. */
666                                 mm_free(req->handle);
667                         }
668                         req->handle = NULL; /* If we have a bug, let's crash
669                                              * early */
670                 } else {
671                         req->handle->current_req = NULL;
672                 }
673         }
674
675         mm_free(req);
676
677         evdns_requests_pump_waiting_queue(base);
678         EVDNS_UNLOCK(base);
679 }
680
681 /* This is called when a server returns a funny error code. */
682 /* We try the request again with another server. */
683 /* */
684 /* return: */
685 /*   0 ok */
686 /*   1 failed/reissue is pointless */
687 static int
688 request_reissue(struct request *req) {
689         const struct nameserver *const last_ns = req->ns;
690         ASSERT_LOCKED(req->base);
691         ASSERT_VALID_REQUEST(req);
692         /* the last nameserver should have been marked as failing */
693         /* by the caller of this function, therefore pick will try */
694         /* not to return it */
695         req->ns = nameserver_pick(req->base);
696         if (req->ns == last_ns) {
697                 /* ... but pick did return it */
698                 /* not a lot of point in trying again with the */
699                 /* same server */
700                 return 1;
701         }
702
703         req->reissue_count++;
704         req->tx_count = 0;
705         req->transmit_me = 1;
706
707         return 0;
708 }
709
710 /* this function looks for space on the inflight queue and promotes */
711 /* requests from the waiting queue if it can. */
712 static void
713 evdns_requests_pump_waiting_queue(struct evdns_base *base) {
714         ASSERT_LOCKED(base);
715         while (base->global_requests_inflight < base->global_max_requests_inflight &&
716                    base->global_requests_waiting) {
717                 struct request *req;
718                 /* move a request from the waiting queue to the inflight queue */
719                 EVUTIL_ASSERT(base->req_waiting_head);
720                 req = base->req_waiting_head;
721                 evdns_request_remove(req, &base->req_waiting_head);
722
723                 base->global_requests_waiting--;
724                 base->global_requests_inflight++;
725
726                 req->ns = nameserver_pick(base);
727                 request_trans_id_set(req, transaction_id_pick(base));
728
729                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
730                 evdns_request_transmit(req);
731                 evdns_transmit(base);
732         }
733 }
734
/* Everything needed to invoke a user's callback later from the deferred
 * callback queue.  Filled in by reply_schedule_callback() and consumed
 * (and freed) by reply_run_callback(). */
struct deferred_reply_callback {
        struct deferred_cb deferred; /* queue entry; upcast back to this struct */
        struct evdns_request *handle; /* handle of the request, if it had one */
        u8 request_type; /* copied from the request: TYPE_A, TYPE_PTR or TYPE_AAAA */
        u8 have_reply; /* 1 iff 'reply' below was filled in */
        u32 ttl; /* passed to the callback when have_reply is set */
        u32 err; /* passed to the callback when have_reply is not set */
        evdns_callback_type user_callback;
        struct reply reply; /* copy of the parsed reply; valid iff have_reply */
};
746
747 static void
748 reply_run_callback(struct deferred_cb *d, void *user_pointer)
749 {
750         struct deferred_reply_callback *cb =
751             EVUTIL_UPCAST(d, struct deferred_reply_callback, deferred);
752
753         switch (cb->request_type) {
754         case TYPE_A:
755                 if (cb->have_reply)
756                         cb->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
757                             cb->reply.data.a.addrcount, cb->ttl,
758                             cb->reply.data.a.addresses,
759                             user_pointer);
760                 else
761                         cb->user_callback(cb->err, 0, 0, 0, NULL, user_pointer);
762                 break;
763         case TYPE_PTR:
764                 if (cb->have_reply) {
765                         char *name = cb->reply.data.ptr.name;
766                         cb->user_callback(DNS_ERR_NONE, DNS_PTR, 1, cb->ttl,
767                             &name, user_pointer);
768                 } else {
769                         cb->user_callback(cb->err, 0, 0, 0, NULL, user_pointer);
770                 }
771                 break;
772         case TYPE_AAAA:
773                 if (cb->have_reply)
774                         cb->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
775                             cb->reply.data.aaaa.addrcount, cb->ttl,
776                             cb->reply.data.aaaa.addresses,
777                             user_pointer);
778                 else
779                         cb->user_callback(cb->err, 0, 0, 0, NULL, user_pointer);
780                 break;
781         default:
782                 EVUTIL_ASSERT(0);
783         }
784
785         if (cb->handle && cb->handle->pending_cb) {
786                 mm_free(cb->handle);
787         }
788
789         mm_free(cb);
790 }
791
792 static void
793 reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply)
794 {
795         struct deferred_reply_callback *d = mm_calloc(1, sizeof(*d));
796
797         if (!d) {
798                 event_warn("%s: Couldn't allocate space for deferred callback.",
799                     __func__);
800                 return;
801         }
802
803         ASSERT_LOCKED(req->base);
804
805         d->request_type = req->request_type;
806         d->user_callback = req->user_callback;
807         d->ttl = ttl;
808         d->err = err;
809         if (reply) {
810                 d->have_reply = 1;
811                 memcpy(&d->reply, reply, sizeof(struct reply));
812         }
813
814         if (req->handle) {
815                 req->handle->pending_cb = 1;
816                 d->handle = req->handle;
817         }
818
819         event_deferred_cb_init(&d->deferred, reply_run_callback,
820             req->user_pointer);
821         event_deferred_cb_schedule(
822                 event_base_get_deferred_cb_queue(req->base->event_base),
823                 &d->deferred);
824 }
825
826 /* this processes a parsed reply packet */
827 static void
828 reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
829         int error;
830         char addrbuf[128];
831         static const int error_codes[] = {
832                 DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
833                 DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
834         };
835
836         ASSERT_LOCKED(req->base);
837         ASSERT_VALID_REQUEST(req);
838
839         if (flags & 0x020f || !reply || !reply->have_answer) {
840                 /* there was an error */
841                 if (flags & 0x0200) {
842                         error = DNS_ERR_TRUNCATED;
843                 } else {
844                         u16 error_code = (flags & 0x000f) - 1;
845                         if (error_code > 4) {
846                                 error = DNS_ERR_UNKNOWN;
847                         } else {
848                                 error = error_codes[error_code];
849                         }
850                 }
851
852                 switch (error) {
853                 case DNS_ERR_NOTIMPL:
854                 case DNS_ERR_REFUSED:
855                         /* we regard these errors as marking a bad nameserver */
856                         if (req->reissue_count < req->base->global_max_reissues) {
857                                 char msg[64];
858                                 evutil_snprintf(msg, sizeof(msg), "Bad response %d (%s)",
859                                          error, evdns_err_to_string(error));
860                                 nameserver_failed(req->ns, msg);
861                                 if (!request_reissue(req)) return;
862                         }
863                         break;
864                 case DNS_ERR_SERVERFAILED:
865                         /* rcode 2 (servfailed) sometimes means "we
866                          * are broken" and sometimes (with some binds)
867                          * means "that request was very confusing."
868                          * Treat this as a timeout, not a failure.
869                          */
870                         log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver"
871                                 "at %s; will allow the request to time out.",
872                             evutil_format_sockaddr_port(
873                                     (struct sockaddr *)&req->ns->address,
874                                     addrbuf, sizeof(addrbuf)));
875                         break;
876                 default:
877                         /* we got a good reply from the nameserver */
878                         nameserver_up(req->ns);
879                 }
880
881                 if (req->handle->search_state &&
882                     req->request_type != TYPE_PTR) {
883                         /* if we have a list of domains to search in,
884                          * try the next one */
885                         if (!search_try_next(req->handle)) {
886                                 /* a new request was issued so this
887                                  * request is finished and */
888                                 /* the user callback will be made when
889                                  * that request (or a */
890                                 /* child of it) finishes. */
891                                 return;
892                         }
893                 }
894
895                 /* all else failed. Pass the failure up */
896                 reply_schedule_callback(req, 0, error, NULL);
897                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
898         } else {
899                 /* all ok, tell the user */
900                 reply_schedule_callback(req, ttl, 0, reply);
901                 if (req->handle == req->ns->probe_request)
902                         req->ns->probe_request = NULL; /* Avoid double-free */
903                 nameserver_up(req->ns);
904                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
905         }
906 }
907
908 static int
909 name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
910         int name_end = -1;
911         int j = *idx;
912         int ptr_count = 0;
913 #define GET32(x) do { if (j + 4 > length) goto err; memcpy(&_t32, packet + j, 4); j += 4; x = ntohl(_t32); } while (0)
914 #define GET16(x) do { if (j + 2 > length) goto err; memcpy(&_t, packet + j, 2); j += 2; x = ntohs(_t); } while (0)
915 #define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while (0)
916
917         char *cp = name_out;
918         const char *const end = name_out + name_out_len;
919
920         /* Normally, names are a series of length prefixed strings terminated */
921         /* with a length of 0 (the lengths are u8's < 63). */
922         /* However, the length can start with a pair of 1 bits and that */
923         /* means that the next 14 bits are a pointer within the current */
924         /* packet. */
925
926         for (;;) {
927                 u8 label_len;
928                 if (j >= length) return -1;
929                 GET8(label_len);
930                 if (!label_len) break;
931                 if (label_len & 0xc0) {
932                         u8 ptr_low;
933                         GET8(ptr_low);
934                         if (name_end < 0) name_end = j;
935                         j = (((int)label_len & 0x3f) << 8) + ptr_low;
936                         /* Make sure that the target offset is in-bounds. */
937                         if (j < 0 || j >= length) return -1;
938                         /* If we've jumped more times than there are characters in the
939                          * message, we must have a loop. */
940                         if (++ptr_count > length) return -1;
941                         continue;
942                 }
943                 if (label_len > 63) return -1;
944                 if (cp != name_out) {
945                         if (cp + 1 >= end) return -1;
946                         *cp++ = '.';
947                 }
948                 if (cp + label_len >= end) return -1;
949                 memcpy(cp, packet + j, label_len);
950                 cp += label_len;
951                 j += label_len;
952         }
953         if (cp >= end) return -1;
954         *cp = '\0';
955         if (name_end < 0)
956                 *idx = j;
957         else
958                 *idx = name_end;
959         return 0;
960  err:
961         return -1;
962 }
963
964 /* parses a raw request from a nameserver */
965 static int
966 reply_parse(struct evdns_base *base, u8 *packet, int length) {
967         int j = 0, k = 0;  /* index into packet */
968         u16 _t;  /* used by the macros */
969         u32 _t32;  /* used by the macros */
970         char tmp_name[256], cmp_name[256]; /* used by the macros */
971         int name_matches = 0;
972
973         u16 trans_id, questions, answers, authority, additional, datalength;
974         u16 flags = 0;
975         u32 ttl, ttl_r = 0xffffffff;
976         struct reply reply;
977         struct request *req = NULL;
978         unsigned int i;
979
980         ASSERT_LOCKED(base);
981
982         GET16(trans_id);
983         GET16(flags);
984         GET16(questions);
985         GET16(answers);
986         GET16(authority);
987         GET16(additional);
988         (void) authority; /* suppress "unused variable" warnings. */
989         (void) additional; /* suppress "unused variable" warnings. */
990
991         req = request_find_from_trans_id(base, trans_id);
992         if (!req) return -1;
993         EVUTIL_ASSERT(req->base == base);
994
995         memset(&reply, 0, sizeof(reply));
996
997         /* If it's not an answer, it doesn't correspond to any request. */
998         if (!(flags & 0x8000)) return -1;  /* must be an answer */
999         if (flags & 0x020f) {
1000                 /* there was an error */
1001                 goto err;
1002         }
1003         /* if (!answers) return; */  /* must have an answer of some form */
1004
1005         /* This macro skips a name in the DNS reply. */
1006 #define SKIP_NAME                                               \
1007         do { tmp_name[0] = '\0';                                \
1008                 if (name_parse(packet, length, &j, tmp_name,    \
1009                         sizeof(tmp_name))<0)                    \
1010                         goto err;                               \
1011         } while (0)
1012 #define TEST_NAME                                                       \
1013         do { tmp_name[0] = '\0';                                        \
1014                 cmp_name[0] = '\0';                                     \
1015                 k = j;                                                  \
1016                 if (name_parse(packet, length, &j, tmp_name,            \
1017                         sizeof(tmp_name))<0)                            \
1018                         goto err;                                       \
1019                 if (name_parse(req->request, req->request_len, &k,      \
1020                         cmp_name, sizeof(cmp_name))<0)                  \
1021                         goto err;                                       \
1022                 if (base->global_randomize_case) {                      \
1023                         if (strcmp(tmp_name, cmp_name) == 0)            \
1024                                 name_matches = 1;                       \
1025                 } else {                                                \
1026                         if (evutil_ascii_strcasecmp(tmp_name, cmp_name) == 0) \
1027                                 name_matches = 1;                       \
1028                 }                                                       \
1029         } while (0)
1030
1031         reply.type = req->request_type;
1032
1033         /* skip over each question in the reply */
1034         for (i = 0; i < questions; ++i) {
1035                 /* the question looks like
1036                  *   <label:name><u16:type><u16:class>
1037                  */
1038                 TEST_NAME;
1039                 j += 4;
1040                 if (j >= length) goto err;
1041         }
1042
1043         if (!name_matches)
1044                 goto err;
1045
1046         /* now we have the answer section which looks like
1047          * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
1048          */
1049
1050         for (i = 0; i < answers; ++i) {
1051                 u16 type, class;
1052
1053                 SKIP_NAME;
1054                 GET16(type);
1055                 GET16(class);
1056                 GET32(ttl);
1057                 GET16(datalength);
1058
1059                 if (type == TYPE_A && class == CLASS_INET) {
1060                         int addrcount, addrtocopy;
1061                         if (req->request_type != TYPE_A) {
1062                                 j += datalength; continue;
1063                         }
1064                         if ((datalength & 3) != 0) /* not an even number of As. */
1065                             goto err;
1066                         addrcount = datalength >> 2;
1067                         addrtocopy = MIN(MAX_V4_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
1068
1069                         ttl_r = MIN(ttl_r, ttl);
1070                         /* we only bother with the first four addresses. */
1071                         if (j + 4*addrtocopy > length) goto err;
1072                         memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
1073                                    packet + j, 4*addrtocopy);
1074                         j += 4*addrtocopy;
1075                         reply.data.a.addrcount += addrtocopy;
1076                         reply.have_answer = 1;
1077                         if (reply.data.a.addrcount == MAX_V4_ADDRS) break;
1078                 } else if (type == TYPE_PTR && class == CLASS_INET) {
1079                         if (req->request_type != TYPE_PTR) {
1080                                 j += datalength; continue;
1081                         }
1082                         if (name_parse(packet, length, &j, reply.data.ptr.name,
1083                                                    sizeof(reply.data.ptr.name))<0)
1084                                 goto err;
1085                         ttl_r = MIN(ttl_r, ttl);
1086                         reply.have_answer = 1;
1087                         break;
1088                 } else if (type == TYPE_CNAME) {
1089                         char cname[HOST_NAME_MAX];
1090                         if (!req->put_cname_in_ptr || *req->put_cname_in_ptr) {
1091                                 j += datalength; continue;
1092                         }
1093                         if (name_parse(packet, length, &j, cname,
1094                                 sizeof(cname))<0)
1095                                 goto err;
1096                         *req->put_cname_in_ptr = mm_strdup(cname);
1097                 } else if (type == TYPE_AAAA && class == CLASS_INET) {
1098                         int addrcount, addrtocopy;
1099                         if (req->request_type != TYPE_AAAA) {
1100                                 j += datalength; continue;
1101                         }
1102                         if ((datalength & 15) != 0) /* not an even number of AAAAs. */
1103                                 goto err;
1104                         addrcount = datalength >> 4;  /* each address is 16 bytes long */
1105                         addrtocopy = MIN(MAX_V6_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
1106                         ttl_r = MIN(ttl_r, ttl);
1107
1108                         /* we only bother with the first four addresses. */
1109                         if (j + 16*addrtocopy > length) goto err;
1110                         memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
1111                                    packet + j, 16*addrtocopy);
1112                         reply.data.aaaa.addrcount += addrtocopy;
1113                         j += 16*addrtocopy;
1114                         reply.have_answer = 1;
1115                         if (reply.data.aaaa.addrcount == MAX_V6_ADDRS) break;
1116                 } else {
1117                         /* skip over any other type of resource */
1118                         j += datalength;
1119                 }
1120         }
1121
1122         reply_handle(req, flags, ttl_r, &reply);
1123         return 0;
1124  err:
1125         if (req)
1126                 reply_handle(req, flags, 0, NULL);
1127         return -1;
1128 }
1129
1130 /* Parse a raw request (packet,length) sent to a nameserver port (port) from */
1131 /* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
1132 /* callback. */
1133 static int
1134 request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, ev_socklen_t addrlen)
1135 {
1136         int j = 0;      /* index into packet */
1137         u16 _t;  /* used by the macros */
1138         char tmp_name[256]; /* used by the macros */
1139
1140         int i;
1141         u16 trans_id, flags, questions, answers, authority, additional;
1142         struct server_request *server_req = NULL;
1143
1144         ASSERT_LOCKED(port);
1145
1146         /* Get the header fields */
1147         GET16(trans_id);
1148         GET16(flags);
1149         GET16(questions);
1150         GET16(answers);
1151         GET16(authority);
1152         GET16(additional);
1153         (void)answers;
1154         (void)additional;
1155         (void)authority;
1156
1157         if (flags & 0x8000) return -1; /* Must not be an answer. */
1158         flags &= 0x0110; /* Only RD and CD get preserved. */
1159
1160         server_req = mm_malloc(sizeof(struct server_request));
1161         if (server_req == NULL) return -1;
1162         memset(server_req, 0, sizeof(struct server_request));
1163
1164         server_req->trans_id = trans_id;
1165         memcpy(&server_req->addr, addr, addrlen);
1166         server_req->addrlen = addrlen;
1167
1168         server_req->base.flags = flags;
1169         server_req->base.nquestions = 0;
1170         server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
1171         if (server_req->base.questions == NULL)
1172                 goto err;
1173
1174         for (i = 0; i < questions; ++i) {
1175                 u16 type, class;
1176                 struct evdns_server_question *q;
1177                 int namelen;
1178                 if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
1179                         goto err;
1180                 GET16(type);
1181                 GET16(class);
1182                 namelen = (int)strlen(tmp_name);
1183                 q = mm_malloc(sizeof(struct evdns_server_question) + namelen);
1184                 if (!q)
1185                         goto err;
1186                 q->type = type;
1187                 q->dns_question_class = class;
1188                 memcpy(q->name, tmp_name, namelen+1);
1189                 server_req->base.questions[server_req->base.nquestions++] = q;
1190         }
1191
1192         /* Ignore answers, authority, and additional. */
1193
1194         server_req->port = port;
1195         port->refcnt++;
1196
1197         /* Only standard queries are supported. */
1198         if (flags & 0x7800) {
1199                 evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
1200                 return -1;
1201         }
1202
1203         port->user_callback(&(server_req->base), port->user_data);
1204
1205         return 0;
1206 err:
1207         if (server_req) {
1208                 if (server_req->base.questions) {
1209                         for (i = 0; i < server_req->base.nquestions; ++i)
1210                                 mm_free(server_req->base.questions[i]);
1211                         mm_free(server_req->base.questions);
1212                 }
1213                 mm_free(server_req);
1214         }
1215         return -1;
1216
1217 #undef SKIP_NAME
1218 #undef GET32
1219 #undef GET16
1220 #undef GET8
1221 }
1222
1223
1224 void
1225 evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void))
1226 {
1227 }
1228
/* Accepts a random-bytes generator callback and ignores it: this
 * function has no effect. */
void
evdns_set_random_bytes_fn(void (*fn)(char *, size_t))
{
	(void)fn;
}
1233
1234 /* Try to choose a strong transaction id which isn't already in flight */
1235 static u16
1236 transaction_id_pick(struct evdns_base *base) {
1237         ASSERT_LOCKED(base);
1238         for (;;) {
1239                 u16 trans_id;
1240                 evutil_secure_rng_get_bytes(&trans_id, sizeof(trans_id));
1241
1242                 if (trans_id == 0xffff) continue;
1243                 /* now check to see if that id is already inflight */
1244                 if (request_find_from_trans_id(base, trans_id) == NULL)
1245                         return trans_id;
1246         }
1247 }
1248
1249 /* choose a namesever to use. This function will try to ignore */
1250 /* nameservers which we think are down and load balance across the rest */
1251 /* by updating the server_head global each time. */
1252 static struct nameserver *
1253 nameserver_pick(struct evdns_base *base) {
1254         struct nameserver *started_at = base->server_head, *picked;
1255         ASSERT_LOCKED(base);
1256         if (!base->server_head) return NULL;
1257
1258         /* if we don't have any good nameservers then there's no */
1259         /* point in trying to find one. */
1260         if (!base->global_good_nameservers) {
1261                 base->server_head = base->server_head->next;
1262                 return base->server_head;
1263         }
1264
1265         /* remember that nameservers are in a circular list */
1266         for (;;) {
1267                 if (base->server_head->state) {
1268                         /* we think this server is currently good */
1269                         picked = base->server_head;
1270                         base->server_head = base->server_head->next;
1271                         return picked;
1272                 }
1273
1274                 base->server_head = base->server_head->next;
1275                 if (base->server_head == started_at) {
1276                         /* all the nameservers seem to be down */
1277                         /* so we just return this one and hope for the */
1278                         /* best */
1279                         EVUTIL_ASSERT(base->global_good_nameservers == 0);
1280                         picked = base->server_head;
1281                         base->server_head = base->server_head->next;
1282                         return picked;
1283                 }
1284         }
1285 }
1286
1287 /* this is called when a namesever socket is ready for reading */
1288 static void
1289 nameserver_read(struct nameserver *ns) {
1290         struct sockaddr_storage ss;
1291         ev_socklen_t addrlen = sizeof(ss);
1292         u8 packet[1500];
1293         char addrbuf[128];
1294         ASSERT_LOCKED(ns->base);
1295
1296         for (;;) {
1297                 const int r = recvfrom(ns->socket, (void*)packet,
1298                     sizeof(packet), 0,
1299                     (struct sockaddr*)&ss, &addrlen);
1300                 if (r < 0) {
1301                         int err = evutil_socket_geterror(ns->socket);
1302                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1303                                 return;
1304                         nameserver_failed(ns,
1305                             evutil_socket_error_to_string(err));
1306                         return;
1307                 }
1308                 if (evutil_sockaddr_cmp((struct sockaddr*)&ss,
1309                         (struct sockaddr*)&ns->address, 0)) {
1310                         log(EVDNS_LOG_WARN, "Address mismatch on received "
1311                             "DNS packet.  Apparent source was %s",
1312                             evutil_format_sockaddr_port(
1313                                     (struct sockaddr *)&ss,
1314                                     addrbuf, sizeof(addrbuf)));
1315                         return;
1316                 }
1317
1318                 ns->timedout = 0;
1319                 reply_parse(ns->base, packet, r);
1320         }
1321 }
1322
1323 /* Read a packet from a DNS client on a server port s, parse it, and */
1324 /* act accordingly. */
1325 static void
1326 server_port_read(struct evdns_server_port *s) {
1327         u8 packet[1500];
1328         struct sockaddr_storage addr;
1329         ev_socklen_t addrlen;
1330         int r;
1331         ASSERT_LOCKED(s);
1332
1333         for (;;) {
1334                 addrlen = sizeof(struct sockaddr_storage);
1335                 r = recvfrom(s->socket, (void*)packet, sizeof(packet), 0,
1336                                          (struct sockaddr*) &addr, &addrlen);
1337                 if (r < 0) {
1338                         int err = evutil_socket_geterror(s->socket);
1339                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1340                                 return;
1341                         log(EVDNS_LOG_WARN,
1342                             "Error %s (%d) while reading request.",
1343                             evutil_socket_error_to_string(err), err);
1344                         return;
1345                 }
1346                 request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
1347         }
1348 }
1349
1350 /* Try to write all pending replies on a given DNS server port. */
1351 static void
1352 server_port_flush(struct evdns_server_port *port)
1353 {
1354         struct server_request *req = port->pending_replies;
1355         ASSERT_LOCKED(port);
1356         while (req) {
1357                 int r = sendto(port->socket, req->response, (int)req->response_len, 0,
1358                            (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1359                 if (r < 0) {
1360                         int err = evutil_socket_geterror(port->socket);
1361                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1362                                 return;
1363                         log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", evutil_socket_error_to_string(err), err);
1364                 }
1365                 if (server_request_free(req)) {
1366                         /* we released the last reference to req->port. */
1367                         return;
1368                 } else {
1369                         EVUTIL_ASSERT(req != port->pending_replies);
1370                         req = port->pending_replies;
1371                 }
1372         }
1373
1374         /* We have no more pending requests; stop listening for 'writeable' events. */
1375         (void) event_del(&port->event);
1376         event_assign(&port->event, port->event_base,
1377                                  port->socket, EV_READ | EV_PERSIST,
1378                                  server_port_ready_callback, port);
1379
1380         if (event_add(&port->event, NULL) < 0) {
1381                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
1382                 /* ???? Do more? */
1383         }
1384 }
1385
1386 /* set if we are waiting for the ability to write to this server. */
1387 /* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
1388 /* we stop these events. */
1389 static void
1390 nameserver_write_waiting(struct nameserver *ns, char waiting) {
1391         ASSERT_LOCKED(ns->base);
1392         if (ns->write_waiting == waiting) return;
1393
1394         ns->write_waiting = waiting;
1395         (void) event_del(&ns->event);
1396         event_assign(&ns->event, ns->base->event_base,
1397             ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
1398             nameserver_ready_callback, ns);
1399         if (event_add(&ns->event, NULL) < 0) {
1400                 char addrbuf[128];
1401                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
1402                     evutil_format_sockaddr_port(
1403                             (struct sockaddr *)&ns->address,
1404                             addrbuf, sizeof(addrbuf)));
1405                 /* ???? Do more? */
1406         }
1407 }
1408
1409 /* a callback function. Called by libevent when the kernel says that */
1410 /* a nameserver socket is ready for writing or reading */
1411 static void
1412 nameserver_ready_callback(evutil_socket_t fd, short events, void *arg) {
1413         struct nameserver *ns = (struct nameserver *) arg;
1414         (void)fd;
1415
1416         EVDNS_LOCK(ns->base);
1417         if (events & EV_WRITE) {
1418                 ns->choked = 0;
1419                 if (!evdns_transmit(ns->base)) {
1420                         nameserver_write_waiting(ns, 0);
1421                 }
1422         }
1423         if (events & EV_READ) {
1424                 nameserver_read(ns);
1425         }
1426         EVDNS_UNLOCK(ns->base);
1427 }
1428
1429 /* a callback function. Called by libevent when the kernel says that */
1430 /* a server socket is ready for writing or reading. */
1431 static void
1432 server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
1433         struct evdns_server_port *port = (struct evdns_server_port *) arg;
1434         (void) fd;
1435
1436         EVDNS_LOCK(port);
1437         if (events & EV_WRITE) {
1438                 port->choked = 0;
1439                 server_port_flush(port);
1440         }
1441         if (events & EV_READ) {
1442                 server_port_read(port);
1443         }
1444         EVDNS_UNLOCK(port);
1445 }
1446
/* This is an inefficient representation; only use it via the dnslabel_table_*
 * functions, so that it can be safely replaced with something smarter later. */
#define MAX_LABELS 128

/* Structures used to implement name compression. */

/* One remembered name and the message offset it is associated with. */
struct dnslabel_entry {
	char *v;
	off_t pos;
};

/* Fixed-capacity map from name to position in message. */
struct dnslabel_table {
	/* number of current entries */
	int n_labels;
	struct dnslabel_entry labels[MAX_LABELS];
};
1457
1458 /* Initialize dnslabel_table. */
1459 static void
1460 dnslabel_table_init(struct dnslabel_table *table)
1461 {
1462         table->n_labels = 0;
1463 }
1464
1465 /* Free all storage held by table, but not the table itself. */
1466 static void
1467 dnslabel_clear(struct dnslabel_table *table)
1468 {
1469         int i;
1470         for (i = 0; i < table->n_labels; ++i)
1471                 mm_free(table->labels[i].v);
1472         table->n_labels = 0;
1473 }
1474
1475 /* return the position of the label in the current message, or -1 if the label */
1476 /* hasn't been used yet. */
1477 static int
1478 dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
1479 {
1480         int i;
1481         for (i = 0; i < table->n_labels; ++i) {
1482                 if (!strcmp(label, table->labels[i].v))
1483                         return table->labels[i].pos;
1484         }
1485         return -1;
1486 }
1487
1488 /* remember that we've used the label at position pos */
1489 static int
1490 dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
1491 {
1492         char *v;
1493         int p;
1494         if (table->n_labels == MAX_LABELS)
1495                 return (-1);
1496         v = mm_strdup(label);
1497         if (v == NULL)
1498                 return (-1);
1499         p = table->n_labels++;
1500         table->labels[p].v = v;
1501         table->labels[p].pos = pos;
1502
1503         return (0);
1504 }
1505
1506 /* Converts a string to a length-prefixed set of DNS labels, starting */
1507 /* at buf[j]. name and buf must not overlap. name_len should be the length */
1508 /* of name.      table is optional, and is used for compression. */
1509 /* */
1510 /* Input: abc.def */
1511 /* Output: <3>abc<3>def<0> */
1512 /* */
1513 /* Returns the first index after the encoded name, or negative on error. */
1514 /*       -1      label was > 63 bytes */
1515 /*       -2      name too long to fit in buffer. */
1516 /* */
1517 static off_t
1518 dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
1519                                   const char *name, const size_t name_len,
1520                                   struct dnslabel_table *table) {
1521         const char *end = name + name_len;
1522         int ref = 0;
1523         u16 _t;
1524
1525 #define APPEND16(x) do {                                                \
1526                 if (j + 2 > (off_t)buf_len)                             \
1527                         goto overflow;                                  \
1528                 _t = htons(x);                                          \
1529                 memcpy(buf + j, &_t, 2);                                \
1530                 j += 2;                                                 \
1531         } while (0)
1532 #define APPEND32(x) do {                                                \
1533                 if (j + 4 > (off_t)buf_len)                             \
1534                         goto overflow;                                  \
1535                 _t32 = htonl(x);                                        \
1536                 memcpy(buf + j, &_t32, 4);                              \
1537                 j += 4;                                                 \
1538         } while (0)
1539
1540         if (name_len > 255) return -2;
1541
1542         for (;;) {
1543                 const char *const start = name;
1544                 if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
1545                         APPEND16(ref | 0xc000);
1546                         return j;
1547                 }
1548                 name = strchr(name, '.');
1549                 if (!name) {
1550                         const size_t label_len = end - start;
1551                         if (label_len > 63) return -1;
1552                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1553                         if (table) dnslabel_table_add(table, start, j);
1554                         buf[j++] = (ev_uint8_t)label_len;
1555
1556                         memcpy(buf + j, start, label_len);
1557                         j += (int) label_len;
1558                         break;
1559                 } else {
1560                         /* append length of the label. */
1561                         const size_t label_len = name - start;
1562                         if (label_len > 63) return -1;
1563                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1564                         if (table) dnslabel_table_add(table, start, j);
1565                         buf[j++] = (ev_uint8_t)label_len;
1566
1567                         memcpy(buf + j, start, label_len);
1568                         j += (int) label_len;
1569                         /* hop over the '.' */
1570                         name++;
1571                 }
1572         }
1573
1574         /* the labels must be terminated by a 0. */
1575         /* It's possible that the name ended in a . */
1576         /* in which case the zero is already there */
1577         if (!j || buf[j-1]) buf[j++] = 0;
1578         return j;
1579  overflow:
1580         return (-2);
1581 }
1582
/* Computes how much buffer space to reserve for a DNS request carrying a */
/* name of name_len bytes. This is an upper bound: the request that is */
/* actually built may be smaller than the value returned here. */
static size_t
evdns_request_len(const size_t name_len) {
	const size_t header_allowance = 96; /* length of the DNS standard header */
	const size_t name_extra = 2;        /* two bytes on top of the name itself */
	const size_t trailer = 4;           /* space for the resource type */

	return header_allowance + name_len + name_extra + trailer;
}
1592
1593 /* build a dns request packet into buf. buf should be at least as long */
1594 /* as evdns_request_len told you it should be. */
1595 /* */
1596 /* Returns the amount of space used. Negative on error. */
1597 static int
1598 evdns_request_data_build(const char *const name, const size_t name_len,
1599     const u16 trans_id, const u16 type, const u16 class,
1600     u8 *const buf, size_t buf_len) {
1601         off_t j = 0;  /* current offset into buf */
1602         u16 _t;  /* used by the macros */
1603
1604         APPEND16(trans_id);
1605         APPEND16(0x0100);  /* standard query, recusion needed */
1606         APPEND16(1);  /* one question */
1607         APPEND16(0);  /* no answers */
1608         APPEND16(0);  /* no authority */
1609         APPEND16(0);  /* no additional */
1610
1611         j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
1612         if (j < 0) {
1613                 return (int)j;
1614         }
1615
1616         APPEND16(type);
1617         APPEND16(class);
1618
1619         return (int)j;
1620  overflow:
1621         return (-1);
1622 }
1623
1624 /* exported function */
1625 struct evdns_server_port *
1626 evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1627 {
1628         struct evdns_server_port *port;
1629         if (flags)
1630                 return NULL; /* flags not yet implemented */
1631         if (!(port = mm_malloc(sizeof(struct evdns_server_port))))
1632                 return NULL;
1633         memset(port, 0, sizeof(struct evdns_server_port));
1634
1635
1636         port->socket = socket;
1637         port->refcnt = 1;
1638         port->choked = 0;
1639         port->closing = 0;
1640         port->user_callback = cb;
1641         port->user_data = user_data;
1642         port->pending_replies = NULL;
1643         port->event_base = base;
1644
1645         event_assign(&port->event, port->event_base,
1646                                  port->socket, EV_READ | EV_PERSIST,
1647                                  server_port_ready_callback, port);
1648         if (event_add(&port->event, NULL) < 0) {
1649                 mm_free(port);
1650                 return NULL;
1651         }
1652         EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
1653         return port;
1654 }
1655
1656 struct evdns_server_port *
1657 evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1658 {
1659         return evdns_add_server_port_with_base(NULL, socket, flags, cb, user_data);
1660 }
1661
1662 /* exported function */
1663 void
1664 evdns_close_server_port(struct evdns_server_port *port)
1665 {
1666         EVDNS_LOCK(port);
1667         if (--port->refcnt == 0) {
1668                 EVDNS_UNLOCK(port);
1669                 server_port_free(port);
1670         } else {
1671                 port->closing = 1;
1672         }
1673 }
1674
1675 /* exported function */
1676 int
1677 evdns_server_request_add_reply(struct evdns_server_request *_req, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
1678 {
1679         struct server_request *req = TO_SERVER_REQUEST(_req);
1680         struct server_reply_item **itemp, *item;
1681         int *countp;
1682         int result = -1;
1683
1684         EVDNS_LOCK(req->port);
1685         if (req->response) /* have we already answered? */
1686                 goto done;
1687
1688         switch (section) {
1689         case EVDNS_ANSWER_SECTION:
1690                 itemp = &req->answer;
1691                 countp = &req->n_answer;
1692                 break;
1693         case EVDNS_AUTHORITY_SECTION:
1694                 itemp = &req->authority;
1695                 countp = &req->n_authority;
1696                 break;
1697         case EVDNS_ADDITIONAL_SECTION:
1698                 itemp = &req->additional;
1699                 countp = &req->n_additional;
1700                 break;
1701         default:
1702                 goto done;
1703         }
1704         while (*itemp) {
1705                 itemp = &((*itemp)->next);
1706         }
1707         item = mm_malloc(sizeof(struct server_reply_item));
1708         if (!item)
1709                 goto done;
1710         item->next = NULL;
1711         if (!(item->name = mm_strdup(name))) {
1712                 mm_free(item);
1713                 goto done;
1714         }
1715         item->type = type;
1716         item->dns_question_class = class;
1717         item->ttl = ttl;
1718         item->is_name = is_name != 0;
1719         item->datalen = 0;
1720         item->data = NULL;
1721         if (data) {
1722                 if (item->is_name) {
1723                         if (!(item->data = mm_strdup(data))) {
1724                                 mm_free(item->name);
1725                                 mm_free(item);
1726                                 goto done;
1727                         }
1728                         item->datalen = (u16)-1;
1729                 } else {
1730                         if (!(item->data = mm_malloc(datalen))) {
1731                                 mm_free(item->name);
1732                                 mm_free(item);
1733                                 goto done;
1734                         }
1735                         item->datalen = datalen;
1736                         memcpy(item->data, data, datalen);
1737                 }
1738         }
1739
1740         *itemp = item;
1741         ++(*countp);
1742         result = 0;
1743 done:
1744         EVDNS_UNLOCK(req->port);
1745         return result;
1746 }
1747
1748 /* exported function */
1749 int
1750 evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1751 {
1752         return evdns_server_request_add_reply(
1753                   req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
1754                   ttl, n*4, 0, addrs);
1755 }
1756
1757 /* exported function */
1758 int
1759 evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1760 {
1761         return evdns_server_request_add_reply(
1762                   req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
1763                   ttl, n*16, 0, addrs);
1764 }
1765
1766 /* exported function */
1767 int
1768 evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
1769 {
1770         u32 a;
1771         char buf[32];
1772         if (in && inaddr_name)
1773                 return -1;
1774         else if (!in && !inaddr_name)
1775                 return -1;
1776         if (in) {
1777                 a = ntohl(in->s_addr);
1778                 evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
1779                                 (int)(u8)((a    )&0xff),
1780                                 (int)(u8)((a>>8 )&0xff),
1781                                 (int)(u8)((a>>16)&0xff),
1782                                 (int)(u8)((a>>24)&0xff));
1783                 inaddr_name = buf;
1784         }
1785         return evdns_server_request_add_reply(
1786                   req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
1787                   ttl, -1, 1, hostname);
1788 }
1789
1790 /* exported function */
1791 int
1792 evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
1793 {
1794         return evdns_server_request_add_reply(
1795                   req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
1796                   ttl, -1, 1, cname);
1797 }
1798
1799 /* exported function */
1800 void
1801 evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
1802 {
1803         struct server_request *req = TO_SERVER_REQUEST(exreq);
1804         req->base.flags &= ~(EVDNS_FLAGS_AA|EVDNS_FLAGS_RD);
1805         req->base.flags |= flags;
1806 }
1807
1808 static int
1809 evdns_server_request_format_response(struct server_request *req, int err)
1810 {
1811         unsigned char buf[1500];
1812         size_t buf_len = sizeof(buf);
1813         off_t j = 0, r;
1814         u16 _t;
1815         u32 _t32;
1816         int i;
1817         u16 flags;
1818         struct dnslabel_table table;
1819
1820         if (err < 0 || err > 15) return -1;
1821
1822         /* Set response bit and error code; copy OPCODE and RD fields from
1823          * question; copy RA and AA if set by caller. */
1824         flags = req->base.flags;
1825         flags |= (0x8000 | err);
1826
1827         dnslabel_table_init(&table);
1828         APPEND16(req->trans_id);
1829         APPEND16(flags);
1830         APPEND16(req->base.nquestions);
1831         APPEND16(req->n_answer);
1832         APPEND16(req->n_authority);
1833         APPEND16(req->n_additional);
1834
1835         /* Add questions. */
1836         for (i=0; i < req->base.nquestions; ++i) {
1837                 const char *s = req->base.questions[i]->name;
1838                 j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
1839                 if (j < 0) {
1840                         dnslabel_clear(&table);
1841                         return (int) j;
1842                 }
1843                 APPEND16(req->base.questions[i]->type);
1844                 APPEND16(req->base.questions[i]->dns_question_class);
1845         }
1846
1847         /* Add answer, authority, and additional sections. */
1848         for (i=0; i<3; ++i) {
1849                 struct server_reply_item *item;
1850                 if (i==0)
1851                         item = req->answer;
1852                 else if (i==1)
1853                         item = req->authority;
1854                 else
1855                         item = req->additional;
1856                 while (item) {
1857                         r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
1858                         if (r < 0)
1859                                 goto overflow;
1860                         j = r;
1861
1862                         APPEND16(item->type);
1863                         APPEND16(item->dns_question_class);
1864                         APPEND32(item->ttl);
1865                         if (item->is_name) {
1866                                 off_t len_idx = j, name_start;
1867                                 j += 2;
1868                                 name_start = j;
1869                                 r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
1870                                 if (r < 0)
1871                                         goto overflow;
1872                                 j = r;
1873                                 _t = htons( (short) (j-name_start) );
1874                                 memcpy(buf+len_idx, &_t, 2);
1875                         } else {
1876                                 APPEND16(item->datalen);
1877                                 if (j+item->datalen > (off_t)buf_len)
1878                                         goto overflow;
1879                                 memcpy(buf+j, item->data, item->datalen);
1880                                 j += item->datalen;
1881                         }
1882                         item = item->next;
1883                 }
1884         }
1885
1886         if (j > 512) {
1887 overflow:
1888                 j = 512;
1889                 buf[2] |= 0x02; /* set the truncated bit. */
1890         }
1891
1892         req->response_len = j;
1893
1894         if (!(req->response = mm_malloc(req->response_len))) {
1895                 server_request_free_answers(req);
1896                 dnslabel_clear(&table);
1897                 return (-1);
1898         }
1899         memcpy(req->response, buf, req->response_len);
1900         server_request_free_answers(req);
1901         dnslabel_clear(&table);
1902         return (0);
1903 }
1904
1905 /* exported function */
1906 int
1907 evdns_server_request_respond(struct evdns_server_request *_req, int err)
1908 {
1909         struct server_request *req = TO_SERVER_REQUEST(_req);
1910         struct evdns_server_port *port = req->port;
1911         int r = -1;
1912
1913         EVDNS_LOCK(port);
1914         if (!req->response) {
1915                 if ((r = evdns_server_request_format_response(req, err))<0)
1916                         goto done;
1917         }
1918
1919         r = sendto(port->socket, req->response, (int)req->response_len, 0,
1920                            (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1921         if (r<0) {
1922                 int sock_err = evutil_socket_geterror(port->socket);
1923                 if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
1924                         goto done;
1925
1926                 if (port->pending_replies) {
1927                         req->prev_pending = port->pending_replies->prev_pending;
1928                         req->next_pending = port->pending_replies;
1929                         req->prev_pending->next_pending =
1930                                 req->next_pending->prev_pending = req;
1931                 } else {
1932                         req->prev_pending = req->next_pending = req;
1933                         port->pending_replies = req;
1934                         port->choked = 1;
1935
1936                         (void) event_del(&port->event);
1937                         event_assign(&port->event, port->event_base, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);
1938
1939                         if (event_add(&port->event, NULL) < 0) {
1940                                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
1941                         }
1942
1943                 }
1944
1945                 r = 1;
1946                 goto done;
1947         }
1948         if (server_request_free(req)) {
1949                 r = 0;
1950                 goto done;
1951         }
1952
1953         if (port->pending_replies)
1954                 server_port_flush(port);
1955
1956         r = 0;
1957 done:
1958         EVDNS_UNLOCK(port);
1959         return r;
1960 }
1961
1962 /* Free all storage held by RRs in req. */
1963 static void
1964 server_request_free_answers(struct server_request *req)
1965 {
1966         struct server_reply_item *victim, *next, **list;
1967         int i;
1968         for (i = 0; i < 3; ++i) {
1969                 if (i==0)
1970                         list = &req->answer;
1971                 else if (i==1)
1972                         list = &req->authority;
1973                 else
1974                         list = &req->additional;
1975
1976                 victim = *list;
1977                 while (victim) {
1978                         next = victim->next;
1979                         mm_free(victim->name);
1980                         if (victim->data)
1981                                 mm_free(victim->data);
1982                         mm_free(victim);
1983                         victim = next;
1984                 }
1985                 *list = NULL;
1986         }
1987 }
1988
1989 /* Free all storage held by req, and remove links to it. */
1990 /* return true iff we just wound up freeing the server_port. */
1991 static int
1992 server_request_free(struct server_request *req)
1993 {
1994         int i, rc=1, lock=0;
1995         if (req->base.questions) {
1996                 for (i = 0; i < req->base.nquestions; ++i)
1997                         mm_free(req->base.questions[i]);
1998                 mm_free(req->base.questions);
1999         }
2000
2001         if (req->port) {
2002                 EVDNS_LOCK(req->port);
2003                 lock=1;
2004                 if (req->port->pending_replies == req) {
2005                         if (req->next_pending && req->next_pending != req)
2006                                 req->port->pending_replies = req->next_pending;
2007                         else
2008                                 req->port->pending_replies = NULL;
2009                 }
2010                 rc = --req->port->refcnt;
2011         }
2012
2013         if (req->response) {
2014                 mm_free(req->response);
2015         }
2016
2017         server_request_free_answers(req);
2018
2019         if (req->next_pending && req->next_pending != req) {
2020                 req->next_pending->prev_pending = req->prev_pending;
2021                 req->prev_pending->next_pending = req->next_pending;
2022         }
2023
2024         if (rc == 0) {
2025                 EVDNS_UNLOCK(req->port); /* ????? nickm */
2026                 server_port_free(req->port);
2027                 mm_free(req);
2028                 return (1);
2029         }
2030         if (lock)
2031                 EVDNS_UNLOCK(req->port);
2032         mm_free(req);
2033         return (0);
2034 }
2035
2036 /* Free all storage held by an evdns_server_port.  Only called when  */
2037 static void
2038 server_port_free(struct evdns_server_port *port)
2039 {
2040         EVUTIL_ASSERT(port);
2041         EVUTIL_ASSERT(!port->refcnt);
2042         EVUTIL_ASSERT(!port->pending_replies);
2043         if (port->socket > 0) {
2044                 evutil_closesocket(port->socket);
2045                 port->socket = -1;
2046         }
2047         (void) event_del(&port->event);
2048         event_debug_unassign(&port->event);
2049         EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2050         mm_free(port);
2051 }
2052
/* exported function */
/* Discard a server request without sending a reply: the request is simply */
/* released through server_request_free(). Always returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *_req)
{
	/* The result (whether the port was freed too) is not reported. */
	(void) server_request_free(TO_SERVER_REQUEST(_req));
	return 0;
}
2061
2062 /* exported function */
2063 int
2064 evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len)
2065 {
2066         struct server_request *req = TO_SERVER_REQUEST(_req);
2067         if (addr_len < (int)req->addrlen)
2068                 return -1;
2069         memcpy(sa, &(req->addr), req->addrlen);
2070         return req->addrlen;
2071 }
2072
2073 #undef APPEND16
2074 #undef APPEND32
2075
2076 /* this is a libevent callback function which is called when a request */
2077 /* has timed out. */
2078 static void
2079 evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
2080         struct request *const req = (struct request *) arg;
2081 #ifndef _EVENT_DISABLE_THREAD_SUPPORT
2082         struct evdns_base *base = req->base;
2083 #endif
2084         (void) fd;
2085         (void) events;
2086
2087         log(EVDNS_LOG_DEBUG, "Request %p timed out", arg);
2088         EVDNS_LOCK(base);
2089
2090         req->ns->timedout++;
2091         if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
2092                 req->ns->timedout = 0;
2093                 nameserver_failed(req->ns, "request timed out.");
2094         }
2095
2096         if (req->tx_count >= req->base->global_max_retransmits) {
2097                 /* this request has failed */
2098                 reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2099                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2100         } else {
2101                 /* retransmit it */
2102                 (void) evtimer_del(&req->timeout_event);
2103                 evdns_request_transmit(req);
2104         }
2105         EVDNS_UNLOCK(base);
2106 }
2107
2108 /* try to send a request to a given server. */
2109 /* */
2110 /* return: */
2111 /*   0 ok */
2112 /*   1 temporary failure */
2113 /*   2 other failure */
2114 static int
2115 evdns_request_transmit_to(struct request *req, struct nameserver *server) {
2116         int r;
2117         ASSERT_LOCKED(req->base);
2118         ASSERT_VALID_REQUEST(req);
2119         r = sendto(server->socket, (void*)req->request, req->request_len, 0,
2120             (struct sockaddr *)&server->address, server->addrlen);
2121         if (r < 0) {
2122                 int err = evutil_socket_geterror(server->socket);
2123                 if (EVUTIL_ERR_RW_RETRIABLE(err))
2124                         return 1;
2125                 nameserver_failed(req->ns, evutil_socket_error_to_string(err));
2126                 return 2;
2127         } else if (r != (int)req->request_len) {
2128                 return 1;  /* short write */
2129         } else {
2130                 return 0;
2131         }
2132 }
2133
2134 /* try to send a request, updating the fields of the request */
2135 /* as needed */
2136 /* */
2137 /* return: */
2138 /*   0 ok */
2139 /*   1 failed */
2140 static int
2141 evdns_request_transmit(struct request *req) {
2142         int retcode = 0, r;
2143
2144         ASSERT_LOCKED(req->base);
2145         ASSERT_VALID_REQUEST(req);
2146         /* if we fail to send this packet then this flag marks it */
2147         /* for evdns_transmit */
2148         req->transmit_me = 1;
2149         EVUTIL_ASSERT(req->trans_id != 0xffff);
2150
2151         if (req->ns->choked) {
2152                 /* don't bother trying to write to a socket */
2153                 /* which we have had EAGAIN from */
2154                 return 1;
2155         }
2156
2157         r = evdns_request_transmit_to(req, req->ns);
2158         switch (r) {
2159         case 1:
2160                 /* temp failure */
2161                 req->ns->choked = 1;
2162                 nameserver_write_waiting(req->ns, 1);
2163                 return 1;
2164         case 2:
2165                 /* failed to transmit the request entirely. */
2166                 retcode = 1;
2167                 /* fall through: we'll set a timeout, which will time out,
2168                  * and make us retransmit the request anyway. */
2169         default:
2170                 /* all ok */
2171                 log(EVDNS_LOG_DEBUG,
2172                     "Setting timeout for request %p", req);
2173                 if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
2174                         log(EVDNS_LOG_WARN,
2175                       "Error from libevent when adding timer for request %p",
2176                             req);
2177                         /* ???? Do more? */
2178                 }
2179                 req->tx_count++;
2180                 req->transmit_me = 0;
2181                 return retcode;
2182         }
2183 }
2184
2185 static void
2186 nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
2187         struct nameserver *const ns = (struct nameserver *) arg;
2188         (void) type;
2189         (void) count;
2190         (void) ttl;
2191         (void) addresses;
2192
2193         EVDNS_LOCK(ns->base);
2194         ns->probe_request = NULL;
2195         if (result == DNS_ERR_CANCEL) {
2196                 /* We canceled this request because the nameserver came up
2197                  * for some other reason.  Do not change our opinion about
2198                  * the nameserver. */
2199         } else if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
2200                 /* this is a good reply */
2201                 nameserver_up(ns);
2202         } else {
2203                 nameserver_probe_failed(ns);
2204         }
2205         EVDNS_UNLOCK(ns->base);
2206 }
2207
2208 static void
2209 nameserver_send_probe(struct nameserver *const ns) {
2210         struct evdns_request *handle;
2211         struct request *req;
2212         char addrbuf[128];
2213         /* here we need to send a probe to a given nameserver */
2214         /* in the hope that it is up now. */
2215
2216         ASSERT_LOCKED(ns->base);
2217         log(EVDNS_LOG_DEBUG, "Sending probe to %s",
2218             evutil_format_sockaddr_port(
2219                     (struct sockaddr *)&ns->address,
2220                     addrbuf, sizeof(addrbuf)));
2221         handle = mm_calloc(1, sizeof(*handle));
2222         if (!handle) return;
2223         req = request_new(ns->base, handle, TYPE_A, "google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
2224         if (!req) return;
2225         ns->probe_request = handle;
2226         /* we force this into the inflight queue no matter what */
2227         request_trans_id_set(req, transaction_id_pick(ns->base));
2228         req->ns = ns;
2229         request_submit(req);
2230 }
2231
2232 /* returns: */
2233 /*   0 didn't try to transmit anything */
2234 /*   1 tried to transmit something */
2235 static int
2236 evdns_transmit(struct evdns_base *base) {
2237         char did_try_to_transmit = 0;
2238         int i;
2239
2240         ASSERT_LOCKED(base);
2241         for (i = 0; i < base->n_req_heads; ++i) {
2242                 if (base->req_heads[i]) {
2243                         struct request *const started_at = base->req_heads[i], *req = started_at;
2244                         /* first transmit all the requests which are currently waiting */
2245                         do {
2246                                 if (req->transmit_me) {
2247                                         did_try_to_transmit = 1;
2248                                         evdns_request_transmit(req);
2249                                 }
2250
2251                                 req = req->next;
2252                         } while (req != started_at);
2253                 }
2254         }
2255
2256         return did_try_to_transmit;
2257 }
2258
2259 /* exported function */
2260 int
2261 evdns_base_count_nameservers(struct evdns_base *base)
2262 {
2263         const struct nameserver *server;
2264         int n = 0;
2265
2266         EVDNS_LOCK(base);
2267         server = base->server_head;
2268         if (!server)
2269                 goto done;
2270         do {
2271                 ++n;
2272                 server = server->next;
2273         } while (server != base->server_head);
2274 done:
2275         EVDNS_UNLOCK(base);
2276         return n;
2277 }
2278
2279 int
2280 evdns_count_nameservers(void)
2281 {
2282         return evdns_base_count_nameservers(current_base);
2283 }
2284
2285 /* exported function */
2286 int
2287 evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
2288 {
2289         struct nameserver *server, *started_at;
2290         int i;
2291
2292         EVDNS_LOCK(base);
2293         server = base->server_head;
2294         started_at = base->server_head;
2295         if (!server) {
2296                 EVDNS_UNLOCK(base);
2297                 return 0;
2298         }
2299         while (1) {
2300                 struct nameserver *next = server->next;
2301                 (void) event_del(&server->event);
2302                 if (evtimer_initialized(&server->timeout_event))
2303                         (void) evtimer_del(&server->timeout_event);
2304                 if (server->socket >= 0)
2305                         evutil_closesocket(server->socket);
2306                 mm_free(server);
2307                 if (next == started_at)
2308                         break;
2309                 server = next;
2310         }
2311         base->server_head = NULL;
2312         base->global_good_nameservers = 0;
2313
2314         for (i = 0; i < base->n_req_heads; ++i) {
2315                 struct request *req, *req_started_at;
2316                 req = req_started_at = base->req_heads[i];
2317                 while (req) {
2318                         struct request *next = req->next;
2319                         req->tx_count = req->reissue_count = 0;
2320                         req->ns = NULL;
2321                         /* ???? What to do about searches? */
2322                         (void) evtimer_del(&req->timeout_event);
2323                         req->trans_id = 0;
2324                         req->transmit_me = 0;
2325
2326                         base->global_requests_waiting++;
2327                         evdns_request_insert(req, &base->req_waiting_head);
2328                         /* We want to insert these suspended elements at the front of
2329                          * the waiting queue, since they were pending before any of
2330                          * the waiting entries were added.  This is a circular list,
2331                          * so we can just shift the start back by one.*/
2332                         base->req_waiting_head = base->req_waiting_head->prev;
2333
2334                         if (next == req_started_at)
2335                                 break;
2336                         req = next;
2337                 }
2338                 base->req_heads[i] = NULL;
2339         }
2340
2341         base->global_requests_inflight = 0;
2342
2343         EVDNS_UNLOCK(base);
2344         return 0;
2345 }
2346
2347 int
2348 evdns_clear_nameservers_and_suspend(void)
2349 {
2350         return evdns_base_clear_nameservers_and_suspend(current_base);
2351 }
2352
2353
/* exported function */
/* Pump base's waiting queue while holding the base lock. */
/* Always returns 0. */
int
evdns_base_resume(struct evdns_base *base)
{
	EVDNS_LOCK(base);
	evdns_requests_pump_waiting_queue(base);
	EVDNS_UNLOCK(base);

	return 0;
}
2363
2364 int
2365 evdns_resume(void)
2366 {
2367         return evdns_base_resume(current_base);
2368 }
2369
2370 static int
2371 _evdns_nameserver_add_impl(struct evdns_base *base, const struct sockaddr *address, int addrlen) {
2372         /* first check to see if we already have this nameserver */
2373
2374         const struct nameserver *server = base->server_head, *const started_at = base->server_head;
2375         struct nameserver *ns;
2376         int err = 0;
2377         char addrbuf[128];
2378
2379         ASSERT_LOCKED(base);
2380         if (server) {
2381                 do {
2382                         if (!evutil_sockaddr_cmp((struct sockaddr*)&server->address, address, 1)) return 3;
2383                         server = server->next;
2384                 } while (server != started_at);
2385         }
2386         if (addrlen > (int)sizeof(ns->address)) {
2387                 log(EVDNS_LOG_DEBUG, "Addrlen %d too long.", (int)addrlen);
2388                 return 2;
2389         }
2390
2391         ns = (struct nameserver *) mm_malloc(sizeof(struct nameserver));
2392         if (!ns) return -1;
2393
2394         memset(ns, 0, sizeof(struct nameserver));
2395         ns->base = base;
2396
2397         evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns);
2398
2399         ns->socket = socket(address->sa_family, SOCK_DGRAM, 0);
2400         if (ns->socket < 0) { err = 1; goto out1; }
2401         evutil_make_socket_closeonexec(ns->socket);
2402         evutil_make_socket_nonblocking(ns->socket);
2403
2404         if (base->global_outgoing_addrlen &&
2405             !evutil_sockaddr_is_loopback(address)) {
2406                 if (bind(ns->socket,
2407                         (struct sockaddr*)&base->global_outgoing_address,
2408                         base->global_outgoing_addrlen) < 0) {
2409                         log(EVDNS_LOG_WARN,"Couldn't bind to outgoing address");
2410                         err = 2;
2411                         goto out2;
2412                 }
2413         }
2414
2415         memcpy(&ns->address, address, addrlen);
2416         ns->addrlen = addrlen;
2417         ns->state = 1;
2418         event_assign(&ns->event, ns->base->event_base, ns->socket, EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
2419         if (event_add(&ns->event, NULL) < 0) {
2420                 err = 2;
2421                 goto out2;
2422         }
2423
2424         log(EVDNS_LOG_DEBUG, "Added nameserver %s",
2425             evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)));
2426
2427         /* insert this nameserver into the list of them */
2428         if (!base->server_head) {
2429                 ns->next = ns->prev = ns;
2430                 base->server_head = ns;
2431         } else {
2432                 ns->next = base->server_head->next;
2433                 ns->prev = base->server_head;
2434                 base->server_head->next = ns;
2435                 if (base->server_head->prev == base->server_head) {
2436                         base->server_head->prev = ns;
2437                 }
2438         }
2439
2440         base->global_good_nameservers++;
2441
2442         return 0;
2443
2444 out2:
2445         evutil_closesocket(ns->socket);
2446 out1:
2447         event_debug_unassign(&ns->event);
2448         mm_free(ns);
2449         log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d",
2450             evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)), err);
2451         return err;
2452 }
2453
/* exported function */
/* Add an IPv4 nameserver, on port 53, to `base`.  `address` is stored
 * directly into sin_addr.s_addr without conversion (presumably it is
 * expected in network byte order -- confirm against callers).  Takes the
 * base lock and returns the result of _evdns_nameserver_add_impl(). */
int
evdns_base_nameserver_add(struct evdns_base *base, unsigned long int address)
{
        struct sockaddr_in sin;
        int res;
        /* Zero the whole structure first: the bytes are memcpy'd into the
         * nameserver record, so padding (and sin_len, where it exists) must
         * not be left uninitialised. */
        memset(&sin, 0, sizeof(sin));
        sin.sin_addr.s_addr = address;
        sin.sin_port = htons(53);
        sin.sin_family = AF_INET;
        EVDNS_LOCK(base);
        res = _evdns_nameserver_add_impl(base, (struct sockaddr*)&sin, sizeof(sin));
        EVDNS_UNLOCK(base);
        return res;
}
2468
2469 int
2470 evdns_nameserver_add(unsigned long int address) {
2471         if (!current_base)
2472                 current_base = evdns_base_new(NULL, 0);
2473         return evdns_base_nameserver_add(current_base, address);
2474 }
2475
2476 static void
2477 sockaddr_setport(struct sockaddr *sa, ev_uint16_t port)
2478 {
2479         if (sa->sa_family == AF_INET) {
2480                 ((struct sockaddr_in *)sa)->sin_port = htons(port);
2481         } else if (sa->sa_family == AF_INET6) {
2482                 ((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
2483         }
2484 }
2485
2486 static ev_uint16_t
2487 sockaddr_getport(struct sockaddr *sa)
2488 {
2489         if (sa->sa_family == AF_INET) {
2490                 return ntohs(((struct sockaddr_in *)sa)->sin_port);
2491         } else if (sa->sa_family == AF_INET6) {
2492                 return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
2493         } else {
2494                 return 0;
2495         }
2496 }
2497
2498 /* exported function */
2499 int
2500 evdns_base_nameserver_ip_add(struct evdns_base *base, const char *ip_as_string) {
2501         struct sockaddr_storage ss;
2502         struct sockaddr *sa;
2503         int len = sizeof(ss);
2504         int res;
2505         if (evutil_parse_sockaddr_port(ip_as_string, (struct sockaddr *)&ss,
2506                 &len)) {
2507                 log(EVDNS_LOG_WARN, "Unable to parse nameserver address %s",
2508                         ip_as_string);
2509                 return 4;
2510         }
2511         sa = (struct sockaddr *) &ss;
2512         if (sockaddr_getport(sa) == 0)
2513                 sockaddr_setport(sa, 53);
2514
2515         EVDNS_LOCK(base);
2516         res = _evdns_nameserver_add_impl(base, sa, len);
2517         EVDNS_UNLOCK(base);
2518         return res;
2519 }
2520
2521 int
2522 evdns_nameserver_ip_add(const char *ip_as_string) {
2523         if (!current_base)
2524                 current_base = evdns_base_new(NULL, 0);
2525         return evdns_base_nameserver_ip_add(current_base, ip_as_string);
2526 }
2527
2528 int
2529 evdns_base_nameserver_sockaddr_add(struct evdns_base *base,
2530     const struct sockaddr *sa, ev_socklen_t len, unsigned flags)
2531 {
2532         int res;
2533         EVUTIL_ASSERT(base);
2534         EVDNS_LOCK(base);
2535         res = _evdns_nameserver_add_impl(base, sa, len);
2536         EVDNS_UNLOCK(base);
2537         return res;
2538 }
2539
2540 /* remove from the queue */
2541 static void
2542 evdns_request_remove(struct request *req, struct request **head)
2543 {
2544         ASSERT_LOCKED(req->base);
2545         ASSERT_VALID_REQUEST(req);
2546
2547 #if 0
2548         {
2549                 struct request *ptr;
2550                 int found = 0;
2551                 EVUTIL_ASSERT(*head != NULL);
2552
2553                 ptr = *head;
2554                 do {
2555                         if (ptr == req) {
2556                                 found = 1;
2557                                 break;
2558                         }
2559                         ptr = ptr->next;
2560                 } while (ptr != *head);
2561                 EVUTIL_ASSERT(found);
2562
2563                 EVUTIL_ASSERT(req->next);
2564         }
2565 #endif
2566
2567         if (req->next == req) {
2568                 /* only item in the list */
2569                 *head = NULL;
2570         } else {
2571                 req->next->prev = req->prev;
2572                 req->prev->next = req->next;
2573                 if (*head == req) *head = req->next;
2574         }
2575         req->next = req->prev = NULL;
2576 }
2577
2578 /* insert into the tail of the queue */
2579 static void
2580 evdns_request_insert(struct request *req, struct request **head) {
2581         ASSERT_LOCKED(req->base);
2582         ASSERT_VALID_REQUEST(req);
2583         if (!*head) {
2584                 *head = req;
2585                 req->next = req->prev = req;
2586                 return;
2587         }
2588
2589         req->prev = (*head)->prev;
2590         req->prev->next = req;
2591         req->next = *head;
2592         (*head)->prev = req;
2593 }
2594
/* Return the number of '.' characters in the NUL-terminated string `s`. */
static int
string_num_dots(const char *s) {
        int dots = 0;
        const char *hit;

        for (hit = strchr(s, '.'); hit != NULL; hit = strchr(hit + 1, '.'))
                ++dots;

        return dots;
}
2604
2605 static struct request *
2606 request_new(struct evdns_base *base, struct evdns_request *handle, int type,
2607             const char *name, int flags, evdns_callback_type callback,
2608             void *user_ptr) {
2609
2610         const char issuing_now =
2611             (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
2612
2613         const size_t name_len = strlen(name);
2614         const size_t request_max_len = evdns_request_len(name_len);
2615         const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
2616         /* the request data is alloced in a single block with the header */
2617         struct request *const req =
2618             mm_malloc(sizeof(struct request) + request_max_len);
2619         int rlen;
2620         char namebuf[256];
2621         (void) flags;
2622
2623         ASSERT_LOCKED(base);
2624
2625         if (!req) return NULL;
2626
2627         if (name_len >= sizeof(namebuf)) {
2628                 mm_free(req);
2629                 return NULL;
2630         }
2631
2632         memset(req, 0, sizeof(struct request));
2633         req->base = base;
2634
2635         evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
2636
2637         if (base->global_randomize_case) {
2638                 unsigned i;
2639                 char randbits[(sizeof(namebuf)+7)/8];
2640                 strlcpy(namebuf, name, sizeof(namebuf));
2641                 evutil_secure_rng_get_bytes(randbits, (name_len+7)/8);
2642                 for (i = 0; i < name_len; ++i) {
2643                         if (EVUTIL_ISALPHA(namebuf[i])) {
2644                                 if ((randbits[i >> 3] & (1<<(i & 7))))
2645                                         namebuf[i] |= 0x20;
2646                                 else
2647                                         namebuf[i] &= ~0x20;
2648                         }
2649                 }
2650                 name = namebuf;
2651         }
2652
2653         /* request data lives just after the header */
2654         req->request = ((u8 *) req) + sizeof(struct request);
2655         /* denotes that the request data shouldn't be free()ed */
2656         req->request_appended = 1;
2657         rlen = evdns_request_data_build(name, name_len, trans_id,
2658             type, CLASS_INET, req->request, request_max_len);
2659         if (rlen < 0)
2660                 goto err1;
2661
2662         req->request_len = rlen;
2663         req->trans_id = trans_id;
2664         req->tx_count = 0;
2665         req->request_type = type;
2666         req->user_pointer = user_ptr;
2667         req->user_callback = callback;
2668         req->ns = issuing_now ? nameserver_pick(base) : NULL;
2669         req->next = req->prev = NULL;
2670         req->handle = handle;
2671         if (handle) {
2672                 handle->current_req = req;
2673                 handle->base = base;
2674         }
2675
2676         return req;
2677 err1:
2678         mm_free(req);
2679         return NULL;
2680 }
2681
2682 static void
2683 request_submit(struct request *const req) {
2684         struct evdns_base *base = req->base;
2685         ASSERT_LOCKED(base);
2686         ASSERT_VALID_REQUEST(req);
2687         if (req->ns) {
2688                 /* if it has a nameserver assigned then this is going */
2689                 /* straight into the inflight queue */
2690                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
2691                 base->global_requests_inflight++;
2692                 evdns_request_transmit(req);
2693         } else {
2694                 evdns_request_insert(req, &base->req_waiting_head);
2695                 base->global_requests_waiting++;
2696         }
2697 }
2698
2699 /* exported function */
2700 void
2701 evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
2702 {
2703         struct request *req;
2704
2705         if (!handle->current_req)
2706                 return;
2707
2708         if (!base) {
2709                 /* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
2710                 base = handle->base;
2711                 if (!base)
2712                         base = handle->current_req->base;
2713         }
2714
2715         EVDNS_LOCK(base);
2716         if (handle->pending_cb) {
2717                 EVDNS_UNLOCK(base);
2718                 return;
2719         }
2720
2721         req = handle->current_req;
2722         ASSERT_VALID_REQUEST(req);
2723
2724         reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
2725         if (req->ns) {
2726                 /* remove from inflight queue */
2727                 request_finished(req, &REQ_HEAD(base, req->trans_id), 1);
2728         } else {
2729                 /* remove from global_waiting head */
2730                 request_finished(req, &base->req_waiting_head, 1);
2731         }
2732         EVDNS_UNLOCK(base);
2733 }
2734
2735 /* exported function */
2736 struct evdns_request *
2737 evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
2738     evdns_callback_type callback, void *ptr) {
2739         struct evdns_request *handle;
2740         struct request *req;
2741         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2742         handle = mm_calloc(1, sizeof(*handle));
2743         if (handle == NULL)
2744                 return NULL;
2745         EVDNS_LOCK(base);
2746         if (flags & DNS_QUERY_NO_SEARCH) {
2747                 req =
2748                         request_new(base, handle, TYPE_A, name, flags,
2749                                     callback, ptr);
2750                 if (req)
2751                         request_submit(req);
2752         } else {
2753                 search_request_new(base, handle, TYPE_A, name, flags,
2754                     callback, ptr);
2755         }
2756         if (handle->current_req == NULL) {
2757                 mm_free(handle);
2758                 handle = NULL;
2759         }
2760         EVDNS_UNLOCK(base);
2761         return handle;
2762 }
2763
2764 int evdns_resolve_ipv4(const char *name, int flags,
2765                                            evdns_callback_type callback, void *ptr)
2766 {
2767         return evdns_base_resolve_ipv4(current_base, name, flags, callback, ptr)
2768                 ? 0 : -1;
2769 }
2770
2771
2772 /* exported function */
2773 struct evdns_request *
2774 evdns_base_resolve_ipv6(struct evdns_base *base,
2775     const char *name, int flags,
2776     evdns_callback_type callback, void *ptr)
2777 {
2778         struct evdns_request *handle;
2779         struct request *req;
2780         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2781         handle = mm_calloc(1, sizeof(*handle));
2782         if (handle == NULL)
2783                 return NULL;
2784         EVDNS_LOCK(base);
2785         if (flags & DNS_QUERY_NO_SEARCH) {
2786                 req = request_new(base, handle, TYPE_AAAA, name, flags,
2787                                   callback, ptr);
2788                 if (req)
2789                         request_submit(req);
2790         } else {
2791                 search_request_new(base, handle, TYPE_AAAA, name, flags,
2792                     callback, ptr);
2793         }
2794         if (handle->current_req == NULL) {
2795                 mm_free(handle);
2796                 handle = NULL;
2797         }
2798         EVDNS_UNLOCK(base);
2799         return handle;
2800 }
2801
2802 int evdns_resolve_ipv6(const char *name, int flags,
2803     evdns_callback_type callback, void *ptr) {
2804         return evdns_base_resolve_ipv6(current_base, name, flags, callback, ptr)
2805                 ? 0 : -1;
2806 }
2807
2808 struct evdns_request *
2809 evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2810         char buf[32];
2811         struct evdns_request *handle;
2812         struct request *req;
2813         u32 a;
2814         EVUTIL_ASSERT(in);
2815         a = ntohl(in->s_addr);
2816         evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
2817                         (int)(u8)((a    )&0xff),
2818                         (int)(u8)((a>>8 )&0xff),
2819                         (int)(u8)((a>>16)&0xff),
2820                         (int)(u8)((a>>24)&0xff));
2821         handle = mm_calloc(1, sizeof(*handle));
2822         if (handle == NULL)
2823                 return NULL;
2824         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2825         EVDNS_LOCK(base);
2826         req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2827         if (req)
2828                 request_submit(req);
2829         if (handle->current_req == NULL) {
2830                 mm_free(handle);
2831                 handle = NULL;
2832         }
2833         EVDNS_UNLOCK(base);
2834         return (handle);
2835 }
2836
2837 int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2838         return evdns_base_resolve_reverse(current_base, in, flags, callback, ptr)
2839                 ? 0 : -1;
2840 }
2841
2842 struct evdns_request *
2843 evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2844         /* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
2845         char buf[73];
2846         char *cp;
2847         struct evdns_request *handle;
2848         struct request *req;
2849         int i;
2850         EVUTIL_ASSERT(in);
2851         cp = buf;
2852         for (i=15; i >= 0; --i) {
2853                 u8 byte = in->s6_addr[i];
2854                 *cp++ = "0123456789abcdef"[byte & 0x0f];
2855                 *cp++ = '.';
2856                 *cp++ = "0123456789abcdef"[byte >> 4];
2857                 *cp++ = '.';
2858         }
2859         EVUTIL_ASSERT(cp + strlen("ip6.arpa") < buf+sizeof(buf));
2860         memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
2861         handle = mm_calloc(1, sizeof(*handle));
2862         if (handle == NULL)
2863                 return NULL;
2864         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2865         EVDNS_LOCK(base);
2866         req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2867         if (req)
2868                 request_submit(req);
2869         if (handle->current_req == NULL) {
2870                 mm_free(handle);
2871                 handle = NULL;
2872         }
2873         EVDNS_UNLOCK(base);
2874         return (handle);
2875 }
2876
2877 int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2878         return evdns_base_resolve_reverse_ipv6(current_base, in, flags, callback, ptr)
2879                 ? 0 : -1;
2880 }
2881
2882 /* ================================================================= */
2883 /* Search support */
2884 /* */
2885 /* the libc resolver has support for searching a number of domains */
2886 /* to find a name. If nothing else then it takes the single domain */
2887 /* from the gethostname() call. */
2888 /* */
2889 /* It can also be configured via the domain and search options in a */
2890 /* resolv.conf. */
2891 /* */
2892 /* The ndots option controls how many dots it takes for the resolver */
2893 /* to decide that a name is non-local and so try a raw lookup first. */
2894
/* One entry in the singly-linked list of search domains. */
struct search_domain {
        /* number of bytes of domain text stored behind this structure */
        int len;
        /* following entry in the list, or NULL at the end */
        struct search_domain *next;
        /* the domain text itself is appended directly after this structure;
         * it is copied without a terminating NUL, so `len` is the only
         * record of where it ends */
};
2900
/* Reference-counted search configuration: the ndots threshold plus the list
 * of search domains. */
struct search_state {
        /* the state and its domain list are freed when this drops to zero */
        int refcount;
        /* names with at least this many dots are first tried as given */
        int ndots;
        /* count of search domains registered on this state */
        int num_domains;
        /* first entry of the domain list, or NULL when it is empty */
        struct search_domain *head;
};
2907
2908 static void
2909 search_state_decref(struct search_state *const state) {
2910         if (!state) return;
2911         state->refcount--;
2912         if (!state->refcount) {
2913                 struct search_domain *next, *dom;
2914                 for (dom = state->head; dom; dom = next) {
2915                         next = dom->next;
2916                         mm_free(dom);
2917                 }
2918                 mm_free(state);
2919         }
2920 }
2921
2922 static struct search_state *
2923 search_state_new(void) {
2924         struct search_state *state = (struct search_state *) mm_malloc(sizeof(struct search_state));
2925         if (!state) return NULL;
2926         memset(state, 0, sizeof(struct search_state));
2927         state->refcount = 1;
2928         state->ndots = 1;
2929
2930         return state;
2931 }
2932
2933 static void
2934 search_postfix_clear(struct evdns_base *base) {
2935         search_state_decref(base->global_search_state);
2936
2937         base->global_search_state = search_state_new();
2938 }
2939
/* exported function */
/* Replace the search state of `base` with a fresh one, with the base lock
 * held for the duration. */
void
evdns_base_search_clear(struct evdns_base *base)
{
        struct evdns_base *const target = base;

        EVDNS_LOCK(target);
        search_postfix_clear(target);
        EVDNS_UNLOCK(target);
}
2948
2949 void
2950 evdns_search_clear(void) {
2951         evdns_base_search_clear(current_base);
2952 }
2953
2954 static void
2955 search_postfix_add(struct evdns_base *base, const char *domain) {
2956         size_t domain_len;
2957         struct search_domain *sdomain;
2958         while (domain[0] == '.') domain++;
2959         domain_len = strlen(domain);
2960
2961         ASSERT_LOCKED(base);
2962         if (!base->global_search_state) base->global_search_state = search_state_new();
2963         if (!base->global_search_state) return;
2964         base->global_search_state->num_domains++;
2965
2966         sdomain = (struct search_domain *) mm_malloc(sizeof(struct search_domain) + domain_len);
2967         if (!sdomain) return;
2968         memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
2969         sdomain->next = base->global_search_state->head;
2970         sdomain->len = (int) domain_len;
2971
2972         base->global_search_state->head = sdomain;
2973 }
2974
2975 /* reverse the order of members in the postfix list. This is needed because, */
2976 /* when parsing resolv.conf we push elements in the wrong order */
2977 static void
2978 search_reverse(struct evdns_base *base) {
2979         struct search_domain *cur, *prev = NULL, *next;
2980         ASSERT_LOCKED(base);
2981         cur = base->global_search_state->head;
2982         while (cur) {
2983                 next = cur->next;
2984                 cur->next = prev;
2985                 prev = cur;
2986                 cur = next;
2987         }
2988
2989         base->global_search_state->head = prev;
2990 }
2991
/* exported function */
/* Add `domain` to the search-domain list of `base`, with the base lock held
 * for the duration. */
void
evdns_base_search_add(struct evdns_base *base, const char *domain) {
        const char *const new_domain = domain;

        EVDNS_LOCK(base);
        search_postfix_add(base, new_domain);
        EVDNS_UNLOCK(base);
}
2999 void
3000 evdns_search_add(const char *domain) {
3001         evdns_base_search_add(current_base, domain);
3002 }
3003
3004 /* exported function */
3005 void
3006 evdns_base_search_ndots_set(struct evdns_base *base, const int ndots) {
3007         EVDNS_LOCK(base);
3008         if (!base->global_search_state) base->global_search_state = search_state_new();
3009         if (base->global_search_state)
3010                 base->global_search_state->ndots = ndots;
3011         EVDNS_UNLOCK(base);
3012 }
3013 void
3014 evdns_search_ndots_set(const int ndots) {
3015         evdns_base_search_ndots_set(current_base, ndots);
3016 }
3017
3018 static void
3019 search_set_from_hostname(struct evdns_base *base) {
3020         char hostname[HOST_NAME_MAX + 1], *domainname;
3021
3022         ASSERT_LOCKED(base);
3023         search_postfix_clear(base);
3024         if (gethostname(hostname, sizeof(hostname))) return;
3025         domainname = strchr(hostname, '.');
3026         if (!domainname) return;
3027         search_postfix_add(base, domainname);
3028 }
3029
3030 /* warning: returns malloced string */
3031 static char *
3032 search_make_new(const struct search_state *const state, int n, const char *const base_name) {
3033         const size_t base_len = strlen(base_name);
3034         const char need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
3035         struct search_domain *dom;
3036
3037         for (dom = state->head; dom; dom = dom->next) {
3038                 if (!n--) {
3039                         /* this is the postfix we want */
3040                         /* the actual postfix string is kept at the end of the structure */
3041                         const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
3042                         const int postfix_len = dom->len;
3043                         char *const newname = (char *) mm_malloc(base_len + need_to_append_dot + postfix_len + 1);
3044                         if (!newname) return NULL;
3045                         memcpy(newname, base_name, base_len);
3046                         if (need_to_append_dot) newname[base_len] = '.';
3047                         memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
3048                         newname[base_len + need_to_append_dot + postfix_len] = 0;
3049                         return newname;
3050                 }
3051         }
3052
3053         /* we ran off the end of the list and still didn't find the requested string */
3054         EVUTIL_ASSERT(0);
3055         return NULL; /* unreachable; stops warnings in some compilers. */
3056 }
3057
3058 static struct request *
3059 search_request_new(struct evdns_base *base, struct evdns_request *handle,
3060                    int type, const char *const name, int flags,
3061                    evdns_callback_type user_callback, void *user_arg) {
3062         ASSERT_LOCKED(base);
3063         EVUTIL_ASSERT(type == TYPE_A || type == TYPE_AAAA);
3064         EVUTIL_ASSERT(handle->current_req == NULL);
3065         if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
3066              base->global_search_state &&
3067                  base->global_search_state->num_domains) {
3068                 /* we have some domains to search */
3069                 struct request *req;
3070                 if (string_num_dots(name) >= base->global_search_state->ndots) {
3071                         req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3072                         if (!req) return NULL;
3073                         handle->search_index = -1;
3074                 } else {
3075                         char *const new_name = search_make_new(base->global_search_state, 0, name);
3076                         if (!new_name) return NULL;
3077                         req = request_new(base, handle, type, new_name, flags, user_callback, user_arg);
3078                         mm_free(new_name);
3079                         if (!req) return NULL;
3080                         handle->search_index = 0;
3081                 }
3082                 EVUTIL_ASSERT(handle->search_origname == NULL);
3083                 handle->search_origname = mm_strdup(name);
3084                 if (handle->search_origname == NULL) {
3085                         /* XXX Should we dealloc req? If yes, how? */
3086                         return NULL;
3087                 }
3088                 handle->search_state = base->global_search_state;
3089                 handle->search_flags = flags;
3090                 base->global_search_state->refcount++;
3091                 request_submit(req);
3092                 return req;
3093         } else {
3094                 struct request *const req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3095                 if (!req) return NULL;
3096                 request_submit(req);
3097                 return req;
3098         }
3099 }
3100
3101 /* this is called when a request has failed to find a name. We need to check */
3102 /* if it is part of a search and, if so, try the next name in the list */
3103 /* returns: */
3104 /*   0 another request has been submitted */
3105 /*   1 no more requests needed */
3106 static int
3107 search_try_next(struct evdns_request *const handle) {
3108         struct request *req = handle->current_req;
3109         struct evdns_base *base = req->base;
3110         struct request *newreq;
3111         ASSERT_LOCKED(base);
3112         if (handle->search_state) {
3113                 /* it is part of a search */
3114                 char *new_name;
3115                 handle->search_index++;
3116                 if (handle->search_index >= handle->search_state->num_domains) {
3117                         /* no more postfixes to try, however we may need to try */
3118                         /* this name without a postfix */
3119                         if (string_num_dots(handle->search_origname) < handle->search_state->ndots) {
3120                                 /* yep, we need to try it raw */
3121                                 newreq = request_new(base, NULL, req->request_type, handle->search_origname, handle->search_flags, req->user_callback, req->user_pointer);
3122                                 log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", handle->search_origname);
3123                                 if (newreq) {
3124                                         search_request_finished(handle);
3125                                         goto submit_next;
3126                                 }
3127                         }
3128                         return 1;
3129                 }
3130
3131                 new_name = search_make_new(handle->search_state, handle->search_index, handle->search_origname);
3132                 if (!new_name) return 1;
3133                 log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, handle->search_index);
3134                 newreq = request_new(base, NULL, req->request_type, new_name, handle->search_flags, req->user_callback, req->user_pointer);
3135                 mm_free(new_name);
3136                 if (!newreq) return 1;
3137                 goto submit_next;
3138         }
3139         return 1;
3140
3141 submit_next:
3142         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
3143         handle->current_req = newreq;
3144         newreq->handle = handle;
3145         request_submit(newreq);
3146         return 0;
3147 }
3148
3149 static void
3150 search_request_finished(struct evdns_request *const handle) {
3151         ASSERT_LOCKED(handle->current_req->base);
3152         if (handle->search_state) {
3153                 search_state_decref(handle->search_state);
3154                 handle->search_state = NULL;
3155         }
3156         if (handle->search_origname) {
3157                 mm_free(handle->search_origname);
3158                 handle->search_origname = NULL;
3159         }
3160 }
3161
3162 /* ================================================================= */
3163 /* Parsing resolv.conf files */
3164
3165 static void
3166 evdns_resolv_set_defaults(struct evdns_base *base, int flags) {
3167         /* if the file isn't found then we assume a local resolver */
3168         ASSERT_LOCKED(base);
3169         if (flags & DNS_OPTION_SEARCH) search_set_from_hostname(base);
3170         if (flags & DNS_OPTION_NAMESERVERS) evdns_base_nameserver_ip_add(base,"127.0.0.1");
3171 }
3172
#ifndef _EVENT_HAVE_STRTOK_R
/* Fallback tokenizer for builds where _EVENT_HAVE_STRTOK_R is not defined.
 * Scanning starts at `s` or, when `s` is NULL, at the position saved in
 * *state.  The token ends at the first character found in `delim`, which is
 * overwritten with NUL.  Leading delimiters are not skipped, so a delimiter
 * at the start yields an empty token.  Returns NULL when there is nothing
 * left to scan. */
static char *
strtok_r(char *s, const char *delim, char **state) {
        char *const token = s ? s : *state;
        char *scan;

        if (token == NULL)
                return NULL;

        for (scan = token; *scan != '\0'; ++scan) {
                if (strchr(delim, *scan))
                        break;
        }

        if (*scan == '\0') {
                /* hit the end of the input without finding a delimiter */
                if (scan == token)
                        return NULL;
                *state = NULL;
        } else {
                /* cut the token off and remember where to resume */
                *scan = '\0';
                *state = scan + 1;
        }
        return token;
}
#endif
3194
/* Helper version of atoi which returns -1 on error.
 *
 * An error is: no digits at all (including the empty string), trailing
 * characters after the number, or a value that does not fit in an int.
 * Note that the input "-1" cannot be told apart from a failure. */
static int
strtoint(const char *const str)
{
	char *endptr;
	long parsed;
	int result;

	errno = 0;
	parsed = strtol(str, &endptr, 10);
	/* endptr == str means nothing was consumed; strtol() reports that
	 * as 0, which would otherwise look like a valid value. */
	if (endptr == str || *endptr) return -1;
	if (errno == ERANGE) return -1;
	/* Reject values that change when narrowed from long to int. */
	result = (int) parsed;
	if ((long) result != parsed) return -1;
	return result;
}
3204
/* Parse a number of seconds into a timeval; return -1 on error.
 *
 * Accepts a non-negative decimal number (fractions allowed).  Fails on
 * empty or malformed input, on negative, NaN or infinite values, on values
 * too large for the int conversion below, and on values under one
 * millisecond.  Returns 0 and fills in `out` on success. */
static int
strtotimeval(const char *const str, struct timeval *out)
{
	/* (~0u >> 1) is the largest int; bigger values cannot be converted
	 * with the (int) cast below without undefined behavior. */
	const double max_seconds = (double) (~0u >> 1);
	double d;
	int whole;
	char *endptr;

	d = strtod(str, &endptr);
	if (endptr == str || *endptr) return -1;
	/* Written as a negated >= so that NaN is rejected as well. */
	if (!(d >= 0)) return -1;
	if (d > max_seconds) return -1;
	whole = (int) d;
	out->tv_sec = whole;
	out->tv_usec = (int) ((d - whole)*1000000);
	if (out->tv_sec == 0 && out->tv_usec < 1000) /* less than 1 msec */
		return -1;
	return 0;
}
3220
/* Like strtoint(), but the parsed value is forced into [min, max].
 * A strtoint() result of -1 is passed through unchanged as the error
 * indicator and is never clipped. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int value = strtoint(str);

	if (value == -1)
		return -1;
	if (value < min)
		return min;
	return (value > max) ? max : value;
}
3235
3236 static int
3237 evdns_base_set_max_requests_inflight(struct evdns_base *base, int maxinflight)
3238 {
3239         int old_n_heads = base->n_req_heads, n_heads;
3240         struct request **old_heads = base->req_heads, **new_heads, *req;
3241         int i;
3242
3243         ASSERT_LOCKED(base);
3244         if (maxinflight < 1)
3245                 maxinflight = 1;
3246         n_heads = (maxinflight+4) / 5;
3247         EVUTIL_ASSERT(n_heads > 0);
3248         new_heads = mm_calloc(n_heads, sizeof(struct request*));
3249         if (!new_heads)
3250                 return (-1);
3251         if (old_heads) {
3252                 for (i = 0; i < old_n_heads; ++i) {
3253                         while (old_heads[i]) {
3254                                 req = old_heads[i];
3255                                 evdns_request_remove(req, &old_heads[i]);
3256                                 evdns_request_insert(req, &new_heads[req->trans_id % n_heads]);
3257                         }
3258                 }
3259                 mm_free(old_heads);
3260         }
3261         base->req_heads = new_heads;
3262         base->n_req_heads = n_heads;
3263         base->global_max_requests_inflight = maxinflight;
3264         return (0);
3265 }
3266
3267 /* exported function */
3268 int
3269 evdns_base_set_option(struct evdns_base *base,
3270     const char *option, const char *val)
3271 {
3272         int res;
3273         EVDNS_LOCK(base);
3274         res = evdns_base_set_option_impl(base, option, val, DNS_OPTIONS_ALL);
3275         EVDNS_UNLOCK(base);
3276         return res;
3277 }
3278
/* Return nonzero if `s1` names the option `optionname`.
 *
 * Option names are written with a trailing colon ("option:").  A match is
 * either the bare name without the colon ("option"), the name with the
 * colon, or the name with the colon followed by anything at all
 * ("option:randomjunk"); the last form is what the resolv.conf parser
 * hands us. */
static inline int
str_matches_option(const char *s1, const char *optionname)
{
	const size_t name_len = strlen(optionname);
	const size_t given_len = strlen(s1);
	size_t cmp_len;

	/* Too short to be even the colon-less form of the name. */
	if (given_len + 1 < name_len)
		return 0;

	/* Compare over whichever of the two strings is shorter. */
	cmp_len = (given_len < name_len) ? given_len : name_len;
	return !strncmp(s1, optionname, cmp_len);
}
3294
/* Set one named option on `base`.  The base lock must be held.
 *
 * `option` is matched with str_matches_option(), so "name", "name:" and
 * "name:junk" are all accepted; `val` holds the value as text.  `flags`
 * is a mask of DNS_OPTION_* categories: a recognized option whose category
 * is not in the mask is validated but not applied.
 *
 * Recognized options: ndots, timeout, getaddrinfo-allow-skew,
 * max-timeouts, max-inflight, attempts, randomize-case, bind-to,
 * initial-probe-timeout.
 *
 * Returns 0 on success, for an option excluded by `flags`, and for an
 * unrecognized option name.  Returns -1 if the value cannot be parsed or
 * if applying it fails. */
static int
evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags)
{
	ASSERT_LOCKED(base);
	if (str_matches_option(option, "ndots:")) {
		const int ndots = strtoint(val);
		if (ndots == -1) return -1;
		if (!(flags & DNS_OPTION_SEARCH)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
		if (!base->global_search_state) base->global_search_state = search_state_new();
		if (!base->global_search_state) return -1;
		base->global_search_state->ndots = ndots;
	} else if (str_matches_option(option, "timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting timeout to %s", val);
		memcpy(&base->global_timeout, &tv, sizeof(struct timeval));
	} else if (str_matches_option(option, "getaddrinfo-allow-skew:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting getaddrinfo-allow-skew to %s",
		    val);
		memcpy(&base->global_getaddrinfo_allow_skew, &tv,
		    sizeof(struct timeval));
	} else if (str_matches_option(option, "max-timeouts:")) {
		const int maxtimeout = strtoint_clipped(val, 1, 255);
		if (maxtimeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
			maxtimeout);
		base->global_max_nameserver_timeout = maxtimeout;
	} else if (str_matches_option(option, "max-inflight:")) {
		const int maxinflight = strtoint_clipped(val, 1, 65000);
		if (maxinflight == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
			maxinflight);
		/* Resizing the request table allocates; report a failure
		 * instead of pretending the option was applied. */
		if (evdns_base_set_max_requests_inflight(base, maxinflight) < 0)
			return -1;
	} else if (str_matches_option(option, "attempts:")) {
		int retries = strtoint(val);
		if (retries == -1) return -1;
		if (retries > 255) retries = 255;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
		base->global_max_retransmits = retries;
	} else if (str_matches_option(option, "randomize-case:")) {
		int randcase = strtoint(val);
		/* Like every other numeric option, refuse an unparsable
		 * value rather than storing the -1 error marker (which
		 * would read as "enabled"). */
		if (randcase == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		base->global_randomize_case = randcase;
	} else if (str_matches_option(option, "bind-to:")) {
		/* XXX This only applies to successive nameservers, not
		 * to already-configured ones.  We might want to fix that. */
		int len = sizeof(base->global_outgoing_address);
		if (!(flags & DNS_OPTION_NAMESERVERS)) return 0;
		if (evutil_parse_sockaddr_port(val,
			(struct sockaddr*)&base->global_outgoing_address, &len))
			return -1;
		base->global_outgoing_addrlen = len;
	} else if (str_matches_option(option, "initial-probe-timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		/* Cap the initial probe timeout at one hour. */
		if (tv.tv_sec > 3600)
			tv.tv_sec = 3600;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
		    val);
		memcpy(&base->global_nameserver_probe_initial_timeout, &tv,
		    sizeof(tv));
	}
	return 0;
}
3369
3370 int
3371 evdns_set_option(const char *option, const char *val, int flags)
3372 {
3373         if (!current_base)
3374                 current_base = evdns_base_new(NULL, 0);
3375         return evdns_base_set_option(current_base, option, val);
3376 }
3377
3378 static void
3379 resolv_conf_parse_line(struct evdns_base *base, char *const start, int flags) {
3380         char *strtok_state;
3381         static const char *const delims = " \t";
3382 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
3383
3384
3385         char *const first_token = strtok_r(start, delims, &strtok_state);
3386         ASSERT_LOCKED(base);
3387         if (!first_token) return;
3388
3389         if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
3390                 const char *const nameserver = NEXT_TOKEN;
3391
3392                 if (nameserver)
3393                         evdns_base_nameserver_ip_add(base, nameserver);
3394         } else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
3395                 const char *const domain = NEXT_TOKEN;
3396                 if (domain) {
3397                         search_postfix_clear(base);
3398                         search_postfix_add(base, domain);
3399                 }
3400         } else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
3401                 const char *domain;
3402                 search_postfix_clear(base);
3403
3404                 while ((domain = NEXT_TOKEN)) {
3405                         search_postfix_add(base, domain);
3406                 }
3407                 search_reverse(base);
3408         } else if (!strcmp(first_token, "options")) {
3409                 const char *option;
3410                 while ((option = NEXT_TOKEN)) {
3411                         const char *val = strchr(option, ':');
3412                         evdns_base_set_option_impl(base, option, val ? val+1 : "", flags);
3413                 }
3414         }
3415 #undef NEXT_TOKEN
3416 }
3417
/* exported function */
/* Parse a resolv.conf-style file into `base`, holding the base lock for
 * the duration.  The value returned is whatever
 * evdns_base_resolv_conf_parse_impl() returns: */
/*   0 no errors */
/*   1 the file could not be read (evutil_read_file gave -1); defaults */
/*     were applied instead */
/*   2 any other failure while reading the file */
/*   6 the file was parsed but no nameservers were configured */
/* Older documentation also listed 3 (file too large), 4 (out of memory) */
/* and 5 (short read from file); the implementation in this file has no */
/* return statements for those codes. */
int
evdns_base_resolv_conf_parse(struct evdns_base *base, int flags, const char *const filename) {
	int result;

	EVDNS_LOCK(base);
	result = evdns_base_resolv_conf_parse_impl(base, flags, filename);
	EVDNS_UNLOCK(base);

	return result;
}
3434
/* Return a newly allocated string holding the path of the system hosts
 * file, or NULL on failure.  The caller frees it with mm_free(). */
static char *
evdns_get_default_hosts_filename(void)
{
#ifdef WIN32
	/* Windows is a little coy about where it puts its configuration
	 * files.  Sure, they're _usually_ in C:\windows\system32, but
	 * there's no reason in principle they couldn't be in
	 * W:\hoboken chicken emergency\
	 */
	char path[MAX_PATH+1];
	static const char hostfile[] = "\\drivers\\etc\\hosts";
	char *path_out;
	size_t len_out;

	if (! SHGetSpecialFolderPathA(NULL, path, CSIDL_SYSTEM, 0))
		return NULL;
	/* The size handed to evutil_snprintf() must include the NUL;
	 * otherwise the last character of the path gets cut off. */
	len_out = strlen(path)+strlen(hostfile)+1;
	path_out = mm_malloc(len_out);
	if (!path_out)
		return NULL;
	evutil_snprintf(path_out, len_out, "%s%s", path, hostfile);
	return path_out;
#else
	return mm_strdup("/etc/hosts");
#endif
}
3459
3460 static int
3461 evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename) {
3462         size_t n;
3463         char *resolv;
3464         char *start;
3465         int err = 0;
3466
3467         log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);
3468
3469         if (flags & DNS_OPTION_HOSTSFILE) {
3470                 char *fname = evdns_get_default_hosts_filename();
3471                 evdns_base_load_hosts(base, fname);
3472                 if (fname)
3473                         mm_free(fname);
3474         }
3475
3476         if ((err = evutil_read_file(filename, &resolv, &n, 0)) < 0) {
3477                 if (err == -1) {
3478                         /* No file. */
3479                         evdns_resolv_set_defaults(base, flags);
3480                         return 1;
3481                 } else {
3482                         return 2;
3483                 }
3484         }
3485
3486         start = resolv;
3487         for (;;) {
3488                 char *const newline = strchr(start, '\n');
3489                 if (!newline) {
3490                         resolv_conf_parse_line(base, start, flags);
3491                         break;
3492                 } else {
3493                         *newline = 0;
3494                         resolv_conf_parse_line(base, start, flags);
3495                         start = newline + 1;
3496                 }
3497         }
3498
3499         if (!base->server_head && (flags & DNS_OPTION_NAMESERVERS)) {
3500                 /* no nameservers were configured. */
3501                 evdns_base_nameserver_ip_add(base, "127.0.0.1");
3502                 err = 6;
3503         }
3504         if (flags & DNS_OPTION_SEARCH && (!base->global_search_state || base->global_search_state->num_domains == 0)) {
3505                 search_set_from_hostname(base);
3506         }
3507
3508         mm_free(resolv);
3509         return err;
3510 }
3511
3512 int
3513 evdns_resolv_conf_parse(int flags, const char *const filename) {
3514         if (!current_base)
3515                 current_base = evdns_base_new(NULL, 0);
3516         return evdns_base_resolv_conf_parse(current_base, flags, filename);
3517 }
3518
3519
3520 #ifdef WIN32
/* Add multiple nameservers from a space-or-comma-separated list.
 *
 * Each run of address characters (hex digits, '.', ':', '[' and ']') is
 * copied out and passed to evdns_base_nameserver_ip_add().  Returns 0 if
 * every address was added, 4 if memory ran out, or the first nonzero
 * result from evdns_base_nameserver_ip_add().  The base lock must be
 * held. */
static int
evdns_nameserver_ip_add_line(struct evdns_base *base, const char *ips) {
	const char *addr;
	char *buf;
	int r;
	ASSERT_LOCKED(base);
	while (*ips) {
		/* ctype functions need an unsigned char value. */
		while (isspace((unsigned char)*ips) || *ips == ',' || *ips == '\t')
			++ips;
		/* Only separators were left: nothing more to add. */
		if (!*ips)
			break;
		addr = ips;
		/* Hex digits are accepted so IPv6 addresses are kept whole. */
		while (isxdigit((unsigned char)*ips) || *ips == '.' || *ips == ':' ||
		    *ips=='[' || *ips==']')
			++ips;
		buf = mm_malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_base_nameserver_ip_add(base, buf);
		mm_free(buf);
		if (r) return r;
	}
	return 0;
}
3545
3546 typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);
3547
3548 /* Use the windows GetNetworkParams interface in iphlpapi.dll to */
3549 /* figure out what our nameservers are. */
3550 static int
3551 load_nameservers_with_getnetworkparams(struct evdns_base *base)
3552 {
3553         /* Based on MSDN examples and inspection of  c-ares code. */
3554         FIXED_INFO *fixed;
3555         HMODULE handle = 0;
3556         ULONG size = sizeof(FIXED_INFO);
3557         void *buf = NULL;
3558         int status = 0, r, added_any;
3559         IP_ADDR_STRING *ns;
3560         GetNetworkParams_fn_t fn;
3561
3562         ASSERT_LOCKED(base);
3563         if (!(handle = evutil_load_windows_system_library(
3564                         TEXT("iphlpapi.dll")))) {
3565                 log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
3566                 status = -1;
3567                 goto done;
3568         }
3569         if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
3570                 log(EVDNS_LOG_WARN, "Could not get address of function.");
3571                 status = -1;
3572                 goto done;
3573         }
3574
3575         buf = mm_malloc(size);
3576         if (!buf) { status = 4; goto done; }
3577         fixed = buf;
3578         r = fn(fixed, &size);
3579         if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
3580                 status = -1;
3581                 goto done;
3582         }
3583         if (r != ERROR_SUCCESS) {
3584                 mm_free(buf);
3585                 buf = mm_malloc(size);
3586                 if (!buf) { status = 4; goto done; }
3587                 fixed = buf;
3588                 r = fn(fixed, &size);
3589                 if (r != ERROR_SUCCESS) {
3590                         log(EVDNS_LOG_DEBUG, "fn() failed.");
3591                         status = -1;
3592                         goto done;
3593                 }
3594         }
3595
3596         EVUTIL_ASSERT(fixed);
3597         added_any = 0;
3598         ns = &(fixed->DnsServerList);
3599         while (ns) {
3600                 r = evdns_nameserver_ip_add_line(base, ns->IpAddress.String);
3601                 if (r) {
3602                         log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
3603                                 (ns->IpAddress.String),(int)GetLastError());
3604                         status = r;
3605                 } else {
3606                         ++added_any;
3607                         log(EVDNS_LOG_DEBUG,"Successfully added %s as nameserver",ns->IpAddress.String);
3608                 }
3609
3610                 ns = ns->Next;
3611         }
3612
3613         if (!added_any) {
3614                 log(EVDNS_LOG_DEBUG, "No nameservers added.");
3615                 if (status == 0)
3616                         status = -1;
3617         } else {
3618                 status = 0;
3619         }
3620
3621  done:
3622         if (buf)
3623                 mm_free(buf);
3624         if (handle)
3625                 FreeLibrary(handle);
3626         return status;
3627 }
3628
3629 static int
3630 config_nameserver_from_reg_key(struct evdns_base *base, HKEY key, const TCHAR *subkey)
3631 {
3632         char *buf;
3633         DWORD bufsz = 0, type = 0;
3634         int status = 0;
3635
3636         ASSERT_LOCKED(base);
3637         if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
3638             != ERROR_MORE_DATA)
3639                 return -1;
3640         if (!(buf = mm_malloc(bufsz)))
3641                 return -1;
3642
3643         if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
3644             == ERROR_SUCCESS && bufsz > 1) {
3645                 status = evdns_nameserver_ip_add_line(base,buf);
3646         }
3647
3648         mm_free(buf);
3649         return status;
3650 }
3651
3652 #define SERVICES_KEY TEXT("System\\CurrentControlSet\\Services\\")
3653 #define WIN_NS_9X_KEY  SERVICES_KEY TEXT("VxD\\MSTCP")
3654 #define WIN_NS_NT_KEY  SERVICES_KEY TEXT("Tcpip\\Parameters")
3655
3656 static int
3657 load_nameservers_from_registry(struct evdns_base *base)
3658 {
3659         int found = 0;
3660         int r;
3661 #define TRY(k, name) \
3662         if (!found && config_nameserver_from_reg_key(base,k,TEXT(name)) == 0) { \
3663                 log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
3664                 found = 1;                                              \
3665         } else if (!found) {                                            \
3666                 log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
3667                     #k,#name);                                          \
3668         }
3669
3670         ASSERT_LOCKED(base);
3671
3672         if (((int)GetVersion()) > 0) { /* NT */
3673                 HKEY nt_key = 0, interfaces_key = 0;
3674
3675                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
3676                                  KEY_READ, &nt_key) != ERROR_SUCCESS) {
3677                         log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
3678                         return -1;
3679                 }
3680                 r = RegOpenKeyEx(nt_key, TEXT("Interfaces"), 0,
3681                              KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
3682                              &interfaces_key);
3683                 if (r != ERROR_SUCCESS) {
3684                         log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
3685                         return -1;
3686                 }
3687                 TRY(nt_key, "NameServer");
3688                 TRY(nt_key, "DhcpNameServer");
3689                 TRY(interfaces_key, "NameServer");
3690                 TRY(interfaces_key, "DhcpNameServer");
3691                 RegCloseKey(interfaces_key);
3692                 RegCloseKey(nt_key);
3693         } else {
3694                 HKEY win_key = 0;
3695                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
3696                                  KEY_READ, &win_key) != ERROR_SUCCESS) {
3697                         log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
3698                         return -1;
3699                 }
3700                 TRY(win_key, "NameServer");
3701                 RegCloseKey(win_key);
3702         }
3703
3704         if (found == 0) {
3705                 log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
3706         }
3707
3708         return found ? 0 : -1;
3709 #undef TRY
3710 }
3711
3712 int
3713 evdns_base_config_windows_nameservers(struct evdns_base *base)
3714 {
3715         int r;
3716         char *fname;
3717         if (base == NULL)
3718                 base = current_base;
3719         if (base == NULL)
3720                 return -1;
3721         EVDNS_LOCK(base);
3722         if (load_nameservers_with_getnetworkparams(base) == 0) {
3723                 EVDNS_UNLOCK(base);
3724                 return 0;
3725         }
3726         r = load_nameservers_from_registry(base);
3727
3728         fname = evdns_get_default_hosts_filename();
3729         evdns_base_load_hosts(base, fname);
3730         if (fname)
3731                 mm_free(fname);
3732
3733         EVDNS_UNLOCK(base);
3734         return r;
3735 }
3736
3737 int
3738 evdns_config_windows_nameservers(void)
3739 {
3740         if (!current_base) {
3741                 current_base = evdns_base_new(NULL, 1);
3742                 return current_base == NULL ? -1 : 0;
3743         } else {
3744                 return evdns_base_config_windows_nameservers(current_base);
3745         }
3746 }
3747 #endif
3748
/* Allocate and initialize a new evdns_base attached to `event_base`.
 *
 * If `initialize_nameservers` is nonzero the base is configured from the
 * system: the Windows nameserver settings on WIN32, /etc/resolv.conf
 * elsewhere.
 *
 * Returns the new base, or NULL if the secure RNG cannot be seeded, if
 * memory runs out, or if nameserver configuration reports -1. */
struct evdns_base *
evdns_base_new(struct event_base *event_base, int initialize_nameservers)
{
	struct evdns_base *base;

	if (evutil_secure_rng_init() < 0) {
		log(EVDNS_LOG_WARN, "Unable to seed random number generator; "
		    "DNS can't run.");
		return NULL;
	}

	/* Give the evutil library a hook into its evdns-enabled
	 * functionality.  We can't just call evdns_getaddrinfo directly or
	 * else libevent-core will depend on libevent-extras. */
	evutil_set_evdns_getaddrinfo_fn(evdns_getaddrinfo);

	base = mm_malloc(sizeof(struct evdns_base));
	if (base == NULL)
		return (NULL);
	memset(base, 0, sizeof(struct evdns_base));
	base->req_waiting_head = NULL;

	EVTHREAD_ALLOC_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
	EVDNS_LOCK(base);

	/* Set max requests inflight and allocate req_heads. */
	base->req_heads = NULL;

	if (evdns_base_set_max_requests_inflight(base, 64) < 0) {
		/* Without a request table the base is unusable.  Nothing
		 * else has been set up yet, so undo just the lock and the
		 * allocation. */
		EVDNS_UNLOCK(base);
		EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
		mm_free(base);
		return NULL;
	}

	base->server_head = NULL;
	base->event_base = event_base;
	base->global_good_nameservers = base->global_requests_inflight =
		base->global_requests_waiting = 0;

	base->global_timeout.tv_sec = 5;
	base->global_timeout.tv_usec = 0;
	base->global_max_reissues = 1;
	base->global_max_retransmits = 3;
	base->global_max_nameserver_timeout = 3;
	base->global_search_state = NULL;
	base->global_randomize_case = 1;
	base->global_getaddrinfo_allow_skew.tv_sec = 3;
	base->global_getaddrinfo_allow_skew.tv_usec = 0;
	base->global_nameserver_probe_initial_timeout.tv_sec = 10;
	base->global_nameserver_probe_initial_timeout.tv_usec = 0;

	TAILQ_INIT(&base->hostsdb);

	if (initialize_nameservers) {
		int r;
#ifdef WIN32
		r = evdns_base_config_windows_nameservers(base);
#else
		r = evdns_base_resolv_conf_parse(base, DNS_OPTIONS_ALL, "/etc/resolv.conf");
#endif
		if (r == -1) {
			evdns_base_free_and_unlock(base, 0);
			return NULL;
		}
	}
	EVDNS_UNLOCK(base);
	return base;
}
3813
3814 int
3815 evdns_init(void)
3816 {
3817         struct evdns_base *base = evdns_base_new(NULL, 1);
3818         if (base) {
3819                 current_base = base;
3820                 return 0;
3821         } else {
3822                 return -1;
3823         }
3824 }
3825
3826 const char *
3827 evdns_err_to_string(int err)
3828 {
3829     switch (err) {
3830         case DNS_ERR_NONE: return "no error";
3831         case DNS_ERR_FORMAT: return "misformatted query";
3832         case DNS_ERR_SERVERFAILED: return "server failed";
3833         case DNS_ERR_NOTEXIST: return "name does not exist";
3834         case DNS_ERR_NOTIMPL: return "query not implemented";
3835         case DNS_ERR_REFUSED: return "refused";
3836
3837         case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
3838         case DNS_ERR_UNKNOWN: return "unknown";
3839         case DNS_ERR_TIMEOUT: return "request timed out";
3840         case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
3841         case DNS_ERR_CANCEL: return "dns request canceled";
3842         default: return "[Unknown error code]";
3843     }
3844 }
3845
3846 static void
3847 evdns_nameserver_free(struct nameserver *server)
3848 {
3849         if (server->socket >= 0)
3850         evutil_closesocket(server->socket);
3851         (void) event_del(&server->event);
3852         event_debug_unassign(&server->event);
3853         if (server->state == 0)
3854                 (void) event_del(&server->timeout_event);
3855         event_debug_unassign(&server->timeout_event);
3856         mm_free(server);
3857 }
3858
3859 static void
3860 evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests)
3861 {
3862         struct nameserver *server, *server_next;
3863         struct search_domain *dom, *dom_next;
3864         int i;
3865
3866         /* Requires that we hold the lock. */
3867
3868         /* TODO(nickm) we might need to refcount here. */
3869
3870         for (i = 0; i < base->n_req_heads; ++i) {
3871                 while (base->req_heads[i]) {
3872                         if (fail_requests)
3873                                 reply_schedule_callback(base->req_heads[i], 0, DNS_ERR_SHUTDOWN, NULL);
3874                         request_finished(base->req_heads[i], &REQ_HEAD(base, base->req_heads[i]->trans_id), 1);
3875                 }
3876         }
3877         while (base->req_waiting_head) {
3878                 if (fail_requests)
3879                         reply_schedule_callback(base->req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
3880                 request_finished(base->req_waiting_head, &base->req_waiting_head, 1);
3881         }
3882         base->global_requests_inflight = base->global_requests_waiting = 0;
3883
3884         for (server = base->server_head; server; server = server_next) {
3885                 server_next = server->next;
3886                 evdns_nameserver_free(server);
3887                 if (server_next == base->server_head)
3888                         break;
3889         }
3890         base->server_head = NULL;
3891         base->global_good_nameservers = 0;
3892
3893         if (base->global_search_state) {
3894                 for (dom = base->global_search_state->head; dom; dom = dom_next) {
3895                         dom_next = dom->next;
3896                         mm_free(dom);
3897                 }
3898                 mm_free(base->global_search_state);
3899                 base->global_search_state = NULL;
3900         }
3901
3902         {
3903                 struct hosts_entry *victim;
3904                 while ((victim = TAILQ_FIRST(&base->hostsdb))) {
3905                         TAILQ_REMOVE(&base->hostsdb, victim, next);
3906                         mm_free(victim);
3907                 }
3908         }
3909
3910         mm_free(base->req_heads);
3911
3912         EVDNS_UNLOCK(base);
3913         EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
3914
3915         mm_free(base);
3916 }
3917
/* Destroy `base`.  Takes the base lock and hands off to
 * evdns_base_free_and_unlock(), which releases the lock again as part of
 * the teardown.  `fail_requests` is passed through unchanged. */
void
evdns_base_free(struct evdns_base *base, int fail_requests)
{
	EVDNS_LOCK(base);

	evdns_base_free_and_unlock(base, fail_requests);
}
3924
3925 void
3926 evdns_shutdown(int fail_requests)
3927 {
3928         if (current_base) {
3929                 struct evdns_base *b = current_base;
3930                 current_base = NULL;
3931                 evdns_base_free(b, fail_requests);
3932         }
3933         evdns_log_fn = NULL;
3934 }
3935
3936 static int
3937 evdns_base_parse_hosts_line(struct evdns_base *base, char *line)
3938 {
3939         char *strtok_state;
3940         static const char *const delims = " \t";
3941         char *const addr = strtok_r(line, delims, &strtok_state);
3942         char *hostname, *hash;
3943         struct sockaddr_storage ss;
3944         int socklen = sizeof(ss);
3945         ASSERT_LOCKED(base);
3946
3947 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
3948
3949         if (!addr || *addr == '#')
3950                 return 0;
3951
3952         memset(&ss, 0, sizeof(ss));
3953         if (evutil_parse_sockaddr_port(addr, (struct sockaddr*)&ss, &socklen)<0)
3954                 return -1;
3955         if (socklen > (int)sizeof(struct sockaddr_in6))
3956                 return -1;
3957
3958         if (sockaddr_getport((struct sockaddr*)&ss))
3959                 return -1;
3960
3961         while ((hostname = NEXT_TOKEN)) {
3962                 struct hosts_entry *he;
3963                 size_t namelen;
3964                 if ((hash = strchr(hostname, '#'))) {
3965                         if (hash == hostname)
3966                                 return 0;
3967                         *hash = '\0';
3968                 }
3969
3970                 namelen = strlen(hostname);
3971
3972                 he = mm_calloc(1, sizeof(struct hosts_entry)+namelen);
3973                 if (!he)
3974                         return -1;
3975                 EVUTIL_ASSERT(socklen <= (int)sizeof(he->addr));
3976                 memcpy(&he->addr, &ss, socklen);
3977                 memcpy(he->hostname, hostname, namelen+1);
3978                 he->addrlen = socklen;
3979
3980                 TAILQ_INSERT_TAIL(&base->hostsdb, he, next);
3981
3982                 if (hash)
3983                         return 0;
3984         }
3985
3986         return 0;
3987 #undef NEXT_TOKEN
3988 }
3989
/* Load hosts entries into 'base' from the file 'hosts_fname'.
 *
 * If 'hosts_fname' is NULL, or the file cannot be read, entries mapping
 * "localhost" to 127.0.0.1 and ::1 are added instead.  The caller must hold
 * the lock on 'base'.
 *
 * Returns 0 on success (including the NULL-filename case) and -1 if the file
 * could not be read.  Failures to parse individual lines are ignored. */
static int
evdns_base_load_hosts_impl(struct evdns_base *base, const char *hosts_fname)
{
	char *contents = NULL;
	char *line, *newline;
	size_t contents_len;
	int read_status = 0;

	ASSERT_LOCKED(base);

	if (hosts_fname != NULL)
		read_status = evutil_read_file(hosts_fname, &contents,
		    &contents_len, 0);

	if (hosts_fname == NULL || read_status < 0) {
		/* The parser modifies its input, so use a writable copy. */
		char fallback[64];
		strlcpy(fallback, "127.0.0.1   localhost", sizeof(fallback));
		evdns_base_parse_hosts_line(base, fallback);
		strlcpy(fallback, "::1   localhost", sizeof(fallback));
		evdns_base_parse_hosts_line(base, fallback);
		return read_status ? -1 : 0;
	}

	/* This will break early if there is a NUL in the hosts file.
	 * Probably not a problem.*/
	line = contents;
	while ((newline = strchr(line, '\n')) != NULL) {
		*newline = '\0';
		evdns_base_parse_hosts_line(base, line);
		line = newline + 1;
	}
	/* Whatever follows the last newline (possibly an empty string). */
	evdns_base_parse_hosts_line(base, line);

	mm_free(contents);
	return 0;
}
4028
4029 int
4030 evdns_base_load_hosts(struct evdns_base *base, const char *hosts_fname)
4031 {
4032         int res;
4033         if (!base)
4034                 base = current_base;
4035         EVDNS_LOCK(base);
4036         res = evdns_base_load_hosts_impl(base, hosts_fname);
4037         EVDNS_UNLOCK(base);
4038         return res;
4039 }
4040
/* A single request for a getaddrinfo, either v4 or v6.  One of these is
 * embedded per address family inside struct evdns_getaddrinfo_request, and
 * a pointer to it is what the resolve callback receives as its argument. */
struct getaddrinfo_subrequest {
	/* Handle of the underlying evdns request.  NULL if the request was
	 * never launched or has already completed. */
	struct evdns_request *r;
	/* Which record type this slot is for: DNS_IPv4_A or DNS_IPv6_AAAA. */
	ev_uint32_t type;
};
4046
/* State data used to implement an in-progress getaddrinfo. */
struct evdns_getaddrinfo_request {
	/* The evdns_base this lookup runs on; its lock protects the flags
	 * below and its counters record answer/timeout statistics. */
	struct evdns_base *evdns_base;
	/* Copy of the modified 'hints' data that we'll use to build
	 * answers. */
	struct evutil_addrinfo hints;
	/* The callback to invoke when we're done */
	evdns_getaddrinfo_cb user_cb;
	/* User-supplied data to give to the callback. */
	void *user_data;
	/* The port to use when building sockaddrs. */
	ev_uint16_t port;
	/* The sub_request for an A record (if any) */
	struct getaddrinfo_subrequest ipv4_request;
	/* The sub_request for an AAAA record (if any) */
	struct getaddrinfo_subrequest ipv6_request;

	/* The cname result that we were told (if any).  Ownership moves to
	 * the reply's ai_canonname when it is attached to an answer;
	 * otherwise it is released when the request is freed. */
	char *cname_result;

	/* If we have one request answered and one request still inflight,
	 * then this field holds the answer from the first request... */
	struct evutil_addrinfo *pending_result;
	/* And this event is a timeout that will tell us to cancel the second
	 * request if it's taking a long time. */
	struct event timeout;

	/* And this field holds the error code from the first request, if it
	 * failed while the other one was still in flight. */
	int pending_error;
	/* If this is set, the user canceled this request. */
	unsigned user_canceled : 1;
	/* If this is set, the user can no longer cancel this request; we're
	 * just waiting for the free. */
	unsigned request_done : 1;
};
4082
/* Translate an evdns DNS_ERR_* code into the matching getaddrinfo-style
 * error: 0 for DNS_ERR_NONE, EVUTIL_EAI_NONAME for DNS_ERR_NOTEXIST, and
 * EVUTIL_EAI_FAIL for everything else. */
static int
evdns_err_to_getaddrinfo_err(int e1)
{
	/* XXX Do this better! */
	switch (e1) {
	case DNS_ERR_NONE:
		return 0;
	case DNS_ERR_NOTEXIST:
		return EVUTIL_EAI_NONAME;
	default:
		return EVUTIL_EAI_FAIL;
	}
}
4095
/* Combine the error codes of two subrequests into the one we report:
 * the first error wins unless it is 0, in which case the second is used. */
static int
getaddrinfo_merge_err(int e1, int e2)
{
	/* XXXX be cleverer here. */
	return e1 ? e1 : e2;
}
4106
/* Release a getaddrinfo request together with everything it still owns: a
 * queued-but-undelivered result, a stored cname, and its timeout event.
 *
 * DO NOT CALL this if either of the requests is pending.  Only once
 * both callbacks have been invoked is it safe to free the request. */
static void
free_getaddrinfo_request(struct evdns_getaddrinfo_request *data)
{
	struct evutil_addrinfo *queued = data->pending_result;
	char *cname = data->cname_result;

	if (queued != NULL)
		evutil_freeaddrinfo(queued);
	if (cname != NULL)
		mm_free(cname);

	/* Make sure the timeout can no longer fire on freed memory. */
	event_del(&data->timeout);
	mm_free(data);
}
4120
/* If a cname was recorded for this request and there is a reply to attach it
 * to, hand the cname string over to the head of 'ai' as its ai_canonname.
 * The request gives up its pointer, so the string is not freed with it. */
static void
add_cname_to_reply(struct evdns_getaddrinfo_request *data,
    struct evutil_addrinfo *ai)
{
	if (data->cname_result == NULL || ai == NULL)
		return;

	ai->ai_canonname = data->cname_result;
	data->cname_result = NULL;
}
4130
/* Callback: invoked when one request in a mixed-format A/AAAA getaddrinfo
 * request has finished, but the other one took too long to answer. Pass
 * along the answer we got, and cancel the other request.
 *
 * 'ptr' is the struct evdns_getaddrinfo_request the timer was assigned with;
 * 'fd' and 'what' are unused.
 */
static void
evdns_getaddrinfo_timeout_cb(evutil_socket_t fd, short what, void *ptr)
{
	int v4_timedout = 0, v6_timedout = 0;
	struct evdns_getaddrinfo_request *data = ptr;

	/* Cancel any pending requests, and note which one */
	if (data->ipv4_request.r) {
		evdns_cancel_request(NULL, data->ipv4_request.r);
		v4_timedout = 1;
		/* The per-base timeout counters are updated under the lock. */
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv4_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}
	if (data->ipv6_request.r) {
		evdns_cancel_request(NULL, data->ipv6_request.r);
		v6_timedout = 1;
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv6_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}

	/* We only use this timeout callback when we have an answer for
	 * one address. */
	EVUTIL_ASSERT(!v4_timedout || !v6_timedout);

	/* Report the outcome of the other request that didn't time out. */
	if (data->pending_result) {
		/* Ownership of pending_result passes to the user callback, so
		 * clear our pointer to keep it from being freed with 'data'. */
		add_cname_to_reply(data, data->pending_result);
		data->user_cb(0, data->pending_result, data->user_data);
		data->pending_result = NULL;
	} else {
		/* No answer was stashed: report the stashed error, or
		 * EVUTIL_EAI_AGAIN if none was recorded. */
		int e = data->pending_error;
		if (!e)
			e = EVUTIL_EAI_AGAIN;
		data->user_cb(e, NULL, data->user_data);
	}

	/* When a request was cancelled above, 'data' is not freed here;
	 * presumably the cancelled request's callback
	 * (evdns_getaddrinfo_gotresolve, DNS_ERR_CANCEL branch) does it --
	 * NOTE(review): confirm against evdns_cancel_request. */
	if (!v4_timedout && !v6_timedout) {
		/* should be impossible? XXXX */
		free_getaddrinfo_request(data);
	}
}
4178
/* Arm the request's timeout event so that it fires after the base's
 * global_getaddrinfo_allow_skew interval.  Returns the result of
 * event_add(). */
static int
evdns_getaddrinfo_set_timeout(struct evdns_base *evdns_base,
    struct evdns_getaddrinfo_request *data)
{
	int rc;

	rc = event_add(&data->timeout,
	    &evdns_base->global_getaddrinfo_allow_skew);
	return rc;
}
4185
4186 static inline int
4187 evdns_result_is_answer(int result)
4188 {
4189         return (result != DNS_ERR_NOTIMPL && result != DNS_ERR_REFUSED &&
4190             result != DNS_ERR_SERVERFAILED && result != DNS_ERR_CANCEL);
4191 }
4192
/* Callback: invoked when one of the A/AAAA subrequests of a getaddrinfo
 * finishes.
 *
 * 'arg' points at the getaddrinfo_subrequest embedded in the owning
 * evdns_getaddrinfo_request.  'result' is a DNS_ERR_* code, 'type' is the
 * record type of the answer, and 'addresses' holds 'count' packed addresses
 * (4 bytes each for A, 16 bytes each for AAAA).  'ttl' is unused.
 *
 * If the other subrequest is still in flight, the outcome (answer list or
 * error) is stashed in the request and the skew timeout is armed.  Otherwise
 * the combined outcome is handed to the user callback and the request is
 * freed. */
static void
evdns_getaddrinfo_gotresolve(int result, char type, int count,
    int ttl, void *addresses, void *arg)
{
	int i;
	struct getaddrinfo_subrequest *req = arg;
	struct getaddrinfo_subrequest *other_req;
	struct evdns_getaddrinfo_request *data;

	struct evutil_addrinfo *res;

	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr *sa;
	int socklen, addrlen;
	void *addrp;
	int err;
	int user_canceled;

	/* Recover the enclosing request from the embedded subrequest, and
	 * find the sibling subrequest for the other address family. */
	EVUTIL_ASSERT(req->type == DNS_IPv4_A || req->type == DNS_IPv6_AAAA);
	if (req->type == DNS_IPv4_A) {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv4_request);
		other_req = &data->ipv6_request;
	} else {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv6_request);
		other_req = &data->ipv4_request;
	}

	/* Under the base lock: update the statistics, snapshot the
	 * user_canceled flag, and, if no other subrequest is outstanding,
	 * mark the request done so evdns_getaddrinfo_cancel() becomes a
	 * no-op. */
	EVDNS_LOCK(data->evdns_base);
	if (evdns_result_is_answer(result)) {
		if (req->type == DNS_IPv4_A)
			++data->evdns_base->getaddrinfo_ipv4_answered;
		else
			++data->evdns_base->getaddrinfo_ipv6_answered;
	}
	user_canceled = data->user_canceled;
	if (other_req->r == NULL)
		data->request_done = 1;
	EVDNS_UNLOCK(data->evdns_base);

	/* This subrequest is finished; its handle must not be used again. */
	req->r = NULL;

	if (result == DNS_ERR_CANCEL && ! user_canceled) {
		/* Internal cancel request from timeout or internal error.
		 * we already answered the user. */
		if (other_req->r == NULL)
			free_getaddrinfo_request(data);
		return;
	}

	/* Map the DNS outcome to a getaddrinfo error; a successful reply
	 * with no records is reported as EVUTIL_EAI_NODATA. */
	if (result == DNS_ERR_NONE) {
		if (count == 0)
			err = EVUTIL_EAI_NODATA;
		else
			err = 0;
	} else {
		err = evdns_err_to_getaddrinfo_err(result);
	}

	if (err) {
		/* Looks like we got an error. */
		if (other_req->r) {
			/* The other request is still working; maybe it will
			 * succeed. */
			/* XXXX handle failure from set_timeout */
			evdns_getaddrinfo_set_timeout(data->evdns_base, data);
			data->pending_error = err;
			return;
		}

		/* Both subrequests are finished: report exactly once. */
		if (user_canceled) {
			data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		} else if (data->pending_result) {
			/* If we have an answer waiting, and we weren't
			 * canceled, ignore this error. */
			add_cname_to_reply(data, data->pending_result);
			data->user_cb(0, data->pending_result, data->user_data);
			/* The user owns the list now; don't free it below. */
			data->pending_result = NULL;
		} else {
			if (data->pending_error)
				err = getaddrinfo_merge_err(err,
				    data->pending_error);
			data->user_cb(err, NULL, data->user_data);
		}
		free_getaddrinfo_request(data);
		return;
	} else if (user_canceled) {
		if (other_req->r) {
			/* The other request is still working; let it hit this
			 * callback with EVUTIL_EAI_CANCEL callback and report
			 * the failure. */
			return;
		}
		data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		free_getaddrinfo_request(data);
		return;
	}

	/* Looks like we got some answers. We should turn them into addrinfos
	 * and then either queue those or return them all. */
	EVUTIL_ASSERT(type == DNS_IPv4_A || type == DNS_IPv6_AAAA);

	/* Prepare a template sockaddr for this family; 'addrp' points at the
	 * address field that is overwritten for each record below. */
	if (type == DNS_IPv4_A) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_port = htons(data->port);

		sa = (struct sockaddr *)&sin;
		socklen = sizeof(sin);
		addrlen = 4;
		addrp = &sin.sin_addr.s_addr;
	} else {
		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_port = htons(data->port);

		sa = (struct sockaddr *)&sin6;
		socklen = sizeof(sin6);
		addrlen = 16;
		addrp = &sin6.sin6_addr.s6_addr;
	}

	res = NULL;
	for (i=0; i < count; ++i) {
		struct evutil_addrinfo *ai;
		/* Record i occupies bytes [i*addrlen, (i+1)*addrlen). */
		memcpy(addrp, ((char*)addresses)+i*addrlen, addrlen);
		ai = evutil_new_addrinfo(sa, socklen, &data->hints);
		if (!ai) {
			/* Out of memory: give up on the whole lookup. */
			if (other_req->r) {
				evdns_cancel_request(NULL, other_req->r);
			}
			data->user_cb(EVUTIL_EAI_MEMORY, NULL, data->user_data);
			if (res)
				evutil_freeaddrinfo(res);

			/* If the other request was just cancelled, 'data' is
			 * left for its callback to free. */
			if (other_req->r == NULL)
				free_getaddrinfo_request(data);
			return;
		}
		res = evutil_addrinfo_append(res, ai);
	}

	if (other_req->r) {
		/* The other request is still in progress; wait for it */
		/* XXXX handle failure from set_timeout */
		evdns_getaddrinfo_set_timeout(data->evdns_base, data);
		data->pending_result = res;
		return;
	} else {
		/* The other request is done or never started; append its
		 * results (if any) and return them. */
		if (data->pending_result) {
			/* Presumably evutil_addrinfo_append(a, b) yields a
			 * followed by b, which would put the A results ahead
			 * of the AAAA results in both branches. */
			if (req->type == DNS_IPv4_A)
				res = evutil_addrinfo_append(res,
				    data->pending_result);
			else
				res = evutil_addrinfo_append(
				    data->pending_result, res);
			data->pending_result = NULL;
		}

		/* Call the user callback. */
		add_cname_to_reply(data, res);
		data->user_cb(0, res, data->user_data);

		/* Free data. */
		free_getaddrinfo_request(data);
	}
}
4362
4363 static struct hosts_entry *
4364 find_hosts_entry(struct evdns_base *base, const char *hostname,
4365     struct hosts_entry *find_after)
4366 {
4367         struct hosts_entry *e;
4368
4369         if (find_after)
4370                 e = TAILQ_NEXT(find_after, next);
4371         else
4372                 e = TAILQ_FIRST(&base->hostsdb);
4373
4374         for (; e; e = TAILQ_NEXT(e, next)) {
4375                 if (!evutil_ascii_strcasecmp(e->hostname, hostname))
4376                         return e;
4377         }
4378         return NULL;
4379 }
4380
4381 static int
4382 evdns_getaddrinfo_fromhosts(struct evdns_base *base,
4383     const char *nodename, struct evutil_addrinfo *hints, ev_uint16_t port,
4384     struct evutil_addrinfo **res)
4385 {
4386         int n_found = 0;
4387         struct hosts_entry *e;
4388         struct evutil_addrinfo *ai=NULL;
4389         int f = hints->ai_family;
4390
4391         EVDNS_LOCK(base);
4392         for (e = find_hosts_entry(base, nodename, NULL); e;
4393             e = find_hosts_entry(base, nodename, e)) {
4394                 struct evutil_addrinfo *ai_new;
4395                 ++n_found;
4396                 if ((e->addr.sa.sa_family == AF_INET && f == PF_INET6) ||
4397                     (e->addr.sa.sa_family == AF_INET6 && f == PF_INET))
4398                         continue;
4399                 ai_new = evutil_new_addrinfo(&e->addr.sa, e->addrlen, hints);
4400                 if (!ai_new) {
4401                         n_found = 0;
4402                         goto out;
4403                 }
4404                 sockaddr_setport(ai_new->ai_addr, port);
4405                 ai = evutil_addrinfo_append(ai, ai_new);
4406         }
4407         EVDNS_UNLOCK(base);
4408 out:
4409         if (n_found) {
4410                 /* Note that we return an empty answer if we found entries for
4411                  * this hostname but none were of the right address type. */
4412                 *res = ai;
4413                 return 0;
4414         } else {
4415                 if (ai)
4416                         evutil_freeaddrinfo(ai);
4417                 return -1;
4418         }
4419 }
4420
/* Asynchronous getaddrinfo(): resolve 'nodename'/'servname' according to
 * 'hints_in' and deliver the outcome through 'cb'(err, result, 'arg').
 *
 * 'dns_base' may be NULL, in which case current_base is used.
 *
 * If the query can be answered without a DNS lookup (numeric host, an error
 * detected up front, or a hosts-database hit), 'cb' is invoked before this
 * function returns and NULL is returned.  The same happens, with an error
 * code, if no evdns_base is available, memory runs out, or no DNS request
 * could be launched.
 *
 * Otherwise one or two DNS requests (A and/or AAAA, depending on the hints'
 * address family) are launched and a request handle is returned; it can be
 * passed to evdns_getaddrinfo_cancel(). */
struct evdns_getaddrinfo_request *
evdns_getaddrinfo(struct evdns_base *dns_base,
    const char *nodename, const char *servname,
    const struct evutil_addrinfo *hints_in,
    evdns_getaddrinfo_cb cb, void *arg)
{
	struct evdns_getaddrinfo_request *data;
	struct evutil_addrinfo hints;
	struct evutil_addrinfo *res = NULL;
	int err;
	int port = 0;
	int want_cname = 0;

	if (!dns_base) {
		dns_base = current_base;
		if (!dns_base) {
			log(EVDNS_LOG_WARN,
			    "Call to getaddrinfo_async with no "
			    "evdns_base configured.");
			cb(EVUTIL_EAI_FAIL, NULL, arg); /* ??? better error? */
			return NULL;
		}
	}

	/* If we _must_ answer this immediately, do so. */
	if ((hints_in && (hints_in->ai_flags & EVUTIL_AI_NUMERICHOST))) {
		res = NULL;
		err = evutil_getaddrinfo(nodename, servname, hints_in, &res);
		cb(err, res, arg);
		return NULL;
	}

	if (hints_in) {
		memcpy(&hints, hints_in, sizeof(hints));
	} else {
		memset(&hints, 0, sizeof(hints));
		hints.ai_family = PF_UNSPEC;
	}

	evutil_adjust_hints_for_addrconfig(&hints);

	/* Now try to see if we _can_ answer immediately. */
	/* (It would be nice to do this by calling getaddrinfo directly, with
	 * AI_NUMERICHOST, on platforms that have it, but we can't: there isn't
	 * a reliable way to distinguish the "that wasn't a numeric host!" case
	 * from any other EAI_NONAME cases.) */
	err = evutil_getaddrinfo_common(nodename, servname, &hints, &res, &port);
	if (err != EVUTIL_EAI_NEED_RESOLVE) {
		cb(err, res, arg);
		return NULL;
	}

	/* If there is an entry in the hosts file, we should give it now. */
	if (!evdns_getaddrinfo_fromhosts(dns_base, nodename, &hints, port, &res)) {
		cb(0, res, arg);
		return NULL;
	}

	/* Okay, things are serious now. We're going to need to actually
	 * launch a request.
	 */
	data = mm_calloc(1,sizeof(struct evdns_getaddrinfo_request));
	if (!data) {
		cb(EVUTIL_EAI_MEMORY, NULL, arg);
		return NULL;
	}

	memcpy(&data->hints, &hints, sizeof(data->hints));
	data->port = (ev_uint16_t)port;
	data->ipv4_request.type = DNS_IPv4_A;
	data->ipv6_request.type = DNS_IPv6_AAAA;
	data->user_cb = cb;
	data->user_data = arg;
	data->evdns_base = dns_base;

	/* Initialize the timeout event before any request is launched: the
	 * resolve callback adds and deletes this event, so it must be set up
	 * before that callback can possibly run. */
	evtimer_assign(&data->timeout, dns_base->event_base,
	    evdns_getaddrinfo_timeout_cb, data);

	want_cname = (hints.ai_flags & EVUTIL_AI_CANONNAME);

	/* If we are asked for a PF_UNSPEC address, we launch two requests in
	 * parallel: one for an A address and one for an AAAA address.  We
	 * can't send just one request, since many servers only answer one
	 * question per DNS request.
	 *
	 * Once we have the answer to one request, we allow for a short
	 * timeout before we report it, to see if the other one arrives.  If
	 * they both show up in time, then we report both the answers.
	 *
	 * If too many addresses of one type time out or fail, we should stop
	 * launching those requests. (XXX we don't do that yet.)
	 */

	if (hints.ai_family != PF_INET6) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv4 as %p",
		    nodename, &data->ipv4_request);

		data->ipv4_request.r = evdns_base_resolve_ipv4(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv4_request);
		/* The resolve call may fail and return NULL; only hook up
		 * the cname slot when a request was really created. */
		if (want_cname && data->ipv4_request.r)
			data->ipv4_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}
	if (hints.ai_family != PF_INET) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv6 as %p",
		    nodename, &data->ipv6_request);

		data->ipv6_request.r = evdns_base_resolve_ipv6(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv6_request);
		if (want_cname && data->ipv6_request.r)
			data->ipv6_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}

	if (data->ipv4_request.r || data->ipv6_request.r) {
		return data;
	} else {
		/* Neither request could be launched: nothing references
		 * 'data', so release it and report the failure. */
		mm_free(data);
		cb(EVUTIL_EAI_FAIL, NULL, arg);
		return NULL;
	}
}
4545
/* Cancel an in-progress getaddrinfo request on behalf of the user.
 *
 * Does nothing if the request is already marked done.  Otherwise the timeout
 * is removed, the request is flagged as user-canceled, and every subrequest
 * that still has a handle is cancelled.  All of this happens while holding
 * the lock of the request's evdns_base. */
void
evdns_getaddrinfo_cancel(struct evdns_getaddrinfo_request *data)
{
	struct evdns_base *base = data->evdns_base;

	EVDNS_LOCK(base);
	if (!data->request_done) {
		event_del(&data->timeout);
		data->user_canceled = 1;
		if (data->ipv4_request.r != NULL)
			evdns_cancel_request(base, data->ipv4_request.r);
		if (data->ipv6_request.r != NULL)
			evdns_cancel_request(base, data->ipv6_request.r);
	}
	EVDNS_UNLOCK(base);
}