]> arthur.barton.de Git - netatalk.git/blob - libatalk/tsocket/tsocket_bsd.c
Merge master
[netatalk.git] / libatalk / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tsocket
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif /* HAVE_CONFIG_H */
27
28 #include <atalk/tsocket.h>
29 #include "tsocket_internal.h"
30
/*
 * Translate the (return value, errno) pair of a socket call into an
 * error code for the tsocket layer.
 *
 * ret        return value of the system call
 * sys_errno  errno captured right after the call
 * retry      out: set to true when the failure is transient
 *            (EINTR, EINPROGRESS, EAGAIN, EWOULDBLOCK), false otherwise
 *
 * Returns 0 when ret is non-negative, EIO when ret is an unexpected
 * negative value or errno was not set, and sys_errno in all other cases.
 */
static int tsocket_bsd_error_from_errno(int ret,
                                        int sys_errno,
                                        bool *retry)
{
        bool transient;

        *retry = false;

        /* success: nothing to translate */
        if (ret >= 0) {
                return 0;
        }

        /* only -1 together with a real errno is a well-formed failure */
        if (ret != -1 || sys_errno == 0) {
                return EIO;
        }

        transient = (sys_errno == EINTR)
                 || (sys_errno == EINPROGRESS)
                 || (sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
        transient = transient || (sys_errno == EWOULDBLOCK);
#endif

        if (transient) {
                *retry = true;
        }

        return sys_errno;
}
73
/*
 * Prepare a freshly created descriptor for use by the tsocket code.
 *
 * fd       descriptor to prepare; -1 is passed straight through as -1
 * high_fd  when true, the descriptor is dup()ed until it is >= 3 and
 *          the low-numbered originals are closed
 *
 * The descriptor is switched to non-blocking mode and, where FD_CLOEXEC
 * exists, marked close-on-exec.
 *
 * Returns the (possibly new) descriptor, or -1 with errno set. On the
 * failure paths that go through "fail" the descriptor is closed before
 * returning.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
        int low_fds[3];
        int low_count = 0;
        int saved_errno = 0;
        int idx;

        int rc, fl;

        if (fd == -1) {
                return -1;
        }

        /* first make a fd >= 3 */
        if (high_fd) {
                while (fd < 3) {
                        /* remember the low descriptor so it can be closed */
                        low_fds[low_count] = fd;
                        low_count += 1;

                        fd = dup(fd);
                        if (fd == -1) {
                                saved_errno = errno;
                                break;
                        }
                }

                for (idx = 0; idx < low_count; idx++) {
                        close(low_fds[idx]);
                }

                if (fd == -1) {
                        /* close() above may have clobbered errno */
                        errno = saved_errno;
                        return fd;
                }
        }

        /* fd should be nonblocking. */

#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

        fl = fcntl(fd, F_GETFL);
        if (fl == -1) {
                goto fail;
        }

        fl |= FLAG_TO_SET;
        if (fcntl(fd, F_SETFL, fl) == -1) {
                goto fail;
        }

#undef FLAG_TO_SET

        /* fd should be closed on exec() */
#ifdef FD_CLOEXEC
        fl = fcntl(fd, F_GETFD, 0);
        rc = fl;
        if (fl >= 0) {
                fl |= FD_CLOEXEC;
                rc = fcntl(fd, F_SETFD, fl);
        }
        if (rc < 0) {
                goto fail;
        }
#endif
        return fd;

 fail:
        if (fd != -1) {
                /* keep the errno of the failing fcntl() across close() */
                saved_errno = errno;
                close(fd);
                errno = saved_errno;
        }
        return -1;
}
150
/*
 * Report how many bytes can be read from fd without blocking.
 *
 * Returns the byte count reported by ioctl(FIONREAD) when it is
 * non-zero. When no data is queued, the socket's pending error
 * (SO_ERROR) is consulted: a pending error yields -1 with errno set to
 * it, otherwise 0 is returned. Failing system calls yield -1 with their
 * errno.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
        int avail = 0;
        int so_error = 0;
        socklen_t so_len = sizeof(so_error);
        int rc;

        rc = ioctl(fd, FIONREAD, &avail);
        if (rc == -1) {
                return rc;
        }
        if (rc != 0) {
                /* this should not be reached */
                errno = EIO;
                return -1;
        }

        if (avail != 0) {
                return avail;
        }

        /*
         * Nothing queued: look at the socket error state instead. For
         * connected dgram sockets this is how ICMP errors are handed
         * back to the caller.
         */
        rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_len);
        if (rc == -1) {
                return rc;
        }
        if (so_error != 0) {
                errno = so_error;
                return -1;
        }

        return 0;
}
190
191 static const struct tsocket_address_ops tsocket_address_bsd_ops;
192
/*
 * Private data of a "bsd" tsocket_address: a socket address of any
 * supported family together with its length.
 */
struct tsocket_address_bsd {
        /* number of bytes of 'u' that hold the stored address */
        socklen_t sa_socklen;
        /* the address itself; 'sa.sa_family' selects the active view */
        union {
                struct sockaddr sa;
                struct sockaddr_in in;
#ifdef HAVE_IPV6
                struct sockaddr_in6 in6;
#endif
                struct sockaddr_un un;
                /* sizes the union; used as the raw copy target/source */
                struct sockaddr_storage ss;
        } u;
};
205
206 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
207                                        struct sockaddr *sa,
208                                        size_t sa_socklen,
209                                        struct tsocket_address **_addr,
210                                        const char *location)
211 {
212         struct tsocket_address *addr;
213         struct tsocket_address_bsd *bsda;
214
215         if (sa_socklen < sizeof(sa->sa_family)) {
216                 errno = EINVAL;
217                 return -1;
218         }
219
220         switch (sa->sa_family) {
221         case AF_UNIX:
222                 if (sa_socklen > sizeof(struct sockaddr_un)) {
223                         sa_socklen = sizeof(struct sockaddr_un);
224                 }
225                 break;
226         case AF_INET:
227                 if (sa_socklen < sizeof(struct sockaddr_in)) {
228                         errno = EINVAL;
229                         return -1;
230                 }
231                 sa_socklen = sizeof(struct sockaddr_in);
232                 break;
233 #ifdef HAVE_IPV6
234         case AF_INET6:
235                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
236                         errno = EINVAL;
237                         return -1;
238                 }
239                 sa_socklen = sizeof(struct sockaddr_in6);
240                 break;
241 #endif
242         default:
243                 errno = EAFNOSUPPORT;
244                 return -1;
245         }
246
247         if (sa_socklen > sizeof(struct sockaddr_storage)) {
248                 errno = EINVAL;
249                 return -1;
250         }
251
252         addr = tsocket_address_create(mem_ctx,
253                                       &tsocket_address_bsd_ops,
254                                       &bsda,
255                                       struct tsocket_address_bsd,
256                                       location);
257         if (!addr) {
258                 errno = ENOMEM;
259                 return -1;
260         }
261
262         ZERO_STRUCTP(bsda);
263
264         memcpy(&bsda->u.ss, sa, sa_socklen);
265
266         bsda->sa_socklen = sa_socklen;
267
268         *_addr = addr;
269         return 0;
270 }
271
272 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
273                                      struct sockaddr *sa,
274                                      size_t sa_socklen)
275 {
276         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
277                                            struct tsocket_address_bsd);
278
279         if (!bsda) {
280                 errno = EINVAL;
281                 return -1;
282         }
283
284         if (sa_socklen < bsda->sa_socklen) {
285                 errno = EINVAL;
286                 return -1;
287         }
288
289         if (sa_socklen > bsda->sa_socklen) {
290                 memset(sa, 0, sa_socklen);
291                 sa_socklen = bsda->sa_socklen;
292         }
293
294         memcpy(sa, &bsda->u.ss, sa_socklen);
295         return sa_socklen;
296 }
297
298 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
299                                        const char *fam,
300                                        const char *addr,
301                                        uint16_t port,
302                                        struct tsocket_address **_addr,
303                                        const char *location)
304 {
305         struct addrinfo hints;
306         struct addrinfo *result = NULL;
307         char port_str[6];
308         int ret;
309
310         ZERO_STRUCT(hints);
311         /*
312          * we use SOCKET_STREAM here to get just one result
313          * back from getaddrinfo().
314          */
315         hints.ai_socktype = SOCK_STREAM;
316         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
317
318         if (strcasecmp(fam, "ip") == 0) {
319                 hints.ai_family = AF_UNSPEC;
320                 if (!addr) {
321 #ifdef HAVE_IPV6
322                         addr = "::";
323 #else
324                         addr = "0.0.0.0";
325 #endif
326                 }
327         } else if (strcasecmp(fam, "ipv4") == 0) {
328                 hints.ai_family = AF_INET;
329                 if (!addr) {
330                         addr = "0.0.0.0";
331                 }
332 #ifdef HAVE_IPV6
333         } else if (strcasecmp(fam, "ipv6") == 0) {
334                 hints.ai_family = AF_INET6;
335                 if (!addr) {
336                         addr = "::";
337                 }
338 #endif
339         } else {
340                 errno = EAFNOSUPPORT;
341                 return -1;
342         }
343
344         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
345
346         ret = getaddrinfo(addr, port_str, &hints, &result);
347         if (ret != 0) {
348                 switch (ret) {
349                 case EAI_FAIL:
350                         errno = EINVAL;
351                         break;
352                 }
353                 ret = -1;
354                 goto done;
355         }
356
357         if (result->ai_socktype != SOCK_STREAM) {
358                 errno = EINVAL;
359                 ret = -1;
360                 goto done;
361         }
362
363         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
364                                                   result->ai_addr,
365                                                   result->ai_addrlen,
366                                                   _addr,
367                                                   location);
368
369 done:
370         if (result) {
371                 freeaddrinfo(result);
372         }
373         return ret;
374 }
375
376 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
377                                        TALLOC_CTX *mem_ctx)
378 {
379         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
380                                            struct tsocket_address_bsd);
381         char addr_str[INET6_ADDRSTRLEN+1];
382         const char *str;
383
384         if (!bsda) {
385                 errno = EINVAL;
386                 return NULL;
387         }
388
389         switch (bsda->u.sa.sa_family) {
390         case AF_INET:
391                 str = inet_ntop(bsda->u.in.sin_family,
392                                 &bsda->u.in.sin_addr,
393                                 addr_str, sizeof(addr_str));
394                 break;
395 #ifdef HAVE_IPV6
396         case AF_INET6:
397                 str = inet_ntop(bsda->u.in6.sin6_family,
398                                 &bsda->u.in6.sin6_addr,
399                                 addr_str, sizeof(addr_str));
400                 break;
401 #endif
402         default:
403                 errno = EINVAL;
404                 return NULL;
405         }
406
407         if (!str) {
408                 return NULL;
409         }
410
411         return talloc_strdup(mem_ctx, str);
412 }
413
414 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
415 {
416         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
417                                            struct tsocket_address_bsd);
418         uint16_t port = 0;
419
420         if (!bsda) {
421                 errno = EINVAL;
422                 return 0;
423         }
424
425         switch (bsda->u.sa.sa_family) {
426         case AF_INET:
427                 port = ntohs(bsda->u.in.sin_port);
428                 break;
429 #ifdef HAVE_IPV6
430         case AF_INET6:
431                 port = ntohs(bsda->u.in6.sin6_port);
432                 break;
433 #endif
434         default:
435                 errno = EINVAL;
436                 return 0;
437         }
438
439         return port;
440 }
441
442 int tsocket_address_inet_set_port(struct tsocket_address *addr,
443                                   uint16_t port)
444 {
445         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
446                                            struct tsocket_address_bsd);
447
448         if (!bsda) {
449                 errno = EINVAL;
450                 return -1;
451         }
452
453         switch (bsda->u.sa.sa_family) {
454         case AF_INET:
455                 bsda->u.in.sin_port = htons(port);
456                 break;
457 #ifdef HAVE_IPV6
458         case AF_INET6:
459                 bsda->u.in6.sin6_port = htons(port);
460                 break;
461 #endif
462         default:
463                 errno = EINVAL;
464                 return -1;
465         }
466
467         return 0;
468 }
469
470 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
471                                     const char *path,
472                                     struct tsocket_address **_addr,
473                                     const char *location)
474 {
475         struct sockaddr_un un;
476         void *p = &un;
477         int ret;
478
479         if (!path) {
480                 path = "";
481         }
482
483         if (strlen(path) > sizeof(un.sun_path)-1) {
484                 errno = ENAMETOOLONG;
485                 return -1;
486         }
487
488         ZERO_STRUCT(un);
489         un.sun_family = AF_UNIX;
490         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
491
492         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
493                                                  (struct sockaddr *)p,
494                                                  sizeof(un),
495                                                  _addr,
496                                                  location);
497
498         return ret;
499 }
500
501 char *tsocket_address_unix_path(const struct tsocket_address *addr,
502                                 TALLOC_CTX *mem_ctx)
503 {
504         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
505                                            struct tsocket_address_bsd);
506         const char *str;
507
508         if (!bsda) {
509                 errno = EINVAL;
510                 return NULL;
511         }
512
513         switch (bsda->u.sa.sa_family) {
514         case AF_UNIX:
515                 str = bsda->u.un.sun_path;
516                 break;
517         default:
518                 errno = EINVAL;
519                 return NULL;
520         }
521
522         return talloc_strdup(mem_ctx, str);
523 }
524
525 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
526                                         TALLOC_CTX *mem_ctx)
527 {
528         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
529                                            struct tsocket_address_bsd);
530         char *str;
531         char *addr_str;
532         const char *prefix = NULL;
533         uint16_t port;
534
535         switch (bsda->u.sa.sa_family) {
536         case AF_UNIX:
537                 return talloc_asprintf(mem_ctx, "unix:%s",
538                                        bsda->u.un.sun_path);
539         case AF_INET:
540                 prefix = "ipv4";
541                 break;
542 #ifdef HAVE_IPV6
543         case AF_INET6:
544                 prefix = "ipv6";
545                 break;
546 #endif
547         default:
548                 errno = EINVAL;
549                 return NULL;
550         }
551
552         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
553         if (!addr_str) {
554                 return NULL;
555         }
556
557         port = tsocket_address_inet_port(addr);
558
559         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
560                               prefix, addr_str, port);
561         talloc_free(addr_str);
562
563         return str;
564 }
565
566 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
567                                                          TALLOC_CTX *mem_ctx,
568                                                          const char *location)
569 {
570         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
571                                            struct tsocket_address_bsd);
572         struct tsocket_address *copy;
573         int ret;
574
575         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
576                                                  &bsda->u.sa,
577                                                  bsda->sa_socklen,
578                                                  &copy,
579                                                  location);
580         if (ret != 0) {
581                 return NULL;
582         }
583
584         return copy;
585 }
586
/*
 * Operation table attached to every address created by this file
 * (passed to tsocket_address_create()).
 */
static const struct tsocket_address_ops tsocket_address_bsd_ops = {
        .name           = "bsd",
        .string         = tsocket_address_bsd_string,
        .copy           = tsocket_address_bsd_copy,
};
592
/*
 * Per-socket state of a bsd datagram context.
 */
struct tdgram_bsd {
        /* socket descriptor; -1 is treated as "not connected" */
        int fd;

        /* the tevent context 'fde' was registered on (cached for comparison) */
        void *event_ptr;
        /* fd event shared by the read and the write side */
        struct tevent_fd *fde;

        /* callback (and its argument) invoked when the fd is readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback (and its argument) invoked when the fd is writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
604
605 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
606                                    struct tevent_fd *fde,
607                                    uint16_t flags,
608                                    void *private_data)
609 {
610         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
611                                   struct tdgram_bsd);
612
613         if (flags & TEVENT_FD_WRITE) {
614                 bsds->writeable_handler(bsds->writeable_private);
615                 return;
616         }
617         if (flags & TEVENT_FD_READ) {
618                 if (!bsds->readable_handler) {
619                         TEVENT_FD_NOT_READABLE(bsds->fde);
620                         return;
621                 }
622                 bsds->readable_handler(bsds->readable_private);
623                 return;
624         }
625 }
626
627 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
628                                            struct tevent_context *ev,
629                                            void (*handler)(void *private_data),
630                                            void *private_data)
631 {
632         if (ev == NULL) {
633                 if (handler) {
634                         errno = EINVAL;
635                         return -1;
636                 }
637                 if (!bsds->readable_handler) {
638                         return 0;
639                 }
640                 bsds->readable_handler = NULL;
641                 bsds->readable_private = NULL;
642
643                 return 0;
644         }
645
646         /* read and write must use the same tevent_context */
647         if (bsds->event_ptr != ev) {
648                 if (bsds->readable_handler || bsds->writeable_handler) {
649                         errno = EINVAL;
650                         return -1;
651                 }
652                 bsds->event_ptr = NULL;
653                 TALLOC_FREE(bsds->fde);
654         }
655
656         if (tevent_fd_get_flags(bsds->fde) == 0) {
657                 TALLOC_FREE(bsds->fde);
658
659                 bsds->fde = tevent_add_fd(ev, bsds,
660                                           bsds->fd, TEVENT_FD_READ,
661                                           tdgram_bsd_fde_handler,
662                                           bsds);
663                 if (!bsds->fde) {
664                         errno = ENOMEM;
665                         return -1;
666                 }
667
668                 /* cache the event context we're running on */
669                 bsds->event_ptr = ev;
670         } else if (!bsds->readable_handler) {
671                 TEVENT_FD_READABLE(bsds->fde);
672         }
673
674         bsds->readable_handler = handler;
675         bsds->readable_private = private_data;
676
677         return 0;
678 }
679
680 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
681                                             struct tevent_context *ev,
682                                             void (*handler)(void *private_data),
683                                             void *private_data)
684 {
685         if (ev == NULL) {
686                 if (handler) {
687                         errno = EINVAL;
688                         return -1;
689                 }
690                 if (!bsds->writeable_handler) {
691                         return 0;
692                 }
693                 bsds->writeable_handler = NULL;
694                 bsds->writeable_private = NULL;
695                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
696
697                 return 0;
698         }
699
700         /* read and write must use the same tevent_context */
701         if (bsds->event_ptr != ev) {
702                 if (bsds->readable_handler || bsds->writeable_handler) {
703                         errno = EINVAL;
704                         return -1;
705                 }
706                 bsds->event_ptr = NULL;
707                 TALLOC_FREE(bsds->fde);
708         }
709
710         if (tevent_fd_get_flags(bsds->fde) == 0) {
711                 TALLOC_FREE(bsds->fde);
712
713                 bsds->fde = tevent_add_fd(ev, bsds,
714                                           bsds->fd, TEVENT_FD_WRITE,
715                                           tdgram_bsd_fde_handler,
716                                           bsds);
717                 if (!bsds->fde) {
718                         errno = ENOMEM;
719                         return -1;
720                 }
721
722                 /* cache the event context we're running on */
723                 bsds->event_ptr = ev;
724         } else if (!bsds->writeable_handler) {
725                 TEVENT_FD_WRITEABLE(bsds->fde);
726         }
727
728         bsds->writeable_handler = handler;
729         bsds->writeable_private = private_data;
730
731         return 0;
732 }
733
/*
 * State of one pending recvfrom request.
 */
struct tdgram_bsd_recvfrom_state {
        /* datagram context the request reads from */
        struct tdgram_context *dgram;

        /* received payload and its length; handed to the caller by _recv() */
        uint8_t *buf;
        size_t len;
        /* sender address of the received datagram */
        struct tsocket_address *src;
};
741
742 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
743 {
744         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
745                                   struct tdgram_bsd);
746
747         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
748
749         return 0;
750 }
751
752 static void tdgram_bsd_recvfrom_handler(void *private_data);
753
754 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
755                                         struct tevent_context *ev,
756                                         struct tdgram_context *dgram)
757 {
758         struct tevent_req *req;
759         struct tdgram_bsd_recvfrom_state *state;
760         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
761         int ret;
762
763         req = tevent_req_create(mem_ctx, &state,
764                                 struct tdgram_bsd_recvfrom_state);
765         if (!req) {
766                 return NULL;
767         }
768
769         state->dgram    = dgram;
770         state->buf      = NULL;
771         state->len      = 0;
772         state->src      = NULL;
773
774         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
775
776         if (bsds->fd == -1) {
777                 tevent_req_error(req, ENOTCONN);
778                 goto post;
779         }
780
781         /*
782          * this is a fast path, not waiting for the
783          * socket to become explicit readable gains
784          * about 10%-20% performance in benchmark tests.
785          */
786         tdgram_bsd_recvfrom_handler(req);
787         if (!tevent_req_is_in_progress(req)) {
788                 goto post;
789         }
790
791         ret = tdgram_bsd_set_readable_handler(bsds, ev,
792                                               tdgram_bsd_recvfrom_handler,
793                                               req);
794         if (ret == -1) {
795                 tevent_req_error(req, errno);
796                 goto post;
797         }
798
799         return req;
800
801  post:
802         tevent_req_post(req, ev);
803         return req;
804 }
805
806 static void tdgram_bsd_recvfrom_handler(void *private_data)
807 {
808         struct tevent_req *req = talloc_get_type_abort(private_data,
809                                  struct tevent_req);
810         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
811                                         struct tdgram_bsd_recvfrom_state);
812         struct tdgram_context *dgram = state->dgram;
813         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
814         struct tsocket_address_bsd *bsda;
815         ssize_t ret;
816         int err;
817         bool retry;
818
819         ret = tsocket_bsd_pending(bsds->fd);
820         if (ret == 0) {
821                 /* retry later */
822                 return;
823         }
824         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
825         if (retry) {
826                 /* retry later */
827                 return;
828         }
829         if (tevent_req_error(req, err)) {
830                 return;
831         }
832
833         state->buf = talloc_array(state, uint8_t, ret);
834         if (tevent_req_nomem(state->buf, req)) {
835                 return;
836         }
837         state->len = ret;
838
839         state->src = tsocket_address_create(state,
840                                             &tsocket_address_bsd_ops,
841                                             &bsda,
842                                             struct tsocket_address_bsd,
843                                             __location__ "bsd_recvfrom");
844         if (tevent_req_nomem(state->src, req)) {
845                 return;
846         }
847
848         ZERO_STRUCTP(bsda);
849         bsda->sa_socklen = sizeof(bsda->u.ss);
850
851         ret = recvfrom(bsds->fd, state->buf, state->len, 0,
852                        &bsda->u.sa, &bsda->sa_socklen);
853         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
854         if (retry) {
855                 /* retry later */
856                 return;
857         }
858         if (tevent_req_error(req, err)) {
859                 return;
860         }
861
862         /*
863          * Some systems (FreeBSD, see bug #7115) return too much
864          * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
865          * the return value includes some IP/UDP header bytes,
866          * while recvfrom() just returns the payload.
867          */
868         state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
869         if (tevent_req_nomem(state->buf, req)) {
870                 return;
871         }
872         state->len = ret;
873
874         tevent_req_done(req);
875 }
876
877 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
878                                         int *perrno,
879                                         TALLOC_CTX *mem_ctx,
880                                         uint8_t **buf,
881                                         struct tsocket_address **src)
882 {
883         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
884                                         struct tdgram_bsd_recvfrom_state);
885         ssize_t ret;
886
887         ret = tsocket_simple_int_recv(req, perrno);
888         if (ret == 0) {
889                 *buf = talloc_move(mem_ctx, &state->buf);
890                 ret = state->len;
891                 if (src) {
892                         *src = talloc_move(mem_ctx, &state->src);
893                 }
894         }
895
896         tevent_req_received(req);
897         return ret;
898 }
899
/*
 * State of one pending sendto request.
 */
struct tdgram_bsd_sendto_state {
        /* datagram context the request writes to */
        struct tdgram_context *dgram;

        /* payload to send and its length; the pointer is stored, not copied */
        const uint8_t *buf;
        size_t len;
        /* destination address; may be NULL (sendto() is then called without one) */
        const struct tsocket_address *dst;

        /* initialised to -1 when the request is created */
        ssize_t ret;
};
909
910 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
911 {
912         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
913                                   struct tdgram_bsd);
914
915         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
916
917         return 0;
918 }
919
920 static void tdgram_bsd_sendto_handler(void *private_data);
921
922 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
923                                                  struct tevent_context *ev,
924                                                  struct tdgram_context *dgram,
925                                                  const uint8_t *buf,
926                                                  size_t len,
927                                                  const struct tsocket_address *dst)
928 {
929         struct tevent_req *req;
930         struct tdgram_bsd_sendto_state *state;
931         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
932         int ret;
933
934         req = tevent_req_create(mem_ctx, &state,
935                                 struct tdgram_bsd_sendto_state);
936         if (!req) {
937                 return NULL;
938         }
939
940         state->dgram    = dgram;
941         state->buf      = buf;
942         state->len      = len;
943         state->dst      = dst;
944         state->ret      = -1;
945
946         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
947
948         if (bsds->fd == -1) {
949                 tevent_req_error(req, ENOTCONN);
950                 goto post;
951         }
952
953         /*
954          * this is a fast path, not waiting for the
955          * socket to become explicit writeable gains
956          * about 10%-20% performance in benchmark tests.
957          */
958         tdgram_bsd_sendto_handler(req);
959         if (!tevent_req_is_in_progress(req)) {
960                 goto post;
961         }
962
963         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
964                                                tdgram_bsd_sendto_handler,
965                                                req);
966         if (ret == -1) {
967                 tevent_req_error(req, errno);
968                 goto post;
969         }
970
971         return req;
972
973  post:
974         tevent_req_post(req, ev);
975         return req;
976 }
977
978 static void tdgram_bsd_sendto_handler(void *private_data)
979 {
980         struct tevent_req *req = talloc_get_type_abort(private_data,
981                                  struct tevent_req);
982         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
983                                         struct tdgram_bsd_sendto_state);
984         struct tdgram_context *dgram = state->dgram;
985         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
986         struct sockaddr *sa = NULL;
987         socklen_t sa_socklen = 0;
988         ssize_t ret;
989         int err;
990         bool retry;
991
992         if (state->dst) {
993                 struct tsocket_address_bsd *bsda =
994                         talloc_get_type(state->dst->private_data,
995                         struct tsocket_address_bsd);
996
997                 sa = &bsda->u.sa;
998                 sa_socklen = bsda->sa_socklen;
999         }
1000
1001         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1002         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1003         if (retry) {
1004                 /* retry later */
1005                 return;
1006         }
1007         if (tevent_req_error(req, err)) {
1008                 return;
1009         }
1010
1011         state->ret = ret;
1012
1013         tevent_req_done(req);
1014 }
1015
1016 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1017 {
1018         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1019                                         struct tdgram_bsd_sendto_state);
1020         ssize_t ret;
1021
1022         ret = tsocket_simple_int_recv(req, perrno);
1023         if (ret == 0) {
1024                 ret = state->ret;
1025         }
1026
1027         tevent_req_received(req);
1028         return ret;
1029 }
1030
/*
 * State of a datagram disconnect request. Nothing in this section
 * reads the member; it only gives tevent_req_create() a type.
 */
struct tdgram_bsd_disconnect_state {
        uint8_t __dummy;
};
1034
1035 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1036                                                      struct tevent_context *ev,
1037                                                      struct tdgram_context *dgram)
1038 {
1039         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1040         struct tevent_req *req;
1041         struct tdgram_bsd_disconnect_state *state;
1042         int ret;
1043         int err;
1044         bool dummy;
1045
1046         req = tevent_req_create(mem_ctx, &state,
1047                                 struct tdgram_bsd_disconnect_state);
1048         if (req == NULL) {
1049                 return NULL;
1050         }
1051
1052         if (bsds->fd == -1) {
1053                 tevent_req_error(req, ENOTCONN);
1054                 goto post;
1055         }
1056
1057         ret = close(bsds->fd);
1058         bsds->fd = -1;
1059         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1060         if (tevent_req_error(req, err)) {
1061                 goto post;
1062         }
1063
1064         tevent_req_done(req);
1065 post:
1066         tevent_req_post(req, ev);
1067         return req;
1068 }
1069
/*
 * Collect the result of a datagram disconnect request: returns what
 * tsocket_simple_int_recv() reports (perrno is handed to it) and marks
 * the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);

        return result;
}
1080
1081 static const struct tdgram_context_ops tdgram_bsd_ops = {
1082         .name                   = "bsd",
1083
1084         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1085         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1086
1087         .sendto_send            = tdgram_bsd_sendto_send,
1088         .sendto_recv            = tdgram_bsd_sendto_recv,
1089
1090         .disconnect_send        = tdgram_bsd_disconnect_send,
1091         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1092 };
1093
1094 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1095 {
1096         TALLOC_FREE(bsds->fde);
1097         if (bsds->fd != -1) {
1098                 close(bsds->fd);
1099                 bsds->fd = -1;
1100         }
1101         return 0;
1102 }
1103
1104 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1105                                    const struct tsocket_address *remote,
1106                                    bool broadcast,
1107                                    TALLOC_CTX *mem_ctx,
1108                                    struct tdgram_context **_dgram,
1109                                    const char *location)
1110 {
1111         struct tsocket_address_bsd *lbsda =
1112                 talloc_get_type_abort(local->private_data,
1113                 struct tsocket_address_bsd);
1114         struct tsocket_address_bsd *rbsda = NULL;
1115         struct tdgram_context *dgram;
1116         struct tdgram_bsd *bsds;
1117         int fd;
1118         int ret;
1119         bool do_bind = false;
1120         bool do_reuseaddr = false;
1121         bool do_ipv6only = false;
1122         bool is_inet = false;
1123         int sa_fam = lbsda->u.sa.sa_family;
1124
1125         if (remote) {
1126                 rbsda = talloc_get_type_abort(remote->private_data,
1127                         struct tsocket_address_bsd);
1128         }
1129
1130         switch (lbsda->u.sa.sa_family) {
1131         case AF_UNIX:
1132                 if (broadcast) {
1133                         errno = EINVAL;
1134                         return -1;
1135                 }
1136                 if (lbsda->u.un.sun_path[0] != 0) {
1137                         do_reuseaddr = true;
1138                         do_bind = true;
1139                 }
1140                 break;
1141         case AF_INET:
1142                 if (lbsda->u.in.sin_port != 0) {
1143                         do_reuseaddr = true;
1144                         do_bind = true;
1145                 }
1146                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1147                         do_bind = true;
1148                 }
1149                 is_inet = true;
1150                 break;
1151 #ifdef HAVE_IPV6
1152         case AF_INET6:
1153                 if (lbsda->u.in6.sin6_port != 0) {
1154                         do_reuseaddr = true;
1155                         do_bind = true;
1156                 }
1157                 if (memcmp(&in6addr_any,
1158                            &lbsda->u.in6.sin6_addr,
1159                            sizeof(in6addr_any)) != 0) {
1160                         do_bind = true;
1161                 }
1162                 is_inet = true;
1163                 do_ipv6only = true;
1164                 break;
1165 #endif
1166         default:
1167                 errno = EINVAL;
1168                 return -1;
1169         }
1170
1171         if (!do_bind && is_inet && rbsda) {
1172                 sa_fam = rbsda->u.sa.sa_family;
1173                 switch (sa_fam) {
1174                 case AF_INET:
1175                         do_ipv6only = false;
1176                         break;
1177 #ifdef HAVE_IPV6
1178                 case AF_INET6:
1179                         do_ipv6only = true;
1180                         break;
1181 #endif
1182                 }
1183         }
1184
1185         fd = socket(sa_fam, SOCK_DGRAM, 0);
1186         if (fd < 0) {
1187                 return fd;
1188         }
1189
1190         fd = tsocket_bsd_common_prepare_fd(fd, true);
1191         if (fd < 0) {
1192                 return fd;
1193         }
1194
1195         dgram = tdgram_context_create(mem_ctx,
1196                                       &tdgram_bsd_ops,
1197                                       &bsds,
1198                                       struct tdgram_bsd,
1199                                       location);
1200         if (!dgram) {
1201                 int saved_errno = errno;
1202                 close(fd);
1203                 errno = saved_errno;
1204                 return -1;
1205         }
1206         ZERO_STRUCTP(bsds);
1207         bsds->fd = fd;
1208         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1209
1210 #ifdef HAVE_IPV6
1211         if (do_ipv6only) {
1212                 int val = 1;
1213
1214                 ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
1215                                  (const void *)&val, sizeof(val));
1216                 if (ret == -1) {
1217                         int saved_errno = errno;
1218                         talloc_free(dgram);
1219                         errno = saved_errno;
1220                         return ret;
1221                 }
1222         }
1223 #endif
1224
1225         if (broadcast) {
1226                 int val = 1;
1227
1228                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1229                                  (const void *)&val, sizeof(val));
1230                 if (ret == -1) {
1231                         int saved_errno = errno;
1232                         talloc_free(dgram);
1233                         errno = saved_errno;
1234                         return ret;
1235                 }
1236         }
1237
1238         if (do_reuseaddr) {
1239                 int val = 1;
1240
1241                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1242                                  (const void *)&val, sizeof(val));
1243                 if (ret == -1) {
1244                         int saved_errno = errno;
1245                         talloc_free(dgram);
1246                         errno = saved_errno;
1247                         return ret;
1248                 }
1249         }
1250
1251         if (do_bind) {
1252                 ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
1253                 if (ret == -1) {
1254                         int saved_errno = errno;
1255                         talloc_free(dgram);
1256                         errno = saved_errno;
1257                         return ret;
1258                 }
1259         }
1260
1261         if (rbsda) {
1262                 if (rbsda->u.sa.sa_family != sa_fam) {
1263                         talloc_free(dgram);
1264                         errno = EINVAL;
1265                         return -1;
1266                 }
1267
1268                 ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
1269                 if (ret == -1) {
1270                         int saved_errno = errno;
1271                         talloc_free(dgram);
1272                         errno = saved_errno;
1273                         return ret;
1274                 }
1275         }
1276
1277         *_dgram = dgram;
1278         return 0;
1279 }
1280
1281 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1282                             const struct tsocket_address *remote,
1283                             TALLOC_CTX *mem_ctx,
1284                             struct tdgram_context **dgram,
1285                             const char *location)
1286 {
1287         struct tsocket_address_bsd *lbsda =
1288                 talloc_get_type_abort(local->private_data,
1289                 struct tsocket_address_bsd);
1290         int ret;
1291
1292         switch (lbsda->u.sa.sa_family) {
1293         case AF_INET:
1294                 break;
1295 #ifdef HAVE_IPV6
1296         case AF_INET6:
1297                 break;
1298 #endif
1299         default:
1300                 errno = EINVAL;
1301                 return -1;
1302         }
1303
1304         ret = tdgram_bsd_dgram_socket(local, remote, false,
1305                                       mem_ctx, dgram, location);
1306
1307         return ret;
1308 }
1309
1310 int _tdgram_unix_socket(const struct tsocket_address *local,
1311                         const struct tsocket_address *remote,
1312                         TALLOC_CTX *mem_ctx,
1313                         struct tdgram_context **dgram,
1314                         const char *location)
1315 {
1316         struct tsocket_address_bsd *lbsda =
1317                 talloc_get_type_abort(local->private_data,
1318                 struct tsocket_address_bsd);
1319         int ret;
1320
1321         switch (lbsda->u.sa.sa_family) {
1322         case AF_UNIX:
1323                 break;
1324         default:
1325                 errno = EINVAL;
1326                 return -1;
1327         }
1328
1329         ret = tdgram_bsd_dgram_socket(local, remote, false,
1330                                       mem_ctx, dgram, location);
1331
1332         return ret;
1333 }
1334
/*
 * Private data of a BSD socket based tstream_context.
 */
struct tstream_bsd {
        /* socket descriptor, -1 once it has been closed */
        int fd;

        /* event context the fd event below was created on */
        void *event_ptr;
        /* fd event shared by the readable and the writeable handler */
        struct tevent_fd *fde;

        /* callback (and its argument) to run when the fd is readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback (and its argument) to run when the fd is writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
1346
1347 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1348                                     struct tevent_fd *fde,
1349                                     uint16_t flags,
1350                                     void *private_data)
1351 {
1352         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1353                                    struct tstream_bsd);
1354
1355         if (flags & TEVENT_FD_WRITE) {
1356                 bsds->writeable_handler(bsds->writeable_private);
1357                 return;
1358         }
1359         if (flags & TEVENT_FD_READ) {
1360                 if (!bsds->readable_handler) {
1361                         if (bsds->writeable_handler) {
1362                                 bsds->writeable_handler(bsds->writeable_private);
1363                                 return;
1364                         }
1365                         TEVENT_FD_NOT_READABLE(bsds->fde);
1366                         return;
1367                 }
1368                 bsds->readable_handler(bsds->readable_private);
1369                 return;
1370         }
1371 }
1372
1373 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1374                                             struct tevent_context *ev,
1375                                             void (*handler)(void *private_data),
1376                                             void *private_data)
1377 {
1378         if (ev == NULL) {
1379                 if (handler) {
1380                         errno = EINVAL;
1381                         return -1;
1382                 }
1383                 if (!bsds->readable_handler) {
1384                         return 0;
1385                 }
1386                 bsds->readable_handler = NULL;
1387                 bsds->readable_private = NULL;
1388
1389                 return 0;
1390         }
1391
1392         /* read and write must use the same tevent_context */
1393         if (bsds->event_ptr != ev) {
1394                 if (bsds->readable_handler || bsds->writeable_handler) {
1395                         errno = EINVAL;
1396                         return -1;
1397                 }
1398                 bsds->event_ptr = NULL;
1399                 TALLOC_FREE(bsds->fde);
1400         }
1401
1402         if (tevent_fd_get_flags(bsds->fde) == 0) {
1403                 TALLOC_FREE(bsds->fde);
1404
1405                 bsds->fde = tevent_add_fd(ev, bsds,
1406                                           bsds->fd, TEVENT_FD_READ,
1407                                           tstream_bsd_fde_handler,
1408                                           bsds);
1409                 if (!bsds->fde) {
1410                         errno = ENOMEM;
1411                         return -1;
1412                 }
1413
1414                 /* cache the event context we're running on */
1415                 bsds->event_ptr = ev;
1416         } else if (!bsds->readable_handler) {
1417                 TEVENT_FD_READABLE(bsds->fde);
1418         }
1419
1420         bsds->readable_handler = handler;
1421         bsds->readable_private = private_data;
1422
1423         return 0;
1424 }
1425
1426 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1427                                              struct tevent_context *ev,
1428                                              void (*handler)(void *private_data),
1429                                              void *private_data)
1430 {
1431         if (ev == NULL) {
1432                 if (handler) {
1433                         errno = EINVAL;
1434                         return -1;
1435                 }
1436                 if (!bsds->writeable_handler) {
1437                         return 0;
1438                 }
1439                 bsds->writeable_handler = NULL;
1440                 bsds->writeable_private = NULL;
1441                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1442
1443                 return 0;
1444         }
1445
1446         /* read and write must use the same tevent_context */
1447         if (bsds->event_ptr != ev) {
1448                 if (bsds->readable_handler || bsds->writeable_handler) {
1449                         errno = EINVAL;
1450                         return -1;
1451                 }
1452                 bsds->event_ptr = NULL;
1453                 TALLOC_FREE(bsds->fde);
1454         }
1455
1456         if (tevent_fd_get_flags(bsds->fde) == 0) {
1457                 TALLOC_FREE(bsds->fde);
1458
1459                 bsds->fde = tevent_add_fd(ev, bsds,
1460                                           bsds->fd,
1461                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1462                                           tstream_bsd_fde_handler,
1463                                           bsds);
1464                 if (!bsds->fde) {
1465                         errno = ENOMEM;
1466                         return -1;
1467                 }
1468
1469                 /* cache the event context we're running on */
1470                 bsds->event_ptr = ev;
1471         } else if (!bsds->writeable_handler) {
1472                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1473                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1474                 tevent_fd_set_flags(bsds->fde, flags);
1475         }
1476
1477         bsds->writeable_handler = handler;
1478         bsds->writeable_private = private_data;
1479
1480         return 0;
1481 }
1482
1483 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1484 {
1485         struct tstream_bsd *bsds = tstream_context_data(stream,
1486                                    struct tstream_bsd);
1487         ssize_t ret;
1488
1489         if (bsds->fd == -1) {
1490                 errno = ENOTCONN;
1491                 return -1;
1492         }
1493
1494         ret = tsocket_bsd_pending(bsds->fd);
1495
1496         return ret;
1497 }
1498
/*
 * Per-request state of an asynchronous readv on a BSD stream socket.
 */
struct tstream_bsd_readv_state {
        /* stream context the request operates on */
        struct tstream_context *stream;

        /*
         * private copy of the caller's iovec array and the number of
         * entries left; both are advanced as data arrives
         */
        struct iovec *vector;
        size_t count;

        /* number of bytes read so far */
        int ret;
};
1507
1508 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1509 {
1510         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1511                                    struct tstream_bsd);
1512
1513         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1514
1515         return 0;
1516 }
1517
1518 static void tstream_bsd_readv_handler(void *private_data);
1519
1520 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1521                                         struct tevent_context *ev,
1522                                         struct tstream_context *stream,
1523                                         struct iovec *vector,
1524                                         size_t count)
1525 {
1526         struct tevent_req *req;
1527         struct tstream_bsd_readv_state *state;
1528         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1529         int ret;
1530
1531         req = tevent_req_create(mem_ctx, &state,
1532                                 struct tstream_bsd_readv_state);
1533         if (!req) {
1534                 return NULL;
1535         }
1536
1537         state->stream   = stream;
1538         /* we make a copy of the vector so that we can modify it */
1539         state->vector   = talloc_array(state, struct iovec, count);
1540         if (tevent_req_nomem(state->vector, req)) {
1541                 goto post;
1542         }
1543         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1544         state->count    = count;
1545         state->ret      = 0;
1546
1547         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1548
1549         if (bsds->fd == -1) {
1550                 tevent_req_error(req, ENOTCONN);
1551                 goto post;
1552         }
1553
1554         /*
1555          * this is a fast path, not waiting for the
1556          * socket to become explicit readable gains
1557          * about 10%-20% performance in benchmark tests.
1558          */
1559         tstream_bsd_readv_handler(req);
1560         if (!tevent_req_is_in_progress(req)) {
1561                 goto post;
1562         }
1563
1564         ret = tstream_bsd_set_readable_handler(bsds, ev,
1565                                               tstream_bsd_readv_handler,
1566                                               req);
1567         if (ret == -1) {
1568                 tevent_req_error(req, errno);
1569                 goto post;
1570         }
1571
1572         return req;
1573
1574  post:
1575         tevent_req_post(req, ev);
1576         return req;
1577 }
1578
1579 static void tstream_bsd_readv_handler(void *private_data)
1580 {
1581         struct tevent_req *req = talloc_get_type_abort(private_data,
1582                                  struct tevent_req);
1583         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1584                                         struct tstream_bsd_readv_state);
1585         struct tstream_context *stream = state->stream;
1586         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1587         int ret;
1588         int err;
1589         bool retry;
1590
1591         ret = readv(bsds->fd, state->vector, state->count);
1592         if (ret == 0) {
1593                 /* propagate end of file */
1594                 tevent_req_error(req, EPIPE);
1595                 return;
1596         }
1597         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1598         if (retry) {
1599                 /* retry later */
1600                 return;
1601         }
1602         if (tevent_req_error(req, err)) {
1603                 return;
1604         }
1605
1606         state->ret += ret;
1607
1608         while (ret > 0) {
1609                 if (ret < state->vector[0].iov_len) {
1610                         uint8_t *base;
1611                         base = (uint8_t *)state->vector[0].iov_base;
1612                         base += ret;
1613                         state->vector[0].iov_base = base;
1614                         state->vector[0].iov_len -= ret;
1615                         break;
1616                 }
1617                 ret -= state->vector[0].iov_len;
1618                 state->vector += 1;
1619                 state->count -= 1;
1620         }
1621
1622         /*
1623          * there're maybe some empty vectors at the end
1624          * which we need to skip, otherwise we would get
1625          * ret == 0 from the readv() call and return EPIPE
1626          */
1627         while (state->count > 0) {
1628                 if (state->vector[0].iov_len > 0) {
1629                         break;
1630                 }
1631                 state->vector += 1;
1632                 state->count -= 1;
1633         }
1634
1635         if (state->count > 0) {
1636                 /* we have more to read */
1637                 return;
1638         }
1639
1640         tevent_req_done(req);
1641 }
1642
1643 static int tstream_bsd_readv_recv(struct tevent_req *req,
1644                                   int *perrno)
1645 {
1646         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1647                                         struct tstream_bsd_readv_state);
1648         int ret;
1649
1650         ret = tsocket_simple_int_recv(req, perrno);
1651         if (ret == 0) {
1652                 ret = state->ret;
1653         }
1654
1655         tevent_req_received(req);
1656         return ret;
1657 }
1658
/*
 * Per-request state of an asynchronous writev on a BSD stream socket.
 */
struct tstream_bsd_writev_state {
        /* stream context the request operates on */
        struct tstream_context *stream;

        /*
         * private copy of the caller's iovec array and the number of
         * entries left; both are advanced as data is written
         */
        struct iovec *vector;
        size_t count;

        /* number of bytes written so far */
        int ret;
};
1667
1668 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1669 {
1670         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1671                                   struct tstream_bsd);
1672
1673         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1674
1675         return 0;
1676 }
1677
1678 static void tstream_bsd_writev_handler(void *private_data);
1679
1680 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1681                                                  struct tevent_context *ev,
1682                                                  struct tstream_context *stream,
1683                                                  const struct iovec *vector,
1684                                                  size_t count)
1685 {
1686         struct tevent_req *req;
1687         struct tstream_bsd_writev_state *state;
1688         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1689         int ret;
1690
1691         req = tevent_req_create(mem_ctx, &state,
1692                                 struct tstream_bsd_writev_state);
1693         if (!req) {
1694                 return NULL;
1695         }
1696
1697         state->stream   = stream;
1698         /* we make a copy of the vector so that we can modify it */
1699         state->vector   = talloc_array(state, struct iovec, count);
1700         if (tevent_req_nomem(state->vector, req)) {
1701                 goto post;
1702         }
1703         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1704         state->count    = count;
1705         state->ret      = 0;
1706
1707         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1708
1709         if (bsds->fd == -1) {
1710                 tevent_req_error(req, ENOTCONN);
1711                 goto post;
1712         }
1713
1714         /*
1715          * this is a fast path, not waiting for the
1716          * socket to become explicit writeable gains
1717          * about 10%-20% performance in benchmark tests.
1718          */
1719         tstream_bsd_writev_handler(req);
1720         if (!tevent_req_is_in_progress(req)) {
1721                 goto post;
1722         }
1723
1724         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1725                                                tstream_bsd_writev_handler,
1726                                                req);
1727         if (ret == -1) {
1728                 tevent_req_error(req, errno);
1729                 goto post;
1730         }
1731
1732         return req;
1733
1734  post:
1735         tevent_req_post(req, ev);
1736         return req;
1737 }
1738
1739 static void tstream_bsd_writev_handler(void *private_data)
1740 {
1741         struct tevent_req *req = talloc_get_type_abort(private_data,
1742                                  struct tevent_req);
1743         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1744                                         struct tstream_bsd_writev_state);
1745         struct tstream_context *stream = state->stream;
1746         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1747         ssize_t ret;
1748         int err;
1749         bool retry;
1750
1751         ret = writev(bsds->fd, state->vector, state->count);
1752         if (ret == 0) {
1753                 /* propagate end of file */
1754                 tevent_req_error(req, EPIPE);
1755                 return;
1756         }
1757         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1758         if (retry) {
1759                 /* retry later */
1760                 return;
1761         }
1762         if (tevent_req_error(req, err)) {
1763                 return;
1764         }
1765
1766         state->ret += ret;
1767
1768         while (ret > 0) {
1769                 if (ret < state->vector[0].iov_len) {
1770                         uint8_t *base;
1771                         base = (uint8_t *)state->vector[0].iov_base;
1772                         base += ret;
1773                         state->vector[0].iov_base = base;
1774                         state->vector[0].iov_len -= ret;
1775                         break;
1776                 }
1777                 ret -= state->vector[0].iov_len;
1778                 state->vector += 1;
1779                 state->count -= 1;
1780         }
1781
1782         /*
1783          * there're maybe some empty vectors at the end
1784          * which we need to skip, otherwise we would get
1785          * ret == 0 from the writev() call and return EPIPE
1786          */
1787         while (state->count > 0) {
1788                 if (state->vector[0].iov_len > 0) {
1789                         break;
1790                 }
1791                 state->vector += 1;
1792                 state->count -= 1;
1793         }
1794
1795         if (state->count > 0) {
1796                 /* we have more to read */
1797                 return;
1798         }
1799
1800         tevent_req_done(req);
1801 }
1802
1803 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1804 {
1805         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1806                                         struct tstream_bsd_writev_state);
1807         int ret;
1808
1809         ret = tsocket_simple_int_recv(req, perrno);
1810         if (ret == 0) {
1811                 ret = state->ret;
1812         }
1813
1814         tevent_req_received(req);
1815         return ret;
1816 }
1817
/*
 * State of a stream disconnect request. Nothing in this section reads
 * the member; it only gives tevent_req_create() a type.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy;
};
1821
1822 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1823                                                      struct tevent_context *ev,
1824                                                      struct tstream_context *stream)
1825 {
1826         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1827         struct tevent_req *req;
1828         struct tstream_bsd_disconnect_state *state;
1829         int ret;
1830         int err;
1831         bool dummy;
1832
1833         req = tevent_req_create(mem_ctx, &state,
1834                                 struct tstream_bsd_disconnect_state);
1835         if (req == NULL) {
1836                 return NULL;
1837         }
1838
1839         if (bsds->fd == -1) {
1840                 tevent_req_error(req, ENOTCONN);
1841                 goto post;
1842         }
1843
1844         ret = close(bsds->fd);
1845         bsds->fd = -1;
1846         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1847         if (tevent_req_error(req, err)) {
1848                 goto post;
1849         }
1850
1851         tevent_req_done(req);
1852 post:
1853         tevent_req_post(req, ev);
1854         return req;
1855 }
1856
/*
 * Collect the result of a disconnect request.
 *
 * Returns whatever tsocket_simple_int_recv() reports for the request
 * (perrno is passed on to it) and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        const int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1867
1868 static const struct tstream_context_ops tstream_bsd_ops = {
1869         .name                   = "bsd",
1870
1871         .pending_bytes          = tstream_bsd_pending_bytes,
1872
1873         .readv_send             = tstream_bsd_readv_send,
1874         .readv_recv             = tstream_bsd_readv_recv,
1875
1876         .writev_send            = tstream_bsd_writev_send,
1877         .writev_recv            = tstream_bsd_writev_recv,
1878
1879         .disconnect_send        = tstream_bsd_disconnect_send,
1880         .disconnect_recv        = tstream_bsd_disconnect_recv,
1881 };
1882
1883 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1884 {
1885         TALLOC_FREE(bsds->fde);
1886         if (bsds->fd != -1) {
1887                 close(bsds->fd);
1888                 bsds->fd = -1;
1889         }
1890         return 0;
1891 }
1892
1893 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1894                                  int fd,
1895                                  struct tstream_context **_stream,
1896                                  const char *location)
1897 {
1898         struct tstream_context *stream;
1899         struct tstream_bsd *bsds;
1900
1901         stream = tstream_context_create(mem_ctx,
1902                                         &tstream_bsd_ops,
1903                                         &bsds,
1904                                         struct tstream_bsd,
1905                                         location);
1906         if (!stream) {
1907                 return -1;
1908         }
1909         ZERO_STRUCTP(bsds);
1910         bsds->fd = fd;
1911         talloc_set_destructor(bsds, tstream_bsd_destructor);
1912
1913         *_stream = stream;
1914         return 0;
1915 }
1916
/*
 * Per-request state of an asynchronous connect.
 *
 * fd      socket being connected, -1 while no socket is owned
 * fde     fd event used to wait for the connect to finish (may be NULL)
 * stream  not referenced by the connect code shown in this file
 */
struct tstream_bsd_connect_state {
        int fd;
        struct tevent_fd *fde;
        /* type name fixed: was misspelled "tstream_conext" */
        struct tstream_context *stream;
};
1922
1923 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1924 {
1925         TALLOC_FREE(state->fde);
1926         if (state->fd != -1) {
1927                 close(state->fd);
1928                 state->fd = -1;
1929         }
1930
1931         return 0;
1932 }
1933
1934 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1935                                             struct tevent_fd *fde,
1936                                             uint16_t flags,
1937                                             void *private_data);
1938
1939 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1940                                         struct tevent_context *ev,
1941                                         int sys_errno,
1942                                         const struct tsocket_address *local,
1943                                         const struct tsocket_address *remote)
1944 {
1945         struct tevent_req *req;
1946         struct tstream_bsd_connect_state *state;
1947         struct tsocket_address_bsd *lbsda =
1948                 talloc_get_type_abort(local->private_data,
1949                 struct tsocket_address_bsd);
1950         struct tsocket_address_bsd *rbsda =
1951                 talloc_get_type_abort(remote->private_data,
1952                 struct tsocket_address_bsd);
1953         int ret;
1954         int err;
1955         bool retry;
1956         bool do_bind = false;
1957         bool do_reuseaddr = false;
1958         bool do_ipv6only = false;
1959         bool is_inet = false;
1960         int sa_fam = lbsda->u.sa.sa_family;
1961
1962         req = tevent_req_create(mem_ctx, &state,
1963                                 struct tstream_bsd_connect_state);
1964         if (!req) {
1965                 return NULL;
1966         }
1967         state->fd = -1;
1968         state->fde = NULL;
1969
1970         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1971
1972         /* give the wrappers a chance to report an error */
1973         if (sys_errno != 0) {
1974                 tevent_req_error(req, sys_errno);
1975                 goto post;
1976         }
1977
1978         switch (lbsda->u.sa.sa_family) {
1979         case AF_UNIX:
1980                 if (lbsda->u.un.sun_path[0] != 0) {
1981                         do_reuseaddr = true;
1982                         do_bind = true;
1983                 }
1984                 break;
1985         case AF_INET:
1986                 if (lbsda->u.in.sin_port != 0) {
1987                         do_reuseaddr = true;
1988                         do_bind = true;
1989                 }
1990                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1991                         do_bind = true;
1992                 }
1993                 is_inet = true;
1994                 break;
1995 #ifdef HAVE_IPV6
1996         case AF_INET6:
1997                 if (lbsda->u.in6.sin6_port != 0) {
1998                         do_reuseaddr = true;
1999                         do_bind = true;
2000                 }
2001                 if (memcmp(&in6addr_any,
2002                            &lbsda->u.in6.sin6_addr,
2003                            sizeof(in6addr_any)) != 0) {
2004                         do_bind = true;
2005                 }
2006                 is_inet = true;
2007                 do_ipv6only = true;
2008                 break;
2009 #endif
2010         default:
2011                 tevent_req_error(req, EINVAL);
2012                 goto post;
2013         }
2014
2015         if (!do_bind && is_inet) {
2016                 sa_fam = rbsda->u.sa.sa_family;
2017                 switch (sa_fam) {
2018                 case AF_INET:
2019                         do_ipv6only = false;
2020                         break;
2021 #ifdef HAVE_IPV6
2022                 case AF_INET6:
2023                         do_ipv6only = true;
2024                         break;
2025 #endif
2026                 }
2027         }
2028
2029         state->fd = socket(sa_fam, SOCK_STREAM, 0);
2030         if (state->fd == -1) {
2031                 tevent_req_error(req, errno);
2032                 goto post;
2033         }
2034
2035         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2036         if (state->fd == -1) {
2037                 tevent_req_error(req, errno);
2038                 goto post;
2039         }
2040
2041 #ifdef HAVE_IPV6
2042         if (do_ipv6only) {
2043                 int val = 1;
2044
2045                 ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
2046                                  (const void *)&val, sizeof(val));
2047                 if (ret == -1) {
2048                         tevent_req_error(req, errno);
2049                         goto post;
2050                 }
2051         }
2052 #endif
2053
2054         if (do_reuseaddr) {
2055                 int val = 1;
2056
2057                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2058                                  (const void *)&val, sizeof(val));
2059                 if (ret == -1) {
2060                         tevent_req_error(req, errno);
2061                         goto post;
2062                 }
2063         }
2064
2065         if (do_bind) {
2066                 ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
2067                 if (ret == -1) {
2068                         tevent_req_error(req, errno);
2069                         goto post;
2070                 }
2071         }
2072
2073         if (rbsda->u.sa.sa_family != sa_fam) {
2074                 tevent_req_error(req, EINVAL);
2075                 goto post;
2076         }
2077
2078         ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
2079         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2080         if (retry) {
2081                 /* retry later */
2082                 goto async;
2083         }
2084         if (tevent_req_error(req, err)) {
2085                 goto post;
2086         }
2087
2088         tevent_req_done(req);
2089         goto post;
2090
2091  async:
2092         state->fde = tevent_add_fd(ev, state,
2093                                    state->fd,
2094                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2095                                    tstream_bsd_connect_fde_handler,
2096                                    req);
2097         if (tevent_req_nomem(state->fde, req)) {
2098                 goto post;
2099         }
2100
2101         return req;
2102
2103  post:
2104         tevent_req_post(req, ev);
2105         return req;
2106 }
2107
2108 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2109                                             struct tevent_fd *fde,
2110                                             uint16_t flags,
2111                                             void *private_data)
2112 {
2113         struct tevent_req *req = talloc_get_type_abort(private_data,
2114                                  struct tevent_req);
2115         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2116                                         struct tstream_bsd_connect_state);
2117         int ret;
2118         int error=0;
2119         socklen_t len = sizeof(error);
2120         int err;
2121         bool retry;
2122
2123         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2124         if (ret == 0) {
2125                 if (error != 0) {
2126                         errno = error;
2127                         ret = -1;
2128                 }
2129         }
2130         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2131         if (retry) {
2132                 /* retry later */
2133                 return;
2134         }
2135         if (tevent_req_error(req, err)) {
2136                 return;
2137         }
2138
2139         tevent_req_done(req);
2140 }
2141
2142 static int tstream_bsd_connect_recv(struct tevent_req *req,
2143                                     int *perrno,
2144                                     TALLOC_CTX *mem_ctx,
2145                                     struct tstream_context **stream,
2146                                     const char *location)
2147 {
2148         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2149                                         struct tstream_bsd_connect_state);
2150         int ret;
2151
2152         ret = tsocket_simple_int_recv(req, perrno);
2153         if (ret == 0) {
2154                 ret = _tstream_bsd_existing_socket(mem_ctx,
2155                                                    state->fd,
2156                                                    stream,
2157                                                    location);
2158                 if (ret == -1) {
2159                         *perrno = errno;
2160                         goto done;
2161                 }
2162                 TALLOC_FREE(state->fde);
2163                 state->fd = -1;
2164         }
2165
2166 done:
2167         tevent_req_received(req);
2168         return ret;
2169 }
2170
2171 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2172                                         struct tevent_context *ev,
2173                                         const struct tsocket_address *local,
2174                                         const struct tsocket_address *remote)
2175 {
2176         struct tsocket_address_bsd *lbsda =
2177                 talloc_get_type_abort(local->private_data,
2178                 struct tsocket_address_bsd);
2179         struct tevent_req *req;
2180         int sys_errno = 0;
2181
2182         switch (lbsda->u.sa.sa_family) {
2183         case AF_INET:
2184                 break;
2185 #ifdef HAVE_IPV6
2186         case AF_INET6:
2187                 break;
2188 #endif
2189         default:
2190                 sys_errno = EINVAL;
2191                 break;
2192         }
2193
2194         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2195
2196         return req;
2197 }
2198
2199 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2200                                    int *perrno,
2201                                    TALLOC_CTX *mem_ctx,
2202                                    struct tstream_context **stream,
2203                                    const char *location)
2204 {
2205         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2206 }
2207
2208 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2209                                         struct tevent_context *ev,
2210                                         const struct tsocket_address *local,
2211                                         const struct tsocket_address *remote)
2212 {
2213         struct tsocket_address_bsd *lbsda =
2214                 talloc_get_type_abort(local->private_data,
2215                 struct tsocket_address_bsd);
2216         struct tevent_req *req;
2217         int sys_errno = 0;
2218
2219         switch (lbsda->u.sa.sa_family) {
2220         case AF_UNIX:
2221                 break;
2222         default:
2223                 sys_errno = EINVAL;
2224                 break;
2225         }
2226
2227         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2228
2229         return req;
2230 }
2231
2232 int _tstream_unix_connect_recv(struct tevent_req *req,
2233                                       int *perrno,
2234                                       TALLOC_CTX *mem_ctx,
2235                                       struct tstream_context **stream,
2236                                       const char *location)
2237 {
2238         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2239 }
2240
2241 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2242                              struct tstream_context **_stream1,
2243                              TALLOC_CTX *mem_ctx2,
2244                              struct tstream_context **_stream2,
2245                              const char *location)
2246 {
2247         int ret;
2248         int fds[2];
2249         int fd1;
2250         int fd2;
2251         struct tstream_context *stream1 = NULL;
2252         struct tstream_context *stream2 = NULL;
2253
2254         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2255         if (ret == -1) {
2256                 return -1;
2257         }
2258         fd1 = fds[0];
2259         fd2 = fds[1];
2260
2261         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2262         if (fd1 == -1) {
2263                 int sys_errno = errno;
2264                 close(fd2);
2265                 errno = sys_errno;
2266                 return -1;
2267         }
2268
2269         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2270         if (fd2 == -1) {
2271                 int sys_errno = errno;
2272                 close(fd1);
2273                 errno = sys_errno;
2274                 return -1;
2275         }
2276
2277         ret = _tstream_bsd_existing_socket(mem_ctx1,
2278                                            fd1,
2279                                            &stream1,
2280                                            location);
2281         if (ret == -1) {
2282                 int sys_errno = errno;
2283                 close(fd1);
2284                 close(fd2);
2285                 errno = sys_errno;
2286                 return -1;
2287         }
2288
2289         ret = _tstream_bsd_existing_socket(mem_ctx2,
2290                                            fd2,
2291                                            &stream2,
2292                                            location);
2293         if (ret == -1) {
2294                 int sys_errno = errno;
2295                 talloc_free(stream1);
2296                 close(fd2);
2297                 errno = sys_errno;
2298                 return -1;
2299         }
2300
2301         *_stream1 = stream1;
2302         *_stream2 = stream2;
2303         return 0;
2304 }
2305