]> arthur.barton.de Git - netatalk.git/blob - libatalk/tsocket/tsocket_bsd.c
Import tsocket from samba
[netatalk.git] / libatalk / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tsocket
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Turn the result of a socket system call into an errno-style code.
 *
 * ret       - return value of the system call
 * sys_errno - the value errno had right after the call
 * retry     - out: set to true when the failure is transient and the
 *             call should simply be repeated later, false otherwise
 *
 * Returns 0 when the call succeeded (ret >= 0), EIO when the failure
 * cannot be described (ret is negative but not -1, or errno was 0),
 * otherwise sys_errno unchanged.
 */
static int tsocket_bsd_error_from_errno(int ret,
                                        int sys_errno,
                                        bool *retry)
{
        bool transient;

        *retry = false;

        /* any non-negative result counts as success */
        if (ret >= 0) {
                return 0;
        }

        /* a failure without a usable errno is reported as a generic EIO */
        if (ret != -1 || sys_errno == 0) {
                return EIO;
        }

        /* these codes only mean "not now", the caller may try again */
        transient = (sys_errno == EINTR ||
                     sys_errno == EINPROGRESS ||
                     sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
        transient = transient || (sys_errno == EWOULDBLOCK);
#endif

        *retry = transient;
        return sys_errno;
}
72
/*
 * Prepare a freshly created descriptor for use by tsocket.
 *
 * If high_fd is set the descriptor is first moved (via dup()) to a
 * number >= 3; the low numbered descriptors used on the way are closed.
 * The descriptor is then switched to non-blocking mode and, where
 * FD_CLOEXEC exists, marked close-on-exec.
 *
 * Returns the (possibly new) descriptor, or -1 with errno set. On any
 * failure after the initial fd == -1 check the descriptor is closed.
 * Passing fd == -1 returns -1 without touching errno.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
        int low_fds[3];
        int num_low = 0;
        int saved_errno = 0;
        int fl;
        int idx;

        if (fd == -1) {
                return -1;
        }

        if (high_fd) {
                /* keep duplicating until we end up with a fd >= 3 */
                while (fd < 3) {
                        low_fds[num_low] = fd;
                        num_low += 1;

                        fd = dup(fd);
                        if (fd == -1) {
                                /* remember errno, close() below may clobber it */
                                saved_errno = errno;
                                break;
                        }
                }

                /* the low descriptors we walked through are not needed */
                idx = 0;
                while (idx < num_low) {
                        close(low_fds[idx]);
                        idx += 1;
                }

                if (fd == -1) {
                        errno = saved_errno;
                        return -1;
                }
        }

        /* pick whatever name this platform has for the non-blocking flag */
#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

        fl = fcntl(fd, F_GETFL);
        if (fl == -1) {
                goto fail;
        }
        if (fcntl(fd, F_SETFL, fl | FLAG_TO_SET) == -1) {
                goto fail;
        }

#undef FLAG_TO_SET

        /* do not leak the descriptor into exec()'ed programs */
#ifdef FD_CLOEXEC
        fl = fcntl(fd, F_GETFD, 0);
        if (fl < 0) {
                goto fail;
        }
        if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
                goto fail;
        }
#endif
        return fd;

 fail:
        /* close the descriptor but report the original failure reason */
        if (fd != -1) {
                saved_errno = errno;
                close(fd);
                errno = saved_errno;
        }
        return -1;
}
149
/*
 * Report how many bytes can be read from fd without blocking.
 *
 * Returns the FIONREAD byte count when it is non-zero, 0 when nothing
 * is pending and the socket has no error queued, or -1 with errno set.
 * A pending socket error (SO_ERROR) is reported through errno.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
        int nbytes = 0;
        int so_error = 0;
        socklen_t optlen = sizeof(so_error);
        int rc;

        rc = ioctl(fd, FIONREAD, &nbytes);
        if (rc == -1) {
                return -1;
        }
        if (rc != 0) {
                /* this should not be reached */
                errno = EIO;
                return -1;
        }

        if (nbytes != 0) {
                return nbytes;
        }

        /*
         * Nothing to read: look for a queued socket error instead.
         * For connected dgram sockets this is how ICMP errors are
         * handed back to the caller.
         */
        rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &optlen);
        if (rc == -1) {
                return -1;
        }
        if (so_error != 0) {
                errno = so_error;
                return -1;
        }

        return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
/*
 * Private payload of a "bsd" tsocket_address: one raw socket address
 * together with the number of bytes of it that are in use.
 */
struct tsocket_address_bsd {
        /* number of bytes of u that hold the address */
        socklen_t sa_socklen;
        /*
         * The same storage viewed per address family; the code in this
         * file selects the view by looking at u.sa.sa_family.
         */
        union {
                struct sockaddr sa;
                struct sockaddr_in in;
#ifdef HAVE_IPV6
                struct sockaddr_in6 in6;
#endif
                struct sockaddr_un un;
                /* used as the memcpy() source/target for the whole address */
                struct sockaddr_storage ss;
        } u;
};
204
205 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
206                                        struct sockaddr *sa,
207                                        size_t sa_socklen,
208                                        struct tsocket_address **_addr,
209                                        const char *location)
210 {
211         struct tsocket_address *addr;
212         struct tsocket_address_bsd *bsda;
213
214         if (sa_socklen < sizeof(sa->sa_family)) {
215                 errno = EINVAL;
216                 return -1;
217         }
218
219         switch (sa->sa_family) {
220         case AF_UNIX:
221                 if (sa_socklen > sizeof(struct sockaddr_un)) {
222                         sa_socklen = sizeof(struct sockaddr_un);
223                 }
224                 break;
225         case AF_INET:
226                 if (sa_socklen < sizeof(struct sockaddr_in)) {
227                         errno = EINVAL;
228                         return -1;
229                 }
230                 sa_socklen = sizeof(struct sockaddr_in);
231                 break;
232 #ifdef HAVE_IPV6
233         case AF_INET6:
234                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
235                         errno = EINVAL;
236                         return -1;
237                 }
238                 sa_socklen = sizeof(struct sockaddr_in6);
239                 break;
240 #endif
241         default:
242                 errno = EAFNOSUPPORT;
243                 return -1;
244         }
245
246         if (sa_socklen > sizeof(struct sockaddr_storage)) {
247                 errno = EINVAL;
248                 return -1;
249         }
250
251         addr = tsocket_address_create(mem_ctx,
252                                       &tsocket_address_bsd_ops,
253                                       &bsda,
254                                       struct tsocket_address_bsd,
255                                       location);
256         if (!addr) {
257                 errno = ENOMEM;
258                 return -1;
259         }
260
261         ZERO_STRUCTP(bsda);
262
263         memcpy(&bsda->u.ss, sa, sa_socklen);
264
265         bsda->sa_socklen = sa_socklen;
266
267         *_addr = addr;
268         return 0;
269 }
270
271 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
272                                      struct sockaddr *sa,
273                                      size_t sa_socklen)
274 {
275         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
276                                            struct tsocket_address_bsd);
277
278         if (!bsda) {
279                 errno = EINVAL;
280                 return -1;
281         }
282
283         if (sa_socklen < bsda->sa_socklen) {
284                 errno = EINVAL;
285                 return -1;
286         }
287
288         if (sa_socklen > bsda->sa_socklen) {
289                 memset(sa, 0, sa_socklen);
290                 sa_socklen = bsda->sa_socklen;
291         }
292
293         memcpy(sa, &bsda->u.ss, sa_socklen);
294         return sa_socklen;
295 }
296
297 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
298                                        const char *fam,
299                                        const char *addr,
300                                        uint16_t port,
301                                        struct tsocket_address **_addr,
302                                        const char *location)
303 {
304         struct addrinfo hints;
305         struct addrinfo *result = NULL;
306         char port_str[6];
307         int ret;
308
309         ZERO_STRUCT(hints);
310         /*
311          * we use SOCKET_STREAM here to get just one result
312          * back from getaddrinfo().
313          */
314         hints.ai_socktype = SOCK_STREAM;
315         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
316
317         if (strcasecmp(fam, "ip") == 0) {
318                 hints.ai_family = AF_UNSPEC;
319                 if (!addr) {
320 #ifdef HAVE_IPV6
321                         addr = "::";
322 #else
323                         addr = "0.0.0.0";
324 #endif
325                 }
326         } else if (strcasecmp(fam, "ipv4") == 0) {
327                 hints.ai_family = AF_INET;
328                 if (!addr) {
329                         addr = "0.0.0.0";
330                 }
331 #ifdef HAVE_IPV6
332         } else if (strcasecmp(fam, "ipv6") == 0) {
333                 hints.ai_family = AF_INET6;
334                 if (!addr) {
335                         addr = "::";
336                 }
337 #endif
338         } else {
339                 errno = EAFNOSUPPORT;
340                 return -1;
341         }
342
343         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
344
345         ret = getaddrinfo(addr, port_str, &hints, &result);
346         if (ret != 0) {
347                 switch (ret) {
348                 case EAI_FAIL:
349                         errno = EINVAL;
350                         break;
351                 }
352                 ret = -1;
353                 goto done;
354         }
355
356         if (result->ai_socktype != SOCK_STREAM) {
357                 errno = EINVAL;
358                 ret = -1;
359                 goto done;
360         }
361
362         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
363                                                   result->ai_addr,
364                                                   result->ai_addrlen,
365                                                   _addr,
366                                                   location);
367
368 done:
369         if (result) {
370                 freeaddrinfo(result);
371         }
372         return ret;
373 }
374
375 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
376                                        TALLOC_CTX *mem_ctx)
377 {
378         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
379                                            struct tsocket_address_bsd);
380         char addr_str[INET6_ADDRSTRLEN+1];
381         const char *str;
382
383         if (!bsda) {
384                 errno = EINVAL;
385                 return NULL;
386         }
387
388         switch (bsda->u.sa.sa_family) {
389         case AF_INET:
390                 str = inet_ntop(bsda->u.in.sin_family,
391                                 &bsda->u.in.sin_addr,
392                                 addr_str, sizeof(addr_str));
393                 break;
394 #ifdef HAVE_IPV6
395         case AF_INET6:
396                 str = inet_ntop(bsda->u.in6.sin6_family,
397                                 &bsda->u.in6.sin6_addr,
398                                 addr_str, sizeof(addr_str));
399                 break;
400 #endif
401         default:
402                 errno = EINVAL;
403                 return NULL;
404         }
405
406         if (!str) {
407                 return NULL;
408         }
409
410         return talloc_strdup(mem_ctx, str);
411 }
412
413 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
414 {
415         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
416                                            struct tsocket_address_bsd);
417         uint16_t port = 0;
418
419         if (!bsda) {
420                 errno = EINVAL;
421                 return 0;
422         }
423
424         switch (bsda->u.sa.sa_family) {
425         case AF_INET:
426                 port = ntohs(bsda->u.in.sin_port);
427                 break;
428 #ifdef HAVE_IPV6
429         case AF_INET6:
430                 port = ntohs(bsda->u.in6.sin6_port);
431                 break;
432 #endif
433         default:
434                 errno = EINVAL;
435                 return 0;
436         }
437
438         return port;
439 }
440
441 int tsocket_address_inet_set_port(struct tsocket_address *addr,
442                                   uint16_t port)
443 {
444         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
445                                            struct tsocket_address_bsd);
446
447         if (!bsda) {
448                 errno = EINVAL;
449                 return -1;
450         }
451
452         switch (bsda->u.sa.sa_family) {
453         case AF_INET:
454                 bsda->u.in.sin_port = htons(port);
455                 break;
456 #ifdef HAVE_IPV6
457         case AF_INET6:
458                 bsda->u.in6.sin6_port = htons(port);
459                 break;
460 #endif
461         default:
462                 errno = EINVAL;
463                 return -1;
464         }
465
466         return 0;
467 }
468
469 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
470                                     const char *path,
471                                     struct tsocket_address **_addr,
472                                     const char *location)
473 {
474         struct sockaddr_un un;
475         void *p = &un;
476         int ret;
477
478         if (!path) {
479                 path = "";
480         }
481
482         if (strlen(path) > sizeof(un.sun_path)-1) {
483                 errno = ENAMETOOLONG;
484                 return -1;
485         }
486
487         ZERO_STRUCT(un);
488         un.sun_family = AF_UNIX;
489         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
490
491         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
492                                                  (struct sockaddr *)p,
493                                                  sizeof(un),
494                                                  _addr,
495                                                  location);
496
497         return ret;
498 }
499
500 char *tsocket_address_unix_path(const struct tsocket_address *addr,
501                                 TALLOC_CTX *mem_ctx)
502 {
503         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
504                                            struct tsocket_address_bsd);
505         const char *str;
506
507         if (!bsda) {
508                 errno = EINVAL;
509                 return NULL;
510         }
511
512         switch (bsda->u.sa.sa_family) {
513         case AF_UNIX:
514                 str = bsda->u.un.sun_path;
515                 break;
516         default:
517                 errno = EINVAL;
518                 return NULL;
519         }
520
521         return talloc_strdup(mem_ctx, str);
522 }
523
524 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
525                                         TALLOC_CTX *mem_ctx)
526 {
527         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
528                                            struct tsocket_address_bsd);
529         char *str;
530         char *addr_str;
531         const char *prefix = NULL;
532         uint16_t port;
533
534         switch (bsda->u.sa.sa_family) {
535         case AF_UNIX:
536                 return talloc_asprintf(mem_ctx, "unix:%s",
537                                        bsda->u.un.sun_path);
538         case AF_INET:
539                 prefix = "ipv4";
540                 break;
541 #ifdef HAVE_IPV6
542         case AF_INET6:
543                 prefix = "ipv6";
544                 break;
545 #endif
546         default:
547                 errno = EINVAL;
548                 return NULL;
549         }
550
551         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
552         if (!addr_str) {
553                 return NULL;
554         }
555
556         port = tsocket_address_inet_port(addr);
557
558         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
559                               prefix, addr_str, port);
560         talloc_free(addr_str);
561
562         return str;
563 }
564
565 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
566                                                          TALLOC_CTX *mem_ctx,
567                                                          const char *location)
568 {
569         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
570                                            struct tsocket_address_bsd);
571         struct tsocket_address *copy;
572         int ret;
573
574         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
575                                                  &bsda->u.sa,
576                                                  bsda->sa_socklen,
577                                                  &copy,
578                                                  location);
579         if (ret != 0) {
580                 return NULL;
581         }
582
583         return copy;
584 }
585
/*
 * Operations table attached to every address created in this file:
 * the backend name plus the string and copy implementations above.
 */
static const struct tsocket_address_ops tsocket_address_bsd_ops = {
        .name           = "bsd",
        .string         = tsocket_address_bsd_string,
        .copy           = tsocket_address_bsd_copy,
};
591
/*
 * Per-socket state of a BSD datagram context.
 */
struct tdgram_bsd {
        /* the socket; the send functions report ENOTCONN when this is -1 */
        int fd;

        /* the tevent_context the current fde was created on (cached) */
        void *event_ptr;
        /* fd event registered for this socket, if any */
        struct tevent_fd *fde;

        /* callback (and its argument) run when the fd becomes readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback (and its argument) run when the fd becomes writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
603
604 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
605                                    struct tevent_fd *fde,
606                                    uint16_t flags,
607                                    void *private_data)
608 {
609         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
610                                   struct tdgram_bsd);
611
612         if (flags & TEVENT_FD_WRITE) {
613                 bsds->writeable_handler(bsds->writeable_private);
614                 return;
615         }
616         if (flags & TEVENT_FD_READ) {
617                 if (!bsds->readable_handler) {
618                         TEVENT_FD_NOT_READABLE(bsds->fde);
619                         return;
620                 }
621                 bsds->readable_handler(bsds->readable_private);
622                 return;
623         }
624 }
625
/*
 * Install or remove the callback that runs when the socket is readable.
 *
 * bsds         - the datagram socket state
 * ev           - event context to wait on; NULL removes the handler
 * handler      - callback to install (must be NULL when ev is NULL)
 * private_data - argument passed to handler
 *
 * Returns 0 on success, -1 with errno = EINVAL for an invalid
 * combination of arguments or ENOMEM when no fd event can be created.
 */
static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
                                           struct tevent_context *ev,
                                           void (*handler)(void *private_data),
                                           void *private_data)
{
        if (ev == NULL) {
                /* removal request: a handler makes no sense here */
                if (handler) {
                        errno = EINVAL;
                        return -1;
                }
                if (!bsds->readable_handler) {
                        return 0;
                }
                /*
                 * Only forget the callback; the fde flags are left alone
                 * and tdgram_bsd_fde_handler() clears the read flag on
                 * the next read event without a handler.
                 */
                bsds->readable_handler = NULL;
                bsds->readable_private = NULL;

                return 0;
        }

        /* read and write must use the same tevent_context */
        if (bsds->event_ptr != ev) {
                /* cannot switch contexts while a handler is still installed */
                if (bsds->readable_handler || bsds->writeable_handler) {
                        errno = EINVAL;
                        return -1;
                }
                bsds->event_ptr = NULL;
                TALLOC_FREE(bsds->fde);
        }

        /*
         * No events registered: (re)create the fde for reading.
         * NOTE(review): bsds->fde may be NULL here; this relies on
         * tevent_fd_get_flags() returning 0 in that case - confirm.
         */
        if (tevent_fd_get_flags(bsds->fde) == 0) {
                TALLOC_FREE(bsds->fde);

                bsds->fde = tevent_add_fd(ev, bsds,
                                          bsds->fd, TEVENT_FD_READ,
                                          tdgram_bsd_fde_handler,
                                          bsds);
                if (!bsds->fde) {
                        errno = ENOMEM;
                        return -1;
                }

                /* cache the event context we're running on */
                bsds->event_ptr = ev;
        } else if (!bsds->readable_handler) {
                /* existing fde, but no reader so far: ask for read events */
                TEVENT_FD_READABLE(bsds->fde);
        }

        bsds->readable_handler = handler;
        bsds->readable_private = private_data;

        return 0;
}
678
/*
 * Install or remove the callback that runs when the socket is writeable.
 *
 * bsds         - the datagram socket state
 * ev           - event context to wait on; NULL removes the handler
 * handler      - callback to install (must be NULL when ev is NULL)
 * private_data - argument passed to handler
 *
 * Returns 0 on success, -1 with errno = EINVAL for an invalid
 * combination of arguments or ENOMEM when no fd event can be created.
 */
static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
                                            struct tevent_context *ev,
                                            void (*handler)(void *private_data),
                                            void *private_data)
{
        if (ev == NULL) {
                /* removal request: a handler makes no sense here */
                if (handler) {
                        errno = EINVAL;
                        return -1;
                }
                if (!bsds->writeable_handler) {
                        return 0;
                }
                bsds->writeable_handler = NULL;
                bsds->writeable_private = NULL;
                /*
                 * Stop write events right away: tdgram_bsd_fde_handler()
                 * calls the writeable handler without a NULL check.
                 */
                TEVENT_FD_NOT_WRITEABLE(bsds->fde);

                return 0;
        }

        /* read and write must use the same tevent_context */
        if (bsds->event_ptr != ev) {
                /* cannot switch contexts while a handler is still installed */
                if (bsds->readable_handler || bsds->writeable_handler) {
                        errno = EINVAL;
                        return -1;
                }
                bsds->event_ptr = NULL;
                TALLOC_FREE(bsds->fde);
        }

        /*
         * No events registered: (re)create the fde for writing.
         * NOTE(review): bsds->fde may be NULL here; this relies on
         * tevent_fd_get_flags() returning 0 in that case - confirm.
         */
        if (tevent_fd_get_flags(bsds->fde) == 0) {
                TALLOC_FREE(bsds->fde);

                bsds->fde = tevent_add_fd(ev, bsds,
                                          bsds->fd, TEVENT_FD_WRITE,
                                          tdgram_bsd_fde_handler,
                                          bsds);
                if (!bsds->fde) {
                        errno = ENOMEM;
                        return -1;
                }

                /* cache the event context we're running on */
                bsds->event_ptr = ev;
        } else if (!bsds->writeable_handler) {
                /* existing fde, but no writer so far: ask for write events */
                TEVENT_FD_WRITEABLE(bsds->fde);
        }

        bsds->writeable_handler = handler;
        bsds->writeable_private = private_data;

        return 0;
}
732
/*
 * State of one pending recvfrom request.
 */
struct tdgram_bsd_recvfrom_state {
        /* the datagram context the request reads from */
        struct tdgram_context *dgram;

        /* received payload, allocated as a talloc child of this state */
        uint8_t *buf;
        /* number of bytes in buf */
        size_t len;
        /* sender address filled in by recvfrom() */
        struct tsocket_address *src;
};
740
741 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
742 {
743         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
744                                   struct tdgram_bsd);
745
746         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
747
748         return 0;
749 }
750
751 static void tdgram_bsd_recvfrom_handler(void *private_data);
752
753 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
754                                         struct tevent_context *ev,
755                                         struct tdgram_context *dgram)
756 {
757         struct tevent_req *req;
758         struct tdgram_bsd_recvfrom_state *state;
759         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
760         int ret;
761
762         req = tevent_req_create(mem_ctx, &state,
763                                 struct tdgram_bsd_recvfrom_state);
764         if (!req) {
765                 return NULL;
766         }
767
768         state->dgram    = dgram;
769         state->buf      = NULL;
770         state->len      = 0;
771         state->src      = NULL;
772
773         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
774
775         if (bsds->fd == -1) {
776                 tevent_req_error(req, ENOTCONN);
777                 goto post;
778         }
779
780         /*
781          * this is a fast path, not waiting for the
782          * socket to become explicit readable gains
783          * about 10%-20% performance in benchmark tests.
784          */
785         tdgram_bsd_recvfrom_handler(req);
786         if (!tevent_req_is_in_progress(req)) {
787                 goto post;
788         }
789
790         ret = tdgram_bsd_set_readable_handler(bsds, ev,
791                                               tdgram_bsd_recvfrom_handler,
792                                               req);
793         if (ret == -1) {
794                 tevent_req_error(req, errno);
795                 goto post;
796         }
797
798         return req;
799
800  post:
801         tevent_req_post(req, ev);
802         return req;
803 }
804
/*
 * Try to receive one datagram for a pending recvfrom request.
 *
 * private_data is the tevent_req. The function returns without
 * finishing the request when nothing is pending or the error is
 * transient; otherwise the request is completed with the received data
 * or failed with an errno value.
 */
static void tdgram_bsd_recvfrom_handler(void *private_data)
{
        struct tevent_req *req = talloc_get_type_abort(private_data,
                                 struct tevent_req);
        struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
                                        struct tdgram_bsd_recvfrom_state);
        struct tdgram_context *dgram = state->dgram;
        struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
        struct tsocket_address_bsd *bsda;
        ssize_t ret;
        int err;
        bool retry;

        /* ask how many bytes are waiting so the buffer can be sized */
        ret = tsocket_bsd_pending(bsds->fd);
        if (ret == 0) {
                /* retry later */
                return;
        }
        /* errno is read immediately after the failing call */
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
                return;
        }
        if (tevent_req_error(req, err)) {
                return;
        }

        /* here ret > 0: allocate a buffer of the reported size */
        state->buf = talloc_array(state, uint8_t, ret);
        if (tevent_req_nomem(state->buf, req)) {
                return;
        }
        state->len = ret;

        /* address object that recvfrom() fills with the sender */
        state->src = tsocket_address_create(state,
                                            &tsocket_address_bsd_ops,
                                            &bsda,
                                            struct tsocket_address_bsd,
                                            __location__ "bsd_recvfrom");
        if (tevent_req_nomem(state->src, req)) {
                return;
        }

        ZERO_STRUCTP(bsda);
        /* offer the full storage; recvfrom() stores the actual length */
        bsda->sa_socklen = sizeof(bsda->u.ss);

        ret = recvfrom(bsds->fd, state->buf, state->len, 0,
                       &bsda->u.sa, &bsda->sa_socklen);
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
                return;
        }
        if (tevent_req_error(req, err)) {
                return;
        }

        /*
         * Some systems (FreeBSD, see bug #7115) return too many
         * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
         * the return value includes some IP/UDP header bytes,
         * while recvfrom() just returns the payload.
         */
        state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
        if (tevent_req_nomem(state->buf, req)) {
                return;
        }
        state->len = ret;

        tevent_req_done(req);
}
875
876 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
877                                         int *perrno,
878                                         TALLOC_CTX *mem_ctx,
879                                         uint8_t **buf,
880                                         struct tsocket_address **src)
881 {
882         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
883                                         struct tdgram_bsd_recvfrom_state);
884         ssize_t ret;
885
886         ret = tsocket_simple_int_recv(req, perrno);
887         if (ret == 0) {
888                 *buf = talloc_move(mem_ctx, &state->buf);
889                 ret = state->len;
890                 if (src) {
891                         *src = talloc_move(mem_ctx, &state->src);
892                 }
893         }
894
895         tevent_req_received(req);
896         return ret;
897 }
898
/*
 * State of one pending sendto request.
 */
struct tdgram_bsd_sendto_state {
        /* the datagram context the request writes to */
        struct tdgram_context *dgram;

        /* caller supplied payload and its length, passed to sendto() */
        const uint8_t *buf;
        size_t len;
        /* destination address; may be NULL, then sendto() gets no address */
        const struct tsocket_address *dst;

        /* initialised to -1 by tdgram_bsd_sendto_send() */
        ssize_t ret;
};
908
909 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
910 {
911         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
912                                   struct tdgram_bsd);
913
914         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
915
916         return 0;
917 }
918
919 static void tdgram_bsd_sendto_handler(void *private_data);
920
921 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
922                                                  struct tevent_context *ev,
923                                                  struct tdgram_context *dgram,
924                                                  const uint8_t *buf,
925                                                  size_t len,
926                                                  const struct tsocket_address *dst)
927 {
928         struct tevent_req *req;
929         struct tdgram_bsd_sendto_state *state;
930         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
931         int ret;
932
933         req = tevent_req_create(mem_ctx, &state,
934                                 struct tdgram_bsd_sendto_state);
935         if (!req) {
936                 return NULL;
937         }
938
939         state->dgram    = dgram;
940         state->buf      = buf;
941         state->len      = len;
942         state->dst      = dst;
943         state->ret      = -1;
944
945         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
946
947         if (bsds->fd == -1) {
948                 tevent_req_error(req, ENOTCONN);
949                 goto post;
950         }
951
952         /*
953          * this is a fast path, not waiting for the
954          * socket to become explicit writeable gains
955          * about 10%-20% performance in benchmark tests.
956          */
957         tdgram_bsd_sendto_handler(req);
958         if (!tevent_req_is_in_progress(req)) {
959                 goto post;
960         }
961
962         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
963                                                tdgram_bsd_sendto_handler,
964                                                req);
965         if (ret == -1) {
966                 tevent_req_error(req, errno);
967                 goto post;
968         }
969
970         return req;
971
972  post:
973         tevent_req_post(req, ev);
974         return req;
975 }
976
977 static void tdgram_bsd_sendto_handler(void *private_data)
978 {
979         struct tevent_req *req = talloc_get_type_abort(private_data,
980                                  struct tevent_req);
981         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
982                                         struct tdgram_bsd_sendto_state);
983         struct tdgram_context *dgram = state->dgram;
984         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
985         struct sockaddr *sa = NULL;
986         socklen_t sa_socklen = 0;
987         ssize_t ret;
988         int err;
989         bool retry;
990
991         if (state->dst) {
992                 struct tsocket_address_bsd *bsda =
993                         talloc_get_type(state->dst->private_data,
994                         struct tsocket_address_bsd);
995
996                 sa = &bsda->u.sa;
997                 sa_socklen = bsda->sa_socklen;
998         }
999
1000         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1001         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1002         if (retry) {
1003                 /* retry later */
1004                 return;
1005         }
1006         if (tevent_req_error(req, err)) {
1007                 return;
1008         }
1009
1010         state->ret = ret;
1011
1012         tevent_req_done(req);
1013 }
1014
1015 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1016 {
1017         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1018                                         struct tdgram_bsd_sendto_state);
1019         ssize_t ret;
1020
1021         ret = tsocket_simple_int_recv(req, perrno);
1022         if (ret == 0) {
1023                 ret = state->ret;
1024         }
1025
1026         tevent_req_received(req);
1027         return ret;
1028 }
1029
/*
 * State of a datagram disconnect request. The request carries no data;
 * the single member only serves as a placeholder.
 */
struct tdgram_bsd_disconnect_state {
        uint8_t __dummy;
};
1033
1034 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1035                                                      struct tevent_context *ev,
1036                                                      struct tdgram_context *dgram)
1037 {
1038         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1039         struct tevent_req *req;
1040         struct tdgram_bsd_disconnect_state *state;
1041         int ret;
1042         int err;
1043         bool dummy;
1044
1045         req = tevent_req_create(mem_ctx, &state,
1046                                 struct tdgram_bsd_disconnect_state);
1047         if (req == NULL) {
1048                 return NULL;
1049         }
1050
1051         if (bsds->fd == -1) {
1052                 tevent_req_error(req, ENOTCONN);
1053                 goto post;
1054         }
1055
1056         ret = close(bsds->fd);
1057         bsds->fd = -1;
1058         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1059         if (tevent_req_error(req, err)) {
1060                 goto post;
1061         }
1062
1063         tevent_req_done(req);
1064 post:
1065         tevent_req_post(req, ev);
1066         return req;
1067 }
1068
/*
 * Collect the result of a datagram disconnect request: returns whatever
 * tsocket_simple_int_recv() reports and marks the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1079
1080 static const struct tdgram_context_ops tdgram_bsd_ops = {
1081         .name                   = "bsd",
1082
1083         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1084         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1085
1086         .sendto_send            = tdgram_bsd_sendto_send,
1087         .sendto_recv            = tdgram_bsd_sendto_recv,
1088
1089         .disconnect_send        = tdgram_bsd_disconnect_send,
1090         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1091 };
1092
1093 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1094 {
1095         TALLOC_FREE(bsds->fde);
1096         if (bsds->fd != -1) {
1097                 close(bsds->fd);
1098                 bsds->fd = -1;
1099         }
1100         return 0;
1101 }
1102
1103 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1104                                    const struct tsocket_address *remote,
1105                                    bool broadcast,
1106                                    TALLOC_CTX *mem_ctx,
1107                                    struct tdgram_context **_dgram,
1108                                    const char *location)
1109 {
1110         struct tsocket_address_bsd *lbsda =
1111                 talloc_get_type_abort(local->private_data,
1112                 struct tsocket_address_bsd);
1113         struct tsocket_address_bsd *rbsda = NULL;
1114         struct tdgram_context *dgram;
1115         struct tdgram_bsd *bsds;
1116         int fd;
1117         int ret;
1118         bool do_bind = false;
1119         bool do_reuseaddr = false;
1120         bool do_ipv6only = false;
1121         bool is_inet = false;
1122         int sa_fam = lbsda->u.sa.sa_family;
1123
1124         if (remote) {
1125                 rbsda = talloc_get_type_abort(remote->private_data,
1126                         struct tsocket_address_bsd);
1127         }
1128
1129         switch (lbsda->u.sa.sa_family) {
1130         case AF_UNIX:
1131                 if (broadcast) {
1132                         errno = EINVAL;
1133                         return -1;
1134                 }
1135                 if (lbsda->u.un.sun_path[0] != 0) {
1136                         do_reuseaddr = true;
1137                         do_bind = true;
1138                 }
1139                 break;
1140         case AF_INET:
1141                 if (lbsda->u.in.sin_port != 0) {
1142                         do_reuseaddr = true;
1143                         do_bind = true;
1144                 }
1145                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1146                         do_bind = true;
1147                 }
1148                 is_inet = true;
1149                 break;
1150 #ifdef HAVE_IPV6
1151         case AF_INET6:
1152                 if (lbsda->u.in6.sin6_port != 0) {
1153                         do_reuseaddr = true;
1154                         do_bind = true;
1155                 }
1156                 if (memcmp(&in6addr_any,
1157                            &lbsda->u.in6.sin6_addr,
1158                            sizeof(in6addr_any)) != 0) {
1159                         do_bind = true;
1160                 }
1161                 is_inet = true;
1162                 do_ipv6only = true;
1163                 break;
1164 #endif
1165         default:
1166                 errno = EINVAL;
1167                 return -1;
1168         }
1169
1170         if (!do_bind && is_inet && rbsda) {
1171                 sa_fam = rbsda->u.sa.sa_family;
1172                 switch (sa_fam) {
1173                 case AF_INET:
1174                         do_ipv6only = false;
1175                         break;
1176 #ifdef HAVE_IPV6
1177                 case AF_INET6:
1178                         do_ipv6only = true;
1179                         break;
1180 #endif
1181                 }
1182         }
1183
1184         fd = socket(sa_fam, SOCK_DGRAM, 0);
1185         if (fd < 0) {
1186                 return fd;
1187         }
1188
1189         fd = tsocket_bsd_common_prepare_fd(fd, true);
1190         if (fd < 0) {
1191                 return fd;
1192         }
1193
1194         dgram = tdgram_context_create(mem_ctx,
1195                                       &tdgram_bsd_ops,
1196                                       &bsds,
1197                                       struct tdgram_bsd,
1198                                       location);
1199         if (!dgram) {
1200                 int saved_errno = errno;
1201                 close(fd);
1202                 errno = saved_errno;
1203                 return -1;
1204         }
1205         ZERO_STRUCTP(bsds);
1206         bsds->fd = fd;
1207         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1208
1209 #ifdef HAVE_IPV6
1210         if (do_ipv6only) {
1211                 int val = 1;
1212
1213                 ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
1214                                  (const void *)&val, sizeof(val));
1215                 if (ret == -1) {
1216                         int saved_errno = errno;
1217                         talloc_free(dgram);
1218                         errno = saved_errno;
1219                         return ret;
1220                 }
1221         }
1222 #endif
1223
1224         if (broadcast) {
1225                 int val = 1;
1226
1227                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1228                                  (const void *)&val, sizeof(val));
1229                 if (ret == -1) {
1230                         int saved_errno = errno;
1231                         talloc_free(dgram);
1232                         errno = saved_errno;
1233                         return ret;
1234                 }
1235         }
1236
1237         if (do_reuseaddr) {
1238                 int val = 1;
1239
1240                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1241                                  (const void *)&val, sizeof(val));
1242                 if (ret == -1) {
1243                         int saved_errno = errno;
1244                         talloc_free(dgram);
1245                         errno = saved_errno;
1246                         return ret;
1247                 }
1248         }
1249
1250         if (do_bind) {
1251                 ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
1252                 if (ret == -1) {
1253                         int saved_errno = errno;
1254                         talloc_free(dgram);
1255                         errno = saved_errno;
1256                         return ret;
1257                 }
1258         }
1259
1260         if (rbsda) {
1261                 if (rbsda->u.sa.sa_family != sa_fam) {
1262                         talloc_free(dgram);
1263                         errno = EINVAL;
1264                         return -1;
1265                 }
1266
1267                 ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
1268                 if (ret == -1) {
1269                         int saved_errno = errno;
1270                         talloc_free(dgram);
1271                         errno = saved_errno;
1272                         return ret;
1273                 }
1274         }
1275
1276         *_dgram = dgram;
1277         return 0;
1278 }
1279
1280 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1281                             const struct tsocket_address *remote,
1282                             TALLOC_CTX *mem_ctx,
1283                             struct tdgram_context **dgram,
1284                             const char *location)
1285 {
1286         struct tsocket_address_bsd *lbsda =
1287                 talloc_get_type_abort(local->private_data,
1288                 struct tsocket_address_bsd);
1289         int ret;
1290
1291         switch (lbsda->u.sa.sa_family) {
1292         case AF_INET:
1293                 break;
1294 #ifdef HAVE_IPV6
1295         case AF_INET6:
1296                 break;
1297 #endif
1298         default:
1299                 errno = EINVAL;
1300                 return -1;
1301         }
1302
1303         ret = tdgram_bsd_dgram_socket(local, remote, false,
1304                                       mem_ctx, dgram, location);
1305
1306         return ret;
1307 }
1308
1309 int _tdgram_unix_socket(const struct tsocket_address *local,
1310                         const struct tsocket_address *remote,
1311                         TALLOC_CTX *mem_ctx,
1312                         struct tdgram_context **dgram,
1313                         const char *location)
1314 {
1315         struct tsocket_address_bsd *lbsda =
1316                 talloc_get_type_abort(local->private_data,
1317                 struct tsocket_address_bsd);
1318         int ret;
1319
1320         switch (lbsda->u.sa.sa_family) {
1321         case AF_UNIX:
1322                 break;
1323         default:
1324                 errno = EINVAL;
1325                 return -1;
1326         }
1327
1328         ret = tdgram_bsd_dgram_socket(local, remote, false,
1329                                       mem_ctx, dgram, location);
1330
1331         return ret;
1332 }
1333
/*
 * Private data of a stream context backed by a BSD socket.
 */
struct tstream_bsd {
        /* socket descriptor, -1 once the stream is disconnected */
        int fd;

        /* event context the fd event was created on (cached for comparison) */
        void *event_ptr;
        /* fd event shared by the read and the write side */
        struct tevent_fd *fde;

        /* callback (and its argument) invoked when the socket is readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback (and its argument) invoked when the socket is writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
1345
1346 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1347                                     struct tevent_fd *fde,
1348                                     uint16_t flags,
1349                                     void *private_data)
1350 {
1351         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1352                                    struct tstream_bsd);
1353
1354         if (flags & TEVENT_FD_WRITE) {
1355                 bsds->writeable_handler(bsds->writeable_private);
1356                 return;
1357         }
1358         if (flags & TEVENT_FD_READ) {
1359                 if (!bsds->readable_handler) {
1360                         if (bsds->writeable_handler) {
1361                                 bsds->writeable_handler(bsds->writeable_private);
1362                                 return;
1363                         }
1364                         TEVENT_FD_NOT_READABLE(bsds->fde);
1365                         return;
1366                 }
1367                 bsds->readable_handler(bsds->readable_private);
1368                 return;
1369         }
1370 }
1371
1372 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1373                                             struct tevent_context *ev,
1374                                             void (*handler)(void *private_data),
1375                                             void *private_data)
1376 {
1377         if (ev == NULL) {
1378                 if (handler) {
1379                         errno = EINVAL;
1380                         return -1;
1381                 }
1382                 if (!bsds->readable_handler) {
1383                         return 0;
1384                 }
1385                 bsds->readable_handler = NULL;
1386                 bsds->readable_private = NULL;
1387
1388                 return 0;
1389         }
1390
1391         /* read and write must use the same tevent_context */
1392         if (bsds->event_ptr != ev) {
1393                 if (bsds->readable_handler || bsds->writeable_handler) {
1394                         errno = EINVAL;
1395                         return -1;
1396                 }
1397                 bsds->event_ptr = NULL;
1398                 TALLOC_FREE(bsds->fde);
1399         }
1400
1401         if (tevent_fd_get_flags(bsds->fde) == 0) {
1402                 TALLOC_FREE(bsds->fde);
1403
1404                 bsds->fde = tevent_add_fd(ev, bsds,
1405                                           bsds->fd, TEVENT_FD_READ,
1406                                           tstream_bsd_fde_handler,
1407                                           bsds);
1408                 if (!bsds->fde) {
1409                         errno = ENOMEM;
1410                         return -1;
1411                 }
1412
1413                 /* cache the event context we're running on */
1414                 bsds->event_ptr = ev;
1415         } else if (!bsds->readable_handler) {
1416                 TEVENT_FD_READABLE(bsds->fde);
1417         }
1418
1419         bsds->readable_handler = handler;
1420         bsds->readable_private = private_data;
1421
1422         return 0;
1423 }
1424
1425 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1426                                              struct tevent_context *ev,
1427                                              void (*handler)(void *private_data),
1428                                              void *private_data)
1429 {
1430         if (ev == NULL) {
1431                 if (handler) {
1432                         errno = EINVAL;
1433                         return -1;
1434                 }
1435                 if (!bsds->writeable_handler) {
1436                         return 0;
1437                 }
1438                 bsds->writeable_handler = NULL;
1439                 bsds->writeable_private = NULL;
1440                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1441
1442                 return 0;
1443         }
1444
1445         /* read and write must use the same tevent_context */
1446         if (bsds->event_ptr != ev) {
1447                 if (bsds->readable_handler || bsds->writeable_handler) {
1448                         errno = EINVAL;
1449                         return -1;
1450                 }
1451                 bsds->event_ptr = NULL;
1452                 TALLOC_FREE(bsds->fde);
1453         }
1454
1455         if (tevent_fd_get_flags(bsds->fde) == 0) {
1456                 TALLOC_FREE(bsds->fde);
1457
1458                 bsds->fde = tevent_add_fd(ev, bsds,
1459                                           bsds->fd,
1460                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1461                                           tstream_bsd_fde_handler,
1462                                           bsds);
1463                 if (!bsds->fde) {
1464                         errno = ENOMEM;
1465                         return -1;
1466                 }
1467
1468                 /* cache the event context we're running on */
1469                 bsds->event_ptr = ev;
1470         } else if (!bsds->writeable_handler) {
1471                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1472                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1473                 tevent_fd_set_flags(bsds->fde, flags);
1474         }
1475
1476         bsds->writeable_handler = handler;
1477         bsds->writeable_private = private_data;
1478
1479         return 0;
1480 }
1481
1482 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1483 {
1484         struct tstream_bsd *bsds = tstream_context_data(stream,
1485                                    struct tstream_bsd);
1486         ssize_t ret;
1487
1488         if (bsds->fd == -1) {
1489                 errno = ENOTCONN;
1490                 return -1;
1491         }
1492
1493         ret = tsocket_bsd_pending(bsds->fd);
1494
1495         return ret;
1496 }
1497
/*
 * Per-request state of an asynchronous readv on a BSD stream socket.
 */
struct tstream_bsd_readv_state {
        /* stream context the request was started on */
        struct tstream_context *stream;

        /*
         * Private copy of the caller's vector; pointer and count are
         * advanced by the handler as data arrives.
         */
        struct iovec *vector;
        size_t count;

        /* number of bytes read so far */
        int ret;
};
1506
1507 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1508 {
1509         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1510                                    struct tstream_bsd);
1511
1512         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1513
1514         return 0;
1515 }
1516
1517 static void tstream_bsd_readv_handler(void *private_data);
1518
1519 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1520                                         struct tevent_context *ev,
1521                                         struct tstream_context *stream,
1522                                         struct iovec *vector,
1523                                         size_t count)
1524 {
1525         struct tevent_req *req;
1526         struct tstream_bsd_readv_state *state;
1527         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1528         int ret;
1529
1530         req = tevent_req_create(mem_ctx, &state,
1531                                 struct tstream_bsd_readv_state);
1532         if (!req) {
1533                 return NULL;
1534         }
1535
1536         state->stream   = stream;
1537         /* we make a copy of the vector so that we can modify it */
1538         state->vector   = talloc_array(state, struct iovec, count);
1539         if (tevent_req_nomem(state->vector, req)) {
1540                 goto post;
1541         }
1542         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1543         state->count    = count;
1544         state->ret      = 0;
1545
1546         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1547
1548         if (bsds->fd == -1) {
1549                 tevent_req_error(req, ENOTCONN);
1550                 goto post;
1551         }
1552
1553         /*
1554          * this is a fast path, not waiting for the
1555          * socket to become explicit readable gains
1556          * about 10%-20% performance in benchmark tests.
1557          */
1558         tstream_bsd_readv_handler(req);
1559         if (!tevent_req_is_in_progress(req)) {
1560                 goto post;
1561         }
1562
1563         ret = tstream_bsd_set_readable_handler(bsds, ev,
1564                                               tstream_bsd_readv_handler,
1565                                               req);
1566         if (ret == -1) {
1567                 tevent_req_error(req, errno);
1568                 goto post;
1569         }
1570
1571         return req;
1572
1573  post:
1574         tevent_req_post(req, ev);
1575         return req;
1576 }
1577
1578 static void tstream_bsd_readv_handler(void *private_data)
1579 {
1580         struct tevent_req *req = talloc_get_type_abort(private_data,
1581                                  struct tevent_req);
1582         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1583                                         struct tstream_bsd_readv_state);
1584         struct tstream_context *stream = state->stream;
1585         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1586         int ret;
1587         int err;
1588         bool retry;
1589
1590         ret = readv(bsds->fd, state->vector, state->count);
1591         if (ret == 0) {
1592                 /* propagate end of file */
1593                 tevent_req_error(req, EPIPE);
1594                 return;
1595         }
1596         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1597         if (retry) {
1598                 /* retry later */
1599                 return;
1600         }
1601         if (tevent_req_error(req, err)) {
1602                 return;
1603         }
1604
1605         state->ret += ret;
1606
1607         while (ret > 0) {
1608                 if (ret < state->vector[0].iov_len) {
1609                         uint8_t *base;
1610                         base = (uint8_t *)state->vector[0].iov_base;
1611                         base += ret;
1612                         state->vector[0].iov_base = base;
1613                         state->vector[0].iov_len -= ret;
1614                         break;
1615                 }
1616                 ret -= state->vector[0].iov_len;
1617                 state->vector += 1;
1618                 state->count -= 1;
1619         }
1620
1621         /*
1622          * there're maybe some empty vectors at the end
1623          * which we need to skip, otherwise we would get
1624          * ret == 0 from the readv() call and return EPIPE
1625          */
1626         while (state->count > 0) {
1627                 if (state->vector[0].iov_len > 0) {
1628                         break;
1629                 }
1630                 state->vector += 1;
1631                 state->count -= 1;
1632         }
1633
1634         if (state->count > 0) {
1635                 /* we have more to read */
1636                 return;
1637         }
1638
1639         tevent_req_done(req);
1640 }
1641
1642 static int tstream_bsd_readv_recv(struct tevent_req *req,
1643                                   int *perrno)
1644 {
1645         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1646                                         struct tstream_bsd_readv_state);
1647         int ret;
1648
1649         ret = tsocket_simple_int_recv(req, perrno);
1650         if (ret == 0) {
1651                 ret = state->ret;
1652         }
1653
1654         tevent_req_received(req);
1655         return ret;
1656 }
1657
/*
 * Per-request state of an asynchronous writev on a BSD stream socket.
 */
struct tstream_bsd_writev_state {
        /* stream context the request was started on */
        struct tstream_context *stream;

        /*
         * Private copy of the caller's vector; pointer and count are
         * advanced by the handler as data is written.
         */
        struct iovec *vector;
        size_t count;

        /* number of bytes written so far */
        int ret;
};
1666
1667 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1668 {
1669         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1670                                   struct tstream_bsd);
1671
1672         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1673
1674         return 0;
1675 }
1676
1677 static void tstream_bsd_writev_handler(void *private_data);
1678
1679 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1680                                                  struct tevent_context *ev,
1681                                                  struct tstream_context *stream,
1682                                                  const struct iovec *vector,
1683                                                  size_t count)
1684 {
1685         struct tevent_req *req;
1686         struct tstream_bsd_writev_state *state;
1687         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1688         int ret;
1689
1690         req = tevent_req_create(mem_ctx, &state,
1691                                 struct tstream_bsd_writev_state);
1692         if (!req) {
1693                 return NULL;
1694         }
1695
1696         state->stream   = stream;
1697         /* we make a copy of the vector so that we can modify it */
1698         state->vector   = talloc_array(state, struct iovec, count);
1699         if (tevent_req_nomem(state->vector, req)) {
1700                 goto post;
1701         }
1702         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1703         state->count    = count;
1704         state->ret      = 0;
1705
1706         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1707
1708         if (bsds->fd == -1) {
1709                 tevent_req_error(req, ENOTCONN);
1710                 goto post;
1711         }
1712
1713         /*
1714          * this is a fast path, not waiting for the
1715          * socket to become explicit writeable gains
1716          * about 10%-20% performance in benchmark tests.
1717          */
1718         tstream_bsd_writev_handler(req);
1719         if (!tevent_req_is_in_progress(req)) {
1720                 goto post;
1721         }
1722
1723         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1724                                                tstream_bsd_writev_handler,
1725                                                req);
1726         if (ret == -1) {
1727                 tevent_req_error(req, errno);
1728                 goto post;
1729         }
1730
1731         return req;
1732
1733  post:
1734         tevent_req_post(req, ev);
1735         return req;
1736 }
1737
1738 static void tstream_bsd_writev_handler(void *private_data)
1739 {
1740         struct tevent_req *req = talloc_get_type_abort(private_data,
1741                                  struct tevent_req);
1742         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1743                                         struct tstream_bsd_writev_state);
1744         struct tstream_context *stream = state->stream;
1745         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1746         ssize_t ret;
1747         int err;
1748         bool retry;
1749
1750         ret = writev(bsds->fd, state->vector, state->count);
1751         if (ret == 0) {
1752                 /* propagate end of file */
1753                 tevent_req_error(req, EPIPE);
1754                 return;
1755         }
1756         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1757         if (retry) {
1758                 /* retry later */
1759                 return;
1760         }
1761         if (tevent_req_error(req, err)) {
1762                 return;
1763         }
1764
1765         state->ret += ret;
1766
1767         while (ret > 0) {
1768                 if (ret < state->vector[0].iov_len) {
1769                         uint8_t *base;
1770                         base = (uint8_t *)state->vector[0].iov_base;
1771                         base += ret;
1772                         state->vector[0].iov_base = base;
1773                         state->vector[0].iov_len -= ret;
1774                         break;
1775                 }
1776                 ret -= state->vector[0].iov_len;
1777                 state->vector += 1;
1778                 state->count -= 1;
1779         }
1780
1781         /*
1782          * there're maybe some empty vectors at the end
1783          * which we need to skip, otherwise we would get
1784          * ret == 0 from the writev() call and return EPIPE
1785          */
1786         while (state->count > 0) {
1787                 if (state->vector[0].iov_len > 0) {
1788                         break;
1789                 }
1790                 state->vector += 1;
1791                 state->count -= 1;
1792         }
1793
1794         if (state->count > 0) {
1795                 /* we have more to read */
1796                 return;
1797         }
1798
1799         tevent_req_done(req);
1800 }
1801
1802 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1803 {
1804         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1805                                         struct tstream_bsd_writev_state);
1806         int ret;
1807
1808         ret = tsocket_simple_int_recv(req, perrno);
1809         if (ret == 0) {
1810                 ret = state->ret;
1811         }
1812
1813         tevent_req_received(req);
1814         return ret;
1815 }
1816
/*
 * State of a stream disconnect request. The request carries no data;
 * the single member only serves as a placeholder.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy;
};
1820
1821 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1822                                                      struct tevent_context *ev,
1823                                                      struct tstream_context *stream)
1824 {
1825         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1826         struct tevent_req *req;
1827         struct tstream_bsd_disconnect_state *state;
1828         int ret;
1829         int err;
1830         bool dummy;
1831
1832         req = tevent_req_create(mem_ctx, &state,
1833                                 struct tstream_bsd_disconnect_state);
1834         if (req == NULL) {
1835                 return NULL;
1836         }
1837
1838         if (bsds->fd == -1) {
1839                 tevent_req_error(req, ENOTCONN);
1840                 goto post;
1841         }
1842
1843         ret = close(bsds->fd);
1844         bsds->fd = -1;
1845         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1846         if (tevent_req_error(req, err)) {
1847                 goto post;
1848         }
1849
1850         tevent_req_done(req);
1851 post:
1852         tevent_req_post(req, ev);
1853         return req;
1854 }
1855
/*
 * Collect the outcome of a disconnect request: returns whatever
 * tsocket_simple_int_recv() reports (perrno is passed through to it)
 * and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1866
1867 static const struct tstream_context_ops tstream_bsd_ops = {
1868         .name                   = "bsd",
1869
1870         .pending_bytes          = tstream_bsd_pending_bytes,
1871
1872         .readv_send             = tstream_bsd_readv_send,
1873         .readv_recv             = tstream_bsd_readv_recv,
1874
1875         .writev_send            = tstream_bsd_writev_send,
1876         .writev_recv            = tstream_bsd_writev_recv,
1877
1878         .disconnect_send        = tstream_bsd_disconnect_send,
1879         .disconnect_recv        = tstream_bsd_disconnect_recv,
1880 };
1881
1882 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1883 {
1884         TALLOC_FREE(bsds->fde);
1885         if (bsds->fd != -1) {
1886                 close(bsds->fd);
1887                 bsds->fd = -1;
1888         }
1889         return 0;
1890 }
1891
1892 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1893                                  int fd,
1894                                  struct tstream_context **_stream,
1895                                  const char *location)
1896 {
1897         struct tstream_context *stream;
1898         struct tstream_bsd *bsds;
1899
1900         stream = tstream_context_create(mem_ctx,
1901                                         &tstream_bsd_ops,
1902                                         &bsds,
1903                                         struct tstream_bsd,
1904                                         location);
1905         if (!stream) {
1906                 return -1;
1907         }
1908         ZERO_STRUCTP(bsds);
1909         bsds->fd = fd;
1910         talloc_set_destructor(bsds, tstream_bsd_destructor);
1911
1912         *_stream = stream;
1913         return 0;
1914 }
1915
/*
 * Per-request state of an outgoing stream connect.
 *
 * fd      socket being connected, -1 while none is owned by the request
 * fde     fd event used while connect() completes asynchronously
 * stream  not referenced by the connect code in this section of the file
 */
struct tstream_bsd_connect_state {
        int fd;
        struct tevent_fd *fde;
        /* was spelled "tstream_conext", which declared an unrelated type */
        struct tstream_context *stream;
};
1921
1922 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1923 {
1924         TALLOC_FREE(state->fde);
1925         if (state->fd != -1) {
1926                 close(state->fd);
1927                 state->fd = -1;
1928         }
1929
1930         return 0;
1931 }
1932
1933 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1934                                             struct tevent_fd *fde,
1935                                             uint16_t flags,
1936                                             void *private_data);
1937
1938 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1939                                         struct tevent_context *ev,
1940                                         int sys_errno,
1941                                         const struct tsocket_address *local,
1942                                         const struct tsocket_address *remote)
1943 {
1944         struct tevent_req *req;
1945         struct tstream_bsd_connect_state *state;
1946         struct tsocket_address_bsd *lbsda =
1947                 talloc_get_type_abort(local->private_data,
1948                 struct tsocket_address_bsd);
1949         struct tsocket_address_bsd *rbsda =
1950                 talloc_get_type_abort(remote->private_data,
1951                 struct tsocket_address_bsd);
1952         int ret;
1953         int err;
1954         bool retry;
1955         bool do_bind = false;
1956         bool do_reuseaddr = false;
1957         bool do_ipv6only = false;
1958         bool is_inet = false;
1959         int sa_fam = lbsda->u.sa.sa_family;
1960
1961         req = tevent_req_create(mem_ctx, &state,
1962                                 struct tstream_bsd_connect_state);
1963         if (!req) {
1964                 return NULL;
1965         }
1966         state->fd = -1;
1967         state->fde = NULL;
1968
1969         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1970
1971         /* give the wrappers a chance to report an error */
1972         if (sys_errno != 0) {
1973                 tevent_req_error(req, sys_errno);
1974                 goto post;
1975         }
1976
1977         switch (lbsda->u.sa.sa_family) {
1978         case AF_UNIX:
1979                 if (lbsda->u.un.sun_path[0] != 0) {
1980                         do_reuseaddr = true;
1981                         do_bind = true;
1982                 }
1983                 break;
1984         case AF_INET:
1985                 if (lbsda->u.in.sin_port != 0) {
1986                         do_reuseaddr = true;
1987                         do_bind = true;
1988                 }
1989                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1990                         do_bind = true;
1991                 }
1992                 is_inet = true;
1993                 break;
1994 #ifdef HAVE_IPV6
1995         case AF_INET6:
1996                 if (lbsda->u.in6.sin6_port != 0) {
1997                         do_reuseaddr = true;
1998                         do_bind = true;
1999                 }
2000                 if (memcmp(&in6addr_any,
2001                            &lbsda->u.in6.sin6_addr,
2002                            sizeof(in6addr_any)) != 0) {
2003                         do_bind = true;
2004                 }
2005                 is_inet = true;
2006                 do_ipv6only = true;
2007                 break;
2008 #endif
2009         default:
2010                 tevent_req_error(req, EINVAL);
2011                 goto post;
2012         }
2013
2014         if (!do_bind && is_inet) {
2015                 sa_fam = rbsda->u.sa.sa_family;
2016                 switch (sa_fam) {
2017                 case AF_INET:
2018                         do_ipv6only = false;
2019                         break;
2020 #ifdef HAVE_IPV6
2021                 case AF_INET6:
2022                         do_ipv6only = true;
2023                         break;
2024 #endif
2025                 }
2026         }
2027
2028         state->fd = socket(sa_fam, SOCK_STREAM, 0);
2029         if (state->fd == -1) {
2030                 tevent_req_error(req, errno);
2031                 goto post;
2032         }
2033
2034         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2035         if (state->fd == -1) {
2036                 tevent_req_error(req, errno);
2037                 goto post;
2038         }
2039
2040 #ifdef HAVE_IPV6
2041         if (do_ipv6only) {
2042                 int val = 1;
2043
2044                 ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
2045                                  (const void *)&val, sizeof(val));
2046                 if (ret == -1) {
2047                         tevent_req_error(req, errno);
2048                         goto post;
2049                 }
2050         }
2051 #endif
2052
2053         if (do_reuseaddr) {
2054                 int val = 1;
2055
2056                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2057                                  (const void *)&val, sizeof(val));
2058                 if (ret == -1) {
2059                         tevent_req_error(req, errno);
2060                         goto post;
2061                 }
2062         }
2063
2064         if (do_bind) {
2065                 ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
2066                 if (ret == -1) {
2067                         tevent_req_error(req, errno);
2068                         goto post;
2069                 }
2070         }
2071
2072         if (rbsda->u.sa.sa_family != sa_fam) {
2073                 tevent_req_error(req, EINVAL);
2074                 goto post;
2075         }
2076
2077         ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
2078         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2079         if (retry) {
2080                 /* retry later */
2081                 goto async;
2082         }
2083         if (tevent_req_error(req, err)) {
2084                 goto post;
2085         }
2086
2087         tevent_req_done(req);
2088         goto post;
2089
2090  async:
2091         state->fde = tevent_add_fd(ev, state,
2092                                    state->fd,
2093                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2094                                    tstream_bsd_connect_fde_handler,
2095                                    req);
2096         if (tevent_req_nomem(state->fde, req)) {
2097                 goto post;
2098         }
2099
2100         return req;
2101
2102  post:
2103         tevent_req_post(req, ev);
2104         return req;
2105 }
2106
2107 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2108                                             struct tevent_fd *fde,
2109                                             uint16_t flags,
2110                                             void *private_data)
2111 {
2112         struct tevent_req *req = talloc_get_type_abort(private_data,
2113                                  struct tevent_req);
2114         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2115                                         struct tstream_bsd_connect_state);
2116         int ret;
2117         int error=0;
2118         socklen_t len = sizeof(error);
2119         int err;
2120         bool retry;
2121
2122         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2123         if (ret == 0) {
2124                 if (error != 0) {
2125                         errno = error;
2126                         ret = -1;
2127                 }
2128         }
2129         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2130         if (retry) {
2131                 /* retry later */
2132                 return;
2133         }
2134         if (tevent_req_error(req, err)) {
2135                 return;
2136         }
2137
2138         tevent_req_done(req);
2139 }
2140
2141 static int tstream_bsd_connect_recv(struct tevent_req *req,
2142                                     int *perrno,
2143                                     TALLOC_CTX *mem_ctx,
2144                                     struct tstream_context **stream,
2145                                     const char *location)
2146 {
2147         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2148                                         struct tstream_bsd_connect_state);
2149         int ret;
2150
2151         ret = tsocket_simple_int_recv(req, perrno);
2152         if (ret == 0) {
2153                 ret = _tstream_bsd_existing_socket(mem_ctx,
2154                                                    state->fd,
2155                                                    stream,
2156                                                    location);
2157                 if (ret == -1) {
2158                         *perrno = errno;
2159                         goto done;
2160                 }
2161                 TALLOC_FREE(state->fde);
2162                 state->fd = -1;
2163         }
2164
2165 done:
2166         tevent_req_received(req);
2167         return ret;
2168 }
2169
2170 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2171                                         struct tevent_context *ev,
2172                                         const struct tsocket_address *local,
2173                                         const struct tsocket_address *remote)
2174 {
2175         struct tsocket_address_bsd *lbsda =
2176                 talloc_get_type_abort(local->private_data,
2177                 struct tsocket_address_bsd);
2178         struct tevent_req *req;
2179         int sys_errno = 0;
2180
2181         switch (lbsda->u.sa.sa_family) {
2182         case AF_INET:
2183                 break;
2184 #ifdef HAVE_IPV6
2185         case AF_INET6:
2186                 break;
2187 #endif
2188         default:
2189                 sys_errno = EINVAL;
2190                 break;
2191         }
2192
2193         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2194
2195         return req;
2196 }
2197
2198 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2199                                    int *perrno,
2200                                    TALLOC_CTX *mem_ctx,
2201                                    struct tstream_context **stream,
2202                                    const char *location)
2203 {
2204         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2205 }
2206
2207 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2208                                         struct tevent_context *ev,
2209                                         const struct tsocket_address *local,
2210                                         const struct tsocket_address *remote)
2211 {
2212         struct tsocket_address_bsd *lbsda =
2213                 talloc_get_type_abort(local->private_data,
2214                 struct tsocket_address_bsd);
2215         struct tevent_req *req;
2216         int sys_errno = 0;
2217
2218         switch (lbsda->u.sa.sa_family) {
2219         case AF_UNIX:
2220                 break;
2221         default:
2222                 sys_errno = EINVAL;
2223                 break;
2224         }
2225
2226         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2227
2228         return req;
2229 }
2230
2231 int _tstream_unix_connect_recv(struct tevent_req *req,
2232                                       int *perrno,
2233                                       TALLOC_CTX *mem_ctx,
2234                                       struct tstream_context **stream,
2235                                       const char *location)
2236 {
2237         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2238 }
2239
2240 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2241                              struct tstream_context **_stream1,
2242                              TALLOC_CTX *mem_ctx2,
2243                              struct tstream_context **_stream2,
2244                              const char *location)
2245 {
2246         int ret;
2247         int fds[2];
2248         int fd1;
2249         int fd2;
2250         struct tstream_context *stream1 = NULL;
2251         struct tstream_context *stream2 = NULL;
2252
2253         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2254         if (ret == -1) {
2255                 return -1;
2256         }
2257         fd1 = fds[0];
2258         fd2 = fds[1];
2259
2260         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2261         if (fd1 == -1) {
2262                 int sys_errno = errno;
2263                 close(fd2);
2264                 errno = sys_errno;
2265                 return -1;
2266         }
2267
2268         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2269         if (fd2 == -1) {
2270                 int sys_errno = errno;
2271                 close(fd1);
2272                 errno = sys_errno;
2273                 return -1;
2274         }
2275
2276         ret = _tstream_bsd_existing_socket(mem_ctx1,
2277                                            fd1,
2278                                            &stream1,
2279                                            location);
2280         if (ret == -1) {
2281                 int sys_errno = errno;
2282                 close(fd1);
2283                 close(fd2);
2284                 errno = sys_errno;
2285                 return -1;
2286         }
2287
2288         ret = _tstream_bsd_existing_socket(mem_ctx2,
2289                                            fd2,
2290                                            &stream2,
2291                                            location);
2292         if (ret == -1) {
2293                 int sys_errno = errno;
2294                 talloc_free(stream1);
2295                 close(fd2);
2296                 errno = sys_errno;
2297                 return -1;
2298         }
2299
2300         *_stream1 = stream1;
2301         *_stream2 = stream2;
2302         return 0;
2303 }
2304