From 42a0a094b72577aee3fee7f186da52dc83db001d Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Wed, 2 Oct 2013 09:39:20 +0200 Subject: [PATCH] Add recvfile support with splice() on Linux o Global option "recvfile" controls whether splice() is used. Ddefaults to false. o Global option "splice size" limits the amount of data spliced. Defaults to 64k. --- NEWS | 2 + configure.ac | 1 + doc/manpages/man5/afp.conf.5.xml | 19 +++ etc/afpd/fork.c | 70 +++++---- include/atalk/adouble.h | 3 + include/atalk/globals.h | 2 + libatalk/adouble/Makefile.am | 1 + libatalk/adouble/ad_recvfile.c | 261 +++++++++++++++++++++++++++++++ libatalk/adouble/ad_sendfile.c | 26 --- libatalk/dsi/dsi_stream.c | 15 +- libatalk/dsi/dsi_write.c | 36 ++--- libatalk/util/netatalk_conf.c | 3 + macros/netatalk.m4 | 17 ++ man/man5/afp.conf.5.in | 10 ++ 14 files changed, 387 insertions(+), 79 deletions(-) create mode 100644 libatalk/adouble/ad_recvfile.c diff --git a/NEWS b/NEWS index 2188fcf1..504d2c08 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,8 @@ Changes in 3.1.0 * NEW: New option "dbus daemon" (G) * UPD: Add configure option --with-afpstats for overriding the result of autodetecting dbus-glib presence +* NEW: Add recvfile support with splice() on Linux. New global options + "recvfile" (default: no) and "splice size" (default 64k). Changes in 3.0.6 ================ diff --git a/configure.ac b/configure.ac index cbbc6771..12a0ce2f 100644 --- a/configure.ac +++ b/configure.ac @@ -180,6 +180,7 @@ AC_NETATALK_REALPATH dnl Check for sendfile() AC_NETATALK_SENDFILE +AC_NETATALK_RECVFILE dnl Check whether bundled libevent shall not be used AC_NETATALK_LIBEVENT diff --git a/doc/manpages/man5/afp.conf.5.xml b/doc/manpages/man5/afp.conf.5.xml index 150459cd..df6d2403 100644 --- a/doc/manpages/man5/afp.conf.5.xml +++ b/doc/manpages/man5/afp.conf.5.xml @@ -713,6 +713,24 @@ + + recvfile = BOOLEAN (default: + no) (G) + + + Whether to use splice() on Linux for receiving data. + + + + + splice size = number (default: + 64k) (G) + + + Maximum number of bytes spliced. + + + use sendfile = BOOLEAN (default: yes) (G) @@ -724,6 +742,7 @@ + zeroconf = BOOLEAN (default: yes) (G) diff --git a/etc/afpd/fork.c b/etc/afpd/fork.c index 350b5711..73abb1e9 100644 --- a/etc/afpd/fork.c +++ b/etc/afpd/fork.c @@ -1182,41 +1182,54 @@ static int write_fork(AFPObj *obj, char *ibuf, size_t ibuflen _U_, char *rbuf, s } /* find out what we have already */ - cc = dsi_writeinit(dsi, rcvbuf, rcvbuflen); - - if (!cc || (cc = write_file(ofork, eid, offset, rcvbuf, cc)) < 0) { - dsi_writeflush(dsi); - *rbuflen = 0; - if (obj->options.flags & OPTION_AFP_READ_LOCK) - ad_tmplock(ofork->of_ad, eid, ADLOCK_CLR, saveoff, reqcount, ofork->of_refnum); - return cc; + if ((cc = dsi_writeinit(dsi, rcvbuf, rcvbuflen)) > 0) { + ssize_t written; + if ((written = write_file(ofork, eid, offset, rcvbuf, cc)) != cc) { + dsi_writeflush(dsi); + *rbuflen = 0; + if (obj->options.flags & OPTION_AFP_READ_LOCK) + ad_tmplock(ofork->of_ad, eid, ADLOCK_CLR, saveoff, reqcount, ofork->of_refnum); + if (written > 0) + /* It's used for the read size and as error code in write_file(), ugh */ + written = AFPERR_MISC; + return written; + } } offset += cc; -#if 0 /*def HAVE_SENDFILE_WRITE*/ - if ((cc = ad_writefile(ofork->of_ad, eid, dsi->socket, offset, dsi->datasize)) < 0) { - switch (errno) { - case EDQUOT: - case EFBIG: - case ENOSPC: - cc = AFPERR_DFULL; - break; - default: - LOG(log_error, logtype_afpd, "afp_write: ad_writefile: %s", strerror(errno) ); - goto afp_write_loop; +#ifdef WITH_RECVFILE + if (obj->options.flags & OPTION_RECVFILE) { + LOG(log_maxdebug, logtype_afpd, "afp_write(fork: %" PRIu16 " [%s], off: %" PRIu64 ", size: %" PRIu32 ")", + ofork->of_refnum, (ofork->of_flags & AFPFORK_DATA) ? "data" : "reso", offset, dsi->datasize); + + if ((cc = ad_recvfile(ofork->of_ad, eid, dsi->socket, offset, dsi->datasize, obj->options.splice_size)) < dsi->datasize) { + switch (errno) { + case EDQUOT: + case EFBIG: + case ENOSPC: + cc = AFPERR_DFULL; + dsi_writeflush(dsi); + break; + case ENOSYS: + goto afp_write_loop; + default: + /* Low level error, can't do much to back up */ + cc = AFPERR_MISC; + LOG(log_error, logtype_afpd, "afp_write: ad_writefile: %s", strerror(errno)); + } + *rbuflen = 0; + if (obj->options.flags & OPTION_AFP_READ_LOCK) + ad_tmplock(ofork->of_ad, eid, ADLOCK_CLR, saveoff, reqcount, ofork->of_refnum); + return cc; } - dsi_writeflush(dsi); - *rbuflen = 0; - if (obj->options.flags & OPTION_AFP_READ_LOCK) - ad_tmplock(ofork->of_ad, eid, ADLOCK_CLR, saveoff, reqcount, ofork->of_refnum); - return cc; - } - offset += cc; - goto afp_write_done; -#endif /* 0, was HAVE_SENDFILE_WRITE */ + offset += cc; + goto afp_write_done; + } +#endif +afp_write_loop: /* loop until everything gets written. currently * dsi_write handles the end case by itself. */ while ((cc = dsi_write(dsi, rcvbuf, rcvbuflen))) { @@ -1235,6 +1248,7 @@ static int write_fork(AFPObj *obj, char *ibuf, size_t ibuflen _U_, char *rbuf, s offset += cc; } +afp_write_done: if (obj->options.flags & OPTION_AFP_READ_LOCK) ad_tmplock(ofork->of_ad, eid, ADLOCK_CLR, saveoff, reqcount, ofork->of_refnum); if ( ad_meta_fileno( ofork->of_ad ) != -1 ) /* META */ diff --git a/include/atalk/adouble.h b/include/atalk/adouble.h index 0a66ad26..972ce26f 100644 --- a/include/atalk/adouble.h +++ b/include/atalk/adouble.h @@ -450,5 +450,8 @@ extern uint32_t ad_forcegetid(struct adouble *adp); #ifdef WITH_SENDFILE extern int ad_readfile_init(const struct adouble *ad, int eid, off_t *off, int end); #endif +#ifdef WITH_RECVFILE +extern ssize_t ad_recvfile(struct adouble *ad, int eid, int sock, off_t off, size_t len, int); +#endif #endif /* _ATALK_ADOUBLE_H */ diff --git a/include/atalk/globals.h b/include/atalk/globals.h index b8762808..8944ec86 100644 --- a/include/atalk/globals.h +++ b/include/atalk/globals.h @@ -59,6 +59,7 @@ #define OPTION_DBUS_AFPSTATS (1 << 12) /* whether to run dbus thread for afpstats */ #define OPTION_SPOTLIGHT (1 << 13) /* whether to initialize Spotlight support */ #define OPTION_SPOTLIGHT_VOL (1 << 14) /* whether spotlight shall be enabled by default for volumes */ +#define OPTION_RECVFILE (1 << 15) #define PASSWD_NONE 0 #define PASSWD_SET (1 << 0) @@ -123,6 +124,7 @@ struct afp_options { char *adminauthuser; char *ignored_attr; char *slmod_path; + int splice_size; struct afp_volume_name volfile; }; diff --git a/libatalk/adouble/Makefile.am b/libatalk/adouble/Makefile.am index 49144a2a..1ae23665 100644 --- a/libatalk/adouble/Makefile.am +++ b/libatalk/adouble/Makefile.am @@ -11,6 +11,7 @@ libadouble_la_SOURCES = \ ad_mmap.c \ ad_open.c \ ad_read.c \ + ad_recvfile.c \ ad_sendfile.c \ ad_size.c \ ad_write.c diff --git a/libatalk/adouble/ad_recvfile.c b/libatalk/adouble/ad_recvfile.c new file mode 100644 index 00000000..1703ee83 --- /dev/null +++ b/libatalk/adouble/ad_recvfile.c @@ -0,0 +1,261 @@ +/* + * Copyright (C) Jeremy Allison 2007 + * Copyright (c) 2013 Ralph Boehme + * All rights reserved. See COPYRIGHT. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif /* HAVE_CONFIG_H */ + +#ifdef WITH_RECVFILE + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int ad_recvfile_init(const struct adouble *ad, int eid, off_t *off) +{ + int fd; + + if (eid == ADEID_DFORK) { + fd = ad_data_fileno(ad); + } else { + *off += ad_getentryoff(ad, eid); + fd = ad_reso_fileno(ad); + } + + return fd; +} + +/* + * If tofd is -1, drain the incoming socket of count bytes without writing to the outgoing fd, + * if a write fails we do the same. + * + * Returns -1 on short reads from fromfd (read error) and sets errno. + * + * Returns number of bytes written to 'tofd' or thrown away if 'tofd == -1'. + * return != count then sets errno. + * Returns count if complete success. + */ + +#define TRANSFER_BUF_SIZE (128*1024) + +static ssize_t default_sys_recvfile(int fromfd, + int tofd, + off_t offset, + size_t count) +{ + int saved_errno = 0; + size_t total = 0; + size_t bufsize = MIN(TRANSFER_BUF_SIZE, count); + size_t total_written = 0; + char *buffer = NULL; + + if (count == 0) { + return 0; + } + + LOG(log_maxdebug, logtype_dsi, "default_recvfile: from = %d, to = %d, offset = %.0f, count = %lu\n", + fromfd, tofd, (double)offset, (unsigned long)count); + + if ((buffer = malloc(bufsize)) == NULL) + return -1; + + while (total < count) { + size_t num_written = 0; + ssize_t read_ret; + size_t toread = MIN(bufsize,count - total); + + /* Read from socket - ignore EINTR. */ + read_ret = read(fromfd, buffer, toread); + if (read_ret <= 0) { + /* EOF or socket error. */ + free(buffer); + return -1; + } + + num_written = 0; + + while (num_written < read_ret) { + ssize_t write_ret; + + if (tofd == -1) { + write_ret = read_ret; + } else { + /* Write to file - ignore EINTR. */ + write_ret = pwrite(tofd, buffer + num_written, read_ret - num_written, offset); + if (write_ret <= 0) { + /* write error - stop writing. */ + tofd = -1; + saved_errno = errno; + continue; + } + } + num_written += (size_t)write_ret; + total_written += (size_t)write_ret; + } + total += read_ret; + } + + free(buffer); + if (saved_errno) { + /* Return the correct write error. */ + errno = saved_errno; + } + return (ssize_t)total_written; +} + +#ifdef HAVE_SPLICE +static int waitfordata(int socket) +{ + fd_set readfds; + int maxfd = socket + 1; + int ret; + + FD_ZERO(&readfds); + + while (1) { + FD_ZERO(&readfds); + FD_SET(socket, &readfds); + if ((ret = select(maxfd, &readfds, NULL, NULL, NULL)) <= 0) { + if (ret == -1 && errno == EINTR) + continue; + LOG(log_error, logtype_dsi, "waitfordata: unexpected select return: %d %s", + ret, ret < 0 ? strerror(errno) : ""); + return -1; + } + if (FD_ISSET(socket, &readfds)) + return 0; + return -1; + } + +} + +/* + * Try and use the Linux system call to do this. + * Remember we only return -1 if the socket read + * failed. Else we return the number of bytes + * actually written. We always read count bytes + * from the network in the case of return != -1. + */ +static ssize_t sys_recvfile(int fromfd, int tofd, off_t offset, size_t count, int splice_size) +{ + static int pipefd[2] = { -1, -1 }; + static bool try_splice_call = true; + size_t total_written = 0; + loff_t splice_offset = offset; + + LOG(log_debug, logtype_dsi, "sys_recvfile: from = %d, to = %d, offset = %.0f, count = %lu", + fromfd, tofd, (double)offset, (unsigned long)count); + + if (count == 0) + return 0; + + /* + * Older Linux kernels have splice for sendfile, + * but it fails for recvfile. Ensure we only try + * this once and always fall back to the userspace + * implementation if recvfile splice fails. JRA. + */ + + if (!try_splice_call) { + errno = ENOSYS; + return -1; + } + + if ((pipefd[0] == -1) && (pipe(pipefd) == -1)) { + try_splice_call = false; + errno = ENOSYS; + return -1; + } + + while (count > 0) { + int nread, to_write; + + nread = splice(fromfd, NULL, pipefd[1], NULL, MIN(count, splice_size), SPLICE_F_MOVE | SPLICE_F_NONBLOCK); + + if (nread == -1) { + if (errno == EINTR) + continue; + if (errno == EAGAIN) { + if (waitfordata(fromfd) != -1) + continue; + return -1; + } + if (total_written == 0 && (errno == EBADF || errno == EINVAL)) { + LOG(log_warning, logtype_dsi, "splice() doesn't work for recvfile"); + try_splice_call = false; + errno = ENOSYS; + return -1; + } + break; + } + + to_write = nread; + while (to_write > 0) { + int thistime; + thistime = splice(pipefd[0], NULL, tofd, &splice_offset, to_write, SPLICE_F_MOVE); + if (thistime == -1) + return -1; + to_write -= thistime; + } + + total_written += nread; + count -= nread; + } + +done: + LOG(log_maxdebug, logtype_dsi, "sys_recvfile: total_written: %zu", total_written); + + return total_written; +} +#else + +/***************************************************************** + No recvfile system call - use the default 128 chunk implementation. +*****************************************************************/ + +ssize_t sys_recvfile(int fromfd, int tofd, off_t offset, size_t count) +{ + return default_sys_recvfile(fromfd, tofd, offset, count); +} +#endif + +/* read from a socket and write to an adouble file */ +ssize_t ad_recvfile(struct adouble *ad, int eid, int sock, off_t off, size_t len, int splice_size) +{ + ssize_t cc; + int fd; + off_t off_fork = off; + + fd = ad_recvfile_init(ad, eid, &off_fork); + if ((cc = sys_recvfile(sock, fd, off_fork, len, splice_size)) != len) + return -1; + + if ((eid != ADEID_DFORK) && (off > ad_getentrylen(ad, eid))) + ad_setentrylen(ad, eid, off); + + return cc; +} +#endif diff --git a/libatalk/adouble/ad_sendfile.c b/libatalk/adouble/ad_sendfile.c index 3cb28e78..2730644b 100644 --- a/libatalk/adouble/ad_sendfile.c +++ b/libatalk/adouble/ad_sendfile.c @@ -103,30 +103,4 @@ int ad_readfile_init(const struct adouble *ad, return fd; } - - -/* ------------------------ */ -#if 0 -#ifdef HAVE_SENDFILE_WRITE -/* read from a socket and write to an adouble file */ -ssize_t ad_writefile(struct adouble *ad, const int eid, - const int sock, off_t off, const int end, - const size_t len) -{ -#ifdef __linux__ - ssize_t cc; - int fd; - - fd = ad_sendfile_init(ad, eid, &off, end); - if ((cc = sys_sendfile(fd, sock, &off, len)) < 0) - return -1; - - if ((eid != ADEID_DFORK) && (off > ad_getentrylen(ad, eid))) - ad_setentrylen(ad, eid, off); - - return cc; -#endif /* __linux__ */ -} -#endif /* HAVE_SENDFILE_WRITE */ -#endif /* 0 */ #endif diff --git a/libatalk/dsi/dsi_stream.c b/libatalk/dsi/dsi_stream.c index 770258c5..c8f859ce 100644 --- a/libatalk/dsi/dsi_stream.c +++ b/libatalk/dsi/dsi_stream.c @@ -1,7 +1,7 @@ /* * Copyright (c) 1998 Adrian Sun (asun@zoology.washington.edu) * Copyright (c) 2010,2011,2012 Frank Lahm - * All rights reserved. See COPYRIGHT. +> * All rights reserved. See COPYRIGHT. * * this file provides the following functions: * dsi_stream_write: just write a bunch of bytes. @@ -615,14 +615,23 @@ int dsi_stream_receive(DSI *dsi) return 0; memcpy(&dsi->header.dsi_requestID, block + 2, sizeof(dsi->header.dsi_requestID)); - memcpy(&dsi->header.dsi_data.dsi_code, block + 4, sizeof(dsi->header.dsi_data.dsi_code)); + memcpy(&dsi->header.dsi_data.dsi_doff, block + 4, sizeof(dsi->header.dsi_data.dsi_doff)); + dsi->header.dsi_data.dsi_doff = htonl(dsi->header.dsi_data.dsi_doff); memcpy(&dsi->header.dsi_len, block + 8, sizeof(dsi->header.dsi_len)); + memcpy(&dsi->header.dsi_reserved, block + 12, sizeof(dsi->header.dsi_reserved)); dsi->clientID = ntohs(dsi->header.dsi_requestID); /* make sure we don't over-write our buffers. */ dsi->cmdlen = MIN(ntohl(dsi->header.dsi_len), dsi->server_quantum); - if (dsi_stream_read(dsi, dsi->commands, dsi->cmdlen) != dsi->cmdlen) + + /* Receiving DSIWrite data is done in AFP function, not here */ + if (dsi->header.dsi_data.dsi_doff) { + LOG(log_maxdebug, logtype_dsi, "dsi_stream_receive: write request"); + dsi->cmdlen = dsi->header.dsi_data.dsi_doff; + } + + if (dsi_stream_read(dsi, dsi->commands, dsi->cmdlen) != dsi->cmdlen) return 0; LOG(log_debug, logtype_dsi, "dsi_stream_receive: DSI cmdlen: %zd", dsi->cmdlen); diff --git a/libatalk/dsi/dsi_write.c b/libatalk/dsi/dsi_write.c index dc35cafa..fd8fe2c5 100644 --- a/libatalk/dsi/dsi_write.c +++ b/libatalk/dsi/dsi_write.c @@ -23,35 +23,27 @@ #include #include -/* initialize relevant things for dsi_write. this returns the amount - * of data in the data buffer. the interface has been reworked to allow - * for arbitrary buffers. */ size_t dsi_writeinit(DSI *dsi, void *buf, const size_t buflen _U_) { - size_t len, header; + size_t bytes = 0; + dsi->datasize = ntohl(dsi->header.dsi_len) - dsi->header.dsi_data.dsi_doff; - /* figure out how much data we have. do a couple checks for 0 - * data */ - header = ntohl(dsi->header.dsi_data.dsi_doff); - dsi->datasize = header ? ntohl(dsi->header.dsi_len) - header : 0; + if (dsi->eof > dsi->start) { + /* We have data in the buffer */ + bytes = MIN(dsi->eof - dsi->start, dsi->datasize); + memmove(buf, dsi->start, bytes); + dsi->start += bytes; + dsi->datasize -= bytes; + if (dsi->start >= dsi->eof) + dsi->start = dsi->eof = dsi->buffer; + } - if (dsi->datasize > 0) { - len = MIN(dsi->server_quantum - header, dsi->datasize); + LOG(log_maxdebug, logtype_dsi, "dsi_writeinit: remaining DSI datasize: %jd", (intmax_t)dsi->datasize); - /* write last part of command buffer into buf */ - memmove(buf, dsi->commands + header, len); - - /* recalculate remaining data */ - dsi->datasize -= len; - } else - len = 0; - - LOG(log_maxdebug, logtype_dsi, "dsi_writeinit: len: %ju, remaining DSI datasize: %jd", - (intmax_t)len, (intmax_t)dsi->datasize); - - return len; + return bytes; } + /* fill up buf and then return. this should be called repeatedly * until all the data has been read. i block alarm processing * during the transfer to avoid sending unnecessary tickles. */ diff --git a/libatalk/util/netatalk_conf.c b/libatalk/util/netatalk_conf.c index 5ad32569..d1b46874 100644 --- a/libatalk/util/netatalk_conf.c +++ b/libatalk/util/netatalk_conf.c @@ -1764,6 +1764,8 @@ int afp_config_parse(AFPObj *AFPObj, char *processname) options->flags |= OPTION_SERVERNOTIF; if (!atalk_iniparser_getboolean(config, INISEC_GLOBAL, "use sendfile", 1)) options->flags |= OPTION_NOSENDFILE; + if (atalk_iniparser_getboolean(config, INISEC_GLOBAL, "recvfile", 0)) + options->flags |= OPTION_RECVFILE; if (atalk_iniparser_getboolean(config, INISEC_GLOBAL, "solaris share reservations", 1)) options->flags |= OPTION_SHARE_RESERV; if (atalk_iniparser_getboolean(config, INISEC_GLOBAL, "afpstats", 0)) @@ -1811,6 +1813,7 @@ int afp_config_parse(AFPObj *AFPObj, char *processname) options->fce_fmodwait = atalk_iniparser_getint (config, INISEC_GLOBAL, "fce holdfmod", 60); options->sleep = atalk_iniparser_getint (config, INISEC_GLOBAL, "sleep time", 10); options->disconnected = atalk_iniparser_getint (config, INISEC_GLOBAL, "disconnect time",24); + options->splice_size = atalk_iniparser_getint (config, INISEC_GLOBAL, "splice size", 64*1024); p = atalk_iniparser_getstring(config, INISEC_GLOBAL, "map acls", "rights"); if (STRCMP(p, ==, "rights")) diff --git a/macros/netatalk.m4 b/macros/netatalk.m4 index 86c4c711..060e4679 100644 --- a/macros/netatalk.m4 +++ b/macros/netatalk.m4 @@ -1164,6 +1164,23 @@ if test x"$netatalk_cv_search_sendfile" = x"yes"; then fi ]) +dnl ------ Check for recvfile() -------- +AC_DEFUN([AC_NETATALK_RECVFILE], [ +case "$host_os" in +*linux*) + AC_CHECK_FUNCS([splice], [atalk_cv_use_recvfile=yes]) + ;; + +*) + ;; + +esac + +if test x"$atalk_cv_use_recvfile" = x"yes"; then + AC_DEFINE(WITH_RECVFILE, 1, [Whether recvfile should be used]) +fi +]) + dnl --------------------- Check if realpath() takes NULL AC_DEFUN([AC_NETATALK_REALPATH], [ AC_CACHE_CHECK([if the realpath function allows a NULL argument], diff --git a/man/man5/afp.conf.5.in b/man/man5/afp.conf.5.in index 74d7a609..bf5434eb 100644 --- a/man/man5/afp.conf.5.in +++ b/man/man5/afp.conf.5.in @@ -492,6 +492,16 @@ tcpsndbuf = \fInumber\fR \fB(G)\fR Try to set TCP send buffer using setsockpt()\&. Often OSes impose restrictions on the applications ability to set this value\&. .RE .PP +recvfile = \fIBOOLEAN\fR (default: \fIno\fR) \fB(G)\fR +.RS 4 +Whether to use splice() on Linux for receiving data\&. +.RE +.PP +splice size = \fInumber\fR (default: \fI64k\fR) \fB(G)\fR +.RS 4 +Maximum number of bytes spliced\&. +.RE +.PP use sendfile = \fIBOOLEAN\fR (default: \fIyes\fR) \fB(G)\fR .RS 4 Whether to use sendfile -- 2.39.2