From 9d43f7980c0cc89831724bc1de724cde0ab55630 Mon Sep 17 00:00:00 2001 From: "Costa Tsaousis (ktsaou)" Date: Fri, 10 Jun 2016 23:16:35 +0300 Subject: [PATCH] added more error exit tracing info to trace issue 529; switched to accept4() and added SO_NONBLOCK to all sockets --- configure.ac | 1 + src/daemon.c | 5 ++++- src/log.h | 1 + src/main.c | 10 +++++----- src/popen.c | 39 +++++++++++++++++++++------------------ src/rrd.c | 8 ++------ src/web_client.c | 10 +++++----- src/web_server.c | 33 +++++++++++++++++++++++++++++++++ src/web_server.h | 13 +++++++++++++ 9 files changed, 85 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index 240725c1..d2ee0e64 100644 --- a/configure.ac +++ b/configure.ac @@ -34,6 +34,7 @@ AC_PROG_CC AC_PROG_INSTALL PKG_PROG_PKG_CONFIG AC_USE_SYSTEM_EXTENSIONS +AC_CHECK_FUNCS_ONCE(accept4) AC_ARG_ENABLE( [plugin-nfacct], diff --git a/src/daemon.c b/src/daemon.c index 4b7af3c2..82b76329 100644 --- a/src/daemon.c +++ b/src/daemon.c @@ -29,8 +29,11 @@ char pidfile[FILENAME_MAX + 1] = ""; void sig_handler(int signo) { - if(signo) + if(signo) { + error_log_limit_unlimited(); + error("Received signal %d. Exiting...", signo); netdata_exit = 1; + } } static void properly_chown_netdata_generated_file(int fd, uid_t uid, gid_t gid) { diff --git a/src/log.h b/src/log.h index 3f811d9f..4d441e8f 100644 --- a/src/log.h +++ b/src/log.h @@ -50,6 +50,7 @@ extern unsigned long error_log_errors_per_period; extern int error_log_limit(int reset); #define error_log_limit_reset() do { error_log_limit(1); } while(0) +#define error_log_limit_unlimited() do { error_log_throttle_period = 0; } while(0) #define debug(type, args...) do { if(unlikely(!silent && (debug_flags & type))) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #define info(args...) 
info_int(__FILE__, __FUNCTION__, __LINE__, ##args) diff --git a/src/main.c b/src/main.c index 312dab6e..f784592c 100644 --- a/src/main.c +++ b/src/main.c @@ -40,15 +40,15 @@ extern void *cgroups_main(void *ptr); volatile sig_atomic_t netdata_exit = 0; -void netdata_cleanup_and_exit(int ret) -{ +void netdata_cleanup_and_exit(int ret) { netdata_exit = 1; + + error_log_limit_unlimited(); + + info("Called: netdata_cleanup_and_exit()"); rrdset_save_all(); // kill_childs(); - // let it log a few more error messages - error_log_limit_reset(); - if(pidfile[0]) { if(unlink(pidfile) != 0) error("Cannot unlink pidfile '%s'.", pidfile); diff --git a/src/popen.c b/src/popen.c index 06f27c0b..1c311d6a 100644 --- a/src/popen.c +++ b/src/popen.c @@ -93,7 +93,9 @@ FILE *mypopen(const char *command, pid_t *pidptr) // fork again to become session leader pid = fork(); - if(pid == -1) fprintf(stderr, "Cannot fork again on pid %d\n", getpid()); + if(pid == -1) + error("pre-execution of command '%s' on pid %d: Cannot fork 2nd time.", command, getpid()); + if(pid != 0) { // the parent exit(0); @@ -101,13 +103,13 @@ FILE *mypopen(const char *command, pid_t *pidptr) // set a new process group id for just this child if( setpgid(0, 0) != 0 ) - error("Cannot set a new process group for pid %d (%s)", getpid(), strerror(errno)); + error("pre-execution of command '%s' on pid %d: Cannot set a new process group.", command, getpid()); if( getpgid(0) != getpid() ) - error("Process group set is incorrect. Expected %d, found %d", getpid(), getpgid(0)); + error("pre-execution of command '%s' on pid %d: Cannot set a new process group. Process group set is incorrect. 
Expected %d, found %d", command, getpid(), getpid(), getpgid(0)); if( setsid() != 0 ) - error("Cannot set session id for pid %d (%s)", getpid(), strerror(errno)); + error("pre-execution of command '%s' on pid %d: Cannot set session id.", command, getpid()); fprintf(stdout, "MYPID %d\n", getpid()); fflush(NULL); @@ -118,18 +120,17 @@ FILE *mypopen(const char *command, pid_t *pidptr) sigset_t sigset; sigfillset(&sigset); - if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) { - error("Could not block signals for threads"); - } + if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) + error("pre-execution of command '%s' on pid %d: could not unblock signals for threads.", command, getpid()); + // We only need to reset ignored signals. // Signals with signal handlers are reset by default. struct sigaction sa; sigemptyset(&sa.sa_mask); sa.sa_handler = SIG_DFL; sa.sa_flags = 0; - if(sigaction(SIGPIPE, &sa, NULL) == -1) { - error("Failed to change signal handler for SIGTERM"); - } + if(sigaction(SIGPIPE, &sa, NULL) == -1) + error("pre-execution of command '%s' on pid %d: failed to set default signal handler for SIGPIPE.", command, getpid()); } @@ -148,41 +149,43 @@ int mypclose(FILE *fp, pid_t pid) { if(waitid(P_PID, (id_t) pid, &info, WEXITED) != -1) { switch(info.si_code) { case CLD_EXITED: - error("pid %d exited with code %d.", info.si_pid, info.si_status); + error("child pid %d exited with code %d.", info.si_pid, info.si_status); return(info.si_status); break; case CLD_KILLED: - error("pid %d killed by signal %d.", info.si_pid, info.si_status); + error("child pid %d killed by signal %d.", info.si_pid, info.si_status); return(-1); break; case CLD_DUMPED: - error("pid %d core dumped by signal %d.", info.si_pid, info.si_status); + error("child pid %d core dumped by signal %d.", info.si_pid, info.si_status); return(-2); break; case CLD_STOPPED: - error("pid %d stopped by signal %d.", info.si_pid, info.si_status); + error("child pid %d stopped by signal %d.", 
info.si_pid, info.si_status); return(0); break; case CLD_TRAPPED: - error("pid %d trapped by signal %d.", info.si_pid, info.si_status); + error("child pid %d trapped by signal %d.", info.si_pid, info.si_status); return(-4); break; case CLD_CONTINUED: - error("pid %d continued by signal %d.", info.si_pid, info.si_status); + error("child pid %d continued by signal %d.", info.si_pid, info.si_status); return(0); break; default: - error("pid %d gave us a SIGCHLD with code %d and status %d.", info.si_pid, info.si_code, info.si_status); + error("child pid %d gave us a SIGCHLD with code %d and status %d.", info.si_pid, info.si_code, info.si_status); return(-5); break; } } - else error("Cannot waitid() for pid %d", pid); + else + error("Cannot waitid() for pid %d", pid); + return 0; } diff --git a/src/rrd.c b/src/rrd.c index 0d1bed54..2a134fc2 100644 --- a/src/rrd.c +++ b/src/rrd.c @@ -661,12 +661,8 @@ void rrdset_free_all(void) info("Memory cleanup completed..."); } -void rrdset_save_all(void) -{ - debug(D_RRD_CALLS, "rrdset_save_all()"); - - // let it log a few error messages - error_log_limit_reset(); +void rrdset_save_all(void) { + info("Saving database..."); RRDSET *st; RRDDIM *rd; diff --git a/src/web_client.c b/src/web_client.c index 30fdf385..4fe40700 100644 --- a/src/web_client.c +++ b/src/web_client.c @@ -22,6 +22,7 @@ #include "common.h" #include "log.h" +#include "main.h" #include "appconfig.h" #include "url.h" #include "web_buffer.h" @@ -44,8 +45,6 @@ int web_donotrack_comply = 0; int web_enable_gzip = 1, web_gzip_level = 3, web_gzip_strategy = Z_DEFAULT_STRATEGY; #endif /* NETDATA_WITH_ZLIB */ -extern int netdata_exit; - struct web_client *web_clients = NULL; unsigned long long web_clients_count = 0; @@ -99,7 +98,7 @@ struct web_client *web_client_create(int listener) sadr = (struct sockaddr*) &w->clientaddr; addrlen = sizeof(w->clientaddr); - w->ifd = accept(listener, sadr, &addrlen); + w->ifd = accept4(listener, sadr, &addrlen, SOCK_NONBLOCK); if (w->ifd == 
-1) { error("%llu: Cannot accept new incoming connection.", w->id); free(w); @@ -141,8 +140,6 @@ struct web_client *web_client_create(int listener) flag = 1; if(setsockopt(w->ifd, SOL_SOCKET, SO_KEEPALIVE, (char *) &flag, sizeof(int)) != 0) error("%llu: Cannot set SO_KEEPALIVE on socket.", w->id); - - } w->response.data = buffer_create(INITIAL_WEB_DATA_LENGTH); @@ -446,6 +443,8 @@ int mysendfile(struct web_client *w, char *filename) return 404; } } + if(fcntl(w->ifd, F_SETFL, O_NONBLOCK) < 0) + error("%llu: Cannot set O_NONBLOCK on file '%s'.", w->id, webfilename); // pick a Content-Type for the file if(strstr(filename, ".html") != NULL) w->response.data->contenttype = CT_TEXT_HTML; @@ -1877,6 +1876,7 @@ void web_client_process(struct web_client *w) { else buffer_strcat(w->response.data, "I am doing it already"); + error("web request to exit received."); netdata_exit = 1; } else if(hash == hash_debug && strcmp(tok, "debug") == 0) { diff --git a/src/web_server.c b/src/web_server.c index f4e0f498..9d549743 100644 --- a/src/web_server.c +++ b/src/web_server.c @@ -51,6 +51,39 @@ static void log_allocations(void) } #endif
+#ifndef HAVE_ACCEPT4
+// accept4() emulation for systems that lack it: accept(), then apply the SOCK_* flags.
+// Returns the new socket, or -1 with errno set (EINVAL for unsupported flags).
+int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags) {
+	int fd, fl, saved_errno;
+
+	// validate before accept(), so a bad call does not consume (and leak) a connection
+	if (flags & ~(SOCK_NONBLOCK | SOCK_CLOEXEC)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	fd = accept(sock, addr, addrlen);
+	if (fd < 0) return fd;
+
+	// O_NONBLOCK is a file status flag: OR it into the current F_GETFL value
+	if ((flags & SOCK_NONBLOCK) && ((fl = fcntl(fd, F_GETFL)) < 0 || fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0))
+		goto failed;
+
+	// close-on-exec is a descriptor flag: F_SETFL ignores O_CLOEXEC, F_SETFD is needed
+	if ((flags & SOCK_CLOEXEC) && ((fl = fcntl(fd, F_GETFD)) < 0 || fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0))
+		goto failed;
+
+	return fd;
+
+failed:
+	saved_errno = errno;
+	close(fd);
+	errno = saved_errno;
+	return -1;
+}
+#endif
+
 static int is_ip_anything(const char *ip) { if(!ip || !*ip diff --git a/src/web_server.h b/src/web_server.h index 5df6bfa7..bd1001f9 100644 --- a/src/web_server.h +++ b/src/web_server.h @@ -23,4 +23,17 @@ extern void *socket_listen_main_multi_threaded(void *ptr); extern void
*socket_listen_main_single_threaded(void *ptr); extern int create_listen_socket(void); +#ifndef HAVE_ACCEPT4 +extern int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags); + +#ifndef SOCK_NONBLOCK +#define SOCK_NONBLOCK 00004000 +#endif /* #ifndef SOCK_NONBLOCK */ + +#ifndef SOCK_CLOEXEC +#define SOCK_CLOEXEC 02000000 +#endif /* #ifndef SOCK_CLOEXEC */ + +#endif /* #ifndef HAVE_ACCEPT4 */ + #endif /* NETDATA_WEB_SERVER_H */ -- 2.39.2