From 78c8a6bad687723c43930be4307b051af3977c22 Mon Sep 17 00:00:00 2001 From: "Costa Tsaousis (ktsaou)" Date: Sat, 14 Jan 2017 13:18:08 +0200 Subject: [PATCH] improved netdata cleanup/exit procedure --- src/backends.c | 4 ++- src/health.c | 5 +++- src/main.c | 62 ++++++++++++++++++++++------------------- src/main.h | 15 ++++++++++ src/plugin_checks.c | 9 ++++-- src/plugin_freebsd.c | 7 +++-- src/plugin_idlejitter.c | 11 +++++--- src/plugin_macos.c | 7 +++-- src/plugin_nfacct.c | 27 +++++++++--------- src/plugin_proc.c | 7 +++-- src/plugin_tc.c | 16 ++++++----- src/plugins_d.c | 15 +++++++--- src/plugins_d.h | 2 +- src/rrd.c | 16 ++++++++--- src/sys_fs_cgroup.c | 7 +++-- src/web_server.c | 11 ++++++-- 16 files changed, 139 insertions(+), 82 deletions(-) diff --git a/src/backends.c b/src/backends.c index a3b2231d..59afa85d 100644 --- a/src/backends.c +++ b/src/backends.c @@ -129,7 +129,7 @@ static inline int process_opentsdb_response(BUFFER *b) { } void *backends_main(void *ptr) { - (void)ptr; + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; BUFFER *b = buffer_create(1), *response = buffer_create(1); int (*backend_request_formatter)(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) = NULL; @@ -543,6 +543,8 @@ cleanup: info("BACKEND thread exiting"); + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/health.c b/src/health.c index dd2d8264..5469c30e 100755 --- a/src/health.c +++ b/src/health.c @@ -2806,7 +2806,7 @@ static inline int rrdcalc_isrunnable(RRDCALC *rc, time_t now, time_t *next_run) } void *health_main(void *ptr) { - (void)ptr; + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("HEALTH thread created with task id %d", gettid()); @@ -3113,6 +3113,9 @@ void *health_main(void *ptr) { buffer_free(wb); info("HEALTH thread exiting"); + + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/main.c b/src/main.c index d55e1c26..d59b30fb 100644 --- a/src/main.c +++ b/src/main.c @@ -9,6 +9,7 @@ void netdata_cleanup_and_exit(int ret) { debug(D_EXIT, "Called: netdata_cleanup_and_exit()"); #ifdef NETDATA_INTERNAL_CHECKS + kill_childs(); rrdset_free_all(); #else rrdset_save_all(); @@ -24,19 +25,7 @@ void netdata_cleanup_and_exit(int ret) { exit(ret); } -struct netdata_static_thread { - char *name; - - char *config_section; - char *config_name; - - int enabled; - - pthread_t *thread; - - void (*init_routine) (void); - void *(*start_routine) (void *); -} static_threads[] = { +struct netdata_static_thread static_threads[] = { #ifdef INTERNAL_PLUGIN_NFACCT // nfacct requires root access // so, we build it as an external plugin with setuid to root @@ -156,27 +145,32 @@ int killpid(pid_t pid, int sig) void kill_childs() { + error_log_limit_unlimited(); + siginfo_t info; struct web_client *w; for(w = web_clients; w ; w = w->next) { - debug(D_EXIT, "Stopping web client %s", w->client_ip); + info("Stopping web client %s", w->client_ip); pthread_cancel(w->thread); - pthread_join(w->thread, NULL); + // it is detached + // pthread_join(w->thread, NULL); } int i; for (i = 0; static_threads[i].name != NULL ; i++) { - if(static_threads[i].thread) { - debug(D_EXIT, "Stopping %s thread", static_threads[i].name); + if(static_threads[i].enabled && static_threads[i].thread) { + info("Stopping %s thread", static_threads[i].name); pthread_cancel(*static_threads[i].thread); - pthread_join(*static_threads[i].thread, NULL); + // it is detached + // pthread_join(*static_threads[i].thread, NULL); + static_threads[i].thread = NULL; } } if(tc_child_pid) { - debug(D_EXIT, "Killing tc-qos-helper procees"); + info("Killing tc-qos-helper process %d", tc_child_pid); if(killpid(tc_child_pid, SIGTERM) != -1) waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED); } @@ -184,22 +178,32 @@ void kill_childs() struct plugind *cd; for(cd = pluginsd_root ; cd ; cd = cd->next) { - debug(D_EXIT, "Stopping %s plugin thread", cd->id); - pthread_cancel(cd->thread); - pthread_join(cd->thread, NULL); - - if(cd->pid && !cd->obsolete) { - debug(D_EXIT, "killing %s plugin process", cd->id); - if(killpid(cd->pid, SIGTERM) != -1) - waitid(P_PID, (id_t) cd->pid, &info, WEXITED); + if(!cd->obsolete) { + if(cd->thread) { + info("Stopping %s plugin thread", cd->id); + pthread_cancel(cd->thread); + // they are detached + // pthread_join(cd->thread, NULL); + } + + if(cd->pid) { + info("killing %s plugin child process pid %d", cd->id, cd->pid); + if(killpid(cd->pid, SIGTERM) != -1) + waitid(P_PID, (id_t) cd->pid, &info, WEXITED); + + cd->pid = 0; + } + + cd->obsolete = 1; } } // if, for any reason there is any child exited // catch it here + info("Cleaning up an other children"); waitid(P_PID, 0, &info, WEXITED|WNOHANG); - debug(D_EXIT, "All threads/childs stopped."); + info("All threads/childs stopped."); } struct option_def options[] = { @@ -706,7 +710,7 @@ int main(int argc, char **argv) debug(D_SYSTEM, "Starting thread %s.", st->name); - if(pthread_create(st->thread, &attr, st->start_routine, NULL)) + if(pthread_create(st->thread, &attr, st->start_routine, st)) error("failed to create new thread for %s.", st->name); else if(pthread_detach(*st->thread)) diff --git a/src/main.h b/src/main.h index be2d1c67..288536ba 100644 --- a/src/main.h +++ b/src/main.h @@ -24,6 +24,21 @@ struct option_def { */ extern struct option_def options[]; + +struct netdata_static_thread { + char *name; + + char *config_section; + char *config_name; + + volatile int enabled; + + pthread_t *thread; + + void (*init_routine) (void); + void *(*start_routine) (void *); +}; + extern void kill_childs(void); extern int killpid(pid_t pid, int signal); extern void netdata_cleanup_and_exit(int ret) NORETURN; diff --git a/src/plugin_checks.c b/src/plugin_checks.c index 12f48ff8..f39a6210 100644 --- a/src/plugin_checks.c +++ b/src/plugin_checks.c @@ -1,8 +1,7 @@ #include "common.h" -void *checks_main(void *ptr) -{ - if(ptr) { ; } +void *checks_main(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("CHECKS thread created with task id %d", gettid()); @@ -78,6 +77,10 @@ void *checks_main(void *ptr) rrdset_done(check3); } + info("CHECKS thread exiting"); + + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugin_freebsd.c b/src/plugin_freebsd.c index 9231b538..a9a9d643 100644 --- a/src/plugin_freebsd.c +++ b/src/plugin_freebsd.c @@ -1,8 +1,7 @@ #include "common.h" -void *freebsd_main(void *ptr) -{ - (void)ptr; +void *freebsd_main(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("FREEBSD Plugin thread created with task id %d", gettid()); @@ -59,6 +58,8 @@ void *freebsd_main(void *ptr) info("FREEBSD thread exiting"); + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugin_idlejitter.c b/src/plugin_idlejitter.c index 77fab3fa..dbfea2e6 100644 --- a/src/plugin_idlejitter.c +++ b/src/plugin_idlejitter.c @@ -2,11 +2,10 @@ #define CPU_IDLEJITTER_SLEEP_TIME_MS 20 -void *cpuidlejitter_main(void *ptr) -{ - if(ptr) { ; } +void *cpuidlejitter_main(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; - info("CPU Idle Jitter thread created with task id %d", gettid()); + info("IDLEJITTER thread created with task id %d", gettid()); if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) error("Cannot set pthread cancel type to DEFERRED."); @@ -48,6 +47,10 @@ void *cpuidlejitter_main(void *ptr) rrdset_done(st); } + info("IDLEJITTER thread exiting"); + + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugin_macos.c b/src/plugin_macos.c index 9f88d1e2..726d5b73 100644 --- a/src/plugin_macos.c +++ b/src/plugin_macos.c @@ -1,8 +1,7 @@ #include "common.h" -void *macos_main(void *ptr) -{ - (void)ptr; +void *macos_main(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("MACOS Plugin thread created with task id %d", gettid()); @@ -79,6 +78,8 @@ void *macos_main(void *ptr) info("MACOS thread exiting"); + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugin_nfacct.c b/src/plugin_nfacct.c index 03de3791..67dccb6b 100644 --- a/src/plugin_nfacct.c +++ b/src/plugin_nfacct.c @@ -55,7 +55,7 @@ static int nfacct_callback(const struct nlmsghdr *nlh, void *data) { } void *nfacct_main(void *ptr) { - if(ptr) { ; } + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("NFACCT thread created with task id %d", gettid()); @@ -75,15 +75,12 @@ void *nfacct_main(void *ptr) { nl = mnl_socket_open(NETLINK_NETFILTER); if(!nl) { error("nfacct.plugin: mnl_socket_open() failed"); - pthread_exit(NULL); - return NULL; + goto cleanup; } if(mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) { - mnl_socket_close(nl); error("nfacct.plugin: mnl_socket_bind() failed"); - pthread_exit(NULL); - return NULL; + goto cleanup; } portid = mnl_socket_get_portid(nl); @@ -104,16 +101,13 @@ void *nfacct_main(void *ptr) { nlh = nfacct_nlmsg_build_hdr(buf, NFNL_MSG_ACCT_GET, NLM_F_DUMP, seq); if(!nlh) { - mnl_socket_close(nl); error("nfacct.plugin: nfacct_nlmsg_build_hdr() failed"); - pthread_exit(NULL); - return NULL; + goto cleanup; } if(mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { error("nfacct.plugin: mnl_socket_send"); - pthread_exit(NULL); - return NULL; + goto cleanup; } if(nfacct_list) nfacct_list->len = 0; @@ -125,8 +119,7 @@ void *nfacct_main(void *ptr) { if (ret == -1) { error("nfacct.plugin: error communicating with kernel."); - pthread_exit(NULL); - return NULL; + goto cleanup; } // -------------------------------------------------------------------- @@ -191,7 +184,13 @@ void *nfacct_main(void *ptr) { memmove(&last, &now, sizeof(struct timeval)); } - mnl_socket_close(nl); +cleanup: + info("NFACCT thread exiting"); + + if(nl) mnl_socket_close(nl); + + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugin_proc.c b/src/plugin_proc.c index 9b99d82e..e20cbd36 100644 --- a/src/plugin_proc.c +++ b/src/plugin_proc.c @@ -56,9 +56,8 @@ static struct proc_module { { .name = NULL, .dim = NULL, .func = NULL } }; -void *proc_main(void *ptr) -{ - (void)ptr; +void *proc_main(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("PROC Plugin thread created with task id %d", gettid()); @@ -140,6 +139,8 @@ void *proc_main(void *ptr) info("PROC thread exiting"); + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugin_tc.c b/src/plugin_tc.c index 1eef22f2..d5fd86aa 100644 --- a/src/plugin_tc.c +++ b/src/plugin_tc.c @@ -749,7 +749,7 @@ static inline void tc_split_words(char *str, char **words, int max_words) { pid_t tc_child_pid = 0; void *tc_main(void *ptr) { - (void)ptr; + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("TC thread created with task id %d", gettid()); @@ -796,8 +796,7 @@ void *tc_main(void *ptr) { fp = mypopen(buffer, &tc_child_pid); if(unlikely(!fp)) { error("TC: Cannot popen(\"%s\", \"r\").", buffer); - pthread_exit(NULL); - return NULL; + goto cleanup; } while(fgets(buffer, TC_LINE_MAX, fp) != NULL) { @@ -998,8 +997,7 @@ void *tc_main(void *ptr) { if(unlikely(netdata_exit)) { tc_device_free_all(); - pthread_exit(NULL); - return NULL; + goto cleanup; } if(code == 1 || code == 127) { @@ -1008,13 +1006,17 @@ void *tc_main(void *ptr) { error("TC: tc-qos-helper.sh exited with code %d. Disabling it.", code); tc_device_free_all(); - pthread_exit(NULL); - return NULL; + goto cleanup; } sleep((unsigned int) rrd_update_every); } +cleanup: + info("TC thread exiting"); + + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugins_d.c b/src/plugins_d.c index 23550bd8..475eb1b9 100644 --- a/src/plugins_d.c +++ b/src/plugins_d.c @@ -428,12 +428,13 @@ void *pluginsd_worker_thread(void *arg) info("PLUGINSD: '%s' thread exiting", cd->fullfilename); cd->obsolete = 1; + cd->thread = 0; pthread_exit(NULL); return NULL; } void *pluginsd_main(void *ptr) { - (void)ptr; + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("PLUGINS.D thread created with task id %d", gettid()); @@ -461,8 +462,7 @@ void *pluginsd_main(void *ptr) { dir = opendir(dir_name); if(unlikely(!dir)) { error("Cannot open directory '%s'.", dir_name); - pthread_exit(NULL); - return NULL; + goto cleanup; } while(likely((file = readdir(dir)))) { @@ -520,7 +520,10 @@ void *pluginsd_main(void *ptr) { } cd->obsolete = 0; - if(unlikely(!cd->enabled)) continue; + if(unlikely(!cd->enabled)) { + cd->obsolete = 1; + continue; + } // spawn a new thread for it if(unlikely(pthread_create(&cd->thread, NULL, pluginsd_worker_thread, cd) != 0)) { @@ -529,14 +532,18 @@ void *pluginsd_main(void *ptr) { } else if(unlikely(pthread_detach(cd->thread) != 0)) error("PLUGINSD: Cannot request detach of newly created thread for plugin '%s'.", cd->filename); + } closedir(dir); sleep((unsigned int) scan_frequency); } +cleanup: info("PLUGINS.D thread exiting"); + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/plugins_d.h b/src/plugins_d.h index 6f1fbd6e..e5af3573 100644 --- a/src/plugins_d.h +++ b/src/plugins_d.h @@ -23,7 +23,7 @@ struct plugind { // without collecting values int update_every; // the plugin default data collection frequency - int obsolete; // do not touch this structure after setting this to 1 + volatile int obsolete; // do not touch this structure after setting this to 1 int enabled; // if this is enabled or not time_t started_t; diff --git a/src/rrd.c b/src/rrd.c index e2de1043..365ad876 100644 --- a/src/rrd.c +++ b/src/rrd.c @@ -648,18 +648,26 @@ RRDSET *rrdset_create(const char *type, const char *id, const char *name, const RRDDIM *rrddim_add(RRDSET *st, const char *id, const char *name, long multiplier, long divisor, int algorithm) { + RRDDIM *rd = rrddim_find(st, id); + if(rd) { + error("Cannot create rrd dimension '%s/%s', it already exists.", st->id, name); + return rd; + } + char filename[FILENAME_MAX + 1]; char fullfilename[FILENAME_MAX + 1]; char varname[CONFIG_MAX_NAME + 1]; - RRDDIM *rd = NULL; unsigned long size = sizeof(RRDDIM) + (st->entries * sizeof(storage_number)); debug(D_RRD_CALLS, "Adding dimension '%s/%s'.", st->id, id); rrdset_strncpyz_name(filename, id, FILENAME_MAX); snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", st->cache_dir, filename); - if(rrd_memory_mode != RRD_MEMORY_MODE_RAM) rd = (RRDDIM *)mymmap(fullfilename, size, ((rrd_memory_mode == RRD_MEMORY_MODE_MAP)?MAP_SHARED:MAP_PRIVATE), 1); + + if(rrd_memory_mode != RRD_MEMORY_MODE_RAM) + rd = (RRDDIM *)mymmap(fullfilename, size, ((rrd_memory_mode == RRD_MEMORY_MODE_MAP)?MAP_SHARED:MAP_PRIVATE), 1); + if(rd) { struct timeval now; now_realtime_timeval(&now); @@ -901,9 +909,9 @@ void rrdset_save_all(void) { RRDSET *st; RRDDIM *rd; - rrdhost_rwlock(&localhost); + rrdhost_rdlock(&localhost); for(st = localhost.rrdset_root; st ; st = st->next) { - pthread_rwlock_wrlock(&st->rwlock); + pthread_rwlock_rdlock(&st->rwlock); if(st->mapped == RRD_MEMORY_MODE_SAVE) { debug(D_RRD_CALLS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename); diff --git a/src/sys_fs_cgroup.c b/src/sys_fs_cgroup.c index c2c68ebc..98e5d527 100644 --- a/src/sys_fs_cgroup.c +++ b/src/sys_fs_cgroup.c @@ -1427,9 +1427,8 @@ int do_sys_fs_cgroup(int update_every, usec_t dt) { return 0; } -void *cgroups_main(void *ptr) -{ - (void)ptr; +void *cgroups_main(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; info("CGROUP Plugin thread created with task id %d", gettid()); @@ -1501,6 +1500,8 @@ void *cgroups_main(void *ptr) info("CGROUP thread exiting"); + static_thread->enabled = 0; + static_thread->thread = NULL; pthread_exit(NULL); return NULL; } diff --git a/src/web_server.c b/src/web_server.c index b0e26283..39e5ef0e 100644 --- a/src/web_server.c +++ b/src/web_server.c @@ -390,7 +390,7 @@ static inline void cleanup_web_clients(void) { #define CLEANUP_EVERY_EVENTS 100 void *socket_listen_main_multi_threaded(void *ptr) { - (void)ptr; + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; web_server_mode = WEB_SERVER_MODE_MULTI_THREADED; info("Multi-threaded WEB SERVER thread created with task id %d", gettid()); @@ -470,6 +470,9 @@ void *socket_listen_main_multi_threaded(void *ptr) { debug(D_WEB_CLIENT, "LISTENER: exit!"); close_listen_sockets(); + static_thread->enabled = 0; + static_thread->thread = NULL; + pthread_exit(NULL); return NULL; } @@ -518,7 +521,7 @@ static inline int single_threaded_unlink_client(struct web_client *w, fd_set *if } void *socket_listen_main_single_threaded(void *ptr) { - (void)ptr; + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; web_server_mode = WEB_SERVER_MODE_SINGLE_THREADED; @@ -637,5 +640,9 @@ void *socket_listen_main_single_threaded(void *ptr) { debug(D_WEB_CLIENT, "LISTENER: exit!"); close_listen_sockets(); + + static_thread->enabled = 0; + static_thread->thread = NULL; + pthread_exit(NULL); return NULL; } -- 2.39.2