X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=src%2Frrdhost.c;h=576de2b16f8b9d13d26f15ad7db45024e4d64d8d;hb=cdf6740f46999b1b15665feb9eea7593c0aafc3c;hp=50ddabe8c96de9b23da3855e51ac1065335c84e5;hpb=f1fed11619c1a4afeeb7ddda81f5ee6526a37bd4;p=netdata.git diff --git a/src/rrdhost.c b/src/rrdhost.c index 50ddabe8..576de2b1 100644 --- a/src/rrdhost.c +++ b/src/rrdhost.c @@ -2,9 +2,11 @@ #include "common.h" RRDHOST *localhost = NULL; - +size_t rrd_hosts_available = 0; pthread_rwlock_t rrd_rwlock = PTHREAD_RWLOCK_INITIALIZER; +time_t rrdset_free_obsolete_time = 3600; +time_t rrdhost_free_orphan_time = 3600; // ---------------------------------------------------------------------------- // RRDHOST index @@ -20,7 +22,7 @@ avl_tree_lock rrdhost_root_index = { .rwlock = AVL_LOCK_INITIALIZER }; -RRDHOST *rrdhost_find(const char *guid, uint32_t hash) { +RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) { debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid); RRDHOST tmp; @@ -30,6 +32,25 @@ RRDHOST *rrdhost_find(const char *guid, uint32_t hash) { return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp); } +RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) { + if(unlikely(!strcmp(hostname, "localhost"))) + return localhost; + + if(unlikely(!hash)) hash = simple_hash(hostname); + + rrd_rdlock(); + RRDHOST *host; + rrdhost_foreach_read(host) { + if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) { + rrd_unlock(); + return host; + } + } + rrd_unlock(); + + return NULL; +} + #define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost)) #define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost)) @@ -64,7 +85,12 @@ RRDHOST *rrdhost_create(const char *hostname, int update_every, int entries, RRD_MEMORY_MODE memory_mode, - int health_enabled) { + int health_enabled, + int rrdpush_enabled, + char *rrdpush_destination, + char *rrdpush_api_key, + int is_localhost +) { debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid); @@ -73,18 +99,33 @@ RRDHOST *rrdhost_create(const char *hostname, host->rrd_update_every = update_every; host->rrd_history_entries = entries; host->rrd_memory_mode = memory_mode; - host->health_enabled = health_enabled; + host->health_enabled = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled; + host->rrdpush_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key); + host->rrdpush_destination = (host->rrdpush_enabled)?strdupz(rrdpush_destination):NULL; + host->rrdpush_api_key = (host->rrdpush_enabled)?strdupz(rrdpush_api_key):NULL; + + host->rrdpush_pipe[0] = -1; + host->rrdpush_pipe[1] = -1; + host->rrdpush_socket = -1; - pthread_rwlock_init(&(host->rrdhost_rwlock), NULL); + pthread_mutex_init(&host->rrdpush_mutex, NULL); + pthread_rwlock_init(&host->rrdhost_rwlock, NULL); rrdhost_init_hostname(host, hostname); rrdhost_init_machine_guid(host, guid); rrdhost_init_os(host, os); - avl_init_lock(&(host->rrdset_root_index), rrdset_compare); + avl_init_lock(&(host->rrdset_root_index), rrdset_compare); avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name); - avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare); - avl_init_lock(&(host->variables_root_index), rrdvar_compare); + avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare); + avl_init_lock(&(host->variables_root_index), rrdvar_compare); + + if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1)) + rrdhost_flag_set(host, RRDHOST_DELETE_OBSOLETE_FILES); + + if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost) + rrdhost_flag_set(host, RRDHOST_DELETE_ORPHAN_FILES); + // ------------------------------------------------------------------------ // initialize health variables @@ -95,10 +136,10 @@ RRDHOST *rrdhost_create(const char *hostname, host->health_log.next_log_id = host->health_log.next_alarm_id = (uint32_t)now_realtime_sec(); - long n = config_get_number("health", "in memory max health log entries", host->health_log.max); + long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max); if(n < 10) { error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max); - config_set_number("health", "in memory max health log entries", (long)host->health_log.max); + config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max); } else host->health_log.max = (unsigned int)n; @@ -107,15 +148,11 @@ RRDHOST *rrdhost_create(const char *hostname, char filename[FILENAME_MAX + 1]; - if(!localhost) { - // this is localhost + if(is_localhost) { - host->cache_dir = strdupz(netdata_configured_cache_dir); + host->cache_dir = strdupz(netdata_configured_cache_dir); host->varlib_dir = strdupz(netdata_configured_varlib_dir); - snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir); - host->health_log_filename = strdupz(config_get("health", "health db file", filename)); - } else { // this is not localhost - append our GUID to localhost path @@ -136,32 +173,36 @@ RRDHOST *rrdhost_create(const char *hostname, int r = mkdir(host->varlib_dir, 0775); if(r != 0 && errno != EEXIST) error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir); - } + } + } + + if(host->health_enabled) { snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir); int r = mkdir(filename, 0775); if(r != 0 && errno != EEXIST) error("Host '%s': cannot create directory '%s'", host->hostname, filename); - - snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir); - host->health_log_filename = strdupz(filename); - } + snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir); + host->health_log_filename = strdupz(filename); + snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir); - host->health_default_exec = strdupz(config_get("health", "script to execute on alarm", filename)); + host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename)); host->health_default_recipient = strdup("root"); // ------------------------------------------------------------------------ // load health configuration - health_alarm_log_load(host); - health_alarm_log_open(host); + if(host->health_enabled) { + health_alarm_log_load(host); + health_alarm_log_open(host); - rrdhost_wrlock(host); - health_readdir(host, health_config_dir()); - rrdhost_unlock(host); + rrdhost_wrlock(host); + health_readdir(host, health_config_dir()); + rrdhost_unlock(host); + } // ------------------------------------------------------------------------ @@ -169,26 +210,92 @@ RRDHOST *rrdhost_create(const char *hostname, rrd_wrlock(); - if(localhost) { - host->next = localhost->next; - localhost->next = host; + if(is_localhost) { + host->next = localhost; + localhost = host; + } + else { + if(localhost) { + host->next = localhost->next; + localhost->next = host; + } + else localhost = host; } - if(rrdhost_index_add(host) != host) - fatal("Host '%s': cannot add host to index. It already exists.", hostname); + RRDHOST *t = rrdhost_index_add(host); + + if(t != host) { + error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid); + rrdhost_free(host); + host = NULL; + } + else { + info("Host '%s' with guid '%s' initialized" + ", os %s" + ", update every %d" + ", memory mode %s" + ", history entries %d" + ", streaming %s" + " (to '%s' with api key '%s')" + ", health %s" + ", cache_dir '%s'" + ", varlib_dir '%s'" + ", health_log '%s'" + ", alarms default handler '%s'" + ", alarms default recipient '%s'" + , host->hostname + , host->machine_guid + , host->os + , host->rrd_update_every + , rrd_memory_mode_name(host->rrd_memory_mode) + , host->rrd_history_entries + , host->rrdpush_enabled?"enabled":"disabled" + , host->rrdpush_destination?host->rrdpush_destination:"" + , host->rrdpush_api_key?host->rrdpush_api_key:"" + , host->health_enabled?"enabled":"disabled" + , host->cache_dir + , host->varlib_dir + , host->health_log_filename + , host->health_default_exec + , host->health_default_recipient + ); + } + rrd_hosts_available++; rrd_unlock(); - debug(D_RRDHOST, "Host '%s', added with guid '%s'", host->hostname, host->machine_guid); return host; } -RRDHOST *rrdhost_find_or_create(const char *hostname, const char *guid, const char *os, int update_every, int history, RRD_MEMORY_MODE mode, int health_enabled) { +RRDHOST *rrdhost_find_or_create( + const char *hostname + , const char *guid + , const char *os + , int update_every + , int history + , RRD_MEMORY_MODE mode + , int health_enabled + , int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key +) { debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid); - RRDHOST *host = rrdhost_find(guid, 0); + RRDHOST *host = rrdhost_find_by_guid(guid, 0); if(!host) { - host = rrdhost_create(hostname, guid, os, update_every, history, mode, health_enabled); + host = rrdhost_create( + hostname + , guid + , os + , update_every + , history + , mode + , health_enabled + , rrdpush_enabled + , rrdpush_destination + , rrdpush_api_key + , 0 + ); } else { host->health_enabled = health_enabled; @@ -210,22 +317,62 @@ RRDHOST *rrdhost_find_or_create(const char *hostname, const char *guid, const ch error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); } + rrdhost_cleanup_orphan(host); + return host; } +void rrdhost_cleanup_orphan(RRDHOST *protected) { + time_t now = now_realtime_sec(); + + rrd_wrlock(); + + RRDHOST *host; + +restart_after_removal: + rrdhost_foreach_write(host) { + if(host != protected + && host != localhost + && !host->connected_senders + && host->senders_disconnected_time + rrdhost_free_orphan_time < now) { + info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid); + + if(rrdset_flag_check(host, RRDHOST_ORPHAN)) + rrdhost_delete(host); + else + rrdhost_save(host); + + rrdhost_free(host); + goto restart_after_removal; + } + } + + rrd_unlock(); +} + // ---------------------------------------------------------------------------- // RRDHOST global / startup initialization void rrd_init(char *hostname) { - debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname); + rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time); - localhost = rrdhost_create(hostname, - registry_get_this_machine_guid(), - os_type, - default_rrd_update_every, - default_rrd_history_entries, - default_rrd_memory_mode, - default_health_enabled + health_init(); + registry_init(); + rrdpush_init(); + + debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname); + localhost = rrdhost_create( + hostname + , registry_get_this_machine_guid() + , os_type + , default_rrd_update_every + , default_rrd_history_entries + , default_rrd_memory_mode + , default_health_enabled + , default_rrdpush_enabled + , default_rrdpush_destination + , default_rrdpush_api_key + , 1 ); } @@ -274,6 +421,10 @@ void rrdhost_free(RRDHOST *host) { info("Freeing all memory for host '%s'...", host->hostname); rrd_check_wrlock(); // make sure the RRDs are write locked + + // stop a possibly running thread + rrdpush_sender_thread_stop(host); + rrdhost_wrlock(host); // lock this RRDHOST // ------------------------------------------------------------------------ @@ -315,6 +466,8 @@ void rrdhost_free(RRDHOST *host) { freez(host->os); freez(host->cache_dir); freez(host->varlib_dir); + freez(host->rrdpush_api_key); + freez(host->rrdpush_destination); freez(host->health_default_exec); freez(host->health_default_recipient); freez(host->health_log_filename); @@ -322,7 +475,7 @@ void rrdhost_free(RRDHOST *host) { rrdhost_unlock(host); freez(host); - info("Host memory cleanup completed..."); + rrd_hosts_available--; } void rrdhost_free_all(void) { @@ -337,10 +490,9 @@ void rrdhost_free_all(void) { void rrdhost_save(RRDHOST *host) { if(!host) return; - info("Saving host '%s' database...", host->hostname); + info("Saving database of host '%s'...", host->hostname); RRDSET *st; - RRDDIM *rd; // we get a write lock // to ensure only one thread is saving the database @@ -348,19 +500,30 @@ void rrdhost_save(RRDHOST *host) { rrdset_foreach_write(st, host) { rrdset_rdlock(st); + rrdset_save(st); + rrdset_unlock(st); + } - if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) { - debug(D_RRD_STATS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename); - savememory(st->cache_filename, st, st->memsize); - } + rrdhost_unlock(host); +} - rrddim_foreach_read(rd, st) { - if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE)) { - debug(D_RRD_STATS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename); - savememory(rd->cache_filename, rd, rd->memsize); - } - } +// ---------------------------------------------------------------------------- +// RRDHOST - delete files + +void rrdhost_delete(RRDHOST *host) { + if(!host) return; + info("Deleting database of host '%s'...", host->hostname); + + RRDSET *st; + + // we get a write lock + // to ensure only one thread is saving the database + rrdhost_wrlock(host); + + rrdset_foreach_write(st, host) { + rrdset_rdlock(st); + rrdset_delete(st); rrdset_unlock(st); } @@ -368,7 +531,7 @@ void rrdhost_save(RRDHOST *host) { } void rrdhost_save_all(void) { - info("Saving database..."); + info("Saving database [%zu hosts(s)]...", rrd_hosts_available); rrd_rdlock(); @@ -378,3 +541,31 @@ void rrdhost_save_all(void) { rrd_unlock(); } + +void rrdhost_cleanup_obsolete(RRDHOST *host) { + time_t now = now_realtime_sec(); + + RRDSET *st; + +restart_after_removal: + rrdset_foreach_write(st, host) { + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) + && st->last_accessed_time + rrdset_free_obsolete_time < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now + )) { + + rrdset_rdlock(st); + + if(rrdhost_flag_check(host, RRDHOST_DELETE_OBSOLETE_FILES)) + rrdset_delete(st); + else + rrdset_save(st); + + rrdset_unlock(st); + + rrdset_free(st); + goto restart_after_removal; + } + } +}