arthur.barton.de Git - netdata.git/blobdiff - src/rrdhost.c
self-cleaning obsolete cgroups and network interfaces from memory; fixes #1163; fixes...
[netdata.git] / src / rrdhost.c
index 7fed5f617f7387e40983eb6d84bc94009d01fbcb..7002c4823472c7d85c97a3e65b7bfad0796e8333 100644 (file)
@@ -2,9 +2,11 @@
 #include "common.h"
 
 RRDHOST *localhost = NULL;
-
+size_t rrd_hosts_available = 0; // count of allocated hosts: incremented in rrdhost_create(), decremented in rrdhost_free()
 pthread_rwlock_t rrd_rwlock = PTHREAD_RWLOCK_INITIALIZER;
 
+time_t rrdset_free_obsolete_time = 3600; // default; rrd_init() overrides it from "cleanup obsolete charts after seconds"
+time_t rrdhost_free_orphan_time = 3600; // added to senders_disconnected_time in rrdhost_cleanup_remote_stale() to decide when a host is stale
 
 // ----------------------------------------------------------------------------
 // RRDHOST index
@@ -20,7 +22,7 @@ avl_tree_lock rrdhost_root_index = {
         .rwlock = AVL_LOCK_INITIALIZER
 };
 
-RRDHOST *rrdhost_find(const char *guid, uint32_t hash) {
+RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
 
     RRDHOST tmp;
@@ -30,6 +32,25 @@ RRDHOST *rrdhost_find(const char *guid, uint32_t hash) {
     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
 }
 
+RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) { // returns the host with this hostname, or NULL when no host matches
+    if(unlikely(!strcmp(hostname, "localhost"))) // the literal name "localhost" always resolves to the global localhost pointer
+        return localhost;
+
+    if(unlikely(!hash)) hash = simple_hash(hostname); // a hash of 0 is treated as "not supplied" and computed here
+
+    rrd_rdlock();
+    RRDHOST *host;
+    rrdhost_foreach_read(host) {
+        if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) { // compare the hashes first, strcmp() runs only on a hash match
+            rrd_unlock(); // the lock is released before returning, so the returned pointer is no longer covered by it
+            return host;
+        }
+    }
+    rrd_unlock();
+
+    return NULL;
+}
+
 #define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost))
 #define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost))
 
@@ -43,6 +64,11 @@ static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
     host->hash_hostname = simple_hash(host->hostname);
 }
 
+static inline void rrdhost_init_os(RRDHOST *host, const char *os) { // stores a private copy of os in host->os
+    freez(host->os); // release the previous value before the pointer is overwritten
+    host->os = strdupz(os?os:"unknown"); // a NULL os is recorded as "unknown"
+}
+
 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
     strncpy(host->machine_guid, machine_guid, GUID_LEN);
     host->machine_guid[GUID_LEN] = '\0';
@@ -55,29 +81,44 @@ static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_
 
 RRDHOST *rrdhost_create(const char *hostname,
         const char *guid,
+        const char *os,
         int update_every,
         int entries,
         RRD_MEMORY_MODE memory_mode,
-        int health_enabled) {
+        int health_enabled,
+        int rrdpush_enabled,
+        char *rrdpush_destination,
+        char *rrdpush_api_key,
+        int is_localhost
+) {
 
-    debug(D_RRDHOST, "Adding host '%s' with guid '%s'", hostname, guid);
+    debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
 
     RRDHOST *host = callocz(1, sizeof(RRDHOST));
 
     host->rrd_update_every    = update_every;
     host->rrd_history_entries = entries;
     host->rrd_memory_mode     = memory_mode;
-    host->health_enabled      = health_enabled;
+    host->health_enabled      = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
+    host->rrdpush_enabled     = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key);
+    host->rrdpush_destination = (host->rrdpush_enabled)?strdupz(rrdpush_destination):NULL;
+    host->rrdpush_api_key     = (host->rrdpush_enabled)?strdupz(rrdpush_api_key):NULL;
+
+    host->rrdpush_pipe[0] = -1;
+    host->rrdpush_pipe[1] = -1;
+    host->rrdpush_socket  = -1;
 
-    pthread_rwlock_init(&(host->rrdhost_rwlock), NULL);
+    pthread_mutex_init(&host->rrdpush_mutex, NULL);
+    pthread_rwlock_init(&host->rrdhost_rwlock, NULL);
 
     rrdhost_init_hostname(host, hostname);
     rrdhost_init_machine_guid(host, guid);
+    rrdhost_init_os(host, os);
 
-    avl_init_lock(&(host->rrdset_root_index), rrdset_compare);
+    avl_init_lock(&(host->rrdset_root_index),      rrdset_compare);
     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
-    avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare);
-    avl_init_lock(&(host->variables_root_index), rrdvar_compare);
+    avl_init_lock(&(host->rrdfamily_root_index),   rrdfamily_compare);
+    avl_init_lock(&(host->variables_root_index),   rrdvar_compare);
 
     // ------------------------------------------------------------------------
     // initialize health variables
@@ -88,10 +129,10 @@ RRDHOST *rrdhost_create(const char *hostname,
     host->health_log.next_log_id =
     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
 
-    long n = config_get_number("health", "in memory max health log entries", host->health_log.max);
+    long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
     if(n < 10) {
-        error("Health configuration has invalid max log entries %ld. Using default %u", n, host->health_log.max);
-        config_set_number("health", "in memory max health log entries", (long)host->health_log.max);
+        error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
+        config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
     }
     else
         host->health_log.max = (unsigned int)n;
@@ -100,15 +141,11 @@ RRDHOST *rrdhost_create(const char *hostname,
 
     char filename[FILENAME_MAX + 1];
 
-    if(!localhost) {
-        // this is localhost
+    if(is_localhost) {
 
-        host->cache_dir = strdupz(netdata_configured_cache_dir);
+        host->cache_dir  = strdupz(netdata_configured_cache_dir);
         host->varlib_dir = strdupz(netdata_configured_varlib_dir);
 
-        snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
-        host->health_log_filename = strdupz(config_get("health", "health db file", filename));
-
     }
     else {
         // this is not localhost - append our GUID to localhost path
@@ -119,7 +156,7 @@ RRDHOST *rrdhost_create(const char *hostname,
         if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
             int r = mkdir(host->cache_dir, 0775);
             if(r != 0 && errno != EEXIST)
-                error("Cannot create directory '%s'", host->cache_dir);
+                error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
         }
 
         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
@@ -128,28 +165,37 @@ RRDHOST *rrdhost_create(const char *hostname,
         if(host->health_enabled) {
             int r = mkdir(host->varlib_dir, 0775);
             if(r != 0 && errno != EEXIST)
-                error("Cannot create directory '%s'", host->varlib_dir);
-        }
+                error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
+       }
 
-        snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
-        host->health_log_filename = strdupz(filename);
+    }
 
+    if(host->health_enabled) {
+        snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
+        int r = mkdir(filename, 0775);
+        if(r != 0 && errno != EEXIST)
+            error("Host '%s': cannot create directory '%s'", host->hostname, filename);
     }
 
+    snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
+    host->health_log_filename = strdupz(filename);
+
     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
-    host->health_default_exec = strdupz(config_get("health", "script to execute on alarm", filename));
+    host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
     host->health_default_recipient = strdup("root");
 
 
     // ------------------------------------------------------------------------
     // load health configuration
 
-    health_alarm_log_load(host);
-    health_alarm_log_open(host);
+    if(host->health_enabled) {
+        health_alarm_log_load(host);
+        health_alarm_log_open(host);
 
-    rrdhost_wrlock(host);
-    health_readdir(host, health_config_dir());
-    rrdhost_unlock(host);
+        rrdhost_wrlock(host);
+        health_readdir(host, health_config_dir());
+        rrdhost_unlock(host);
+    }
 
 
     // ------------------------------------------------------------------------
@@ -157,48 +203,164 @@ RRDHOST *rrdhost_create(const char *hostname,
 
     rrd_wrlock();
 
-    if(localhost) {
-        host->next = localhost->next;
-        localhost->next = host;
+    if(is_localhost) {
+        host->next = localhost;
+        localhost = host;
+    }
+    else {
+        if(localhost) {
+            host->next = localhost->next;
+            localhost->next = host;
+        }
+        else localhost = host;
     }
 
-    if(rrdhost_index_add(host) != host)
-        fatal("Cannot add host '%s' to index. It already exists.", hostname);
+    RRDHOST *t = rrdhost_index_add(host);
 
+    if(t != host) {
+        error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
+        rrdhost_free(host);
+        host = NULL;
+    }
+    else {
+        info("Host '%s' with guid '%s' initialized"
+                     ", os %s"
+                     ", update every %d"
+                     ", memory mode %s"
+                     ", history entries %d"
+                     ", streaming %s"
+                     " (to '%s' with api key '%s')"
+                     ", health %s"
+                     ", cache_dir '%s'"
+                     ", varlib_dir '%s'"
+                     ", health_log '%s'"
+                     ", alarms default handler '%s'"
+                     ", alarms default recipient '%s'"
+             , host->hostname
+             , host->machine_guid
+             , host->os
+             , host->rrd_update_every
+             , rrd_memory_mode_name(host->rrd_memory_mode)
+             , host->rrd_history_entries
+             , host->rrdpush_enabled?"enabled":"disabled"
+             , host->rrdpush_destination?host->rrdpush_destination:""
+             , host->rrdpush_api_key?host->rrdpush_api_key:""
+             , host->health_enabled?"enabled":"disabled"
+             , host->cache_dir
+             , host->varlib_dir
+             , host->health_log_filename
+             , host->health_default_exec
+             , host->health_default_recipient
+        );
+    }
+
+    rrd_hosts_available++;
     rrd_unlock();
 
-    debug(D_RRDHOST, "Added host '%s' with guid '%s'", host->hostname, host->machine_guid);
     return host;
 }
 
-RRDHOST *rrdhost_find_or_create(const char *hostname, const char *guid) {
+RRDHOST *rrdhost_find_or_create( // returns the host with this machine guid, creating it when missing; NULL when rrdhost_create() fails
+          const char *hostname
+        , const char *guid
+        , const char *os
+        , int update_every
+        , int history
+        , RRD_MEMORY_MODE mode
+        , int health_enabled
+        , int rrdpush_enabled
+        , char *rrdpush_destination
+        , char *rrdpush_api_key
+) {
     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
 
-    RRDHOST *host = rrdhost_find(guid, 0);
-    if(!host)
-        host = rrdhost_create(hostname,
-                guid,
-                default_rrd_update_every,
-                default_rrd_history_entries,
-                default_rrd_memory_mode,
-                default_health_enabled
+    RRDHOST *host = rrdhost_find_by_guid(guid, 0);
+    if(!host) {
+        host = rrdhost_create(
+                hostname
+                , guid
+                , os
+                , update_every
+                , history
+                , mode
+                , health_enabled
+                , rrdpush_enabled
+                , rrdpush_destination
+                , rrdpush_api_key
+                , 0 // is_localhost: hosts created through this path are never the local host
         );
+    }
+    else {
+        host->health_enabled = health_enabled;
+
+        if(strcmp(host->hostname, hostname)) { // the host is known under a different name: rename it
+            char *t = host->hostname;
+            host->hostname = strdupz(hostname);
+            host->hash_hostname = simple_hash(host->hostname); // keep the cached hash in sync, rrdhost_find_by_hostname() compares it first
+            freez(t);
+        }
+
+        if(host->rrd_update_every != update_every) // mismatches below are only reported, the existing settings are kept
+            error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds.", host->hostname, host->rrd_update_every, update_every);
+
+        if(host->rrd_history_entries != history)
+            error("Host '%s' has history of %d entries, but the wanted one is %d entries.", host->hostname, host->rrd_history_entries, history);
+
+        if(host->rrd_memory_mode != mode)
+            error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
+    }
+
+    rrdhost_cleanup_remote_stale(host); // host is passed as 'protected' so this cleanup never frees it
 
     return host;
 }
 
+void rrdhost_cleanup_remote_stale(RRDHOST *protected) { // saves and frees stale remote hosts; 'protected' and localhost are never freed
+    time_t now = now_realtime_sec();
+
+    rrd_wrlock(); // held for the whole scan, released only after no more stale hosts are found
+
+    RRDHOST *h;
+
+restart_after_removal:
+    rrdhost_foreach_write(h) {
+        if(h != protected
+           && h != localhost
+           && !h->connected_senders // no sender is currently connected to this host
+           && h->senders_disconnected_time + rrdhost_free_orphan_time < now) { // and the grace period since the disconnect has elapsed
+            info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", h->hostname, h->machine_guid);
+            rrdhost_save(h); // save the host database before its memory is released
+            rrdhost_free(h);
+            goto restart_after_removal; // h has been freed: restart the walk from the beginning of the list
+        }
+    }
+
+    rrd_unlock();
+}
+
 // ----------------------------------------------------------------------------
 // RRDHOST global / startup initialization
 
 void rrd_init(char *hostname) {
-    debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
+    rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time); // the current value is passed as the default
 
-    localhost = rrdhost_create(hostname,
-            registry_get_this_machine_guid(),
-            default_rrd_update_every,
-            default_rrd_history_entries,
-            default_rrd_memory_mode,
-            default_health_enabled
+    health_init(); // NOTE(review): these three run before localhost is created, presumably because rrdhost_create() depends on them - confirm
+    registry_init();
+    rrdpush_init();
+
+    debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
+    localhost = rrdhost_create(
+            hostname
+            , registry_get_this_machine_guid()
+            , os_type
+            , default_rrd_update_every
+            , default_rrd_history_entries
+            , default_rrd_memory_mode
+            , default_health_enabled
+            , default_rrdpush_enabled
+            , default_rrdpush_destination
+            , default_rrdpush_api_key
+            , 1 // is_localhost: this is the local host
     );
 }
 
@@ -285,8 +447,13 @@ void rrdhost_free(RRDHOST *host) {
     // ------------------------------------------------------------------------
     // free it
 
+    rrdpush_sender_thread_stop(host);
+
+    freez(host->os);
     freez(host->cache_dir);
     freez(host->varlib_dir);
+    freez(host->rrdpush_api_key);
+    freez(host->rrdpush_destination);
     freez(host->health_default_exec);
     freez(host->health_default_recipient);
     freez(host->health_log_filename);
@@ -294,7 +461,7 @@ void rrdhost_free(RRDHOST *host) {
     rrdhost_unlock(host);
     freez(host);
 
-    info("Host memory cleanup completed...");
+    rrd_hosts_available--;
 }
 
 void rrdhost_free_all(void) {
@@ -309,10 +476,9 @@ void rrdhost_free_all(void) {
 void rrdhost_save(RRDHOST *host) {
     if(!host) return;
 
-    info("Saving host '%s' database...", host->hostname);
+    info("Saving database of host '%s'...", host->hostname);
 
     RRDSET *st;
-    RRDDIM *rd;
 
     // we get a write lock
     // to ensure only one thread is saving the database
@@ -320,19 +486,7 @@ void rrdhost_save(RRDHOST *host) {
 
     rrdset_foreach_write(st, host) {
         rrdset_rdlock(st);
-
-        if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
-            debug(D_RRD_STATS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename);
-            savememory(st->cache_filename, st, st->memsize);
-        }
-
-        rrddim_foreach_read(rd, st) {
-            if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE)) {
-                debug(D_RRD_STATS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename);
-                savememory(rd->cache_filename, rd, rd->memsize);
-            }
-        }
-
+        rrdset_save(st);
         rrdset_unlock(st);
     }
 
@@ -340,7 +494,7 @@ void rrdhost_save(RRDHOST *host) {
 }
 
 void rrdhost_save_all(void) {
-    info("Saving database...");
+    info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
 
     rrd_rdlock();
 
@@ -350,3 +504,26 @@ void rrdhost_save_all(void) {
 
     rrd_unlock();
 }
+
+void rrdhost_cleanup(RRDHOST *host) { // saves and frees the charts of host that are obsolete and idle for more than rrdset_free_obsolete_time
+    time_t now = now_realtime_sec();
+
+    RRDSET *st;
+
+restart_after_removal:
+    rrdset_foreach_write(st, host) { // NOTE(review): no host lock is taken here - presumably the caller holds the host write lock, confirm
+        if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) // only charts flagged obsolete are candidates
+                    && st->last_accessed_time + rrdset_free_obsolete_time < now // all three timestamps must be older than the threshold
+                    && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
+                    && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
+        )) {
+
+            rrdset_rdlock(st);
+            rrdset_save(st); // save the chart before its memory is released
+            rrdset_unlock(st);
+
+            rrdset_free(st);
+            goto restart_after_removal; // st has been freed: restart the walk from the beginning
+        }
+    }
+}