]> arthur.barton.de Git - netdata.git/blobdiff - src/rrdhost.c
added compile option NETDATA_VERIFY_LOCKS to enable the locks
[netdata.git] / src / rrdhost.c
index 12f35ba8bf5683f15f811a29ee87683c0441c79c..fd7f35246abf40312b298fa30fff09859c97e738 100644 (file)
@@ -2,9 +2,11 @@
 #include "common.h"
 
 RRDHOST *localhost = NULL;
+size_t rrd_hosts_available = 0;
+netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
 
-pthread_rwlock_t rrd_rwlock = PTHREAD_RWLOCK_INITIALIZER;
-
+time_t rrdset_free_obsolete_time = 3600;
+time_t rrdhost_free_orphan_time = 3600;
 
 // ----------------------------------------------------------------------------
 // RRDHOST index
@@ -20,7 +22,7 @@ avl_tree_lock rrdhost_root_index = {
         .rwlock = AVL_LOCK_INITIALIZER
 };
 
-RRDHOST *rrdhost_find(const char *guid, uint32_t hash) {
+RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
 
     RRDHOST tmp;
@@ -30,6 +32,25 @@ RRDHOST *rrdhost_find(const char *guid, uint32_t hash) {
     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
 }
 
+RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
+    if(unlikely(!strcmp(hostname, "localhost")))
+        return localhost;
+
+    if(unlikely(!hash)) hash = simple_hash(hostname);
+
+    rrd_rdlock();
+    RRDHOST *host;
+    rrdhost_foreach_read(host) {
+        if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
+            rrd_unlock();
+            return host;
+        }
+    }
+    rrd_unlock();
+
+    return NULL;
+}
+
 #define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost))
 #define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost))
 
@@ -43,6 +64,11 @@ static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
     host->hash_hostname = simple_hash(host->hostname);
 }
 
+static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
+    freez(host->os);
+    host->os = strdupz(os?os:"unknown");
+}
+
 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
     strncpy(host->machine_guid, machine_guid, GUID_LEN);
     host->machine_guid[GUID_LEN] = '\0';
@@ -55,29 +81,51 @@ static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_
 
 RRDHOST *rrdhost_create(const char *hostname,
         const char *guid,
+        const char *os,
         int update_every,
         int entries,
         RRD_MEMORY_MODE memory_mode,
-        int health_enabled) {
+        int health_enabled,
+        int rrdpush_enabled,
+        char *rrdpush_destination,
+        char *rrdpush_api_key,
+        int is_localhost
+) {
 
-    debug(D_RRDHOST, "Adding host '%s' with guid '%s'", hostname, guid);
+    debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
 
     RRDHOST *host = callocz(1, sizeof(RRDHOST));
 
     host->rrd_update_every    = update_every;
     host->rrd_history_entries = entries;
     host->rrd_memory_mode     = memory_mode;
-    host->health_enabled      = health_enabled;
+    host->health_enabled      = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
+    host->rrdpush_enabled     = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key);
+    host->rrdpush_destination = (host->rrdpush_enabled)?strdupz(rrdpush_destination):NULL;
+    host->rrdpush_api_key     = (host->rrdpush_enabled)?strdupz(rrdpush_api_key):NULL;
+
+    host->rrdpush_pipe[0] = -1;
+    host->rrdpush_pipe[1] = -1;
+    host->rrdpush_socket  = -1;
 
-    pthread_rwlock_init(&(host->rrdhost_rwlock), NULL);
+    netdata_mutex_init(&host->rrdpush_mutex);
+    netdata_rwlock_init(&host->rrdhost_rwlock);
 
     rrdhost_init_hostname(host, hostname);
     rrdhost_init_machine_guid(host, guid);
+    rrdhost_init_os(host, os);
 
-    avl_init_lock(&(host->rrdset_root_index), rrdset_compare);
+    avl_init_lock(&(host->rrdset_root_index),      rrdset_compare);
     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
-    avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare);
-    avl_init_lock(&(host->variables_root_index), rrdvar_compare);
+    avl_init_lock(&(host->rrdfamily_root_index),   rrdfamily_compare);
+    avl_init_lock(&(host->variables_root_index),   rrdvar_compare);
+
+    if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1))
+        rrdhost_flag_set(host, RRDHOST_DELETE_OBSOLETE_FILES);
+
+    if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost)
+        rrdhost_flag_set(host, RRDHOST_DELETE_ORPHAN_FILES);
+
 
     // ------------------------------------------------------------------------
     // initialize health variables
@@ -88,27 +136,23 @@ RRDHOST *rrdhost_create(const char *hostname,
     host->health_log.next_log_id =
     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
 
-    long n = config_get_number("health", "in memory max health log entries", host->health_log.max);
+    long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
     if(n < 10) {
-        error("Health configuration has invalid max log entries %ld. Using default %u", n, host->health_log.max);
-        config_set_number("health", "in memory max health log entries", (long)host->health_log.max);
+        error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
+        config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
     }
     else
         host->health_log.max = (unsigned int)n;
 
-    pthread_rwlock_init(&(host->health_log.alarm_log_rwlock), NULL);
+    netdata_rwlock_init(&host->health_log.alarm_log_rwlock);
 
     char filename[FILENAME_MAX + 1];
 
-    if(!localhost) {
-        // this is localhost
+    if(is_localhost) {
 
-        host->cache_dir = strdupz(netdata_configured_cache_dir);
+        host->cache_dir  = strdupz(netdata_configured_cache_dir);
         host->varlib_dir = strdupz(netdata_configured_varlib_dir);
 
-        snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
-        host->health_log_filename = strdupz(config_get("health", "health db file", filename));
-
     }
     else {
         // this is not localhost - append our GUID to localhost path
@@ -119,7 +163,7 @@ RRDHOST *rrdhost_create(const char *hostname,
         if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
             int r = mkdir(host->cache_dir, 0775);
             if(r != 0 && errno != EEXIST)
-                error("Cannot create directory '%s'", host->cache_dir);
+                error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
         }
 
         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
@@ -128,28 +172,37 @@ RRDHOST *rrdhost_create(const char *hostname,
         if(host->health_enabled) {
             int r = mkdir(host->varlib_dir, 0775);
             if(r != 0 && errno != EEXIST)
-                error("Cannot create directory '%s'", host->varlib_dir);
-        }
+                error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
+       }
 
-        snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
-        host->health_log_filename = strdupz(filename);
+    }
 
+    if(host->health_enabled) {
+        snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
+        int r = mkdir(filename, 0775);
+        if(r != 0 && errno != EEXIST)
+            error("Host '%s': cannot create directory '%s'", host->hostname, filename);
     }
 
+    snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
+    host->health_log_filename = strdupz(filename);
+
     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
-    host->health_default_exec = strdupz(config_get("health", "script to execute on alarm", filename));
+    host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
     host->health_default_recipient = strdup("root");
 
 
     // ------------------------------------------------------------------------
     // load health configuration
 
-    health_alarm_log_load(host);
-    health_alarm_log_open(host);
+    if(host->health_enabled) {
+        health_alarm_log_load(host);
+        health_alarm_log_open(host);
 
-    rrdhost_wrlock(host);
-    health_readdir(host, health_config_dir());
-    rrdhost_unlock(host);
+        rrdhost_wrlock(host);
+        health_readdir(host, health_config_dir());
+        rrdhost_unlock(host);
+    }
 
 
     // ------------------------------------------------------------------------
@@ -157,48 +210,169 @@ RRDHOST *rrdhost_create(const char *hostname,
 
     rrd_wrlock();
 
-    if(localhost) {
-        host->next = localhost->next;
-        localhost->next = host;
+    if(is_localhost) {
+        host->next = localhost;
+        localhost = host;
+    }
+    else {
+        if(localhost) {
+            host->next = localhost->next;
+            localhost->next = host;
+        }
+        else localhost = host;
     }
 
-    if(rrdhost_index_add(host) != host)
-        fatal("Cannot add host '%s' to index. It already exists.", hostname);
+    RRDHOST *t = rrdhost_index_add(host);
 
+    if(t != host) {
+        error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
+        rrdhost_free(host);
+        host = NULL;
+    }
+    else {
+        info("Host '%s' with guid '%s' initialized"
+                     ", os %s"
+                     ", update every %d"
+                     ", memory mode %s"
+                     ", history entries %d"
+                     ", streaming %s"
+                     " (to '%s' with api key '%s')"
+                     ", health %s"
+                     ", cache_dir '%s'"
+                     ", varlib_dir '%s'"
+                     ", health_log '%s'"
+                     ", alarms default handler '%s'"
+                     ", alarms default recipient '%s'"
+             , host->hostname
+             , host->machine_guid
+             , host->os
+             , host->rrd_update_every
+             , rrd_memory_mode_name(host->rrd_memory_mode)
+             , host->rrd_history_entries
+             , host->rrdpush_enabled?"enabled":"disabled"
+             , host->rrdpush_destination?host->rrdpush_destination:""
+             , host->rrdpush_api_key?host->rrdpush_api_key:""
+             , host->health_enabled?"enabled":"disabled"
+             , host->cache_dir
+             , host->varlib_dir
+             , host->health_log_filename
+             , host->health_default_exec
+             , host->health_default_recipient
+        );
+    }
+
+    rrd_hosts_available++;
     rrd_unlock();
 
-    debug(D_RRDHOST, "Added host '%s' with guid '%s'", host->hostname, host->machine_guid);
     return host;
 }
 
-RRDHOST *rrdhost_find_or_create(const char *hostname, const char *guid) {
+RRDHOST *rrdhost_find_or_create(
+          const char *hostname
+        , const char *guid
+        , const char *os
+        , int update_every
+        , int history
+        , RRD_MEMORY_MODE mode
+        , int health_enabled
+        , int rrdpush_enabled
+        , char *rrdpush_destination
+        , char *rrdpush_api_key
+) {
     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
 
-    RRDHOST *host = rrdhost_find(guid, 0);
-    if(!host)
-        host = rrdhost_create(hostname,
-                guid,
-                default_rrd_update_every,
-                default_rrd_history_entries,
-                default_rrd_memory_mode,
-                default_health_enabled
+    RRDHOST *host = rrdhost_find_by_guid(guid, 0);
+    if(!host) {
+        host = rrdhost_create(
+                hostname
+                , guid
+                , os
+                , update_every
+                , history
+                , mode
+                , health_enabled
+                , rrdpush_enabled
+                , rrdpush_destination
+                , rrdpush_api_key
+                , 0
         );
+    }
+    else {
+        host->health_enabled = health_enabled;
+
+        if(strcmp(host->hostname, hostname)) {
+            char *t = host->hostname;
+            host->hostname = strdupz(hostname);
+            host->hash_hostname = simple_hash(host->hostname);
+            freez(t);
+        }
+
+        if(host->rrd_update_every != update_every)
+            error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds.", host->hostname, host->rrd_update_every, update_every);
+
+        if(host->rrd_history_entries != history)
+            error("Host '%s' has history of %d entries, but the wanted one is %d entries.", host->hostname, host->rrd_history_entries, history);
+
+        if(host->rrd_memory_mode != mode)
+            error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
+    }
+
+    rrdhost_cleanup_orphan(host);
 
     return host;
 }
 
+void rrdhost_cleanup_orphan(RRDHOST *protected) {
+    time_t now = now_realtime_sec();
+
+    rrd_wrlock();
+
+    RRDHOST *host;
+
+restart_after_removal:
+    rrdhost_foreach_write(host) {
+        if(host != protected
+           && host != localhost
+           && !host->connected_senders
+           && host->senders_disconnected_time + rrdhost_free_orphan_time < now) {
+            info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid);
+
+            if(rrdset_flag_check(host, RRDHOST_ORPHAN))
+                rrdhost_delete(host);
+            else
+                rrdhost_save(host);
+
+            rrdhost_free(host);
+            goto restart_after_removal;
+        }
+    }
+
+    rrd_unlock();
+}
+
 // ----------------------------------------------------------------------------
 // RRDHOST global / startup initialization
 
 void rrd_init(char *hostname) {
-    debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
+    rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time);
+
+    health_init();
+    registry_init();
+    rrdpush_init();
 
-    localhost = rrdhost_create(hostname,
-            registry_get_this_machine_guid(),
-            default_rrd_update_every,
-            default_rrd_history_entries,
-            default_rrd_memory_mode,
-            default_health_enabled
+    debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
+    localhost = rrdhost_create(
+            hostname
+            , registry_get_this_machine_guid()
+            , os_type
+            , default_rrd_update_every
+            , default_rrd_history_entries
+            , default_rrd_memory_mode
+            , default_health_enabled
+            , default_rrdpush_enabled
+            , default_rrdpush_destination
+            , default_rrdpush_api_key
+            , 1
     );
 }
 
@@ -206,34 +380,34 @@ void rrd_init(char *hostname) {
 // RRDHOST - lock validations
 // there are only used when NETDATA_INTERNAL_CHECKS is set
 
-void rrdhost_check_rdlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
+void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
     debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
 
-    int ret = pthread_rwlock_trywrlock(&host->rrdhost_rwlock);
+    int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock);
     if(ret == 0)
         fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
 }
 
-void rrdhost_check_wrlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
+void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
     debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
 
-    int ret = pthread_rwlock_tryrdlock(&host->rrdhost_rwlock);
+    int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock);
     if(ret == 0)
         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
 }
 
-void rrd_check_rdlock_int(const char *file, const char *function, const unsigned long line) {
+void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) {
     debug(D_RRDHOST, "Checking read lock on all RRDs");
 
-    int ret = pthread_rwlock_trywrlock(&rrd_rwlock);
+    int ret = netdata_rwlock_trywrlock(&rrd_rwlock);
     if(ret == 0)
         fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
 }
 
-void rrd_check_wrlock_int(const char *file, const char *function, const unsigned long line) {
+void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) {
     debug(D_RRDHOST, "Checking write lock on all RRDs");
 
-    int ret = pthread_rwlock_tryrdlock(&rrd_rwlock);
+    int ret = netdata_rwlock_tryrdlock(&rrd_rwlock);
     if(ret == 0)
         fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
 }
@@ -247,6 +421,10 @@ void rrdhost_free(RRDHOST *host) {
     info("Freeing all memory for host '%s'...", host->hostname);
 
     rrd_check_wrlock();     // make sure the RRDs are write locked
+
+    // stop a possibly running thread
+    rrdpush_sender_thread_stop(host);
+
     rrdhost_wrlock(host);   // lock this RRDHOST
 
     // ------------------------------------------------------------------------
@@ -285,16 +463,21 @@ void rrdhost_free(RRDHOST *host) {
     // ------------------------------------------------------------------------
     // free it
 
+    freez(host->os);
     freez(host->cache_dir);
     freez(host->varlib_dir);
+    freez(host->rrdpush_api_key);
+    freez(host->rrdpush_destination);
     freez(host->health_default_exec);
     freez(host->health_default_recipient);
     freez(host->health_log_filename);
     freez(host->hostname);
     rrdhost_unlock(host);
+    netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
+    netdata_rwlock_destroy(&host->rrdhost_rwlock);
     freez(host);
 
-    info("Host memory cleanup completed...");
+    rrd_hosts_available--;
 }
 
 void rrdhost_free_all(void) {
@@ -309,30 +492,40 @@ void rrdhost_free_all(void) {
 void rrdhost_save(RRDHOST *host) {
     if(!host) return;
 
-    info("Saving host '%s' database...", host->hostname);
+    info("Saving database of host '%s'...", host->hostname);
 
     RRDSET *st;
-    RRDDIM *rd;
 
     // we get a write lock
     // to ensure only one thread is saving the database
     rrdhost_wrlock(host);
 
-    for(st = host->rrdset_root; st ; st = st->next) {
+    rrdset_foreach_write(st, host) {
         rrdset_rdlock(st);
+        rrdset_save(st);
+        rrdset_unlock(st);
+    }
 
-        if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
-            debug(D_RRD_STATS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename);
-            savememory(st->cache_filename, st, st->memsize);
-        }
+    rrdhost_unlock(host);
+}
 
-        for(rd = st->dimensions; rd ; rd = rd->next) {
-            if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE)) {
-                debug(D_RRD_STATS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename);
-                savememory(rd->cache_filename, rd, rd->memsize);
-            }
-        }
+// ----------------------------------------------------------------------------
+// RRDHOST - delete files
 
+void rrdhost_delete(RRDHOST *host) {
+    if(!host) return;
+
+    info("Deleting database of host '%s'...", host->hostname);
+
+    RRDSET *st;
+
+    // we get a write lock
+    // to ensure only one thread is saving the database
+    rrdhost_wrlock(host);
+
+    rrdset_foreach_write(st, host) {
+        rrdset_rdlock(st);
+        rrdset_delete(st);
         rrdset_unlock(st);
     }
 
@@ -340,13 +533,41 @@ void rrdhost_save(RRDHOST *host) {
 }
 
 void rrdhost_save_all(void) {
-    info("Saving database...");
+    info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
 
     rrd_rdlock();
 
     RRDHOST *host;
-    for(host = localhost; host ; host = host->next)
+    rrdhost_foreach_read(host)
         rrdhost_save(host);
 
     rrd_unlock();
 }
+
+void rrdhost_cleanup_obsolete(RRDHOST *host) {
+    time_t now = now_realtime_sec();
+
+    RRDSET *st;
+
+restart_after_removal:
+    rrdset_foreach_write(st, host) {
+        if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
+                    && st->last_accessed_time + rrdset_free_obsolete_time < now
+                    && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
+                    && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
+        )) {
+
+            rrdset_rdlock(st);
+
+            if(rrdhost_flag_check(host, RRDHOST_DELETE_OBSOLETE_FILES))
+                rrdset_delete(st);
+            else
+                rrdset_save(st);
+
+            rrdset_unlock(st);
+
+            rrdset_free(st);
+            goto restart_after_removal;
+        }
+    }
+}