arthur.barton.de Git - netdata.git/commitdiff
operational health monitoring - we got alarms! - no notifications yet though
author Costa Tsaousis <costa@tsaousis.gr>
Sun, 14 Aug 2016 23:39:03 +0000 (02:39 +0300)
committer Costa Tsaousis <costa@tsaousis.gr>
Sun, 14 Aug 2016 23:39:03 +0000 (02:39 +0300)
15 files changed:
src/appconfig.c
src/avl.c
src/avl.h
src/eval.c
src/eval.h
src/health.c
src/health.h
src/log.c
src/main.c
src/plugins_d.c
src/rrd.c
src/rrd.h
src/rrd2json.c
src/rrd2json.h
src/web_client.c

index 51d714c20a74cc4e39044a6300bd7f59f25f01c4..991b1c72e982c99b9d1ff16067b99f76566a0a0d 100644 (file)
--- a/src/appconfig.c
+++ b/src/appconfig.c
@@ -478,8 +478,12 @@ void generate_config(BUFFER *wb, int only_changed)
 
         config_global_write_lock();
         for(co = config_root; co ; co = co->next) {
-            if(strcmp(co->name, "global") == 0 || strcmp(co->name, "plugins") == 0 || strcmp(co->name, "registry") == 0) pri = 0;
-            else if(strncmp(co->name, "plugin:", 7) == 0) pri = 1;
+            if(!strcmp(co->name, "global") ||
+                    !strcmp(co->name, "plugins") ||
+                    !strcmp(co->name, "registry") ||
+                    !strcmp(co->name, "health"))
+                pri = 0;
+            else if(!strncmp(co->name, "plugin:", 7)) pri = 1;
             else pri = 2;
 
             if(i == pri) {
index aa45fff44f6afcf17bd8a01a0eca9497666b9fe7..324afeebbad76ad96a17a747a48bde9f5e5a19fa 100644 (file)
--- a/src/avl.c
+++ b/src/avl.c
@@ -278,7 +278,27 @@ avl *avl_remove(avl_tree *tree, avl *item) {
 }
 
 /* ------------------------------------------------------------------------- */
-// these functions are (C) Costa Tsaousis
+// below are functions by (C) Costa Tsaousis
+
+// ---------------------------
+// traversing
+
+void avl_walker(avl *node, void (*callback)(void *)) {
+    if(node->avl_link[0])
+        avl_walker(node->avl_link[0], callback);
+
+    callback(node);
+
+    if(node->avl_link[1])
+        avl_walker(node->avl_link[1], callback);
+}
+
+void avl_traverse(avl_tree *t, void (*callback)(void *)) {
+    avl_walker(t->root, callback);
+}
+
+// ---------------------------
+// locks
 
 void avl_read_lock(avl_tree_lock *t) {
 #ifndef AVL_WITHOUT_PTHREADS
@@ -310,7 +330,8 @@ void avl_unlock(avl_tree_lock *t) {
 #endif /* AVL_WITHOUT_PTHREADS */
 }
 
-/* ------------------------------------------------------------------------- */
+// ---------------------------
+// operations with locking
 
 void avl_init_lock(avl_tree_lock *t, int (*compar)(void *a, void *b)) {
     avl_init(&t->avl_tree, compar);
@@ -351,8 +372,15 @@ avl *avl_insert_lock(avl_tree_lock *t, avl *a) {
     return ret;
 }
 
+void avl_traverse_lock(avl_tree_lock *t, void (*callback)(void *)) {
+    avl_read_lock(t);
+    avl_traverse(&t->avl_tree, callback);
+    avl_unlock(t);
+}
+
 void avl_init(avl_tree *t, int (*compar)(void *a, void *b)) {
     t->root = NULL;
     t->compar = compar;
 }
 
+// ------------------
\ No newline at end of file
index 79237ab25924ebff89d49b9b5a88a8f95a564323..973d68fb1151ec3e67ce53ccac5cd4de3ad0348c 100644 (file)
--- a/src/avl.h
+++ b/src/avl.h
@@ -79,4 +79,8 @@ avl *avl_search(avl_tree *t, avl *a);
 void avl_init_lock(avl_tree_lock *t, int (*compar)(void *a, void *b));
 void avl_init(avl_tree *t, int (*compar)(void *a, void *b));
 
+
+void avl_traverse_lock(avl_tree_lock *t, void (*callback)(void *));
+void avl_traverse(avl_tree *t, void (*callback)(void *));
+
 #endif /* avl.h */
index 56ff2b35c8529d54fd1822e066b23fcd23dc54da..3997e2073a8ef31d065a57d687411079f49d582f 100644 (file)
--- a/src/eval.c
+++ b/src/eval.c
@@ -52,6 +52,7 @@ static inline EVAL_NODE *parse_full_expression(const char **string, int *error);
 static inline EVAL_NODE *parse_one_full_operand(const char **string, int *error);
 static inline calculated_number eval_node(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error);
 static inline void print_parsed_as_node(BUFFER *out, EVAL_NODE *op, int *error);
+static inline void print_parsed_as_constant(BUFFER *out, calculated_number n);
 
 // ----------------------------------------------------------------------------
 // evaluation of expressions
@@ -71,13 +72,28 @@ static inline calculated_number eval_check_number(calculated_number n, int *erro
 }
 
 static inline calculated_number eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABLE *v, int *error) {
-    // FIXME: do the variable look up here
+    static uint32_t this_hash = 0;
+
+    if(unlikely(this_hash == 0))
+        this_hash = simple_hash("this");
+
+    if(exp->this && v->hash == this_hash && !strcmp(v->name, "this")) {
+        buffer_strcat(exp->error_msg, "[ $this = ");
+        print_parsed_as_constant(exp->error_msg, *exp->this);
+        buffer_strcat(exp->error_msg, " ] ");
+        return *exp->this;
+    }
+
+    calculated_number n;
+    if(exp->rrdcalc && health_variable_lookup(v->name, v->hash, exp->rrdcalc, &n)) {
+        buffer_sprintf(exp->error_msg, "[ $%s = ", v->name);
+        print_parsed_as_constant(exp->error_msg, n);
+        buffer_strcat(exp->error_msg, " ] ");
+        return n;
+    }
 
-//    if(!exp->data) {
     *error = EVAL_ERROR_UNKNOWN_VARIABLE;
     buffer_sprintf(exp->error_msg, "unknown variable '%s'", v->name);
-//    }
-
     return 0;
 }
 
@@ -208,6 +224,16 @@ static inline void print_parsed_as_variable(BUFFER *out, EVAL_VARIABLE *v, int *
 }
 
 static inline void print_parsed_as_constant(BUFFER *out, calculated_number n) {
+    if(unlikely(isnan(n))) {
+        buffer_strcat(out, "NaN");
+        return;
+    }
+
+    if(unlikely(isinf(n))) {
+        buffer_strcat(out, "INFINITE");
+        return;
+    }
+
     char b[100+1], *s;
     snprintfz(b, 100, CALCULATED_NUMBER_FORMAT, n);
 
@@ -641,6 +667,7 @@ static inline void eval_node_set_value_to_variable(EVAL_NODE *op, int pos, const
     op->ops[pos].type = EVAL_VALUE_VARIABLE;
     op->ops[pos].variable = callocz(1, sizeof(EVAL_VARIABLE));
     op->ops[pos].variable->name = strdupz(variable);
+    op->ops[pos].variable->hash = simple_hash(op->ops[pos].variable->name);
 }
 
 static inline void eval_variable_free(EVAL_VARIABLE *v) {
@@ -910,7 +937,7 @@ const char *expression_strerror(int error) {
             return "wrong number of operands for operation - internal error";
 
         case EVAL_ERROR_VALUE_IS_NAN:
-            return "value or variable is missing or is not a number";
+            return "value is unset";
 
         case EVAL_ERROR_VALUE_IS_INFINITE:
             return "computed value is infinite";
index 9b13a644e1ea2904192b9e0974ee670d9ca200a1..0b174a20831a9841ba6604c218c0769233b10113 100644 (file)
--- a/src/eval.h
+++ b/src/eval.h
@@ -5,6 +5,7 @@
 
 typedef struct eval_variable {
     char *name;
+    uint32_t hash;
     struct rrdvar *rrdvar;
     struct eval_variable *next;
 } EVAL_VARIABLE;
@@ -13,6 +14,7 @@ typedef struct eval_expression {
     const char *source;
     const char *parsed_as;
 
+    calculated_number *this;
     calculated_number result;
 
     int error;
@@ -22,7 +24,7 @@ typedef struct eval_expression {
     void *nodes;
 
     // custom data to be used for looking up variables
-    void *data;
+    struct rrdcalc *rrdcalc;
 } EVAL_EXPRESSION;
 
 #define EVAL_VALUE_INVALID 0
index c284830a67b8f68c292f2f4f779edf3a18349783..bc09c9445aa7eacebfa6a827eb003168d30293b5 100644 (file)
--- a/src/health.c
+++ b/src/health.c
@@ -80,21 +80,83 @@ static inline RRDVAR *rrdvar_create_and_index(const char *scope, avl_tree_lock *
         rv = NULL;
     }
 
-    /*
-     * check
+    return rv;
+}
+
+// ----------------------------------------------------------------------------
+// RRDVAR lookup
+
+calculated_number rrdvar2number(RRDVAR *rv) {
+    switch(rv->type) {
+        case RRDVAR_TYPE_CALCULATED: {
+            calculated_number *n = (calculated_number *)rv->value;
+            return *n;
+        }
+            break;
+
+        case RRDVAR_TYPE_TIME_T: {
+            time_t *n = (time_t *)rv->value;
+            return *n;
+        }
+            break;
+
+        case RRDVAR_TYPE_COLLECTED: {
+            collected_number *n = (collected_number *)rv->value;
+            return *n;
+        }
+            break;
+
+        case RRDVAR_TYPE_TOTAL: {
+            total_number *n = (total_number *)rv->value;
+            return *n;
+        }
+
+        default:
+            error("I don't know how to convert RRDVAR type %d to calculated_number", rv->type);
+            return NAN;
+            break;
+    }
+}
+
+void dump_variable(void *data) {
+    RRDVAR *rv = (RRDVAR *)data;
+    debug(D_HEALTH, "%30s : " CALCULATED_NUMBER_FORMAT, rv->name, rrdvar2number(rv));
+}
+
+int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result) {
+    RRDSET *st = rc->rrdset;
+    RRDVAR *rv;
+
+    if(!st) return 0;
+
+    rv = rrdvar_index_find(&st->variables_root_index, variable, hash);
     if(rv) {
-        RRDVAR *ret = rrdvar_index_find(tree, name, hash);
-        if(ret != rv) fatal("oops! 1");
+        *result = rrdvar2number(rv);
+        return 1;
+    }
 
-        ret = rrdvar_index_del(tree, rv);
-        if(ret != rv) fatal("oops! 2");
+    rv = rrdvar_index_find(&st->rrdcontext->variables_root_index, variable, hash);
+    if(rv) {
+        *result = rrdvar2number(rv);
+        return 1;
+    }
 
-        ret = rrdvar_index_add(tree, rv);
-        if(ret != rv) fatal("oops! 3");
+    rv = rrdvar_index_find(&st->rrdhost->variables_root_index, variable, hash);
+    if(rv) {
+        *result = rrdvar2number(rv);
+        return 1;
     }
-    */
 
-    return rv;
+    debug(D_HEALTH, "Available local chart '%s' variables:", st->id);
+    avl_traverse_lock(&st->variables_root_index, dump_variable);
+
+    debug(D_HEALTH, "Available context '%s' variables:", st->rrdcontext->id);
+    avl_traverse_lock(&st->rrdcontext->variables_root_index, dump_variable);
+
+    debug(D_HEALTH, "Available host '%s' variables:", st->rrdhost->hostname);
+    avl_traverse_lock(&st->rrdhost->variables_root_index, dump_variable);
+
+    return 0;
 }
 
 // ----------------------------------------------------------------------------
@@ -446,7 +508,7 @@ void rrddimvar_free(RRDDIMVAR *rs) {
 
 // ----------------------------------------------------------------------------
 // RRDCALC management
-
+/*
 // this has to be called while the caller has locked
 // the RRDHOST
 static inline void rrdset_linked_optimize_rrdhost(RRDHOST *host, RRDCALC *rc) {
@@ -511,6 +573,7 @@ static inline void rrdcalc_unlinked_optimize_rrdhost(RRDHOST *host, RRDCALC *rc)
         host->calculations = rc;
     }
 }
+*/
 
 static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
     debug(D_HEALTH, "Health linking alarm '%s' from chart '%s' of host '%s'", rc->name, st->id, st->rrdhost->hostname);
@@ -527,12 +590,12 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
     rc->context = rrdvar_create_and_index("context", &st->rrdcontext->variables_root_index, rc->name, rc->hash, RRDVAR_TYPE_CALCULATED, &rc->value);
     rc->host    = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, rc->name, rc->hash, RRDVAR_TYPE_CALCULATED, &rc->value);
 
-    rrdset_linked_optimize_rrdhost(st->rrdhost, rc);
+    // rrdset_linked_optimize_rrdhost(st->rrdhost, rc);
 }
 
 static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) {
-    if((rc->hash_chart == st->hash && !strcmp(rc->name, st->id)) ||
-            (rc->hash_chart == st->hash_name && !strcmp(rc->name, st->name)))
+    if(     (rc->hash_chart == st->hash      && !strcmp(rc->chart, st->id)) ||
+            (rc->hash_chart == st->hash_name && !strcmp(rc->chart, st->name)))
         return 1;
 
     return 0;
@@ -540,12 +603,11 @@ static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) {
 
 // this has to be called while the RRDHOST is locked
 void rrdsetcalc_link_matching(RRDSET *st) {
-    RRDCALC *rc;
+    // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id);
 
+    RRDCALC *rc;
     for(rc = st->rrdhost->calculations; rc ; rc = rc->next) {
-        // since unlinked ones are in front and linked at the end
-        // we stop on the first linked RRDCALC
-        if(rc->rrdset != NULL) break;
+        if(rc->rrdset) continue;
 
         if(rrdcalc_is_matching_this_rrdset(rc, st))
             rrdsetcalc_link(st, rc);
@@ -599,7 +661,7 @@ void rrdsetcalc_unlink(RRDCALC *rc) {
     // so that if the matching chart is found in the future
     // it will be applied automatically
 
-    rrdcalc_unlinked_optimize_rrdhost(host, rc);
+    // rrdcalc_unlinked_optimize_rrdhost(host, rc);
 }
 
 static inline int rrdcalc_exists(RRDHOST *host, const char *name, uint32_t hash) {
@@ -617,6 +679,23 @@ static inline int rrdcalc_exists(RRDHOST *host, const char *name, uint32_t hash)
 }
 
 void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) {
+    rrdhost_check_rdlock(host);
+
+    if(rc->calculation) {
+        rc->calculation->this = &rc->value;
+        rc->calculation->rrdcalc = rc;
+    }
+
+    if(rc->warning) {
+        rc->warning->this = &rc->value;
+        rc->warning->rrdcalc = rc;
+    }
+
+    if(rc->critical) {
+        rc->critical->this = &rc->value;
+        rc->critical->rrdcalc = rc;
+    }
+
     // link it to the host
     rc->next = host->calculations;
     host->calculations = rc;
@@ -634,7 +713,8 @@ void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) {
 RRDCALC *rrdcalc_create(RRDHOST *host, const char *name, const char *chart, const char *dimensions, int group_method,
                         int after, int before, int update_every, uint32_t options,
                         calculated_number green, calculated_number red,
-                        const char *exec, const char *calc, const char *warn, const char *crit) {
+                        const char *exec, const char *source,
+                        const char *calc, const char *warn, const char *crit) {
     uint32_t hash = simple_hash(name);
 
     if(rrdcalc_exists(host, name, hash))
@@ -658,7 +738,10 @@ RRDCALC *rrdcalc_create(RRDHOST *host, const char *name, const char *chart, cons
 
     rc->green = green;
     rc->red = red;
+
     if(exec) rc->exec = strdupz(exec);
+    if(source) rc->source = strdupz(source);
+
     if(calc) {
         rc->calculation = expression_parse(calc, NULL, NULL);
         if(!rc->calculation)
@@ -758,12 +841,16 @@ void rrdcalctemplate_link_matching(RRDSET *st) {
             char buffer[RRDSETVAR_ID_MAX + 1];
             snprintfz(buffer, RRDSETVAR_ID_MAX, "%s.%s", st->family, rt->name);
             variable_fix_name(buffer);
-            rrdcalc_create(st->rrdhost, buffer, st->id,
+            RRDCALC *rc = rrdcalc_create(st->rrdhost, buffer, st->id,
                            rt->dimensions, rt->group, rt->after, rt->before, rt->update_every, rt->options,
-                           rt->green, rt->red, rt->exec,
+                           rt->green, rt->red, rt->exec, rt->source,
                            (rt->calculation)?rt->calculation->source:NULL,
                            (rt->warning)?rt->warning->source:NULL,
                            (rt->critical)?rt->critical->source:NULL);
+
+            // FIXME
+            if(rc->rrdset != st)
+                fatal("RRDCAL '%s' is not linked to chart '%s'", rc->name, st->id);
         }
     }
 }
@@ -1360,6 +1447,8 @@ void health_readdir(const char *path) {
             health_readfile(path, de->d_name);
         }
     }
+
+    closedir(dir);
 }
 
 void health_init(void) {
@@ -1375,11 +1464,173 @@ void health_init(void) {
     {
         char buffer[FILENAME_MAX + 1];
         snprintfz(buffer, FILENAME_MAX, "%s/health.d", config_get("global", "config directory", CONFIG_DIR));
-        path = config_get("health", "configuration files in directory", buffer);
+        path = config_get("health", "health configuration directory", buffer);
 
         snprintfz(buffer, FILENAME_MAX, "%s/alarm.sh", config_get("global", "plugins directory", PLUGINS_DIR));
         health_default_exec = config_get("health", "script to execute on alarm", buffer);
     }
 
+    rrdhost_rwlock(&localhost);
     health_readdir(path);
+    rrdhost_unlock(&localhost);
+}
+
+static inline int rrdcalc_isrunnable(RRDCALC *rc, time_t now, time_t *next_run) {
+    if (unlikely(!rc->rrdset)) {
+        debug(D_HEALTH, "Health not running alarm '%s'. It is not linked to a chart.", rc->name);
+        return 0;
+    }
+
+    if (unlikely(!rc->update_every)) {
+        debug(D_HEALTH, "Health not running alarm '%s'. It does not have an update frequency", rc->name);
+        return 0;
+    }
+
+    if (unlikely(rc->next_update > now)) {
+        if (*next_run > rc->next_update)
+            *next_run = rc->next_update;
+
+        debug(D_HEALTH, "Health not examining alarm '%s' yet (will do in %d secs).", rc->name,
+              (int) (rc->next_update - now));
+        return 0;
+    }
+
+    return 1;
+}
+
+void *health_main(void *ptr) {
+    (void)ptr;
+
+    info("HEALTH thread created with task id %d", gettid());
+
+    if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0)
+        error("Cannot set pthread cancel type to DEFERRED.");
+
+    if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
+        error("Cannot set pthread cancel state to ENABLE.");
+
+    int min_run_every = (int)config_get_number("health", "run at least every seconds", 10);
+    if(min_run_every < 1) min_run_every = 1;
+
+    BUFFER *wb = buffer_create(100);
+
+    unsigned int loop = 0;
+    while(health_enabled) {
+        loop++;
+        debug(D_HEALTH, "Health monitoring iteration no %u started", loop);
+
+        int oldstate, runnable = 0;
+        time_t now = time(NULL);
+        time_t next_run = now + min_run_every;
+        RRDCALC *rc;
+
+        if (unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0))
+            error("Cannot set pthread cancel state to DISABLE.");
+
+        rrdhost_rdlock(&localhost);
+        for (rc = localhost.calculations; rc; rc = rc->next) {
+            if (unlikely(!rrdcalc_isrunnable(rc, now, &next_run)))
+                continue;
+
+            runnable++;
+            debug(D_HEALTH, "Health running alarm '%s'", rc->name);
+
+            // 1. if there is database lookup, do it
+            // 1b. if the lookup has a calculation expression run it
+            // 2. if there is warning expression, do it
+            // 3. if there is critical expression, do it
+
+            if (rc->after) {
+                time_t latest_timestamp;
+                int value_is_null;
+                int ret = rrd2value(rc->rrdset, wb, &rc->value, rc->dimensions, 1, rc->after, rc->before, rc->group,
+                                    rc->options, &latest_timestamp, &value_is_null);
+                if (ret != 200) {
+                    error("Health for alarm '%s', database lookup returned error %d", rc->name, ret);
+                }
+                else {
+                    if(value_is_null) {
+                        rc->value = NAN;
+                        error("Health for alarm '%s', database lookup returned empty value (possibly value is not collected yet)", rc->name);
+                    }
+                    else {
+                        debug(D_HEALTH, "Health for alarm '%s', database lookup gave value "
+                                CALCULATED_NUMBER_FORMAT, rc->name, rc->value);
+
+                        if (rc->calculation) {
+                            if (!expression_evaluate(rc->calculation)) {
+                                error("Health for alarm '%s', failed to evaluate calculation with error: %s", rc->name,
+                                      buffer_tostring(rc->calculation->error_msg));
+                            } else {
+                                debug(D_HEALTH, "Health for alarm '%s', calculation gave value "
+                                        CALCULATED_NUMBER_FORMAT, rc->name, rc->calculation->result);
+                                rc->value = rc->calculation->result;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        rrdhost_unlock(&localhost);
+
+        if (runnable) {
+            rrdhost_rdlock(&localhost);
+
+            for (rc = localhost.calculations; rc; rc = rc->next) {
+                if (unlikely(!rrdcalc_isrunnable(rc, now, &next_run)))
+                    continue;
+
+                if(rc->warning) {
+                    if (!expression_evaluate(rc->warning)) {
+                        error("Health for alarm '%s', failed to evaluate warning expression with error: %s", rc->name,
+                              buffer_tostring(rc->warning->error_msg));
+                    }
+                    else {
+                        debug(D_HEALTH, "Health for alarm '%s', warning expression gave value "
+                                CALCULATED_NUMBER_FORMAT ": %s",
+                              rc->name, rc->warning->result,
+                              buffer_tostring(rc->warning->error_msg)
+                        );
+                    }
+                }
+
+                if(rc->critical) {
+                    if (!expression_evaluate(rc->critical)) {
+                        error("Health for alarm '%s', failed to evaluate critical expression with error: %s", rc->name,
+                              buffer_tostring(rc->critical->error_msg));
+                    }
+                    else {
+                        debug(D_HEALTH, "Health for alarm '%s', critical expression gave value "
+                                CALCULATED_NUMBER_FORMAT ": %s",
+                              rc->name, rc->critical->result,
+                              buffer_tostring(rc->critical->error_msg)
+                        );
+                    }
+                }
+
+                rc->last_updated = now;
+                rc->next_update = now + rc->update_every;
+
+                if (next_run > rc->next_update)
+                    next_run = rc->next_update;
+            }
+
+            rrdhost_unlock(&localhost);
+        }
+
+
+        if (unlikely(pthread_setcancelstate(oldstate, NULL) != 0))
+            error("Cannot set pthread cancel state to RESTORE (%d).", oldstate);
+
+        debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration in %d secs",
+              loop, (int) (next_run - now));
+
+        sleep_usec(1000000 * (unsigned long long) (next_run - now));
+    }
+
+    buffer_free(wb);
+
+    info("HEALTH thread exiting");
+    pthread_exit(NULL);
+    return NULL;
 }
index 4039a211c28c9fbfbdba68acef4ee4812f8e9248..46a389faeb35330252bfd0ff58aeffabf811f397 100644 (file)
--- a/src/health.h
+++ b/src/health.h
@@ -218,5 +218,8 @@ extern void rrdsetcalc_unlink(RRDCALC *rc);
 extern void rrdcalctemplate_link_matching(RRDSET *st);
 
 extern void health_init(void);
+extern void *health_main(void *ptr);
+
+extern int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result);
 
 #endif //NETDATA_HEALTH_H
index 1ad723985f858a01de2815633ffae616d0678593..e952c57243c3f278a4fa25321a85c111f9f9768b 100644 (file)
--- a/src/log.c
+++ b/src/log.c
@@ -75,24 +75,13 @@ int open_log_file(int fd, FILE **fp, const char *filename, int *enabled_syslog)
     }
     else fd = f;
 
-    if(fp && *fp == NULL) {
-        // info("fdopen(%d) on filename '%s'", fd, filename);
-
-        FILE *n = fdopen(fd, "a");
-        if (!n)
+    if(fp && !*fp) {
+        *fp = fdopen(fd, "a");
+        if (!*fp)
             error("Cannot fdopen() fd %d ('%s')", fd, filename);
-
         else {
-            if (setvbuf(n, NULL, _IOLBF, 0) != 0)
+            if (setvbuf(*fp, NULL, _IOLBF, 0) != 0)
                 error("Cannot set line buffering on fd %d ('%s')", fd, filename);
-
-            if(!*fp)
-                *fp = n;
-            else {
-                FILE *o = *fp;
-                *fp = n;
-                fclose(o);
-            }
         }
     }
 
index 3a3d1907fee3d1ab7d0390a413747991357401f9..55742ad3cb1cd51da9381db7f191c70c5b2124aa 100644 (file)
--- a/src/main.c
+++ b/src/main.c
@@ -47,8 +47,9 @@ struct netdata_static_thread {
     {"idlejitter",         "plugins",   "idlejitter", 1, NULL, NULL, cpuidlejitter_main},
     {"proc",               "plugins",   "proc",       1, NULL, NULL, proc_main},
     {"cgroups",            "plugins",   "cgroups",    1, NULL, NULL, cgroups_main},
+    {"check",              "plugins",   "checks",     0, NULL, NULL, checks_main},
+    {"health",              NULL,       NULL,         1, NULL, NULL, health_main},
     {"plugins.d",           NULL,       NULL,         1, NULL, NULL, pluginsd_main},
-    {"check",               "plugins",  "checks",     0, NULL, NULL, checks_main},
     {"web",                 NULL,       NULL,         1, NULL, NULL, socket_listen_main_multi_threaded},
     {"web-single-threaded", NULL,       NULL,         0, NULL, NULL, socket_listen_main_single_threaded},
     {NULL,                  NULL,       NULL,         0, NULL, NULL, NULL}
index a91650554ac656177527ab16e6e871b8a0b00f9a..0f3b8bc6450cd7fbfb296a4a7f991bd85e7c5852 100644 (file)
--- a/src/plugins_d.c
+++ b/src/plugins_d.c
@@ -439,9 +439,8 @@ void *pluginsd_worker_thread(void *arg)
     return NULL;
 }
 
-void *pluginsd_main(void *ptr)
-{
-    if(ptr) { ; }
+void *pluginsd_main(void *ptr) {
+    (void)ptr;
 
     info("PLUGINS.D thread created with task id %d", gettid());
 
index a2a91a9239c99a88875b8693daa73c5b7e5bfecc..09ecdcba0e7b0c6924bb1e8ee303948b553a16e6 100644 (file)
--- a/src/rrd.c
+++ b/src/rrd.c
@@ -55,10 +55,17 @@ void rrdhost_unlock(RRDHOST *host) {
     pthread_rwlock_unlock(&host->rrdset_root_rwlock);
 }
 
+void rrdhost_check_rdlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
+    int ret = pthread_rwlock_trywrlock(&host->rrdset_root_rwlock);
+
+    if(ret == 0)
+        fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
+}
+
 void rrdhost_check_wrlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
     int ret = pthread_rwlock_tryrdlock(&host->rrdset_root_rwlock);
 
-    if(ret != 0)
+    if(ret == 0)
         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
 }
 
@@ -526,7 +533,7 @@ RRDSET *rrdset_create(const char *type, const char *id, const char *name, const
     avl_init_lock(&st->variables_root_index, rrdvar_compare);
 
     pthread_rwlock_init(&st->rwlock, NULL);
-    pthread_rwlock_wrlock(&localhost.rrdset_root_rwlock);
+    rrdhost_rwlock(&localhost);
 
     if(name && *name) rrdset_set_name(st, name);
     else rrdset_set_name(st, id);
@@ -543,17 +550,19 @@ RRDSET *rrdset_create(const char *type, const char *id, const char *name, const
     st->next = localhost.rrdset_root;
     localhost.rrdset_root = st;
 
-    rrdsetvar_create(st, "last_collected", RRDVAR_TYPE_TIME_T, &st->last_collected_time.tv_sec, 0);
-    rrdsetvar_create(st, "raw_total", RRDVAR_TYPE_TOTAL, &st->collected_total, 0);
-    rrdsetvar_create(st, "green", RRDVAR_TYPE_CALCULATED, &st->green, 0);
-    rrdsetvar_create(st, "red", RRDVAR_TYPE_CALCULATED, &st->red, 0);
+    if(health_enabled) {
+        rrdsetvar_create(st, "last_collected_t", RRDVAR_TYPE_TIME_T, &st->last_collected_time.tv_sec, 0);
+        rrdsetvar_create(st, "collected_total_raw", RRDVAR_TYPE_TOTAL, &st->last_collected_total, 0);
+        rrdsetvar_create(st, "green", RRDVAR_TYPE_CALCULATED, &st->green, 0);
+        rrdsetvar_create(st, "red", RRDVAR_TYPE_CALCULATED, &st->red, 0);
+    }
 
     rrdset_index_add(&localhost, st);
 
     rrdsetcalc_link_matching(st);
     rrdcalctemplate_link_matching(st);
 
-    pthread_rwlock_unlock(&localhost.rrdset_root_rwlock);
+    rrdhost_unlock(&localhost);
 
     return(st);
 }
@@ -681,9 +690,11 @@ RRDDIM *rrddim_add(RRDSET *st, const char *id, const char *name, long multiplier
         td->next = rd;
     }
 
-    rrddimvar_create(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->calculated_value, 0);
-    rrddimvar_create(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->collected_value, 0);
-    rrddimvar_create(rd, RRDVAR_TYPE_TIME_T, NULL, "_last_collected", &rd->last_collected_time.tv_sec, 0);
+    if(health_enabled) {
+        rrddimvar_create(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->last_stored_value, 0);
+        rrddimvar_create(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->last_collected_value, 0);
+        rrddimvar_create(rd, RRDVAR_TYPE_TIME_T, NULL, "_last_collected_t", &rd->last_collected_time.tv_sec, 0);
+    }
 
     pthread_rwlock_unlock(&st->rwlock);
 
@@ -746,7 +757,7 @@ void rrdset_free_all(void)
 {
     info("Freeing all memory...");
 
-    pthread_rwlock_wrlock(&localhost.rrdset_root_rwlock);
+    rrdhost_rwlock(&localhost);
 
     RRDSET *st;
     for(st = localhost.rrdset_root; st ;) {
@@ -789,7 +800,7 @@ void rrdset_free_all(void)
     }
     localhost.rrdset_root = NULL;
 
-    pthread_rwlock_unlock(&localhost.rrdset_root_rwlock);
+    rrdhost_unlock(&localhost);
 
     info("Memory cleanup completed...");
 }
@@ -800,7 +811,7 @@ void rrdset_save_all(void) {
     RRDSET *st;
     RRDDIM *rd;
 
-    pthread_rwlock_wrlock(&localhost.rrdset_root_rwlock);
+    rrdhost_rwlock(&localhost);
     for(st = localhost.rrdset_root; st ; st = st->next) {
         pthread_rwlock_wrlock(&st->rwlock);
 
@@ -818,7 +829,7 @@ void rrdset_save_all(void) {
 
         pthread_rwlock_unlock(&st->rwlock);
     }
-    pthread_rwlock_unlock(&localhost.rrdset_root_rwlock);
+    rrdhost_unlock(&localhost);
 }
 
 
@@ -1326,6 +1337,7 @@ unsigned long long rrdset_done(RRDSET *st)
 
             if(likely(rd->updated && rd->counter > 1 && iterations < st->gap_when_lost_iterations_above)) {
                 rd->values[st->current_entry] = pack_storage_number(new_value, storage_flags );
+                rd->last_stored_value = new_value;
 
                 if(unlikely(st->debug))
                     debug(D_RRD_STATS, "%s/%s: STORE[%ld] "
@@ -1341,6 +1353,7 @@ unsigned long long rrdset_done(RRDSET *st)
                         , st->current_entry
                         );
                 rd->values[st->current_entry] = pack_storage_number(0, SN_NOT_EXISTS);
+                rd->last_stored_value = 0;
             }
 
             stored_entries++;
index bc29308688e6c18b1834796e076531b135c20759..2893d0dfb9e6bb454d16f98608aa3a36a3a2f22b 100644 (file)
--- a/src/rrd.h
+++ b/src/rrd.h
@@ -138,11 +138,13 @@ struct rrddim {
                                                     // this is actual date time we updated the last_collected_value
                                                     // THIS IS DIFFERENT FROM THE SAME MEMBER OF RRDSET
 
-    calculated_number calculated_value;             // the current calculated value, after applying the algorithm
-    calculated_number last_calculated_value;        // the last calculated value
+    calculated_number calculated_value;             // the current calculated value, after applying the algorithm - resets to zero after being used
+    calculated_number last_calculated_value;        // the last calculated value processed
 
-    collected_number collected_value;               // the current value, as collected
-    collected_number last_collected_value;          // the last value that was collected
+    calculated_number last_stored_value;            // the last value as stored in the database (after interpolation)
+
+    collected_number collected_value;               // the current value, as collected - resets to 0 after being used
+    collected_number last_collected_value;          // the last value that was collected, after being processed
 
     // the *_volume members are used to calculate the accuracy of the rounding done by the
     // storage number - they are printed to debug.log when debug is enabled for a set.
@@ -315,10 +317,18 @@ extern RRDHOST localhost;
 
 #ifdef NETDATA_INTERNAL_CHECKS
 #define rrdhost_check_wrlock(host) rrdhost_check_wrlock_int(host, __FILE__, __FUNCTION__, __LINE__)
+#define rrdhost_check_rdlock(host) rrdhost_check_rdlock_int(host, __FILE__, __FUNCTION__, __LINE__)
 #else
+#define rrdhost_check_rdlock(host) (void)0
 #define rrdhost_check_wrlock(host) (void)0
 #endif
+
 extern void rrdhost_check_wrlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line);
+extern void rrdhost_check_rdlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line);
+
+extern void rrdhost_rwlock(RRDHOST *host);
+extern void rrdhost_rdlock(RRDHOST *host);
+extern void rrdhost_unlock(RRDHOST *host);
 
 // ----------------------------------------------------------------------------
 // RRD SET functions
index 78afabb6b9fce40e8a6501d8dbb4357536ee5686..cb7d333babbfb43e7daed76feb5a4477289857e0 100644 (file)
@@ -1039,12 +1039,12 @@ inline static calculated_number rrdr2value(RRDR *r, long i, uint32_t options, in
     }
 
     if(unlikely(all_null)) {
-        if(likely(*all_values_are_null))
+        if(likely(all_values_are_null))
             *all_values_are_null = 1;
         return 0;
     }
     else {
-        if(likely(*all_values_are_null))
+        if(likely(all_values_are_null))
             *all_values_are_null = 0;
     }
 
@@ -1524,7 +1524,7 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g
     return r;
 }
 
-int rrd2value(RRDSET *st, BUFFER *wb, calculated_number *n, BUFFER *dimensions, long points, long long after, long long before, int group_method, uint32_t options, time_t *latest_timestamp, int *value_is_null)
+int rrd2value(RRDSET *st, BUFFER *wb, calculated_number *n, const char *dimensions, long points, long long after, long long before, int group_method, uint32_t options, time_t *latest_timestamp, int *value_is_null)
 {
     RRDR *r = rrd2rrdr(st, points, after, before, group_method, !(options & RRDR_OPTION_NOT_ALIGNED));
     if(!r) {
@@ -1543,10 +1543,10 @@ int rrd2value(RRDSET *st, BUFFER *wb, calculated_number *n, BUFFER *dimensions,
     else if(r->result_options & RRDR_RESULT_OPTION_ABSOLUTE)
         wb->options |= WB_CONTENT_CACHEABLE;
 
-    options = rrdr_check_options(r, options, (dimensions)?buffer_tostring(dimensions):NULL);
+    options = rrdr_check_options(r, options, dimensions);
 
     if(dimensions)
-        rrdr_disable_not_selected_dimensions(r, buffer_tostring(dimensions));
+        rrdr_disable_not_selected_dimensions(r, dimensions);
 
     if(latest_timestamp)
         *latest_timestamp = r->before;
index 929ac1d52f09257c8241095810d25a536efdfed4..99400346efd20413379a178c5b553bf468567b91 100644 (file)
@@ -61,6 +61,6 @@ extern void rrd_stats_all_json(BUFFER *wb);
 extern time_t rrd_stats_json(int type, RRDSET *st, BUFFER *wb, long entries_to_show, long group, int group_method, time_t after, time_t before, int only_non_zero);
 
 extern int rrd2format(RRDSET *st, BUFFER *out, BUFFER *dimensions, uint32_t format, long points, long long after, long long before, int group_method, uint32_t options, time_t *latest_timestamp);
-extern int rrd2value(RRDSET *st, BUFFER *wb, calculated_number *n, BUFFER *dimensions, long points, long long after, long long before, int group_method, uint32_t options, time_t *latest_timestamp, int *value_is_null);
+extern int rrd2value(RRDSET *st, BUFFER *wb, calculated_number *n, const char *dimensions, long points, long long after, long long before, int group_method, uint32_t options, time_t *latest_timestamp, int *value_is_null);
 
 #endif /* NETDATA_RRD2JSON_H */
index 80c8e1c40e8acab5dd8546b4ffda8c76a9aab9dc..8976e925cb8bbf74d74f3ebc31509c18668a2b67 100644 (file)
@@ -759,7 +759,7 @@ int web_client_api_v1_badge(struct web_client *w, char *url) {
 
     // if the collected value is too old, don't calculate its value
     if(rrdset_last_entry_t(st) >= (time(NULL) - (st->update_every * st->gap_when_lost_iterations_above)))
-        ret = rrd2value(st, w->response.data, &n, dimensions, points, after, before, group, options, &latest_timestamp, &value_is_null);
+        ret = rrd2value(st, w->response.data, &n, (dimensions)?buffer_tostring(dimensions):NULL, points, after, before, group, options, &latest_timestamp, &value_is_null);
 
     // if the value cannot be calculated, show empty badge
     if(ret != 200) {