+#define NETDATA_HEALTH_INTERNALS
#include "common.h"
#define RRDVAR_MAX_LENGTH 1024
-struct health_options {
- const char *health_default_exec;
- const char *health_default_recipient;
- const char *log_filename;
- size_t log_entries_written;
- FILE *log_fp;
-};
-
-static struct health_options health = {
- .health_default_exec = NULL,
- .health_default_recipient = "root",
- .log_filename = NULL,
- .log_entries_written = 0,
- .log_fp = NULL
-};
-
int health_enabled = 1;
-// ----------------------------------------------------------------------------
-// health alarm log load/save
-// no need for locking - only one thread is reading / writing the alarms log
-
-static inline int health_alarm_log_open(void) {
- if(health.log_fp)
- fclose(health.log_fp);
-
- health.log_fp = fopen(health.log_filename, "a");
-
- if(health.log_fp) {
- if (setvbuf(health.log_fp, NULL, _IOLBF, 0) != 0)
- error("Health: cannot set line buffering on health log file.");
- return 0;
- }
-
- error("Health: cannot open health log file '%s'. Health data will be lost in case of netdata or server crash.", health.log_filename);
- return -1;
-}
-
-static inline void health_alarm_log_close(void) {
- if(health.log_fp) {
- fclose(health.log_fp);
- health.log_fp = NULL;
- }
-}
-
-static inline void health_log_rotate(void) {
- static size_t rotate_every = 0;
-
- if(unlikely(rotate_every == 0)) {
- rotate_every = (size_t)config_get_number("health", "rotate log every lines", 2000);
- if(rotate_every < 100) rotate_every = 100;
- }
-
- if(unlikely(health.log_entries_written > rotate_every)) {
- health_alarm_log_close();
-
- char old_filename[FILENAME_MAX + 1];
- snprintfz(old_filename, FILENAME_MAX, "%s.old", health.log_filename);
-
- if(unlink(old_filename) == -1 && errno != ENOENT)
- error("Health: cannot remove old alarms log file '%s'", old_filename);
-
- if(link(health.log_filename, old_filename) == -1 && errno != ENOENT)
- error("Health: cannot move file '%s' to '%s'.", health.log_filename, old_filename);
-
- if(unlink(health.log_filename) == -1 && errno != ENOENT)
- error("Health: cannot remove old alarms log file '%s'", health.log_filename);
-
- // open it with truncate
- health.log_fp = fopen(health.log_filename, "w");
-
- if(health.log_fp)
- fclose(health.log_fp);
- else
- error("Health: cannot truncate health log '%s'", health.log_filename);
-
- health.log_fp = NULL;
-
- health.log_entries_written = 0;
- health_alarm_log_open();
- }
-}
-
-static inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
- health_log_rotate();
-
- if(likely(health.log_fp)) {
- if(unlikely(fprintf(health.log_fp
- , "%c\t%s"
- "\t%08x\t%08x\t%08x\t%08x\t%08x"
- "\t%08x\t%08x\t%08x"
- "\t%08x\t%08x\t%08x"
- "\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
- "\t%d\t%d\t%d\t%d"
- "\t%Lf\t%Lf"
- "\n"
- , (ae->flags & HEALTH_ENTRY_FLAG_SAVED)?'U':'A'
- , host->hostname
-
- , ae->unique_id
- , ae->alarm_id
- , ae->alarm_event_id
- , ae->updated_by_id
- , ae->updates_id
-
- , (uint32_t)ae->when
- , (uint32_t)ae->duration
- , (uint32_t)ae->non_clear_duration
- , (uint32_t)ae->flags
- , (uint32_t)ae->exec_run_timestamp
- , (uint32_t)ae->delay_up_to_timestamp
-
- , (ae->name)?ae->name:""
- , (ae->chart)?ae->chart:""
- , (ae->family)?ae->family:""
- , (ae->exec)?ae->exec:""
- , (ae->recipient)?ae->recipient:""
- , (ae->source)?ae->source:""
- , (ae->units)?ae->units:""
- , (ae->info)?ae->info:""
-
- , ae->exec_code
- , ae->new_status
- , ae->old_status
- , ae->delay
-
- , (long double)ae->new_value
- , (long double)ae->old_value
- ) < 0))
- error("Health: failed to save alarm log entry. Health data may be lost in case of abnormal restart.");
- else {
- ae->flags |= HEALTH_ENTRY_FLAG_SAVED;
- health.log_entries_written++;
- }
- }
-}
-
-static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename) {
- static uint32_t max_unique_id = 0, max_alarm_id = 0;
-
- errno = 0;
-
- char *s, *buf = mallocz(65536 + 1);
- size_t line = 0, len = 0;
- ssize_t loaded = 0, updated = 0, errored = 0, duplicate = 0;
-
- pthread_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
-
- while((s = fgets_trim_len(buf, 65536, fp, &len))) {
- health.log_entries_written++;
- line++;
-
- int max_entries = 30, entries = 0;
- char *pointers[max_entries];
-
- pointers[entries++] = s++;
- while(*s) {
- if(unlikely(*s == '\t')) {
- *s = '\0';
- pointers[entries++] = ++s;
- if(entries >= max_entries) {
- error("Health: line %zu of file '%s' has more than %d entries. Ignoring excessive entries.", line, filename, max_entries);
- break;
- }
- }
- else s++;
- }
-
- if(likely(*pointers[0] == 'U' || *pointers[0] == 'A')) {
- ALARM_ENTRY *ae = NULL;
-
- if(entries < 26) {
- error("Health: line %zu of file '%s' should have at least 26 entries, but it has %d. Ignoring it.", line, filename, entries);
- errored++;
- continue;
- }
-
- // check that we have valid ids
- uint32_t unique_id = (uint32_t)strtoul(pointers[2], NULL, 16);
- if(!unique_id) {
- error("Health: line %zu of file '%s' states alarm entry with invalid unique id %u (%s). Ignoring it.", line, filename, unique_id, pointers[2]);
- errored++;
- continue;
- }
-
- uint32_t alarm_id = (uint32_t)strtoul(pointers[3], NULL, 16);
- if(!alarm_id) {
- error("Health: line %zu of file '%s' states alarm entry for invalid alarm id %u (%s). Ignoring it.", line, filename, alarm_id, pointers[3]);
- errored++;
- continue;
- }
-
- if(unlikely(*pointers[0] == 'A')) {
- // make sure it is properly numbered
- if(unlikely(host->health_log.alarms && unique_id < host->health_log.alarms->unique_id)) {
- error("Health: line %zu of file '%s' has alarm log entry with %u in wrong order. Ignoring it.", line, filename, unique_id);
- errored++;
- continue;
- }
-
- ae = callocz(1, sizeof(ALARM_ENTRY));
- }
- else if(unlikely(*pointers[0] == 'U')) {
- // find the original
- for(ae = host->health_log.alarms; ae; ae = ae->next) {
- if(unlikely(unique_id == ae->unique_id)) {
- if(unlikely(*pointers[0] == 'A')) {
- error("Health: line %zu of file '%s' adds duplicate alarm log entry with unique id %u. Using the later."
- , line, filename, unique_id);
- *pointers[0] = 'U';
- duplicate++;
- }
- break;
- }
- else if(unlikely(unique_id > ae->unique_id)) {
- // no need to continue
- // the linked list is sorted
- ae = NULL;
- break;
- }
- }
-
- // if not found, skip this line
- if(!ae) {
- // error("Health: line %zu of file '%s' updates alarm log entry with unique id %u, but it is not found.", line, filename, unique_id);
- continue;
- }
- }
-
- // check for a possible host missmatch
- //if(strcmp(pointers[1], host->hostname))
- // error("Health: line %zu of file '%s' provides an alarm for host '%s' but this is named '%s'.", line, filename, pointers[1], host->hostname);
-
- ae->unique_id = unique_id;
- ae->alarm_id = alarm_id;
- ae->alarm_event_id = (uint32_t)strtoul(pointers[4], NULL, 16);
- ae->updated_by_id = (uint32_t)strtoul(pointers[5], NULL, 16);
- ae->updates_id = (uint32_t)strtoul(pointers[6], NULL, 16);
-
- ae->when = (uint32_t)strtoul(pointers[7], NULL, 16);
- ae->duration = (uint32_t)strtoul(pointers[8], NULL, 16);
- ae->non_clear_duration = (uint32_t)strtoul(pointers[9], NULL, 16);
-
- ae->flags = (uint32_t)strtoul(pointers[10], NULL, 16);
- ae->flags |= HEALTH_ENTRY_FLAG_SAVED;
-
- ae->exec_run_timestamp = (uint32_t)strtoul(pointers[11], NULL, 16);
- ae->delay_up_to_timestamp = (uint32_t)strtoul(pointers[12], NULL, 16);
-
- freez(ae->name);
- ae->name = strdupz(pointers[13]);
- ae->hash_name = simple_hash(ae->name);
-
- freez(ae->chart);
- ae->chart = strdupz(pointers[14]);
- ae->hash_chart = simple_hash(ae->chart);
-
- freez(ae->family);
- ae->family = strdupz(pointers[15]);
-
- freez(ae->exec);
- ae->exec = strdupz(pointers[16]);
- if(!*ae->exec) { freez(ae->exec); ae->exec = NULL; }
-
- freez(ae->recipient);
- ae->recipient = strdupz(pointers[17]);
- if(!*ae->recipient) { freez(ae->recipient); ae->recipient = NULL; }
-
- freez(ae->source);
- ae->source = strdupz(pointers[18]);
- if(!*ae->source) { freez(ae->source); ae->source = NULL; }
-
- freez(ae->units);
- ae->units = strdupz(pointers[19]);
- if(!*ae->units) { freez(ae->units); ae->units = NULL; }
-
- freez(ae->info);
- ae->info = strdupz(pointers[20]);
- if(!*ae->info) { freez(ae->info); ae->info = NULL; }
-
- ae->exec_code = str2i(pointers[21]);
- ae->new_status = str2i(pointers[22]);
- ae->old_status = str2i(pointers[23]);
- ae->delay = str2i(pointers[24]);
-
- ae->new_value = str2l(pointers[25]);
- ae->old_value = str2l(pointers[26]);
-
- static char value_string[100 + 1];
- freez(ae->old_value_string);
- freez(ae->new_value_string);
- ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1));
- ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1));
-
- // add it to host if not already there
- if(unlikely(*pointers[0] == 'A')) {
- ae->next = host->health_log.alarms;
- host->health_log.alarms = ae;
- loaded++;
- }
- else updated++;
-
- if(unlikely(ae->unique_id > max_unique_id))
- max_unique_id = ae->unique_id;
-
- if(unlikely(ae->alarm_id >= max_alarm_id))
- max_alarm_id = ae->alarm_id;
- }
- else {
- error("Health: line %zu of file '%s' is invalid (unrecognized entry type '%s').", line, filename, pointers[0]);
- errored++;
- }
- }
-
- pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock);
-
- freez(buf);
-
- if(!max_unique_id) max_unique_id = (uint32_t)now_realtime_sec();
- if(!max_alarm_id) max_alarm_id = (uint32_t)now_realtime_sec();
-
- host->health_log.next_log_id = max_unique_id + 1;
- host->health_log.next_alarm_id = max_alarm_id + 1;
-
- debug(D_HEALTH, "Health: loaded file '%s' with %zd new alarm entries, updated %zd alarms, errors %zd entries, duplicate %zd", filename, loaded, updated, errored, duplicate);
- return loaded;
-}
-
-static inline void health_alarm_log_load(RRDHOST *host) {
- health_alarm_log_close();
-
- char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s.old", health.log_filename);
- FILE *fp = fopen(filename, "r");
- if(!fp)
- error("Health: cannot open health file: %s", filename);
- else {
- health_alarm_log_read(host, fp, filename);
- fclose(fp);
- }
-
- health.log_entries_written = 0;
- fp = fopen(health.log_filename, "r");
- if(!fp)
- error("Health: cannot open health file: %s", health.log_filename);
- else {
- health_alarm_log_read(host, fp, health.log_filename);
- fclose(fp);
- }
-
- health_alarm_log_open();
-}
-
-
-// ----------------------------------------------------------------------------
-// health alarm log management
-
-static inline void health_alarm_log(
- RRDHOST *host,
- uint32_t alarm_id,
- uint32_t alarm_event_id,
- time_t when,
- const char *name,
- const char *chart,
- const char *family,
- const char *exec,
- const char *recipient,
- time_t duration,
- calculated_number old_value,
- calculated_number new_value,
- int old_status,
- int new_status,
- const char *source,
- const char *units,
- const char *info,
- int delay,
- uint32_t flags
-) {
- debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
-
- ALARM_ENTRY *ae = callocz(1, sizeof(ALARM_ENTRY));
- ae->name = strdupz(name);
- ae->hash_name = simple_hash(ae->name);
-
- if(chart) {
- ae->chart = strdupz(chart);
- ae->hash_chart = simple_hash(ae->chart);
- }
-
- if(family)
- ae->family = strdupz(family);
-
- if(exec) ae->exec = strdupz(exec);
- if(recipient) ae->recipient = strdupz(recipient);
- if(source) ae->source = strdupz(source);
- if(units) ae->units = strdupz(units);
- if(info) ae->info = strdupz(info);
-
- ae->unique_id = host->health_log.next_log_id++;
- ae->alarm_id = alarm_id;
- ae->alarm_event_id = alarm_event_id;
- ae->when = when;
- ae->old_value = old_value;
- ae->new_value = new_value;
-
- static char value_string[100 + 1];
- ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1));
- ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1));
-
- ae->old_status = old_status;
- ae->new_status = new_status;
- ae->duration = duration;
- ae->delay = delay;
- ae->delay_up_to_timestamp = when + delay;
-
- ae->flags |= flags;
-
- if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
- ae->non_clear_duration += ae->duration;
-
- // link it
- pthread_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
- ae->next = host->health_log.alarms;
- host->health_log.alarms = ae;
- host->health_log.count++;
- pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock);
-
- // match previous alarms
- pthread_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
- ALARM_ENTRY *t;
- for(t = host->health_log.alarms ; t ; t = t->next) {
- if(t != ae && t->alarm_id == ae->alarm_id) {
- if(!(t->flags & HEALTH_ENTRY_FLAG_UPDATED) && !t->updated_by_id) {
- t->flags |= HEALTH_ENTRY_FLAG_UPDATED;
- t->updated_by_id = ae->unique_id;
- ae->updates_id = t->unique_id;
-
- if((t->new_status == RRDCALC_STATUS_WARNING || t->new_status == RRDCALC_STATUS_CRITICAL) &&
- (t->old_status == RRDCALC_STATUS_WARNING || t->old_status == RRDCALC_STATUS_CRITICAL))
- ae->non_clear_duration += t->non_clear_duration;
-
- health_alarm_log_save(host, t);
- }
-
- // no need to continue
- break;
- }
- }
- pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock);
-
- health_alarm_log_save(host, ae);
-}
-
// ----------------------------------------------------------------------------
// RRDVAR management
-static inline int rrdvar_fix_name(char *variable) {
+inline int rrdvar_fix_name(char *variable) {
int fixed = 0;
while(*variable) {
if (!isalnum(*variable) && *variable != '.' && *variable != '_') {
return NULL;
}
-static inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) {
+inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) {
RRDCALC *rc;
if(unlikely(!chart)) {
return 0;
}
-static inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) {
+inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) {
if(chart && name) {
uint32_t hash_chart = simple_hash(chart);
uint32_t hash_name = simple_hash(name);
return host->health_log.next_alarm_id++;
}
-static inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) {
+inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) {
rrdhost_check_rdlock(host);
if(rc->calculation) {
}
}
-static inline void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt) {
+inline void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt) {
debug(D_HEALTH, "Health removing template '%s' of host '%s'", rt->name, host->hostname);
if(host->templates) {
}
// ----------------------------------------------------------------------------
-// load health configuration
-
-#define HEALTH_CONF_MAX_LINE 4096
-
-#define HEALTH_ALARM_KEY "alarm"
-#define HEALTH_TEMPLATE_KEY "template"
-#define HEALTH_ON_KEY "on"
-#define HEALTH_FAMILIES_KEY "families"
-#define HEALTH_LOOKUP_KEY "lookup"
-#define HEALTH_CALC_KEY "calc"
-#define HEALTH_EVERY_KEY "every"
-#define HEALTH_GREEN_KEY "green"
-#define HEALTH_RED_KEY "red"
-#define HEALTH_WARN_KEY "warn"
-#define HEALTH_CRIT_KEY "crit"
-#define HEALTH_EXEC_KEY "exec"
-#define HEALTH_RECIPIENT_KEY "to"
-#define HEALTH_UNITS_KEY "units"
-#define HEALTH_INFO_KEY "info"
-#define HEALTH_DELAY_KEY "delay"
-#define HEALTH_OPTIONS_KEY "options"
-
-static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
- if(!rc->chart) {
- error("Health configuration for alarm '%s' does not have a chart", rc->name);
- return 0;
- }
+// health initialization
- if(!rc->update_every) {
- error("Health configuration for alarm '%s.%s' has no frequency (parameter 'every'). Ignoring it.", rc->chart?rc->chart:"NOCHART", rc->name);
- return 0;
- }
+inline char *health_config_dir(void) {
+ char buffer[FILENAME_MAX + 1];
+ snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_config_dir);
+ return config_get("health", "health configuration directory", buffer);
+}
- if(!RRDCALC_HAS_DB_LOOKUP(rc) && !rc->warning && !rc->critical) {
- error("Health configuration for alarm '%s.%s' is useless (no calculation, no warning and no critical evaluation)", rc->chart?rc->chart:"NOCHART", rc->name);
- return 0;
+void health_init(void) {
+ debug(D_HEALTH, "Health configuration initializing");
+
+ if(!(health_enabled = config_get_boolean("health", "enabled", 1))) {
+ debug(D_HEALTH, "Health is disabled.");
+ return;
}
- if (rrdcalc_exists(host, rc->chart, rc->name, rc->hash_chart, rc->hash))
- return 0;
+ char pathname[FILENAME_MAX + 1];
+ snprintfz(pathname, FILENAME_MAX, "%s/health", netdata_configured_varlib_dir);
+ if(mkdir(pathname, 0770) == -1 && errno != EEXIST)
+ fatal("Cannot create directory '%s'.", pathname);
+}
- rc->id = rrdcalc_get_unique_id(localhost, rc->chart, rc->name, &rc->next_event_id);
+// ----------------------------------------------------------------------------
+// re-load health configuration
- debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f",
- rc->chart?rc->chart:"NOCHART",
- rc->name,
- rc->id,
- (rc->exec)?rc->exec:"DEFAULT",
- (rc->recipient)?rc->recipient:"DEFAULT",
- rc->green,
- rc->red,
- rc->group,
- rc->after,
- rc->before,
- rc->options,
- (rc->dimensions)?rc->dimensions:"NONE",
- rc->update_every,
- (rc->calculation)?rc->calculation->parsed_as:"NONE",
- (rc->warning)?rc->warning->parsed_as:"NONE",
- (rc->critical)?rc->critical->parsed_as:"NONE",
- rc->source,
- rc->delay_up_duration,
- rc->delay_down_duration,
- rc->delay_max_duration,
- rc->delay_multiplier
- );
+inline void health_free_host_nolock(RRDHOST *host) {
+ while(host->templates)
+ rrdcalctemplate_free(host, host->templates);
- rrdcalc_create_part2(host, rc);
- return 1;
+ while(host->alarms)
+ rrdcalc_free(host, host->alarms);
}
-static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt) {
- if(unlikely(!rt->context)) {
- error("Health configuration for template '%s' does not have a context", rt->name);
- return 0;
+void health_reload_host(RRDHOST *host) {
+ if(!health_enabled) {
+ error("Health reload is requested, but health is not enabled.");
+ return;
}
- if(unlikely(!rt->update_every)) {
- error("Health configuration for template '%s' has no frequency (parameter 'every'). Ignoring it.", rt->name);
- return 0;
- }
+ char *path = health_config_dir();
- if(unlikely(!RRDCALCTEMPLATE_HAS_CALCULATION(rt) && !rt->warning && !rt->critical)) {
- error("Health configuration for template '%s' is useless (no calculation, no warning and no critical evaluation)", rt->name);
- return 0;
- }
+ // free all running alarms
+ rrdhost_rwlock(host);
+ health_free_host_nolock(host);
+ rrdhost_unlock(host);
- RRDCALCTEMPLATE *t, *last = NULL;
- for (t = host->templates; t ; last = t, t = t->next) {
- if(unlikely(t->hash_name == rt->hash_name && !strcmp(t->name, rt->name))) {
- error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
- return 0;
- }
+ // invalidate all previous entries in the alarm log
+ ALARM_ENTRY *t;
+ for(t = host->health_log.alarms ; t ; t = t->next) {
+ if(t->new_status != RRDCALC_STATUS_REMOVED)
+ t->flags |= HEALTH_ENTRY_FLAG_UPDATED;
}
- debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f",
- rt->name,
- (rt->context)?rt->context:"NONE",
- (rt->exec)?rt->exec:"DEFAULT",
- (rt->recipient)?rt->recipient:"DEFAULT",
- rt->green,
- rt->red,
- rt->group,
- rt->after,
- rt->before,
- rt->options,
- (rt->dimensions)?rt->dimensions:"NONE",
- rt->update_every,
- (rt->calculation)?rt->calculation->parsed_as:"NONE",
- (rt->warning)?rt->warning->parsed_as:"NONE",
- (rt->critical)?rt->critical->parsed_as:"NONE",
- rt->source,
- rt->delay_up_duration,
- rt->delay_down_duration,
- rt->delay_max_duration,
- rt->delay_multiplier
- );
-
- if(likely(last)) {
- last->next = rt;
- }
- else {
- rt->next = host->templates;
- host->templates = rt;
+ // reset all thresholds to all charts
+ RRDSET *st;
+ for(st = host->rrdset_root; st ; st = st->next) {
+ st->green = NAN;
+ st->red = NAN;
}
- return 1;
-}
+ // load the new alarms
+ rrdhost_rwlock(host);
+ health_readdir(host, path);
+ rrdhost_unlock(host);
-static inline int health_parse_duration(char *string, int *result) {
- // make sure it is a number
- if(!*string || !(isdigit(*string) || *string == '+' || *string == '-')) {
- *result = 0;
- return 0;
- }
+ // link the loaded alarms to their charts
+ for(st = host->rrdset_root; st ; st = st->next) {
+ rrdhost_rwlock(host);
- char *e = NULL;
- calculated_number n = strtold(string, &e);
- if(e && *e) {
- switch (*e) {
- case 'Y':
- *result = (int) (n * 86400 * 365);
- break;
- case 'M':
- *result = (int) (n * 86400 * 30);
- break;
- case 'w':
- *result = (int) (n * 86400 * 7);
- break;
- case 'd':
- *result = (int) (n * 86400);
- break;
- case 'h':
- *result = (int) (n * 3600);
- break;
- case 'm':
- *result = (int) (n * 60);
- break;
+ rrdsetcalc_link_matching(st);
+ rrdcalctemplate_link_matching(st);
- default:
- case 's':
- *result = (int) (n);
- break;
- }
+ rrdhost_unlock(host);
}
- else
- *result = (int)(n);
-
- return 1;
}
-static inline int health_parse_delay(
- size_t line, const char *path, const char *file, char *string,
- int *delay_up_duration,
- int *delay_down_duration,
- int *delay_max_duration,
- float *delay_multiplier) {
+void health_reload(void) {
+ RRDHOST *host;
- char given_up = 0;
- char given_down = 0;
- char given_max = 0;
- char given_multiplier = 0;
+ for(host = localhost; host ; host = host->next)
+ health_reload_host(host);
+}
- char *s = string;
- while(*s) {
- char *key = s;
+// ----------------------------------------------------------------------------
+// health main thread and friends
- while(*s && !isspace(*s)) s++;
- while(*s && isspace(*s)) *s++ = '\0';
+static inline int rrdcalc_value2status(calculated_number n) {
+ if(isnan(n) || isinf(n)) return RRDCALC_STATUS_UNDEFINED;
+ if(n) return RRDCALC_STATUS_RAISED;
+ return RRDCALC_STATUS_CLEAR;
+}
- if(!*key) break;
+#define ALARM_EXEC_COMMAND_LENGTH 8192
- char *value = s;
- while(*s && !isspace(*s)) s++;
- while(*s && isspace(*s)) *s++ = '\0';
+static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
+ ae->flags |= HEALTH_ENTRY_FLAG_PROCESSED;
- if(!strcasecmp(key, "up")) {
- if (!health_parse_duration(value, delay_up_duration)) {
- error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
- line, path, file, value, key);
- }
- else given_up = 1;
- }
- else if(!strcasecmp(key, "down")) {
- if (!health_parse_duration(value, delay_down_duration)) {
- error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
- line, path, file, value, key);
- }
- else given_down = 1;
- }
- else if(!strcasecmp(key, "multiplier")) {
- *delay_multiplier = strtof(value, NULL);
- if(isnan(*delay_multiplier) || isinf(*delay_multiplier) || islessequal(*delay_multiplier, 0)) {
- error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
- line, path, file, value, key);
- }
- else given_multiplier = 1;
- }
- else if(!strcasecmp(key, "max")) {
- if (!health_parse_duration(value, delay_max_duration)) {
- error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
- line, path, file, value, key);
- }
- else given_max = 1;
- }
- else {
- error("Health configuration at line %zu of file '%s/%s': unknown keyword '%s'",
- line, path, file, key);
- }
- }
-
- if(!given_up)
- *delay_up_duration = 0;
-
- if(!given_down)
- *delay_down_duration = 0;
-
- if(!given_multiplier)
- *delay_multiplier = 1.0;
-
- if(!given_max) {
- if((*delay_max_duration) < (*delay_up_duration) * (*delay_multiplier))
- *delay_max_duration = (*delay_up_duration) * (*delay_multiplier);
-
- if((*delay_max_duration) < (*delay_down_duration) * (*delay_multiplier))
- *delay_max_duration = (*delay_down_duration) * (*delay_multiplier);
- }
-
- return 1;
-}
-
-static inline uint32_t health_parse_options(const char *s) {
- uint32_t options = 0;
- char buf[100+1] = "";
-
- while(*s) {
- buf[0] = '\0';
-
- // skip spaces
- while(*s && isspace(*s))
- s++;
-
- // find the next space
- size_t count = 0;
- while(*s && count < 100 && !isspace(*s))
- buf[count++] = *s++;
-
- if(buf[0]) {
- buf[count] = '\0';
-
- if(!strcasecmp(buf, "no-clear-notification") || !strcasecmp(buf, "no-clear"))
- options |= RRDCALC_FLAG_NO_CLEAR_NOTIFICATION;
- else
- error("Ignoring unknown alarm option '%s'", buf);
- }
- }
-
- return options;
-}
-
-static inline int health_parse_db_lookup(
- size_t line, const char *path, const char *file, char *string,
- int *group_method, int *after, int *before, int *every,
- uint32_t *options, char **dimensions
-) {
- debug(D_HEALTH, "Health configuration parsing database lookup %zu@%s/%s: %s", line, path, file, string);
-
- if(*dimensions) freez(*dimensions);
- *dimensions = NULL;
- *after = 0;
- *before = 0;
- *every = 0;
- *options = 0;
-
- char *s = string, *key;
-
- // first is the group method
- key = s;
- while(*s && !isspace(*s)) s++;
- while(*s && isspace(*s)) *s++ = '\0';
- if(!*s) {
- error("Health configuration invalid chart calculation at line %zu of file '%s/%s': expected group method followed by the 'after' time, but got '%s'",
- line, path, file, key);
- return 0;
- }
-
- if((*group_method = web_client_api_request_v1_data_group(key, -1)) == -1) {
- error("Health configuration at line %zu of file '%s/%s': invalid group method '%s'",
- line, path, file, key);
- return 0;
- }
-
- // then is the 'after' time
- key = s;
- while(*s && !isspace(*s)) s++;
- while(*s && isspace(*s)) *s++ = '\0';
-
- if(!health_parse_duration(key, after)) {
- error("Health configuration at line %zu of file '%s/%s': invalid duration '%s' after group method",
- line, path, file, key);
- return 0;
- }
-
- // sane defaults
- *every = abs(*after);
-
- // now we may have optional parameters
- while(*s) {
- key = s;
- while(*s && !isspace(*s)) s++;
- while(*s && isspace(*s)) *s++ = '\0';
- if(!*key) break;
-
- if(!strcasecmp(key, "at")) {
- char *value = s;
- while(*s && !isspace(*s)) s++;
- while(*s && isspace(*s)) *s++ = '\0';
-
- if (!health_parse_duration(value, before)) {
- error("Health configuration at line %zu of file '%s/%s': invalid duration '%s' for '%s' keyword",
- line, path, file, value, key);
- }
- }
- else if(!strcasecmp(key, HEALTH_EVERY_KEY)) {
- char *value = s;
- while(*s && !isspace(*s)) s++;
- while(*s && isspace(*s)) *s++ = '\0';
-
- if (!health_parse_duration(value, every)) {
- error("Health configuration at line %zu of file '%s/%s': invalid duration '%s' for '%s' keyword",
- line, path, file, value, key);
- }
- }
- else if(!strcasecmp(key, "absolute") || !strcasecmp(key, "abs") || !strcasecmp(key, "absolute_sum")) {
- *options |= RRDR_OPTION_ABSOLUTE;
- }
- else if(!strcasecmp(key, "min2max")) {
- *options |= RRDR_OPTION_MIN2MAX;
- }
- else if(!strcasecmp(key, "null2zero")) {
- *options |= RRDR_OPTION_NULL2ZERO;
- }
- else if(!strcasecmp(key, "percentage")) {
- *options |= RRDR_OPTION_PERCENTAGE;
- }
- else if(!strcasecmp(key, "unaligned")) {
- *options |= RRDR_OPTION_NOT_ALIGNED;
- }
- else if(!strcasecmp(key, "of")) {
- if(*s && strcasecmp(s, "all"))
- *dimensions = strdupz(s);
- break;
- }
- else {
- error("Health configuration at line %zu of file '%s/%s': unknown keyword '%s'",
- line, path, file, key);
- }
- }
-
- return 1;
-}
-
-static inline char *trim_all_spaces(char *buffer) {
- char *d = buffer, *s = buffer;
-
- // skip spaces
- while(isspace(*s)) s++;
-
- while(*s) {
- // copy the non-space part
- while(*s && !isspace(*s)) *d++ = *s++;
-
- // add a space if we have to
- if(*s && isspace(*s)) {
- *d++ = ' ';
- s++;
- }
-
- // skip spaces
- while(isspace(*s)) s++;
- }
-
- *d = '\0';
-
- if(d > buffer) {
- d--;
- if(isspace(*d)) *d = '\0';
- }
-
- if(!buffer[0]) return NULL;
- return buffer;
-}
-
-static inline char *health_source_file(size_t line, const char *path, const char *filename) {
- char buffer[FILENAME_MAX + 1];
- snprintfz(buffer, FILENAME_MAX, "%zu@%s/%s", line, path, filename);
- return strdupz(buffer);
-}
-
-static inline void strip_quotes(char *s) {
- while(*s) {
- if(*s == '\'' || *s == '"') *s = ' ';
- s++;
- }
-}
-
-int health_readfile(const char *path, const char *filename) {
- debug(D_HEALTH, "Health configuration reading file '%s/%s'", path, filename);
-
- static uint32_t
- hash_alarm = 0,
- hash_template = 0,
- hash_on = 0,
- hash_families = 0,
- hash_calc = 0,
- hash_green = 0,
- hash_red = 0,
- hash_warn = 0,
- hash_crit = 0,
- hash_exec = 0,
- hash_every = 0,
- hash_lookup = 0,
- hash_units = 0,
- hash_info = 0,
- hash_recipient = 0,
- hash_delay = 0,
- hash_options = 0;
-
- char buffer[HEALTH_CONF_MAX_LINE + 1];
-
- if(unlikely(!hash_alarm)) {
- hash_alarm = simple_uhash(HEALTH_ALARM_KEY);
- hash_template = simple_uhash(HEALTH_TEMPLATE_KEY);
- hash_on = simple_uhash(HEALTH_ON_KEY);
- hash_families = simple_uhash(HEALTH_FAMILIES_KEY);
- hash_calc = simple_uhash(HEALTH_CALC_KEY);
- hash_lookup = simple_uhash(HEALTH_LOOKUP_KEY);
- hash_green = simple_uhash(HEALTH_GREEN_KEY);
- hash_red = simple_uhash(HEALTH_RED_KEY);
- hash_warn = simple_uhash(HEALTH_WARN_KEY);
- hash_crit = simple_uhash(HEALTH_CRIT_KEY);
- hash_exec = simple_uhash(HEALTH_EXEC_KEY);
- hash_every = simple_uhash(HEALTH_EVERY_KEY);
- hash_units = simple_hash(HEALTH_UNITS_KEY);
- hash_info = simple_hash(HEALTH_INFO_KEY);
- hash_recipient = simple_hash(HEALTH_RECIPIENT_KEY);
- hash_delay = simple_uhash(HEALTH_DELAY_KEY);
- hash_options = simple_uhash(HEALTH_OPTIONS_KEY);
- }
-
- snprintfz(buffer, HEALTH_CONF_MAX_LINE, "%s/%s", path, filename);
- FILE *fp = fopen(buffer, "r");
- if(!fp) {
- error("Health configuration cannot read file '%s'.", buffer);
- return 0;
- }
-
- RRDCALC *rc = NULL;
- RRDCALCTEMPLATE *rt = NULL;
-
- size_t line = 0, append = 0;
- char *s;
- while((s = fgets(&buffer[append], (int)(HEALTH_CONF_MAX_LINE - append), fp)) || append) {
- int stop_appending = !s;
- line++;
- s = trim(buffer);
- if(!s) continue;
-
- append = strlen(s);
- if(!stop_appending && s[append - 1] == '\\') {
- s[append - 1] = ' ';
- append = &s[append] - buffer;
- if(append < HEALTH_CONF_MAX_LINE)
- continue;
- else {
- error("Health configuration has too long muli-line at line %zu of file '%s/%s'.", line, path, filename);
- }
- }
- append = 0;
-
- char *key = s;
- while(*s && *s != ':') s++;
- if(!*s) {
- error("Health configuration has invalid line %zu of file '%s/%s'. It does not contain a ':'. Ignoring it.", line, path, filename);
- continue;
- }
- *s = '\0';
- s++;
-
- char *value = s;
- key = trim_all_spaces(key);
- value = trim_all_spaces(value);
-
- if(!key) {
- error("Health configuration has invalid line %zu of file '%s/%s'. Keyword is empty. Ignoring it.", line, path, filename);
- continue;
- }
-
- if(!value) {
- error("Health configuration has invalid line %zu of file '%s/%s'. value is empty. Ignoring it.", line, path, filename);
- continue;
- }
-
- uint32_t hash = simple_uhash(key);
-
- if(hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) {
- if(rc && !rrdcalc_add_alarm_from_config(localhost, rc))
- rrdcalc_free(localhost, rc);
-
- if(rt) {
- if (!rrdcalctemplate_add_template_from_config(localhost, rt))
- rrdcalctemplate_free(localhost, rt);
- rt = NULL;
- }
-
- rc = callocz(1, sizeof(RRDCALC));
- rc->next_event_id = 1;
- rc->name = strdupz(value);
- rc->hash = simple_hash(rc->name);
- rc->source = health_source_file(line, path, filename);
- rc->green = NAN;
- rc->red = NAN;
- rc->value = NAN;
- rc->old_value = NAN;
- rc->delay_multiplier = 1.0;
-
- if(rrdvar_fix_name(rc->name))
- error("Health configuration renamed alarm '%s' to '%s'", value, rc->name);
- }
- else if(hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) {
- if(rc) {
- if(!rrdcalc_add_alarm_from_config(localhost, rc))
- rrdcalc_free(localhost, rc);
- rc = NULL;
- }
-
- if(rt && !rrdcalctemplate_add_template_from_config(localhost, rt))
- rrdcalctemplate_free(localhost, rt);
-
- rt = callocz(1, sizeof(RRDCALCTEMPLATE));
- rt->name = strdupz(value);
- rt->hash_name = simple_hash(rt->name);
- rt->source = health_source_file(line, path, filename);
- rt->green = NAN;
- rt->red = NAN;
- rt->delay_multiplier = 1.0;
-
- if(rrdvar_fix_name(rt->name))
- error("Health configuration renamed template '%s' to '%s'", value, rt->name);
- }
- else if(rc) {
- if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) {
- if(rc->chart) {
- if(strcmp(rc->chart, value))
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rc->name, key, rc->chart, value, value);
-
- freez(rc->chart);
- }
- rc->chart = strdupz(value);
- rc->hash_chart = simple_hash(rc->chart);
- }
- else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
- health_parse_db_lookup(line, path, filename, value, &rc->group, &rc->after, &rc->before,
- &rc->update_every,
- &rc->options, &rc->dimensions);
- }
- else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
- if(!health_parse_duration(value, &rc->update_every))
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' cannot parse duration: '%s'.",
- line, path, filename, rc->name, key, value);
- }
- else if(hash == hash_green && !strcasecmp(key, HEALTH_GREEN_KEY)) {
- char *e;
- rc->green = strtold(value, &e);
- if(e && *e) {
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' leaves this string unmatched: '%s'.",
- line, path, filename, rc->name, key, e);
- }
- }
- else if(hash == hash_red && !strcasecmp(key, HEALTH_RED_KEY)) {
- char *e;
- rc->red = strtold(value, &e);
- if(e && *e) {
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' leaves this string unmatched: '%s'.",
- line, path, filename, rc->name, key, e);
- }
- }
- else if(hash == hash_calc && !strcasecmp(key, HEALTH_CALC_KEY)) {
- const char *failed_at = NULL;
- int error = 0;
- rc->calculation = expression_parse(value, &failed_at, &error);
- if(!rc->calculation) {
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
- line, path, filename, rc->name, key, value, expression_strerror(error), failed_at);
- }
- }
- else if(hash == hash_warn && !strcasecmp(key, HEALTH_WARN_KEY)) {
- const char *failed_at = NULL;
- int error = 0;
- rc->warning = expression_parse(value, &failed_at, &error);
- if(!rc->warning) {
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
- line, path, filename, rc->name, key, value, expression_strerror(error), failed_at);
- }
- }
- else if(hash == hash_crit && !strcasecmp(key, HEALTH_CRIT_KEY)) {
- const char *failed_at = NULL;
- int error = 0;
- rc->critical = expression_parse(value, &failed_at, &error);
- if(!rc->critical) {
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
- line, path, filename, rc->name, key, value, expression_strerror(error), failed_at);
- }
- }
- else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) {
- if(rc->exec) {
- if(strcmp(rc->exec, value))
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rc->name, key, rc->exec, value, value);
-
- freez(rc->exec);
- }
- rc->exec = strdupz(value);
- }
- else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) {
- if(rc->recipient) {
- if(strcmp(rc->recipient, value))
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rc->name, key, rc->recipient, value, value);
-
- freez(rc->recipient);
- }
- rc->recipient = strdupz(value);
- }
- else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) {
- if(rc->units) {
- if(strcmp(rc->units, value))
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rc->name, key, rc->units, value, value);
-
- freez(rc->units);
- }
- rc->units = strdupz(value);
- strip_quotes(rc->units);
- }
- else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) {
- if(rc->info) {
- if(strcmp(rc->info, value))
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rc->name, key, rc->info, value, value);
-
- freez(rc->info);
- }
- rc->info = strdupz(value);
- strip_quotes(rc->info);
- }
- else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
- health_parse_delay(line, path, filename, value, &rc->delay_up_duration, &rc->delay_down_duration, &rc->delay_max_duration, &rc->delay_multiplier);
- }
- else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
- rc->options |= health_parse_options(value);
- }
- else {
- error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has unknown key '%s'.",
- line, path, filename, rc->name, key);
- }
- }
- else if(rt) {
- if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) {
- if(rt->context) {
- if(strcmp(rt->context, value))
- error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rt->name, key, rt->context, value, value);
-
- freez(rt->context);
- }
- rt->context = strdupz(value);
- rt->hash_context = simple_hash(rt->context);
- }
- else if(hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) {
- freez(rt->family_match);
- simple_pattern_free(rt->family_pattern);
-
- rt->family_match = strdupz(value);
- rt->family_pattern = simple_pattern_create(rt->family_match, SIMPLE_PATTERN_EXACT);
- }
- else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
- health_parse_db_lookup(line, path, filename, value, &rt->group, &rt->after, &rt->before,
- &rt->update_every, &rt->options, &rt->dimensions);
- }
- else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
- if(!health_parse_duration(value, &rt->update_every))
- error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' cannot parse duration: '%s'.",
- line, path, filename, rt->name, key, value);
- }
- else if(hash == hash_green && !strcasecmp(key, HEALTH_GREEN_KEY)) {
- char *e;
- rt->green = strtold(value, &e);
- if(e && *e) {
- error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' leaves this string unmatched: '%s'.",
- line, path, filename, rt->name, key, e);
- }
- }
- else if(hash == hash_red && !strcasecmp(key, HEALTH_RED_KEY)) {
- char *e;
- rt->red = strtold(value, &e);
- if(e && *e) {
- error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' leaves this string unmatched: '%s'.",
- line, path, filename, rt->name, key, e);
- }
- }
- else if(hash == hash_calc && !strcasecmp(key, HEALTH_CALC_KEY)) {
- const char *failed_at = NULL;
- int error = 0;
- rt->calculation = expression_parse(value, &failed_at, &error);
- if(!rt->calculation) {
- error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
- line, path, filename, rt->name, key, value, expression_strerror(error), failed_at);
- }
- }
- else if(hash == hash_warn && !strcasecmp(key, HEALTH_WARN_KEY)) {
- const char *failed_at = NULL;
- int error = 0;
- rt->warning = expression_parse(value, &failed_at, &error);
- if(!rt->warning) {
- error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
- line, path, filename, rt->name, key, value, expression_strerror(error), failed_at);
- }
- }
- else if(hash == hash_crit && !strcasecmp(key, HEALTH_CRIT_KEY)) {
- const char *failed_at = NULL;
- int error = 0;
- rt->critical = expression_parse(value, &failed_at, &error);
- if(!rt->critical) {
- error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
- line, path, filename, rt->name, key, value, expression_strerror(error), failed_at);
- }
- }
- else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) {
- if(rt->exec) {
- if(strcmp(rt->exec, value))
- error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rt->name, key, rt->exec, value, value);
-
- freez(rt->exec);
- }
- rt->exec = strdupz(value);
- }
- else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) {
- if(rt->recipient) {
- if(strcmp(rt->recipient, value))
- error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rt->name, key, rt->recipient, value, value);
-
- freez(rt->recipient);
- }
- rt->recipient = strdupz(value);
- }
- else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) {
- if(rt->units) {
- if(strcmp(rt->units, value))
- error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rt->name, key, rt->units, value, value);
-
- freez(rt->units);
- }
- rt->units = strdupz(value);
- strip_quotes(rt->units);
- }
- else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) {
- if(rt->info) {
- if(strcmp(rt->info, value))
- error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
- line, path, filename, rt->name, key, rt->info, value, value);
-
- freez(rt->info);
- }
- rt->info = strdupz(value);
- strip_quotes(rt->info);
- }
- else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
- health_parse_delay(line, path, filename, value, &rt->delay_up_duration, &rt->delay_down_duration, &rt->delay_max_duration, &rt->delay_multiplier);
- }
- else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
- rt->options |= health_parse_options(value);
- }
- else {
- error("Health configuration at line %zu of file '%s/%s' for template '%s' has unknown key '%s'.",
- line, path, filename, rt->name, key);
- }
- }
- else {
- error("Health configuration at line %zu of file '%s/%s' has unknown key '%s'. Expected either '" HEALTH_ALARM_KEY "' or '" HEALTH_TEMPLATE_KEY "'.",
- line, path, filename, key);
- }
- }
-
- if(rc && !rrdcalc_add_alarm_from_config(localhost, rc))
- rrdcalc_free(localhost, rc);
-
- if(rt && !rrdcalctemplate_add_template_from_config(localhost, rt))
- rrdcalctemplate_free(localhost, rt);
-
- fclose(fp);
- return 1;
-}
-
-void health_readdir(const char *path) {
- size_t pathlen = strlen(path);
-
- debug(D_HEALTH, "Health configuration reading directory '%s'", path);
-
- DIR *dir = opendir(path);
- if (!dir) {
- error("Health configuration cannot open directory '%s'.", path);
- return;
- }
-
- struct dirent *de = NULL;
- while ((de = readdir(dir))) {
- size_t len = strlen(de->d_name);
-
- if(de->d_type == DT_DIR
- && (
- (de->d_name[0] == '.' && de->d_name[1] == '\0')
- || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0')
- )) {
- debug(D_HEALTH, "Ignoring directory '%s'", de->d_name);
- continue;
- }
-
- else if(de->d_type == DT_DIR) {
- char *s = mallocz(pathlen + strlen(de->d_name) + 2);
- strcpy(s, path);
- strcat(s, "/");
- strcat(s, de->d_name);
- health_readdir(s);
- freez(s);
- continue;
- }
-
- else if((de->d_type == DT_LNK || de->d_type == DT_REG || de->d_type == DT_UNKNOWN) &&
- len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) {
- health_readfile(path, de->d_name);
- }
-
- else debug(D_HEALTH, "Ignoring file '%s'", de->d_name);
- }
-
- closedir(dir);
-}
-
-static inline char *health_config_dir(void) {
- char buffer[FILENAME_MAX + 1];
- snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_config_dir);
- return config_get("health", "health configuration directory", buffer);
-}
-
-void health_init(void) {
- debug(D_HEALTH, "Health configuration initializing");
-
- if(!(health_enabled = config_get_boolean("health", "enabled", 1))) {
- debug(D_HEALTH, "Health is disabled.");
- return;
- }
-
- char pathname[FILENAME_MAX + 1];
- snprintfz(pathname, FILENAME_MAX, "%s/health", netdata_configured_varlib_dir);
- if(mkdir(pathname, 0770) == -1 && errno != EEXIST)
- fatal("Cannot create directory '%s'.", pathname);
-
- char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s/health-log.db", pathname);
- health.log_filename = config_get("health", "health db file", filename);
-
- health_alarm_log_load(localhost);
- health_alarm_log_open();
-
- char *path = health_config_dir();
-
- snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
- health.health_default_exec = config_get("health", "script to execute on alarm", filename);
-
- long n = config_get_number("health", "in memory max health log entries", (long)localhost->health_log.max);
- if(n < 10) {
- error("Health configuration has invalid max log entries %ld. Using default %u", n, localhost->health_log.max);
- config_set_number("health", "in memory max health log entries", (long)localhost->health_log.max);
- }
- else localhost->health_log.max = (unsigned int)n;
-
- rrdhost_rwlock(localhost);
- health_readdir(path);
- rrdhost_unlock(localhost);
-}
-
-// ----------------------------------------------------------------------------
-// JSON generation
-
-static inline void health_string2json(BUFFER *wb, const char *prefix, const char *label, const char *value, const char *suffix) {
- if(value && *value)
- buffer_sprintf(wb, "%s\"%s\":\"%s\"%s", prefix, label, value, suffix);
- else
- buffer_sprintf(wb, "%s\"%s\":null%s", prefix, label, suffix);
-}
-
-static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
- buffer_sprintf(wb,
- "\n\t{\n"
- "\t\t\"hostname\": \"%s\",\n"
- "\t\t\"unique_id\": %u,\n"
- "\t\t\"alarm_id\": %u,\n"
- "\t\t\"alarm_event_id\": %u,\n"
- "\t\t\"name\": \"%s\",\n"
- "\t\t\"chart\": \"%s\",\n"
- "\t\t\"family\": \"%s\",\n"
- "\t\t\"processed\": %s,\n"
- "\t\t\"updated\": %s,\n"
- "\t\t\"exec_run\": %lu,\n"
- "\t\t\"exec_failed\": %s,\n"
- "\t\t\"exec\": \"%s\",\n"
- "\t\t\"recipient\": \"%s\",\n"
- "\t\t\"exec_code\": %d,\n"
- "\t\t\"source\": \"%s\",\n"
- "\t\t\"units\": \"%s\",\n"
- "\t\t\"info\": \"%s\",\n"
- "\t\t\"when\": %lu,\n"
- "\t\t\"duration\": %lu,\n"
- "\t\t\"non_clear_duration\": %lu,\n"
- "\t\t\"status\": \"%s\",\n"
- "\t\t\"old_status\": \"%s\",\n"
- "\t\t\"delay\": %d,\n"
- "\t\t\"delay_up_to_timestamp\": %lu,\n"
- "\t\t\"updated_by_id\": %u,\n"
- "\t\t\"updates_id\": %u,\n"
- "\t\t\"value_string\": \"%s\",\n"
- "\t\t\"old_value_string\": \"%s\",\n"
- , host->hostname
- , ae->unique_id
- , ae->alarm_id
- , ae->alarm_event_id
- , ae->name
- , ae->chart
- , ae->family
- , (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false"
- , (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false"
- , (unsigned long)ae->exec_run_timestamp
- , (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false"
- , ae->exec?ae->exec:health.health_default_exec
- , ae->recipient?ae->recipient:health.health_default_recipient
- , ae->exec_code
- , ae->source
- , ae->units?ae->units:""
- , ae->info?ae->info:""
- , (unsigned long)ae->when
- , (unsigned long)ae->duration
- , (unsigned long)ae->non_clear_duration
- , rrdcalc_status2string(ae->new_status)
- , rrdcalc_status2string(ae->old_status)
- , ae->delay
- , (unsigned long)ae->delay_up_to_timestamp
- , ae->updated_by_id
- , ae->updates_id
- , ae->new_value_string
- , ae->old_value_string
- );
-
- if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
- buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
- }
-
- buffer_strcat(wb, "\t\t\"value\":");
- buffer_rrd_value(wb, ae->new_value);
- buffer_strcat(wb, ",\n");
-
- buffer_strcat(wb, "\t\t\"old_value\":");
- buffer_rrd_value(wb, ae->old_value);
- buffer_strcat(wb, "\n");
-
- buffer_strcat(wb, "\t}");
-}
-
-void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after) {
- pthread_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
-
- buffer_strcat(wb, "[");
-
- unsigned int max = host->health_log.max;
- unsigned int count = 0;
- ALARM_ENTRY *ae;
- for(ae = host->health_log.alarms; ae && count < max ; count++, ae = ae->next) {
- if(ae->unique_id > after) {
- if(likely(count)) buffer_strcat(wb, ",");
- health_alarm_entry2json_nolock(wb, ae, host);
- }
- }
-
- buffer_strcat(wb, "\n]\n");
-
- pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock);
-}
-
-static inline void health_rrdcalc2json_nolock(BUFFER *wb, RRDCALC *rc) {
- char value_string[100 + 1];
- format_value_and_unit(value_string, 100, rc->value, rc->units, -1);
-
- buffer_sprintf(wb,
- "\t\t\"%s.%s\": {\n"
- "\t\t\t\"id\": %lu,\n"
- "\t\t\t\"name\": \"%s\",\n"
- "\t\t\t\"chart\": \"%s\",\n"
- "\t\t\t\"family\": \"%s\",\n"
- "\t\t\t\"active\": %s,\n"
- "\t\t\t\"exec\": \"%s\",\n"
- "\t\t\t\"recipient\": \"%s\",\n"
- "\t\t\t\"source\": \"%s\",\n"
- "\t\t\t\"units\": \"%s\",\n"
- "\t\t\t\"info\": \"%s\",\n"
- "\t\t\t\"status\": \"%s\",\n"
- "\t\t\t\"last_status_change\": %lu,\n"
- "\t\t\t\"last_updated\": %lu,\n"
- "\t\t\t\"next_update\": %lu,\n"
- "\t\t\t\"update_every\": %d,\n"
- "\t\t\t\"delay_up_duration\": %d,\n"
- "\t\t\t\"delay_down_duration\": %d,\n"
- "\t\t\t\"delay_max_duration\": %d,\n"
- "\t\t\t\"delay_multiplier\": %f,\n"
- "\t\t\t\"delay\": %d,\n"
- "\t\t\t\"delay_up_to_timestamp\": %lu,\n"
- "\t\t\t\"value_string\": \"%s\",\n"
- , rc->chart, rc->name
- , (unsigned long)rc->id
- , rc->name
- , rc->chart
- , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
- , (rc->rrdset)?"true":"false"
- , rc->exec?rc->exec:health.health_default_exec
- , rc->recipient?rc->recipient:health.health_default_recipient
- , rc->source
- , rc->units?rc->units:""
- , rc->info?rc->info:""
- , rrdcalc_status2string(rc->status)
- , (unsigned long)rc->last_status_change
- , (unsigned long)rc->last_updated
- , (unsigned long)rc->next_update
- , rc->update_every
- , rc->delay_up_duration
- , rc->delay_down_duration
- , rc->delay_max_duration
- , rc->delay_multiplier
- , rc->delay_last
- , (unsigned long)rc->delay_up_to_timestamp
- , value_string
- );
-
- if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {
- buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
- }
-
- if(RRDCALC_HAS_DB_LOOKUP(rc)) {
- if(rc->dimensions && *rc->dimensions)
- health_string2json(wb, "\t\t\t", "lookup_dimensions", rc->dimensions, ",\n");
-
- buffer_sprintf(wb,
- "\t\t\t\"db_after\": %lu,\n"
- "\t\t\t\"db_before\": %lu,\n"
- "\t\t\t\"lookup_method\": \"%s\",\n"
- "\t\t\t\"lookup_after\": %d,\n"
- "\t\t\t\"lookup_before\": %d,\n"
- "\t\t\t\"lookup_options\": \"",
- (unsigned long) rc->db_after,
- (unsigned long) rc->db_before,
- group_method2string(rc->group),
- rc->after,
- rc->before
- );
- buffer_data_options2string(wb, rc->options);
- buffer_strcat(wb, "\",\n");
- }
-
- if(rc->calculation) {
- health_string2json(wb, "\t\t\t", "calc", rc->calculation->source, ",\n");
- health_string2json(wb, "\t\t\t", "calc_parsed", rc->calculation->parsed_as, ",\n");
- }
-
- if(rc->warning) {
- health_string2json(wb, "\t\t\t", "warn", rc->warning->source, ",\n");
- health_string2json(wb, "\t\t\t", "warn_parsed", rc->warning->parsed_as, ",\n");
- }
-
- if(rc->critical) {
- health_string2json(wb, "\t\t\t", "crit", rc->critical->source, ",\n");
- health_string2json(wb, "\t\t\t", "crit_parsed", rc->critical->parsed_as, ",\n");
- }
-
- buffer_strcat(wb, "\t\t\t\"green\":");
- buffer_rrd_value(wb, rc->green);
- buffer_strcat(wb, ",\n");
-
- buffer_strcat(wb, "\t\t\t\"red\":");
- buffer_rrd_value(wb, rc->red);
- buffer_strcat(wb, ",\n");
-
- buffer_strcat(wb, "\t\t\t\"value\":");
- buffer_rrd_value(wb, rc->value);
- buffer_strcat(wb, "\n");
-
- buffer_strcat(wb, "\t\t}");
-}
-
-//void health_rrdcalctemplate2json_nolock(BUFFER *wb, RRDCALCTEMPLATE *rt) {
-//
-//}
-
-void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) {
- int i;
-
- rrdhost_rdlock(localhost);
- buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
- "\n\t\"latest_alarm_log_unique_id\": %u,"
- "\n\t\"status\": %s,"
- "\n\t\"now\": %lu,"
- "\n\t\"alarms\": {\n",
- host->hostname,
- (host->health_log.next_log_id > 0)?(host->health_log.next_log_id - 1):0,
- health_enabled?"true":"false",
- (unsigned long)now_realtime_sec());
-
- RRDCALC *rc;
- for(i = 0, rc = host->alarms; rc ; rc = rc->next) {
- if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
- continue;
-
- if(likely(!all && !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL)))
- continue;
-
- if(likely(i)) buffer_strcat(wb, ",\n");
- health_rrdcalc2json_nolock(wb, rc);
- i++;
- }
-
-// buffer_strcat(wb, "\n\t},\n\t\"templates\": {");
-// RRDCALCTEMPLATE *rt;
-// for(rt = host->templates; rt ; rt = rt->next)
-// health_rrdcalctemplate2json_nolock(wb, rt);
-
- buffer_strcat(wb, "\n\t}\n}\n");
- rrdhost_unlock(localhost);
-}
-
-
-// ----------------------------------------------------------------------------
-// re-load health configuration
-
-static inline void health_free_all_nolock(RRDHOST *host) {
- while(host->templates)
- rrdcalctemplate_free(host, host->templates);
-
- while(host->alarms)
- rrdcalc_free(host, host->alarms);
-}
-
-void health_reload(void) {
- if(!health_enabled) {
- error("Health reload is requested, but health is not enabled.");
- return;
- }
-
- char *path = health_config_dir();
-
- // free all running alarms
- rrdhost_rwlock(localhost);
- health_free_all_nolock(localhost);
- rrdhost_unlock(localhost);
-
- // invalidate all previous entries in the alarm log
- ALARM_ENTRY *t;
- for(t = localhost->health_log.alarms ; t ; t = t->next) {
- if(t->new_status != RRDCALC_STATUS_REMOVED)
- t->flags |= HEALTH_ENTRY_FLAG_UPDATED;
- }
-
- // reset all thresholds to all charts
- RRDSET *st;
- for(st = localhost->rrdset_root; st ; st = st->next) {
- st->green = NAN;
- st->red = NAN;
- }
-
- // load the new alarms
- rrdhost_rwlock(localhost);
- health_readdir(path);
- rrdhost_unlock(localhost);
-
- // link the loaded alarms to their charts
- for(st = localhost->rrdset_root; st ; st = st->next) {
- rrdhost_rwlock(localhost);
-
- rrdsetcalc_link_matching(st);
- rrdcalctemplate_link_matching(st);
-
- rrdhost_unlock(localhost);
- }
-}
-
-// ----------------------------------------------------------------------------
-// health main thread and friends
-
-static inline int rrdcalc_value2status(calculated_number n) {
- if(isnan(n) || isinf(n)) return RRDCALC_STATUS_UNDEFINED;
- if(n) return RRDCALC_STATUS_RAISED;
- return RRDCALC_STATUS_CLEAR;
-}
-
-#define ALARM_EXEC_COMMAND_LENGTH 8192
-
-static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
- ae->flags |= HEALTH_ENTRY_FLAG_PROCESSED;
-
- if(unlikely(ae->new_status < RRDCALC_STATUS_CLEAR)) {
- // do not send notifications for internal statuses
- debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (internal statuses)", ae->chart, ae->name, rrdcalc_status2string(ae->new_status));
- goto done;
+ if(unlikely(ae->new_status < RRDCALC_STATUS_CLEAR)) {
+ // do not send notifications for internal statuses
+ debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (internal statuses)", ae->chart, ae->name, rrdcalc_status2string(ae->new_status));
+ goto done;
}
if(unlikely(ae->new_status <= RRDCALC_STATUS_CLEAR && (ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
static char command_to_run[ALARM_EXEC_COMMAND_LENGTH + 1];
pid_t command_pid;
- const char *exec = ae->exec;
- if(!exec) exec = health.health_default_exec;
-
- const char *recipient = ae->recipient;
- if(!recipient) recipient = health.health_default_recipient;
+ const char *exec = (ae->exec) ? ae->exec : host->health_default_exec;
+ const char *recipient = (ae->recipient) ? ae->recipient : host->health_default_recipient;
snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s' '%s' '%s'",
exec,
if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0))
error("Cannot set pthread cancel state to DISABLE.");
- rrdhost_rdlock(localhost);
+ RRDHOST *host;
+ for(host = localhost; host ; host = host->next) {
+ rrdhost_rdlock(host);
- // the first loop is to lookup values from the db
- for(rc = localhost->alarms; rc; rc = rc->next) {
- if(unlikely(!rrdcalc_isrunnable(rc, now, &next_run))) {
- if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_RUNNABLE))
- rc->rrdcalc_flags &= ~RRDCALC_FLAG_RUNNABLE;
- continue;
- }
+ // the first loop is to lookup values from the db
+ for(rc = host->alarms; rc; rc = rc->next) {
+ if(unlikely(!rrdcalc_isrunnable(rc, now, &next_run))) {
+ if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_RUNNABLE))
+ rc->rrdcalc_flags &= ~RRDCALC_FLAG_RUNNABLE;
+ continue;
+ }
- runnable++;
- rc->old_value = rc->value;
- rc->rrdcalc_flags |= RRDCALC_FLAG_RUNNABLE;
+ runnable++;
+ rc->old_value = rc->value;
+ rc->rrdcalc_flags |= RRDCALC_FLAG_RUNNABLE;
- // 1. if there is database lookup, do it
- // 2. if there is calculation expression, run it
+ // 1. if there is database lookup, do it
+ // 2. if there is calculation expression, run it
- if (unlikely(RRDCALC_HAS_DB_LOOKUP(rc))) {
- /* time_t old_db_timestamp = rc->db_before; */
- int value_is_null = 0;
+ if(unlikely(RRDCALC_HAS_DB_LOOKUP(rc))) {
+ /* time_t old_db_timestamp = rc->db_before; */
+ int value_is_null = 0;
- int ret = rrd2value(rc->rrdset, wb, &rc->value,
- rc->dimensions, 1, rc->after, rc->before, rc->group,
- rc->options, &rc->db_after, &rc->db_before, &value_is_null);
+ int ret = rrd2value(rc->rrdset, wb, &rc->value, rc->dimensions, 1, rc->after, rc->before, rc->group, rc->options, &rc->db_after, &rc->db_before, &value_is_null);
- if (unlikely(ret != 200)) {
- // database lookup failed
- rc->value = NAN;
+ if(unlikely(ret != 200)) {
+ // database lookup failed
+ rc->value = NAN;
- debug(D_HEALTH, "Health alarm '%s.%s': database lookup returned error %d", rc->chart?rc->chart:"NOCHART", rc->name, ret);
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': database lookup returned error %d", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, ret);
- if (unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_DB_ERROR))) {
- rc->rrdcalc_flags |= RRDCALC_FLAG_DB_ERROR;
- error("Health alarm '%s.%s': database lookup returned error %d", rc->chart?rc->chart:"NOCHART", rc->name, ret);
+ if(unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_DB_ERROR))) {
+ rc->rrdcalc_flags |= RRDCALC_FLAG_DB_ERROR;
+ error("Health on host '%s', alarm '%s.%s': database lookup returned error %d", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, ret);
+ }
}
- }
- else if (unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_DB_ERROR))
- rc->rrdcalc_flags &= ~RRDCALC_FLAG_DB_ERROR;
+ else if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_DB_ERROR))
+ rc->rrdcalc_flags &= ~RRDCALC_FLAG_DB_ERROR;
- /* - RRDCALC_FLAG_DB_STALE not currently used
- if (unlikely(old_db_timestamp == rc->db_before)) {
- // database is stale
+ /* - RRDCALC_FLAG_DB_STALE not currently used
+ if (unlikely(old_db_timestamp == rc->db_before)) {
+ // database is stale
- debug(D_HEALTH, "Health alarm '%s.%s': database is stale", rc->chart?rc->chart:"NOCHART", rc->name);
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': database is stale", host->hostname, rc->chart?rc->chart:"NOCHART", rc->name);
- if (unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_DB_STALE))) {
- rc->rrdcalc_flags |= RRDCALC_FLAG_DB_STALE;
- error("Health alarm '%s.%s': database is stale", rc->chart?rc->chart:"NOCHART", rc->name);
+ if (unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_DB_STALE))) {
+ rc->rrdcalc_flags |= RRDCALC_FLAG_DB_STALE;
+ error("Health on host '%s', alarm '%s.%s': database is stale", host->hostname, rc->chart?rc->chart:"NOCHART", rc->name);
+ }
}
- }
- else if (unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_DB_STALE))
- rc->rrdcalc_flags &= ~RRDCALC_FLAG_DB_STALE;
- */
+ else if (unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_DB_STALE))
+ rc->rrdcalc_flags &= ~RRDCALC_FLAG_DB_STALE;
+ */
- if (unlikely(value_is_null)) {
- // collected value is null
+ if(unlikely(value_is_null)) {
+ // collected value is null
- rc->value = NAN;
+ rc->value = NAN;
- debug(D_HEALTH, "Health alarm '%s.%s': database lookup returned empty value (possibly value is not collected yet)",
- rc->chart?rc->chart:"NOCHART", rc->name);
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': database lookup returned empty value (possibly value is not collected yet)", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name);
- if (unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_DB_NAN))) {
- rc->rrdcalc_flags |= RRDCALC_FLAG_DB_NAN;
- error("Health alarm '%s.%s': database lookup returned empty value (possibly value is not collected yet)",
- rc->chart?rc->chart:"NOCHART", rc->name);
+ if(unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_DB_NAN))) {
+ rc->rrdcalc_flags |= RRDCALC_FLAG_DB_NAN;
+ error("Health on host '%s', alarm '%s.%s': database lookup returned empty value (possibly value is not collected yet)", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name);
+ }
}
+ else if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_DB_NAN))
+ rc->rrdcalc_flags &= ~RRDCALC_FLAG_DB_NAN;
+
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': database lookup gave value " CALCULATED_NUMBER_FORMAT, host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, rc->value);
}
- else if (unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_DB_NAN))
- rc->rrdcalc_flags &= ~RRDCALC_FLAG_DB_NAN;
- debug(D_HEALTH, "Health alarm '%s.%s': database lookup gave value "
- CALCULATED_NUMBER_FORMAT, rc->chart?rc->chart:"NOCHART", rc->name, rc->value);
- }
+ if(unlikely(rc->calculation)) {
+ if(unlikely(!expression_evaluate(rc->calculation))) {
+ // calculation failed
- if(unlikely(rc->calculation)) {
- if (unlikely(!expression_evaluate(rc->calculation))) {
- // calculation failed
+ rc->value = NAN;
- rc->value = NAN;
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' failed: %s", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, rc->calculation->parsed_as, buffer_tostring(rc->calculation->error_msg));
- debug(D_HEALTH, "Health alarm '%s.%s': expression '%s' failed: %s",
- rc->chart?rc->chart:"NOCHART", rc->name, rc->calculation->parsed_as, buffer_tostring(rc->calculation->error_msg));
+ if(unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_CALC_ERROR))) {
+ rc->rrdcalc_flags |= RRDCALC_FLAG_CALC_ERROR;
+ error("Health on host '%s', alarm '%s.%s': expression '%s' failed: %s", rc->chart ? rc->chart : "NOCHART", host->hostname, rc->name, rc->calculation->parsed_as, buffer_tostring(rc->calculation->error_msg));
+ }
+ }
+ else {
+ if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_CALC_ERROR))
+ rc->rrdcalc_flags &= ~RRDCALC_FLAG_CALC_ERROR;
+
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' gave value "
+ CALCULATED_NUMBER_FORMAT
+ ": %s (source: %s)", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name
+ , rc->calculation->parsed_as, rc->calculation->result,
+ buffer_tostring(rc->calculation->error_msg), rc->source
+ );
- if (unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_CALC_ERROR))) {
- rc->rrdcalc_flags |= RRDCALC_FLAG_CALC_ERROR;
- error("Health alarm '%s.%s': expression '%s' failed: %s",
- rc->chart?rc->chart:"NOCHART", rc->name, rc->calculation->parsed_as, buffer_tostring(rc->calculation->error_msg));
+ rc->value = rc->calculation->result;
}
}
- else {
- if (unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_CALC_ERROR))
- rc->rrdcalc_flags &= ~RRDCALC_FLAG_CALC_ERROR;
-
- debug(D_HEALTH, "Health alarm '%s.%s': expression '%s' gave value "
- CALCULATED_NUMBER_FORMAT
- ": %s (source: %s)",
- rc->chart?rc->chart:"NOCHART", rc->name,
- rc->calculation->parsed_as,
- rc->calculation->result,
- buffer_tostring(rc->calculation->error_msg),
- rc->source
- );
-
- rc->value = rc->calculation->result;
- }
}
- }
- rrdhost_unlock(localhost);
+ rrdhost_unlock(host);
- if(unlikely(runnable && !netdata_exit)) {
- rrdhost_rdlock(localhost);
+ if(unlikely(runnable && !netdata_exit)) {
+ rrdhost_rdlock(host);
- for(rc = localhost->alarms; rc; rc = rc->next) {
- if(unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_RUNNABLE)))
- continue;
+ for(rc = host->alarms; rc; rc = rc->next) {
+ if(unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_RUNNABLE)))
+ continue;
- int warning_status = RRDCALC_STATUS_UNDEFINED;
- int critical_status = RRDCALC_STATUS_UNDEFINED;
+ int warning_status = RRDCALC_STATUS_UNDEFINED;
+ int critical_status = RRDCALC_STATUS_UNDEFINED;
- if(likely(rc->warning)) {
- if(unlikely(!expression_evaluate(rc->warning))) {
- // calculation failed
+ if(likely(rc->warning)) {
+ if(unlikely(!expression_evaluate(rc->warning))) {
+ // calculation failed
- debug(D_HEALTH, "Health alarm '%s.%s': warning expression failed with error: %s",
- rc->chart?rc->chart:"NOCHART", rc->name, buffer_tostring(rc->warning->error_msg));
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': warning expression failed with error: %s", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, buffer_tostring(rc->warning->error_msg));
- if (unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_WARN_ERROR))) {
- rc->rrdcalc_flags |= RRDCALC_FLAG_WARN_ERROR;
- error("Health alarm '%s.%s': warning expression failed with error: %s",
- rc->chart?rc->chart:"NOCHART", rc->name, buffer_tostring(rc->warning->error_msg));
+ if(unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_WARN_ERROR))) {
+ rc->rrdcalc_flags |= RRDCALC_FLAG_WARN_ERROR;
+ error("Health on host '%s', alarm '%s.%s': warning expression failed with error: %s", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, buffer_tostring(rc->warning->error_msg));
+ }
}
- }
- else {
- if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_WARN_ERROR))
- rc->rrdcalc_flags &= ~RRDCALC_FLAG_WARN_ERROR;
+ else {
+ if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_WARN_ERROR))
+ rc->rrdcalc_flags &= ~RRDCALC_FLAG_WARN_ERROR;
- debug(D_HEALTH, "Health alarm '%s.%s': warning expression gave value "
- CALCULATED_NUMBER_FORMAT
- ": %s (source: %s)",
- rc->chart?rc->chart:"NOCHART", rc->name,
- rc->warning->result,
- buffer_tostring(rc->warning->error_msg),
- rc->source
- );
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': warning expression gave value " CALCULATED_NUMBER_FORMAT ": %s (source: %s)", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, rc->warning->result, buffer_tostring(rc->warning->error_msg), rc->source);
- warning_status = rrdcalc_value2status(rc->warning->result);
+ warning_status = rrdcalc_value2status(rc->warning->result);
+ }
}
- }
- if(likely(rc->critical)) {
- if(unlikely(!expression_evaluate(rc->critical))) {
- // calculation failed
+ if(likely(rc->critical)) {
+ if(unlikely(!expression_evaluate(rc->critical))) {
+ // calculation failed
- debug(D_HEALTH, "Health alarm '%s.%s': critical expression failed with error: %s",
- rc->chart?rc->chart:"NOCHART", rc->name, buffer_tostring(rc->critical->error_msg));
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': critical expression failed with error: %s", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, buffer_tostring(rc->critical->error_msg));
- if (unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_CRIT_ERROR))) {
- rc->rrdcalc_flags |= RRDCALC_FLAG_CRIT_ERROR;
- error("Health alarm '%s.%s': critical expression failed with error: %s",
- rc->chart?rc->chart:"NOCHART", rc->name, buffer_tostring(rc->critical->error_msg));
+ if(unlikely(!(rc->rrdcalc_flags & RRDCALC_FLAG_CRIT_ERROR))) {
+ rc->rrdcalc_flags |= RRDCALC_FLAG_CRIT_ERROR;
+ error("Health on host '%s', alarm '%s.%s': critical expression failed with error: %s", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name, buffer_tostring(rc->critical->error_msg));
+ }
}
- }
- else {
- if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_CRIT_ERROR))
- rc->rrdcalc_flags &= ~RRDCALC_FLAG_CRIT_ERROR;
+ else {
+ if(unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_CRIT_ERROR))
+ rc->rrdcalc_flags &= ~RRDCALC_FLAG_CRIT_ERROR;
- debug(D_HEALTH, "Health alarm '%s.%s': critical expression gave value "
- CALCULATED_NUMBER_FORMAT
- ": %s (source: %s)",
- rc->chart?rc->chart:"NOCHART", rc->name,
- rc->critical->result,
- buffer_tostring(rc->critical->error_msg),
- rc->source
- );
+ debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': critical expression gave value " CALCULATED_NUMBER_FORMAT ": %s (source: %s)", host->hostname, rc->chart ? rc->chart : "NOCHART", rc->name , rc->critical->result, buffer_tostring(rc->critical->error_msg), rc->source);
- critical_status = rrdcalc_value2status(rc->critical->result);
+ critical_status = rrdcalc_value2status(rc->critical->result);
+ }
}
- }
- int status = RRDCALC_STATUS_UNDEFINED;
+ int status = RRDCALC_STATUS_UNDEFINED;
- switch(warning_status) {
- case RRDCALC_STATUS_CLEAR:
- status = RRDCALC_STATUS_CLEAR;
- break;
+ switch(warning_status) {
+ case RRDCALC_STATUS_CLEAR:
+ status = RRDCALC_STATUS_CLEAR;
+ break;
- case RRDCALC_STATUS_RAISED:
- status = RRDCALC_STATUS_WARNING;
- break;
+ case RRDCALC_STATUS_RAISED:
+ status = RRDCALC_STATUS_WARNING;
+ break;
- default:
- break;
- }
+ default:
+ break;
+ }
- switch(critical_status) {
- case RRDCALC_STATUS_CLEAR:
- if(status == RRDCALC_STATUS_UNDEFINED)
- status = RRDCALC_STATUS_CLEAR;
- break;
+ switch(critical_status) {
+ case RRDCALC_STATUS_CLEAR:
+ if(status == RRDCALC_STATUS_UNDEFINED)
+ status = RRDCALC_STATUS_CLEAR;
+ break;
- case RRDCALC_STATUS_RAISED:
- status = RRDCALC_STATUS_CRITICAL;
- break;
+ case RRDCALC_STATUS_RAISED:
+ status = RRDCALC_STATUS_CRITICAL;
+ break;
- default:
- break;
- }
+ default:
+ break;
+ }
- if(status != rc->status) {
- int delay = 0;
+ if(status != rc->status) {
+ int delay = 0;
- if(now > rc->delay_up_to_timestamp) {
- rc->delay_up_current = rc->delay_up_duration;
- rc->delay_down_current = rc->delay_down_duration;
- rc->delay_last = 0;
- rc->delay_up_to_timestamp = 0;
- }
- else {
- rc->delay_up_current = (int)(rc->delay_up_current * rc->delay_multiplier);
- if(rc->delay_up_current > rc->delay_max_duration) rc->delay_up_current = rc->delay_max_duration;
+ if(now > rc->delay_up_to_timestamp) {
+ rc->delay_up_current = rc->delay_up_duration;
+ rc->delay_down_current = rc->delay_down_duration;
+ rc->delay_last = 0;
+ rc->delay_up_to_timestamp = 0;
+ }
+ else {
+ rc->delay_up_current = (int) (rc->delay_up_current * rc->delay_multiplier);
+ if(rc->delay_up_current > rc->delay_max_duration)
+ rc->delay_up_current = rc->delay_max_duration;
+
+ rc->delay_down_current = (int) (rc->delay_down_current * rc->delay_multiplier);
+ if(rc->delay_down_current > rc->delay_max_duration)
+ rc->delay_down_current = rc->delay_max_duration;
+ }
- rc->delay_down_current = (int)(rc->delay_down_current * rc->delay_multiplier);
- if(rc->delay_down_current > rc->delay_max_duration) rc->delay_down_current = rc->delay_max_duration;
+ if(status > rc->status)
+ delay = rc->delay_up_current;
+ else
+ delay = rc->delay_down_current;
+
+ // COMMENTED: because we do need to send raising alarms
+ // if(now + delay < rc->delay_up_to_timestamp)
+ // delay = (int)(rc->delay_up_to_timestamp - now);
+
+ rc->delay_last = delay;
+ rc->delay_up_to_timestamp = now + delay;
+ health_alarm_log(
+ host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id
+ , rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change
+ , rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info
+ , rc->delay_last, (rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)
+ ? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0
+ );
+ rc->last_status_change = now;
+ rc->status = status;
}
- if(status > rc->status)
- delay = rc->delay_up_current;
- else
- delay = rc->delay_down_current;
-
- // COMMENTED: because we do need to send raising alarms
- // if(now + delay < rc->delay_up_to_timestamp)
- // delay = (int)(rc->delay_up_to_timestamp - now);
-
- rc->delay_last = delay;
- rc->delay_up_to_timestamp = now + delay;
- health_alarm_log(
- localhost,
- rc->id,
- rc->next_event_id++,
- now,
- rc->name,
- rc->rrdset->id,
- rc->rrdset->family,
- rc->exec,
- rc->recipient,
- now - rc->last_status_change,
- rc->old_value,
- rc->value,
- rc->status,
- status,
- rc->source,
- rc->units,
- rc->info,
- rc->delay_last,
- (rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)?HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION:0
- );
- rc->last_status_change = now;
- rc->status = status;
- }
+ rc->last_updated = now;
+ rc->next_update = now + rc->update_every;
- rc->last_updated = now;
- rc->next_update = now + rc->update_every;
+ if(next_run > rc->next_update)
+ next_run = rc->next_update;
+ }
- if (next_run > rc->next_update)
- next_run = rc->next_update;
+ rrdhost_unlock(host);
}
- rrdhost_unlock(localhost);
- }
+ if(unlikely(netdata_exit))
+ break;
- if (unlikely(pthread_setcancelstate(oldstate, NULL) != 0))
- error("Cannot set pthread cancel state to RESTORE (%d).", oldstate);
+ // execute notifications
+ // and cleanup
+ health_alarm_log_process(host);
- if(unlikely(netdata_exit))
- break;
+ if(unlikely(netdata_exit))
+ break;
- // execute notifications
- // and cleanup
- health_alarm_log_process(localhost);
+ } /* host loop */
+
+ if(unlikely(pthread_setcancelstate(oldstate, NULL) != 0))
+ error("Cannot set pthread cancel state to RESTORE (%d).", oldstate);
if(unlikely(netdata_exit))
break;
-
+
now = now_realtime_sec();
if(now < next_run) {
- debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration in %d secs",
- loop, (int) (next_run - now));
+ debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration in %d secs", loop, (int) (next_run - now));
sleep_usec(USEC_PER_SEC * (usec_t) (next_run - now));
}
- else {
+ else
debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration now", loop);
- }
}
buffer_free(wb);
--- /dev/null
+#define NETDATA_HEALTH_INTERNALS
+#include "common.h"
+
+#define HEALTH_CONF_MAX_LINE 4096
+
+#define HEALTH_ALARM_KEY "alarm"
+#define HEALTH_TEMPLATE_KEY "template"
+#define HEALTH_ON_KEY "on"
+#define HEALTH_FAMILIES_KEY "families"
+#define HEALTH_LOOKUP_KEY "lookup"
+#define HEALTH_CALC_KEY "calc"
+#define HEALTH_EVERY_KEY "every"
+#define HEALTH_GREEN_KEY "green"
+#define HEALTH_RED_KEY "red"
+#define HEALTH_WARN_KEY "warn"
+#define HEALTH_CRIT_KEY "crit"
+#define HEALTH_EXEC_KEY "exec"
+#define HEALTH_RECIPIENT_KEY "to"
+#define HEALTH_UNITS_KEY "units"
+#define HEALTH_INFO_KEY "info"
+#define HEALTH_DELAY_KEY "delay"
+#define HEALTH_OPTIONS_KEY "options"
+
+static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
+ if(!rc->chart) {
+ error("Health configuration for alarm '%s' does not have a chart", rc->name);
+ return 0;
+ }
+
+ if(!rc->update_every) {
+ error("Health configuration for alarm '%s.%s' has no frequency (parameter 'every'). Ignoring it.", rc->chart?rc->chart:"NOCHART", rc->name);
+ return 0;
+ }
+
+ if(!RRDCALC_HAS_DB_LOOKUP(rc) && !rc->warning && !rc->critical) {
+ error("Health configuration for alarm '%s.%s' is useless (no calculation, no warning and no critical evaluation)", rc->chart?rc->chart:"NOCHART", rc->name);
+ return 0;
+ }
+
+ if (rrdcalc_exists(host, rc->chart, rc->name, rc->hash_chart, rc->hash))
+ return 0;
+
+ rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id);
+
+ debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f",
+ rc->chart?rc->chart:"NOCHART",
+ rc->name,
+ rc->id,
+ (rc->exec)?rc->exec:"DEFAULT",
+ (rc->recipient)?rc->recipient:"DEFAULT",
+ rc->green,
+ rc->red,
+ rc->group,
+ rc->after,
+ rc->before,
+ rc->options,
+ (rc->dimensions)?rc->dimensions:"NONE",
+ rc->update_every,
+ (rc->calculation)?rc->calculation->parsed_as:"NONE",
+ (rc->warning)?rc->warning->parsed_as:"NONE",
+ (rc->critical)?rc->critical->parsed_as:"NONE",
+ rc->source,
+ rc->delay_up_duration,
+ rc->delay_down_duration,
+ rc->delay_max_duration,
+ rc->delay_multiplier
+ );
+
+ rrdcalc_create_part2(host, rc);
+ return 1;
+}
+
+static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt) {
+ if(unlikely(!rt->context)) {
+ error("Health configuration for template '%s' does not have a context", rt->name);
+ return 0;
+ }
+
+ if(unlikely(!rt->update_every)) {
+ error("Health configuration for template '%s' has no frequency (parameter 'every'). Ignoring it.", rt->name);
+ return 0;
+ }
+
+ if(unlikely(!RRDCALCTEMPLATE_HAS_CALCULATION(rt) && !rt->warning && !rt->critical)) {
+ error("Health configuration for template '%s' is useless (no calculation, no warning and no critical evaluation)", rt->name);
+ return 0;
+ }
+
+ RRDCALCTEMPLATE *t, *last = NULL;
+ for (t = host->templates; t ; last = t, t = t->next) {
+ if(unlikely(t->hash_name == rt->hash_name && !strcmp(t->name, rt->name))) {
+ error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
+ return 0;
+ }
+ }
+
+ debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f",
+ rt->name,
+ (rt->context)?rt->context:"NONE",
+ (rt->exec)?rt->exec:"DEFAULT",
+ (rt->recipient)?rt->recipient:"DEFAULT",
+ rt->green,
+ rt->red,
+ rt->group,
+ rt->after,
+ rt->before,
+ rt->options,
+ (rt->dimensions)?rt->dimensions:"NONE",
+ rt->update_every,
+ (rt->calculation)?rt->calculation->parsed_as:"NONE",
+ (rt->warning)?rt->warning->parsed_as:"NONE",
+ (rt->critical)?rt->critical->parsed_as:"NONE",
+ rt->source,
+ rt->delay_up_duration,
+ rt->delay_down_duration,
+ rt->delay_max_duration,
+ rt->delay_multiplier
+ );
+
+ if(likely(last)) {
+ last->next = rt;
+ }
+ else {
+ rt->next = host->templates;
+ host->templates = rt;
+ }
+
+ return 1;
+}
+
+static inline int health_parse_duration(char *string, int *result) {
+ // make sure it is a number
+ if(!*string || !(isdigit(*string) || *string == '+' || *string == '-')) {
+ *result = 0;
+ return 0;
+ }
+
+ char *e = NULL;
+ calculated_number n = strtold(string, &e);
+ if(e && *e) {
+ switch (*e) {
+ case 'Y':
+ *result = (int) (n * 86400 * 365);
+ break;
+ case 'M':
+ *result = (int) (n * 86400 * 30);
+ break;
+ case 'w':
+ *result = (int) (n * 86400 * 7);
+ break;
+ case 'd':
+ *result = (int) (n * 86400);
+ break;
+ case 'h':
+ *result = (int) (n * 3600);
+ break;
+ case 'm':
+ *result = (int) (n * 60);
+ break;
+
+ default:
+ case 's':
+ *result = (int) (n);
+ break;
+ }
+ }
+ else
+ *result = (int)(n);
+
+ return 1;
+}
+
+static inline int health_parse_delay(
+ size_t line, const char *path, const char *file, char *string,
+ int *delay_up_duration,
+ int *delay_down_duration,
+ int *delay_max_duration,
+ float *delay_multiplier) {
+
+ char given_up = 0;
+ char given_down = 0;
+ char given_max = 0;
+ char given_multiplier = 0;
+
+ char *s = string;
+ while(*s) {
+ char *key = s;
+
+ while(*s && !isspace(*s)) s++;
+ while(*s && isspace(*s)) *s++ = '\0';
+
+ if(!*key) break;
+
+ char *value = s;
+ while(*s && !isspace(*s)) s++;
+ while(*s && isspace(*s)) *s++ = '\0';
+
+ if(!strcasecmp(key, "up")) {
+ if (!health_parse_duration(value, delay_up_duration)) {
+ error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
+ line, path, file, value, key);
+ }
+ else given_up = 1;
+ }
+ else if(!strcasecmp(key, "down")) {
+ if (!health_parse_duration(value, delay_down_duration)) {
+ error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
+ line, path, file, value, key);
+ }
+ else given_down = 1;
+ }
+ else if(!strcasecmp(key, "multiplier")) {
+ *delay_multiplier = strtof(value, NULL);
+ if(isnan(*delay_multiplier) || isinf(*delay_multiplier) || islessequal(*delay_multiplier, 0)) {
+ error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
+ line, path, file, value, key);
+ }
+ else given_multiplier = 1;
+ }
+ else if(!strcasecmp(key, "max")) {
+ if (!health_parse_duration(value, delay_max_duration)) {
+ error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword",
+ line, path, file, value, key);
+ }
+ else given_max = 1;
+ }
+ else {
+ error("Health configuration at line %zu of file '%s/%s': unknown keyword '%s'",
+ line, path, file, key);
+ }
+ }
+
+ if(!given_up)
+ *delay_up_duration = 0;
+
+ if(!given_down)
+ *delay_down_duration = 0;
+
+ if(!given_multiplier)
+ *delay_multiplier = 1.0;
+
+ if(!given_max) {
+ if((*delay_max_duration) < (*delay_up_duration) * (*delay_multiplier))
+ *delay_max_duration = (*delay_up_duration) * (*delay_multiplier);
+
+ if((*delay_max_duration) < (*delay_down_duration) * (*delay_multiplier))
+ *delay_max_duration = (*delay_down_duration) * (*delay_multiplier);
+ }
+
+ return 1;
+}
+
+static inline uint32_t health_parse_options(const char *s) {
+ uint32_t options = 0;
+ char buf[100+1] = "";
+
+ while(*s) {
+ buf[0] = '\0';
+
+ // skip spaces
+ while(*s && isspace(*s))
+ s++;
+
+ // find the next space
+ size_t count = 0;
+ while(*s && count < 100 && !isspace(*s))
+ buf[count++] = *s++;
+
+ if(buf[0]) {
+ buf[count] = '\0';
+
+ if(!strcasecmp(buf, "no-clear-notification") || !strcasecmp(buf, "no-clear"))
+ options |= RRDCALC_FLAG_NO_CLEAR_NOTIFICATION;
+ else
+ error("Ignoring unknown alarm option '%s'", buf);
+ }
+ }
+
+ return options;
+}
+
+static inline int health_parse_db_lookup(
+ size_t line, const char *path, const char *file, char *string,
+ int *group_method, int *after, int *before, int *every,
+ uint32_t *options, char **dimensions
+) {
+ debug(D_HEALTH, "Health configuration parsing database lookup %zu@%s/%s: %s", line, path, file, string);
+
+ if(*dimensions) freez(*dimensions);
+ *dimensions = NULL;
+ *after = 0;
+ *before = 0;
+ *every = 0;
+ *options = 0;
+
+ char *s = string, *key;
+
+ // first is the group method
+ key = s;
+ while(*s && !isspace(*s)) s++;
+ while(*s && isspace(*s)) *s++ = '\0';
+ if(!*s) {
+ error("Health configuration invalid chart calculation at line %zu of file '%s/%s': expected group method followed by the 'after' time, but got '%s'",
+ line, path, file, key);
+ return 0;
+ }
+
+ if((*group_method = web_client_api_request_v1_data_group(key, -1)) == -1) {
+ error("Health configuration at line %zu of file '%s/%s': invalid group method '%s'",
+ line, path, file, key);
+ return 0;
+ }
+
+ // then is the 'after' time
+ key = s;
+ while(*s && !isspace(*s)) s++;
+ while(*s && isspace(*s)) *s++ = '\0';
+
+ if(!health_parse_duration(key, after)) {
+ error("Health configuration at line %zu of file '%s/%s': invalid duration '%s' after group method",
+ line, path, file, key);
+ return 0;
+ }
+
+ // sane defaults
+ *every = abs(*after);
+
+ // now we may have optional parameters
+ while(*s) {
+ key = s;
+ while(*s && !isspace(*s)) s++;
+ while(*s && isspace(*s)) *s++ = '\0';
+ if(!*key) break;
+
+ if(!strcasecmp(key, "at")) {
+ char *value = s;
+ while(*s && !isspace(*s)) s++;
+ while(*s && isspace(*s)) *s++ = '\0';
+
+ if (!health_parse_duration(value, before)) {
+ error("Health configuration at line %zu of file '%s/%s': invalid duration '%s' for '%s' keyword",
+ line, path, file, value, key);
+ }
+ }
+ else if(!strcasecmp(key, HEALTH_EVERY_KEY)) {
+ char *value = s;
+ while(*s && !isspace(*s)) s++;
+ while(*s && isspace(*s)) *s++ = '\0';
+
+ if (!health_parse_duration(value, every)) {
+ error("Health configuration at line %zu of file '%s/%s': invalid duration '%s' for '%s' keyword",
+ line, path, file, value, key);
+ }
+ }
+ else if(!strcasecmp(key, "absolute") || !strcasecmp(key, "abs") || !strcasecmp(key, "absolute_sum")) {
+ *options |= RRDR_OPTION_ABSOLUTE;
+ }
+ else if(!strcasecmp(key, "min2max")) {
+ *options |= RRDR_OPTION_MIN2MAX;
+ }
+ else if(!strcasecmp(key, "null2zero")) {
+ *options |= RRDR_OPTION_NULL2ZERO;
+ }
+ else if(!strcasecmp(key, "percentage")) {
+ *options |= RRDR_OPTION_PERCENTAGE;
+ }
+ else if(!strcasecmp(key, "unaligned")) {
+ *options |= RRDR_OPTION_NOT_ALIGNED;
+ }
+ else if(!strcasecmp(key, "of")) {
+ if(*s && strcasecmp(s, "all"))
+ *dimensions = strdupz(s);
+ break;
+ }
+ else {
+ error("Health configuration at line %zu of file '%s/%s': unknown keyword '%s'",
+ line, path, file, key);
+ }
+ }
+
+ return 1;
+}
+
+static inline char *trim_all_spaces(char *buffer) {
+ char *d = buffer, *s = buffer;
+
+ // skip spaces
+ while(isspace(*s)) s++;
+
+ while(*s) {
+ // copy the non-space part
+ while(*s && !isspace(*s)) *d++ = *s++;
+
+ // add a space if we have to
+ if(*s && isspace(*s)) {
+ *d++ = ' ';
+ s++;
+ }
+
+ // skip spaces
+ while(isspace(*s)) s++;
+ }
+
+ *d = '\0';
+
+ if(d > buffer) {
+ d--;
+ if(isspace(*d)) *d = '\0';
+ }
+
+ if(!buffer[0]) return NULL;
+ return buffer;
+}
+
+static inline char *health_source_file(size_t line, const char *path, const char *filename) {
+ char buffer[FILENAME_MAX + 1];
+ snprintfz(buffer, FILENAME_MAX, "%zu@%s/%s", line, path, filename);
+ return strdupz(buffer);
+}
+
+static inline void strip_quotes(char *s) {
+ while(*s) {
+ if(*s == '\'' || *s == '"') *s = ' ';
+ s++;
+ }
+}
+
+int health_readfile(RRDHOST *host, const char *path, const char *filename) {
+ debug(D_HEALTH, "Health configuration reading file '%s/%s'", path, filename);
+
+ static uint32_t
+ hash_alarm = 0,
+ hash_template = 0,
+ hash_on = 0,
+ hash_families = 0,
+ hash_calc = 0,
+ hash_green = 0,
+ hash_red = 0,
+ hash_warn = 0,
+ hash_crit = 0,
+ hash_exec = 0,
+ hash_every = 0,
+ hash_lookup = 0,
+ hash_units = 0,
+ hash_info = 0,
+ hash_recipient = 0,
+ hash_delay = 0,
+ hash_options = 0;
+
+ char buffer[HEALTH_CONF_MAX_LINE + 1];
+
+ if(unlikely(!hash_alarm)) {
+ hash_alarm = simple_uhash(HEALTH_ALARM_KEY);
+ hash_template = simple_uhash(HEALTH_TEMPLATE_KEY);
+ hash_on = simple_uhash(HEALTH_ON_KEY);
+ hash_families = simple_uhash(HEALTH_FAMILIES_KEY);
+ hash_calc = simple_uhash(HEALTH_CALC_KEY);
+ hash_lookup = simple_uhash(HEALTH_LOOKUP_KEY);
+ hash_green = simple_uhash(HEALTH_GREEN_KEY);
+ hash_red = simple_uhash(HEALTH_RED_KEY);
+ hash_warn = simple_uhash(HEALTH_WARN_KEY);
+ hash_crit = simple_uhash(HEALTH_CRIT_KEY);
+ hash_exec = simple_uhash(HEALTH_EXEC_KEY);
+ hash_every = simple_uhash(HEALTH_EVERY_KEY);
+ hash_units = simple_hash(HEALTH_UNITS_KEY);
+ hash_info = simple_hash(HEALTH_INFO_KEY);
+ hash_recipient = simple_hash(HEALTH_RECIPIENT_KEY);
+ hash_delay = simple_uhash(HEALTH_DELAY_KEY);
+ hash_options = simple_uhash(HEALTH_OPTIONS_KEY);
+ }
+
+ snprintfz(buffer, HEALTH_CONF_MAX_LINE, "%s/%s", path, filename);
+ FILE *fp = fopen(buffer, "r");
+ if(!fp) {
+ error("Health configuration cannot read file '%s'.", buffer);
+ return 0;
+ }
+
+ RRDCALC *rc = NULL;
+ RRDCALCTEMPLATE *rt = NULL;
+
+ size_t line = 0, append = 0;
+ char *s;
+ while((s = fgets(&buffer[append], (int)(HEALTH_CONF_MAX_LINE - append), fp)) || append) {
+ int stop_appending = !s;
+ line++;
+ s = trim(buffer);
+ if(!s) continue;
+
+ append = strlen(s);
+ if(!stop_appending && s[append - 1] == '\\') {
+ s[append - 1] = ' ';
+ append = &s[append] - buffer;
+ if(append < HEALTH_CONF_MAX_LINE)
+ continue;
+ else {
+ error("Health configuration has too long muli-line at line %zu of file '%s/%s'.", line, path, filename);
+ }
+ }
+ append = 0;
+
+ char *key = s;
+ while(*s && *s != ':') s++;
+ if(!*s) {
+ error("Health configuration has invalid line %zu of file '%s/%s'. It does not contain a ':'. Ignoring it.", line, path, filename);
+ continue;
+ }
+ *s = '\0';
+ s++;
+
+ char *value = s;
+ key = trim_all_spaces(key);
+ value = trim_all_spaces(value);
+
+ if(!key) {
+ error("Health configuration has invalid line %zu of file '%s/%s'. Keyword is empty. Ignoring it.", line, path, filename);
+ continue;
+ }
+
+ if(!value) {
+ error("Health configuration has invalid line %zu of file '%s/%s'. value is empty. Ignoring it.", line, path, filename);
+ continue;
+ }
+
+ uint32_t hash = simple_uhash(key);
+
+ if(hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) {
+ if(rc && !rrdcalc_add_alarm_from_config(host, rc))
+ rrdcalc_free(host, rc);
+
+ if(rt) {
+ if (!rrdcalctemplate_add_template_from_config(host, rt))
+ rrdcalctemplate_free(host, rt);
+ rt = NULL;
+ }
+
+ rc = callocz(1, sizeof(RRDCALC));
+ rc->next_event_id = 1;
+ rc->name = strdupz(value);
+ rc->hash = simple_hash(rc->name);
+ rc->source = health_source_file(line, path, filename);
+ rc->green = NAN;
+ rc->red = NAN;
+ rc->value = NAN;
+ rc->old_value = NAN;
+ rc->delay_multiplier = 1.0;
+
+ if(rrdvar_fix_name(rc->name))
+ error("Health configuration renamed alarm '%s' to '%s'", value, rc->name);
+ }
+ else if(hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) {
+ if(rc) {
+ if(!rrdcalc_add_alarm_from_config(host, rc))
+ rrdcalc_free(host, rc);
+ rc = NULL;
+ }
+
+ if(rt && !rrdcalctemplate_add_template_from_config(host, rt))
+ rrdcalctemplate_free(host, rt);
+
+ rt = callocz(1, sizeof(RRDCALCTEMPLATE));
+ rt->name = strdupz(value);
+ rt->hash_name = simple_hash(rt->name);
+ rt->source = health_source_file(line, path, filename);
+ rt->green = NAN;
+ rt->red = NAN;
+ rt->delay_multiplier = 1.0;
+
+ if(rrdvar_fix_name(rt->name))
+ error("Health configuration renamed template '%s' to '%s'", value, rt->name);
+ }
+ else if(rc) {
+ if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) {
+ if(rc->chart) {
+ if(strcmp(rc->chart, value))
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rc->name, key, rc->chart, value, value);
+
+ freez(rc->chart);
+ }
+ rc->chart = strdupz(value);
+ rc->hash_chart = simple_hash(rc->chart);
+ }
+ else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
+ health_parse_db_lookup(line, path, filename, value, &rc->group, &rc->after, &rc->before,
+ &rc->update_every,
+ &rc->options, &rc->dimensions);
+ }
+ else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
+ if(!health_parse_duration(value, &rc->update_every))
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' cannot parse duration: '%s'.",
+ line, path, filename, rc->name, key, value);
+ }
+ else if(hash == hash_green && !strcasecmp(key, HEALTH_GREEN_KEY)) {
+ char *e;
+ rc->green = strtold(value, &e);
+ if(e && *e) {
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' leaves this string unmatched: '%s'.",
+ line, path, filename, rc->name, key, e);
+ }
+ }
+ else if(hash == hash_red && !strcasecmp(key, HEALTH_RED_KEY)) {
+ char *e;
+ rc->red = strtold(value, &e);
+ if(e && *e) {
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' leaves this string unmatched: '%s'.",
+ line, path, filename, rc->name, key, e);
+ }
+ }
+ else if(hash == hash_calc && !strcasecmp(key, HEALTH_CALC_KEY)) {
+ const char *failed_at = NULL;
+ int error = 0;
+ rc->calculation = expression_parse(value, &failed_at, &error);
+ if(!rc->calculation) {
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
+ line, path, filename, rc->name, key, value, expression_strerror(error), failed_at);
+ }
+ }
+ else if(hash == hash_warn && !strcasecmp(key, HEALTH_WARN_KEY)) {
+ const char *failed_at = NULL;
+ int error = 0;
+ rc->warning = expression_parse(value, &failed_at, &error);
+ if(!rc->warning) {
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
+ line, path, filename, rc->name, key, value, expression_strerror(error), failed_at);
+ }
+ }
+ else if(hash == hash_crit && !strcasecmp(key, HEALTH_CRIT_KEY)) {
+ const char *failed_at = NULL;
+ int error = 0;
+ rc->critical = expression_parse(value, &failed_at, &error);
+ if(!rc->critical) {
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
+ line, path, filename, rc->name, key, value, expression_strerror(error), failed_at);
+ }
+ }
+ else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) {
+ if(rc->exec) {
+ if(strcmp(rc->exec, value))
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rc->name, key, rc->exec, value, value);
+
+ freez(rc->exec);
+ }
+ rc->exec = strdupz(value);
+ }
+ else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) {
+ if(rc->recipient) {
+ if(strcmp(rc->recipient, value))
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rc->name, key, rc->recipient, value, value);
+
+ freez(rc->recipient);
+ }
+ rc->recipient = strdupz(value);
+ }
+ else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) {
+ if(rc->units) {
+ if(strcmp(rc->units, value))
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rc->name, key, rc->units, value, value);
+
+ freez(rc->units);
+ }
+ rc->units = strdupz(value);
+ strip_quotes(rc->units);
+ }
+ else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) {
+ if(rc->info) {
+ if(strcmp(rc->info, value))
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rc->name, key, rc->info, value, value);
+
+ freez(rc->info);
+ }
+ rc->info = strdupz(value);
+ strip_quotes(rc->info);
+ }
+ else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
+ health_parse_delay(line, path, filename, value, &rc->delay_up_duration, &rc->delay_down_duration, &rc->delay_max_duration, &rc->delay_multiplier);
+ }
+ else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+ rc->options |= health_parse_options(value);
+ }
+ else {
+ error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has unknown key '%s'.",
+ line, path, filename, rc->name, key);
+ }
+ }
+ else if(rt) {
+ if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) {
+ if(rt->context) {
+ if(strcmp(rt->context, value))
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rt->name, key, rt->context, value, value);
+
+ freez(rt->context);
+ }
+ rt->context = strdupz(value);
+ rt->hash_context = simple_hash(rt->context);
+ }
+ else if(hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) {
+ freez(rt->family_match);
+ simple_pattern_free(rt->family_pattern);
+
+ rt->family_match = strdupz(value);
+ rt->family_pattern = simple_pattern_create(rt->family_match, SIMPLE_PATTERN_EXACT);
+ }
+ else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
+ health_parse_db_lookup(line, path, filename, value, &rt->group, &rt->after, &rt->before,
+ &rt->update_every, &rt->options, &rt->dimensions);
+ }
+ else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
+ if(!health_parse_duration(value, &rt->update_every))
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' cannot parse duration: '%s'.",
+ line, path, filename, rt->name, key, value);
+ }
+ else if(hash == hash_green && !strcasecmp(key, HEALTH_GREEN_KEY)) {
+ char *e;
+ rt->green = strtold(value, &e);
+ if(e && *e) {
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' leaves this string unmatched: '%s'.",
+ line, path, filename, rt->name, key, e);
+ }
+ }
+ else if(hash == hash_red && !strcasecmp(key, HEALTH_RED_KEY)) {
+ char *e;
+ rt->red = strtold(value, &e);
+ if(e && *e) {
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' leaves this string unmatched: '%s'.",
+ line, path, filename, rt->name, key, e);
+ }
+ }
+ else if(hash == hash_calc && !strcasecmp(key, HEALTH_CALC_KEY)) {
+ const char *failed_at = NULL;
+ int error = 0;
+ rt->calculation = expression_parse(value, &failed_at, &error);
+ if(!rt->calculation) {
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
+ line, path, filename, rt->name, key, value, expression_strerror(error), failed_at);
+ }
+ }
+ else if(hash == hash_warn && !strcasecmp(key, HEALTH_WARN_KEY)) {
+ const char *failed_at = NULL;
+ int error = 0;
+ rt->warning = expression_parse(value, &failed_at, &error);
+ if(!rt->warning) {
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
+ line, path, filename, rt->name, key, value, expression_strerror(error), failed_at);
+ }
+ }
+ else if(hash == hash_crit && !strcasecmp(key, HEALTH_CRIT_KEY)) {
+ const char *failed_at = NULL;
+ int error = 0;
+ rt->critical = expression_parse(value, &failed_at, &error);
+ if(!rt->critical) {
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' at key '%s' has unparse-able expression '%s': %s at '%s'",
+ line, path, filename, rt->name, key, value, expression_strerror(error), failed_at);
+ }
+ }
+ else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) {
+ if(rt->exec) {
+ if(strcmp(rt->exec, value))
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rt->name, key, rt->exec, value, value);
+
+ freez(rt->exec);
+ }
+ rt->exec = strdupz(value);
+ }
+ else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) {
+ if(rt->recipient) {
+ if(strcmp(rt->recipient, value))
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rt->name, key, rt->recipient, value, value);
+
+ freez(rt->recipient);
+ }
+ rt->recipient = strdupz(value);
+ }
+ else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) {
+ if(rt->units) {
+ if(strcmp(rt->units, value))
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rt->name, key, rt->units, value, value);
+
+ freez(rt->units);
+ }
+ rt->units = strdupz(value);
+ strip_quotes(rt->units);
+ }
+ else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) {
+ if(rt->info) {
+ if(strcmp(rt->info, value))
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
+ line, path, filename, rt->name, key, rt->info, value, value);
+
+ freez(rt->info);
+ }
+ rt->info = strdupz(value);
+ strip_quotes(rt->info);
+ }
+ else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
+ health_parse_delay(line, path, filename, value, &rt->delay_up_duration, &rt->delay_down_duration, &rt->delay_max_duration, &rt->delay_multiplier);
+ }
+ else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+ rt->options |= health_parse_options(value);
+ }
+ else {
+ error("Health configuration at line %zu of file '%s/%s' for template '%s' has unknown key '%s'.",
+ line, path, filename, rt->name, key);
+ }
+ }
+ else {
+ error("Health configuration at line %zu of file '%s/%s' has unknown key '%s'. Expected either '" HEALTH_ALARM_KEY "' or '" HEALTH_TEMPLATE_KEY "'.",
+ line, path, filename, key);
+ }
+ }
+
+ if(rc && !rrdcalc_add_alarm_from_config(host, rc))
+ rrdcalc_free(host, rc);
+
+ if(rt && !rrdcalctemplate_add_template_from_config(host, rt))
+ rrdcalctemplate_free(host, rt);
+
+ fclose(fp);
+ return 1;
+}
+
+void health_readdir(RRDHOST *host, const char *path) {
+ size_t pathlen = strlen(path);
+
+ debug(D_HEALTH, "Health configuration reading directory '%s'", path);
+
+ DIR *dir = opendir(path);
+ if (!dir) {
+ error("Health configuration cannot open directory '%s'.", path);
+ return;
+ }
+
+ struct dirent *de = NULL;
+ while ((de = readdir(dir))) {
+ size_t len = strlen(de->d_name);
+
+ if(de->d_type == DT_DIR
+ && (
+ (de->d_name[0] == '.' && de->d_name[1] == '\0')
+ || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0')
+ )) {
+ debug(D_HEALTH, "Ignoring directory '%s'", de->d_name);
+ continue;
+ }
+
+ else if(de->d_type == DT_DIR) {
+ char *s = mallocz(pathlen + strlen(de->d_name) + 2);
+ strcpy(s, path);
+ strcat(s, "/");
+ strcat(s, de->d_name);
+ health_readdir(host, s);
+ freez(s);
+ continue;
+ }
+
+ else if((de->d_type == DT_LNK || de->d_type == DT_REG || de->d_type == DT_UNKNOWN) &&
+ len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) {
+ health_readfile(host, path, de->d_name);
+ }
+
+ else debug(D_HEALTH, "Ignoring file '%s'", de->d_name);
+ }
+
+ closedir(dir);
+}
+
+