// ----------------------------------------------------------------------------
// health alarm log management
-static inline void health_alarm_log(RRDHOST *host,
- uint32_t alarm_id, uint32_t alarm_event_id,
- time_t when,
- const char *name, const char *chart, const char *family,
- const char *exec, const char *recipient, time_t duration,
- calculated_number old_value, calculated_number new_value,
- int old_status, int new_status,
- const char *source,
- const char *units,
- const char *info,
- int delay
+static inline void health_alarm_log(
+ RRDHOST *host,
+ uint32_t alarm_id,
+ uint32_t alarm_event_id,
+ time_t when,
+ const char *name,
+ const char *chart,
+ const char *family,
+ const char *exec,
+ const char *recipient,
+ time_t duration,
+ calculated_number old_value,
+ calculated_number new_value,
+ int old_status,
+ int new_status,
+ const char *source,
+ const char *units,
+ const char *info,
+ int delay,
+ uint32_t flags
) {
debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
ae->delay = delay;
ae->delay_up_to_timestamp = when + delay;
+ ae->flags |= flags;
+
if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
ae->non_clear_duration += ae->duration;
{
time_t now = now_realtime_sec();
- health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_UNINITIALIZED, rc->source, rc->units, rc->info, 0);
+ health_alarm_log(
+ st->rrdhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_UNINITIALIZED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0
+ );
}
}
{
time_t now = now_realtime_sec();
- health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_REMOVED, rc->source, rc->units, rc->info, 0);
+ health_alarm_log(
+ st->rrdhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_REMOVED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0
+ );
}
RRDHOST *host = st->rrdhost;
#define HEALTH_UNITS_KEY "units"
#define HEALTH_INFO_KEY "info"
#define HEALTH_DELAY_KEY "delay"
+#define HEALTH_OPTIONS_KEY "options"
static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
if(!rc->chart) {
return 1;
}
+static inline uint32_t health_parse_options(const char *s) {
+ uint32_t options = 0;
+ char buf[100+1] = "";
+
+ while(*s) {
+ buf[0] = '\0';
+
+ // skip spaces
+ while(*s && isspace(*s))
+ s++;
+
+ // find the next space
+ size_t count = 0;
+ while(*s && count < 100 && !isspace(*s))
+ buf[count++] = *s++;
+
+ if(buf[0]) {
+ buf[count] = '\0';
+
+ if(!strcasecmp(buf, "no-clear-notification") || !strcasecmp(buf, "no-clear"))
+ options |= RRDCALC_FLAG_NO_CLEAR_NOTIFICATION;
+ }
+ }
+
+ return options;
+}
+
static inline int health_parse_db_lookup(
size_t line, const char *path, const char *file, char *string,
int *group_method, int *after, int *before, int *every,
int health_readfile(const char *path, const char *filename) {
debug(D_HEALTH, "Health configuration reading file '%s/%s'", path, filename);
- static uint32_t hash_alarm = 0, hash_template = 0, hash_on = 0, hash_families = 0, hash_calc = 0, hash_green = 0, hash_red = 0, hash_warn = 0, hash_crit = 0, hash_exec = 0, hash_every = 0, hash_lookup = 0, hash_units = 0, hash_info = 0, hash_recipient = 0, hash_delay = 0;
+ static uint32_t
+ hash_alarm = 0,
+ hash_template = 0,
+ hash_on = 0,
+ hash_families = 0,
+ hash_calc = 0,
+ hash_green = 0,
+ hash_red = 0,
+ hash_warn = 0,
+ hash_crit = 0,
+ hash_exec = 0,
+ hash_every = 0,
+ hash_lookup = 0,
+ hash_units = 0,
+ hash_info = 0,
+ hash_recipient = 0,
+ hash_delay = 0,
+ hash_options = 0;
+
char buffer[HEALTH_CONF_MAX_LINE + 1];
if(unlikely(!hash_alarm)) {
hash_info = simple_hash(HEALTH_INFO_KEY);
hash_recipient = simple_hash(HEALTH_RECIPIENT_KEY);
hash_delay = simple_uhash(HEALTH_DELAY_KEY);
+ hash_options = simple_uhash(HEALTH_OPTIONS_KEY);
}
snprintfz(buffer, HEALTH_CONF_MAX_LINE, "%s/%s", path, filename);
else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
health_parse_delay(line, path, filename, value, &rc->delay_up_duration, &rc->delay_down_duration, &rc->delay_max_duration, &rc->delay_multiplier);
}
+ else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+ rc->options |= health_parse_options(value);
+ }
else {
error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has unknown key '%s'.",
line, path, filename, rc->name, key);
else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
health_parse_delay(line, path, filename, value, &rt->delay_up_duration, &rt->delay_down_duration, &rt->delay_max_duration, &rt->delay_multiplier);
}
+ else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+ rt->options |= health_parse_options(value);
+ }
else {
error("Health configuration at line %zu of file '%s/%s' for template '%s' has unknown key '%s'.",
line, path, filename, rt->name, key);
}
static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
- buffer_sprintf(wb, "\n\t{\n"
- "\t\t\"hostname\": \"%s\",\n"
- "\t\t\"unique_id\": %u,\n"
- "\t\t\"alarm_id\": %u,\n"
- "\t\t\"alarm_event_id\": %u,\n"
- "\t\t\"name\": \"%s\",\n"
- "\t\t\"chart\": \"%s\",\n"
- "\t\t\"family\": \"%s\",\n"
- "\t\t\"processed\": %s,\n"
- "\t\t\"updated\": %s,\n"
- "\t\t\"exec_run\": %lu,\n"
- "\t\t\"exec_failed\": %s,\n"
- "\t\t\"exec\": \"%s\",\n"
- "\t\t\"recipient\": \"%s\",\n"
- "\t\t\"exec_code\": %d,\n"
- "\t\t\"source\": \"%s\",\n"
- "\t\t\"units\": \"%s\",\n"
- "\t\t\"info\": \"%s\",\n"
- "\t\t\"when\": %lu,\n"
- "\t\t\"duration\": %lu,\n"
- "\t\t\"non_clear_duration\": %lu,\n"
- "\t\t\"status\": \"%s\",\n"
- "\t\t\"old_status\": \"%s\",\n"
- "\t\t\"delay\": %d,\n"
- "\t\t\"delay_up_to_timestamp\": %lu,\n"
- "\t\t\"updated_by_id\": %u,\n"
- "\t\t\"updates_id\": %u,\n"
- "\t\t\"value_string\": \"%s\",\n"
- "\t\t\"old_value_string\": \"%s\",\n",
- host->hostname,
- ae->unique_id,
- ae->alarm_id,
- ae->alarm_event_id,
- ae->name,
- ae->chart,
- ae->family,
- (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false",
- (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false",
- (unsigned long)ae->exec_run_timestamp,
- (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false",
- ae->exec?ae->exec:health.health_default_exec,
- ae->recipient?ae->recipient:health.health_default_recipient,
- ae->exec_code,
- ae->source,
- ae->units?ae->units:"",
- ae->info?ae->info:"",
- (unsigned long)ae->when,
- (unsigned long)ae->duration,
- (unsigned long)ae->non_clear_duration,
- rrdcalc_status2string(ae->new_status),
- rrdcalc_status2string(ae->old_status),
- ae->delay,
- (unsigned long)ae->delay_up_to_timestamp,
- ae->updated_by_id,
- ae->updates_id,
- ae->new_value_string,
- ae->old_value_string
+ buffer_sprintf(wb,
+ "\n\t{\n"
+ "\t\t\"hostname\": \"%s\",\n"
+ "\t\t\"unique_id\": %u,\n"
+ "\t\t\"alarm_id\": %u,\n"
+ "\t\t\"alarm_event_id\": %u,\n"
+ "\t\t\"name\": \"%s\",\n"
+ "\t\t\"chart\": \"%s\",\n"
+ "\t\t\"family\": \"%s\",\n"
+ "\t\t\"processed\": %s,\n"
+ "\t\t\"updated\": %s,\n"
+ "\t\t\"exec_run\": %lu,\n"
+ "\t\t\"exec_failed\": %s,\n"
+ "\t\t\"exec\": \"%s\",\n"
+ "\t\t\"recipient\": \"%s\",\n"
+ "\t\t\"exec_code\": %d,\n"
+ "\t\t\"source\": \"%s\",\n"
+ "\t\t\"units\": \"%s\",\n"
+ "\t\t\"info\": \"%s\",\n"
+ "\t\t\"when\": %lu,\n"
+ "\t\t\"duration\": %lu,\n"
+ "\t\t\"non_clear_duration\": %lu,\n"
+ "\t\t\"status\": \"%s\",\n"
+ "\t\t\"old_status\": \"%s\",\n"
+ "\t\t\"delay\": %d,\n"
+ "\t\t\"delay_up_to_timestamp\": %lu,\n"
+ "\t\t\"updated_by_id\": %u,\n"
+ "\t\t\"updates_id\": %u,\n"
+ "\t\t\"value_string\": \"%s\",\n"
+ "\t\t\"old_value_string\": \"%s\",\n"
+ , host->hostname
+ , ae->unique_id
+ , ae->alarm_id
+ , ae->alarm_event_id
+ , ae->name
+ , ae->chart
+ , ae->family
+ , (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false"
+ , (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false"
+ , (unsigned long)ae->exec_run_timestamp
+ , (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false"
+ , ae->exec?ae->exec:health.health_default_exec
+ , ae->recipient?ae->recipient:health.health_default_recipient
+ , ae->exec_code
+ , ae->source
+ , ae->units?ae->units:""
+ , ae->info?ae->info:""
+ , (unsigned long)ae->when
+ , (unsigned long)ae->duration
+ , (unsigned long)ae->non_clear_duration
+ , rrdcalc_status2string(ae->new_status)
+ , rrdcalc_status2string(ae->old_status)
+ , ae->delay
+ , (unsigned long)ae->delay_up_to_timestamp
+ , ae->updated_by_id
+ , ae->updates_id
+ , ae->new_value_string
+ , ae->old_value_string
);
+ if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
+ buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
+ }
+
buffer_strcat(wb, "\t\t\"value\":");
buffer_rrd_value(wb, ae->new_value);
buffer_strcat(wb, ",\n");
"\t\t\t\"delay_multiplier\": %f,\n"
"\t\t\t\"delay\": %d,\n"
"\t\t\t\"delay_up_to_timestamp\": %lu,\n"
- , rc->chart, rc->name
- , (unsigned long)rc->id
- , rc->name
- , rc->chart
- , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
- , (rc->rrdset)?"true":"false"
- , rc->exec?rc->exec:health.health_default_exec
- , rc->recipient?rc->recipient:health.health_default_recipient
- , rc->source
- , rc->units?rc->units:""
- , rc->info?rc->info:""
- , rrdcalc_status2string(rc->status)
- , (unsigned long)rc->last_status_change
- , (unsigned long)rc->last_updated
- , (unsigned long)rc->next_update
- , rc->update_every
- , rc->delay_up_duration
- , rc->delay_down_duration
- , rc->delay_max_duration
- , rc->delay_multiplier
- , rc->delay_last
- , (unsigned long)rc->delay_up_to_timestamp
+ , rc->chart, rc->name
+ , (unsigned long)rc->id
+ , rc->name
+ , rc->chart
+ , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
+ , (rc->rrdset)?"true":"false"
+ , rc->exec?rc->exec:health.health_default_exec
+ , rc->recipient?rc->recipient:health.health_default_recipient
+ , rc->source
+ , rc->units?rc->units:""
+ , rc->info?rc->info:""
+ , rrdcalc_status2string(rc->status)
+ , (unsigned long)rc->last_status_change
+ , (unsigned long)rc->last_updated
+ , (unsigned long)rc->next_update
+ , rc->update_every
+ , rc->delay_up_duration
+ , rc->delay_down_duration
+ , rc->delay_max_duration
+ , rc->delay_multiplier
+ , rc->delay_last
+ , (unsigned long)rc->delay_up_to_timestamp
);
+ if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {
+ buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
+ }
+
if(RRDCALC_HAS_DB_LOOKUP(rc)) {
if(rc->dimensions && *rc->dimensions)
health_string2json(wb, "\t\t\t", "lookup_dimensions", rc->dimensions, ",\n");
goto done;
}
+ if(unlikely(ae->new_status <= RRDCALC_STATUS_CLEAR && ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
+ // do not send notifications for disabled statuses
+ goto done;
+ }
+
// find the previous notification for the same alarm
// which we have run the exec script
{
rc->delay_last = delay;
rc->delay_up_to_timestamp = now + delay;
- health_alarm_log(&localhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info, rc->delay_last);
+ health_alarm_log(
+ &localhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ status,
+ rc->source,
+ rc->units,
+ rc->info,
+ rc->delay_last,
+ (rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)?HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION:0
+ );
rc->last_status_change = now;
rc->status = status;
}