X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=src%2Fhealth.h;h=7028a914b2581e0bb798a5fd1d77732d02021d3a;hb=8679670bdbe3c5928ec2e266d9c72e1a758fdf37;hp=055d4bb2a6c0d5f5d67da1669f9bc6045b0d4e77;hpb=a82c482bfb71ce7ae6c684612004195614e8159f;p=netdata.git diff --git a/src/health.h b/src/health.h index 055d4bb2..7028a914 100644 --- a/src/health.h +++ b/src/health.h @@ -1,39 +1,22 @@ #ifndef NETDATA_HEALTH_H #define NETDATA_HEALTH_H +extern int default_health_enabled; + extern int rrdvar_compare(void *a, void *b); -/* - * RRDVAR - * a variable - * - * There are 4 scopes: local (chart), context, host and global variables - * - * Standard global variables: - * $now - * - * Standard host variables: - * - none - - * - * Standard context variables: - * - none - - * - * Standard local variables: - * $last_updated - * $last_collected_value - * $last_value - * - */ - -#define RRDVAR_TYPE_CALCULATED 1 -#define RRDVAR_TYPE_TIME_T 2 -#define RRDVAR_TYPE_COLLECTED 3 -#define RRDVAR_TYPE_TOTAL 4 +#define RRDVAR_TYPE_CALCULATED 1 +#define RRDVAR_TYPE_TIME_T 2 +#define RRDVAR_TYPE_COLLECTED 3 +#define RRDVAR_TYPE_TOTAL 4 +#define RRDVAR_TYPE_INT 5 +#define RRDVAR_TYPE_CALCULATED_ALLOCATED 6 + // the variables as stored in the variables indexes // there are 3 indexes: // 1. at each chart (RRDSET.variables_root_index) -// 2. at each context (RRDCONTEXT.variables_root_index) +// 2. at each context (RRDFAMILY.variables_root_index) // 3. at each host (RRDHOST.variables_root_index) typedef struct rrdvar { avl avl; @@ -53,25 +36,20 @@ typedef struct rrdvar { // This means, there will be no speed penalty for using // these variables typedef struct rrdsetvar { - char *fullid; // chart type.chart id.variable - uint32_t hash_fullid; - - char *fullname; // chart type.chart name.variable - uint32_t hash_fullname; - + char *key_fullid; // chart type.chart id.variable + char *key_fullname; // chart type.chart name.variable char *variable; // variable - uint32_t hash_variable; int type; void *value; uint32_t options; - RRDVAR *local; - RRDVAR *context; - RRDVAR *host; - RRDVAR *context_name; - RRDVAR *host_name; + RRDVAR *var_local; + RRDVAR *var_family; + RRDVAR *var_host; + RRDVAR *var_family_name; + RRDVAR *var_host_name; struct rrdset *rrdset; @@ -88,41 +66,32 @@ typedef struct rrddimvar { char *prefix; char *suffix; - char *id; // dimension id - uint32_t hash; - - char *name; // dimension name - uint32_t hash_name; - - char *fullidid; // chart type.chart id.dimension id - uint32_t hash_fullidid; - - char *fullidname; // chart type.chart id.dimension name - uint32_t hash_fullidname; - - char *fullnameid; // chart type.chart name.dimension id - uint32_t hash_fullnameid; - - char *fullnamename; // chart type.chart name.dimension name - uint32_t hash_fullnamename; + char *key_id; // dimension id + char *key_name; // dimension name + char *key_contextid; // context + dimension id + char *key_contextname; // context + dimension name + char *key_fullidid; // chart type.chart id + dimension id + char *key_fullidname; // chart type.chart id + dimension name + char *key_fullnameid; // chart type.chart name + dimension id + char *key_fullnamename; // chart type.chart name + dimension name int type; void *value; uint32_t options; - RRDVAR *local_id; - RRDVAR *local_name; + RRDVAR *var_local_id; + RRDVAR *var_local_name; - RRDVAR *context_fullidid; - RRDVAR *context_fullidname; - RRDVAR *context_fullnameid; - RRDVAR *context_fullnamename; + RRDVAR *var_family_id; + RRDVAR *var_family_name; + RRDVAR *var_family_contextid; + RRDVAR *var_family_contextname; - RRDVAR *host_fullidid; - RRDVAR *host_fullidname; - RRDVAR *host_fullnameid; - RRDVAR *host_fullnamename; + RRDVAR *var_host_chartidid; + RRDVAR *var_host_chartidname; + RRDVAR *var_host_chartnameid; + RRDVAR *var_host_chartnamename; struct rrddim *rrddim; @@ -141,45 +110,115 @@ typedef struct rrddimvar { // This double-linked list is maintained sorted at all times // having as RRDSET.calculations the RRDCALC to be processed // next. -typedef struct rrdcalc { - char *name; - uint32_t hash; - char *exec; +#define RRDCALC_STATUS_REMOVED -2 +#define RRDCALC_STATUS_UNDEFINED -1 +#define RRDCALC_STATUS_UNINITIALIZED 0 +#define RRDCALC_STATUS_CLEAR 1 +#define RRDCALC_STATUS_RAISED 2 +#define RRDCALC_STATUS_WARNING 3 +#define RRDCALC_STATUS_CRITICAL 4 + +#define RRDCALC_FLAG_DB_ERROR 0x00000001 +#define RRDCALC_FLAG_DB_NAN 0x00000002 +/* #define RRDCALC_FLAG_DB_STALE 0x00000004 */ +#define RRDCALC_FLAG_CALC_ERROR 0x00000008 +#define RRDCALC_FLAG_WARN_ERROR 0x00000010 +#define RRDCALC_FLAG_CRIT_ERROR 0x00000020 +#define RRDCALC_FLAG_RUNNABLE 0x00000040 +#define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 - char *chart; // the chart name - uint32_t hash_chart; +typedef struct rrdcalc { + uint32_t id; // the unique id of this alarm + uint32_t next_event_id; // the next event id that will be used for this alarm - char *dimensions; // the chart dimensions + char *name; // the name of this alarm + uint32_t hash; - int group; // grouping method: average, max, etc. - int before; // ending point in time-series - int after; // starting point in time-series - uint32_t options; // calculation options - int update_every; // update frequency for the calculation + char *exec; // the command to execute when this alarm switches state + char *recipient; // the recipient of the alarm (the first parameter to exec) - time_t last_updated; - time_t next_update; + char *chart; // the chart id this should be linked to + uint32_t hash_chart; - EVAL_EXPRESSION *warning; - EVAL_EXPRESSION *critical; + char *source; // the source of this alarm + char *units; // the units of the alarm + char *info; // a short description of the alarm - calculated_number value; + int update_every; // update frequency for the alarm + // the red and green threshold of this alarm (to be set to the chart) calculated_number green; calculated_number red; + // ------------------------------------------------------------------------ + // database lookup settings + + char *dimensions; // the chart dimensions + int group; // grouping method: average, max, etc. + int before; // ending point in time-series + int after; // starting point in time-series + uint32_t options; // calculation options + + // ------------------------------------------------------------------------ + // expressions related to the alarm + + EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm + EVAL_EXPRESSION *warning; // expression to check the warning condition + EVAL_EXPRESSION *critical; // expression to check the critical condition + + // ------------------------------------------------------------------------ + // notification delay settings + + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + // while now < delay_up_to + + // ------------------------------------------------------------------------ + // runtime information + + int status; // the current status of the alarm + + calculated_number value; // the current value of the alarm + calculated_number old_value; // the previous value of the alarm + + uint32_t rrdcalc_flags; // check RRDCALC_FLAG_* + + time_t last_updated; // the last update timestamp of the alarm + time_t next_update; // the next update timestamp of the alarm + time_t last_status_change; // the timestamp of the last time this alarm changed status + + time_t db_after; // the first timestamp evaluated by the db lookup + time_t db_before; // the last timestamp evaluated by the db lookup + + time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications + int delay_up_current; // the current up notification delay duration + int delay_down_current; // the current down notification delay duration + int delay_last; // the last delay we used + + // ------------------------------------------------------------------------ + // variables this alarm exposes to the rest of the alarms + RRDVAR *local; - RRDVAR *context; - RRDVAR *host; + RRDVAR *family; + RRDVAR *hostid; + RRDVAR *hostname; + + // ------------------------------------------------------------------------ + // the chart this alarm it is linked to struct rrdset *rrdset; + + // linking of this alarm on its chart struct rrdcalc *rrdset_next; struct rrdcalc *rrdset_prev; struct rrdcalc *next; } RRDCALC; +#define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after) // RRDCALCTEMPLATE // these are to be applied to charts found dynamically @@ -189,24 +228,114 @@ typedef struct rrdcalctemplate { uint32_t hash_name; char *exec; + char *recipient; char *context; uint32_t hash_context; - char *dimensions; + char *family_match; + SIMPLE_PATTERN *family_pattern; - int group; // grouping method: average, max, etc. - int before; // ending point in time-series - int after; // starting point in time-series - uint32_t options; // calculation options - int update_every; // update frequency for the calculation + char *source; // the source of this alarm + char *units; // the units of the alarm + char *info; // a short description of the alarm + int update_every; // update frequency for the alarm + + // the red and green threshold of this alarm (to be set to the chart) calculated_number green; calculated_number red; + // ------------------------------------------------------------------------ + // database lookup settings + + char *dimensions; // the chart dimensions + int group; // grouping method: average, max, etc. + int before; // ending point in time-series + int after; // starting point in time-series + uint32_t options; // calculation options + + // ------------------------------------------------------------------------ + // notification delay settings + + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + + // ------------------------------------------------------------------------ + // expressions related to the alarm + + EVAL_EXPRESSION *calculation; + EVAL_EXPRESSION *warning; + EVAL_EXPRESSION *critical; + struct rrdcalctemplate *next; } RRDCALCTEMPLATE; +#define RRDCALCTEMPLATE_HAS_CALCULATION(rt) ((rt)->after) + +#define HEALTH_ENTRY_FLAG_PROCESSED 0x00000001 +#define HEALTH_ENTRY_FLAG_UPDATED 0x00000002 +#define HEALTH_ENTRY_FLAG_EXEC_RUN 0x00000004 +#define HEALTH_ENTRY_FLAG_EXEC_FAILED 0x00000008 +#define HEALTH_ENTRY_FLAG_SAVED 0x10000000 +#define HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 + +typedef struct alarm_entry { + uint32_t unique_id; + uint32_t alarm_id; + uint32_t alarm_event_id; + + time_t when; + time_t duration; + time_t non_clear_duration; + + char *name; + uint32_t hash_name; + + char *chart; + uint32_t hash_chart; + + char *family; + + char *exec; + char *recipient; + time_t exec_run_timestamp; + int exec_code; + + char *source; + char *units; + char *info; + + calculated_number old_value; + calculated_number new_value; + + char *old_value_string; + char *new_value_string; + + int old_status; + int new_status; + + uint32_t flags; + + int delay; + time_t delay_up_to_timestamp; + + uint32_t updated_by_id; + uint32_t updates_id; + + struct alarm_entry *next; +} ALARM_ENTRY; + +typedef struct alarm_log { + uint32_t next_log_id; + uint32_t next_alarm_id; + unsigned int count; + unsigned int max; + ALARM_ENTRY *alarms; + netdata_rwlock_t alarm_log_rwlock; +} ALARM_LOG; #include "rrd.h" @@ -220,7 +349,78 @@ extern void rrddimvar_free(RRDDIMVAR *rs); extern void rrdsetcalc_link_matching(RRDSET *st); extern void rrdsetcalc_unlink(RRDCALC *rc); +extern void rrdcalctemplate_link_matching(RRDSET *st); +extern RRDCALC *rrdcalc_find(RRDSET *st, const char *name); extern void health_init(void); +extern void *health_main(void *ptr); + +extern void health_reload(void); + +extern int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result); +extern void health_alarms2json(RRDHOST *host, BUFFER *wb, int all); +extern void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after); + +void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf); + +extern RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name); +extern void rrdvar_custom_host_variable_destroy(RRDHOST *host, const char *name); +extern void rrdvar_custom_host_variable_set(RRDVAR *rv, calculated_number value); + +extern const char *rrdcalc_status2string(int status); + + +extern int health_alarm_log_open(RRDHOST *host); +extern void health_alarm_log_close(RRDHOST *host); +extern void health_log_rotate(RRDHOST *host); +extern void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae); +extern ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename); +extern void health_alarm_log_load(RRDHOST *host); +extern void health_alarm_log( + RRDHOST *host, + uint32_t alarm_id, + uint32_t alarm_event_id, + time_t when, + const char *name, + const char *chart, + const char *family, + const char *exec, + const char *recipient, + time_t duration, + calculated_number old_value, + calculated_number new_value, + int old_status, + int new_status, + const char *source, + const char *units, + const char *info, + int delay, + uint32_t flags +); + +extern void health_readdir(RRDHOST *host, const char *path); +extern char *health_config_dir(void); +extern void health_reload_host(RRDHOST *host); +extern void health_alarm_log_free(RRDHOST *host); + +extern void rrdcalc_free(RRDHOST *host, RRDCALC *rc); +extern void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt); + +#ifdef NETDATA_HEALTH_INTERNALS +#define RRDVAR_MAX_LENGTH 1024 + +extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name); +extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id); +extern int rrdvar_fix_name(char *variable); + +extern RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart); +extern void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc); + +extern RRDVAR *rrdvar_create_and_index(const char *scope, avl_tree_lock *tree, const char *name, int type, void *value); +extern void rrdvar_free(RRDHOST *host, avl_tree_lock *tree, RRDVAR *rv); + +extern void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae); + +#endif // NETDATA_HEALTH_INTERNALS #endif //NETDATA_HEALTH_H