1 #ifndef NETDATA_HEALTH_H
2 #define NETDATA_HEALTH_H
4 extern int default_health_enabled; // declaration only (extern); NOTE(review): presumably the global default for enabling health monitoring — confirm
6 extern int rrdvar_compare(void *a, void *b); // compares two opaque pointers and returns int; NOTE(review): presumably the comparator used by the RRDVAR indexes — confirm
8 #define RRDVAR_TYPE_CALCULATED 1 // RRDVAR_TYPE_*: integer tags (1..6) naming the kind of value a variable refers to; NOTE(review): presumably what the `int type` parameter of the *var_create() prototypes in this header expects — confirm
9 #define RRDVAR_TYPE_TIME_T 2
10 #define RRDVAR_TYPE_COLLECTED 3
11 #define RRDVAR_TYPE_TOTAL 4
12 #define RRDVAR_TYPE_INT 5
13 #define RRDVAR_TYPE_CALCULATED_ALLOCATED 6
16 // the variables as stored in the variables indexes
17 // there are 3 indexes:
18 // 1. at each chart (RRDSET.variables_root_index)
19 // 2. at each family (RRDFAMILY.variables_root_index)
20 // 3. at each host (RRDHOST.variables_root_index)
21 typedef struct rrdvar {
33 // variables linked to charts
34 // We link variables to point to the values that are already
35 // calculated / processed by the normal data collection process
36 // This means, there will be no speed penalty for using
38 typedef struct rrdsetvar {
39 char *key_fullid; // chart type.chart id.variable
40 char *key_fullname; // chart type.chart name.variable
41 char *variable; // variable
51 RRDVAR *var_family_name;
52 RRDVAR *var_host_name;
54 struct rrdset *rrdset;
56 struct rrdsetvar *next;
60 // variables linked to individual dimensions
61 // We link variables to point to the values that are already
62 // calculated / processed by the normal data collection process
63 // This means, there will be no speed penalty for using
65 typedef struct rrddimvar {
69 char *key_id; // dimension id
70 char *key_name; // dimension name
71 char *key_contextid; // context + dimension id
72 char *key_contextname; // context + dimension name
73 char *key_fullidid; // chart type.chart id + dimension id
74 char *key_fullidname; // chart type.chart id + dimension name
75 char *key_fullnameid; // chart type.chart name + dimension id
76 char *key_fullnamename; // chart type.chart name + dimension name
84 RRDVAR *var_local_name;
86 RRDVAR *var_family_id;
87 RRDVAR *var_family_name;
88 RRDVAR *var_family_contextid;
89 RRDVAR *var_family_contextname;
91 RRDVAR *var_host_chartidid;
92 RRDVAR *var_host_chartidname;
93 RRDVAR *var_host_chartnameid;
94 RRDVAR *var_host_chartnamename;
96 struct rrddim *rrddim;
98 struct rrddimvar *next;
101 // calculated variables (defined in health configuration)
102 // These aggregate time-series data at fixed intervals
103 // (defined in their update_every member below)
104 // These increase the overhead of netdata.
106 // These calculations are allocated and linked (->next)
108 // They are also linked to RRDSET (of course only when the
109 // chart is found, via ->rrdset_next and ->rrdset_prev).
110 // This double-linked list is maintained sorted at all times
111 // having as RRDSET.calculations the RRDCALC to be processed
114 #define RRDCALC_STATUS_REMOVED -2 // RRDCALC_STATUS_*: alarm status codes, ascending from -2 to 4; NOTE(review): presumably the values stored in rrdcalc.status — confirm
115 #define RRDCALC_STATUS_UNDEFINED -1 // NOTE(review): the two negative replacement lists are not parenthesized; (-2) / (-1) would be the defensive spelling
116 #define RRDCALC_STATUS_UNINITIALIZED 0 // zero value
117 #define RRDCALC_STATUS_CLEAR 1
118 #define RRDCALC_STATUS_RAISED 2
119 #define RRDCALC_STATUS_WARNING 3
120 #define RRDCALC_STATUS_CRITICAL 4 // highest status code in this list
122 #define RRDCALC_FLAG_DB_ERROR 0x00000001 // RRDCALC_FLAG_*: single-bit masks; the rrdcalc_flags member of struct rrdcalc refers to them ("check RRDCALC_FLAG_*")
123 #define RRDCALC_FLAG_DB_NAN 0x00000002
124 /* #define RRDCALC_FLAG_DB_STALE 0x00000004 */ // disabled definition: bit 0x00000004 is not assigned to any active flag in this list
125 #define RRDCALC_FLAG_CALC_ERROR 0x00000008
126 #define RRDCALC_FLAG_WARN_ERROR 0x00000010
127 #define RRDCALC_FLAG_CRIT_ERROR 0x00000020
128 #define RRDCALC_FLAG_RUNNABLE 0x00000040
129 #define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 // highest bit of a 32-bit mask; same value as HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION
131 typedef struct rrdcalc {
132 uint32_t id; // the unique id of this alarm
133 uint32_t next_event_id; // the next event id that will be used for this alarm
135 char *name; // the name of this alarm
138 char *exec; // the command to execute when this alarm switches state
139 char *recipient; // the recipient of the alarm (the first parameter to exec)
141 char *chart; // the chart id this should be linked to
144 char *source; // the source of this alarm
145 char *units; // the units of the alarm
146 char *info; // a short description of the alarm
148 int update_every; // update frequency for the alarm
150 // the red and green threshold of this alarm (to be set to the chart)
151 calculated_number green;
152 calculated_number red;
154 // ------------------------------------------------------------------------
155 // database lookup settings
157 char *dimensions; // the chart dimensions
158 int group; // grouping method: average, max, etc.
159 int before; // ending point in time-series
160 int after; // starting point in time-series
161 uint32_t options; // calculation options
163 // ------------------------------------------------------------------------
164 // expressions related to the alarm
166 EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm
167 EVAL_EXPRESSION *warning; // expression to check the warning condition
168 EVAL_EXPRESSION *critical; // expression to check the critical condition
170 // ------------------------------------------------------------------------
171 // notification delay settings
173 int delay_up_duration; // duration to delay notifications when alarm raises
174 int delay_down_duration; // duration to delay notifications when alarm lowers
175 int delay_max_duration; // the absolute max delay to apply to this alarm
176 float delay_multiplier; // multiplier for all delays when alarms switch status
177 // while now < delay_up_to
179 // ------------------------------------------------------------------------
180 // runtime information
182 int status; // the current status of the alarm
184 calculated_number value; // the current value of the alarm
185 calculated_number old_value; // the previous value of the alarm
187 uint32_t rrdcalc_flags; // check RRDCALC_FLAG_*
189 time_t last_updated; // the last update timestamp of the alarm
190 time_t next_update; // the next update timestamp of the alarm
191 time_t last_status_change; // the timestamp of the last time this alarm changed status
193 time_t db_after; // the first timestamp evaluated by the db lookup
194 time_t db_before; // the last timestamp evaluated by the db lookup
196 time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications
197 int delay_up_current; // the current up notification delay duration
198 int delay_down_current; // the current down notification delay duration
199 int delay_last; // the last delay we used
201 // ------------------------------------------------------------------------
202 // variables this alarm exposes to the rest of the alarms
209 // ------------------------------------------------------------------------
210 // the chart this alarm is linked to
212 struct rrdset *rrdset;
214 // linking of this alarm on its chart
215 struct rrdcalc *rrdset_next;
216 struct rrdcalc *rrdset_prev;
218 struct rrdcalc *next;
221 #define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after) // non-zero when rc->after (the "starting point in time-series" database lookup setting) is non-zero; rc is evaluated exactly once
224 // these are to be applied to charts found dynamically
225 // based on their context.
226 typedef struct rrdcalctemplate {
234 uint32_t hash_context;
237 SIMPLE_PATTERN *family_pattern;
239 char *source; // the source of this alarm
240 char *units; // the units of the alarm
241 char *info; // a short description of the alarm
243 int update_every; // update frequency for the alarm
245 // the red and green threshold of this alarm (to be set to the chart)
246 calculated_number green;
247 calculated_number red;
249 // ------------------------------------------------------------------------
250 // database lookup settings
252 char *dimensions; // the chart dimensions
253 int group; // grouping method: average, max, etc.
254 int before; // ending point in time-series
255 int after; // starting point in time-series
256 uint32_t options; // calculation options
258 // ------------------------------------------------------------------------
259 // notification delay settings
261 int delay_up_duration; // duration to delay notifications when alarm raises
262 int delay_down_duration; // duration to delay notifications when alarm lowers
263 int delay_max_duration; // the absolute max delay to apply to this alarm
264 float delay_multiplier; // multiplier for all delays when alarms switch status
266 // ------------------------------------------------------------------------
267 // expressions related to the alarm
269 EVAL_EXPRESSION *calculation;
270 EVAL_EXPRESSION *warning;
271 EVAL_EXPRESSION *critical;
273 struct rrdcalctemplate *next;
276 #define RRDCALCTEMPLATE_HAS_CALCULATION(rt) ((rt)->after) // non-zero when rt->after is non-zero. NOTE(review): despite its name this tests the database-lookup member `after`, not `calculation`, exactly like RRDCALC_HAS_DB_LOOKUP — confirm this is intended before relying on the name
278 #define HEALTH_ENTRY_FLAG_PROCESSED 0x00000001 // HEALTH_ENTRY_FLAG_*: single-bit masks; NOTE(review): presumably combined in a flags member of ALARM_ENTRY — confirm
279 #define HEALTH_ENTRY_FLAG_UPDATED 0x00000002
280 #define HEALTH_ENTRY_FLAG_EXEC_RUN 0x00000004
281 #define HEALTH_ENTRY_FLAG_EXEC_FAILED 0x00000008
282 #define HEALTH_ENTRY_FLAG_SAVED 0x10000000 // note the gap: bits 0x00000010 through 0x08000000 are not used by this list
283 #define HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 // highest bit of a 32-bit mask; same value as RRDCALC_FLAG_NO_CLEAR_NOTIFICATION
285 typedef struct alarm_entry {
288 uint32_t alarm_event_id;
292 time_t non_clear_duration;
304 time_t exec_run_timestamp;
311 calculated_number old_value;
312 calculated_number new_value;
314 char *old_value_string;
315 char *new_value_string;
323 time_t delay_up_to_timestamp;
325 uint32_t updated_by_id;
328 struct alarm_entry *next;
331 typedef struct alarm_log {
332 uint32_t next_log_id;
333 uint32_t next_alarm_id;
337 pthread_rwlock_t alarm_log_rwlock;
342 extern void rrdsetvar_rename_all(RRDSET *st); // RRDSETVAR group: variables linked to charts (rename all / create / free)
343 extern RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, int type, void *value, uint32_t options); // NOTE(review): `type` presumably takes an RRDVAR_TYPE_* value and `value` points at storage of that type — confirm
344 extern void rrdsetvar_free(RRDSETVAR *rs);
346 extern void rrddimvar_rename_all(RRDDIM *rd); // RRDDIMVAR group: variables linked to individual dimensions (rename all / create / free)
347 extern RRDDIMVAR *rrddimvar_create(RRDDIM *rd, int type, const char *prefix, const char *suffix, void *value, uint32_t options);
348 extern void rrddimvar_free(RRDDIMVAR *rs);
350 extern void rrdsetcalc_link_matching(RRDSET *st); // RRDCALC group: linking/unlinking alarms and alarm templates to a chart, and lookup by name
351 extern void rrdsetcalc_unlink(RRDCALC *rc);
352 extern void rrdcalctemplate_link_matching(RRDSET *st);
353 extern RRDCALC *rrdcalc_find(RRDSET *st, const char *name); // NOTE(review): presumably returns NULL when no alarm with that name is linked to the chart — confirm
355 extern void health_init(void);
356 extern void *health_main(void *ptr); // has the void *(*)(void *) shape; NOTE(review): presumably a thread entry point — confirm
358 extern void health_reload(void);
360 extern int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result); // writes through `result`; NOTE(review): meaning of the int return value is not visible here — confirm
361 extern void health_alarms2json(RRDHOST *host, BUFFER *wb, int all); // the *2json functions take a BUFFER * output argument
362 extern void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after);
364 void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf); // NOTE(review): the only prototype in this header written without `extern`; linkage is the same, the spelling is merely inconsistent
366 extern RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name); // custom host variables: create / destroy by name, set through the returned RRDVAR *
367 extern void rrdvar_custom_host_variable_destroy(RRDHOST *host, const char *name);
368 extern void rrdvar_custom_host_variable_set(RRDVAR *rv, calculated_number value);
370 extern const char *rrdcalc_status2string(int status); // returns a const string for a status code; NOTE(review): presumably accepts the RRDCALC_STATUS_* values — confirm
373 extern int health_alarm_log_open(RRDHOST *host); // alarm log group: every function below operates on one RRDHOST
374 extern void health_alarm_log_close(RRDHOST *host);
375 extern void health_log_rotate(RRDHOST *host);
376 extern void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae);
377 extern ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename); // reads from an already opened FILE *; NOTE(review): meaning of the ssize_t return value is not visible here — confirm
378 extern void health_alarm_log_load(RRDHOST *host);
379 extern void health_alarm_log(
382 uint32_t alarm_event_id,
388 const char *recipient,
390 calculated_number old_value,
391 calculated_number new_value,
401 extern void health_readdir(RRDHOST *host, const char *path); // NOTE(review): presumably loads health configuration found under `path` for this host — confirm
402 extern char *health_config_dir(void); // returns a non-const char *; NOTE(review): ownership of the returned string is not visible here — confirm before freeing
403 extern void health_reload_host(RRDHOST *host);
404 extern void health_alarm_log_free(RRDHOST *host);
406 extern void rrdcalc_free(RRDHOST *host, RRDCALC *rc); // release functions for an alarm and an alarm template; both also take the RRDHOST
407 extern void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt);
409 #ifdef NETDATA_HEALTH_INTERNALS // everything down to the matching #endif is visible only to files that define NETDATA_HEALTH_INTERNALS before including this header
410 #define RRDVAR_MAX_LENGTH 1024 // NOTE(review): presumably the maximum length of a variable name; whether it counts the terminator is not visible here — confirm
412 extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name); // takes the chart/name strings together with their precomputed hashes
413 extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id); // returns a uint32_t id and has an out-parameter `next_event_id` (struct rrdcalc has members of the same names)
414 extern int rrdvar_fix_name(char *variable); // takes a mutable string; NOTE(review): presumably rewrites it in place — confirm
416 extern RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart); // builds an RRDCALC from a template and a chart id
417 extern void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc); // NOTE(review): presumably the second step after an RRDCALC has been allocated — confirm call order
419 extern RRDVAR *rrdvar_create_and_index(const char *scope, avl_tree_lock *tree, const char *name, int type, void *value); // the index is passed explicitly as an avl_tree_lock *
420 extern void rrdvar_free(RRDHOST *host, avl_tree_lock *tree, RRDVAR *rv);
422 extern void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae); // NOTE(review): per its name, performs no checks and does not unlink the entry; the caller presumably must do both — confirm
424 #endif // NETDATA_HEALTH_INTERNALS
426 #endif //NETDATA_HEALTH_H