1 #ifndef NETDATA_HEALTH_H
2 #define NETDATA_HEALTH_H
// Declarations of objects/functions defined elsewhere.
// NOTE(review): presumably non-zero when health monitoring is enabled - confirm at the definition.
4 extern int health_enabled;
// Takes two untyped pointers and returns an int.
// NOTE(review): presumably an ordering callback for RRDVAR index entries - confirm.
6 extern int rrdvar_compare(void *a, void *b);
// RRDVAR_TYPE_*: distinct integer codes (1 through 6) naming the type of a variable.
// NOTE(review): presumably the values expected by the `type` argument of
// rrdsetvar_create() and rrddimvar_create() - confirm against their definitions.
8 #define RRDVAR_TYPE_CALCULATED 1
9 #define RRDVAR_TYPE_TIME_T 2
10 #define RRDVAR_TYPE_COLLECTED 3
11 #define RRDVAR_TYPE_TOTAL 4
12 #define RRDVAR_TYPE_INT 5
13 #define RRDVAR_TYPE_CALCULATED_ALLOCATED 6
16 // The variables, as stored in the variable indexes.
17 // There are 3 indexes:
18 // 1. at each chart (RRDSET.variables_root_index)
19 // 2. at each context (RRDFAMILY.variables_root_index)
20 // 3. at each host (RRDHOST.variables_root_index)
// NOTE(review): item 2 says "context" but names RRDFAMILY - confirm which term is intended.
21 typedef struct rrdvar {
33 // Variables linked to charts.
34 // We link variables to point to the values that are already
35 // calculated / processed by the normal data collection process.
36 // This means there will be no speed penalty for using them.
38 typedef struct rrdsetvar {
39 char *key_fullid; // key: chart type.chart id.variable
40 char *key_fullname; // key: chart type.chart name.variable
41 char *variable; // the variable name itself
51 RRDVAR *var_family_name; // NOTE(review): presumably the RRDVAR registered by name in the family index - confirm
52 RRDVAR *var_host_name; // NOTE(review): presumably the RRDVAR registered by name in the host index - confirm
54 struct rrdset *rrdset; // the chart this variable is linked to
56 struct rrdsetvar *next; // link to another rrdsetvar (list pointer)
60 // Variables linked to individual dimensions.
61 // We link variables to point to the values that are already
62 // calculated / processed by the normal data collection process.
63 // This means there will be no speed penalty for using them.
//
// The key_* members are the lookup names built from the dimension id/name,
// optionally qualified by context or by the full chart id/name.
// The var_* members are grouped by prefix (local, family, host).
// NOTE(review): presumably one RRDVAR per key in the chart, family and host
// variable indexes respectively - confirm.
65 typedef struct rrddimvar {
69 char *key_id; // dimension id
70 char *key_name; // dimension name
71 char *key_contextid; // context + dimension id
72 char *key_contextname; // context + dimension name
73 char *key_fullidid; // chart type.chart id + dimension id
74 char *key_fullidname; // chart type.chart id + dimension name
75 char *key_fullnameid; // chart type.chart name + dimension id
76 char *key_fullnamename; // chart type.chart name + dimension name
84 RRDVAR *var_local_name;
86 RRDVAR *var_family_id;
87 RRDVAR *var_family_name;
88 RRDVAR *var_family_contextid;
89 RRDVAR *var_family_contextname;
91 RRDVAR *var_host_chartidid;
92 RRDVAR *var_host_chartidname;
93 RRDVAR *var_host_chartnameid;
94 RRDVAR *var_host_chartnamename;
96 struct rrddim *rrddim; // the dimension this variable is linked to
98 struct rrddimvar *next; // link to another rrddimvar (list pointer)
101 // calculated variables (defined in health configuration)
102 // These aggregate time-series data at fixed intervals
103 // (defined in their update_every member below)
104 // These increase the overhead of netdata.
106 // These calculations are allocated and linked (->next)
108 // They are also linked to RRDSET (of course only when the
109 // chart is found, via ->rrdset_next and ->rrdset_prev).
110 // This doubly-linked list is maintained sorted at all times
111 // having as RRDSET.calculations the RRDCALC to be processed
// RRDCALC_STATUS_*: alarm status codes, numbered from -2 (REMOVED) up to 4 (CRITICAL);
// 0 is UNINITIALIZED.
// NOTE(review): presumably the values stored in RRDCALC.status - confirm.
114 #define RRDCALC_STATUS_REMOVED -2
115 #define RRDCALC_STATUS_UNDEFINED -1
116 #define RRDCALC_STATUS_UNINITIALIZED 0
117 #define RRDCALC_STATUS_CLEAR 1
118 #define RRDCALC_STATUS_RAISED 2
119 #define RRDCALC_STATUS_WARNING 3
120 #define RRDCALC_STATUS_CRITICAL 4
// RRDCALC_FLAG_*: single-bit masks for RRDCALC.rrdcalc_flags.
// Bit 0x00000004 is not defined: RRDCALC_FLAG_DB_STALE is commented out below.
122 #define RRDCALC_FLAG_DB_ERROR 0x00000001
123 #define RRDCALC_FLAG_DB_NAN 0x00000002
124 /* #define RRDCALC_FLAG_DB_STALE 0x00000004 */
125 #define RRDCALC_FLAG_CALC_ERROR 0x00000008
126 #define RRDCALC_FLAG_WARN_ERROR 0x00000010
127 #define RRDCALC_FLAG_CRIT_ERROR 0x00000020
128 #define RRDCALC_FLAG_RUNNABLE 0x00000040
// An alarm: its identity and configuration, the database lookup it performs,
// the expressions it evaluates, its notification delay settings, its runtime
// state, and the pointers linking it to its chart and to other alarms.
130 typedef struct rrdcalc {
131 uint32_t id; // the unique id of this alarm
132 uint32_t next_event_id; // the next event id that will be used for this alarm
134 char *name; // the name of this alarm
137 char *exec; // the command to execute when this alarm switches state
138 char *recipient; // the recipient of the alarm (the first parameter to exec)
140 char *chart; // the chart id this should be linked to
143 char *source; // the source of this alarm
144 char *units; // the units of the alarm
145 char *info; // a short description of the alarm
147 int update_every; // update frequency for the alarm
149 // the red and green threshold of this alarm (to be set to the chart)
150 calculated_number green;
151 calculated_number red;
153 // ------------------------------------------------------------------------
154 // database lookup settings
156 char *dimensions; // the chart dimensions
157 int group; // grouping method: average, max, etc.
158 int before; // ending point in time-series
159 int after; // starting point in time-series; tested by RRDCALC_HAS_DB_LOOKUP()
160 uint32_t options; // calculation options
162 // ------------------------------------------------------------------------
163 // expressions related to the alarm
165 EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm
166 EVAL_EXPRESSION *warning; // expression to check the warning condition
167 EVAL_EXPRESSION *critical; // expression to check the critical condition
169 // ------------------------------------------------------------------------
170 // notification delay settings
172 int delay_up_duration; // duration to delay notifications when alarm raises
173 int delay_down_duration; // duration to delay notifications when alarm lowers
174 int delay_max_duration; // the absolute max delay to apply to this alarm
175 float delay_multiplier; // multiplier for all delays when alarms switch status
176 // while now < delay_up_to
178 // ------------------------------------------------------------------------
179 // runtime information
181 int status; // the current status of the alarm (see RRDCALC_STATUS_*)
183 calculated_number value; // the current value of the alarm
184 calculated_number old_value; // the previous value of the alarm
186 uint32_t rrdcalc_flags; // check RRDCALC_FLAG_*
188 time_t last_updated; // the last update timestamp of the alarm
189 time_t next_update; // the next update timestamp of the alarm
190 time_t last_status_change; // the timestamp of the last time this alarm changed status
192 time_t db_after; // the first timestamp evaluated by the db lookup
193 time_t db_before; // the last timestamp evaluated by the db lookup
195 time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications
196 int delay_up_current; // the current up notification delay duration
197 int delay_down_current; // the current down notification delay duration
198 int delay_last; // the last delay we used
200 // ------------------------------------------------------------------------
201 // variables this alarm exposes to the rest of the alarms
208 // ------------------------------------------------------------------------
209 // the chart this alarm is linked to
211 struct rrdset *rrdset;
213 // linking of this alarm on its chart (doubly-linked via next/prev)
214 struct rrdcalc *rrdset_next;
215 struct rrdcalc *rrdset_prev;
217 struct rrdcalc *next; // linking of the allocated calculations (->next)
// Expands to rc->after (the starting point of the time-series lookup), so it is
// non-zero exactly when `after` is non-zero. `rc` is evaluated once.
220 #define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after)
// Alarm templates.
223 // These are to be applied to charts found dynamically,
224 // based on their context.
225 typedef struct rrdcalctemplate {
233 uint32_t hash_context; // NOTE(review): presumably a hash of the context string used for matching - confirm
235 char *source; // the source of this alarm
236 char *units; // the units of the alarm
237 char *info; // a short description of the alarm
239 int update_every; // update frequency for the alarm
241 // the red and green threshold of this alarm (to be set to the chart)
242 calculated_number green;
243 calculated_number red;
245 // ------------------------------------------------------------------------
246 // database lookup settings
248 char *dimensions; // the chart dimensions
249 int group; // grouping method: average, max, etc.
250 int before; // ending point in time-series
251 int after; // starting point in time-series; tested by RRDCALCTEMPLATE_HAS_CALCULATION()
252 uint32_t options; // calculation options
254 // ------------------------------------------------------------------------
255 // notification delay settings
257 int delay_up_duration; // duration to delay notifications when alarm raises
258 int delay_down_duration; // duration to delay notifications when alarm lowers
259 int delay_max_duration; // the absolute max delay to apply to this alarm
260 float delay_multiplier; // multiplier for all delays when alarms switch status
262 // ------------------------------------------------------------------------
263 // expressions related to the alarm
265 EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm
266 EVAL_EXPRESSION *warning; // expression to check the warning condition
267 EVAL_EXPRESSION *critical; // expression to check the critical condition
269 struct rrdcalctemplate *next; // link to another template (list pointer)
// Expands to rt->after, so it is non-zero exactly when `after` is non-zero.
// NOTE(review): despite the name, this tests the database lookup field `after`
// (the same field RRDCALC_HAS_DB_LOOKUP() tests), not the `calculation`
// expression - confirm this is intended.
272 #define RRDCALCTEMPLATE_HAS_CALCULATION(rt) ((rt)->after)
// HEALTH_ENTRY_FLAG_*: single-bit masks. The first four occupy the low four
// bits; SAVED uses a separate high bit (0x10000000).
// NOTE(review): presumably applied to a flags member of the alarm log entry - confirm.
274 #define HEALTH_ENTRY_FLAG_PROCESSED 0x00000001
275 #define HEALTH_ENTRY_FLAG_UPDATED 0x00000002
276 #define HEALTH_ENTRY_FLAG_EXEC_RUN 0x00000004
277 #define HEALTH_ENTRY_FLAG_EXEC_FAILED 0x00000008
278 #define HEALTH_ENTRY_FLAG_SAVED 0x10000000
// One entry of the alarm log.
// NOTE(review): member descriptions below are inferred from names only - confirm against the code that fills them.
280 typedef struct alarm_entry {
283 uint32_t alarm_event_id; // presumably the per-alarm event id (cf. RRDCALC.next_event_id)
287 time_t non_clear_duration; // presumably how long the alarm has been in a non-CLEAR status
299 time_t exec_run_timestamp; // presumably when the alarm's exec command was run
306 calculated_number old_value; // value before this entry
307 calculated_number new_value; // value after this entry
314 time_t delay_up_to_timestamp; // presumably the timestamp up to which notifications are delayed
316 uint32_t updated_by_id; // presumably the id of the entry that updated this one
319 struct alarm_entry *next; // link to another alarm_entry (list pointer)
// The alarm log container.
// NOTE(review): member descriptions below are inferred from names only - confirm.
322 typedef struct alarm_log {
323 uint32_t next_log_id; // presumably the id to assign to the next log entry
324 uint32_t next_alarm_id; // presumably the id to assign to the next alarm
328 pthread_rwlock_t alarm_log_rwlock; // POSIX read-write lock; presumably guards access to the log
// Chart variable (RRDSETVAR) functions.
// NOTE(review): descriptions are inferred from names and signatures only - confirm against the definitions.
// rrdsetvar_rename_all: takes a chart; presumably refreshes the keys of its variables after a rename.
333 extern void rrdsetvar_rename_all(RRDSET *st);
// rrdsetvar_create: takes a chart, a variable name, a type (presumably RRDVAR_TYPE_*),
// an untyped pointer to the value and option bits; returns an RRDSETVAR pointer.
334 extern RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, int type, void *value, uint32_t options);
// rrdsetvar_free: takes an RRDSETVAR; presumably releases it.
335 extern void rrdsetvar_free(RRDSETVAR *rs);
// Dimension variable (RRDDIMVAR) functions.
// NOTE(review): descriptions are inferred from names and signatures only - confirm against the definitions.
// rrddimvar_rename_all: takes a dimension; presumably refreshes the keys of its variables after a rename.
337 extern void rrddimvar_rename_all(RRDDIM *rd);
// rrddimvar_create: takes a dimension, a type (presumably RRDVAR_TYPE_*), a prefix and
// suffix for the variable names, an untyped pointer to the value and option bits;
// returns an RRDDIMVAR pointer.
338 extern RRDDIMVAR *rrddimvar_create(RRDDIM *rd, int type, const char *prefix, const char *suffix, void *value, uint32_t options);
// rrddimvar_free: takes an RRDDIMVAR; presumably releases it.
339 extern void rrddimvar_free(RRDDIMVAR *rs);
// Linking of alarms (RRDCALC) and alarm templates to charts.
// NOTE(review): descriptions are inferred from names and signatures only - confirm against the definitions.
// rrdsetcalc_link_matching: takes a chart; presumably links the alarms that match it.
341 extern void rrdsetcalc_link_matching(RRDSET *st);
// rrdsetcalc_unlink: takes an alarm; presumably detaches it from its chart.
342 extern void rrdsetcalc_unlink(RRDCALC *rc);
// rrdcalctemplate_link_matching: takes a chart; presumably applies the templates that match it.
343 extern void rrdcalctemplate_link_matching(RRDSET *st);
// rrdcalc_find: takes a chart and an alarm name; returns an RRDCALC pointer
// (presumably NULL when no alarm with that name is found).
344 extern RRDCALC *rrdcalc_find(RRDSET *st, const char *name);
// Health subsystem entry points and JSON exporters.
// NOTE(review): descriptions are inferred from names and signatures only - confirm against the definitions.
346 extern void health_init(void);
// health_main has the void *(*)(void *) shape used for thread start routines.
347 extern void *health_main(void *ptr);
349 extern void health_reload(void);
// health_variable_lookup: takes a variable name with its hash and an alarm; writes through
// `result` and returns an int (presumably non-zero when the variable was found).
351 extern int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result);
// The *2json functions take a BUFFER to write into (presumably JSON text).
352 extern void health_alarms2json(RRDHOST *host, BUFFER *wb, int all);
353 extern void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after);
// NOTE(review): declared without `extern`, unlike its siblings; for a function
// declaration this makes no difference to linkage.
355 void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf);
// Custom host variables, addressed by host and name.
// NOTE(review): descriptions are inferred from names and signatures only - confirm against the definitions.
// create: takes a host and a name; returns an RRDVAR pointer.
357 extern RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name);
// destroy: takes the same host and name pair.
358 extern void rrdvar_custom_host_variable_destroy(RRDHOST *host, const char *name);
// set: takes an RRDVAR (presumably one returned by create) and the value to store.
359 extern void rrdvar_custom_host_variable_set(RRDVAR *rv, calculated_number value);
361 #endif //NETDATA_HEALTH_H