1 #define NETDATA_HEALTH_INTERNALS
4 // ----------------------------------------------------------------------------
// Translates an RRDCALC_STATUS_* code into a printable name.
// Only part of the switch is visible in this excerpt: of the cases shown,
// RRDCALC_STATUS_UNINITIALIZED yields "UNINITIALIZED"; the strings returned
// for the other cases are outside this excerpt.
7 inline const char *rrdcalc_status2string(int status) {
9 case RRDCALC_STATUS_REMOVED:
12 case RRDCALC_STATUS_UNDEFINED:
15 case RRDCALC_STATUS_UNINITIALIZED:
16 return "UNINITIALIZED";
18 case RRDCALC_STATUS_CLEAR:
21 case RRDCALC_STATUS_RAISED:
24 case RRDCALC_STATUS_WARNING:
27 case RRDCALC_STATUS_CRITICAL:
// presumably the default branch: an unrecognised status is logged with its numeric value
31 error("Unknown alarm status %d", status);
// Attaches alarm `rc` to chart `st`.
// Visible steps: stamp the status-change time, insert rc into the chart's
// alarm list, reconcile update frequency and green/red thresholds with the
// chart, register the alarm value as variables at chart, family and host
// scope, and inherit the chart's units when the alarm has none.
36 static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
37 debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, st->rrdhost->hostname);
39 rc->last_status_change = now_realtime_sec();
// rc becomes the new first element: its successor is the current list head and it has no predecessor
42 rc->rrdset_next = st->alarms;
43 rc->rrdset_prev = NULL;
// back-link of the previous head (the guard for an empty list is outside this excerpt)
46 rc->rrdset_next->rrdset_prev = rc;
// an alarm is not allowed to have a smaller update_every than its chart: raise it to the chart's value
50 if(rc->update_every < rc->rrdset->update_every) {
51 error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rc->rrdset->id, rc->name, rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every);
52 rc->update_every = rc->rrdset->update_every;
// the chart takes the alarm's green threshold only when the alarm has one and the chart's is still NaN
55 if(!isnan(rc->green) && isnan(st->green)) {
56 debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green);
57 st->green = rc->green;
// same test for the red threshold (the assignment itself is outside this excerpt)
60 if(!isnan(rc->red) && isnan(st->red)) {
61 debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red);
// all variables created below point at &rc->value;
// chart and family scope use the bare alarm name
65 rc->local = rrdvar_create_and_index("local", &st->variables_root_index, rc->name, RRDVAR_TYPE_CALCULATED, &rc->value);
66 rc->family = rrdvar_create_and_index("family", &st->rrdfamily->variables_root_index, rc->name, RRDVAR_TYPE_CALCULATED, &rc->value);
// host scope registers the alarm twice: as "<chart id>.<alarm name>" and as "<chart name>.<alarm name>"
68 char fullname[RRDVAR_MAX_LENGTH + 1];
69 snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name);
70 rc->hostid = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value);
72 snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name);
73 rc->hostname = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value);
// an alarm without units gets its own copy of the chart's units
75 if(!rc->units) rc->units = strdupz(st->units);
// the remaining visible lines are arguments of a call whose head is outside this excerpt
// (presumably an alarm log entry with status RRDCALC_STATUS_UNINITIALIZED - confirm against the full file)
78 time_t now = now_realtime_sec();
89 now - rc->last_status_change,
93 RRDCALC_STATUS_UNINITIALIZED,
// Checks whether alarm `rc` targets chart `st`.
// The condition holds when rc->chart equals either the chart's id or the
// chart's name; the precomputed hashes are compared first, before strcmp().
// The values returned for each outcome are outside this excerpt.
103 static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) {
104 if( (rc->hash_chart == st->hash && !strcmp(rc->chart, st->id)) ||
105 (rc->hash_chart == st->hash_name && !strcmp(rc->chart, st->name)))
111 // this has to be called while the RRDHOST is locked
// Walks every alarm of the chart's host and links to `st` those that match it
// according to rrdcalc_is_matching_this_rrdset().
112 inline void rrdsetcalc_link_matching(RRDSET *st) {
113 // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id);
116 for(rc = st->rrdhost->alarms; rc ; rc = rc->next) {
// alarm already attached to a chart (the statement taken in this case is outside this excerpt; presumably it is skipped)
117 if(unlikely(rc->rrdset))
120 if(unlikely(rrdcalc_is_matching_this_rrdset(rc, st)))
121 rrdsetcalc_link(st, rc);
125 // this has to be called while the RRDHOST is locked
// Detaches alarm `rc` from the chart it is linked to (rc->rrdset).
// Visible steps: complain when rc is not linked, remove rc from the chart's
// doubly linked alarm list, and free the four variables that were registered
// for it at chart, family and host scope.
126 inline void rrdsetcalc_unlink(RRDCALC *rc) {
127 RRDSET *st = rc->rrdset;
// reported both to the debug log and to the error log (the enclosing condition is outside this excerpt)
130 debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name);
131 error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name);
// the next three visible lines belong to a call whose head is outside this excerpt
// (presumably an alarm log entry with status RRDCALC_STATUS_REMOVED - confirm against the full file)
136 time_t now = now_realtime_sec();
147 now - rc->last_status_change,
151 RRDCALC_STATUS_REMOVED,
160 RRDHOST *host = st->rrdhost;
162 debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname);
// splice rc out of the chart's alarm list; the guards around these three assignments are outside this excerpt
166 rc->rrdset_prev->rrdset_next = rc->rrdset_next;
169 rc->rrdset_next->rrdset_prev = rc->rrdset_prev;
172 st->alarms = rc->rrdset_next;
174 rc->rrdset_prev = rc->rrdset_next = NULL;
// release the variables stored in rc->local, rc->family, rc->hostid and rc->hostname
176 rrdvar_free(st->rrdhost, &st->variables_root_index, rc->local);
179 rrdvar_free(st->rrdhost, &st->rrdfamily->variables_root_index, rc->family);
182 rrdvar_free(st->rrdhost, &st->rrdhost->variables_root_index, rc->hostid);
185 rrdvar_free(st->rrdhost, &st->rrdhost->variables_root_index, rc->hostname);
190 // RRDCALC will remain in RRDHOST
191 // so that if the matching chart is found in the future
192 // it will be applied automatically
// Searches the alarms linked to chart `st` for one called `name`.
// An entry matches when its stored hash equals simple_hash(name) and the
// names compare equal; the return statements are outside this excerpt.
195 RRDCALC *rrdcalc_find(RRDSET *st, const char *name) {
197 uint32_t hash = simple_hash(name);
// only the chart's own list is walked (st->alarms via rrdset_next), not the host-wide list
199 for( rc = st->alarms; rc ; rc = rc->rrdset_next ) {
200 if(unlikely(rc->hash == hash && !strcmp(rc->name, name)))
// Looks in host->alarms for an alarm with the given chart and name.
// hash_chart / hash_name may be passed as 0, in which case they are computed
// here with simple_hash(). A match is reported to both the debug and the
// error log. The values returned are outside this excerpt.
207 inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) {
// a chart is mandatory for the lookup
210 if(unlikely(!chart)) {
211 error("attempt to find RRDCALC '%s' without giving a chart name", name);
215 if(unlikely(!hash_chart)) hash_chart = simple_hash(chart);
216 if(unlikely(!hash_name)) hash_name = simple_hash(name);
218 // make sure it does not already exist
219 for(rc = host->alarms; rc ; rc = rc->next) {
// alarms without a chart are never a match; hashes are compared before the strings
220 if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) {
221 debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname);
222 error("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname);
// Provides the numeric id for the alarm identified by `chart` and `name`.
// The host's alarm log is searched for an entry with the same chart and name;
// when one is found and `next_event_id` is not NULL, *next_event_id is set to
// that entry's alarm_event_id + 1 (the value returned on this path is outside
// this excerpt). Otherwise host->health_log.next_alarm_id is returned and
// post-incremented.
230 inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) {
232 uint32_t hash_chart = simple_hash(chart);
233 uint32_t hash_name = simple_hash(name);
235 // re-use old IDs, by looking them up in the alarm log
237 for(ae = host->health_log.alarms; ae ;ae = ae->next) {
238 if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) {
239 if(next_event_id) *next_event_id = ae->alarm_event_id + 1;
// no usable log entry: hand out the next id of the host's counter
245 return host->health_log.next_alarm_id++;
// Second stage of alarm creation: wires the alarm's expressions to the
// alarm's own fields, adds the alarm to the host and links it to a matching
// chart of the host.
248 inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) {
// NOTE(review): judging by its name this verifies a read lock on the host - confirm this is sufficient for the list update below
249 rrdhost_check_rdlock(host);
// each expression gets pointers to the alarm's status, value and db_after/db_before, plus a back-pointer to the alarm
251 if(rc->calculation) {
252 rc->calculation->status = &rc->status;
253 rc->calculation->this = &rc->value;
254 rc->calculation->after = &rc->db_after;
255 rc->calculation->before = &rc->db_before;
256 rc->calculation->rrdcalc = rc;
// same wiring for the warning expression (its guard is outside this excerpt)
260 rc->warning->status = &rc->status;
261 rc->warning->this = &rc->value;
262 rc->warning->after = &rc->db_after;
263 rc->warning->before = &rc->db_before;
264 rc->warning->rrdcalc = rc;
// same wiring for the critical expression (its guard is outside this excerpt)
268 rc->critical->status = &rc->status;
269 rc->critical->this = &rc->value;
270 rc->critical->after = &rc->db_after;
271 rc->critical->before = &rc->db_before;
272 rc->critical->rrdcalc = rc;
275 // link it to the host
276 if(likely(host->alarms)) {
// empty-bodied loop: leaves t at the last element of the host's alarm list
279 for(t = host->alarms; t && t->next ; t = t->next) ;
286 // link it to its chart
288 rrdset_foreach_read(st, host) {
289 if(rrdcalc_is_matching_this_rrdset(rc, st)) {
290 rrdsetcalc_link(st, rc);
// Builds a new alarm for `chart` from template `rt`.
// Visible steps: check for an existing alarm with the same chart and name,
// allocate a zeroed RRDCALC, copy the template's settings (strings are
// duplicated, expressions are re-parsed from their source text), log the
// result and pass the alarm to rrdcalc_create_part2().
// The return statements are outside this excerpt.
296 inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) {
298 debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name);
// hashes are passed as 0, so rrdcalc_exists() computes them itself
300 if(rrdcalc_exists(host, chart, rt->name, 0, 0))
303 RRDCALC *rc = callocz(1, sizeof(RRDCALC));
// next_event_id starts at 1 and may be overwritten by rrdcalc_get_unique_id() through the pointer passed to it
304 rc->next_event_id = 1;
305 rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id);
306 rc->name = strdupz(rt->name);
307 rc->hash = simple_hash(rc->name);
308 rc->chart = strdupz(chart);
309 rc->hash_chart = simple_hash(rc->chart);
311 if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions);
313 rc->green = rt->green;
// notification delay settings
318 rc->delay_up_duration = rt->delay_up_duration;
319 rc->delay_down_duration = rt->delay_down_duration;
320 rc->delay_max_duration = rt->delay_max_duration;
321 rc->delay_multiplier = rt->delay_multiplier;
// lookup settings (group, after, before, options) and update frequency
323 rc->group = rt->group;
324 rc->after = rt->after;
325 rc->before = rt->before;
326 rc->update_every = rt->update_every;
327 rc->options = rt->options;
// optional strings: the alarm gets its own copies; fields absent in the template stay NULL from callocz()
329 if(rt->exec) rc->exec = strdupz(rt->exec);
330 if(rt->recipient) rc->recipient = strdupz(rt->recipient);
331 if(rt->source) rc->source = strdupz(rt->source);
332 if(rt->units) rc->units = strdupz(rt->units);
333 if(rt->info) rc->info = strdupz(rt->info);
// expressions are parsed again from the template's source text, giving the alarm its own expression objects
335 if(rt->calculation) {
336 rc->calculation = expression_parse(rt->calculation->source, NULL, NULL);
338 error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source);
// warning and critical follow the same pattern (their guards are outside this excerpt)
341 rc->warning = expression_parse(rt->warning->source, NULL, NULL);
343 error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source);
346 rc->critical = expression_parse(rt->critical->source, NULL, NULL);
348 error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source);
351 debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f",
352 (rc->chart)?rc->chart:"NOCHART",
354 (rc->exec)?rc->exec:"DEFAULT",
355 (rc->recipient)?rc->recipient:"DEFAULT",
362 (rc->dimensions)?rc->dimensions:"NONE",
364 (rc->calculation)?rc->calculation->parsed_as:"NONE",
365 (rc->warning)?rc->warning->parsed_as:"NONE",
366 (rc->critical)?rc->critical->parsed_as:"NONE",
368 rc->delay_up_duration,
369 rc->delay_down_duration,
370 rc->delay_max_duration,
374 rrdcalc_create_part2(host, rc);
378 void rrdcalc_free(RRDHOST *host, RRDCALC *rc) {
381 debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname);
383 // unlink it from RRDSET
384 if(rc->rrdset) rrdsetcalc_unlink(rc);
386 // unlink it from RRDHOST
387 if(unlikely(rc == host->alarms))
388 host->alarms = rc->next;
392 for(t = host->alarms; t && t->next != rc; t = t->next) ;
398 error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname);
401 expression_free(rc->calculation);
402 expression_free(rc->warning);
403 expression_free(rc->critical);
408 freez(rc->dimensions);
410 freez(rc->recipient);