]> arthur.barton.de Git - netdata.git/blob - src/rrdcalc.c
dns_query_time plugin: replace "." with "_" in dimensions
[netdata.git] / src / rrdcalc.c
1 #define NETDATA_HEALTH_INTERNALS
2 #include "common.h"
3
4 // ----------------------------------------------------------------------------
5 // RRDCALC management
6
7 inline const char *rrdcalc_status2string(int status) {
8     switch(status) {
9         case RRDCALC_STATUS_REMOVED:
10             return "REMOVED";
11
12         case RRDCALC_STATUS_UNDEFINED:
13             return "UNDEFINED";
14
15         case RRDCALC_STATUS_UNINITIALIZED:
16             return "UNINITIALIZED";
17
18         case RRDCALC_STATUS_CLEAR:
19             return "CLEAR";
20
21         case RRDCALC_STATUS_RAISED:
22             return "RAISED";
23
24         case RRDCALC_STATUS_WARNING:
25             return "WARNING";
26
27         case RRDCALC_STATUS_CRITICAL:
28             return "CRITICAL";
29
30         default:
31             error("Unknown alarm status %d", status);
32             return "UNKNOWN";
33     }
34 }
35
36 static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
37     debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, st->rrdhost->hostname);
38
39     rc->last_status_change = now_realtime_sec();
40     rc->rrdset = st;
41
42     rc->rrdset_next = st->alarms;
43     rc->rrdset_prev = NULL;
44
45     if(rc->rrdset_next)
46         rc->rrdset_next->rrdset_prev = rc;
47
48     st->alarms = rc;
49
50     if(rc->update_every < rc->rrdset->update_every) {
51         error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rc->rrdset->id, rc->name, rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every);
52         rc->update_every = rc->rrdset->update_every;
53     }
54
55     if(!isnan(rc->green) && isnan(st->green)) {
56         debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green);
57         st->green = rc->green;
58     }
59
60     if(!isnan(rc->red) && isnan(st->red)) {
61         debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red);
62         st->red = rc->red;
63     }
64
65     rc->local  = rrdvar_create_and_index("local",  &st->variables_root_index, rc->name, RRDVAR_TYPE_CALCULATED, &rc->value);
66     rc->family = rrdvar_create_and_index("family", &st->rrdfamily->variables_root_index, rc->name, RRDVAR_TYPE_CALCULATED, &rc->value);
67
68     char fullname[RRDVAR_MAX_LENGTH + 1];
69     snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name);
70     rc->hostid   = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value);
71
72     snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name);
73     rc->hostname = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value);
74
75     if(!rc->units) rc->units = strdupz(st->units);
76
77     {
78         time_t now = now_realtime_sec();
79         health_alarm_log(
80                 st->rrdhost,
81                 rc->id,
82                 rc->next_event_id++,
83                 now,
84                 rc->name,
85                 rc->rrdset->id,
86                 rc->rrdset->family,
87                 rc->exec,
88                 rc->recipient,
89                 now - rc->last_status_change,
90                 rc->old_value,
91                 rc->value,
92                 rc->status,
93                 RRDCALC_STATUS_UNINITIALIZED,
94                 rc->source,
95                 rc->units,
96                 rc->info,
97                 0,
98                 0
99         );
100     }
101 }
102
103 static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) {
104     if(     (rc->hash_chart == st->hash      && !strcmp(rc->chart, st->id)) ||
105             (rc->hash_chart == st->hash_name && !strcmp(rc->chart, st->name)))
106         return 1;
107
108     return 0;
109 }
110
111 // this has to be called while the RRDHOST is locked
112 inline void rrdsetcalc_link_matching(RRDSET *st) {
113     // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id);
114
115     RRDCALC *rc;
116     for(rc = st->rrdhost->alarms; rc ; rc = rc->next) {
117         if(unlikely(rc->rrdset))
118             continue;
119
120         if(unlikely(rrdcalc_is_matching_this_rrdset(rc, st)))
121             rrdsetcalc_link(st, rc);
122     }
123 }
124
125 // this has to be called while the RRDHOST is locked
126 inline void rrdsetcalc_unlink(RRDCALC *rc) {
127     RRDSET *st = rc->rrdset;
128
129     if(!st) {
130         debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name);
131         error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name);
132         return;
133     }
134
135     {
136         time_t now = now_realtime_sec();
137         health_alarm_log(
138                 st->rrdhost,
139                 rc->id,
140                 rc->next_event_id++,
141                 now,
142                 rc->name,
143                 rc->rrdset->id,
144                 rc->rrdset->family,
145                 rc->exec,
146                 rc->recipient,
147                 now - rc->last_status_change,
148                 rc->old_value,
149                 rc->value,
150                 rc->status,
151                 RRDCALC_STATUS_REMOVED,
152                 rc->source,
153                 rc->units,
154                 rc->info,
155                 0,
156                 0
157         );
158     }
159
160     RRDHOST *host = st->rrdhost;
161
162     debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname);
163
164     // unlink it
165     if(rc->rrdset_prev)
166         rc->rrdset_prev->rrdset_next = rc->rrdset_next;
167
168     if(rc->rrdset_next)
169         rc->rrdset_next->rrdset_prev = rc->rrdset_prev;
170
171     if(st->alarms == rc)
172         st->alarms = rc->rrdset_next;
173
174     rc->rrdset_prev = rc->rrdset_next = NULL;
175
176     rrdvar_free(st->rrdhost, &st->variables_root_index, rc->local);
177     rc->local = NULL;
178
179     rrdvar_free(st->rrdhost, &st->rrdfamily->variables_root_index, rc->family);
180     rc->family = NULL;
181
182     rrdvar_free(st->rrdhost, &st->rrdhost->variables_root_index, rc->hostid);
183     rc->hostid = NULL;
184
185     rrdvar_free(st->rrdhost, &st->rrdhost->variables_root_index, rc->hostname);
186     rc->hostname = NULL;
187
188     rc->rrdset = NULL;
189
190     // RRDCALC will remain in RRDHOST
191     // so that if the matching chart is found in the future
192     // it will be applied automatically
193 }
194
195 RRDCALC *rrdcalc_find(RRDSET *st, const char *name) {
196     RRDCALC *rc;
197     uint32_t hash = simple_hash(name);
198
199     for( rc = st->alarms; rc ; rc = rc->rrdset_next ) {
200         if(unlikely(rc->hash == hash && !strcmp(rc->name, name)))
201             return rc;
202     }
203
204     return NULL;
205 }
206
207 inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) {
208     RRDCALC *rc;
209
210     if(unlikely(!chart)) {
211         error("attempt to find RRDCALC '%s' without giving a chart name", name);
212         return 1;
213     }
214
215     if(unlikely(!hash_chart)) hash_chart = simple_hash(chart);
216     if(unlikely(!hash_name))  hash_name  = simple_hash(name);
217
218     // make sure it does not already exist
219     for(rc = host->alarms; rc ; rc = rc->next) {
220         if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) {
221             debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname);
222             error("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname);
223             return 1;
224         }
225     }
226
227     return 0;
228 }
229
230 inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) {
231     if(chart && name) {
232         uint32_t hash_chart = simple_hash(chart);
233         uint32_t hash_name = simple_hash(name);
234
235         // re-use old IDs, by looking them up in the alarm log
236         ALARM_ENTRY *ae;
237         for(ae = host->health_log.alarms; ae ;ae = ae->next) {
238             if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) {
239                 if(next_event_id) *next_event_id = ae->alarm_event_id + 1;
240                 return ae->alarm_id;
241             }
242         }
243     }
244
245     return host->health_log.next_alarm_id++;
246 }
247
248 inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) {
249     rrdhost_check_rdlock(host);
250
251     if(rc->calculation) {
252         rc->calculation->status = &rc->status;
253         rc->calculation->this = &rc->value;
254         rc->calculation->after = &rc->db_after;
255         rc->calculation->before = &rc->db_before;
256         rc->calculation->rrdcalc = rc;
257     }
258
259     if(rc->warning) {
260         rc->warning->status = &rc->status;
261         rc->warning->this = &rc->value;
262         rc->warning->after = &rc->db_after;
263         rc->warning->before = &rc->db_before;
264         rc->warning->rrdcalc = rc;
265     }
266
267     if(rc->critical) {
268         rc->critical->status = &rc->status;
269         rc->critical->this = &rc->value;
270         rc->critical->after = &rc->db_after;
271         rc->critical->before = &rc->db_before;
272         rc->critical->rrdcalc = rc;
273     }
274
275     // link it to the host
276     if(likely(host->alarms)) {
277         // append it
278         RRDCALC *t;
279         for(t = host->alarms; t && t->next ; t = t->next) ;
280         t->next = rc;
281     }
282     else {
283         host->alarms = rc;
284     }
285
286     // link it to its chart
287     RRDSET *st;
288     rrdset_foreach_read(st, host) {
289         if(rrdcalc_is_matching_this_rrdset(rc, st)) {
290             rrdsetcalc_link(st, rc);
291             break;
292         }
293     }
294 }
295
296 inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) {
297
298     debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name);
299
300     if(rrdcalc_exists(host, chart, rt->name, 0, 0))
301         return NULL;
302
303     RRDCALC *rc = callocz(1, sizeof(RRDCALC));
304     rc->next_event_id = 1;
305     rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id);
306     rc->name = strdupz(rt->name);
307     rc->hash = simple_hash(rc->name);
308     rc->chart = strdupz(chart);
309     rc->hash_chart = simple_hash(rc->chart);
310
311     if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions);
312
313     rc->green = rt->green;
314     rc->red = rt->red;
315     rc->value = NAN;
316     rc->old_value = NAN;
317
318     rc->delay_up_duration = rt->delay_up_duration;
319     rc->delay_down_duration = rt->delay_down_duration;
320     rc->delay_max_duration = rt->delay_max_duration;
321     rc->delay_multiplier = rt->delay_multiplier;
322
323     rc->group = rt->group;
324     rc->after = rt->after;
325     rc->before = rt->before;
326     rc->update_every = rt->update_every;
327     rc->options = rt->options;
328
329     if(rt->exec) rc->exec = strdupz(rt->exec);
330     if(rt->recipient) rc->recipient = strdupz(rt->recipient);
331     if(rt->source) rc->source = strdupz(rt->source);
332     if(rt->units) rc->units = strdupz(rt->units);
333     if(rt->info) rc->info = strdupz(rt->info);
334
335     if(rt->calculation) {
336         rc->calculation = expression_parse(rt->calculation->source, NULL, NULL);
337         if(!rc->calculation)
338             error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source);
339     }
340     if(rt->warning) {
341         rc->warning = expression_parse(rt->warning->source, NULL, NULL);
342         if(!rc->warning)
343             error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source);
344     }
345     if(rt->critical) {
346         rc->critical = expression_parse(rt->critical->source, NULL, NULL);
347         if(!rc->critical)
348             error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source);
349     }
350
351     debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f",
352             (rc->chart)?rc->chart:"NOCHART",
353             rc->name,
354             (rc->exec)?rc->exec:"DEFAULT",
355             (rc->recipient)?rc->recipient:"DEFAULT",
356             rc->green,
357             rc->red,
358             rc->group,
359             rc->after,
360             rc->before,
361             rc->options,
362             (rc->dimensions)?rc->dimensions:"NONE",
363             rc->update_every,
364             (rc->calculation)?rc->calculation->parsed_as:"NONE",
365             (rc->warning)?rc->warning->parsed_as:"NONE",
366             (rc->critical)?rc->critical->parsed_as:"NONE",
367             rc->source,
368             rc->delay_up_duration,
369             rc->delay_down_duration,
370             rc->delay_max_duration,
371             rc->delay_multiplier
372     );
373
374     rrdcalc_create_part2(host, rc);
375     return rc;
376 }
377
378 void rrdcalc_free(RRDHOST *host, RRDCALC *rc) {
379     if(!rc) return;
380
381     debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname);
382
383     // unlink it from RRDSET
384     if(rc->rrdset) rrdsetcalc_unlink(rc);
385
386     // unlink it from RRDHOST
387     if(unlikely(rc == host->alarms))
388         host->alarms = rc->next;
389
390     else {
391         RRDCALC *t;
392         for(t = host->alarms; t && t->next != rc; t = t->next) ;
393         if(t) {
394             t->next = rc->next;
395             rc->next = NULL;
396         }
397         else
398             error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname);
399     }
400
401     expression_free(rc->calculation);
402     expression_free(rc->warning);
403     expression_free(rc->critical);
404
405     freez(rc->name);
406     freez(rc->chart);
407     freez(rc->family);
408     freez(rc->dimensions);
409     freez(rc->exec);
410     freez(rc->recipient);
411     freez(rc->source);
412     freez(rc->units);
413     freez(rc->info);
414     freez(rc);
415 }