From d4f3ca33af3e40d31eb5479f6dbcb152cd9f984a Mon Sep 17 00:00:00 2001 From: "Costa Tsaousis (ktsaou)" Date: Sun, 20 Nov 2016 17:03:16 +0200 Subject: [PATCH] added backend alarms --- conf.d/Makefile.am | 1 + conf.d/health.d/backend.conf | 44 ++++++++++++++++++++++++++++++++++++ src/backends.c | 6 ++--- 3 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 conf.d/health.d/backend.conf diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am index 690b5e7c..529dedbd 100644 --- a/conf.d/Makefile.am +++ b/conf.d/Makefile.am @@ -52,6 +52,7 @@ dist_pythonconfig_DATA = \ healthconfigdir=$(configdir)/health.d dist_healthconfig_DATA = \ health.d/apache.conf \ + health.d/backend.conf \ health.d/cpu.conf \ health.d/disks.conf \ health.d/entropy.conf \ diff --git a/conf.d/health.d/backend.conf b/conf.d/health.d/backend.conf new file mode 100644 index 00000000..ba40a5f1 --- /dev/null +++ b/conf.d/health.d/backend.conf @@ -0,0 +1,44 @@ + +# make sure we are sending data to backend + + alarm: backend_last_buffering + on: netdata.backend_metrics + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? 
($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful buffering of backend data + to: dba + + alarm: backend_metrics_sent + on: netdata.backend_metrics + units: % + calc: $sent * 100 / $buffered + every: 10s + warn: $this != 100 + delay: down 5m multiplier 1.5 max 1h + info: percentage of metrics sent to the backend server + to: dba + + alarm: backend_metrics_lost + on: netdata.backend_metrics + units: metrics + calc: $lost + every: 10s + crit: $this != 0 + delay: down 5m multiplier 1.5 max 1h + info: number of metrics lost due to repeating failures to contact the backend server + to: dba + + alarm: backend_slow + on: netdata.backend_latency + units: % + calc: $latency * 100 / ($update_every * 1000) + every: 10s + warn: $this > 50 + crit: $this > 100 + delay: down 5m multiplier 1.5 max 1h + info: the percentage of time between iterations needed by the backend time to process the data sent by netdata + to: dba diff --git a/src/backends.c b/src/backends.c index 75395973..5f330342 100644 --- a/src/backends.c +++ b/src/backends.c @@ -164,7 +164,7 @@ static inline int connect_to_one(const char *definition, int default_port, struc return fd; } -static inline calculated_number backend_duration_average(RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) { +static inline calculated_number backend_calculate_value_from_stored_data(RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) { time_t first_t = rrdset_first_entry_t(st); time_t last_t = rrdset_last_entry_t(st); @@ -227,7 +227,7 @@ static inline int format_dimension_collected_graphite_plaintext(BUFFER *b, const static inline int format_dimension_stored_graphite_plaintext(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) { (void)host; - calculated_number value = backend_duration_average(st, rd, after, before, options); + 
calculated_number value = backend_calculate_value_from_stored_data(st, rd, after, before, options); if(!isnan(value)) { buffer_sprintf(b, "%s.%s.%s.%s " CALCULATED_NUMBER_FORMAT " %u\n", prefix, hostname, st->id, rd->id, value, (uint32_t) before); return 1; @@ -246,7 +246,7 @@ static inline int format_dimension_collected_opentsdb_telnet(BUFFER *b, const ch static inline int format_dimension_stored_opentsdb_telnet(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) { (void)host; - calculated_number value = backend_duration_average(st, rd, after, before, options); + calculated_number value = backend_calculate_value_from_stored_data(st, rd, after, before, options); if(!isnan(value)) { buffer_sprintf(b, "put %s.%s.%s %u " CALCULATED_NUMBER_FORMAT " host=%s\n", prefix, st->id, rd->id, (uint32_t) before, value, hostname); return 1; -- 2.39.2