From 4d08e3142c8008c518bb0fb1b3fa13eb33e5e8a0 Mon Sep 17 00:00:00 2001
From: "Costa Tsaousis (ktsaou)"
Date: Sat, 14 Jan 2017 02:41:01 +0200
Subject: [PATCH] added alarms for common UDP buffer errors

---
 conf.d/Makefile.am              |  1 +
 conf.d/health.d/udp_errors.conf | 40 +++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 conf.d/health.d/udp_errors.conf

diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am
index fbffc0f8..7ab660f0 100644
--- a/conf.d/Makefile.am
+++ b/conf.d/Makefile.am
@@ -79,6 +79,7 @@ dist_healthconfig_DATA = \
 	health.d/squid.conf \
 	health.d/swap.conf \
 	health.d/tcp_resets.conf \
+	health.d/udp_errors.conf \
 	$(NULL)
 
 chartsconfigdir=$(configdir)/charts.d
diff --git a/conf.d/health.d/udp_errors.conf b/conf.d/health.d/udp_errors.conf
new file mode 100644
index 00000000..98e955c0
--- /dev/null
+++ b/conf.d/health.d/udp_errors.conf
@@ -0,0 +1,40 @@
+# -----------------------------------------------------------------------------
+# ipv4.udperrors data collection freshness (seconds since $last_collected_t; thresholds scale with $update_every)
+   alarm: ipv4_udperrors_last_collected_secs
+      on: ipv4.udperrors
+    calc: $now - $last_collected_t
+   units: seconds ago
+   every: 10s
+    warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+   delay: up 0 down 5m multiplier 1.5 max 1h
+    info: number of seconds since the last successful data collection
+      to: sysadmin
+
+# -----------------------------------------------------------------------------
+# UDP receive buffer errors (RcvbufErrors summed over the last minute; warn above 0, crit above 100)
+
+   alarm: 1m_ipv4_udp_receive_buffer_errors
+      on: ipv4.udperrors
+  lookup: sum -1m unaligned absolute of RcvbufErrors
+   units: errors
+   every: 10s
+    warn: $this > 0
+    crit: $this > 100
+    info: number of UDP receive buffer errors during the last minute
+   delay: up 0 down 60m multiplier 1.2 max 2h
+      to: sysadmin
+
+# -----------------------------------------------------------------------------
+# UDP send buffer errors (SndbufErrors summed over the last minute; warn above 0, crit above 100)
+
+   alarm: 1m_ipv4_udp_send_buffer_errors
+      on: ipv4.udperrors
+  lookup: sum -1m unaligned absolute of SndbufErrors
+   units: errors
+   every: 10s
+    warn: $this > 0
+    crit: $this > 100
+    info: number of UDP send buffer errors during the last minute
+   delay: up 0 down 60m multiplier 1.2 max 2h
+      to: sysadmin
-- 
2.39.2