arthur.barton.de Git - netdata.git/commitdiff
added alarm to detect port scans or busy service crashes
author: Costa Tsaousis (ktsaou) <costa@tsaousis.gr>
Sat, 17 Sep 2016 15:46:12 +0000 (18:46 +0300)
committer: Costa Tsaousis (ktsaou) <costa@tsaousis.gr>
Sat, 17 Sep 2016 15:46:12 +0000 (18:46 +0300)
conf.d/Makefile.am
conf.d/health.d/tcp_resets.conf [new file with mode: 0644]
src/health.c

diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am
index 7fccd0eedfbd7a465b519a7d4169fc66ac6f11ea..066744cab721b9c3dad5ab5c56fa0814971d8a24 100644 (file)
@@ -48,6 +48,7 @@ dist_healthconfig_DATA = \
        health.d/cpu.conf \
        health.d/disks.conf \
        health.d/entropy.conf \
+       health.d/tcp_resets.conf \
        health.d/memcached.conf \
        health.d/mysql.conf \
        health.d/named.conf \
diff --git a/conf.d/health.d/tcp_resets.conf b/conf.d/health.d/tcp_resets.conf
new file mode 100644 (file)
index 0000000..8e93c47
--- /dev/null
@@ -0,0 +1,32 @@
+# -----------------------------------------------------------------------------
+
+   alarm: ipv4_tcphandshake_last_collected_secs
+      on: ipv4.tcphandshake
+    calc: $now - $last_collected_t
+   units: seconds ago
+   every: 10s
+    warn: $this > (($status >= $WARNING)  ? (0) : ( 5 * $update_every))
+    crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+   delay: up 0 down 5m multiplier 1.5 max 1h
+    info: number of seconds since the last successful data collection
+      to: sysadmin
+
+# -----------------------------------------------------------------------------
+
+   alarm: 1m_ipv4_tcp_resets
+      on: ipv4.tcphandshake
+  lookup: average -1m at -10s unaligned absolute of OutRsts
+   units: tcp resets/s
+   every: 10s
+    info: average TCP RESETS this host is sending, over the last minute
+
+   alarm: 10s_ipv4_tcp_resets
+      on: ipv4.tcphandshake
+  lookup: average -10s unaligned absolute of OutRsts
+   units: tcp resets/s
+   every: 10s
+    warn: $this > ((($1m_ipv4_tcp_resets < 5)?(5):($1m_ipv4_tcp_resets)) * (($status >= $WARNING)  ? (1) : (4)))
+   delay: up 0 down 60m multiplier 1.2 max 2h
+    info: average TCP RESETS this host is sending, over the last 10 seconds (this can be an indication that a port scan is made, or that a service running on this host has crashed)
+      to: sysadmin
+
diff --git a/src/health.c b/src/health.c
index 3eacd022c73b9e7b53efc5d11368fff04e001025..086c2b539f2c5bd6c5dc3e9c77e1515c640ac9e6 100644 (file)
@@ -2126,7 +2126,7 @@ static inline void health_alarm_log_process(RRDHOST *host) {
             if(unlikely(ae->unique_id < first_waiting))
                 first_waiting = ae->unique_id;
 
-            if(likely(now > ae->delay_up_to_timestamp))
+            if(likely(now >= ae->delay_up_to_timestamp))
                 health_process_notifications(host, ae);
         }
     }