updated health configurations

author Costa Tsaousis <costa@tsaousis.gr>

Sat, 20 Aug 2016 18:26:26 +0000 (21:26 +0300)

committer Costa Tsaousis <costa@tsaousis.gr>

Sat, 20 Aug 2016 18:26:26 +0000 (21:26 +0300)
author Costa Tsaousis <costa@tsaousis.gr>
Sat, 20 Aug 2016 18:26:26 +0000 (21:26 +0300)
committer Costa Tsaousis <costa@tsaousis.gr>
Sat, 20 Aug 2016 18:26:26 +0000 (21:26 +0300)
diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf

index 7b6cac24427849991c98995f4880c146b823f28d..d1603104019a3a6e3f79fbd8b1850f4557596240 100644 (file)
--- a/conf.d/health.d/disks.conf
+++ b/conf.d/health.d/disks.conf
@@ -18,7 +18,7 @@ template: disk_full_percent
  
  # calculate the rate the disk fills
  # use as base, the available space change
-# during the last 10 minutes
+# during the last 30 minutes
  
  # this is just a calculation - it has no alarm
  # we will use it in the next template to find
@@ -26,12 +26,12 @@ template: disk_full_percent
  
  template: disk_fill_rate
        on: disk.space
-  lookup: max -1s at -10m unaligned of avail
-    calc: ($this - $avail) / (10 * 60)
-   every: 30s
+  lookup: max -1s at -30m unaligned of avail
+    calc: ($this - $avail) / (30 * 60)
+   every: 15s
  
  
-# calculate the hours remaininig
+# calculate the hours remaining
  # if the disk continues to fill
  # in this rate
  
@@ -48,26 +48,27 @@ template: disk_full_after_hours
  
  # raise an alarm if the disk is congested
  # by calculating the average disk utilization
-# for the last 2 minutes
+# for the last 10 minutes
  
-template: 5min_disk_utilization
+template: 10min_disk_utilization
        on: disk.util
-  lookup: average -5m unaligned
+  lookup: average -10m unaligned
     every: 1m
-   green: 80
-     red: 95
+   green: 90
+     red: 98
      warn: $this > $green
      crit: $this > $red
  
  
  # raise an alarm if the disk backlog
  # is above 1000ms (1s) per second
-# for 2 minutes
+# for 10 minutes
  # (i.e. the disk cannot catch up)
  
-template: 5min_disk_backlog
+template: 10min_disk_backlog
        on: disk.backlog
-  lookup: average -5m every 1m unaligned
+  lookup: average -10m unaligned
+   every: 1m
     green: 1000
       red: 2000
      warn: $this > $green
diff --git a/conf.d/health.d/entropy.conf b/conf.d/health.d/entropy.conf

index e0366dd5e503dce10439cc616b74a094f736ff48..417068b07821d3a4b40858915f213737e8f98997 100644 (file)
--- a/conf.d/health.d/entropy.conf
+++ b/conf.d/health.d/entropy.conf
@@ -1,11 +1,11 @@
  
  # check if entropy is too low
  # the alarm is checked every 1 minute
-# and examines the last 2 minutes of data
+# and examines the last 30 minutes of data
  
-   alarm: min_2min_entropy
+   alarm: min_30min_entropy
        on: system.entropy
-  lookup: min -2m unaligned
+  lookup: min -30m unaligned
     every: 1m
      warn: $this < 200
      crit: $this < 100
diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf

index 7be563058a15630fd6ff82f9f184af7f047877cc..19703608527c4366509efd59a3d7434f0af8fa24 100644 (file)
--- a/conf.d/health.d/net.conf
+++ b/conf.d/health.d/net.conf
@@ -1,22 +1,11 @@
  
  # check if an interface is dropping packets
  # the alarm is checked every 10 seconds
-# and examines the last minute of data
+# and examines the last 30 minutes of data
  
-template: 10min_packet_drops
+template: 30min_packet_drops
        on: net.drops
-  lookup: sum -10m unaligned absolute
+  lookup: sum -30m unaligned absolute
     every: 10s
      crit: $this > 0
  
-
-# check if a QoS class is dropping packets
-# the alarm is checked every 10 seconds
-# and examines the last minute of data
-
-template: 10min_qos_packet_drops
-      on: tc.qos_dropped
-  lookup: sum -10m unaligned absolute
-   every: 30s
-    warn: $this > 0
-
diff --git a/conf.d/health.d/qos.conf b/conf.d/health.d/qos.conf

new file mode 100644 (file)

index 0000000..3984293
--- /dev/null
+++ b/conf.d/health.d/qos.conf
@@ -0,0 +1,11 @@
+
+# check if a QoS class is dropping packets
+# the alarm is checked every 30 seconds
+# and examines the last 10 minutes of data
+
+#template: 10min_qos_packet_drops
+#      on: tc.qos_dropped
+#  lookup: sum -10m unaligned absolute
+#   every: 30s
+#    warn: $this > 0
+
author	Costa Tsaousis <costa@tsaousis.gr>
	Sat, 20 Aug 2016 18:26:26 +0000 (21:26 +0300)
committer	Costa Tsaousis <costa@tsaousis.gr>
	Sat, 20 Aug 2016 18:26:26 +0000 (21:26 +0300)
conf.d/health.d/disks.conf		patch | blob | history
conf.d/health.d/entropy.conf		patch | blob | history
conf.d/health.d/net.conf		patch | blob | history
conf.d/health.d/qos.conf	[new file with mode: 0644]	patch | blob