# raise an alarm if the disk is low on
# available disk space
-template: disk_full_percentage
+template: disk_full_percent
on: disk.space
calc: $used * 100 / ($avail + $used)
every: 1m
# if the disk continues to fill
-# in this rate
+# at this rate
-template: disk_will_fill_in_hours
+template: disk_full_after_hours
on: disk.space
calc: $avail / $disk_fill_rate / 3600
every: 10s
# raise an alarm if the disk is congested
# by calculating the average disk utilization
-# for the last minute
+# for the last 2 minutes
-template: disk_congested
+template: 2min_disk_utilization
on: disk.util
- lookup: average -1m every 1m unaligned
- green: 70
+ lookup: average -2m every 1m unaligned
+ green: 80
red: 95
warn: $this > $green
crit: $this > $red
+
# raise an alarm if the disk backlog
# is above 1000ms (1s) per second
-# for 1 minute
+# for 2 minutes
# (i.e. the disk cannot catch up)
-template: disk_not_catching_up
+template: 2min_disk_backlog
on: disk.backlog
- lookup: average -1m every 1m unaligned
+ lookup: average -2m every 1m unaligned
green: 1000
red: 2000
warn: $this > $green
crit: $this > $red
-
# the alarm is checked every 1 minute
# and examines the last 2 minutes of data
- alarm: low_entropy_2m
+ alarm: min_2min_entropy
on: system.entropy
lookup: min -2m unaligned
every: 1m
# the alarm is checked every 10 seconds
-# and examines the last minute of data
+# and examines the last 10 minutes of data
-template: packet_drops_1m
+template: 10min_packet_drops
on: net.drops
- lookup: sum -1m unaligned absolute
+ lookup: sum -10m unaligned absolute
every: 10s
crit: $this > 0
-# the alarm is checked every 10 seconds
-# and examines the last minute of data
+# the alarm is checked every 30 seconds
+# and examines the last 10 minutes of data
-template: qos_packet_drops_1m
+template: 10min_qos_packet_drops
on: tc.qos_dropped
- lookup: sum -1m unaligned absolute
- every: 10s
- crit: $this > 0
+ lookup: sum -10m unaligned absolute
+ every: 30s
+ warn: $this > 0