# -----------------------------------------------------------------------------
# low disk space

# Check the latest collected values and raise an alarm when the disk is
# running low on available space.
# calc turns the used and avail dimensions into "percent of space used";
# both warn and crit compare against that percentage ($this), not against
# the raw $used dimension.

template: disk_full_percentage
      on: disk.space
    calc: $used * 100 / ($avail + $used)
   every: 1m
    warn: $this > 80
    crit: $this > 95


# -----------------------------------------------------------------------------
# disk fill rate

# Calculate the rate at which the disk fills, using the change in available
# space over the last 10 minutes as the base.
# The lookup fetches the avail value from 10 minutes ago; calc subtracts the
# current avail from it and divides by the length of the window in seconds
# (10 * 60), so the result is positive while available space is shrinking.
# This is just a calculation - it has no alarm. The next template uses it
# to find the hours remaining.

template: disk_fill_rate
      on: disk.space
  lookup: max -1s at -10m unaligned of avail
    calc: ($this - $avail) / (10 * 60)
   every: 30s


# Calculate the hours remaining until the disk is full, if it continues to
# fill at this rate.
# The "$this > 0" guard in warn/crit skips negative results, which occur
# when the fill rate is negative (available space is growing).

template: disk_will_fill_in_hours
      on: disk.space
    calc: $avail / $disk_fill_rate / 3600
   every: 10s
    warn: $this > 0 and $this < 48
    crit: $this > 0 and $this < 24


# -----------------------------------------------------------------------------
# disk congestion

# Raise an alarm if the disk is congested, by calculating the average disk
# utilization over the last minute.
# green and red hold the warning and critical thresholds used below.

template: disk_congested
      on: disk.util
  lookup: average -1m every 1m unaligned
   green: 70
     red: 95
    warn: $this > $green
    crit: $this > $red


# Raise an alarm if the disk backlog is above 1000ms (1s) per second,
# averaged over 1 minute (i.e. the disk cannot catch up).
# Critical is raised at twice that backlog (2000ms).

template: disk_not_catching_up
      on: disk.backlog
  lookup: average -1m every 1m unaligned
   green: 1000
     red: 2000
    warn: $this > $green
    crit: $this > $red