# -----------------------------------------------------------------------------
# low disk space

# Checks the latest collected values of every disk.space chart and
# raises an alarm when the disk is low on available disk space.
#
# The expression is the percentage of USED space:
#
#     used * 100 / (avail + used)
#
# so the alarm is a warning above 80% used and critical above 90% used.
# (Testing the percentage of available space against these thresholds
# would fire on nearly empty disks and stay silent on full ones.)

template: low_disk_space
      on: disk.space
   every: 1m
    warn: $used * 100 / ($avail + $used) > 80
    crit: $used * 100 / ($avail + $used) > 90


# -----------------------------------------------------------------------------
# disk fill rate

# Calculates the rate at which the disk fills, using as a base the change
# in available space during the last minute.
#
# lookup: the value of 'avail' as it was 1 minute ago (becomes $this)
# calc:   (avail 1 minute ago - avail now) / 60 seconds
#         = space consumed per second; negative when space is being freed
# warn:   at this rate, 2 days (2 * 86400 seconds) consume more than
#         the space currently available
# crit:   same, for 1 day

template: disk_fill_rate_1m
      on: disk.space
  lookup: max -1s at -1m unaligned of avail
   every: 1m
    calc: ($this - $avail) / (1 * 60)
    warn: $this * 2 * 86400 > $avail
    crit: $this * 1 * 86400 > $avail


# Calculates the rate at which the disk fills, using as a base the change
# in available space during the last hour (59 minutes).
#
# lookup: the value of 'avail' as it was 59 minutes ago (becomes $this)
# calc:   (avail 59 minutes ago - avail now) / (59 * 60) seconds
#         = space consumed per second, averaged over the period
# warn:   at this rate, 2 days consume more than the space currently
#         available
# crit:   same, for 1 day

template: disk_fill_rate_59m
      on: disk.space
  lookup: max -1s at -59m unaligned of avail
   every: 1m
    calc: ($this - $avail) / (59 * 60)
    warn: $this * 2 * 86400 > $avail
    crit: $this * 1 * 86400 > $avail


# -----------------------------------------------------------------------------
# disk congestion

# Raises an alarm if the disk is congested, by calculating the average
# disk utilization over the last minute.
#
# green / red are the warning / critical thresholds, compared against
# the looked-up average ($this).

template: disk_congested
      on: disk.util
  lookup: average -1m every 1m unaligned
   green: 70
     red: 95
    warn: $this > $green
    crit: $this > $red


# Raises an alarm if the disk backlog, averaged over the last minute,
# is above 1000ms (1s) per second, i.e. the disk cannot catch up with
# the requests queued on it. A warning is raised from 500ms.

template: disk_not_catching_up
      on: disk.backlog
  lookup: average -1m every 1m unaligned
   green: 500
     red: 1000
    warn: $this > $green
    crit: $this > $red