# -----------------------------------------------------------------------------
# low disk space

# Checks the latest collected values and raises an alarm when the disk
# is low on available space.
# The value is the used space as a percentage of (available + used).
# Warning above 80%, critical above 95%, evaluated every minute.

template: disk_full_percent
      on: disk.space
    calc: $used * 100 / ($avail + $used)
   every: 1m
    warn: $this > 80
    crit: $this > 95

# -----------------------------------------------------------------------------
# disk fill rate

# Calculates the rate at which the disk fills, using as a base the change
# in available space during the last 10 minutes.
# The lookup fetches the `avail` dimension as it was 10 minutes ago; the
# calc subtracts the current `avail` from it and divides by 600 seconds,
# so the result is the per-second change and is positive while available
# space is shrinking.

# This is just a calculation - it has no alarm.
# It is used by the next template to find the hours remaining.

template: disk_fill_rate
      on: disk.space
  lookup: max -1s at -10m unaligned of avail
    calc: ($this - $avail) / (10 * 60)
   every: 30s

# Calculates the hours remaining until the disk is full, if it continues
# to fill at this rate: available space / per-second fill rate / 3600.
# The `$this > 0` guard keeps the alarm silent when the result is negative
# (i.e. available space is growing, not shrinking).
# Warning below 48 hours, critical below 24 hours.
# NOTE(review): a fill rate of exactly 0 makes this a division by zero -
# confirm how the expression evaluator handles that case.

template: disk_full_after_hours
      on: disk.space
    calc: $avail / $disk_fill_rate / 3600
   every: 10s
    warn: $this > 0 and $this < 48
    crit: $this > 0 and $this < 24

# -----------------------------------------------------------------------------
# disk congestion

# Raises an alarm if the disk is congested, by calculating the average
# disk utilization for the last 2 minutes (re-evaluated every minute).
# Warning above $green (80), critical above $red (95).

template: 2min_disk_utilization
      on: disk.util
  lookup: average -2m every 1m unaligned
   green: 80
     red: 95
    warn: $this > $green
    crit: $this > $red

# Raises an alarm if the average disk backlog over the last 2 minutes
# is above 1000ms (1s) per second (i.e. the disk cannot catch up).
# Warning above $green (1000), critical above $red (2000).

template: 2min_disk_backlog
      on: disk.backlog
  lookup: average -2m every 1m unaligned
   green: 1000
     red: 2000
    warn: $this > $green
    crit: $this > $red