1 # -----------------------------------------------------------------------------
2 # make sure we collect values for each disk
# Staleness check for mount point data: the value of this alarm is the
# number of seconds between $now and $last_collected_t.
5 template: disk_space_last_collected_secs
7 calc: $now - $last_collected_t
# Two-level thresholds selected by the current $status:
#  - warn is raised when the age exceeds 5 * $update_every; while the alarm
#    is already WARNING or higher, the threshold drops to 1 * $update_every.
#  - crit is raised when the age exceeds 60 * $update_every; while the alarm
#    is already CRITICAL, the threshold drops to 1 * $update_every.
# So, once raised, the alarm clears only when the age is back within a
# single $update_every.
# NOTE(review): $update_every is presumably the data collection interval of
# the chart, in seconds - confirm against the Netdata health reference.
10 warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
11 crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
12 delay: down 5m multiplier 1.5 max 1h
13 info: number of seconds since the last successful data collection of the mount point
# Staleness check for block device data: the value of this alarm is the
# number of seconds between $now and $last_collected_t.
17 template: disk_last_collected_secs
19 calc: $now - $last_collected_t
# Two-level thresholds selected by the current $status:
#  - warn is raised when the age exceeds 5 * $update_every; while the alarm
#    is already WARNING or higher, the threshold drops to 1 * $update_every.
#  - crit is raised when the age exceeds 60 * $update_every; while the alarm
#    is already CRITICAL, the threshold drops to 1 * $update_every.
# So, once raised, the alarm clears only when the age is back within a
# single $update_every.
# NOTE(review): $update_every is presumably the data collection interval of
# the chart, in seconds - confirm against the Netdata health reference.
22 warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
23 crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
24 delay: down 5m multiplier 1.5 max 1h
25 info: number of seconds since the last successful data collection of the block device
29 # -----------------------------------------------------------------------------
32 # checking the latest collected values
33 # raise an alarm if the disk is low on
34 # available disk space
# Disk space usage as a percentage: $used relative to ($avail + $used).
36 template: disk_space_usage
38 calc: $used * 100 / ($avail + $used)
# The threshold depends on the current $status, so raising and clearing
# happen at different levels:
#  - warn: raised above 80, and kept while the value stays above 70
#  - crit: raised above 95, and kept while the value stays above 85
41 warn: $this > (($status >= $WARNING ) ? (70) : (80))
42 crit: $this > (($status == $CRITICAL) ? (85) : (95))
43 delay: up 1m down 15m multiplier 1.5 max 1h
44 info: current disk space usage
# Inode usage as a percentage: $used relative to ($avail + $used).
47 template: disk_inode_usage
49 calc: $used * 100 / ($avail + $used)
# The threshold depends on the current $status, so raising and clearing
# happen at different levels:
#  - warn: raised above 80, and kept while the value stays above 75
#  - crit: raised above 95, and kept while the value stays above 90
52 warn: $this > (($status >= $WARNING) ? (75) : (80))
53 crit: $this > (($status == $CRITICAL) ? (90) : (95))
54 delay: up 1m down 15m multiplier 1.5 max 1h
55 info: current disk inode usage
59 # -----------------------------------------------------------------------------
62 # calculate the rate at which the disk fills up,
63 # using as a base the change in available space
64 # during the last hour
66 # this is just a calculation - it has no alarm
67 # we will use it in the next template to find the hours remaining
# Helper value only: this template has no warn/crit lines, so it never
# raises an alarm by itself.
70 template: disk_fill_rate
# The lookup makes $this the minimum of the `avail` dimension over a
# 10 minute window positioned 50 minutes in the past.
72 lookup: min -10m at -50m unaligned of avail
# Change in available space (past minimum minus current $avail) divided by
# the elapsed time converted to hours; the result is positive when the
# available space has shrunk.
# NOTE(review): dividing by 3600 assumes $now and $after are timestamps in
# seconds - confirm against the Netdata health reference.
73 calc: ($this - $avail) / (($now - $after) / 3600)
76 info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
79 # calculate the hours remaining
80 # if the disk continues to fill at the current rate
# Estimated time until the disk is full: the available space divided by the
# value of the disk_fill_rate template (a rate per hour), i.e. hours left.
83 template: out_of_disk_space_time
# NOTE(review): when $disk_fill_rate is 0 this division has a zero
# denominator; how the health engine evaluates that case is not visible in
# this file - confirm before relying on the value.
85 calc: $avail / $disk_fill_rate
# `$this > 0` skips negative results, which occur when the fill rate is
# negative (the disk is freeing up space).
# The upper bound depends on the current $status:
#  - warn: raised below 8 hours, and kept while the value stays below 48
#  - crit: raised below 2 hours, and kept while the value stays below 24
88 warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
89 crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
90 delay: down 15m multiplier 1.2 max 1h
91 info: estimated time the disk will run out of space, if the system continues to add data with the rate of the last hour
95 # -----------------------------------------------------------------------------
98 # raise an alarm if the disk is congested
99 # by calculating the average disk utilization
100 # for the last 10 minutes
# Average disk utilization over the last 10 minutes, compared against the
# $green (warning) and $red (critical) thresholds.
# NOTE(review): $green and $red are not defined in the lines visible here;
# they are presumably set by green:/red: lines of this template - confirm.
102 template: 10min_disk_utilization
104 lookup: average -10m unaligned
# The 0.7 factor applies only while the alarm is already raised:
#  - warn: raised above $green, and kept while the value stays above
#    70% of $green
#  - crit: raised above $red, and kept while the value stays above
#    70% of $red
109 warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1))
110 crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1))
111 delay: down 15m multiplier 1.2 max 1h
112 info: the percentage of time the disk was busy, during the last 10 minutes
116 # raise an alarm if the disk backlog
117 # is above 1000ms (1s) per second
119 # (i.e. the disk cannot catch up)
# Average disk backlog over the last 10 minutes, compared against the
# $green (warning) and $red (critical) thresholds.
# NOTE(review): $green and $red are not defined in the lines visible here;
# they are presumably set by green:/red: lines of this template - confirm.
121 template: 10min_disk_backlog
123 lookup: average -10m unaligned
# The 0.7 factor applies only while the alarm is already raised:
#  - warn: raised above $green, and kept while the value stays above
#    70% of $green
#  - crit: raised above $red, and kept while the value stays above
#    70% of $red
128 warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1))
129 crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1))
130 delay: down 15m multiplier 1.2 max 1h
131 info: average of the kernel estimated disk backlog, for the last 10 minutes