1 # -----------------------------------------------------------------------------
2 # make sure we collect values for each disk
# staleness check for a mount point: $this is the age of the newest sample,
# computed as $now - $last_collected_t.
# WARNING when that age exceeds 5 * $update_every, CRITICAL when it exceeds
# 60 * $update_every.
# delay (per Netdata's `delay` syntax): notifications are not delayed when the
# status rises (up 0); when it falls they are held 15m, the hold is multiplied
# by 1.5 if the state changes again meanwhile, and is capped at 1h.
5 template: disk_space_last_collected_secs
7 calc: $now - $last_collected_t
10 warn: $this > ( 5 * $update_every)
11 crit: $this > (60 * $update_every)
12 delay: up 0 down 15m multiplier 1.5 max 1h
13 info: number of seconds since the last successful data collection of the mount point
# staleness check for a block device: $this is the age of the newest sample,
# computed as $now - $last_collected_t.
# WARNING when that age exceeds 5 * $update_every, CRITICAL when it exceeds
# 60 * $update_every.
# delay (per Netdata's `delay` syntax): notifications are not delayed when the
# status rises (up 0); when it falls they are held 15m, the hold is multiplied
# by 1.5 if the state changes again meanwhile, and is capped at 1h.
17 template: disk_last_collected_secs
19 calc: $now - $last_collected_t
22 warn: $this > ( 5 * $update_every)
23 crit: $this > (60 * $update_every)
24 delay: up 0 down 15m multiplier 1.5 max 1h
25 info: number of seconds since the last successful data collection of the block device
29 # -----------------------------------------------------------------------------
32 # checking the latest collected values
33 # raise an alarm if the disk is low on
34 # available disk space
# used disk space as a percentage: $used * 100 / ($avail + $used).
# only $avail and $used enter the denominator.
# delay: notifications are held 1m when the status rises and 15m when it
# falls (multiplied by 1.5 on repeated state changes, capped at 1h).
36 template: disk_space_usage
38 calc: $used * 100 / ($avail + $used)
43 delay: up 1m down 15m multiplier 1.5 max 1h
44 info: current disk space usage
# used inodes as a percentage: $used * 100 / ($avail + $used).
# only $avail and $used enter the denominator.
# delay: notifications are held 1m when the status rises and 15m when it
# falls (multiplied by 1.5 on repeated state changes, capped at 1h).
47 template: disk_inode_usage
49 calc: $used * 100 / ($avail + $used)
54 delay: up 1m down 15m multiplier 1.5 max 1h
55 info: current disk inode usage
59 # -----------------------------------------------------------------------------
62 # calculate the rate at which the disk fills up,
63 # using as a base the change in available space
64 # during the last hour
66 # this is just a calculation - it has no alarm
67 # we will use it in the next template to find the hours remaining
# lookup: the minimum of the `avail` dimension over a 10-minute window that
# ends 50 minutes ago; the result becomes $this for the calc line.
# calc: ($this - $avail) is how much available space was lost since that
# window (positive = disk filling, negative = space freed). It is divided by
# the time elapsed since the window start ($after), converted to hours by
# / 3600 - assumes $now and $after are timestamps in seconds; TODO confirm.
70 template: disk_fill_rate
72 lookup: min -10m at -50m unaligned of avail
73 calc: ($this - $avail) / (($now - $after) / 3600)
76 info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
79 # calculate the hours remaining
80 # if the disk continues to fill at this rate
# estimated time until the disk is full: the currently available space divided
# by the value of the disk_fill_rate alarm ($disk_fill_rate).
# the `$this > 0` guard on both thresholds restricts the alarm to positive
# estimates, i.e. a disk that is filling up; a negative fill rate (space being
# freed) produces a negative value and never triggers.
# WARNING below 8, CRITICAL below 2 - presumably hours, since the fill rate
# is computed per hour; verify against the `units:` line.
# delay: no notification delay when the status rises; 15m when it falls
# (multiplied by 1.5 on repeated state changes, capped at 1h).
83 template: out_of_disk_space_time
85 calc: $avail / $disk_fill_rate
88 warn: $this > 0 and $this < 8
89 crit: $this > 0 and $this < 2
90 delay: up 0 down 15m multiplier 1.5 max 1h
91 info: estimated time the disk will run out of space, if the system continues to add data with the rate of the last hour
95 # -----------------------------------------------------------------------------
98 # raise an alarm if the disk is congested
99 # by calculating the average disk utilization
100 # for the last 10 minutes
# lookup: average of the chart's values over the last 10 minutes; no `of`
# clause is given, so no specific dimension is selected here.
# `unaligned` - presumably keeps the window anchored at "now" instead of
# snapping it to a multiple of its duration; confirm in the Netdata docs.
# delay: no notification delay when the status rises; 30m when it falls
# (multiplied by 1.5 on repeated state changes, capped at 1h).
102 template: 10min_disk_utilization
104 lookup: average -10m unaligned
111 delay: up 0 down 30m multiplier 1.5 max 1h
112 info: the percentage of time the disk was busy, during the last 10 minutes
116 # raise an alarm if the disk backlog
117 # is above 1000ms (1s) per second
119 # (i.e. the disk cannot catch up)
# lookup: average of the chart's values over the last 10 minutes; no `of`
# clause is given, so no specific dimension is selected here.
# `unaligned` - presumably keeps the window anchored at "now" instead of
# snapping it to a multiple of its duration; confirm in the Netdata docs.
# delay: notifications are held 1m when the status rises and 30m when it
# falls (multiplied by 1.5 on repeated state changes, capped at 1h).
121 template: 10min_disk_backlog
123 lookup: average -10m unaligned
130 delay: up 1m down 30m multiplier 1.5 max 1h
131 info: average of the kernel estimated disk backlog, for the last 10 minutes