2 # make sure we can collect web log data
# Seconds elapsed since the chart was last collected ($now - $last_collected_t).
# The alarm is raised to WARNING above 5 * $update_every and to CRITICAL above
# 60 * $update_every; while already in that state the threshold drops to
# $update_every, so the alarm only clears once collection is current again.
4 template: last_collected_secs
5 on: web_log.response_codes
6 calc: $now - $last_collected_t
9 warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
10 crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
11 delay: down 5m multiplier 1.5 max 1h
12 info: number of seconds since the last successful data collection
15 # -----------------------------------------------------------------------------
16 # high level response code alarms
# Sum of the 2xx dimension over the last minute. The calc line replaces a zero
# sum with 1.
# NOTE(review): presumably so that other alarms can divide by this value
# without hitting a zero denominator - confirm against the alarms that use it.
19 on: web_log.response_codes
20 lookup: sum -1m unaligned of 2xx
21 calc: ($this == 0)?(1):($this)
24 info: the sum of successful HTTP requests over the last minute
# 3xx responses over the last minute, as a percentage of (3xx + $1m_2xx).
# Thresholds depend on the current status: WARNING is raised above 2 and kept
# while above 1; CRITICAL is raised above 5 and kept while above 2.
26 template: 1m_redirects
27 on: web_log.response_codes
28 lookup: sum -1m unaligned of 3xx
29 calc: $this * 100 / ( $1m_2xx + $this )
32 warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 2 ))
33 crit: $this > (($status == $CRITICAL) ? ( 2 ) : ( 5 ))
34 delay: down 15m multiplier 1.5 max 1h
35 info: the ratio of HTTP redirects (3xx) vs the successful requests, \
# 4xx responses over the last minute, as a percentage of (4xx + $1m_2xx).
# Thresholds depend on the current status: WARNING is raised above 5 and kept
# while above 1; CRITICAL is raised above 10 and kept while above 5.
39 template: 1m_bad_requests
40 on: web_log.response_codes
41 lookup: sum -1m unaligned of 4xx
42 calc: $this * 100 / ( $1m_2xx + $this )
45 warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 5 ))
46 crit: $this > (($status == $CRITICAL) ? ( 5 ) : ( 10 ))
47 delay: down 15m multiplier 1.5 max 1h
48 info: the ratio of HTTP bad requests (4xx) vs the successful requests, \
# 5xx responses over the last minute, as a percentage of (5xx + $1m_2xx).
# Thresholds depend on the current status: WARNING is raised above 2 and kept
# while above 1; CRITICAL is raised above 5 and kept while above 2.
52 template: 1m_internal_errors
53 on: web_log.response_codes
54 lookup: sum -1m unaligned of 5xx
55 calc: $this * 100 / ( $1m_2xx + $this )
58 warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 2 ))
59 crit: $this > (($status == $CRITICAL) ? ( 2 ) : ( 5 ))
60 delay: down 15m multiplier 1.5 max 1h
61 info: the ratio of HTTP internal server errors (5xx) vs the successful \
62 requests, over the last minute
65 # -----------------------------------------------------------------------------
# Average of the `avg` dimension of web_log.response_time over the last
# 10 minutes. Other alarms can reference it as $10m_response_time.
68 template: 10m_response_time
69 on: web_log.response_time
70 lookup: average -10m unaligned of avg
73 info: the average time to respond to HTTP requests, over the last 10 minutes
# 1-minute average response time. It must exceed both the fixed threshold
# ($green to warn, $red for critical) and a multiple of $10m_response_time
# (x2 to warn, x4 for critical) before the alarm is raised.
# The lookup uses `average`, not `sum`, so the value is a mean on the same
# scale as $10m_response_time rather than a total that grows with the number
# of samples in the window.
77 on: web_log.response_time
78 lookup: average -1m unaligned of avg
83 warn: $this > $green && $this > ($10m_response_time * 2)
84 crit: $this > $red && $this > ($10m_response_time * 4)
85 delay: down 15m multiplier 1.5 max 1h
86 info: the average time to respond to HTTP requests, over the last 1 minute
89 # -----------------------------------------------------------------------------
90 # too many or too few web requests
# Baseline window: `-5m at -5m` averages the 2xx dimension over the 5 minutes
# that ended 5 minutes ago, i.e. the window just before the most recent one.
93 on: web_log.response_codes
94 lookup: average -5m at -5m unaligned of 2xx
97 info: average successful HTTP requests over the 5 minutes before the last 5 minutes
# Average of the 2xx dimension over the most recent 5 minutes.
100 on: web_log.response_codes
101 lookup: average -5m unaligned of 2xx
104 info: average successful HTTP requests over the last 5 minutes
# $5m_2xx_now expressed as a percentage of $5m_2xx_last; evaluates to 100 when
# $5m_2xx_last is not positive. warn/crit are only evaluated when
# $5m_2xx_last is above 30 and are otherwise 0, so low-traffic periods cannot
# raise the alarm. WARNING: ratio above 200 or below 50; CRITICAL: above 400
# or below 25.
106 template: 5m_requests_ratio
107 on: web_log.response_codes
108 calc: ($5m_2xx_last > 0)?($5m_2xx_now * 100 / $5m_2xx_last):(100)
111 warn: ($5m_2xx_last > 30)?($this > 200 OR $this < 50):(0)
112 crit: ($5m_2xx_last > 30)?($this > 400 OR $this < 25):(0)
113 delay: down 15m multiplier 1.5 max 1h
114 options: no-clear-notification
115 info: the percentage of web requests over the last 5 minutes, \
116 compared with the previous 5 minutes