2 # make sure we can collect web log data
4 template: last_collected_secs
5 on: web_log.response_codes
7 calc: $now - $last_collected_t
10 warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
11 crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
12 delay: down 5m multiplier 1.5 max 1h
13 info: number of seconds since the last successful data collection
16 # -----------------------------------------------------------------------------
17 # high level response code alarms
20 on: web_log.response_codes
22 lookup: sum -1m unaligned of 2xx
23 calc: ($this == 0)?(1):($this)
26 info: the sum of successful HTTP requests over the last minute
28 template: 1m_redirects
29 on: web_log.response_codes
31 lookup: sum -1m unaligned of 3xx
32 calc: $this * 100 / ( $1m_2xx + $this )
35 warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 2 ))
36 crit: $this > (($status == $CRITICAL) ? ( 2 ) : ( 5 ))
37 delay: down 15m multiplier 1.5 max 1h
38 info: the ratio of HTTP redirects (3xx) vs the successful requests, over the last minute
42 template: 1m_bad_requests
43 on: web_log.response_codes
45 lookup: sum -1m unaligned of 4xx
46 calc: $this * 100 / ( $1m_2xx + $this )
49 warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 5 ))
50 crit: $this > (($status == $CRITICAL) ? ( 5 ) : ( 10 ))
51 delay: down 15m multiplier 1.5 max 1h
52 info: the ratio of HTTP bad requests (4xx) vs the successful requests, over the last minute
56 template: 1m_internal_errors
57 on: web_log.response_codes
59 lookup: sum -1m unaligned of 5xx
60 calc: $this * 100 / ( $1m_2xx + $this )
63 warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 2 ))
64 crit: $this > (($status == $CRITICAL) ? ( 2 ) : ( 5 ))
65 delay: down 15m multiplier 1.5 max 1h
66 info: the ratio of HTTP internal server errors (5xx) vs the successful \
67 requests, over the last minute
70 # -----------------------------------------------------------------------------
73 template: 10m_response_time
74 on: web_log.response_time
76 lookup: average -10m unaligned of avg
79 info: the average time to respond to HTTP requests, over the last 10 minutes
83 on: web_log.response_time
85 lookup: average -1m unaligned of avg
90 warn: $this > $green && $this > ($10m_response_time * 2)
91 crit: $this > $red && $this > ($10m_response_time * 4)
92 delay: down 15m multiplier 1.5 max 1h
93 info: the average time to respond to HTTP requests, over the last 1 minute
96 # -----------------------------------------------------------------------------
97 # too many or too few web requests
100 on: web_log.response_codes
102 lookup: average -5m at -5m unaligned of 2xx
105 info: average successful HTTP requests over the 5-minute window ending 5 minutes ago
108 on: web_log.response_codes
110 lookup: average -5m unaligned of 2xx
113 info: average successful HTTP requests over the last 5 minutes
115 template: 5m_requests_ratio
116 on: web_log.response_codes
118 calc: ($5m_2xx_last > 0)?($5m_2xx_now * 100 / $5m_2xx_last):(100)
121 warn: ($5m_2xx_last > 30)?($this > 200 OR $this < 50):(0)
122 crit: ($5m_2xx_last > 30)?($this > 400 OR $this < 25):(0)
123 delay: down 15m multiplier 1.5 max 1h
124 options: no-clear-notification
125 info: the percentage of successful web requests over the last 5 minutes, \
126 compared with the previous 5 minutes