# -----------------------------------------------------------------------------
# high level response code alarms
+template: 1m_requests
+ on: web_log.response_codes
+families: *
+ lookup: sum -1m unaligned
+ calc: ($this == 0)?(1):($this)
+ units: requests
+ every: 10s
+ info: the sum of all HTTP requests over the last minute
+
template: 1m_2xx
on: web_log.response_codes
families: *
calc: ($this == 0)?(1):($this)
units: requests
every: 10s
+ # evaluated only when the last minute saw more than 30 requests, otherwise 0;
+ # the inner ternary picks a lower threshold once the alarm is already raised
+ # NOTE(review): $this is a request count here (units: requests), not a percentage - confirm the 1/2/5 thresholds are intended
+ warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 2 )) ) : ( 0 )
+ crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
+ delay: down 15m multiplier 1.5 max 1h
info: the sum of successful HTTP requests over the last minute
+ to: webmaster
template: 1m_redirects
on: web_log.response_codes
calc: $this * 100 / ( $1m_2xx + $this )
units: %
every: 10s
- warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 2 ))
- crit: $this > (($status == $CRITICAL) ? ( 2 ) : ( 5 ))
+ warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 2 )) ) : ( 0 )
+ crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
info: the ratio of HTTP redirects (3xx) vs the successful requests, \
over the last minute
calc: $this * 100 / ( $1m_2xx + $this )
units: %
every: 10s
- warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 5 ))
- crit: $this > (($status == $CRITICAL) ? ( 5 ) : ( 10 ))
+ warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 5 )) ) : ( 0 )
+ crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 5 ) : ( 10 )) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
info: the ratio of HTTP bad requests (4xx) vs the successful requests, \
over the last minute
calc: $this * 100 / ( $1m_2xx + $this )
units: %
every: 10s
- warn: $this > (($status >= $WARNING) ? ( 1 ) : ( 2 ))
- crit: $this > (($status == $CRITICAL) ? ( 2 ) : ( 5 ))
+ warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 2 )) ) : ( 0 )
+ crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
info: the ratio of HTTP internal server errors (5xx) vs the successful \
requests, over the last minute
every: 30s
info: the average time to respond to HTTP requests, over the last 10 minutes
-
template: web_slow
on: web_log.response_time
families: *
every: 10s
green: 500
red: 1000
- warn: $this > $green && $this > ($10m_response_time * 2)
- crit: $this > $red && $this > ($10m_response_time * 4)
+ warn: ($1m_requests > 30) ? ($this > $green && $this > ($10m_response_time * 2) ) : ( 0 )
+ crit: ($1m_requests > 30) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
info: the average time to respond to HTTP requests, over the last 1 minute
to: webmaster
calc: ($5m_2xx_last > 0)?($5m_2xx_now * 100 / $5m_2xx_last):(100)
units: %
every: 30s
- warn: ($5m_2xx_last > 30)?($this > 200 OR $this < 50):(0)
- crit: ($5m_2xx_last > 30)?($this > 400 OR $this < 25):(0)
+ warn: ($1m_requests > 30) ? (($5m_2xx_last > 30) ? ($this > 200 OR $this < 50) : (0) ) : ( 0 )
+ crit: ($1m_requests > 30) ? (($5m_2xx_last > 30) ? ($this > 400 OR $this < 25) : (0) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
options: no-clear-notification
info: the percentage of web requests over the last 5 minutes, \
job_name = find_job_name(self.override_name, self.name)
self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
- ' "Response Codes" requests/s responses' \
+ ' "Detailed Response Codes" requests/s responses' \
' web_log.detailed_response_codes stacked 1 %s\n' % (job_name, self.update_every)
self.http_method_chart = 'CHART %s.http_method' \
- ' "" "Requests Per HTTP Method" requests/s requests' \
+ ' "" "Requests Per HTTP Method" requests/s "http methods"' \
' web_log.http_method stacked 2 %s\n' % (job_name, self.update_every)
if regex_name == 'access_apache_ext':