info: number of seconds since the last successful data collection
to: webmaster
+
# -----------------------------------------------------------------------------
# high level response code alarms
+# the following alarms trigger only when there is enough data.
+# we assume there is enough data when:
+#
+# $1m_requests > 120
+#
+# i.e. when there were more than 120 requests during the last minute
+
template: 1m_requests
on: web_log.response_codes
families: *
calc: $1m_2xx * 100 / $1m_requests
units: %
every: 10s
- warn: ($1m_requests > 30) ? ($this < (($status >= $WARNING ) ? ( 98 ) : ( 95 )) ) : ( 0 )
- crit: ($1m_requests > 30) ? ($this < (($status == $CRITICAL) ? ( 95 ) : ( 90 )) ) : ( 0 )
+ warn: ($1m_requests > 120) ? ($this < (($status >= $WARNING ) ? ( 98 ) : ( 95 )) ) : ( 0 )
+ crit: ($1m_requests > 120) ? ($this < (($status == $CRITICAL) ? ( 95 ) : ( 90 )) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
- info: the ratio of HTTP redirects (3xx) vs the successful requests, \
- over the last minute
+ info: the ratio of successful HTTP responses (2xx) over the last minute
to: webmaster
template: 1m_redirects
on: web_log.detailed_response_codes
families: *
lookup: sum -1m unaligned of 301,303,307,308
- calc: $this * 100 / ( $1m_2xx + $this )
+ calc: $this * 100 / $1m_requests
units: %
every: 10s
- warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 2 )) ) : ( 0 )
- crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
+ warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 2 )) ) : ( 0 )
+ crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
- info: the ratio of HTTP redirects (301, 303, 307, 308) vs the successful requests, \
- over the last minute
+ info: the ratio of HTTP redirects (301, 303, 307, 308) over the last minute
to: webmaster
template: 1m_bad_requests
on: web_log.response_codes
families: *
lookup: sum -1m unaligned of 4xx
- calc: $this * 100 / ( $1m_2xx + $this )
+ calc: $this * 100 / $1m_requests
units: %
every: 10s
- warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 5 )) ) : ( 0 )
- crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 5 ) : ( 10 )) ) : ( 0 )
+ warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 5 )) ) : ( 0 )
+ crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 5 ) : ( 10 )) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
- info: the ratio of HTTP bad requests (4xx) vs the successful requests, \
- over the last minute
+ info: the ratio of HTTP bad requests (4xx) over the last minute
to: webmaster
template: 1m_internal_errors
on: web_log.response_codes
families: *
lookup: sum -1m unaligned of 5xx
- calc: $this * 100 / ( $1m_2xx + $this )
+ calc: $this * 100 / $1m_requests
units: %
every: 10s
- warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 2 )) ) : ( 0 )
- crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
+ warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 2 )) ) : ( 0 )
+ crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
- info: the ratio of HTTP internal server errors (5xx) vs the successful \
- requests, over the last minute
+ info: the ratio of HTTP internal server errors (5xx), over the last minute
to: webmaster
+
# -----------------------------------------------------------------------------
# web slow
+# the following alarms trigger only when there is enough data.
+# we assume there is enough data when:
+#
+# $1m_requests > 120
+#
+# i.e. when there were more than 120 requests during the last minute
+
template: 10m_response_time
on: web_log.response_time
families: *
every: 10s
green: 500
red: 1000
- warn: ($1m_requests > 30) ? ($this > $green && $this > ($10m_response_time * 2) ) : ( 0 )
- crit: ($1m_requests > 30) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 )
+ warn: ($1m_requests > 120) ? ($this > $green && $this > ($10m_response_time * 2) ) : ( 0 )
+ crit: ($1m_requests > 120) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
info: the average time to respond to HTTP requests, over the last 1 minute
to: webmaster
# -----------------------------------------------------------------------------
# web too many or too few requests
+# the following alarms trigger only when there is enough data.
+# we assume there is enough data when:
+#
+# $5m_2xx_last > 120
+#
+# i.e. when there were more than 120 requests during the 5 minutes starting
+# at -10m and ending at -5m
+
template: 5m_2xx_last
on: web_log.response_codes
families: *
calc: ($5m_2xx_last > 0)?($5m_2xx_now * 100 / $5m_2xx_last):(100)
units: %
every: 30s
- warn: ($1m_requests > 30) ? (($5m_2xx_last > 30) ? ($this > 200 OR $this < 50) : (0) ) : ( 0 )
- crit: ($1m_requests > 30) ? (($5m_2xx_last > 30) ? ($this > 400 OR $this < 25) : (0) ) : ( 0 )
+ warn: ($5m_2xx_last > 120) ? ($this > 200 OR $this < 50) : (0)
+ crit: ($5m_2xx_last > 120) ? ($this > 400 OR $this < 25) : (0)
delay: down 15m multiplier 1.5 max 1h
options: no-clear-notification
info: the percentage of web requests over the last 5 minutes, \
- compared with the previous 15 minutes
+ compared with the previous 5 minutes
to: webmaster
+ ' data-dimensions="2xx"'
+ ' data-chart-library="gauge"'
+ ' data-title="Successful"'
- + ' data-units="requests"'
+ + ' data-units="requests/s"'
+ ' data-gauge-adjust="width"'
+ ' data-width="12%"'
+ ' data-before="0"'
+ ' data-points="CHART_DURATION"'
+ ' data-common-max="' + id + '"'
+ ' data-colors="' + NETDATA.colors[0] + '"'
+ + ' data-decimal-digits="0"'
+ ' role="application"></div>';
},
+ ' data-dimensions="3xx"'
+ ' data-chart-library="gauge"'
+ ' data-title="Redirects"'
- + ' data-units="requests"'
+ + ' data-units="requests/s"'
+ ' data-gauge-adjust="width"'
+ ' data-width="12%"'
+ ' data-before="0"'
+ ' data-points="CHART_DURATION"'
+ ' data-common-max="' + id + '"'
+ ' data-colors="' + NETDATA.colors[2] + '"'
+ + ' data-decimal-digits="0"'
+ ' role="application"></div>';
},
+ ' data-dimensions="4xx"'
+ ' data-chart-library="gauge"'
+ ' data-title="Bad Requests"'
- + ' data-units="requests"'
+ + ' data-units="requests/s"'
+ ' data-gauge-adjust="width"'
+ ' data-width="12%"'
+ ' data-before="0"'
+ ' data-points="CHART_DURATION"'
+ ' data-common-max="' + id + '"'
+ ' data-colors="' + NETDATA.colors[3] + '"'
+ + ' data-decimal-digits="0"'
+ ' role="application"></div>';
},
+ ' data-dimensions="5xx"'
+ ' data-chart-library="gauge"'
+ ' data-title="Server Errors"'
- + ' data-units="requests"'
+ + ' data-units="requests/s"'
+ ' data-gauge-adjust="width"'
+ ' data-width="12%"'
+ ' data-before="0"'
+ ' data-points="CHART_DURATION"'
+ ' data-common-max="' + id + '"'
+ ' data-colors="' + NETDATA.colors[1] + '"'
+ + ' data-decimal-digits="0"'
+ ' role="application"></div>';
}
]
return '<div data-netdata="' + id + '"'
+ ' data-dimensions="avg"'
+ ' data-chart-library="gauge"'
- + ' data-title="Average Response"'
+ + ' data-title="Average Response Time"'
+ ' data-units="milliseconds"'
+ ' data-gauge-adjust="width"'
+ ' data-width="12%"'
+ ' data-after="-CHART_DURATION"'
+ ' data-points="CHART_DURATION"'
+ ' data-colors="' + NETDATA.colors[4] + '"'
+ + ' data-decimal-digits="2"'
+ ' role="application"></div>';
}
]