From: Costa Tsaousis (ktsaou) Date: Sat, 11 Feb 2017 19:31:12 +0000 (+0200) Subject: web_log fixes and improvements X-Git-Tag: ab-debian_0.20170213.01-0ab1~1^2~12^2~2 X-Git-Url: https://arthur.barton.de/gitweb/?a=commitdiff_plain;h=3b051b5a2e2a09d4c57f7a7b4354150a1acf9ce8;hp=8fde4bd8c8d59766249b46fa8e50ab4e41484a10;p=netdata.git web_log fixes and improvements --- diff --git a/conf.d/health.d/web_log.conf b/conf.d/health.d/web_log.conf index b6d2d537..c8d0c112 100644 --- a/conf.d/health.d/web_log.conf +++ b/conf.d/health.d/web_log.conf @@ -13,9 +13,17 @@ families: * info: number of seconds since the last successful data collection to: webmaster + # ----------------------------------------------------------------------------- # high level response code alarms +# the following alarms trigger only when there are enough data. +# we assume there are enough data when: +# +# $1m_requests > 120 +# +# i.e. when there are more than 120 requests during the last minute + template: 1m_requests on: web_log.response_codes families: * @@ -40,58 +48,62 @@ families: * calc: $1m_2xx * 100 / $1m_requests units: % every: 10s - warn: ($1m_requests > 30) ? ($this < (($status >= $WARNING ) ? ( 98 ) : ( 95 )) ) : ( 0 ) - crit: ($1m_requests > 30) ? ($this < (($status == $CRITICAL) ? ( 95 ) : ( 90 )) ) : ( 0 ) + warn: ($1m_requests > 120) ? ($this < (($status >= $WARNING ) ? ( 98 ) : ( 95 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this < (($status == $CRITICAL) ? ( 95 ) : ( 90 )) ) : ( 0 ) delay: down 15m multiplier 1.5 max 1h - info: the ratio of HTTP redirects (3xx) vs the successful requests, \ - over the last minute + info: the ratio of successful HTTP responses (2xx) over the last minute to: webmaster template: 1m_redirects on: web_log.detailed_response_codes families: * lookup: sum -1m unaligned of 301,303,307,308 - calc: $this * 100 / ( $1m_2xx + $this ) + calc: $this * 100 / $1m_requests units: % every: 10s - warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING ) ? 
( 1 ) : ( 2 )) ) : ( 0 ) - crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 ) + warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 2 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 ) delay: down 15m multiplier 1.5 max 1h - info: the ratio of HTTP redirects (301, 303, 307, 308) vs the successful requests, \ - over the last minute + info: the ratio of HTTP redirects (301, 303, 307, 308) over the last minute to: webmaster template: 1m_bad_requests on: web_log.response_codes families: * lookup: sum -1m unaligned of 4xx - calc: $this * 100 / ( $1m_2xx + $this ) + calc: $this * 100 / $1m_requests units: % every: 10s - warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 5 )) ) : ( 0 ) - crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 5 ) : ( 10 )) ) : ( 0 ) + warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 5 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 5 ) : ( 10 )) ) : ( 0 ) delay: down 15m multiplier 1.5 max 1h - info: the ratio of HTTP bad requests (4xx) vs the successful requests, \ - over the last minute + info: the ratio of HTTP bad requests (4xx) over the last minute to: webmaster template: 1m_internal_errors on: web_log.response_codes families: * lookup: sum -1m unaligned of 5xx - calc: $this * 100 / ( $1m_2xx + $this ) + calc: $this * 100 / $1m_requests units: % every: 10s - warn: ($1m_requests > 30) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 2 )) ) : ( 0 ) - crit: ($1m_requests > 30) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 ) + warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 2 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? 
( 2 ) : ( 5 )) ) : ( 0 ) delay: down 15m multiplier 1.5 max 1h - info: the ratio of HTTP internal server errors (5xx) vs the successful \ - requests, over the last minute + info: the ratio of HTTP internal server errors (5xx), over the last minute to: webmaster + # ----------------------------------------------------------------------------- # web slow +# the following alarms trigger only when there are enough data. +# we assume there are enough data when: +# +# $1m_requests > 120 +# +# i.e. when there are more than 120 requests during the last minute + template: 10m_response_time on: web_log.response_time families: * @@ -108,8 +120,8 @@ families: * every: 10s green: 500 red: 1000 - warn: ($1m_requests > 30) ? ($this > $green && $this > ($10m_response_time * 2) ) : ( 0 ) - crit: ($1m_requests > 30) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 ) + warn: ($1m_requests > 120) ? ($this > $green && $this > ($10m_response_time * 2) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 ) delay: down 15m multiplier 1.5 max 1h info: the average time to respond to HTTP requests, over the last 1 minute to: webmaster @@ -117,6 +129,14 @@ families: * # ----------------------------------------------------------------------------- # web too many or too few requests +# the following alarms trigger only when there are enough data. +# we assume there are enough data when: +# +# $5m_2xx_last > 120 +# +# i.e. when there were more than 120 requests during the 5 minutes starting +# at -10m and ending at -5m + template: 5m_2xx_last on: web_log.response_codes families: * @@ -139,11 +159,11 @@ families: * calc: ($5m_2xx_last > 0)?($5m_2xx_now * 100 / $5m_2xx_last):(100) units: % every: 30s - warn: ($1m_requests > 30) ? (($5m_2xx_last > 30) ? ($this > 200 OR $this < 50) : (0) ) : ( 0 ) - crit: ($1m_requests > 30) ? (($5m_2xx_last > 30) ? ($this > 400 OR $this < 25) : (0) ) : ( 0 ) + warn: ($5m_2xx_last > 120) ? 
($this > 200 OR $this < 50) : (0) + crit: ($5m_2xx_last > 120) ? ($this > 400 OR $this < 25) : (0) delay: down 15m multiplier 1.5 max 1h options: no-clear-notification info: the percentage of web requests over the last 5 minutes, \ - compared with the previous 15 minutes + compared with the previous 5 minutes to: webmaster diff --git a/web/dashboard_info.js b/web/dashboard_info.js index 7da621ef..4eca27e0 100644 --- a/web/dashboard_info.js +++ b/web/dashboard_info.js @@ -857,7 +857,7 @@ netdataDashboard.context = { + ' data-dimensions="2xx"' + ' data-chart-library="gauge"' + ' data-title="Successful"' - + ' data-units="requests"' + + ' data-units="requests/s"' + ' data-gauge-adjust="width"' + ' data-width="12%"' + ' data-before="0"' @@ -865,6 +865,7 @@ netdataDashboard.context = { + ' data-points="CHART_DURATION"' + ' data-common-max="' + id + '"' + ' data-colors="' + NETDATA.colors[0] + '"' + + ' data-decimal-digits="0"' + ' role="application">'; }, @@ -874,7 +875,7 @@ netdataDashboard.context = { + ' data-dimensions="3xx"' + ' data-chart-library="gauge"' + ' data-title="Redirects"' - + ' data-units="requests"' + + ' data-units="requests/s"' + ' data-gauge-adjust="width"' + ' data-width="12%"' + ' data-before="0"' @@ -882,6 +883,7 @@ netdataDashboard.context = { + ' data-points="CHART_DURATION"' + ' data-common-max="' + id + '"' + ' data-colors="' + NETDATA.colors[2] + '"' + + ' data-decimal-digits="0"' + ' role="application">'; }, @@ -891,7 +893,7 @@ netdataDashboard.context = { + ' data-dimensions="4xx"' + ' data-chart-library="gauge"' + ' data-title="Bad Requests"' - + ' data-units="requests"' + + ' data-units="requests/s"' + ' data-gauge-adjust="width"' + ' data-width="12%"' + ' data-before="0"' @@ -899,6 +901,7 @@ netdataDashboard.context = { + ' data-points="CHART_DURATION"' + ' data-common-max="' + id + '"' + ' data-colors="' + NETDATA.colors[3] + '"' + + ' data-decimal-digits="0"' + ' role="application">'; }, @@ -908,7 +911,7 @@ netdataDashboard.context = 
{ + ' data-dimensions="5xx"' + ' data-chart-library="gauge"' + ' data-title="Server Errors"' - + ' data-units="requests"' + + ' data-units="requests/s"' + ' data-gauge-adjust="width"' + ' data-width="12%"' + ' data-before="0"' @@ -916,6 +919,7 @@ netdataDashboard.context = { + ' data-points="CHART_DURATION"' + ' data-common-max="' + id + '"' + ' data-colors="' + NETDATA.colors[1] + '"' + + ' data-decimal-digits="0"' + ' role="application">'; } ] @@ -928,7 +932,7 @@ netdataDashboard.context = { return '
'; } ] diff --git a/web/index.html b/web/index.html index b21998bd..e8e821f4 100644 --- a/web/index.html +++ b/web/index.html @@ -2800,7 +2800,7 @@ }); NETDATA.requiredJs.push({ - url: NETDATA.serverDefault + 'dashboard_info.js?v20170211-19', + url: NETDATA.serverDefault + 'dashboard_info.js?v20170211-20', async: false, isAlreadyLoaded: function() { return false; } });