arthur.barton.de Git - netdata.git/commitdiff
Merge pull request #967 from ktsaou/master
author Costa Tsaousis <costa@tsaousis.gr>
Fri, 16 Sep 2016 19:04:18 +0000 (22:04 +0300)
committer GitHub <noreply@github.com>
Fri, 16 Sep 2016 19:04:18 +0000 (22:04 +0300)
alarm notifications now support different severity per recipient

conf.d/health_alarm_notify.conf
configs.signatures
plugins.d/alarm-notify.sh
src/health.c

index c7dd068f2bdeb008fd0dce44e59fa6a435a4341c..9d3cc330c5d038b1d7d17a7ec248fcb4b70b09d4 100644 (file)
 #
 # This file is a BASH script itself.
 
+
+###############################################################################
+# proxy configuration
+
 # if you need to send curl based notifications (pushover, slack) via a proxy
 # set these:
 #export http_proxy="http://10.0.0.1:3128/"
@@ -33,6 +37,24 @@ sendmail=""
 curl=""
 
 
+###############################################################################
+# RECIPIENT ATTRIBUTES
+
+# When you define recipients (all types):
+#
+#  - email addresses
+#  - pushover user tokens
+#  - slack channels
+#
+# You can append |critical to a recipient, to send it only critical alarms.
+#
+# In these examples, the first recipient receives all the alarms
+# while the second one receives only the critical ones:
+#  email   : "user1@example.com user2@example.com|critical"
+#  pushover: "2987343...9437837 8756278...2362736|critical"
+#  slack   : "alarms disasters|critical"
+
+
 ###############################################################################
 # sending emails
 
@@ -44,6 +66,7 @@ SEND_EMAIL="YES"
 
 # if a role recipient is not configured, an email will be sent to:
 DEFAULT_RECIPIENT_EMAIL="root"
+# to receive only critical alarms, set it to "root|critical"
 
 
 ###############################################################################
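
A minimal sketch of how the new per-recipient severity could be used in this
config, assuming the role_recipients_* arrays read by alarm-notify.sh below;
the "sysadmin" role and the addresses are placeholders, not part of this commit:

    # hypothetical role configuration - role name and addresses are examples only
    role_recipients_email[sysadmin]="admin@example.com oncall@example.com|critical"
    role_recipients_slack[sysadmin]="alarms disasters|critical"

    # fall-back recipient, limited to critical alarms
    DEFAULT_RECIPIENT_EMAIL="root|critical"
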
index 9d57699cad73d2c3302d4a92ee1c5c81e4038dec..9c6acee2be694210e93856e02c91506ebf25768d 100644 (file)
@@ -152,6 +152,7 @@ declare -A configs_signatures=(
   ['a55133f1b0be0a4255057849dd451b09']='health_alarm_notify.conf'
   ['a89c516a1144435a88decf25509318ac']='health_alarm_notify.conf'
   ['ce2e8768964a936f58c4c2144aee8a01']='health_alarm_notify.conf'
+  ['e3023092e3b2bbb5351e0fe6682f4fe9']='health_alarm_notify.conf'
   ['f8dade4484f1b6a48655388502df7d5a']='health_alarm_notify.conf'
   ['ff1b3d8ae8b2149c711d8da9b7a9c4bd']='health_alarm_notify.conf'
   ['707a63f53f4b32e01d134ae90ba94aad']='health_email_recipients.conf'
index 0b25a6a49411b23e98bf9187825eb13ad38b1a15..9323bde7f00e030fc2bd500615d590abf2a6457c 100755 (executable)
@@ -94,6 +94,46 @@ if [ -f "${NETDATA_CONFIG_DIR}/health_alarm_notify.conf" ]
     source "${NETDATA_CONFIG_DIR}/health_alarm_notify.conf"
 fi
 
+# -----------------------------------------------------------------------------
+# filter recipients based on the criticality of each
+
+filter_recipient_by_criticality() {
+    local method="${1}" x="${2}" r s
+    shift
+
+    r="${x/|*/}"
+    s="${x/*|/}"
+
+    # no severity filtering for this recipient
+    [ "${r}" = "${s}" ] && return 0
+
+    # the severity is invalid
+    s="${s^^}"
+    [ "${s}" != "CRITICAL" ] && return 0
+
+    # the new or the old status matches the severity
+    if [ "${s}" = "${status}" -o "${s}" = "${old_status}" ]
+        then
+        [ ! -d "${NETDATA_CACHE_DIR}/alarm-notify/${method}/${r}" ] && \
+            mkdir -p "${NETDATA_CACHE_DIR}/alarm-notify/${method}/${r}"
+
+        # we need to keep track of the notifications we sent
+        # so that the same user will receive the recovery
+        # even if old_status does not match the required severity
+        touch "${NETDATA_CACHE_DIR}/alarm-notify/${method}/${r}/${alarm_id}"
+        return 0
+    fi
+
+    # this is a cleared alarm for which we have sent a notification
+    if [ "${status}" != "WARNING" -a "${status}" != "CRITICAL" -a -f "${NETDATA_CACHE_DIR}/alarm-notify/${method}/${r}/${alarm_id}" ]
+        then
+        rm "${NETDATA_CACHE_DIR}/alarm-notify/${method}/${r}/${alarm_id}"
+        return 0
+    fi
+
+    return 1
+}
+
 # -----------------------------------------------------------------------------
 # find the recipient's addresses per method
 
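
A minimal sketch of the bash parameter expansions the new
filter_recipient_by_criticality() relies on, run with illustrative values;
only the expansions themselves come from the function above:

    # illustrative values - mirrors the r/s parsing in the function above
    x="user2@example.com|critical"
    r="${x/|*/}"        # recipient without the severity suffix -> user2@example.com
    s="${x/*|/}"        # severity suffix                       -> critical
    echo "${r} ${s^^}"  # -> user2@example.com CRITICAL

    x="user1@example.com"            # nothing appended
    r="${x/|*/}"; s="${x/*|/}"       # both expand to the full string
    [ "${r}" = "${s}" ] && echo "no severity filtering for this recipient"
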
@@ -108,17 +148,26 @@ do
     # email
     a="${role_recipients_email[${recipient}]}"
     [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_EMAIL}"
-    for r in ${a//,/ }; do arr_email[${r}]="1"; done
+    for r in ${a//,/ }
+    do
+        filter_recipient_by_criticality email "${r}" && arr_email[${r/|*/}]="1"
+    done
 
     # pushover
     a="${role_recipients_pushover[${recipient}]}"
     [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PUSHOVER}"
-    for r in ${a//,/ }; do arr_pushover[${r}]="1"; done
+    for r in ${a//,/ }
+    do
+        filter_recipient_by_criticality pushover "${r}" && arr_pushover[${r/|*/}]="1"
+    done
 
     # slack
     a="${role_recipients_slack[${recipient}]}"
     [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_SLACK}"
-    for r in ${a//,/ }; do arr_slack[${r}]="1"; done
+    for r in ${a//,/ }
+    do
+        filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1"
+    done
 done
 
 # build the list of slack recipients (channels)
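
A hedged sketch of how the loops above deduplicate recipients: the keys of an
associative array collapse duplicates, and the "|critical" suffix is stripped
with ${r/|*/} before the key is stored. The recipients here are placeholders:

    # illustrative only - mimics the arr_email handling above
    declare -A arr_email=()
    for r in "user1@example.com" "user2@example.com|critical" "user1@example.com"
    do
        # filter_recipient_by_criticality email "${r}" would be consulted here
        arr_email[${r/|*/}]="1"
    done
    echo "${!arr_email[@]}"   # -> user1@example.com user2@example.com (order may vary)
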
@@ -136,7 +185,7 @@ do
     [ ! -z "${to_email}" ] && to_email="${to_email}, "
     to_email="${to_email}${x}"
 done
-[ -z "${to_email}" ] && to_email="root"
+[ -z "${to_email}" ] && SEND_EMAIL="NO"
 
 
 # -----------------------------------------------------------------------------
@@ -339,9 +388,9 @@ send_pushover() {
 # slack sender
 
 send_slack() {
-    local webhook="${1}" channels="${2}" when="${3}" username="${4}" image="${5}" author="${6}" httpcode sent=0 channel color
+    local webhook="${1}" channels="${2}" when="${3}" username="${4}" image="${5}" httpcode sent=0 channel color
 
-    if [ "${SEND_SLACK}" = "YES" -a ! -z "${webhook}" -a ! -z "${channels}" -a ! -z "${username}" -a ! -z "${image}" -a ! -z "${author}" ]
+    if [ "${SEND_SLACK}" = "YES" -a ! -z "${webhook}" -a ! -z "${channels}" -a ! -z "${username}" -a ! -z "${image}" ]
         then
 
         case "${status}" in
@@ -354,7 +403,7 @@ send_slack() {
         for channel in ${channels}
         do
             httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null -X POST --data-urlencode \
-                "payload={\"channel\": \"#${channel}\", \"username\": \"${username}\", \"text\": \"${host} ${status_message} - ${author} ${raised_for} - click <${goto_url}|here> to view the netdata dashboard.\", \"icon_url\": \"${image}\", \"attachments\": [{\"fallback\": \"${alarm} - ${info}\", \"color\": \"${color}\", \"title\": \"${alarm}\", \"title_link\": \"${goto_url}\", \"text\": \"${info}\", \"footer\": \"netdata\", \"footer_icon\": \"${images_base_url}/images/seo-performance-128.png\", \"ts\": ${when}}]}" \
+                "payload={\"channel\": \"#${channel}\", \"username\": \"${username}\", \"text\": \"${host} ${status_message} - ${chart} (${family}) ${alarm} - click <${goto_url}|here> to view the netdata dashboard.\", \"icon_url\": \"${image}\", \"attachments\": [{\"fallback\": \"${alarm} - ${info}\", \"color\": \"${color}\", \"title\": \"${alarm}\", \"title_link\": \"${goto_url}\", \"text\": \"${chart} (${family}): ${info}\", \"footer\": \"netdata\", \"footer_icon\": \"${images_base_url}/images/seo-performance-128.png\", \"ts\": ${when}}]}" \
                 "${webhook}")
 
             if [ "${httpcode}" == "200" ]
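
To see roughly how the reworked Slack message text renders, here is a sketch
that only interpolates the changed payload string; every value below is made
up for illustration:

    # hypothetical values - only the string layout comes from the payload above
    host="myhost"; status_message="is critical"
    chart="disk.space"; family="/"
    alarm="disk space usage (was warning for 10 minutes)"
    goto_url="http://myhost:19999/"
    echo "${host} ${status_message} - ${chart} (${family}) ${alarm} - click <${goto_url}|here> to view the netdata dashboard."
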
@@ -389,7 +438,7 @@ severity="${status}"
 # the time the alarm was raised
 duration4human ${duration} >/dev/null; duration_txt="${REPLY}"
 duration4human ${non_clear_duration} >/dev/null; non_clear_duration_txt="${REPLY}"
-raised_for="(was ${old_status,,} for ${duration_txt}"
+raised_for="(was ${old_status,,} for ${duration_txt})"
 
 # the key status message
 status_message="status unknown"
@@ -424,7 +473,7 @@ case "${status}" in
 
                # don't show the value when the status is CLEAR
                # for certain alarms, this value might not have any meaning
-               alarm="${name//_/ }"
+               alarm="${name//_/ } ${raised_for}"
                ;;
 esac
 
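
With the closing parenthesis restored in raised_for and raised_for appended to
the alarm name for cleared alarms, the assembled string looks roughly like this
sketch (all values are hypothetical):

    # hypothetical values - shows only how the strings are assembled
    name="disk_space_usage"
    old_status="WARNING"
    duration_txt="10 minutes"
    raised_for="(was ${old_status,,} for ${duration_txt})"
    alarm="${name//_/ } ${raised_for}"
    echo "${alarm}"   # -> disk space usage (was warning for 10 minutes)
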
@@ -469,7 +518,7 @@ raised_for_html=
 # slack aggregates posts from the same username
 # so we use "${host} ${status}" as the bot username, to make them differ
 
-send_slack "${SLACK_WEBHOOK_URL}" "${to_slack}" "${when}" "${host} ${status}" "${image}" "${chart} (${family})"
+send_slack "${SLACK_WEBHOOK_URL}" "${to_slack}" "${when}" "${host} ${status}" "${image}"
 SENT_SLACK=$?
 
 # -----------------------------------------------------------------------------
index 49dc392041eac47f3c8e1ee87b36eeb4df6a02b0..3eacd022c73b9e7b53efc5d11368fff04e001025 100644 (file)
@@ -2032,7 +2032,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
     // find the previous notification for the same alarm
     ALARM_ENTRY *t;
     for(t = ae->next; t ;t = t->next) {
-        if(t->alarm_id == ae->alarm_id && t->notifications & HEALTH_ENTRY_NOTIFICATIONS_PROCESSED)
+        if(t->alarm_id == ae->alarm_id && t->notifications & HEALTH_ENTRY_NOTIFICATIONS_EXEC_RUN)
             break;
     }
 
@@ -2042,7 +2042,8 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
         return;
     }
 
-    if(ae->old_status == RRDCALC_STATUS_UNINITIALIZED && ae->new_status == RRDCALC_STATUS_CLEAR) {
+    if((ae->old_status == RRDCALC_STATUS_UNDEFINED && ae->new_status == RRDCALC_STATUS_UNINITIALIZED)
+        || (ae->old_status == RRDCALC_STATUS_UNINITIALIZED && ae->new_status == RRDCALC_STATUS_CLEAR)) {
         info("Health not sending notification for first initialization of alarm '%s.%s' status %s", ae->chart, ae->name, rrdcalc_status2string(ae->new_status));
         return;
     }