arthur.barton.de Git - netdata.git/blobdiff - plugins.d/alarm-notify.sh
Merge pull request #1384 from ktsaou/master
[netdata.git] / plugins.d / alarm-notify.sh
index dc6fb06fd55cfb21f3927f22ee4d3cf6ecfa8b99..9f5ca1b09d0aae71ba2fc1fdfa406c5d06421aa4 100755 (executable)
@@ -19,7 +19,9 @@
 #  - pushover.net notifications
 #  - pushbullet.com push notifications by Tiago Peralta @tperalta82 PR #1070
 #  - telegram.org notifications by @hashworks PR #1002
-#
+#  - twilio.com notifications by Levi Blaney @shadycuz PR #1211
+#  - kafka notifications
+#  - pagerduty.com notifications by Jim Cooley @jimcooley PR #1373
 
 # -----------------------------------------------------------------------------
 # testing notifications
@@ -40,14 +42,14 @@ then
     for x in "CRITICAL" "WARNING" "CLEAR"
     do
         echo >&2
-        echo >&2 ">> SENDING TEST ${x} ALARM TO ROLE: ${recipient} <<"
+        echo >&2 "# SENDING TEST ${x} ALARM TO ROLE: ${recipient}"
 
-        "${0}" "${recipient}" "$(hostname)" "1" "1" "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" '100' '90' "${0}" "60" "60" "units" "this is a test alarm to verify notifications work"
+        "${0}" "${recipient}" "$(hostname)" 1 1 "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" 100 90 "${0}" 1 $((0 + id)) "units" "this is a test alarm to verify notifications work"
         if [ $? -ne 0 ]
         then
-            echo >&2 ">> FAILED <<"
+            echo >&2 "# FAILED"
         else
-            echo >&2 ">> OK <<"
+            echo >&2 "# OK"
         fi
 
         last="${x}"
@@ -95,7 +97,7 @@ fatal() {
 
 debug=0
 debug() {
-    [ $debug -eq 1 ] && log DEBUG "${@}"
+    [ ${debug} -eq 1 ] && log DEBUG "${@}"
 }
 
 # -----------------------------------------------------------------------------
@@ -169,9 +171,12 @@ sendmail=
 # enable / disable features
 SEND_SLACK="YES"
 SEND_PUSHOVER="YES"
+SEND_TWILIO="YES"
 SEND_TELEGRAM="YES"
 SEND_EMAIL="YES"
 SEND_PUSHBULLET="YES"
+SEND_KAFKA="YES"
+SEND_PD="YES"
 
 # slack configs
 SLACK_WEBHOOK_URL=
@@ -188,11 +193,26 @@ PUSHBULLET_ACCESS_TOKEN=
 DEFAULT_RECIPIENT_PUSHBULLET=
 declare -A role_recipients_pushbullet=()
 
+# twilio configs
+TWILIO_ACCOUNT_SID=
+TWILIO_ACCOUNT_TOKEN=
+TWILIO_NUMBER=
+DEFAULT_RECIPIENT_TWILIO=
+declare -A role_recipients_twilio=()
+
 # telegram configs
 TELEGRAM_BOT_TOKEN=
 DEFAULT_RECIPIENT_TELEGRAM=
 declare -A role_recipients_telegram=()
 
+# kafka configs
+KAFKA_URL=
+KAFKA_SENDER_IP=
+
+# pagerduty.com configs
+PD_SERVICE_KEY=
+declare -A role_recipients_pd=()
+
 # email configs
 DEFAULT_RECIPIENT_EMAIL="root"
 declare -A role_recipients_email=()
@@ -250,7 +270,9 @@ filter_recipient_by_criticality() {
 declare -A arr_slack=()
 declare -A arr_pushover=()
 declare -A arr_pushbullet=()
+declare -A arr_twilio=()
 declare -A arr_telegram=()
+declare -A arr_pd=()
 declare -A arr_email=()
 
 # netdata may call us with multiple roles, and roles may have multiple but
@@ -285,6 +307,14 @@ do
         [ "${r}" != "disabled" ] && filter_recipient_by_criticality pushbullet "${r}" && arr_pushbullet[${r/|*/}]="1"
     done
 
+    # twilio
+    a="${role_recipients_twilio[${x}]}"
+    [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_TWILIO}"
+    for r in ${a//,/ }
+    do
+        [ "${r}" != "disabled" ] && filter_recipient_by_criticality twilio "${r}" && arr_twilio[${r/|*/}]="1"
+    done
+
     # telegram
     a="${role_recipients_telegram[${x}]}"
     [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_TELEGRAM}"
@@ -300,6 +330,14 @@ do
     do
         [ "${r}" != "disabled" ] && filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1"
     done
+
+    # pagerduty.com
+    a="${role_recipients_pd[${x}]}"
+    [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PD}"
+    for r in ${a//,/ }
+    do
+        [ "${r}" != "disabled" ] && filter_recipient_by_criticality pd "${r}" && arr_pd[${r/|*/}]="1"
+    done
 done
 
 # build the list of slack recipients (channels)
@@ -314,10 +352,18 @@ to_pushover="${!arr_pushover[*]}"
 to_pushbullet="${!arr_pushbullet[*]}"
 [ -z "${to_pushbullet}" ] && SEND_PUSHBULLET="NO"
 
+# build the list of twilio recipients (phone numbers)
+to_twilio="${!arr_twilio[*]}"
+[ -z "${to_twilio}" ] && SEND_TWILIO="NO"
+
 # check array of telegram recipients (chat ids)
 to_telegram="${!arr_telegram[*]}"
 [ -z "${to_telegram}" ] && SEND_TELEGRAM="NO"
 
+# build the list of pagerduty recipients (service keys)
+to_pd="${!arr_pd[*]}"
+[ -z "${to_pd}" ] && SEND_PD="NO"
+
 # build the list of email recipients (email addresses)
 to_email=
 for x in "${!arr_email[@]}"
@@ -338,23 +384,49 @@ done
 [ -z "${PUSHOVER_APP_TOKEN}" ] && SEND_PUSHOVER="NO"
 
 # check pushbullet
-[ -z "${DEFAULT_RECIPIENT_PUSHBULLET}" ] && SEND_PUSHBULLET="NO"
+[ -z "${PUSHBULLET_ACCESS_TOKEN}" ] && SEND_PUSHBULLET="NO"
+
+# check twilio
+[ -z "${TWILIO_ACCOUNT_TOKEN}" -o -z "${TWILIO_ACCOUNT_SID}" -o -z "${TWILIO_NUMBER}" ] && SEND_TWILIO="NO"
 
 # check telegram
 [ -z "${TELEGRAM_BOT_TOKEN}" ] && SEND_TELEGRAM="NO"
 
-if [ \( "${SEND_PUSHOVER}" = "YES" -o "${SEND_SLACK}" = "YES" -o "${SEND_TELEGRAM}" = "YES" -o "${SEND_PUSHBULLET}" = "YES" \) -a -z "${curl}" ]
+# check kafka
+[ -z "${KAFKA_URL}" -o -z "${KAFKA_SENDER_IP}" ] && SEND_KAFKA="NO"
+
+# check pagerduty.com
+# if we need pd-send, check for the pd-send command
+# https://www.pagerduty.com/docs/guides/agent-install-guide/
+if [ "${SEND_PD}" = "YES" ]
+    then
+    pd_send="$(which pd-send 2>/dev/null || command -v pd-send 2>/dev/null)"
+    if [ -z "${pd_send}" ]
+        then
+        # no pd-send available
+        # disable pagerduty.com
+        SEND_PD="NO"
+    fi
+fi
+
+# if we need curl, check for the curl command
+if [ \( "${SEND_PUSHOVER}" = "YES" -o "${SEND_SLACK}" = "YES" -o "${SEND_TWILIO}" = "YES" -o "${SEND_TELEGRAM}" = "YES" -o "${SEND_PUSHBULLET}" = "YES" -o "${SEND_KAFKA}" = "YES" \) -a -z "${curl}" ]
     then
     curl="$(which curl 2>/dev/null || command -v curl 2>/dev/null)"
     if [ -z "${curl}" ]
         then
+        # no curl available
+        # disable all curl based methods
         SEND_PUSHOVER="NO"
         SEND_PUSHBULLET="NO"
         SEND_TELEGRAM="NO"
         SEND_SLACK="NO"
+        SEND_TWILIO="NO"
+        SEND_KAFKA="NO"
     fi
 fi
 
+# if we need sendmail, check for the sendmail command
 if [ "${SEND_EMAIL}" = "YES" -a -z "${sendmail}" ]
     then
     sendmail="$(which sendmail 2>/dev/null || command -v sendmail 2>/dev/null)"
@@ -362,13 +434,21 @@ if [ "${SEND_EMAIL}" = "YES" -a -z "${sendmail}" ]
 fi
 
 # check that we have at least a method enabled
-if [ "${SEND_EMAIL}" != "YES" -a "${SEND_PUSHOVER}" != "YES" -a "${SEND_TELEGRAM}" != "YES" -a "${SEND_SLACK}" != "YES" -a "${SEND_PUSHBULLET}" != "YES" ]
+if [   "${SEND_EMAIL}"      != "YES" \
+    -a "${SEND_PUSHOVER}"   != "YES" \
+    -a "${SEND_TELEGRAM}"   != "YES" \
+    -a "${SEND_SLACK}"      != "YES" \
+    -a "${SEND_TWILIO}"     != "YES" \
+    -a "${SEND_PUSHBULLET}" != "YES" \
+    -a "${SEND_KAFKA}"      != "YES" \
+    -a "${SEND_PD}"         != "YES" \
+    ]
     then
-    fatal "All notification methods are disabled. Not sending a notification."
+    fatal "All notification methods are disabled. Not sending notification to '${roles}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'."
 fi
 
 # -----------------------------------------------------------------------------
-# get the system hostname
+# find a suitable hostname to use, if netdata did not supply a hostname
 
 [ -z "${host}" ] && host="${NETDATA_HOSTNAME}"
 [ -z "${host}" ] && host="${NETDATA_REGISTRY_HOSTNAME}"
@@ -381,7 +461,7 @@ date="$(date --date=@${when} 2>/dev/null)"
 [ -z "${date}" ] && date="$(date 2>/dev/null)"
 
 # -----------------------------------------------------------------------------
-# URL encode a string
+# function to URL encode a string
 
 urlencode() {
     local string="${1}" strlen encoded pos c o
@@ -389,14 +469,14 @@ urlencode() {
     strlen=${#string}
     for (( pos=0 ; pos<strlen ; pos++ ))
     do
-        c=${string:$pos:1}
-        case "$c" in
+        c=${string:${pos}:1}
+        case "${c}" in
             [-_.~a-zA-Z0-9])
                 o="${c}"
                 ;;
 
             *)
-                printf -v o '%%%02x' "'$c"
+                printf -v o '%%%02x' "'${c}"
                 ;;
         esac
         encoded+="${o}"
@@ -407,7 +487,7 @@ urlencode() {
 }
 
 # -----------------------------------------------------------------------------
-# convert a duration in seconds, to a human readable duration
+# function to convert a duration in seconds, to a human readable duration
 # using DAYS, MINUTES, SECONDS
 
 duration4human() {
@@ -471,7 +551,7 @@ send_email() {
         "${sendmail}" -t
         ret=$?
 
-        if [ $ret -eq 0 ]
+        if [ ${ret} -eq 0 ]
         then
             info "sent email notification for: ${host} ${chart}.${name} is ${status} to '${to_email}'"
             return 0
@@ -542,7 +622,7 @@ send_pushbullet() {
         for user in ${recipients}
         do
             httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null \
-              --header 'Access-Token: '$userapikey'' \
+              --header 'Access-Token: '${userapikey}'' \
               --header 'Content-Type: application/json' \
               --data-binary  @<(cat <<EOF
                               {"title": "${title}",
@@ -567,6 +647,119 @@ EOF
     return 1
 }
 
+# -----------------------------------------------------------------------------
+# kafka sender
+
+send_kafka() {
+    # POST the current alarm to ${KAFKA_URL}; returns 0 only on an HTTP 204 reply.
+    local httpcode
+
+    # nothing to do when the kafka method has been disabled
+    [ "${SEND_KAFKA}" = "YES" ] || return 1
+
+    httpcode=$(${curl} -X POST --write-out %{http_code} --silent --output /dev/null \
+        --data "{host_ip:\"${KAFKA_SENDER_IP}\",when:${when},name:\"${name}\",chart:\"${chart}\",family:\"${family}\",status:\"${status}\",old_status:\"${old_status}\",value:${value},old_value:${old_value},duration:${duration},non_clear_duration:${non_clear_duration},units:\"${units}\",info:\"${info}\"}" \
+        "${KAFKA_URL}")
+
+    # any reply other than 204 counts as a failure
+    if [ "${httpcode}" != "204" ]
+    then
+        error "failed to send kafka data for: ${host} ${chart}.${name} is ${status} and ip '${KAFKA_SENDER_IP}' with HTTP error code ${httpcode}."
+        return 1
+    fi
+
+    info "sent kafka data for: ${host} ${chart}.${name} is ${status} and ip '${KAFKA_SENDER_IP}'"
+    return 0
+}
+
+# -----------------------------------------------------------------------------
+# pagerduty.com sender
+
+send_pd() {
+    # Send the alarm to pagerduty.com via pd-send; $1 = space separated service keys.
+    # Returns 0 if at least one key was notified, 1 otherwise (also when disabled).
+    local recipients="${1}" sent=0 key shown t= d retval
+    # map the alarm status to a pagerduty event type; other statuses are not sent
+    case "${status}" in
+        CLEAR)            t='resolve';;
+        WARNING|CRITICAL) t='trigger';;
+    esac
+    [ "${SEND_PD}" = "YES" ] || return 1
+    [ -n "${t}" ] || return 1
+
+    for key in ${recipients}
+    do
+        d="${status} ${name}=${value} ${units} - ${host}, ${family}"
+        # log the key without its last 26 characters (nothing at all if it is shorter)
+        shown="${key:0:$(( ${#key} > 26 ? ${#key} - 26 : 0 ))}"
+        ${pd_send} -k "${key}" \
+                   -t "${t}" \
+                   -d "${d}" \
+                   -i "${alarm_id}" \
+                   -f 'info'="${info}" \
+                   -f 'value_w_units'="${value} ${units}" \
+                   -f 'when'="${when}" \
+                   -f 'duration'="${duration}" \
+                   -f 'roles'="${roles}" \
+                   -f 'host'="${host}" \
+                   -f 'unique_id'="${unique_id}" \
+                   -f 'alarm_id'="${alarm_id}" \
+                   -f 'event_id'="${event_id}" \
+                   -f 'name'="${name}" \
+                   -f 'chart'="${chart}" \
+                   -f 'family'="${family}" \
+                   -f 'status'="${status}" \
+                   -f 'old_status'="${old_status}" \
+                   -f 'value'="${value}" \
+                   -f 'old_value'="${old_value}" \
+                   -f 'src'="${src}" \
+                   -f 'non_clear_duration'="${non_clear_duration}" \
+                   -f 'units'="${units}"
+        retval=$?
+        if [ ${retval} -eq 0 ]
+        then
+            info "sent pagerduty.com notification using service key ${shown}....: ${d}"
+            sent=$((sent + 1))
+        else
+            error "failed to send pagerduty.com notification using service key ${shown}.... (error code ${retval}): ${d}"
+        fi
+    done
+
+    [ ${sent} -gt 0 ]
+}
+
+# -----------------------------------------------------------------------------
+# twilio sender
+
+send_twilio() {
+    # SMS "${title} ${message}" to each recipient: $1 sid, $2 token, $3 sender number, $4 recipients, $5 title, $6 message.
+    # Returns 0 if at least one SMS was accepted (HTTP 201), 1 otherwise (also when disabled or an argument is empty).
+    local accountsid="${1}" accounttoken="${2}" twilionumber="${3}" recipients="${4}"  title="${5}" message="${6}" httpcode sent=0 user
+
+    # separate tests: one long `-a` chain is misparsed when a value looks like an operator
+    [ "${SEND_TWILIO}" = "YES" ] && [ -n "${accountsid}" ] && [ -n "${accounttoken}" ] || return 1
+    [ -n "${twilionumber}" ] && [ -n "${recipients}" ] && [ -n "${message}" ] && [ -n "${title}" ] || return 1
+
+    #https://www.twilio.com/packages/labs/code/bash/twilio-sms
+    for user in ${recipients}
+    do
+        httpcode=$(${curl} -X POST --write-out %{http_code} --silent --output /dev/null \
+            --data-urlencode "From=${twilionumber}" \
+            --data-urlencode "To=${user}" \
+            --data-urlencode "Body=${title} ${message}" \
+            -u "${accountsid}:${accounttoken}" \
+            "https://api.twilio.com/2010-04-01/Accounts/${accountsid}/Messages.json")
+        if [ "${httpcode}" == "201" ]
+        then
+            info "sent Twilio SMS for: ${host} ${chart}.${name} is ${status} to '${user}'"
+            sent=$((sent + 1))
+        else
+            error "failed to send Twilio SMS for: ${host} ${chart}.${name} is ${status} to '${user}' with HTTP error code ${httpcode}."
+        fi
+    done
+    [ ${sent} -gt 0 ]
+}
+
 # -----------------------------------------------------------------------------
 # telegram sender
 
@@ -583,8 +776,8 @@ send_telegram() {
             httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null ${disableNotification} \
                 --data-urlencode "parse_mode=HTML" \
                 --data-urlencode "disable_web_page_preview=true" \
-                --data-urlencode "text=$message" \
-                "https://api.telegram.org/bot${bottoken}/sendMessage?chat_id=$chatid")
+                --data-urlencode "text=${message}" \
+                "https://api.telegram.org/bot${bottoken}/sendMessage?chat_id=${chatid}")
 
             if [ "${httpcode}" == "200" ]
             then
@@ -613,10 +806,10 @@ send_slack() {
     [ "${SEND_SLACK}" != "YES" ] && return 1
 
     case "${status}" in
-        WARNING) color="warning" ;;
+        WARNING)  color="warning" ;;
         CRITICAL) color="danger" ;;
-        CLEAR) color="good" ;;
-        *) color="#777777" ;;
+        CLEAR)    color="good" ;;
+        *)        color="#777777" ;;
     esac
 
     for channel in ${channels}
@@ -717,25 +910,25 @@ case "${status}" in
         image="${images_base_url}/images/check-mark-2-128-green.png"
        status_message="recovered"
                color="#77ca6d"
-
-               # don't show the value when the status is CLEAR
-               # for certain alarms, this value might not have any meaning
-               alarm="${name//_/ } ${raised_for}"
                ;;
 esac
 
 if [ "${status}" = "CLEAR" ]
 then
     severity="Recovered from ${old_status}"
-    if [ $non_clear_duration -gt $duration ]
+    if [ ${non_clear_duration} -gt ${duration} ]
     then
         raised_for="(alarm was raised for ${non_clear_duration_txt})"
     fi
 
+    # don't show the value when the status is CLEAR
+    # for certain alarms, this value might not have any meaning
+    alarm="${name//_/ } ${raised_for}"
+
 elif [ "${old_status}" = "WARNING" -a "${status}" = "CRITICAL" ]
 then
     severity="Escalated to ${status}"
-    if [ $non_clear_duration -gt $duration ]
+    if [ ${non_clear_duration} -gt ${duration} ]
     then
         raised_for="(alarm is raised for ${non_clear_duration_txt})"
     fi
@@ -743,7 +936,7 @@ then
 elif [ "${old_status}" = "CRITICAL" -a "${status}" = "WARNING" ]
 then
     severity="Demoted to ${status}"
-    if [ $non_clear_duration -gt $duration ]
+    if [ ${non_clear_duration} -gt ${duration} ]
     then
         raised_for="(alarm is raised for ${non_clear_duration_txt})"
     fi
@@ -795,6 +988,17 @@ The source of this alarm is line ${src}"
 
 SENT_PUSHBULLET=$?
 
+# -----------------------------------------------------------------------------
+# send the twilio SMS
+
+send_twilio "${TWILIO_ACCOUNT_SID}" "${TWILIO_ACCOUNT_TOKEN}" "${TWILIO_NUMBER}" "${to_twilio}" "${host} ${status_message} - ${name//_/ } - ${chart}" "${alarm} 
+Severity: ${severity}
+Chart: ${chart}
+Family: ${family}
+${info}"
+
+SENT_TWILIO=$?
+
 # -----------------------------------------------------------------------------
 # send the telegram.org message
 
@@ -806,6 +1010,21 @@ ${chart} (${family})
 
 SENT_TELEGRAM=$?
 
+
+# -----------------------------------------------------------------------------
+# send the kafka message
+
+send_kafka
+SENT_KAFKA=$?
+
+
+# -----------------------------------------------------------------------------
+# send the pagerduty.com message
+
+send_pd "${to_pd}"
+SENT_PD=$?
+
+
 # -----------------------------------------------------------------------------
 # send the email
 
@@ -904,8 +1123,19 @@ SENT_EMAIL=$?
 # -----------------------------------------------------------------------------
 # let netdata know
 
-# we did send something
-[ ${SENT_EMAIL} -eq 0 -o ${SENT_PUSHOVER} -eq 0 -o ${SENT_TELEGRAM} -eq 0 -o ${SENT_SLACK} -eq 0 -o ${SENT_PUSHBULLET} -eq 0 ] && exit 0
+if [   ${SENT_EMAIL}      -eq 0 \
+    -o ${SENT_PUSHOVER}   -eq 0 \
+    -o ${SENT_TELEGRAM}   -eq 0 \
+    -o ${SENT_SLACK}      -eq 0 \
+    -o ${SENT_TWILIO}     -eq 0 \
+    -o ${SENT_PUSHBULLET} -eq 0 \
+    -o ${SENT_KAFKA}      -eq 0 \
+    -o ${SENT_PD}         -eq 0 \
+    ]
+    then
+    # we did send something
+    exit 0
+fi
 
 # we did not send anything
 exit 1