arthur.barton.de Git - netdata.git/commitdiff
Mapping roles to service keys and allowing multiple service keys for pagerduty.com...
author Jim Cooley <jim.cooley@healthvana.com>
Thu, 15 Dec 2016 01:10:44 +0000 (17:10 -0800)
committer Jim Cooley <jim.cooley@healthvana.com>
Thu, 15 Dec 2016 01:10:44 +0000 (17:10 -0800)
conf.d/health_alarm_notify.conf
plugins.d/alarm-notify.sh

index ca01dd8f7f0725c2fe246844880b56c16cd4394b..dea4ea0511244604f62dae9f996b69a29af6545e 100644 (file)
@@ -9,6 +9,7 @@
 # - messages to your slack team (slack.com),
 # - messages to your telegram chat / group chat (telegram.org)
 # - sms messages to your cell phone or any sms enabled device (twilio.com)
+# - notifications to users on pagerduty.com
 #
 # The 'to' line given at netdata alarms defines a *role*, so that many
 # people can be notified for each role.
@@ -62,6 +63,7 @@ curl=""
 #  - telegram chat ids
 #  - slack channels
 #  - sms phone numbers
+#  - pagerduty.com (pd) services
 #
 # You can append |critical to limit the notifications to be sent.
 #
@@ -73,9 +75,10 @@ curl=""
 #  telegram: "111827421 112746832|critical"
 #  slack   : "alarms disasters|critical"
 #  twilio  : "+15555555555 +17777777777|critical"
+#  pd      : "<pd_service_key_1> <pd_service_key_2>|critical"
 #
 # If a recipient is set to empty string, the default recipient of the given
-# notification method (email, pushover, telegram, slack) will be used.
+# notification method (email, pushover, telegram, slack, pd) will be used.
 # To disable a notification, use the recipient called: disabled
 # This works for all notification methods (including the default recipients).
 
@@ -219,11 +222,16 @@ KAFKA_SENDER_IP=""
 # a "Generic API" pagerduty service.
 # https://www.pagerduty.com/docs/guides/agent-install-guide/
 
+# multiple recipients can be given like this:
+#              "<pd_service_key_1> <pd_service_key_2> ..."
+
 # enable/disable sending pagerduty notifications
 SEND_PD="YES"
 
-# The service key for your "General API" pagerduty service.
-PD_SERVICE_KEY=''
+# if a role's recipients are not configured, a notification will be sent to
+# the "Generic API" pagerduty.com service that uses this service key.
+# (empty = do not send a notification for unconfigured roles):
+DEFAULT_RECIPIENT_PD=""
 
 
 ###############################################################################
@@ -245,6 +253,8 @@ role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}"
 
 role_recipients_twilio[sysadmin]="${DEFAULT_RECIPIENT_TWILIO}"
 
+role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}"
+
 # -----------------------------------------------------------------------------
 # DNS related alarms
 
@@ -260,6 +270,8 @@ role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}"
 
 role_recipients_twilio[domainadmin]="${DEFAULT_RECIPIENT_TWILIO}"
 
+role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}"
+
 # -----------------------------------------------------------------------------
 # database servers alarms
 # mysql, redis, memcached, etc
@@ -276,6 +288,8 @@ role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}"
 
 role_recipients_twilio[dba]="${DEFAULT_RECIPIENT_TWILIO}"
 
+role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}"
+
 # -----------------------------------------------------------------------------
 # web servers alarms
 # apache, nginx, etc
@@ -292,6 +306,8 @@ role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}"
 
 role_recipients_twilio[webmaster]="${DEFAULT_RECIPIENT_TWILIO}"
 
+role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}"
+
 # -----------------------------------------------------------------------------
 # proxy servers alarms
 # apache, nginx, etc
@@ -307,3 +323,5 @@ role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}"
 role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}"
 
 role_recipients_twilio[proxyadmin]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}"
index 4cb7ab9e1f3acb991c154cd7478143b812b1347f..9f5ca1b09d0aae71ba2fc1fdfa406c5d06421aa4 100755 (executable)
@@ -211,6 +211,7 @@ KAFKA_SENDER_IP=
 
 # pagerduty.com configs
 PD_SERVICE_KEY=
+declare -A role_recipients_pd=()
 
 # email configs
 DEFAULT_RECIPIENT_EMAIL="root"
@@ -271,6 +272,7 @@ declare -A arr_pushover=()
 declare -A arr_pushbullet=()
 declare -A arr_twilio=()
 declare -A arr_telegram=()
+declare -A arr_pd=()
 declare -A arr_email=()
 
 # netdata may call us with multiple roles, and roles may have multiple but
@@ -328,6 +330,14 @@ do
     do
         [ "${r}" != "disabled" ] && filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1"
     done
+
+    # pagerduty.com
+    a="${role_recipients_pd[${x}]}"
+    [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PD}"
+    for r in ${a//,/ }
+    do
+        [ "${r}" != "disabled" ] && filter_recipient_by_criticality pd "${r}" && arr_pd[${r/|*/}]="1"
+    done
 done
 
 # build the list of slack recipients (channels)
@@ -350,6 +360,10 @@ to_twilio="${!arr_twilio[*]}"
 to_telegram="${!arr_telegram[*]}"
 [ -z "${to_telegram}" ] && SEND_TELEGRAM="NO"
 
+# build the list of pagerduty recipients (service keys)
+to_pd="${!arr_pd[*]}"
+[ -z "${to_pd}" ] && SEND_PD="NO"
+
 # build the list of email recipients (email addresses)
 to_email=
 for x in "${!arr_email[@]}"
@@ -382,8 +396,6 @@ done
 [ -z "${KAFKA_URL}" -o -z "${KAFKA_SENDER_IP}" ] && SEND_KAFKA="NO"
 
 # check pagerduty.com
-[ -z "${PD_SERVICE_KEY}" ] && SEND_PD="NO"
-
 # if we need pd-send, check for the pd-send command
 # https://www.pagerduty.com/docs/guides/agent-install-guide/
 if [ "${SEND_PD}" = "YES" ]
@@ -432,7 +444,7 @@ if [   "${SEND_EMAIL}"      != "YES" \
     -a "${SEND_PD}"         != "YES" \
     ]
     then
-    fatal "All notification methods are disabled. Not sending notification to '${role}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'."
+    fatal "All notification methods are disabled. Not sending notification to '${roles}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'."
 fi
 
 # -----------------------------------------------------------------------------
@@ -664,6 +676,7 @@ send_kafka() {
 # pagerduty.com sender
 
 send_pd() {
+    local recipients="${1}" sent=0
     unset t
     case ${status} in
         CLEAR)    t='resolve';;
@@ -673,37 +686,43 @@ send_pd() {
 
     if [ ${SEND_PD} = "YES" -a ! -z "${t}" ]
         then
-        ${pd_send} -k ${PD_SERVICE_KEY} \
-                   -t ${t} \
-                   -d "${status} ${name}=${value} ${units} - ${host}, ${family}" \
-                   -i ${alarm_id} \
-                   -f 'info'="${info}" \
-                   -f 'value_w_units'="${value} ${units}" \
-                   -f 'when'="${when}" \
-                   -f 'duration'="${duration}" \
-                   -f 'roles'="${roles}" \
-                   -f 'host'="${host}" \
-                   -f 'unique_id'="${unique_id}" \
-                   -f 'alarm_id'="${alarm_id}" \
-                   -f 'event_id'="${event_id}" \
-                   -f 'name'="${name}" \
-                   -f 'chart'="${chart}" \
-                   -f 'family'="${family}" \
-                   -f 'status'="${status}" \
-                   -f 'old_status'="${old_status}" \
-                   -f 'value'="${value}" \
-                   -f 'old_value'="${old_value}" \
-                   -f 'src'="${src}" \
-                   -f 'non_clear_duration'="${non_clear_duration}" \
-                   -f 'units'="${units}"
-        retval=$?
-        if [ ${retval} -eq 0 ]
-            then
-                info "sent pagerduty.com notification for: ${host} ${chart}.${name} is ${status}"
-                return 0
-            else
-                error "failed to send pagerduty.com notification for: ${host} ${chart}.${name} is ${status} with error code ${retval}."
-        fi
+        for PD_SERVICE_KEY in ${recipients}
+        do
+            d="${status} ${name}=${value} ${units} - ${host}, ${family}"
+            ${pd_send} -k ${PD_SERVICE_KEY} \
+                       -t ${t} \
+                       -d "${d}" \
+                       -i ${alarm_id} \
+                       -f 'info'="${info}" \
+                       -f 'value_w_units'="${value} ${units}" \
+                       -f 'when'="${when}" \
+                       -f 'duration'="${duration}" \
+                       -f 'roles'="${roles}" \
+                       -f 'host'="${host}" \
+                       -f 'unique_id'="${unique_id}" \
+                       -f 'alarm_id'="${alarm_id}" \
+                       -f 'event_id'="${event_id}" \
+                       -f 'name'="${name}" \
+                       -f 'chart'="${chart}" \
+                       -f 'family'="${family}" \
+                       -f 'status'="${status}" \
+                       -f 'old_status'="${old_status}" \
+                       -f 'value'="${value}" \
+                       -f 'old_value'="${old_value}" \
+                       -f 'src'="${src}" \
+                       -f 'non_clear_duration'="${non_clear_duration}" \
+                       -f 'units'="${units}"
+            retval=$?
+            if [ ${retval} -eq 0 ]
+                then
+                    info "sent pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}....: ${d}"
+                    sent=$((sent + 1))
+                else
+                    error "failed to send pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}.... (error code ${retval}): ${d}"
+            fi
+        done
+
+        [ ${sent} -gt 0 ] && return 0
     fi
 
     return 1
@@ -1002,7 +1021,7 @@ SENT_KAFKA=$?
 # -----------------------------------------------------------------------------
 # send the pagerduty.com message
 
-send_pd
+send_pd "${to_pd}"
 SENT_PD=$?