From: Jim Cooley Date: Thu, 15 Dec 2016 01:10:44 +0000 (-0800) Subject: Mapping roles to service keys and allowing multiple service keys for pagerduty.com... X-Git-Tag: v1.5.0~129^2 X-Git-Url: https://arthur.barton.de/gitweb/?p=netdata.git;a=commitdiff_plain;h=fcf9823df565109c0ec010f8963c0053e4c7b579 Mapping roles to service keys and allowing multiple service keys for pagerduty.com notifications --- diff --git a/conf.d/health_alarm_notify.conf b/conf.d/health_alarm_notify.conf index ca01dd8f..dea4ea05 100644 --- a/conf.d/health_alarm_notify.conf +++ b/conf.d/health_alarm_notify.conf @@ -9,6 +9,7 @@ # - messages to your slack team (slack.com), # - messages to your telegram chat / group chat (telegram.org) # - sms messages to your cell phone or any sms enabled device (twilio.com) +# - notifications to users on pagerduty.com # # The 'to' line given at netdata alarms defines a *role*, so that many # people can be notified for each role. @@ -62,6 +63,7 @@ curl="" # - telegram chat ids # - slack channels # - sms phone numbers +# - pagerduty.com (pd) services # # You can append |critical to limit the notifications to be sent. # @@ -73,9 +75,10 @@ curl="" # telegram: "111827421 112746832|critical" # slack : "alarms disasters|critical" # twilio : "+15555555555 +17777777777|critical" +# pd : "<pd_service_key_1> <pd_service_key_2>|critical" # # If a recipient is set to empty string, the default recipient of the given -# notification method (email, pushover, telegram, slack) will be used. +# notification method (email, pushover, telegram, slack, pd) will be used. # To disable a notification, use the recipient called: disabled # This works for all notification methods (including the default recipients). @@ -219,11 +222,16 @@ KAFKA_SENDER_IP="" # a "Generic API" pagerduty service. # https://www.pagerduty.com/docs/guides/agent-install-guide/ +# multiple recipients can be given like this: +# "<pd_service_key_1> <pd_service_key_2> ..." 
+ # enable/disable sending pagerduty notifications SEND_PD="YES" -# The service key for your "General API" pagerduty service. -PD_SERVICE_KEY='' +# if a role's recipients are not configured, a notification will be sent to +# the "Generic API" pagerduty.com service that uses this service key. +# (empty = do not send a notification for unconfigured roles): +DEFAULT_RECIPIENT_PD="" ############################################################################### @@ -245,6 +253,8 @@ role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}" role_recipients_twilio[sysadmin]="${DEFAULT_RECIPIENT_TWILIO}" +role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}" + # ----------------------------------------------------------------------------- # DNS related alarms @@ -260,6 +270,8 @@ role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}" role_recipients_twilio[domainadmin]="${DEFAULT_RECIPIENT_TWILIO}" +role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}" + # ----------------------------------------------------------------------------- # database servers alarms # mysql, redis, memcached, etc @@ -276,6 +288,8 @@ role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}" role_recipients_twilio[dba]="${DEFAULT_RECIPIENT_TWILIO}" +role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}" + # ----------------------------------------------------------------------------- # web servers alarms # apache, nginx, etc @@ -292,6 +306,8 @@ role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}" role_recipients_twilio[webmaster]="${DEFAULT_RECIPIENT_TWILIO}" +role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}" + # ----------------------------------------------------------------------------- # proxy servers alarms # apache, nginx, etc @@ -307,3 +323,5 @@ role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}" role_recipients_twilio[proxyadmin]="${DEFAULT_RECIPIENT_TWILIO}" + 
+role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}" diff --git a/plugins.d/alarm-notify.sh b/plugins.d/alarm-notify.sh index 4cb7ab9e..9f5ca1b0 100755 --- a/plugins.d/alarm-notify.sh +++ b/plugins.d/alarm-notify.sh @@ -211,6 +211,7 @@ KAFKA_SENDER_IP= # pagerduty.com configs PD_SERVICE_KEY= +declare -A role_recipients_pd=() # email configs DEFAULT_RECIPIENT_EMAIL="root" @@ -271,6 +272,7 @@ declare -A arr_pushover=() declare -A arr_pushbullet=() declare -A arr_twilio=() declare -A arr_telegram=() +declare -A arr_pd=() declare -A arr_email=() # netdata may call us with multiple roles, and roles may have multiple but @@ -328,6 +330,14 @@ do do [ "${r}" != "disabled" ] && filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1" done + + # pagerduty.com + a="${role_recipients_pd[${x}]}" + [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PD}" + for r in ${a//,/ } + do + [ "${r}" != "disabled" ] && filter_recipient_by_criticality pd "${r}" && arr_pd[${r/|*/}]="1" + done done # build the list of slack recipients (channels) @@ -350,6 +360,10 @@ to_twilio="${!arr_twilio[*]}" to_telegram="${!arr_telegram[*]}" [ -z "${to_telegram}" ] && SEND_TELEGRAM="NO" +# build the list of pagerduty recipients (service keys) +to_pd="${!arr_pd[*]}" +[ -z "${to_pd}" ] && SEND_PD="NO" + # build the list of email recipients (email addresses) to_email= for x in "${!arr_email[@]}" @@ -382,8 +396,6 @@ done [ -z "${KAFKA_URL}" -o -z "${KAFKA_SENDER_IP}" ] && SEND_KAFKA="NO" # check pagerduty.com -[ -z "${PD_SERVICE_KEY}" ] && SEND_PD="NO" - # if we need pd-send, check for the pd-send command # https://www.pagerduty.com/docs/guides/agent-install-guide/ if [ "${SEND_PD}" = "YES" ] @@ -432,7 +444,7 @@ if [ "${SEND_EMAIL}" != "YES" \ -a "${SEND_PD}" != "YES" \ ] then - fatal "All notification methods are disabled. Not sending notification to '${role}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'." + fatal "All notification methods are disabled. 
Not sending notification to '${roles}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'." fi # ----------------------------------------------------------------------------- @@ -664,6 +676,7 @@ send_kafka() { # pagerduty.com sender send_pd() { + local recipients="${1}" sent=0 unset t case ${status} in CLEAR) t='resolve';; @@ -673,37 +686,43 @@ send_pd() { if [ ${SEND_PD} = "YES" -a ! -z "${t}" ] then - ${pd_send} -k ${PD_SERVICE_KEY} \ - -t ${t} \ - -d "${status} ${name}=${value} ${units} - ${host}, ${family}" \ - -i ${alarm_id} \ - -f 'info'="${info}" \ - -f 'value_w_units'="${value} ${units}" \ - -f 'when'="${when}" \ - -f 'duration'="${duration}" \ - -f 'roles'="${roles}" \ - -f 'host'="${host}" \ - -f 'unique_id'="${unique_id}" \ - -f 'alarm_id'="${alarm_id}" \ - -f 'event_id'="${event_id}" \ - -f 'name'="${name}" \ - -f 'chart'="${chart}" \ - -f 'family'="${family}" \ - -f 'status'="${status}" \ - -f 'old_status'="${old_status}" \ - -f 'value'="${value}" \ - -f 'old_value'="${old_value}" \ - -f 'src'="${src}" \ - -f 'non_clear_duration'="${non_clear_duration}" \ - -f 'units'="${units}" - retval=$? - if [ ${retval} -eq 0 ] - then - info "sent pagerduty.com notification for: ${host} ${chart}.${name} is ${status}" - return 0 - else - error "failed to send pagerduty.com notification for: ${host} ${chart}.${name} is ${status} with error code ${retval}." 
- fi + for PD_SERVICE_KEY in ${recipients} + do + d="${status} ${name}=${value} ${units} - ${host}, ${family}" + ${pd_send} -k ${PD_SERVICE_KEY} \ + -t ${t} \ + -d "${d}" \ + -i ${alarm_id} \ + -f 'info'="${info}" \ + -f 'value_w_units'="${value} ${units}" \ + -f 'when'="${when}" \ + -f 'duration'="${duration}" \ + -f 'roles'="${roles}" \ + -f 'host'="${host}" \ + -f 'unique_id'="${unique_id}" \ + -f 'alarm_id'="${alarm_id}" \ + -f 'event_id'="${event_id}" \ + -f 'name'="${name}" \ + -f 'chart'="${chart}" \ + -f 'family'="${family}" \ + -f 'status'="${status}" \ + -f 'old_status'="${old_status}" \ + -f 'value'="${value}" \ + -f 'old_value'="${old_value}" \ + -f 'src'="${src}" \ + -f 'non_clear_duration'="${non_clear_duration}" \ + -f 'units'="${units}" + retval=$? + if [ ${retval} -eq 0 ] + then + info "sent pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}....: ${d}" + sent=$((sent + 1)) + else + error "failed to send pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}.... (error code ${retval}): ${d}" + fi + done + + [ ${sent} -gt 0 ] && return 0 fi return 1 @@ -1002,7 +1021,7 @@ SENT_KAFKA=$? # ----------------------------------------------------------------------------- # send the pagerduty.com message -send_pd +send_pd "${to_pd}" SENT_PD=$?