# - messages to your slack team (slack.com),
# - messages to your telegram chat / group chat (telegram.org)
# - sms messages to your cell phone or any sms enabled device (twilio.com)
+# - notifications to users on pagerduty.com
#
# The 'to' line given at netdata alarms defines a *role*, so that many
# people can be notified for each role.
# - telegram chat ids
# - slack channels
# - sms phone numbers
+# - pagerduty.com (pd) services
#
# You can append |critical to limit the notifications to be sent.
#
# telegram: "111827421 112746832|critical"
# slack : "alarms disasters|critical"
# twilio : "+15555555555 +17777777777|critical"
+# pd : "<pd_service_key_1> <pd_service_key_2>|critical"
#
# If a recipient is set to empty string, the default recipient of the given
-# notification method (email, pushover, telegram, slack) will be used.
+# notification method (email, pushover, telegram, slack, pd) will be used.
# To disable a notification, use the recipient called: disabled
# This works for all notification methods (including the default recipients).
# a "Generic API" pagerduty service.
# https://www.pagerduty.com/docs/guides/agent-install-guide/
+# multiple recipients can be given like this:
+# "<pd_service_key_1> <pd_service_key_2> ..."
+
# enable/disable sending pagerduty notifications
SEND_PD="YES"
-# The service key for your "General API" pagerduty service.
-PD_SERVICE_KEY=''
+# if a role's recipients are not configured, a notification will be sent to
+# the "Generic API" pagerduty.com service that uses this service key.
+# (empty = do not send a notification for unconfigured roles):
+DEFAULT_RECIPIENT_PD=""
###############################################################################
role_recipients_twilio[sysadmin]="${DEFAULT_RECIPIENT_TWILIO}"
+role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}"
+
# -----------------------------------------------------------------------------
# DNS related alarms
role_recipients_twilio[domainadmin]="${DEFAULT_RECIPIENT_TWILIO}"
+role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}"
+
# -----------------------------------------------------------------------------
# database servers alarms
# mysql, redis, memcached, etc
role_recipients_twilio[dba]="${DEFAULT_RECIPIENT_TWILIO}"
+role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}"
+
# -----------------------------------------------------------------------------
# web servers alarms
# apache, nginx, etc
role_recipients_twilio[webmaster]="${DEFAULT_RECIPIENT_TWILIO}"
+role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}"
+
# -----------------------------------------------------------------------------
# proxy servers alarms
# apache, nginx, etc
role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}"
role_recipients_twilio[proxyadmin]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}"
# pagerduty.com configs
PD_SERVICE_KEY=
+declare -A role_recipients_pd=()
# email configs
DEFAULT_RECIPIENT_EMAIL="root"
declare -A arr_pushbullet=()
declare -A arr_twilio=()
declare -A arr_telegram=()
+declare -A arr_pd=()
declare -A arr_email=()
# netdata may call us with multiple roles, and roles may have multiple but
do
[ "${r}" != "disabled" ] && filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1"
done
+
+ # pagerduty.com
+ a="${role_recipients_pd[${x}]}"
+ [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PD}"
+ for r in ${a//,/ }
+ do
+ [ "${r}" != "disabled" ] && filter_recipient_by_criticality pd "${r}" && arr_pd[${r/|*/}]="1"
+ done
done
# build the list of slack recipients (channels)
to_telegram="${!arr_telegram[*]}"
[ -z "${to_telegram}" ] && SEND_TELEGRAM="NO"
+# build the list of pagerduty recipients (service keys)
+to_pd="${!arr_pd[*]}"
+[ -z "${to_pd}" ] && SEND_PD="NO"
+
# build the list of email recipients (email addresses)
to_email=
for x in "${!arr_email[@]}"
[ -z "${KAFKA_URL}" -o -z "${KAFKA_SENDER_IP}" ] && SEND_KAFKA="NO"
# check pagerduty.com
-[ -z "${PD_SERVICE_KEY}" ] && SEND_PD="NO"
-
# if we need pd-send, check for the pd-send command
# https://www.pagerduty.com/docs/guides/agent-install-guide/
if [ "${SEND_PD}" = "YES" ]
-a "${SEND_PD}" != "YES" \
]
then
- fatal "All notification methods are disabled. Not sending notification to '${role}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'."
+ fatal "All notification methods are disabled. Not sending notification to '${roles}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'."
fi
# -----------------------------------------------------------------------------
# pagerduty.com sender
send_pd() {
+ local recipients="${1}" sent=0
unset t
case ${status} in
CLEAR) t='resolve';;
if [ ${SEND_PD} = "YES" -a ! -z "${t}" ]
then
- ${pd_send} -k ${PD_SERVICE_KEY} \
- -t ${t} \
- -d "${status} ${name}=${value} ${units} - ${host}, ${family}" \
- -i ${alarm_id} \
- -f 'info'="${info}" \
- -f 'value_w_units'="${value} ${units}" \
- -f 'when'="${when}" \
- -f 'duration'="${duration}" \
- -f 'roles'="${roles}" \
- -f 'host'="${host}" \
- -f 'unique_id'="${unique_id}" \
- -f 'alarm_id'="${alarm_id}" \
- -f 'event_id'="${event_id}" \
- -f 'name'="${name}" \
- -f 'chart'="${chart}" \
- -f 'family'="${family}" \
- -f 'status'="${status}" \
- -f 'old_status'="${old_status}" \
- -f 'value'="${value}" \
- -f 'old_value'="${old_value}" \
- -f 'src'="${src}" \
- -f 'non_clear_duration'="${non_clear_duration}" \
- -f 'units'="${units}"
- retval=$?
- if [ ${retval} -eq 0 ]
- then
- info "sent pagerduty.com notification for: ${host} ${chart}.${name} is ${status}"
- return 0
- else
- error "failed to send pagerduty.com notification for: ${host} ${chart}.${name} is ${status} with error code ${retval}."
- fi
+ for PD_SERVICE_KEY in ${recipients}
+ do
+ d="${status} ${name}=${value} ${units} - ${host}, ${family}"
+ ${pd_send} -k ${PD_SERVICE_KEY} \
+ -t ${t} \
+ -d "${d}" \
+ -i ${alarm_id} \
+ -f 'info'="${info}" \
+ -f 'value_w_units'="${value} ${units}" \
+ -f 'when'="${when}" \
+ -f 'duration'="${duration}" \
+ -f 'roles'="${roles}" \
+ -f 'host'="${host}" \
+ -f 'unique_id'="${unique_id}" \
+ -f 'alarm_id'="${alarm_id}" \
+ -f 'event_id'="${event_id}" \
+ -f 'name'="${name}" \
+ -f 'chart'="${chart}" \
+ -f 'family'="${family}" \
+ -f 'status'="${status}" \
+ -f 'old_status'="${old_status}" \
+ -f 'value'="${value}" \
+ -f 'old_value'="${old_value}" \
+ -f 'src'="${src}" \
+ -f 'non_clear_duration'="${non_clear_duration}" \
+ -f 'units'="${units}"
+ retval=$?
+ if [ ${retval} -eq 0 ]
+ then
+ info "sent pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}....: ${d}"
+ sent=$((sent + 1))
+ else
+ error "failed to send pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}.... (error code ${retval}): ${d}"
+ fi
+ done
+
+ [ ${sent} -gt 0 ] && return 0
fi
return 1
# -----------------------------------------------------------------------------
# send the pagerduty.com message
-send_pd
+send_pd "${to_pd}"
SENT_PD=$?