X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=plugins.d%2Falarm-email.sh;fp=plugins.d%2Falarm-email.sh;h=f31bd42003cb3b3da902377de1d524915687a375;hb=935e142404ec673a588498d229f6cab6c056e74a;hp=0000000000000000000000000000000000000000;hpb=9537ff77a56ae275867290e1d4d9af996da7299c;p=netdata.git diff --git a/plugins.d/alarm-email.sh b/plugins.d/alarm-email.sh new file mode 100755 index 00000000..f31bd420 --- /dev/null +++ b/plugins.d/alarm-email.sh @@ -0,0 +1,241 @@ +#!/usr/bin/env bash + +me="${0}" + +sendmail="$(which sendmail 2>/dev/null || command -v sendmail 2>/dev/null)" +if [ -z "${sendmail}" ] +then + echo >&2 "I cannot send emails - there is no sendmail command available." +fi + +sendmail_from_pipe() { + "${sendmail}" -t + + if [ $? -eq 0 ] + then + echo >&2 "${me}: Sent notification email for ${status} on '${chart}.${name}'" + return 0 + else + echo >&2 "${me}: FAILED to send notification email for ${status} on '${chart}.${name}'" + return 1 + fi +} + +name="${1}" # the name of the alarm, as given in netdata health.d entries +chart="${2}" # the name of the chart (type.id) +status="${3}" # the current status : UNITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL +old_status="${4}" # the previous status: UNITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL +value="${5}" # the current value +old_value="${6}" # the previous value +src="${7}" # the line number and file the alarm has been configured +duration="${8}" # the duration in seconds the previous state took +non_clear_duration="${9}" # the total duration in seconds this is non-clear + +# get the system hostname +hostname="$(hostname)" + +# get the current date +date="$(date)" + +duration4human() { + local s="${1}" d=0 h=0 m=0 ds="day" hs="hour" ms="minute" ss="second" + d=$(( s / 86400 )) + s=$(( s - (d * 86400) )) + h=$(( s / 3600 )) + s=$(( s - (h * 3600) )) + m=$(( s / 60 )) + s=$(( s - (m * 60) )) + + if [ ${d} -gt 0 ] + then + [ ${m} -ge 30 ] && h=$(( h + 1 )) + [ ${d} -gt 1 ] && ds="days" + [ ${h} -gt 1 ] && hs="hours" + if [ ${h} -gt 0 ] + then + echo "${d} ${ds} and ${h} ${hs}" + else + echo "${d} ${ds}" + fi + elif [ ${h} -gt 0 ] + then + [ ${s} -ge 30 ] && m=$(( m + 1 )) + [ ${h} -gt 1 ] && hs="hours" + [ ${m} -gt 1 ] && ms="minutes" + if [ ${m} -gt 0 ] + then + echo "${h} ${hs} and ${m} ${ms}" + else + echo "${h} ${hs}" + fi + elif [ ${m} -gt 0 ] + then + [ ${m} -gt 1 ] && ms="minutes" + [ ${s} -gt 1 ] && ss="seconds" + if [ ${s} -gt 0 ] + then + echo "${m} ${ms} and ${s} ${ss}" + else + echo "${m} ${ms}" + fi + else + [ ${s} -gt 1 ] && ss="seconds" + echo "${s} ${ss}" + fi +} + +severity="${status}" +raised_for="
(was ${old_status,,} for $(duration4human ${duration}))" +status_message="status unknown" +color="grey" +alarm="${name} = ${value}" + +# prepare the title based on status +case "${status}" in + CRITICAL) + status_message="is critical" + color="#ca414b" + ;; + + WARNING) + status_message="needs attention" + color="#caca4b" + ;; + + CLEAR) + status_message="recovered" + color="#77ca6d" + + # don't show the value when the status is CLEAR + # for certain alarms, this value might not have any meaning + alarm="${name}" + ;; +esac + +if [ "${status}" != "WARNING" -a "${status}" != "CRITICAL" -a "${status}" != "CLEAR" ] +then + # don't do anything if this is not WARNING, CRITICAL or CLEAR + echo >&2 "${me}: not sending notification email for ${status} on '${chart}.${name}'" + exit 0 +elif [ "${old_status}" != "WARNING" -a "${old_status}" != "CRITICAL" -a "${status}" = "CLEAR" ] +then + # don't do anything if this is CLEAR, but it was not WARNING or CRITICAL + echo >&2 "${me}: not sending notification email for ${status} on '${chart}.${name}' (last status was ${old_status})" + exit 0 +elif [ "${status}" = "CLEAR" ] +then + severity="Recovered from ${old_status}" + if [ $non_clear_duration > $duration ] + then + raised_for="
(had issues for $(duration4human ${non_clear_duration}))" + fi + +elif [ "${old_status}" = "WARNING" -a "${status}" = "CRITICAL" ] +then + severity="Escalated to ${status}" + if [ $non_clear_duration > $duration ] + then + raised_for="
(has issues for $(duration4human ${non_clear_duration}))" + fi + +elif [ "${old_status}" = "CRITICAL" -a "${status}" = "WARNING" ] +then + severity="Demoted to ${status}" + if [ $non_clear_duration > $duration ] + then + raised_for="
(has issues for $(duration4human ${non_clear_duration}))" + fi + +else + raised_for= +fi + +# send the email +cat < + + + + + + + + + +
+
+ + + + + + + + + + + + +
+
netdata notification
+
+

${hostname} ${status_message}

+
+
+ + + + + + + + + + + + + + + + + + + + + +
+ ${chart} + Chart +
+ ${alarm} + Alarm +
+ ${severity} + Severity +
${date} + ${raised_for} Time +
The source of this alarm is line ${src} +
Sent by + netdata, the real-time performance monitoring. +
+
+
+
+
+ + +EOF