arthur.barton.de Git - netdata.git/blob - plugins.d/alarm-notify.sh
add recipient email to log
[netdata.git] / plugins.d / alarm-notify.sh
1 #!/usr/bin/env bash
2
3 # (C) Costa Tsaousis
4 # pushover support by Jan Arnold
5
# the path this script was invoked as; prefixes the log messages
me=$0

# -----------------------------------------------------------------------------
# parse command line parameters (all of them positional)

recipient=$1            # the recipient of the notification
hostname=$2             # the hostname this event refers to
unique_id=$3            # the unique id of this event
alarm_id=$4             # the unique id of the alarm that generated this event
event_id=$5             # the incremental id of the event, for this alarm
when=$6                 # the timestamp this event occurred
name=$7                 # the name of the alarm, as given in netdata health.d entries
chart=$8                # the name of the chart (type.id)
family=$9               # the family of the chart
status=${10}            # the current status : UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
old_status=${11}        # the previous status: UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
value=${12}             # the current value
old_value=${13}         # the previous value
src=${14}               # the line number and file the alarm has been configured
duration=${15}          # the duration in seconds the previous state took
non_clear_duration=${16} # the total duration in seconds this is non-clear
units=${17}             # the units of the value
info=${18}              # a short description of the alarm
29
30 # -----------------------------------------------------------------------------
31 # screen statuses we don't need to send a notification
32
# Only these events are worth a notification:
#   - the alarm became WARNING or CRITICAL
#   - the alarm became CLEAR, after having been WARNING or CRITICAL
# Anything else is logged on stderr and dropped with exit status 1.
case "${status}" in
    WARNING|CRITICAL)
        ;;

    CLEAR)
        case "${old_status}" in
            WARNING|CRITICAL)
                ;;

            *)
                echo >&2 "${me}: not sending notification for ${status} on '${chart}.${name}' (last status was ${old_status})"
                exit 1
                ;;
        esac
        ;;

    *)
        echo >&2 "${me}: not sending notification for ${status} on '${chart}.${name}'"
        exit 1
        ;;
esac
46
47 # -----------------------------------------------------------------------------
48 # load configuration
49
# external commands; when left empty they are looked up in the system path
curl=
sendmail=

# delivery methods, enabled with "YES"
SEND_EMAIL="YES"
SEND_PUSHOVER="YES"

# pushover: application token, default recipient, recipients per role
PUSHOVER_APP_TOKEN=
DEFAULT_RECIPIENT_PUSHOVER=
declare -A role_recipients_pushover=()

# email: default recipient, recipients per role
DEFAULT_RECIPIENT_EMAIL="root"
declare -A role_recipients_email=()

# the configuration file, when there is one, overrides the defaults above
if [[ -f "${NETDATA_CONFIG_DIR}/health_alarm_notify.conf" ]]; then
    source "${NETDATA_CONFIG_DIR}/health_alarm_notify.conf"
fi
72
73 # -----------------------------------------------------------------------------
74 # find the exact recipient per method
75
# Work out the exact recipient of each delivery method.
#
# Globals read:    recipient, role_recipients_email, role_recipients_pushover,
#                  DEFAULT_RECIPIENT_EMAIL, DEFAULT_RECIPIENT_PUSHOVER
# Globals written: to_email, to_pushover, SEND_PUSHOVER
#
# email:    the role's recipient, else DEFAULT_RECIPIENT_EMAIL, else "root"
# pushover: the role's recipient, else DEFAULT_RECIPIENT_PUSHOVER; when both
#           are empty, pushover is disabled by setting SEND_PUSHOVER to "NO"
resolve_recipients() {
    to_email=
    to_pushover=

    # an empty key is not a valid array subscript, so only look up the
    # per-role recipients when a role was actually given
    if [ -n "${recipient}" ]
    then
        to_email="${role_recipients_email[${recipient}]}"
        to_pushover="${role_recipients_pushover[${recipient}]}"
    fi

    [ -z "${to_email}" ] && to_email="${DEFAULT_RECIPIENT_EMAIL}"
    [ -z "${to_email}" ] && to_email="root"

    # the pushover fallback is the pushover default: an email address is
    # not something pushover can deliver to
    [ -z "${to_pushover}" ] && to_pushover="${DEFAULT_RECIPIENT_PUSHOVER}"
    [ -z "${to_pushover}" ] && SEND_PUSHOVER="NO"

    return 0
}

resolve_recipients
83
84 # -----------------------------------------------------------------------------
85 # verify the delivery methods supported
86
# pushover needs an application token
[[ -n "${PUSHOVER_APP_TOKEN}" ]] || SEND_PUSHOVER="NO"

# pushover needs curl; look it up unless the configuration provided it
if [[ "${SEND_PUSHOVER}" == "YES" && -z "${curl}" ]]; then
    curl="$(which curl 2>/dev/null || command -v curl 2>/dev/null)"
    [[ -n "${curl}" ]] || SEND_PUSHOVER="NO"
fi

# email needs sendmail; look it up unless the configuration provided it
if [[ "${SEND_EMAIL}" == "YES" && -z "${sendmail}" ]]; then
    sendmail="$(which sendmail 2>/dev/null || command -v sendmail 2>/dev/null)"
    [[ -n "${sendmail}" ]] || SEND_EMAIL="NO"
fi

# give up when no delivery method is left enabled
if [[ "${SEND_EMAIL}" != "YES" && "${SEND_PUSHOVER}" != "YES" ]]; then
    echo >&2 "I don't have a means to send a notification. Sorry!"
    exit 1
fi
107
108 # -----------------------------------------------------------------------------
109 # get the system hostname
110
# the first non-empty of: the hostname given on the command line,
# NETDATA_HOSTNAME, NETDATA_REGISTRY_HOSTNAME, the output of hostname
hostname="${hostname:-${NETDATA_HOSTNAME:-${NETDATA_REGISTRY_HOSTNAME}}}"
hostname="${hostname:-$(hostname 2>/dev/null)}"
114
115 # -----------------------------------------------------------------------------
116 # get the date the alarm happened
117
# human readable date of the event; tried in this order:
#   1. date --date=@TIMESTAMP (GNU date)
#   2. date -r TIMESTAMP (BSD date) - only for a plain number, because
#      GNU date takes the argument of -r as a file name
#   3. the current date
date="$(date --date="@${when}" 2>/dev/null)"
if [[ -z "${date}" && "${when}" =~ ^[0-9]+$ ]]; then
    date="$(date -r "${when}" 2>/dev/null)"
fi
[ -n "${date}" ] || date="$(date 2>/dev/null)"
120
121 # -----------------------------------------------------------------------------
# convert a duration in seconds, to a human readable duration
# using the two most significant of DAYS, HOURS, MINUTES, SECONDS
#
# Arguments: $1 - the duration in seconds (empty counts as 0)
# Outputs:   the text on stdout, e.g. "2 days and 3 hours" or "45 seconds"
#
# A day or more is rounded to the nearest hour, an hour or more to the
# nearest minute. The total is rounded BEFORE it is split into units, so
# the rounding carries into the next unit: 23 hours, 59 minutes and 30
# seconds is "1 day" (not "24 hours"), 1 hour, 59 minutes and 30 seconds
# is "2 hours" (not "1 hour and 60 minutes").

duration4human() {
    local s="${1:-0}" d=0 h=0 m=0 ds="day" hs="hour" ms="minute" ss="second"

    # the minute rounding can lift the total to exactly one day; the hour
    # rounding that follows leaves such a value untouched
    if (( s >= 3600 && s < 86400 ))
    then
        s=$(( (s + 30) / 60 * 60 ))
    fi
    if (( s >= 86400 ))
    then
        s=$(( (s + 1800) / 3600 * 3600 ))
    fi

    d=$(( s / 86400 ))
    s=$(( s % 86400 ))
    h=$(( s / 3600 ))
    s=$(( s % 3600 ))
    m=$(( s / 60 ))
    s=$(( s % 60 ))

    (( d > 1 )) && ds="days"
    (( h > 1 )) && hs="hours"
    (( m > 1 )) && ms="minutes"
    (( s > 1 )) && ss="seconds"

    if (( d > 0 ))
    then
        if (( h > 0 ))
        then
            echo "${d} ${ds} and ${h} ${hs}"
        else
            echo "${d} ${ds}"
        fi
    elif (( h > 0 ))
    then
        if (( m > 0 ))
        then
            echo "${h} ${hs} and ${m} ${ms}"
        else
            echo "${h} ${hs}"
        fi
    elif (( m > 0 ))
    then
        if (( s > 0 ))
        then
            echo "${m} ${ms} and ${s} ${ss}"
        else
            echo "${m} ${ms}"
        fi
    else
        echo "${s} ${ss}"
    fi
}
171
172 # -----------------------------------------------------------------------------
173 # email sender
174
# Hand the message found on stdin to "${sendmail} -t".
#
# Globals read: SEND_EMAIL, sendmail, and for the log line only:
#               me, status, chart, name, to_email
# Outputs:      one line on stderr, telling if the email was sent or not
# Returns:      0 when sendmail exited with 0,
#               1 when it did not, or when SEND_EMAIL is not "YES"
send_email() {
    # email is disabled: attempt nothing, log nothing
    [ "${SEND_EMAIL}" = "YES" ] || return 1

    if "${sendmail}" -t
    then
        echo >&2 "${me}: Sent notification email for ${status} on '${chart}.${name}' to '${to_email}'"
        return 0
    fi

    echo >&2 "${me}: FAILED to send notification email for ${status} on '${chart}.${name}' to '${to_email}'"
    return 1
}
193
194 # -----------------------------------------------------------------------------
195 # pushover sender
196
# Push a notification to one or more pushover users.
#
# Arguments: $1 - the pushover application token
#            $2 - the pushover user key(s), comma separated
#            $3 - the timestamp of the event
#            $4 - the url attached to the notification
#            $5 - the alarm status; CRITICAL is sent with priority 1,
#                 everything else with priority 0
#            $6 - the title of the notification
#            $7 - the message of the notification (sent with html=1)
# Globals read: SEND_PUSHOVER, curl, and for the log lines only:
#               me, chart, name
# Outputs:   one line on stderr per user, telling if the push was sent
# Returns:   0 when at least one user got HTTP 200 from the API,
#            1 otherwise, including when pushover is disabled or any of
#            the tokens, the title or the message is empty
send_pushover() {
    local apptoken="${1}" usertoken="${2}" when="${3}" url="${4}" status="${5}" title="${6}" message="${7}"
    local response target delivered=0 priority=0

    [[ "${SEND_PUSHOVER}" == "YES" ]] || return 1
    [[ -n "${apptoken}" && -n "${usertoken}" && -n "${title}" && -n "${message}" ]] || return 1

    if [[ "${status}" == "CRITICAL" ]]; then
        priority=1
    fi

    # deliberately unquoted: commas become spaces and word splitting
    # yields one user key per iteration
    for target in ${usertoken//,/ }; do
        response=$(${curl} --write-out %{http_code} --silent --output /dev/null \
            --form-string "token=${apptoken}" \
            --form-string "user=${target}" \
            --form-string "html=1" \
            --form-string "title=${title}" \
            --form-string "message=${message}" \
            --form-string "timestamp=${when}" \
            --form-string "url=${url}" \
            --form-string "url_title=Open netdata dashboard to view the alarm" \
            --form-string "priority=${priority}" \
            https://api.pushover.net/1/messages.json)

        if [[ "${response}" == "200" ]]; then
            echo >&2 "${me}: Sent notification push for ${status} on '${chart}.${name}' to '${target}'"
            delivered=$(( delivered + 1 ))
        else
            echo >&2 "${me}: FAILED to send notification push for ${status} on '${chart}.${name}' to '${target}' with HTTP error code ${response}."
        fi
    done

    (( delivered > 0 )) && return 0
    return 1
}
234
235
236 # -----------------------------------------------------------------------------
237 # prepare the content of the notification
238
# description of the alarm (left empty when the alarm has none)
[ -n "${info}" ] && info=" <small><br/>${info}</small>"

# the url to send the user on click
goto_url="${NETDATA_REGISTRY_URL}/goto-host-from-alarm.html?machine_guid=${NETDATA_REGISTRY_UNIQUE_ID}&chart=${chart}&family=${family}"

# the severity of the alarm
severity="${status}"

# how long the previous status lasted
raised_for="<br/><small>(was ${old_status,,} for $(duration4human "${duration}"))</small>"

# the key status message
status_message="status unknown"

# the color of the alarm
color="grey"

# the alarm value
alarm="${name//_/ } = ${value} ${units}"

# prepare the title based on status
case "${status}" in
    CRITICAL)
        status_message="is critical"
        color="#ca414b"
        ;;

    WARNING)
        status_message="needs attention"
        color="#caca4b"
        ;;

    CLEAR)
        status_message="recovered"
        color="#77ca6d"

        # don't show the value when the status is CLEAR
        # for certain alarms, this value might not have any meaning
        alarm="${name//_/ }"
        ;;
esac

# Refine the severity and the duration text for status transitions.
# The total non-clear duration replaces the duration of the previous status
# only when it is longer. Both durations default to 0 when empty, so that a
# missing value cannot break the comparison.
if [ "${status}" = "CLEAR" ]
then
    severity="Recovered from ${old_status}"
    if (( ${non_clear_duration:-0} > ${duration:-0} ))
    then
        raised_for="<br/><small>(had issues for $(duration4human "${non_clear_duration}"))</small>"
    fi

elif [ "${old_status}" = "WARNING" ] && [ "${status}" = "CRITICAL" ]
then
    severity="Escalated to ${status}"
    if (( ${non_clear_duration:-0} > ${duration:-0} ))
    then
        raised_for="<br/><small>(has issues for $(duration4human "${non_clear_duration}"))</small>"
    fi

elif [ "${old_status}" = "CRITICAL" ] && [ "${status}" = "WARNING" ]
then
    severity="Demoted to ${status}"
    if (( ${non_clear_duration:-0} > ${duration:-0} ))
    then
        raised_for="<br/><small>(has issues for $(duration4human "${non_clear_duration}"))</small>"
    fi

else
    # any other transition: no duration text at all
    raised_for=
fi
309
310
311 # -----------------------------------------------------------------------------
312 # send the pushover
313
# arguments, in order: application token, user key(s), timestamp, url,
# status, title, message
send_pushover \
    "${PUSHOVER_APP_TOKEN}" \
    "${to_pushover}" \
    "${when}" \
    "${goto_url}" \
    "${status}" \
    "${hostname} ${status_message} - ${name//_/ } - ${chart}" \
    "
<font color=\"${color}\"><b>${alarm}</b></font>${info}<br/>&nbsp;
<small><b>${chart}</b><br/>Chart<br/>&nbsp;</small>
<small><b>${family}</b><br/>Family<br/>&nbsp;</small>
<small><b>${severity}</b><br/>Severity<br/>&nbsp;</small>
<small><b>${date}${raised_for}</b><br/>Time<br/>&nbsp;</small>
<a href=\"${goto_url}\">View Netdata</a><br/>&nbsp;
<small><small>The source of this alarm is line ${src}</small></small>
"

# 0 when at least one push was delivered
SENT_PUSHOVER=$?
325
326 # -----------------------------------------------------------------------------
327 # send the email
328
# the message (headers, an empty line, the HTML body) is fed to send_email
# on its standard input
send_email <<EOF
To: ${to_email}
Subject: ${hostname} ${status_message} - ${name//_/ } - ${chart}
Content-Type: text/html

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0; padding: 0;">
<body style="font-family:'Helvetica Neue','Helvetica',Helvetica,Arial,sans-serif;font-size:14px;width:100%!important;min-height:100%;line-height:1.6;background:#f6f6f6;margin:0;padding:0">
<table>
    <tbody>
    <tr>
        <td style="vertical-align:top;" valign="top"></td>
        <td width="700" style="vertical-align:top;display:block!important;max-width:700px!important;clear:both!important;margin:0 auto;padding:0" valign="top">
            <div style="max-width:700px;display:block;margin:0 auto;padding:20px">
                <table width="100%" cellpadding="0" cellspacing="0" style="background:#fff;border:1px solid #e9e9e9">
                    <tbody>
                    <tr>
                        <td bgcolor="#eee" style="padding: 5px 20px 5px 20px;background-color:#eee;">
                            <div style="font-size:20px;color:#777;font-weight: bold;">netdata notification</div>
                        </td>
                    </tr>
                    <tr>
                        <td bgcolor="${color}" style="font-size:16px;vertical-align:top;font-weight:400;text-align:center;margin:0;padding:10px;color:#ffffff;background:${color}!important;border:1px solid ${color};border-top-color:${color}" align="center" valign="top">
                            <h1 style="font-weight:400;margin:0">${hostname} ${status_message}</h1>
                        </td>
                    </tr>
                    <tr>
                        <td style="vertical-align:top" valign="top">
                            <div style="margin:0;padding:20px;max-width:700px">
                                <table width="100%" cellpadding="0" cellspacing="0" style="max-width:700px">
                                    <tbody>
                                    <tr>
                                        <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px" align="left" valign="top">
                                            <span>${chart}</span>
                                            <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Chart</span>
                                        </td>
                                    </tr>
                                    <tr style="margin:0;padding:0">
                                        <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px" align="left" valign="top">
                                            <span><b>${alarm}</b>${info}</span>
                                            <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Alarm</span>
                                        </td>
                                    </tr>
                                    <tr>
                                        <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px" align="left" valign="top">
                                            <span>${family}</span>
                                            <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Family</span>
                                        </td>
                                    </tr>
                                    <tr style="margin:0;padding:0">
                                        <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px" align="left" valign="top">
                                            <span>${severity}</span>
                                            <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Severity</span>
                                        </td>
                                    </tr>
                                    <tr style="margin:0;padding:0">
                                        <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px" align="left" valign="top"><span>${date}</span>
                                            <span>${raised_for}</span> <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Time</span>
                                        </td>
                                    </tr>
                                    <!--
                                    <tr style="margin:0;padding:0">
                                        <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px">
                                            <a href="${goto_url}" style="font-size:14px;color:#ffffff;text-decoration:none;line-height:1.5;font-weight:bold;text-align:center;display:inline-block;text-transform:capitalize;background:#35568d;border-width:1px;border-style:solid;border-color:#2b4c86;margin:0;padding:10px 15px" target="_blank">View Netdata</a>
                                        </td>
                                    </tr>
                                    -->
                                    <tr style="text-align:center;margin:0;padding:0">
                                        <td style="font-size:11px;vertical-align:top;margin:0;padding:10px 0 0 0;color:#666666" align="center" valign="bottom">The source of this alarm is line <code>${src}</code>
                                        </td>
                                    </tr>
                                    <tr style="text-align:center;margin:0;padding:0">
                                        <td style="font-size:12px;vertical-align:top;margin:0;padding:20px 0 0 0;color:#666666;border-top:1px solid #f0f0f0" align="center" valign="bottom">Sent by
                                            <a href="https://mynetdata.io/" target="_blank">netdata</a>, the real-time performance monitoring.
                                        </td>
                                    </tr>
                                    </tbody>
                                </table>
                            </div>
                        </td>
                    </tr>
                    </tbody>
                </table>
            </div>
        </td>
    </tr>
    </tbody>
</table>
</body>
</html>
EOF

# 0 when the email was handed over to sendmail successfully
SENT_EMAIL=$?
422
423 # -----------------------------------------------------------------------------
424 # let netdata know
425
# exit with 0 when at least one delivery method sent something,
# with 1 when nothing was sent
if [ "${SENT_EMAIL}" -eq 0 ] || [ "${SENT_PUSHOVER}" -eq 0 ]
then
    exit 0
fi

exit 1