]> arthur.barton.de Git - netdata.git/commitdiff
Merge pull request #1652 from ktsaou/master
authorCosta Tsaousis <costa@tsaousis.gr>
Fri, 27 Jan 2017 18:38:53 +0000 (20:38 +0200)
committerGitHub <noreply@github.com>
Fri, 27 Jan 2017 18:38:53 +0000 (20:38 +0200)
added fping alarms and various aesthetic improvements to alarms

19 files changed:
conf.d/Makefile.am
conf.d/health.d/disks.conf
conf.d/health.d/fping.conf [new file with mode: 0644]
conf.d/health.d/memcached.conf
conf.d/health.d/mysql.conf
conf.d/health.d/net.conf
conf.d/health.d/tcp_resets.conf
conf.d/node.d/snmp.conf.md
configs.signatures
node.d/snmp.node.js
plugins.d/alarm-notify.sh
src/health.c
src/health.h
src/main.c
src/web_buffer_svg.c
src/web_buffer_svg.h
src/web_client.c
web/dashboard.js
web/index.html

index b725e249e8fe7c67e27496a7ac871bdf295f3fd0..e17d8fa929f547cddcb790218cd7472127c346b5 100644 (file)
@@ -64,6 +64,7 @@ dist_healthconfig_DATA = \
     health.d/disks.conf \
     health.d/elasticsearch.conf \
     health.d/entropy.conf \
+    health.d/fping.conf \
     health.d/haproxy.conf \
     health.d/ipc.conf \
     health.d/ipfs.conf \
index 0549bac268b51ea0034c1b8b6edc025d5d2a05d2..ff2d6a605ad3bdae4b9fa3e519356295bc7cb9f5 100644 (file)
@@ -88,7 +88,7 @@ families: *
 template: out_of_disk_space_time
       on: disk.space
 families: *
-    calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (0)
+    calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
    units: hours
    every: 10s
     warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
diff --git a/conf.d/health.d/fping.conf b/conf.d/health.d/fping.conf
new file mode 100644 (file)
index 0000000..69251b1
--- /dev/null
@@ -0,0 +1,53 @@
+
+template: fping_last_collected_secs
+families: *
+      on: fping.latency
+    calc: $now - $last_collected_t
+   units: seconds ago
+   every: 10s
+    warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+   delay: down 5m multiplier 1.5 max 1h
+    info: number of seconds since the last successful data collection
+      to: sysadmin
+
+template: host_reachable
+families: *
+      on: fping.latency
+    calc: $average != nan
+   units: up/down
+   every: 10s
+    crit: $this == 0
+    info: states if the remote host is reachable
+   delay: down 30m multiplier 1.5 max 2h
+      to: sysadmin
+
+template: host_latency
+families: *
+      on: fping.latency
+  lookup: average -10s unaligned of average
+   units: ms
+   every: 10s
+   green: 300
+     red: 1000
+    warn: $this > $green OR $max > $red
+    crit: $this > $red
+    info: average round trip delay during the last 10 seconds
+   delay: down 30m multiplier 1.5 max 2h
+      to: sysadmin
+
+template: packet_loss
+families: *
+      on: fping.quality
+  lookup: average -10m unaligned of returned
+    calc: 100 - $this
+   green: 1
+     red: 10
+   units: %
+   every: 10s
+    warn: $this > $green
+    crit: $this > $red
+    info: packet loss percentage
+   delay: down 30m multiplier 1.5 max 2h
+      to: sysadmin
+
index 7917e36afb61f031c91e1e52cc39ccccc4aa68f2..d248ef57a7204966a41ae9d2cf73f3dc917dddd3 100644 (file)
@@ -42,7 +42,7 @@ template: cache_fill_rate
 
 template: out_of_cache_space_time
       on: memcached.cache
-    calc: ($cache_fill_rate > 0) ? ($available / $cache_fill_rate) : (0)
+    calc: ($cache_fill_rate > 0) ? ($available / $cache_fill_rate) : (inf)
    units: hours
    every: 10s
     warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
index 78773e5b5b0b141489d334cbdbaf0b5bbe535b11..1eeb993f039a59742d9bfc9c551513d5b2d97878 100644 (file)
@@ -49,7 +49,7 @@ template: mysql_10s_table_locks_waited
 
 template: mysql_10s_waited_locks_ratio
       on: mysql.table_locks
-    calc: ($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)
+    calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING)  ? (10) : (25))
@@ -65,7 +65,7 @@ template: mysql_10s_waited_locks_ratio
 template: mysql_replication
       on: mysql.slave_status
     calc: ($sql_running == -1 OR $io_running == -1)?0:1
-   units: status
+   units: ok/failed
    every: 10s
     crit: $this == 0
    delay: down 5m multiplier 1.5 max 1h
index 924acccc3d1b3d7f7e5fff3812854574b1379fba..cac0bbbfbe941741e495389c32a813194420bad2 100644 (file)
@@ -116,6 +116,7 @@ families: *
    units: %
    warn: $this > (($status >= $WARNING)?(200):(1000))
    crit: $this > (($status >= $WARNING)?(1000):(2000))
+options: no-clear-notification
    info: the % of the rate of received packets in the last 10 seconds, compared to the rate of the last minute
-     to: silent
+     to: sysadmin
 
index daf24a1cd6806866a86a5f3107876ed49727a6f9..49fb1b924c43eec838cc8b944619bbe91965f82b 100644 (file)
@@ -28,8 +28,9 @@
    every: 10s
     warn: $this > ((($1m_ipv4_tcp_resets_sent < 5)?(5):($1m_ipv4_tcp_resets_sent)) * (($status >= $WARNING)  ? (1) : (4)))
    delay: up 0 down 60m multiplier 1.2 max 2h
+options: no-clear-notification
     info: average TCP RESETS this host is sending, over the last 10 seconds (this can be an indication that a port scan is made, or that a service running on this host has crashed)
-      to: silent
+      to: sysadmin
 
 # -----------------------------------------------------------------------------
 # tcp resets this host receives
@@ -48,5 +49,6 @@
    every: 10s
     warn: $this > ((($1m_ipv4_tcp_resets_received < 5)?(5):($1m_ipv4_tcp_resets_received)) * (($status >= $WARNING)  ? (1) : (4)))
    delay: up 0 down 60m multiplier 1.2 max 2h
+options: no-clear-notification
     info: average TCP RESETS this host is receiving, over the last 10 seconds (this can be an indication that a service this host needs, has crashed)
-      to: silent
+      to: sysadmin
index bae5bf2078a3104ab4b9793d01ef1ce364c416d6..6b496f7a87eae47b55c1d0e55b60e94193ca19b6 100644 (file)
-# SNMP Data Collector\r
-\r
-Using this collector, netdata can collect data from any SNMP device.\r
-\r
-This collector supports:\r
-\r
-- any number of SNMP devices\r
-- each SNMP device can be used to collect data for any number of charts\r
-- each chart may have any number of dimensions\r
-- each SNMP device may have a different update frequency\r
-- each SNMP device will accept one or more batches to report values (you can set `max_request_size` per SNMP server, to control the size of batches).\r
-\r
-The source code of the plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/snmp.node.js).\r
-\r
-## Configuration\r
-\r
-You will need to create the file `/etc/netdata/node.d/snmp.conf` with data like the following.\r
-\r
-In this example:\r
-\r
- - the SNMP device is `10.11.12.8`.\r
- - the SNMP community is `public`.\r
- - we will update the values every 10 seconds (`update_every: 10` under the server `10.11.12.8`).\r
- - we define 2 charts `snmp_switch.bandwidth_port1` and `snmp_switch.bandwidth_port2`, each having 2 dimensions: `in` and `out`.\r
-\r
-```js\r
-{\r
-    "enable_autodetect": false,\r
-    "update_every": 5,\r
-    "max_request_size": 100,\r
-    "servers": [\r
-        {\r
-            "hostname": "10.11.12.8",\r
-            "community": "public",\r
-            "update_every": 10,\r
-            "max_request_size": 50,\r
-            "options": { "timeout": 10000 },\r
-            "charts": {\r
-                "snmp_switch.bandwidth_port1": {\r
-                    "title": "Switch Bandwidth for port 1",\r
-                    "units": "kilobits/s",\r
-                    "type": "area",\r
-                    "priority": 1,\r
-                    "family": "ports",\r
-                    "dimensions": {\r
-                        "in": {\r
-                            "oid": "1.3.6.1.2.1.2.2.1.10.1",\r
-                            "algorithm": "incremental",\r
-                            "multiplier": 8,\r
-                            "divisor": 1024\r
-                        },\r
-                        "out": {\r
-                            "oid": "1.3.6.1.2.1.2.2.1.16.1",\r
-                            "algorithm": "incremental",\r
-                            "multiplier": -8,\r
-                            "divisor": 1024\r
-                        }\r
-                    }\r
-                },\r
-                "snmp_switch.bandwidth_port2": {\r
-                    "title": "Switch Bandwidth for port 2",\r
-                    "units": "kilobits/s",\r
-                    "type": "area",\r
-                    "priority": 1,\r
-                    "family": "ports",\r
-                    "dimensions": {\r
-                        "in": {\r
-                            "oid": "1.3.6.1.2.1.2.2.1.10.2",\r
-                            "algorithm": "incremental",\r
-                            "multiplier": 8,\r
-                            "divisor": 1024\r
-                        },\r
-                        "out": {\r
-                            "oid": "1.3.6.1.2.1.2.2.1.16.2",\r
-                            "algorithm": "incremental",\r
-                            "multiplier": -8,\r
-                            "divisor": 1024\r
-                        }\r
-                    }\r
-                }\r
-            }\r
-        }\r
-    ]\r
-}\r
-```\r
-\r
-`update_every` is the update frequency for each server, in seconds.\r
-\r
-`max_request_size` limits the maximum number of OIDs that will be requested in a single call. The default is 50. Lower this number of you get `TooBig` errors in netdata error.log.\r
-\r
-`family` sets the name of the submenu of the dashboard each chart will appear under.\r
-\r
-If you need to define many charts using incremental OIDs, you can use something like this:\r
-\r
-This is like the previous, but the option `multiply_range` given, will multiply the current chart from `1` to `24` inclusive, producing 24 charts in total for the 24 ports of the switch `10.11.12.8`.\r
-\r
-Each of the 24 new charts will have its id (1-24) appended at:\r
-\r
-1. its chart unique id, i.e. `snmp_switch.bandwidth_port1` to `snmp_switch.bandwidth_port24`\r
-2. its `title`, i.e. `Switch Bandwidth for port 1` to `Switch Bandwidth for port 24`\r
-3. its `oid` (for all dimensions), i.e. dimension `in` will be `1.3.6.1.2.1.2.2.1.10.1` to `1.3.6.1.2.1.2.2.1.10.24`\r
-3. its priority (which will be incremented for each chart so that the charts will appear on the dashboard in this order)\r
-\r
-```js\r
-{\r
-    "enable_autodetect": false,\r
-    "update_every": 10,\r
-    "servers": [\r
-        {\r
-            "hostname": "10.11.12.8",\r
-            "community": "public",\r
-            "update_every": 10,\r
-            "options": { "timeout": 20000 },\r
-            "charts": {\r
-                "snmp_switch.bandwidth_port": {\r
-                    "title": "Switch Bandwidth for port ",\r
-                    "units": "kilobits/s",\r
-                    "type": "area",\r
-                    "priority": 1,\r
-                    "family": "ports",\r
-                    "multiply_range": [ 1, 24 ],\r
-                    "dimensions": {\r
-                        "in": {\r
-                            "oid": "1.3.6.1.2.1.2.2.1.10.",\r
-                            "algorithm": "incremental",\r
-                            "multiplier": 8,\r
-                            "divisor": 1024\r
-                        },\r
-                        "out": {\r
-                            "oid": "1.3.6.1.2.1.2.2.1.16.",\r
-                            "algorithm": "incremental",\r
-                            "multiplier": -8,\r
-                            "divisor": 1024\r
-                        }\r
-                    }\r
-                }\r
-            }\r
-        }\r
-    ]\r
-}\r
-```\r
-\r
-The `options` given for each server, are:\r
-\r
- - `timeout`, the time to wait for the SNMP device to respond. The default is 5000 ms.\r
- - `version`, the SNMP version to use. `0` is Version 1, `1` is Version 2c. The default is Version 1 (`0`).\r
- - `transport`, the default is `udp4`.\r
- - `port`, the port of the SNMP device to connect to. The default is `161`.\r
- - `retries`, the number of attempts to make to fetch the data. The default is `1`.\r
-\r
-## Retreiving names from snmp\r
-\r
-You can append a value retrieved from SNMP to the title, by adding `titleoid` to the chart.\r
-\r
-You can set a dimension name to a value retrieved from SNMP, by adding `oidname` to the dimension.\r
-\r
-Both of the above will participate in `multiply_range`.\r
-\r
-\r
-## Testing the configuration\r
-\r
-To test it, you can run:\r
-\r
-```sh\r
-/usr/libexec/netdata/plugins.d/node.d.plugin 1 snmp\r
-```\r
-\r
-The above will run it on your console and you will be able to see what netdata sees, but also errors. You can get a very detailed output by appending `debug` to the command line.\r
-\r
-If it works, restart netdata to activate the snmp collector and refresh the dashboard (if your SNMP device responds with a delay, you may need to refresh the dashboard in a few seconds).\r
-\r
-## Data collection speed\r
-\r
-Keep in mind that many SNMP switches are routers are very slow. They may not be able to report values per second. If you run `node.d.plugin` in `debug` mode, it will report the time it took for the SNMP device to respond. My switch, for example, needs 7-8 seconds to respond for the traffic on 24 ports (48 OIDs, in/out).\r
-\r
-Also, if you use many SNMP clients on the same SNMP device at the same time, values may be skipped. This is a problem of the SNMP device, not this collector.\r
-\r
-## Finding OIDs\r
-\r
-Use `snmpwalk`, like this:\r
-\r
-```sh\r
-snmpwalk -t 20 -v 1 -O fn -c public 10.11.12.8\r
-```\r
-\r
-- `-t 20` is the timeout in seconds\r
-- `-v 1` is the SNMP version\r
-- `-O fn` will display full OIDs in numeric format (you may want to run it also without this option to see human readable output of OIDs)\r
-- `-c public` is the SNMP community\r
-- `10.11.12.8` is the SNMP device\r
-\r
-Keep in mind that `snmpwalk` outputs the OIDs with a dot in front them. You should remove this dot when adding OIDs to the configuration file of this collector.\r
-\r
-## Example: Linksys SRW2024P\r
-\r
-This is what I use for my Linksys SRW2024P. It creates:\r
-\r
-1. A chart for power consumption (it is a PoE switch)\r
-2. Two charts for packets received (total packets received and packets received with errors)\r
-3. One chart for packets output\r
-4. 24 charts, one for each port of the switch. It also appends the port names, as defined at the switch, to the chart titles.\r
-\r
-This switch also reports various other metrics, like snmp, packets per port, etc. Unfortunately it does not report CPU utilization or backplane utilization.\r
-\r
-This switch has a very slow SNMP processors. To respond, it needs about 8 seconds, so I have set the refresh frequency (`update_every`) to 15 seconds.\r
-\r
-```js\r
-{\r
-        "enable_autodetect": false,\r
-        "update_every": 5,\r
-        "servers": [\r
-                {\r
-                        "hostname": "10.11.12.8",\r
-                        "community": "public",\r
-                        "update_every": 15,\r
-                        "options": { "timeout": 20000, "version": 1 },\r
-                        "charts": {\r
-                                "snmp_switch.power": {\r
-                                        "title": "Switch Power Supply",\r
-                                        "units": "watts",\r
-                                        "type": "line",\r
-                                        "priority": 10,\r
-                                        "family": "power",\r
-                                        "dimensions": {\r
-                                                "supply": {\r
-                                                        "oid": ".1.3.6.1.2.1.105.1.3.1.1.2.1",\r
-                                                        "algorithm": "absolute",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                },\r
-                                                "used": {\r
-                                                        "oid": ".1.3.6.1.2.1.105.1.3.1.1.4.1",\r
-                                                        "algorithm": "absolute",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                        }\r
-                                }\r
-                                , "snmp_switch.input": {\r
-                                        "title": "Switch Packets Input",\r
-                                        "units": "packets/s",\r
-                                        "type": "area",\r
-                                        "priority": 20,\r
-                                        "family": "IP",\r
-                                        "dimensions": {\r
-                                                "receives": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.3.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                                , "discards": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.8.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                        }\r
-                                }\r
-                                , "snmp_switch.input_errors": {\r
-                                        "title": "Switch Received Packets with Errors",\r
-                                        "units": "packets/s",\r
-                                        "type": "line",\r
-                                        "priority": 30,\r
-                                        "family": "IP",\r
-                                        "dimensions": {\r
-                                                "bad_header": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.4.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                                , "bad_address": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.5.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                                , "unknown_protocol": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.7.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                        }\r
-                                }\r
-                                , "snmp_switch.output": {\r
-                                        "title": "Switch Output Packets",\r
-                                        "units": "packets/s",\r
-                                        "type": "line",\r
-                                        "priority": 40,\r
-                                        "family": "IP",\r
-                                        "dimensions": {\r
-                                                "requests": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.10.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": 1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                                , "discards": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.11.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": -1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                                , "no_route": {\r
-                                                        "oid": ".1.3.6.1.2.1.4.12.0",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": -1,\r
-                                                        "divisor": 1\r
-                                                }\r
-                                        }\r
-                                }\r
-                                , "snmp_switch.bandwidth_port": {\r
-                                        "title": "Switch Bandwidth for port ",\r
-                                        "titleoid": ".1.3.6.1.2.1.31.1.1.1.18.",\r
-                                        "units": "kilobits/s",\r
-                                        "type": "area",\r
-                                        "priority": 100,\r
-                                        "family": "ports",\r
-                                        "multiply_range": [ 1, 24 ],\r
-                                        "dimensions": {\r
-                                                "in": {\r
-                                                        "oid": ".1.3.6.1.2.1.2.2.1.10.",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": 8,\r
-                                                        "divisor": 1024\r
-                                                }\r
-                                                , "out": {\r
-                                                        "oid": ".1.3.6.1.2.1.2.2.1.16.",\r
-                                                        "algorithm": "incremental",\r
-                                                        "multiplier": -8,\r
-                                                        "divisor": 1024\r
-                                                }\r
-                                        }\r
-                                }\r
-                        }\r
-                }\r
-        ]\r
-}\r
-```\r
+# SNMP Data Collector
+
+Using this collector, netdata can collect data from any SNMP device.
+
+This collector supports:
+
+- any number of SNMP devices
+- each SNMP device can be used to collect data for any number of charts
+- each chart may have any number of dimensions
+- each SNMP device may have a different update frequency
+- each SNMP device will accept one or more batches to report values (you can set `max_request_size` per SNMP server, to control the size of batches).
+
+The source code of the plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/snmp.node.js).
+
+## Configuration
+
+You will need to create the file `/etc/netdata/node.d/snmp.conf` with data like the following.
+
+In this example:
+
+ - the SNMP device is `10.11.12.8`.
+ - the SNMP community is `public`.
+ - we will update the values every 10 seconds (`update_every: 10` under the server `10.11.12.8`).
+ - we define 2 charts `snmp_switch.bandwidth_port1` and `snmp_switch.bandwidth_port2`, each having 2 dimensions: `in` and `out`.
+
+```js
+{
+    "enable_autodetect": false,
+    "update_every": 5,
+    "max_request_size": 100,
+    "servers": [
+        {
+            "hostname": "10.11.12.8",
+            "community": "public",
+            "update_every": 10,
+            "max_request_size": 50,
+            "options": { "timeout": 10000 },
+            "charts": {
+                "snmp_switch.bandwidth_port1": {
+                    "title": "Switch Bandwidth for port 1",
+                    "units": "kilobits/s",
+                    "type": "area",
+                    "priority": 1,
+                    "family": "ports",
+                    "dimensions": {
+                        "in": {
+                            "oid": "1.3.6.1.2.1.2.2.1.10.1",
+                            "algorithm": "incremental",
+                            "multiplier": 8,
+                            "divisor": 1024,
+                            "offset": 0
+                        },
+                        "out": {
+                            "oid": "1.3.6.1.2.1.2.2.1.16.1",
+                            "algorithm": "incremental",
+                            "multiplier": -8,
+                            "divisor": 1024,
+                            "offset": 0
+                        }
+                    }
+                },
+                "snmp_switch.bandwidth_port2": {
+                    "title": "Switch Bandwidth for port 2",
+                    "units": "kilobits/s",
+                    "type": "area",
+                    "priority": 1,
+                    "family": "ports",
+                    "dimensions": {
+                        "in": {
+                            "oid": "1.3.6.1.2.1.2.2.1.10.2",
+                            "algorithm": "incremental",
+                            "multiplier": 8,
+                            "divisor": 1024,
+                            "offset": 0
+                        },
+                        "out": {
+                            "oid": "1.3.6.1.2.1.2.2.1.16.2",
+                            "algorithm": "incremental",
+                            "multiplier": -8,
+                            "divisor": 1024,
+                            "offset": 0
+                        }
+                    }
+                }
+            }
+        }
+    ]
+}
+```
+
+`update_every` is the update frequency for each server, in seconds.
+
+`max_request_size` limits the maximum number of OIDs that will be requested in a single call. The default is 50. Lower this number if you get `TooBig` errors in netdata error.log.
+
+`family` sets the name of the submenu of the dashboard each chart will appear under.
+
+If you need to define many charts using incremental OIDs, you can use something like this:
+
+This is like the previous example, but the `multiply_range` option replicates the chart for each value from `1` to `24` inclusive, producing 24 charts in total for the 24 ports of the switch `10.11.12.8`.
+
+Each of the 24 new charts will have its id (1-24) appended at:
+
+1. its chart unique id, i.e. `snmp_switch.bandwidth_port1` to `snmp_switch.bandwidth_port24`
+2. its `title`, i.e. `Switch Bandwidth for port 1` to `Switch Bandwidth for port 24`
+3. its `oid` (for all dimensions), i.e. dimension `in` will be `1.3.6.1.2.1.2.2.1.10.1` to `1.3.6.1.2.1.2.2.1.10.24`
+4. its priority (which will be incremented for each chart so that the charts will appear on the dashboard in this order)
+
+```js
+{
+    "enable_autodetect": false,
+    "update_every": 10,
+    "servers": [
+        {
+            "hostname": "10.11.12.8",
+            "community": "public",
+            "update_every": 10,
+            "options": { "timeout": 20000 },
+            "charts": {
+                "snmp_switch.bandwidth_port": {
+                    "title": "Switch Bandwidth for port ",
+                    "units": "kilobits/s",
+                    "type": "area",
+                    "priority": 1,
+                    "family": "ports",
+                    "multiply_range": [ 1, 24 ],
+                    "dimensions": {
+                        "in": {
+                            "oid": "1.3.6.1.2.1.2.2.1.10.",
+                            "algorithm": "incremental",
+                            "multiplier": 8,
+                            "divisor": 1024,
+                            "offset": 0
+                        },
+                        "out": {
+                            "oid": "1.3.6.1.2.1.2.2.1.16.",
+                            "algorithm": "incremental",
+                            "multiplier": -8,
+                            "divisor": 1024,
+                            "offset": 0
+                        }
+                    }
+                }
+            }
+        }
+    ]
+}
+```
+
+The `options` given for each server, are:
+
+ - `timeout`, the time to wait for the SNMP device to respond. The default is 5000 ms.
+ - `version`, the SNMP version to use. `0` is Version 1, `1` is Version 2c. The default is Version 1 (`0`).
+ - `transport`, the default is `udp4`.
+ - `port`, the port of the SNMP device to connect to. The default is `161`.
+ - `retries`, the number of attempts to make to fetch the data. The default is `1`.
+
+## Retrieving names from snmp
+
+You can append a value retrieved from SNMP to the title, by adding `titleoid` to the chart.
+
+You can set a dimension name to a value retrieved from SNMP, by adding `oidname` to the dimension.
+
+Both of the above will participate in `multiply_range`.
+
+
+## Testing the configuration
+
+To test it, you can run:
+
+```sh
+/usr/libexec/netdata/plugins.d/node.d.plugin 1 snmp
+```
+
+The above will run it on your console and you will be able to see what netdata sees, but also errors. You can get a very detailed output by appending `debug` to the command line.
+
+If it works, restart netdata to activate the snmp collector and refresh the dashboard (if your SNMP device responds with a delay, you may need to refresh the dashboard in a few seconds).
+
+## Data collection speed
+
+Keep in mind that many SNMP switches and routers are very slow. They may not be able to report values per second. If you run `node.d.plugin` in `debug` mode, it will report the time it took for the SNMP device to respond. My switch, for example, needs 7-8 seconds to respond for the traffic on 24 ports (48 OIDs, in/out).
+
+Also, if you use many SNMP clients on the same SNMP device at the same time, values may be skipped. This is a problem of the SNMP device, not this collector.
+
+## Finding OIDs
+
+Use `snmpwalk`, like this:
+
+```sh
+snmpwalk -t 20 -v 1 -O fn -c public 10.11.12.8
+```
+
+- `-t 20` is the timeout in seconds
+- `-v 1` is the SNMP version
+- `-O fn` will display full OIDs in numeric format (you may want to run it also without this option to see human readable output of OIDs)
+- `-c public` is the SNMP community
+- `10.11.12.8` is the SNMP device
+
+Keep in mind that `snmpwalk` outputs the OIDs with a dot in front of them. You should remove this dot when adding OIDs to the configuration file of this collector.
+
+## Example: Linksys SRW2024P
+
+This is what I use for my Linksys SRW2024P. It creates:
+
+1. A chart for power consumption (it is a PoE switch)
+2. Two charts for packets received (total packets received and packets received with errors)
+3. One chart for packets output
+4. 24 charts, one for each port of the switch. It also appends the port names, as defined at the switch, to the chart titles.
+
+This switch also reports various other metrics, like snmp, packets per port, etc. Unfortunately it does not report CPU utilization or backplane utilization.
+
+This switch has a very slow SNMP processor. To respond, it needs about 8 seconds, so I have set the refresh frequency (`update_every`) to 15 seconds.
+
+```js
+{
+    "enable_autodetect": false,
+    "update_every": 5,
+    "servers": [
+    {
+        "hostname": "10.11.12.8",
+        "community": "public",
+        "update_every": 15,
+        "options": { "timeout": 20000, "version": 1 },
+        "charts": {
+            "snmp_switch.power": {
+                "title": "Switch Power Supply",
+                "units": "watts",
+                "type": "line",
+                "priority": 10,
+                "family": "power",
+                "dimensions": {
+                    "supply": {
+                        "oid": ".1.3.6.1.2.1.105.1.3.1.1.2.1",
+                        "algorithm": "absolute",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    },
+                    "used": {
+                        "oid": ".1.3.6.1.2.1.105.1.3.1.1.4.1",
+                        "algorithm": "absolute",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                }
+            }
+            , "snmp_switch.input": {
+                "title": "Switch Packets Input",
+                "units": "packets/s",
+                "type": "area",
+                "priority": 20,
+                "family": "IP",
+                "dimensions": {
+                    "receives": {
+                        "oid": ".1.3.6.1.2.1.4.3.0",
+                        "algorithm": "incremental",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                    , "discards": {
+                        "oid": ".1.3.6.1.2.1.4.8.0",
+                        "algorithm": "incremental",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                }
+            }
+            , "snmp_switch.input_errors": {
+                "title": "Switch Received Packets with Errors",
+                "units": "packets/s",
+                "type": "line",
+                "priority": 30,
+                "family": "IP",
+                "dimensions": {
+                    "bad_header": {
+                        "oid": ".1.3.6.1.2.1.4.4.0",
+                        "algorithm": "incremental",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                    , "bad_address": {
+                        "oid": ".1.3.6.1.2.1.4.5.0",
+                        "algorithm": "incremental",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                    , "unknown_protocol": {
+                        "oid": ".1.3.6.1.2.1.4.7.0",
+                        "algorithm": "incremental",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                }
+            }
+            , "snmp_switch.output": {
+                "title": "Switch Output Packets",
+                "units": "packets/s",
+                "type": "line",
+                "priority": 40,
+                "family": "IP",
+                "dimensions": {
+                    "requests": {
+                        "oid": ".1.3.6.1.2.1.4.10.0",
+                        "algorithm": "incremental",
+                        "multiplier": 1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                    , "discards": {
+                        "oid": ".1.3.6.1.2.1.4.11.0",
+                        "algorithm": "incremental",
+                        "multiplier": -1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                    , "no_route": {
+                        "oid": ".1.3.6.1.2.1.4.12.0",
+                        "algorithm": "incremental",
+                        "multiplier": -1,
+                        "divisor": 1,
+                        "offset": 0
+                    }
+                }
+            }
+            , "snmp_switch.bandwidth_port": {
+                "title": "Switch Bandwidth for port ",
+                "titleoid": ".1.3.6.1.2.1.31.1.1.1.18.",
+                "units": "kilobits/s",
+                "type": "area",
+                "priority": 100,
+                "family": "ports",
+                "multiply_range": [ 1, 24 ],
+                "dimensions": {
+                    "in": {
+                        "oid": ".1.3.6.1.2.1.2.2.1.10.",
+                        "algorithm": "incremental",
+                        "multiplier": 8,
+                        "divisor": 1024,
+                        "offset": 0
+                    }
+                    , "out": {
+                        "oid": ".1.3.6.1.2.1.2.2.1.16.",
+                        "algorithm": "incremental",
+                        "multiplier": -8,
+                        "divisor": 1024,
+                        "offset": 0
+                    }
+                }
+            }
+        }
+    }
+    ]
+}
+```
index fbae919c49f8de0481e31a799c8d87f679c92440..817c38902f86dc6cb0112e0aa8ee39199ca5055d 100644 (file)
@@ -37,6 +37,7 @@ declare -A configs_signatures=(
   ['18ee1c6197a4381b1c1631ef6129824f']='apps_groups.conf'
   ['1972e48345e6c3f0d65f94a03317622b']='health_alarm_notify.conf'
   ['1c12b678ab65f271a96da1bbd0a1ab1c']='health.d/softnet.conf'
+  ['1c3168c95b53e999df3d45162b3f50b8']='health.d/fping.conf'
   ['1ea8e8ef1fa8a3a0fcdfba236f4cb195']='python.d/mysql.conf'
   ['1ef0fd38e7969c023bc3fa6d89eaf6d6']='python.d/mdstat.conf'
   ['1f5545b3ff52b3eb75ee05401f67a9bc']='fping.conf'
@@ -67,8 +68,10 @@ declare -A configs_signatures=(
   ['312b4b8e2805e19cf9be554b319567d6']='health.d/softnet.conf'
   ['318bb45755726a25120bb33413d4b582']='health.d/net.conf'
   ['325617412a628e3bc776e3fbb777a2a6']='health.d/redis.conf'
+  ['326e1477131e0f73304711135f70a2a5']='health.d/memcached.conf'
   ['32fde0057c790964f2c743cb3c9aad29']='health.d/nginx.conf'
   ['33b135e28aeaef2b8224ba69a0fde245']='health.d/cpu.conf'
+  ['343bc919a2fbc93f687f9d1339ec5f79']='health.d/net.conf'
   ['3634d5eddc46fb0d50cf47f370670c2c']='health.d/redis.conf'
   ['364b6e0081b116c9ec073b4d329a6dcc']='health_alarm_notify.conf'
   ['367d1463e520eb9dc89223bab161c6d1']='python.d/postgres.conf'
@@ -111,6 +114,7 @@ declare -A configs_signatures=(
   ['4b775fb31342f1478b3773d041a72911']='python.d.conf'
   ['4ccb06fff1ce06dc5bc80e0a9f568f6e']='charts.d.conf'
   ['4d13684cadfa90e73ab465409bf7263b']='health.d/mysql.conf'
+  ['4d91ee6fe4c887ea3865ef36ac63da3c']='health.d/mysql.conf'
   ['4e995acb0d6fd77403a2a9dca984b55b']='charts.d.conf'
   ['4f6a5b47a13f5912cc89e9286701dd08']='health.d/redis.conf'
   ['4f6f4d39c19d7d954f769d3f9d3b4da5']='health.d/memcached.conf'
@@ -218,6 +222,7 @@ declare -A configs_signatures=(
   ['8c1d41e2c88aeca78bc319ed74c8748c']='python.d/phpfpm.conf'
   ['8d0552371a7c9725a04196fa560813d1']='health.d/cpu.conf'
   ['8dc0bd0a70b5117454bd5f5b98f91c2c']='health.d/disks.conf'
+  ['8f4f925c1e97dd164007495ec5135ffc']='health.d/fping.conf'
   ['8fd472a854b0996327e8ed3562161182']='health_alarm_notify.conf'
   ['919911d13901d60a7580f5dfd7fc87bb']='health.d/ram.conf'
   ['91c757ef6be3abdb86906d9dbb9c217a']='fping.conf'
@@ -231,10 +236,12 @@ declare -A configs_signatures=(
   ['99c1617448abbdc493976ab9bda5ce02']='apps_groups.conf'
   ['9a8a459a3841b78d4c6ef07428ad2fe1']='health.d/entropy.conf'
   ['9c0185ceff15415bc59b2ce2c1f04367']='apps_groups.conf'
+  ['9c8ddfa810d83ae58c8614ee5229e66b']='health.d/disks.conf'
   ['9c981c75bdf4b1637f7113e7e45eb2bf']='health.d/memcached.conf'
   ['9e0553ebdc21b64295873fc104cfa79d']='python.d.conf'
   ['9eb3326ae2ee9badeaad31d8dd2eaa2b']='python.d/isc_dhcpd.conf'
   ['a02d14124b19c635c1426cee2e98bac5']='charts.d.conf'
+  ['a03f3e38378385bf87d4c0f81eb1f108']='health.d/tcp_resets.conf'
   ['a09714b5942cf25a89ec3da1dbc18063']='health.d/ram.conf'
   ['a0b3a12389c9c56dfe35964b20b59836']='health.d/bind_rndc.conf'
   ['a0ee8f351f213c0e8af9eb7a4a09cb95']='apps_groups.conf'
@@ -334,6 +341,7 @@ declare -A configs_signatures=(
   ['de02f899a61f21b86adb646940f0bcae']='health.d/net.conf'
   ['def883f35986c9d25de63b1a8e7d0f46']='health.d/entropy.conf'
   ['df381f3a7ca9fb2b4b43ae7cb7a4c492']='python.d/mysql.conf'
+  ['df7e8044902b5e155fad8430c2ddcfa8']='health.d/fping.conf'
   ['dfd5431b11cf2f3852a40d390c1d5a92']='python.d/varnish.conf'
   ['e0242003fd2e3f9ac1b9314e802ada79']='python.d/hddtemp.conf'
   ['e0e96cc47ed61d6492416be5236cd4d3']='python.d/apache_cache.conf'
index 5a478937e99e56e8516cd4e80033cc20aac2abcc..c0974bda6df38c68518f24320dc924446e6fd397 100644 (file)
@@ -1,6 +1,7 @@
 'use strict';
-
+// netdata snmp module
 // This program will connect to one or more SNMP Agents
+//
 
 // example configuration in /etc/netdata/node.d/snmp.conf
 /*
                             "oid": ".1.3.6.1.2.1.2.2.1.10.1",
                             "algorithm": "incremental",
                             "multiplier": 8,
-                            "divisor": 1024
+                            "divisor": 1024,
+                            "offset": 0
                         },
                         "out": {
                             "oid": ".1.3.6.1.2.1.2.2.1.16.1",
                             "algorithm": "incremental",
                             "multiplier": -8,
-                            "divisor": 1024
+                            "divisor": 1024,
+                            "offset": 0
                         }
                     }
                 },
                             "oid": ".1.3.6.1.2.1.2.2.1.10.2",
                             "algorithm": "incremental",
                             "multiplier": 8,
-                            "divisor": 1024
+                            "divisor": 1024,
+                             "offset": 0
                         },
                         "out": {
                             "oid": ".1.3.6.1.2.1.2.2.1.16.2",
                             "algorithm": "incremental",
                             "multiplier": -8,
-                            "divisor": 1024
+                            "divisor": 1024,
+                            "offset": 0
                         }
                     }
                 }
                             "oid": ".1.3.6.1.2.1.2.2.1.10.",
                             "algorithm": "incremental",
                             "multiplier": 8,
-                            "divisor": 1024
+                            "divisor": 1024,
+                            "offset": 0
                         },
                         "out": {
                             "oid": ".1.3.6.1.2.1.2.2.1.16.",
                             "algorithm": "incremental",
                             "multiplier": -8,
-                            "divisor": 1024
+                            "divisor": 1024,
+                            "offset": 0
                         }
                     }
                 }
@@ -360,8 +367,12 @@ var snmp = {
                 for(var j = 0; j < dim_keys_len ; j++) {
                     var d = dim_keys[j];
 
-                    if (dimensions[d].value !== null)
-                        service.set(d, dimensions[d].value);
+                    if (dimensions[d].value !== null) {
+                        if(typeof dimensions[d].offset === 'number')
+                            service.set(d, dimensions[d].value + dimensions[d].offset);
+                        else
+                            service.set(d, dimensions[d].value);
+                    }
                 }
 
                 service.end();
index d6f3d8b2ad11f04361d95437143aa59dc36ec00d..7ef9957cf00639077809b80f434715a2770cc264 100755 (executable)
@@ -46,7 +46,7 @@ then
         echo >&2
         echo >&2 "# SENDING TEST ${x} ALARM TO ROLE: ${recipient}"
 
-        "${0}" "${recipient}" "$(hostname)" 1 1 "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" 100 90 "${0}" 1 $((0 + id)) "units" "this is a test alarm to verify notifications work"
+        "${0}" "${recipient}" "$(hostname)" 1 1 "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" 100 90 "${0}" 1 $((0 + id)) "units" "this is a test alarm to verify notifications work" "new value" "old value"
         if [ $? -ne 0 ]
         then
             echo >&2 "# FAILED"
@@ -138,6 +138,8 @@ duration="${15}"   # the duration in seconds of the previous alarm state
 non_clear_duration="${16}" # the total duration in seconds this is/was non-clear
 units="${17}"      # the units of the value
 info="${18}"       # a short description of the alarm
+value_string="${19}"        # friendly value (with units)
+old_value_string="${20}"    # friendly old value (with units)
 
 # -----------------------------------------------------------------------------
 # screen statuses we don't need to send a notification
@@ -747,13 +749,13 @@ send_pd() {
         then
         for PD_SERVICE_KEY in ${recipients}
         do
-            d="${status} ${name}=${value} ${units} - ${host}, ${family}"
+            d="${status} ${name} = ${value_string} - ${host}, ${family}"
             ${pd_send} -k ${PD_SERVICE_KEY} \
                        -t ${t} \
                        -d "${d}" \
                        -i ${alarm_id} \
                        -f 'info'="${info}" \
-                       -f 'value_w_units'="${value} ${units}" \
+                       -f 'value_w_units'="${value_string}" \
                        -f 'when'="${when}" \
                        -f 'duration'="${duration}" \
                        -f 'roles'="${roles}" \
@@ -1034,7 +1036,7 @@ status_message="status unknown"
 color="grey"
 
 # the alarm value
-alarm="${name//_/ } = ${value} ${units}"
+alarm="${name//_/ } = ${value_string}"
 
 # the image of the alarm
 image="${images_base_url}/images/seo-performance-128.png"
index 193312eec3af855c2587e15a6dc93d485138b24f..7f9be80b18002ae3690d2e17ba01fea622df6c5e 100755 (executable)
@@ -251,34 +251,34 @@ static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char
             ae->exec_run_timestamp      = (uint32_t)strtoul(pointers[11], NULL, 16);
             ae->delay_up_to_timestamp   = (uint32_t)strtoul(pointers[12], NULL, 16);
 
-            if(unlikely(ae->name)) freez(ae->name);
+            freez(ae->name);
             ae->name = strdupz(pointers[13]);
             ae->hash_name = simple_hash(ae->name);
 
-            if(unlikely(ae->chart)) freez(ae->chart);
+            freez(ae->chart);
             ae->chart = strdupz(pointers[14]);
             ae->hash_chart = simple_hash(ae->chart);
 
-            if(unlikely(ae->family)) freez(ae->family);
+            freez(ae->family);
             ae->family = strdupz(pointers[15]);
 
-            if(unlikely(ae->exec)) freez(ae->exec);
+            freez(ae->exec);
             ae->exec = strdupz(pointers[16]);
             if(!*ae->exec) { freez(ae->exec); ae->exec = NULL; }
 
-            if(unlikely(ae->recipient)) freez(ae->recipient);
+            freez(ae->recipient);
             ae->recipient = strdupz(pointers[17]);
             if(!*ae->recipient) { freez(ae->recipient); ae->recipient = NULL; }
 
-            if(unlikely(ae->source)) freez(ae->source);
+            freez(ae->source);
             ae->source = strdupz(pointers[18]);
             if(!*ae->source) { freez(ae->source); ae->source = NULL; }
 
-            if(unlikely(ae->units)) freez(ae->units);
+            freez(ae->units);
             ae->units = strdupz(pointers[19]);
             if(!*ae->units) { freez(ae->units); ae->units = NULL; }
 
-            if(unlikely(ae->info)) freez(ae->info);
+            freez(ae->info);
             ae->info = strdupz(pointers[20]);
             if(!*ae->info) { freez(ae->info); ae->info = NULL; }
 
@@ -290,6 +290,12 @@ static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char
             ae->new_value   = str2l(pointers[25]);
             ae->old_value   = str2l(pointers[26]);
 
+            static char value_string[100 + 1];
+            freez(ae->old_value_string);
+            freez(ae->new_value_string);
+            ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1));
+            ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1));
+
             // add it to host if not already there
             if(unlikely(*pointers[0] == 'A')) {
                 ae->next = host->health_log.alarms;
@@ -353,17 +359,26 @@ static inline void health_alarm_log_load(RRDHOST *host) {
 // ----------------------------------------------------------------------------
 // health alarm log management
 
-static inline void health_alarm_log(RRDHOST *host,
-                uint32_t alarm_id, uint32_t alarm_event_id,
-                time_t when,
-                const char *name, const char *chart, const char *family,
-                const char *exec, const char *recipient, time_t duration,
-                calculated_number old_value, calculated_number new_value,
-                int old_status, int new_status,
-                const char *source,
-                const char *units,
-                const char *info,
-                int delay
+static inline void health_alarm_log(
+        RRDHOST *host,
+        uint32_t alarm_id,
+        uint32_t alarm_event_id,
+        time_t when,
+        const char *name,
+        const char *chart,
+        const char *family,
+        const char *exec,
+        const char *recipient,
+        time_t duration,
+        calculated_number old_value,
+        calculated_number new_value,
+        int old_status,
+        int new_status,
+        const char *source,
+        const char *units,
+        const char *info,
+        int delay,
+        uint32_t flags
 ) {
     debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
 
@@ -391,12 +406,19 @@ static inline void health_alarm_log(RRDHOST *host,
     ae->when = when;
     ae->old_value = old_value;
     ae->new_value = new_value;
+
+    static char value_string[100 + 1];
+    ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1));
+    ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1));
+
     ae->old_status = old_status;
     ae->new_status = new_status;
     ae->duration = duration;
     ae->delay = delay;
     ae->delay_up_to_timestamp = when + delay;
 
+    ae->flags |= flags;
+
     if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
         ae->non_clear_duration += ae->duration;
 
@@ -1095,7 +1117,27 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
 
     {
         time_t now = now_realtime_sec();
-        health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_UNINITIALIZED, rc->source, rc->units, rc->info, 0);
+        health_alarm_log(
+                st->rrdhost,
+                rc->id,
+                rc->next_event_id++,
+                now,
+                rc->name,
+                rc->rrdset->id,
+                rc->rrdset->family,
+                rc->exec,
+                rc->recipient,
+                now - rc->last_status_change,
+                rc->old_value,
+                rc->value,
+                rc->status,
+                RRDCALC_STATUS_UNINITIALIZED,
+                rc->source,
+                rc->units,
+                rc->info,
+                0,
+                0
+        );
     }
 }
 
@@ -1133,7 +1175,27 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) {
 
     {
         time_t now = now_realtime_sec();
-        health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_REMOVED, rc->source, rc->units, rc->info, 0);
+        health_alarm_log(
+                st->rrdhost,
+                rc->id,
+                rc->next_event_id++,
+                now,
+                rc->name,
+                rc->rrdset->id,
+                rc->rrdset->family,
+                rc->exec,
+                rc->recipient,
+                now - rc->last_status_change,
+                rc->old_value,
+                rc->value,
+                rc->status,
+                RRDCALC_STATUS_REMOVED,
+                rc->source,
+                rc->units,
+                rc->info,
+                0,
+                0
+        );
     }
 
     RRDHOST *host = st->rrdhost;
@@ -1472,6 +1534,7 @@ static inline void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt) {
 #define HEALTH_UNITS_KEY "units"
 #define HEALTH_INFO_KEY "info"
 #define HEALTH_DELAY_KEY "delay"
+#define HEALTH_OPTIONS_KEY "options"
 
 static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
     if(!rc->chart) {
@@ -1702,6 +1765,35 @@ static inline int health_parse_delay(
     return 1;
 }
 
+static inline uint32_t health_parse_options(const char *s) {
+    uint32_t options = 0;
+    char buf[100+1] = "";
+
+    while(*s) {
+        buf[0] = '\0';
+
+        // skip spaces
+        while(*s && isspace(*s))
+            s++;
+
+        // find the next space
+        size_t count = 0;
+        while(*s && count < 100 && !isspace(*s))
+            buf[count++] = *s++;
+
+        if(buf[0]) {
+            buf[count] = '\0';
+
+            if(!strcasecmp(buf, "no-clear-notification") || !strcasecmp(buf, "no-clear"))
+                options |= RRDCALC_FLAG_NO_CLEAR_NOTIFICATION;
+            else
+                error("Ignoring unknown alarm option '%s'", buf);
+        }
+    }
+
+    return options;
+}
+
 static inline int health_parse_db_lookup(
         size_t line, const char *path, const char *file, char *string,
         int *group_method, int *after, int *before, int *every,
@@ -1830,7 +1922,25 @@ static inline void strip_quotes(char *s) {
 int health_readfile(const char *path, const char *filename) {
     debug(D_HEALTH, "Health configuration reading file '%s/%s'", path, filename);
 
-    static uint32_t hash_alarm = 0, hash_template = 0, hash_on = 0, hash_families = 0, hash_calc = 0, hash_green = 0, hash_red = 0, hash_warn = 0, hash_crit = 0, hash_exec = 0, hash_every = 0, hash_lookup = 0, hash_units = 0, hash_info = 0, hash_recipient = 0, hash_delay = 0;
+    static uint32_t
+            hash_alarm = 0,
+            hash_template = 0,
+            hash_on = 0,
+            hash_families = 0,
+            hash_calc = 0,
+            hash_green = 0,
+            hash_red = 0,
+            hash_warn = 0,
+            hash_crit = 0,
+            hash_exec = 0,
+            hash_every = 0,
+            hash_lookup = 0,
+            hash_units = 0,
+            hash_info = 0,
+            hash_recipient = 0,
+            hash_delay = 0,
+            hash_options = 0;
+
     char buffer[HEALTH_CONF_MAX_LINE + 1];
 
     if(unlikely(!hash_alarm)) {
@@ -1850,6 +1960,7 @@ int health_readfile(const char *path, const char *filename) {
         hash_info = simple_hash(HEALTH_INFO_KEY);
         hash_recipient = simple_hash(HEALTH_RECIPIENT_KEY);
         hash_delay = simple_uhash(HEALTH_DELAY_KEY);
+        hash_options = simple_uhash(HEALTH_OPTIONS_KEY);
     }
 
     snprintfz(buffer, HEALTH_CONF_MAX_LINE, "%s/%s", path, filename);
@@ -2062,6 +2173,9 @@ int health_readfile(const char *path, const char *filename) {
             else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
                 health_parse_delay(line, path, filename, value, &rc->delay_up_duration, &rc->delay_down_duration, &rc->delay_max_duration, &rc->delay_multiplier);
             }
+            else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+                rc->options |= health_parse_options(value);
+            }
             else {
                 error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has unknown key '%s'.",
                      line, path, filename, rc->name, key);
@@ -2183,6 +2297,9 @@ int health_readfile(const char *path, const char *filename) {
             else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
                 health_parse_delay(line, path, filename, value, &rt->delay_up_duration, &rt->delay_down_duration, &rt->delay_max_duration, &rt->delay_multiplier);
             }
+            else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+                rt->options |= health_parse_options(value);
+            }
             else {
                 error("Health configuration at line %zu of file '%s/%s' for template '%s' has unknown key '%s'.",
                       line, path, filename, rt->name, key);
@@ -2305,61 +2422,70 @@ static inline void health_string2json(BUFFER *wb, const char *prefix, const char
 }
 
 static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
-    buffer_sprintf(wb, "\n\t{\n"
-                           "\t\t\"hostname\": \"%s\",\n"
-                           "\t\t\"unique_id\": %u,\n"
-                           "\t\t\"alarm_id\": %u,\n"
-                           "\t\t\"alarm_event_id\": %u,\n"
-                           "\t\t\"name\": \"%s\",\n"
-                           "\t\t\"chart\": \"%s\",\n"
-                           "\t\t\"family\": \"%s\",\n"
-                           "\t\t\"processed\": %s,\n"
-                           "\t\t\"updated\": %s,\n"
-                           "\t\t\"exec_run\": %lu,\n"
-                           "\t\t\"exec_failed\": %s,\n"
-                           "\t\t\"exec\": \"%s\",\n"
-                           "\t\t\"recipient\": \"%s\",\n"
-                           "\t\t\"exec_code\": %d,\n"
-                           "\t\t\"source\": \"%s\",\n"
-                           "\t\t\"units\": \"%s\",\n"
-                           "\t\t\"info\": \"%s\",\n"
-                           "\t\t\"when\": %lu,\n"
-                           "\t\t\"duration\": %lu,\n"
-                           "\t\t\"non_clear_duration\": %lu,\n"
-                           "\t\t\"status\": \"%s\",\n"
-                           "\t\t\"old_status\": \"%s\",\n"
-                           "\t\t\"delay\": %d,\n"
-                           "\t\t\"delay_up_to_timestamp\": %lu,\n"
-                           "\t\t\"updated_by_id\": %u,\n"
-                           "\t\t\"updates_id\": %u,\n",
-                   host->hostname,
-                   ae->unique_id,
-                   ae->alarm_id,
-                   ae->alarm_event_id,
-                   ae->name,
-                   ae->chart,
-                   ae->family,
-                   (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false",
-                   (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false",
-                   (unsigned long)ae->exec_run_timestamp,
-                   (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false",
-                   ae->exec?ae->exec:health.health_default_exec,
-                   ae->recipient?ae->recipient:health.health_default_recipient,
-                   ae->exec_code,
-                   ae->source,
-                   ae->units?ae->units:"",
-                   ae->info?ae->info:"",
-                   (unsigned long)ae->when,
-                   (unsigned long)ae->duration,
-                   (unsigned long)ae->non_clear_duration,
-                   rrdcalc_status2string(ae->new_status),
-                   rrdcalc_status2string(ae->old_status),
-                   ae->delay,
-                   (unsigned long)ae->delay_up_to_timestamp,
-                   ae->updated_by_id,
-                   ae->updates_id
+    buffer_sprintf(wb,
+            "\n\t{\n"
+                    "\t\t\"hostname\": \"%s\",\n"
+                    "\t\t\"unique_id\": %u,\n"
+                    "\t\t\"alarm_id\": %u,\n"
+                    "\t\t\"alarm_event_id\": %u,\n"
+                    "\t\t\"name\": \"%s\",\n"
+                    "\t\t\"chart\": \"%s\",\n"
+                    "\t\t\"family\": \"%s\",\n"
+                    "\t\t\"processed\": %s,\n"
+                    "\t\t\"updated\": %s,\n"
+                    "\t\t\"exec_run\": %lu,\n"
+                    "\t\t\"exec_failed\": %s,\n"
+                    "\t\t\"exec\": \"%s\",\n"
+                    "\t\t\"recipient\": \"%s\",\n"
+                    "\t\t\"exec_code\": %d,\n"
+                    "\t\t\"source\": \"%s\",\n"
+                    "\t\t\"units\": \"%s\",\n"
+                    "\t\t\"info\": \"%s\",\n"
+                    "\t\t\"when\": %lu,\n"
+                    "\t\t\"duration\": %lu,\n"
+                    "\t\t\"non_clear_duration\": %lu,\n"
+                    "\t\t\"status\": \"%s\",\n"
+                    "\t\t\"old_status\": \"%s\",\n"
+                    "\t\t\"delay\": %d,\n"
+                    "\t\t\"delay_up_to_timestamp\": %lu,\n"
+                    "\t\t\"updated_by_id\": %u,\n"
+                    "\t\t\"updates_id\": %u,\n"
+                    "\t\t\"value_string\": \"%s\",\n"
+                    "\t\t\"old_value_string\": \"%s\",\n"
+            , host->hostname
+            , ae->unique_id
+            , ae->alarm_id
+            , ae->alarm_event_id
+            , ae->name
+            , ae->chart
+            , ae->family
+            , (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false"
+            , (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false"
+            , (unsigned long)ae->exec_run_timestamp
+            , (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false"
+            , ae->exec?ae->exec:health.health_default_exec
+            , ae->recipient?ae->recipient:health.health_default_recipient
+            , ae->exec_code
+            , ae->source
+            , ae->units?ae->units:""
+            , ae->info?ae->info:""
+            , (unsigned long)ae->when
+            , (unsigned long)ae->duration
+            , (unsigned long)ae->non_clear_duration
+            , rrdcalc_status2string(ae->new_status)
+            , rrdcalc_status2string(ae->old_status)
+            , ae->delay
+            , (unsigned long)ae->delay_up_to_timestamp
+            , ae->updated_by_id
+            , ae->updates_id
+            , ae->new_value_string
+            , ae->old_value_string
     );
 
+    if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
+        buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
+    }
+
     buffer_strcat(wb, "\t\t\"value\":");
     buffer_rrd_value(wb, ae->new_value);
     buffer_strcat(wb, ",\n");
@@ -2415,30 +2541,34 @@ static inline void health_rrdcalc2json_nolock(BUFFER *wb, RRDCALC *rc) {
                    "\t\t\t\"delay_multiplier\": %f,\n"
                    "\t\t\t\"delay\": %d,\n"
                    "\t\t\t\"delay_up_to_timestamp\": %lu,\n"
-            , rc->chart, rc->name
-            , (unsigned long)rc->id
-            , rc->name
-            , rc->chart
-            , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
-            , (rc->rrdset)?"true":"false"
-            , rc->exec?rc->exec:health.health_default_exec
-            , rc->recipient?rc->recipient:health.health_default_recipient
-            , rc->source
-            , rc->units?rc->units:""
-            , rc->info?rc->info:""
-            , rrdcalc_status2string(rc->status)
-            , (unsigned long)rc->last_status_change
-            , (unsigned long)rc->last_updated
-            , (unsigned long)rc->next_update
-            , rc->update_every
-            , rc->delay_up_duration
-            , rc->delay_down_duration
-            , rc->delay_max_duration
-            , rc->delay_multiplier
-            , rc->delay_last
-            , (unsigned long)rc->delay_up_to_timestamp
+           , rc->chart, rc->name
+           , (unsigned long)rc->id
+           , rc->name
+           , rc->chart
+           , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
+           , (rc->rrdset)?"true":"false"
+           , rc->exec?rc->exec:health.health_default_exec
+           , rc->recipient?rc->recipient:health.health_default_recipient
+           , rc->source
+           , rc->units?rc->units:""
+           , rc->info?rc->info:""
+           , rrdcalc_status2string(rc->status)
+           , (unsigned long)rc->last_status_change
+           , (unsigned long)rc->last_updated
+           , (unsigned long)rc->next_update
+           , rc->update_every
+           , rc->delay_up_duration
+           , rc->delay_down_duration
+           , rc->delay_max_duration
+           , rc->delay_multiplier
+           , rc->delay_last
+           , (unsigned long)rc->delay_up_to_timestamp
     );
 
+    if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {
+        buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
+    }
+
     if(RRDCALC_HAS_DB_LOOKUP(rc)) {
         if(rc->dimensions && *rc->dimensions)
             health_string2json(wb, "\t\t\t", "lookup_dimensions", rc->dimensions, ",\n");
@@ -2601,12 +2731,21 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
 
     if(unlikely(ae->new_status < RRDCALC_STATUS_CLEAR)) {
         // do not send notifications for internal statuses
+        debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (internal statuses)", ae->chart, ae->name, rrdcalc_status2string(ae->new_status));
+        goto done;
+    }
+
+    if(unlikely(ae->new_status <= RRDCALC_STATUS_CLEAR && (ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
+        // do not send notifications for disabled statuses
+        debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (it has no-clear-notification enabled)", ae->chart, ae->name, rrdcalc_status2string(ae->new_status));
+        // mark it as run, so that we will send the same alarm if it happens again
         goto done;
     }
 
     // find the previous notification for the same alarm
     // which we have run the exec script
-    {
+    // exception: alarms with HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION set
+    if(likely(!(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
         uint32_t id = ae->alarm_id;
         ALARM_ENTRY *t;
         for(t = ae->next; t ; t = t->next) {
@@ -2643,7 +2782,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
     const char *recipient = ae->recipient;
     if(!recipient) recipient = health.health_default_recipient;
 
-    snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s'",
+    snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s' '%s' '%s'",
               exec,
               recipient,
               host->hostname,
@@ -2662,7 +2801,9 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
               (uint32_t)ae->duration,
               (uint32_t)ae->non_clear_duration,
               ae->units?ae->units:"",
-              ae->info?ae->info:""
+              ae->info?ae->info:"",
+              ae->new_value_string,
+              ae->old_value_string
     );
 
     ae->flags |= HEALTH_ENTRY_FLAG_EXEC_RUN;
@@ -2754,6 +2895,8 @@ static inline void health_alarm_log_process(RRDHOST *host) {
         freez(ae->source);
         freez(ae->units);
         freez(ae->info);
+        freez(ae->old_value_string);
+        freez(ae->new_value_string);
         freez(ae);
 
         ae = t;
@@ -3083,7 +3226,27 @@ void *health_main(void *ptr) {
 
                     rc->delay_last = delay;
                     rc->delay_up_to_timestamp = now + delay;
-                    health_alarm_log(&localhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info, rc->delay_last);
+                    health_alarm_log(
+                            &localhost,
+                            rc->id,
+                            rc->next_event_id++,
+                            now,
+                            rc->name,
+                            rc->rrdset->id,
+                            rc->rrdset->family,
+                            rc->exec,
+                            rc->recipient,
+                            now - rc->last_status_change,
+                            rc->old_value,
+                            rc->value,
+                            rc->status,
+                            status,
+                            rc->source,
+                            rc->units,
+                            rc->info,
+                            rc->delay_last,
+                            (rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)?HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION:0
+                    );
                     rc->last_status_change = now;
                     rc->status = status;
                 }
index 79831d4fc563257a24ce26cd7e9f42bad1036649..87f8a1a18e547e2665cd5b007e3483dfcb361a06 100644 (file)
@@ -119,13 +119,14 @@ typedef struct rrddimvar {
 #define RRDCALC_STATUS_WARNING        3
 #define RRDCALC_STATUS_CRITICAL       4
 
-#define RRDCALC_FLAG_DB_ERROR      0x00000001
-#define RRDCALC_FLAG_DB_NAN        0x00000002
-/* #define RRDCALC_FLAG_DB_STALE      0x00000004 */
-#define RRDCALC_FLAG_CALC_ERROR    0x00000008
-#define RRDCALC_FLAG_WARN_ERROR    0x00000010
-#define RRDCALC_FLAG_CRIT_ERROR    0x00000020
-#define RRDCALC_FLAG_RUNNABLE      0x00000040
+#define RRDCALC_FLAG_DB_ERROR              0x00000001
+#define RRDCALC_FLAG_DB_NAN                0x00000002
+/* #define RRDCALC_FLAG_DB_STALE           0x00000004 */
+#define RRDCALC_FLAG_CALC_ERROR            0x00000008
+#define RRDCALC_FLAG_WARN_ERROR            0x00000010
+#define RRDCALC_FLAG_CRIT_ERROR            0x00000020
+#define RRDCALC_FLAG_RUNNABLE              0x00000040
+#define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000
 
 typedef struct rrdcalc {
     uint32_t id;                    // the unique id of this alarm
@@ -274,11 +275,12 @@ typedef struct rrdcalctemplate {
 
 #define RRDCALCTEMPLATE_HAS_CALCULATION(rt) ((rt)->after)
 
-#define HEALTH_ENTRY_FLAG_PROCESSED    0x00000001
-#define HEALTH_ENTRY_FLAG_UPDATED      0x00000002
-#define HEALTH_ENTRY_FLAG_EXEC_RUN     0x00000004
-#define HEALTH_ENTRY_FLAG_EXEC_FAILED  0x00000008
-#define HEALTH_ENTRY_FLAG_SAVED        0x10000000
+#define HEALTH_ENTRY_FLAG_PROCESSED             0x00000001
+#define HEALTH_ENTRY_FLAG_UPDATED               0x00000002
+#define HEALTH_ENTRY_FLAG_EXEC_RUN              0x00000004
+#define HEALTH_ENTRY_FLAG_EXEC_FAILED           0x00000008
+#define HEALTH_ENTRY_FLAG_SAVED                 0x10000000
+#define HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION 0x80000000
 
 typedef struct alarm_entry {
     uint32_t unique_id;
@@ -308,6 +310,10 @@ typedef struct alarm_entry {
 
     calculated_number old_value;
     calculated_number new_value;
+
+    char *old_value_string;
+    char *new_value_string;
+
     int old_status;
     int new_status;
 
index 7232bdb1d0edcf52e3d75ed49bac73b84e22f55a..fb547e440526a1489b60365203416323c134ccb0 100644 (file)
@@ -552,6 +552,10 @@ int main(int argc, char **argv)
         if(!p) p = "/bin:/usr/bin";
         snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin");
         setenv("PATH", config_get("plugins", "PATH environment variable", path), 1);
+
+        p = getenv("PYTHONPATH");
+        if(!p) p = "";
+        setenv("PYTHONPATH", config_get("plugins", "PYTHONPATH environment variable", p), 1);
     }
 
     char *user = NULL;
index 0f8f3d7dac8a2829c841e96b770d7a1b95eef8f7..bdc1608ed2889087d90f214d41e417d12bf4c6c3 100644 (file)
@@ -368,10 +368,226 @@ cleanup:
     return len - i;
 }
 
-static inline const char *fix_units(const char *units) {
-    if(!units || !*units || !strcmp(units, "empty") || !strcmp(units, "null")) return "";
-    if(!strcmp(units, "percentage") || !strcmp(units, "percent") || !strcmp(units, "pcent")) return "%";
-    return units;
+/*
+ * Render 'value' as a decimal number followed by 'units' into 'value_string'.
+ *
+ *   value_string     - destination buffer; it is also the return value
+ *   value_string_len - length handed to snprintfz() for the destination
+ *   value            - the number to print; NaN and infinity are printed as 0
+ *   units            - text appended after the number (NULL is treated as "");
+ *                      a single space is inserted before it when it starts
+ *                      with an alphanumeric character
+ *   precision        - number of decimals to print (capped at 50); when it is
+ *                      negative, the number of decimals is selected from the
+ *                      magnitude of the value and trailing zeros are removed
+ */
+static inline char *format_value_with_precision_and_unit(char *value_string, size_t value_string_len, calculated_number value, const char *units, int precision) {
+    if(unlikely(isnan(value) || isinf(value)))
+        value = 0.0;
+
+    if(unlikely(!units))
+        units = "";
+
+    // isalnum() is only defined for values representable as unsigned char
+    // (and EOF), so a unit starting with a non-ASCII byte must be cast first
+    char *separator = "";
+    if(unlikely(isalnum((unsigned char)*units)))
+        separator = " ";
+
+    if(precision < 0) {
+        int len, lstop = 0, trim_zeros = 1;
+
+        // the magnitude is only used to select the number of decimals
+        calculated_number magnitude = value;
+        if(isless(value, 0)) {
+            // the trimming loop below stops above this index,
+            // so the minus sign and the first digit are never touched
+            lstop = 1;
+            magnitude = -value;
+        }
+
+        if(isgreaterequal(magnitude, 1000)) {
+            len = snprintfz(value_string, value_string_len, "%0.0Lf", (long double) value);
+            trim_zeros = 0;
+        }
+        else if(isgreaterequal(magnitude, 100)) len = snprintfz(value_string, value_string_len, "%0.1Lf", (long double) value);
+        else if(isgreaterequal(magnitude, 1))   len = snprintfz(value_string, value_string_len, "%0.2Lf", (long double) value);
+        else if(isgreaterequal(magnitude, 0.1)) len = snprintfz(value_string, value_string_len, "%0.3Lf", (long double) value);
+        else                                    len = snprintfz(value_string, value_string_len, "%0.4Lf", (long double) value);
+
+        if(unlikely(trim_zeros)) {
+            int l;
+            // remove trailing zeros from the decimal part
+            for(l = len - 1; l > lstop; l--) {
+                if(likely(value_string[l] == '0')) {
+                    value_string[l] = '\0';
+                    len--;
+                }
+
+                else if(unlikely(value_string[l] == '.')) {
+                    // the whole decimal part was zeros: drop the dot too and stop
+                    value_string[l] = '\0';
+                    len--;
+                    break;
+                }
+
+                else
+                    break;
+            }
+        }
+
+        if(unlikely(len < 0)) len = 0;
+
+        // append the units only when there is room left, so that
+        // (value_string_len - len) can never wrap around
+        if(likely((size_t)len < value_string_len))
+            snprintfz(&value_string[len], value_string_len - len, "%s%s", separator, units);
+    }
+    else {
+        if(precision > 50) precision = 50;
+        snprintfz(value_string, value_string_len, "%0.*Lf%s%s", precision, (long double) value, separator, units);
+    }
+
+    return value_string;
+}
+
+// true when 'value' can be converted to size_t without invoking undefined
+// behaviour: it is not NaN, not negative and below SIZE_MAX
+static inline int duration_fits_size_t(calculated_number value) {
+    return isgreaterequal(value, 0) && isless(value, (calculated_number)SIZE_MAX);
+}
+
+/*
+ * Format 'value' together with 'units' as human readable text in 'value_string'
+ * and return 'value_string'.
+ *
+ * A few unit names are handled specially:
+ *
+ *   "seconds", "seconds ago"  0 gives "now", NaN/infinity give "never",
+ *                             otherwise "[D day(s) ]HH:MM:SS[ ago]"
+ *   "minutes", "minutes ago"  same special cases, otherwise
+ *                             "Dd HHh MMm[ ago]" or "Hh Mm[ ago]"
+ *   "hours", "hours ago"      same special cases, otherwise
+ *                             "Dd Hh[ ago]" or "Hh[ ago]"
+ *   "on/off", "up/down",
+ *   "ok/error", "ok/failed"   the first word when value != 0.0, else the second
+ *   "empty", "null"           printed as a number without units
+ *   "percentage", "percent",
+ *   "pcent"                   printed as a number followed by "%"
+ *
+ * Durations that cannot be converted to size_t (negative or too large) are
+ * printed as plain numbers followed by the units. For every other unit,
+ * NaN/infinity give "-" and anything else is delegated to
+ * format_value_with_precision_and_unit(), which receives 'precision'.
+ *
+ * A NULL 'units' is treated as "".
+ */
+inline char *format_value_and_unit(char *value_string, size_t value_string_len, calculated_number value, const char *units, int precision) {
+    static uint32_t
+            hash_seconds = 0,
+            hash_seconds_ago = 0,
+            hash_minutes = 0,
+            hash_minutes_ago = 0,
+            hash_hours = 0,
+            hash_hours_ago = 0,
+            hash_onoff = 0,
+            hash_updown = 0,
+            hash_okerror = 0,
+            hash_okfailed = 0,
+            hash_empty = 0,
+            hash_null = 0,
+            hash_percentage = 0,
+            hash_percent = 0,
+            hash_pcent = 0;
+
+    // the hashes of the known unit names are computed on the first call only
+    if(unlikely(!hash_seconds)) {
+        hash_seconds     = simple_hash("seconds");
+        hash_seconds_ago = simple_hash("seconds ago");
+        hash_minutes     = simple_hash("minutes");
+        hash_minutes_ago = simple_hash("minutes ago");
+        hash_hours       = simple_hash("hours");
+        hash_hours_ago   = simple_hash("hours ago");
+        hash_onoff       = simple_hash("on/off");
+        hash_updown      = simple_hash("up/down");
+        hash_okerror     = simple_hash("ok/error");
+        hash_okfailed    = simple_hash("ok/failed");
+        hash_empty       = simple_hash("empty");
+        hash_null        = simple_hash("null");
+        hash_percentage  = simple_hash("percentage");
+        hash_percent     = simple_hash("percent");
+        hash_pcent       = simple_hash("pcent");
+    }
+
+    if(unlikely(!units)) units = "";
+
+    // compare hashes first, strcmp() only confirms a hash match
+    uint32_t hash_units = simple_hash(units);
+
+    if(unlikely((hash_units == hash_seconds && !strcmp(units, "seconds")) || (hash_units == hash_seconds_ago && !strcmp(units, "seconds ago")))) {
+        if(value == 0.0) {
+            snprintfz(value_string, value_string_len, "%s", "now");
+            return value_string;
+        }
+        else if(isnan(value) || isinf(value)) {
+            snprintfz(value_string, value_string_len, "%s", "never");
+            return value_string;
+        }
+        else if(unlikely(!duration_fits_size_t(value))) {
+            // cannot be split into days/hours/minutes/seconds - print it as a plain number
+            return format_value_with_precision_and_unit(value_string, value_string_len, value, units, precision);
+        }
+
+        const char *suffix = (hash_units == hash_seconds_ago)?" ago":"";
+
+        size_t s = (size_t)value;
+        size_t d = s / 86400;
+        s = s % 86400;
+
+        size_t h = s / 3600;
+        s = s % 3600;
+
+        size_t m = s / 60;
+        s = s % 60;
+
+        if(d)
+            snprintfz(value_string, value_string_len, "%zu %s %02zu:%02zu:%02zu%s", d, (d == 1)?"day":"days", h, m, s, suffix);
+        else
+            snprintfz(value_string, value_string_len, "%02zu:%02zu:%02zu%s", h, m, s, suffix);
+
+        return value_string;
+    }
+
+    else if(unlikely((hash_units == hash_minutes && !strcmp(units, "minutes")) || (hash_units == hash_minutes_ago && !strcmp(units, "minutes ago")))) {
+        if(value == 0.0) {
+            snprintfz(value_string, value_string_len, "%s", "now");
+            return value_string;
+        }
+        else if(isnan(value) || isinf(value)) {
+            snprintfz(value_string, value_string_len, "%s", "never");
+            return value_string;
+        }
+        else if(unlikely(!duration_fits_size_t(value))) {
+            // cannot be split into days/hours/minutes - print it as a plain number
+            return format_value_with_precision_and_unit(value_string, value_string_len, value, units, precision);
+        }
+
+        const char *suffix = (hash_units == hash_minutes_ago)?" ago":"";
+
+        size_t m = (size_t)value;
+        size_t d = m / (60 * 24);
+        m = m % (60 * 24);
+
+        size_t h = m / 60;
+        m = m % 60;
+
+        if(d)
+            snprintfz(value_string, value_string_len, "%zud %02zuh %02zum%s", d, h, m, suffix);
+        else
+            snprintfz(value_string, value_string_len, "%zuh %zum%s", h, m, suffix);
+
+        return value_string;
+    }
+
+    else if(unlikely((hash_units == hash_hours && !strcmp(units, "hours")) || (hash_units == hash_hours_ago && !strcmp(units, "hours ago")))) {
+        if(value == 0.0) {
+            snprintfz(value_string, value_string_len, "%s", "now");
+            return value_string;
+        }
+        else if(isnan(value) || isinf(value)) {
+            snprintfz(value_string, value_string_len, "%s", "never");
+            return value_string;
+        }
+        else if(unlikely(!duration_fits_size_t(value))) {
+            // cannot be split into days/hours - print it as a plain number
+            return format_value_with_precision_and_unit(value_string, value_string_len, value, units, precision);
+        }
+
+        const char *suffix = (hash_units == hash_hours_ago)?" ago":"";
+
+        size_t h = (size_t)value;
+        size_t d = h / 24;
+        h = h % 24;
+
+        if(d)
+            snprintfz(value_string, value_string_len, "%zud %zuh%s", d, h, suffix);
+        else
+            snprintfz(value_string, value_string_len, "%zuh%s", h, suffix);
+
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_onoff && !strcmp(units, "on/off"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"on":"off");
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_updown && !strcmp(units, "up/down"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"up":"down");
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_okerror && !strcmp(units, "ok/error"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"ok":"error");
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_okfailed && !strcmp(units, "ok/failed"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"ok":"failed");
+        return value_string;
+    }
+
+    // the remaining special names only rewrite the units and fall through
+    // to the generic numeric formatting below
+    else if(unlikely(hash_units == hash_empty && !strcmp(units, "empty")))
+        units = "";
+
+    else if(unlikely(hash_units == hash_null && !strcmp(units, "null")))
+        units = "";
+
+    else if(unlikely(hash_units == hash_percentage && !strcmp(units, "percentage")))
+        units = "%";
+
+    else if(unlikely(hash_units == hash_percent && !strcmp(units, "percent")))
+        units = "%";
+
+    else if(unlikely(hash_units == hash_pcent && !strcmp(units, "pcent")))
+        units = "%";
+
+
+    if(unlikely(isnan(value) || isinf(value))) {
+        // bounded write, like every other write in this function
+        snprintfz(value_string, value_string_len, "%s", "-");
+        return value_string;
+    }
+
+    return format_value_with_precision_and_unit(value_string, value_string_len, value, units, precision);
 }
 
 static inline const char *color_map(const char *color) {
@@ -391,7 +607,13 @@ static inline const char *color_map(const char *color) {
     return color;
 }
 
-static inline void calc_colorz(const char *color, char *final, size_t len, calculated_number value, int value_is_null) {
+static inline void calc_colorz(const char *color, char *final, size_t len, calculated_number value) {
+    int value_is_null = 0;
+    if(isnan(value) || isinf(value)) {
+        value = 0.0;
+        value_is_null = 1;
+    }
+
     char color_buffer[256 + 1] = "";
     char value_buffer[256 + 1] = "";
     char comparison = '>';
@@ -501,18 +723,7 @@ static inline void calc_colorz(const char *color, char *final, size_t len, calcu
 // colors
 #define COLOR_STRING_SIZE 100
 
-void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int value_is_null, int precision) {
-    static uint32_t hash_seconds = 0, hash_seconds_ago = 0, hash_minutes = 0, hash_minutes_ago = 0, hash_hours = 0, hash_hours_ago = 0;
-
-    if(unlikely(!hash_seconds)) {
-        hash_seconds     = simple_hash("seconds");
-        hash_seconds_ago = simple_hash("seconds ago");
-        hash_minutes     = simple_hash("minutes");
-        hash_minutes_ago = simple_hash("minutes ago");
-        hash_hours       = simple_hash("hours");
-        hash_hours_ago   = simple_hash("hours ago");
-    }
-
+void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision) {
     char      label_buffer[LABEL_STRING_SIZE + 1]
             , value_color_buffer[COLOR_STRING_SIZE + 1]
             , value_string[VALUE_STRING_SIZE + 1]
@@ -527,110 +738,10 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch
         label_color = "#555";
 
     if(unlikely(!value_color || !*value_color))
-        value_color = (value_is_null)?"#999":"#4c1";
+        value_color = (isnan(value) || isinf(value))?"#999":"#4c1";
 
-    units = fix_units(units);
-    calc_colorz(value_color, value_color_buffer, COLOR_STRING_SIZE, value, value_is_null);
-
-    char *separator = "";
-    if(unlikely(isalnum(*units)))
-        separator = " ";
-
-    uint32_t hash_units = simple_hash(units);
-
-    if(unlikely((hash_units == hash_seconds && !strcmp(units, "seconds")) || (hash_units == hash_seconds_ago && !strcmp(units, "seconds ago")))) {
-        char *suffix = (hash_units == hash_seconds_ago)?" ago":"";
-
-        size_t s = (size_t)value;
-        size_t d = s / 86400;
-        s = s % 86400;
-
-        size_t h = s / 3600;
-        s = s % 3600;
-
-        size_t m = s / 60;
-        s = s % 60;
-
-        if(d)
-            snprintfz(value_string, VALUE_STRING_SIZE, "%zu %s %02zu:%02zu:%02zu%s", d, (d == 1)?"day":"days", h, m, s, suffix);
-        else
-            snprintfz(value_string, VALUE_STRING_SIZE, "%02zu:%02zu:%02zu%s", h, m, s, suffix);
-    }
-
-    else if(unlikely((hash_units == hash_minutes && !strcmp(units, "minutes")) || (hash_units == hash_minutes_ago && !strcmp(units, "minutes ago")))) {
-        char *suffix = (hash_units == hash_minutes_ago)?" ago":"";
-
-        size_t m = (size_t)value;
-        size_t d = m / (60 * 24);
-        m = m % (60 * 24);
-
-        size_t h = m / 60;
-        m = m % 60;
-
-        if(d)
-            snprintfz(value_string, VALUE_STRING_SIZE, "%zud %02zuh %02zum%s", d, h, m, suffix);
-        else
-            snprintfz(value_string, VALUE_STRING_SIZE, "%zuh %zum%s", h, m, suffix);
-    }
-
-    else if(unlikely((hash_units == hash_hours && !strcmp(units, "hours")) || (hash_units == hash_hours_ago && !strcmp(units, "hours ago")))) {
-        char *suffix = (hash_units == hash_hours_ago)?" ago":"";
-
-        size_t h = (size_t)value;
-        size_t d = h / 24;
-        h = h % 24;
-
-        if(d)
-            snprintfz(value_string, VALUE_STRING_SIZE, "%zud %zuh%s", d, h, suffix);
-        else
-            snprintfz(value_string, VALUE_STRING_SIZE, "%zuh%s", h, suffix);
-    }
-
-    else if(unlikely(value_is_null))
-        strcpy(value_string, "-");
-
-    else if(precision < 0) {
-        int len, lstop = 0, trim_zeros = 1;
-
-        calculated_number abs = value;
-        if(isless(value, 0)) {
-            lstop = 1;
-            abs = -value;
-        }
-
-        if(isgreaterequal(abs, 1000))     { len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.0Lf", (long double)value); trim_zeros = 0; }
-        else if(isgreaterequal(abs, 100))   len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.1Lf", (long double)value);
-        else if(isgreaterequal(abs, 1))     len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.2Lf", (long double)value);
-        else if(isgreaterequal(abs, 0.1))   len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.3Lf", (long double)value);
-        else                                len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.4Lf", (long double)value);
-
-        if(unlikely(trim_zeros)) {
-            int l;
-            // remove trailing zeros from the decimal part
-            for(l = len - 1; l > lstop ; l--) {
-                if(likely(value_string[l] == '0')) {
-                    value_string[l] = '\0';
-                    len--;
-                }
-
-                else if(unlikely(value_string[l] == '.')) {
-                    value_string[l] = '\0';
-                    len--;
-                    break;
-                }
-
-                else
-                    break;
-            }
-        }
-
-        if(len >= 0)
-            snprintfz(&value_string[len], VALUE_STRING_SIZE - len, "%s%s", separator, units);
-    }
-    else {
-        if(precision > 50) precision = 50;
-        snprintfz(value_string, VALUE_STRING_SIZE, "%0.*Lf%s%s", precision, (long double)value, separator, units);
-    }
+    calc_colorz(value_color, value_color_buffer, COLOR_STRING_SIZE, value);
+    format_value_and_unit(value_string, VALUE_STRING_SIZE, value, units, precision);
 
     // we need to copy the label, since verdana11_width may write to it
     strncpyz(label_buffer, label, LABEL_STRING_SIZE);
index 1281847eb49c4721c7d00cfe3cd61257f8981215..49f73e445c0c2b1210a57ce830e4e44dc574b791 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef NETDATA_WEB_BUFFER_SVG_H
 #define NETDATA_WEB_BUFFER_SVG_H 1
 
-extern void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int value_is_null, int precision);
+extern void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision);
+extern char *format_value_and_unit(char *value_string, size_t value_string_len, calculated_number value, const char *units, int precision);
 
 #endif /* NETDATA_WEB_BUFFER_SVG_H */
index 4b6ccf6469e7d1c9070be807028dd949c6294941..5acb44d5b0537e150e821981601bc71ea0c7c5ae 100644 (file)
@@ -896,7 +896,7 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) {
     if(!st) st = rrdset_find_byname(chart);
     if(!st) {
         buffer_no_cacheable(w->response.data);
-        buffer_svg(w->response.data, "chart not found", 0, "", NULL, NULL, 1, -1);
+        buffer_svg(w->response.data, "chart not found", NAN, "", NULL, NULL, -1);
         ret = 200;
         goto cleanup;
     }
@@ -906,7 +906,7 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) {
         rc = rrdcalc_find(st, alarm);
         if (!rc) {
             buffer_no_cacheable(w->response.data);
-            buffer_svg(w->response.data, "alarm not found", 0, "", NULL, NULL, 1, -1);
+            buffer_svg(w->response.data, "alarm not found", NAN, "", NULL, NULL, -1);
             ret = 200;
             goto cleanup;
         }
@@ -982,9 +982,6 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) {
             );
 
     if(rc) {
-        calculated_number n = rc->value;
-        if(isnan(n) || isinf(n)) n = 0;
-
         if (refresh > 0) {
             buffer_sprintf(w->response.header, "Refresh: %d\r\n", refresh);
             w->response.data->expires = now_realtime_sec() + refresh;
@@ -1020,19 +1017,18 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) {
         }
 
         buffer_svg(w->response.data,
-                   label,
-                   rc->value * multiply / divide,
-                   units,
-                   label_color,
-                   value_color,
-                   0,
-                   precision);
+                label,
+                (isnan(rc->value)||isinf(rc->value)) ? rc->value : rc->value * multiply / divide,
+                units,
+                label_color,
+                value_color,
+                precision);
         ret = 200;
     }
     else {
         time_t latest_timestamp = 0;
         int value_is_null = 1;
-        calculated_number n = 0;
+        calculated_number n = NAN;
         ret = 500;
 
         // if the collected value is too old, don't calculate its value
@@ -1065,13 +1061,12 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) {
 
         // render the badge
         buffer_svg(w->response.data,
-                   label,
-                   n * multiply / divide,
-                   units,
-                   label_color,
-                   value_color,
-                   value_is_null,
-                   precision);
+                label,
+                (value_is_null)?NAN:(n * multiply / divide),
+                units,
+                label_color,
+                value_color,
+                precision);
     }
 
 cleanup:
index 6fc294204249e0f3cd331a3b1c8f350b1f8cfeb0..346a8e9c10d0e3b5e42eb7364c6146c461a7d4df 100644 (file)
@@ -6075,7 +6075,7 @@ var NETDATA = window.NETDATA || {};
 
             var name = entry.name.replace(/_/g, ' ');
             var status = entry.status.toLowerCase();
-            var title = name + ' = ' + ((value === null)?'NaN':Math.floor(value)).toString() + ' ' + entry.units;
+            var title = name + ' = ' + entry.value_string.toString();
             var tag = entry.alarm_id;
             var icon = 'images/seo-performance-128.png';
             var interaction = false;
@@ -6104,7 +6104,11 @@ var NETDATA = window.NETDATA || {};
                         // console.log('alarm' + entry.unique_id + ' switch to CLEAR from ' + entry.old_status);
                         return;
                     }
-                    title = name + ' back to normal';
+                    if(entry.no_clear_notification === true) {
+                        // console.log('alarm' + entry.unique_id + ' is CLEAR but has no_clear_notification flag');
+                        return;
+                    }
+                    title = name + ' back to normal (' + entry.value_string.toString() + ')';
                     icon = 'images/check-mark-2-128-green.png'
                     interaction = false;
                     break;
index d8e1282344daca504ae02de53fc4991bfa4988d2..ea51cc13245e9364b687d4e99b14b6dd9798c9a2 100644 (file)
 
                 function alarm_to_html(alarm, full) {
                     var chart = options.data.charts[alarm.chart];
+                    if(typeof(chart) === 'undefined') {
+                        // this means the charts loaded are incomplete
+                        // probably netdata was restarted and more charts
+                        // are now available.
+                        return '';
+                    }
+
                     var has_alarm = ((typeof alarm.warn !== 'undefined' || typeof alarm.crit !== 'undefined')?true:false);
 
                     var role_href = ((has_alarm === true)?('<br/>&nbsp;<br/>role: <b>' + alarm.recipient + '</b><br/>&nbsp;<br/><b><i class="fa fa-line-chart" aria-hidden="true"></i></b><small>&nbsp;&nbsp;<a href="#" onClick="NETDATA.alarms.scrollToChart(\'' + alarm.chart + '\'); $(\'#alarmsModal\').modal(\'hide\'); return false;">jump to chart</a></small>'):('&nbsp;'));
                         + ((typeof alarm.crit !== 'undefined')?('<tr><td width="10%" style="text-align:right">critical&nbsp;when</td><td><span style="font-family: monospace; color: #e05d44; font-weight: bold;">' + alarm.crit + '</span></td></tr>'):'');
 
                     if(full === true) {
-                            html += ((typeof alarm.lookup_after !== 'undefined')?('<tr><td width="10%" style="text-align:right">db&nbsp;lookup</td><td>' + alarm_lookup_explain(alarm, chart) + '</td></tr>'):'')
+                        var units = chart.units;
+                        if(units === '%') units = '&#37;';
+
+                        html += ((typeof alarm.lookup_after !== 'undefined')?('<tr><td width="10%" style="text-align:right">db&nbsp;lookup</td><td>' + alarm_lookup_explain(alarm, chart) + '</td></tr>'):'')
                             + ((typeof alarm.calc !== 'undefined')?('<tr><td width="10%" style="text-align:right">calculation</td><td><span style="font-family: monospace;">' + alarm.calc + '</span></td></tr>'):'')
-                            + ((chart.green !== null)?('<tr><td width="10%" style="text-align:right">green&nbsp;threshold</td><td><code>' + chart.green + ' ' + chart.units + '</code></td></tr>'):'')
-                            + ((chart.red !== null)?('<tr><td width="10%" style="text-align:right">red&nbsp;threshold</td><td><code>' + chart.red + ' ' + chart.units + '</code></td></tr>'):'');
+                            + ((chart.green !== null)?('<tr><td width="10%" style="text-align:right">green&nbsp;threshold</td><td><code>' + chart.green + ' ' + units + '</code></td></tr>'):'')
+                            + ((chart.red !== null)?('<tr><td width="10%" style="text-align:right">red&nbsp;threshold</td><td><code>' + chart.red + ' ' + units + '</code></td></tr>'):'');
                     }
 
                     var delay = '';
                                 switchable: false,
                                 sortable: true
                             },
+                            {
+                                field: 'value_string',
+                                title: 'Friendly Value',
+                                titleTooltip: 'The value of the alarm, that triggered this event',
+                                align: 'right',
+                                valign: 'middle',
+                                sortable: true
+                            },
+                            {
+                                field: 'old_value_string',
+                                title: 'Friendly Old Value',
+                                titleTooltip: 'The value of the alarm, just before this event',
+                                align: 'right',
+                                valign: 'middle',
+                                visible: false,
+                                sortable: true
+                            },
                             {
                                 field: 'old_value',
                                 title: 'Old Value',
                                 },
                                 align: 'right',
                                 valign: 'middle',
+                                visible: false,
                                 sortable: true
                             },
                             {
                                 titleTooltip: 'The units of the value of the alarm',
                                 align: 'left',
                                 valign: 'middle',
+                                visible: false,
                                 sortable: true
                             },
                             {
     </div>
 </body>
 </html>
-<script type="text/javascript" src="dashboard.js?v20170118-11"></script>
+<script type="text/javascript" src="dashboard.js?v20170127-1"></script>