health.d/disks.conf \
health.d/elasticsearch.conf \
health.d/entropy.conf \
+ health.d/fping.conf \
health.d/haproxy.conf \
health.d/ipc.conf \
health.d/ipfs.conf \
template: out_of_disk_space_time
on: disk.space
families: *
- calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (0)
+ calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
units: hours
every: 10s
warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
--- /dev/null
+
+template: fping_last_collected_secs
+families: *
+ on: fping.latency
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+template: host_reachable
+families: *
+ on: fping.latency
+ calc: $average != nan
+ units: up/down
+ every: 10s
+ crit: $this == 0
+ info: states if the remote host is reachable
+ delay: down 30m multiplier 1.5 max 2h
+ to: sysadmin
+
+template: host_latency
+families: *
+ on: fping.latency
+ lookup: average -10s unaligned of average
+ units: ms
+ every: 10s
+ green: 300
+ red: 1000
+ warn: $this > $green OR $max > $red
+ crit: $this > $red
+ info: average round trip delay during the last 10 seconds
+ delay: down 30m multiplier 1.5 max 2h
+ to: sysadmin
+
+template: packet_loss
+families: *
+ on: fping.quality
+ lookup: average -10m unaligned of returned
+ calc: 100 - $this
+ green: 1
+ red: 10
+ units: %
+ every: 10s
+ warn: $this > $green
+ crit: $this > $red
+ info: packet loss percentage
+ delay: down 30m multiplier 1.5 max 2h
+ to: sysadmin
+
template: out_of_cache_space_time
on: memcached.cache
- calc: ($cache_fill_rate > 0) ? ($available / $cache_fill_rate) : (0)
+ calc: ($cache_fill_rate > 0) ? ($available / $cache_fill_rate) : (inf)
units: hours
every: 10s
warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
template: mysql_10s_waited_locks_ratio
on: mysql.table_locks
- calc: ($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)
+ calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (10) : (25))
template: mysql_replication
on: mysql.slave_status
calc: ($sql_running == -1 OR $io_running == -1)?0:1
- units: status
+ units: ok/failed
every: 10s
crit: $this == 0
delay: down 5m multiplier 1.5 max 1h
units: %
warn: $this > (($status >= $WARNING)?(200):(1000))
crit: $this > (($status >= $WARNING)?(1000):(2000))
+options: no-clear-notification
info: the % of the rate of received packets in the last 10 seconds, compared to the rate of the last minute
- to: silent
+ to: sysadmin
every: 10s
warn: $this > ((($1m_ipv4_tcp_resets_sent < 5)?(5):($1m_ipv4_tcp_resets_sent)) * (($status >= $WARNING) ? (1) : (4)))
delay: up 0 down 60m multiplier 1.2 max 2h
+options: no-clear-notification
info: average TCP RESETS this host is sending, over the last 10 seconds (this can be an indication that a port scan is made, or that a service running on this host has crashed)
- to: silent
+ to: sysadmin
# -----------------------------------------------------------------------------
# tcp resets this host receives
every: 10s
warn: $this > ((($1m_ipv4_tcp_resets_received < 5)?(5):($1m_ipv4_tcp_resets_received)) * (($status >= $WARNING) ? (1) : (4)))
delay: up 0 down 60m multiplier 1.2 max 2h
+options: no-clear-notification
info: average TCP RESETS this host is receiving, over the last 10 seconds (this can be an indication that a service this host needs, has crashed)
- to: silent
+ to: sysadmin
-# SNMP Data Collector\r
-\r
-Using this collector, netdata can collect data from any SNMP device.\r
-\r
-This collector supports:\r
-\r
-- any number of SNMP devices\r
-- each SNMP device can be used to collect data for any number of charts\r
-- each chart may have any number of dimensions\r
-- each SNMP device may have a different update frequency\r
-- each SNMP device will accept one or more batches to report values (you can set `max_request_size` per SNMP server, to control the size of batches).\r
-\r
-The source code of the plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/snmp.node.js).\r
-\r
-## Configuration\r
-\r
-You will need to create the file `/etc/netdata/node.d/snmp.conf` with data like the following.\r
-\r
-In this example:\r
-\r
- - the SNMP device is `10.11.12.8`.\r
- - the SNMP community is `public`.\r
- - we will update the values every 10 seconds (`update_every: 10` under the server `10.11.12.8`).\r
- - we define 2 charts `snmp_switch.bandwidth_port1` and `snmp_switch.bandwidth_port2`, each having 2 dimensions: `in` and `out`.\r
-\r
-```js\r
-{\r
- "enable_autodetect": false,\r
- "update_every": 5,\r
- "max_request_size": 100,\r
- "servers": [\r
- {\r
- "hostname": "10.11.12.8",\r
- "community": "public",\r
- "update_every": 10,\r
- "max_request_size": 50,\r
- "options": { "timeout": 10000 },\r
- "charts": {\r
- "snmp_switch.bandwidth_port1": {\r
- "title": "Switch Bandwidth for port 1",\r
- "units": "kilobits/s",\r
- "type": "area",\r
- "priority": 1,\r
- "family": "ports",\r
- "dimensions": {\r
- "in": {\r
- "oid": "1.3.6.1.2.1.2.2.1.10.1",\r
- "algorithm": "incremental",\r
- "multiplier": 8,\r
- "divisor": 1024\r
- },\r
- "out": {\r
- "oid": "1.3.6.1.2.1.2.2.1.16.1",\r
- "algorithm": "incremental",\r
- "multiplier": -8,\r
- "divisor": 1024\r
- }\r
- }\r
- },\r
- "snmp_switch.bandwidth_port2": {\r
- "title": "Switch Bandwidth for port 2",\r
- "units": "kilobits/s",\r
- "type": "area",\r
- "priority": 1,\r
- "family": "ports",\r
- "dimensions": {\r
- "in": {\r
- "oid": "1.3.6.1.2.1.2.2.1.10.2",\r
- "algorithm": "incremental",\r
- "multiplier": 8,\r
- "divisor": 1024\r
- },\r
- "out": {\r
- "oid": "1.3.6.1.2.1.2.2.1.16.2",\r
- "algorithm": "incremental",\r
- "multiplier": -8,\r
- "divisor": 1024\r
- }\r
- }\r
- }\r
- }\r
- }\r
- ]\r
-}\r
-```\r
-\r
-`update_every` is the update frequency for each server, in seconds.\r
-\r
-`max_request_size` limits the maximum number of OIDs that will be requested in a single call. The default is 50. Lower this number of you get `TooBig` errors in netdata error.log.\r
-\r
-`family` sets the name of the submenu of the dashboard each chart will appear under.\r
-\r
-If you need to define many charts using incremental OIDs, you can use something like this:\r
-\r
-This is like the previous, but the option `multiply_range` given, will multiply the current chart from `1` to `24` inclusive, producing 24 charts in total for the 24 ports of the switch `10.11.12.8`.\r
-\r
-Each of the 24 new charts will have its id (1-24) appended at:\r
-\r
-1. its chart unique id, i.e. `snmp_switch.bandwidth_port1` to `snmp_switch.bandwidth_port24`\r
-2. its `title`, i.e. `Switch Bandwidth for port 1` to `Switch Bandwidth for port 24`\r
-3. its `oid` (for all dimensions), i.e. dimension `in` will be `1.3.6.1.2.1.2.2.1.10.1` to `1.3.6.1.2.1.2.2.1.10.24`\r
-3. its priority (which will be incremented for each chart so that the charts will appear on the dashboard in this order)\r
-\r
-```js\r
-{\r
- "enable_autodetect": false,\r
- "update_every": 10,\r
- "servers": [\r
- {\r
- "hostname": "10.11.12.8",\r
- "community": "public",\r
- "update_every": 10,\r
- "options": { "timeout": 20000 },\r
- "charts": {\r
- "snmp_switch.bandwidth_port": {\r
- "title": "Switch Bandwidth for port ",\r
- "units": "kilobits/s",\r
- "type": "area",\r
- "priority": 1,\r
- "family": "ports",\r
- "multiply_range": [ 1, 24 ],\r
- "dimensions": {\r
- "in": {\r
- "oid": "1.3.6.1.2.1.2.2.1.10.",\r
- "algorithm": "incremental",\r
- "multiplier": 8,\r
- "divisor": 1024\r
- },\r
- "out": {\r
- "oid": "1.3.6.1.2.1.2.2.1.16.",\r
- "algorithm": "incremental",\r
- "multiplier": -8,\r
- "divisor": 1024\r
- }\r
- }\r
- }\r
- }\r
- }\r
- ]\r
-}\r
-```\r
-\r
-The `options` given for each server, are:\r
-\r
- - `timeout`, the time to wait for the SNMP device to respond. The default is 5000 ms.\r
- - `version`, the SNMP version to use. `0` is Version 1, `1` is Version 2c. The default is Version 1 (`0`).\r
- - `transport`, the default is `udp4`.\r
- - `port`, the port of the SNMP device to connect to. The default is `161`.\r
- - `retries`, the number of attempts to make to fetch the data. The default is `1`.\r
-\r
-## Retreiving names from snmp\r
-\r
-You can append a value retrieved from SNMP to the title, by adding `titleoid` to the chart.\r
-\r
-You can set a dimension name to a value retrieved from SNMP, by adding `oidname` to the dimension.\r
-\r
-Both of the above will participate in `multiply_range`.\r
-\r
-\r
-## Testing the configuration\r
-\r
-To test it, you can run:\r
-\r
-```sh\r
-/usr/libexec/netdata/plugins.d/node.d.plugin 1 snmp\r
-```\r
-\r
-The above will run it on your console and you will be able to see what netdata sees, but also errors. You can get a very detailed output by appending `debug` to the command line.\r
-\r
-If it works, restart netdata to activate the snmp collector and refresh the dashboard (if your SNMP device responds with a delay, you may need to refresh the dashboard in a few seconds).\r
-\r
-## Data collection speed\r
-\r
-Keep in mind that many SNMP switches are routers are very slow. They may not be able to report values per second. If you run `node.d.plugin` in `debug` mode, it will report the time it took for the SNMP device to respond. My switch, for example, needs 7-8 seconds to respond for the traffic on 24 ports (48 OIDs, in/out).\r
-\r
-Also, if you use many SNMP clients on the same SNMP device at the same time, values may be skipped. This is a problem of the SNMP device, not this collector.\r
-\r
-## Finding OIDs\r
-\r
-Use `snmpwalk`, like this:\r
-\r
-```sh\r
-snmpwalk -t 20 -v 1 -O fn -c public 10.11.12.8\r
-```\r
-\r
-- `-t 20` is the timeout in seconds\r
-- `-v 1` is the SNMP version\r
-- `-O fn` will display full OIDs in numeric format (you may want to run it also without this option to see human readable output of OIDs)\r
-- `-c public` is the SNMP community\r
-- `10.11.12.8` is the SNMP device\r
-\r
-Keep in mind that `snmpwalk` outputs the OIDs with a dot in front them. You should remove this dot when adding OIDs to the configuration file of this collector.\r
-\r
-## Example: Linksys SRW2024P\r
-\r
-This is what I use for my Linksys SRW2024P. It creates:\r
-\r
-1. A chart for power consumption (it is a PoE switch)\r
-2. Two charts for packets received (total packets received and packets received with errors)\r
-3. One chart for packets output\r
-4. 24 charts, one for each port of the switch. It also appends the port names, as defined at the switch, to the chart titles.\r
-\r
-This switch also reports various other metrics, like snmp, packets per port, etc. Unfortunately it does not report CPU utilization or backplane utilization.\r
-\r
-This switch has a very slow SNMP processors. To respond, it needs about 8 seconds, so I have set the refresh frequency (`update_every`) to 15 seconds.\r
-\r
-```js\r
-{\r
- "enable_autodetect": false,\r
- "update_every": 5,\r
- "servers": [\r
- {\r
- "hostname": "10.11.12.8",\r
- "community": "public",\r
- "update_every": 15,\r
- "options": { "timeout": 20000, "version": 1 },\r
- "charts": {\r
- "snmp_switch.power": {\r
- "title": "Switch Power Supply",\r
- "units": "watts",\r
- "type": "line",\r
- "priority": 10,\r
- "family": "power",\r
- "dimensions": {\r
- "supply": {\r
- "oid": ".1.3.6.1.2.1.105.1.3.1.1.2.1",\r
- "algorithm": "absolute",\r
- "multiplier": 1,\r
- "divisor": 1\r
- },\r
- "used": {\r
- "oid": ".1.3.6.1.2.1.105.1.3.1.1.4.1",\r
- "algorithm": "absolute",\r
- "multiplier": 1,\r
- "divisor": 1\r
- }\r
- }\r
- }\r
- , "snmp_switch.input": {\r
- "title": "Switch Packets Input",\r
- "units": "packets/s",\r
- "type": "area",\r
- "priority": 20,\r
- "family": "IP",\r
- "dimensions": {\r
- "receives": {\r
- "oid": ".1.3.6.1.2.1.4.3.0",\r
- "algorithm": "incremental",\r
- "multiplier": 1,\r
- "divisor": 1\r
- }\r
- , "discards": {\r
- "oid": ".1.3.6.1.2.1.4.8.0",\r
- "algorithm": "incremental",\r
- "multiplier": 1,\r
- "divisor": 1\r
- }\r
- }\r
- }\r
- , "snmp_switch.input_errors": {\r
- "title": "Switch Received Packets with Errors",\r
- "units": "packets/s",\r
- "type": "line",\r
- "priority": 30,\r
- "family": "IP",\r
- "dimensions": {\r
- "bad_header": {\r
- "oid": ".1.3.6.1.2.1.4.4.0",\r
- "algorithm": "incremental",\r
- "multiplier": 1,\r
- "divisor": 1\r
- }\r
- , "bad_address": {\r
- "oid": ".1.3.6.1.2.1.4.5.0",\r
- "algorithm": "incremental",\r
- "multiplier": 1,\r
- "divisor": 1\r
- }\r
- , "unknown_protocol": {\r
- "oid": ".1.3.6.1.2.1.4.7.0",\r
- "algorithm": "incremental",\r
- "multiplier": 1,\r
- "divisor": 1\r
- }\r
- }\r
- }\r
- , "snmp_switch.output": {\r
- "title": "Switch Output Packets",\r
- "units": "packets/s",\r
- "type": "line",\r
- "priority": 40,\r
- "family": "IP",\r
- "dimensions": {\r
- "requests": {\r
- "oid": ".1.3.6.1.2.1.4.10.0",\r
- "algorithm": "incremental",\r
- "multiplier": 1,\r
- "divisor": 1\r
- }\r
- , "discards": {\r
- "oid": ".1.3.6.1.2.1.4.11.0",\r
- "algorithm": "incremental",\r
- "multiplier": -1,\r
- "divisor": 1\r
- }\r
- , "no_route": {\r
- "oid": ".1.3.6.1.2.1.4.12.0",\r
- "algorithm": "incremental",\r
- "multiplier": -1,\r
- "divisor": 1\r
- }\r
- }\r
- }\r
- , "snmp_switch.bandwidth_port": {\r
- "title": "Switch Bandwidth for port ",\r
- "titleoid": ".1.3.6.1.2.1.31.1.1.1.18.",\r
- "units": "kilobits/s",\r
- "type": "area",\r
- "priority": 100,\r
- "family": "ports",\r
- "multiply_range": [ 1, 24 ],\r
- "dimensions": {\r
- "in": {\r
- "oid": ".1.3.6.1.2.1.2.2.1.10.",\r
- "algorithm": "incremental",\r
- "multiplier": 8,\r
- "divisor": 1024\r
- }\r
- , "out": {\r
- "oid": ".1.3.6.1.2.1.2.2.1.16.",\r
- "algorithm": "incremental",\r
- "multiplier": -8,\r
- "divisor": 1024\r
- }\r
- }\r
- }\r
- }\r
- }\r
- ]\r
-}\r
-```\r
+# SNMP Data Collector
+
+Using this collector, netdata can collect data from any SNMP device.
+
+This collector supports:
+
+- any number of SNMP devices
+- each SNMP device can be used to collect data for any number of charts
+- each chart may have any number of dimensions
+- each SNMP device may have a different update frequency
+- each SNMP device will accept one or more batches to report values (you can set `max_request_size` per SNMP server, to control the size of batches).
+
+The source code of the plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/snmp.node.js).
+
+## Configuration
+
+You will need to create the file `/etc/netdata/node.d/snmp.conf` with data like the following.
+
+In this example:
+
+ - the SNMP device is `10.11.12.8`.
+ - the SNMP community is `public`.
+ - we will update the values every 10 seconds (`update_every: 10` under the server `10.11.12.8`).
+ - we define 2 charts `snmp_switch.bandwidth_port1` and `snmp_switch.bandwidth_port2`, each having 2 dimensions: `in` and `out`.
+
+```js
+{
+ "enable_autodetect": false,
+ "update_every": 5,
+ "max_request_size": 100,
+ "servers": [
+ {
+ "hostname": "10.11.12.8",
+ "community": "public",
+ "update_every": 10,
+ "max_request_size": 50,
+ "options": { "timeout": 10000 },
+ "charts": {
+ "snmp_switch.bandwidth_port1": {
+ "title": "Switch Bandwidth for port 1",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 1,
+ "family": "ports",
+ "dimensions": {
+ "in": {
+ "oid": "1.3.6.1.2.1.2.2.1.10.1",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024,
+ "offset": 0
+ },
+ "out": {
+ "oid": "1.3.6.1.2.1.2.2.1.16.1",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024,
+ "offset": 0
+ }
+ }
+ },
+ "snmp_switch.bandwidth_port2": {
+ "title": "Switch Bandwidth for port 2",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 1,
+ "family": "ports",
+ "dimensions": {
+ "in": {
+ "oid": "1.3.6.1.2.1.2.2.1.10.2",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024,
+ "offset": 0
+ },
+ "out": {
+ "oid": "1.3.6.1.2.1.2.2.1.16.2",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024,
+ "offset": 0
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+`update_every` is the update frequency for each server, in seconds.
+
+`max_request_size` limits the maximum number of OIDs that will be requested in a single call. The default is 50. Lower this number if you get `TooBig` errors in netdata error.log.
+
+`family` sets the name of the submenu of the dashboard each chart will appear under.
+
+If you need to define many charts using incremental OIDs, you can use something like this:
+
+This is like the previous example, but the `multiply_range` option replicates the chart once for each id from `1` to `24` inclusive, producing 24 charts in total for the 24 ports of the switch `10.11.12.8`.
+
+Each of the 24 new charts will have its id (1-24) appended at:
+
+1. its chart unique id, i.e. `snmp_switch.bandwidth_port1` to `snmp_switch.bandwidth_port24`
+2. its `title`, i.e. `Switch Bandwidth for port 1` to `Switch Bandwidth for port 24`
+3. its `oid` (for all dimensions), i.e. dimension `in` will be `1.3.6.1.2.1.2.2.1.10.1` to `1.3.6.1.2.1.2.2.1.10.24`
+4. its priority (which will be incremented for each chart so that the charts will appear on the dashboard in this order)
+
+```js
+{
+ "enable_autodetect": false,
+ "update_every": 10,
+ "servers": [
+ {
+ "hostname": "10.11.12.8",
+ "community": "public",
+ "update_every": 10,
+ "options": { "timeout": 20000 },
+ "charts": {
+ "snmp_switch.bandwidth_port": {
+ "title": "Switch Bandwidth for port ",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 1,
+ "family": "ports",
+ "multiply_range": [ 1, 24 ],
+ "dimensions": {
+ "in": {
+ "oid": "1.3.6.1.2.1.2.2.1.10.",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024,
+ "offset": 0
+ },
+ "out": {
+ "oid": "1.3.6.1.2.1.2.2.1.16.",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024,
+ "offset": 0
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+The `options` given for each server are:
+
+ - `timeout`, the time to wait for the SNMP device to respond. The default is 5000 ms.
+ - `version`, the SNMP version to use. `0` is Version 1, `1` is Version 2c. The default is Version 1 (`0`).
+ - `transport`, the default is `udp4`.
+ - `port`, the port of the SNMP device to connect to. The default is `161`.
+ - `retries`, the number of attempts to make to fetch the data. The default is `1`.
+
+## Retrieving names from snmp
+
+You can append a value retrieved from SNMP to the title, by adding `titleoid` to the chart.
+
+You can set a dimension name to a value retrieved from SNMP, by adding `oidname` to the dimension.
+
+Both of the above will participate in `multiply_range`.
+
+
+## Testing the configuration
+
+To test it, you can run:
+
+```sh
+/usr/libexec/netdata/plugins.d/node.d.plugin 1 snmp
+```
+
+The above will run it on your console and you will be able to see what netdata sees, but also errors. You can get a very detailed output by appending `debug` to the command line.
+
+If it works, restart netdata to activate the snmp collector and refresh the dashboard (if your SNMP device responds with a delay, you may need to refresh the dashboard in a few seconds).
+
+## Data collection speed
+
+Keep in mind that many SNMP switches and routers are very slow. They may not be able to report values per second. If you run `node.d.plugin` in `debug` mode, it will report the time it took for the SNMP device to respond. My switch, for example, needs 7-8 seconds to respond for the traffic on 24 ports (48 OIDs, in/out).
+
+Also, if you use many SNMP clients on the same SNMP device at the same time, values may be skipped. This is a problem of the SNMP device, not this collector.
+
+## Finding OIDs
+
+Use `snmpwalk`, like this:
+
+```sh
+snmpwalk -t 20 -v 1 -O fn -c public 10.11.12.8
+```
+
+- `-t 20` is the timeout in seconds
+- `-v 1` is the SNMP version
+- `-O fn` will display full OIDs in numeric format (you may want to run it also without this option to see human readable output of OIDs)
+- `-c public` is the SNMP community
+- `10.11.12.8` is the SNMP device
+
+Keep in mind that `snmpwalk` outputs the OIDs with a dot in front of them. You should remove this dot when adding OIDs to the configuration file of this collector.
+
+## Example: Linksys SRW2024P
+
+This is what I use for my Linksys SRW2024P. It creates:
+
+1. A chart for power consumption (it is a PoE switch)
+2. Two charts for packets received (total packets received and packets received with errors)
+3. One chart for packets output
+4. 24 charts, one for each port of the switch. It also appends the port names, as defined at the switch, to the chart titles.
+
+This switch also reports various other metrics, like snmp, packets per port, etc. Unfortunately it does not report CPU utilization or backplane utilization.
+
+This switch has a very slow SNMP processor. To respond, it needs about 8 seconds, so I have set the refresh frequency (`update_every`) to 15 seconds.
+
+```js
+{
+ "enable_autodetect": false,
+ "update_every": 5,
+ "servers": [
+ {
+ "hostname": "10.11.12.8",
+ "community": "public",
+ "update_every": 15,
+ "options": { "timeout": 20000, "version": 1 },
+ "charts": {
+ "snmp_switch.power": {
+ "title": "Switch Power Supply",
+ "units": "watts",
+ "type": "line",
+ "priority": 10,
+ "family": "power",
+ "dimensions": {
+ "supply": {
+ "oid": ".1.3.6.1.2.1.105.1.3.1.1.2.1",
+ "algorithm": "absolute",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ },
+ "used": {
+ "oid": ".1.3.6.1.2.1.105.1.3.1.1.4.1",
+ "algorithm": "absolute",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ }
+ }
+ }
+ , "snmp_switch.input": {
+ "title": "Switch Packets Input",
+ "units": "packets/s",
+ "type": "area",
+ "priority": 20,
+ "family": "IP",
+ "dimensions": {
+ "receives": {
+ "oid": ".1.3.6.1.2.1.4.3.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ }
+ , "discards": {
+ "oid": ".1.3.6.1.2.1.4.8.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ }
+ }
+ }
+ , "snmp_switch.input_errors": {
+ "title": "Switch Received Packets with Errors",
+ "units": "packets/s",
+ "type": "line",
+ "priority": 30,
+ "family": "IP",
+ "dimensions": {
+ "bad_header": {
+ "oid": ".1.3.6.1.2.1.4.4.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ }
+ , "bad_address": {
+ "oid": ".1.3.6.1.2.1.4.5.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ }
+ , "unknown_protocol": {
+ "oid": ".1.3.6.1.2.1.4.7.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ }
+ }
+ }
+ , "snmp_switch.output": {
+ "title": "Switch Output Packets",
+ "units": "packets/s",
+ "type": "line",
+ "priority": 40,
+ "family": "IP",
+ "dimensions": {
+ "requests": {
+ "oid": ".1.3.6.1.2.1.4.10.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1,
+ "offset": 0
+ }
+ , "discards": {
+ "oid": ".1.3.6.1.2.1.4.11.0",
+ "algorithm": "incremental",
+ "multiplier": -1,
+ "divisor": 1,
+ "offset": 0
+ }
+ , "no_route": {
+ "oid": ".1.3.6.1.2.1.4.12.0",
+ "algorithm": "incremental",
+ "multiplier": -1,
+ "divisor": 1,
+ "offset": 0
+ }
+ }
+ }
+ , "snmp_switch.bandwidth_port": {
+ "title": "Switch Bandwidth for port ",
+ "titleoid": ".1.3.6.1.2.1.31.1.1.1.18.",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 100,
+ "family": "ports",
+ "multiply_range": [ 1, 24 ],
+ "dimensions": {
+ "in": {
+ "oid": ".1.3.6.1.2.1.2.2.1.10.",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024,
+ "offset": 0
+ }
+ , "out": {
+ "oid": ".1.3.6.1.2.1.2.2.1.16.",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024,
+ "offset": 0
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
['18ee1c6197a4381b1c1631ef6129824f']='apps_groups.conf'
['1972e48345e6c3f0d65f94a03317622b']='health_alarm_notify.conf'
['1c12b678ab65f271a96da1bbd0a1ab1c']='health.d/softnet.conf'
+ ['1c3168c95b53e999df3d45162b3f50b8']='health.d/fping.conf'
['1ea8e8ef1fa8a3a0fcdfba236f4cb195']='python.d/mysql.conf'
['1ef0fd38e7969c023bc3fa6d89eaf6d6']='python.d/mdstat.conf'
['1f5545b3ff52b3eb75ee05401f67a9bc']='fping.conf'
['312b4b8e2805e19cf9be554b319567d6']='health.d/softnet.conf'
['318bb45755726a25120bb33413d4b582']='health.d/net.conf'
['325617412a628e3bc776e3fbb777a2a6']='health.d/redis.conf'
+ ['326e1477131e0f73304711135f70a2a5']='health.d/memcached.conf'
['32fde0057c790964f2c743cb3c9aad29']='health.d/nginx.conf'
['33b135e28aeaef2b8224ba69a0fde245']='health.d/cpu.conf'
+ ['343bc919a2fbc93f687f9d1339ec5f79']='health.d/net.conf'
['3634d5eddc46fb0d50cf47f370670c2c']='health.d/redis.conf'
['364b6e0081b116c9ec073b4d329a6dcc']='health_alarm_notify.conf'
['367d1463e520eb9dc89223bab161c6d1']='python.d/postgres.conf'
['4b775fb31342f1478b3773d041a72911']='python.d.conf'
['4ccb06fff1ce06dc5bc80e0a9f568f6e']='charts.d.conf'
['4d13684cadfa90e73ab465409bf7263b']='health.d/mysql.conf'
+ ['4d91ee6fe4c887ea3865ef36ac63da3c']='health.d/mysql.conf'
['4e995acb0d6fd77403a2a9dca984b55b']='charts.d.conf'
['4f6a5b47a13f5912cc89e9286701dd08']='health.d/redis.conf'
['4f6f4d39c19d7d954f769d3f9d3b4da5']='health.d/memcached.conf'
['8c1d41e2c88aeca78bc319ed74c8748c']='python.d/phpfpm.conf'
['8d0552371a7c9725a04196fa560813d1']='health.d/cpu.conf'
['8dc0bd0a70b5117454bd5f5b98f91c2c']='health.d/disks.conf'
+ ['8f4f925c1e97dd164007495ec5135ffc']='health.d/fping.conf'
['8fd472a854b0996327e8ed3562161182']='health_alarm_notify.conf'
['919911d13901d60a7580f5dfd7fc87bb']='health.d/ram.conf'
['91c757ef6be3abdb86906d9dbb9c217a']='fping.conf'
['99c1617448abbdc493976ab9bda5ce02']='apps_groups.conf'
['9a8a459a3841b78d4c6ef07428ad2fe1']='health.d/entropy.conf'
['9c0185ceff15415bc59b2ce2c1f04367']='apps_groups.conf'
+ ['9c8ddfa810d83ae58c8614ee5229e66b']='health.d/disks.conf'
['9c981c75bdf4b1637f7113e7e45eb2bf']='health.d/memcached.conf'
['9e0553ebdc21b64295873fc104cfa79d']='python.d.conf'
['9eb3326ae2ee9badeaad31d8dd2eaa2b']='python.d/isc_dhcpd.conf'
['a02d14124b19c635c1426cee2e98bac5']='charts.d.conf'
+ ['a03f3e38378385bf87d4c0f81eb1f108']='health.d/tcp_resets.conf'
['a09714b5942cf25a89ec3da1dbc18063']='health.d/ram.conf'
['a0b3a12389c9c56dfe35964b20b59836']='health.d/bind_rndc.conf'
['a0ee8f351f213c0e8af9eb7a4a09cb95']='apps_groups.conf'
['de02f899a61f21b86adb646940f0bcae']='health.d/net.conf'
['def883f35986c9d25de63b1a8e7d0f46']='health.d/entropy.conf'
['df381f3a7ca9fb2b4b43ae7cb7a4c492']='python.d/mysql.conf'
+ ['df7e8044902b5e155fad8430c2ddcfa8']='health.d/fping.conf'
['dfd5431b11cf2f3852a40d390c1d5a92']='python.d/varnish.conf'
['e0242003fd2e3f9ac1b9314e802ada79']='python.d/hddtemp.conf'
['e0e96cc47ed61d6492416be5236cd4d3']='python.d/apache_cache.conf'
'use strict';
-
+// netdata snmp module
// This program will connect to one or more SNMP Agents
+//
// example configuration in /etc/netdata/node.d/snmp.conf
/*
"oid": ".1.3.6.1.2.1.2.2.1.10.1",
"algorithm": "incremental",
"multiplier": 8,
- "divisor": 1024
+ "divisor": 1024,
+ "offset": 0
},
"out": {
"oid": ".1.3.6.1.2.1.2.2.1.16.1",
"algorithm": "incremental",
"multiplier": -8,
- "divisor": 1024
+ "divisor": 1024,
+ "offset": 0
}
}
},
"oid": ".1.3.6.1.2.1.2.2.1.10.2",
"algorithm": "incremental",
"multiplier": 8,
- "divisor": 1024
+ "divisor": 1024,
+ "offset": 0
},
"out": {
"oid": ".1.3.6.1.2.1.2.2.1.16.2",
"algorithm": "incremental",
"multiplier": -8,
- "divisor": 1024
+ "divisor": 1024,
+ "offset": 0
}
}
}
"oid": ".1.3.6.1.2.1.2.2.1.10.",
"algorithm": "incremental",
"multiplier": 8,
- "divisor": 1024
+ "divisor": 1024,
+ "offset": 0
},
"out": {
"oid": ".1.3.6.1.2.1.2.2.1.16.",
"algorithm": "incremental",
"multiplier": -8,
- "divisor": 1024
+ "divisor": 1024,
+ "offset": 0
}
}
}
for(var j = 0; j < dim_keys_len ; j++) {
var d = dim_keys[j];
- if (dimensions[d].value !== null)
- service.set(d, dimensions[d].value);
+ if (dimensions[d].value !== null) {
+ if(typeof dimensions[d].offset === 'number')
+ service.set(d, dimensions[d].value + dimensions[d].offset);
+ else
+ service.set(d, dimensions[d].value);
+ }
}
service.end();
echo >&2
echo >&2 "# SENDING TEST ${x} ALARM TO ROLE: ${recipient}"
- "${0}" "${recipient}" "$(hostname)" 1 1 "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" 100 90 "${0}" 1 $((0 + id)) "units" "this is a test alarm to verify notifications work"
+ "${0}" "${recipient}" "$(hostname)" 1 1 "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" 100 90 "${0}" 1 $((0 + id)) "units" "this is a test alarm to verify notifications work" "new value" "old value"
if [ $? -ne 0 ]
then
echo >&2 "# FAILED"
non_clear_duration="${16}" # the total duration in seconds this is/was non-clear
units="${17}" # the units of the value
info="${18}" # a short description of the alarm
+value_string="${19}" # friendly value (with units)
+old_value_string="${20}" # friendly old value (with units)
# -----------------------------------------------------------------------------
# screen statuses we don't need to send a notification
then
for PD_SERVICE_KEY in ${recipients}
do
- d="${status} ${name}=${value} ${units} - ${host}, ${family}"
+ d="${status} ${name} = ${value_string} - ${host}, ${family}"
${pd_send} -k ${PD_SERVICE_KEY} \
-t ${t} \
-d "${d}" \
-i ${alarm_id} \
-f 'info'="${info}" \
- -f 'value_w_units'="${value} ${units}" \
+ -f 'value_w_units'="${value_string}" \
-f 'when'="${when}" \
-f 'duration'="${duration}" \
-f 'roles'="${roles}" \
color="grey"
# the alarm value
-alarm="${name//_/ } = ${value} ${units}"
+alarm="${name//_/ } = ${value_string}"
# the image of the alarm
image="${images_base_url}/images/seo-performance-128.png"
ae->exec_run_timestamp = (uint32_t)strtoul(pointers[11], NULL, 16);
ae->delay_up_to_timestamp = (uint32_t)strtoul(pointers[12], NULL, 16);
- if(unlikely(ae->name)) freez(ae->name);
+ freez(ae->name);
ae->name = strdupz(pointers[13]);
ae->hash_name = simple_hash(ae->name);
- if(unlikely(ae->chart)) freez(ae->chart);
+ freez(ae->chart);
ae->chart = strdupz(pointers[14]);
ae->hash_chart = simple_hash(ae->chart);
- if(unlikely(ae->family)) freez(ae->family);
+ freez(ae->family);
ae->family = strdupz(pointers[15]);
- if(unlikely(ae->exec)) freez(ae->exec);
+ freez(ae->exec);
ae->exec = strdupz(pointers[16]);
if(!*ae->exec) { freez(ae->exec); ae->exec = NULL; }
- if(unlikely(ae->recipient)) freez(ae->recipient);
+ freez(ae->recipient);
ae->recipient = strdupz(pointers[17]);
if(!*ae->recipient) { freez(ae->recipient); ae->recipient = NULL; }
- if(unlikely(ae->source)) freez(ae->source);
+ freez(ae->source);
ae->source = strdupz(pointers[18]);
if(!*ae->source) { freez(ae->source); ae->source = NULL; }
- if(unlikely(ae->units)) freez(ae->units);
+ freez(ae->units);
ae->units = strdupz(pointers[19]);
if(!*ae->units) { freez(ae->units); ae->units = NULL; }
- if(unlikely(ae->info)) freez(ae->info);
+ freez(ae->info);
ae->info = strdupz(pointers[20]);
if(!*ae->info) { freez(ae->info); ae->info = NULL; }
ae->new_value = str2l(pointers[25]);
ae->old_value = str2l(pointers[26]);
+ static char value_string[100 + 1];
+ freez(ae->old_value_string);
+ freez(ae->new_value_string);
+ ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1));
+ ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1));
+
// add it to host if not already there
if(unlikely(*pointers[0] == 'A')) {
ae->next = host->health_log.alarms;
// ----------------------------------------------------------------------------
// health alarm log management
-static inline void health_alarm_log(RRDHOST *host,
- uint32_t alarm_id, uint32_t alarm_event_id,
- time_t when,
- const char *name, const char *chart, const char *family,
- const char *exec, const char *recipient, time_t duration,
- calculated_number old_value, calculated_number new_value,
- int old_status, int new_status,
- const char *source,
- const char *units,
- const char *info,
- int delay
+static inline void health_alarm_log(
+ RRDHOST *host,
+ uint32_t alarm_id,
+ uint32_t alarm_event_id,
+ time_t when,
+ const char *name,
+ const char *chart,
+ const char *family,
+ const char *exec,
+ const char *recipient,
+ time_t duration,
+ calculated_number old_value,
+ calculated_number new_value,
+ int old_status,
+ int new_status,
+ const char *source,
+ const char *units,
+ const char *info,
+ int delay,
+ uint32_t flags
) {
debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
ae->when = when;
ae->old_value = old_value;
ae->new_value = new_value;
+
+ static char value_string[100 + 1];
+ ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1));
+ ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1));
+
ae->old_status = old_status;
ae->new_status = new_status;
ae->duration = duration;
ae->delay = delay;
ae->delay_up_to_timestamp = when + delay;
+ ae->flags |= flags;
+
if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
ae->non_clear_duration += ae->duration;
{
time_t now = now_realtime_sec();
- health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_UNINITIALIZED, rc->source, rc->units, rc->info, 0);
+ health_alarm_log(
+ st->rrdhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_UNINITIALIZED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0
+ );
}
}
{
time_t now = now_realtime_sec();
- health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_REMOVED, rc->source, rc->units, rc->info, 0);
+ health_alarm_log(
+ st->rrdhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_REMOVED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0
+ );
}
RRDHOST *host = st->rrdhost;
#define HEALTH_UNITS_KEY "units"
#define HEALTH_INFO_KEY "info"
#define HEALTH_DELAY_KEY "delay"
+#define HEALTH_OPTIONS_KEY "options"
static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
if(!rc->chart) {
return 1;
}
+// Parse the value of the "options" health configuration key.
+//
+// s is a string of whitespace-separated option words, compared
+// case-insensitively. Recognised words:
+//   no-clear-notification, no-clear -> RRDCALC_FLAG_NO_CLEAR_NOTIFICATION
+// Any other word is reported with error() and ignored.
+//
+// Returns the RRDCALC_FLAG_* bits of all recognised words OR-ed together
+// (0 when there is none).
+static inline uint32_t health_parse_options(const char *s) {
+    uint32_t options = 0;
+    char buf[100+1] = "";
+
+    while(*s) {
+        // skip spaces
+        // (the cast is required: passing a negative char to isspace() is undefined)
+        while(*s && isspace((unsigned char)*s))
+            s++;
+
+        // copy the next word, up to the capacity of buf
+        size_t count = 0;
+        while(*s && count < 100 && !isspace((unsigned char)*s))
+            buf[count++] = *s++;
+        buf[count] = '\0';
+
+        // drop the tail of an over-long word, so that it is not parsed
+        // as if it were a separate option
+        while(*s && !isspace((unsigned char)*s))
+            s++;
+
+        // count is zero only when the string ended with spaces
+        if(count) {
+            if(!strcasecmp(buf, "no-clear-notification") || !strcasecmp(buf, "no-clear"))
+                options |= RRDCALC_FLAG_NO_CLEAR_NOTIFICATION;
+            else
+                error("Ignoring unknown alarm option '%s'", buf);
+        }
+    }
+
+    return options;
+}
+
static inline int health_parse_db_lookup(
size_t line, const char *path, const char *file, char *string,
int *group_method, int *after, int *before, int *every,
int health_readfile(const char *path, const char *filename) {
debug(D_HEALTH, "Health configuration reading file '%s/%s'", path, filename);
- static uint32_t hash_alarm = 0, hash_template = 0, hash_on = 0, hash_families = 0, hash_calc = 0, hash_green = 0, hash_red = 0, hash_warn = 0, hash_crit = 0, hash_exec = 0, hash_every = 0, hash_lookup = 0, hash_units = 0, hash_info = 0, hash_recipient = 0, hash_delay = 0;
+ static uint32_t
+ hash_alarm = 0,
+ hash_template = 0,
+ hash_on = 0,
+ hash_families = 0,
+ hash_calc = 0,
+ hash_green = 0,
+ hash_red = 0,
+ hash_warn = 0,
+ hash_crit = 0,
+ hash_exec = 0,
+ hash_every = 0,
+ hash_lookup = 0,
+ hash_units = 0,
+ hash_info = 0,
+ hash_recipient = 0,
+ hash_delay = 0,
+ hash_options = 0;
+
char buffer[HEALTH_CONF_MAX_LINE + 1];
if(unlikely(!hash_alarm)) {
hash_info = simple_hash(HEALTH_INFO_KEY);
hash_recipient = simple_hash(HEALTH_RECIPIENT_KEY);
hash_delay = simple_uhash(HEALTH_DELAY_KEY);
+ hash_options = simple_uhash(HEALTH_OPTIONS_KEY);
}
snprintfz(buffer, HEALTH_CONF_MAX_LINE, "%s/%s", path, filename);
else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
health_parse_delay(line, path, filename, value, &rc->delay_up_duration, &rc->delay_down_duration, &rc->delay_max_duration, &rc->delay_multiplier);
}
+ else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+ rc->options |= health_parse_options(value);
+ }
else {
error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has unknown key '%s'.",
line, path, filename, rc->name, key);
else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
health_parse_delay(line, path, filename, value, &rt->delay_up_duration, &rt->delay_down_duration, &rt->delay_max_duration, &rt->delay_multiplier);
}
+ else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+ rt->options |= health_parse_options(value);
+ }
else {
error("Health configuration at line %zu of file '%s/%s' for template '%s' has unknown key '%s'.",
line, path, filename, rt->name, key);
}
static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
- buffer_sprintf(wb, "\n\t{\n"
- "\t\t\"hostname\": \"%s\",\n"
- "\t\t\"unique_id\": %u,\n"
- "\t\t\"alarm_id\": %u,\n"
- "\t\t\"alarm_event_id\": %u,\n"
- "\t\t\"name\": \"%s\",\n"
- "\t\t\"chart\": \"%s\",\n"
- "\t\t\"family\": \"%s\",\n"
- "\t\t\"processed\": %s,\n"
- "\t\t\"updated\": %s,\n"
- "\t\t\"exec_run\": %lu,\n"
- "\t\t\"exec_failed\": %s,\n"
- "\t\t\"exec\": \"%s\",\n"
- "\t\t\"recipient\": \"%s\",\n"
- "\t\t\"exec_code\": %d,\n"
- "\t\t\"source\": \"%s\",\n"
- "\t\t\"units\": \"%s\",\n"
- "\t\t\"info\": \"%s\",\n"
- "\t\t\"when\": %lu,\n"
- "\t\t\"duration\": %lu,\n"
- "\t\t\"non_clear_duration\": %lu,\n"
- "\t\t\"status\": \"%s\",\n"
- "\t\t\"old_status\": \"%s\",\n"
- "\t\t\"delay\": %d,\n"
- "\t\t\"delay_up_to_timestamp\": %lu,\n"
- "\t\t\"updated_by_id\": %u,\n"
- "\t\t\"updates_id\": %u,\n",
- host->hostname,
- ae->unique_id,
- ae->alarm_id,
- ae->alarm_event_id,
- ae->name,
- ae->chart,
- ae->family,
- (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false",
- (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false",
- (unsigned long)ae->exec_run_timestamp,
- (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false",
- ae->exec?ae->exec:health.health_default_exec,
- ae->recipient?ae->recipient:health.health_default_recipient,
- ae->exec_code,
- ae->source,
- ae->units?ae->units:"",
- ae->info?ae->info:"",
- (unsigned long)ae->when,
- (unsigned long)ae->duration,
- (unsigned long)ae->non_clear_duration,
- rrdcalc_status2string(ae->new_status),
- rrdcalc_status2string(ae->old_status),
- ae->delay,
- (unsigned long)ae->delay_up_to_timestamp,
- ae->updated_by_id,
- ae->updates_id
+ buffer_sprintf(wb,
+ "\n\t{\n"
+ "\t\t\"hostname\": \"%s\",\n"
+ "\t\t\"unique_id\": %u,\n"
+ "\t\t\"alarm_id\": %u,\n"
+ "\t\t\"alarm_event_id\": %u,\n"
+ "\t\t\"name\": \"%s\",\n"
+ "\t\t\"chart\": \"%s\",\n"
+ "\t\t\"family\": \"%s\",\n"
+ "\t\t\"processed\": %s,\n"
+ "\t\t\"updated\": %s,\n"
+ "\t\t\"exec_run\": %lu,\n"
+ "\t\t\"exec_failed\": %s,\n"
+ "\t\t\"exec\": \"%s\",\n"
+ "\t\t\"recipient\": \"%s\",\n"
+ "\t\t\"exec_code\": %d,\n"
+ "\t\t\"source\": \"%s\",\n"
+ "\t\t\"units\": \"%s\",\n"
+ "\t\t\"info\": \"%s\",\n"
+ "\t\t\"when\": %lu,\n"
+ "\t\t\"duration\": %lu,\n"
+ "\t\t\"non_clear_duration\": %lu,\n"
+ "\t\t\"status\": \"%s\",\n"
+ "\t\t\"old_status\": \"%s\",\n"
+ "\t\t\"delay\": %d,\n"
+ "\t\t\"delay_up_to_timestamp\": %lu,\n"
+ "\t\t\"updated_by_id\": %u,\n"
+ "\t\t\"updates_id\": %u,\n"
+ "\t\t\"value_string\": \"%s\",\n"
+ "\t\t\"old_value_string\": \"%s\",\n"
+ , host->hostname
+ , ae->unique_id
+ , ae->alarm_id
+ , ae->alarm_event_id
+ , ae->name
+ , ae->chart
+ , ae->family
+ , (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false"
+ , (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false"
+ , (unsigned long)ae->exec_run_timestamp
+ , (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false"
+ , ae->exec?ae->exec:health.health_default_exec
+ , ae->recipient?ae->recipient:health.health_default_recipient
+ , ae->exec_code
+ , ae->source
+ , ae->units?ae->units:""
+ , ae->info?ae->info:""
+ , (unsigned long)ae->when
+ , (unsigned long)ae->duration
+ , (unsigned long)ae->non_clear_duration
+ , rrdcalc_status2string(ae->new_status)
+ , rrdcalc_status2string(ae->old_status)
+ , ae->delay
+ , (unsigned long)ae->delay_up_to_timestamp
+ , ae->updated_by_id
+ , ae->updates_id
+ , ae->new_value_string
+ , ae->old_value_string
);
+ if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
+ buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
+ }
+
buffer_strcat(wb, "\t\t\"value\":");
buffer_rrd_value(wb, ae->new_value);
buffer_strcat(wb, ",\n");
"\t\t\t\"delay_multiplier\": %f,\n"
"\t\t\t\"delay\": %d,\n"
"\t\t\t\"delay_up_to_timestamp\": %lu,\n"
- , rc->chart, rc->name
- , (unsigned long)rc->id
- , rc->name
- , rc->chart
- , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
- , (rc->rrdset)?"true":"false"
- , rc->exec?rc->exec:health.health_default_exec
- , rc->recipient?rc->recipient:health.health_default_recipient
- , rc->source
- , rc->units?rc->units:""
- , rc->info?rc->info:""
- , rrdcalc_status2string(rc->status)
- , (unsigned long)rc->last_status_change
- , (unsigned long)rc->last_updated
- , (unsigned long)rc->next_update
- , rc->update_every
- , rc->delay_up_duration
- , rc->delay_down_duration
- , rc->delay_max_duration
- , rc->delay_multiplier
- , rc->delay_last
- , (unsigned long)rc->delay_up_to_timestamp
+ , rc->chart, rc->name
+ , (unsigned long)rc->id
+ , rc->name
+ , rc->chart
+ , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
+ , (rc->rrdset)?"true":"false"
+ , rc->exec?rc->exec:health.health_default_exec
+ , rc->recipient?rc->recipient:health.health_default_recipient
+ , rc->source
+ , rc->units?rc->units:""
+ , rc->info?rc->info:""
+ , rrdcalc_status2string(rc->status)
+ , (unsigned long)rc->last_status_change
+ , (unsigned long)rc->last_updated
+ , (unsigned long)rc->next_update
+ , rc->update_every
+ , rc->delay_up_duration
+ , rc->delay_down_duration
+ , rc->delay_max_duration
+ , rc->delay_multiplier
+ , rc->delay_last
+ , (unsigned long)rc->delay_up_to_timestamp
);
+ if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {
+ buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
+ }
+
if(RRDCALC_HAS_DB_LOOKUP(rc)) {
if(rc->dimensions && *rc->dimensions)
health_string2json(wb, "\t\t\t", "lookup_dimensions", rc->dimensions, ",\n");
if(unlikely(ae->new_status < RRDCALC_STATUS_CLEAR)) {
// do not send notifications for internal statuses
+ debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (internal statuses)", ae->chart, ae->name, rrdcalc_status2string(ae->new_status));
+ goto done;
+ }
+
+ if(unlikely(ae->new_status <= RRDCALC_STATUS_CLEAR && (ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
+ // do not send notifications for disabled statuses
+ debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (it has no-clear-notification enabled)", ae->chart, ae->name, rrdcalc_status2string(ae->new_status));
+ // mark it as run, so that we will send the same alarm if it happens again
goto done;
}
// find the previous notification for the same alarm
// which we have run the exec script
- {
+ // exception: alarms with HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION set
+ if(likely(!(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
uint32_t id = ae->alarm_id;
ALARM_ENTRY *t;
for(t = ae->next; t ; t = t->next) {
const char *recipient = ae->recipient;
if(!recipient) recipient = health.health_default_recipient;
- snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s'",
+ snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s' '%s' '%s'",
exec,
recipient,
host->hostname,
(uint32_t)ae->duration,
(uint32_t)ae->non_clear_duration,
ae->units?ae->units:"",
- ae->info?ae->info:""
+ ae->info?ae->info:"",
+ ae->new_value_string,
+ ae->old_value_string
);
ae->flags |= HEALTH_ENTRY_FLAG_EXEC_RUN;
freez(ae->source);
freez(ae->units);
freez(ae->info);
+ freez(ae->old_value_string);
+ freez(ae->new_value_string);
freez(ae);
ae = t;
rc->delay_last = delay;
rc->delay_up_to_timestamp = now + delay;
- health_alarm_log(&localhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info, rc->delay_last);
+ health_alarm_log(
+ &localhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ status,
+ rc->source,
+ rc->units,
+ rc->info,
+ rc->delay_last,
+ (rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)?HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION:0
+ );
rc->last_status_change = now;
rc->status = status;
}
#define RRDCALC_STATUS_WARNING 3
#define RRDCALC_STATUS_CRITICAL 4
-#define RRDCALC_FLAG_DB_ERROR 0x00000001
-#define RRDCALC_FLAG_DB_NAN 0x00000002
-/* #define RRDCALC_FLAG_DB_STALE 0x00000004 */
-#define RRDCALC_FLAG_CALC_ERROR 0x00000008
-#define RRDCALC_FLAG_WARN_ERROR 0x00000010
-#define RRDCALC_FLAG_CRIT_ERROR 0x00000020
-#define RRDCALC_FLAG_RUNNABLE 0x00000040
+#define RRDCALC_FLAG_DB_ERROR 0x00000001
+#define RRDCALC_FLAG_DB_NAN 0x00000002
+/* #define RRDCALC_FLAG_DB_STALE 0x00000004 */
+#define RRDCALC_FLAG_CALC_ERROR 0x00000008
+#define RRDCALC_FLAG_WARN_ERROR 0x00000010
+#define RRDCALC_FLAG_CRIT_ERROR 0x00000020
+#define RRDCALC_FLAG_RUNNABLE 0x00000040
+#define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000
typedef struct rrdcalc {
uint32_t id; // the unique id of this alarm
#define RRDCALCTEMPLATE_HAS_CALCULATION(rt) ((rt)->after)
-#define HEALTH_ENTRY_FLAG_PROCESSED 0x00000001
-#define HEALTH_ENTRY_FLAG_UPDATED 0x00000002
-#define HEALTH_ENTRY_FLAG_EXEC_RUN 0x00000004
-#define HEALTH_ENTRY_FLAG_EXEC_FAILED 0x00000008
-#define HEALTH_ENTRY_FLAG_SAVED 0x10000000
+#define HEALTH_ENTRY_FLAG_PROCESSED 0x00000001
+#define HEALTH_ENTRY_FLAG_UPDATED 0x00000002
+#define HEALTH_ENTRY_FLAG_EXEC_RUN 0x00000004
+#define HEALTH_ENTRY_FLAG_EXEC_FAILED 0x00000008
+#define HEALTH_ENTRY_FLAG_SAVED 0x10000000
+#define HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION 0x80000000
typedef struct alarm_entry {
uint32_t unique_id;
calculated_number old_value;
calculated_number new_value;
+
+ char *old_value_string;
+ char *new_value_string;
+
int old_status;
int new_status;
if(!p) p = "/bin:/usr/bin";
snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin");
setenv("PATH", config_get("plugins", "PATH environment variable", path), 1);
+
+ p = getenv("PYTHONPATH");
+ if(!p) p = "";
+ setenv("PYTHONPATH", config_get("plugins", "PYTHONPATH environment variable", p), 1);
}
char *user = NULL;
return len - i;
}
-static inline const char *fix_units(const char *units) {
- if(!units || !*units || !strcmp(units, "empty") || !strcmp(units, "null")) return "";
- if(!strcmp(units, "percentage") || !strcmp(units, "percent") || !strcmp(units, "pcent")) return "%";
- return units;
+// Print a number followed by its units into value_string.
+//
+// value_string, value_string_len: destination buffer and the length limit
+//     passed to snprintfz()
+// value: the number to print; NaN and infinite values are printed as 0
+// units: text appended after the number (NULL is treated as ""); a space is
+//     inserted before it when it starts with a letter or a digit
+// precision: number of decimals to print (capped at 50). A negative value
+//     selects the decimals from the magnitude of the number:
+//     >= 1000 -> 0, >= 100 -> 1, >= 1 -> 2, >= 0.1 -> 3, otherwise 4
+//     and then removes trailing zeros from the decimal part.
+//
+// Returns value_string.
+static inline char *format_value_with_precision_and_unit(char *value_string, size_t value_string_len, calculated_number value, const char *units, int precision) {
+    if(unlikely(isnan(value) || isinf(value)))
+        value = 0.0;
+
+    // tolerate a missing units string
+    if(unlikely(!units))
+        units = "";
+
+    // the cast is required: units may start with a byte above 127 (e.g. UTF-8)
+    // and passing a negative char to isalnum() is undefined
+    char *separator = "";
+    if(unlikely(isalnum((unsigned char)*units)))
+        separator = " ";
+
+    if(precision < 0) {
+        int len, lstop = 0, trim_zeros = 1;
+
+        // lstop is the last index the trimming loop must never touch:
+        // the first digit, or the sign and the first digit for negatives
+        calculated_number abs = value;
+        if(isless(value, 0)) {
+            lstop = 1;
+            abs = -value;
+        }
+
+        if(isgreaterequal(abs, 1000)) {
+            // no decimal part is printed, so there is nothing to trim
+            len = snprintfz(value_string, value_string_len, "%0.0Lf", (long double) value);
+            trim_zeros = 0;
+        }
+        else if(isgreaterequal(abs, 100)) len = snprintfz(value_string, value_string_len, "%0.1Lf", (long double) value);
+        else if(isgreaterequal(abs, 1))   len = snprintfz(value_string, value_string_len, "%0.2Lf", (long double) value);
+        else if(isgreaterequal(abs, 0.1)) len = snprintfz(value_string, value_string_len, "%0.3Lf", (long double) value);
+        else                              len = snprintfz(value_string, value_string_len, "%0.4Lf", (long double) value);
+
+        if(unlikely(trim_zeros)) {
+            int l;
+            // remove trailing zeros from the decimal part
+            for(l = len - 1; l > lstop; l--) {
+                if(likely(value_string[l] == '0')) {
+                    value_string[l] = '\0';
+                    len--;
+                }
+
+                else if(unlikely(value_string[l] == '.')) {
+                    // the whole decimal part was zeros: drop the dot too and stop
+                    value_string[l] = '\0';
+                    len--;
+                    break;
+                }
+
+                else
+                    break;
+            }
+        }
+
+        // NOTE(review): a non-positive length is forced to 1 before the units
+        // are appended - confirm this is the intended fallback
+        if(unlikely(len <= 0)) len = 1;
+        snprintfz(&value_string[len], value_string_len - len, "%s%s", separator, units);
+    }
+    else {
+        if(precision > 50) precision = 50;
+        snprintfz(value_string, value_string_len, "%0.*Lf%s%s", precision, (long double) value, separator, units);
+    }
+
+    return value_string;
+}
+
+// Format value as a human friendly string, according to its units.
+//
+// value_string, value_string_len: destination buffer and the length limit
+//     passed to snprintfz()
+// value: the number to format
+// units: the units of the value (NULL is treated as "")
+// precision: forwarded to format_value_with_precision_and_unit()
+//
+// Units that get special treatment (matched exactly, case-sensitive):
+//   "seconds", "seconds ago"  -> "HH:MM:SS", prefixed with "N day(s) " when
+//                                there are whole days
+//   "minutes", "minutes ago"  -> "Hh Mm" or "Dd HHh MMm"
+//   "hours", "hours ago"      -> "Hh" or "Dd Hh"
+//       for all of the above: 0 prints "now", NaN/infinite print "never"
+//       and the "... ago" variants get an " ago" suffix
+//   "on/off", "up/down", "ok/error", "ok/failed"
+//       -> the first word when value != 0.0, otherwise the second
+//          (a NaN value compares != 0.0, so it prints the first word)
+//   "empty", "null"                    -> no units are printed
+//   "percentage", "percent", "pcent"   -> "%" is printed as units
+// For everything else NaN/infinite values print "-" and all other values
+// are printed by format_value_with_precision_and_unit().
+//
+// NOTE(review): the time branches cast value to size_t without checking its
+// sign - confirm callers never pass negative durations.
+//
+// Returns value_string.
+inline char *format_value_and_unit(char *value_string, size_t value_string_len, calculated_number value, const char *units, int precision) {
+    // hashes of the special units, calculated once, on the first call
+    static uint32_t
+            hash_seconds = 0,
+            hash_seconds_ago = 0,
+            hash_minutes = 0,
+            hash_minutes_ago = 0,
+            hash_hours = 0,
+            hash_hours_ago = 0,
+            hash_onoff = 0,
+            hash_updown = 0,
+            hash_okerror = 0,
+            hash_okfailed = 0,
+            hash_empty = 0,
+            hash_null = 0,
+            hash_percentage = 0,
+            hash_percent = 0,
+            hash_pcent = 0;
+
+    if(unlikely(!hash_seconds)) {
+        hash_seconds     = simple_hash("seconds");
+        hash_seconds_ago = simple_hash("seconds ago");
+        hash_minutes     = simple_hash("minutes");
+        hash_minutes_ago = simple_hash("minutes ago");
+        hash_hours       = simple_hash("hours");
+        hash_hours_ago   = simple_hash("hours ago");
+        hash_onoff       = simple_hash("on/off");
+        hash_updown      = simple_hash("up/down");
+        hash_okerror     = simple_hash("ok/error");
+        hash_okfailed    = simple_hash("ok/failed");
+        hash_empty       = simple_hash("empty");
+        hash_null        = simple_hash("null");
+        hash_percentage  = simple_hash("percentage");
+        hash_percent     = simple_hash("percent");
+        hash_pcent       = simple_hash("pcent");
+    }
+
+    if(unlikely(!units)) units = "";
+
+    // the hash is a cheap pre-filter; strcmp() confirms every match
+    uint32_t hash_units = simple_hash(units);
+
+    if(unlikely((hash_units == hash_seconds && !strcmp(units, "seconds")) || (hash_units == hash_seconds_ago && !strcmp(units, "seconds ago")))) {
+        if(value == 0.0) {
+            snprintfz(value_string, value_string_len, "%s", "now");
+            return value_string;
+        }
+        else if(isnan(value) || isinf(value)) {
+            snprintfz(value_string, value_string_len, "%s", "never");
+            return value_string;
+        }
+
+        const char *suffix = (hash_units == hash_seconds_ago)?" ago":"";
+
+        // split the seconds to days, hours, minutes and seconds
+        size_t s = (size_t)value;
+        size_t d = s / 86400;
+        s = s % 86400;
+
+        size_t h = s / 3600;
+        s = s % 3600;
+
+        size_t m = s / 60;
+        s = s % 60;
+
+        if(d)
+            snprintfz(value_string, value_string_len, "%zu %s %02zu:%02zu:%02zu%s", d, (d == 1)?"day":"days", h, m, s, suffix);
+        else
+            snprintfz(value_string, value_string_len, "%02zu:%02zu:%02zu%s", h, m, s, suffix);
+
+        return value_string;
+    }
+
+    else if(unlikely((hash_units == hash_minutes && !strcmp(units, "minutes")) || (hash_units == hash_minutes_ago && !strcmp(units, "minutes ago")))) {
+        if(value == 0.0) {
+            snprintfz(value_string, value_string_len, "%s", "now");
+            return value_string;
+        }
+        else if(isnan(value) || isinf(value)) {
+            snprintfz(value_string, value_string_len, "%s", "never");
+            return value_string;
+        }
+
+        const char *suffix = (hash_units == hash_minutes_ago)?" ago":"";
+
+        // split the minutes to days, hours and minutes
+        size_t m = (size_t)value;
+        size_t d = m / (60 * 24);
+        m = m % (60 * 24);
+
+        size_t h = m / 60;
+        m = m % 60;
+
+        if(d)
+            snprintfz(value_string, value_string_len, "%zud %02zuh %02zum%s", d, h, m, suffix);
+        else
+            snprintfz(value_string, value_string_len, "%zuh %zum%s", h, m, suffix);
+
+        return value_string;
+    }
+
+    else if(unlikely((hash_units == hash_hours && !strcmp(units, "hours")) || (hash_units == hash_hours_ago && !strcmp(units, "hours ago")))) {
+        if(value == 0.0) {
+            snprintfz(value_string, value_string_len, "%s", "now");
+            return value_string;
+        }
+        else if(isnan(value) || isinf(value)) {
+            snprintfz(value_string, value_string_len, "%s", "never");
+            return value_string;
+        }
+
+        const char *suffix = (hash_units == hash_hours_ago)?" ago":"";
+
+        // split the hours to days and hours
+        size_t h = (size_t)value;
+        size_t d = h / 24;
+        h = h % 24;
+
+        if(d)
+            snprintfz(value_string, value_string_len, "%zud %zuh%s", d, h, suffix);
+        else
+            snprintfz(value_string, value_string_len, "%zuh%s", h, suffix);
+
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_onoff && !strcmp(units, "on/off"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"on":"off");
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_updown && !strcmp(units, "up/down"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"up":"down");
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_okerror && !strcmp(units, "ok/error"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"ok":"error");
+        return value_string;
+    }
+
+    else if(unlikely(hash_units == hash_okfailed && !strcmp(units, "ok/failed"))) {
+        snprintfz(value_string, value_string_len, "%s", (value != 0.0)?"ok":"failed");
+        return value_string;
+    }
+
+    // the remaining special units only rewrite the units text
+    else if(unlikely(hash_units == hash_empty && !strcmp(units, "empty")))
+        units = "";
+
+    else if(unlikely(hash_units == hash_null && !strcmp(units, "null")))
+        units = "";
+
+    else if(unlikely(hash_units == hash_percentage && !strcmp(units, "percentage")))
+        units = "%";
+
+    else if(unlikely(hash_units == hash_percent && !strcmp(units, "percent")))
+        units = "%";
+
+    else if(unlikely(hash_units == hash_pcent && !strcmp(units, "pcent")))
+        units = "%";
+
+
+    if(unlikely(isnan(value) || isinf(value))) {
+        // bounded by value_string_len, like every other branch
+        snprintfz(value_string, value_string_len, "%s", "-");
+        return value_string;
+    }
+
+    return format_value_with_precision_and_unit(value_string, value_string_len, value, units, precision);
}
static inline const char *color_map(const char *color) {
return color;
}
-static inline void calc_colorz(const char *color, char *final, size_t len, calculated_number value, int value_is_null) {
+static inline void calc_colorz(const char *color, char *final, size_t len, calculated_number value) {
+ int value_is_null = 0;
+ if(isnan(value) || isinf(value)) {
+ value = 0.0;
+ value_is_null = 1;
+ }
+
char color_buffer[256 + 1] = "";
char value_buffer[256 + 1] = "";
char comparison = '>';
// colors
#define COLOR_STRING_SIZE 100
-void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int value_is_null, int precision) {
- static uint32_t hash_seconds = 0, hash_seconds_ago = 0, hash_minutes = 0, hash_minutes_ago = 0, hash_hours = 0, hash_hours_ago = 0;
-
- if(unlikely(!hash_seconds)) {
- hash_seconds = simple_hash("seconds");
- hash_seconds_ago = simple_hash("seconds ago");
- hash_minutes = simple_hash("minutes");
- hash_minutes_ago = simple_hash("minutes ago");
- hash_hours = simple_hash("hours");
- hash_hours_ago = simple_hash("hours ago");
- }
-
+void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision) {
char label_buffer[LABEL_STRING_SIZE + 1]
, value_color_buffer[COLOR_STRING_SIZE + 1]
, value_string[VALUE_STRING_SIZE + 1]
label_color = "#555";
if(unlikely(!value_color || !*value_color))
- value_color = (value_is_null)?"#999":"#4c1";
+ value_color = (isnan(value) || isinf(value))?"#999":"#4c1";
- units = fix_units(units);
- calc_colorz(value_color, value_color_buffer, COLOR_STRING_SIZE, value, value_is_null);
-
- char *separator = "";
- if(unlikely(isalnum(*units)))
- separator = " ";
-
- uint32_t hash_units = simple_hash(units);
-
- if(unlikely((hash_units == hash_seconds && !strcmp(units, "seconds")) || (hash_units == hash_seconds_ago && !strcmp(units, "seconds ago")))) {
- char *suffix = (hash_units == hash_seconds_ago)?" ago":"";
-
- size_t s = (size_t)value;
- size_t d = s / 86400;
- s = s % 86400;
-
- size_t h = s / 3600;
- s = s % 3600;
-
- size_t m = s / 60;
- s = s % 60;
-
- if(d)
- snprintfz(value_string, VALUE_STRING_SIZE, "%zu %s %02zu:%02zu:%02zu%s", d, (d == 1)?"day":"days", h, m, s, suffix);
- else
- snprintfz(value_string, VALUE_STRING_SIZE, "%02zu:%02zu:%02zu%s", h, m, s, suffix);
- }
-
- else if(unlikely((hash_units == hash_minutes && !strcmp(units, "minutes")) || (hash_units == hash_minutes_ago && !strcmp(units, "minutes ago")))) {
- char *suffix = (hash_units == hash_minutes_ago)?" ago":"";
-
- size_t m = (size_t)value;
- size_t d = m / (60 * 24);
- m = m % (60 * 24);
-
- size_t h = m / 60;
- m = m % 60;
-
- if(d)
- snprintfz(value_string, VALUE_STRING_SIZE, "%zud %02zuh %02zum%s", d, h, m, suffix);
- else
- snprintfz(value_string, VALUE_STRING_SIZE, "%zuh %zum%s", h, m, suffix);
- }
-
- else if(unlikely((hash_units == hash_hours && !strcmp(units, "hours")) || (hash_units == hash_hours_ago && !strcmp(units, "hours ago")))) {
- char *suffix = (hash_units == hash_hours_ago)?" ago":"";
-
- size_t h = (size_t)value;
- size_t d = h / 24;
- h = h % 24;
-
- if(d)
- snprintfz(value_string, VALUE_STRING_SIZE, "%zud %zuh%s", d, h, suffix);
- else
- snprintfz(value_string, VALUE_STRING_SIZE, "%zuh%s", h, suffix);
- }
-
- else if(unlikely(value_is_null))
- strcpy(value_string, "-");
-
- else if(precision < 0) {
- int len, lstop = 0, trim_zeros = 1;
-
- calculated_number abs = value;
- if(isless(value, 0)) {
- lstop = 1;
- abs = -value;
- }
-
- if(isgreaterequal(abs, 1000)) { len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.0Lf", (long double)value); trim_zeros = 0; }
- else if(isgreaterequal(abs, 100)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.1Lf", (long double)value);
- else if(isgreaterequal(abs, 1)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.2Lf", (long double)value);
- else if(isgreaterequal(abs, 0.1)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.3Lf", (long double)value);
- else len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.4Lf", (long double)value);
-
- if(unlikely(trim_zeros)) {
- int l;
- // remove trailing zeros from the decimal part
- for(l = len - 1; l > lstop ; l--) {
- if(likely(value_string[l] == '0')) {
- value_string[l] = '\0';
- len--;
- }
-
- else if(unlikely(value_string[l] == '.')) {
- value_string[l] = '\0';
- len--;
- break;
- }
-
- else
- break;
- }
- }
-
- if(len >= 0)
- snprintfz(&value_string[len], VALUE_STRING_SIZE - len, "%s%s", separator, units);
- }
- else {
- if(precision > 50) precision = 50;
- snprintfz(value_string, VALUE_STRING_SIZE, "%0.*Lf%s%s", precision, (long double)value, separator, units);
- }
+ calc_colorz(value_color, value_color_buffer, COLOR_STRING_SIZE, value);
+ format_value_and_unit(value_string, VALUE_STRING_SIZE, value, units, precision);
// we need to copy the label, since verdana11_width may write to it
strncpyz(label_buffer, label, LABEL_STRING_SIZE);
#ifndef NETDATA_WEB_BUFFER_SVG_H
#define NETDATA_WEB_BUFFER_SVG_H 1
-extern void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int value_is_null, int precision);
+extern void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision);
+extern char *format_value_and_unit(char *value_string, size_t value_string_len, calculated_number value, const char *units, int precision);
#endif /* NETDATA_WEB_BUFFER_SVG_H */
if(!st) st = rrdset_find_byname(chart);
if(!st) {
buffer_no_cacheable(w->response.data);
- buffer_svg(w->response.data, "chart not found", 0, "", NULL, NULL, 1, -1);
+ buffer_svg(w->response.data, "chart not found", NAN, "", NULL, NULL, -1);
ret = 200;
goto cleanup;
}
rc = rrdcalc_find(st, alarm);
if (!rc) {
buffer_no_cacheable(w->response.data);
- buffer_svg(w->response.data, "alarm not found", 0, "", NULL, NULL, 1, -1);
+ buffer_svg(w->response.data, "alarm not found", NAN, "", NULL, NULL, -1);
ret = 200;
goto cleanup;
}
);
if(rc) {
- calculated_number n = rc->value;
- if(isnan(n) || isinf(n)) n = 0;
-
if (refresh > 0) {
buffer_sprintf(w->response.header, "Refresh: %d\r\n", refresh);
w->response.data->expires = now_realtime_sec() + refresh;
}
buffer_svg(w->response.data,
- label,
- rc->value * multiply / divide,
- units,
- label_color,
- value_color,
- 0,
- precision);
+ label,
+ (isnan(rc->value)||isinf(rc->value)) ? rc->value : rc->value * multiply / divide,
+ units,
+ label_color,
+ value_color,
+ precision);
ret = 200;
}
else {
time_t latest_timestamp = 0;
int value_is_null = 1;
- calculated_number n = 0;
+ calculated_number n = NAN;
ret = 500;
// if the collected value is too old, don't calculate its value
// render the badge
buffer_svg(w->response.data,
- label,
- n * multiply / divide,
- units,
- label_color,
- value_color,
- value_is_null,
- precision);
+ label,
+ (value_is_null)?NAN:(n * multiply / divide),
+ units,
+ label_color,
+ value_color,
+ precision);
}
cleanup:
var name = entry.name.replace(/_/g, ' ');
var status = entry.status.toLowerCase();
- var title = name + ' = ' + ((value === null)?'NaN':Math.floor(value)).toString() + ' ' + entry.units;
+ var title = name + ' = ' + entry.value_string.toString();
var tag = entry.alarm_id;
var icon = 'images/seo-performance-128.png';
var interaction = false;
// console.log('alarm' + entry.unique_id + ' switch to CLEAR from ' + entry.old_status);
return;
}
- title = name + ' back to normal';
+ if(entry.no_clear_notification === true) {
+ // console.log('alarm' + entry.unique_id + ' is CLEAR but has no_clear_notification flag');
+ return;
+ }
+ title = name + ' back to normal (' + entry.value_string.toString() + ')';
icon = 'images/check-mark-2-128-green.png'
interaction = false;
break;
function alarm_to_html(alarm, full) {
var chart = options.data.charts[alarm.chart];
+ if(typeof(chart) === 'undefined') {
+ // this means the charts loaded are incomplete
+ // probably netdata was restarted and more charts
+ // are now available.
+ return '';
+ }
+
var has_alarm = ((typeof alarm.warn !== 'undefined' || typeof alarm.crit !== 'undefined')?true:false);
var role_href = ((has_alarm === true)?('<br/> <br/>role: <b>' + alarm.recipient + '</b><br/> <br/><b><i class="fa fa-line-chart" aria-hidden="true"></i></b><small> <a href="#" onClick="NETDATA.alarms.scrollToChart(\'' + alarm.chart + '\'); $(\'#alarmsModal\').modal(\'hide\'); return false;">jump to chart</a></small>'):(' '));
+ ((typeof alarm.crit !== 'undefined')?('<tr><td width="10%" style="text-align:right">critical when</td><td><span style="font-family: monospace; color: #e05d44; font-weight: bold;">' + alarm.crit + '</span></td></tr>'):'');
if(full === true) {
- html += ((typeof alarm.lookup_after !== 'undefined')?('<tr><td width="10%" style="text-align:right">db lookup</td><td>' + alarm_lookup_explain(alarm, chart) + '</td></tr>'):'')
+ var units = chart.units;
+ if(units === '%') units = '%';
+
+ html += ((typeof alarm.lookup_after !== 'undefined')?('<tr><td width="10%" style="text-align:right">db lookup</td><td>' + alarm_lookup_explain(alarm, chart) + '</td></tr>'):'')
+ ((typeof alarm.calc !== 'undefined')?('<tr><td width="10%" style="text-align:right">calculation</td><td><span style="font-family: monospace;">' + alarm.calc + '</span></td></tr>'):'')
- + ((chart.green !== null)?('<tr><td width="10%" style="text-align:right">green threshold</td><td><code>' + chart.green + ' ' + chart.units + '</code></td></tr>'):'')
- + ((chart.red !== null)?('<tr><td width="10%" style="text-align:right">red threshold</td><td><code>' + chart.red + ' ' + chart.units + '</code></td></tr>'):'');
+ + ((chart.green !== null)?('<tr><td width="10%" style="text-align:right">green threshold</td><td><code>' + chart.green + ' ' + units + '</code></td></tr>'):'')
+ + ((chart.red !== null)?('<tr><td width="10%" style="text-align:right">red threshold</td><td><code>' + chart.red + ' ' + units + '</code></td></tr>'):'');
}
var delay = '';
switchable: false,
sortable: true
},
+ {
+ field: 'value_string',
+ title: 'Friendly Value',
+ titleTooltip: 'The value of the alarm, that triggered this event',
+ align: 'right',
+ valign: 'middle',
+ sortable: true
+ },
+ {
+ field: 'old_value_string',
+ title: 'Friendly Old Value',
+ titleTooltip: 'The value of the alarm, just before this event',
+ align: 'right',
+ valign: 'middle',
+ visible: false,
+ sortable: true
+ },
{
field: 'old_value',
title: 'Old Value',
},
align: 'right',
valign: 'middle',
+ visible: false,
sortable: true
},
{
titleTooltip: 'The units of the value of the alarm',
align: 'left',
valign: 'middle',
+ visible: false,
sortable: true
},
{
</div>
</body>
</html>
-<script type="text/javascript" src="dashboard.js?v20170118-11"></script>
+<script type="text/javascript" src="dashboard.js?v20170127-1"></script>