health.d/ram.conf \
health.d/redis.conf \
health.d/retroshare.conf \
+ health.d/softnet.conf \
health.d/swap.conf \
health.d/squid.conf \
$(NULL)
--- /dev/null
+# check for common /proc/net/softnet_stat errors
+
+ alarm: 1hour_netdev_backlog_exceeded
+ on: system.softnet_stat
+ lookup: sum -1h unaligned absolute of dropped
+ units: packets
+ every: 1m
+ warn: $this > 0
+ delay: down 30m multiplier 1.5 max 1h
+ info: number of packets dropped because sysctl net.core.netdev_max_backlog was exceeded
+ to: sysadmin
+
+ alarm: 1hour_netdev_budget_ran_outs
+ on: system.softnet_stat
+ lookup: sum -1h unaligned absolute of squeezed
+ units: events
+ every: 1m
+ warn: $this > 0
+ delay: down 30m multiplier 1.5 max 1h
+ info: number of times ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining
+ to: sysadmin
['39f9422b0f0c3eec11a31aff79d89514']='health.d/retroshare.conf'
['46ef6c1b638e40a7dfd62defdc5f99a3']='health.d/retroshare.conf'
['6608c6546b3c6bde084fc1d34b1163c1']='health.d/retroshare.conf'
+ ['312b4b8e2805e19cf9be554b319567d6']='health.d/softnet.conf'
+ ['565f11c38ae6bd5cc9d3c2adb542bc1b']='health.d/softnet.conf'
+ ['a305b400378d6492efd15f9940c2779b']='health.d/softnet.conf'
['23ae815aefa221b1929f96752a1f7556']='health.d/squid.conf'
['3cc6255457d4cba881ae0554ae5d9190']='health.d/squid.conf'
['845023f9b4a526aa0e6493756dbe6034']='health.d/squid.conf'
if [ "${SEND_PUSHOVER}" = "YES" -a ! -z "${apptoken}" -a ! -z "${usertokens}" -a ! -z "${title}" -a ! -z "${message}" ]
then
- priority=0
- [ "${status}" = "CRITICAL" ] && priority=1
+ # Map the alarm status to a Pushover message priority (https://pushover.net/api).
+ # Start from the lowest priority so that a status not listed below stays silent.
+ priority=-2
+ case "${status}" in
+     CLEAR) priority=-1;; # low priority: no sound or vibration
+     WARNING) priority=0;; # normal priority: respect quiet hours
+     CRITICAL) priority=1;; # high priority: bypass quiet hours
+     *) priority=-2;; # lowest priority: no notification at all
+ esac
for user in ${usertokens}
do
proc_net_rpc_nfsd.c \
proc_net_snmp.c \
proc_net_snmp6.c \
+ proc_net_softnet_stat.c \
proc_net_stat_conntrack.c \
proc_net_stat_synproxy.c \
proc_stat.c \
else if(likely(host->alarms)) {
RRDCALC *t, *last = host->alarms;
for(t = last->next; t && t != rc; last = t, t = t->next) ;
- if(last && last->next == rc)
+ if(last->next == rc)
last->next = rc->next;
else
error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname);
static inline void health_rrdcalc2json_nolock(BUFFER *wb, RRDCALC *rc) {
buffer_sprintf(wb,
"\t\t\"%s.%s\": {\n"
+ "\t\t\t\"id\": %lu,\n"
"\t\t\t\"name\": \"%s\",\n"
"\t\t\t\"chart\": \"%s\",\n"
"\t\t\t\"family\": \"%s\",\n"
"\t\t\t\"delay\": %d,\n"
"\t\t\t\"delay_up_to_timestamp\": %lu,\n"
, rc->chart, rc->name
+ , (unsigned long)rc->id
, rc->name
, rc->chart
, (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
int vdo_proc_sys_kernel_random_entropy_avail = !config_get_boolean("plugin:proc", "/proc/sys/kernel/random/entropy_avail", 1);
int vdo_proc_interrupts = !config_get_boolean("plugin:proc", "/proc/interrupts", 1);
int vdo_proc_softirqs = !config_get_boolean("plugin:proc", "/proc/softirqs", 1);
+ int vdo_proc_net_softnet_stat = !config_get_boolean("plugin:proc", "/proc/net/softnet_stat", 1);
int vdo_proc_loadavg = !config_get_boolean("plugin:proc", "/proc/loadavg", 1);
int vdo_sys_kernel_mm_ksm = !config_get_boolean("plugin:proc", "/sys/kernel/mm/ksm", 1);
int vdo_cpu_netdata = !config_get_boolean("plugin:proc", "netdata server resources", 1);
unsigned long long sutime_proc_sys_kernel_random_entropy_avail = 0ULL;
unsigned long long sutime_proc_interrupts = 0ULL;
unsigned long long sutime_proc_softirqs = 0ULL;
+ unsigned long long sutime_proc_net_softnet_stat = 0ULL;
unsigned long long sutime_proc_loadavg = 0ULL;
unsigned long long sutime_sys_kernel_mm_ksm = 0ULL;
}
if(unlikely(netdata_exit)) break;
+ if(!vdo_proc_net_softnet_stat) {
+ debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_softnet_stat().");
+ sunow = time_usec();
+ vdo_proc_net_softnet_stat = do_proc_net_softnet_stat(rrd_update_every, (sutime_proc_net_softnet_stat > 0)?sunow - sutime_proc_net_softnet_stat:0ULL);
+ sutime_proc_net_softnet_stat = sunow;
+ }
+ if(unlikely(netdata_exit)) break;
+
if(!vdo_proc_sys_kernel_random_entropy_avail) {
debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_sys_kernel_random_entropy_avail().");
sunow = time_usec();
extern int do_sys_kernel_mm_ksm(int update_every, unsigned long long dt);
extern int do_proc_loadavg(int update_every, unsigned long long dt);
extern int do_proc_net_stat_synproxy(int update_every, unsigned long long dt);
+extern int do_proc_net_softnet_stat(int update_every, unsigned long long dt);
#endif /* NETDATA_PLUGIN_PROC_H */
int c;
for(c = 0; c < cpus ; c++) {
- char id[256+1];
- snprintfz(id, 256, "cpu%d_interrupts", c);
+ char id[50+1];
+ snprintfz(id, 50, "cpu%d_interrupts", c);
st = rrdset_find_bytype("cpu", id);
if(!st) {
- char name[256+1], title[256+1];
- snprintfz(name, 256, "cpu%d_interrupts", c);
- snprintfz(title, 256, "CPU%d Interrupts", c);
- st = rrdset_create("cpu", id, name, "interrupts", "cpu.interrupts", title, "interrupts/s", 1100 + c, update_every, RRDSET_TYPE_STACKED);
+ char title[100+1];
+ snprintfz(title, 100, "CPU%d Interrupts", c);
+ st = rrdset_create("cpu", id, NULL, "interrupts", "cpu.interrupts", title, "interrupts/s", 1100 + c, update_every, RRDSET_TYPE_STACKED);
for(l = 0; l < lines ;l++) {
struct interrupt *irr = irrindex(irrs, l, cpus);
--- /dev/null
+#include "common.h"
+
+/*
+ * Translate a zero-based column index into the dimension name used for it.
+ *
+ * Only the columns listed in the table have a name; for any other index the
+ * function returns NULL.
+ *
+ * Column order reference:
+ * https://github.com/torvalds/linux/blob/a7fd20d1c476af4563e66865213474a2f9f473a4/net/core/net-procfs.c#L161-L166
+ */
+static inline char *softnet_column_name(uint32_t column) {
+    // sparse table: the slots that are not initialized explicitly are NULL
+    static char *const names[] = {
+        [0]  = "processed",
+        [1]  = "dropped",
+        [2]  = "squeezed",
+        [9]  = "received_rps",
+        [10] = "flow_limit_count"
+    };
+
+    if(column >= sizeof(names) / sizeof(names[0]))
+        return NULL;
+
+    return names[column];
+}
+
+/*
+ * Collect /proc/net/softnet_stat and update the "system.softnet_stat" chart and,
+ * when the "softnet_stat per core" option is enabled, one "cpu.cpuN_softnet_stat"
+ * chart per line of the file.
+ *
+ * update_every: passed to rrdset_create() when a chart is created.
+ * dt:           unused.
+ *
+ * Returns 1 when the file cannot be opened or reports no lines/columns, and 0
+ * otherwise - including when reading the file fails, so that opening it is
+ * retried on the next call.
+ */
+int do_proc_net_softnet_stat(int update_every, unsigned long long dt) {
+    (void)dt;
+
+    static procfile *ff = NULL;
+    static int do_per_core = -1;
+    static uint32_t allocated_lines = 0, allocated_columns = 0, *data = NULL;
+
+    // read the configuration only once
+    if(do_per_core == -1) do_per_core = config_get_boolean("plugin:proc:/proc/net/softnet_stat", "softnet_stat per core", 1);
+
+    if(!ff) {
+        char filename[FILENAME_MAX + 1];
+        snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/net/softnet_stat");
+        ff = procfile_open(config_get("plugin:proc:/proc/net/softnet_stat", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT);
+    }
+    if(!ff) return 1;
+
+    ff = procfile_readall(ff);
+    if(!ff) return 0; // we return 0, so that we will retry to open it next time
+
+    uint32_t lines = procfile_lines(ff), l;
+    uint32_t words = procfile_linewords(ff, 0), w;
+
+    if(!lines || !words) {
+        error("Cannot read /proc/net/softnet_stat, %u lines and %u columns reported.", lines, words);
+        return 1;
+    }
+
+    // cap the amount of data we are willing to process
+    if(lines > 200) lines = 200;
+    if(words > 50) words = 50;
+
+    // data is a (allocated_lines + 1) x allocated_columns matrix:
+    // row 0 holds the per-column sum of all lines, row (l + 1) holds line l
+    if(unlikely(!data || lines > allocated_lines || words > allocated_columns)) {
+        freez(data);
+        allocated_lines = lines;
+        allocated_columns = words;
+        data = mallocz((allocated_lines + 1) * allocated_columns * sizeof(uint32_t));
+    }
+
+    // initialize to zero
+    bzero(data, (allocated_lines + 1) * allocated_columns * sizeof(uint32_t));
+
+    // parse the values - only the columns that have a name are read,
+    // and they are parsed as hexadecimal numbers
+    for(l = 0; l < lines ;l++) {
+        words = procfile_linewords(ff, l);
+        if(!words) continue;
+
+        if(words > allocated_columns) words = allocated_columns;
+
+        for(w = 0; w < words ; w++) {
+            if(unlikely(softnet_column_name(w))) {
+                uint32_t t = strtoul(procfile_lineword(ff, l, w), NULL, 16);
+                data[w] += t;
+                data[((l + 1) * allocated_columns) + w] = t;
+            }
+        }
+    }
+
+    // when column 0 of the last parsed line is zero, leave that line
+    // out of the per core charts
+    // NOTE(review): presumably this hides a trailing idle entry - confirm the intent
+    if(data[(lines * allocated_columns)] == 0)
+        lines--;
+
+    RRDSET *st;
+
+    // --------------------------------------------------------------------
+    // the chart with the totals of all lines
+
+    st = rrdset_find_bytype("system", "softnet_stat");
+    if(!st) {
+        st = rrdset_create("system", "softnet_stat", NULL, "softnet_stat", NULL, "System softnet_stat", "events/s", 955, update_every, RRDSET_TYPE_LINE);
+        for(w = 0; w < allocated_columns ;w++)
+            if(unlikely(softnet_column_name(w)))
+                rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRDDIM_INCREMENTAL);
+    }
+    else rrdset_next(st);
+
+    for(w = 0; w < allocated_columns ;w++)
+        if(unlikely(softnet_column_name(w)))
+            rrddim_set(st, softnet_column_name(w), data[w]);
+
+    rrdset_done(st);
+
+    // --------------------------------------------------------------------
+    // one chart per line of the file
+
+    if(do_per_core) {
+        for(l = 0; l < lines ;l++) {
+            char id[50+1];
+            // l is unsigned, so it is formatted with %u
+            snprintfz(id, 50, "cpu%u_softnet_stat", l);
+
+            st = rrdset_find_bytype("cpu", id);
+            if(!st) {
+                char title[100+1];
+                snprintfz(title, 100, "CPU%u softnet_stat", l);
+
+                st = rrdset_create("cpu", id, NULL, "softnet_stat", NULL, title, "events/s", 4101 + l, update_every, RRDSET_TYPE_LINE);
+                for(w = 0; w < allocated_columns ;w++)
+                    if(unlikely(softnet_column_name(w)))
+                        rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRDDIM_INCREMENTAL);
+            }
+            else rrdset_next(st);
+
+            for(w = 0; w < allocated_columns ;w++)
+                if(unlikely(softnet_column_name(w)))
+                    rrddim_set(st, softnet_column_name(w), data[((l + 1) * allocated_columns) + w]);
+
+            rrdset_done(st);
+        }
+    }
+
+    return 0;
+}
int c;
for(c = 0; c < cpus ; c++) {
- char id[256+1];
- snprintfz(id, 256, "cpu%d_softirqs", c);
+ char id[50+1];
+ snprintfz(id, 50, "cpu%d_softirqs", c);
st = rrdset_find_bytype("cpu", id);
if(!st) {
}
if(core_sum == 0) continue; // try next core
- char name[256+1], title[256+1];
- snprintfz(name, 256, "cpu%d_softirqs", c);
- snprintfz(title, 256, "CPU%d softirqs", c);
- st = rrdset_create("cpu", id, name, "softirqs", "cpu.softirqs", title, "softirqs/s", 3000 + c, update_every, RRDSET_TYPE_STACKED);
+ char title[100+1];
+ snprintfz(title, 100, "CPU%d softirqs", c);
+ st = rrdset_create("cpu", id, NULL, "softirqs", "cpu.softirqs", title, "softirqs/s", 3000 + c, update_every, RRDSET_TYPE_STACKED);
for(l = 0; l < lines ;l++) {
struct interrupt *irr = irrindex(irrs, l, cpus);
if(chart.priority < families[family].priority)
families[family].priority = chart.priority;
- families[family].arr.push(alarm);
+ families[family].arr.unshift(alarm);
}
// sort the families, like the dashboard menu does
var families_sorted = families_sort.sort(function (a, b) {
if (a.priority > b.priority) return -1;
if (a.priority < b.priority) return 1;
- if (a.id > b.id) return 1;
- if (a.id < b.id) return -1;
return 0;
});