From: Costa Tsaousis (ktsaou) Date: Thu, 15 Sep 2016 21:04:47 +0000 (+0300) Subject: added /proc/net/softnet_stat monitoring which provides information about common netwo... X-Git-Tag: v1.4.0~54^2~6 X-Git-Url: https://arthur.barton.de/gitweb/?a=commitdiff_plain;h=079134376fc83d1d7b1ae96423de31512f5c3e38;p=netdata.git added /proc/net/softnet_stat monitoring which provides information about common network device misconfigurations and issues --- diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am index c398899c..7fccd0ee 100644 --- a/conf.d/Makefile.am +++ b/conf.d/Makefile.am @@ -57,6 +57,7 @@ dist_healthconfig_DATA = \ health.d/ram.conf \ health.d/redis.conf \ health.d/retroshare.conf \ + health.d/softnet.conf \ health.d/swap.conf \ health.d/squid.conf \ $(NULL) diff --git a/conf.d/health.d/softnet.conf b/conf.d/health.d/softnet.conf new file mode 100644 index 00000000..daef9068 --- /dev/null +++ b/conf.d/health.d/softnet.conf @@ -0,0 +1,31 @@ +# check for common /proc/net/softnet_stat errors + + alarm: 1hour_netdev_backlog_exceeded + on: system.softnet_stat + lookup: sum -1h unaligned absolute of dropped + units: packets + every: 5m + warn: $this > 0 + delay: down 30m multiplier 1.5 max 1h + info: number of packets dropped because sysctl net.core.netdev_max_backlog was exceeded + to: sysadmin + + alarm: 1hour_netdev_budget_ran_outs + on: system.softnet_stat + lookup: sum -1h unaligned absolute of squeezed + units: events + every: 5m + warn: $this > 0 + delay: down 30m multiplier 1.5 max 1h + info: number of times ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining + to: sysadmin + + alarm: 1hour_cpu_lock_collisions + on: system.softnet_stat + lookup: sum -1h unaligned absolute of collisions + units: collisions + every: 5m + warn: $this > 0 + delay: down 30m multiplier 1.5 max 1h + info: number of times two cpus collided trying to get a network device queue lock + to: sysadmin diff --git a/src/Makefile.am b/src/Makefile.am index 
8fa6d5bd..0ecd5533 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -55,6 +55,7 @@ netdata_SOURCES = \ proc_net_rpc_nfsd.c \ proc_net_snmp.c \ proc_net_snmp6.c \ + proc_net_softnet_stat.c \ proc_net_stat_conntrack.c \ proc_net_stat_synproxy.c \ proc_stat.c \ diff --git a/src/plugin_proc.c b/src/plugin_proc.c index a1bf314d..badbd327 100644 --- a/src/plugin_proc.c +++ b/src/plugin_proc.c @@ -32,6 +32,7 @@ void *proc_main(void *ptr) int vdo_proc_sys_kernel_random_entropy_avail = !config_get_boolean("plugin:proc", "/proc/sys/kernel/random/entropy_avail", 1); int vdo_proc_interrupts = !config_get_boolean("plugin:proc", "/proc/interrupts", 1); int vdo_proc_softirqs = !config_get_boolean("plugin:proc", "/proc/softirqs", 1); + int vdo_proc_net_softnet_stat = !config_get_boolean("plugin:proc", "/proc/net/softnet_stat", 1); int vdo_proc_loadavg = !config_get_boolean("plugin:proc", "/proc/loadavg", 1); int vdo_sys_kernel_mm_ksm = !config_get_boolean("plugin:proc", "/sys/kernel/mm/ksm", 1); int vdo_cpu_netdata = !config_get_boolean("plugin:proc", "netdata server resources", 1); @@ -52,6 +53,7 @@ void *proc_main(void *ptr) unsigned long long sutime_proc_sys_kernel_random_entropy_avail = 0ULL; unsigned long long sutime_proc_interrupts = 0ULL; unsigned long long sutime_proc_softirqs = 0ULL; + unsigned long long sutime_proc_net_softnet_stat = 0ULL; unsigned long long sutime_proc_loadavg = 0ULL; unsigned long long sutime_sys_kernel_mm_ksm = 0ULL; @@ -107,6 +109,14 @@ void *proc_main(void *ptr) } if(unlikely(netdata_exit)) break; + if(!vdo_proc_net_softnet_stat) { + debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_softnet_stat()."); + sunow = time_usec(); + vdo_proc_net_softnet_stat = do_proc_net_softnet_stat(rrd_update_every, (sutime_proc_net_softnet_stat > 0)?sunow - sutime_proc_net_softnet_stat:0ULL); + sutime_proc_net_softnet_stat = sunow; + } + if(unlikely(netdata_exit)) break; + if(!vdo_proc_sys_kernel_random_entropy_avail) { debug(D_PROCNETDEV_LOOP, 
"PROCNETDEV: calling do_proc_sys_kernel_random_entropy_avail()."); sunow = time_usec();
diff --git a/src/plugin_proc.h b/src/plugin_proc.h
index a512e1cd..565cd0a2 100644
--- a/src/plugin_proc.h
+++ b/src/plugin_proc.h
@@ -20,5 +20,6 @@ extern int do_proc_softirqs(int update_every, unsigned long long dt);
 extern int do_sys_kernel_mm_ksm(int update_every, unsigned long long dt);
 extern int do_proc_loadavg(int update_every, unsigned long long dt);
 extern int do_proc_net_stat_synproxy(int update_every, unsigned long long dt);
+extern int do_proc_net_softnet_stat(int update_every, unsigned long long dt);
 
 #endif /* NETDATA_PLUGIN_PROC_H */
diff --git a/src/proc_net_softnet_stat.c b/src/proc_net_softnet_stat.c
new file mode 100644
index 00000000..9d66d154
--- /dev/null
+++ b/src/proc_net_softnet_stat.c
@@ -0,0 +1,146 @@
+#include "common.h"
+
+// Map a zero-based /proc/net/softnet_stat column index to a dimension name.
+// Columns 0, 1, 2 and 8 have descriptive names; any other column is named
+// "cNN", where NN is the one-based column number in two decimal digits.
+// The "cNN" names share one static buffer that the next such call
+// overwrites, so the result must be used or copied before calling again.
+static inline char *softnet_column_name(uint32_t column) {
+    static char buf[4] = "c00";
+    char *s;
+
+    switch(column) {
+        case 0: s = "total"; break;
+        case 1: s = "dropped"; break;
+        case 2: s = "squeezed"; break;
+        case 8: s = "collisions"; break;
+        default: {
+            // two digits only: valid while column + 1 < 100
+            uint32_t c = column + 1;
+            buf[1] = '0' + ( c / 10); c = c % 10;
+            buf[2] = '0' + c;
+            s = buf;
+            break;
+        }
+    }
+
+    return s;
+}
+
+// Read /proc/net/softnet_stat and update the charts built from it.
+//
+// Each line of the file is parsed as a row of hexadecimal counters. The
+// per-column sums over all lines go to the "system.softnet_stat" chart and,
+// unless the "softnet_stat per core" option is disabled, each line also gets
+// its own "cpu.cpuN_softnet_stat" chart (N is the zero-based line number).
+//
+// update_every is handed to rrdset_create() for new charts; dt is unused.
+//
+// Returns 1 when the file cannot be opened or reports no lines or columns,
+// and 0 otherwise, including when procfile_readall() fails, so that the
+// file is opened again on the next call.
+int do_proc_net_softnet_stat(int update_every, unsigned long long dt) {
+    (void)dt;
+
+    static procfile *ff = NULL;
+    static int do_per_core = -1;
+    static uint32_t allocated_lines = 0, allocated_columns = 0, *data = NULL;
+
+    if(do_per_core == -1) do_per_core = config_get_boolean("plugin:proc:/proc/net/softnet_stat", "softnet_stat per core", 1);
+
+    if(!ff) {
+        char filename[FILENAME_MAX + 1];
+        snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/net/softnet_stat");
+        ff = procfile_open(config_get("plugin:proc:/proc/net/softnet_stat", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT);
+    }
+    if(!ff) return 1;
+
+    ff = procfile_readall(ff);
+    if(!ff) return 0; // we return 0, so that we will retry to open it next time
+
+    uint32_t lines = procfile_lines(ff), l;
+    uint32_t words = procfile_linewords(ff, 0), w;
+
+    if(!lines || !words) {
+        error("Cannot read /proc/net/softnet_stat, %u lines and %u columns reported.", lines, words);
+        return 1;
+    }
+
+    if(lines > 200) lines = 200;
+    if(words > 50) words = 50;
+
+    // data holds (allocated_lines + 1) rows of allocated_columns values:
+    // row 0 is the sum of all lines, row l + 1 is line l of the file
+    if(unlikely(!data || lines > allocated_lines || words > allocated_columns)) {
+        freez(data);
+        data = mallocz((lines + 1) * words * sizeof(uint32_t));
+        allocated_lines = lines;
+        allocated_columns = words;
+    }
+
+    // initialize to zero - all allocated_lines + 1 rows, otherwise the row of
+    // the last line keeps stale or uninitialized values in the columns that
+    // the parsing loop below does not write
+    bzero(data, (allocated_lines + 1) * allocated_columns * sizeof(uint32_t));
+
+    // parse the values
+    for(l = 0; l < lines ;l++) {
+        words = procfile_linewords(ff, l);
+        if(!words) continue;
+
+        if(words > allocated_columns) words = allocated_columns;
+
+        for(w = 0; w < words ; w++) {
+            uint32_t t = strtoul(procfile_lineword(ff, l, w), NULL, 16);
+            data[w] += t;
+            data[((l + 1) * allocated_columns) + w] = t;
+        }
+    }
+
+    // when the first value of the last line is zero, leave that line
+    // out of the per core charts
+    if(data[(lines * allocated_columns)] == 0)
+        lines--;
+
+    RRDSET *st;
+
+    // --------------------------------------------------------------------
+
+    st = rrdset_find_bytype("system", "softnet_stat");
+    if(!st) {
+        st = rrdset_create("system", "softnet_stat", NULL, "softnet_stat", NULL, "System softnet_stat", "events/s", 955, update_every, RRDSET_TYPE_LINE);
+        for(w = 0; w < allocated_columns ;w++)
+            rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRDDIM_INCREMENTAL);
+    }
+    else rrdset_next(st);
+
+    for(w = 0; w < allocated_columns ;w++)
+        rrddim_set(st, softnet_column_name(w), data[w]);
+
+    rrdset_done(st);
+
+    if(do_per_core) {
+        for(l = 0; l < lines ;l++) {
+            char id[50+1];
+            snprintfz(id, 50, "cpu%u_softnet_stat", l);
+
+            st = rrdset_find_bytype("cpu", id);
+            if(!st) {
+                char title[100+1];
+                snprintfz(title, 100, "CPU%u softnet_stat", l);
+
+                st = rrdset_create("cpu", id, NULL, "softnet_stat", NULL, title, "events/s", 4101 + l, update_every, RRDSET_TYPE_LINE);
+                for(w = 0; w < allocated_columns ;w++)
+                    rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRDDIM_INCREMENTAL);
+            }
+            else rrdset_next(st);
+
+            for(w = 0; w < allocated_columns ;w++)
+                rrddim_set(st, softnet_column_name(w), data[((l + 1) * allocated_columns) + w]);
+
+            rrdset_done(st);
+        }
+    }
+
+    return 0;
+}