From: Costa Tsaousis (ktsaou) Date: Sun, 24 Apr 2016 22:33:54 +0000 (+0300) Subject: cgroups are now on their own thread; cgroup names are controlled by plugins.d/cgroup... X-Git-Tag: v1.2.0~49^2~1 X-Git-Url: https://arthur.barton.de/gitweb/?p=netdata.git;a=commitdiff_plain;h=b1dfe19b4f139a7e213d078dab8dd430040851b2 cgroups are now on their own thread; cgroup names are controlled by plugins.d/cgroup-name.sh which uses /etc/netdata/cgroups-names.conf to pick up unique ID (chart names) for the containers --- diff --git a/plugins.d/Makefile.am b/plugins.d/Makefile.am index a89ee4cd..a717cbed 100644 --- a/plugins.d/Makefile.am +++ b/plugins.d/Makefile.am @@ -8,6 +8,7 @@ dist_plugins_DATA = \ $(NULL) dist_plugins_SCRIPTS = \ + cgroup-name.sh \ charts.d.dryrun-helper.sh \ charts.d.plugin \ node.d.plugin \ diff --git a/plugins.d/cgroup-name.sh b/plugins.d/cgroup-name.sh new file mode 100755 index 00000000..8d207fd4 --- /dev/null +++ b/plugins.d/cgroup-name.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" + +NETDATA_CONFIG_DIR="${NETDATA_CONFIG_DIR-/etc/netdata}" +CONFIG="${NETDATA_CONFIG_DIR}/cgroups-names.conf" +CGROUP="${1}" +NAME= + +if [ -z "${CGROUP}" ] + then + echo >&2 "${0}: called without a cgroup name. Nothing to do." + exit 1 +fi + +if [ -f "${CONFIG}" ] + then + NAME="$(cat "${CONFIG}" | grep "^${CGROUP}" | cut -d ' ' -f 2)" + if [ -z "${NAME}" ] + then + echo >&2 "${0}: cannot find cgroup '${CGROUP}' in '${CONFIG}'." + fi +else + echo >&2 "${0}: configuration file '${CONFIG}' is not available." 
+fi + +if [ -z "${NAME}" ] + then + if [ ${#CGROUP} -gt 12 ] + then + NAME="${CGROUP:0:12}" + else + NAME="${CGROUP}" + fi +fi + +echo >&2 "${0}: cgroup '${CGROUP}' is named as '${NAME}'" +echo "${NAME}" diff --git a/src/apps_plugin.c b/src/apps_plugin.c index 4d05ab05..818f447b 100644 --- a/src/apps_plugin.c +++ b/src/apps_plugin.c @@ -2387,12 +2387,6 @@ void parse_args(int argc, char **argv) } } -unsigned long long sutime() { - struct timeval now; - gettimeofday(&now, NULL); - return now.tv_sec * 1000000ULL + now.tv_usec; -} - int main(int argc, char **argv) { // debug_flags = D_PROCFILE; @@ -2456,11 +2450,11 @@ int main(int argc, char **argv) for(;1; counter++) { #ifndef PROFILING_MODE // delay until it is our time to run - while((sunow = sutime()) < sunext) + while((sunow = timems()) < sunext) usleep((useconds_t)(sunext - sunow)); // find the next time we need to run - while(sutime() > sunext) + while(timems() > sunext) sunext += update_every * 1000000ULL; #endif /* PROFILING_MODE */ diff --git a/src/common.c b/src/common.c index f2abf961..ed04fdb9 100644 --- a/src/common.c +++ b/src/common.c @@ -20,6 +20,13 @@ char *global_host_prefix = ""; int enable_ksm = 1; +// wall-clock time from gettimeofday() in microseconds (not milliseconds, despite the name) +unsigned long long timems(void) { + struct timeval now; + gettimeofday(&now, NULL); + return now.tv_sec * 1000000ULL + now.tv_usec; +} + unsigned char netdata_map_chart_names[256] = { [0] = '\0', // [1] = '_', // diff --git a/src/common.h b/src/common.h index cb7de224..a028da7c 100644 --- a/src/common.h +++ b/src/common.h @@ -37,6 +37,8 @@ extern void get_HZ(void); extern pid_t gettid(void); +extern unsigned long long timems(void); + /* fix for alpine linux */ #ifndef RUSAGE_THREAD #ifdef RUSAGE_CHILDREN diff --git a/src/main.c b/src/main.c index ad24debf..ba02371f 100644 --- a/src/main.c +++ b/src/main.c @@ -36,7 +36,8 @@ #include "plugin_nfacct.h" #include "main.h" -#include "../config.h" + +extern void *cgroups_main(void *ptr); int netdata_exit = 0; @@ -84,6 +85,7 @@ 
struct netdata_static_thread static_threads[] = { {"tc", "plugins", "tc", 1, NULL, NULL, tc_main}, {"idlejitter", "plugins", "idlejitter", 1, NULL, NULL, cpuidlejitter_main}, {"proc", "plugins", "proc", 1, NULL, NULL, proc_main}, + {"cgroups", "plugins", "cgroups", 1, NULL, NULL, cgroups_main}, #ifdef INTERNAL_PLUGIN_NFACCT // nfacct requires root access diff --git a/src/plugin_proc.c b/src/plugin_proc.c index 20e85526..632ec1e8 100644 --- a/src/plugin_proc.c +++ b/src/plugin_proc.c @@ -15,12 +15,6 @@ #include "plugin_proc.h" #include "main.h" -unsigned long long sutime() { - struct timeval now; - gettimeofday(&now, NULL); - return now.tv_sec * 1000000ULL + now.tv_usec; -} - void *proc_main(void *ptr) { if(ptr) { ; } @@ -58,7 +52,6 @@ void *proc_main(void *ptr) int vdo_proc_loadavg = !config_get_boolean("plugin:proc", "/proc/loadavg", 1); int vdo_sys_kernel_mm_ksm = !config_get_boolean("plugin:proc", "/sys/kernel/mm/ksm", 1); int vdo_cpu_netdata = !config_get_boolean("plugin:proc", "netdata server resources", 1); - int vdo_sys_fs_cgroup = !config_get_boolean("plugin:proc", "cgroups", 1); // keep track of the time each module was called unsigned long long sutime_proc_net_dev = 0ULL; @@ -78,7 +71,6 @@ void *proc_main(void *ptr) unsigned long long sutime_proc_softirqs = 0ULL; unsigned long long sutime_proc_loadavg = 0ULL; unsigned long long sutime_sys_kernel_mm_ksm = 0ULL; - unsigned long long sutime_sys_fs_cgroup = 0ULL; // the next time we will run - aligned properly unsigned long long sunext = (time(NULL) - (time(NULL) % rrd_update_every) + rrd_update_every) * 1000000ULL; @@ -90,11 +82,11 @@ void *proc_main(void *ptr) if(unlikely(netdata_exit)) break; // delay until it is our time to run - while((sunow = sutime()) < sunext) + while((sunow = timems()) < sunext) usleep((useconds_t)(sunext - sunow)); // find the next time we need to run - while(sutime() > sunext) + while(timems() > sunext) sunext += rrd_update_every * 1000000ULL; if(unlikely(netdata_exit)) break; @@ 
-104,7 +96,7 @@ void *proc_main(void *ptr) if(!vdo_sys_kernel_mm_ksm) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_sys_kernel_mm_ksm()."); - sunow = sutime(); + sunow = timems(); vdo_sys_kernel_mm_ksm = do_sys_kernel_mm_ksm(rrd_update_every, (sutime_sys_kernel_mm_ksm > 0)?sunow - sutime_sys_kernel_mm_ksm:0ULL); sutime_sys_kernel_mm_ksm = sunow; } @@ -112,7 +104,7 @@ void *proc_main(void *ptr) if(!vdo_proc_loadavg) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_loadavg()."); - sunow = sutime(); + sunow = timems(); vdo_proc_loadavg = do_proc_loadavg(rrd_update_every, (sutime_proc_loadavg > 0)?sunow - sutime_proc_loadavg:0ULL); sutime_proc_loadavg = sunow; } @@ -120,7 +112,7 @@ void *proc_main(void *ptr) if(!vdo_proc_interrupts) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_interrupts()."); - sunow = sutime(); + sunow = timems(); vdo_proc_interrupts = do_proc_interrupts(rrd_update_every, (sutime_proc_interrupts > 0)?sunow - sutime_proc_interrupts:0ULL); sutime_proc_interrupts = sunow; } @@ -128,7 +120,7 @@ void *proc_main(void *ptr) if(!vdo_proc_softirqs) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_softirqs()."); - sunow = sutime(); + sunow = timems(); vdo_proc_softirqs = do_proc_softirqs(rrd_update_every, (sutime_proc_softirqs > 0)?sunow - sutime_proc_softirqs:0ULL); sutime_proc_softirqs = sunow; } @@ -136,7 +128,7 @@ void *proc_main(void *ptr) if(!vdo_proc_sys_kernel_random_entropy_avail) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_sys_kernel_random_entropy_avail()."); - sunow = sutime(); + sunow = timems(); vdo_proc_sys_kernel_random_entropy_avail = do_proc_sys_kernel_random_entropy_avail(rrd_update_every, (sutime_proc_sys_kernel_random_entropy_avail > 0)?sunow - sutime_proc_sys_kernel_random_entropy_avail:0ULL); sutime_proc_sys_kernel_random_entropy_avail = sunow; } @@ -144,7 +136,7 @@ void *proc_main(void *ptr) if(!vdo_proc_net_dev) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_dev()."); - sunow = 
sutime(); + sunow = timems(); vdo_proc_net_dev = do_proc_net_dev(rrd_update_every, (sutime_proc_net_dev > 0)?sunow - sutime_proc_net_dev:0ULL); sutime_proc_net_dev = sunow; } @@ -152,7 +144,7 @@ void *proc_main(void *ptr) if(!vdo_proc_diskstats) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_diskstats()."); - sunow = sutime(); + sunow = timems(); vdo_proc_diskstats = do_proc_diskstats(rrd_update_every, (sutime_proc_diskstats > 0)?sunow - sutime_proc_diskstats:0ULL); sutime_proc_diskstats = sunow; } @@ -160,7 +152,7 @@ void *proc_main(void *ptr) if(!vdo_proc_net_snmp) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_snmp()."); - sunow = sutime(); + sunow = timems(); vdo_proc_net_snmp = do_proc_net_snmp(rrd_update_every, (sutime_proc_net_snmp > 0)?sunow - sutime_proc_net_snmp:0ULL); sutime_proc_net_snmp = sunow; } @@ -168,7 +160,7 @@ void *proc_main(void *ptr) if(!vdo_proc_net_snmp6) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_snmp6()."); - sunow = sutime(); + sunow = timems(); vdo_proc_net_snmp6 = do_proc_net_snmp6(rrd_update_every, (sutime_proc_net_snmp6 > 0)?sunow - sutime_proc_net_snmp6:0ULL); sutime_proc_net_snmp6 = sunow; } @@ -176,7 +168,7 @@ void *proc_main(void *ptr) if(!vdo_proc_net_netstat) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_netstat()."); - sunow = sutime(); + sunow = timems(); vdo_proc_net_netstat = do_proc_net_netstat(rrd_update_every, (sutime_proc_net_netstat > 0)?sunow - sutime_proc_net_netstat:0ULL); sutime_proc_net_netstat = sunow; } @@ -184,7 +176,7 @@ void *proc_main(void *ptr) if(!vdo_proc_net_stat_conntrack) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_stat_conntrack()."); - sunow = sutime(); + sunow = timems(); vdo_proc_net_stat_conntrack = do_proc_net_stat_conntrack(rrd_update_every, (sutime_proc_net_stat_conntrack > 0)?sunow - sutime_proc_net_stat_conntrack:0ULL); sutime_proc_net_stat_conntrack = sunow; } @@ -192,7 +184,7 @@ void *proc_main(void *ptr) 
if(!vdo_proc_net_ip_vs_stats) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling vdo_proc_net_ip_vs_stats()."); - sunow = sutime(); + sunow = timems(); vdo_proc_net_ip_vs_stats = do_proc_net_ip_vs_stats(rrd_update_every, (sutime_proc_net_ip_vs_stats > 0)?sunow - sutime_proc_net_ip_vs_stats:0ULL); sutime_proc_net_ip_vs_stats = sunow; } @@ -200,7 +192,7 @@ void *proc_main(void *ptr) if(!vdo_proc_net_stat_synproxy) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling vdo_proc_net_stat_synproxy()."); - sunow = sutime(); + sunow = timems(); vdo_proc_net_stat_synproxy = do_proc_net_stat_synproxy(rrd_update_every, (sutime_proc_net_stat_synproxy > 0)?sunow - sutime_proc_net_stat_synproxy:0ULL); sutime_proc_net_stat_synproxy = sunow; } @@ -208,7 +200,7 @@ void *proc_main(void *ptr) if(!vdo_proc_stat) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_stat()."); - sunow = sutime(); + sunow = timems(); vdo_proc_stat = do_proc_stat(rrd_update_every, (sutime_proc_stat > 0)?sunow - sutime_proc_stat:0ULL); sutime_proc_stat = sunow; } @@ -216,7 +208,7 @@ void *proc_main(void *ptr) if(!vdo_proc_meminfo) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling vdo_proc_meminfo()."); - sunow = sutime(); + sunow = timems(); vdo_proc_meminfo = do_proc_meminfo(rrd_update_every, (sutime_proc_meminfo > 0)?sunow - sutime_proc_meminfo:0ULL); sutime_proc_meminfo = sunow; } @@ -224,7 +216,7 @@ void *proc_main(void *ptr) if(!vdo_proc_vmstat) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling vdo_proc_vmstat()."); - sunow = sutime(); + sunow = timems(); vdo_proc_vmstat = do_proc_vmstat(rrd_update_every, (sutime_proc_vmstat > 0)?sunow - sutime_proc_vmstat:0ULL); sutime_proc_vmstat = sunow; } @@ -232,20 +224,12 @@ void *proc_main(void *ptr) if(!vdo_proc_net_rpc_nfsd) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_rpc_nfsd()."); - sunow = sutime(); + sunow = timems(); vdo_proc_net_rpc_nfsd = do_proc_net_rpc_nfsd(rrd_update_every, (sutime_proc_net_rpc_nfsd > 0)?sunow - sutime_proc_net_rpc_nfsd:0ULL); 
sutime_proc_net_rpc_nfsd = sunow; } if(unlikely(netdata_exit)) break; - if(!vdo_sys_fs_cgroup) { - debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_sys_fs_cgroup()."); - sunow = sutime(); - vdo_sys_fs_cgroup = do_sys_fs_cgroup(rrd_update_every, (sutime_sys_fs_cgroup > 0)?sunow - sutime_sys_fs_cgroup:0ULL); - sutime_sys_fs_cgroup = sunow; - } - if(unlikely(netdata_exit)) break; - // END -- the job is done // -------------------------------------------------------------------- diff --git a/src/plugin_proc.h b/src/plugin_proc.h index 80655d55..a512e1cd 100644 --- a/src/plugin_proc.h +++ b/src/plugin_proc.h @@ -20,6 +20,5 @@ extern int do_proc_softirqs(int update_every, unsigned long long dt); extern int do_sys_kernel_mm_ksm(int update_every, unsigned long long dt); extern int do_proc_loadavg(int update_every, unsigned long long dt); extern int do_proc_net_stat_synproxy(int update_every, unsigned long long dt); -extern int do_sys_fs_cgroup(int update_every, unsigned long long dt); #endif /* NETDATA_PLUGIN_PROC_H */ diff --git a/src/sys_fs_cgroup.c b/src/sys_fs_cgroup.c index a2a32cdf..3cfc2ab2 100644 --- a/src/sys_fs_cgroup.c +++ b/src/sys_fs_cgroup.c @@ -14,6 +14,8 @@ #include "procfile.h" #include "log.h" #include "rrd.h" +#include "main.h" +#include "popen.h" // ---------------------------------------------------------------------------- // cgroup globals @@ -545,6 +547,33 @@ void read_all_cgroups(struct cgroup *cg) { // ---------------------------------------------------------------------------- // add/remove/find cgroup objects +#define CGROUP_NAME_LINE_MAX 1024 + +void cgroup_get_name(struct cgroup *cg) { + pid_t cgroup_pid; + char buffer[CGROUP_NAME_LINE_MAX + 1]; + + snprintf(buffer, CGROUP_NAME_LINE_MAX, "exec %s '%s'", + config_get("plugin:cgroups", "script to get cgroup names", PLUGINS_DIR "/cgroup-name.sh"), cg->name); + + FILE *fp = mypopen(buffer, &cgroup_pid); + if(!fp) { + error("CGROUP: Cannot popen(\"%s\", \"r\").", buffer); + return; + } + char *s = 
fgets(buffer, CGROUP_NAME_LINE_MAX, fp); + mypclose(fp, cgroup_pid); + + if(s && *s && *s != '\n') { + trim(s); + netdata_fix_chart_name(s); + free(cg->name); + cg->name = strdup(s); + if(!cg->name) + fatal("CGROUP: Cannot allocate memory for name cgroup %s name: '%s'", cg->id, s); + } +} + struct cgroup *cgroup_add(const char *id) { if(cgroup_root_count >= cgroup_root_max) { info("Maximum number of cgroups reached (%d). Not adding cgroup '%s'", cgroup_root_count, id); @@ -595,6 +624,9 @@ struct cgroup *cgroup_add(const char *id) { cgroup_root_count++; // fprintf(stderr, " > added cgroup No %d, with id '%s' (%u) and name '%s'\n", cgroup_root_count, cg->id, cg->hash, cg->name); + + // fix the name by calling the external script + cgroup_get_name(cg); } else fatal("Cannot allocate memory for cgroup '%s'", id); @@ -808,9 +840,9 @@ void update_cgroup_charts(int update_every) { if(cg->id[0] == '\0') strcpy(type, "cgroup_host"); else if(cg->id[0] == '/') - snprintf(type, RRD_ID_LENGTH_MAX, "cgroup%s", cg->id); + snprintf(type, RRD_ID_LENGTH_MAX, "cgroup_%s", cg->name); else - snprintf(type, RRD_ID_LENGTH_MAX, "cgroup_%s", cg->id); + snprintf(type, RRD_ID_LENGTH_MAX, "cgroup_%s", cg->name); netdata_fix_chart_id(type); @@ -1061,3 +1093,79 @@ int do_sys_fs_cgroup(int update_every, unsigned long long dt) { return 0; } + +void *cgroups_main(void *ptr) +{ + if(ptr) { ; } + + info("CGROUP Plugin thread created with task id %d", gettid()); + + if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) + error("Cannot set pthread cancel type to DEFERRED."); + + if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) + error("Cannot set pthread cancel state to ENABLE."); + + struct rusage thread; + + // when ZERO, attempt to do it + int vdo_sys_fs_cgroup = 0; + int vdo_cpu_netdata = !config_get_boolean("plugin:cgroups", "netdata server resources", 1); + + // keep track of the time each module was called + unsigned long long sutime_sys_fs_cgroup = 0ULL; + + // the next time 
we will run - aligned properly + unsigned long long sunext = (time(NULL) - (time(NULL) % rrd_update_every) + rrd_update_every) * 1000000ULL; + unsigned long long sunow; + + RRDSET *stcpu_thread = NULL; + + for(;1;) { + if(unlikely(netdata_exit)) break; + + // delay until it is our time to run + while((sunow = timems()) < sunext) + usleep((useconds_t)(sunext - sunow)); + + // find the next time we need to run + while(timems() > sunext) + sunext += rrd_update_every * 1000000ULL; + + if(unlikely(netdata_exit)) break; + + // BEGIN -- the job to be done + + if(!vdo_sys_fs_cgroup) { + debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_sys_fs_cgroup()."); + sunow = timems(); + vdo_sys_fs_cgroup = do_sys_fs_cgroup(rrd_update_every, (sutime_sys_fs_cgroup > 0)?sunow - sutime_sys_fs_cgroup:0ULL); + sutime_sys_fs_cgroup = sunow; + } + if(unlikely(netdata_exit)) break; + + // END -- the job is done + + // -------------------------------------------------------------------- + + if(!vdo_cpu_netdata) { + getrusage(RUSAGE_THREAD, &thread); + + if(!stcpu_thread) stcpu_thread = rrdset_find("netdata.plugin_cgroups_cpu"); + if(!stcpu_thread) { + stcpu_thread = rrdset_create("netdata", "plugin_cgroups_cpu", NULL, "proc.internal", NULL, "NetData CGroups Plugin CPU usage", "milliseconds/s", 131000, rrd_update_every, RRDSET_TYPE_STACKED); + + rrddim_add(stcpu_thread, "user", NULL, 1, 1000, RRDDIM_INCREMENTAL); + rrddim_add(stcpu_thread, "system", NULL, 1, 1000, RRDDIM_INCREMENTAL); + } + else rrdset_next(stcpu_thread); + + rrddim_set(stcpu_thread, "user" , thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec); + rrddim_set(stcpu_thread, "system", thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec); + rrdset_done(stcpu_thread); + } + } + + pthread_exit(NULL); + return NULL; +}