X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=src%2Fsys_fs_cgroup.c;h=a42178efc3188d14f406880460a712d7a62e7d2d;hb=3b8cfeecd6ee3d94452315a5bb6525ddea06caff;hp=82ae5c7235a367ccd2a4e498dc2634f2bf672924;hpb=def60e69a5c08b1d31dcafece44e19cab6a7a406;p=netdata.git diff --git a/src/sys_fs_cgroup.c b/src/sys_fs_cgroup.c index 82ae5c72..a42178ef 100644 --- a/src/sys_fs_cgroup.c +++ b/src/sys_fs_cgroup.c @@ -138,6 +138,18 @@ struct memory { unsigned long long total_active_file; unsigned long long total_unevictable; */ + + int usage_in_bytes_updated; + char *filename_usage_in_bytes; + unsigned long long usage_in_bytes; + + int msw_usage_in_bytes_updated; + char *filename_msw_usage_in_bytes; + unsigned long long msw_usage_in_bytes; + + int failcnt_updated; + char *filename_failcnt; + unsigned long long failcnt; }; // https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt @@ -160,9 +172,13 @@ struct cpuacct_usage { unsigned long long *cpu_percpu; }; +#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 + struct cgroup { - int available; // found in the filesystem - int enabled; // enabled in the config + uint32_t options; + + char available; // found in the filesystem + char enabled; // enabled in the config char *id; uint32_t hash; @@ -253,7 +269,7 @@ void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { } unsigned long i = procfile_linewords(ff, 0); - if(i <= 0) return; + if(i == 0) return; // we may have 1 more CPU reported while(i > 0) { @@ -553,6 +569,24 @@ void cgroup_read_memory(struct memory *mem) { mem->updated = 1; } + + mem->usage_in_bytes_updated = 0; + if(mem->filename_usage_in_bytes) { + if(likely(!read_single_number_file(mem->filename_usage_in_bytes, &mem->usage_in_bytes))) + mem->usage_in_bytes_updated = 1; + } + + mem->msw_usage_in_bytes_updated = 0; + if(mem->filename_msw_usage_in_bytes) { + if(likely(!read_single_number_file(mem->filename_msw_usage_in_bytes, &mem->msw_usage_in_bytes))) + mem->msw_usage_in_bytes_updated = 1; + } + + mem->failcnt_updated = 0; + if(mem->filename_failcnt) { + if(likely(!read_single_number_file(mem->filename_failcnt, &mem->failcnt))) + mem->failcnt_updated = 1; + } } void cgroup_read(struct cgroup *cg) { @@ -616,8 +650,8 @@ void cgroup_get_chart_id(struct cgroup *cg) { freez(cg->chart_id); cg->chart_id = strdupz(s); - netdata_fix_chart_id(cg->chart_id); + cg->hash_chart = simple_hash(cg->chart_id); debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title); } @@ -678,12 +712,11 @@ struct cgroup *cgroup_add(const char *id) { struct cgroup *cg = callocz(1, sizeof(struct cgroup)); - debug(D_CGROUP, "adding cgroup '%s'", id); - cg->id = strdupz(id); cg->hash = simple_hash(cg->id); cg->chart_id = strdupz(chart_id); + netdata_fix_chart_id(cg->chart_id); cg->hash_chart = simple_hash(cg->chart_id); cg->chart_title = strdupz(chart_id); @@ -702,15 +735,35 @@ struct cgroup *cgroup_add(const char *id) { // fix the name by calling the external script cgroup_get_chart_id(cg); + debug(D_CGROUP, "adding cgroup '%s' with chart id '%s'", id, chart_id); + char option[FILENAME_MAX + 1]; snprintfz(option, FILENAME_MAX, "enable cgroup %s", cg->chart_title); cg->enabled = config_get_boolean("plugin:cgroups", option, def); - struct cgroup *t; - for(t = cgroup_root; t ; t = t->next) { - if(t != cg && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id) && t->enabled) { - error("Control group with chart id '%s' already exists and is enabled. Disabling cgroup '%s'.", cg->chart_id, cg->id); - cg->enabled = 0; + if(cg->enabled) { + struct cgroup *t; + for (t = cgroup_root; t; t = t->next) { + if (t != cg && t->enabled && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { + if (!strncmp(t->chart_id, "/system.slice/", 14) && !strncmp(cg->chart_id, "/init.scope/system.slice/", 25)) { + error("Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id, t->id); + debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id, t->id); + t->enabled = 0; + t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + } + else { + error("Control group with chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id); + debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id); + cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + } + + break; + } } } @@ -787,9 +840,10 @@ void found_subdir_in_dir(const char *dir) { if(cg) cg->available = 1; } -void find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { +int find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { debug(D_CGROUP, "searching for directories in '%s'", base); + int ret = -1; int enabled = -1; if(!this) this = base; size_t dirlen = strlen(this), baselen = strlen(base); @@ -798,8 +852,9 @@ void find_dir_in_subdirs(const char *base, const char *this, void (*callback)(co DIR *dir = opendir(this); if(!dir) { error("Cannot read cgroups directory '%s'", base); - return; + return ret; } + ret = 1; callback(relative_path); @@ -820,13 +875,20 @@ void find_dir_in_subdirs(const char *base, const char *this, void (*callback)(co if(*r == '\0') r = "/"; else if (*r == '/') r++; + // do not decent in directories we are not interested + // https://github.com/firehol/netdata/issues/345 + int def = 1; + size_t len = strlen(r); + if(len > 5 && !strncmp(&r[len - 5], "-qemu", 5)) + def = 0; + // we check for this option here // so that the config will not have settings // for leaf directories char option[FILENAME_MAX + 1]; snprintfz(option, FILENAME_MAX, "search for cgroups under %s", r); option[FILENAME_MAX] = '\0'; - enabled = config_get_boolean("plugin:cgroups", option, 1); + enabled = config_get_boolean("plugin:cgroups", option, def); } if(enabled) { @@ -834,13 +896,15 @@ void find_dir_in_subdirs(const char *base, const char *this, void (*callback)(co strcpy(s, this); strcat(s, "/"); strcat(s, de->d_name); - find_dir_in_subdirs(base, s, callback); + int ret2 = find_dir_in_subdirs(base, s, callback); + if(ret2 > 0) ret += ret2; freez(s); } } } closedir(dir); + return ret; } void mark_all_cgroups_as_not_available() { @@ -849,8 +913,9 @@ void mark_all_cgroups_as_not_available() { struct cgroup *cg; // mark all as not available - for(cg = cgroup_root; cg ; cg = cg->next) + for(cg = cgroup_root; cg ; cg = cg->next) { cg->available = 0; + } } void cleanup_all_cgroups() { @@ -858,6 +923,18 @@ void cleanup_all_cgroups() { for(; cg ;) { if(!cg->available) { + // enable the first duplicate cgroup + { + struct cgroup *t; + for(t = cgroup_root; t ; t = t->next) { + if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { + debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + t->enabled = 1; + t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; + break; + } + } + } if(!last) cgroup_root = cg->next; @@ -883,17 +960,33 @@ void find_all_cgroups() { mark_all_cgroups_as_not_available(); - if(cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) - find_dir_in_subdirs(cgroup_cpuacct_base, NULL, found_subdir_in_dir); + if(cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { + if (find_dir_in_subdirs(cgroup_cpuacct_base, NULL, found_subdir_in_dir) == -1) { + cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = 0; + error("disabled cgroup cpu statistics."); + } + } - if(cgroup_enable_blkio) - find_dir_in_subdirs(cgroup_blkio_base, NULL, found_subdir_in_dir); + if(cgroup_enable_blkio) { + if (find_dir_in_subdirs(cgroup_blkio_base, NULL, found_subdir_in_dir) == -1) { + cgroup_enable_blkio = 0; + error("disabled cgroup blkio statistics."); + } + } - if(cgroup_enable_memory) - find_dir_in_subdirs(cgroup_memory_base, NULL, found_subdir_in_dir); + if(cgroup_enable_memory) { + if(find_dir_in_subdirs(cgroup_memory_base, NULL, found_subdir_in_dir) == -1) { + cgroup_enable_memory = 0; + error("disabled cgroup memory statistics."); + } + } - if(cgroup_enable_devices) - find_dir_in_subdirs(cgroup_devices_base, NULL, found_subdir_in_dir); + if(cgroup_enable_devices) { + if(find_dir_in_subdirs(cgroup_devices_base, NULL, found_subdir_in_dir) == -1) { + cgroup_enable_devices = 0; + error("disabled cgroup devices statistics."); + } + } // remove any non-existing cgroups cleanup_all_cgroups(); @@ -934,6 +1027,27 @@ void find_all_cgroups() { debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename); } else debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + } + else debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.msw_usage_in_bytes", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + } + else debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_failcnt = strdupz(filename); + debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); + } + else debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(cgroup_enable_blkio) { if(!cg->io_service_bytes.filename) { @@ -1021,7 +1135,7 @@ void update_cgroup_charts(int update_every) { if(cg->cpuacct_stat.updated) { st = rrdset_find_bytype(type, "cpu"); if(!st) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title); st = rrdset_create(type, "cpu", NULL, "cpu", "cgroup.cpu", title, "%", 40000, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "user", NULL, 100, hz, RRDDIM_INCREMENTAL); @@ -1040,7 +1154,7 @@ void update_cgroup_charts(int update_every) { st = rrdset_find_bytype(type, "cpu_per_core"); if(!st) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage Per Core for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) Per Core for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title); st = rrdset_create(type, "cpu_per_core", NULL, "cpu", "cgroup.cpu_per_core", title, "%", 40100, update_every, RRDSET_TYPE_STACKED); for(i = 0; i < cg->cpuacct_usage.cpus ;i++) { @@ -1062,7 +1176,7 @@ void update_cgroup_charts(int update_every) { st = rrdset_find_bytype(type, "mem"); if(!st) { snprintfz(title, CHART_TITLE_MAX, "Memory Usage for cgroup %s", cg->chart_title); - st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 40200, update_every, + st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 40210, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "cache", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); @@ -1135,12 +1249,44 @@ void update_cgroup_charts(int update_every) { } } + if(cg->memory.usage_in_bytes_updated) { + st = rrdset_find_bytype(type, "mem_usage"); + if(!st) { + snprintfz(title, CHART_TITLE_MAX, "Total Memory for cgroup %s", cg->chart_title); + st = rrdset_create(type, "mem_usage", NULL, "mem", "cgroup.mem_usage", title, "MB", 40200, + update_every, RRDSET_TYPE_STACKED); + + rrddim_add(st, "ram", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); + rrddim_add(st, "swap", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); + } + else rrdset_next(st); + + rrddim_set(st, "ram", cg->memory.usage_in_bytes); + rrddim_set(st, "swap", (cg->memory.msw_usage_in_bytes > cg->memory.usage_in_bytes)?cg->memory.msw_usage_in_bytes - cg->memory.usage_in_bytes:0); + rrdset_done(st); + } + + if(cg->memory.failcnt_updated && cg->memory.failcnt > 0) { + st = rrdset_find_bytype(type, "mem_failcnt"); + if(!st) { + snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures for cgroup %s", cg->chart_title); + st = rrdset_create(type, "mem_failcnt", NULL, "mem", "cgroup.mem_failcnt", title, "MB", 40250, + update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "failures", NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "failures", cg->memory.failcnt); + rrdset_done(st); + } + if(cg->io_service_bytes.updated && cg->io_service_bytes.Read + cg->io_service_bytes.Write > 0) { st = rrdset_find_bytype(type, "io"); if(!st) { snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks) for cgroup %s", cg->chart_title); st = rrdset_create(type, "io", NULL, "disk", "cgroup.io", title, "KB/s", 41200, - update_every, RRDSET_TYPE_LINE); + update_every, RRDSET_TYPE_AREA); rrddim_add(st, "read", NULL, 1, 1024, RRDDIM_INCREMENTAL); rrddim_add(st, "write", NULL, -1, 1024, RRDDIM_INCREMENTAL); @@ -1170,11 +1316,11 @@ void update_cgroup_charts(int update_every) { } if(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.Read + cg->throttle_io_service_bytes.Write > 0) { - st = rrdset_find_bytype(type, "io"); + st = rrdset_find_bytype(type, "throttle_io"); if(!st) { snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks) for cgroup %s", cg->chart_title); - st = rrdset_create(type, "io", NULL, "disk", "cgroup.io", title, "KB/s", 41200, - update_every, RRDSET_TYPE_LINE); + st = rrdset_create(type, "throttle_io", NULL, "disk", "cgroup.throttle_io", title, "KB/s", 41200, + update_every, RRDSET_TYPE_AREA); rrddim_add(st, "read", NULL, 1, 1024, RRDDIM_INCREMENTAL); rrddim_add(st, "write", NULL, -1, 1024, RRDDIM_INCREMENTAL); @@ -1245,12 +1391,12 @@ void update_cgroup_charts(int update_every) { // ---------------------------------------------------------------------------- // cgroups main -int do_sys_fs_cgroup(int update_every, unsigned long long dt) { +int do_sys_fs_cgroup(int update_every, usec_t dt) { (void)dt; static int cgroup_global_config_read = 0; static time_t last_run = 0; - time_t now = time(NULL); + time_t now = now_realtime_sec(); if(unlikely(!cgroup_global_config_read)) { read_cgroup_plugin_configuration(); @@ -1270,7 +1416,7 @@ int do_sys_fs_cgroup(int update_every, unsigned long long dt) { void *cgroups_main(void *ptr) { - if(ptr) { ; } + (void)ptr; info("CGROUP Plugin thread created with task id %d", gettid()); @@ -1287,24 +1433,24 @@ void *cgroups_main(void *ptr) int vdo_cpu_netdata = !config_get_boolean("plugin:cgroups", "cgroups plugin resources", 1); // keep track of the time each module was called - unsigned long long sutime_sys_fs_cgroup = 0ULL; + usec_t sutime_sys_fs_cgroup = 0ULL; // the next time we will run - aligned properly - unsigned long long sunext = (time(NULL) - (time(NULL) % rrd_update_every) + rrd_update_every) * 1000000ULL; - unsigned long long sunow; + usec_t sunext = (now_realtime_sec() - (now_realtime_sec() % rrd_update_every) + rrd_update_every) * USEC_PER_SEC; RRDSET *stcpu_thread = NULL; - for(;1;) { + for(;;) { + usec_t sunow; if(unlikely(netdata_exit)) break; // delay until it is our time to run - while((sunow = time_usec()) < sunext) + while((sunow = now_realtime_usec()) < sunext) sleep_usec(sunext - sunow); // find the next time we need to run - while(time_usec() > sunext) - sunext += rrd_update_every * 1000000ULL; + while(now_realtime_usec() > sunext) + sunext += rrd_update_every * USEC_PER_SEC; if(unlikely(netdata_exit)) break; @@ -1312,7 +1458,7 @@ void *cgroups_main(void *ptr) if(!vdo_sys_fs_cgroup) { debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_sys_fs_cgroup()."); - sunow = time_usec(); + sunow = now_realtime_usec(); vdo_sys_fs_cgroup = do_sys_fs_cgroup(rrd_update_every, (sutime_sys_fs_cgroup > 0)?sunow - sutime_sys_fs_cgroup:0ULL); sutime_sys_fs_cgroup = sunow; } @@ -1340,6 +1486,8 @@ void *cgroups_main(void *ptr) } } + info("CGROUP thread exiting"); + pthread_exit(NULL); return NULL; }