arthur.barton.de Git - netdata.git/blobdiff - src/sys_fs_cgroup.c
use usec_t for microseconds time variables
[netdata.git] / src / sys_fs_cgroup.c
index fe9bbc47328147c0413e203eef3c64d7c53b7521..a42178efc3188d14f406880460a712d7a62e7d2d 100644 (file)
@@ -138,6 +138,18 @@ struct memory {
     unsigned long long total_active_file;
     unsigned long long total_unevictable;
 */
+
+    int usage_in_bytes_updated;
+    char *filename_usage_in_bytes;
+    unsigned long long usage_in_bytes;
+
+    int msw_usage_in_bytes_updated;
+    char *filename_msw_usage_in_bytes;
+    unsigned long long msw_usage_in_bytes;
+
+    int failcnt_updated;
+    char *filename_failcnt;
+    unsigned long long failcnt;
 };
 
 // https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
@@ -257,7 +269,7 @@ void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) {
         }
 
         unsigned long i = procfile_linewords(ff, 0);
-        if(i <= 0) return;
+        if(i == 0) return;
 
         // we may have 1 more CPU reported
         while(i > 0) {
@@ -557,6 +569,24 @@ void cgroup_read_memory(struct memory *mem) {
 
         mem->updated = 1;
     }
+
+    mem->usage_in_bytes_updated = 0;
+    if(mem->filename_usage_in_bytes) {
+        if(likely(!read_single_number_file(mem->filename_usage_in_bytes, &mem->usage_in_bytes)))
+            mem->usage_in_bytes_updated = 1;
+    }
+
+    mem->msw_usage_in_bytes_updated = 0;
+    if(mem->filename_msw_usage_in_bytes) {
+        if(likely(!read_single_number_file(mem->filename_msw_usage_in_bytes, &mem->msw_usage_in_bytes)))
+            mem->msw_usage_in_bytes_updated = 1;
+    }
+
+    mem->failcnt_updated = 0;
+    if(mem->filename_failcnt) {
+        if(likely(!read_single_number_file(mem->filename_failcnt, &mem->failcnt)))
+            mem->failcnt_updated = 1;
+    }
 }
 
 void cgroup_read(struct cgroup *cg) {
@@ -845,13 +875,20 @@ int find_dir_in_subdirs(const char *base, const char *this, void (*callback)(con
                 if(*r == '\0') r = "/";
                 else if (*r == '/') r++;
 
+                // do not descend into directories we are not interested in
+                // https://github.com/firehol/netdata/issues/345
+                int def = 1;
+                size_t len = strlen(r);
+                if(len >  5 && !strncmp(&r[len -  5], "-qemu", 5))
+                    def = 0;
+
                 // we check for this option here
                 // so that the config will not have settings
                 // for leaf directories
                 char option[FILENAME_MAX + 1];
                 snprintfz(option, FILENAME_MAX, "search for cgroups under %s", r);
                 option[FILENAME_MAX] = '\0';
-                enabled = config_get_boolean("plugin:cgroups", option, 1);
+                enabled = config_get_boolean("plugin:cgroups", option, def);
             }
 
             if(enabled) {
@@ -990,6 +1027,27 @@ void find_all_cgroups() {
                 debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename);
             }
             else debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename);
+
+            snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id);
+            if(stat(filename, &buf) != -1) {
+                cg->memory.filename_usage_in_bytes = strdupz(filename);
+                debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes);
+            }
+            else debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename);
+
+            snprintfz(filename, FILENAME_MAX, "%s%s/memory.msw_usage_in_bytes", cgroup_memory_base, cg->id);
+            if(stat(filename, &buf) != -1) {
+                cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
+                debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes);
+            }
+            else debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename);
+
+            snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id);
+            if(stat(filename, &buf) != -1) {
+                cg->memory.filename_failcnt = strdupz(filename);
+                debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt);
+            }
+            else debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename);
         }
         if(cgroup_enable_blkio) {
             if(!cg->io_service_bytes.filename) {
@@ -1077,7 +1135,7 @@ void update_cgroup_charts(int update_every) {
         if(cg->cpuacct_stat.updated) {
             st = rrdset_find_bytype(type, "cpu");
             if(!st) {
-                snprintfz(title, CHART_TITLE_MAX, "CPU Usage for cgroup %s", cg->chart_title);
+                snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title);
                 st = rrdset_create(type, "cpu", NULL, "cpu", "cgroup.cpu", title, "%", 40000, update_every, RRDSET_TYPE_STACKED);
 
                 rrddim_add(st, "user", NULL, 100, hz, RRDDIM_INCREMENTAL);
@@ -1096,7 +1154,7 @@ void update_cgroup_charts(int update_every) {
 
             st = rrdset_find_bytype(type, "cpu_per_core");
             if(!st) {
-                snprintfz(title, CHART_TITLE_MAX, "CPU Usage Per Core for cgroup %s", cg->chart_title);
+                snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) Per Core for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title);
                 st = rrdset_create(type, "cpu_per_core", NULL, "cpu", "cgroup.cpu_per_core", title, "%", 40100, update_every, RRDSET_TYPE_STACKED);
 
                 for(i = 0; i < cg->cpuacct_usage.cpus ;i++) {
@@ -1118,7 +1176,7 @@ void update_cgroup_charts(int update_every) {
                 st = rrdset_find_bytype(type, "mem");
                 if(!st) {
                     snprintfz(title, CHART_TITLE_MAX, "Memory Usage for cgroup %s", cg->chart_title);
-                    st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 40200, update_every,
+                    st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 40210, update_every,
                                        RRDSET_TYPE_STACKED);
 
                     rrddim_add(st, "cache", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE);
@@ -1191,12 +1249,44 @@ void update_cgroup_charts(int update_every) {
             }
         }
 
+        if(cg->memory.usage_in_bytes_updated) {
+            st = rrdset_find_bytype(type, "mem_usage");
+            if(!st) {
+                snprintfz(title, CHART_TITLE_MAX, "Total Memory for cgroup %s", cg->chart_title);
+                st = rrdset_create(type, "mem_usage", NULL, "mem", "cgroup.mem_usage", title, "MB", 40200,
+                                   update_every, RRDSET_TYPE_STACKED);
+
+                rrddim_add(st, "ram", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE);
+                rrddim_add(st, "swap", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE);
+            }
+            else rrdset_next(st);
+
+            rrddim_set(st, "ram", cg->memory.usage_in_bytes);
+            rrddim_set(st, "swap", (cg->memory.msw_usage_in_bytes > cg->memory.usage_in_bytes)?cg->memory.msw_usage_in_bytes - cg->memory.usage_in_bytes:0);
+            rrdset_done(st);
+        }
+
+        if(cg->memory.failcnt_updated && cg->memory.failcnt > 0) {
+            st = rrdset_find_bytype(type, "mem_failcnt");
+            if(!st) {
+                snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures for cgroup %s", cg->chart_title);
+                st = rrdset_create(type, "mem_failcnt", NULL, "mem", "cgroup.mem_failcnt", title, "MB", 40250,
+                                   update_every, RRDSET_TYPE_LINE);
+
+                rrddim_add(st, "failures", NULL, 1, 1, RRDDIM_INCREMENTAL);
+            }
+            else rrdset_next(st);
+
+            rrddim_set(st, "failures", cg->memory.failcnt);
+            rrdset_done(st);
+        }
+
         if(cg->io_service_bytes.updated && cg->io_service_bytes.Read + cg->io_service_bytes.Write > 0) {
             st = rrdset_find_bytype(type, "io");
             if(!st) {
                 snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks) for cgroup %s", cg->chart_title);
                 st = rrdset_create(type, "io", NULL, "disk", "cgroup.io", title, "KB/s", 41200,
-                                   update_every, RRDSET_TYPE_LINE);
+                                   update_every, RRDSET_TYPE_AREA);
 
                 rrddim_add(st, "read", NULL, 1, 1024, RRDDIM_INCREMENTAL);
                 rrddim_add(st, "write", NULL, -1, 1024, RRDDIM_INCREMENTAL);
@@ -1226,11 +1316,11 @@ void update_cgroup_charts(int update_every) {
         }
 
         if(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.Read + cg->throttle_io_service_bytes.Write > 0) {
-            st = rrdset_find_bytype(type, "io");
+            st = rrdset_find_bytype(type, "throttle_io");
             if(!st) {
                 snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks) for cgroup %s", cg->chart_title);
-                st = rrdset_create(type, "io", NULL, "disk", "cgroup.io", title, "KB/s", 41200,
-                                   update_every, RRDSET_TYPE_LINE);
+                st = rrdset_create(type, "throttle_io", NULL, "disk", "cgroup.throttle_io", title, "KB/s", 41200,
+                                   update_every, RRDSET_TYPE_AREA);
 
                 rrddim_add(st, "read", NULL, 1, 1024, RRDDIM_INCREMENTAL);
                 rrddim_add(st, "write", NULL, -1, 1024, RRDDIM_INCREMENTAL);
@@ -1301,12 +1391,12 @@ void update_cgroup_charts(int update_every) {
 // ----------------------------------------------------------------------------
 // cgroups main
 
-int do_sys_fs_cgroup(int update_every, unsigned long long dt) {
+int do_sys_fs_cgroup(int update_every, usec_t dt) {
     (void)dt;
 
     static int cgroup_global_config_read = 0;
     static time_t last_run = 0;
-    time_t now = time(NULL);
+    time_t now = now_realtime_sec();
 
     if(unlikely(!cgroup_global_config_read)) {
         read_cgroup_plugin_configuration();
@@ -1326,7 +1416,7 @@ int do_sys_fs_cgroup(int update_every, unsigned long long dt) {
 
 void *cgroups_main(void *ptr)
 {
-    if(ptr) { ; }
+    (void)ptr;
 
     info("CGROUP Plugin thread created with task id %d", gettid());
 
@@ -1343,24 +1433,24 @@ void *cgroups_main(void *ptr)
     int vdo_cpu_netdata             = !config_get_boolean("plugin:cgroups", "cgroups plugin resources", 1);
 
     // keep track of the time each module was called
-    unsigned long long sutime_sys_fs_cgroup = 0ULL;
+    usec_t sutime_sys_fs_cgroup = 0ULL;
 
     // the next time we will run - aligned properly
-    unsigned long long sunext = (time(NULL) - (time(NULL) % rrd_update_every) + rrd_update_every) * 1000000ULL;
-    unsigned long long sunow;
+    usec_t sunext = (now_realtime_sec() - (now_realtime_sec() % rrd_update_every) + rrd_update_every) * USEC_PER_SEC;
 
     RRDSET *stcpu_thread = NULL;
 
-    for(;1;) {
+    for(;;) {
+        usec_t sunow;
         if(unlikely(netdata_exit)) break;
 
         // delay until it is our time to run
-        while((sunow = time_usec()) < sunext)
+        while((sunow = now_realtime_usec()) < sunext)
             sleep_usec(sunext - sunow);
 
         // find the next time we need to run
-        while(time_usec() > sunext)
-            sunext += rrd_update_every * 1000000ULL;
+        while(now_realtime_usec() > sunext)
+            sunext += rrd_update_every * USEC_PER_SEC;
 
         if(unlikely(netdata_exit)) break;
 
@@ -1368,7 +1458,7 @@ void *cgroups_main(void *ptr)
 
         if(!vdo_sys_fs_cgroup) {
             debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_sys_fs_cgroup().");
-            sunow = time_usec();
+            sunow = now_realtime_usec();
             vdo_sys_fs_cgroup = do_sys_fs_cgroup(rrd_update_every, (sutime_sys_fs_cgroup > 0)?sunow - sutime_sys_fs_cgroup:0ULL);
             sutime_sys_fs_cgroup = sunow;
         }
@@ -1396,6 +1486,8 @@ void *cgroups_main(void *ptr)
         }
     }
 
+    info("CGROUP thread exiting");
+
     pthread_exit(NULL);
     return NULL;
 }