]> arthur.barton.de Git - netdata.git/blobdiff - src/apps_plugin.c
handle usleep() on systems that do not accept more than 999999 usec; implement altern...
[netdata.git] / src / apps_plugin.c
old mode 100755 (executable)
new mode 100644 (file)
index 6c7206b..ba497b7
@@ -1,7 +1,3 @@
-// TODO
-//
-// 1. disable RESET_OR_OVERFLOW check in charts
-
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 #include "procfile.h"
 #include "../config.h"
 
+#ifdef NETDATA_INTERNAL_CHECKS
+#include <sys/prctl.h>
+#endif
+
 #define MAX_COMPARE_NAME 100
 #define MAX_NAME 100
 #define MAX_CMDLINE 1024
 
-unsigned long long Hertz = 1;
-
-long processors = 1;
-long pid_max = 32768;
+int processors = 1;
+pid_t pid_max = 32768;
 int debug = 0;
 
 int update_every = 1;
 unsigned long long file_counter = 0;
 int proc_pid_cmdline_is_needed = 0;
-
+int include_exited_childs = 1;
 char *host_prefix = "";
 char *config_dir = CONFIG_DIR;
 
-#ifdef NETDATA_INTERNAL_CHECKS
-// ----------------------------------------------------------------------------
-// memory debugger
-// do not use in production systems - it mis-aligns allocated memory
-
-struct allocations {
-       size_t allocations;
-       size_t allocated;
-       size_t allocated_max;
-} allocations = { 0, 0, 0 };
-
-#define MALLOC_MARK (uint32_t)(0x0BADCAFE)
-#define MALLOC_PREFIX (sizeof(uint32_t) * 2)
-#define MALLOC_SUFFIX (sizeof(uint32_t))
-#define MALLOC_OVERHEAD (MALLOC_PREFIX + MALLOC_SUFFIX)
-
-void *mark_allocation(void *allocated_ptr, size_t size_without_overheads) {
-       uint32_t *real_ptr = (uint32_t *)allocated_ptr;
-       real_ptr[0] = MALLOC_MARK;
-       real_ptr[1] = (uint32_t) size_without_overheads;
-
-       uint32_t *end_ptr = (uint32_t *)(allocated_ptr + MALLOC_PREFIX + size_without_overheads);
-       end_ptr[0] = MALLOC_MARK;
-
-       // fprintf(stderr, "MEMORY_POINTER: Allocated at %p, returning %p.\n", allocated_ptr, (void *)(allocated_ptr + MALLOC_PREFIX));
-
-       return allocated_ptr + MALLOC_PREFIX;
-}
-
-void *check_allocation(const char *file, int line, const char *function, void *marked_ptr, size_t *size_without_overheads_ptr) {
-       uint32_t *real_ptr = (uint32_t *)(marked_ptr - MALLOC_PREFIX);
-
-       // fprintf(stderr, "MEMORY_POINTER: Checking pointer at %p, real %p for %s/%u@%s.\n", marked_ptr, (void *)(marked_ptr - MALLOC_PREFIX), function, line, file);
-
-       if(real_ptr[0] != MALLOC_MARK) fatal("MEMORY: prefix MARK is not valid for %s/%u@%s.", function, line, file);
-
-       size_t size = real_ptr[1];
-
-       uint32_t *end_ptr = (uint32_t *)(marked_ptr + size);
-       if(end_ptr[0] != MALLOC_MARK) fatal("MEMORY: suffix MARK of allocation with size %zu is not valid for %s/%u@%s.", size, function, line, file);
-
-       if(size_without_overheads_ptr) *size_without_overheads_ptr = size;
-
-       return real_ptr;
-}
-
-void *malloc_debug(const char *file, int line, const char *function, size_t size) {
-       void *ptr = malloc(size + MALLOC_OVERHEAD);
-       if(!ptr) fatal("MEMORY: Cannot allocate %zu bytes for %s/%u@%s.", size, function, line, file);
-
-       allocations.allocated += size;
-       allocations.allocations++;
-
-       debug(D_MEMORY, "MEMORY: Allocated %zu bytes for %s/%u@%s."
-               " Status: allocated %zu in %zu allocs."
-               , size
-               , function, line, file
-               , allocations.allocated
-               , allocations.allocations
-       );
-
-       if(allocations.allocated > allocations.allocated_max) {
-               debug(D_MEMORY, "MEMORY: total allocation peak increased from %zu to %zu", allocations.allocated_max, allocations.allocated);
-               allocations.allocated_max = allocations.allocated;
-       }
-
-       size_t csize;
-       check_allocation(file, line, function, mark_allocation(ptr, size), &csize);
-       if(size != csize) {
-               fatal("Invalid size.");
-       }
-
-       return mark_allocation(ptr, size);
-}
-
-void *calloc_debug(const char *file, int line, const char *function, size_t nmemb, size_t size) {
-       void *ptr = malloc_debug(file, line, function, (nmemb * size));
-       bzero(ptr, nmemb * size);
-       return ptr;
-}
-
-void free_debug(const char *file, int line, const char *function, void *ptr) {
-       size_t size;
-       void *real_ptr = check_allocation(file, line, function, ptr, &size);
-
-       bzero(real_ptr, size + MALLOC_OVERHEAD);
-
-       free(real_ptr);
-       allocations.allocated -= size;
-       allocations.allocations--;
-
-       debug(D_MEMORY, "MEMORY: freed %zu bytes for %s/%u@%s."
-               " Status: allocated %zu in %zu allocs."
-               , size
-               , function, line, file
-               , allocations.allocated
-               , allocations.allocations
-       );
-}
-
-void *realloc_debug(const char *file, int line, const char *function, void *ptr, size_t size) {
-       if(!ptr) return malloc_debug(file, line, function, size);
-       if(!size) { free_debug(file, line, function, ptr); return NULL; }
-
-       size_t old_size;
-       void *real_ptr = check_allocation(file, line, function, ptr, &old_size);
-
-       void *new_ptr = realloc(real_ptr, size + MALLOC_OVERHEAD);
-       if(!new_ptr) fatal("MEMORY: Cannot allocate %zu bytes for %s/%u@%s.", size, function, line, file);
-
-       allocations.allocated += size;
-       allocations.allocated -= old_size;
-
-       debug(D_MEMORY, "MEMORY: Re-allocated from %zu to %zu bytes for %s/%u@%s."
-               " Status: allocated %z in %zu allocs."
-               , old_size, size
-               , function, line, file
-               , allocations.allocated
-               , allocations.allocations
-       );
-
-       if(allocations.allocated > allocations.allocated_max) {
-               debug(D_MEMORY, "MEMORY: total allocation peak increased from %zu to %zu", allocations.allocated_max, allocations.allocated);
-               allocations.allocated_max = allocations.allocated;
-       }
-
-       return mark_allocation(new_ptr, size);
-}
-
-char *strdup_debug(const char *file, int line, const char *function, const char *ptr) {
-       size_t size = 0;
-       const char *s = ptr;
-
-       while(*s++) size++;
-       size++;
 
-       char *p = malloc_debug(file, line, function, size);
-       if(!p) fatal("Cannot allocate %zu bytes.", size);
+// ----------------------------------------------------------------------------
 
-       memcpy(p, ptr, size);
-       return p;
+void netdata_cleanup_and_exit(int ret) {
+       exit(ret);
 }
 
-#define malloc(size) malloc_debug(__FILE__, __LINE__, __FUNCTION__, (size))
-#define calloc(nmemb, size) calloc_debug(__FILE__, __LINE__, __FUNCTION__, (nmemb), (size))
-#define realloc(ptr, size) realloc_debug(__FILE__, __LINE__, __FUNCTION__, (ptr), (size))
-#define free(ptr) free_debug(__FILE__, __LINE__, __FUNCTION__, (ptr))
-
-#ifdef strdup
-#undef strdup
-#endif
-#define strdup(ptr) strdup_debug(__FILE__, __LINE__, __FUNCTION__, (ptr))
-
-#endif /* NETDATA_INTERNAL_CHECKS */
-
 
 // ----------------------------------------------------------------------------
 // system functions
@@ -222,7 +72,7 @@ long get_system_cpus(void) {
        int processors = 0;
 
        char filename[FILENAME_MAX + 1];
-       snprintf(filename, FILENAME_MAX, "%s/proc/stat", host_prefix);
+       snprintfz(filename, FILENAME_MAX, "%s/proc/stat", host_prefix);
 
        ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT);
        if(!ff) return 1;
@@ -246,12 +96,12 @@ long get_system_cpus(void) {
        return processors;
 }
 
-long get_system_pid_max(void) {
+pid_t get_system_pid_max(void) {
        procfile *ff = NULL;
-       long mpid = 32768;
+       pid_t mpid = 32768;
 
        char filename[FILENAME_MAX + 1];
-       snprintf(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", host_prefix);
+       snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", host_prefix);
        ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT);
        if(!ff) return mpid;
 
@@ -261,35 +111,13 @@ long get_system_pid_max(void) {
                return mpid;
        }
 
-       mpid = atol(procfile_lineword(ff, 0, 0));
+       mpid = (pid_t)atoi(procfile_lineword(ff, 0, 0));
        if(!mpid) mpid = 32768;
 
        procfile_close(ff);
        return mpid;
 }
 
-unsigned long long get_system_hertz(void)
-{
-       unsigned long long myhz = 1;
-
-#ifdef _SC_CLK_TCK
-       if((myhz = (unsigned long long int) sysconf(_SC_CLK_TCK)) > 0) {
-               return myhz;
-       }
-#endif
-
-#ifdef HZ
-       myhz = HZ;    /* <asm/param.h> */
-#else /* HZ */
-       /* If 32-bit or big-endian (not Alpha or ia64), assume HZ is 100. */
-       hz = (sizeof(long)==sizeof(int) || htons(999)==999) ? 100UL : 1024UL;
-#endif /* HZ */
-
-       error("Unknown HZ value. Assuming %llu.", myhz);
-       return myhz;
-}
-
-
 // ----------------------------------------------------------------------------
 // target
 // target is the structure that process data are aggregated
@@ -318,14 +146,14 @@ struct target {
        unsigned long long num_threads;
        unsigned long long rss;
 
-       unsigned long long fix_minflt;
-       unsigned long long fix_cminflt;
-       unsigned long long fix_majflt;
-       unsigned long long fix_cmajflt;
-       unsigned long long fix_utime;
-       unsigned long long fix_stime;
-       unsigned long long fix_cutime;
-       unsigned long long fix_cstime;
+       long long fix_minflt;
+       long long fix_cminflt;
+       long long fix_majflt;
+       long long fix_cmajflt;
+       long long fix_utime;
+       long long fix_stime;
+       long long fix_cutime;
+       long long fix_cstime;
 
        unsigned long long statm_size;
        unsigned long long statm_resident;
@@ -398,18 +226,20 @@ struct target *get_users_target(uid_t uid)
                return NULL;
        }
 
-       snprintf(w->compare, MAX_COMPARE_NAME, "%d", uid);
+       snprintfz(w->compare, MAX_COMPARE_NAME, "%u", uid);
        w->comparehash = simple_hash(w->compare);
        w->comparelen = strlen(w->compare);
 
-       snprintf(w->id, MAX_NAME, "%d", uid);
+       snprintfz(w->id, MAX_NAME, "%u", uid);
        w->idhash = simple_hash(w->id);
 
        struct passwd *pw = getpwuid(uid);
        if(!pw)
-               snprintf(w->name, MAX_NAME, "%d", uid);
+               snprintfz(w->name, MAX_NAME, "%u", uid);
        else
-               snprintf(w->name, MAX_NAME, "%s", pw->pw_name);
+               snprintfz(w->name, MAX_NAME, "%s", pw->pw_name);
+
+       netdata_fix_chart_name(w->name);
 
        w->uid = uid;
 
@@ -417,7 +247,7 @@ struct target *get_users_target(uid_t uid)
        users_root_target = w;
 
        if(unlikely(debug))
-               fprintf(stderr, "apps.plugin: added uid %d ('%s') target\n", w->uid, w->name);
+               fprintf(stderr, "apps.plugin: added uid %u ('%s') target\n", w->uid, w->name);
 
        return w;
 }
@@ -434,18 +264,20 @@ struct target *get_groups_target(gid_t gid)
                return NULL;
        }
 
-       snprintf(w->compare, MAX_COMPARE_NAME, "%d", gid);
+       snprintfz(w->compare, MAX_COMPARE_NAME, "%u", gid);
        w->comparehash = simple_hash(w->compare);
        w->comparelen = strlen(w->compare);
 
-       snprintf(w->id, MAX_NAME, "%d", gid);
+       snprintfz(w->id, MAX_NAME, "%u", gid);
        w->idhash = simple_hash(w->id);
 
        struct group *gr = getgrgid(gid);
        if(!gr)
-               snprintf(w->name, MAX_NAME, "%d", gid);
+               snprintfz(w->name, MAX_NAME, "%u", gid);
        else
-               snprintf(w->name, MAX_NAME, "%s", gr->gr_name);
+               snprintfz(w->name, MAX_NAME, "%s", gr->gr_name);
+
+       netdata_fix_chart_name(w->name);
 
        w->gid = gid;
 
@@ -453,7 +285,7 @@ struct target *get_groups_target(gid_t gid)
        groups_root_target = w;
 
        if(unlikely(debug))
-               fprintf(stderr, "apps.plugin: added gid %d ('%s') target\n", w->gid, w->name);
+               fprintf(stderr, "apps.plugin: added gid %u ('%s') target\n", w->gid, w->name);
 
        return w;
 }
@@ -473,10 +305,12 @@ struct target *get_apps_groups_target(const char *id, struct target *target)
        }
        uint32_t hash = simple_hash(id);
 
-       struct target *w;
+       struct target *w, *last = apps_groups_root_target;
        for(w = apps_groups_root_target ; w ; w = w->next) {
                if(w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0)
                        return w;
+
+               last = w;
        }
 
        w = calloc(sizeof(struct target), 1);
@@ -485,12 +319,12 @@ struct target *get_apps_groups_target(const char *id, struct target *target)
                return NULL;
        }
 
-       strncpy(w->id, nid, MAX_NAME);
+       strncpyz(w->id, nid, MAX_NAME);
        w->idhash = simple_hash(w->id);
 
-       strncpy(w->name, nid, MAX_NAME);
+       strncpyz(w->name, nid, MAX_NAME);
 
-       strncpy(w->compare, nid, MAX_COMPARE_NAME);
+       strncpyz(w->compare, nid, MAX_COMPARE_NAME);
        int len = strlen(w->compare);
        if(w->compare[len - 1] == '*') {
                w->compare[len - 1] = '\0';
@@ -508,8 +342,9 @@ struct target *get_apps_groups_target(const char *id, struct target *target)
        w->debug = tdebug;
        w->target = target;
 
-       w->next = apps_groups_root_target;
-       apps_groups_root_target = w;
+       // append it, to maintain the order in apps_groups.conf
+       if(last) last->next = w;
+       else apps_groups_root_target = w;
 
        if(unlikely(debug))
                fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n"
@@ -528,7 +363,7 @@ int read_apps_groups_conf(const char *name)
 {
        char filename[FILENAME_MAX + 1];
 
-       snprintf(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, name);
+       snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, name);
 
        if(unlikely(debug))
                fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename);
@@ -564,7 +399,7 @@ int read_apps_groups_conf(const char *name)
 
                        struct target *n = get_apps_groups_target(s, w);
                        if(!n) {
-                               error("Cannot create target '%s' (line %d, word %d)", s, line, word);
+                               error("Cannot create target '%s' (line %lu, word %lu)", s, line, word);
                                continue;
                        }
 
@@ -580,8 +415,7 @@ int read_apps_groups_conf(const char *name)
                                t++;
                        }
 
-                       strncpy(w->name, t, MAX_NAME);
-                       w->name[MAX_NAME] = '\0';
+                       strncpyz(w->name, t, MAX_NAME);
                        w->hidden = thidden;
                        w->debug = tdebug;
 
@@ -603,7 +437,7 @@ int read_apps_groups_conf(const char *name)
        if(!apps_groups_default_target)
                error("Cannot create default target");
        else
-               strncpy(apps_groups_default_target->name, "other", MAX_NAME);
+               strncpyz(apps_groups_default_target->name, "other", MAX_NAME);
 
        return 0;
 }
@@ -686,14 +520,20 @@ struct pid_stat {
        // we will subtract these values from the old
        // target
        unsigned long long last_minflt;
-       unsigned long long last_cminflt;
        unsigned long long last_majflt;
-       unsigned long long last_cmajflt;
        unsigned long long last_utime;
        unsigned long long last_stime;
+
+       unsigned long long last_cminflt;
+       unsigned long long last_cmajflt;
        unsigned long long last_cutime;
        unsigned long long last_cstime;
 
+       unsigned long long last_fix_cminflt;
+       unsigned long long last_fix_cmajflt;
+       unsigned long long last_fix_cutime;
+       unsigned long long last_fix_cstime;
+
        unsigned long long last_io_logical_bytes_read;
        unsigned long long last_io_logical_bytes_written;
        unsigned long long last_io_read_calls;
@@ -702,27 +542,10 @@ struct pid_stat {
        unsigned long long last_io_storage_bytes_written;
        unsigned long long last_io_cancelled_write_bytes;
 
-#ifdef AGGREGATE_CHILDREN_TO_PARENTS
-       unsigned long long old_utime;
-       unsigned long long old_stime;
-       unsigned long long old_minflt;
-       unsigned long long old_majflt;
-
-       unsigned long long old_cutime;
-       unsigned long long old_cstime;
-       unsigned long long old_cminflt;
-       unsigned long long old_cmajflt;
-
-       unsigned long long fix_cutime;
-       unsigned long long fix_cstime;
        unsigned long long fix_cminflt;
        unsigned long long fix_cmajflt;
-
-       unsigned long long diff_cutime;
-       unsigned long long diff_cstime;
-       unsigned long long diff_cminflt;
-       unsigned long long diff_cmajflt;
-#endif /* AGGREGATE_CHILDREN_TO_PARENTS */
+       unsigned long long fix_cutime;
+       unsigned long long fix_cstime;
 
        int *fds;                                               // array of fds it uses
        int fds_size;                                   // the size of the fds array
@@ -753,13 +576,13 @@ struct pid_stat *get_pid_entry(pid_t pid)
 
        all_pids[pid] = calloc(sizeof(struct pid_stat), 1);
        if(!all_pids[pid]) {
-               error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct pid_stat));
+               error("Cannot allocate %zu bytes of memory", (size_t)sizeof(struct pid_stat));
                return NULL;
        }
 
        all_pids[pid]->fds = calloc(sizeof(int), 100);
        if(!all_pids[pid]->fds)
-               error("Cannot allocate %ld bytes of memory", (unsigned long)(sizeof(int) * 100));
+               error("Cannot allocate %zu bytes of memory", (size_t)(sizeof(int) * 100));
        else all_pids[pid]->fds_size = 100;
 
        if(root_of_pids) root_of_pids->prev = all_pids[pid];
@@ -776,7 +599,8 @@ void del_pid_entry(pid_t pid)
 {
        if(!all_pids[pid]) return;
 
-       if(debug) fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm);
+       if(unlikely(debug))
+               fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm);
 
        if(root_of_pids == all_pids[pid]) root_of_pids = all_pids[pid]->next;
        if(all_pids[pid]->next) all_pids[pid]->next->prev = all_pids[pid]->prev;
@@ -793,7 +617,7 @@ void del_pid_entry(pid_t pid)
 
 int read_proc_pid_cmdline(struct pid_stat *p) {
        char filename[FILENAME_MAX + 1];
-       snprintf(filename, FILENAME_MAX, "%s/proc/%d/cmdline", host_prefix, p->pid);
+       snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", host_prefix, p->pid);
 
        int fd = open(filename, O_RDONLY, 0666);
        if(unlikely(fd == -1)) return 1;
@@ -803,8 +627,7 @@ int read_proc_pid_cmdline(struct pid_stat *p) {
 
        if(bytes <= 0) {
                // copy the command to the command line
-               strncpy(p->cmdline, p->comm, MAX_CMDLINE);
-               p->cmdline[MAX_CMDLINE] = '\0';
+               strncpyz(p->cmdline, p->comm, MAX_CMDLINE);
                return 0;
        }
 
@@ -821,7 +644,7 @@ int read_proc_pid_cmdline(struct pid_stat *p) {
 int read_proc_pid_ownership(struct pid_stat *p) {
        char filename[FILENAME_MAX + 1];
 
-       snprintf(filename, FILENAME_MAX, "%s/proc/%d", host_prefix, p->pid);
+       snprintfz(filename, FILENAME_MAX, "%s/proc/%d", host_prefix, p->pid);
 
        // ----------------------------------------
        // read uid and gid
@@ -841,7 +664,7 @@ int read_proc_pid_stat(struct pid_stat *p) {
 
        char filename[FILENAME_MAX + 1];
 
-       snprintf(filename, FILENAME_MAX, "%s/proc/%d/stat", host_prefix, p->pid);
+       snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", host_prefix, p->pid);
 
        // ----------------------------------------
 
@@ -863,8 +686,7 @@ int read_proc_pid_stat(struct pid_stat *p) {
 
        // parse the process name
        unsigned int i = 0;
-       strncpy(p->comm, procfile_lineword(ff, 0, 1), MAX_COMPARE_NAME);
-       p->comm[MAX_COMPARE_NAME] = '\0';
+       strncpyz(p->comm, procfile_lineword(ff, 0, 1), MAX_COMPARE_NAME);
 
        // p->pid                       = atol(procfile_lineword(ff, 0, 0+i));
        // comm is at 1
@@ -911,7 +733,7 @@ int read_proc_pid_stat(struct pid_stat *p) {
        // p->guest_time        = strtoull(procfile_lineword(ff, 0, 42+i), NULL, 10);
        // p->cguest_time       = strtoull(procfile_lineword(ff, 0, 43), NULL, 10);
 
-       if(debug || (p->target && p->target->debug))
+       if(unlikely(debug || (p->target && p->target->debug)))
                fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", host_prefix, p->pid, p->comm, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads);
 
        // procfile_close(ff);
@@ -923,7 +745,7 @@ int read_proc_pid_statm(struct pid_stat *p) {
 
        char filename[FILENAME_MAX + 1];
 
-       snprintf(filename, FILENAME_MAX, "%s/proc/%d/statm", host_prefix, p->pid);
+       snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", host_prefix, p->pid);
 
        ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
        if(!ff) return 1;
@@ -953,7 +775,7 @@ int read_proc_pid_io(struct pid_stat *p) {
 
        char filename[FILENAME_MAX + 1];
 
-       snprintf(filename, FILENAME_MAX, "%s/proc/%d/io", host_prefix, p->pid);
+       snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", host_prefix, p->pid);
 
        ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
        if(!ff) return 1;
@@ -1021,18 +843,11 @@ int file_descriptor_iterator(avl *a) { if(a) {}; return 0; }
 
 avl_tree all_files_index = {
                NULL,
-               file_descriptor_compare,
-#ifndef AVL_WITHOUT_PTHREADS
-#ifdef AVL_LOCK_WITH_MUTEX
-               PTHREAD_MUTEX_INITIALIZER
-#else
-               PTHREAD_RWLOCK_INITIALIZER
-#endif
-#endif /* AVL_WITHOUT_PTHREADS */
+               file_descriptor_compare
 };
 
 static struct file_descriptor *file_descriptor_find(const char *name, uint32_t hash) {
-       struct file_descriptor *result = NULL, tmp;
+       struct file_descriptor tmp;
        tmp.hash = (hash)?hash:simple_hash(name);
        tmp.name = name;
        tmp.count = 0;
@@ -1041,8 +856,7 @@ static struct file_descriptor *file_descriptor_find(const char *name, uint32_t h
        tmp.magic = 0x0BADCAFE;
 #endif /* NETDATA_INTERNAL_CHECKS */
 
-       avl_search(&all_files_index, (avl *)&tmp, file_descriptor_iterator, (avl **)&result);
-       return result;
+       return (struct file_descriptor *)avl_search(&all_files_index, (avl *) &tmp);
 }
 
 #define file_descriptor_add(fd) avl_insert(&all_files_index, (avl *)(fd))
@@ -1069,13 +883,16 @@ void file_descriptor_not_used(int id)
                }
 #endif /* NETDATA_INTERNAL_CHECKS */
 
-               if(debug) fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count);
+               if(unlikely(debug))
+                       fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count);
 
                if(all_files[id].count > 0) {
                        all_files[id].count--;
 
                        if(!all_files[id].count) {
-                               if(debug) fprintf(stderr, "apps.plugin:   >> slot %d is empty.\n", id);
+                               if(unlikely(debug))
+                                       fprintf(stderr, "apps.plugin:   >> slot %d is empty.\n", id);
+
                                file_descriptor_remove(&all_files[id]);
 #ifdef NETDATA_INTERNAL_CHECKS
                                all_files[id].magic = 0x00000000;
@@ -1094,12 +911,15 @@ int file_descriptor_find_or_add(const char *name)
        static int last_pos = 0;
        uint32_t hash = simple_hash(name);
 
-       if(debug) fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash);
+       if(unlikely(debug))
+               fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash);
 
        struct file_descriptor *fd = file_descriptor_find(name, hash);
        if(fd) {
                // found
-               if(debug) fprintf(stderr, "apps.plugin:   >> found on slot %d\n", fd->pos);
+               if(unlikely(debug))
+                       fprintf(stderr, "apps.plugin:   >> found on slot %d\n", fd->pos);
+
                fd->count++;
                return fd->pos;
        }
@@ -1111,19 +931,25 @@ int file_descriptor_find_or_add(const char *name)
                int i;
 
                // there is no empty slot
-               if(debug) fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP);
+               if(unlikely(debug))
+                       fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP);
+
                all_files = realloc(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor));
 
                // if the address changed, we have to rebuild the index
                // since all pointers are now invalid
                if(old && old != (void *)all_files) {
-                       if(debug) fprintf(stderr, "apps.plugin:   >> re-indexing.\n");
+                       if(unlikely(debug))
+                               fprintf(stderr, "apps.plugin:   >> re-indexing.\n");
+
                        all_files_index.root = NULL;
                        for(i = 0; i < all_files_size; i++) {
                                if(!all_files[i].count) continue;
                                file_descriptor_add(&all_files[i]);
                        }
-                       if(debug) fprintf(stderr, "apps.plugin:   >> re-indexing done.\n");
+
+                       if(unlikely(debug))
+                               fprintf(stderr, "apps.plugin:   >> re-indexing done.\n");
                }
 
                for(i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) {
@@ -1139,7 +965,8 @@ int file_descriptor_find_or_add(const char *name)
                all_files_size += FILE_DESCRIPTORS_INCREASE_STEP;
        }
 
-       if(debug) fprintf(stderr, "apps.plugin:   >> searching for empty slot.\n");
+       if(unlikely(debug))
+               fprintf(stderr, "apps.plugin:   >> searching for empty slot.\n");
 
        // search for an empty slot
        int i, c;
@@ -1148,14 +975,17 @@ int file_descriptor_find_or_add(const char *name)
                if(c == 0) continue;
 
                if(!all_files[c].count) {
-                       if(debug) fprintf(stderr, "apps.plugin:   >> Examining slot %d.\n", c);
+                       if(unlikely(debug))
+                               fprintf(stderr, "apps.plugin:   >> Examining slot %d.\n", c);
 
 #ifdef NETDATA_INTERNAL_CHECKS
                        if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash))
                                error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name);
 #endif /* NETDATA_INTERNAL_CHECKS */
 
-                       if(debug) fprintf(stderr, "apps.plugin:   >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name);
+                       if(unlikely(debug))
+                               fprintf(stderr, "apps.plugin:   >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name);
+
                        if(all_files[c].name) free((void *)all_files[c].name);
                        all_files[c].name = NULL;
                        last_pos = c;
@@ -1166,7 +996,9 @@ int file_descriptor_find_or_add(const char *name)
                fatal("We should find an empty slot, but there isn't any");
                exit(1);
        }
-       if(debug) fprintf(stderr, "apps.plugin:   >> updating slot %d.\n", c);
+
+       if(unlikely(debug))
+               fprintf(stderr, "apps.plugin:   >> updating slot %d.\n", c);
 
        all_files_len++;
 
@@ -1182,11 +1014,15 @@ int file_descriptor_find_or_add(const char *name)
        else if(strcmp(name, "anon_inode:[timerfd]") == 0) type = FILETYPE_TIMERFD;
        else if(strcmp(name, "anon_inode:[signalfd]") == 0) type = FILETYPE_SIGNALFD;
        else if(strncmp(name, "anon_inode:", 11) == 0) {
-               if(debug) fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name);
+               if(unlikely(debug))
+                       fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name);
+
                type = FILETYPE_OTHER;
        }
        else {
-               if(debug) fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name);
+               if(unlikely(debug))
+                       fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name);
+
                type = FILETYPE_OTHER;
        }
 
@@ -1200,7 +1036,8 @@ int file_descriptor_find_or_add(const char *name)
 #endif /* NETDATA_INTERNAL_CHECKS */
        file_descriptor_add(&all_files[c]);
 
-       if(debug) fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name);
+       if(unlikely(debug))
+               fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name);
 
        return c;
 }
@@ -1208,7 +1045,7 @@ int file_descriptor_find_or_add(const char *name)
 int read_pid_file_descriptors(struct pid_stat *p) {
        char dirname[FILENAME_MAX+1];
 
-       snprintf(dirname, FILENAME_MAX, "%s/proc/%d/fd", host_prefix, p->pid);
+       snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", host_prefix, p->pid);
        DIR *fds = opendir(dirname);
        if(fds) {
                int c;
@@ -1229,10 +1066,12 @@ int read_pid_file_descriptors(struct pid_stat *p) {
                        if(fdid < 0) continue;
                        if(fdid >= p->fds_size) {
                                // it is small, extend it
-                               if(debug) fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100);
+                               if(unlikely(debug))
+                                       fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100);
+
                                p->fds = realloc(p->fds, (fdid + 100) * sizeof(int));
                                if(!p->fds) {
-                                       error("Cannot re-allocate fds for %s", p->comm);
+                                       fatal("Cannot re-allocate fds for %s", p->comm);
                                        break;
                                }
 
@@ -1301,7 +1140,7 @@ int collect_data_for_all_processes_from_proc(void)
 {
        char dirname[FILENAME_MAX + 1];
 
-       snprintf(dirname, FILENAME_MAX, "%s/proc", host_prefix);
+       snprintfz(dirname, FILENAME_MAX, "%s/proc", host_prefix);
        DIR *dir = opendir(dirname);
        if(!dir) return 0;
 
@@ -1312,26 +1151,34 @@ int collect_data_for_all_processes_from_proc(void)
        all_pids_count = 0;
        for(p = root_of_pids; p ; p = p->next) {
                all_pids_count++;
-               p->parent = NULL;
-               p->updated = 0;
-               p->children_count = 0;
-               p->merged = 0;
-               p->new_entry = 0;
-
-        p->last_minflt  = p->minflt;
-        p->last_cminflt  = p->cminflt;
-        p->last_majflt  = p->majflt;
-        p->last_cmajflt  = p->cmajflt;
-        p->last_utime  = p->utime;
-        p->last_stime  = p->stime;
-        p->last_cutime  = p->cutime;
-        p->last_cstime  = p->cstime;
-
-        p->last_io_logical_bytes_read  = p->io_logical_bytes_read;
+
+               p->parent           = NULL;
+
+               p->updated          = 0;
+               p->children_count   = 0;
+               p->merged           = 0;
+               p->new_entry        = 0;
+
+        p->last_minflt      = p->minflt;
+        p->last_majflt      = p->majflt;
+        p->last_utime       = p->utime;
+        p->last_stime       = p->stime;
+
+        p->last_cminflt     = p->cminflt;
+        p->last_cmajflt     = p->cmajflt;
+        p->last_cutime      = p->cutime;
+        p->last_cstime      = p->cstime;
+
+        p->last_fix_cminflt = p->fix_cminflt;
+        p->last_fix_cmajflt = p->fix_cmajflt;
+        p->last_fix_cutime  = p->fix_cutime;
+        p->last_fix_cstime  = p->fix_cstime;
+
+        p->last_io_logical_bytes_read     = p->io_logical_bytes_read;
         p->last_io_logical_bytes_written  = p->io_logical_bytes_written;
-        p->last_io_read_calls  = p->io_read_calls;
-        p->last_io_write_calls  = p->io_write_calls;
-        p->last_io_storage_bytes_read  = p->io_storage_bytes_read;
+        p->last_io_read_calls             = p->io_read_calls;
+        p->last_io_write_calls            = p->io_write_calls;
+        p->last_io_storage_bytes_read     = p->io_storage_bytes_read;
         p->last_io_storage_bytes_written  = p->io_storage_bytes_written;
         p->last_io_cancelled_write_bytes  = p->io_cancelled_write_bytes;
        }
@@ -1341,8 +1188,13 @@ int collect_data_for_all_processes_from_proc(void)
                pid_t pid = (pid_t) strtoul(file->d_name, &endptr, 10);
 
                // make sure we read a valid number
-               if(unlikely(pid <= 0 || pid > pid_max || endptr == file->d_name || *endptr != '\0'))
+               if(unlikely(endptr == file->d_name || *endptr != '\0'))
+                       continue;
+
+               if(unlikely(pid <= 0 || pid > pid_max)) {
+                       error("Invalid pid %d read (expected 1 to %d). Ignoring process.", pid, pid_max);
                        continue;
+               }
 
                p = get_pid_entry(pid);
                if(unlikely(!p)) continue;
@@ -1352,34 +1204,22 @@ int collect_data_for_all_processes_from_proc(void)
                // /proc/<pid>/stat
 
                if(unlikely(read_proc_pid_stat(p))) {
-                               error("Cannot process %s/proc/%d/stat", host_prefix, pid);
-
+                       error("Cannot process %s/proc/%d/stat", host_prefix, pid);
                        // there is no reason to proceed if we cannot get its status
                        continue;
                }
 
                // check its parent pid
                if(unlikely(p->ppid < 0 || p->ppid > pid_max)) {
-                               error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid);
-
+                       error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid);
                        p->ppid = 0;
                }
 
-               // --------------------------------------------------------------------
-               // /proc/<pid>/cmdline
-
-               if(proc_pid_cmdline_is_needed) {
-                       if(unlikely(read_proc_pid_cmdline(p))) {
-                                       error("Cannot process %s/proc/%d/cmdline", host_prefix, pid);
-                       }
-               }
-
                // --------------------------------------------------------------------
                // /proc/<pid>/statm
 
                if(unlikely(read_proc_pid_statm(p))) {
-                               error("Cannot process %s/proc/%d/statm", host_prefix, pid);
-
+                       error("Cannot process %s/proc/%d/statm", host_prefix, pid);
                        // there is no reason to proceed if we cannot get its memory status
                        continue;
                }
@@ -1409,9 +1249,18 @@ int collect_data_for_all_processes_from_proc(void)
                // check if it is target
                // we do this only once, the first time this pid is loaded
                if(unlikely(p->new_entry)) {
-                       if(debug) fprintf(stderr, "apps.plugin: \tJust added %s\n", p->comm);
+                       // /proc/<pid>/cmdline
+                       if(proc_pid_cmdline_is_needed) {
+                               if(unlikely(read_proc_pid_cmdline(p))) {
+                                               error("Cannot process %s/proc/%d/cmdline", host_prefix, pid);
+                               }
+                       }
+
+                       if(unlikely(debug))
+                               fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", pid, p->comm);
+
                        uint32_t hash = simple_hash(p->comm);
-                       size_t pclen = strlen(p->comm);
+                       size_t pclen  = strlen(p->comm);
 
                        struct target *w;
                        for(w = apps_groups_root_target; w ; w = w->next) {
@@ -1432,6 +1281,8 @@ int collect_data_for_all_processes_from_proc(void)
 
                                        if(debug || (p->target && p->target->debug))
                                                fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name);
+
+                                       break;
                                }
                        }
                }
@@ -1455,49 +1306,6 @@ int collect_data_for_all_processes_from_proc(void)
        return 1;
 }
 
-
-// ----------------------------------------------------------------------------
-
-#ifdef AGGREGATE_CHILDREN_TO_PARENTS
-// print a tree view of all processes
-int debug_childrens_aggregations(pid_t pid, int level) {
-       struct pid_stat *p = NULL;
-       char b[level+3];
-       int i, ret = 0;
-
-       for(i = 0; i < level; i++) b[i] = '\t';
-       b[level] = '|';
-       b[level+1] = '-';
-       b[level+2] = '\0';
-
-       for(p = root_of_pids; p ; p = p->next) {
-               if(p->ppid == pid) {
-                       ret += debug_childrens_aggregations(p->pid, level+1);
-               }
-       }
-
-       p = all_pids[pid];
-       if(p) {
-               if(!p->updated) ret += 1;
-               if(ret) fprintf(stderr, "%s %s %d [%s, %s] c=%d u=%llu+%llu, s=%llu+%llu, cu=%llu+%llu, cs=%llu+%llu, n=%llu+%llu, j=%llu+%llu, cn=%llu+%llu, cj=%llu+%llu\n"
-                       , b, p->comm, p->pid, p->updated?"OK":"KILLED", p->target->name, p->children_count
-                       , p->utime, p->utime - p->old_utime
-                       , p->stime, p->stime - p->old_stime
-                       , p->cutime, p->cutime - p->old_cutime
-                       , p->cstime, p->cstime - p->old_cstime
-                       , p->minflt, p->minflt - p->old_minflt
-                       , p->majflt, p->majflt - p->old_majflt
-                       , p->cminflt, p->cminflt - p->old_cminflt
-                       , p->cmajflt, p->cmajflt - p->old_cmajflt
-                       );
-       }
-
-       return ret;
-}
-#endif /* AGGREGATE_CHILDREN_TO_PARENTS */
-
-
-
 // ----------------------------------------------------------------------------
 // update statistics on the targets
 
@@ -1515,6 +1323,7 @@ int debug_childrens_aggregations(pid_t pid, int level) {
 // check: update_apps_groups_statistics()
 
 void link_all_processes_to_their_parents(void) {
+       struct pid_stat *init = all_pids[1];
        struct pid_stat *p = NULL;
 
        // link all children to their parents
@@ -1522,81 +1331,110 @@ void link_all_processes_to_their_parents(void) {
        for(p = root_of_pids; p ; p = p->next) {
                // for each process found running
 
-               if(p->ppid > 0
-                               && p->ppid <= pid_max
-                               && all_pids[p->ppid]
-                       ) {
-                       // for valid processes
+               if(likely(p->new_entry && p->updated)) {
+                       // the first time we see an entry
+                       // we remove the exited children figures
+                       // to avoid spikes
+                       p->fix_cminflt = p->cminflt;
+                       p->fix_cmajflt = p->cmajflt;
+                       p->fix_cutime  = p->cutime;
+                       p->fix_cstime  = p->cstime;
+               }
+
+               if(likely(p->ppid > 0 && all_pids[p->ppid])) {
+                       // valid parent processes
 
-                       if(debug || (p->target && p->target->debug))
-                               fprintf(stderr, "apps.plugin: \tparent of %d (%s) is %d (%s)\n", p->pid, p->comm, p->ppid, all_pids[p->ppid]->comm);
+                       struct pid_stat *pp;
 
-                       p->parent = all_pids[p->ppid];
+                       p->parent = pp = all_pids[p->ppid];
                        p->parent->children_count++;
-               }
-               else if(p->ppid != 0)
-                       error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid);
-       }
-}
 
-#ifdef AGGREGATE_CHILDREN_TO_PARENTS
-void aggregate_children_to_parents(void) {
-       struct pid_stat *p = NULL;
+                       if(unlikely(debug || (p->target && p->target->debug)))
+                               fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) has parent %d (%s, %s). Parent: utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n", p->pid, p->comm, p->updated?"running":"exited", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cminflt, pp->cmajflt, pp->fix_cutime, pp->fix_cstime, pp->fix_cminflt, pp->fix_cmajflt);
 
-       // for each killed process, remove its values from the parents
-       // sums (we had already added them in a previous loop)
-       for(p = root_of_pids; p ; p = p->next) {
-               if(p->updated) continue;
-
-               if(debug) fprintf(stderr, "apps.plugin: UNMERGING %d %s\n", p->pid, p->comm);
-
-               unsigned long long diff_utime = p->utime + p->cutime + p->fix_cutime;
-               unsigned long long diff_stime = p->stime + p->cstime + p->fix_cstime;
-               unsigned long long diff_minflt = p->minflt + p->cminflt + p->fix_cminflt;
-               unsigned long long diff_majflt = p->majflt + p->cmajflt + p->fix_cmajflt;
-
-               struct pid_stat *t = p;
-               while((t = t->parent)) {
-                       if(!t->updated) continue;
-
-                       unsigned long long x;
-                       if(diff_utime && t->diff_cutime) {
-                               x = (t->diff_cutime < diff_utime)?t->diff_cutime:diff_utime;
-                               diff_utime -= x;
-                               t->diff_cutime -= x;
-                               t->fix_cutime += x;
-                               if(debug) fprintf(stderr, "apps.plugin: \t cutime %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name);
-                       }
-                       if(diff_stime && t->diff_cstime) {
-                               x = (t->diff_cstime < diff_stime)?t->diff_cstime:diff_stime;
-                               diff_stime -= x;
-                               t->diff_cstime -= x;
-                               t->fix_cstime += x;
-                               if(debug) fprintf(stderr, "apps.plugin: \t cstime %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name);
-                       }
-                       if(diff_minflt && t->diff_cminflt) {
-                               x = (t->diff_cminflt < diff_minflt)?t->diff_cminflt:diff_minflt;
-                               diff_minflt -= x;
-                               t->diff_cminflt -= x;
-                               t->fix_cminflt += x;
-                               if(debug) fprintf(stderr, "apps.plugin: \t cminflt %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name);
-                       }
-                       if(diff_majflt && t->diff_cmajflt) {
-                               x = (t->diff_cmajflt < diff_majflt)?t->diff_cmajflt:diff_majflt;
-                               diff_majflt -= x;
-                               t->diff_cmajflt -= x;
-                               t->fix_cmajflt += x;
-                               if(debug) fprintf(stderr, "apps.plugin: \t cmajflt %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name);
+                       if(unlikely(!p->updated)) {
+                               // this process has exited
+
+                               // find the first parent that has been updated
+                               while(pp && !pp->updated) {
+                                       // we may have to forward link it to its parent
+                                       if(unlikely(!pp->parent && pp->ppid > 0 && all_pids[pp->ppid]))
+                                               pp->parent = all_pids[pp->ppid];
+
+                                       // check again for parent
+                                       pp = pp->parent;
+                               }
+
+                               if(likely(pp)) {
+                                       // this is an exited child with a parent
+                                       // remove the known time from the parent's data
+                                       pp->fix_cminflt += p->last_minflt + p->last_cminflt + p->last_fix_cminflt;
+                                       pp->fix_cmajflt += p->last_majflt + p->last_cmajflt + p->last_fix_cmajflt;
+                                       pp->fix_cutime  += p->last_utime  + p->last_cutime  + p->last_fix_cutime;
+                                       pp->fix_cstime  += p->last_stime  + p->last_cstime  + p->last_fix_cstime;
+
+                                       // The known exited children (the ones we track) may have
+                                       // contributed more than the value accumulated into the process
+                                       // by the kernel.
+                                       // This can happen if the parent process has not waited-for
+                                       // its children (check: man 2 times).
+                                       // In this case, the kernel adds these resources to init (pid 1).
+                                       //
+                                       // The following code attempts to fix this.
+                                       // Without this code, the charts will have random spikes
+                                       // for example, when an SSH session ends (sshd forks a child
+                                       // to serve the session, but when this session ends, sshd
+                                       // does not wait-for its child, thus all the resources of the
+                                       // ssh session get added to init, resulting in a huge spike on
+                                       // the charts).
+
+                                       if(unlikely(pp->cminflt < pp->fix_cminflt)) {
+                                               if(likely(init && pp != init)) {
+                                                       unsigned long long have = pp->fix_cminflt - pp->cminflt;
+                                                       unsigned long long max = init->cminflt - init->fix_cminflt;
+                                                       if(have > max) have = max;
+                                                       init->fix_cminflt += have;
+                                               }
+                                               pp->fix_cminflt = pp->cminflt;
+                                       }
+                                       if(unlikely(pp->cmajflt < pp->fix_cmajflt)) {
+                                               if(likely(init && pp != init)) {
+                                                       unsigned long long have = pp->fix_cmajflt - pp->cmajflt;
+                                                       unsigned long long max = init->cmajflt - init->fix_cmajflt;
+                                                       if(have > max) have = max;
+                                                       init->fix_cmajflt += have;
+                                               }
+                                               pp->fix_cmajflt = pp->cmajflt;
+                                       }
+                                       if(unlikely(pp->cutime < pp->fix_cutime)) {
+                                               if(likely(init && pp != init)) {
+                                                       unsigned long long have = pp->fix_cutime - pp->cutime;
+                                                       unsigned long long max = init->cutime - init->fix_cutime;
+                                                       if(have > max) have = max;
+                                                       init->fix_cutime += have;
+                                               }
+                                               pp->fix_cutime  = pp->cutime;
+                                       }
+                                       if(unlikely(pp->cstime < pp->fix_cstime)) {
+                                               if(likely(init && pp != init)) {
+                                                       unsigned long long have = pp->fix_cstime - pp->cstime;
+                                                       unsigned long long max = init->cstime - init->fix_cstime;
+                                                       if(have > max) have = max;
+                                                       init->fix_cstime += have;
+                                               }
+                                               pp->fix_cstime = pp->cstime;
+                                       }
+
+                                       if(unlikely(debug))
+                                               fprintf(stderr, "apps.plugin: \tupdating child metrics of %d (%s, %s) to its parent %d (%s, %s). Parent has now: utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n", p->pid, p->comm, p->updated?"running":"exited", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cminflt, pp->cmajflt, pp->fix_cutime, pp->fix_cstime, pp->fix_cminflt, pp->fix_cmajflt);
+                               }
                        }
                }
-
-               if(diff_utime) error("Cannot fix up utime %llu", diff_utime);
-               if(diff_stime) error("Cannot fix up stime %llu", diff_stime);
-               if(diff_minflt) error("Cannot fix up minflt %llu", diff_minflt);
-               if(diff_majflt) error("Cannot fix up majflt %llu", diff_majflt);
+               else if(unlikely(p->ppid != 0))
+                       error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid);
        }
 }
-#endif /* AGGREGATE_CHILDREN_TO_PARENTS */
+
 
 void cleanup_non_existing_pids(void) {
        int c;
@@ -1624,8 +1462,9 @@ void apply_apps_groups_targets_inheritance(void) {
 
        // children that do not have a target
        // inherit their target from their parent
-       int found = 1;
+       int found = 1, loops = 0;
        while(found) {
+               if(unlikely(debug)) loops++;
                found = 0;
                for(p = root_of_pids; p ; p = p->next) {
                        // if this process does not have a target
@@ -1647,6 +1486,7 @@ void apply_apps_groups_targets_inheritance(void) {
        // repeat, until nothing more can be done.
        found = 1;
        while(found) {
+               if(unlikely(debug)) loops++;
                found = 0;
                for(p = root_of_pids; p ; p = p->next) {
                        // if this process does not have any children
@@ -1679,7 +1519,7 @@ void apply_apps_groups_targets_inheritance(void) {
                        }
                }
 
-               if(debug)
+               if(unlikely(debug))
                        fprintf(stderr, "apps.plugin: merged %d processes\n", found);
        }
 
@@ -1688,25 +1528,18 @@ void apply_apps_groups_targets_inheritance(void) {
                all_pids[1]->target = apps_groups_default_target;
 
        // give a default target on all top level processes
+       if(unlikely(debug)) loops++;
        for(p = root_of_pids; p ; p = p->next) {
                // if the process is not merged itself
                // then is is a top level process
                if(!p->merged && !p->target)
                        p->target = apps_groups_default_target;
-
-#ifdef AGGREGATE_CHILDREN_TO_PARENTS
-               // by the way, update the diffs
-               // will be used later for subtracting killed process times
-               p->diff_cutime = p->utime - p->cutime;
-               p->diff_cstime = p->stime - p->cstime;
-               p->diff_cminflt = p->minflt - p->cminflt;
-               p->diff_cmajflt = p->majflt - p->cmajflt;
-#endif /* AGGREGATE_CHILDREN_TO_PARENTS */
        }
 
        // give a target to all merged child processes
        found = 1;
        while(found) {
+               if(unlikely(debug)) loops++;
                found = 0;
                for(p = root_of_pids; p ; p = p->next) {
                        if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) {
@@ -1718,6 +1551,9 @@ void apply_apps_groups_targets_inheritance(void) {
                        }
                }
        }
+
+       if(unlikely(debug))
+               fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops);
 }
 
 long zero_all_targets(struct target *root) {
@@ -1770,10 +1606,13 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target
        }
 
        if(likely(p->updated)) {
-               w->cutime += p->cutime; // - p->fix_cutime;
-               w->cstime += p->cstime; // - p->fix_cstime;
-               w->cminflt += p->cminflt; // - p->fix_cminflt;
-               w->cmajflt += p->cmajflt; // - p->fix_cmajflt;
+               if(unlikely(debug && (p->fix_cutime || p->fix_cstime || p->fix_cminflt || p->fix_cmajflt)))
+                       fprintf(stderr, "apps.plugin: \tadding child counters of %d (%s) to target %s. Currents: cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, Fixes: cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, w->name, p->cutime, p->cstime, p->cminflt, p->cmajflt, p->fix_cutime, p->fix_cstime, p->fix_cminflt, p->fix_cmajflt);
+
+               w->cutime  += p->cutime  - p->fix_cutime;
+               w->cstime  += p->cstime  - p->fix_cstime;
+               w->cminflt += p->cminflt - p->fix_cminflt;
+               w->cmajflt += p->cmajflt - p->fix_cmajflt;
 
                w->utime += p->utime; //+ (p->pid != 1)?(p->cutime - p->fix_cutime):0;
                w->stime += p->stime; //+ (p->pid != 1)?(p->cstime - p->fix_cstime):0;
@@ -1821,7 +1660,7 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target
                }
 
                if(unlikely(debug || w->debug))
-                       fprintf(stderr, "apps.plugin: \tAgregating %s pid %d on %s utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
+                       fprintf(stderr, "apps.plugin: \tAggregating %s pid %d on %s utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->fix_cutime, p->fix_cstime, p->fix_cminflt, p->fix_cmajflt);
 
 /*             if(p->utime - p->old_utime > 100) fprintf(stderr, "BIG CHANGE: %d %s utime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->utime - p->old_utime, p->old_utime, p->utime);
                if(p->cutime - p->old_cutime > 100) fprintf(stderr, "BIG CHANGE: %d %s cutime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cutime - p->old_cutime, p->old_cutime, p->cutime);
@@ -1832,16 +1671,6 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target
                if(p->cminflt - p->old_cminflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cminflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cminflt - p->old_cminflt, p->old_cminflt, p->cminflt);
                if(p->cmajflt - p->old_cmajflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cmajflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cmajflt - p->old_cmajflt, p->old_cmajflt, p->cmajflt);
 */
-#ifdef AGGREGATE_CHILDREN_TO_PARENTS
-               p->old_utime = p->utime;
-               p->old_cutime = p->cutime;
-               p->old_stime = p->stime;
-               p->old_cstime = p->cstime;
-               p->old_minflt = p->minflt;
-               p->old_majflt = p->majflt;
-               p->old_cminflt = p->cminflt;
-               p->old_cmajflt = p->cmajflt;
-#endif /* AGGREGATE_CHILDREN_TO_PARENTS */
 
                if(o) {
                        // since the process switched target
@@ -1852,42 +1681,46 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target
                        // IMPORTANT
                        // We add/subtract the last/OLD values we added to the target
 
-                       w->fix_cutime -= p->last_cutime;
-                       w->fix_cstime -= p->last_cstime;
-                       w->fix_cminflt -= p->last_cminflt;
-                       w->fix_cmajflt -= p->last_cmajflt;
+                       unsigned long long cutime  = p->last_cutime - p->last_fix_cutime;
+                       unsigned long long cstime  = p->last_cstime - p->last_fix_cstime;
+                       unsigned long long cminflt = p->last_cminflt - p->last_fix_cminflt;
+                       unsigned long long cmajflt = p->last_cmajflt - p->last_fix_cmajflt;
+
+                       w->fix_cutime  -= cutime;
+                       w->fix_cstime  -= cstime;
+                       w->fix_cminflt -= cminflt;
+                       w->fix_cmajflt -= cmajflt;
 
-                       w->fix_utime -= p->last_utime;
-                       w->fix_stime -= p->last_stime;
+                       w->fix_utime  -= p->last_utime;
+                       w->fix_stime  -= p->last_stime;
                        w->fix_minflt -= p->last_minflt;
                        w->fix_majflt -= p->last_majflt;
 
-
-                       w->fix_io_logical_bytes_read -= p->last_io_logical_bytes_read;
+                       w->fix_io_logical_bytes_read    -= p->last_io_logical_bytes_read;
                        w->fix_io_logical_bytes_written -= p->last_io_logical_bytes_written;
-                       w->fix_io_read_calls -= p->last_io_read_calls;
-                       w->fix_io_write_calls -= p->last_io_write_calls;
-                       w->fix_io_storage_bytes_read -= p->last_io_storage_bytes_read;
+                       w->fix_io_read_calls            -= p->last_io_read_calls;
+                       w->fix_io_write_calls           -= p->last_io_write_calls;
+                       w->fix_io_storage_bytes_read    -= p->last_io_storage_bytes_read;
                        w->fix_io_storage_bytes_written -= p->last_io_storage_bytes_written;
                        w->fix_io_cancelled_write_bytes -= p->last_io_cancelled_write_bytes;
 
                        // ---
 
-                       o->fix_cutime += p->last_cutime;
-                       o->fix_cstime += p->last_cstime;
-                       o->fix_cminflt += p->last_cminflt;
-                       o->fix_cmajflt += p->last_cmajflt;
+                       o->fix_cutime  += cutime;
+                       o->fix_cstime  += cstime;
+                       o->fix_cminflt += cminflt;
+                       o->fix_cmajflt += cmajflt;
 
-                       o->fix_utime += p->last_utime;
-                       o->fix_stime += p->last_stime;
+                       o->fix_utime  += p->last_utime;
+                       o->fix_stime  += p->last_stime;
                        o->fix_minflt += p->last_minflt;
                        o->fix_majflt += p->last_majflt;
 
-                       o->fix_io_logical_bytes_read += p->last_io_logical_bytes_read;
+                       o->fix_io_logical_bytes_read    += p->last_io_logical_bytes_read;
                        o->fix_io_logical_bytes_written += p->last_io_logical_bytes_written;
-                       o->fix_io_read_calls += p->last_io_read_calls;
-                       o->fix_io_write_calls += p->last_io_write_calls;
-                       o->fix_io_storage_bytes_read += p->last_io_storage_bytes_read;
+                       o->fix_io_read_calls            += p->last_io_read_calls;
+                       o->fix_io_write_calls           += p->last_io_write_calls;
+                       o->fix_io_storage_bytes_read    += p->last_io_storage_bytes_read;
                        o->fix_io_storage_bytes_written += p->last_io_storage_bytes_written;
                        o->fix_io_cancelled_write_bytes += p->last_io_cancelled_write_bytes;
                }
@@ -1897,28 +1730,33 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target
 
                // since the process has exited, the user
                // will see a drop in our charts, because the incremental
-               // values of this process will not be there
+               // values of this process will not be there from now on
 
                // add them to the fix_* values and they will be added to
                // the reported values, so that the report goes steady
-               w->fix_minflt += p->minflt;
-               w->fix_majflt += p->majflt;
-               w->fix_utime += p->utime;
-               w->fix_stime += p->stime;
-               w->fix_cminflt += p->cminflt;
-               w->fix_cmajflt += p->cmajflt;
-               w->fix_cutime += p->cutime;
-               w->fix_cstime += p->cstime;
-
-               w->fix_io_logical_bytes_read += p->io_logical_bytes_read;
-               w->fix_io_logical_bytes_written += p->io_logical_bytes_written;
-               w->fix_io_read_calls += p->io_read_calls;
-               w->fix_io_write_calls += p->io_write_calls;
-               w->fix_io_storage_bytes_read += p->io_storage_bytes_read;
-               w->fix_io_storage_bytes_written += p->io_storage_bytes_written;
-               w->fix_io_cancelled_write_bytes += p->io_cancelled_write_bytes;
+
+               w->fix_minflt  += p->last_minflt;
+               w->fix_majflt  += p->last_majflt;
+               w->fix_utime   += p->last_utime;
+               w->fix_stime   += p->last_stime;
+
+               w->fix_cminflt += (p->last_cminflt - p->last_fix_cminflt);
+               w->fix_cmajflt += (p->last_cmajflt - p->last_fix_cmajflt);
+               w->fix_cutime  += (p->last_cutime  - p->last_fix_cutime);
+               w->fix_cstime  += (p->last_cstime  - p->last_fix_cstime);
+
+               w->fix_io_logical_bytes_read    += p->last_io_logical_bytes_read;
+               w->fix_io_logical_bytes_written += p->last_io_logical_bytes_written;
+               w->fix_io_read_calls            += p->last_io_read_calls;
+               w->fix_io_write_calls           += p->last_io_write_calls;
+               w->fix_io_storage_bytes_read    += p->last_io_storage_bytes_read;
+               w->fix_io_storage_bytes_written += p->last_io_storage_bytes_written;
+               w->fix_io_cancelled_write_bytes += p->last_io_cancelled_write_bytes;
        }
 
+       //if((long long)w->cutime + w->fix_cutime < 0)
+       //      error("Negative total cutime (%llu - %lld) on target %s after adding process %d (%s, %s) with utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n",
+       //                w->cutime, w->fix_cutime, w->name, p->pid, p->comm, p->updated?"running":"exited", p->utime, p->stime, p->minflt, p->majflt, p->cutime, p->cstime, p->cminflt, p->cmajflt, p->fix_cutime, p->fix_cstime, p->fix_cminflt, p->fix_cmajflt);
 }
 
 void count_targets_fds(struct target *root) {
@@ -1988,19 +1826,10 @@ void calculate_netdata_statistics(void)
        link_all_processes_to_their_parents();
        apply_apps_groups_targets_inheritance();
 
-#ifdef AGGREGATE_CHILDREN_TO_PARENTS
-       aggregate_children_to_parents();
-#endif /* AGGREGATE_CHILDREN_TO_PARENTS */
-
        zero_all_targets(users_root_target);
        zero_all_targets(groups_root_target);
        apps_groups_targets = zero_all_targets(apps_groups_root_target);
 
-#ifdef AGGREGATE_CHILDREN_TO_PARENTS
-       if(debug)
-               debug_childrens_aggregations(0, 1);
-#endif /* AGGREGATE_CHILDREN_TO_PARENTS */
-
        // this has to be done, before the cleanup
        struct pid_stat *p = NULL;
        struct target *w = NULL, *o = NULL;
@@ -2023,7 +1852,7 @@ void calculate_netdata_statistics(void)
                        w = p->user_target;
                else {
                        if(unlikely(debug && p->user_target))
-                                       fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %d (%s) to %d.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid);
+                                       fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %u (%s) to %u.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid);
 
                        w = p->user_target = get_users_target(p->uid);
                }
@@ -2041,7 +1870,7 @@ void calculate_netdata_statistics(void)
                        w = p->group_target;
                else {
                        if(unlikely(debug && p->group_target))
-                                       fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %d (%s) to %d.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid);
+                                       fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %u (%s) to %u.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid);
 
                        w = p->group_target = get_groups_target(p->gid);
                }
@@ -2120,7 +1949,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->stime + w->fix_utime + w->fix_stime);
+               fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->stime + w->fix_utime + w->fix_stime + (include_exited_childs?(w->cutime + w->cstime + w->fix_cutime + w->fix_cstime):0));
        }
        fprintf(stdout, "END\n");
 
@@ -2128,7 +1957,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->fix_utime);
+               fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->fix_utime + (include_exited_childs?(w->cutime + w->fix_cutime):0));
        }
        fprintf(stdout, "END\n");
 
@@ -2136,7 +1965,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "SET %s = %llu\n", w->name, w->stime + w->fix_stime);
+               fprintf(stdout, "SET %s = %llu\n", w->name, w->stime + w->fix_stime + (include_exited_childs?(w->cstime + w->fix_cstime):0));
        }
        fprintf(stdout, "END\n");
 
@@ -2168,7 +1997,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "SET %s = %llu\n", w->name, w->minflt + w->fix_minflt);
+               fprintf(stdout, "SET %s = %llu\n", w->name, w->minflt + w->fix_minflt + (include_exited_childs?(w->cminflt + w->fix_cminflt):0));
        }
        fprintf(stdout, "END\n");
 
@@ -2176,7 +2005,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "SET %s = %llu\n", w->name, w->majflt + w->fix_majflt);
+               fprintf(stdout, "SET %s = %llu\n", w->name, w->majflt + w->fix_majflt + (include_exited_childs?(w->cmajflt + w->fix_cmajflt):0));
        }
        fprintf(stdout, "END\n");
 
@@ -2260,11 +2089,11 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const
 
        // we have something new to show
        // update the charts
-       fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
+       fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "DIMENSION %s '' incremental 100 %llu %s\n", w->name, Hertz, w->hidden ? "hidden,noreset" : "noreset");
+               fprintf(stdout, "DIMENSION %s '' incremental 100 %u %s\n", w->name, hz, w->hidden ? "hidden,noreset" : "noreset");
        }
 
        fprintf(stdout, "CHART %s.mem '' '%s Dedicated Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every);
@@ -2288,18 +2117,18 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const
                fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
        }
 
-       fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
+       fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "DIMENSION %s '' incremental 100 %llu noreset\n", w->name, Hertz * processors);
+               fprintf(stdout, "DIMENSION %s '' incremental 100 %u noreset\n", w->name, hz);
        }
 
-       fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
+       fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
        for (w = root; w ; w = w->next) {
                if(w->target || (!w->processes && !w->exposed)) continue;
 
-               fprintf(stdout, "DIMENSION %s '' incremental 100 %llu noreset\n", w->name, Hertz * processors);
+               fprintf(stdout, "DIMENSION %s '' incremental 100 %u noreset\n", w->name, hz);
        }
 
        fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20010 %d\n", type, title, type, update_every);
@@ -2390,6 +2219,16 @@ void parse_args(int argc, char **argv)
                        continue;
                }
 
+               if(strcmp("no-childs", argv[i]) == 0) {
+                       include_exited_childs = 0;
+                       continue;
+               }
+
+               if(strcmp("with-childs", argv[i]) == 0) {
+                       include_exited_childs = 1;
+                       continue;
+               }
+
                if(!name) {
                        name = argv[i];
                        continue;
@@ -2408,12 +2247,6 @@ void parse_args(int argc, char **argv)
        }
 }
 
-unsigned long long sutime() {
-       struct timeval now;
-       gettimeofday(&now, NULL);
-       return now.tv_sec * 1000000ULL + now.tv_usec;
-}
-
 int main(int argc, char **argv)
 {
        // debug_flags = D_PROCFILE;
@@ -2424,6 +2257,10 @@ int main(int argc, char **argv)
        // disable syslog for apps.plugin
        error_log_syslog = 0;
 
+       // set errors flood protection to 100 logs per hour
+       error_log_errors_per_period = 100;
+       error_log_throttle_period = 3600;
+
        host_prefix = getenv("NETDATA_HOST_PREFIX");
        if(host_prefix == NULL) {
                info("NETDATA_HOST_PREFIX is not passed from netdata");
@@ -2438,13 +2275,20 @@ int main(int argc, char **argv)
        }
        else info("Found NETDATA_CONFIG_DIR='%s'", config_dir);
 
-       info("starting...");
+#ifdef NETDATA_INTERNAL_CHECKS
+       if(debug_flags != 0) {
+               struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
+               if(setrlimit(RLIMIT_CORE, &rl) != 0)
+                       info("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
+               prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
+       }
+#endif /* NETDATA_INTERNAL_CHECKS */
 
        procfile_adaptive_initial_allocation = 1;
 
        time_t started_t = time(NULL);
        time_t current_t;
-       Hertz = get_system_hertz();
+       get_HZ();
        pid_max = get_system_pid_max();
        processors = get_system_cpus();
 
@@ -2457,16 +2301,14 @@ int main(int argc, char **argv)
                exit(1);
        }
 
-       fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %d\n", update_every);
-       fprintf(stdout, "DIMENSION user '' incremental 1 %d\n", 1000);
-       fprintf(stdout, "DIMENSION system '' incremental 1 %d\n", 1000);
-
-       fprintf(stdout, "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %d\n", update_every);
-       fprintf(stdout, "DIMENSION files '' incremental 1 1\n");
-       fprintf(stdout, "DIMENSION pids '' absolute 1 1\n");
-       fprintf(stdout, "DIMENSION fds '' absolute 1 1\n");
-       fprintf(stdout, "DIMENSION targets '' absolute 1 1\n");
-
+       fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n"
+                       "DIMENSION user '' incremental 1 1000\n"
+                       "DIMENSION system '' incremental 1 1000\n"
+                       "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n"
+                       "DIMENSION files '' incremental 1 1\n"
+                       "DIMENSION pids '' absolute 1 1\n"
+                       "DIMENSION fds '' absolute 1 1\n"
+                       "DIMENSION targets '' absolute 1 1\n", update_every);
 
 #ifndef PROFILING_MODE
        unsigned long long sunext = (time(NULL) - (time(NULL) % update_every) + update_every) * 1000000ULL;
@@ -2477,11 +2319,11 @@ int main(int argc, char **argv)
        for(;1; counter++) {
 #ifndef PROFILING_MODE
                // delay until it is our time to run
-               while((sunow = sutime()) < sunext)
-                       usleep((useconds_t)(sunext - sunow));
+               while((sunow = timems()) < sunext)
+                       usecsleep(sunext - sunow);
 
                // find the next time we need to run
-               while(sutime() > sunext)
+               while(timems() > sunext)
                        sunext += update_every * 1000000ULL;
 #endif /* PROFILING_MODE */
 
@@ -2504,8 +2346,8 @@ int main(int argc, char **argv)
                send_collected_data_to_netdata(users_root_target, "users", dt);
                send_collected_data_to_netdata(groups_root_target, "groups", dt);
 
-               if(debug) fprintf(stderr, "apps.plugin: done Loop No %llu\n", counter);
-               fflush(NULL);
+               if(unlikely(debug))
+                       fprintf(stderr, "apps.plugin: done Loop No %llu\n", counter);
 
                current_t = time(NULL);