]> arthur.barton.de Git - netdata.git/blob - src/apps_plugin.c
yet another rewrite of apps.plugin to fix cpu spikes on slow systems; fixes #644
[netdata.git] / src / apps_plugin.c
1 #ifdef HAVE_CONFIG_H
2 #include <config.h>
3 #endif
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <time.h>
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/time.h>
11 #include <sys/wait.h>
12 #include <sys/stat.h>
13
14 #include <sys/resource.h>
15 #include <sys/stat.h>
16
17 #include <errno.h>
18 #include <stdarg.h>
19 #include <locale.h>
20 #include <ctype.h>
21 #include <fcntl.h>
22
23 #include <malloc.h>
24 #include <dirent.h>
25 #include <arpa/inet.h>
26
27 #include <sys/types.h>
28 #include <pwd.h>
29 #include <grp.h>
30
31 #include "avl.h"
32
33 #include "common.h"
34 #include "log.h"
35 #include "procfile.h"
36 #include "../config.h"
37
38 #ifdef NETDATA_INTERNAL_CHECKS
39 #include <sys/prctl.h>
40 #endif
41
// sizes (excluding the terminating NUL) of the fixed buffers
// used in struct target and struct pid_stat
#define MAX_COMPARE_NAME 100
#define MAX_NAME 100
#define MAX_CMDLINE 1024

// defaults; NOTE(review): presumably replaced at startup with the values of
// get_system_cpus() and get_system_pid_max() - confirm against main()
int processors = 1;
pid_t pid_max = 32768;
int debug = 0;                                          // when set, tracing messages are printed to stderr

int update_every = 1;                                   // scales the rates computed in read_proc_pid_stat()
unsigned long long global_iterations_counter = 1;       // while this is 1, read_proc_pid_stat() reports zero rates
unsigned long long file_counter = 0;                    // incremented for every per-pid file read successfully
int proc_pid_cmdline_is_needed = 0;                     // set when a target has both starts_with and ends_with
int include_exited_childs = 1;
char *host_prefix = "";                                 // prepended to every /proc path this plugin opens
char *config_dir = CONFIG_DIR;                          // directory holding the apps_<name>.conf files

pid_t *all_pids_sortlist = NULL;
59
60 // ----------------------------------------------------------------------------
61
// Terminate the plugin with the given exit status.
// Nothing is cleaned up here: the status is handed straight to exit().
void netdata_cleanup_and_exit(int ret)
{
        exit(ret);
}
65
66
67 // ----------------------------------------------------------------------------
68 // system functions
69 // to retrieve settings of the system
70
71 long get_system_cpus(void) {
72         procfile *ff = NULL;
73
74         int processors = 0;
75
76         char filename[FILENAME_MAX + 1];
77         snprintfz(filename, FILENAME_MAX, "%s/proc/stat", host_prefix);
78
79         ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT);
80         if(!ff) return 1;
81
82         ff = procfile_readall(ff);
83         if(!ff) {
84                 procfile_close(ff);
85                 return 1;
86         }
87
88         unsigned int i;
89         for(i = 0; i < procfile_lines(ff); i++) {
90                 if(!procfile_linewords(ff, i)) continue;
91
92                 if(strncmp(procfile_lineword(ff, i, 0), "cpu", 3) == 0) processors++;
93         }
94         processors--;
95         if(processors < 1) processors = 1;
96
97         procfile_close(ff);
98         return processors;
99 }
100
101 pid_t get_system_pid_max(void) {
102         procfile *ff = NULL;
103         pid_t mpid = 32768;
104
105         char filename[FILENAME_MAX + 1];
106         snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", host_prefix);
107         ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT);
108         if(!ff) return mpid;
109
110         ff = procfile_readall(ff);
111         if(!ff) {
112                 procfile_close(ff);
113                 return mpid;
114         }
115
116         mpid = (pid_t)atoi(procfile_lineword(ff, 0, 0));
117         if(!mpid) mpid = 32768;
118
119         procfile_close(ff);
120         return mpid;
121 }
122
123 // ----------------------------------------------------------------------------
124 // target
// a target is the structure into which the data of processes are aggregated
126
// One aggregation target. The same structure is used for the three linked
// lists of this file: apps groups (apps_groups_root_target), users
// (users_root_target) and groups (groups_root_target).
struct target {
        // the string this target is compared with: for apps groups the id
        // without a trailing '*', for users/groups the uid/gid as decimal text
        char compare[MAX_COMPARE_NAME + 1];
        uint32_t comparehash;           // simple_hash() of compare
        size_t comparelen;              // strlen() of compare

        char id[MAX_NAME + 1];          // the id of the target (flags stripped for apps groups)
        uint32_t idhash;                // simple_hash() of id

        // the name of the target: user/group name when it can be resolved,
        // or the name given in the configuration file
        char name[MAX_NAME + 1];

        // set only by get_users_target() / get_groups_target() respectively
        uid_t uid;
        gid_t gid;

        // values aggregated from the processes of this target
        // (same names as the corresponding members of struct pid_stat)
        unsigned long long minflt;
        unsigned long long cminflt;
        unsigned long long majflt;
        unsigned long long cmajflt;
        unsigned long long utime;
        unsigned long long stime;
        unsigned long long cutime;
        unsigned long long cstime;
        unsigned long long num_threads;
        unsigned long long rss;

        unsigned long long statm_size;
        unsigned long long statm_resident;
        unsigned long long statm_share;
        unsigned long long statm_text;
        unsigned long long statm_lib;
        unsigned long long statm_data;
        unsigned long long statm_dirty;

        unsigned long long io_logical_bytes_read;
        unsigned long long io_logical_bytes_written;
        unsigned long long io_read_calls;
        unsigned long long io_write_calls;
        unsigned long long io_storage_bytes_read;
        unsigned long long io_storage_bytes_written;
        unsigned long long io_cancelled_write_bytes;

        // NOTE(review): fds and the open* counters are not touched in the part
        // of the file visible here; presumably filled when file descriptors
        // are aggregated - confirm
        int *fds;
        unsigned long long openfiles;
        unsigned long long openpipes;
        unsigned long long opensockets;
        unsigned long long openinotifies;
        unsigned long long openeventfds;
        unsigned long long opentimerfds;
        unsigned long long opensignalfds;
        unsigned long long openeventpolls;
        unsigned long long openother;

        unsigned long processes;        // how many processes have been merged to this
        int exposed;                    // if set, we have sent this to netdata
        int hidden;                     // if set, we set the hidden flag on the dimension
        int debug;                      // if set, debug output is printed for processes on this target
        int ends_with;                  // set when the id was given with a leading '*'
        int starts_with;                // if set, the compare string matches only the
                                        // beginning of the command (id given with a trailing '*')

        struct target *target;          // the one that will be reported to netdata
        struct target *next;            // next target of the same linked list
};
189
190
191 // ----------------------------------------------------------------------------
192 // apps_groups.conf
193 // aggregate all processes in groups, to have a limited number of dimensions
194
// head of the apps groups list, kept in the order of the configuration file
struct target *apps_groups_root_target = NULL;
// the catch-all target named "other", created by read_apps_groups_conf()
struct target *apps_groups_default_target = NULL;
long apps_groups_targets = 0;

// heads of the per-uid and per-gid lists; new entries are pushed at the front
struct target *users_root_target = NULL;
struct target *groups_root_target = NULL;
201
202 struct target *get_users_target(uid_t uid)
203 {
204         struct target *w;
205         for(w = users_root_target ; w ; w = w->next)
206                 if(w->uid == uid) return w;
207
208         w = calloc(sizeof(struct target), 1);
209         if(unlikely(!w)) {
210                 error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target));
211                 return NULL;
212         }
213
214         snprintfz(w->compare, MAX_COMPARE_NAME, "%u", uid);
215         w->comparehash = simple_hash(w->compare);
216         w->comparelen = strlen(w->compare);
217
218         snprintfz(w->id, MAX_NAME, "%u", uid);
219         w->idhash = simple_hash(w->id);
220
221         struct passwd *pw = getpwuid(uid);
222         if(!pw)
223                 snprintfz(w->name, MAX_NAME, "%u", uid);
224         else
225                 snprintfz(w->name, MAX_NAME, "%s", pw->pw_name);
226
227         netdata_fix_chart_name(w->name);
228
229         w->uid = uid;
230
231         w->next = users_root_target;
232         users_root_target = w;
233
234         if(unlikely(debug))
235                 fprintf(stderr, "apps.plugin: added uid %u ('%s') target\n", w->uid, w->name);
236
237         return w;
238 }
239
240 struct target *get_groups_target(gid_t gid)
241 {
242         struct target *w;
243         for(w = groups_root_target ; w ; w = w->next)
244                 if(w->gid == gid) return w;
245
246         w = calloc(sizeof(struct target), 1);
247         if(unlikely(!w)) {
248                 error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target));
249                 return NULL;
250         }
251
252         snprintfz(w->compare, MAX_COMPARE_NAME, "%u", gid);
253         w->comparehash = simple_hash(w->compare);
254         w->comparelen = strlen(w->compare);
255
256         snprintfz(w->id, MAX_NAME, "%u", gid);
257         w->idhash = simple_hash(w->id);
258
259         struct group *gr = getgrgid(gid);
260         if(!gr)
261                 snprintfz(w->name, MAX_NAME, "%u", gid);
262         else
263                 snprintfz(w->name, MAX_NAME, "%s", gr->gr_name);
264
265         netdata_fix_chart_name(w->name);
266
267         w->gid = gid;
268
269         w->next = groups_root_target;
270         groups_root_target = w;
271
272         if(unlikely(debug))
273                 fprintf(stderr, "apps.plugin: added gid %u ('%s') target\n", w->gid, w->name);
274
275         return w;
276 }
277
278 // find or create a new target
279 // there are targets that are just aggregated to other target (the second argument)
280 struct target *get_apps_groups_target(const char *id, struct target *target)
281 {
282         int tdebug = 0, thidden = 0, ends_with = 0;
283         const char *nid = id;
284
285         while(nid[0] == '-' || nid[0] == '+' || nid[0] == '*') {
286                 if(nid[0] == '-') thidden = 1;
287                 if(nid[0] == '+') tdebug = 1;
288                 if(nid[0] == '*') ends_with = 1;
289                 nid++;
290         }
291         uint32_t hash = simple_hash(id);
292
293         struct target *w, *last = apps_groups_root_target;
294         for(w = apps_groups_root_target ; w ; w = w->next) {
295                 if(w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0)
296                         return w;
297
298                 last = w;
299         }
300
301         w = calloc(sizeof(struct target), 1);
302         if(unlikely(!w)) {
303                 error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target));
304                 return NULL;
305         }
306
307         strncpyz(w->id, nid, MAX_NAME);
308         w->idhash = simple_hash(w->id);
309
310         strncpyz(w->name, nid, MAX_NAME);
311
312         strncpyz(w->compare, nid, MAX_COMPARE_NAME);
313         int len = strlen(w->compare);
314         if(w->compare[len - 1] == '*') {
315                 w->compare[len - 1] = '\0';
316                 w->starts_with = 1;
317         }
318         w->ends_with = ends_with;
319
320         if(w->starts_with && w->ends_with)
321                 proc_pid_cmdline_is_needed = 1;
322
323         w->comparehash = simple_hash(w->compare);
324         w->comparelen = strlen(w->compare);
325
326         w->hidden = thidden;
327         w->debug = tdebug;
328         w->target = target;
329
330         // append it, to maintain the order in apps_groups.conf
331         if(last) last->next = w;
332         else apps_groups_root_target = w;
333
334         if(unlikely(debug))
335                 fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n"
336                         , w->id
337                                 , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact"))
338                                 , w->target?w->target->id:w->id
339                                 , (w->hidden)?"hidden":"-"
340                                 , (w->debug)?"debug":"-"
341                 );
342
343         return w;
344 }
345
346 // read the apps_groups.conf file
347 int read_apps_groups_conf(const char *name)
348 {
349         char filename[FILENAME_MAX + 1];
350
351         snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, name);
352
353         if(unlikely(debug))
354                 fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename);
355
356         // ----------------------------------------
357
358         procfile *ff = procfile_open(filename, " :\t", PROCFILE_FLAG_DEFAULT);
359         if(!ff) return 1;
360
361         procfile_set_quotes(ff, "'\"");
362
363         ff = procfile_readall(ff);
364         if(!ff) {
365                 procfile_close(ff);
366                 return 1;
367         }
368
369         unsigned long line, lines = procfile_lines(ff);
370
371         for(line = 0; line < lines ;line++) {
372                 unsigned long word, words = procfile_linewords(ff, line);
373                 struct target *w = NULL;
374
375                 char *t = procfile_lineword(ff, line, 0);
376                 if(!t || !*t) continue;
377
378                 for(word = 0; word < words ;word++) {
379                         char *s = procfile_lineword(ff, line, word);
380                         if(!s || !*s) continue;
381                         if(*s == '#') break;
382
383                         if(t == s) continue;
384
385                         struct target *n = get_apps_groups_target(s, w);
386                         if(!n) {
387                                 error("Cannot create target '%s' (line %lu, word %lu)", s, line, word);
388                                 continue;
389                         }
390
391                         if(!w) w = n;
392                 }
393
394                 if(w) {
395                         int tdebug = 0, thidden = 0;
396
397                         while(t[0] == '-' || t[0] == '+') {
398                                 if(t[0] == '-') thidden = 1;
399                                 if(t[0] == '+') tdebug = 1;
400                                 t++;
401                         }
402
403                         strncpyz(w->name, t, MAX_NAME);
404                         w->hidden = thidden;
405                         w->debug = tdebug;
406
407                         if(unlikely(debug))
408                                 fprintf(stderr, "apps.plugin: AGGREGATION TARGET NAME '%s' on ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n"
409                                                 , w->name
410                                                 , w->id
411                                                 , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact"))
412                                                 , w->target?w->target->id:w->id
413                                                 , (w->hidden)?"hidden":"-"
414                                                 , (w->debug)?"debug":"-"
415                                 );
416                 }
417         }
418
419         procfile_close(ff);
420
421         apps_groups_default_target = get_apps_groups_target("p+!o@w#e$i^r&7*5(-i)l-o_", NULL); // match nothing
422         if(!apps_groups_default_target)
423                 error("Cannot create default target");
424         else
425                 strncpyz(apps_groups_default_target->name, "other", MAX_NAME);
426
427         return 0;
428 }
429
430
431 // ----------------------------------------------------------------------------
432 // data to store for each pid
433 // see: man proc
434
// Everything kept for one process. Entries are created by get_pid_entry()
// and released by del_pid_entry(); every entry is reachable both through
// the all_pids[] array (indexed by pid) and through the doubly linked list
// starting at root_of_pids.
//
// The commented out members are fields of /proc/<pid>/stat that are not
// collected; they are kept to document the layout of that file.
struct pid_stat {
        int32_t pid;
        char comm[MAX_COMPARE_NAME + 1];        // the process name, as found in /proc/<pid>/stat
        char cmdline[MAX_CMDLINE + 1];          // the command line, with NULs turned into spaces
                                                // (or a copy of comm when it cannot be read)

        // char state;
        int32_t ppid;
        // int32_t pgrp;
        // int32_t session;
        // int32_t tty_nr;
        // int32_t tpgid;
        // uint64_t flags;

        // these are raw values collected
        unsigned long long minflt_raw;
        unsigned long long cminflt_raw;
        unsigned long long majflt_raw;
        unsigned long long cmajflt_raw;
        unsigned long long utime_raw;
        unsigned long long stime_raw;
        unsigned long long cutime_raw;
        unsigned long long cstime_raw;

        // these are rates
        // (computed by read_proc_pid_stat() from the raw values above)
        unsigned long long minflt;
        unsigned long long cminflt;
        unsigned long long majflt;
        unsigned long long cmajflt;
        unsigned long long utime;
        unsigned long long stime;
        unsigned long long cutime;
        unsigned long long cstime;

        // int64_t priority;
        // int64_t nice;
        int32_t num_threads;
        // int64_t itrealvalue;
        // unsigned long long starttime;
        // unsigned long long vsize;
        unsigned long long rss;
        // unsigned long long rsslim;
        // unsigned long long starcode;
        // unsigned long long endcode;
        // unsigned long long startstack;
        // unsigned long long kstkesp;
        // unsigned long long kstkeip;
        // uint64_t signal;
        // uint64_t blocked;
        // uint64_t sigignore;
        // uint64_t sigcatch;
        // uint64_t wchan;
        // uint64_t nswap;
        // uint64_t cnswap;
        // int32_t exit_signal;
        // int32_t processor;
        // uint32_t rt_priority;
        // uint32_t policy;
        // unsigned long long delayacct_blkio_ticks;
        // uint64_t guest_time;
        // int64_t cguest_time;

        // owner of the process, taken by read_proc_pid_ownership()
        // from stat() on the /proc/<pid>/stat file
        uid_t uid;
        gid_t gid;

        // the seven fields of /proc/<pid>/statm, in file order
        unsigned long long statm_size;
        unsigned long long statm_resident;
        unsigned long long statm_share;
        unsigned long long statm_text;
        unsigned long long statm_lib;
        unsigned long long statm_data;
        unsigned long long statm_dirty;

        // NOTE(review): the io_* members are not touched in the part of the
        // file visible here; by analogy with the stat members, presumably raw
        // counters followed by the rates computed from them - confirm
        unsigned long long io_logical_bytes_read_raw;
        unsigned long long io_logical_bytes_written_raw;
        unsigned long long io_read_calls_raw;
        unsigned long long io_write_calls_raw;
        unsigned long long io_storage_bytes_read_raw;
        unsigned long long io_storage_bytes_written_raw;
        unsigned long long io_cancelled_write_bytes_raw;

        unsigned long long io_logical_bytes_read;
        unsigned long long io_logical_bytes_written;
        unsigned long long io_read_calls;
        unsigned long long io_write_calls;
        unsigned long long io_storage_bytes_read;
        unsigned long long io_storage_bytes_written;
        unsigned long long io_cancelled_write_bytes;

        int *fds;                               // array of fds it uses (NULL if it could not be allocated)
        int fds_size;                           // the size of the fds array

        int children_count;                     // number of processes directly referencing this
        int keep;                               // 1 when we need to keep this process in memory even after it exited
        int keeploops;                          // increases by 1 every time keep is 1 and updated 0
        int updated;                            // 1 when the process is currently running
        int merged;                             // 1 when it has been merged to its parent
        int new_entry;                          // 1 when this is a new process, just saw for the first time
        int read;                               // 1 when we have already read this process for this iteration
        int sortlist;                           // higher numbers = top on the process tree
                                                // each process gets a unique number

        struct target *target;                  // app_groups.conf targets
        struct target *user_target;             // uid based targets
        struct target *group_target;            // gid based targets

        // timestamps (from timems()) of the current and the previous
        // successful reading of /proc/<pid>/stat
        unsigned long long stat_collected_usec;
        unsigned long long last_stat_collected_usec;

        unsigned long long io_collected_usec;
        unsigned long long last_io_collected_usec;

        // paths of the per-pid files, allocated the first time each file
        // is needed and freed by del_pid_entry()
        char *stat_filename;
        char *statm_filename;
        char *io_filename;
        char *cmdline_filename;

        struct pid_stat *parent;
        struct pid_stat *prev;                  // previous entry in the list of all processes
        struct pid_stat *next;                  // next entry in the list of all processes
} *root_of_pids = NULL, **all_pids;             // head of the list, and the array indexed by pid
                                                // (the array is not allocated in this part of the file)

long all_pids_count = 0;                        // number of entries currently allocated
557
558 struct pid_stat *get_pid_entry(pid_t pid) {
559         if(all_pids[pid]) {
560                 all_pids[pid]->new_entry = 0;
561                 return all_pids[pid];
562         }
563
564         all_pids[pid] = calloc(sizeof(struct pid_stat), 1);
565         if(!all_pids[pid]) {
566                 error("Cannot allocate %zu bytes of memory", (size_t)sizeof(struct pid_stat));
567                 return NULL;
568         }
569
570         all_pids[pid]->fds = calloc(sizeof(int), 100);
571         if(!all_pids[pid]->fds)
572                 error("Cannot allocate %zu bytes of memory", (size_t)(sizeof(int) * 100));
573         else all_pids[pid]->fds_size = 100;
574
575         if(root_of_pids) root_of_pids->prev = all_pids[pid];
576         all_pids[pid]->next = root_of_pids;
577         root_of_pids = all_pids[pid];
578
579         all_pids[pid]->pid = pid;
580         all_pids[pid]->new_entry = 1;
581
582         all_pids_count++;
583
584         return all_pids[pid];
585 }
586
587 void del_pid_entry(pid_t pid) {
588         if(!all_pids[pid]) {
589                 error("attempted to free pid %d that is not allocated.", pid);
590                 return;
591         }
592
593         if(unlikely(debug))
594                 fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm);
595
596         if(root_of_pids == all_pids[pid]) root_of_pids = all_pids[pid]->next;
597         if(all_pids[pid]->next) all_pids[pid]->next->prev = all_pids[pid]->prev;
598         if(all_pids[pid]->prev) all_pids[pid]->prev->next = all_pids[pid]->next;
599
600         if(all_pids[pid]->fds) free(all_pids[pid]->fds);
601         if(all_pids[pid]->stat_filename) free(all_pids[pid]->stat_filename);
602         if(all_pids[pid]->statm_filename) free(all_pids[pid]->statm_filename);
603         if(all_pids[pid]->io_filename) free(all_pids[pid]->io_filename);
604         if(all_pids[pid]->cmdline_filename) free(all_pids[pid]->cmdline_filename);
605         free(all_pids[pid]);
606
607         all_pids[pid] = NULL;
608         all_pids_count--;
609 }
610
611
612 // ----------------------------------------------------------------------------
613 // update pids from proc
614
615 int read_proc_pid_cmdline(struct pid_stat *p) {
616         
617         if(unlikely(!p->cmdline_filename)) {
618                 char filename[FILENAME_MAX + 1];
619                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", host_prefix, p->pid);
620                 if(!(p->cmdline_filename = strdup(filename)))
621                         fatal("Cannot allocate memory for filename '%s'", filename);
622         }
623
624         int fd = open(p->cmdline_filename, O_RDONLY, 0666);
625         if(unlikely(fd == -1)) goto cleanup;
626
627         int i, bytes = read(fd, p->cmdline, MAX_CMDLINE);
628         close(fd);
629
630         if(unlikely(bytes <= 0)) goto cleanup;
631
632         p->cmdline[bytes] = '\0';
633         for(i = 0; i < bytes ; i++)
634                 if(unlikely(!p->cmdline[i])) p->cmdline[i] = ' ';
635
636         if(unlikely(debug))
637                 fprintf(stderr, "Read file '%s' contents: %s\n", p->cmdline_filename, p->cmdline);
638
639         return 0;
640
641 cleanup:
642         // copy the command to the command line
643         strncpyz(p->cmdline, p->comm, MAX_CMDLINE);
644         return 0;
645 }
646
647 int read_proc_pid_ownership(struct pid_stat *p) {
648         if(unlikely(!p->stat_filename)) {
649                 error("pid %d does not have a stat_filename", p->pid);
650                 return 1;
651         }
652
653         // ----------------------------------------
654         // read uid and gid
655
656         struct stat st;
657         if(stat(p->stat_filename, &st) != 0) {
658                 error("Cannot stat file '%s'", p->stat_filename);
659                 return 1;
660         }
661
662         p->uid = st.st_uid;
663         p->gid = st.st_gid;
664
665         return 0;
666 }
667
668 int read_proc_pid_stat(struct pid_stat *p) {
669         static procfile *ff = NULL;
670
671         if(unlikely(!p->stat_filename)) {
672                 char filename[FILENAME_MAX + 1];
673                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", host_prefix, p->pid);
674                 if(!(p->stat_filename = strdup(filename)))
675                         fatal("Cannot allocate memory for filename '%s'", filename);
676         }
677
678         int set_quotes = (!ff)?1:0;
679
680         ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
681         if(unlikely(!ff)) goto cleanup;
682
683         // if(set_quotes) procfile_set_quotes(ff, "()");
684         if(set_quotes) procfile_set_open_close(ff, "(", ")");
685
686         ff = procfile_readall(ff);
687         if(unlikely(!ff)) goto cleanup;
688
689         p->last_stat_collected_usec = p->stat_collected_usec;
690         p->stat_collected_usec = timems();
691         file_counter++;
692
693         // parse the process name
694         unsigned int i = 0;
695         strncpyz(p->comm, procfile_lineword(ff, 0, 1), MAX_COMPARE_NAME);
696
697         // p->pid                       = atol(procfile_lineword(ff, 0, 0+i));
698         // comm is at 1
699         // p->state                     = *(procfile_lineword(ff, 0, 2+i));
700         p->ppid                         = (int32_t) atol(procfile_lineword(ff, 0, 3 + i));
701         // p->pgrp                      = atol(procfile_lineword(ff, 0, 4+i));
702         // p->session           = atol(procfile_lineword(ff, 0, 5+i));
703         // p->tty_nr            = atol(procfile_lineword(ff, 0, 6+i));
704         // p->tpgid                     = atol(procfile_lineword(ff, 0, 7+i));
705         // p->flags                     = strtoull(procfile_lineword(ff, 0, 8+i), NULL, 10);
706
707         unsigned long long last;
708
709         last = p->minflt_raw;
710         p->minflt_raw           = strtoull(procfile_lineword(ff, 0, 9+i), NULL, 10);
711         p->minflt = (p->minflt_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
712
713         last = p->cminflt_raw;
714         p->cminflt_raw          = strtoull(procfile_lineword(ff, 0, 10+i), NULL, 10);
715         p->cminflt = (p->cminflt_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
716
717         last = p->majflt_raw;
718         p->majflt_raw           = strtoull(procfile_lineword(ff, 0, 11+i), NULL, 10);
719         p->majflt = (p->majflt_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
720
721         last = p->cmajflt_raw;
722         p->cmajflt_raw          = strtoull(procfile_lineword(ff, 0, 12+i), NULL, 10);
723         p->cmajflt = (p->cmajflt_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
724
725         last = p->utime_raw;
726         p->utime_raw            = strtoull(procfile_lineword(ff, 0, 13+i), NULL, 10);
727         p->utime = (p->utime_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
728
729         last = p->stime_raw;
730         p->stime_raw            = strtoull(procfile_lineword(ff, 0, 14+i), NULL, 10);
731         p->stime = (p->stime_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
732
733         last = p->cutime_raw;
734         p->cutime_raw           = strtoull(procfile_lineword(ff, 0, 15+i), NULL, 10);
735         p->cutime = (p->cutime_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
736
737         last = p->cstime_raw;
738         p->cstime_raw           = strtoull(procfile_lineword(ff, 0, 16+i), NULL, 10);
739         p->cstime = (p->cstime_raw - last) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
740
741         // p->priority          = strtoull(procfile_lineword(ff, 0, 17+i), NULL, 10);
742         // p->nice                      = strtoull(procfile_lineword(ff, 0, 18+i), NULL, 10);
743         p->num_threads          = (int32_t) atol(procfile_lineword(ff, 0, 19 + i));
744         // p->itrealvalue       = strtoull(procfile_lineword(ff, 0, 20+i), NULL, 10);
745         // p->starttime         = strtoull(procfile_lineword(ff, 0, 21+i), NULL, 10);
746         // p->vsize                     = strtoull(procfile_lineword(ff, 0, 22+i), NULL, 10);
747         p->rss                          = strtoull(procfile_lineword(ff, 0, 23+i), NULL, 10);
748         // p->rsslim            = strtoull(procfile_lineword(ff, 0, 24+i), NULL, 10);
749         // p->starcode          = strtoull(procfile_lineword(ff, 0, 25+i), NULL, 10);
750         // p->endcode           = strtoull(procfile_lineword(ff, 0, 26+i), NULL, 10);
751         // p->startstack        = strtoull(procfile_lineword(ff, 0, 27+i), NULL, 10);
752         // p->kstkesp           = strtoull(procfile_lineword(ff, 0, 28+i), NULL, 10);
753         // p->kstkeip           = strtoull(procfile_lineword(ff, 0, 29+i), NULL, 10);
754         // p->signal            = strtoull(procfile_lineword(ff, 0, 30+i), NULL, 10);
755         // p->blocked           = strtoull(procfile_lineword(ff, 0, 31+i), NULL, 10);
756         // p->sigignore         = strtoull(procfile_lineword(ff, 0, 32+i), NULL, 10);
757         // p->sigcatch          = strtoull(procfile_lineword(ff, 0, 33+i), NULL, 10);
758         // p->wchan                     = strtoull(procfile_lineword(ff, 0, 34+i), NULL, 10);
759         // p->nswap                     = strtoull(procfile_lineword(ff, 0, 35+i), NULL, 10);
760         // p->cnswap            = strtoull(procfile_lineword(ff, 0, 36+i), NULL, 10);
761         // p->exit_signal       = atol(procfile_lineword(ff, 0, 37+i));
762         // p->processor         = atol(procfile_lineword(ff, 0, 38+i));
763         // p->rt_priority       = strtoul(procfile_lineword(ff, 0, 39+i), NULL, 10);
764         // p->policy            = strtoul(procfile_lineword(ff, 0, 40+i), NULL, 10);
765         // p->delayacct_blkio_ticks             = strtoull(procfile_lineword(ff, 0, 41+i), NULL, 10);
766         // p->guest_time        = strtoull(procfile_lineword(ff, 0, 42+i), NULL, 10);
767         // p->cguest_time       = strtoull(procfile_lineword(ff, 0, 43), NULL, 10);
768
769         if(unlikely(debug || (p->target && p->target->debug)))
770                 fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads);
771
772         if(unlikely(global_iterations_counter == 1)) {
773                 p->minflt                       = 0;
774                 p->cminflt                      = 0;
775                 p->majflt                       = 0;
776                 p->cmajflt                      = 0;
777                 p->utime                        = 0;
778                 p->stime                        = 0;
779                 p->cutime                       = 0;
780                 p->cstime                       = 0;
781         }
782
783         return 0;
784
785 cleanup:
786         p->minflt                       = 0;
787         p->cminflt                      = 0;
788         p->majflt                       = 0;
789         p->cmajflt                      = 0;
790         p->utime                        = 0;
791         p->stime                        = 0;
792         p->cutime                       = 0;
793         p->cstime                       = 0;
794         p->num_threads          = 0;
795         p->rss                          = 0;
796         return 1;
797 }
798
799 int read_proc_pid_statm(struct pid_stat *p) {
800         static procfile *ff = NULL;
801
802         if(unlikely(!p->statm_filename)) {
803                 char filename[FILENAME_MAX + 1];
804                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", host_prefix, p->pid);
805                 if(!(p->statm_filename = strdup(filename)))
806                         fatal("Cannot allocate memory for filename '%s'", filename);
807         }
808
809         ff = procfile_reopen(ff, p->statm_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
810         if(unlikely(!ff)) goto cleanup;
811
812         ff = procfile_readall(ff);
813         if(unlikely(!ff)) goto cleanup;
814
815         file_counter++;
816
817         p->statm_size                   = strtoull(procfile_lineword(ff, 0, 0), NULL, 10);
818         p->statm_resident               = strtoull(procfile_lineword(ff, 0, 1), NULL, 10);
819         p->statm_share                  = strtoull(procfile_lineword(ff, 0, 2), NULL, 10);
820         p->statm_text                   = strtoull(procfile_lineword(ff, 0, 3), NULL, 10);
821         p->statm_lib                    = strtoull(procfile_lineword(ff, 0, 4), NULL, 10);
822         p->statm_data                   = strtoull(procfile_lineword(ff, 0, 5), NULL, 10);
823         p->statm_dirty                  = strtoull(procfile_lineword(ff, 0, 6), NULL, 10);
824
825         return 0;
826
827 cleanup:
828         p->statm_size                   = 0;
829         p->statm_resident               = 0;
830         p->statm_share                  = 0;
831         p->statm_text                   = 0;
832         p->statm_lib                    = 0;
833         p->statm_data                   = 0;
834         p->statm_dirty                  = 0;
835         return 1;
836 }
837
838 int read_proc_pid_io(struct pid_stat *p) {
839         static procfile *ff = NULL;
840
841         if(unlikely(!p->io_filename)) {
842                 char filename[FILENAME_MAX + 1];
843                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", host_prefix, p->pid);
844                 if(!(p->io_filename = strdup(filename)))
845                         fatal("Cannot allocate memory for filename '%s'", filename);
846         }
847
848         // open the file
849         ff = procfile_reopen(ff, p->io_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
850         if(unlikely(!ff)) goto cleanup;
851
852         ff = procfile_readall(ff);
853         if(unlikely(!ff)) goto cleanup;
854
855         file_counter++;
856
857         p->last_io_collected_usec = p->io_collected_usec;
858         p->io_collected_usec = timems();
859
860         unsigned long long last;
861
862         last = p->io_logical_bytes_read_raw;
863         p->io_logical_bytes_read_raw = strtoull(procfile_lineword(ff, 0, 1), NULL, 10);
864         p->io_logical_bytes_read = (p->io_logical_bytes_read_raw - last) * (update_every * 1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
865
866         last = p->io_logical_bytes_written_raw;
867         p->io_logical_bytes_written_raw = strtoull(procfile_lineword(ff, 1, 1), NULL, 10);
868         p->io_logical_bytes_written = (p->io_logical_bytes_written_raw - last) * (update_every * 1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
869
870         last = p->io_read_calls_raw;
871         p->io_read_calls_raw = strtoull(procfile_lineword(ff, 2, 1), NULL, 10);
872         p->io_read_calls = (p->io_read_calls_raw - last) * (update_every * 1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
873
874         last = p->io_write_calls_raw;
875         p->io_write_calls_raw = strtoull(procfile_lineword(ff, 3, 1), NULL, 10);
876         p->io_write_calls = (p->io_write_calls_raw - last) * (update_every * 1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
877
878         last = p->io_storage_bytes_read_raw;
879         p->io_storage_bytes_read_raw = strtoull(procfile_lineword(ff, 4, 1), NULL, 10);
880         p->io_storage_bytes_read = (p->io_storage_bytes_read_raw - last) * (update_every * 1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
881
882         last = p->io_storage_bytes_written_raw;
883         p->io_storage_bytes_written_raw = strtoull(procfile_lineword(ff, 5, 1), NULL, 10);
884         p->io_storage_bytes_written = (p->io_storage_bytes_written_raw - last) * (update_every * 1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
885
886         last = p->io_cancelled_write_bytes_raw;
887         p->io_cancelled_write_bytes_raw = strtoull(procfile_lineword(ff, 6, 1), NULL, 10);
888         p->io_cancelled_write_bytes = (p->io_cancelled_write_bytes_raw - last) * (update_every * 1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
889
890         if(unlikely(global_iterations_counter == 1)) {
891                 p->io_logical_bytes_read                = 0;
892                 p->io_logical_bytes_written     = 0;
893                 p->io_read_calls                                = 0;
894                 p->io_write_calls                               = 0;
895                 p->io_storage_bytes_read                = 0;
896                 p->io_storage_bytes_written     = 0;
897                 p->io_cancelled_write_bytes             = 0;
898         }
899
900         return 0;
901
902 cleanup:
903         p->io_logical_bytes_read                = 0;
904         p->io_logical_bytes_written     = 0;
905         p->io_read_calls                                = 0;
906         p->io_write_calls                               = 0;
907         p->io_storage_bytes_read                = 0;
908         p->io_storage_bytes_written     = 0;
909         p->io_cancelled_write_bytes             = 0;
910         return 1;
911 }
912
913
914 // ----------------------------------------------------------------------------
915 // file descriptor
916 // this is used to keep a global list of all open files of the system
917 // it is needed in order to calculate the unique files processes have open
918
919 #define FILE_DESCRIPTORS_INCREASE_STEP 100
920
921 struct file_descriptor {
922         avl avl;
923 #ifdef NETDATA_INTERNAL_CHECKS
924         uint32_t magic;
925 #endif /* NETDATA_INTERNAL_CHECKS */
926         uint32_t hash;
927         const char *name;
928         int type;
929         int count;
930         int pos;
931 } *all_files = NULL;
932
933 int all_files_len = 0;
934 int all_files_size = 0;
935
936 int file_descriptor_compare(void* a, void* b) {
937 #ifdef NETDATA_INTERNAL_CHECKS
938         if(((struct file_descriptor *)a)->magic != 0x0BADCAFE || ((struct file_descriptor *)b)->magic != 0x0BADCAFE)
939                 error("Corrupted index data detected. Please report this.");
940 #endif /* NETDATA_INTERNAL_CHECKS */
941
942         if(((struct file_descriptor *)a)->hash < ((struct file_descriptor *)b)->hash)
943                 return -1;
944
945         else if(((struct file_descriptor *)a)->hash > ((struct file_descriptor *)b)->hash)
946                 return 1;
947
948         else
949                 return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name);
950 }
951
952 int file_descriptor_iterator(avl *a) { if(a) {}; return 0; }
953
954 avl_tree all_files_index = {
955                 NULL,
956                 file_descriptor_compare
957 };
958
959 static struct file_descriptor *file_descriptor_find(const char *name, uint32_t hash) {
960         struct file_descriptor tmp;
961         tmp.hash = (hash)?hash:simple_hash(name);
962         tmp.name = name;
963         tmp.count = 0;
964         tmp.pos = 0;
965 #ifdef NETDATA_INTERNAL_CHECKS
966         tmp.magic = 0x0BADCAFE;
967 #endif /* NETDATA_INTERNAL_CHECKS */
968
969         return (struct file_descriptor *)avl_search(&all_files_index, (avl *) &tmp);
970 }
971
// add / remove a slot of all_files to / from the all_files index
#define file_descriptor_add(fd)    avl_insert(&all_files_index, (avl *)(fd))
#define file_descriptor_remove(fd) avl_remove(&all_files_index, (avl *)(fd))

// kinds of open files, stored in the 'type' member of struct file_descriptor
#define FILETYPE_OTHER     0
#define FILETYPE_FILE      1
#define FILETYPE_PIPE      2
#define FILETYPE_SOCKET    3
#define FILETYPE_INOTIFY   4
#define FILETYPE_EVENTFD   5
#define FILETYPE_EVENTPOLL 6
#define FILETYPE_TIMERFD   7
#define FILETYPE_SIGNALFD  8
984
985 void file_descriptor_not_used(int id)
986 {
987         if(id > 0 && id < all_files_size) {
988
989 #ifdef NETDATA_INTERNAL_CHECKS
990                 if(all_files[id].magic != 0x0BADCAFE) {
991                         error("Ignoring request to remove empty file id %d.", id);
992                         return;
993                 }
994 #endif /* NETDATA_INTERNAL_CHECKS */
995
996                 if(unlikely(debug))
997                         fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count);
998
999                 if(all_files[id].count > 0) {
1000                         all_files[id].count--;
1001
1002                         if(!all_files[id].count) {
1003                                 if(unlikely(debug))
1004                                         fprintf(stderr, "apps.plugin:   >> slot %d is empty.\n", id);
1005
1006                                 file_descriptor_remove(&all_files[id]);
1007 #ifdef NETDATA_INTERNAL_CHECKS
1008                                 all_files[id].magic = 0x00000000;
1009 #endif /* NETDATA_INTERNAL_CHECKS */
1010                                 all_files_len--;
1011                         }
1012                 }
1013                 else
1014                         error("Request to decrease counter of fd %d (%s), while the use counter is 0", id, all_files[id].name);
1015         }
1016         else    error("Request to decrease counter of fd %d, which is outside the array size (1 to %d)", id, all_files_size);
1017 }
1018
1019 int file_descriptor_find_or_add(const char *name)
1020 {
1021         static int last_pos = 0;
1022         uint32_t hash = simple_hash(name);
1023
1024         if(unlikely(debug))
1025                 fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash);
1026
1027         struct file_descriptor *fd = file_descriptor_find(name, hash);
1028         if(fd) {
1029                 // found
1030                 if(unlikely(debug))
1031                         fprintf(stderr, "apps.plugin:   >> found on slot %d\n", fd->pos);
1032
1033                 fd->count++;
1034                 return fd->pos;
1035         }
1036         // not found
1037
1038         // check we have enough memory to add it
1039         if(!all_files || all_files_len == all_files_size) {
1040                 void *old = all_files;
1041                 int i;
1042
1043                 // there is no empty slot
1044                 if(unlikely(debug))
1045                         fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP);
1046
1047                 all_files = realloc(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor));
1048
1049                 // if the address changed, we have to rebuild the index
1050                 // since all pointers are now invalid
1051                 if(old && old != (void *)all_files) {
1052                         if(unlikely(debug))
1053                                 fprintf(stderr, "apps.plugin:   >> re-indexing.\n");
1054
1055                         all_files_index.root = NULL;
1056                         for(i = 0; i < all_files_size; i++) {
1057                                 if(!all_files[i].count) continue;
1058                                 file_descriptor_add(&all_files[i]);
1059                         }
1060
1061                         if(unlikely(debug))
1062                                 fprintf(stderr, "apps.plugin:   >> re-indexing done.\n");
1063                 }
1064
1065                 for(i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) {
1066                         all_files[i].count = 0;
1067                         all_files[i].name = NULL;
1068 #ifdef NETDATA_INTERNAL_CHECKS
1069                         all_files[i].magic = 0x00000000;
1070 #endif /* NETDATA_INTERNAL_CHECKS */
1071                         all_files[i].pos = i;
1072                 }
1073
1074                 if(!all_files_size) all_files_len = 1;
1075                 all_files_size += FILE_DESCRIPTORS_INCREASE_STEP;
1076         }
1077
1078         if(unlikely(debug))
1079                 fprintf(stderr, "apps.plugin:   >> searching for empty slot.\n");
1080
1081         // search for an empty slot
1082         int i, c;
1083         for(i = 0, c = last_pos ; i < all_files_size ; i++, c++) {
1084                 if(c >= all_files_size) c = 0;
1085                 if(c == 0) continue;
1086
1087                 if(!all_files[c].count) {
1088                         if(unlikely(debug))
1089                                 fprintf(stderr, "apps.plugin:   >> Examining slot %d.\n", c);
1090
1091 #ifdef NETDATA_INTERNAL_CHECKS
1092                         if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash))
1093                                 error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name);
1094 #endif /* NETDATA_INTERNAL_CHECKS */
1095
1096                         if(unlikely(debug))
1097                                 fprintf(stderr, "apps.plugin:   >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name);
1098
1099                         if(all_files[c].name) free((void *)all_files[c].name);
1100                         all_files[c].name = NULL;
1101                         last_pos = c;
1102                         break;
1103                 }
1104         }
1105         if(i == all_files_size) {
1106                 fatal("We should find an empty slot, but there isn't any");
1107                 exit(1);
1108         }
1109
1110         if(unlikely(debug))
1111                 fprintf(stderr, "apps.plugin:   >> updating slot %d.\n", c);
1112
1113         all_files_len++;
1114
1115         // else we have an empty slot in 'c'
1116
1117         int type;
1118         if(name[0] == '/') type = FILETYPE_FILE;
1119         else if(strncmp(name, "pipe:", 5) == 0) type = FILETYPE_PIPE;
1120         else if(strncmp(name, "socket:", 7) == 0) type = FILETYPE_SOCKET;
1121         else if(strcmp(name, "anon_inode:inotify") == 0 || strcmp(name, "inotify") == 0) type = FILETYPE_INOTIFY;
1122         else if(strcmp(name, "anon_inode:[eventfd]") == 0) type = FILETYPE_EVENTFD;
1123         else if(strcmp(name, "anon_inode:[eventpoll]") == 0) type = FILETYPE_EVENTPOLL;
1124         else if(strcmp(name, "anon_inode:[timerfd]") == 0) type = FILETYPE_TIMERFD;
1125         else if(strcmp(name, "anon_inode:[signalfd]") == 0) type = FILETYPE_SIGNALFD;
1126         else if(strncmp(name, "anon_inode:", 11) == 0) {
1127                 if(unlikely(debug))
1128                         fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name);
1129
1130                 type = FILETYPE_OTHER;
1131         }
1132         else {
1133                 if(unlikely(debug))
1134                         fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name);
1135
1136                 type = FILETYPE_OTHER;
1137         }
1138
1139         all_files[c].name = strdup(name);
1140         all_files[c].hash = hash;
1141         all_files[c].type = type;
1142         all_files[c].pos  = c;
1143         all_files[c].count = 1;
1144 #ifdef NETDATA_INTERNAL_CHECKS
1145         all_files[c].magic = 0x0BADCAFE;
1146 #endif /* NETDATA_INTERNAL_CHECKS */
1147         file_descriptor_add(&all_files[c]);
1148
1149         if(unlikely(debug))
1150                 fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name);
1151
1152         return c;
1153 }
1154
1155 int read_pid_file_descriptors(struct pid_stat *p) {
1156         char dirname[FILENAME_MAX+1];
1157
1158         snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", host_prefix, p->pid);
1159         DIR *fds = opendir(dirname);
1160         if(fds) {
1161                 int c;
1162                 struct dirent *de;
1163                 char fdname[FILENAME_MAX + 1];
1164                 char linkname[FILENAME_MAX + 1];
1165
1166                 // make the array negative
1167                 for(c = 0 ; c < p->fds_size ; c++)
1168                         p->fds[c] = -p->fds[c];
1169
1170                 while((de = readdir(fds))) {
1171                         if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1172                                 continue;
1173
1174                         // check if the fds array is small
1175                         int fdid = atoi(de->d_name);
1176                         if(fdid < 0) continue;
1177                         if(fdid >= p->fds_size) {
1178                                 // it is small, extend it
1179                                 if(unlikely(debug))
1180                                         fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100);
1181
1182                                 p->fds = realloc(p->fds, (fdid + 100) * sizeof(int));
1183                                 if(!p->fds) {
1184                                         fatal("Cannot re-allocate fds for %s", p->comm);
1185                                         break;
1186                                 }
1187
1188                                 // and initialize it
1189                                 for(c = p->fds_size ; c < (fdid + 100) ; c++) p->fds[c] = 0;
1190                                 p->fds_size = fdid + 100;
1191                         }
1192
1193                         if(p->fds[fdid] == 0) {
1194                                 // we don't know this fd, get it
1195
1196                                 sprintf(fdname, "%s/proc/%d/fd/%s", host_prefix, p->pid, de->d_name);
1197                                 ssize_t l = readlink(fdname, linkname, FILENAME_MAX);
1198                                 if(l == -1) {
1199                                         if(debug || (p->target && p->target->debug)) {
1200                                                 if(debug || (p->target && p->target->debug))
1201                                                         error("Cannot read link %s", fdname);
1202                                         }
1203                                         continue;
1204                                 }
1205                                 linkname[l] = '\0';
1206                                 file_counter++;
1207
1208                                 // if another process already has this, we will get
1209                                 // the same id
1210                                 p->fds[fdid] = file_descriptor_find_or_add(linkname);
1211                         }
1212
1213                         // else make it positive again, we need it
1214                         // of course, the actual file may have changed, but we don't care so much
1215                         // FIXME: we could compare the inode as returned by readdir direct structure
1216                         else p->fds[fdid] = -p->fds[fdid];
1217                 }
1218                 closedir(fds);
1219
1220                 // remove all the negative file descriptors
1221                 for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] < 0) {
1222                         file_descriptor_not_used(-p->fds[c]);
1223                         p->fds[c] = 0;
1224                 }
1225         }
1226         else return 1;
1227
1228         return 0;
1229 }
1230
1231 // ----------------------------------------------------------------------------
1232
#ifdef NETDATA_INTERNAL_CHECKS
// Debugging aid: report on stderr which processes could have absorbed the
// resources of the exited process 'pe' that could not be accounted for.
//
//   pe    the exited process
//   ppe   the process its resources are aggregated on; may be NULL when
//         no running parent was found
//   lost  the amount that could not be attributed
//   type  which counter is examined: 1 = cminflt, 2 = cmajflt,
//         3 = cutime, 4 = cstime; for any other value only the header
//         and the parent chain are printed
//
// A process is a candidate when its counter of the given type is larger
// than 'lost'.
void find_lost_child_debug(struct pid_stat *pe, struct pid_stat *ppe, unsigned long long lost, int type) {
	int found = 0;
	struct pid_stat *p = NULL, *pp = pe->parent;

	log_date(stderr);

	// ppe is NULL when the caller found no running parent
	if(ppe)
		fprintf(stderr, "Searching for candidate of lost resources of process %d (%s, %s) which is aggregated on %d (%s, %s)\n", pe->pid, pe->comm, pe->updated?"running":"exited", ppe->pid, ppe->comm, ppe->updated?"running":"exited");
	else
		fprintf(stderr, "Searching for candidate of lost resources of process %d (%s, %s) which has no running parent to be aggregated on\n", pe->pid, pe->comm, pe->updated?"running":"exited");

	while(pp) {
		fprintf(stderr, " >> parent %d (%s, %s)\n", pp->pid, pp->comm, pp->updated?"running":"exited");
		pp = pp->parent;
	}

	// the name of the counter, as it appears in the messages
	const char *what;
	switch(type) {
		case 1:  what = "minflt"; break;
		case 2:  what = "majflt"; break;
		case 3:  what = "utime";  break;
		case 4:  what = "stime";  break;
		default: return;
	}

	for(p = root_of_pids; p ; p = p->next) {
		if(p == pe) continue;

		unsigned long long available;
		switch(type) {
			case 1:  available = p->cminflt; break;
			case 2:  available = p->cmajflt; break;
			case 3:  available = p->cutime;  break;
			default: available = p->cstime;  break;
		}

		if(available > lost) {
			fprintf(stderr, " > process %d (%s) could use the lost exited child %s %llu of process %d (%s)\n", p->pid, p->comm, what, lost, pe->pid, pe->comm);
			found++;
		}
	}

	if(!found)
		fprintf(stderr, " > cannot find any process to use the lost exited child %s %llu of process %d (%s)\n", what, lost, pe->pid, pe->comm);
}
#endif /* NETDATA_INTERNAL_CHECKS */
1300
1301 void remove_exited_child_from_parent(unsigned long long *field, unsigned long long *pfield, unsigned long long *ifield, struct pid_stat *pe, struct pid_stat *ppe, int type) {
1302         if(pfield) {
1303                 if(*field > *pfield) {
1304                         *field -= *pfield;
1305                         *pfield = 0;
1306                 }
1307                 else {
1308                         *pfield -= *field;
1309                         *field = 0;
1310                 }
1311         }
1312
1313         if(*field) {
1314                 if(ifield && ifield != pfield) {
1315                         if(*field > *ifield) {
1316                                 *field -= *ifield;
1317                                 *ifield = 0;
1318                         }
1319                         else {
1320                                 *ifield -= *field;
1321                                 *field = 0;
1322                         }
1323                 }
1324         }
1325
1326         if(*field) {
1327 #ifdef NETDATA_INTERNAL_CHECKS
1328                 find_lost_child_debug(pe, ppe, *field, type);
1329 #endif
1330                 while(pe && !pe->updated) {
1331                         pe->keep = 1;
1332                         pe = pe->parent;
1333                 }
1334         }
1335 }
1336
1337 void process_exited_processes() {
1338         struct pid_stat *init = all_pids[1];
1339         struct pid_stat *p;
1340
1341         for(p = root_of_pids; p ; p = p->next) {
1342                 if(p->updated || !p->stat_collected_usec) continue;
1343
1344                 struct pid_stat *pp = p->parent;
1345
1346                 // find the first parent that is running
1347                 while(pp && !pp->updated)
1348                         pp = pp->parent;
1349                 
1350                 unsigned long long rate;
1351
1352                 rate = (p->utime_raw + p->cutime_raw) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1353                 remove_exited_child_from_parent(&rate,  (pp)?&pp->cutime:NULL,  (init)?&init->cutime:NULL, p, pp, 3);
1354                 p->cutime_raw = 0;
1355                 p->utime_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (update_every * 1000000 * 100);
1356
1357                 rate = (p->stime_raw + p->cstime_raw) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1358                 remove_exited_child_from_parent(&rate,  (pp)?&pp->cstime:NULL,  (init)?&init->cstime:NULL, p, pp, 4);
1359                 p->cstime_raw = 0;
1360                 p->stime_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (update_every * 1000000 * 100);
1361
1362                 rate = (p->minflt_raw + p->cminflt_raw) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1363                 remove_exited_child_from_parent(&rate, (pp)?&pp->cminflt:NULL, (init)?&init->cminflt:NULL, p, pp, 1);
1364                 p->cminflt_raw = 0;
1365                 p->minflt_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (update_every * 1000000 * 100);
1366
1367                 rate = (p->majflt_raw + p->cmajflt_raw) * (update_every * 1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1368                 remove_exited_child_from_parent(&rate, (pp)?&pp->cmajflt:NULL, (init)?&init->cmajflt:NULL, p, pp, 2);
1369                 p->cmajflt_raw = 0;
1370                 p->majflt_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (update_every * 1000000 * 100);
1371         }
1372 }
1373
1374 void link_all_processes_to_their_parents(void) {
1375         struct pid_stat *p = NULL;
1376
1377         // link all children to their parents
1378         // and update children count on parents
1379         for(p = root_of_pids; p ; p = p->next) {
1380                 // for each process found running
1381
1382                 if(likely(p->ppid > 0 && all_pids[p->ppid])) {
1383                         // valid parent processes
1384
1385                         struct pid_stat *pp;
1386
1387                         p->parent = pp = all_pids[p->ppid];
1388                         p->parent->children_count++;
1389
1390                         if(unlikely(debug || (p->target && p->target->debug)))
1391                                 fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cminflt, pp->cmajflt);
1392                 }
1393                 else if(unlikely(p->ppid != 0))
1394                         error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid);
1395         }
1396 }
1397
1398 // ----------------------------------------------------------------------------
1399
1400 // 1. read all files in /proc
1401 // 2. for each numeric directory:
1402 //    i.   read /proc/pid/stat
1403 //    ii.  read /proc/pid/statm
1404 //    iii. read /proc/pid/io (requires root access)
1405 //    iv.  read the entries in directory /proc/pid/fd (requires root access)
1406 //         for each entry:
1407 //         a. find or create a struct file_descriptor
1408 //         b. cleanup any old/unused file_descriptors
1409
1410 // after all these, some pids may be linked to targets, while others may not
1411
1412 // in case of errors, only 1 every 1000 errors is printed
1413 // to avoid filling up all disk space
1414 // if debug is enabled, all errors are printed
1415
1416 static int compar_pid(const void *pid1, const void *pid2) {
1417
1418         struct pid_stat *p1 = all_pids[*((pid_t *)pid1)];
1419         struct pid_stat *p2 = all_pids[*((pid_t *)pid2)];
1420
1421         if(p1->sortlist > p2->sortlist)
1422                 return -1;
1423         else
1424                 return 1;
1425 }
1426
1427 void collect_data_for_pid(pid_t pid) {
1428         if(unlikely(pid <= 0 || pid > pid_max)) {
1429                 error("Invalid pid %d read (expected 1 to %d). Ignoring process.", pid, pid_max);
1430                 return;
1431         }
1432
1433         struct pid_stat *p = get_pid_entry(pid);
1434         if(unlikely(!p || p->read)) return;
1435
1436         // fprintf(stderr, "Reading process %d (%s), sortlist %d\n", p->pid, p->comm, p->sortlist);
1437
1438         p->read             = 1;
1439         p->sortlist         = 0;
1440
1441         // --------------------------------------------------------------------
1442         // /proc/<pid>/stat
1443
1444         if(unlikely(read_proc_pid_stat(p))) {
1445                 error("Cannot process %s/proc/%d/stat", host_prefix, pid);
1446                 // there is no reason to proceed if we cannot get its status
1447                 return;
1448         }
1449
1450         read_proc_pid_ownership(p);
1451
1452         // check its parent pid
1453         if(unlikely(p->ppid < 0 || p->ppid > pid_max)) {
1454                 error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid);
1455                 p->ppid = 0;
1456         }
1457
1458         // --------------------------------------------------------------------
1459         // /proc/<pid>/io
1460
1461         if(unlikely(read_proc_pid_io(p)))
1462                 error("Cannot process %s/proc/%d/io", host_prefix, pid);
1463
1464         // --------------------------------------------------------------------
1465         // /proc/<pid>/statm
1466
1467         if(unlikely(read_proc_pid_statm(p))) {
1468                 error("Cannot process %s/proc/%d/statm", host_prefix, pid);
1469                 // there is no reason to proceed if we cannot get its memory status
1470                 return;
1471         }
1472
1473         // --------------------------------------------------------------------
1474         // link it
1475
1476         // check if it is target
1477         // we do this only once, the first time this pid is loaded
1478         if(unlikely(p->new_entry)) {
1479                 // /proc/<pid>/cmdline
1480                 if(likely(proc_pid_cmdline_is_needed)) {
1481                         if(unlikely(read_proc_pid_cmdline(p)))
1482                                 error("Cannot process %s/proc/%d/cmdline", host_prefix, pid);
1483                 }
1484
1485                 if(unlikely(debug))
1486                         fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", pid, p->comm);
1487
1488                 uint32_t hash = simple_hash(p->comm);
1489                 size_t pclen  = strlen(p->comm);
1490
1491                 struct target *w;
1492                 for(w = apps_groups_root_target; w ; w = w->next) {
1493                         // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm);
1494
1495                         // find it - 4 cases:
1496                         // 1. the target is not a pattern
1497                         // 2. the target has the prefix
1498                         // 3. the target has the suffix
1499                         // 4. the target is something inside cmdline
1500                         if(     (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm))
1501                                || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen))
1502                                || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen]))
1503                                || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(p->cmdline, w->compare))
1504                                         ) {
1505                                 if(w->target) p->target = w->target;
1506                                 else p->target = w;
1507
1508                                 if(debug || (p->target && p->target->debug))
1509                                         fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name);
1510
1511                                 break;
1512                         }
1513                 }
1514         }
1515
1516         // --------------------------------------------------------------------
1517         // /proc/<pid>/fd
1518
1519         if(unlikely(read_pid_file_descriptors(p))) {
1520                 error("Cannot process entries in %s/proc/%d/fd", host_prefix, pid);
1521         }
1522
1523         // --------------------------------------------------------------------
1524         // done!
1525
1526         if(p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read)
1527                 error("read process %d (%s), but its parent %d (%s) is not read\n", p->pid, p->comm, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm);
1528
1529         // mark it as updated
1530         p->updated = 1;
1531         p->keep = 0;
1532         p->keeploops = 0;
1533 }
1534
1535 int collect_data_for_all_processes_from_proc(void) {
1536         struct pid_stat *p = NULL;
1537
1538         if(all_pids_count) {
1539                 // read parents before childs
1540                 // this is needed to prevent a situation where
1541                 // a child is found running, but until we read
1542                 // its parent, it has exited and its parent
1543                 // has accumulated its resources
1544
1545                 long slc = 0;
1546                 for(p = root_of_pids; p ; p = p->next) {
1547                         p->read             = 0;
1548                         p->updated          = 0;
1549                         p->new_entry        = 0;
1550                         p->merged           = 0;
1551                         p->children_count   = 0;
1552                         p->parent           = NULL;
1553
1554 #ifdef NETDATA_INTERNAL_CHECKS
1555                         if(unlikely(slc >= all_pids_count))
1556                                 error("Internal error: I was thinking I had %ld processes in my arrays, but it seems there are more.", all_pids_count);
1557 #endif
1558                         all_pids_sortlist[slc++] = p->pid;
1559                 }
1560
1561                 qsort((void *)all_pids_sortlist, all_pids_count, sizeof(pid_t), compar_pid);
1562
1563                 for(slc = 0; slc < all_pids_count; slc++)
1564                         collect_data_for_pid(all_pids_sortlist[slc]);
1565         }
1566
1567         char dirname[FILENAME_MAX + 1];
1568
1569         snprintfz(dirname, FILENAME_MAX, "%s/proc", host_prefix);
1570         DIR *dir = opendir(dirname);
1571         if(!dir) return 0;
1572
1573         struct dirent *file = NULL;
1574
1575         while((file = readdir(dir))) {
1576                 char *endptr = file->d_name;
1577                 pid_t pid = (pid_t) strtoul(file->d_name, &endptr, 10);
1578
1579                 // make sure we read a valid number
1580                 if(unlikely(endptr == file->d_name || *endptr != '\0'))
1581                         continue;
1582
1583                 collect_data_for_pid(pid);
1584         }
1585         closedir(dir);
1586
1587         // normally this is done
1588         // however we may have processes exited while we collected values
1589         // so let's find the exited ones
1590         // we do this by collecting the ownership of process
1591         // if we manage to get the ownership, the process still runs
1592
1593         link_all_processes_to_their_parents();
1594         process_exited_processes();
1595
1596         return 1;
1597 }
1598
1599 // ----------------------------------------------------------------------------
1600 // update statistics on the targets
1601
// 1. link all children to their parents
// 2. go from bottom to top, marking as merged all children to their parents
//    this step links all parents without a target to the child target, if any
// 3. link all top level processes (the ones not merged) to the default target
// 4. go from top to bottom, linking all children without a target, to their parent target
1607 //    after this step, all processes have a target
1608 // [5. for each killed pid (updated = 0), remove its usage from its target]
1609 // 6. zero all apps_groups_targets
1610 // 7. concentrate all values on the apps_groups_targets
1611 // 8. remove all killed processes
1612 // 9. find the unique file count for each target
1613 // check: update_apps_groups_statistics()
1614
1615 void cleanup_exited_pids(void) {
1616         int c;
1617         struct pid_stat *p = NULL;
1618
1619         for(p = root_of_pids; p ;) {
1620                 if(!p->updated && (!p->keep || p->keeploops > 1)) {
1621 //                      fprintf(stderr, "\tEXITED %d %s [parent %d %s, target %s] utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->parent->pid, p->parent->comm, p->target->name,  p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
1622
1623 #ifdef NETDATA_INTERNAL_CHECKS
1624                         if(p->keep)
1625                                 fprintf(stderr, " > cannot keep exited process %d (%s) anymore - removing it.\n", p->pid, p->comm);
1626 #endif
1627
1628                         for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] > 0) {
1629                                 file_descriptor_not_used(p->fds[c]);
1630                                 p->fds[c] = 0;
1631                         }
1632
1633                         pid_t r = p->pid;
1634                         p = p->next;
1635                         del_pid_entry(r);
1636                 }
1637                 else {
1638                         if(unlikely(p->keep)) p->keeploops++;
1639                         p->keep = 0;
1640                         p = p->next;
1641                 }
1642         }
1643 }
1644
1645 void apply_apps_groups_targets_inheritance(void) {
1646         struct pid_stat *p = NULL;
1647
1648         // children that do not have a target
1649         // inherit their target from their parent
1650         int found = 1, loops = 0;
1651         while(found) {
1652                 if(unlikely(debug)) loops++;
1653                 found = 0;
1654                 for(p = root_of_pids; p ; p = p->next) {
1655                         // if this process does not have a target
1656                         // and it has a parent
1657                         // and its parent has a target
1658                         // then, set the parent's target to this process
1659                         if(unlikely(!p->target && p->parent && p->parent->target)) {
1660                                 p->target = p->parent->target;
1661                                 found++;
1662
1663                                 if(debug || (p->target && p->target->debug))
1664                                         fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
1665                         }
1666                 }
1667         }
1668
1669         // find all the procs with 0 childs and merge them to their parents
1670         // repeat, until nothing more can be done.
1671         int sortlist = 1;
1672         found = 1;
1673         while(found) {
1674                 if(unlikely(debug)) loops++;
1675                 found = 0;
1676
1677                 for(p = root_of_pids; p ; p = p->next) {
1678                         // if this process does not have any children
1679                         // and is not already merged
1680                         // and has a parent
1681                         // and its parent has children
1682                         // and the target of this process and its parent is the same, or the parent does not have a target
1683                         // and its parent is not init
1684                         // then, mark them as merged.
1685                         if(unlikely(
1686                                         !p->children_count
1687                                         && !p->merged
1688                                         && p->parent
1689                                         && p->parent->children_count
1690                                         && (p->target == p->parent->target || !p->parent->target)
1691                                         && p->ppid != 1
1692                                 )) {
1693                                 p->parent->children_count--;
1694                                 p->merged = 1;
1695
1696                                 // the parent inherits the child's target, if it does not have a target itself
1697                                 if(unlikely(p->target && !p->parent->target)) {
1698                                         p->parent->target = p->target;
1699
1700                                         if(debug || (p->target && p->target->debug))
1701                                                 fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).\n", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm);
1702                                 }
1703
1704                                 found++;
1705                         }
1706
1707                         // since this process does not have any childs
1708                         // assign it to the current sortlist
1709                         if(unlikely(!p->sortlist && !p->children_count))
1710                                 p->sortlist = sortlist++;
1711                 }
1712
1713                 if(unlikely(debug))
1714                         fprintf(stderr, "apps.plugin: TARGET INHERITANCE: merged %d processes\n", found);
1715         }
1716
1717         // init goes always to default target
1718         if(all_pids[1])
1719                 all_pids[1]->target = apps_groups_default_target;
1720
1721         // give a default target on all top level processes
1722         if(unlikely(debug)) loops++;
1723         for(p = root_of_pids; p ; p = p->next) {
1724                 // if the process is not merged itself
1725                 // then is is a top level process
1726                 if(unlikely(!p->merged && !p->target))
1727                         p->target = apps_groups_default_target;
1728
1729                 // make sure all processes have a sortlist
1730                 if(unlikely(!p->sortlist))
1731                         p->sortlist = sortlist++;
1732         }
1733
1734         // give a target to all merged child processes
1735         found = 1;
1736         while(found) {
1737                 if(unlikely(debug)) loops++;
1738                 found = 0;
1739                 for(p = root_of_pids; p ; p = p->next) {
1740                         if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) {
1741                                 p->target = p->parent->target;
1742                                 found++;
1743
1744                                 if(debug || (p->target && p->target->debug))
1745                                         fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
1746                         }
1747                 }
1748         }
1749
1750         if(unlikely(debug))
1751                 fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops);
1752 }
1753
1754 long zero_all_targets(struct target *root) {
1755         struct target *w;
1756         long count = 0;
1757
1758         for (w = root; w ; w = w->next) {
1759                 count++;
1760
1761                 if(w->fds) free(w->fds);
1762                 w->fds = NULL;
1763
1764                 w->minflt = 0;
1765                 w->majflt = 0;
1766                 w->utime = 0;
1767                 w->stime = 0;
1768                 w->cminflt = 0;
1769                 w->cmajflt = 0;
1770                 w->cutime = 0;
1771                 w->cstime = 0;
1772                 w->num_threads = 0;
1773                 w->rss = 0;
1774                 w->processes = 0;
1775
1776                 w->statm_size = 0;
1777                 w->statm_resident = 0;
1778                 w->statm_share = 0;
1779                 w->statm_text = 0;
1780                 w->statm_lib = 0;
1781                 w->statm_data = 0;
1782                 w->statm_dirty = 0;
1783
1784                 w->io_logical_bytes_read = 0;
1785                 w->io_logical_bytes_written = 0;
1786                 w->io_read_calls = 0;
1787                 w->io_write_calls = 0;
1788                 w->io_storage_bytes_read = 0;
1789                 w->io_storage_bytes_written = 0;
1790                 w->io_cancelled_write_bytes = 0;
1791         }
1792
1793         return count;
1794 }
1795
1796 void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o) {
1797         (void)o;
1798
1799         if(unlikely(!w->fds)) {
1800                 w->fds = calloc(sizeof(int), (size_t) all_files_size);
1801                 if(unlikely(!w->fds))
1802                         error("Cannot allocate memory for fds in %s", w->name);
1803         }
1804
1805         if(likely(p->updated)) {
1806                 w->cutime  += p->cutime;
1807                 w->cstime  += p->cstime;
1808                 w->cminflt += p->cminflt;
1809                 w->cmajflt += p->cmajflt;
1810
1811                 w->utime  += p->utime;
1812                 w->stime  += p->stime;
1813                 w->minflt += p->minflt;
1814                 w->majflt += p->majflt;
1815
1816                 w->rss += p->rss;
1817
1818                 w->statm_size += p->statm_size;
1819                 w->statm_resident += p->statm_resident;
1820                 w->statm_share += p->statm_share;
1821                 w->statm_text += p->statm_text;
1822                 w->statm_lib += p->statm_lib;
1823                 w->statm_data += p->statm_data;
1824                 w->statm_dirty += p->statm_dirty;
1825
1826                 w->io_logical_bytes_read    += p->io_logical_bytes_read;
1827                 w->io_logical_bytes_written += p->io_logical_bytes_written;
1828                 w->io_read_calls            += p->io_read_calls;
1829                 w->io_write_calls           += p->io_write_calls;
1830                 w->io_storage_bytes_read    += p->io_storage_bytes_read;
1831                 w->io_storage_bytes_written += p->io_storage_bytes_written;
1832                 w->io_cancelled_write_bytes += p->io_cancelled_write_bytes;
1833
1834                 w->processes++;
1835                 w->num_threads += p->num_threads;
1836
1837                 if(likely(w->fds)) {
1838                         int c;
1839                         for(c = 0; c < p->fds_size ;c++) {
1840                                 if(p->fds[c] == 0) continue;
1841
1842                                 if(likely(p->fds[c] < all_files_size)) {
1843                                         if(w->fds) w->fds[p->fds[c]]++;
1844                                 }
1845                                 else
1846                                         error("Invalid fd number %d", p->fds[c]);
1847                         }
1848                 }
1849
1850                 if(unlikely(debug || w->debug))
1851                         fprintf(stderr, "apps.plugin: \taggregating '%s' pid %d on target '%s' utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
1852         }
1853 }
1854
1855 void count_targets_fds(struct target *root) {
1856         int c;
1857         struct target *w;
1858
1859         for (w = root; w ; w = w->next) {
1860                 if(!w->fds) continue;
1861
1862                 w->openfiles = 0;
1863                 w->openpipes = 0;
1864                 w->opensockets = 0;
1865                 w->openinotifies = 0;
1866                 w->openeventfds = 0;
1867                 w->opentimerfds = 0;
1868                 w->opensignalfds = 0;
1869                 w->openeventpolls = 0;
1870                 w->openother = 0;
1871
1872                 for(c = 1; c < all_files_size ;c++) {
1873                         if(w->fds[c] > 0)
1874                                 switch(all_files[c].type) {
1875                                 case FILETYPE_FILE:
1876                                         w->openfiles++;
1877                                         break;
1878
1879                                 case FILETYPE_PIPE:
1880                                         w->openpipes++;
1881                                         break;
1882
1883                                 case FILETYPE_SOCKET:
1884                                         w->opensockets++;
1885                                         break;
1886
1887                                 case FILETYPE_INOTIFY:
1888                                         w->openinotifies++;
1889                                         break;
1890
1891                                 case FILETYPE_EVENTFD:
1892                                         w->openeventfds++;
1893                                         break;
1894
1895                                 case FILETYPE_TIMERFD:
1896                                         w->opentimerfds++;
1897                                         break;
1898
1899                                 case FILETYPE_SIGNALFD:
1900                                         w->opensignalfds++;
1901                                         break;
1902
1903                                 case FILETYPE_EVENTPOLL:
1904                                         w->openeventpolls++;
1905                                         break;
1906
1907                                 default:
1908                                         w->openother++;
1909                         }
1910                 }
1911
1912                 free(w->fds);
1913                 w->fds = NULL;
1914         }
1915 }
1916
1917 void calculate_netdata_statistics(void) {
1918         apply_apps_groups_targets_inheritance();
1919
1920         zero_all_targets(users_root_target);
1921         zero_all_targets(groups_root_target);
1922         apps_groups_targets = zero_all_targets(apps_groups_root_target);
1923
1924         // this has to be done, before the cleanup
1925         struct pid_stat *p = NULL;
1926         struct target *w = NULL, *o = NULL;
1927
1928         // concentrate everything on the apps_groups_targets
1929         for(p = root_of_pids; p ; p = p->next) {
1930
1931                 // --------------------------------------------------------------------
1932                 // apps_groups targets
1933                 if(likely(p->target))
1934                         aggregate_pid_on_target(p->target, p, NULL);
1935                 else
1936                         error("pid %d %s was left without a target!", p->pid, p->comm);
1937
1938
1939                 // --------------------------------------------------------------------
1940                 // user targets
1941                 o = p->user_target;
1942                 if(likely(p->user_target && p->user_target->uid == p->uid))
1943                         w = p->user_target;
1944                 else {
1945                         if(unlikely(debug && p->user_target))
1946                                         fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %u (%s) to %u.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid);
1947
1948                         w = p->user_target = get_users_target(p->uid);
1949                 }
1950
1951                 if(likely(w))
1952                         aggregate_pid_on_target(w, p, o);
1953                 else
1954                         error("pid %d %s was left without a user target!", p->pid, p->comm);
1955
1956
1957                 // --------------------------------------------------------------------
1958                 // group targets
1959                 o = p->group_target;
1960                 if(likely(p->group_target && p->group_target->gid == p->gid))
1961                         w = p->group_target;
1962                 else {
1963                         if(unlikely(debug && p->group_target))
1964                                         fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %u (%s) to %u.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid);
1965
1966                         w = p->group_target = get_groups_target(p->gid);
1967                 }
1968
1969                 if(likely(w))
1970                         aggregate_pid_on_target(w, p, o);
1971                 else
1972                         error("pid %d %s was left without a group target!", p->pid, p->comm);
1973
1974         }
1975
1976         count_targets_fds(apps_groups_root_target);
1977         count_targets_fds(users_root_target);
1978         count_targets_fds(groups_root_target);
1979
1980         cleanup_exited_pids();
1981 }
1982
1983 // ----------------------------------------------------------------------------
1984 // update chart dimensions
1985
1986 unsigned long long send_resource_usage_to_netdata() {
1987         static struct timeval last = { 0, 0 };
1988         static struct rusage me_last;
1989
1990         struct timeval now;
1991         struct rusage me;
1992
1993         unsigned long long usec;
1994         unsigned long long cpuuser;
1995         unsigned long long cpusyst;
1996
1997         if(!last.tv_sec) {
1998                 gettimeofday(&last, NULL);
1999                 getrusage(RUSAGE_SELF, &me_last);
2000
2001                 // the first time, give a zero to allow
2002                 // netdata calibrate to the current time
2003                 // usec = update_every * 1000000ULL;
2004                 usec = 0ULL;
2005                 cpuuser = 0;
2006                 cpusyst = 0;
2007         }
2008         else {
2009                 gettimeofday(&now, NULL);
2010                 getrusage(RUSAGE_SELF, &me);
2011
2012                 usec = usecdiff(&now, &last);
2013                 cpuuser = me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec;
2014                 cpusyst = me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec;
2015
2016                 bcopy(&now, &last, sizeof(struct timeval));
2017                 bcopy(&me, &me_last, sizeof(struct rusage));
2018         }
2019
2020         fprintf(stdout, "BEGIN netdata.apps_cpu %llu\n", usec);
2021         fprintf(stdout, "SET user = %llu\n", cpuuser);
2022         fprintf(stdout, "SET system = %llu\n", cpusyst);
2023         fprintf(stdout, "END\n");
2024
2025         fprintf(stdout, "BEGIN netdata.apps_files %llu\n", usec);
2026         fprintf(stdout, "SET files = %llu\n", file_counter);
2027         fprintf(stdout, "SET pids = %ld\n", all_pids_count);
2028         fprintf(stdout, "SET fds = %d\n", all_files_len);
2029         fprintf(stdout, "SET targets = %ld\n", apps_groups_targets);
2030         fprintf(stdout, "END\n");
2031
2032         return usec;
2033 }
2034
2035 void send_collected_data_to_netdata(struct target *root, const char *type, unsigned long long usec)
2036 {
2037         struct target *w;
2038         int childs = include_exited_childs;
2039
2040         {
2041                 // childs processing introduces spikes
2042                 // here we try to eliminate them by disabling childs processing either for specific dimensions
2043                 // or entirely. Of course, either way, we disable it just a single iteration.
2044
2045                 unsigned long long max = update_every * processors * 100 * 100;
2046                 unsigned long long utime = 0, cutime = 0, stime = 0, cstime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0;
2047
2048                 for (w = root; w ; w = w->next) {
2049                         if(w->target || (!w->processes && !w->exposed)) continue;
2050
2051                         if((w->utime + w->stime + w->cutime + w->cstime) > max) {
2052 #ifdef NETDATA_INTERNAL_CHECKS
2053                                 log_date(stderr);
2054                                 fprintf(stderr, "Prevented a spike on target '%s', reported CPU time = %llu (without childs = %llu)\n", w->name, (w->utime + w->stime + w->cutime + w->cstime) / 100, (w->utime + w->stime) / 100);
2055 #endif
2056                                 w->cutime = w->cstime = w->cminflt = w->majflt = 0;
2057                         }
2058
2059                         utime   += w->utime;
2060                         cutime  += w->cutime;
2061                         stime   += w->stime;
2062                         cstime  += w->cstime;
2063                         minflt  += w->minflt;
2064                         cminflt += w->cminflt;
2065                         majflt  += w->majflt;
2066                         cmajflt += w->cmajflt;
2067                 }
2068
2069                 if((utime + stime + cutime + cstime) > max) {
2070                         childs = 0;
2071 #ifdef NETDATA_INTERNAL_CHECKS
2072                         log_date(stderr);
2073                         fprintf(stderr, "Prevented a spike because the total CPU of all dimensions = %llu (without childs = %llu)\n", (utime + stime + cutime + cstime) / 100, (utime + stime) / 100);
2074 #endif
2075                 }
2076
2077                 if((utime + stime) > max) {
2078                         childs = 0;
2079                         unsigned long long multiplier = max, divider = utime + stime;
2080                         for (w = root; w ; w = w->next) {
2081                                 w->utime  = w->utime * multiplier / divider;
2082                                 w->stime  = w->stime * multiplier / divider;
2083                                 w->minflt = w->minflt * multiplier / divider;
2084                                 w->majflt = w->majflt * multiplier / divider;
2085                         }
2086
2087 #ifdef NETDATA_INTERNAL_CHECKS
2088                         log_date(stderr);
2089                         fprintf(stderr, "Reduced processes utilization (without childs) by %0.2f%% (CPU was %llu)\n", (float)((utime + stime - max * 100.0)/(float)max), (utime + stime) / 100);
2090 #endif
2091                 }
2092
2093         }
2094
2095         fprintf(stdout, "BEGIN %s.cpu %llu\n", type, usec);
2096         for (w = root; w ; w = w->next) {
2097                 if(w->target || (!w->processes && !w->exposed)) continue;
2098
2099                 fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->stime + (childs?(w->cutime + w->cstime):0));
2100         }
2101         fprintf(stdout, "END\n");
2102
2103         fprintf(stdout, "BEGIN %s.cpu_user %llu\n", type, usec);
2104         for (w = root; w ; w = w->next) {
2105                 if(w->target || (!w->processes && !w->exposed)) continue;
2106
2107                 fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + (childs?(w->cutime):0));
2108         }
2109         fprintf(stdout, "END\n");
2110
2111         fprintf(stdout, "BEGIN %s.cpu_system %llu\n", type, usec);
2112         for (w = root; w ; w = w->next) {
2113                 if(w->target || (!w->processes && !w->exposed)) continue;
2114
2115                 fprintf(stdout, "SET %s = %llu\n", w->name, w->stime + (childs?(w->cstime):0));
2116         }
2117         fprintf(stdout, "END\n");
2118
2119         fprintf(stdout, "BEGIN %s.threads %llu\n", type, usec);
2120         for (w = root; w ; w = w->next) {
2121                 if(w->target || (!w->processes && !w->exposed)) continue;
2122
2123                 fprintf(stdout, "SET %s = %llu\n", w->name, w->num_threads);
2124         }
2125         fprintf(stdout, "END\n");
2126
2127         fprintf(stdout, "BEGIN %s.processes %llu\n", type, usec);
2128         for (w = root; w ; w = w->next) {
2129                 if(w->target || (!w->processes && !w->exposed)) continue;
2130
2131                 fprintf(stdout, "SET %s = %lu\n", w->name, w->processes);
2132         }
2133         fprintf(stdout, "END\n");
2134
2135         fprintf(stdout, "BEGIN %s.mem %llu\n", type, usec);
2136         for (w = root; w ; w = w->next) {
2137                 if(w->target || (!w->processes && !w->exposed)) continue;
2138
2139                 fprintf(stdout, "SET %s = %lld\n", w->name, (long long)w->statm_resident - (long long)w->statm_share);
2140         }
2141         fprintf(stdout, "END\n");
2142
2143         fprintf(stdout, "BEGIN %s.minor_faults %llu\n", type, usec);
2144         for (w = root; w ; w = w->next) {
2145                 if(w->target || (!w->processes && !w->exposed)) continue;
2146
2147                 fprintf(stdout, "SET %s = %llu\n", w->name, w->minflt + (childs?(w->cminflt):0));
2148         }
2149         fprintf(stdout, "END\n");
2150
2151         fprintf(stdout, "BEGIN %s.major_faults %llu\n", type, usec);
2152         for (w = root; w ; w = w->next) {
2153                 if(w->target || (!w->processes && !w->exposed)) continue;
2154
2155                 fprintf(stdout, "SET %s = %llu\n", w->name, w->majflt + (childs?(w->cmajflt):0));
2156         }
2157         fprintf(stdout, "END\n");
2158
2159         fprintf(stdout, "BEGIN %s.lreads %llu\n", type, usec);
2160         for (w = root; w ; w = w->next) {
2161                 if(w->target || (!w->processes && !w->exposed)) continue;
2162
2163                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_read);
2164         }
2165         fprintf(stdout, "END\n");
2166
2167         fprintf(stdout, "BEGIN %s.lwrites %llu\n", type, usec);
2168         for (w = root; w ; w = w->next) {
2169                 if(w->target || (!w->processes && !w->exposed)) continue;
2170
2171                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_written);
2172         }
2173         fprintf(stdout, "END\n");
2174
2175         fprintf(stdout, "BEGIN %s.preads %llu\n", type, usec);
2176         for (w = root; w ; w = w->next) {
2177                 if(w->target || (!w->processes && !w->exposed)) continue;
2178
2179                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_read);
2180         }
2181         fprintf(stdout, "END\n");
2182
2183         fprintf(stdout, "BEGIN %s.pwrites %llu\n", type, usec);
2184         for (w = root; w ; w = w->next) {
2185                 if(w->target || (!w->processes && !w->exposed)) continue;
2186
2187                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_written);
2188         }
2189         fprintf(stdout, "END\n");
2190
2191         fprintf(stdout, "BEGIN %s.files %llu\n", type, usec);
2192         for (w = root; w ; w = w->next) {
2193                 if(w->target || (!w->processes && !w->exposed)) continue;
2194
2195                 fprintf(stdout, "SET %s = %llu\n", w->name, w->openfiles);
2196         }
2197         fprintf(stdout, "END\n");
2198
2199         fprintf(stdout, "BEGIN %s.sockets %llu\n", type, usec);
2200         for (w = root; w ; w = w->next) {
2201                 if(w->target || (!w->processes && !w->exposed)) continue;
2202
2203                 fprintf(stdout, "SET %s = %llu\n", w->name, w->opensockets);
2204         }
2205         fprintf(stdout, "END\n");
2206
2207         fprintf(stdout, "BEGIN %s.pipes %llu\n", type, usec);
2208         for (w = root; w ; w = w->next) {
2209                 if(w->target || (!w->processes && !w->exposed)) continue;
2210
2211                 fprintf(stdout, "SET %s = %llu\n", w->name, w->openpipes);
2212         }
2213         fprintf(stdout, "END\n");
2214
2215         fflush(stdout);
2216 }
2217
2218
2219 // ----------------------------------------------------------------------------
2220 // generate the charts
2221
2222 void send_charts_updates_to_netdata(struct target *root, const char *type, const char *title)
2223 {
2224         struct target *w;
2225         int newly_added = 0;
2226
2227         for(w = root ; w ; w = w->next)
2228                 if(!w->exposed && w->processes) {
2229                         newly_added++;
2230                         w->exposed = 1;
2231                         if(debug || w->debug) fprintf(stderr, "apps.plugin: %s just added - regenerating charts.\n", w->name);
2232                 }
2233
2234         // nothing more to show
2235         if(!newly_added) return;
2236
2237         // we have something new to show
2238         // update the charts
2239         fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
2240         for (w = root; w ; w = w->next) {
2241                 if(w->target || (!w->processes && !w->exposed)) continue;
2242
2243                 fprintf(stdout, "DIMENSION %s '' absolute 1 %u %s\n", w->name, hz, w->hidden ? "hidden,noreset" : "noreset");
2244         }
2245
2246         fprintf(stdout, "CHART %s.mem '' '%s Dedicated Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every);
2247         for (w = root; w ; w = w->next) {
2248                 if(w->target || (!w->processes && !w->exposed)) continue;
2249
2250                 fprintf(stdout, "DIMENSION %s '' absolute %ld %ld noreset\n", w->name, sysconf(_SC_PAGESIZE), 1024L*1024L);
2251         }
2252
2253         fprintf(stdout, "CHART %s.threads '' '%s Threads' 'threads' processes %s.threads stacked 20005 %d\n", type, title, type, update_every);
2254         for (w = root; w ; w = w->next) {
2255                 if(w->target || (!w->processes && !w->exposed)) continue;
2256
2257                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2258         }
2259
2260         fprintf(stdout, "CHART %s.processes '' '%s Processes' 'processes' processes %s.processes stacked 20004 %d\n", type, title, type, update_every);
2261         for (w = root; w ; w = w->next) {
2262                 if(w->target || (!w->processes && !w->exposed)) continue;
2263
2264                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2265         }
2266
2267         fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
2268         for (w = root; w ; w = w->next) {
2269                 if(w->target || (!w->processes && !w->exposed)) continue;
2270
2271                 fprintf(stdout, "DIMENSION %s '' absolute 1 %u noreset\n", w->name, hz);
2272         }
2273
2274         fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
2275         for (w = root; w ; w = w->next) {
2276                 if(w->target || (!w->processes && !w->exposed)) continue;
2277
2278                 fprintf(stdout, "DIMENSION %s '' absolute 1 %u noreset\n", w->name, hz);
2279         }
2280
2281         fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20010 %d\n", type, title, type, update_every);
2282         for (w = root; w ; w = w->next) {
2283                 if(w->target || (!w->processes && !w->exposed)) continue;
2284
2285                 fprintf(stdout, "DIMENSION %s '' absolute 1 100 noreset\n", w->name);
2286         }
2287
2288         fprintf(stdout, "CHART %s.minor_faults '' '%s Minor Page Faults' 'page faults/s' mem %s.minor_faults stacked 20011 %d\n", type, title, type, update_every);
2289         for (w = root; w ; w = w->next) {
2290                 if(w->target || (!w->processes && !w->exposed)) continue;
2291
2292                 fprintf(stdout, "DIMENSION %s '' absolute 1 100 noreset\n", w->name);
2293         }
2294
2295         fprintf(stdout, "CHART %s.lreads '' '%s Disk Logical Reads' 'kilobytes/s' disk %s.lreads stacked 20042 %d\n", type, title, type, update_every);
2296         for (w = root; w ; w = w->next) {
2297                 if(w->target || (!w->processes && !w->exposed)) continue;
2298
2299                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2300         }
2301
2302         fprintf(stdout, "CHART %s.lwrites '' '%s I/O Logical Writes' 'kilobytes/s' disk %s.lwrites stacked 20042 %d\n", type, title, type, update_every);
2303         for (w = root; w ; w = w->next) {
2304                 if(w->target || (!w->processes && !w->exposed)) continue;
2305
2306                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2307         }
2308
2309         fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'kilobytes/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every);
2310         for (w = root; w ; w = w->next) {
2311                 if(w->target || (!w->processes && !w->exposed)) continue;
2312
2313                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2314         }
2315
2316         fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'kilobytes/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every);
2317         for (w = root; w ; w = w->next) {
2318                 if(w->target || (!w->processes && !w->exposed)) continue;
2319
2320                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2321         }
2322
2323         fprintf(stdout, "CHART %s.files '' '%s Open Files' 'open files' disk %s.files stacked 20050 %d\n", type, title, type, update_every);
2324         for (w = root; w ; w = w->next) {
2325                 if(w->target || (!w->processes && !w->exposed)) continue;
2326
2327                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2328         }
2329
2330         fprintf(stdout, "CHART %s.sockets '' '%s Open Sockets' 'open sockets' net %s.sockets stacked 20051 %d\n", type, title, type, update_every);
2331         for (w = root; w ; w = w->next) {
2332                 if(w->target || (!w->processes && !w->exposed)) continue;
2333
2334                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2335         }
2336
2337         fprintf(stdout, "CHART %s.pipes '' '%s Pipes' 'open pipes' processes %s.pipes stacked 20053 %d\n", type, title, type, update_every);
2338         for (w = root; w ; w = w->next) {
2339                 if(w->target || (!w->processes && !w->exposed)) continue;
2340
2341                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2342         }
2343 }
2344
2345
2346 // ----------------------------------------------------------------------------
2347 // parse command line arguments
2348
2349 void parse_args(int argc, char **argv)
2350 {
2351         int i, freq = 0;
2352         char *name = NULL;
2353
2354         for(i = 1; i < argc; i++) {
2355                 if(!freq) {
2356                         int n = atoi(argv[i]);
2357                         if(n > 0) {
2358                                 freq = n;
2359                                 continue;
2360                         }
2361                 }
2362
2363                 if(strcmp("debug", argv[i]) == 0) {
2364                         debug = 1;
2365                         // debug_flags = 0xffffffff;
2366                         continue;
2367                 }
2368
2369                 if(strcmp("no-childs", argv[i]) == 0) {
2370                         include_exited_childs = 0;
2371                         continue;
2372                 }
2373
2374                 if(strcmp("with-childs", argv[i]) == 0) {
2375                         include_exited_childs = 1;
2376                         continue;
2377                 }
2378
2379                 if(!name) {
2380                         name = argv[i];
2381                         continue;
2382                 }
2383
2384                 error("Cannot understand option %s", argv[i]);
2385                 exit(1);
2386         }
2387
2388         if(freq > 0) update_every = freq;
2389         if(!name) name = "groups";
2390
2391         if(read_apps_groups_conf(name)) {
2392                 error("Cannot read process groups %s", name);
2393                 exit(1);
2394         }
2395 }
2396
2397 int main(int argc, char **argv)
2398 {
2399         // debug_flags = D_PROCFILE;
2400
2401         // set the name for logging
2402         program_name = "apps.plugin";
2403
2404         // disable syslog for apps.plugin
2405         error_log_syslog = 0;
2406
2407         // set errors flood protection to 100 logs per hour
2408         error_log_errors_per_period = 100;
2409         error_log_throttle_period = 3600;
2410
2411         host_prefix = getenv("NETDATA_HOST_PREFIX");
2412         if(host_prefix == NULL) {
2413                 info("NETDATA_HOST_PREFIX is not passed from netdata");
2414                 host_prefix = "";
2415         }
2416         else info("Found NETDATA_HOST_PREFIX='%s'", host_prefix);
2417
2418         config_dir = getenv("NETDATA_CONFIG_DIR");
2419         if(config_dir == NULL) {
2420                 info("NETDATA_CONFIG_DIR is not passed from netdata");
2421                 config_dir = CONFIG_DIR;
2422         }
2423         else info("Found NETDATA_CONFIG_DIR='%s'", config_dir);
2424
2425 #ifdef NETDATA_INTERNAL_CHECKS
2426         if(debug_flags != 0) {
2427                 struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
2428                 if(setrlimit(RLIMIT_CORE, &rl) != 0)
2429                         info("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
2430                 prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
2431         }
2432 #endif /* NETDATA_INTERNAL_CHECKS */
2433
2434         procfile_adaptive_initial_allocation = 1;
2435
2436         time_t started_t = time(NULL);
2437         time_t current_t;
2438         get_HZ();
2439         pid_max = get_system_pid_max();
2440         processors = get_system_cpus();
2441
2442         parse_args(argc, argv);
2443
2444         all_pids_sortlist = calloc(sizeof(pid_t), (size_t)pid_max);
2445         if(!all_pids_sortlist) {
2446                 error("Cannot allocate %lu bytes of memory.", sizeof(pid_t) * pid_max);
2447                 printf("DISABLE\n");
2448                 exit(1);
2449         }
2450
2451         all_pids = calloc(sizeof(struct pid_stat *), (size_t) pid_max);
2452         if(!all_pids) {
2453                 error("Cannot allocate %lu bytes of memory.", sizeof(struct pid_stat *) * pid_max);
2454                 printf("DISABLE\n");
2455                 exit(1);
2456         }
2457
2458         fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n"
2459                         "DIMENSION user '' incremental 1 1000\n"
2460                         "DIMENSION system '' incremental 1 1000\n"
2461                         "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n"
2462                         "DIMENSION files '' incremental 1 1\n"
2463                         "DIMENSION pids '' absolute 1 1\n"
2464                         "DIMENSION fds '' absolute 1 1\n"
2465                         "DIMENSION targets '' absolute 1 1\n", update_every);
2466
2467 #ifndef PROFILING_MODE
2468         unsigned long long sunext = (time(NULL) - (time(NULL) % update_every) + update_every) * 1000000ULL;
2469         unsigned long long sunow;
2470 #endif /* PROFILING_MODE */
2471
2472         global_iterations_counter = 1;
2473         for(;1; global_iterations_counter++) {
2474 #ifndef PROFILING_MODE
2475                 // delay until it is our time to run
2476                 while((sunow = timems()) < sunext)
2477                         usecsleep(sunext - sunow);
2478
2479                 // find the next time we need to run
2480                 while(timems() > sunext)
2481                         sunext += update_every * 1000000ULL;
2482 #endif /* PROFILING_MODE */
2483
2484                 if(!collect_data_for_all_processes_from_proc()) {
2485                         error("Cannot collect /proc data for running processes. Disabling apps.plugin...");
2486                         printf("DISABLE\n");
2487                         exit(1);
2488                 }
2489
2490                 calculate_netdata_statistics();
2491
2492                 unsigned long long dt = send_resource_usage_to_netdata();
2493
2494                 // this is smart enough to show only newly added apps, when needed
2495                 send_charts_updates_to_netdata(apps_groups_root_target, "apps", "Apps");
2496                 send_charts_updates_to_netdata(users_root_target, "users", "Users");
2497                 send_charts_updates_to_netdata(groups_root_target, "groups", "User Groups");
2498
2499                 send_collected_data_to_netdata(apps_groups_root_target, "apps", dt);
2500                 send_collected_data_to_netdata(users_root_target, "users", dt);
2501                 send_collected_data_to_netdata(groups_root_target, "groups", dt);
2502
2503                 if(unlikely(debug))
2504                         fprintf(stderr, "apps.plugin: done Loop No %llu\n", global_iterations_counter);
2505
2506                 current_t = time(NULL);
2507
2508 #ifndef PROFILING_MODE
2509                 // restart check (14400 seconds)
2510                 if(current_t - started_t > 14400) exit(0);
2511 #else
2512                 if(current_t - started_t > 10) exit(0);
2513 #endif /* PROFILING_MODE */
2514         }
2515 }