]> arthur.barton.de Git - netdata.git/blob - src/apps_plugin.c
Merge pull request #667 from ktsaou/master
[netdata.git] / src / apps_plugin.c
1 #ifdef HAVE_CONFIG_H
2 #include <config.h>
3 #endif
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <time.h>
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/time.h>
11 #include <sys/wait.h>
12 #include <sys/stat.h>
13
14 #include <sys/resource.h>
15 #include <sys/stat.h>
16
17 #include <errno.h>
18 #include <stdarg.h>
19 #include <locale.h>
20 #include <ctype.h>
21 #include <fcntl.h>
22
23 #include <malloc.h>
24 #include <dirent.h>
25 #include <arpa/inet.h>
26
27 #include <sys/types.h>
28 #include <pwd.h>
29 #include <grp.h>
30
31 #include "avl.h"
32
33 #include "common.h"
34 #include "log.h"
35 #include "procfile.h"
36 #include "../config.h"
37
38 #ifdef NETDATA_INTERNAL_CHECKS
39 #include <sys/prctl.h>
40 #endif
41
42 #define MAX_COMPARE_NAME 100
43 #define MAX_NAME 100
44 #define MAX_CMDLINE 1024
45
46 int processors = 1;
47 pid_t pid_max = 32768;
48 int debug = 0;
49
50 int update_every = 1;
51 unsigned long long global_iterations_counter = 1;
52 unsigned long long file_counter = 0;
53 int proc_pid_cmdline_is_needed = 0;
54 int include_exited_childs = 1;
55 char *host_prefix = "";
56 char *config_dir = CONFIG_DIR;
57
58 pid_t *all_pids_sortlist = NULL;
59
60 // ----------------------------------------------------------------------------
61
// Terminate the plugin with exit status 'ret'.
// Nothing is cleaned up here beyond what exit() itself does.
void netdata_cleanup_and_exit(int ret)
{
        exit(ret);
}
65
66
67 // ----------------------------------------------------------------------------
68 // system functions
69 // to retrieve settings of the system
70
71 long get_system_cpus(void) {
72         procfile *ff = NULL;
73
74         int processors = 0;
75
76         char filename[FILENAME_MAX + 1];
77         snprintfz(filename, FILENAME_MAX, "%s/proc/stat", host_prefix);
78
79         ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT);
80         if(!ff) return 1;
81
82         ff = procfile_readall(ff);
83         if(!ff) {
84                 procfile_close(ff);
85                 return 1;
86         }
87
88         unsigned int i;
89         for(i = 0; i < procfile_lines(ff); i++) {
90                 if(!procfile_linewords(ff, i)) continue;
91
92                 if(strncmp(procfile_lineword(ff, i, 0), "cpu", 3) == 0) processors++;
93         }
94         processors--;
95         if(processors < 1) processors = 1;
96
97         procfile_close(ff);
98         return processors;
99 }
100
101 pid_t get_system_pid_max(void) {
102         procfile *ff = NULL;
103         pid_t mpid = 32768;
104
105         char filename[FILENAME_MAX + 1];
106         snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", host_prefix);
107         ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT);
108         if(!ff) return mpid;
109
110         ff = procfile_readall(ff);
111         if(!ff) {
112                 procfile_close(ff);
113                 return mpid;
114         }
115
116         mpid = (pid_t)atoi(procfile_lineword(ff, 0, 0));
117         if(!mpid) mpid = 32768;
118
119         procfile_close(ff);
120         return mpid;
121 }
122
123 // ----------------------------------------------------------------------------
124 // target
125 // target is the structure that process data are aggregated
126
127 struct target {
128         char compare[MAX_COMPARE_NAME + 1];
129         uint32_t comparehash;
130         size_t comparelen;
131
132         char id[MAX_NAME + 1];
133         uint32_t idhash;
134
135         char name[MAX_NAME + 1];
136
137         uid_t uid;
138         gid_t gid;
139
140         unsigned long long minflt;
141         unsigned long long cminflt;
142         unsigned long long majflt;
143         unsigned long long cmajflt;
144         unsigned long long utime;
145         unsigned long long stime;
146         unsigned long long cutime;
147         unsigned long long cstime;
148         unsigned long long num_threads;
149         unsigned long long rss;
150
151         unsigned long long statm_size;
152         unsigned long long statm_resident;
153         unsigned long long statm_share;
154         unsigned long long statm_text;
155         unsigned long long statm_lib;
156         unsigned long long statm_data;
157         unsigned long long statm_dirty;
158
159         unsigned long long io_logical_bytes_read;
160         unsigned long long io_logical_bytes_written;
161         unsigned long long io_read_calls;
162         unsigned long long io_write_calls;
163         unsigned long long io_storage_bytes_read;
164         unsigned long long io_storage_bytes_written;
165         unsigned long long io_cancelled_write_bytes;
166
167         int *fds;
168         unsigned long long openfiles;
169         unsigned long long openpipes;
170         unsigned long long opensockets;
171         unsigned long long openinotifies;
172         unsigned long long openeventfds;
173         unsigned long long opentimerfds;
174         unsigned long long opensignalfds;
175         unsigned long long openeventpolls;
176         unsigned long long openother;
177
178         unsigned long processes;        // how many processes have been merged to this
179         int exposed;                            // if set, we have sent this to netdata
180         int hidden;                                     // if set, we set the hidden flag on the dimension
181         int debug;
182         int ends_with;
183         int starts_with;            // if set, the compare string matches only the
184                                                                 // beginning of the command
185
186         struct target *target;          // the one that will be reported to netdata
187         struct target *next;
188 };
189
190
// ----------------------------------------------------------------------------
// apps_groups.conf
// processes are aggregated in groups, to keep the number of dimensions limited

// the targets of apps_groups.conf: the head of their list, the target that is
// named "other" by read_apps_groups_conf(), and a counter
struct target *apps_groups_root_target = NULL, *apps_groups_default_target = NULL;
long apps_groups_targets = 0;

// the heads of the lists of the per user (uid) and per group (gid) targets
struct target *users_root_target = NULL, *groups_root_target = NULL;
201
202 struct target *get_users_target(uid_t uid)
203 {
204         struct target *w;
205         for(w = users_root_target ; w ; w = w->next)
206                 if(w->uid == uid) return w;
207
208         w = calloc(sizeof(struct target), 1);
209         if(unlikely(!w)) {
210                 error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target));
211                 return NULL;
212         }
213
214         snprintfz(w->compare, MAX_COMPARE_NAME, "%u", uid);
215         w->comparehash = simple_hash(w->compare);
216         w->comparelen = strlen(w->compare);
217
218         snprintfz(w->id, MAX_NAME, "%u", uid);
219         w->idhash = simple_hash(w->id);
220
221         struct passwd *pw = getpwuid(uid);
222         if(!pw)
223                 snprintfz(w->name, MAX_NAME, "%u", uid);
224         else
225                 snprintfz(w->name, MAX_NAME, "%s", pw->pw_name);
226
227         netdata_fix_chart_name(w->name);
228
229         w->uid = uid;
230
231         w->next = users_root_target;
232         users_root_target = w;
233
234         if(unlikely(debug))
235                 fprintf(stderr, "apps.plugin: added uid %u ('%s') target\n", w->uid, w->name);
236
237         return w;
238 }
239
240 struct target *get_groups_target(gid_t gid)
241 {
242         struct target *w;
243         for(w = groups_root_target ; w ; w = w->next)
244                 if(w->gid == gid) return w;
245
246         w = calloc(sizeof(struct target), 1);
247         if(unlikely(!w)) {
248                 error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target));
249                 return NULL;
250         }
251
252         snprintfz(w->compare, MAX_COMPARE_NAME, "%u", gid);
253         w->comparehash = simple_hash(w->compare);
254         w->comparelen = strlen(w->compare);
255
256         snprintfz(w->id, MAX_NAME, "%u", gid);
257         w->idhash = simple_hash(w->id);
258
259         struct group *gr = getgrgid(gid);
260         if(!gr)
261                 snprintfz(w->name, MAX_NAME, "%u", gid);
262         else
263                 snprintfz(w->name, MAX_NAME, "%s", gr->gr_name);
264
265         netdata_fix_chart_name(w->name);
266
267         w->gid = gid;
268
269         w->next = groups_root_target;
270         groups_root_target = w;
271
272         if(unlikely(debug))
273                 fprintf(stderr, "apps.plugin: added gid %u ('%s') target\n", w->gid, w->name);
274
275         return w;
276 }
277
278 // find or create a new target
279 // there are targets that are just aggregated to other target (the second argument)
280 struct target *get_apps_groups_target(const char *id, struct target *target)
281 {
282         int tdebug = 0, thidden = 0, ends_with = 0;
283         const char *nid = id;
284
285         while(nid[0] == '-' || nid[0] == '+' || nid[0] == '*') {
286                 if(nid[0] == '-') thidden = 1;
287                 if(nid[0] == '+') tdebug = 1;
288                 if(nid[0] == '*') ends_with = 1;
289                 nid++;
290         }
291         uint32_t hash = simple_hash(id);
292
293         struct target *w, *last = apps_groups_root_target;
294         for(w = apps_groups_root_target ; w ; w = w->next) {
295                 if(w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0)
296                         return w;
297
298                 last = w;
299         }
300
301         w = calloc(sizeof(struct target), 1);
302         if(unlikely(!w)) {
303                 error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target));
304                 return NULL;
305         }
306
307         strncpyz(w->id, nid, MAX_NAME);
308         w->idhash = simple_hash(w->id);
309
310         strncpyz(w->name, nid, MAX_NAME);
311
312         strncpyz(w->compare, nid, MAX_COMPARE_NAME);
313         int len = strlen(w->compare);
314         if(w->compare[len - 1] == '*') {
315                 w->compare[len - 1] = '\0';
316                 w->starts_with = 1;
317         }
318         w->ends_with = ends_with;
319
320         if(w->starts_with && w->ends_with)
321                 proc_pid_cmdline_is_needed = 1;
322
323         w->comparehash = simple_hash(w->compare);
324         w->comparelen = strlen(w->compare);
325
326         w->hidden = thidden;
327         w->debug = tdebug;
328         w->target = target;
329
330         // append it, to maintain the order in apps_groups.conf
331         if(last) last->next = w;
332         else apps_groups_root_target = w;
333
334         if(unlikely(debug))
335                 fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n"
336                         , w->id
337                                 , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact"))
338                                 , w->target?w->target->id:w->id
339                                 , (w->hidden)?"hidden":"-"
340                                 , (w->debug)?"debug":"-"
341                 );
342
343         return w;
344 }
345
346 // read the apps_groups.conf file
347 int read_apps_groups_conf(const char *name)
348 {
349         char filename[FILENAME_MAX + 1];
350
351         snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, name);
352
353         if(unlikely(debug))
354                 fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename);
355
356         // ----------------------------------------
357
358         procfile *ff = procfile_open(filename, " :\t", PROCFILE_FLAG_DEFAULT);
359         if(!ff) return 1;
360
361         procfile_set_quotes(ff, "'\"");
362
363         ff = procfile_readall(ff);
364         if(!ff) {
365                 procfile_close(ff);
366                 return 1;
367         }
368
369         unsigned long line, lines = procfile_lines(ff);
370
371         for(line = 0; line < lines ;line++) {
372                 unsigned long word, words = procfile_linewords(ff, line);
373                 struct target *w = NULL;
374
375                 char *t = procfile_lineword(ff, line, 0);
376                 if(!t || !*t) continue;
377
378                 for(word = 0; word < words ;word++) {
379                         char *s = procfile_lineword(ff, line, word);
380                         if(!s || !*s) continue;
381                         if(*s == '#') break;
382
383                         if(t == s) continue;
384
385                         struct target *n = get_apps_groups_target(s, w);
386                         if(!n) {
387                                 error("Cannot create target '%s' (line %lu, word %lu)", s, line, word);
388                                 continue;
389                         }
390
391                         if(!w) w = n;
392                 }
393
394                 if(w) {
395                         int tdebug = 0, thidden = 0;
396
397                         while(t[0] == '-' || t[0] == '+') {
398                                 if(t[0] == '-') thidden = 1;
399                                 if(t[0] == '+') tdebug = 1;
400                                 t++;
401                         }
402
403                         strncpyz(w->name, t, MAX_NAME);
404                         w->hidden = thidden;
405                         w->debug = tdebug;
406
407                         if(unlikely(debug))
408                                 fprintf(stderr, "apps.plugin: AGGREGATION TARGET NAME '%s' on ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n"
409                                                 , w->name
410                                                 , w->id
411                                                 , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact"))
412                                                 , w->target?w->target->id:w->id
413                                                 , (w->hidden)?"hidden":"-"
414                                                 , (w->debug)?"debug":"-"
415                                 );
416                 }
417         }
418
419         procfile_close(ff);
420
421         apps_groups_default_target = get_apps_groups_target("p+!o@w#e$i^r&7*5(-i)l-o_", NULL); // match nothing
422         if(!apps_groups_default_target)
423                 error("Cannot create default target");
424         else
425                 strncpyz(apps_groups_default_target->name, "other", MAX_NAME);
426
427         return 0;
428 }
429
430
431 // ----------------------------------------------------------------------------
432 // data to store for each pid
433 // see: man proc
434
435 struct pid_stat {
436         int32_t pid;
437         char comm[MAX_COMPARE_NAME + 1];
438         char cmdline[MAX_CMDLINE + 1];
439
440         // char state;
441         int32_t ppid;
442         // int32_t pgrp;
443         // int32_t session;
444         // int32_t tty_nr;
445         // int32_t tpgid;
446         // uint64_t flags;
447
448         // these are raw values collected
449         unsigned long long minflt_raw;
450         unsigned long long cminflt_raw;
451         unsigned long long majflt_raw;
452         unsigned long long cmajflt_raw;
453         unsigned long long utime_raw;
454         unsigned long long stime_raw;
455         unsigned long long cutime_raw;
456         unsigned long long cstime_raw;
457
458         // these are rates
459         unsigned long long minflt;
460         unsigned long long cminflt;
461         unsigned long long majflt;
462         unsigned long long cmajflt;
463         unsigned long long utime;
464         unsigned long long stime;
465         unsigned long long cutime;
466         unsigned long long cstime;
467
468         // int64_t priority;
469         // int64_t nice;
470         int32_t num_threads;
471         // int64_t itrealvalue;
472         // unsigned long long starttime;
473         // unsigned long long vsize;
474         unsigned long long rss;
475         // unsigned long long rsslim;
476         // unsigned long long starcode;
477         // unsigned long long endcode;
478         // unsigned long long startstack;
479         // unsigned long long kstkesp;
480         // unsigned long long kstkeip;
481         // uint64_t signal;
482         // uint64_t blocked;
483         // uint64_t sigignore;
484         // uint64_t sigcatch;
485         // uint64_t wchan;
486         // uint64_t nswap;
487         // uint64_t cnswap;
488         // int32_t exit_signal;
489         // int32_t processor;
490         // uint32_t rt_priority;
491         // uint32_t policy;
492         // unsigned long long delayacct_blkio_ticks;
493         // uint64_t guest_time;
494         // int64_t cguest_time;
495
496         uid_t uid;
497         gid_t gid;
498
499         unsigned long long statm_size;
500         unsigned long long statm_resident;
501         unsigned long long statm_share;
502         unsigned long long statm_text;
503         unsigned long long statm_lib;
504         unsigned long long statm_data;
505         unsigned long long statm_dirty;
506
507         unsigned long long io_logical_bytes_read_raw;
508         unsigned long long io_logical_bytes_written_raw;
509         unsigned long long io_read_calls_raw;
510         unsigned long long io_write_calls_raw;
511         unsigned long long io_storage_bytes_read_raw;
512         unsigned long long io_storage_bytes_written_raw;
513         unsigned long long io_cancelled_write_bytes_raw;
514
515         unsigned long long io_logical_bytes_read;
516         unsigned long long io_logical_bytes_written;
517         unsigned long long io_read_calls;
518         unsigned long long io_write_calls;
519         unsigned long long io_storage_bytes_read;
520         unsigned long long io_storage_bytes_written;
521         unsigned long long io_cancelled_write_bytes;
522
523         int *fds;                                               // array of fds it uses
524         int fds_size;                                   // the size of the fds array
525
526         int children_count;                             // number of processes directly referencing this
527         int keep;                                               // 1 when we need to keep this process in memory even after it exited
528         int keeploops;                                  // increases by 1 every time keep is 1 and updated 0
529         int updated;                                    // 1 when the process is currently running
530         int merged;                                             // 1 when it has been merged to its parent
531         int new_entry;                                  // 1 when this is a new process, just saw for the first time
532         int read;                                               // 1 when we have already read this process for this iteration
533         int sortlist;                                   // higher numbers = top on the process tree
534                                                                         // each process gets a unique number
535
536         struct target *target;                  // app_groups.conf targets
537         struct target *user_target;             // uid based targets
538         struct target *group_target;    // gid based targets
539
540         unsigned long long stat_collected_usec;
541         unsigned long long last_stat_collected_usec;
542
543         unsigned long long io_collected_usec;
544         unsigned long long last_io_collected_usec;
545
546         char *stat_filename;
547         char *statm_filename;
548         char *io_filename;
549         char *cmdline_filename;
550
551         struct pid_stat *parent;
552         struct pid_stat *prev;
553         struct pid_stat *next;
554 } *root_of_pids = NULL, **all_pids;
555
556 long all_pids_count = 0;
557
558 struct pid_stat *get_pid_entry(pid_t pid) {
559         if(all_pids[pid]) {
560                 all_pids[pid]->new_entry = 0;
561                 return all_pids[pid];
562         }
563
564         all_pids[pid] = calloc(sizeof(struct pid_stat), 1);
565         if(!all_pids[pid]) {
566                 error("Cannot allocate %zu bytes of memory", (size_t)sizeof(struct pid_stat));
567                 return NULL;
568         }
569
570         all_pids[pid]->fds = calloc(sizeof(int), 100);
571         if(!all_pids[pid]->fds)
572                 error("Cannot allocate %zu bytes of memory", (size_t)(sizeof(int) * 100));
573         else all_pids[pid]->fds_size = 100;
574
575         if(root_of_pids) root_of_pids->prev = all_pids[pid];
576         all_pids[pid]->next = root_of_pids;
577         root_of_pids = all_pids[pid];
578
579         all_pids[pid]->pid = pid;
580         all_pids[pid]->new_entry = 1;
581
582         all_pids_count++;
583
584         return all_pids[pid];
585 }
586
587 void del_pid_entry(pid_t pid) {
588         if(!all_pids[pid]) {
589                 error("attempted to free pid %d that is not allocated.", pid);
590                 return;
591         }
592
593         if(unlikely(debug))
594                 fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm);
595
596         if(root_of_pids == all_pids[pid]) root_of_pids = all_pids[pid]->next;
597         if(all_pids[pid]->next) all_pids[pid]->next->prev = all_pids[pid]->prev;
598         if(all_pids[pid]->prev) all_pids[pid]->prev->next = all_pids[pid]->next;
599
600         if(all_pids[pid]->fds) free(all_pids[pid]->fds);
601         if(all_pids[pid]->stat_filename) free(all_pids[pid]->stat_filename);
602         if(all_pids[pid]->statm_filename) free(all_pids[pid]->statm_filename);
603         if(all_pids[pid]->io_filename) free(all_pids[pid]->io_filename);
604         if(all_pids[pid]->cmdline_filename) free(all_pids[pid]->cmdline_filename);
605         free(all_pids[pid]);
606
607         all_pids[pid] = NULL;
608         all_pids_count--;
609 }
610
611
612 // ----------------------------------------------------------------------------
613 // update pids from proc
614
615 int read_proc_pid_cmdline(struct pid_stat *p) {
616         
617         if(unlikely(!p->cmdline_filename)) {
618                 char filename[FILENAME_MAX + 1];
619                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", host_prefix, p->pid);
620                 if(!(p->cmdline_filename = strdup(filename)))
621                         fatal("Cannot allocate memory for filename '%s'", filename);
622         }
623
624         int fd = open(p->cmdline_filename, O_RDONLY, 0666);
625         if(unlikely(fd == -1)) goto cleanup;
626
627         int i, bytes = read(fd, p->cmdline, MAX_CMDLINE);
628         close(fd);
629
630         if(unlikely(bytes <= 0)) goto cleanup;
631
632         p->cmdline[bytes] = '\0';
633         for(i = 0; i < bytes ; i++)
634                 if(unlikely(!p->cmdline[i])) p->cmdline[i] = ' ';
635
636         if(unlikely(debug))
637                 fprintf(stderr, "Read file '%s' contents: %s\n", p->cmdline_filename, p->cmdline);
638
639         return 0;
640
641 cleanup:
642         // copy the command to the command line
643         strncpyz(p->cmdline, p->comm, MAX_CMDLINE);
644         return 0;
645 }
646
647 int read_proc_pid_ownership(struct pid_stat *p) {
648         if(unlikely(!p->stat_filename)) {
649                 error("pid %d does not have a stat_filename", p->pid);
650                 return 1;
651         }
652
653         // ----------------------------------------
654         // read uid and gid
655
656         struct stat st;
657         if(stat(p->stat_filename, &st) != 0) {
658                 error("Cannot stat file '%s'", p->stat_filename);
659                 return 1;
660         }
661
662         p->uid = st.st_uid;
663         p->gid = st.st_gid;
664
665         return 0;
666 }
667
668 int read_proc_pid_stat(struct pid_stat *p) {
669         static procfile *ff = NULL;
670
671         if(unlikely(!p->stat_filename)) {
672                 char filename[FILENAME_MAX + 1];
673                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", host_prefix, p->pid);
674                 if(!(p->stat_filename = strdup(filename)))
675                         fatal("Cannot allocate memory for filename '%s'", filename);
676         }
677
678         int set_quotes = (!ff)?1:0;
679
680         ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
681         if(unlikely(!ff)) goto cleanup;
682
683         // if(set_quotes) procfile_set_quotes(ff, "()");
684         if(set_quotes) procfile_set_open_close(ff, "(", ")");
685
686         ff = procfile_readall(ff);
687         if(unlikely(!ff)) goto cleanup;
688
689         p->last_stat_collected_usec = p->stat_collected_usec;
690         p->stat_collected_usec = timems();
691         file_counter++;
692
693         // parse the process name
694         unsigned int i = 0;
695         strncpyz(p->comm, procfile_lineword(ff, 0, 1), MAX_COMPARE_NAME);
696
697         // p->pid                       = atol(procfile_lineword(ff, 0, 0+i));
698         // comm is at 1
699         // p->state                     = *(procfile_lineword(ff, 0, 2+i));
700         p->ppid                         = (int32_t) atol(procfile_lineword(ff, 0, 3 + i));
701         // p->pgrp                      = atol(procfile_lineword(ff, 0, 4+i));
702         // p->session           = atol(procfile_lineword(ff, 0, 5+i));
703         // p->tty_nr            = atol(procfile_lineword(ff, 0, 6+i));
704         // p->tpgid                     = atol(procfile_lineword(ff, 0, 7+i));
705         // p->flags                     = strtoull(procfile_lineword(ff, 0, 8+i), NULL, 10);
706
707         unsigned long long last;
708
709         last = p->minflt_raw;
710         p->minflt_raw           = strtoull(procfile_lineword(ff, 0, 9+i), NULL, 10);
711         p->minflt = (p->minflt_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
712
713         last = p->cminflt_raw;
714         p->cminflt_raw          = strtoull(procfile_lineword(ff, 0, 10+i), NULL, 10);
715         p->cminflt = (p->cminflt_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
716
717         last = p->majflt_raw;
718         p->majflt_raw           = strtoull(procfile_lineword(ff, 0, 11+i), NULL, 10);
719         p->majflt = (p->majflt_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
720
721         last = p->cmajflt_raw;
722         p->cmajflt_raw          = strtoull(procfile_lineword(ff, 0, 12+i), NULL, 10);
723         p->cmajflt = (p->cmajflt_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
724
725         last = p->utime_raw;
726         p->utime_raw            = strtoull(procfile_lineword(ff, 0, 13+i), NULL, 10);
727         p->utime = (p->utime_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
728
729         last = p->stime_raw;
730         p->stime_raw            = strtoull(procfile_lineword(ff, 0, 14+i), NULL, 10);
731         p->stime = (p->stime_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
732
733         last = p->cutime_raw;
734         p->cutime_raw           = strtoull(procfile_lineword(ff, 0, 15+i), NULL, 10);
735         p->cutime = (p->cutime_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
736
737         last = p->cstime_raw;
738         p->cstime_raw           = strtoull(procfile_lineword(ff, 0, 16+i), NULL, 10);
739         p->cstime = (p->cstime_raw - last) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
740
741         // p->priority          = strtoull(procfile_lineword(ff, 0, 17+i), NULL, 10);
742         // p->nice                      = strtoull(procfile_lineword(ff, 0, 18+i), NULL, 10);
743         p->num_threads          = (int32_t) atol(procfile_lineword(ff, 0, 19 + i));
744         // p->itrealvalue       = strtoull(procfile_lineword(ff, 0, 20+i), NULL, 10);
745         // p->starttime         = strtoull(procfile_lineword(ff, 0, 21+i), NULL, 10);
746         // p->vsize                     = strtoull(procfile_lineword(ff, 0, 22+i), NULL, 10);
747         p->rss                          = strtoull(procfile_lineword(ff, 0, 23+i), NULL, 10);
748         // p->rsslim            = strtoull(procfile_lineword(ff, 0, 24+i), NULL, 10);
749         // p->starcode          = strtoull(procfile_lineword(ff, 0, 25+i), NULL, 10);
750         // p->endcode           = strtoull(procfile_lineword(ff, 0, 26+i), NULL, 10);
751         // p->startstack        = strtoull(procfile_lineword(ff, 0, 27+i), NULL, 10);
752         // p->kstkesp           = strtoull(procfile_lineword(ff, 0, 28+i), NULL, 10);
753         // p->kstkeip           = strtoull(procfile_lineword(ff, 0, 29+i), NULL, 10);
754         // p->signal            = strtoull(procfile_lineword(ff, 0, 30+i), NULL, 10);
755         // p->blocked           = strtoull(procfile_lineword(ff, 0, 31+i), NULL, 10);
756         // p->sigignore         = strtoull(procfile_lineword(ff, 0, 32+i), NULL, 10);
757         // p->sigcatch          = strtoull(procfile_lineword(ff, 0, 33+i), NULL, 10);
758         // p->wchan                     = strtoull(procfile_lineword(ff, 0, 34+i), NULL, 10);
759         // p->nswap                     = strtoull(procfile_lineword(ff, 0, 35+i), NULL, 10);
760         // p->cnswap            = strtoull(procfile_lineword(ff, 0, 36+i), NULL, 10);
761         // p->exit_signal       = atol(procfile_lineword(ff, 0, 37+i));
762         // p->processor         = atol(procfile_lineword(ff, 0, 38+i));
763         // p->rt_priority       = strtoul(procfile_lineword(ff, 0, 39+i), NULL, 10);
764         // p->policy            = strtoul(procfile_lineword(ff, 0, 40+i), NULL, 10);
765         // p->delayacct_blkio_ticks             = strtoull(procfile_lineword(ff, 0, 41+i), NULL, 10);
766         // p->guest_time        = strtoull(procfile_lineword(ff, 0, 42+i), NULL, 10);
767         // p->cguest_time       = strtoull(procfile_lineword(ff, 0, 43), NULL, 10);
768
769         if(unlikely(debug || (p->target && p->target->debug)))
770                 fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads);
771
772         if(unlikely(global_iterations_counter == 1)) {
773                 p->minflt                       = 0;
774                 p->cminflt                      = 0;
775                 p->majflt                       = 0;
776                 p->cmajflt                      = 0;
777                 p->utime                        = 0;
778                 p->stime                        = 0;
779                 p->cutime                       = 0;
780                 p->cstime                       = 0;
781         }
782
783         return 0;
784
785 cleanup:
786         p->minflt                       = 0;
787         p->cminflt                      = 0;
788         p->majflt                       = 0;
789         p->cmajflt                      = 0;
790         p->utime                        = 0;
791         p->stime                        = 0;
792         p->cutime                       = 0;
793         p->cstime                       = 0;
794         p->num_threads          = 0;
795         p->rss                          = 0;
796         return 1;
797 }
798
799 int read_proc_pid_statm(struct pid_stat *p) {
800         static procfile *ff = NULL;
801
802         if(unlikely(!p->statm_filename)) {
803                 char filename[FILENAME_MAX + 1];
804                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", host_prefix, p->pid);
805                 if(!(p->statm_filename = strdup(filename)))
806                         fatal("Cannot allocate memory for filename '%s'", filename);
807         }
808
809         ff = procfile_reopen(ff, p->statm_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
810         if(unlikely(!ff)) goto cleanup;
811
812         ff = procfile_readall(ff);
813         if(unlikely(!ff)) goto cleanup;
814
815         file_counter++;
816
817         p->statm_size                   = strtoull(procfile_lineword(ff, 0, 0), NULL, 10);
818         p->statm_resident               = strtoull(procfile_lineword(ff, 0, 1), NULL, 10);
819         p->statm_share                  = strtoull(procfile_lineword(ff, 0, 2), NULL, 10);
820         p->statm_text                   = strtoull(procfile_lineword(ff, 0, 3), NULL, 10);
821         p->statm_lib                    = strtoull(procfile_lineword(ff, 0, 4), NULL, 10);
822         p->statm_data                   = strtoull(procfile_lineword(ff, 0, 5), NULL, 10);
823         p->statm_dirty                  = strtoull(procfile_lineword(ff, 0, 6), NULL, 10);
824
825         return 0;
826
827 cleanup:
828         p->statm_size                   = 0;
829         p->statm_resident               = 0;
830         p->statm_share                  = 0;
831         p->statm_text                   = 0;
832         p->statm_lib                    = 0;
833         p->statm_data                   = 0;
834         p->statm_dirty                  = 0;
835         return 1;
836 }
837
838 int read_proc_pid_io(struct pid_stat *p) {
839         static procfile *ff = NULL;
840
841         if(unlikely(!p->io_filename)) {
842                 char filename[FILENAME_MAX + 1];
843                 snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", host_prefix, p->pid);
844                 if(!(p->io_filename = strdup(filename)))
845                         fatal("Cannot allocate memory for filename '%s'", filename);
846         }
847
848         // open the file
849         ff = procfile_reopen(ff, p->io_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
850         if(unlikely(!ff)) goto cleanup;
851
852         ff = procfile_readall(ff);
853         if(unlikely(!ff)) goto cleanup;
854
855         file_counter++;
856
857         p->last_io_collected_usec = p->io_collected_usec;
858         p->io_collected_usec = timems();
859
860         unsigned long long last;
861
862         last = p->io_logical_bytes_read_raw;
863         p->io_logical_bytes_read_raw = strtoull(procfile_lineword(ff, 0, 1), NULL, 10);
864         p->io_logical_bytes_read = (p->io_logical_bytes_read_raw - last) * (1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
865
866         last = p->io_logical_bytes_written_raw;
867         p->io_logical_bytes_written_raw = strtoull(procfile_lineword(ff, 1, 1), NULL, 10);
868         p->io_logical_bytes_written = (p->io_logical_bytes_written_raw - last) * (1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
869
870         last = p->io_read_calls_raw;
871         p->io_read_calls_raw = strtoull(procfile_lineword(ff, 2, 1), NULL, 10);
872         p->io_read_calls = (p->io_read_calls_raw - last) * (1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
873
874         last = p->io_write_calls_raw;
875         p->io_write_calls_raw = strtoull(procfile_lineword(ff, 3, 1), NULL, 10);
876         p->io_write_calls = (p->io_write_calls_raw - last) * (1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
877
878         last = p->io_storage_bytes_read_raw;
879         p->io_storage_bytes_read_raw = strtoull(procfile_lineword(ff, 4, 1), NULL, 10);
880         p->io_storage_bytes_read = (p->io_storage_bytes_read_raw - last) * (1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
881
882         last = p->io_storage_bytes_written_raw;
883         p->io_storage_bytes_written_raw = strtoull(procfile_lineword(ff, 5, 1), NULL, 10);
884         p->io_storage_bytes_written = (p->io_storage_bytes_written_raw - last) * (1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
885
886         last = p->io_cancelled_write_bytes_raw;
887         p->io_cancelled_write_bytes_raw = strtoull(procfile_lineword(ff, 6, 1), NULL, 10);
888         p->io_cancelled_write_bytes = (p->io_cancelled_write_bytes_raw - last) * (1000000 * 100) / (p->io_collected_usec - p->last_io_collected_usec);
889
890         if(unlikely(global_iterations_counter == 1)) {
891                 p->io_logical_bytes_read                = 0;
892                 p->io_logical_bytes_written     = 0;
893                 p->io_read_calls                                = 0;
894                 p->io_write_calls                               = 0;
895                 p->io_storage_bytes_read                = 0;
896                 p->io_storage_bytes_written     = 0;
897                 p->io_cancelled_write_bytes             = 0;
898         }
899
900         return 0;
901
902 cleanup:
903         p->io_logical_bytes_read                = 0;
904         p->io_logical_bytes_written     = 0;
905         p->io_read_calls                                = 0;
906         p->io_write_calls                               = 0;
907         p->io_storage_bytes_read                = 0;
908         p->io_storage_bytes_written     = 0;
909         p->io_cancelled_write_bytes             = 0;
910         return 1;
911 }
912
913
914 // ----------------------------------------------------------------------------
915 // file descriptor
916 // this is used to keep a global list of all open files of the system
917 // it is needed in order to calculate the unique files processes have open
918
919 #define FILE_DESCRIPTORS_INCREASE_STEP 100
920
921 struct file_descriptor {
922         avl avl;
923 #ifdef NETDATA_INTERNAL_CHECKS
924         uint32_t magic;
925 #endif /* NETDATA_INTERNAL_CHECKS */
926         uint32_t hash;
927         const char *name;
928         int type;
929         int count;
930         int pos;
931 } *all_files = NULL;
932
933 int all_files_len = 0;
934 int all_files_size = 0;
935
936 int file_descriptor_compare(void* a, void* b) {
937 #ifdef NETDATA_INTERNAL_CHECKS
938         if(((struct file_descriptor *)a)->magic != 0x0BADCAFE || ((struct file_descriptor *)b)->magic != 0x0BADCAFE)
939                 error("Corrupted index data detected. Please report this.");
940 #endif /* NETDATA_INTERNAL_CHECKS */
941
942         if(((struct file_descriptor *)a)->hash < ((struct file_descriptor *)b)->hash)
943                 return -1;
944
945         else if(((struct file_descriptor *)a)->hash > ((struct file_descriptor *)b)->hash)
946                 return 1;
947
948         else
949                 return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name);
950 }
951
952 int file_descriptor_iterator(avl *a) { if(a) {}; return 0; }
953
954 avl_tree all_files_index = {
955                 NULL,
956                 file_descriptor_compare
957 };
958
959 static struct file_descriptor *file_descriptor_find(const char *name, uint32_t hash) {
960         struct file_descriptor tmp;
961         tmp.hash = (hash)?hash:simple_hash(name);
962         tmp.name = name;
963         tmp.count = 0;
964         tmp.pos = 0;
965 #ifdef NETDATA_INTERNAL_CHECKS
966         tmp.magic = 0x0BADCAFE;
967 #endif /* NETDATA_INTERNAL_CHECKS */
968
969         return (struct file_descriptor *)avl_search(&all_files_index, (avl *) &tmp);
970 }
971
// insert / remove a slot of all_files in the all_files_index tree
#define file_descriptor_add(fd)    avl_insert(&all_files_index, (avl *)(fd))
#define file_descriptor_remove(fd) avl_remove(&all_files_index, (avl *)(fd))
974
// classification of an open file, stored in struct file_descriptor.type
#define FILETYPE_OTHER     0
#define FILETYPE_FILE      1
#define FILETYPE_PIPE      2
#define FILETYPE_SOCKET    3
#define FILETYPE_INOTIFY   4
#define FILETYPE_EVENTFD   5
#define FILETYPE_EVENTPOLL 6
#define FILETYPE_TIMERFD   7
#define FILETYPE_SIGNALFD  8
984
985 void file_descriptor_not_used(int id)
986 {
987         if(id > 0 && id < all_files_size) {
988
989 #ifdef NETDATA_INTERNAL_CHECKS
990                 if(all_files[id].magic != 0x0BADCAFE) {
991                         error("Ignoring request to remove empty file id %d.", id);
992                         return;
993                 }
994 #endif /* NETDATA_INTERNAL_CHECKS */
995
996                 if(unlikely(debug))
997                         fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count);
998
999                 if(all_files[id].count > 0) {
1000                         all_files[id].count--;
1001
1002                         if(!all_files[id].count) {
1003                                 if(unlikely(debug))
1004                                         fprintf(stderr, "apps.plugin:   >> slot %d is empty.\n", id);
1005
1006                                 file_descriptor_remove(&all_files[id]);
1007 #ifdef NETDATA_INTERNAL_CHECKS
1008                                 all_files[id].magic = 0x00000000;
1009 #endif /* NETDATA_INTERNAL_CHECKS */
1010                                 all_files_len--;
1011                         }
1012                 }
1013                 else
1014                         error("Request to decrease counter of fd %d (%s), while the use counter is 0", id, all_files[id].name);
1015         }
1016         else    error("Request to decrease counter of fd %d, which is outside the array size (1 to %d)", id, all_files_size);
1017 }
1018
1019 int file_descriptor_find_or_add(const char *name)
1020 {
1021         static int last_pos = 0;
1022         uint32_t hash = simple_hash(name);
1023
1024         if(unlikely(debug))
1025                 fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash);
1026
1027         struct file_descriptor *fd = file_descriptor_find(name, hash);
1028         if(fd) {
1029                 // found
1030                 if(unlikely(debug))
1031                         fprintf(stderr, "apps.plugin:   >> found on slot %d\n", fd->pos);
1032
1033                 fd->count++;
1034                 return fd->pos;
1035         }
1036         // not found
1037
1038         // check we have enough memory to add it
1039         if(!all_files || all_files_len == all_files_size) {
1040                 void *old = all_files;
1041                 int i;
1042
1043                 // there is no empty slot
1044                 if(unlikely(debug))
1045                         fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP);
1046
1047                 all_files = realloc(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor));
1048
1049                 // if the address changed, we have to rebuild the index
1050                 // since all pointers are now invalid
1051                 if(old && old != (void *)all_files) {
1052                         if(unlikely(debug))
1053                                 fprintf(stderr, "apps.plugin:   >> re-indexing.\n");
1054
1055                         all_files_index.root = NULL;
1056                         for(i = 0; i < all_files_size; i++) {
1057                                 if(!all_files[i].count) continue;
1058                                 file_descriptor_add(&all_files[i]);
1059                         }
1060
1061                         if(unlikely(debug))
1062                                 fprintf(stderr, "apps.plugin:   >> re-indexing done.\n");
1063                 }
1064
1065                 for(i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) {
1066                         all_files[i].count = 0;
1067                         all_files[i].name = NULL;
1068 #ifdef NETDATA_INTERNAL_CHECKS
1069                         all_files[i].magic = 0x00000000;
1070 #endif /* NETDATA_INTERNAL_CHECKS */
1071                         all_files[i].pos = i;
1072                 }
1073
1074                 if(!all_files_size) all_files_len = 1;
1075                 all_files_size += FILE_DESCRIPTORS_INCREASE_STEP;
1076         }
1077
1078         if(unlikely(debug))
1079                 fprintf(stderr, "apps.plugin:   >> searching for empty slot.\n");
1080
1081         // search for an empty slot
1082         int i, c;
1083         for(i = 0, c = last_pos ; i < all_files_size ; i++, c++) {
1084                 if(c >= all_files_size) c = 0;
1085                 if(c == 0) continue;
1086
1087                 if(!all_files[c].count) {
1088                         if(unlikely(debug))
1089                                 fprintf(stderr, "apps.plugin:   >> Examining slot %d.\n", c);
1090
1091 #ifdef NETDATA_INTERNAL_CHECKS
1092                         if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash))
1093                                 error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name);
1094 #endif /* NETDATA_INTERNAL_CHECKS */
1095
1096                         if(unlikely(debug))
1097                                 fprintf(stderr, "apps.plugin:   >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name);
1098
1099                         if(all_files[c].name) free((void *)all_files[c].name);
1100                         all_files[c].name = NULL;
1101                         last_pos = c;
1102                         break;
1103                 }
1104         }
1105         if(i == all_files_size) {
1106                 fatal("We should find an empty slot, but there isn't any");
1107                 exit(1);
1108         }
1109
1110         if(unlikely(debug))
1111                 fprintf(stderr, "apps.plugin:   >> updating slot %d.\n", c);
1112
1113         all_files_len++;
1114
1115         // else we have an empty slot in 'c'
1116
1117         int type;
1118         if(name[0] == '/') type = FILETYPE_FILE;
1119         else if(strncmp(name, "pipe:", 5) == 0) type = FILETYPE_PIPE;
1120         else if(strncmp(name, "socket:", 7) == 0) type = FILETYPE_SOCKET;
1121         else if(strcmp(name, "anon_inode:inotify") == 0 || strcmp(name, "inotify") == 0) type = FILETYPE_INOTIFY;
1122         else if(strcmp(name, "anon_inode:[eventfd]") == 0) type = FILETYPE_EVENTFD;
1123         else if(strcmp(name, "anon_inode:[eventpoll]") == 0) type = FILETYPE_EVENTPOLL;
1124         else if(strcmp(name, "anon_inode:[timerfd]") == 0) type = FILETYPE_TIMERFD;
1125         else if(strcmp(name, "anon_inode:[signalfd]") == 0) type = FILETYPE_SIGNALFD;
1126         else if(strncmp(name, "anon_inode:", 11) == 0) {
1127                 if(unlikely(debug))
1128                         fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name);
1129
1130                 type = FILETYPE_OTHER;
1131         }
1132         else {
1133                 if(unlikely(debug))
1134                         fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name);
1135
1136                 type = FILETYPE_OTHER;
1137         }
1138
1139         all_files[c].name = strdup(name);
1140         all_files[c].hash = hash;
1141         all_files[c].type = type;
1142         all_files[c].pos  = c;
1143         all_files[c].count = 1;
1144 #ifdef NETDATA_INTERNAL_CHECKS
1145         all_files[c].magic = 0x0BADCAFE;
1146 #endif /* NETDATA_INTERNAL_CHECKS */
1147         file_descriptor_add(&all_files[c]);
1148
1149         if(unlikely(debug))
1150                 fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name);
1151
1152         return c;
1153 }
1154
1155 int read_pid_file_descriptors(struct pid_stat *p) {
1156         char dirname[FILENAME_MAX+1];
1157
1158         snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", host_prefix, p->pid);
1159         DIR *fds = opendir(dirname);
1160         if(fds) {
1161                 int c;
1162                 struct dirent *de;
1163                 char fdname[FILENAME_MAX + 1];
1164                 char linkname[FILENAME_MAX + 1];
1165
1166                 // make the array negative
1167                 for(c = 0 ; c < p->fds_size ; c++)
1168                         p->fds[c] = -p->fds[c];
1169
1170                 while((de = readdir(fds))) {
1171                         if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1172                                 continue;
1173
1174                         // check if the fds array is small
1175                         int fdid = atoi(de->d_name);
1176                         if(fdid < 0) continue;
1177                         if(fdid >= p->fds_size) {
1178                                 // it is small, extend it
1179                                 if(unlikely(debug))
1180                                         fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100);
1181
1182                                 p->fds = realloc(p->fds, (fdid + 100) * sizeof(int));
1183                                 if(!p->fds) {
1184                                         fatal("Cannot re-allocate fds for %s", p->comm);
1185                                         break;
1186                                 }
1187
1188                                 // and initialize it
1189                                 for(c = p->fds_size ; c < (fdid + 100) ; c++) p->fds[c] = 0;
1190                                 p->fds_size = fdid + 100;
1191                         }
1192
1193                         if(p->fds[fdid] == 0) {
1194                                 // we don't know this fd, get it
1195
1196                                 sprintf(fdname, "%s/proc/%d/fd/%s", host_prefix, p->pid, de->d_name);
1197                                 ssize_t l = readlink(fdname, linkname, FILENAME_MAX);
1198                                 if(l == -1) {
1199                                         if(debug || (p->target && p->target->debug)) {
1200                                                 if(debug || (p->target && p->target->debug))
1201                                                         error("Cannot read link %s", fdname);
1202                                         }
1203                                         continue;
1204                                 }
1205                                 linkname[l] = '\0';
1206                                 file_counter++;
1207
1208                                 // if another process already has this, we will get
1209                                 // the same id
1210                                 p->fds[fdid] = file_descriptor_find_or_add(linkname);
1211                         }
1212
1213                         // else make it positive again, we need it
1214                         // of course, the actual file may have changed, but we don't care so much
1215                         // FIXME: we could compare the inode as returned by readdir direct structure
1216                         else p->fds[fdid] = -p->fds[fdid];
1217                 }
1218                 closedir(fds);
1219
1220                 // remove all the negative file descriptors
1221                 for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] < 0) {
1222                         file_descriptor_not_used(-p->fds[c]);
1223                         p->fds[c] = 0;
1224                 }
1225         }
1226         else return 1;
1227
1228         return 0;
1229 }
1230
1231 // ----------------------------------------------------------------------------
1232
#ifdef NETDATA_INTERNAL_CHECKS
// Debugging aid (internal checks only): prints to stderr the chain of
// parents of pe and every other process whose matching "children" counter
// is larger than 'lost', i.e. could have received the lost resources.
// type selects the counter: 1 = cminflt, 2 = cmajflt, 3 = cutime,
// 4 = cstime. If no candidate exists, a single "cannot find" line is
// printed instead.
void find_lost_child_debug(struct pid_stat *pe, struct pid_stat *ppe, unsigned long long lost, int type) {
	struct pid_stat *ancestor = pe->parent;
	struct pid_stat *candidate = NULL;
	int matches = 0;

	log_date(stderr);
	fprintf(stderr, "Searching for candidate of lost resources of process %d (%s, %s) which is aggregated on %d (%s, %s)\n", pe->pid, pe->comm, pe->updated?"running":"exited", ppe->pid, ppe->comm, ppe->updated?"running":"exited");

	for( ; ancestor ; ancestor = ancestor->parent)
		fprintf(stderr, " >> parent %d (%s, %s)\n", ancestor->pid, ancestor->comm, ancestor->updated?"running":"exited");

	for(candidate = root_of_pids; candidate ; candidate = candidate->next) {
		if(candidate == pe) continue;

		if(type == 1 && candidate->cminflt > lost) {
			fprintf(stderr, " > process %d (%s) could use the lost exited child minflt %llu of process %d (%s)\n", candidate->pid, candidate->comm, lost, pe->pid, pe->comm);
			matches++;
		}
		else if(type == 2 && candidate->cmajflt > lost) {
			fprintf(stderr, " > process %d (%s) could use the lost exited child majflt %llu of process %d (%s)\n", candidate->pid, candidate->comm, lost, pe->pid, pe->comm);
			matches++;
		}
		else if(type == 3 && candidate->cutime > lost) {
			fprintf(stderr, " > process %d (%s) could use the lost exited child utime %llu of process %d (%s)\n", candidate->pid, candidate->comm, lost, pe->pid, pe->comm);
			matches++;
		}
		else if(type == 4 && candidate->cstime > lost) {
			fprintf(stderr, " > process %d (%s) could use the lost exited child stime %llu of process %d (%s)\n", candidate->pid, candidate->comm, lost, pe->pid, pe->comm);
			matches++;
		}
	}

	if(matches)
		return;

	// nobody could have received the lost amount
	if(type == 1)
		fprintf(stderr, " > cannot find any process to use the lost exited child minflt %llu of process %d (%s)\n", lost, pe->pid, pe->comm);
	else if(type == 2)
		fprintf(stderr, " > cannot find any process to use the lost exited child majflt %llu of process %d (%s)\n", lost, pe->pid, pe->comm);
	else if(type == 3)
		fprintf(stderr, " > cannot find any process to use the lost exited child utime %llu of process %d (%s)\n", lost, pe->pid, pe->comm);
	else if(type == 4)
		fprintf(stderr, " > cannot find any process to use the lost exited child stime %llu of process %d (%s)\n", lost, pe->pid, pe->comm);
}
#endif /* NETDATA_INTERNAL_CHECKS */
1300
1301 void remove_exited_child_from_parent(unsigned long long *field, unsigned long long *pfield, unsigned long long *ifield, struct pid_stat *pe, struct pid_stat *ppe, int type) {
1302         if(pfield) {
1303                 if(*field > *pfield) {
1304                         *field -= *pfield;
1305                         *pfield = 0;
1306                 }
1307                 else {
1308                         *pfield -= *field;
1309                         *field = 0;
1310                 }
1311         }
1312
1313         if(*field) {
1314                 if(ifield && ifield != pfield) {
1315                         if(*field > *ifield) {
1316                                 *field -= *ifield;
1317                                 *ifield = 0;
1318                         }
1319                         else {
1320                                 *ifield -= *field;
1321                                 *field = 0;
1322                         }
1323                 }
1324         }
1325
1326         if(*field) {
1327 #ifdef NETDATA_INTERNAL_CHECKS
1328                 find_lost_child_debug(pe, ppe, *field, type);
1329 #endif
1330                 while(pe && !pe->updated) {
1331                         pe->keep = 1;
1332                         pe = pe->parent;
1333                 }
1334         }
1335 }
1336
1337 void process_exited_processes() {
1338         struct pid_stat *init = all_pids[1];
1339         struct pid_stat *p;
1340
1341         for(p = root_of_pids; p ; p = p->next) {
1342                 if(p->updated || !p->stat_collected_usec) continue;
1343
1344                 struct pid_stat *pp = p->parent;
1345
1346                 // find the first parent that is running
1347                 while(pp && !pp->updated)
1348                         pp = pp->parent;
1349                 
1350                 unsigned long long rate;
1351
1352                 rate = (p->utime_raw + p->cutime_raw) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1353                 remove_exited_child_from_parent(&rate,  (pp)?&pp->cutime:NULL,  (init)?&init->cutime:NULL, p, pp, 3);
1354                 p->cutime_raw = 0;
1355                 p->utime_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (1000000 * 100);
1356
1357                 rate = (p->stime_raw + p->cstime_raw) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1358                 remove_exited_child_from_parent(&rate,  (pp)?&pp->cstime:NULL,  (init)?&init->cstime:NULL, p, pp, 4);
1359                 p->cstime_raw = 0;
1360                 p->stime_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (1000000 * 100);
1361
1362                 rate = (p->minflt_raw + p->cminflt_raw) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1363                 remove_exited_child_from_parent(&rate, (pp)?&pp->cminflt:NULL, (init)?&init->cminflt:NULL, p, pp, 1);
1364                 p->cminflt_raw = 0;
1365                 p->minflt_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (1000000 * 100);
1366
1367                 rate = (p->majflt_raw + p->cmajflt_raw) * (1000000 * 100) / (p->stat_collected_usec - p->last_stat_collected_usec);
1368                 remove_exited_child_from_parent(&rate, (pp)?&pp->cmajflt:NULL, (init)?&init->cmajflt:NULL, p, pp, 2);
1369                 p->cmajflt_raw = 0;
1370                 p->majflt_raw = rate * (p->stat_collected_usec - p->last_stat_collected_usec) / (1000000 * 100);
1371         }
1372 }
1373
1374 void link_all_processes_to_their_parents(void) {
1375         struct pid_stat *p = NULL;
1376
1377         // link all children to their parents
1378         // and update children count on parents
1379         for(p = root_of_pids; p ; p = p->next) {
1380                 // for each process found running
1381
1382                 if(likely(p->ppid > 0 && all_pids[p->ppid])) {
1383                         // valid parent processes
1384
1385                         struct pid_stat *pp;
1386
1387                         p->parent = pp = all_pids[p->ppid];
1388                         p->parent->children_count++;
1389
1390                         if(unlikely(debug || (p->target && p->target->debug)))
1391                                 fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cminflt, pp->cmajflt);
1392                 }
1393                 else if(unlikely(p->ppid != 0))
1394                         error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid);
1395
1396                 p->sortlist = 0;
1397         }
1398 }
1399
1400 // ----------------------------------------------------------------------------
1401
1402 // 1. read all files in /proc
1403 // 2. for each numeric directory:
1404 //    i.   read /proc/pid/stat
1405 //    ii.  read /proc/pid/statm
1406 //    iii. read /proc/pid/io (requires root access)
1407 //    iv.  read the entries in directory /proc/pid/fd (requires root access)
1408 //         for each entry:
1409 //         a. find or create a struct file_descriptor
1410 //         b. cleanup any old/unused file_descriptors
1411
1412 // after all these, some pids may be linked to targets, while others may not
1413
1414 // in case of errors, only 1 every 1000 errors is printed
1415 // to avoid filling up all disk space
1416 // if debug is enabled, all errors are printed
1417
1418 static int compar_pid(const void *pid1, const void *pid2) {
1419
1420         struct pid_stat *p1 = all_pids[*((pid_t *)pid1)];
1421         struct pid_stat *p2 = all_pids[*((pid_t *)pid2)];
1422
1423         if(p1->sortlist > p2->sortlist)
1424                 return -1;
1425         else
1426                 return 1;
1427 }
1428
1429 void collect_data_for_pid(pid_t pid) {
1430         if(unlikely(pid <= 0 || pid > pid_max)) {
1431                 error("Invalid pid %d read (expected 1 to %d). Ignoring process.", pid, pid_max);
1432                 return;
1433         }
1434
1435         struct pid_stat *p = get_pid_entry(pid);
1436         if(unlikely(!p || p->read)) return;
1437         p->read             = 1;
1438
1439         // fprintf(stderr, "Reading process %d (%s), sortlist %d\n", p->pid, p->comm, p->sortlist);
1440
1441         // --------------------------------------------------------------------
1442         // /proc/<pid>/stat
1443
1444         if(unlikely(read_proc_pid_stat(p))) {
1445                 error("Cannot process %s/proc/%d/stat", host_prefix, pid);
1446                 // there is no reason to proceed if we cannot get its status
1447                 return;
1448         }
1449
1450         read_proc_pid_ownership(p);
1451
1452         // check its parent pid
1453         if(unlikely(p->ppid < 0 || p->ppid > pid_max)) {
1454                 error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid);
1455                 p->ppid = 0;
1456         }
1457
1458         // --------------------------------------------------------------------
1459         // /proc/<pid>/io
1460
1461         if(unlikely(read_proc_pid_io(p)))
1462                 error("Cannot process %s/proc/%d/io", host_prefix, pid);
1463
1464         // --------------------------------------------------------------------
1465         // /proc/<pid>/statm
1466
1467         if(unlikely(read_proc_pid_statm(p))) {
1468                 error("Cannot process %s/proc/%d/statm", host_prefix, pid);
1469                 // there is no reason to proceed if we cannot get its memory status
1470                 return;
1471         }
1472
1473         // --------------------------------------------------------------------
1474         // link it
1475
1476         // check if it is target
1477         // we do this only once, the first time this pid is loaded
1478         if(unlikely(p->new_entry)) {
1479                 // /proc/<pid>/cmdline
1480                 if(likely(proc_pid_cmdline_is_needed)) {
1481                         if(unlikely(read_proc_pid_cmdline(p)))
1482                                 error("Cannot process %s/proc/%d/cmdline", host_prefix, pid);
1483                 }
1484
1485                 if(unlikely(debug))
1486                         fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", pid, p->comm);
1487
1488                 uint32_t hash = simple_hash(p->comm);
1489                 size_t pclen  = strlen(p->comm);
1490
1491                 struct target *w;
1492                 for(w = apps_groups_root_target; w ; w = w->next) {
1493                         // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm);
1494
1495                         // find it - 4 cases:
1496                         // 1. the target is not a pattern
1497                         // 2. the target has the prefix
1498                         // 3. the target has the suffix
1499                         // 4. the target is something inside cmdline
1500                         if(     (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm))
1501                                || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen))
1502                                || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen]))
1503                                || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(p->cmdline, w->compare))
1504                                         ) {
1505                                 if(w->target) p->target = w->target;
1506                                 else p->target = w;
1507
1508                                 if(debug || (p->target && p->target->debug))
1509                                         fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name);
1510
1511                                 break;
1512                         }
1513                 }
1514         }
1515
1516         // --------------------------------------------------------------------
1517         // /proc/<pid>/fd
1518
1519         if(unlikely(read_pid_file_descriptors(p))) {
1520                 error("Cannot process entries in %s/proc/%d/fd", host_prefix, pid);
1521         }
1522
1523         // --------------------------------------------------------------------
1524         // done!
1525
1526 #ifdef NETDATA_INTERNAL_CHECKS
1527         if(unlikely(all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read))
1528                 fprintf(stderr, "Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read\n", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist);
1529 #endif
1530
1531         // mark it as updated
1532         p->updated = 1;
1533         p->keep = 0;
1534         p->keeploops = 0;
1535 }
1536
1537 int collect_data_for_all_processes_from_proc(void) {
1538         struct pid_stat *p = NULL;
1539
1540         if(all_pids_count) {
1541                 // read parents before childs
1542                 // this is needed to prevent a situation where
1543                 // a child is found running, but until we read
1544                 // its parent, it has exited and its parent
1545                 // has accumulated its resources
1546
1547                 long slc = 0;
1548                 for(p = root_of_pids; p ; p = p->next) {
1549                         p->read             = 0;
1550                         p->updated          = 0;
1551                         p->new_entry        = 0;
1552                         p->merged           = 0;
1553                         p->children_count   = 0;
1554                         p->parent           = NULL;
1555
1556 #ifdef NETDATA_INTERNAL_CHECKS
1557                         if(unlikely(slc >= all_pids_count))
1558                                 error("Internal error: I was thinking I had %ld processes in my arrays, but it seems there are more.", all_pids_count);
1559 #endif
1560                         all_pids_sortlist[slc++] = p->pid;
1561                 }
1562
1563                 qsort((void *)all_pids_sortlist, all_pids_count, sizeof(pid_t), compar_pid);
1564
1565                 for(slc = 0; slc < all_pids_count; slc++)
1566                         collect_data_for_pid(all_pids_sortlist[slc]);
1567         }
1568
1569         char dirname[FILENAME_MAX + 1];
1570
1571         snprintfz(dirname, FILENAME_MAX, "%s/proc", host_prefix);
1572         DIR *dir = opendir(dirname);
1573         if(!dir) return 0;
1574
1575         struct dirent *file = NULL;
1576
1577         while((file = readdir(dir))) {
1578                 char *endptr = file->d_name;
1579                 pid_t pid = (pid_t) strtoul(file->d_name, &endptr, 10);
1580
1581                 // make sure we read a valid number
1582                 if(unlikely(endptr == file->d_name || *endptr != '\0'))
1583                         continue;
1584
1585                 collect_data_for_pid(pid);
1586         }
1587         closedir(dir);
1588
1589         // normally this is done
1590         // however we may have processes exited while we collected values
1591         // so let's find the exited ones
1592         // we do this by collecting the ownership of process
1593         // if we manage to get the ownership, the process still runs
1594
1595         link_all_processes_to_their_parents();
1596         process_exited_processes();
1597
1598         return 1;
1599 }
1600
1601 // ----------------------------------------------------------------------------
1602 // update statistics on the targets
1603
1604 // 1. link all childs to their parents
1605 // 2. go from bottom to top, marking as merged all childs to their parents
1606 //    this step links all parents without a target to the child target, if any
1607 // 3. link all top level processes (the ones not merged) to the default target
1608 // 4. go from top to bottom, linking all childs without a target, to their parent target
1609 //    after this step, all processes have a target
1610 // [5. for each killed pid (updated = 0), remove its usage from its target]
1611 // 6. zero all apps_groups_targets
1612 // 7. concentrate all values on the apps_groups_targets
1613 // 8. remove all killed processes
1614 // 9. find the unique file count for each target
1615 // check: update_apps_groups_statistics()
1616
1617 void cleanup_exited_pids(void) {
1618         int c;
1619         struct pid_stat *p = NULL;
1620
1621         for(p = root_of_pids; p ;) {
1622                 if(!p->updated && (!p->keep || p->keeploops > 1)) {
1623 //                      fprintf(stderr, "\tEXITED %d %s [parent %d %s, target %s] utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->parent->pid, p->parent->comm, p->target->name,  p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
1624
1625 #ifdef NETDATA_INTERNAL_CHECKS
1626                         if(p->keep)
1627                                 fprintf(stderr, " > cannot keep exited process %d (%s) anymore - removing it.\n", p->pid, p->comm);
1628 #endif
1629
1630                         for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] > 0) {
1631                                 file_descriptor_not_used(p->fds[c]);
1632                                 p->fds[c] = 0;
1633                         }
1634
1635                         pid_t r = p->pid;
1636                         p = p->next;
1637                         del_pid_entry(r);
1638                 }
1639                 else {
1640                         if(unlikely(p->keep)) p->keeploops++;
1641                         p->keep = 0;
1642                         p = p->next;
1643                 }
1644         }
1645 }
1646
1647 void apply_apps_groups_targets_inheritance(void) {
1648         struct pid_stat *p = NULL;
1649
1650         // children that do not have a target
1651         // inherit their target from their parent
1652         int found = 1, loops = 0;
1653         while(found) {
1654                 if(unlikely(debug)) loops++;
1655                 found = 0;
1656                 for(p = root_of_pids; p ; p = p->next) {
1657                         // if this process does not have a target
1658                         // and it has a parent
1659                         // and its parent has a target
1660                         // then, set the parent's target to this process
1661                         if(unlikely(!p->target && p->parent && p->parent->target)) {
1662                                 p->target = p->parent->target;
1663                                 found++;
1664
1665                                 if(debug || (p->target && p->target->debug))
1666                                         fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
1667                         }
1668                 }
1669         }
1670
1671         // find all the procs with 0 childs and merge them to their parents
1672         // repeat, until nothing more can be done.
1673         int sortlist = 1;
1674         found = 1;
1675         while(found) {
1676                 if(unlikely(debug)) loops++;
1677                 found = 0;
1678
1679                 for(p = root_of_pids; p ; p = p->next) {
1680                         // if this process does not have any children
1681                         // and is not already merged
1682                         // and has a parent
1683                         // and its parent has children
1684                         // and the target of this process and its parent is the same, or the parent does not have a target
1685                         // and its parent is not init
1686                         // then, mark them as merged.
1687                         if(unlikely(
1688                                         !p->children_count
1689                                         && !p->merged
1690                                         && p->parent
1691                                         && p->parent->children_count
1692                                         && (p->target == p->parent->target || !p->parent->target)
1693                                         && p->ppid != 1
1694                                 )) {
1695                                 p->parent->children_count--;
1696                                 p->merged = 1;
1697
1698                                 // the parent inherits the child's target, if it does not have a target itself
1699                                 if(unlikely(p->target && !p->parent->target)) {
1700                                         p->parent->target = p->target;
1701
1702                                         if(debug || (p->target && p->target->debug))
1703                                                 fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).\n", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm);
1704                                 }
1705
1706                                 found++;
1707                         }
1708
1709                         // since this process does not have any childs
1710                         // assign it to the current sortlist
1711                         if(unlikely(!p->sortlist && !p->children_count))
1712                                 p->sortlist = sortlist++;
1713                 }
1714
1715                 if(unlikely(debug))
1716                         fprintf(stderr, "apps.plugin: TARGET INHERITANCE: merged %d processes\n", found);
1717         }
1718
1719         // init goes always to default target
1720         if(all_pids[1])
1721                 all_pids[1]->target = apps_groups_default_target;
1722
1723         // give a default target on all top level processes
1724         if(unlikely(debug)) loops++;
1725         for(p = root_of_pids; p ; p = p->next) {
1726                 // if the process is not merged itself
1727                 // then is is a top level process
1728                 if(unlikely(!p->merged && !p->target))
1729                         p->target = apps_groups_default_target;
1730
1731                 // make sure all processes have a sortlist
1732                 if(unlikely(!p->sortlist))
1733                         p->sortlist = sortlist++;
1734         }
1735
1736         // give a target to all merged child processes
1737         found = 1;
1738         while(found) {
1739                 if(unlikely(debug)) loops++;
1740                 found = 0;
1741                 for(p = root_of_pids; p ; p = p->next) {
1742                         if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) {
1743                                 p->target = p->parent->target;
1744                                 found++;
1745
1746                                 if(debug || (p->target && p->target->debug))
1747                                         fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
1748                         }
1749                 }
1750         }
1751
1752         if(unlikely(debug))
1753                 fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops);
1754 }
1755
1756 long zero_all_targets(struct target *root) {
1757         struct target *w;
1758         long count = 0;
1759
1760         for (w = root; w ; w = w->next) {
1761                 count++;
1762
1763                 if(w->fds) free(w->fds);
1764                 w->fds = NULL;
1765
1766                 w->minflt = 0;
1767                 w->majflt = 0;
1768                 w->utime = 0;
1769                 w->stime = 0;
1770                 w->cminflt = 0;
1771                 w->cmajflt = 0;
1772                 w->cutime = 0;
1773                 w->cstime = 0;
1774                 w->num_threads = 0;
1775                 w->rss = 0;
1776                 w->processes = 0;
1777
1778                 w->statm_size = 0;
1779                 w->statm_resident = 0;
1780                 w->statm_share = 0;
1781                 w->statm_text = 0;
1782                 w->statm_lib = 0;
1783                 w->statm_data = 0;
1784                 w->statm_dirty = 0;
1785
1786                 w->io_logical_bytes_read = 0;
1787                 w->io_logical_bytes_written = 0;
1788                 w->io_read_calls = 0;
1789                 w->io_write_calls = 0;
1790                 w->io_storage_bytes_read = 0;
1791                 w->io_storage_bytes_written = 0;
1792                 w->io_cancelled_write_bytes = 0;
1793         }
1794
1795         return count;
1796 }
1797
1798 void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o) {
1799         (void)o;
1800
1801         if(unlikely(!w->fds)) {
1802                 w->fds = calloc(sizeof(int), (size_t) all_files_size);
1803                 if(unlikely(!w->fds))
1804                         error("Cannot allocate memory for fds in %s", w->name);
1805         }
1806
1807         if(likely(p->updated)) {
1808                 w->cutime  += p->cutime;
1809                 w->cstime  += p->cstime;
1810                 w->cminflt += p->cminflt;
1811                 w->cmajflt += p->cmajflt;
1812
1813                 w->utime  += p->utime;
1814                 w->stime  += p->stime;
1815                 w->minflt += p->minflt;
1816                 w->majflt += p->majflt;
1817
1818                 w->rss += p->rss;
1819
1820                 w->statm_size += p->statm_size;
1821                 w->statm_resident += p->statm_resident;
1822                 w->statm_share += p->statm_share;
1823                 w->statm_text += p->statm_text;
1824                 w->statm_lib += p->statm_lib;
1825                 w->statm_data += p->statm_data;
1826                 w->statm_dirty += p->statm_dirty;
1827
1828                 w->io_logical_bytes_read    += p->io_logical_bytes_read;
1829                 w->io_logical_bytes_written += p->io_logical_bytes_written;
1830                 w->io_read_calls            += p->io_read_calls;
1831                 w->io_write_calls           += p->io_write_calls;
1832                 w->io_storage_bytes_read    += p->io_storage_bytes_read;
1833                 w->io_storage_bytes_written += p->io_storage_bytes_written;
1834                 w->io_cancelled_write_bytes += p->io_cancelled_write_bytes;
1835
1836                 w->processes++;
1837                 w->num_threads += p->num_threads;
1838
1839                 if(likely(w->fds)) {
1840                         int c;
1841                         for(c = 0; c < p->fds_size ;c++) {
1842                                 if(p->fds[c] == 0) continue;
1843
1844                                 if(likely(p->fds[c] < all_files_size)) {
1845                                         if(w->fds) w->fds[p->fds[c]]++;
1846                                 }
1847                                 else
1848                                         error("Invalid fd number %d", p->fds[c]);
1849                         }
1850                 }
1851
1852                 if(unlikely(debug || w->debug))
1853                         fprintf(stderr, "apps.plugin: \taggregating '%s' pid %d on target '%s' utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
1854         }
1855 }
1856
1857 void count_targets_fds(struct target *root) {
1858         int c;
1859         struct target *w;
1860
1861         for (w = root; w ; w = w->next) {
1862                 if(!w->fds) continue;
1863
1864                 w->openfiles = 0;
1865                 w->openpipes = 0;
1866                 w->opensockets = 0;
1867                 w->openinotifies = 0;
1868                 w->openeventfds = 0;
1869                 w->opentimerfds = 0;
1870                 w->opensignalfds = 0;
1871                 w->openeventpolls = 0;
1872                 w->openother = 0;
1873
1874                 for(c = 1; c < all_files_size ;c++) {
1875                         if(w->fds[c] > 0)
1876                                 switch(all_files[c].type) {
1877                                 case FILETYPE_FILE:
1878                                         w->openfiles++;
1879                                         break;
1880
1881                                 case FILETYPE_PIPE:
1882                                         w->openpipes++;
1883                                         break;
1884
1885                                 case FILETYPE_SOCKET:
1886                                         w->opensockets++;
1887                                         break;
1888
1889                                 case FILETYPE_INOTIFY:
1890                                         w->openinotifies++;
1891                                         break;
1892
1893                                 case FILETYPE_EVENTFD:
1894                                         w->openeventfds++;
1895                                         break;
1896
1897                                 case FILETYPE_TIMERFD:
1898                                         w->opentimerfds++;
1899                                         break;
1900
1901                                 case FILETYPE_SIGNALFD:
1902                                         w->opensignalfds++;
1903                                         break;
1904
1905                                 case FILETYPE_EVENTPOLL:
1906                                         w->openeventpolls++;
1907                                         break;
1908
1909                                 default:
1910                                         w->openother++;
1911                         }
1912                 }
1913
1914                 free(w->fds);
1915                 w->fds = NULL;
1916         }
1917 }
1918
1919 void calculate_netdata_statistics(void) {
1920         apply_apps_groups_targets_inheritance();
1921
1922         zero_all_targets(users_root_target);
1923         zero_all_targets(groups_root_target);
1924         apps_groups_targets = zero_all_targets(apps_groups_root_target);
1925
1926         // this has to be done, before the cleanup
1927         struct pid_stat *p = NULL;
1928         struct target *w = NULL, *o = NULL;
1929
1930         // concentrate everything on the apps_groups_targets
1931         for(p = root_of_pids; p ; p = p->next) {
1932
1933                 // --------------------------------------------------------------------
1934                 // apps_groups targets
1935                 if(likely(p->target))
1936                         aggregate_pid_on_target(p->target, p, NULL);
1937                 else
1938                         error("pid %d %s was left without a target!", p->pid, p->comm);
1939
1940
1941                 // --------------------------------------------------------------------
1942                 // user targets
1943                 o = p->user_target;
1944                 if(likely(p->user_target && p->user_target->uid == p->uid))
1945                         w = p->user_target;
1946                 else {
1947                         if(unlikely(debug && p->user_target))
1948                                         fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %u (%s) to %u.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid);
1949
1950                         w = p->user_target = get_users_target(p->uid);
1951                 }
1952
1953                 if(likely(w))
1954                         aggregate_pid_on_target(w, p, o);
1955                 else
1956                         error("pid %d %s was left without a user target!", p->pid, p->comm);
1957
1958
1959                 // --------------------------------------------------------------------
1960                 // group targets
1961                 o = p->group_target;
1962                 if(likely(p->group_target && p->group_target->gid == p->gid))
1963                         w = p->group_target;
1964                 else {
1965                         if(unlikely(debug && p->group_target))
1966                                         fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %u (%s) to %u.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid);
1967
1968                         w = p->group_target = get_groups_target(p->gid);
1969                 }
1970
1971                 if(likely(w))
1972                         aggregate_pid_on_target(w, p, o);
1973                 else
1974                         error("pid %d %s was left without a group target!", p->pid, p->comm);
1975
1976         }
1977
1978         count_targets_fds(apps_groups_root_target);
1979         count_targets_fds(users_root_target);
1980         count_targets_fds(groups_root_target);
1981
1982         cleanup_exited_pids();
1983 }
1984
1985 // ----------------------------------------------------------------------------
1986 // update chart dimensions
1987
1988 unsigned long long send_resource_usage_to_netdata() {
1989         static struct timeval last = { 0, 0 };
1990         static struct rusage me_last;
1991
1992         struct timeval now;
1993         struct rusage me;
1994
1995         unsigned long long usec;
1996         unsigned long long cpuuser;
1997         unsigned long long cpusyst;
1998
1999         if(!last.tv_sec) {
2000                 gettimeofday(&last, NULL);
2001                 getrusage(RUSAGE_SELF, &me_last);
2002
2003                 // the first time, give a zero to allow
2004                 // netdata calibrate to the current time
2005                 // usec = update_every * 1000000ULL;
2006                 usec = 0ULL;
2007                 cpuuser = 0;
2008                 cpusyst = 0;
2009         }
2010         else {
2011                 gettimeofday(&now, NULL);
2012                 getrusage(RUSAGE_SELF, &me);
2013
2014                 usec = usecdiff(&now, &last);
2015                 cpuuser = me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec;
2016                 cpusyst = me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec;
2017
2018                 bcopy(&now, &last, sizeof(struct timeval));
2019                 bcopy(&me, &me_last, sizeof(struct rusage));
2020         }
2021
2022         fprintf(stdout, "BEGIN netdata.apps_cpu %llu\n", usec);
2023         fprintf(stdout, "SET user = %llu\n", cpuuser);
2024         fprintf(stdout, "SET system = %llu\n", cpusyst);
2025         fprintf(stdout, "END\n");
2026
2027         fprintf(stdout, "BEGIN netdata.apps_files %llu\n", usec);
2028         fprintf(stdout, "SET files = %llu\n", file_counter);
2029         fprintf(stdout, "SET pids = %ld\n", all_pids_count);
2030         fprintf(stdout, "SET fds = %d\n", all_files_len);
2031         fprintf(stdout, "SET targets = %ld\n", apps_groups_targets);
2032         fprintf(stdout, "END\n");
2033
2034         return usec;
2035 }
2036
2037 void send_collected_data_to_netdata(struct target *root, const char *type, unsigned long long usec)
2038 {
2039         struct target *w;
2040         int childs = include_exited_childs;
2041
2042         {
2043                 // childs processing introduces spikes
2044                 // here we try to eliminate them by disabling childs processing either for specific dimensions
2045                 // or entirely. Of course, either way, we disable it just a single iteration.
2046
2047                 unsigned long long max = processors * hz * 100;
2048                 unsigned long long utime = 0, cutime = 0, stime = 0, cstime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0;
2049
2050                 for (w = root; w ; w = w->next) {
2051                         if(w->target || (!w->processes && !w->exposed)) continue;
2052
2053                         if((w->utime + w->stime + w->cutime + w->cstime) > max) {
2054 #ifdef NETDATA_INTERNAL_CHECKS
2055                                 log_date(stderr);
2056                                 fprintf(stderr, "Prevented a spike on target '%s', reported CPU time = %llu (without childs = %llu)\n", w->name, (w->utime + w->stime + w->cutime + w->cstime) / 100, (w->utime + w->stime) / 100);
2057 #endif
2058                                 w->cutime = w->cstime = w->cminflt = w->majflt = 0;
2059                         }
2060
2061                         utime   += w->utime;
2062                         cutime  += w->cutime;
2063                         stime   += w->stime;
2064                         cstime  += w->cstime;
2065                         minflt  += w->minflt;
2066                         cminflt += w->cminflt;
2067                         majflt  += w->majflt;
2068                         cmajflt += w->cmajflt;
2069                 }
2070
2071                 if((utime + stime + cutime + cstime) > max) {
2072                         childs = 0;
2073 #ifdef NETDATA_INTERNAL_CHECKS
2074                         log_date(stderr);
2075                         fprintf(stderr, "Prevented a spike because the total CPU of all dimensions = %llu (without childs = %llu)\n", (utime + stime + cutime + cstime) / 100, (utime + stime) / 100);
2076 #endif
2077                 }
2078
2079                 if((utime + stime) > max) {
2080                         childs = 0;
2081                         unsigned long long multiplier = max, divider = utime + stime;
2082                         for (w = root; w ; w = w->next) {
2083                                 w->utime  = w->utime * multiplier / divider;
2084                                 w->stime  = w->stime * multiplier / divider;
2085                                 w->minflt = w->minflt * multiplier / divider;
2086                                 w->majflt = w->majflt * multiplier / divider;
2087                         }
2088
2089 #ifdef NETDATA_INTERNAL_CHECKS
2090                         log_date(stderr);
2091                         fprintf(stderr, "Reduced processes utilization (without childs) by %0.2f%% (CPU was %llu)\n", (float)(((utime + stime - max) * 100.0)/(float)max), (utime + stime) / 100);
2092 #endif
2093                 }
2094
2095         }
2096
2097         fprintf(stdout, "BEGIN %s.cpu %llu\n", type, usec);
2098         for (w = root; w ; w = w->next) {
2099                 if(w->target || (!w->processes && !w->exposed)) continue;
2100
2101                 fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->stime + (childs?(w->cutime + w->cstime):0));
2102         }
2103         fprintf(stdout, "END\n");
2104
2105         fprintf(stdout, "BEGIN %s.cpu_user %llu\n", type, usec);
2106         for (w = root; w ; w = w->next) {
2107                 if(w->target || (!w->processes && !w->exposed)) continue;
2108
2109                 fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + (childs?(w->cutime):0));
2110         }
2111         fprintf(stdout, "END\n");
2112
2113         fprintf(stdout, "BEGIN %s.cpu_system %llu\n", type, usec);
2114         for (w = root; w ; w = w->next) {
2115                 if(w->target || (!w->processes && !w->exposed)) continue;
2116
2117                 fprintf(stdout, "SET %s = %llu\n", w->name, w->stime + (childs?(w->cstime):0));
2118         }
2119         fprintf(stdout, "END\n");
2120
2121         fprintf(stdout, "BEGIN %s.threads %llu\n", type, usec);
2122         for (w = root; w ; w = w->next) {
2123                 if(w->target || (!w->processes && !w->exposed)) continue;
2124
2125                 fprintf(stdout, "SET %s = %llu\n", w->name, w->num_threads);
2126         }
2127         fprintf(stdout, "END\n");
2128
2129         fprintf(stdout, "BEGIN %s.processes %llu\n", type, usec);
2130         for (w = root; w ; w = w->next) {
2131                 if(w->target || (!w->processes && !w->exposed)) continue;
2132
2133                 fprintf(stdout, "SET %s = %lu\n", w->name, w->processes);
2134         }
2135         fprintf(stdout, "END\n");
2136
2137         fprintf(stdout, "BEGIN %s.mem %llu\n", type, usec);
2138         for (w = root; w ; w = w->next) {
2139                 if(w->target || (!w->processes && !w->exposed)) continue;
2140
2141                 fprintf(stdout, "SET %s = %lld\n", w->name, (long long)w->statm_resident - (long long)w->statm_share);
2142         }
2143         fprintf(stdout, "END\n");
2144
2145         fprintf(stdout, "BEGIN %s.minor_faults %llu\n", type, usec);
2146         for (w = root; w ; w = w->next) {
2147                 if(w->target || (!w->processes && !w->exposed)) continue;
2148
2149                 fprintf(stdout, "SET %s = %llu\n", w->name, w->minflt + (childs?(w->cminflt):0));
2150         }
2151         fprintf(stdout, "END\n");
2152
2153         fprintf(stdout, "BEGIN %s.major_faults %llu\n", type, usec);
2154         for (w = root; w ; w = w->next) {
2155                 if(w->target || (!w->processes && !w->exposed)) continue;
2156
2157                 fprintf(stdout, "SET %s = %llu\n", w->name, w->majflt + (childs?(w->cmajflt):0));
2158         }
2159         fprintf(stdout, "END\n");
2160
2161         fprintf(stdout, "BEGIN %s.lreads %llu\n", type, usec);
2162         for (w = root; w ; w = w->next) {
2163                 if(w->target || (!w->processes && !w->exposed)) continue;
2164
2165                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_read);
2166         }
2167         fprintf(stdout, "END\n");
2168
2169         fprintf(stdout, "BEGIN %s.lwrites %llu\n", type, usec);
2170         for (w = root; w ; w = w->next) {
2171                 if(w->target || (!w->processes && !w->exposed)) continue;
2172
2173                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_written);
2174         }
2175         fprintf(stdout, "END\n");
2176
2177         fprintf(stdout, "BEGIN %s.preads %llu\n", type, usec);
2178         for (w = root; w ; w = w->next) {
2179                 if(w->target || (!w->processes && !w->exposed)) continue;
2180
2181                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_read);
2182         }
2183         fprintf(stdout, "END\n");
2184
2185         fprintf(stdout, "BEGIN %s.pwrites %llu\n", type, usec);
2186         for (w = root; w ; w = w->next) {
2187                 if(w->target || (!w->processes && !w->exposed)) continue;
2188
2189                 fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_written);
2190         }
2191         fprintf(stdout, "END\n");
2192
2193         fprintf(stdout, "BEGIN %s.files %llu\n", type, usec);
2194         for (w = root; w ; w = w->next) {
2195                 if(w->target || (!w->processes && !w->exposed)) continue;
2196
2197                 fprintf(stdout, "SET %s = %llu\n", w->name, w->openfiles);
2198         }
2199         fprintf(stdout, "END\n");
2200
2201         fprintf(stdout, "BEGIN %s.sockets %llu\n", type, usec);
2202         for (w = root; w ; w = w->next) {
2203                 if(w->target || (!w->processes && !w->exposed)) continue;
2204
2205                 fprintf(stdout, "SET %s = %llu\n", w->name, w->opensockets);
2206         }
2207         fprintf(stdout, "END\n");
2208
2209         fprintf(stdout, "BEGIN %s.pipes %llu\n", type, usec);
2210         for (w = root; w ; w = w->next) {
2211                 if(w->target || (!w->processes && !w->exposed)) continue;
2212
2213                 fprintf(stdout, "SET %s = %llu\n", w->name, w->openpipes);
2214         }
2215         fprintf(stdout, "END\n");
2216
2217         fflush(stdout);
2218 }
2219
2220
2221 // ----------------------------------------------------------------------------
2222 // generate the charts
2223
2224 void send_charts_updates_to_netdata(struct target *root, const char *type, const char *title)
2225 {
2226         struct target *w;
2227         int newly_added = 0;
2228
2229         for(w = root ; w ; w = w->next)
2230                 if(!w->exposed && w->processes) {
2231                         newly_added++;
2232                         w->exposed = 1;
2233                         if(debug || w->debug) fprintf(stderr, "apps.plugin: %s just added - regenerating charts.\n", w->name);
2234                 }
2235
2236         // nothing more to show
2237         if(!newly_added) return;
2238
2239         // we have something new to show
2240         // update the charts
2241         fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
2242         for (w = root; w ; w = w->next) {
2243                 if(w->target || (!w->processes && !w->exposed)) continue;
2244
2245                 fprintf(stdout, "DIMENSION %s '' absolute 1 %u %s\n", w->name, hz, w->hidden ? "hidden,noreset" : "noreset");
2246         }
2247
2248         fprintf(stdout, "CHART %s.mem '' '%s Dedicated Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every);
2249         for (w = root; w ; w = w->next) {
2250                 if(w->target || (!w->processes && !w->exposed)) continue;
2251
2252                 fprintf(stdout, "DIMENSION %s '' absolute %ld %ld noreset\n", w->name, sysconf(_SC_PAGESIZE), 1024L*1024L);
2253         }
2254
2255         fprintf(stdout, "CHART %s.threads '' '%s Threads' 'threads' processes %s.threads stacked 20005 %d\n", type, title, type, update_every);
2256         for (w = root; w ; w = w->next) {
2257                 if(w->target || (!w->processes && !w->exposed)) continue;
2258
2259                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2260         }
2261
2262         fprintf(stdout, "CHART %s.processes '' '%s Processes' 'processes' processes %s.processes stacked 20004 %d\n", type, title, type, update_every);
2263         for (w = root; w ; w = w->next) {
2264                 if(w->target || (!w->processes && !w->exposed)) continue;
2265
2266                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2267         }
2268
2269         fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
2270         for (w = root; w ; w = w->next) {
2271                 if(w->target || (!w->processes && !w->exposed)) continue;
2272
2273                 fprintf(stdout, "DIMENSION %s '' absolute 1 %u noreset\n", w->name, hz);
2274         }
2275
2276         fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
2277         for (w = root; w ; w = w->next) {
2278                 if(w->target || (!w->processes && !w->exposed)) continue;
2279
2280                 fprintf(stdout, "DIMENSION %s '' absolute 1 %u noreset\n", w->name, hz);
2281         }
2282
2283         fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20010 %d\n", type, title, type, update_every);
2284         for (w = root; w ; w = w->next) {
2285                 if(w->target || (!w->processes && !w->exposed)) continue;
2286
2287                 fprintf(stdout, "DIMENSION %s '' absolute 1 100 noreset\n", w->name);
2288         }
2289
2290         fprintf(stdout, "CHART %s.minor_faults '' '%s Minor Page Faults' 'page faults/s' mem %s.minor_faults stacked 20011 %d\n", type, title, type, update_every);
2291         for (w = root; w ; w = w->next) {
2292                 if(w->target || (!w->processes && !w->exposed)) continue;
2293
2294                 fprintf(stdout, "DIMENSION %s '' absolute 1 100 noreset\n", w->name);
2295         }
2296
2297         fprintf(stdout, "CHART %s.lreads '' '%s Disk Logical Reads' 'kilobytes/s' disk %s.lreads stacked 20042 %d\n", type, title, type, update_every);
2298         for (w = root; w ; w = w->next) {
2299                 if(w->target || (!w->processes && !w->exposed)) continue;
2300
2301                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2302         }
2303
2304         fprintf(stdout, "CHART %s.lwrites '' '%s I/O Logical Writes' 'kilobytes/s' disk %s.lwrites stacked 20042 %d\n", type, title, type, update_every);
2305         for (w = root; w ; w = w->next) {
2306                 if(w->target || (!w->processes && !w->exposed)) continue;
2307
2308                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2309         }
2310
2311         fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'kilobytes/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every);
2312         for (w = root; w ; w = w->next) {
2313                 if(w->target || (!w->processes && !w->exposed)) continue;
2314
2315                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2316         }
2317
2318         fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'kilobytes/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every);
2319         for (w = root; w ; w = w->next) {
2320                 if(w->target || (!w->processes && !w->exposed)) continue;
2321
2322                 fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024*100);
2323         }
2324
2325         fprintf(stdout, "CHART %s.files '' '%s Open Files' 'open files' disk %s.files stacked 20050 %d\n", type, title, type, update_every);
2326         for (w = root; w ; w = w->next) {
2327                 if(w->target || (!w->processes && !w->exposed)) continue;
2328
2329                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2330         }
2331
2332         fprintf(stdout, "CHART %s.sockets '' '%s Open Sockets' 'open sockets' net %s.sockets stacked 20051 %d\n", type, title, type, update_every);
2333         for (w = root; w ; w = w->next) {
2334                 if(w->target || (!w->processes && !w->exposed)) continue;
2335
2336                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2337         }
2338
2339         fprintf(stdout, "CHART %s.pipes '' '%s Pipes' 'open pipes' processes %s.pipes stacked 20053 %d\n", type, title, type, update_every);
2340         for (w = root; w ; w = w->next) {
2341                 if(w->target || (!w->processes && !w->exposed)) continue;
2342
2343                 fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name);
2344         }
2345 }
2346
2347
2348 // ----------------------------------------------------------------------------
2349 // parse command line arguments
2350
2351 void parse_args(int argc, char **argv)
2352 {
2353         int i, freq = 0;
2354         char *name = NULL;
2355
2356         for(i = 1; i < argc; i++) {
2357                 if(!freq) {
2358                         int n = atoi(argv[i]);
2359                         if(n > 0) {
2360                                 freq = n;
2361                                 continue;
2362                         }
2363                 }
2364
2365                 if(strcmp("debug", argv[i]) == 0) {
2366                         debug = 1;
2367                         // debug_flags = 0xffffffff;
2368                         continue;
2369                 }
2370
2371                 if(strcmp("no-childs", argv[i]) == 0) {
2372                         include_exited_childs = 0;
2373                         continue;
2374                 }
2375
2376                 if(strcmp("with-childs", argv[i]) == 0) {
2377                         include_exited_childs = 1;
2378                         continue;
2379                 }
2380
2381                 if(!name) {
2382                         name = argv[i];
2383                         continue;
2384                 }
2385
2386                 error("Cannot understand option %s", argv[i]);
2387                 exit(1);
2388         }
2389
2390         if(freq > 0) update_every = freq;
2391         if(!name) name = "groups";
2392
2393         if(read_apps_groups_conf(name)) {
2394                 error("Cannot read process groups %s", name);
2395                 exit(1);
2396         }
2397 }
2398
2399 int main(int argc, char **argv)
2400 {
2401         // debug_flags = D_PROCFILE;
2402
2403         // set the name for logging
2404         program_name = "apps.plugin";
2405
2406         // disable syslog for apps.plugin
2407         error_log_syslog = 0;
2408
2409         // set errors flood protection to 100 logs per hour
2410         error_log_errors_per_period = 100;
2411         error_log_throttle_period = 3600;
2412
2413         host_prefix = getenv("NETDATA_HOST_PREFIX");
2414         if(host_prefix == NULL) {
2415                 info("NETDATA_HOST_PREFIX is not passed from netdata");
2416                 host_prefix = "";
2417         }
2418         else info("Found NETDATA_HOST_PREFIX='%s'", host_prefix);
2419
2420         config_dir = getenv("NETDATA_CONFIG_DIR");
2421         if(config_dir == NULL) {
2422                 info("NETDATA_CONFIG_DIR is not passed from netdata");
2423                 config_dir = CONFIG_DIR;
2424         }
2425         else info("Found NETDATA_CONFIG_DIR='%s'", config_dir);
2426
2427 #ifdef NETDATA_INTERNAL_CHECKS
2428         if(debug_flags != 0) {
2429                 struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
2430                 if(setrlimit(RLIMIT_CORE, &rl) != 0)
2431                         info("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
2432                 prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
2433         }
2434 #endif /* NETDATA_INTERNAL_CHECKS */
2435
2436         procfile_adaptive_initial_allocation = 1;
2437
2438         time_t started_t = time(NULL);
2439         time_t current_t;
2440         get_HZ();
2441         pid_max = get_system_pid_max();
2442         processors = get_system_cpus();
2443
2444         parse_args(argc, argv);
2445
2446         all_pids_sortlist = calloc(sizeof(pid_t), (size_t)pid_max);
2447         if(!all_pids_sortlist) {
2448                 error("Cannot allocate %zu bytes of memory.", sizeof(pid_t) * pid_max);
2449                 printf("DISABLE\n");
2450                 exit(1);
2451         }
2452
2453         all_pids = calloc(sizeof(struct pid_stat *), (size_t) pid_max);
2454         if(!all_pids) {
2455                 error("Cannot allocate %zu bytes of memory.", sizeof(struct pid_stat *) * pid_max);
2456                 printf("DISABLE\n");
2457                 exit(1);
2458         }
2459
2460         fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n"
2461                         "DIMENSION user '' incremental 1 1000\n"
2462                         "DIMENSION system '' incremental 1 1000\n"
2463                         "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n"
2464                         "DIMENSION files '' incremental 1 1\n"
2465                         "DIMENSION pids '' absolute 1 1\n"
2466                         "DIMENSION fds '' absolute 1 1\n"
2467                         "DIMENSION targets '' absolute 1 1\n", update_every);
2468
2469 #ifndef PROFILING_MODE
2470         unsigned long long sunext = (time(NULL) - (time(NULL) % update_every) + update_every) * 1000000ULL;
2471         unsigned long long sunow;
2472 #endif /* PROFILING_MODE */
2473
2474         global_iterations_counter = 1;
2475         for(;1; global_iterations_counter++) {
2476 #ifndef PROFILING_MODE
2477                 // delay until it is our time to run
2478                 while((sunow = timems()) < sunext)
2479                         usecsleep(sunext - sunow);
2480
2481                 // find the next time we need to run
2482                 while(timems() > sunext)
2483                         sunext += update_every * 1000000ULL;
2484 #endif /* PROFILING_MODE */
2485
2486                 if(!collect_data_for_all_processes_from_proc()) {
2487                         error("Cannot collect /proc data for running processes. Disabling apps.plugin...");
2488                         printf("DISABLE\n");
2489                         exit(1);
2490                 }
2491
2492                 calculate_netdata_statistics();
2493
2494                 unsigned long long dt = send_resource_usage_to_netdata();
2495
2496                 // this is smart enough to show only newly added apps, when needed
2497                 send_charts_updates_to_netdata(apps_groups_root_target, "apps", "Apps");
2498                 send_charts_updates_to_netdata(users_root_target, "users", "Users");
2499                 send_charts_updates_to_netdata(groups_root_target, "groups", "User Groups");
2500
2501                 send_collected_data_to_netdata(apps_groups_root_target, "apps", dt);
2502                 send_collected_data_to_netdata(users_root_target, "users", dt);
2503                 send_collected_data_to_netdata(groups_root_target, "groups", dt);
2504
2505                 if(unlikely(debug))
2506                         fprintf(stderr, "apps.plugin: done Loop No %llu\n", global_iterations_counter);
2507
2508                 current_t = time(NULL);
2509
2510 #ifndef PROFILING_MODE
2511                 // restart check (14400 seconds)
2512                 if(current_t - started_t > 14400) exit(0);
2513 #else
2514                 if(current_t - started_t > 10) exit(0);
2515 #endif /* PROFILING_MODE */
2516         }
2517 }