X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=src%2Fapps_plugin.c;h=82b77df655ff5a945c1b5e327f80adfd9fb24b97;hb=ce381953ca239d092b3085ee29798a4908f32747;hp=572a4065ec1090549b5c52c427f39fe38a817369;hpb=67ce94519fe4cf35144460b8110b3e7676b716b0;p=netdata.git diff --git a/src/apps_plugin.c b/src/apps_plugin.c index 572a4065..82b77df6 100644 --- a/src/apps_plugin.c +++ b/src/apps_plugin.c @@ -1,50 +1,136 @@ + +/* + * netdata apps.plugin + * (C) Copyright 2016-2017 Costa Tsaousis + * Released under GPL v3+ + */ + #include "common.h" +#ifdef __FreeBSD__ +#include +#endif + +// ---------------------------------------------------------------------------- +// per O/S configuration + +// the minimum PID of the system +// this is also the pid of the init process +#define INIT_PID 1 + +// if the way apps.plugin will work, will read the entire process list, +// including the resource utilization of each process, instantly +// set this to 1 +// when set to 0, apps.plugin builds a sort list of processes, in order +// to process children processes, before parent processes +#ifdef __FreeBSD__ +#define ALL_PIDS_ARE_READ_INSTANTLY 1 +#else +#define ALL_PIDS_ARE_READ_INSTANTLY 0 +#endif + +// ---------------------------------------------------------------------------- +// string lengths + #define MAX_COMPARE_NAME 100 #define MAX_NAME 100 #define MAX_CMDLINE 1024 -// the rates we are going to send to netdata -// will have this detail -// a value of: -// 1 will send just integer parts to netdata -// 100 will send 2 decimal points -// 1000 will send 3 decimal points + +// ---------------------------------------------------------------------------- +// the rates we are going to send to netdata will have this detail a value of: +// - 1 will send just integer parts to netdata +// - 100 will send 2 decimal points +// - 1000 will send 3 decimal points // etc. #define RATES_DETAIL 10000ULL + +// ---------------------------------------------------------------------------- +// to avoid reallocating too frequently, we can increase the number of spare +// file descriptors used by processes. +// IMPORTANT: +// having a lot of spares, increases the CPU utilization of the plugin. #define MAX_SPARE_FDS 1 -int debug = 0; -int update_every = 1; -unsigned long long global_iterations_counter = 1; -unsigned long long file_counter = 0; -int proc_pid_cmdline_is_needed = 0; -int include_exited_childs = 1; -char *config_dir = CONFIG_DIR; +// ---------------------------------------------------------------------------- +// command line options -pid_t *all_pids_sortlist = NULL; +static int + debug = 0, + update_every = 1, + enable_guest_charts = 0, + enable_file_charts = 1, + enable_users_charts = 1, + enable_groups_charts = 1, + include_exited_childs = 1; -// will be automatically set to 1, if guest values are collected -int show_guest_time = 0; -int show_guest_time_old = 0; -int enable_guest_charts = 0; -int enable_file_charts = 1; -int enable_users_charts = 1; -int enable_groups_charts = 1; +// will be changed to getenv(NETDATA_CONFIG_DIR) if it exists +static char *config_dir = CONFIG_DIR; // ---------------------------------------------------------------------------- +// internal flags +// handled in code (automatically set) + +static int + show_guest_time = 0, // 1 when guest values are collected + show_guest_time_old = 0, + proc_pid_cmdline_is_needed = 0; // 1 when we need to read /proc/cmdline + + +// ---------------------------------------------------------------------------- +// internal counters + +static size_t + global_iterations_counter = 1, + calls_counter = 0, + file_counter = 0; + + +// ---------------------------------------------------------------------------- +// Normalization +// +// With normalization we lower the collected metrics by a factor to make them +// match the total utilization of the system. +// The discrepancy exists because apps.plugin needs some time to collect all +// the metrics. This results in utilization that exceeds the total utilization +// of the system. +// +// With normalization we align the per-process utilization, to the total of +// the system. We first consume the exited children utilization and it the +// collected values is above the total, we proportionally scale each reported +// metric. + +// the total system time, as reported by /proc/stat +static kernel_uint_t + global_utime = 0, + global_stime = 0, + global_gtime = 0; -void netdata_cleanup_and_exit(int ret) { - exit(ret); -} + +// the normalization ratios, as calculated by normalize_utilization() +double utime_fix_ratio = 1.0, + stime_fix_ratio = 1.0, + gtime_fix_ratio = 1.0, + minflt_fix_ratio = 1.0, + majflt_fix_ratio = 1.0, + cutime_fix_ratio = 1.0, + cstime_fix_ratio = 1.0, + cgtime_fix_ratio = 1.0, + cminflt_fix_ratio = 1.0, + cmajflt_fix_ratio = 1.0; // ---------------------------------------------------------------------------- // target -// target is the structure that process data are aggregated +// +// target is the structure that processes are aggregated to be reported +// to netdata. +// +// - Each entry in /etc/apps_groups.conf creates a target. +// - Each user and group used by a process in the system, creates a target. struct target { char compare[MAX_COMPARE_NAME + 1]; @@ -59,74 +145,281 @@ struct target { uid_t uid; gid_t gid; - unsigned long long minflt; - unsigned long long cminflt; - unsigned long long majflt; - unsigned long long cmajflt; - unsigned long long utime; - unsigned long long stime; - unsigned long long gtime; - unsigned long long cutime; - unsigned long long cstime; - unsigned long long cgtime; - unsigned long long num_threads; - // unsigned long long rss; - - unsigned long long statm_size; - unsigned long long statm_resident; - unsigned long long statm_share; - // unsigned long long statm_text; - // unsigned long long statm_lib; - // unsigned long long statm_data; - // unsigned long long statm_dirty; - - unsigned long long io_logical_bytes_read; - unsigned long long io_logical_bytes_written; - // unsigned long long io_read_calls; - // unsigned long long io_write_calls; - unsigned long long io_storage_bytes_read; - unsigned long long io_storage_bytes_written; - // unsigned long long io_cancelled_write_bytes; + kernel_uint_t minflt; + kernel_uint_t cminflt; + kernel_uint_t majflt; + kernel_uint_t cmajflt; + kernel_uint_t utime; + kernel_uint_t stime; + kernel_uint_t gtime; + kernel_uint_t cutime; + kernel_uint_t cstime; + kernel_uint_t cgtime; + kernel_uint_t num_threads; + // kernel_uint_t rss; + + kernel_uint_t statm_size; + kernel_uint_t statm_resident; + kernel_uint_t statm_share; + // kernel_uint_t statm_text; + // kernel_uint_t statm_lib; + // kernel_uint_t statm_data; + // kernel_uint_t statm_dirty; + + kernel_uint_t io_logical_bytes_read; + kernel_uint_t io_logical_bytes_written; + // kernel_uint_t io_read_calls; + // kernel_uint_t io_write_calls; + kernel_uint_t io_storage_bytes_read; + kernel_uint_t io_storage_bytes_written; + // kernel_uint_t io_cancelled_write_bytes; int *target_fds; int target_fds_size; - unsigned long long openfiles; - unsigned long long openpipes; - unsigned long long opensockets; - unsigned long long openinotifies; - unsigned long long openeventfds; - unsigned long long opentimerfds; - unsigned long long opensignalfds; - unsigned long long openeventpolls; - unsigned long long openother; - - unsigned long processes; // how many processes have been merged to this - int exposed; // if set, we have sent this to netdata - int hidden; // if set, we set the hidden flag on the dimension + kernel_uint_t openfiles; + kernel_uint_t openpipes; + kernel_uint_t opensockets; + kernel_uint_t openinotifies; + kernel_uint_t openeventfds; + kernel_uint_t opentimerfds; + kernel_uint_t opensignalfds; + kernel_uint_t openeventpolls; + kernel_uint_t openother; + + unsigned int processes; // how many processes have been merged to this + int exposed; // if set, we have sent this to netdata + int hidden; // if set, we set the hidden flag on the dimension int debug; int ends_with; - int starts_with; // if set, the compare string matches only the - // beginning of the command + int starts_with; // if set, the compare string matches only the + // beginning of the command - struct target *target; // the one that will be reported to netdata + struct target *target; // the one that will be reported to netdata struct target *next; }; +struct target + *apps_groups_default_target = NULL, // the default target + *apps_groups_root_target = NULL, // apps_groups.conf defined + *users_root_target = NULL, // users + *groups_root_target = NULL; // user groups + +size_t + apps_groups_targets_count = 0; // # of apps_groups.conf targets + // ---------------------------------------------------------------------------- -// apps_groups.conf -// aggregate all processes in groups, to have a limited number of dimensions +// pid_stat +// +// structure to store data for each process running +// see: man proc for the description of the fields -struct target *apps_groups_root_target = NULL; -struct target *apps_groups_default_target = NULL; -long apps_groups_targets = 0; +struct pid_stat { + int32_t pid; + char comm[MAX_COMPARE_NAME + 1]; + char cmdline[MAX_CMDLINE + 1]; -struct target *users_root_target = NULL; -struct target *groups_root_target = NULL; + uint32_t log_thrown; -static struct target *get_users_target(uid_t uid) -{ + // char state; + int32_t ppid; + // int32_t pgrp; + // int32_t session; + // int32_t tty_nr; + // int32_t tpgid; + // uint64_t flags; + + // these are raw values collected + kernel_uint_t minflt_raw; + kernel_uint_t cminflt_raw; + kernel_uint_t majflt_raw; + kernel_uint_t cmajflt_raw; + kernel_uint_t utime_raw; + kernel_uint_t stime_raw; + kernel_uint_t gtime_raw; // guest_time + kernel_uint_t cutime_raw; + kernel_uint_t cstime_raw; + kernel_uint_t cgtime_raw; // cguest_time + + // these are rates + kernel_uint_t minflt; + kernel_uint_t cminflt; + kernel_uint_t majflt; + kernel_uint_t cmajflt; + kernel_uint_t utime; + kernel_uint_t stime; + kernel_uint_t gtime; + kernel_uint_t cutime; + kernel_uint_t cstime; + kernel_uint_t cgtime; + + // int64_t priority; + // int64_t nice; + int32_t num_threads; + // int64_t itrealvalue; + // kernel_uint_t starttime; + // kernel_uint_t vsize; + // kernel_uint_t rss; + // kernel_uint_t rsslim; + // kernel_uint_t starcode; + // kernel_uint_t endcode; + // kernel_uint_t startstack; + // kernel_uint_t kstkesp; + // kernel_uint_t kstkeip; + // uint64_t signal; + // uint64_t blocked; + // uint64_t sigignore; + // uint64_t sigcatch; + // uint64_t wchan; + // uint64_t nswap; + // uint64_t cnswap; + // int32_t exit_signal; + // int32_t processor; + // uint32_t rt_priority; + // uint32_t policy; + // kernel_uint_t delayacct_blkio_ticks; + + uid_t uid; + gid_t gid; + + kernel_uint_t statm_size; + kernel_uint_t statm_resident; + kernel_uint_t statm_share; + // kernel_uint_t statm_text; + // kernel_uint_t statm_lib; + // kernel_uint_t statm_data; + // kernel_uint_t statm_dirty; + + kernel_uint_t io_logical_bytes_read_raw; + kernel_uint_t io_logical_bytes_written_raw; + // kernel_uint_t io_read_calls_raw; + // kernel_uint_t io_write_calls_raw; + kernel_uint_t io_storage_bytes_read_raw; + kernel_uint_t io_storage_bytes_written_raw; + // kernel_uint_t io_cancelled_write_bytes_raw; + + kernel_uint_t io_logical_bytes_read; + kernel_uint_t io_logical_bytes_written; + // kernel_uint_t io_read_calls; + // kernel_uint_t io_write_calls; + kernel_uint_t io_storage_bytes_read; + kernel_uint_t io_storage_bytes_written; + // kernel_uint_t io_cancelled_write_bytes; + + int *fds; // array of fds it uses + int fds_size; // the size of the fds array + + int children_count; // number of processes directly referencing this + char keep:1; // 1 when we need to keep this process in memory even after it exited + int keeploops; // increases by 1 every time keep is 1 and updated 0 + char updated:1; // 1 when the process is currently running + char merged:1; // 1 when it has been merged to its parent + char read:1; // 1 when we have already read this process for this iteration + + int sortlist; // higher numbers = top on the process tree + // each process gets a unique number + + struct target *target; // app_groups.conf targets + struct target *user_target; // uid based targets + struct target *group_target; // gid based targets + + usec_t stat_collected_usec; + usec_t last_stat_collected_usec; + + usec_t io_collected_usec; + usec_t last_io_collected_usec; + + char *fds_dirname; // the full directory name in /proc/PID/fd + + char *stat_filename; + char *statm_filename; + char *io_filename; + char *cmdline_filename; + + struct pid_stat *parent; + struct pid_stat *prev; + struct pid_stat *next; +}; + +// log each problem once per process +// log flood protection flags (log_thrown) +#define PID_LOG_IO 0x00000001 +#define PID_LOG_STATM 0x00000002 +#define PID_LOG_CMDLINE 0x00000004 +#define PID_LOG_FDS 0x00000008 +#define PID_LOG_STAT 0x00000010 + +static struct pid_stat + *root_of_pids = NULL, // global list of all processes running + **all_pids = NULL; // to avoid allocations, we pre-allocate the + // the entire pid space. + +static size_t + all_pids_count = 0; // the number of processes running + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) +// Another pre-allocated list of all possible pids. +// We need it to pids and assign them a unique sortlist id, so that we +// read parents before children. This is needed to prevent a situation where +// a child is found running, but until we read its parent, it has exited and +// its parent has accumulated its resources. +static pid_t + *all_pids_sortlist = NULL; +#endif + +// ---------------------------------------------------------------------------- +// file descriptor +// +// this is used to keep a global list of all open files of the system. +// it is needed in order to calculate the unique files processes have open. + +#define FILE_DESCRIPTORS_INCREASE_STEP 100 + +// types for struct file_descriptor->type +typedef enum fd_filetype { + FILETYPE_OTHER, + FILETYPE_FILE, + FILETYPE_PIPE, + FILETYPE_SOCKET, + FILETYPE_INOTIFY, + FILETYPE_EVENTFD, + FILETYPE_EVENTPOLL, + FILETYPE_TIMERFD, + FILETYPE_SIGNALFD +} FD_FILETYPE; + +struct file_descriptor { + avl avl; + +#ifdef NETDATA_INTERNAL_CHECKS + uint32_t magic; +#endif /* NETDATA_INTERNAL_CHECKS */ + + const char *name; + uint32_t hash; + + FD_FILETYPE type; + int count; + int pos; +} *all_files = NULL; + +static int + all_files_len = 0, + all_files_size = 0; + +// ---------------------------------------------------------------------------- +// callback required by fatal() + +void netdata_cleanup_and_exit(int ret) { + exit(ret); +} + +// ---------------------------------------------------------------------------- +// apps_groups.conf +// aggregate all processes in groups, to have a limited number of dimensions + +static struct target *get_users_target(uid_t uid) { struct target *w; for(w = users_root_target ; w ; w = w->next) if(w->uid == uid) return w; @@ -221,10 +514,12 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ if(*name == '-') thidden = 1; name++; } - for(target = apps_groups_root_target ; target ; target = target->next) { + + for(target = apps_groups_root_target ; target != NULL ; target = target->next) { if(!target->target && strcmp(name, target->name) == 0) break; } + if(unlikely(debug)) { if(unlikely(target)) fprintf(stderr, "apps.plugin: REUSING TARGET NAME '%s' on ID '%s'\n", target->name, target->id); @@ -302,10 +597,10 @@ static int read_apps_groups_conf(const char *file) if(!ff) return 1; - unsigned long line, lines = procfile_lines(ff); + size_t line, lines = procfile_lines(ff); for(line = 0; line < lines ;line++) { - unsigned long word, words = procfile_linewords(ff, line); + size_t word, words = procfile_linewords(ff, line); if(!words) continue; char *name = procfile_lineword(ff, line, 0); @@ -326,7 +621,7 @@ static int read_apps_groups_conf(const char *file) // add this target struct target *n = get_apps_groups_target(s, w, name); if(!n) { - error("Cannot create target '%s' (line %lu, word %lu)", s, line, word); + error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); continue; } @@ -351,186 +646,133 @@ static int read_apps_groups_conf(const char *file) // ---------------------------------------------------------------------------- -// data to store for each pid -// see: man proc +// struct pid_stat management -#define PID_LOG_IO 0x00000001 -#define PID_LOG_STATM 0x00000002 -#define PID_LOG_CMDLINE 0x00000004 -#define PID_LOG_FDS 0x00000008 -#define PID_LOG_STAT 0x00000010 +static inline struct pid_stat *get_pid_entry(pid_t pid) { + if(unlikely(all_pids[pid])) + return all_pids[pid]; -struct pid_stat { - int32_t pid; - char comm[MAX_COMPARE_NAME + 1]; - char cmdline[MAX_CMDLINE + 1]; + struct pid_stat *p = callocz(sizeof(struct pid_stat), 1); + p->fds = callocz(sizeof(int), MAX_SPARE_FDS); + p->fds_size = MAX_SPARE_FDS; - uint32_t log_thrown; + if(likely(root_of_pids)) + root_of_pids->prev = p; - // char state; - int32_t ppid; - // int32_t pgrp; - // int32_t session; - // int32_t tty_nr; - // int32_t tpgid; - // uint64_t flags; + p->next = root_of_pids; + root_of_pids = p; - // these are raw values collected - unsigned long long minflt_raw; - unsigned long long cminflt_raw; - unsigned long long majflt_raw; - unsigned long long cmajflt_raw; - unsigned long long utime_raw; - unsigned long long stime_raw; - unsigned long long gtime_raw; // guest_time - unsigned long long cutime_raw; - unsigned long long cstime_raw; - unsigned long long cgtime_raw; // cguest_time + p->pid = pid; - // these are rates - unsigned long long minflt; - unsigned long long cminflt; - unsigned long long majflt; - unsigned long long cmajflt; - unsigned long long utime; - unsigned long long stime; - unsigned long long gtime; - unsigned long long cutime; - unsigned long long cstime; - unsigned long long cgtime; + all_pids[pid] = p; + all_pids_count++; - // int64_t priority; - // int64_t nice; - int32_t num_threads; - // int64_t itrealvalue; - // unsigned long long starttime; - // unsigned long long vsize; - // unsigned long long rss; - // unsigned long long rsslim; - // unsigned long long starcode; - // unsigned long long endcode; - // unsigned long long startstack; - // unsigned long long kstkesp; - // unsigned long long kstkeip; - // uint64_t signal; - // uint64_t blocked; - // uint64_t sigignore; - // uint64_t sigcatch; - // uint64_t wchan; - // uint64_t nswap; - // uint64_t cnswap; - // int32_t exit_signal; - // int32_t processor; - // uint32_t rt_priority; - // uint32_t policy; - // unsigned long long delayacct_blkio_ticks; + return p; +} - uid_t uid; - gid_t gid; +static inline void del_pid_entry(pid_t pid) { + struct pid_stat *p = all_pids[pid]; - unsigned long long statm_size; - unsigned long long statm_resident; - unsigned long long statm_share; - // unsigned long long statm_text; - // unsigned long long statm_lib; - // unsigned long long statm_data; - // unsigned long long statm_dirty; - - unsigned long long io_logical_bytes_read_raw; - unsigned long long io_logical_bytes_written_raw; - // unsigned long long io_read_calls_raw; - // unsigned long long io_write_calls_raw; - unsigned long long io_storage_bytes_read_raw; - unsigned long long io_storage_bytes_written_raw; - // unsigned long long io_cancelled_write_bytes_raw; - - unsigned long long io_logical_bytes_read; - unsigned long long io_logical_bytes_written; - // unsigned long long io_read_calls; - // unsigned long long io_write_calls; - unsigned long long io_storage_bytes_read; - unsigned long long io_storage_bytes_written; - // unsigned long long io_cancelled_write_bytes; + if(unlikely(!p)) { + error("attempted to free pid %d that is not allocated.", pid); + return; + } - int *fds; // array of fds it uses - int fds_size; // the size of the fds array + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, p->comm); - int children_count; // number of processes directly referencing this - int keep; // 1 when we need to keep this process in memory even after it exited - int keeploops; // increases by 1 every time keep is 1 and updated 0 - int updated; // 1 when the process is currently running - int merged; // 1 when it has been merged to its parent - int new_entry; // 1 when this is a new process, just saw for the first time - int read; // 1 when we have already read this process for this iteration - int sortlist; // higher numbers = top on the process tree - // each process gets a unique number + if(root_of_pids == p) + root_of_pids = p->next; - struct target *target; // app_groups.conf targets - struct target *user_target; // uid based targets - struct target *group_target; // gid based targets + if(p->next) p->next->prev = p->prev; + if(p->prev) p->prev->next = p->next; - unsigned long long stat_collected_usec; - unsigned long long last_stat_collected_usec; + freez(p->fds); + freez(p->fds_dirname); + freez(p->stat_filename); + freez(p->statm_filename); + freez(p->io_filename); + freez(p->cmdline_filename); + freez(p); - unsigned long long io_collected_usec; - unsigned long long last_io_collected_usec; + all_pids[pid] = NULL; + all_pids_count--; +} - char *stat_filename; - char *statm_filename; - char *io_filename; - char *cmdline_filename; +// ---------------------------------------------------------------------------- - struct pid_stat *parent; - struct pid_stat *prev; - struct pid_stat *next; -} *root_of_pids = NULL, **all_pids; +static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { + if(unlikely(!status)) { + // error("command failed log %u, errno %d", log, errno); -long all_pids_count = 0; + if(unlikely(debug || errno != ENOENT)) { + if(unlikely(debug || !(p->log_thrown & log))) { + p->log_thrown |= log; + switch(log) { + case PID_LOG_IO: + error("Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + break; -static inline struct pid_stat *get_pid_entry(pid_t pid) { - if(all_pids[pid]) { - all_pids[pid]->new_entry = 0; - return all_pids[pid]; - } + case PID_LOG_STATM: + error("Cannot process %s/proc/%d/statm (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + break; - all_pids[pid] = callocz(sizeof(struct pid_stat), 1); - all_pids[pid]->fds = callocz(sizeof(int), MAX_SPARE_FDS); - all_pids[pid]->fds_size = MAX_SPARE_FDS; + case PID_LOG_CMDLINE: + error("Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + break; - if(root_of_pids) root_of_pids->prev = all_pids[pid]; - all_pids[pid]->next = root_of_pids; - root_of_pids = all_pids[pid]; + case PID_LOG_FDS: + error("Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + break; - all_pids[pid]->pid = pid; - all_pids[pid]->new_entry = 1; + case PID_LOG_STAT: + break; - all_pids_count++; + default: + error("unhandled error for pid %d, command '%s'", p->pid, p->comm); + break; + } + } + } + errno = 0; + } + else if(unlikely(p->log_thrown & log)) { + // error("unsetting log %u on pid %d", log, p->pid); + p->log_thrown &= ~log; + } - return all_pids[pid]; + return status; } -static inline void del_pid_entry(pid_t pid) { - if(!all_pids[pid]) { - error("attempted to free pid %d that is not allocated.", pid); - return; - } +static inline void assign_target_to_pid(struct pid_stat *p) { + uint32_t hash = simple_hash(p->comm); + size_t pclen = strlen(p->comm); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm); + struct target *w; + for(w = apps_groups_root_target; w ; w = w->next) { + // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm); - if(root_of_pids == all_pids[pid]) root_of_pids = all_pids[pid]->next; - if(all_pids[pid]->next) all_pids[pid]->next->prev = all_pids[pid]->prev; - if(all_pids[pid]->prev) all_pids[pid]->prev->next = all_pids[pid]->next; + // find it - 4 cases: + // 1. the target is not a pattern + // 2. the target has the prefix + // 3. the target has the suffix + // 4. the target is something inside cmdline - if(all_pids[pid]->fds) freez(all_pids[pid]->fds); - if(all_pids[pid]->stat_filename) freez(all_pids[pid]->stat_filename); - if(all_pids[pid]->statm_filename) freez(all_pids[pid]->statm_filename); - if(all_pids[pid]->io_filename) freez(all_pids[pid]->io_filename); - if(all_pids[pid]->cmdline_filename) freez(all_pids[pid]->cmdline_filename); - freez(all_pids[pid]); + if(unlikely(( (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) + || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) + || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) + || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(p->cmdline, w->compare)) + ))) { - all_pids[pid] = NULL; - all_pids_count--; + if(w->target) p->target = w->target; + else p->target = w; + + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); + + break; + } + } } @@ -539,9 +781,20 @@ static inline void del_pid_entry(pid_t pid) { static inline int read_proc_pid_cmdline(struct pid_stat *p) { +#ifdef __FreeBSD__ + size_t i, bytes = MAX_CMDLINE; + int mib[4]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_ARGS; + mib[3] = p->pid; + if (unlikely(sysctl(mib, 4, p->cmdline, &bytes, NULL, 0))) + goto cleanup; +#else if(unlikely(!p->cmdline_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", global_host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); p->cmdline_filename = strdupz(filename); } @@ -552,6 +805,7 @@ static inline int read_proc_pid_cmdline(struct pid_stat *p) { close(fd); if(unlikely(bytes < 0)) goto cleanup; +#endif p->cmdline[bytes] = '\0'; for(i = 0; i < bytes ; i++) @@ -568,7 +822,16 @@ cleanup: return 0; } -static inline int read_proc_pid_ownership(struct pid_stat *p) { +static inline int read_proc_pid_ownership(struct pid_stat *p, void *ptr) { + (void)ptr; +#ifdef __FreeBSD__ + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; + + p->uid = proc_info->ki_uid; + p->gid = proc_info->ki_groups[0]; + + return 1; +#else if(unlikely(!p->stat_filename)) { error("pid %d does not have a stat_filename", p->pid); return 0; @@ -587,14 +850,41 @@ static inline int read_proc_pid_ownership(struct pid_stat *p) { p->gid = st.st_gid; return 1; +#endif } -static inline int read_proc_pid_stat(struct pid_stat *p) { +// ---------------------------------------------------------------------------- +// macro to calculate the incremental rate of a value +// each parameter is accessed only ONCE - so it is safe to pass function calls +// or other macros as parameters + +#define incremental_rate(rate_variable, last_kernel_variable, new_kernel_value, collected_usec, last_collected_usec) { \ + kernel_uint_t _new_tmp = new_kernel_value; \ + rate_variable = (_new_tmp - last_kernel_variable) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); \ + last_kernel_variable = _new_tmp; \ + } + +// the same macro for struct pid members +#define pid_incremental_rate(type, var, value) \ + incremental_rate(var, var##_raw, value, p->type##_collected_usec, p->last_##type##_collected_usec) + + +// ---------------------------------------------------------------------------- + +static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { + (void)ptr; + +#ifdef __FreeBSD__ + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; + + if (unlikely(proc_info->ki_tdflags & TDF_IDLETD)) + goto cleanup; +#else static procfile *ff = NULL; if(unlikely(!p->stat_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", global_host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", netdata_configured_host_prefix, p->pid); p->stat_filename = strdupz(filename); } @@ -604,95 +894,104 @@ static inline int read_proc_pid_stat(struct pid_stat *p) { if(unlikely(!ff)) goto cleanup; // if(set_quotes) procfile_set_quotes(ff, "()"); - if(set_quotes) procfile_set_open_close(ff, "(", ")"); + if(unlikely(set_quotes)) + procfile_set_open_close(ff, "(", ")"); ff = procfile_readall(ff); if(unlikely(!ff)) goto cleanup; +#endif p->last_stat_collected_usec = p->stat_collected_usec; - p->stat_collected_usec = now_realtime_usec(); - file_counter++; + p->stat_collected_usec = now_monotonic_usec(); + calls_counter++; - // p->pid = str2ul(procfile_lineword(ff, 0, 0+i)); +#ifdef __FreeBSD__ + char *comm = proc_info->ki_comm; + p->ppid = proc_info->ki_ppid; +#else + // p->pid = str2pid_t(procfile_lineword(ff, 0, 0)); + char *comm = procfile_lineword(ff, 0, 1); + // p->state = *(procfile_lineword(ff, 0, 2)); + p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + // p->pgrp = (int32_t)str2pid_t(procfile_lineword(ff, 0, 4)); + // p->session = (int32_t)str2pid_t(procfile_lineword(ff, 0, 5)); + // p->tty_nr = (int32_t)str2pid_t(procfile_lineword(ff, 0, 6)); + // p->tpgid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 7)); + // p->flags = str2uint64_t(procfile_lineword(ff, 0, 8)); +#endif - strncpyz(p->comm, procfile_lineword(ff, 0, 1), MAX_COMPARE_NAME); + if(strcmp(p->comm, comm)) { + if(unlikely(debug)) { + if(p->comm[0]) + fprintf(stderr, "apps.plugin: \tpid %d (%s) changed name to '%s'\n", p->pid, p->comm, comm); + else + fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", p->pid, comm); + } - // p->state = *(procfile_lineword(ff, 0, 2)); - p->ppid = (int32_t)str2ul(procfile_lineword(ff, 0, 3)); - // p->pgrp = str2ul(procfile_lineword(ff, 0, 4)); - // p->session = str2ul(procfile_lineword(ff, 0, 5)); - // p->tty_nr = str2ul(procfile_lineword(ff, 0, 6)); - // p->tpgid = str2ul(procfile_lineword(ff, 0, 7)); - // p->flags = str2ull(procfile_lineword(ff, 0, 8)); - - unsigned long long last; - - last = p->minflt_raw; - p->minflt_raw = str2ull(procfile_lineword(ff, 0, 9)); - p->minflt = (p->minflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - last = p->cminflt_raw; - p->cminflt_raw = str2ull(procfile_lineword(ff, 0, 10)); - p->cminflt = (p->cminflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - last = p->majflt_raw; - p->majflt_raw = str2ull(procfile_lineword(ff, 0, 11)); - p->majflt = (p->majflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - last = p->cmajflt_raw; - p->cmajflt_raw = str2ull(procfile_lineword(ff, 0, 12)); - p->cmajflt = (p->cmajflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - last = p->utime_raw; - p->utime_raw = str2ull(procfile_lineword(ff, 0, 13)); - p->utime = (p->utime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - last = p->stime_raw; - p->stime_raw = str2ull(procfile_lineword(ff, 0, 14)); - p->stime = (p->stime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - last = p->cutime_raw; - p->cutime_raw = str2ull(procfile_lineword(ff, 0, 15)); - p->cutime = (p->cutime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - last = p->cstime_raw; - p->cstime_raw = str2ull(procfile_lineword(ff, 0, 16)); - p->cstime = (p->cstime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - // p->priority = str2ull(procfile_lineword(ff, 0, 17)); - // p->nice = str2ull(procfile_lineword(ff, 0, 18)); - p->num_threads = (int32_t)str2ul(procfile_lineword(ff, 0, 19)); - // p->itrealvalue = str2ull(procfile_lineword(ff, 0, 20)); - // p->starttime = str2ull(procfile_lineword(ff, 0, 21)); - // p->vsize = str2ull(procfile_lineword(ff, 0, 22)); - // p->rss = str2ull(procfile_lineword(ff, 0, 23)); - // p->rsslim = str2ull(procfile_lineword(ff, 0, 24)); - // p->starcode = str2ull(procfile_lineword(ff, 0, 25)); - // p->endcode = str2ull(procfile_lineword(ff, 0, 26)); - // p->startstack = str2ull(procfile_lineword(ff, 0, 27)); - // p->kstkesp = str2ull(procfile_lineword(ff, 0, 28)); - // p->kstkeip = str2ull(procfile_lineword(ff, 0, 29)); - // p->signal = str2ull(procfile_lineword(ff, 0, 30)); - // p->blocked = str2ull(procfile_lineword(ff, 0, 31)); - // p->sigignore = str2ull(procfile_lineword(ff, 0, 32)); - // p->sigcatch = str2ull(procfile_lineword(ff, 0, 33)); - // p->wchan = str2ull(procfile_lineword(ff, 0, 34)); - // p->nswap = str2ull(procfile_lineword(ff, 0, 35)); - // p->cnswap = str2ull(procfile_lineword(ff, 0, 36)); - // p->exit_signal = str2ul(procfile_lineword(ff, 0, 37)); - // p->processor = str2ul(procfile_lineword(ff, 0, 38)); - // p->rt_priority = str2ul(procfile_lineword(ff, 0, 39)); - // p->policy = str2ul(procfile_lineword(ff, 0, 40)); - // p->delayacct_blkio_ticks = str2ull(procfile_lineword(ff, 0, 41)); + strncpyz(p->comm, comm, MAX_COMPARE_NAME); + + // /proc//cmdline + if(likely(proc_pid_cmdline_is_needed)) + managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); + + assign_target_to_pid(p); + } + +#ifdef __FreeBSD__ + pid_incremental_rate(stat, p->minflt, (kernel_uint_t)proc_info->ki_rusage.ru_minflt); + pid_incremental_rate(stat, p->cminflt, (kernel_uint_t)proc_info->ki_rusage_ch.ru_minflt); + pid_incremental_rate(stat, p->majflt, (kernel_uint_t)proc_info->ki_rusage.ru_majflt); + pid_incremental_rate(stat, p->cmajflt, (kernel_uint_t)proc_info->ki_rusage_ch.ru_majflt); + pid_incremental_rate(stat, p->utime, (kernel_uint_t)proc_info->ki_rusage.ru_utime.tv_sec * 100 + proc_info->ki_rusage.ru_utime.tv_usec / 10000); + pid_incremental_rate(stat, p->stime, (kernel_uint_t)proc_info->ki_rusage.ru_stime.tv_sec * 100 + proc_info->ki_rusage.ru_stime.tv_usec / 10000); + pid_incremental_rate(stat, p->cutime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_utime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_utime.tv_usec / 10000); + pid_incremental_rate(stat, p->cstime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_utime.tv_usec / 10000); + + p->num_threads = proc_info->ki_numthreads; if(enable_guest_charts) { - last = p->gtime_raw; - p->gtime_raw = str2ull(procfile_lineword(ff, 0, 42)); - p->gtime = (p->gtime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + enable_guest_charts = 0; + info("Guest charts aren't supported by FreeBSD"); + } +#else + pid_incremental_rate(stat, p->minflt, str2kernel_uint_t(procfile_lineword(ff, 0, 9))); + pid_incremental_rate(stat, p->cminflt, str2kernel_uint_t(procfile_lineword(ff, 0, 10))); + pid_incremental_rate(stat, p->majflt, str2kernel_uint_t(procfile_lineword(ff, 0, 11))); + pid_incremental_rate(stat, p->cmajflt, str2kernel_uint_t(procfile_lineword(ff, 0, 12))); + pid_incremental_rate(stat, p->utime, str2kernel_uint_t(procfile_lineword(ff, 0, 13))); + pid_incremental_rate(stat, p->stime, str2kernel_uint_t(procfile_lineword(ff, 0, 14))); + pid_incremental_rate(stat, p->cutime, str2kernel_uint_t(procfile_lineword(ff, 0, 15))); + pid_incremental_rate(stat, p->cstime, str2kernel_uint_t(procfile_lineword(ff, 0, 16))); + // p->priority = str2kernel_uint_t(procfile_lineword(ff, 0, 17)); + // p->nice = str2kernel_uint_t(procfile_lineword(ff, 0, 18)); + p->num_threads = (int32_t)str2uint32_t(procfile_lineword(ff, 0, 19)); + // p->itrealvalue = str2kernel_uint_t(procfile_lineword(ff, 0, 20)); + // p->starttime = str2kernel_uint_t(procfile_lineword(ff, 0, 21)); + // p->vsize = str2kernel_uint_t(procfile_lineword(ff, 0, 22)); + // p->rss = str2kernel_uint_t(procfile_lineword(ff, 0, 23)); + // p->rsslim = str2kernel_uint_t(procfile_lineword(ff, 0, 24)); + // p->starcode = str2kernel_uint_t(procfile_lineword(ff, 0, 25)); + // p->endcode = str2kernel_uint_t(procfile_lineword(ff, 0, 26)); + // p->startstack = str2kernel_uint_t(procfile_lineword(ff, 0, 27)); + // p->kstkesp = str2kernel_uint_t(procfile_lineword(ff, 0, 28)); + // p->kstkeip = str2kernel_uint_t(procfile_lineword(ff, 0, 29)); + // p->signal = str2kernel_uint_t(procfile_lineword(ff, 0, 30)); + // p->blocked = str2kernel_uint_t(procfile_lineword(ff, 0, 31)); + // p->sigignore = str2kernel_uint_t(procfile_lineword(ff, 0, 32)); + // p->sigcatch = str2kernel_uint_t(procfile_lineword(ff, 0, 33)); + // p->wchan = str2kernel_uint_t(procfile_lineword(ff, 0, 34)); + // p->nswap = str2kernel_uint_t(procfile_lineword(ff, 0, 35)); + // p->cnswap = str2kernel_uint_t(procfile_lineword(ff, 0, 36)); + // p->exit_signal = str2kernel_uint_t(procfile_lineword(ff, 0, 37)); + // p->processor = str2kernel_uint_t(procfile_lineword(ff, 0, 38)); + // p->rt_priority = str2kernel_uint_t(procfile_lineword(ff, 0, 39)); + // p->policy = str2kernel_uint_t(procfile_lineword(ff, 0, 40)); + // p->delayacct_blkio_ticks = str2kernel_uint_t(procfile_lineword(ff, 0, 41)); - last = p->cgtime_raw; - p->cgtime_raw = str2ull(procfile_lineword(ff, 0, 43)); - p->cgtime = (p->cgtime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + if(enable_guest_charts) { + + pid_incremental_rate(stat, p->gtime, str2kernel_uint_t(procfile_lineword(ff, 0, 42))); + pid_incremental_rate(stat, p->cgtime, str2kernel_uint_t(procfile_lineword(ff, 0, 43))); if (show_guest_time || p->gtime || p->cgtime) { p->utime -= (p->utime >= p->gtime) ? p->gtime : p->utime; @@ -700,9 +999,10 @@ static inline int read_proc_pid_stat(struct pid_stat *p) { show_guest_time = 1; } } +#endif if(unlikely(debug || (p->target && p->target->debug))) - fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", global_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); + fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d\n", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); if(unlikely(global_iterations_counter == 1)) { p->minflt = 0; @@ -735,12 +1035,16 @@ cleanup: return 0; } -static inline int read_proc_pid_statm(struct pid_stat *p) { +static inline int read_proc_pid_statm(struct pid_stat *p, void *ptr) { + (void)ptr; +#ifdef __FreeBSD__ + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; +#else static procfile *ff = NULL; if(unlikely(!p->statm_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", global_host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", netdata_configured_host_prefix, p->pid); p->statm_filename = strdupz(filename); } @@ -749,16 +1053,23 @@ static inline int read_proc_pid_statm(struct pid_stat *p) { ff = procfile_readall(ff); if(unlikely(!ff)) goto cleanup; +#endif - file_counter++; + calls_counter++; - p->statm_size = str2ull(procfile_lineword(ff, 0, 0)); - p->statm_resident = str2ull(procfile_lineword(ff, 0, 1)); - p->statm_share = str2ull(procfile_lineword(ff, 0, 2)); - // p->statm_text = str2ull(procfile_lineword(ff, 0, 3)); - // p->statm_lib = str2ull(procfile_lineword(ff, 0, 4)); - // p->statm_data = str2ull(procfile_lineword(ff, 0, 5)); - // p->statm_dirty = str2ull(procfile_lineword(ff, 0, 6)); +#ifdef __FreeBSD__ + p->statm_size = proc_info->ki_size / sysconf(_SC_PAGESIZE); + p->statm_resident = proc_info->ki_rssize; + p->statm_share = 0; // do we have to use ru_ixrss here? +#else + p->statm_size = str2kernel_uint_t(procfile_lineword(ff, 0, 0)); + p->statm_resident = str2kernel_uint_t(procfile_lineword(ff, 0, 1)); + p->statm_share = str2kernel_uint_t(procfile_lineword(ff, 0, 2)); + // p->statm_text = str2kernel_uint_t(procfile_lineword(ff, 0, 3)); + // p->statm_lib = str2kernel_uint_t(procfile_lineword(ff, 0, 4)); + // p->statm_data = str2kernel_uint_t(procfile_lineword(ff, 0, 5)); + // p->statm_dirty = str2kernel_uint_t(procfile_lineword(ff, 0, 6)); +#endif return 1; @@ -773,12 +1084,16 @@ cleanup: return 0; } -static inline int read_proc_pid_io(struct pid_stat *p) { +static inline int read_proc_pid_io(struct pid_stat *p, void *ptr) { + (void)ptr; +#ifdef __FreeBSD__ + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; +#else static procfile *ff = NULL; if(unlikely(!p->io_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", global_host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", netdata_configured_host_prefix, p->pid); p->io_filename = strdupz(filename); } @@ -788,41 +1103,25 @@ static inline int read_proc_pid_io(struct pid_stat *p) { ff = procfile_readall(ff); if(unlikely(!ff)) goto cleanup; +#endif - file_counter++; + calls_counter++; p->last_io_collected_usec = p->io_collected_usec; - p->io_collected_usec = now_realtime_usec(); - - unsigned long long last; - - last = p->io_logical_bytes_read_raw; - p->io_logical_bytes_read_raw = str2ull(procfile_lineword(ff, 0, 1)); - p->io_logical_bytes_read = (p->io_logical_bytes_read_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - - last = p->io_logical_bytes_written_raw; - p->io_logical_bytes_written_raw = str2ull(procfile_lineword(ff, 1, 1)); - p->io_logical_bytes_written = (p->io_logical_bytes_written_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - - // last = p->io_read_calls_raw; - // p->io_read_calls_raw = str2ull(procfile_lineword(ff, 2, 1)); - // p->io_read_calls = (p->io_read_calls_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); + p->io_collected_usec = now_monotonic_usec(); - // last = p->io_write_calls_raw; - // p->io_write_calls_raw = str2ull(procfile_lineword(ff, 3, 1)); - // p->io_write_calls = (p->io_write_calls_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - - last = p->io_storage_bytes_read_raw; - p->io_storage_bytes_read_raw = str2ull(procfile_lineword(ff, 4, 1)); - p->io_storage_bytes_read = (p->io_storage_bytes_read_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - - last = p->io_storage_bytes_written_raw; - p->io_storage_bytes_written_raw = str2ull(procfile_lineword(ff, 5, 1)); - p->io_storage_bytes_written = (p->io_storage_bytes_written_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - - // last = p->io_cancelled_write_bytes_raw; - // p->io_cancelled_write_bytes_raw = str2ull(procfile_lineword(ff, 6, 1)); - // p->io_cancelled_write_bytes = (p->io_cancelled_write_bytes_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); +#ifdef __FreeBSD__ + pid_incremental_rate(io, p->io_storage_bytes_read, proc_info->ki_rusage.ru_inblock); + pid_incremental_rate(io, p->io_storage_bytes_written, proc_info->ki_rusage.ru_oublock); +#else + pid_incremental_rate(io, p->io_logical_bytes_read, str2kernel_uint_t(procfile_lineword(ff, 0, 1))); + pid_incremental_rate(io, p->io_logical_bytes_written, str2kernel_uint_t(procfile_lineword(ff, 1, 1))); + // pid_incremental_rate(io, p->io_read_calls, str2kernel_uint_t(procfile_lineword(ff, 2, 1))); + // pid_incremental_rate(io, p->io_write_calls, str2kernel_uint_t(procfile_lineword(ff, 3, 1))); + pid_incremental_rate(io, p->io_storage_bytes_read, str2kernel_uint_t(procfile_lineword(ff, 4, 1))); + pid_incremental_rate(io, p->io_storage_bytes_written, str2kernel_uint_t(procfile_lineword(ff, 5, 1))); + // pid_incremental_rate(io, p->io_cancelled_write_bytes, str2kernel_uint_t(procfile_lineword(ff, 6, 1))); +#endif if(unlikely(global_iterations_counter == 1)) { p->io_logical_bytes_read = 0; @@ -847,58 +1146,74 @@ cleanup: return 0; } -unsigned long long global_utime = 0; -unsigned long long global_stime = 0; -unsigned long long global_gtime = 0; - static inline int read_proc_stat() { +#ifdef __FreeBSD__ + long cp_time[CPUSTATES]; + int i; + + if (unlikely(CPUSTATES != 5)) { + error("FREEBSD: There are %d CPU states (5 was expected)", CPUSTATES); + goto cleanup; + } + if (unlikely(GETSYSCTL("kern.cp_time", cp_time))) goto cleanup; +#else static char filename[FILENAME_MAX + 1] = ""; static procfile *ff = NULL; - static unsigned long long utime_raw = 0, stime_raw = 0, gtime_raw = 0, gntime_raw = 0, ntime_raw = 0; +#endif + static kernel_uint_t utime_raw = 0, stime_raw = 0, gtime_raw = 0, gntime_raw = 0, ntime_raw = 0; static usec_t collected_usec = 0, last_collected_usec = 0; +#ifndef __FreeBSD__ if(unlikely(!ff)) { - snprintfz(filename, FILENAME_MAX, "%s/proc/stat", global_host_prefix); + snprintfz(filename, FILENAME_MAX, "%s/proc/stat", netdata_configured_host_prefix); ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); if(unlikely(!ff)) goto cleanup; } ff = procfile_readall(ff); if(unlikely(!ff)) goto cleanup; +#endif last_collected_usec = collected_usec; - collected_usec = now_realtime_usec(); - - file_counter++; + collected_usec = now_monotonic_usec(); - unsigned long long last; + calls_counter++; - last = utime_raw; - utime_raw = str2ull(procfile_lineword(ff, 0, 1)); - global_utime = (utime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); + // temporary - it is added global_ntime; + kernel_uint_t global_ntime = 0; - // nice time, on user time - last = ntime_raw; - ntime_raw = str2ull(procfile_lineword(ff, 0, 2)); - global_utime += (ntime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); - - last = stime_raw; - stime_raw = str2ull(procfile_lineword(ff, 0, 3)); - global_stime = (stime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); +#ifdef __FreeBSD__ + incremental_rate(global_utime, utime_raw, cp_time[0], collected_usec, last_collected_usec); + incremental_rate(global_ntime, ntime_raw, cp_time[1], collected_usec, last_collected_usec); + incremental_rate(global_stime, stime_raw, cp_time[2], collected_usec, last_collected_usec); +#else + incremental_rate(global_utime, utime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 1)), collected_usec, last_collected_usec); + incremental_rate(global_ntime, ntime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 2)), collected_usec, last_collected_usec); + incremental_rate(global_stime, stime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 3)), collected_usec, last_collected_usec); + incremental_rate(global_gtime, gtime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 10)), collected_usec, last_collected_usec); +#endif - last = gtime_raw; - gtime_raw = str2ull(procfile_lineword(ff, 0, 10)); - global_gtime = (gtime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); + global_utime += global_ntime; +#ifdef __FreeBSD__ + if(enable_guest_charts) { + enable_guest_charts = 0; + info("Guest charts aren't supported by FreeBSD"); + } +#else if(enable_guest_charts) { + // temporary - it is added global_ntime; + kernel_uint_t global_gntime = 0; + // guest nice time, on guest time - last = gntime_raw; - gntime_raw = str2ull(procfile_lineword(ff, 0, 11)); - global_gtime += (gntime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); + incremental_rate(global_gntime, gntime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 11)), collected_usec, last_collected_usec); + + global_gtime += global_gntime; // remove guest time from user time global_utime -= (global_utime > global_gtime) ? global_gtime : global_utime; } +#endif if(unlikely(global_iterations_counter == 1)) { global_utime = 0; @@ -917,26 +1232,6 @@ cleanup: // ---------------------------------------------------------------------------- -// file descriptor -// this is used to keep a global list of all open files of the system -// it is needed in order to calculate the unique files processes have open - -#define FILE_DESCRIPTORS_INCREASE_STEP 100 - -struct file_descriptor { - avl avl; -#ifdef NETDATA_INTERNAL_CHECKS - uint32_t magic; -#endif /* NETDATA_INTERNAL_CHECKS */ - uint32_t hash; - const char *name; - int type; - int count; - int pos; -} *all_files = NULL; - -int all_files_len = 0; -int all_files_size = 0; int file_descriptor_compare(void* a, void* b) { #ifdef NETDATA_INTERNAL_CHECKS @@ -977,15 +1272,7 @@ static struct file_descriptor *file_descriptor_find(const char *name, uint32_t h #define file_descriptor_add(fd) avl_insert(&all_files_index, (avl *)(fd)) #define file_descriptor_remove(fd) avl_remove(&all_files_index, (avl *)(fd)) -#define FILETYPE_OTHER 0 -#define FILETYPE_FILE 1 -#define FILETYPE_PIPE 2 -#define FILETYPE_SOCKET 3 -#define FILETYPE_INOTIFY 4 -#define FILETYPE_EVENTFD 5 -#define FILETYPE_EVENTPOLL 6 -#define FILETYPE_TIMERFD 7 -#define FILETYPE_SIGNALFD 8 +// ---------------------------------------------------------------------------- static inline void file_descriptor_not_used(int id) { @@ -1066,7 +1353,7 @@ static inline void all_files_grow() { all_files_size += FILE_DESCRIPTORS_INCREASE_STEP; } -static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t hash, int type) { +static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t hash, FD_FILETYPE type) { // check we have enough memory to add it if(!all_files || all_files_len == all_files_size) all_files_grow(); @@ -1147,21 +1434,26 @@ static inline int file_descriptor_find_or_add(const char *name) } // not found - int type; - if(name[0] == '/') type = FILETYPE_FILE; - else if(strncmp(name, "pipe:", 5) == 0) type = FILETYPE_PIPE; - else if(strncmp(name, "socket:", 7) == 0) type = FILETYPE_SOCKET; - else if(strcmp(name, "anon_inode:inotify") == 0 || strcmp(name, "inotify") == 0) type = FILETYPE_INOTIFY; - else if(strcmp(name, "anon_inode:[eventfd]") == 0) type = FILETYPE_EVENTFD; - else if(strcmp(name, "anon_inode:[eventpoll]") == 0) type = FILETYPE_EVENTPOLL; - else if(strcmp(name, "anon_inode:[timerfd]") == 0) type = FILETYPE_TIMERFD; - else if(strcmp(name, "anon_inode:[signalfd]") == 0) type = FILETYPE_SIGNALFD; - else if(strncmp(name, "anon_inode:", 11) == 0) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); + FD_FILETYPE type; + if(likely(name[0] == '/')) type = FILETYPE_FILE; + else if(likely(strncmp(name, "pipe:", 5) == 0)) type = FILETYPE_PIPE; + else if(likely(strncmp(name, "socket:", 7) == 0)) type = FILETYPE_SOCKET; + else if(likely(strncmp(name, "anon_inode:", 11) == 0)) { + const char *t = &name[11]; + + if(strcmp(t, "inotify") == 0) type = FILETYPE_INOTIFY; + else if(strcmp(t, "[eventfd]") == 0) type = FILETYPE_EVENTFD; + else if(strcmp(t, "[eventpoll]") == 0) type = FILETYPE_EVENTPOLL; + else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD; + else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD; + else { + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); - type = FILETYPE_OTHER; + type = FILETYPE_OTHER; + } } + else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY; else { if(unlikely(debug)) fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name); @@ -1172,81 +1464,243 @@ static inline int file_descriptor_find_or_add(const char *name) return file_descriptor_set_on_empty_slot(name, hash, type); } -static inline int read_pid_file_descriptors(struct pid_stat *p) { - char dirname[FILENAME_MAX+1]; +static inline void make_all_pid_fds_negative(struct pid_stat *p) { + int *fd = p->fds, *end = &p->fds[p->fds_size]; + while(fd < end) { + *fd = -(*fd); + fd++; + } +} - snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", global_host_prefix, p->pid); - DIR *fds = opendir(dirname); - if(fds) { - int c; - struct dirent *de; - char fdname[FILENAME_MAX + 1]; - char linkname[FILENAME_MAX + 1]; +static inline void cleanup_negative_pid_fds(struct pid_stat *p) { + int *fd = p->fds, *fdend = &p->fds[p->fds_size]; - // make the array negative - for(c = 0 ; c < p->fds_size ; c++) - p->fds[c] = -p->fds[c]; + while(fd < fdend) { + if(unlikely(*fd < 0)) { + file_descriptor_not_used(-(*fd)); + *fd++ = 0; + } + else + fd++; + } +} - while((de = readdir(fds))) { - if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) - continue; +static inline void zero_pid_fds(struct pid_stat *p, int first, int size) { + int *fd = &p->fds[first], *end = &p->fds[first + size]; + while(fd < end) *fd++ = 0; +} - // check if the fds array is small - int fdid = (int)str2l(de->d_name); - if(fdid < 0) continue; - if(fdid >= p->fds_size) { - // it is small, extend it - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); +static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { + (void)ptr; +#ifdef __FreeBSD__ + int mib[4]; + size_t size; + struct kinfo_file *fds; + static char *fdsbuf; + char *bfdsbuf, *efdsbuf; + char fdsname[FILENAME_MAX + 1]; + + // we make all pid fds negative, so that + // we can detect unused file descriptors + // at the end, to free them + make_all_pid_fds_negative(p); + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_FILEDESC; + mib[3] = p->pid; + + if (unlikely(sysctl(mib, 4, NULL, &size, NULL, 0))) { + error("sysctl error: Can't get file descriptors data size for pid %d", p->pid); + return 0; + } + if (likely(size > 0)) + fdsbuf = reallocz(fdsbuf, size); + if (unlikely(sysctl(mib, 4, fdsbuf, &size, NULL, 0))) { + error("sysctl error: Can't get file descriptors data for pid %d", p->pid); + return 0; + } - p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int)); + bfdsbuf = fdsbuf; + efdsbuf = fdsbuf + size; + while (bfdsbuf < efdsbuf) { + fds = (struct kinfo_file *)(uintptr_t)bfdsbuf; + if (unlikely(fds->kf_structsize == 0)) + break; - // and initialize it - for(c = p->fds_size ; c < (fdid + MAX_SPARE_FDS) ; c++) p->fds[c] = 0; - p->fds_size = fdid + MAX_SPARE_FDS; - } + // do not process file descriptors for current working directory, root directory, + // jail directory, ktrace vnode, text vnode and controlling terminal + if (unlikely(fds->kf_fd < 0)) { + bfdsbuf += fds->kf_structsize; + continue; + } - if(p->fds[fdid] == 0) { - // we don't know this fd, get it + // get file descriptors array index + int fdid = fds->kf_fd; - sprintf(fdname, "%s/proc/%d/fd/%s", global_host_prefix, p->pid, de->d_name); - ssize_t l = readlink(fdname, linkname, FILENAME_MAX); - if(l == -1) { - if(debug || (p->target && p->target->debug)) { - if(debug || (p->target && p->target->debug)) - error("Cannot read link %s", fdname); - } - continue; - } - linkname[l] = '\0'; - file_counter++; + // check if the fds array is small + if (unlikely(fdid >= p->fds_size)) { + // it is small, extend it + + if (unlikely(debug)) + fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); + + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int)); + + // and initialize it + zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = fdid + MAX_SPARE_FDS; + } - // if another process already has this, we will get - // the same id - p->fds[fdid] = file_descriptor_find_or_add(linkname); + if (unlikely(p->fds[fdid] == 0)) { + // we don't know this fd, get it + + switch (fds->kf_type) { + case KF_TYPE_FIFO: + case KF_TYPE_VNODE: + if (unlikely(!fds->kf_path[0])) { + sprintf(fdsname, "other: inode: %lu", fds->kf_un.kf_file.kf_file_fileid); + break; + } + sprintf(fdsname, "%s", fds->kf_path); + break; + case KF_TYPE_SOCKET: + switch (fds->kf_sock_domain) { + case AF_INET: + case AF_INET6: + if (fds->kf_sock_protocol == IPPROTO_TCP) + sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_inpcb); + else + sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_pcb); + break; + case AF_UNIX: + /* print address of pcb and connected pcb */ + sprintf(fdsname, "socket: %lx %lx", fds->kf_un.kf_sock.kf_sock_pcb, fds->kf_un.kf_sock.kf_sock_unpconn); + break; + default: + /* print protocol number and socket address */ + sprintf(fdsname, "socket: other: %d %s %s", fds->kf_sock_protocol, fds->kf_sa_local.__ss_pad1, fds->kf_sa_local.__ss_pad2); + } + break; + case KF_TYPE_PIPE: + sprintf(fdsname, "pipe: %lu %lu", fds->kf_un.kf_pipe.kf_pipe_addr, fds->kf_un.kf_pipe.kf_pipe_peer); + break; + case KF_TYPE_PTS: + sprintf(fdsname, "other: pts: %u", fds->kf_un.kf_pts.kf_pts_dev); + break; + case KF_TYPE_SHM: + sprintf(fdsname, "other: shm: %s size: %lu", fds->kf_path, fds->kf_un.kf_file.kf_file_size); + break; + case KF_TYPE_SEM: + sprintf(fdsname, "other: sem: %u", fds->kf_un.kf_sem.kf_sem_value); + break; + default: + sprintf(fdsname, "other: pid: %d fd: %d", fds->kf_un.kf_proc.kf_pid, fds->kf_fd); } + // if another process already has this, we will get + // the same id + p->fds[fdid] = file_descriptor_find_or_add(fdsname); + } + // else make it positive again, we need it // of course, the actual file may have changed, but we don't care so much // FIXME: we could compare the inode as returned by readdir dirent structure - else p->fds[fdid] = -p->fds[fdid]; + + else + p->fds[fdid] = -p->fds[fdid]; + + bfdsbuf += fds->kf_structsize; + } +#else + if(unlikely(!p->fds_dirname)) { + char dirname[FILENAME_MAX+1]; + snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", netdata_configured_host_prefix, p->pid); + p->fds_dirname = strdupz(dirname); + } + + DIR *fds = opendir(p->fds_dirname); + if(unlikely(!fds)) return 0; + + struct dirent *de; + char fdname[FILENAME_MAX + 1]; + char linkname[FILENAME_MAX + 1]; + + // we make all pid fds negative, so that + // we can detect unused file descriptors + // at the end, to free them + make_all_pid_fds_negative(p); + + while((de = readdir(fds))) { + // we need only files with numeric names + + if(unlikely(de->d_name[0] < '0' || de->d_name[0] > '9')) + continue; + + // get its number + int fdid = (int) str2l(de->d_name); + if(unlikely(fdid < 0)) continue; + + // check if the fds array is small + if(unlikely(fdid >= p->fds_size)) { + // it is small, extend it + + if(unlikely(debug)) + fprintf(stderr + , "apps.plugin: extending fd memory slots for %s from %d to %d\n" + , p->comm + , p->fds_size + , fdid + MAX_SPARE_FDS + ); + + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int)); + + // and initialize it + zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = fdid + MAX_SPARE_FDS; } - closedir(fds); - // remove all the negative file descriptors - for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] < 0) { - file_descriptor_not_used(-p->fds[c]); - p->fds[c] = 0; + if(unlikely(p->fds[fdid] == 0)) { + // we don't know this fd, get it + + sprintf(fdname, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); + ssize_t l = readlink(fdname, linkname, FILENAME_MAX); + if(unlikely(l == -1)) { + if(debug || (p->target && p->target->debug)) { + if(debug || (p->target && p->target->debug)) + error("Cannot read link %s", fdname); + } + continue; + } + else + linkname[l] = '\0'; + + file_counter++; + + // if another process already has this, we will get + // the same id + p->fds[fdid] = file_descriptor_find_or_add(linkname); } + + // else make it positive again, we need it + // of course, the actual file may have changed, but we don't care so much + // FIXME: we could compare the inode as returned by readdir dirent structure + // UPDATE: no we cannot use inodes - under /proc inodes don't change when the link is changed + + else + p->fds[fdid] = -p->fds[fdid]; } - else return 0; + + closedir(fds); +#endif + cleanup_negative_pid_fds(p); return 1; } // ---------------------------------------------------------------------------- -static inline int print_process_and_parents(struct pid_stat *p, unsigned long long time) { +static inline int print_process_and_parents(struct pid_stat *p, usec_t time) { char *prefix = "\\_ "; int indent = 0; @@ -1261,25 +1715,25 @@ static inline int print_process_and_parents(struct pid_stat *p, unsigned long lo for(i = 0; i < indent ;i++) buffer[i] = ' '; buffer[i] = '\0'; - fprintf(stderr, " %s %s%s (%d %s %lld" + fprintf(stderr, " %s %s%s (%d %s %llu" , buffer , prefix , p->comm , p->pid , p->updated?"running":"exited" - , (long long)p->stat_collected_usec - (long long)time + , p->stat_collected_usec - time ); - if(p->utime) fprintf(stderr, " utime=%llu", p->utime); - if(p->stime) fprintf(stderr, " stime=%llu", p->stime); - if(p->gtime) fprintf(stderr, " gtime=%llu", p->gtime); - if(p->cutime) fprintf(stderr, " cutime=%llu", p->cutime); - if(p->cstime) fprintf(stderr, " cstime=%llu", p->cstime); - if(p->cgtime) fprintf(stderr, " cgtime=%llu", p->cgtime); - if(p->minflt) fprintf(stderr, " minflt=%llu", p->minflt); - if(p->cminflt) fprintf(stderr, " cminflt=%llu", p->cminflt); - if(p->majflt) fprintf(stderr, " majflt=%llu", p->majflt); - if(p->cmajflt) fprintf(stderr, " cmajflt=%llu", p->cmajflt); + if(p->utime) fprintf(stderr, " utime=" KERNEL_UINT_FORMAT, p->utime); + if(p->stime) fprintf(stderr, " stime=" KERNEL_UINT_FORMAT, p->stime); + if(p->gtime) fprintf(stderr, " gtime=" KERNEL_UINT_FORMAT, p->gtime); + if(p->cutime) fprintf(stderr, " cutime=" KERNEL_UINT_FORMAT, p->cutime); + if(p->cstime) fprintf(stderr, " cstime=" KERNEL_UINT_FORMAT, p->cstime); + if(p->cgtime) fprintf(stderr, " cgtime=" KERNEL_UINT_FORMAT, p->cgtime); + if(p->minflt) fprintf(stderr, " minflt=" KERNEL_UINT_FORMAT, p->minflt); + if(p->cminflt) fprintf(stderr, " cminflt=" KERNEL_UINT_FORMAT, p->cminflt); + if(p->majflt) fprintf(stderr, " majflt=" KERNEL_UINT_FORMAT, p->majflt); + if(p->cmajflt) fprintf(stderr, " cmajflt=" KERNEL_UINT_FORMAT, p->cmajflt); fprintf(stderr, ")\n"); return indent + 1; @@ -1291,7 +1745,7 @@ static inline void print_process_tree(struct pid_stat *p, char *msg) { print_process_and_parents(p, p->stat_collected_usec); } -static inline void find_lost_child_debug(struct pid_stat *pe, unsigned long long lost, int type) { +static inline void find_lost_child_debug(struct pid_stat *pe, kernel_uint_t lost, int type) { int found = 0; struct pid_stat *p = NULL; @@ -1301,35 +1755,35 @@ static inline void find_lost_child_debug(struct pid_stat *pe, unsigned long long switch(type) { case 1: if(p->cminflt > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child minflt %llu of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + fprintf(stderr, " > process %d (%s) could use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); found++; } break; case 2: if(p->cmajflt > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child majflt %llu of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + fprintf(stderr, " > process %d (%s) could use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); found++; } break; case 3: if(p->cutime > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child utime %llu of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + fprintf(stderr, " > process %d (%s) could use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); found++; } break; case 4: if(p->cstime > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child stime %llu of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + fprintf(stderr, " > process %d (%s) could use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); found++; } break; case 5: if(p->cgtime > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child gtime %llu of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); + fprintf(stderr, " > process %d (%s) could use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); found++; } break; @@ -1339,30 +1793,30 @@ static inline void find_lost_child_debug(struct pid_stat *pe, unsigned long long if(!found) { switch(type) { case 1: - fprintf(stderr, " > cannot find any process to use the lost exited child minflt %llu of process %d (%s)\n", lost, pe->pid, pe->comm); + fprintf(stderr, " > cannot find any process to use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); break; case 2: - fprintf(stderr, " > cannot find any process to use the lost exited child majflt %llu of process %d (%s)\n", lost, pe->pid, pe->comm); + fprintf(stderr, " > cannot find any process to use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); break; case 3: - fprintf(stderr, " > cannot find any process to use the lost exited child utime %llu of process %d (%s)\n", lost, pe->pid, pe->comm); + fprintf(stderr, " > cannot find any process to use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); break; case 4: - fprintf(stderr, " > cannot find any process to use the lost exited child stime %llu of process %d (%s)\n", lost, pe->pid, pe->comm); + fprintf(stderr, " > cannot find any process to use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); break; case 5: - fprintf(stderr, " > cannot find any process to use the lost exited child gtime %llu of process %d (%s)\n", lost, pe->pid, pe->comm); + fprintf(stderr, " > cannot find any process to use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); break; } } } -static inline unsigned long long remove_exited_child_from_parent(unsigned long long *field, unsigned long long *pfield) { - unsigned long long absorbed = 0; +static inline kernel_uint_t remove_exited_child_from_parent(kernel_uint_t *field, kernel_uint_t *pfield) { + kernel_uint_t absorbed = 0; if(*field > *pfield) { absorbed += *pfield; @@ -1385,20 +1839,18 @@ static inline void process_exited_processes() { if(p->updated || !p->stat_collected_usec) continue; - struct pid_stat *pp = p->parent; - - unsigned long long utime = (p->utime_raw + p->cutime_raw) * (1000000ULL * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - unsigned long long stime = (p->stime_raw + p->cstime_raw) * (1000000ULL * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - unsigned long long gtime = (p->gtime_raw + p->cgtime_raw) * (1000000ULL * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - unsigned long long minflt = (p->minflt_raw + p->cminflt_raw) * (1000000ULL * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - unsigned long long majflt = (p->majflt_raw + p->cmajflt_raw) * (1000000ULL * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t utime = (p->utime_raw + p->cutime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t stime = (p->stime_raw + p->cstime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t gtime = (p->gtime_raw + p->cgtime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t minflt = (p->minflt_raw + p->cminflt_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); + kernel_uint_t majflt = (p->majflt_raw + p->cmajflt_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); if(utime + stime + gtime + minflt + majflt == 0) continue; if(unlikely(debug)) { log_date(stderr); - fprintf(stderr, "Absorb %s (%d %s total resources: utime=%llu stime=%llu gtime=%llu minflt=%llu majflt=%llu)\n" + fprintf(stderr, "Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n" , p->comm , p->pid , p->updated?"running":"exited" @@ -1411,29 +1863,30 @@ static inline void process_exited_processes() { print_process_tree(p, "Searching parents"); } + struct pid_stat *pp; for(pp = p->parent; pp ; pp = pp->parent) { if(!pp->updated) continue; - unsigned long long absorbed; + kernel_uint_t absorbed; absorbed = remove_exited_child_from_parent(&utime, &pp->cutime); if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed %llu utime (remaining: %llu)\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); + fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); absorbed = remove_exited_child_from_parent(&stime, &pp->cstime); if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed %llu stime (remaining: %llu)\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); + fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); absorbed = remove_exited_child_from_parent(>ime, &pp->cgtime); if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed %llu gtime (remaining: %llu)\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); + fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); absorbed = remove_exited_child_from_parent(&minflt, &pp->cminflt); if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed %llu minflt (remaining: %llu)\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); + fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); absorbed = remove_exited_child_from_parent(&majflt, &pp->cmajflt); if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed %llu majflt (remaining: %llu)\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); + fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); } if(unlikely(utime + stime + gtime + minflt + majflt > 0)) { @@ -1448,7 +1901,7 @@ static inline void process_exited_processes() { p->keep = 1; if(unlikely(debug)) - fprintf(stderr, " > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=%llu stime=%llu gtime=%llu minflt=%llu majflt=%llu)\n" + fprintf(stderr, " > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n" , p->comm , p->pid , p->updated?"running":"exited" @@ -1513,7 +1966,7 @@ static inline void link_all_processes_to_their_parents(void) { pp->children_count++; if(unlikely(debug || (p->target && p->target->debug))) - fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=%llu, stime=%llu, gtime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cgtime=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); + fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "\n", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); } else { p->parent = NULL; @@ -1551,52 +2004,9 @@ static int compar_pid(const void *pid1, const void *pid2) { return 1; } -static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { - if(unlikely(!status)) { - // error("command failed log %u, errno %d", log, errno); - - if(unlikely(debug || errno != ENOENT)) { - if(unlikely(debug || !(p->log_thrown & log))) { - p->log_thrown |= log; - switch(log) { - case PID_LOG_IO: - error("Cannot process %s/proc/%d/io (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_STATM: - error("Cannot process %s/proc/%d/statm (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_CMDLINE: - error("Cannot process %s/proc/%d/cmdline (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_FDS: - error("Cannot process entries in %s/proc/%d/fd (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_STAT: - break; - - default: - error("unhandled error for pid %d, command '%s'", p->pid, p->comm); - break; - } - } - } - errno = 0; - } - else if(unlikely(p->log_thrown & log)) { - // error("unsetting log %u on pid %d", log, p->pid); - p->log_thrown &= ~log; - } - - return status; -} - -static inline int collect_data_for_pid(pid_t pid) { - if(unlikely(pid <= 0 || pid > pid_max)) { - error("Invalid pid %d read (expected 1 to %d). Ignoring process.", pid, pid_max); +static inline int collect_data_for_pid(pid_t pid, void *ptr) { + if(unlikely(pid < INIT_PID || pid > pid_max)) { + error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, INIT_PID, pid_max); return 0; } @@ -1609,11 +2019,11 @@ static inline int collect_data_for_pid(pid_t pid) { // -------------------------------------------------------------------- // /proc//stat - if(unlikely(!managed_log(p, PID_LOG_STAT, read_proc_pid_stat(p)))) + if(unlikely(!managed_log(p, PID_LOG_STAT, read_proc_pid_stat(p, ptr)))) // there is no reason to proceed if we cannot get its status return 0; - read_proc_pid_ownership(p); + read_proc_pid_ownership(p, ptr); // check its parent pid if(unlikely(p->ppid < 0 || p->ppid > pid_max)) { @@ -1624,61 +2034,20 @@ static inline int collect_data_for_pid(pid_t pid) { // -------------------------------------------------------------------- // /proc//io - managed_log(p, PID_LOG_IO, read_proc_pid_io(p)); + managed_log(p, PID_LOG_IO, read_proc_pid_io(p, ptr)); // -------------------------------------------------------------------- // /proc//statm - if(unlikely(!managed_log(p, PID_LOG_STATM, read_proc_pid_statm(p)))) + if(unlikely(!managed_log(p, PID_LOG_STATM, read_proc_pid_statm(p, ptr)))) // there is no reason to proceed if we cannot get its memory status return 0; - // -------------------------------------------------------------------- - // link it - - // check if it is target - // we do this only once, the first time this pid is loaded - if(unlikely(p->new_entry)) { - // /proc//cmdline - if(likely(proc_pid_cmdline_is_needed)) - managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); - - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", pid, p->comm); - - uint32_t hash = simple_hash(p->comm); - size_t pclen = strlen(p->comm); - - struct target *w; - for(w = apps_groups_root_target; w ; w = w->next) { - // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm); - - // find it - 4 cases: - // 1. the target is not a pattern - // 2. the target has the prefix - // 3. the target has the suffix - // 4. the target is something inside cmdline - if( (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) - || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) - || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) - || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(p->cmdline, w->compare)) - ) { - if(w->target) p->target = w->target; - else p->target = w; - - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); - - break; - } - } - } - // -------------------------------------------------------------------- // /proc//fd if(enable_file_charts) - managed_log(p, PID_LOG_FDS, read_pid_file_descriptors(p)); + managed_log(p, PID_LOG_FDS, read_pid_file_descriptors(p, ptr)); // -------------------------------------------------------------------- // done! @@ -1694,71 +2063,114 @@ static inline int collect_data_for_pid(pid_t pid) { return 1; } -static int collect_data_for_all_processes_from_proc(void) { +static int collect_data_for_all_processes(void) { struct pid_stat *p = NULL; - if(all_pids_count) { - // read parents before childs - // this is needed to prevent a situation where - // a child is found running, but until we read - // its parent, it has exited and its parent - // has accumulated its resources +#ifdef __FreeBSD__ + int i, procnum; + size_t procbase_size; + static struct kinfo_proc *procbase; + + int mib[3]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PROC; + if (unlikely(sysctl(mib, 3, NULL, &procbase_size, NULL, 0))) { + error("sysctl error: Can't get processes data size"); + return 0; + } + procbase = reallocz(procbase, procbase_size); + if (unlikely(sysctl(mib, 3, procbase, &procbase_size, NULL, 0))) { + error("sysctl error: Can't get processes data"); + return 0; + } + procnum = procbase_size / sizeof(struct kinfo_proc); +#endif - long slc = 0; + if(all_pids_count) { + size_t slc = 0; for(p = root_of_pids; p ; p = p->next) { - p->read = 0; + p->read = 0; // mark it as not read, so that collect_data_for_pid() will read it p->updated = 0; - p->new_entry = 0; p->merged = 0; p->children_count = 0; p->parent = NULL; +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) all_pids_sortlist[slc++] = p->pid; +#endif } +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) if(unlikely(slc != all_pids_count)) { - error("Internal error: I was thinking I had %ld processes in my arrays, but it seems there are more.", all_pids_count); + error("Internal error: I was thinking I had %zu processes in my arrays, but it seems there are more.", all_pids_count); all_pids_count = slc; } if(include_exited_childs) { + // Read parents before childs + // This is needed to prevent a situation where + // a child is found running, but until we read + // its parent, it has exited and its parent + // has accumulated its resources. + qsort((void *)all_pids_sortlist, (size_t)all_pids_count, sizeof(pid_t), compar_pid); + + // we forward read all running processes + // collect_data_for_pid() is smart enough, + // not to read the same pid twice per iteration for(slc = 0; slc < all_pids_count; slc++) - collect_data_for_pid(all_pids_sortlist[slc]); + collect_data_for_pid(all_pids_sortlist[slc], NULL); } +#endif } +#ifdef __FreeBSD__ + for (i = INIT_PID; i < procnum - INIT_PID; ++i) { + pid_t pid = procbase[i].ki_pid; + collect_data_for_pid(pid, &procbase[i]); + } +#else char dirname[FILENAME_MAX + 1]; - snprintfz(dirname, FILENAME_MAX, "%s/proc", global_host_prefix); + snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); DIR *dir = opendir(dirname); if(!dir) return 0; - struct dirent *file = NULL; + struct dirent *de = NULL; - while((file = readdir(dir))) { - char *endptr = file->d_name; - pid_t pid = (pid_t) strtoul(file->d_name, &endptr, 10); + while((de = readdir(dir))) { + char *endptr = de->d_name; + + if(unlikely(de->d_type != DT_DIR || de->d_name[0] < '0' || de->d_name[0] > '9')) + continue; + + pid_t pid = (pid_t) strtoul(de->d_name, &endptr, 10); // make sure we read a valid number - if(unlikely(endptr == file->d_name || *endptr != '\0')) + if(unlikely(endptr == de->d_name || *endptr != '\0')) continue; - collect_data_for_pid(pid); + collect_data_for_pid(pid, NULL); } closedir(dir); +#endif if(!all_pids_count) return 0; + // we need /proc/stat to normalize the cpu consumption of the exited childs + read_proc_stat(); + + // build the process tree + link_all_processes_to_their_parents(); + // normally this is done // however we may have processes exited while we collected values // so let's find the exited ones // we do this by collecting the ownership of process // if we manage to get the ownership, the process still runs - - read_proc_stat(); - link_all_processes_to_their_parents(); process_exited_processes(); return 1; @@ -1786,15 +2198,14 @@ static void cleanup_exited_pids(void) { for(p = root_of_pids; p ;) { if(!p->updated && (!p->keep || p->keeploops > 0)) { -// fprintf(stderr, "\tEXITED %d %s [parent %d %s, target %s] utime=%llu, stime=%llu, gtime=%llu, cutime=%llu, cstime=%llu, cgtime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->parent->pid, p->parent->comm, p->target->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); - if(unlikely(debug && (p->keep || p->keeploops))) fprintf(stderr, " > CLEANUP cannot keep exited process %d (%s) anymore - removing it.\n", p->pid, p->comm); - for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] > 0) { - file_descriptor_not_used(p->fds[c]); - p->fds[c] = 0; - } + for(c = 0; c < p->fds_size; c++) + if(p->fds[c] > 0) { + file_descriptor_not_used(p->fds[c]); + p->fds[c] = 0; + } pid_t r = p->pid; p = p->next; @@ -1857,7 +2268,7 @@ static void apply_apps_groups_targets_inheritance(void) { && p->parent && p->parent->children_count && (p->target == p->parent->target || !p->parent->target) - && p->ppid != 1 + && p->ppid != INIT_PID )) { p->parent->children_count--; p->merged = 1; @@ -1879,8 +2290,8 @@ static void apply_apps_groups_targets_inheritance(void) { } // init goes always to default target - if(all_pids[1]) - all_pids[1]->target = apps_groups_default_target; + if(all_pids[INIT_PID]) + all_pids[INIT_PID]->target = apps_groups_default_target; // give a default target on all top level processes if(unlikely(debug)) loops++; @@ -1918,9 +2329,9 @@ static void apply_apps_groups_targets_inheritance(void) { fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops); } -static long zero_all_targets(struct target *root) { +static size_t zero_all_targets(struct target *root) { struct target *w; - long count = 0; + size_t count = 0; for (w = root; w ; w = w->next) { count++; @@ -2032,7 +2443,7 @@ static inline void aggregate_fd_on_target(int fd, struct target *w) { w->openeventpolls++; break; - default: + case FILETYPE_OTHER: w->openother++; break; } @@ -2058,9 +2469,9 @@ static inline void aggregate_pid_fds_on_targets(struct pid_stat *p) { if(likely(fd <= 0 || fd >= all_files_size)) continue; - aggregate_fd_on_target(all_files[fd].type, w); - aggregate_fd_on_target(all_files[fd].type, u); - aggregate_fd_on_target(all_files[fd].type, g); + aggregate_fd_on_target(fd, w); + aggregate_fd_on_target(fd, u); + aggregate_fd_on_target(fd, g); } } @@ -2111,7 +2522,7 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, w->num_threads += p->num_threads; if(unlikely(debug || w->debug)) - fprintf(stderr, "apps.plugin: \taggregating '%s' pid %d on target '%s' utime=%llu, stime=%llu, gtime=%llu, cutime=%llu, cstime=%llu, cgtime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); + fprintf(stderr, "apps.plugin: \taggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "\n", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); } static void calculate_netdata_statistics(void) { @@ -2120,7 +2531,7 @@ static void calculate_netdata_statistics(void) { zero_all_targets(users_root_target); zero_all_targets(groups_root_target); - apps_groups_targets = zero_all_targets(apps_groups_root_target); + apps_groups_targets_count = zero_all_targets(apps_groups_root_target); // this has to be done, before the cleanup struct pid_stat *p = NULL; @@ -2182,21 +2593,18 @@ static void calculate_netdata_statistics(void) { int print_calculated_number(char *str, calculated_number value) { (void)str; (void)value; return 0; } -static inline void send_BEGIN(const char *type, const char *id, unsigned long long usec) { +static inline void send_BEGIN(const char *type, const char *id, usec_t usec) { fprintf(stdout, "BEGIN %s.%s %llu\n", type, id, usec); } -static inline void send_SET(const char *name, unsigned long long value) { - fprintf(stdout, "SET %s = %llu\n", name, value); +static inline void send_SET(const char *name, kernel_uint_t value) { + fprintf(stdout, "SET %s = " KERNEL_UINT_FORMAT "\n", name, value); } static inline void send_END(void) { fprintf(stdout, "END\n"); } -double utime_fix_ratio = 1.0, stime_fix_ratio = 1.0, gtime_fix_ratio = 1.0, cutime_fix_ratio = 1.0, cstime_fix_ratio = 1.0, cgtime_fix_ratio = 1.0; -double minflt_fix_ratio = 1.0, majflt_fix_ratio = 1.0, cminflt_fix_ratio = 1.0, cmajflt_fix_ratio = 1.0; - static usec_t send_resource_usage_to_netdata() { static struct timeval last = { 0, 0 }; static struct rusage me_last; @@ -2209,7 +2617,7 @@ static usec_t send_resource_usage_to_netdata() { usec_t cpusyst; if(!last.tv_sec) { - now_realtime_timeval(&last); + now_monotonic_timeval(&last); getrusage(RUSAGE_SELF, &me_last); // the first time, give a zero to allow @@ -2220,7 +2628,7 @@ static usec_t send_resource_usage_to_netdata() { cpusyst = 0; } else { - now_realtime_timeval(&now); + now_monotonic_timeval(&now); getrusage(RUSAGE_SELF, &me); usec = dt_usec(&now, &last); @@ -2231,73 +2639,112 @@ static usec_t send_resource_usage_to_netdata() { memmove(&me_last, &me, sizeof(struct rusage)); } + static char created_charts = 0; + if(unlikely(!created_charts)) { + created_charts = 1; + + fprintf(stdout + , "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n" + "DIMENSION user '' incremental 1 1000\n" + "DIMENSION system '' incremental 1 1000\n" + "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n" + "DIMENSION calls '' incremental 1 1\n" + "DIMENSION files '' incremental 1 1\n" + "DIMENSION pids '' absolute 1 1\n" + "DIMENSION fds '' absolute 1 1\n" + "DIMENSION targets '' absolute 1 1\n" + "CHART netdata.apps_fix '' 'Apps Plugin Normalization Ratios' 'percentage' apps.plugin netdata.apps_fix line 140002 %1$d\n" + "DIMENSION utime '' absolute 1 %2$llu\n" + "DIMENSION stime '' absolute 1 %2$llu\n" + "DIMENSION gtime '' absolute 1 %2$llu\n" + "DIMENSION minflt '' absolute 1 %2$llu\n" + "DIMENSION majflt '' absolute 1 %2$llu\n" + , update_every + , RATES_DETAIL + ); + + if(include_exited_childs) + fprintf(stdout + , "CHART netdata.apps_children_fix '' 'Apps Plugin Exited Children Normalization Ratios' 'percentage' apps.plugin netdata.apps_children_fix line 140003 %1$d\n" + "DIMENSION cutime '' absolute 1 %2$llu\n" + "DIMENSION cstime '' absolute 1 %2$llu\n" + "DIMENSION cgtime '' absolute 1 %2$llu\n" + "DIMENSION cminflt '' absolute 1 %2$llu\n" + "DIMENSION cmajflt '' absolute 1 %2$llu\n" + , update_every + , RATES_DETAIL + ); + } + fprintf(stdout, "BEGIN netdata.apps_cpu %llu\n" "SET user = %llu\n" "SET system = %llu\n" "END\n" "BEGIN netdata.apps_files %llu\n" - "SET files = %llu\n" - "SET pids = %ld\n" + "SET calls = %zu\n" + "SET files = %zu\n" + "SET pids = %zu\n" "SET fds = %d\n" - "SET targets = %ld\n" + "SET targets = %zu\n" "END\n" "BEGIN netdata.apps_fix %llu\n" - "SET utime = %llu\n" - "SET stime = %llu\n" - "SET gtime = %llu\n" - "SET minflt = %llu\n" - "SET majflt = %llu\n" + "SET utime = %u\n" + "SET stime = %u\n" + "SET gtime = %u\n" + "SET minflt = %u\n" + "SET majflt = %u\n" "END\n" , usec , cpuuser , cpusyst , usec + , calls_counter , file_counter , all_pids_count , all_files_len - , apps_groups_targets + , apps_groups_targets_count , usec - , (unsigned long long)(utime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(stime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(gtime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(minflt_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(majflt_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(utime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(stime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(gtime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(minflt_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(majflt_fix_ratio * 100 * RATES_DETAIL) ); if(include_exited_childs) fprintf(stdout, "BEGIN netdata.apps_children_fix %llu\n" - "SET cutime = %llu\n" - "SET cstime = %llu\n" - "SET cgtime = %llu\n" - "SET cminflt = %llu\n" - "SET cmajflt = %llu\n" + "SET cutime = %u\n" + "SET cstime = %u\n" + "SET cgtime = %u\n" + "SET cminflt = %u\n" + "SET cmajflt = %u\n" "END\n" , usec - , (unsigned long long)(cutime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(cstime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(cgtime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(cminflt_fix_ratio * 100 * RATES_DETAIL) - , (unsigned long long)(cmajflt_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cutime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cstime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cgtime_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cminflt_fix_ratio * 100 * RATES_DETAIL) + , (unsigned int)(cmajflt_fix_ratio * 100 * RATES_DETAIL) ); return usec; } -static void normalize_data(struct target *root) { +static void normalize_utilization(struct target *root) { struct target *w; // childs processing introduces spikes // here we try to eliminate them by disabling childs processing either for specific dimensions // or entirely. Of course, either way, we disable it just a single iteration. - unsigned long long max = processors * hz * RATES_DETAIL; - unsigned long long utime = 0, cutime = 0, stime = 0, cstime = 0, gtime = 0, cgtime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0; + kernel_uint_t max_time = processors * hz * RATES_DETAIL; + kernel_uint_t utime = 0, cutime = 0, stime = 0, cstime = 0, gtime = 0, cgtime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0; - if(global_utime > max) global_utime = max; - if(global_stime > max) global_stime = max; - if(global_gtime > max) global_gtime = max; + if(global_utime > max_time) global_utime = max_time; + if(global_stime > max_time) global_stime = max_time; + if(global_gtime > max_time) global_gtime = max_time; for(w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -2397,11 +2844,11 @@ static void normalize_data(struct target *root) { if(unlikely(debug)) { fprintf(stderr, - "SYSTEM: u=%llu s=%llu g=%llu " - "COLLECTED: u=%llu s=%llu g=%llu cu=%llu cs=%llu cg=%llu " - "DELTA: u=%lld s=%lld g=%lld " + "SYSTEM: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " + "COLLECTED: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " + "DELTA: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " "FIX: u=%0.2f s=%0.2f g=%0.2f cu=%0.2f cs=%0.2f cg=%0.2f " - "FINALLY: u=%llu s=%llu g=%llu cu=%llu cs=%llu cg=%llu " + "FINALLY: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " "\n" , global_utime , global_stime @@ -2412,21 +2859,21 @@ static void normalize_data(struct target *root) { , cutime , cstime , cgtime - , (long long)utime + (long long)cutime - (long long)global_utime - , (long long)stime + (long long)cstime - (long long)global_stime - , (long long)gtime + (long long)cgtime - (long long)global_gtime + , utime + cutime - global_utime + , stime + cstime - global_stime + , gtime + cgtime - global_gtime , utime_fix_ratio , stime_fix_ratio , gtime_fix_ratio , cutime_fix_ratio , cstime_fix_ratio , cgtime_fix_ratio - , (unsigned long long)(utime * utime_fix_ratio) - , (unsigned long long)(stime * stime_fix_ratio) - , (unsigned long long)(gtime * gtime_fix_ratio) - , (unsigned long long)(cutime * cutime_fix_ratio) - , (unsigned long long)(cstime * cstime_fix_ratio) - , (unsigned long long)(cgtime * cgtime_fix_ratio) + , (kernel_uint_t)(utime * utime_fix_ratio) + , (kernel_uint_t)(stime * stime_fix_ratio) + , (kernel_uint_t)(gtime * gtime_fix_ratio) + , (kernel_uint_t)(cutime * cutime_fix_ratio) + , (kernel_uint_t)(cstime * cstime_fix_ratio) + , (kernel_uint_t)(cgtime * cgtime_fix_ratio) ); } } @@ -2437,21 +2884,21 @@ static void send_collected_data_to_netdata(struct target *root, const char *type send_BEGIN(type, "cpu", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, (unsigned long long)(w->utime * utime_fix_ratio) + (unsigned long long)(w->stime * stime_fix_ratio) + (unsigned long long)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((unsigned long long)(w->cutime * cutime_fix_ratio) + (unsigned long long)(w->cstime * cstime_fix_ratio) + (unsigned long long)(w->cgtime * cgtime_fix_ratio)):0ULL)); + send_SET(w->name, (kernel_uint_t)(w->utime * utime_fix_ratio) + (kernel_uint_t)(w->stime * stime_fix_ratio) + (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cutime * cutime_fix_ratio) + (kernel_uint_t)(w->cstime * cstime_fix_ratio) + (kernel_uint_t)(w->cgtime * cgtime_fix_ratio)):0ULL)); } send_END(); send_BEGIN(type, "cpu_user", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, (unsigned long long)(w->utime * utime_fix_ratio) + (include_exited_childs?((unsigned long long)(w->cutime * cutime_fix_ratio)):0ULL)); + send_SET(w->name, (kernel_uint_t)(w->utime * utime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cutime * cutime_fix_ratio)):0ULL)); } send_END(); send_BEGIN(type, "cpu_system", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, (unsigned long long)(w->stime * stime_fix_ratio) + (include_exited_childs?((unsigned long long)(w->cstime * cstime_fix_ratio)):0ULL)); + send_SET(w->name, (kernel_uint_t)(w->stime * stime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cstime * cstime_fix_ratio)):0ULL)); } send_END(); @@ -2459,7 +2906,7 @@ static void send_collected_data_to_netdata(struct target *root, const char *type send_BEGIN(type, "cpu_guest", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, (unsigned long long)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((unsigned long long)(w->cgtime * cgtime_fix_ratio)):0ULL)); + send_SET(w->name, (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cgtime * cgtime_fix_ratio)):0ULL)); } send_END(); } @@ -2495,17 +2942,18 @@ static void send_collected_data_to_netdata(struct target *root, const char *type send_BEGIN(type, "minor_faults", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, (unsigned long long)(w->minflt * minflt_fix_ratio) + (include_exited_childs?((unsigned long long)(w->cminflt * cminflt_fix_ratio)):0ULL)); + send_SET(w->name, (kernel_uint_t)(w->minflt * minflt_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cminflt * cminflt_fix_ratio)):0ULL)); } send_END(); send_BEGIN(type, "major_faults", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, (unsigned long long)(w->majflt * majflt_fix_ratio) + (include_exited_childs?((unsigned long long)(w->cmajflt * cmajflt_fix_ratio)):0ULL)); + send_SET(w->name, (kernel_uint_t)(w->majflt * majflt_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cmajflt * cmajflt_fix_ratio)):0ULL)); } send_END(); +#ifndef __FreeBSD__ send_BEGIN(type, "lreads", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) @@ -2519,6 +2967,7 @@ static void send_collected_data_to_netdata(struct target *root, const char *type send_SET(w->name, w->io_logical_bytes_written); } send_END(); +#endif send_BEGIN(type, "preads", usec); for (w = root; w ; w = w->next) { @@ -2644,6 +3093,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); } +#ifndef __FreeBSD__ fprintf(stdout, "CHART %s.lreads '' '%s Disk Logical Reads' 'kilobytes/s' disk %s.lreads stacked 20042 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) @@ -2655,7 +3105,21 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type if(unlikely(w->exposed)) fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL); } +#endif + +#ifdef __FreeBSD__ + fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'blocks/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); + } + fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'blocks/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); + } +#else fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'kilobytes/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) @@ -2667,6 +3131,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type if(unlikely(w->exposed)) fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL); } +#endif if(enable_file_charts) { fprintf(stdout, "CHART %s.files '' '%s Open Files' 'open files' disk %s.files stacked 20050 %d\n", type, @@ -2696,6 +3161,22 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type // ---------------------------------------------------------------------------- // parse command line arguments +int check_proc_1_io() { + int ret = 0; + + procfile *ff = procfile_open("/proc/1/io", NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(!ff) goto cleanup; + + ff = procfile_readall(ff); + if(!ff) goto cleanup; + + ret = 1; + +cleanup: + procfile_close(ff); + return ret; +} + static void parse_args(int argc, char **argv) { int i, freq = 0; @@ -2715,6 +3196,15 @@ static void parse_args(int argc, char **argv) exit(0); } + if(strcmp("test-permissions", argv[i]) == 0 || strcmp("-t", argv[i]) == 0) { + if(!check_proc_1_io()) { + perror("Tried to read /proc/1/io and it failed"); + exit(1); + } + printf("OK\n"); + exit(0); + } + if(strcmp("debug", argv[i]) == 0) { debug = 1; // debug_flags = 0xffffffff; @@ -2766,7 +3256,7 @@ static void parse_args(int argc, char **argv) "\n" " netdata apps.plugin %s\n" " Copyright (C) 2016-2017 Costa Tsaousis \n" - " Released under GNU Public License v3 or later.\n" + " Released under GNU General Public License v3 or later.\n" " All rights reserved.\n" "\n" " This program is a data collector plugin for netdata.\n" @@ -2819,15 +3309,74 @@ static void parse_args(int argc, char **argv) } } -int main(int argc, char **argv) -{ +static int am_i_running_as_root() { + uid_t uid = getuid(), euid = geteuid(); + + if(uid == 0 || euid == 0) { + if(debug) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); + return 1; + } + + if(debug) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); + return 0; +} + +#ifdef HAVE_CAPABILITY +static int check_capabilities() { + cap_t caps = cap_get_proc(); + if(!caps) { + error("Cannot get current capabilities."); + return 0; + } + else if(debug) + info("Received my capabilities from the system."); + + int ret = 1; + + cap_flag_value_t cfv = CAP_CLEAR; + if(cap_get_flag(caps, CAP_DAC_READ_SEARCH, CAP_EFFECTIVE, &cfv) == -1) { + error("Cannot find if CAP_DAC_READ_SEARCH is effective."); + ret = 0; + } + else { + if(cfv != CAP_SET) { + error("apps.plugin should run with CAP_DAC_READ_SEARCH."); + ret = 0; + } + else if(debug) + info("apps.plugin runs with CAP_DAC_READ_SEARCH."); + } + + cfv = CAP_CLEAR; + if(cap_get_flag(caps, CAP_SYS_PTRACE, CAP_EFFECTIVE, &cfv) == -1) { + error("Cannot find if CAP_SYS_PTRACE is effective."); + ret = 0; + } + else { + if(cfv != CAP_SET) { + error("apps.plugin should run with CAP_SYS_PTRACE."); + ret = 0; + } + else if(debug) + info("apps.plugin runs with CAP_SYS_PTRACE."); + } + + cap_free(caps); + + return ret; +} +#else +static int check_capabilities() { + return 0; +} +#endif + +int main(int argc, char **argv) { // debug_flags = D_PROCFILE; // set the name for logging program_name = "apps.plugin"; - info("started on pid %d", getpid()); - // disable syslog for apps.plugin error_log_syslog = 0; @@ -2835,12 +3384,12 @@ int main(int argc, char **argv) error_log_errors_per_period = 100; error_log_throttle_period = 3600; - global_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(global_host_prefix == NULL) { + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(netdata_configured_host_prefix == NULL) { // info("NETDATA_HOST_PREFIX is not passed from netdata"); - global_host_prefix = ""; + netdata_configured_host_prefix = ""; } - // else info("Found NETDATA_HOST_PREFIX='%s'", global_host_prefix); + // else info("Found NETDATA_HOST_PREFIX='%s'", netdata_configured_host_prefix); config_dir = getenv("NETDATA_CONFIG_DIR"); if(config_dir == NULL) { @@ -2854,58 +3403,53 @@ int main(int argc, char **argv) struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; if(setrlimit(RLIMIT_CORE, &rl) != 0) info("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); +#ifdef HAVE_SYS_PRCTL_H prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif } #endif /* NETDATA_INTERNAL_CHECKS */ procfile_adaptive_initial_allocation = 1; - time_t started_t = now_realtime_sec(); + time_t started_t = now_monotonic_sec(); get_system_HZ(); get_system_pid_max(); get_system_cpus(); parse_args(argc, argv); - all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max); - all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max); - - fprintf(stdout, - "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n" - "DIMENSION user '' incremental 1 1000\n" - "DIMENSION system '' incremental 1 1000\n" - "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n" - "DIMENSION files '' incremental 1 1\n" - "DIMENSION pids '' absolute 1 1\n" - "DIMENSION fds '' absolute 1 1\n" - "DIMENSION targets '' absolute 1 1\n" - "CHART netdata.apps_fix '' 'Apps Plugin Normalization Ratios' 'percentage' apps.plugin netdata.apps_fix line 140002 %1$d\n" - "DIMENSION utime '' absolute 1 %2$llu\n" - "DIMENSION stime '' absolute 1 %2$llu\n" - "DIMENSION gtime '' absolute 1 %2$llu\n" - "DIMENSION minflt '' absolute 1 %2$llu\n" - "DIMENSION majflt '' absolute 1 %2$llu\n" - , update_every - , RATES_DETAIL + if(!check_capabilities() && !am_i_running_as_root() && !check_proc_1_io()) { + uid_t uid = getuid(), euid = geteuid(); +#ifdef HAVE_CAPABILITY + error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " + "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + , uid, euid, argv[0], argv[0], argv[0] + ); +#else + error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "Your system does not support capabilities. " + "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + , uid, euid, argv[0], argv[0] ); +#endif + } - if(include_exited_childs) - fprintf(stdout, - "CHART netdata.apps_children_fix '' 'Apps Plugin Exited Children Normalization Ratios' 'percentage' apps.plugin netdata.apps_children_fix line 140003 %1$d\n" - "DIMENSION cutime '' absolute 1 %2$llu\n" - "DIMENSION cstime '' absolute 1 %2$llu\n" - "DIMENSION cgtime '' absolute 1 %2$llu\n" - "DIMENSION cminflt '' absolute 1 %2$llu\n" - "DIMENSION cmajflt '' absolute 1 %2$llu\n" - , update_every - , RATES_DETAIL - ); + info("started on pid %d", getpid()); + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max); +#endif + + all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max); usec_t step = update_every * USEC_PER_SEC; global_iterations_counter = 1; + heartbeat_t hb; + heartbeat_init(&hb); for(;1; global_iterations_counter++) { - usec_t now = now_realtime_usec(); - usec_t next = now - (now % step) + step; #ifdef NETDATA_PROFILING #warning "compiling for profiling" @@ -2913,20 +3457,17 @@ int main(int argc, char **argv) profiling_count++; if(unlikely(profiling_count > 1000)) exit(0); #else - while(now < next) { - sleep_usec(next - now); - now = now_realtime_usec(); - } + heartbeat_next(&hb, step); #endif - if(!collect_data_for_all_processes_from_proc()) { + if(!collect_data_for_all_processes()) { error("Cannot collect /proc data for running processes. Disabling apps.plugin..."); printf("DISABLE\n"); exit(1); } calculate_netdata_statistics(); - normalize_data(apps_groups_root_target); + normalize_utilization(apps_groups_root_target); usec_t dt = send_resource_usage_to_netdata(); @@ -2952,11 +3493,9 @@ int main(int argc, char **argv) show_guest_time_old = show_guest_time; if(unlikely(debug)) - fprintf(stderr, "apps.plugin: done Loop No %llu\n", global_iterations_counter); - - time_t current_t = now_realtime_sec(); + fprintf(stderr, "apps.plugin: done Loop No %zu\n", global_iterations_counter); // restart check (14400 seconds) - if(current_t - started_t > 14400) exit(0); + if(now_monotonic_sec() - started_t > 14400) exit(0); } }