X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=src%2Fapps_plugin.c;h=af634c0a44e565c9640afc56ade6981e846cfe2e;hb=712f59931625be657baf29c891eb149d84cb6f94;hp=b69171395085b375bf39530c920677ccc3881e4c;hpb=2785e60bf830254c6683ab033e4d080167df1ef0;p=netdata.git diff --git a/src/apps_plugin.c b/src/apps_plugin.c index b6917139..af634c0a 100644 --- a/src/apps_plugin.c +++ b/src/apps_plugin.c @@ -293,7 +293,6 @@ struct pid_stat { int keeploops; // increases by 1 every time keep is 1 and updated 0 char updated:1; // 1 when the process is currently running char merged:1; // 1 when it has been merged to its parent - char new_entry:1; // 1 when this is a new process, just saw for the first time char read:1; // 1 when we have already read this process for this iteration int sortlist; // higher numbers = top on the process tree @@ -354,6 +353,19 @@ static pid_t #define FILE_DESCRIPTORS_INCREASE_STEP 100 +// types for struct file_descriptor->type +typedef enum fd_filetype { + FILETYPE_OTHER, + FILETYPE_FILE, + FILETYPE_PIPE, + FILETYPE_SOCKET, + FILETYPE_INOTIFY, + FILETYPE_EVENTFD, + FILETYPE_EVENTPOLL, + FILETYPE_TIMERFD, + FILETYPE_SIGNALFD +} FD_FILETYPE; + struct file_descriptor { avl avl; @@ -364,7 +376,7 @@ struct file_descriptor { const char *name; uint32_t hash; - char type; + FD_FILETYPE type; int count; int pos; } *all_files = NULL; @@ -373,18 +385,6 @@ static int all_files_len = 0, all_files_size = 0; -// types for struct file_descriptor->type -#define FILETYPE_OTHER 0 -#define FILETYPE_FILE 1 -#define FILETYPE_PIPE 2 -#define FILETYPE_SOCKET 3 -#define FILETYPE_INOTIFY 4 -#define FILETYPE_EVENTFD 5 -#define FILETYPE_EVENTPOLL 6 -#define FILETYPE_TIMERFD 7 -#define FILETYPE_SIGNALFD 8 - - // ---------------------------------------------------------------------------- // callback required by fatal() @@ -626,56 +626,132 @@ static int read_apps_groups_conf(const char *file) // struct pid_stat management static inline struct pid_stat *get_pid_entry(pid_t pid) { - if(unlikely(all_pids[pid])) { - all_pids[pid]->new_entry = 0; + if(unlikely(all_pids[pid])) return all_pids[pid]; - } - all_pids[pid] = callocz(sizeof(struct pid_stat), 1); - all_pids[pid]->fds = callocz(sizeof(int), MAX_SPARE_FDS); - all_pids[pid]->fds_size = MAX_SPARE_FDS; + struct pid_stat *p = callocz(sizeof(struct pid_stat), 1); + p->fds = callocz(sizeof(int), MAX_SPARE_FDS); + p->fds_size = MAX_SPARE_FDS; if(likely(root_of_pids)) - root_of_pids->prev = all_pids[pid]; + root_of_pids->prev = p; - all_pids[pid]->next = root_of_pids; - root_of_pids = all_pids[pid]; + p->next = root_of_pids; + root_of_pids = p; - all_pids[pid]->pid = pid; - all_pids[pid]->new_entry = 1; + p->pid = pid; + all_pids[pid] = p; all_pids_count++; - return all_pids[pid]; + return p; } static inline void del_pid_entry(pid_t pid) { - if(unlikely(!all_pids[pid])) { + struct pid_stat *p = all_pids[pid]; + + if(unlikely(!p)) { error("attempted to free pid %d that is not allocated.", pid); return; } if(unlikely(debug)) - fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm); + fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, p->comm); - if(root_of_pids == all_pids[pid]) - root_of_pids = all_pids[pid]->next; + if(root_of_pids == p) + root_of_pids = p->next; - if(all_pids[pid]->next) all_pids[pid]->next->prev = all_pids[pid]->prev; - if(all_pids[pid]->prev) all_pids[pid]->prev->next = all_pids[pid]->next; + if(p->next) p->next->prev = p->prev; + if(p->prev) p->prev->next = p->next; - freez(all_pids[pid]->fds); - freez(all_pids[pid]->fds_dirname); - freez(all_pids[pid]->stat_filename); - freez(all_pids[pid]->statm_filename); - freez(all_pids[pid]->io_filename); - freez(all_pids[pid]->cmdline_filename); - freez(all_pids[pid]); + freez(p->fds); + freez(p->fds_dirname); + freez(p->stat_filename); + freez(p->statm_filename); + freez(p->io_filename); + freez(p->cmdline_filename); + freez(p); all_pids[pid] = NULL; all_pids_count--; } +// ---------------------------------------------------------------------------- + +static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { + if(unlikely(!status)) { + // error("command failed log %u, errno %d", log, errno); + + if(unlikely(debug || errno != ENOENT)) { + if(unlikely(debug || !(p->log_thrown & log))) { + p->log_thrown |= log; + switch(log) { + case PID_LOG_IO: + error("Cannot process %s/proc/%d/io (command '%s')", global_host_prefix, p->pid, p->comm); + break; + + case PID_LOG_STATM: + error("Cannot process %s/proc/%d/statm (command '%s')", global_host_prefix, p->pid, p->comm); + break; + + case PID_LOG_CMDLINE: + error("Cannot process %s/proc/%d/cmdline (command '%s')", global_host_prefix, p->pid, p->comm); + break; + + case PID_LOG_FDS: + error("Cannot process entries in %s/proc/%d/fd (command '%s')", global_host_prefix, p->pid, p->comm); + break; + + case PID_LOG_STAT: + break; + + default: + error("unhandled error for pid %d, command '%s'", p->pid, p->comm); + break; + } + } + } + errno = 0; + } + else if(unlikely(p->log_thrown & log)) { + // error("unsetting log %u on pid %d", log, p->pid); + p->log_thrown &= ~log; + } + + return status; +} + +static inline void assign_target_to_pid(struct pid_stat *p) { + uint32_t hash = simple_hash(p->comm); + size_t pclen = strlen(p->comm); + + struct target *w; + for(w = apps_groups_root_target; w ; w = w->next) { + // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm); + + // find it - 4 cases: + // 1. the target is not a pattern + // 2. the target has the prefix + // 3. the target has the suffix + // 4. the target is something inside cmdline + + if(unlikely(( (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) + || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) + || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) + || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(p->cmdline, w->compare)) + ))) { + + if(w->target) p->target = w->target; + else p->target = w; + + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); + + break; + } + } +} + // ---------------------------------------------------------------------------- // update pids from proc @@ -754,89 +830,101 @@ static inline int read_proc_pid_stat(struct pid_stat *p) { if(unlikely(!ff)) goto cleanup; p->last_stat_collected_usec = p->stat_collected_usec; - p->stat_collected_usec = now_realtime_usec(); + p->stat_collected_usec = now_monotonic_usec(); file_counter++; - // p->pid = str2pid_t(procfile_lineword(ff, 0, 0+i)); - - if(unlikely(!p->comm[0])) - strncpyz(p->comm, procfile_lineword(ff, 0, 1), MAX_COMPARE_NAME); - + // p->pid = str2pid_t(procfile_lineword(ff, 0, 0)); + char *comm = procfile_lineword(ff, 0, 1); // p->state = *(procfile_lineword(ff, 0, 2)); p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); - // p->pgrp = str2ul(procfile_lineword(ff, 0, 4)); - // p->session = str2ul(procfile_lineword(ff, 0, 5)); - // p->tty_nr = str2ul(procfile_lineword(ff, 0, 6)); - // p->tpgid = str2ul(procfile_lineword(ff, 0, 7)); - // p->flags = str2ull(procfile_lineword(ff, 0, 8)); + // p->pgrp = (int32_t)str2pid_t(procfile_lineword(ff, 0, 4)); + // p->session = (int32_t)str2pid_t(procfile_lineword(ff, 0, 5)); + // p->tty_nr = (int32_t)str2pid_t(procfile_lineword(ff, 0, 6)); + // p->tpgid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 7)); + // p->flags = str2uint64_t(procfile_lineword(ff, 0, 8)); - kernel_uint_t last; + if(strcmp(p->comm, comm)) { + if(unlikely(debug)) { + if(p->comm[0]) + fprintf(stderr, "apps.plugin: \tpid %d (%s) changed name to '%s'\n", p->pid, p->comm, comm); + else + fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", p->pid, comm); + } + + strncpyz(p->comm, comm, MAX_COMPARE_NAME); - last = p->minflt_raw; - p->minflt_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 9)); + // /proc//cmdline + if(likely(proc_pid_cmdline_is_needed)) + managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); + + assign_target_to_pid(p); + } + + kernel_uint_t last = p->minflt_raw; + p->minflt_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 9)); p->minflt = (p->minflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->cminflt_raw; - p->cminflt_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 10)); + p->cminflt_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 10)); p->cminflt = (p->cminflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->majflt_raw; - p->majflt_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 11)); + p->majflt_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 11)); p->majflt = (p->majflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->cmajflt_raw; - p->cmajflt_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 12)); + p->cmajflt_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 12)); p->cmajflt = (p->cmajflt_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->utime_raw; - p->utime_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 13)); + p->utime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 13)); p->utime = (p->utime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->stime_raw; - p->stime_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 14)); + p->stime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 14)); p->stime = (p->stime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->cutime_raw; - p->cutime_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 15)); + p->cutime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 15)); p->cutime = (p->cutime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->cstime_raw; - p->cstime_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 16)); + p->cstime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 16)); p->cstime = (p->cstime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - // p->priority = str2kernel_unit_t(procfile_lineword(ff, 0, 17)); - // p->nice = str2kernel_unit_t(procfile_lineword(ff, 0, 18)); + // p->priority = str2kernel_uint_t(procfile_lineword(ff, 0, 17)); + // p->nice = str2kernel_uint_t(procfile_lineword(ff, 0, 18)); p->num_threads = (int32_t)str2uint32_t(procfile_lineword(ff, 0, 19)); - // p->itrealvalue = str2kernel_unit_t(procfile_lineword(ff, 0, 20)); - // p->starttime = str2kernel_unit_t(procfile_lineword(ff, 0, 21)); - // p->vsize = str2kernel_unit_t(procfile_lineword(ff, 0, 22)); - // p->rss = str2kernel_unit_t(procfile_lineword(ff, 0, 23)); - // p->rsslim = str2kernel_unit_t(procfile_lineword(ff, 0, 24)); - // p->starcode = str2kernel_unit_t(procfile_lineword(ff, 0, 25)); - // p->endcode = str2kernel_unit_t(procfile_lineword(ff, 0, 26)); - // p->startstack = str2kernel_unit_t(procfile_lineword(ff, 0, 27)); - // p->kstkesp = str2kernel_unit_t(procfile_lineword(ff, 0, 28)); - // p->kstkeip = str2kernel_unit_t(procfile_lineword(ff, 0, 29)); - // p->signal = str2kernel_unit_t(procfile_lineword(ff, 0, 30)); - // p->blocked = str2kernel_unit_t(procfile_lineword(ff, 0, 31)); - // p->sigignore = str2kernel_unit_t(procfile_lineword(ff, 0, 32)); - // p->sigcatch = str2kernel_unit_t(procfile_lineword(ff, 0, 33)); - // p->wchan = str2kernel_unit_t(procfile_lineword(ff, 0, 34)); - // p->nswap = str2kernel_unit_t(procfile_lineword(ff, 0, 35)); - // p->cnswap = str2kernel_unit_t(procfile_lineword(ff, 0, 36)); - // p->exit_signal = str2kernel_unit_t(procfile_lineword(ff, 0, 37)); - // p->processor = str2kernel_unit_t(procfile_lineword(ff, 0, 38)); - // p->rt_priority = str2kernel_unit_t(procfile_lineword(ff, 0, 39)); - // p->policy = str2kernel_unit_t(procfile_lineword(ff, 0, 40)); - // p->delayacct_blkio_ticks = str2kernel_unit_t(procfile_lineword(ff, 0, 41)); + // p->itrealvalue = str2kernel_uint_t(procfile_lineword(ff, 0, 20)); + // p->starttime = str2kernel_uint_t(procfile_lineword(ff, 0, 21)); + // p->vsize = str2kernel_uint_t(procfile_lineword(ff, 0, 22)); + // p->rss = str2kernel_uint_t(procfile_lineword(ff, 0, 23)); + // p->rsslim = str2kernel_uint_t(procfile_lineword(ff, 0, 24)); + // p->starcode = str2kernel_uint_t(procfile_lineword(ff, 0, 25)); + // p->endcode = str2kernel_uint_t(procfile_lineword(ff, 0, 26)); + // p->startstack = str2kernel_uint_t(procfile_lineword(ff, 0, 27)); + // p->kstkesp = str2kernel_uint_t(procfile_lineword(ff, 0, 28)); + // p->kstkeip = str2kernel_uint_t(procfile_lineword(ff, 0, 29)); + // p->signal = str2kernel_uint_t(procfile_lineword(ff, 0, 30)); + // p->blocked = str2kernel_uint_t(procfile_lineword(ff, 0, 31)); + // p->sigignore = str2kernel_uint_t(procfile_lineword(ff, 0, 32)); + // p->sigcatch = str2kernel_uint_t(procfile_lineword(ff, 0, 33)); + // p->wchan = str2kernel_uint_t(procfile_lineword(ff, 0, 34)); + // p->nswap = str2kernel_uint_t(procfile_lineword(ff, 0, 35)); + // p->cnswap = str2kernel_uint_t(procfile_lineword(ff, 0, 36)); + // p->exit_signal = str2kernel_uint_t(procfile_lineword(ff, 0, 37)); + // p->processor = str2kernel_uint_t(procfile_lineword(ff, 0, 38)); + // p->rt_priority = str2kernel_uint_t(procfile_lineword(ff, 0, 39)); + // p->policy = str2kernel_uint_t(procfile_lineword(ff, 0, 40)); + // p->delayacct_blkio_ticks = str2kernel_uint_t(procfile_lineword(ff, 0, 41)); if(enable_guest_charts) { last = p->gtime_raw; - p->gtime_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 42)); + p->gtime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 42)); p->gtime = (p->gtime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); last = p->cgtime_raw; - p->cgtime_raw = str2kernel_unit_t(procfile_lineword(ff, 0, 43)); + p->cgtime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 43)); p->cgtime = (p->cgtime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); if (show_guest_time || p->gtime || p->cgtime) { @@ -897,13 +985,13 @@ static inline int read_proc_pid_statm(struct pid_stat *p) { file_counter++; - p->statm_size = str2ull(procfile_lineword(ff, 0, 0)); - p->statm_resident = str2ull(procfile_lineword(ff, 0, 1)); - p->statm_share = str2ull(procfile_lineword(ff, 0, 2)); - // p->statm_text = str2ull(procfile_lineword(ff, 0, 3)); - // p->statm_lib = str2ull(procfile_lineword(ff, 0, 4)); - // p->statm_data = str2ull(procfile_lineword(ff, 0, 5)); - // p->statm_dirty = str2ull(procfile_lineword(ff, 0, 6)); + p->statm_size = str2kernel_uint_t(procfile_lineword(ff, 0, 0)); + p->statm_resident = str2kernel_uint_t(procfile_lineword(ff, 0, 1)); + p->statm_share = str2kernel_uint_t(procfile_lineword(ff, 0, 2)); + // p->statm_text = str2kernel_uint_t(procfile_lineword(ff, 0, 3)); + // p->statm_lib = str2kernel_uint_t(procfile_lineword(ff, 0, 4)); + // p->statm_data = str2kernel_uint_t(procfile_lineword(ff, 0, 5)); + // p->statm_dirty = str2kernel_uint_t(procfile_lineword(ff, 0, 6)); return 1; @@ -937,36 +1025,36 @@ static inline int read_proc_pid_io(struct pid_stat *p) { file_counter++; p->last_io_collected_usec = p->io_collected_usec; - p->io_collected_usec = now_realtime_usec(); + p->io_collected_usec = now_monotonic_usec(); kernel_uint_t last; last = p->io_logical_bytes_read_raw; - p->io_logical_bytes_read_raw = str2ull(procfile_lineword(ff, 0, 1)); + p->io_logical_bytes_read_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 1)); p->io_logical_bytes_read = (p->io_logical_bytes_read_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); last = p->io_logical_bytes_written_raw; - p->io_logical_bytes_written_raw = str2ull(procfile_lineword(ff, 1, 1)); + p->io_logical_bytes_written_raw = str2kernel_uint_t(procfile_lineword(ff, 1, 1)); p->io_logical_bytes_written = (p->io_logical_bytes_written_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); // last = p->io_read_calls_raw; - // p->io_read_calls_raw = str2ull(procfile_lineword(ff, 2, 1)); + // p->io_read_calls_raw = str2kernel_uint_t(procfile_lineword(ff, 2, 1)); // p->io_read_calls = (p->io_read_calls_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); // last = p->io_write_calls_raw; - // p->io_write_calls_raw = str2ull(procfile_lineword(ff, 3, 1)); + // p->io_write_calls_raw = str2kernel_uint_t(procfile_lineword(ff, 3, 1)); // p->io_write_calls = (p->io_write_calls_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); last = p->io_storage_bytes_read_raw; - p->io_storage_bytes_read_raw = str2ull(procfile_lineword(ff, 4, 1)); + p->io_storage_bytes_read_raw = str2kernel_uint_t(procfile_lineword(ff, 4, 1)); p->io_storage_bytes_read = (p->io_storage_bytes_read_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); last = p->io_storage_bytes_written_raw; - p->io_storage_bytes_written_raw = str2ull(procfile_lineword(ff, 5, 1)); + p->io_storage_bytes_written_raw = str2kernel_uint_t(procfile_lineword(ff, 5, 1)); p->io_storage_bytes_written = (p->io_storage_bytes_written_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); // last = p->io_cancelled_write_bytes_raw; - // p->io_cancelled_write_bytes_raw = str2ull(procfile_lineword(ff, 6, 1)); + // p->io_cancelled_write_bytes_raw = str2kernel_uint_t(procfile_lineword(ff, 6, 1)); // p->io_cancelled_write_bytes = (p->io_cancelled_write_bytes_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); if(unlikely(global_iterations_counter == 1)) { @@ -1008,33 +1096,33 @@ static inline int read_proc_stat() { if(unlikely(!ff)) goto cleanup; last_collected_usec = collected_usec; - collected_usec = now_realtime_usec(); + collected_usec = now_monotonic_usec(); file_counter++; kernel_uint_t last; last = utime_raw; - utime_raw = str2ull(procfile_lineword(ff, 0, 1)); + utime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 1)); global_utime = (utime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); // nice time, on user time last = ntime_raw; - ntime_raw = str2ull(procfile_lineword(ff, 0, 2)); + ntime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 2)); global_utime += (ntime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); last = stime_raw; - stime_raw = str2ull(procfile_lineword(ff, 0, 3)); + stime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 3)); global_stime = (stime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); last = gtime_raw; - gtime_raw = str2ull(procfile_lineword(ff, 0, 10)); + gtime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 10)); global_gtime = (gtime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); if(enable_guest_charts) { // guest nice time, on guest time last = gntime_raw; - gntime_raw = str2ull(procfile_lineword(ff, 0, 11)); + gntime_raw = str2kernel_uint_t(procfile_lineword(ff, 0, 11)); global_gtime += (gntime_raw - last) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); // remove guest time from user time @@ -1179,7 +1267,7 @@ static inline void all_files_grow() { all_files_size += FILE_DESCRIPTORS_INCREASE_STEP; } -static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t hash, int type) { +static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t hash, FD_FILETYPE type) { // check we have enough memory to add it if(!all_files || all_files_len == all_files_size) all_files_grow(); @@ -1260,21 +1348,26 @@ static inline int file_descriptor_find_or_add(const char *name) } // not found - int type; - if(name[0] == '/') type = FILETYPE_FILE; - else if(strncmp(name, "pipe:", 5) == 0) type = FILETYPE_PIPE; - else if(strncmp(name, "socket:", 7) == 0) type = FILETYPE_SOCKET; - else if(strcmp(name, "anon_inode:inotify") == 0 || strcmp(name, "inotify") == 0) type = FILETYPE_INOTIFY; - else if(strcmp(name, "anon_inode:[eventfd]") == 0) type = FILETYPE_EVENTFD; - else if(strcmp(name, "anon_inode:[eventpoll]") == 0) type = FILETYPE_EVENTPOLL; - else if(strcmp(name, "anon_inode:[timerfd]") == 0) type = FILETYPE_TIMERFD; - else if(strcmp(name, "anon_inode:[signalfd]") == 0) type = FILETYPE_SIGNALFD; - else if(strncmp(name, "anon_inode:", 11) == 0) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); + FD_FILETYPE type; + if(likely(name[0] == '/')) type = FILETYPE_FILE; + else if(likely(strncmp(name, "pipe:", 5) == 0)) type = FILETYPE_PIPE; + else if(likely(strncmp(name, "socket:", 7) == 0)) type = FILETYPE_SOCKET; + else if(likely(strncmp(name, "anon_inode:", 11) == 0)) { + const char *t = &name[11]; + + if(strcmp(t, "inotify") == 0) type = FILETYPE_INOTIFY; + else if(strcmp(t, "[eventfd]") == 0) type = FILETYPE_EVENTFD; + else if(strcmp(t, "[eventpoll]") == 0) type = FILETYPE_EVENTPOLL; + else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD; + else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD; + else { + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); - type = FILETYPE_OTHER; + type = FILETYPE_OTHER; + } } + else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY; else { if(unlikely(debug)) fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name); @@ -1695,80 +1788,6 @@ static int compar_pid(const void *pid1, const void *pid2) { return 1; } -static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { - if(unlikely(!status)) { - // error("command failed log %u, errno %d", log, errno); - - if(unlikely(debug || errno != ENOENT)) { - if(unlikely(debug || !(p->log_thrown & log))) { - p->log_thrown |= log; - switch(log) { - case PID_LOG_IO: - error("Cannot process %s/proc/%d/io (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_STATM: - error("Cannot process %s/proc/%d/statm (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_CMDLINE: - error("Cannot process %s/proc/%d/cmdline (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_FDS: - error("Cannot process entries in %s/proc/%d/fd (command '%s')", global_host_prefix, p->pid, p->comm); - break; - - case PID_LOG_STAT: - break; - - default: - error("unhandled error for pid %d, command '%s'", p->pid, p->comm); - break; - } - } - } - errno = 0; - } - else if(unlikely(p->log_thrown & log)) { - // error("unsetting log %u on pid %d", log, p->pid); - p->log_thrown &= ~log; - } - - return status; -} - -static inline void assign_target_to_pid(struct pid_stat *p) { - uint32_t hash = simple_hash(p->comm); - size_t pclen = strlen(p->comm); - - struct target *w; - for(w = apps_groups_root_target; w ; w = w->next) { - // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm); - - // find it - 4 cases: - // 1. the target is not a pattern - // 2. the target has the prefix - // 3. the target has the suffix - // 4. the target is something inside cmdline - - if(unlikely(( (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) - || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) - || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) - || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(p->cmdline, w->compare)) - ))) { - - if(w->target) p->target = w->target; - else p->target = w; - - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); - - break; - } - } -} - static inline int collect_data_for_pid(pid_t pid) { if(unlikely(pid <= 0 || pid > pid_max)) { error("Invalid pid %d read (expected 1 to %d). Ignoring process.", pid, pid_max); @@ -1808,22 +1827,6 @@ static inline int collect_data_for_pid(pid_t pid) { // there is no reason to proceed if we cannot get its memory status return 0; - // -------------------------------------------------------------------- - // link it - - // check if it is target - // we do this only once, the first time this pid is loaded - if(unlikely(p->new_entry)) { - // /proc//cmdline - if(likely(proc_pid_cmdline_is_needed)) - managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); - - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", pid, p->comm); - - assign_target_to_pid(p); - } - // -------------------------------------------------------------------- // /proc//fd @@ -1852,7 +1855,6 @@ static int collect_data_for_all_processes(void) { for(p = root_of_pids; p ; p = p->next) { p->read = 0; // mark it as not read, so that collect_data_for_pid() will read it p->updated = 0; - p->new_entry = 0; p->merged = 0; p->children_count = 0; p->parent = NULL; @@ -2191,7 +2193,7 @@ static inline void aggregate_fd_on_target(int fd, struct target *w) { w->openeventpolls++; break; - default: + case FILETYPE_OTHER: w->openother++; break; } @@ -2365,7 +2367,7 @@ static usec_t send_resource_usage_to_netdata() { usec_t cpusyst; if(!last.tv_sec) { - now_realtime_timeval(&last); + now_monotonic_timeval(&last); getrusage(RUSAGE_SELF, &me_last); // the first time, give a zero to allow @@ -2376,7 +2378,7 @@ static usec_t send_resource_usage_to_netdata() { cpusyst = 0; } else { - now_realtime_timeval(&now); + now_monotonic_timeval(&now); getrusage(RUSAGE_SELF, &me); usec = dt_usec(&now, &last); @@ -2387,6 +2389,42 @@ static usec_t send_resource_usage_to_netdata() { memmove(&me_last, &me, sizeof(struct rusage)); } + static char created_charts = 0; + if(unlikely(!created_charts)) { + created_charts = 1; + + fprintf(stdout + , "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n" + "DIMENSION user '' incremental 1 1000\n" + "DIMENSION system '' incremental 1 1000\n" + "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n" + "DIMENSION files '' incremental 1 1\n" + "DIMENSION pids '' absolute 1 1\n" + "DIMENSION fds '' absolute 1 1\n" + "DIMENSION targets '' absolute 1 1\n" + "CHART netdata.apps_fix '' 'Apps Plugin Normalization Ratios' 'percentage' apps.plugin netdata.apps_fix line 140002 %1$d\n" + "DIMENSION utime '' absolute 1 %2$llu\n" + "DIMENSION stime '' absolute 1 %2$llu\n" + "DIMENSION gtime '' absolute 1 %2$llu\n" + "DIMENSION minflt '' absolute 1 %2$llu\n" + "DIMENSION majflt '' absolute 1 %2$llu\n" + , update_every + , RATES_DETAIL + ); + + if(include_exited_childs) + fprintf(stdout + , "CHART netdata.apps_children_fix '' 'Apps Plugin Exited Children Normalization Ratios' 'percentage' apps.plugin netdata.apps_children_fix line 140003 %1$d\n" + "DIMENSION cutime '' absolute 1 %2$llu\n" + "DIMENSION cstime '' absolute 1 %2$llu\n" + "DIMENSION cgtime '' absolute 1 %2$llu\n" + "DIMENSION cminflt '' absolute 1 %2$llu\n" + "DIMENSION cmajflt '' absolute 1 %2$llu\n" + , update_every + , RATES_DETAIL + ); + } + fprintf(stdout, "BEGIN netdata.apps_cpu %llu\n" "SET user = %llu\n" @@ -2922,7 +2960,7 @@ static void parse_args(int argc, char **argv) "\n" " netdata apps.plugin %s\n" " Copyright (C) 2016-2017 Costa Tsaousis \n" - " Released under GNU Public License v3 or later.\n" + " Released under GNU General Public License v3 or later.\n" " All rights reserved.\n" "\n" " This program is a data collector plugin for netdata.\n" @@ -2975,8 +3013,69 @@ static void parse_args(int argc, char **argv) } } -int main(int argc, char **argv) -{ +static int am_i_running_as_root() { + uid_t uid = getuid(), euid = geteuid(); + + if(uid == 0 || euid == 0) { + if(debug) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); + return 1; + } + + if(debug) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); + return 0; +} + +#ifdef HAVE_CAPABILITY +static int check_capabilities() { + cap_t caps = cap_get_proc(); + if(!caps) { + error("Cannot get current capabilities."); + return 0; + } + else if(debug) + info("Received my capabilities from the system."); + + int ret = 1; + + cap_flag_value_t cfv = CAP_CLEAR; + if(cap_get_flag(caps, CAP_DAC_READ_SEARCH, CAP_EFFECTIVE, &cfv) == -1) { + error("Cannot find if CAP_DAC_READ_SEARCH is effective."); + ret = 0; + } + else { + if(cfv != CAP_SET) { + error("apps.plugin should run with CAP_DAC_READ_SEARCH."); + ret = 0; + } + else if(debug) + info("apps.plugin runs with CAP_DAC_READ_SEARCH."); + } + + cfv = CAP_CLEAR; + if(cap_get_flag(caps, CAP_SYS_PTRACE, CAP_EFFECTIVE, &cfv) == -1) { + error("Cannot find if CAP_SYS_PTRACE is effective."); + ret = 0; + } + else { + if(cfv != CAP_SET) { + error("apps.plugin should run with CAP_SYS_PTRACE."); + ret = 0; + } + else if(debug) + info("apps.plugin runs with CAP_SYS_PTRACE."); + } + + cap_free(caps); + + return ret; +} +#else +static int check_capabilities() { + return 0; +} +#endif + +int main(int argc, char **argv) { // debug_flags = D_PROCFILE; // set the name for logging @@ -3010,58 +3109,50 @@ int main(int argc, char **argv) struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; if(setrlimit(RLIMIT_CORE, &rl) != 0) info("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); +#ifdef HAVE_SYS_PRCTL_H prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif } #endif /* NETDATA_INTERNAL_CHECKS */ procfile_adaptive_initial_allocation = 1; - time_t started_t = now_realtime_sec(); + time_t started_t = now_monotonic_sec(); get_system_HZ(); get_system_pid_max(); get_system_cpus(); parse_args(argc, argv); + if(!check_capabilities()) { + if(!am_i_running_as_root()) { + uid_t uid = getuid(), euid = geteuid(); +#ifdef HAVE_CAPABILITY + error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " + "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + , uid, euid, argv[0], argv[0], argv[0] + ); +#else + error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "Your system does not support capabilities. " + "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + , uid, euid, argv[0], argv[0] + ); +#endif + } + } + all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max); all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max); - fprintf(stdout, - "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n" - "DIMENSION user '' incremental 1 1000\n" - "DIMENSION system '' incremental 1 1000\n" - "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n" - "DIMENSION files '' incremental 1 1\n" - "DIMENSION pids '' absolute 1 1\n" - "DIMENSION fds '' absolute 1 1\n" - "DIMENSION targets '' absolute 1 1\n" - "CHART netdata.apps_fix '' 'Apps Plugin Normalization Ratios' 'percentage' apps.plugin netdata.apps_fix line 140002 %1$d\n" - "DIMENSION utime '' absolute 1 %2$llu\n" - "DIMENSION stime '' absolute 1 %2$llu\n" - "DIMENSION gtime '' absolute 1 %2$llu\n" - "DIMENSION minflt '' absolute 1 %2$llu\n" - "DIMENSION majflt '' absolute 1 %2$llu\n" - , update_every - , RATES_DETAIL - ); - - if(include_exited_childs) - fprintf(stdout, - "CHART netdata.apps_children_fix '' 'Apps Plugin Exited Children Normalization Ratios' 'percentage' apps.plugin netdata.apps_children_fix line 140003 %1$d\n" - "DIMENSION cutime '' absolute 1 %2$llu\n" - "DIMENSION cstime '' absolute 1 %2$llu\n" - "DIMENSION cgtime '' absolute 1 %2$llu\n" - "DIMENSION cminflt '' absolute 1 %2$llu\n" - "DIMENSION cmajflt '' absolute 1 %2$llu\n" - , update_every - , RATES_DETAIL - ); - usec_t step = update_every * USEC_PER_SEC; global_iterations_counter = 1; + heartbeat_t hb; + heartbeat_init(&hb); for(;1; global_iterations_counter++) { - usec_t now = now_realtime_usec(); - usec_t next = now - (now % step) + step; #ifdef NETDATA_PROFILING #warning "compiling for profiling" @@ -3069,10 +3160,7 @@ int main(int argc, char **argv) profiling_count++; if(unlikely(profiling_count > 1000)) exit(0); #else - while(now < next) { - sleep_usec(next - now); - now = now_realtime_usec(); - } + heartbeat_next(&hb, step); #endif if(!collect_data_for_all_processes()) { @@ -3110,9 +3198,7 @@ int main(int argc, char **argv) if(unlikely(debug)) fprintf(stderr, "apps.plugin: done Loop No %zu\n", global_iterations_counter); - time_t current_t = now_realtime_sec(); - // restart check (14400 seconds) - if(current_t - started_t > 14400) exit(0); + if(now_monotonic_sec() - started_t > 14400) exit(0); } }