From: Costa Tsaousis Date: Wed, 6 Jul 2016 19:02:17 +0000 (+0300) Subject: Merge pull request #643 from paulfantom/master X-Git-Tag: v1.3.0~84 X-Git-Url: https://arthur.barton.de/gitweb/?p=netdata.git;a=commitdiff_plain;h=edef9f7b9cec229ad17e138031bf9f3300bc9868;hp=2c0ce5338813444ea5e0dfc2f4e71cbad897749c Merge pull request #643 from paulfantom/master python.d squid + hddtemp + tomcat + minor fixes --- diff --git a/Makefile.am b/Makefile.am index 3a2cedfb..85072a52 100644 --- a/Makefile.am +++ b/Makefile.am @@ -23,7 +23,6 @@ EXTRA_DIST = \ LICENSE.md \ COPYING \ autogen.sh \ - netdata-9999.ebuild \ tests/stress.sh \ $(NULL) diff --git a/charts.d/Makefile.am b/charts.d/Makefile.am index c7da39ea..e131d508 100644 --- a/charts.d/Makefile.am +++ b/charts.d/Makefile.am @@ -4,12 +4,10 @@ MAINTAINERCLEANFILES= $(srcdir)/Makefile.in dist_charts_SCRIPTS = \ - airsearches.chart.sh \ ap.chart.sh \ apache.chart.sh \ cpu_apps.chart.sh \ cpufreq.chart.sh \ - crsproxy.chart.sh \ example.chart.sh \ exim.chart.sh \ hddtemp.chart.sh \ diff --git a/charts.d/airsearches.chart.sh b/charts.d/airsearches.chart.sh deleted file mode 100755 index 449b1425..00000000 --- a/charts.d/airsearches.chart.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/sh - -airsearches_url= -airsearches_cmds= -airsearches_update_every=15 - -airsearches_get() { - wget 2>/dev/null -O - "$airsearches_url" |\ - sed -e "s|
|\n|g" -e "s|: |=|g" -e "s| \+|_|g" -e "s/^/airsearches_/g" |\ - tr "[A-Z]\.\!@#\$%^&*()_+\-" "[a-z]_____________" |\ - egrep "^airsearches_[a-z0-9_]+=[0-9]+$" -} - -airsearches_check() { - # make sure we have all the commands we need - require_cmd wget || return 1 - - # make sure we are configured - if [ -z "$airsearches_url" ] - then - echo >&2 "$PROGRAM_NAME: airsearches: not configured. Please set airsearches_url='url' in $confd/airsearches.conf" - return 1 - fi - - # check once if the url works - wget 2>/dev/null -O /dev/null "$airsearches_url" - if [ ! $? -eq 0 ] - then - echo >&2 "$PROGRAM_NAME: airsearches: cannot fetch the url: $airsearches_url. Please set airsearches_url='url' in $confd/airsearches.conf" - return 1 - fi - - # if the admin did not give any commands - # find the available ones - if [ -z "$airsearches_cmds" ] - then - airsearches_cmds="$(airsearches_get | cut -d '=' -f 1 | sed "s/^airsearches_//g" | sort -u)" - echo - fi - - # did we find any commands? - if [ -z "$airsearches_cmds" ] - then - echo >&2 "$PROGRAM_NAME: airsearches: cannot find command list automatically. Please set airsearches_cmds='...' in $confd/airsearches.conf" - return 1 - fi - - # ok we can do it - return 0 -} - -airsearches_create() { - [ -z "$airsearches_cmds" ] && return 1 - - # create the charts - local x= - echo "CHART airsearches.affiliates '' 'Air Searches per affiliate' 'requests / min' airsearches '' stacked 20000 $airsearches_update_every" - for x in $airsearches_cmds - do - echo "DIMENSION $x '' incremental 60 1" - done - - return 0 -} - -airsearches_update() { - # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). - - # do all the work to collect / calculate the values - # for each dimension - # remember: KEEP IT SIMPLE AND SHORT - - # get the values from airsearches - eval "$(airsearches_get)" - - # write the result of the work. - local x= - - echo "BEGIN airsearches.affiliates $1" - for x in $airsearches_cmds - do - eval "v=\$airsearches_$x" - echo "SET $x = $v" - done - echo "END" - - airsearches_dt=0 - - return 0 -} diff --git a/charts.d/crsproxy.chart.sh b/charts.d/crsproxy.chart.sh deleted file mode 100755 index 9ad8b338..00000000 --- a/charts.d/crsproxy.chart.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/sh - -crsproxy_url= -crsproxy_cmds= -crsproxy_update_every=15 - -crsproxy_get() { - wget 2>/dev/null -O - "$crsproxy_url" |\ - sed \ - -e "s/ \+/ /g" \ - -e "s/\./_/g" \ - -e "s/ =/=/g" \ - -e "s/= /=/g" \ - -e "s/^/crsproxy_/g" |\ - egrep "^crsproxy_[a-zA-Z][a-zA-Z0-9_]*=[0-9]+$" -} - -crsproxy_check() { - # make sure we have all the commands we need - require_cmd wget || return 1 - - if [ -z "$crsproxy_url" ] - then - echo >&2 "$PROGRAM_NAME: crsproxy: not configured. Please set crsproxy_url='url' in $confd/crsproxy.conf" - return 1 - fi - - # check once if the url works - wget 2>/dev/null -O /dev/null "$crsproxy_url" - if [ ! $? -eq 0 ] - then - echo >&2 "$PROGRAM_NAME: crsproxy: cannot fetch the url: $crsproxy_url. Please set crsproxy_url='url' in $confd/crsproxy.conf" - return 1 - fi - - # if the user did not request specific commands - # find the commands available - if [ -z "$crsproxy_cmds" ] - then - crsproxy_cmds="$(crsproxy_get | cut -d '=' -f 1 | sed "s/^crsproxy_cmd_//g" | sort -u)" - fi - - # if no commands are available - if [ -z "$crsproxy_cmds" ] - then - echo >&2 "$PROGRAM_NAME: crsproxy: cannot find command list automatically. Please set crsproxy_cmds='...' in $confd/crsproxy.conf" - return 1 - fi - return 0 -} - -crsproxy_create() { - # create the charts - cat <> "${T}"/${PN}-sysctl <<- EOF - kernel.mm.ksm.run = 1 - kernel.mm.ksm.sleep_millisecs = 1000 - EOF - - dodoc "${T}"/${PN}-sysctl - - newinitd system/netdata-openrc ${PN} - systemd_dounit system/netdata.service -} - -pkg_postinst() { - if [[ -e "/sys/kernel/mm/ksm/run" ]]; then - elog "INFORMATION:" - echo "" - elog "I see you have kernel memory de-duper (called Kernel Same-page Merging," - elog "or KSM) available, but it is not currently enabled." - echo "" - elog "To enable it run:" - echo "" - elog "echo 1 >/sys/kernel/mm/ksm/run" - elog "echo 1000 >/sys/kernel/mm/ksm/sleep_millisecs" - echo "" - elog "If you enable it, you will save 20-60% of netdata memory." - else - elog "INFORMATION:" - echo "" - elog "I see you do not have kernel memory de-duper (called Kernel Same-page" - elog "Merging, or KSM) available." - echo "" - elog "To enable it, you need a kernel built with CONFIG_KSM=y" - echo "" - elog "If you can have it, you will save 20-60% of netdata memory." - fi - -} diff --git a/netdata-installer.sh b/netdata-installer.sh index da0ec821..10f54674 100755 --- a/netdata-installer.sh +++ b/netdata-installer.sh @@ -806,7 +806,7 @@ cat >netdata-uninstaller.sh <<-UNINSTALL fi echo >&2 "Stopping a possibly running netdata..." - for p in \$(pidof netdata); do kill \$x; done + for p in \$(pidof netdata); do kill \$p; done sleep 2 deletedir() { diff --git a/plugins.d/charts.d.plugin b/plugins.d/charts.d.plugin index 6b361b4a..109127d6 100755 --- a/plugins.d/charts.d.plugin +++ b/plugins.d/charts.d.plugin @@ -567,7 +567,7 @@ global_update() { exec_start_ms=$now_ms $chart$charts_update $dt ret=$? - + # return the current time in ms in $now_ms current_time_ms; exec_end_ms=$now_ms @@ -582,7 +582,7 @@ global_update() { else charts_serial_failures[$chart]=$(( charts_serial_failures[$chart] + 1 )) - if [ charts_serial_failures[$chart] -gt 10 ] + if [ ${charts_serial_failures[$chart]} -gt 10 ] then echo >&2 "$PROGRAM_NAME: chart '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it." else diff --git a/plugins.d/tc-qos-helper.sh b/plugins.d/tc-qos-helper.sh index 7b473981..94eec44a 100755 --- a/plugins.d/tc-qos-helper.sh +++ b/plugins.d/tc-qos-helper.sh @@ -2,6 +2,31 @@ export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +PROGRAM_FILE="$0" +PROGRAM_NAME="$(basename $0)" +PROGRAM_NAME="${PROGRAM_NAME/.plugin}" + +plugins_dir="${NETDATA_PLUGINS_DIR}" +[ -z "$plugins_dir" ] && plugins_dir="$( dirname $PROGRAM_FILE )" + +config_dir=${NETDATA_CONFIG_DIR-/etc/netdata} +tc="$(which tc 2>/dev/null)" +fireqos_run_dir="/var/run/fireqos" +qos_get_class_names_every=120 +qos_exit_every=3600 + +# check if we have a valid number for interval +t=${1} +update_every=$((t)) +[ $((update_every)) -lt 1 ] && update_every=${NETDATA_UPDATE_EVERY} +[ $((update_every)) -lt 1 ] && update_every=1 + +# allow the user to override our defaults +if [ -f "${config_dir}/tc-qos-helper.conf" ] + then + source "${config_dir}/tc-qos-helper.conf" +fi + # default time function now_ms= current_time_ms() { @@ -17,18 +42,11 @@ loopsleepms() { # if found and included, this file overwrites loopsleepms() # with a high resolution timer function for precise looping. -. "$NETDATA_PLUGINS_DIR/loopsleepms.sh.inc" - -# check if we have a valid number for interval -t=$1 -sleep_time=$((t)) -[ $((sleep_time)) -lt 1 ] && $NETDATA_UPDATE_EVERY -[ $((sleep_time)) -lt 1 ] && sleep_time=1 +. "${plugins_dir}/loopsleepms.sh.inc" -tc_cmd="$(which tc)" -if [ -z "$tc_cmd" ] +if [ -z "${tc}" -o ! -x "${tc}" ] then - echo >&2 "tc: Cannot find a 'tc' command in this system." + echo >&2 "${PROGRAM_NAME}: Cannot find command 'tc' in this system." exit 1 fi @@ -40,44 +58,45 @@ setclassname() { } show_tc() { - local x="$1" + local x="${1}" interface_dev interface_classes interface_classes_monitor - echo "BEGIN $x" - $tc_cmd -s class show dev $x + echo "BEGIN ${x}" + ${tc} -s class show dev ${x} # check FireQOS names for classes - if [ ! -z "$fix_names" -a -f /var/run/fireqos/ifaces/$x ] + if [ ! -z "${fix_names}" -a -f "${fireqos_run_dir}/ifaces/${x}" ] then - name="$(cat /var/run/fireqos/ifaces/$x)" - echo "SETDEVICENAME $name" + name="$(<"${fireqos_run_dir}/ifaces/${x}")" + echo "SETDEVICENAME ${name}" + interface_dev= interface_classes= interface_classes_monitor= - . /var/run/fireqos/$name.conf - for n in $interface_classes_monitor + source "${fireqos_run_dir}/${name}.conf" + for n in ${interface_classes_monitor} do - setclassname $(echo $n | tr '|' ' ') + setclassname ${n//|/ } done - echo "SETDEVICEGROUP $interface_dev" + [ ! -z "${interface_dev}" ] && echo "SETDEVICEGROUP ${interface_dev}" fi - echo "END $x" + echo "END ${x}" } all_devices() { cat /proc/net/dev | grep ":" | cut -d ':' -f 1 | while read dev do - l=$($tc_cmd class show dev $dev | wc -l) - [ $l -ne 0 ] && echo $dev + l=$(${tc} class show dev ${dev} | wc -l) + [ $l -ne 0 ] && echo ${dev} done } # update devices and class names # once every 2 minutes -names_every=$((120 / sleep_time)) +names_every=$((qos_get_class_names_every / update_every)) # exit this script every hour # it will be restarted automatically -exit_after=$((3600 / sleep_time)) +exit_after=$((qos_exit_every / update_every)) c=0 gc=0 @@ -87,21 +106,21 @@ do c=$((c + 1)) gc=$((gc + 1)) - if [ $c -le 1 -o $c -ge $names_every ] + if [ ${c} -le 1 -o ${c} -ge ${names_every} ] then c=1 fix_names="YES" devices="$( all_devices )" fi - for d in $devices + for d in ${devices} do - show_tc $d + show_tc ${d} done - echo "WORKTIME $LOOPSLEEPMS_LASTWORK" + echo "WORKTIME ${LOOPSLEEPMS_LASTWORK}" - loopsleepms $sleep_time + loopsleepms ${update_every} - [ $gc -gt $exit_after ] && exit 0 + [ ${gc} -gt ${exit_after} ] && exit 0 done diff --git a/python.d/Makefile.am b/python.d/Makefile.am index 239d9826..7f14bffa 100644 --- a/python.d/Makefile.am +++ b/python.d/Makefile.am @@ -1,4 +1,7 @@ MAINTAINERCLEANFILES= $(srcdir)/Makefile.in +CLEANFILES = \ + python-modules-installer.sh \ + $(NULL) include $(top_srcdir)/build/subst.inc diff --git a/python.d/python-modules-installer.sh.in b/python.d/python-modules-installer.sh.in index 955762dc..620a332b 100755 --- a/python.d/python-modules-installer.sh.in +++ b/python.d/python-modules-installer.sh.in @@ -30,6 +30,7 @@ do -h|--help) echo "${0} [--dir netdata-python.d-path] [--system]" + echo "Please make sure you have installed packages: python-pip (or python3-pip) python-dev libyaml-dev libmysqlclient-dev" exit 0 ;; @@ -56,14 +57,16 @@ then pv=3 pip="$(which pip3 2>/dev/null)" else - echo >&2 "Cannot detect python version" + echo >&2 "Cannot detect python version. Is python installed?" exit 1 fi [ -z "${pip}" ] && pip="$(which pip 2>/dev/null)" if [ -z "${pip}" ] then - echo >& "pip command is required to install python v${pv} modules" + echo >&2 "pip command is required to install python v${pv} modules." + [ "${pv}" = "2" ] && echo >&2 "Please install python-pip." + [ "${pv}" = "3" ] && echo >&2 "Please install python3-pip." exit 1 fi @@ -83,7 +86,8 @@ failed="" installed="" errors=0 pip_install() { - local ret x + local ret x msg="${1}" + shift echo >&2 echo >&2 @@ -102,16 +106,24 @@ pip_install() { ret=$? fi [ ${ret} -eq 0 ] && break - echo >&2 "failed to install: ${x}" + echo >&2 "failed to install: ${x}. ${msg}" done if [ ${ret} -ne 0 ] then - echo >&2 "ERROR: could not install any of: ${*}" + echo >&2 + echo >&2 + echo >&2 "FAILED: could not install any of: ${*}. ${msg}" + echo >&2 + echo >&2 errors=$(( errors + 1 )) failed="${failed}|${*}" else + echo >&2 + echo >&2 echo >&2 "SUCCESS: we have: ${x}" + echo >&2 + echo >&2 installed="${installed} ${x}" fi return ${ret} @@ -119,11 +131,11 @@ pip_install() { if [ "${pv}" = "2" ] then - pip_install pyyaml yaml - pip_install mysqlclient mysql-python pymysql + pip_install "is libyaml-dev and python-dev installed?" pyyaml + pip_install "is libmysqlclient-dev and python-dev installed?" mysqlclient mysql-python pymysql else - pip_install yaml pyyaml - pip_install mysql-python mysqlclient pymysql + pip_install "is libyaml-dev and python-dev installed?" pyyaml + pip_install "is libmysqlclient-dev and python-dev installed?" mysql-python mysqlclient pymysql fi echo >&2 @@ -131,6 +143,14 @@ echo >&2 if [ ${errors} -ne 0 ] then echo >&2 "Failed to install ${errors} modules: ${failed}" + if [ ! -z "${target}" ] + then + echo >&2 + echo >&2 "If you are getting errors during cleanup from pip, there is a known bug" + echo >&2 "in certain versions of pip that prevents installing packages local to an" + echo >&2 "application. To install them system-wide please run:" + echo >&2 "$0 --system" + fi exit 1 else echo >&2 "All done. We have: ${installed}" diff --git a/src/apps_plugin.c b/src/apps_plugin.c index 39d0efc8..ba497b7b 100644 --- a/src/apps_plugin.c +++ b/src/apps_plugin.c @@ -1,7 +1,3 @@ -// TODO -// -// 1. disable RESET_OR_OVERFLOW check in charts - #ifdef HAVE_CONFIG_H #include #endif @@ -47,171 +43,17 @@ #define MAX_NAME 100 #define MAX_CMDLINE 1024 -long processors = 1; -long pid_max = 32768; +int processors = 1; +pid_t pid_max = 32768; int debug = 0; int update_every = 1; unsigned long long file_counter = 0; int proc_pid_cmdline_is_needed = 0; - +int include_exited_childs = 1; char *host_prefix = ""; char *config_dir = CONFIG_DIR; -#ifdef NETDATA_INTERNAL_CHECKS -// ---------------------------------------------------------------------------- -// memory debugger -// do not use in production systems - it mis-aligns allocated memory - -struct allocations { - size_t allocations; - size_t allocated; - size_t allocated_max; -} allocations = { 0, 0, 0 }; - -#define MALLOC_MARK (uint32_t)(0x0BADCAFE) -#define MALLOC_PREFIX (sizeof(uint32_t) * 2) -#define MALLOC_SUFFIX (sizeof(uint32_t)) -#define MALLOC_OVERHEAD (MALLOC_PREFIX + MALLOC_SUFFIX) - -void *mark_allocation(void *allocated_ptr, size_t size_without_overheads) { - uint32_t *real_ptr = (uint32_t *)allocated_ptr; - real_ptr[0] = MALLOC_MARK; - real_ptr[1] = (uint32_t) size_without_overheads; - - uint32_t *end_ptr = (uint32_t *)(allocated_ptr + MALLOC_PREFIX + size_without_overheads); - end_ptr[0] = MALLOC_MARK; - - // fprintf(stderr, "MEMORY_POINTER: Allocated at %p, returning %p.\n", allocated_ptr, (void *)(allocated_ptr + MALLOC_PREFIX)); - - return allocated_ptr + MALLOC_PREFIX; -} - -void *check_allocation(const char *file, int line, const char *function, void *marked_ptr, size_t *size_without_overheads_ptr) { - uint32_t *real_ptr = (uint32_t *)(marked_ptr - MALLOC_PREFIX); - - // fprintf(stderr, "MEMORY_POINTER: Checking pointer at %p, real %p for %s/%u@%s.\n", marked_ptr, (void *)(marked_ptr - MALLOC_PREFIX), function, line, file); - - if(real_ptr[0] != MALLOC_MARK) fatal("MEMORY: prefix MARK is not valid for %s/%d@%s.", function, line, file); - - size_t size = real_ptr[1]; - - uint32_t *end_ptr = (uint32_t *)(marked_ptr + size); - if(end_ptr[0] != MALLOC_MARK) fatal("MEMORY: suffix MARK of allocation with size %zu is not valid for %s/%d@%s.", size, function, line, file); - - if(size_without_overheads_ptr) *size_without_overheads_ptr = size; - - return real_ptr; -} - -void *malloc_debug(const char *file, int line, const char *function, size_t size) { - void *ptr = malloc(size + MALLOC_OVERHEAD); - if(!ptr) fatal("MEMORY: Cannot allocate %zu bytes for %s/%d@%s.", size, function, line, file); - - allocations.allocated += size; - allocations.allocations++; - - debug(D_MEMORY, "MEMORY: Allocated %zu bytes for %s/%d@%s." - " Status: allocated %zu in %zu allocs." - , size - , function, line, file - , allocations.allocated - , allocations.allocations - ); - - if(allocations.allocated > allocations.allocated_max) { - debug(D_MEMORY, "MEMORY: total allocation peak increased from %zu to %zu", allocations.allocated_max, allocations.allocated); - allocations.allocated_max = allocations.allocated; - } - - size_t csize; - check_allocation(file, line, function, mark_allocation(ptr, size), &csize); - if(size != csize) { - fatal("Invalid size."); - } - - return mark_allocation(ptr, size); -} - -void *calloc_debug(const char *file, int line, const char *function, size_t nmemb, size_t size) { - void *ptr = malloc_debug(file, line, function, (nmemb * size)); - bzero(ptr, nmemb * size); - return ptr; -} - -void free_debug(const char *file, int line, const char *function, void *ptr) { - size_t size; - void *real_ptr = check_allocation(file, line, function, ptr, &size); - - bzero(real_ptr, size + MALLOC_OVERHEAD); - - free(real_ptr); - allocations.allocated -= size; - allocations.allocations--; - - debug(D_MEMORY, "MEMORY: freed %zu bytes for %s/%d@%s." - " Status: allocated %zu in %zu allocs." - , size - , function, line, file - , allocations.allocated - , allocations.allocations - ); -} - -void *realloc_debug(const char *file, int line, const char *function, void *ptr, size_t size) { - if(!ptr) return malloc_debug(file, line, function, size); - if(!size) { free_debug(file, line, function, ptr); return NULL; } - - size_t old_size; - void *real_ptr = check_allocation(file, line, function, ptr, &old_size); - - void *new_ptr = realloc(real_ptr, size + MALLOC_OVERHEAD); - if(!new_ptr) fatal("MEMORY: Cannot allocate %zu bytes for %s/%d@%s.", size, function, line, file); - - allocations.allocated += size; - allocations.allocated -= old_size; - - debug(D_MEMORY, "MEMORY: Re-allocated from %zu to %zu bytes for %s/%d@%s." - " Status: allocated %zu in %zu allocs." - , old_size, size - , function, line, file - , allocations.allocated - , allocations.allocations - ); - - if(allocations.allocated > allocations.allocated_max) { - debug(D_MEMORY, "MEMORY: total allocation peak increased from %zu to %zu", allocations.allocated_max, allocations.allocated); - allocations.allocated_max = allocations.allocated; - } - - return mark_allocation(new_ptr, size); -} - -char *strdup_debug(const char *file, int line, const char *function, const char *ptr) { - size_t size = 0; - const char *s = ptr; - - while(*s++) size++; - size++; - - char *p = malloc_debug(file, line, function, size); - if(!p) fatal("Cannot allocate %zu bytes.", size); - - memcpy(p, ptr, size); - return p; -} - -#define malloc(size) malloc_debug(__FILE__, __LINE__, __FUNCTION__, (size)) -#define calloc(nmemb, size) calloc_debug(__FILE__, __LINE__, __FUNCTION__, (nmemb), (size)) -#define realloc(ptr, size) realloc_debug(__FILE__, __LINE__, __FUNCTION__, (ptr), (size)) -#define free(ptr) free_debug(__FILE__, __LINE__, __FUNCTION__, (ptr)) - -#ifdef strdup -#undef strdup -#endif -#define strdup(ptr) strdup_debug(__FILE__, __LINE__, __FUNCTION__, (ptr)) - -#endif /* NETDATA_INTERNAL_CHECKS */ // ---------------------------------------------------------------------------- @@ -254,9 +96,9 @@ long get_system_cpus(void) { return processors; } -long get_system_pid_max(void) { +pid_t get_system_pid_max(void) { procfile *ff = NULL; - long mpid = 32768; + pid_t mpid = 32768; char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", host_prefix); @@ -269,7 +111,7 @@ long get_system_pid_max(void) { return mpid; } - mpid = atol(procfile_lineword(ff, 0, 0)); + mpid = (pid_t)atoi(procfile_lineword(ff, 0, 0)); if(!mpid) mpid = 32768; procfile_close(ff); @@ -304,14 +146,14 @@ struct target { unsigned long long num_threads; unsigned long long rss; - unsigned long long fix_minflt; - unsigned long long fix_cminflt; - unsigned long long fix_majflt; - unsigned long long fix_cmajflt; - unsigned long long fix_utime; - unsigned long long fix_stime; - unsigned long long fix_cutime; - unsigned long long fix_cstime; + long long fix_minflt; + long long fix_cminflt; + long long fix_majflt; + long long fix_cmajflt; + long long fix_utime; + long long fix_stime; + long long fix_cutime; + long long fix_cstime; unsigned long long statm_size; unsigned long long statm_resident; @@ -463,10 +305,12 @@ struct target *get_apps_groups_target(const char *id, struct target *target) } uint32_t hash = simple_hash(id); - struct target *w; + struct target *w, *last = apps_groups_root_target; for(w = apps_groups_root_target ; w ; w = w->next) { if(w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0) return w; + + last = w; } w = calloc(sizeof(struct target), 1); @@ -498,8 +342,9 @@ struct target *get_apps_groups_target(const char *id, struct target *target) w->debug = tdebug; w->target = target; - w->next = apps_groups_root_target; - apps_groups_root_target = w; + // append it, to maintain the order in apps_groups.conf + if(last) last->next = w; + else apps_groups_root_target = w; if(unlikely(debug)) fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n" @@ -675,14 +520,20 @@ struct pid_stat { // we will subtract these values from the old // target unsigned long long last_minflt; - unsigned long long last_cminflt; unsigned long long last_majflt; - unsigned long long last_cmajflt; unsigned long long last_utime; unsigned long long last_stime; + + unsigned long long last_cminflt; + unsigned long long last_cmajflt; unsigned long long last_cutime; unsigned long long last_cstime; + unsigned long long last_fix_cminflt; + unsigned long long last_fix_cmajflt; + unsigned long long last_fix_cutime; + unsigned long long last_fix_cstime; + unsigned long long last_io_logical_bytes_read; unsigned long long last_io_logical_bytes_written; unsigned long long last_io_read_calls; @@ -691,27 +542,10 @@ struct pid_stat { unsigned long long last_io_storage_bytes_written; unsigned long long last_io_cancelled_write_bytes; -#ifdef AGGREGATE_CHILDREN_TO_PARENTS - unsigned long long old_utime; - unsigned long long old_stime; - unsigned long long old_minflt; - unsigned long long old_majflt; - - unsigned long long old_cutime; - unsigned long long old_cstime; - unsigned long long old_cminflt; - unsigned long long old_cmajflt; - - unsigned long long fix_cutime; - unsigned long long fix_cstime; unsigned long long fix_cminflt; unsigned long long fix_cmajflt; - - unsigned long long diff_cutime; - unsigned long long diff_cstime; - unsigned long long diff_cminflt; - unsigned long long diff_cmajflt; -#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + unsigned long long fix_cutime; + unsigned long long fix_cstime; int *fds; // array of fds it uses int fds_size; // the size of the fds array @@ -765,7 +599,8 @@ void del_pid_entry(pid_t pid) { if(!all_pids[pid]) return; - if(debug) fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, all_pids[pid]->comm); if(root_of_pids == all_pids[pid]) root_of_pids = all_pids[pid]->next; if(all_pids[pid]->next) all_pids[pid]->next->prev = all_pids[pid]->prev; @@ -898,7 +733,7 @@ int read_proc_pid_stat(struct pid_stat *p) { // p->guest_time = strtoull(procfile_lineword(ff, 0, 42+i), NULL, 10); // p->cguest_time = strtoull(procfile_lineword(ff, 0, 43), NULL, 10); - if(debug || (p->target && p->target->debug)) + if(unlikely(debug || (p->target && p->target->debug))) fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", host_prefix, p->pid, p->comm, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); // procfile_close(ff); @@ -1048,13 +883,16 @@ void file_descriptor_not_used(int id) } #endif /* NETDATA_INTERNAL_CHECKS */ - if(debug) fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count); if(all_files[id].count > 0) { all_files[id].count--; if(!all_files[id].count) { - if(debug) fprintf(stderr, "apps.plugin: >> slot %d is empty.\n", id); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> slot %d is empty.\n", id); + file_descriptor_remove(&all_files[id]); #ifdef NETDATA_INTERNAL_CHECKS all_files[id].magic = 0x00000000; @@ -1073,12 +911,15 @@ int file_descriptor_find_or_add(const char *name) static int last_pos = 0; uint32_t hash = simple_hash(name); - if(debug) fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash); struct file_descriptor *fd = file_descriptor_find(name, hash); if(fd) { // found - if(debug) fprintf(stderr, "apps.plugin: >> found on slot %d\n", fd->pos); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> found on slot %d\n", fd->pos); + fd->count++; return fd->pos; } @@ -1090,19 +931,25 @@ int file_descriptor_find_or_add(const char *name) int i; // there is no empty slot - if(debug) fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); + all_files = realloc(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor)); // if the address changed, we have to rebuild the index // since all pointers are now invalid if(old && old != (void *)all_files) { - if(debug) fprintf(stderr, "apps.plugin: >> re-indexing.\n"); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> re-indexing.\n"); + all_files_index.root = NULL; for(i = 0; i < all_files_size; i++) { if(!all_files[i].count) continue; file_descriptor_add(&all_files[i]); } - if(debug) fprintf(stderr, "apps.plugin: >> re-indexing done.\n"); + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> re-indexing done.\n"); } for(i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) { @@ -1118,7 +965,8 @@ int file_descriptor_find_or_add(const char *name) all_files_size += FILE_DESCRIPTORS_INCREASE_STEP; } - if(debug) fprintf(stderr, "apps.plugin: >> searching for empty slot.\n"); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> searching for empty slot.\n"); // search for an empty slot int i, c; @@ -1127,14 +975,17 @@ int file_descriptor_find_or_add(const char *name) if(c == 0) continue; if(!all_files[c].count) { - if(debug) fprintf(stderr, "apps.plugin: >> Examining slot %d.\n", c); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> Examining slot %d.\n", c); #ifdef NETDATA_INTERNAL_CHECKS if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name); #endif /* NETDATA_INTERNAL_CHECKS */ - if(debug) fprintf(stderr, "apps.plugin: >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name); + if(all_files[c].name) free((void *)all_files[c].name); all_files[c].name = NULL; last_pos = c; @@ -1145,7 +996,9 @@ int file_descriptor_find_or_add(const char *name) fatal("We should find an empty slot, but there isn't any"); exit(1); } - if(debug) fprintf(stderr, "apps.plugin: >> updating slot %d.\n", c); + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: >> updating slot %d.\n", c); all_files_len++; @@ -1161,11 +1014,15 @@ int file_descriptor_find_or_add(const char *name) else if(strcmp(name, "anon_inode:[timerfd]") == 0) type = FILETYPE_TIMERFD; else if(strcmp(name, "anon_inode:[signalfd]") == 0) type = FILETYPE_SIGNALFD; else if(strncmp(name, "anon_inode:", 11) == 0) { - if(debug) fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); + type = FILETYPE_OTHER; } else { - if(debug) fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name); + type = FILETYPE_OTHER; } @@ -1179,7 +1036,8 @@ int file_descriptor_find_or_add(const char *name) #endif /* NETDATA_INTERNAL_CHECKS */ file_descriptor_add(&all_files[c]); - if(debug) fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name); return c; } @@ -1208,7 +1066,9 @@ int read_pid_file_descriptors(struct pid_stat *p) { if(fdid < 0) continue; if(fdid >= p->fds_size) { // it is small, extend it - if(debug) fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100); + p->fds = realloc(p->fds, (fdid + 100) * sizeof(int)); if(!p->fds) { fatal("Cannot re-allocate fds for %s", p->comm); @@ -1291,26 +1151,34 @@ int collect_data_for_all_processes_from_proc(void) all_pids_count = 0; for(p = root_of_pids; p ; p = p->next) { all_pids_count++; - p->parent = NULL; - p->updated = 0; - p->children_count = 0; - p->merged = 0; - p->new_entry = 0; - - p->last_minflt = p->minflt; - p->last_cminflt = p->cminflt; - p->last_majflt = p->majflt; - p->last_cmajflt = p->cmajflt; - p->last_utime = p->utime; - p->last_stime = p->stime; - p->last_cutime = p->cutime; - p->last_cstime = p->cstime; - - p->last_io_logical_bytes_read = p->io_logical_bytes_read; + + p->parent = NULL; + + p->updated = 0; + p->children_count = 0; + p->merged = 0; + p->new_entry = 0; + + p->last_minflt = p->minflt; + p->last_majflt = p->majflt; + p->last_utime = p->utime; + p->last_stime = p->stime; + + p->last_cminflt = p->cminflt; + p->last_cmajflt = p->cmajflt; + p->last_cutime = p->cutime; + p->last_cstime = p->cstime; + + p->last_fix_cminflt = p->fix_cminflt; + p->last_fix_cmajflt = p->fix_cmajflt; + p->last_fix_cutime = p->fix_cutime; + p->last_fix_cstime = p->fix_cstime; + + p->last_io_logical_bytes_read = p->io_logical_bytes_read; p->last_io_logical_bytes_written = p->io_logical_bytes_written; - p->last_io_read_calls = p->io_read_calls; - p->last_io_write_calls = p->io_write_calls; - p->last_io_storage_bytes_read = p->io_storage_bytes_read; + p->last_io_read_calls = p->io_read_calls; + p->last_io_write_calls = p->io_write_calls; + p->last_io_storage_bytes_read = p->io_storage_bytes_read; p->last_io_storage_bytes_written = p->io_storage_bytes_written; p->last_io_cancelled_write_bytes = p->io_cancelled_write_bytes; } @@ -1320,9 +1188,14 @@ int collect_data_for_all_processes_from_proc(void) pid_t pid = (pid_t) strtoul(file->d_name, &endptr, 10); // make sure we read a valid number - if(unlikely(pid <= 0 || pid > pid_max || endptr == file->d_name || *endptr != '\0')) + if(unlikely(endptr == file->d_name || *endptr != '\0')) continue; + if(unlikely(pid <= 0 || pid > pid_max)) { + error("Invalid pid %d read (expected 1 to %d). Ignoring process.", pid, pid_max); + continue; + } + p = get_pid_entry(pid); if(unlikely(!p)) continue; @@ -1331,34 +1204,22 @@ int collect_data_for_all_processes_from_proc(void) // /proc//stat if(unlikely(read_proc_pid_stat(p))) { - error("Cannot process %s/proc/%d/stat", host_prefix, pid); - + error("Cannot process %s/proc/%d/stat", host_prefix, pid); // there is no reason to proceed if we cannot get its status continue; } // check its parent pid if(unlikely(p->ppid < 0 || p->ppid > pid_max)) { - error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid); - + error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid); p->ppid = 0; } - // -------------------------------------------------------------------- - // /proc//cmdline - - if(proc_pid_cmdline_is_needed) { - if(unlikely(read_proc_pid_cmdline(p))) { - error("Cannot process %s/proc/%d/cmdline", host_prefix, pid); - } - } - // -------------------------------------------------------------------- // /proc//statm if(unlikely(read_proc_pid_statm(p))) { - error("Cannot process %s/proc/%d/statm", host_prefix, pid); - + error("Cannot process %s/proc/%d/statm", host_prefix, pid); // there is no reason to proceed if we cannot get its memory status continue; } @@ -1388,9 +1249,18 @@ int collect_data_for_all_processes_from_proc(void) // check if it is target // we do this only once, the first time this pid is loaded if(unlikely(p->new_entry)) { - if(debug) fprintf(stderr, "apps.plugin: \tJust added %s\n", p->comm); + // /proc//cmdline + if(proc_pid_cmdline_is_needed) { + if(unlikely(read_proc_pid_cmdline(p))) { + error("Cannot process %s/proc/%d/cmdline", host_prefix, pid); + } + } + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", pid, p->comm); + uint32_t hash = simple_hash(p->comm); - size_t pclen = strlen(p->comm); + size_t pclen = strlen(p->comm); struct target *w; for(w = apps_groups_root_target; w ; w = w->next) { @@ -1411,6 +1281,8 @@ int collect_data_for_all_processes_from_proc(void) if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); + + break; } } } @@ -1434,49 +1306,6 @@ int collect_data_for_all_processes_from_proc(void) return 1; } - -// ---------------------------------------------------------------------------- - -#ifdef AGGREGATE_CHILDREN_TO_PARENTS -// print a tree view of all processes -int debug_childrens_aggregations(pid_t pid, int level) { - struct pid_stat *p = NULL; - char b[level+3]; - int i, ret = 0; - - for(i = 0; i < level; i++) b[i] = '\t'; - b[level] = '|'; - b[level+1] = '-'; - b[level+2] = '\0'; - - for(p = root_of_pids; p ; p = p->next) { - if(p->ppid == pid) { - ret += debug_childrens_aggregations(p->pid, level+1); - } - } - - p = all_pids[pid]; - if(p) { - if(!p->updated) ret += 1; - if(ret) fprintf(stderr, "%s %s %d [%s, %s] c=%d u=%llu+%llu, s=%llu+%llu, cu=%llu+%llu, cs=%llu+%llu, n=%llu+%llu, j=%llu+%llu, cn=%llu+%llu, cj=%llu+%llu\n" - , b, p->comm, p->pid, p->updated?"OK":"KILLED", p->target->name, p->children_count - , p->utime, p->utime - p->old_utime - , p->stime, p->stime - p->old_stime - , p->cutime, p->cutime - p->old_cutime - , p->cstime, p->cstime - p->old_cstime - , p->minflt, p->minflt - p->old_minflt - , p->majflt, p->majflt - p->old_majflt - , p->cminflt, p->cminflt - p->old_cminflt - , p->cmajflt, p->cmajflt - p->old_cmajflt - ); - } - - return ret; -} -#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ - - - // ---------------------------------------------------------------------------- // update statistics on the targets @@ -1494,6 +1323,7 @@ int debug_childrens_aggregations(pid_t pid, int level) { // check: update_apps_groups_statistics() void link_all_processes_to_their_parents(void) { + struct pid_stat *init = all_pids[1]; struct pid_stat *p = NULL; // link all children to their parents @@ -1501,81 +1331,110 @@ void link_all_processes_to_their_parents(void) { for(p = root_of_pids; p ; p = p->next) { // for each process found running - if(p->ppid > 0 - && p->ppid <= pid_max - && all_pids[p->ppid] - ) { - // for valid processes + if(likely(p->new_entry && p->updated)) { + // the first time we see an entry + // we remove the exited children figures + // to avoid spikes + p->fix_cminflt = p->cminflt; + p->fix_cmajflt = p->cmajflt; + p->fix_cutime = p->cutime; + p->fix_cstime = p->cstime; + } + + if(likely(p->ppid > 0 && all_pids[p->ppid])) { + // valid parent processes - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \tparent of %d (%s) is %d (%s)\n", p->pid, p->comm, p->ppid, all_pids[p->ppid]->comm); + struct pid_stat *pp; - p->parent = all_pids[p->ppid]; + p->parent = pp = all_pids[p->ppid]; p->parent->children_count++; - } - else if(p->ppid != 0) - error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); - } -} -#ifdef AGGREGATE_CHILDREN_TO_PARENTS -void aggregate_children_to_parents(void) { - struct pid_stat *p = NULL; + if(unlikely(debug || (p->target && p->target->debug))) + fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) has parent %d (%s, %s). Parent: utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n", p->pid, p->comm, p->updated?"running":"exited", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cminflt, pp->cmajflt, pp->fix_cutime, pp->fix_cstime, pp->fix_cminflt, pp->fix_cmajflt); - // for each killed process, remove its values from the parents - // sums (we had already added them in a previous loop) - for(p = root_of_pids; p ; p = p->next) { - if(p->updated) continue; - - if(debug) fprintf(stderr, "apps.plugin: UNMERGING %d %s\n", p->pid, p->comm); - - unsigned long long diff_utime = p->utime + p->cutime + p->fix_cutime; - unsigned long long diff_stime = p->stime + p->cstime + p->fix_cstime; - unsigned long long diff_minflt = p->minflt + p->cminflt + p->fix_cminflt; - unsigned long long diff_majflt = p->majflt + p->cmajflt + p->fix_cmajflt; - - struct pid_stat *t = p; - while((t = t->parent)) { - if(!t->updated) continue; - - unsigned long long x; - if(diff_utime && t->diff_cutime) { - x = (t->diff_cutime < diff_utime)?t->diff_cutime:diff_utime; - diff_utime -= x; - t->diff_cutime -= x; - t->fix_cutime += x; - if(debug) fprintf(stderr, "apps.plugin: \t cutime %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name); - } - if(diff_stime && t->diff_cstime) { - x = (t->diff_cstime < diff_stime)?t->diff_cstime:diff_stime; - diff_stime -= x; - t->diff_cstime -= x; - t->fix_cstime += x; - if(debug) fprintf(stderr, "apps.plugin: \t cstime %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name); - } - if(diff_minflt && t->diff_cminflt) { - x = (t->diff_cminflt < diff_minflt)?t->diff_cminflt:diff_minflt; - diff_minflt -= x; - t->diff_cminflt -= x; - t->fix_cminflt += x; - if(debug) fprintf(stderr, "apps.plugin: \t cminflt %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name); - } - if(diff_majflt && t->diff_cmajflt) { - x = (t->diff_cmajflt < diff_majflt)?t->diff_cmajflt:diff_majflt; - diff_majflt -= x; - t->diff_cmajflt -= x; - t->fix_cmajflt += x; - if(debug) fprintf(stderr, "apps.plugin: \t cmajflt %llu from %d %s %s\n", x, t->pid, t->comm, t->target->name); + if(unlikely(!p->updated)) { + // this process has exit + + // find the first parent that has been updated + while(pp && !pp->updated) { + // we may have to forward link it to its parent + if(unlikely(!pp->parent && pp->ppid > 0 && all_pids[pp->ppid])) + pp->parent = all_pids[pp->ppid]; + + // check again for parent + pp = pp->parent; + } + + if(likely(pp)) { + // this is an exited child with a parent + // remove the known time from the parent's data + pp->fix_cminflt += p->last_minflt + p->last_cminflt + p->last_fix_cminflt; + pp->fix_cmajflt += p->last_majflt + p->last_cmajflt + p->last_fix_cmajflt; + pp->fix_cutime += p->last_utime + p->last_cutime + p->last_fix_cutime; + pp->fix_cstime += p->last_stime + p->last_cstime + p->last_fix_cstime; + + // The known exited children (the ones we track) may have + // contributed more than the value accumulated into the process + // by the kernel. + // This can happen if the parent process has not waited-for + // its children (check: man 2 times). + // In this case, the kernel adds these resources to init (pid 1). + // + // The following code, attempts to fix this. + // Without this code, the charts will have random spikes + // for example, when an SSH session ends (sshd forks a child + // to serve the session, but when this session ends, sshd + // does not wait-for its child, thus all the resources of the + // ssh session get added to init, resulting in a huge spike on + // the charts). + + if(unlikely(pp->cminflt < pp->fix_cminflt)) { + if(likely(init && pp != init)) { + unsigned long long have = pp->fix_cminflt - pp->cminflt; + unsigned long long max = init->cminflt - init->fix_cminflt; + if(have > max) have = max; + init->fix_cminflt += have; + } + pp->fix_cminflt = pp->cminflt; + } + if(unlikely(pp->cmajflt < pp->fix_cmajflt)) { + if(likely(init && pp != init)) { + unsigned long long have = pp->fix_cmajflt - pp->cmajflt; + unsigned long long max = init->cmajflt - init->fix_cmajflt; + if(have > max) have = max; + init->fix_cmajflt += have; + } + pp->fix_cmajflt = pp->cmajflt; + } + if(unlikely(pp->cutime < pp->fix_cutime)) { + if(likely(init && pp != init)) { + unsigned long long have = pp->fix_cutime - pp->cutime; + unsigned long long max = init->cutime - init->fix_cutime; + if(have > max) have = max; + init->fix_cutime += have; + } + pp->fix_cutime = pp->cutime; + } + if(unlikely(pp->cstime < pp->fix_cstime)) { + if(likely(init && pp != init)) { + unsigned long long have = pp->fix_cstime - pp->cstime; + unsigned long long max = init->cstime - init->fix_cstime; + if(have > max) have = max; + init->fix_cstime += have; + } + pp->fix_cstime = pp->cstime; + } + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: \tupdating child metrics of %d (%s, %s) to its parent %d (%s, %s). Parent has now: utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n", p->pid, p->comm, p->updated?"running":"exited", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cminflt, pp->cmajflt, pp->fix_cutime, pp->fix_cstime, pp->fix_cminflt, pp->fix_cmajflt); + } } } - - if(diff_utime) error("Cannot fix up utime %llu", diff_utime); - if(diff_stime) error("Cannot fix up stime %llu", diff_stime); - if(diff_minflt) error("Cannot fix up minflt %llu", diff_minflt); - if(diff_majflt) error("Cannot fix up majflt %llu", diff_majflt); + else if(unlikely(p->ppid != 0)) + error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); } } -#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + void cleanup_non_existing_pids(void) { int c; @@ -1603,8 +1462,9 @@ void apply_apps_groups_targets_inheritance(void) { // children that do not have a target // inherit their target from their parent - int found = 1; + int found = 1, loops = 0; while(found) { + if(unlikely(debug)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { // if this process does not have a target @@ -1626,6 +1486,7 @@ void apply_apps_groups_targets_inheritance(void) { // repeat, until nothing more can be done. found = 1; while(found) { + if(unlikely(debug)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { // if this process does not have any children @@ -1658,7 +1519,7 @@ void apply_apps_groups_targets_inheritance(void) { } } - if(debug) + if(unlikely(debug)) fprintf(stderr, "apps.plugin: merged %d processes\n", found); } @@ -1667,25 +1528,18 @@ void apply_apps_groups_targets_inheritance(void) { all_pids[1]->target = apps_groups_default_target; // give a default target on all top level processes + if(unlikely(debug)) loops++; for(p = root_of_pids; p ; p = p->next) { // if the process is not merged itself // then is is a top level process if(!p->merged && !p->target) p->target = apps_groups_default_target; - -#ifdef AGGREGATE_CHILDREN_TO_PARENTS - // by the way, update the diffs - // will be used later for subtracting killed process times - p->diff_cutime = p->utime - p->cutime; - p->diff_cstime = p->stime - p->cstime; - p->diff_cminflt = p->minflt - p->cminflt; - p->diff_cmajflt = p->majflt - p->cmajflt; -#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ } // give a target to all merged child processes found = 1; while(found) { + if(unlikely(debug)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) { @@ -1697,6 +1551,9 @@ void apply_apps_groups_targets_inheritance(void) { } } } + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops); } long zero_all_targets(struct target *root) { @@ -1749,10 +1606,13 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target } if(likely(p->updated)) { - w->cutime += p->cutime; // - p->fix_cutime; - w->cstime += p->cstime; // - p->fix_cstime; - w->cminflt += p->cminflt; // - p->fix_cminflt; - w->cmajflt += p->cmajflt; // - p->fix_cmajflt; + if(unlikely(debug && (p->fix_cutime || p->fix_cstime || p->fix_cminflt || p->fix_cmajflt))) + fprintf(stderr, "apps.plugin: \tadding child counters of %d (%s) to target %s. Currents: cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, Fixes: cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, w->name, p->cutime, p->cstime, p->cminflt, p->cmajflt, p->fix_cutime, p->fix_cstime, p->fix_cminflt, p->fix_cmajflt); + + w->cutime += p->cutime - p->fix_cutime; + w->cstime += p->cstime - p->fix_cstime; + w->cminflt += p->cminflt - p->fix_cminflt; + w->cmajflt += p->cmajflt - p->fix_cmajflt; w->utime += p->utime; //+ (p->pid != 1)?(p->cutime - p->fix_cutime):0; w->stime += p->stime; //+ (p->pid != 1)?(p->cstime - p->fix_cstime):0; @@ -1800,7 +1660,7 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target } if(unlikely(debug || w->debug)) - fprintf(stderr, "apps.plugin: \tAggregating %s pid %d on %s utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt); + fprintf(stderr, "apps.plugin: \tAggregating %s pid %d on %s utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->fix_cutime, p->fix_cstime, p->fix_cminflt, p->fix_cmajflt); /* if(p->utime - p->old_utime > 100) fprintf(stderr, "BIG CHANGE: %d %s utime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->utime - p->old_utime, p->old_utime, p->utime); if(p->cutime - p->old_cutime > 100) fprintf(stderr, "BIG CHANGE: %d %s cutime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cutime - p->old_cutime, p->old_cutime, p->cutime); @@ -1811,16 +1671,6 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target if(p->cminflt - p->old_cminflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cminflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cminflt - p->old_cminflt, p->old_cminflt, p->cminflt); if(p->cmajflt - p->old_cmajflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cmajflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cmajflt - p->old_cmajflt, p->old_cmajflt, p->cmajflt); */ -#ifdef AGGREGATE_CHILDREN_TO_PARENTS - p->old_utime = p->utime; - p->old_cutime = p->cutime; - p->old_stime = p->stime; - p->old_cstime = p->cstime; - p->old_minflt = p->minflt; - p->old_majflt = p->majflt; - p->old_cminflt = p->cminflt; - p->old_cmajflt = p->cmajflt; -#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ if(o) { // since the process switched target @@ -1831,42 +1681,46 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target // IMPORTANT // We add/subtract the last/OLD values we added to the target - w->fix_cutime -= p->last_cutime; - w->fix_cstime -= p->last_cstime; - w->fix_cminflt -= p->last_cminflt; - w->fix_cmajflt -= p->last_cmajflt; + unsigned long long cutime = p->last_cutime - p->last_fix_cutime; + unsigned long long cstime = p->last_cstime - p->last_fix_cstime; + unsigned long long cminflt = p->last_cminflt - p->last_fix_cminflt; + unsigned long long cmajflt = p->last_cmajflt - p->last_fix_cmajflt; + + w->fix_cutime -= cutime; + w->fix_cstime -= cstime; + w->fix_cminflt -= cminflt; + w->fix_cmajflt -= cmajflt; - w->fix_utime -= p->last_utime; - w->fix_stime -= p->last_stime; + w->fix_utime -= p->last_utime; + w->fix_stime -= p->last_stime; w->fix_minflt -= p->last_minflt; w->fix_majflt -= p->last_majflt; - - w->fix_io_logical_bytes_read -= p->last_io_logical_bytes_read; + w->fix_io_logical_bytes_read -= p->last_io_logical_bytes_read; w->fix_io_logical_bytes_written -= p->last_io_logical_bytes_written; - w->fix_io_read_calls -= p->last_io_read_calls; - w->fix_io_write_calls -= p->last_io_write_calls; - w->fix_io_storage_bytes_read -= p->last_io_storage_bytes_read; + w->fix_io_read_calls -= p->last_io_read_calls; + w->fix_io_write_calls -= p->last_io_write_calls; + w->fix_io_storage_bytes_read -= p->last_io_storage_bytes_read; w->fix_io_storage_bytes_written -= p->last_io_storage_bytes_written; w->fix_io_cancelled_write_bytes -= p->last_io_cancelled_write_bytes; // --- - o->fix_cutime += p->last_cutime; - o->fix_cstime += p->last_cstime; - o->fix_cminflt += p->last_cminflt; - o->fix_cmajflt += p->last_cmajflt; + o->fix_cutime += cutime; + o->fix_cstime += cstime; + o->fix_cminflt += cminflt; + o->fix_cmajflt += cmajflt; - o->fix_utime += p->last_utime; - o->fix_stime += p->last_stime; + o->fix_utime += p->last_utime; + o->fix_stime += p->last_stime; o->fix_minflt += p->last_minflt; o->fix_majflt += p->last_majflt; - o->fix_io_logical_bytes_read += p->last_io_logical_bytes_read; + o->fix_io_logical_bytes_read += p->last_io_logical_bytes_read; o->fix_io_logical_bytes_written += p->last_io_logical_bytes_written; - o->fix_io_read_calls += p->last_io_read_calls; - o->fix_io_write_calls += p->last_io_write_calls; - o->fix_io_storage_bytes_read += p->last_io_storage_bytes_read; + o->fix_io_read_calls += p->last_io_read_calls; + o->fix_io_write_calls += p->last_io_write_calls; + o->fix_io_storage_bytes_read += p->last_io_storage_bytes_read; o->fix_io_storage_bytes_written += p->last_io_storage_bytes_written; o->fix_io_cancelled_write_bytes += p->last_io_cancelled_write_bytes; } @@ -1876,28 +1730,33 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target // since the process has exited, the user // will see a drop in our charts, because the incremental - // values of this process will not be there + // values of this process will not be there from now on // add them to the fix_* values and they will be added to // the reported values, so that the report goes steady - w->fix_minflt += p->minflt; - w->fix_majflt += p->majflt; - w->fix_utime += p->utime; - w->fix_stime += p->stime; - w->fix_cminflt += p->cminflt; - w->fix_cmajflt += p->cmajflt; - w->fix_cutime += p->cutime; - w->fix_cstime += p->cstime; - - w->fix_io_logical_bytes_read += p->io_logical_bytes_read; - w->fix_io_logical_bytes_written += p->io_logical_bytes_written; - w->fix_io_read_calls += p->io_read_calls; - w->fix_io_write_calls += p->io_write_calls; - w->fix_io_storage_bytes_read += p->io_storage_bytes_read; - w->fix_io_storage_bytes_written += p->io_storage_bytes_written; - w->fix_io_cancelled_write_bytes += p->io_cancelled_write_bytes; + + w->fix_minflt += p->last_minflt; + w->fix_majflt += p->last_majflt; + w->fix_utime += p->last_utime; + w->fix_stime += p->last_stime; + + w->fix_cminflt += (p->last_cminflt - p->last_fix_cminflt); + w->fix_cmajflt += (p->last_cmajflt - p->last_fix_cmajflt); + w->fix_cutime += (p->last_cutime - p->last_fix_cutime); + w->fix_cstime += (p->last_cstime - p->last_fix_cstime); + + w->fix_io_logical_bytes_read += p->last_io_logical_bytes_read; + w->fix_io_logical_bytes_written += p->last_io_logical_bytes_written; + w->fix_io_read_calls += p->last_io_read_calls; + w->fix_io_write_calls += p->last_io_write_calls; + w->fix_io_storage_bytes_read += p->last_io_storage_bytes_read; + w->fix_io_storage_bytes_written += p->last_io_storage_bytes_written; + w->fix_io_cancelled_write_bytes += p->last_io_cancelled_write_bytes; } + //if((long long)w->cutime + w->fix_cutime < 0) + // error("Negative total cutime (%llu - %lld) on target %s after adding process %d (%s, %s) with utime=%llu, stime=%llu, minflt=%llu, majflt=%llu, cutime=%llu, cstime=%llu, cminflt=%llu, cmajflt=%llu, fix_cutime=%llu, fix_cstime=%llu, fix_cminflt=%llu, fix_cmajflt=%llu\n", + // w->cutime, w->fix_cutime, w->name, p->pid, p->comm, p->updated?"running":"exited", p->utime, p->stime, p->minflt, p->majflt, p->cutime, p->cstime, p->cminflt, p->cmajflt, p->fix_cutime, p->fix_cstime, p->fix_cminflt, p->fix_cmajflt); } void count_targets_fds(struct target *root) { @@ -1967,19 +1826,10 @@ void calculate_netdata_statistics(void) link_all_processes_to_their_parents(); apply_apps_groups_targets_inheritance(); -#ifdef AGGREGATE_CHILDREN_TO_PARENTS - aggregate_children_to_parents(); -#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ - zero_all_targets(users_root_target); zero_all_targets(groups_root_target); apps_groups_targets = zero_all_targets(apps_groups_root_target); -#ifdef AGGREGATE_CHILDREN_TO_PARENTS - if(debug) - debug_childrens_aggregations(0, 1); -#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ - // this has to be done, before the cleanup struct pid_stat *p = NULL; struct target *w = NULL, *o = NULL; @@ -2099,7 +1949,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->stime + w->fix_utime + w->fix_stime); + fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->stime + w->fix_utime + w->fix_stime + (include_exited_childs?(w->cutime + w->cstime + w->fix_cutime + w->fix_cstime):0)); } fprintf(stdout, "END\n"); @@ -2107,7 +1957,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->fix_utime); + fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->fix_utime + (include_exited_childs?(w->cutime + w->fix_cutime):0)); } fprintf(stdout, "END\n"); @@ -2115,7 +1965,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->stime + w->fix_stime); + fprintf(stdout, "SET %s = %llu\n", w->name, w->stime + w->fix_stime + (include_exited_childs?(w->cstime + w->fix_cstime):0)); } fprintf(stdout, "END\n"); @@ -2147,7 +1997,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->minflt + w->fix_minflt); + fprintf(stdout, "SET %s = %llu\n", w->name, w->minflt + w->fix_minflt + (include_exited_childs?(w->cminflt + w->fix_cminflt):0)); } fprintf(stdout, "END\n"); @@ -2155,7 +2005,7 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->majflt + w->fix_majflt); + fprintf(stdout, "SET %s = %llu\n", w->name, w->majflt + w->fix_majflt + (include_exited_childs?(w->cmajflt + w->fix_cmajflt):0)); } fprintf(stdout, "END\n"); @@ -2239,7 +2089,7 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const // we have something new to show // update the charts - fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); + fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -2267,18 +2117,18 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name); } - fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); + fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "DIMENSION %s '' incremental 100 %ld noreset\n", w->name, hz * processors); + fprintf(stdout, "DIMENSION %s '' incremental 100 %u noreset\n", w->name, hz); } - fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); + fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "DIMENSION %s '' incremental 100 %ld noreset\n", w->name, hz * processors); + fprintf(stdout, "DIMENSION %s '' incremental 100 %u noreset\n", w->name, hz); } fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20010 %d\n", type, title, type, update_every); @@ -2369,6 +2219,16 @@ void parse_args(int argc, char **argv) continue; } + if(strcmp("no-childs", argv[i]) == 0) { + include_exited_childs = 0; + continue; + } + + if(strcmp("with-childs", argv[i]) == 0) { + include_exited_childs = 1; + continue; + } + if(!name) { name = argv[i]; continue; @@ -2424,8 +2284,6 @@ int main(int argc, char **argv) } #endif /* NETDATA_INTERNAL_CHECKS */ - info("starting..."); - procfile_adaptive_initial_allocation = 1; time_t started_t = time(NULL); @@ -2444,13 +2302,13 @@ int main(int argc, char **argv) } fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %1$d\n" - "DIMENSION user '' incremental 1 1000\n" - "DIMENSION system '' incremental 1 1000\n" - "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n" - "DIMENSION files '' incremental 1 1\n" - "DIMENSION pids '' absolute 1 1\n" - "DIMENSION fds '' absolute 1 1\n" - "DIMENSION targets '' absolute 1 1\n", update_every); + "DIMENSION user '' incremental 1 1000\n" + "DIMENSION system '' incremental 1 1000\n" + "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %1$d\n" + "DIMENSION files '' incremental 1 1\n" + "DIMENSION pids '' absolute 1 1\n" + "DIMENSION fds '' absolute 1 1\n" + "DIMENSION targets '' absolute 1 1\n", update_every); #ifndef PROFILING_MODE unsigned long long sunext = (time(NULL) - (time(NULL) % update_every) + update_every) * 1000000ULL; @@ -2462,7 +2320,7 @@ int main(int argc, char **argv) #ifndef PROFILING_MODE // delay until it is our time to run while((sunow = timems()) < sunext) - usleep((useconds_t)(sunext - sunow)); + usecsleep(sunext - sunow); // find the next time we need to run while(timems() > sunext) @@ -2488,7 +2346,8 @@ int main(int argc, char **argv) send_collected_data_to_netdata(users_root_target, "users", dt); send_collected_data_to_netdata(groups_root_target, "groups", dt); - if(debug) fprintf(stderr, "apps.plugin: done Loop No %llu\n", counter); + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: done Loop No %llu\n", counter); current_t = time(NULL); diff --git a/src/common.c b/src/common.c index 699f58c6..02d37ab6 100644 --- a/src/common.c +++ b/src/common.c @@ -10,7 +10,7 @@ #include #include #include - +#include #include "log.h" #include "common.h" @@ -27,6 +27,51 @@ unsigned long long timems(void) { return now.tv_sec * 1000000ULL + now.tv_usec; } +int usecsleep(unsigned long long usec) { + +#ifdef NETDATA_WITH_NANOSLEEP + // we expect microseconds (1.000.000 per second) + // but timespec is nanoseconds (1.000.000.000 per second) + struct timespec req = { .tv_sec = usec / 1000000, .tv_nsec = (usec % 1000000) * 1000 }, rem; + + while(nanosleep(&req, &rem) == -1) { + error("nanosleep() failed for %llu microseconds.", usec); + + if(likely(errno == EINTR)) { + req.tv_sec = rem.tv_sec; + req.tv_nsec = rem.tv_nsec; + } + else { + error("Cannot nanosleep() for %llu microseconds.", usec); + break; + } + } + + return 0; +#else + int ret = usleep(usec); + if(unlikely(ret == -1 && errno == EINVAL)) { + // on certain systems, usec has to be up to 999999 + if(usec > 999999) { + int counter = usec / 999999; + while(counter--) + usleep(999999); + + usleep(usec % 999999); + } + else { + error("Cannot usleep() for %llu microseconds.", usec); + return ret; + } + } + + if(ret != 0) + error("usleep() failed for %llu microseconds.", usec); + + return ret; +#endif +} + unsigned char netdata_map_chart_names[256] = { [0] = '\0', // [1] = '_', // diff --git a/src/common.h b/src/common.h index 1502379e..f6736df6 100644 --- a/src/common.h +++ b/src/common.h @@ -46,6 +46,7 @@ extern void get_HZ(void); extern pid_t gettid(void); extern unsigned long long timems(void); +extern int usecsleep(unsigned long long usec); extern char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len); diff --git a/src/plugin_proc.c b/src/plugin_proc.c index 0e2d9c12..e7b8d50c 100644 --- a/src/plugin_proc.c +++ b/src/plugin_proc.c @@ -90,7 +90,7 @@ void *proc_main(void *ptr) // delay until it is our time to run while((sunow = timems()) < sunext) - usleep((useconds_t)(sunext - sunow)); + usecsleep(sunext - sunow); // find the next time we need to run while(timems() > sunext) diff --git a/src/proc_stat.c b/src/proc_stat.c index 9272dd3c..f9629794 100644 --- a/src/proc_stat.c +++ b/src/proc_stat.c @@ -12,31 +12,37 @@ #include "rrd.h" #include "plugin_proc.h" -#define RRD_TYPE_STAT "cpu" -#define RRD_TYPE_STAT_LEN strlen(RRD_TYPE_STAT) - int do_proc_stat(int update_every, unsigned long long dt) { + (void)dt; + static procfile *ff = NULL; static int do_cpu = -1, do_cpu_cores = -1, do_interrupts = -1, do_context = -1, do_forks = -1, do_processes = -1; + static uint32_t hash_intr, hash_ctxt, hash_processes, hash_procs_running, hash_procs_blocked; + + if(unlikely(do_cpu == -1)) { + do_cpu = config_get_boolean("plugin:proc:/proc/stat", "cpu utilization", 1); + do_cpu_cores = config_get_boolean("plugin:proc:/proc/stat", "per cpu core utilization", 1); + do_interrupts = config_get_boolean("plugin:proc:/proc/stat", "cpu interrupts", 1); + do_context = config_get_boolean("plugin:proc:/proc/stat", "context switches", 1); + do_forks = config_get_boolean("plugin:proc:/proc/stat", "processes started", 1); + do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", 1); + + hash_intr = simple_hash("intr"); + hash_ctxt = simple_hash("ctxt"); + hash_processes = simple_hash("processes"); + hash_procs_running = simple_hash("procs_running"); + hash_procs_blocked = simple_hash("procs_blocked"); + } - if(do_cpu == -1) do_cpu = config_get_boolean("plugin:proc:/proc/stat", "cpu utilization", 1); - if(do_cpu_cores == -1) do_cpu_cores = config_get_boolean("plugin:proc:/proc/stat", "per cpu core utilization", 1); - if(do_interrupts == -1) do_interrupts = config_get_boolean("plugin:proc:/proc/stat", "cpu interrupts", 1); - if(do_context == -1) do_context = config_get_boolean("plugin:proc:/proc/stat", "context switches", 1); - if(do_forks == -1) do_forks = config_get_boolean("plugin:proc:/proc/stat", "processes started", 1); - if(do_processes == -1) do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", 1); - - if(dt) {}; - - if(!ff) { + if(unlikely(!ff)) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/stat"); ff = procfile_open(config_get("plugin:proc:/proc/stat", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; } - if(!ff) return 1; ff = procfile_readall(ff); - if(!ff) return 0; // we return 0, so that we will retry to open it next time + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time uint32_t lines = procfile_lines(ff), l; uint32_t words; @@ -45,9 +51,13 @@ int do_proc_stat(int update_every, unsigned long long dt) { RRDSET *st; for(l = 0; l < lines ;l++) { - if(strncmp(procfile_lineword(ff, l, 0), "cpu", 3) == 0) { + char *row_key = procfile_lineword(ff, l, 0); + uint32_t hash = simple_hash(row_key); + + // faster strncmp(row_key, "cpu", 3) == 0 + if(likely(row_key[0] == 'c' && row_key[1] == 'p' && row_key[2] == 'u')) { words = procfile_linewords(ff, l); - if(words < 9) { + if(unlikely(words < 9)) { error("Cannot read /proc/stat cpu line. Expected 9 params, read %u.", words); continue; } @@ -55,7 +65,7 @@ int do_proc_stat(int update_every, unsigned long long dt) { char *id; unsigned long long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0; - id = procfile_lineword(ff, l, 0); + id = row_key; user = strtoull(procfile_lineword(ff, l, 1), NULL, 10); nice = strtoull(procfile_lineword(ff, l, 2), NULL, 10); system = strtoull(procfile_lineword(ff, l, 3), NULL, 10); @@ -67,25 +77,30 @@ int do_proc_stat(int update_every, unsigned long long dt) { if(words >= 10) guest = strtoull(procfile_lineword(ff, l, 9), NULL, 10); if(words >= 11) guest_nice = strtoull(procfile_lineword(ff, l, 10), NULL, 10); - char *title = "Core utilization"; - char *type = RRD_TYPE_STAT; - char *context = "cpu.cpu"; - char *family = "utilization"; - long priority = 1000; - int isthistotal = 0; + char *title, *type, *context, *family; + long priority; + int isthistotal; - if(strcmp(id, "cpu") == 0) { - isthistotal = 1; - type = "system"; + if(unlikely(strcmp(id, "cpu")) == 0) { title = "Total CPU utilization"; + type = "system"; context = "system.cpu"; family = id; priority = 100; + isthistotal = 1; + } + else { + title = "Core utilization"; + type = "cpu"; + context = "cpu.cpu"; + family = "utilization"; + priority = 1000; + isthistotal = 0; } - if((isthistotal && do_cpu) || (!isthistotal && do_cpu_cores)) { + if(likely((isthistotal && do_cpu) || (!isthistotal && do_cpu_cores))) { st = rrdset_find_bytype(type, id); - if(!st) { + if(unlikely(!st)) { st = rrdset_create(type, id, NULL, family, context, title, "percentage", priority, update_every, RRDSET_TYPE_STACKED); long multiplier = 1; @@ -119,14 +134,14 @@ int do_proc_stat(int update_every, unsigned long long dt) { rrdset_done(st); } } - else if(strcmp(procfile_lineword(ff, l, 0), "intr") == 0) { + else if(hash == hash_intr && strcmp(row_key, "intr") == 0) { unsigned long long value = strtoull(procfile_lineword(ff, l, 1), NULL, 10); // -------------------------------------------------------------------- - if(do_interrupts) { + if(likely(do_interrupts)) { st = rrdset_find_bytype("system", "intr"); - if(!st) { + if(unlikely(!st)) { st = rrdset_create("system", "intr", NULL, "interrupts", NULL, "CPU Interrupts", "interrupts/s", 900, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; @@ -138,14 +153,14 @@ int do_proc_stat(int update_every, unsigned long long dt) { rrdset_done(st); } } - else if(strcmp(procfile_lineword(ff, l, 0), "ctxt") == 0) { + else if(hash == hash_ctxt && strcmp(row_key, "ctxt") == 0) { unsigned long long value = strtoull(procfile_lineword(ff, l, 1), NULL, 10); // -------------------------------------------------------------------- - if(do_context) { + if(likely(do_context)) { st = rrdset_find_bytype("system", "ctxt"); - if(!st) { + if(unlikely(!st)) { st = rrdset_create("system", "ctxt", NULL, "processes", NULL, "CPU Context Switches", "context switches/s", 800, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "switches", NULL, 1, 1, RRDDIM_INCREMENTAL); @@ -156,22 +171,22 @@ int do_proc_stat(int update_every, unsigned long long dt) { rrdset_done(st); } } - else if(!processes && strcmp(procfile_lineword(ff, l, 0), "processes") == 0) { + else if(hash == hash_processes && !processes && strcmp(row_key, "processes") == 0) { processes = strtoull(procfile_lineword(ff, l, 1), NULL, 10); } - else if(!running && strcmp(procfile_lineword(ff, l, 0), "procs_running") == 0) { + else if(hash == hash_procs_running && !running && strcmp(row_key, "procs_running") == 0) { running = strtoull(procfile_lineword(ff, l, 1), NULL, 10); } - else if(!blocked && strcmp(procfile_lineword(ff, l, 0), "procs_blocked") == 0) { + else if(hash == hash_procs_blocked && !blocked && strcmp(row_key, "procs_blocked") == 0) { blocked = strtoull(procfile_lineword(ff, l, 1), NULL, 10); } } // -------------------------------------------------------------------- - if(do_forks) { + if(likely(do_forks)) { st = rrdset_find_bytype("system", "forks"); - if(!st) { + if(unlikely(!st)) { st = rrdset_create("system", "forks", NULL, "processes", NULL, "Started Processes", "processes/s", 700, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; @@ -185,9 +200,9 @@ int do_proc_stat(int update_every, unsigned long long dt) { // -------------------------------------------------------------------- - if(do_processes) { + if(likely(do_processes)) { st = rrdset_find_bytype("system", "processes"); - if(!st) { + if(unlikely(!st)) { st = rrdset_create("system", "processes", NULL, "processes", NULL, "System Processes", "processes", 600, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "running", NULL, 1, 1, RRDDIM_ABSOLUTE); diff --git a/src/registry.c b/src/registry.c index 203b846e..26004d4d 100644 --- a/src/registry.c +++ b/src/registry.c @@ -1642,6 +1642,9 @@ int registry_init(void) { // registry enabled? registry.enabled = config_get_boolean("registry", "enabled", 0); + if(mkdir(VARLIB_DIR, 0755) == -1 && errno != EEXIST) + error("Cannot create directory '" VARLIB_DIR "'"); + // pathnames registry.pathname = config_get("registry", "registry db directory", VARLIB_DIR "/registry"); if(mkdir(registry.pathname, 0755) == -1 && errno != EEXIST) { diff --git a/src/rrd.c b/src/rrd.c index c0f4f363..dd7698ab 100644 --- a/src/rrd.c +++ b/src/rrd.c @@ -277,7 +277,12 @@ char *rrdset_cache_dir(const char *id) char *ret = NULL; static char *cache_dir = NULL; - if(!cache_dir) cache_dir = config_get("global", "cache directory", CACHE_DIR); + if(!cache_dir) { + cache_dir = config_get("global", "cache directory", CACHE_DIR); + int r = mkdir(cache_dir, 0755); + if(r != 0 && errno != EEXIST) + error("Cannot create directory '%s'", cache_dir); + } char b[FILENAME_MAX + 1]; char n[FILENAME_MAX + 1]; @@ -855,7 +860,7 @@ unsigned long long rrdset_done(RRDSET *st) // calculate the proper last_collected_time, using usec_since_last_update unsigned long long ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec + st->usec_since_last_update; st->last_collected_time.tv_sec = (time_t) (ut / 1000000ULL); - st->last_collected_time.tv_usec = (useconds_t) (ut % 1000000ULL); + st->last_collected_time.tv_usec = (suseconds_t) (ut % 1000000ULL); } // if this set has not been updated in the past @@ -865,7 +870,7 @@ unsigned long long rrdset_done(RRDSET *st) // set a fake last_updated, in the past using usec_since_last_update unsigned long long ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec - st->usec_since_last_update; st->last_updated.tv_sec = (time_t) (ut / 1000000ULL); - st->last_updated.tv_usec = (useconds_t) (ut % 1000000ULL); + st->last_updated.tv_usec = (suseconds_t) (ut % 1000000ULL); // the first entry should not be stored store_this_entry = 0; @@ -885,7 +890,7 @@ unsigned long long rrdset_done(RRDSET *st) unsigned long long ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec - st->usec_since_last_update; st->last_updated.tv_sec = (time_t) (ut / 1000000ULL); - st->last_updated.tv_usec = (useconds_t) (ut % 1000000ULL); + st->last_updated.tv_usec = (suseconds_t) (ut % 1000000ULL); // the first entry should not be stored store_this_entry = 0; @@ -920,7 +925,7 @@ unsigned long long rrdset_done(RRDSET *st) int dimensions; st->collected_total = 0; for( rd = st->dimensions, dimensions = 0 ; likely(rd) ; rd = rd->next, dimensions++ ) - st->collected_total += rd->collected_value; + if(likely(rd->updated)) st->collected_total += rd->collected_value; uint32_t storage_flags = SN_EXISTS; @@ -929,6 +934,11 @@ unsigned long long rrdset_done(RRDSET *st) // at this stage we do not interpolate anything for( rd = st->dimensions ; likely(rd) ; rd = rd->next ) { + if(unlikely(!rd->updated)) { + rd->calculated_value = 0; + continue; + } + if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: START " " last_collected_value = " COLLECTED_NUMBER_FORMAT " collected_value = " COLLECTED_NUMBER_FORMAT @@ -942,34 +952,35 @@ unsigned long long rrdset_done(RRDSET *st) ); switch(rd->algorithm) { - case RRDDIM_ABSOLUTE: - rd->calculated_value = (calculated_number)rd->collected_value - * (calculated_number)rd->multiplier - / (calculated_number)rd->divisor; - - if(unlikely(st->debug)) - debug(D_RRD_STATS, "%s/%s: CALC ABS/ABS-NO-IN " - CALCULATED_NUMBER_FORMAT " = " - COLLECTED_NUMBER_FORMAT - " * " CALCULATED_NUMBER_FORMAT - " / " CALCULATED_NUMBER_FORMAT - , st->id, rd->name - , rd->calculated_value - , rd->collected_value - , (calculated_number)rd->multiplier - , (calculated_number)rd->divisor - ); - break; + case RRDDIM_ABSOLUTE: + rd->calculated_value = (calculated_number)rd->collected_value + * (calculated_number)rd->multiplier + / (calculated_number)rd->divisor; + + if(unlikely(st->debug)) + debug(D_RRD_STATS, "%s/%s: CALC ABS/ABS-NO-IN " + CALCULATED_NUMBER_FORMAT " = " + COLLECTED_NUMBER_FORMAT + " * " CALCULATED_NUMBER_FORMAT + " / " CALCULATED_NUMBER_FORMAT + , st->id, rd->name + , rd->calculated_value + , rd->collected_value + , (calculated_number)rd->multiplier + , (calculated_number)rd->divisor + ); + break; case RRDDIM_PCENT_OVER_ROW_TOTAL: - if(unlikely(!st->collected_total)) rd->calculated_value = 0; + if(unlikely(!st->collected_total)) + rd->calculated_value = 0; else - // the percentage of the current value - // over the total of all dimensions - rd->calculated_value = - (calculated_number)100 - * (calculated_number)rd->collected_value - / (calculated_number)st->collected_total; + // the percentage of the current value + // over the total of all dimensions + rd->calculated_value = + (calculated_number)100 + * (calculated_number)rd->collected_value + / (calculated_number)st->collected_total; if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: CALC PCENT-ROW " @@ -984,7 +995,7 @@ unsigned long long rrdset_done(RRDSET *st) break; case RRDDIM_INCREMENTAL: - if(unlikely(!rd->updated || rd->counter <= 1)) { + if(unlikely(rd->counter <= 1)) { rd->calculated_value = 0; continue; } @@ -1000,9 +1011,10 @@ unsigned long long rrdset_done(RRDSET *st) rd->last_collected_value = rd->collected_value; } - rd->calculated_value = (calculated_number)(rd->collected_value - rd->last_collected_value) - * (calculated_number)rd->multiplier - / (calculated_number)rd->divisor; + rd->calculated_value = + (calculated_number)(rd->collected_value - rd->last_collected_value) + * (calculated_number)rd->multiplier + / (calculated_number)rd->divisor; if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: CALC INC PRE " @@ -1020,18 +1032,20 @@ unsigned long long rrdset_done(RRDSET *st) break; case RRDDIM_PCENT_OVER_DIFF_TOTAL: - if(unlikely(!rd->updated || rd->counter <= 1)) { + if(unlikely(rd->counter <= 1)) { rd->calculated_value = 0; continue; } // the percentage of the current increment // over the increment of all dimensions together - if(unlikely(st->collected_total == st->last_collected_total)) rd->calculated_value = rd->last_calculated_value; - else rd->calculated_value = - (calculated_number)100 - * (calculated_number)(rd->collected_value - rd->last_collected_value) - / (calculated_number)(st->collected_total - st->last_collected_total); + if(unlikely(st->collected_total == st->last_collected_total)) + rd->calculated_value = 0; + else + rd->calculated_value = + (calculated_number)100 + * (calculated_number)(rd->collected_value - rd->last_collected_value) + / (calculated_number)(st->collected_total - st->last_collected_total); if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: CALC PCENT-DIFF " @@ -1233,6 +1247,7 @@ unsigned long long rrdset_done(RRDSET *st) if(likely(stored_entries || !store_this_entry)) { st->last_updated.tv_sec = st->last_collected_time.tv_sec; st->last_updated.tv_usec = st->last_collected_time.tv_usec; + st->last_collected_total = st->collected_total; } for( rd = st->dimensions; likely(rd) ; rd = rd->next ) { @@ -1262,7 +1277,6 @@ unsigned long long rrdset_done(RRDSET *st) , rd->calculated_value ); } - st->last_collected_total = st->collected_total; // ALL DONE ABOUT THE DATA UPDATE // -------------------------------------------------------------------- diff --git a/src/sys_fs_cgroup.c b/src/sys_fs_cgroup.c index 9ce6e33d..5f139c89 100644 --- a/src/sys_fs_cgroup.c +++ b/src/sys_fs_cgroup.c @@ -668,6 +668,7 @@ struct cgroup *cgroup_add(const char *id) { !strcmp(chart_id, "systemd") || !strcmp(chart_id, "system.slice") || !strcmp(chart_id, "machine.slice") || + !strcmp(chart_id, "init.scope") || !strcmp(chart_id, "user") || !strcmp(chart_id, "system") || !strcmp(chart_id, "machine") || @@ -1274,7 +1275,7 @@ void *cgroups_main(void *ptr) // delay until it is our time to run while((sunow = timems()) < sunext) - usleep((useconds_t)(sunext - sunow)); + usecsleep(sunext - sunow); // find the next time we need to run while(timems() > sunext) diff --git a/src/unit_test.c b/src/unit_test.c index 22fb16d6..dbf9190b 100644 --- a/src/unit_test.c +++ b/src/unit_test.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "common.h" #include "storage_number.h" @@ -246,7 +247,7 @@ int unit_test_storage() struct feed_values { unsigned long long microseconds; - calculated_number value; + collected_number value; }; struct test { @@ -262,6 +263,9 @@ struct test { unsigned long result_entries; struct feed_values *feed; calculated_number *results; + + collected_number *feed2; + calculated_number *results2; }; // -------------------------------------------------------------------------------------------------------------------- @@ -295,7 +299,9 @@ struct test test1 = { 10, // feed entries 9, // result entries test1_feed, // feed - test1_results // results + test1_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -329,7 +335,9 @@ struct test test2 = { 10, // feed entries 9, // result entries test2_feed, // feed - test2_results // results + test2_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -362,7 +370,9 @@ struct test test3 = { 10, // feed entries 9, // result entries test3_feed, // feed - test3_results // results + test3_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -395,7 +405,9 @@ struct test test4 = { 10, // feed entries 9, // result entries test4_feed, // feed - test4_results // results + test4_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -428,7 +440,9 @@ struct test test5 = { 10, // feed entries 9, // result entries test5_feed, // feed - test5_results // results + test5_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -467,7 +481,9 @@ struct test test6 = { 16, // feed entries 4, // result entries test6_feed, // feed - test6_results // results + test6_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -500,7 +516,9 @@ struct test test7 = { 10, // feed entries 18, // result entries test7_feed, // feed - test7_results // results + test7_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -529,7 +547,9 @@ struct test test8 = { 6, // feed entries 10, // result entries test8_feed, // feed - test8_results // results + test8_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -568,7 +588,9 @@ struct test test9 = { 16, // feed entries 4, // result entries test9_feed, // feed - test9_results // results + test9_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -600,8 +622,131 @@ struct test test10 = { RRDDIM_INCREMENTAL, // algorithm 10, // feed entries 7, // result entries - test10_feed, // feed - test10_results // results + test10_feed, // feed + test10_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test11 + +struct feed_values test11_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test11_feed2[] = { + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +calculated_number test11_results[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +calculated_number test11_results2[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +struct test test11 = { + "test11", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRDDIM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test11_feed, // feed + test11_results, // results + test11_feed2, // feed2 + test11_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test12 + +struct feed_values test12_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test12_feed2[] = { + 10*3, 20*3, 30*3, 40*3, 50*3, 60*3, 70*3, 80*3, 90*3, 100*3 +}; + +calculated_number test12_results[] = { + 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; + +calculated_number test12_results2[] = { + 75, 75, 75, 75, 75, 75, 75, 75, 75 +}; + +struct test test12 = { + "test12", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRDDIM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test12_feed, // feed + test12_results, // results + test12_feed2, // feed2 + test12_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test13 + +struct feed_values test13_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +calculated_number test13_results[] = { + 83.3333300, 100, 100, 100, 100, 100, 100 +}; + +struct test test13 = { + "test13", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRDDIM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 7, // result entries + test13_feed, // feed + test13_results, // results + NULL, // feed2 + NULL // results2 }; // -------------------------------------------------------------------------------------------------------------------- @@ -618,7 +763,12 @@ int run_test(struct test *test) // create the chart RRDSET *st = rrdset_create("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", 1, 1, RRDSET_TYPE_LINE); - RRDDIM *rd = rrddim_add(st, "dimension", NULL, test->multiplier, test->divisor, test->algorithm); + RRDDIM *rd = rrddim_add(st, "dim1", NULL, test->multiplier, test->divisor, test->algorithm); + + RRDDIM *rd2 = NULL; + if(test->feed2) + rd2 = rrddim_add(st, "dim2", NULL, test->multiplier, test->divisor, test->algorithm); + st->debug = 1; // feed it with the test data @@ -627,14 +777,21 @@ int run_test(struct test *test) if(debug_flags) fprintf(stderr, "\n\n"); if(c) { - fprintf(stderr, " > %s: feeding position %lu, after %llu microseconds, with value " CALCULATED_NUMBER_FORMAT "\n", test->name, c+1, test->feed[c].microseconds, test->feed[c].value); + fprintf(stderr, " > %s: feeding position %lu, after %llu microseconds\n", test->name, c+1, test->feed[c].microseconds); rrdset_next_usec(st, test->feed[c].microseconds); } else { - fprintf(stderr, " > %s: feeding position %lu with value " CALCULATED_NUMBER_FORMAT "\n", test->name, c+1, test->feed[c].value); + fprintf(stderr, " > %s: feeding position %lu\n", test->name, c+1); + } + + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rd->name, test->feed[c].value); + rrddim_set(st, "dim1", test->feed[c].value); + + if(rd2) { + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rd2->name, test->feed2[c]); + rrddim_set(st, "dim2", test->feed2[c]); } - rrddim_set(st, "dimension", test->feed[c].value); rrdset_done(st); // align the first entry to second boundary @@ -654,9 +811,19 @@ int run_test(struct test *test) unsigned long max = (st->counter < test->result_entries)?st->counter:test->result_entries; for(c = 0 ; c < max ; c++) { - calculated_number v = unpack_storage_number(rd->values[c]), n = test->results[c]; - fprintf(stderr, " %s: checking position %lu, expecting value " CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", %s\n", test->name, c+1, n, v, (v == n)?"OK":"### E R R O R ###"); - if(v != n) errors++; + calculated_number v = unpack_storage_number(rd->values[c]); + calculated_number n = test->results[c]; + int same = (roundl(v * 10000000.0) == roundl(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu, expecting value " CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", %s\n", test->name, rd->name, c+1, n, v, (same)?"OK":"### E R R O R ###"); + if(!same) errors++; + + if(rd2) { + v = unpack_storage_number(rd2->values[c]); + n = test->results2[c]; + same = (roundl(v * 10000000.0) == roundl(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu, expecting value " CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", %s\n", test->name, rd2->name, c+1, n, v, (same)?"OK":"### E R R O R ###"); + if(!same) errors++; + } } return errors; @@ -694,6 +861,15 @@ int run_all_mockup_tests(void) if(run_test(&test10)) return 1; + if(run_test(&test11)) + return 1; + + if(run_test(&test12)) + return 1; + + if(run_test(&test13)) + return 1; + return 0; } diff --git a/system/netdata-openrc.in b/system/netdata-openrc.in index 3d5f2cdc..55808364 100644 --- a/system/netdata-openrc.in +++ b/system/netdata-openrc.in @@ -1,4 +1,4 @@ -#!/sbin/runscript +#!/sbin/openrc-run # The user netdata is configured to run as. # If you edit its configuration file to set a different diff --git a/web/dashboard.html b/web/dashboard.html index 49bdc737..a64eb901 100644 --- a/web/dashboard.html +++ b/web/dashboard.html @@ -2,6 +2,7 @@ NetData Dashboard + diff --git a/web/demo.html b/web/demo.html index 051c9421..fc1f9254 100644 --- a/web/demo.html +++ b/web/demo.html @@ -2,6 +2,7 @@ NetData Dashboard + diff --git a/web/demo2.html b/web/demo2.html index ae1c1b3e..f184321c 100644 --- a/web/demo2.html +++ b/web/demo2.html @@ -2,6 +2,7 @@ NetData Dashboard + diff --git a/web/demosites.html b/web/demosites.html index 2be0ec37..252312c7 100644 --- a/web/demosites.html +++ b/web/demosites.html @@ -2,6 +2,7 @@ NetData - Real-time performance monitoring, done right! + @@ -50,20 +51,20 @@ and that you have chown it to be owned by netdata:netdata --> - +