#!/usr/bin/env bash

# netdata
# real-time performance and health monitoring, done right!
# (C) 2016 Costa Tsaousis
# GPL v3+
#
# charts.d.plugin allows easy development of BASH plugins
#
# if you need to run parallel charts.d processes, link this file to a different name
# in the same directory, with a .plugin suffix and netdata will start both of them,
# each will have a different config file and modules configuration directory.
#

export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"

PROGRAM_FILE="$0"
# quoted, so that a path containing whitespace is not word-split
PROGRAM_NAME="$(basename "$0")"
PROGRAM_NAME="${PROGRAM_NAME/.plugin}"
MODULE_NAME="main"

# -----------------------------------------------------------------------------
# logging
#
# All log lines go to stderr in the form:
#   DATE: PROGRAM_NAME: STATUS: MODULE_NAME: message

# set to 1 to enable debug() output
debug=0

# print the current date and time, as used in log lines
logdate() {
    date "+%Y-%m-%d %H:%M:%S"
}

# log STATUS MESSAGE...
# write one log line with the given status keyword to stderr
log() {
    local status="${1}"
    shift

    echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: ${*}"
}

warning() {
    log WARNING "${@}"
}

error() {
    log ERROR "${@}"
}

info() {
    log INFO "${@}"
}

# log the message, print DISABLE on stdout and terminate the script
fatal() {
    log FATAL "${@}"
    echo "DISABLE"
    exit 1
}

# log the message only when debugging is enabled
# (returns non-zero when debugging is disabled)
debug() {
    [ $debug -eq 1 ] && log DEBUG "${@}"
}

# -----------------------------------------------------------------------------
# create temp dir

TMP_DIR=

# remove the temporary directory (if one was created) and exit.
# It is installed as the handler for EXIT, SIGHUP and INT and
# always ends by calling 'exit 0'.
chartsd_cleanup() {
    if [ -n "$TMP_DIR" ] && [ -d "$TMP_DIR" ]
    then
        [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: cleaning up temporary directory $TMP_DIR ..."
        rm -rf "$TMP_DIR"
    fi
    exit 0
}
trap chartsd_cleanup EXIT
trap chartsd_cleanup SIGHUP
trap chartsd_cleanup INT

if [ "$UID" = "0" ]
then
    TMP_DIR="$( mktemp -d "/var/run/netdata-${PROGRAM_NAME}-XXXXXXXXXX" )"
else
    TMP_DIR="$( mktemp -d "/tmp/.netdata-${PROGRAM_NAME}-XXXXXXXXXX" )"
fi

# without a temporary directory, everything that writes under
# ${TMP_DIR}/ would end up in the root of the filesystem
if [ -z "$TMP_DIR" ] || [ ! -d "$TMP_DIR" ]
then
    fatal "cannot create a temporary directory."
fi

# -----------------------------------------------------------------------------
# check a few commands

# require_cmd NAME
# locate the command NAME and store its path in the global variable
# <NAME in upper case>_CMD (e.g. 'curl' sets CURL_CMD).
# Returns 0 when an executable was found. Otherwise it logs a warning,
# sets the variable to the empty string and returns 1.
require_cmd() {
    local cmd_path
    cmd_path="$(which "${1}" 2>/dev/null || command -v "${1}" 2>/dev/null)"

    if [ -z "${cmd_path}" ] || [ ! -x "${cmd_path}" ]
    then
        warning "command '${1}' is not found in ${PATH}."
        printf -v "${1^^}_CMD" '%s' ""
        return 1
    fi

    printf -v "${1^^}_CMD" '%s' "${cmd_path}"
    return 0
}

# -----------------------------------------------------------------------------
# this has to run before the first require_cmd call:
# require_cmd uses ${1^^}, which needs BASH version 4 or later

[ $(( ${BASH_VERSINFO[0]} )) -lt 4 ] && fatal "BASH version 4 or later is required, but found version: ${BASH_VERSION}. Please upgrade."

require_cmd date || exit 1
require_cmd sed || exit 1
require_cmd basename || exit 1
require_cmd dirname || exit 1
require_cmd cat || exit 1
require_cmd grep || exit 1
require_cmd egrep || exit 1
require_cmd mktemp || exit 1
require_cmd awk || exit 1
require_cmd timeout || exit 1
require_cmd curl || exit 1

# -----------------------------------------------------------------------------

info "started from '$PROGRAM_FILE' with options: $*"

# -----------------------------------------------------------------------------
# internal defaults
# netdata exposes a few environment variables for us

pluginsd="${NETDATA_PLUGINS_DIR}"
[ -z "$pluginsd" ] && pluginsd="$( dirname "$PROGRAM_FILE" )"

confd="${NETDATA_CONFIG_DIR-/etc/netdata}"
chartsd="$pluginsd/../charts.d"

myconfig="$confd/$PROGRAM_NAME.conf"

minimum_update_frequency="${NETDATA_UPDATE_EVERY-1}"
update_every=${minimum_update_frequency} # this will be overwritten by the command line

# work around for non BASH shells
charts_create="_create"
charts_update="_update"
charts_check="_check"
charts_undescore="_"

# when making iterations, charts.d can loop more frequently
# to prevent plugins missing iterations.
# this is a percentage relative to update_every to align its
# iterations.
# The minimum is 10%, the maximum 100%.
# So, if update_every is 1 second and time_divisor is 50,
# charts.d will iterate every 500ms.
# Charts will be called to collect data only if the time
# passed since the last time the collected data is equal or
# above their update_every.
time_divisor=50

# number of seconds to run without restart
# after this time, charts.d.plugin will exit
# netdata will restart it
restart_timeout=$((3600 * 4))

# check if the charts.d plugins are using global variables
# they should not.
# The dry run check (see 'dryrunner' below) does not currently
# support BASH v4 arrays, so it is disabled
dryrunner=0

# check for timeout command
check_for_timeout=1

# the default enable/disable value for all charts
enable_all_charts="yes"

# -----------------------------------------------------------------------------
# parse parameters
#
# accepted parameters:
#   check          only report which charts would run, then exit
#   debug | all    enable debug logging
#   NAME           the name of a chart in $chartsd (with or without the
#                  .chart.sh suffix): enable debugging and run only this chart
#   NUMBER         the data collection frequency, in seconds

check=0
chart_only=
while [ -n "$1" ]
do
    if [ "$1" = "check" ]
    then
        check=1
        shift
        continue
    fi

    if [ "$1" = "debug" ] || [ "$1" = "all" ]
    then
        debug=1
        shift
        continue
    fi

    if [ -f "$chartsd/$1.chart.sh" ]
    then
        debug=1
        chart_only="$1"
        shift
        continue
    fi

    if [ -f "$chartsd/$1" ]
    then
        debug=1
        chart_only="${1%.chart.sh}"
        shift
        continue
    fi

    # number check
    # the parameter is a number only if evaluating it
    # arithmetically gives back exactly the same string
    n="$1"
    x=$(( n ))
    if [ "$x" = "$n" ]
    then
        shift
        update_every=$x
        [ $update_every -lt $minimum_update_frequency ] && update_every=$minimum_update_frequency
        continue
    fi

    fatal "Cannot understand parameter $1. Aborting."
done


# -----------------------------------------------------------------------------
# loop control

# default sleep function
LOOPSLEEPMS_HIGHRES=0
now_ms=
current_time_ms_default() {
    now_ms="$(date +'%s')000"
}
current_time_ms="current_time_ms_default"
current_time_ms_accuracy=1
mysleep="sleep"

# if found and included, this file overwrites loopsleepms()
# and current_time_ms() with a high resolution timer function
# for precise looping.
. "$pluginsd/loopsleepms.sh.inc"

# -----------------------------------------------------------------------------
# load my configuration

if [ -f "$myconfig" ]
then
    . "$myconfig"
    [ $? -ne 0 ] && fatal "cannot load $myconfig"

    # keep time_divisor numeric and within 10..100
    time_divisor=$((time_divisor))
    [ $time_divisor -lt 10 ] && time_divisor=10
    [ $time_divisor -gt 100 ] && time_divisor=100
else
    info "configuration file '$myconfig' not found. Using defaults."
fi

# we check for the timeout command, after we load our
# configuration, so that the user may overwrite the
# timeout command we use, providing a function that
# can emulate the timeout command we need:
# > timeout SECONDS command ...
if [ $check_for_timeout -eq 1 ]
then
    require_cmd timeout || exit 1
fi

# -----------------------------------------------------------------------------
# internal checks

# netdata passes the requested update frequency as the first argument
update_every=$(( update_every + 1 - 1)) # makes sure it is a number
test $update_every -eq 0 && update_every=1 # if it is zero, make it 1

# check the charts.d directory
[ ! -d "$chartsd" ] && fatal "cannot find charts directory '$chartsd'"

# -----------------------------------------------------------------------------
# library functions

# fixid STRING...
# print an id made from the arguments: characters outside the tr set
# below become underscores, leading and trailing underscores are removed,
# runs of underscores are squeezed into one and upper case letters are
# converted to lower case.
# NOTE(review): the bracket characters are part of the tr set as written,
# so '[' and ']' appear to pass through unchanged - confirm whether
# that is intended.
fixid() {
    echo "$*" |\
        tr -c "[A-Z][a-z][0-9]" "_" |\
        sed -e "s|^_\+||g" -e "s|_\+$||g" -e "s|_\+|_|g" |\
        tr "[A-Z]" "[a-z]"
}

# run [-t SECONDS] COMMAND [ARGS...]
# execute COMMAND, capturing its stderr in a file under ${TMP_DIR}.
# With '-t SECONDS' the command is run through 'timeout SECONDS',
# unless SECONDS is 0, in which case no timeout is applied.
# When the command fails, the command line and the captured stderr
# are logged to stderr.
# Returns the exit status of the command.
run() {
    local ret pid="${BASHPID}" t

    if [ "z${1}" = "z-t" ]
    then
        t="${2}"
        # always drop '-t SECONDS', otherwise '-t 0' would leave
        # '-t' to be executed as the command
        shift 2
    else
        t="0"
    fi

    if [ "${t}" != "0" ]
    then
        timeout "${t}" "${@}" 2>"${TMP_DIR}/run.${pid}"
        ret=$?
    else
        "${@}" 2>"${TMP_DIR}/run.${pid}"
        ret=$?
    fi

    if [ ${ret} -ne 0 ]
    then
        {
            # same layout as log(); the values are passed as printf
            # arguments so they are never interpreted as a format string
            printf "%s: %s: ERROR: %s: command '" "$(logdate)" "${PROGRAM_NAME}" "${MODULE_NAME}"
            printf "%q " "${@}"
            printf "' failed:\n --- BEGIN TRACE ---\n"
            cat "${TMP_DIR}/run.${pid}"
            printf " --- END TRACE ---\n"
        } >&2
    fi
    rm -f "${TMP_DIR}/run.${pid}"

    return ${ret}
}

# float2int NUMBER MULTIPLIER
# convert any floating point number
# to integer, give a multiplier
# the result is stored in ${FLOAT2INT_RESULT}
# so that no fork is necessary
# the multiplier must be a power of 10
#
# e.g. 'float2int 1.5 1000' sets FLOAT2INT_RESULT to 1500.
# Decimal digits beyond the precision of the multiplier are truncated.
float2int() {
    local f m="$2" a b l sign="" z="00000000000000000000000" r v=($1)
    f=${v[0]}

    # the length of the multiplier - 1
    l=$(( ${#m} - 1 ))

    # check if the number is in scientific notation
    if [[ ${f} =~ ^[[:space:]]*(-)?[0-9.]+(e|E)(\+|-)[0-9]+ ]]
    then
        # convert it to decimal
        # unfortunately, this fork cannot be avoided
        # if you know of a way to avoid it, please let me know
        f=$(printf "%0.${l}f" ${f})
    fi

    # take the sign out, so that the integer and the decimal
    # parts can be combined as plain magnitudes
    case "${f}" in
        -*) sign="-"; f="${f#-}" ;;
        +*) f="${f#+}" ;;
    esac

    # split the floating point number
    # in integer (a) and decimal (b)
    if [[ "${f}" == *.* ]]
    then
        a="${f%%.*}"
        b="${f##*.}"
    else
        # no decimal point: the whole number is the integer part
        a="${f}"
        b=""
    fi

    # if the integer part is missing
    # set it to zero
    [ -z "${a}" ] && a="0"

    # strip leading zeros from the integer part
    # base 10 conversion
    a=$((10#$a))

    # check the length of the decimal part
    # against the length of the multiplier
    if [ ${#b} -gt ${l} ]
    then
        # too many digits - take the most significant
        b=${b:0:${l}}
    elif [ ${#b} -lt ${l} ]
    then
        # too few digits - pad with zero on the right
        r=$((l - ${#b}))
        b="${b}${z:0:${r}}"
    fi

    # an empty decimal part (multiplier 1) counts as zero
    [ -z "${b}" ] && b="0"

    # strip leading zeros from the decimal part
    # base 10 conversion
    b=$((10#$b))

    # store the result
    FLOAT2INT_RESULT=$(( (a * m) + b ))
    [ -n "${sign}" ] && FLOAT2INT_RESULT=$(( -FLOAT2INT_RESULT ))

    return 0
}

# -----------------------------------------------------------------------------
# charts check functions

# print the names (without the .chart.sh suffix) of all
# the chart files found in $chartsd, one per line.
# Returns 1 if it cannot change to $chartsd.
all_charts() {
    local f

    if ! cd "$chartsd"
    then
        error "cannot cd to $chartsd"
        return 1
    fi

    # when nothing matches, the pattern itself is returned
    # by the shell and it is skipped by the -e test
    for f in *.chart.sh
    do
        [ -e "$f" ] || continue
        echo "${f%.chart.sh}"
    done

    return 0
}

# the value a chart must be set to (in the configuration) to be enabled.
# charts not listed here are enabled with 'yes'.
declare -A charts_enable_keyword=(
    ['apache']="force"
    ['cpu_apps']="force"
    ['cpufreq']="force"
    ['example']="force"
    ['exim']="force"
    ['hddtemp']="force"
    ['load_average']="force"
    ['mem_apps']="force"
    ['mysql']="force"
    ['nginx']="force"
    ['phpfpm']="force"
    ['postfix']="force"
    ['sensors']="force"
    ['squid']="force"
    ['tomcat']="force"
    )

# print (on stdout, space separated) the charts that are enabled in the
# configuration and whose file seems to define the three required
# functions: <chart>_check(), <chart>_create() and <chart>_update().
all_enabled_charts() {
    local charts= enabled= required= charts2= fn= missing=

    # find all enabled charts

    for chart in $( all_charts )
    do
        MODULE_NAME="${chart}"

        # the variable named after the chart holds its enable setting
        eval "enabled=\$$chart"
        if [ -z "${enabled}" ]
        then
            enabled="${enable_all_charts}"
        fi

        required="${charts_enable_keyword[${chart}]}"
        [ -z "${required}" ] && required="yes"

        if [ ! "${enabled}" = "${required}" ]
        then
            info "is disabled. Add a line with $chart=$required in $myconfig to enable it (or remove the line that disables it)."
        else
            debug "is enabled for auto-detection."
            charts="$charts $chart"
        fi
    done
    MODULE_NAME="main"

    for chart in $charts
    do
        MODULE_NAME="${chart}"

        # check the enabled charts
        # each function definition is expected at the beginning
        # of a line, optionally indented with spaces
        missing=
        for fn in "$chart$charts_check" "$chart$charts_create" "$chart$charts_update"
        do
            if ! grep -q "^ *${fn}()" "$chartsd/$chart.chart.sh"
            then
                missing="${fn}"
                break
            fi
        done

        if [ -n "${missing}" ]
        then
            error "module '$chart' does not seem to have a ${missing}() function. Disabling it."
            continue
        fi

        # check its config
        #if [ -f "$confd/$chart.conf" ]
        #then
        #    if [ ! -z "$( cat "$confd/$chart.conf" | sed "s/^ \+//g" | grep -v "^$" | grep -v "^#" | grep -v "^$chart$charts_undescore" )" ]
        #    then
        #        error "module's $chart config $confd/$chart.conf should only have lines starting with $chart$charts_undescore . Disabling it."
        #        continue
        #    fi
        #fi

        #if [ $dryrunner -eq 1 ]
        #    then
        #    "$pluginsd/charts.d.dryrun-helper.sh" "$chart" "$chartsd/$chart.chart.sh" "$confd/$chart.conf" >/dev/null
        #    if [ $? -ne 0 ]
        #    then
        #        error "module's $chart did not pass the dry run check. This means it uses global variables not starting with $chart. Disabling it."
        #        continue
        #    fi
        #fi

        charts2="$charts2 $chart"
    done
    MODULE_NAME="main"

    echo $charts2
    debug "enabled charts: $charts2"
}

# -----------------------------------------------------------------------------
# load the charts

suffix_update_every="_update_every"
active_charts=
for chart in $( all_enabled_charts )
do
    MODULE_NAME="${chart}"

    debug "loading module: '$chartsd/$chart.chart.sh'"

    . "$chartsd/$chart.chart.sh"

    # the configuration under $confd/$PROGRAM_NAME/ is preferred
    if [ -f "$confd/$PROGRAM_NAME/$chart.conf" ]
    then
        debug "loading module configuration: '$confd/$PROGRAM_NAME/$chart.conf'"
        . "$confd/$PROGRAM_NAME/$chart.conf"
    elif [ -f "$confd/$chart.conf" ]
    then
        debug "loading module configuration: '$confd/$chart.conf'"
        . "$confd/$chart.conf"
    else
        warning "configuration file '$confd/$PROGRAM_NAME/$chart.conf' not found. Using defaults."
    fi

    # a module cannot collect data more
    # frequently than the global update_every
    eval "dt=\$$chart$suffix_update_every"
    dt=$(( dt + 1 - 1 )) # make sure it is a number
    if [ $dt -lt $update_every ]
    then
        eval "$chart$suffix_update_every=$update_every"
    fi

    if $chart$charts_check
    then
        debug "module '$chart' activated"
        active_charts="$active_charts $chart"
    else
        error "module's '$chart' check() function reports failure."
    fi
done
MODULE_NAME="main"
debug "activated modules: $active_charts"


# -----------------------------------------------------------------------------
# check overwrites

# enable work time reporting
debug_time=
test $debug -eq 1 && debug_time=tellwork

# if we only need a specific chart, remove all the others
if [ -n "${chart_only}" ]
then
    debug "requested to run only for: '${chart_only}'"
    check_charts=
    for chart in $active_charts
    do
        if [ "$chart" = "$chart_only" ]
        then
            check_charts="$chart"
            break
        fi
    done
    active_charts="$check_charts"
fi
debug "activated charts: $active_charts"

# stop if we just need a pre-check
if [ $check -eq 1 ]
then
    info "CHECK RESULT"
    info "Will run the charts: $active_charts"
    exit 0
fi

# -----------------------------------------------------------------------------

cd "${TMP_DIR}" || exit 1

# -----------------------------------------------------------------------------
# create charts

run_charts=
for chart in $active_charts
do
    MODULE_NAME="${chart}"

    debug "calling '$chart$charts_create()'..."
    if $chart$charts_create
    then
        run_charts="$run_charts $chart"
        debug "'$chart' initialized."
    else
        error "module's '$chart' function '$chart$charts_create()' reports failure."
    fi
done
MODULE_NAME="main"
debug "run_charts='$run_charts'"


# -----------------------------------------------------------------------------
# update dimensions

[ -z "$run_charts" ] && fatal "No charts to collect data from."
# per chart state of the main loop, indexed by chart name:
#   charts_last_update      time (ms) of the last data collection
#   charts_update_every     data collection frequency (seconds)
#   charts_next_update      time (ms) of the next data collection
#   charts_run_counter      number of data collections so far
#   charts_serial_failures  consecutive failures of the update function
declare -A charts_last_update=() charts_update_every=() charts_next_update=() charts_run_counter=() charts_serial_failures=()

# The main loop.
# Announces one 'execution time' chart per module in $run_charts and
# then keeps calling the <chart>_update function of each module when
# its next update time has come, emitting how long each call took.
# A module whose update function fails more than 10 times in a row
# is dropped from the loop.
# The script exits (status 0) once restart_timeout seconds have
# passed, and calls fatal when no module is left to run.
global_update() {
    local exit_at \
        c=0 dt ret last_ms exec_start_ms exec_end_ms \
        chart now_charts=() next_charts=($run_charts) \
        next_ms x seconds millis

    # return the current time in ms in $now_ms
    ${current_time_ms}

    exit_at=$(( now_ms + (restart_timeout * 1000) ))

    for chart in $run_charts
    do
        # use the module's own <chart>_update_every,
        # falling back to the global update_every
        eval "charts_update_every[$chart]=\$$chart$suffix_update_every"
        test -z "${charts_update_every[$chart]}" && charts_update_every[$chart]=$update_every

        # align the first update to a multiple of the module's frequency
        charts_last_update[$chart]=$((now_ms - (now_ms % (charts_update_every[$chart] * 1000) ) ))
        charts_next_update[$chart]=$(( charts_last_update[$chart] + (charts_update_every[$chart] * 1000) ))
        charts_run_counter[$chart]=0
        charts_serial_failures[$chart]=0

        echo "CHART netdata.plugin_chartsd_$chart '' 'Execution time for $chart plugin' 'milliseconds / run' charts.d netdata.plugin_charts area 145000 ${charts_update_every[$chart]}"
        echo "DIMENSION run_time 'run time' absolute 1 1"
    done

    # the main loop
    while [ "${#next_charts[@]}" -gt 0 ]
    do
        c=$((c + 1))
        now_charts=("${next_charts[@]}")
        next_charts=()

        # return the current time in ms in $now_ms
        ${current_time_ms}

        for chart in "${now_charts[@]}"
        do
            MODULE_NAME="${chart}"

            if [ ${now_ms} -ge ${charts_next_update[$chart]} ]
            then
                last_ms=${charts_last_update[$chart]}
                dt=$(( (now_ms - last_ms) ))

                charts_last_update[$chart]=${now_ms}

                # skip any iterations that have been missed
                while [ ${charts_next_update[$chart]} -lt ${now_ms} ]
                do
                    charts_next_update[$chart]=$(( charts_next_update[$chart] + (charts_update_every[$chart] * 1000) ))
                done

                # the first call should not give a duration
                # so that netdata calibrates to current time
                dt=$(( dt * 1000 ))
                charts_run_counter[$chart]=$(( charts_run_counter[$chart] + 1 ))
                if [ ${charts_run_counter[$chart]} -eq 1 ]
                then
                    dt=
                fi

                exec_start_ms=$now_ms
                $chart$charts_update $dt
                ret=$?

                # return the current time in ms in $now_ms
                ${current_time_ms}; exec_end_ms=$now_ms

                echo "BEGIN netdata.plugin_chartsd_$chart $dt"
                echo "SET run_time = $(( exec_end_ms - exec_start_ms ))"
                echo "END"

                if [ $ret -eq 0 ]
                then
                    charts_serial_failures[$chart]=0
                    next_charts+=($chart)
                else
                    charts_serial_failures[$chart]=$(( charts_serial_failures[$chart] + 1 ))

                    if [ ${charts_serial_failures[$chart]} -gt 10 ]
                    then
                        error "module's '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it."

                        # a disabled module must not affect
                        # the calculation of the sleep time below
                        unset "charts_next_update[$chart]"
                    else
                        error "module's '$chart' update() function reports failure. Will keep trying for a while."
                        next_charts+=($chart)
                    fi
                fi
            else
                next_charts+=($chart)
            fi
        done

        # whatever is logged below is not related to a specific module
        MODULE_NAME="main"

        # wait the time you are required to
        # i.e. until the earliest next update of all the modules
        next_ms=$((now_ms + (update_every * 1000 * 100) ))
        for x in "${charts_next_update[@]}"; do [ ${x} -lt ${next_ms} ] && next_ms=${x}; done
        next_ms=$((next_ms - now_ms))

        if [ ${LOOPSLEEPMS_HIGHRES} -eq 1 -a ${next_ms} -gt 0 ]
        then
            next_ms=$(( next_ms + current_time_ms_accuracy ))
            seconds=$(( next_ms / 1000 ))
            millis=$(( next_ms % 1000 ))

            # zero pad the milliseconds to 3 digits
            if [ ${millis} -lt 10 ]
            then
                millis="00${millis}"
            elif [ ${millis} -lt 100 ]
            then
                millis="0${millis}"
            fi

            debug "sleeping for ${seconds}.${millis} seconds."
            ${mysleep} ${seconds}.${millis}
        else
            debug "sleeping for ${update_every} seconds."
            ${mysleep} $update_every
        fi

        test ${now_ms} -ge ${exit_at} && exit 0
    done

    fatal "nothing left to do, exiting..."
}

global_update