int collect_data_for_all_processes_from_proc(void)
{
- static long count_errors = 0;
-
char dirname[FILENAME_MAX + 1];
snprintf(dirname, FILENAME_MAX, "%s/proc", host_prefix);
// /proc/<pid>/stat
if(unlikely(read_proc_pid_stat(p))) {
- if(!count_errors++ || debug || (p->target && p->target->debug))
error("Cannot process %s/proc/%d/stat", host_prefix, pid);
// there is no reason to proceed if we cannot get its status
// check its parent pid
if(unlikely(p->ppid < 0 || p->ppid > pid_max)) {
- if(unlikely(!count_errors++ || debug || (p->target && p->target->debug)))
error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid);
p->ppid = 0;
if(proc_pid_cmdline_is_needed) {
if(unlikely(read_proc_pid_cmdline(p))) {
- if(!count_errors++ || debug || (p->target && p->target->debug))
error("Cannot process %s/proc/%d/cmdline", host_prefix, pid);
}
}
// /proc/<pid>/statm
if(unlikely(read_proc_pid_statm(p))) {
- if(unlikely(!count_errors++ || debug || (p->target && p->target->debug)))
error("Cannot process %s/proc/%d/statm", host_prefix, pid);
// there is no reason to proceed if we cannot get its memory status
// /proc/<pid>/io
if(unlikely(read_proc_pid_io(p))) {
- if(unlikely(!count_errors++ || debug || (p->target && p->target->debug)))
error("Cannot process %s/proc/%d/io", host_prefix, pid);
// on systems without /proc/X/io
// <pid> ownership
if(unlikely(read_proc_pid_ownership(p))) {
- if(unlikely(!count_errors++ || debug || (p->target && p->target->debug)))
error("Cannot stat %s/proc/%d", host_prefix, pid);
}
// /proc/<pid>/fd
if(unlikely(read_pid_file_descriptors(p))) {
- if(unlikely(!count_errors++ || debug || (p->target && p->target->debug)))
error("Cannot process entries in %s/proc/%d/fd", host_prefix, pid);
}
p->updated = 1;
}
- if(unlikely(count_errors > 1000)) {
- error("%ld more errors encountered\n", count_errors - 1);
- count_errors = 0;
- }
-
closedir(dir);
return 1;
int error_log_syslog = 1;
int output_log_syslog = 1; // debug log
+time_t error_log_throttle_period = 1200; // length of one throttling window, in seconds; 0 turns throttling off (see error_log_limit())
+unsigned long error_log_errors_per_period = 200; // logs allowed per window; 0 makes error_log_limit() suppress every log
+
+/*
+ * Rate limiter for the error log.
+ *
+ * Counts calls inside a window of error_log_throttle_period seconds and
+ * tells the caller to drop its message once more than
+ * error_log_errors_per_period calls have been seen in the current window.
+ * Transitions (start of suppression, end of the window, explicit reset)
+ * are reported directly on stderr, bypassing the limiter itself.
+ *
+ * reset: non-zero restarts the window and zeroes the counters before the
+ *        call is accounted (after reporting how many logs were prevented,
+ *        if any). The call is then counted like any other call.
+ *
+ * Returns 1 when the caller must NOT log, 0 when it may log.
+ *
+ * Note: both early exits below are taken before 'reset' is examined.
+ * The state lives in function-level statics and is not protected by a lock.
+ */
+int error_log_limit(int reset) {
+    static time_t start = 0;
+    static unsigned long counter = 0, prevented = 0;
+
+    // do not throttle if the period is 0
+    if(error_log_throttle_period == 0)
+        return 0;
+
+    // prevent all logs if the errors per period is 0
+    if(error_log_errors_per_period == 0)
+        return 1;
+
+    time_t now = time(NULL);
+    if(!start) start = now;
+
+    if(reset) {
+        if(prevented) {
+            log_date(stderr);
+            // time_t has no portable printf conversion: cast to long to match %ld
+            fprintf(stderr, "%s: Resetting logging for process '%s' (prevented %lu logs in the last %ld seconds).\n"
+                    , program_name
+                    , program_name
+                    , prevented
+                    , (long)(now - start)
+            );
+        }
+
+        start = now;
+        counter = 0;
+        prevented = 0;
+    }
+
+    // detect if we log too much
+    counter++;
+
+    if(now - start > error_log_throttle_period) {
+        // the window expired: report what was dropped, if anything
+        if(prevented) {
+            log_date(stderr);
+            fprintf(stderr, "%s: Resuming logging from process '%s' (prevented %lu logs in the last %ld seconds).\n"
+                    , program_name
+                    , program_name
+                    , prevented
+                    , (long)error_log_throttle_period
+            );
+        }
+
+        // restart the period accounting
+        // (this call is the first one of the new window)
+        start = now;
+        counter = 1;
+        prevented = 0;
+
+        // log this error
+        return 0;
+    }
+
+    if(counter > error_log_errors_per_period) {
+        // announce the suppression only once per window
+        if(!prevented) {
+            log_date(stderr);
+            fprintf(stderr, "%s: Too many logs (%lu logs in %ld seconds, threshold is set to %lu logs in %ld seconds). Preventing more logs from process '%s' for %ld seconds.\n"
+                    , program_name
+                    , counter
+                    , (long)(now - start)
+                    , error_log_errors_per_period
+                    , (long)error_log_throttle_period
+                    , program_name
+                    , (long)(start + error_log_throttle_period - now));
+        }
+
+        prevented++;
+
+        // prevent logging this error
+        return 1;
+    }
+
+    return 0;
+}
+
void log_date(FILE *out)
{
char outstr[200];
{
va_list args;
+ // prevent logging too much
+ if(error_log_limit(0)) return;
+
log_date(stderr);
va_start( args, fmt );
{
va_list args;
+ // prevent logging too much
+ if(error_log_limit(0)) return;
+
log_date(stderr);
va_start( args, fmt );
extern int error_log_syslog;
extern int output_log_syslog;
+extern time_t error_log_throttle_period; // throttling window in seconds; 0 disables throttling
+extern unsigned long error_log_errors_per_period; // logs allowed per window; 0 suppresses every log
+extern int error_log_limit(int reset); // returns 1 if the caller must skip logging, 0 otherwise; non-zero 'reset' restarts the window first
+
+#define error_log_limit_reset() do { error_log_limit(1); } while(0) // restart the throttling window; the return value of error_log_limit() is discarded
+
#define debug(type, args...) do { if(unlikely(!silent && (debug_flags & type))) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args)
#define infoerr(args...) error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args)
#include "plugin_nfacct.h"
#include "main.h"
+#include "../config.h"
int netdata_exit = 0;
rrdset_save_all();
// kill_childs();
+ // let it log a few more error messages
+ error_log_limit_reset();
+
if(pidfd != -1) {
if(ftruncate(pidfd, 0) != 0)
error("Cannot truncate pidfile '%s'.", pidfile);
}
else error_log_syslog = 0;
+ error_log_throttle_period = config_get_number("global", "errors throttle period", error_log_throttle_period);
+ setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get("global", "errors throttle period" , ""), 1);
+
+ error_log_errors_per_period = config_get_number("global", "errors per throttle period", error_log_errors_per_period);
+ setenv("NETDATA_ERRORS_PER_PERIOD" , config_get("global", "errors per throttle period", ""), 1);
+
// --------------------------------------------------------------------
access_log_file = config_get("global", "access log", LOG_DIR "/access.log");
#include "popen.h"
#include "plugin_tc.h"
#include "main.h"
+#include "../config.h"
#define RRD_TYPE_TC "tc"
#define RRD_TYPE_TC_LEN strlen(RRD_TYPE_TC)
// debug(D_TC_LOOP, "IGNORED line");
//}
}
- mypclose(fp, tc_child_pid);
+ // fgets() failed or loop broke
+ int code = mypclose(fp, tc_child_pid);
tc_child_pid = 0;
if(device) {
return NULL;
}
+ if(code == 1 || code == 127) {
+ // 1 = DISABLE
+ // 127 = cannot even run it
+ error("TC: tc-qos-helper.sh exited with code %d. Disabling it.", code);
+
+ tc_device_free_all();
+ pthread_exit(NULL);
+ return NULL;
+ }
+
sleep((unsigned int) rrd_update_every);
}
pthread_exit(NULL);
return NULL;
}
-
#include "rrd.h"
#include "popen.h"
#include "plugins_d.h"
+#include "../config.h"
struct plugind *pluginsd_root = NULL;
{
debug(D_RRD_CALLS, "rrdset_save_all()");
+ // let it log a few error messages
+ error_log_limit_reset();
+
RRDSET *st;
RRDDIM *rd;