calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: webmaster
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection of the mount point
to: sysadmin
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection of the block device
to: sysadmin
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: dba
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: dba
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: domainadmin
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: sysadmin
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: webmaster
--- /dev/null
+
+# make sure postgres is running
+
+template: postgres_last_collected_secs
+ on: postgres.db_stat_transactions
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: dba
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: dba
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: sysadmin
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: proxyadmin
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: up 0 down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: sysadmin
['80266bddd3df374923c750a6de91d120']='health.d/apache.conf'
['842b1ad5b89bfa5f421d9c5b72e001a4']='health.d/apache.conf'
['a6d5ce2572bf7a1dce9e545fcd29273e']='health.d/apache.conf'
+ ['ccde91d209aeb02c4a6be0e43a8d92b3']='health.d/apache.conf'
['084ee72d64760f2641b0720e79c922f3']='health.d/cpu.conf'
['254de8ec49602bea2da3631676d7cfec']='health.d/cpu.conf'
['5eb670b6fe39da5fec2523d910b0dd1e']='health.d/cpu.conf'
['2385e5d35b440619621c4af62492d91b']='health.d/disks.conf'
['3bc2776623889744a98178bad6fb3b79']='health.d/disks.conf'
['46798cda21e1a5faa769abf4e5d27c48']='health.d/disks.conf'
+ ['508771d8e4611a058991a1bc11039dea']='health.d/disks.conf'
['573398335c0c71c075fa57f702bce287']='health.d/disks.conf'
['5da15d6e17a15213a720749045e5d419']='health.d/disks.conf'
['7aa209fa287c95b3ca04c23681b40770']='health.d/disks.conf'
['297160ae7ee01a547ed14f857b4f2c8d']='health.d/memcached.conf'
['2bbbebf52f84fd27fbefecd2a8a8076f']='health.d/memcached.conf'
['45a77ac36ba9f1898144b902de17204b']='health.d/memcached.conf'
+ ['4f6f4d39c19d7d954f769d3f9d3b4da5']='health.d/memcached.conf'
['621f10b257a11add5ff5aff41e9662e3']='health.d/memcached.conf'
['7e5fc1644aa7a54f9dbb1bd102521b09']='health.d/memcached.conf'
['9c981c75bdf4b1637f7113e7e45eb2bf']='health.d/memcached.conf'
['09264cec953ae1c4c2985e6446abb386']='health.d/mysql.conf'
['4d13684cadfa90e73ab465409bf7263b']='health.d/mysql.conf'
['97eee7a30e6419df4537242e9d4a719d']='health.d/mysql.conf'
+ ['d5dab509d8792f795bece27de39dd476']='health.d/mysql.conf'
['373c1276dc9e65884ff2b26e1f08afe7']='health.d/named.conf'
['58e835b7176865ec5a6f59f7aba832bf']='health.d/named.conf'
['669ebef43ee341f6889d382e86d0e200']='health.d/named.conf'
['6b39de5d85db45115db236347a6896d4']='health.d/named.conf'
['846ce94bfeeb90c0dc6a89e8d25f1a68']='health.d/named.conf'
+ ['899bcb0b3f4375b0a1280296be930201']='health.d/named.conf'
['ddda2bb1c88be03b637d3285406f7910']='health.d/named.conf'
['0529b679d3c0e7e6332753c7f6484731']='health.d/net.conf'
['0856124b1eecf01681b4fdf4e21efb3f']='health.d/net.conf'
['2827de41cf34a91b7a8e4d8724f59668']='health.d/net.conf'
['2ad55a5d1e885cf142849a78d4b00401']='health.d/net.conf'
['318bb45755726a25120bb33413d4b582']='health.d/net.conf'
+ ['3866efafd38e161136428d0f818cac43']='health.d/net.conf'
['43ebb7f224c3b232d8ad044d7e9508b6']='health.d/net.conf'
['cb178b15427274d7def5b14bc4c09441']='health.d/net.conf'
['d11711b3647bc2bdd0292dd7deebbeb1']='health.d/net.conf'
['32fde0057c790964f2c743cb3c9aad29']='health.d/nginx.conf'
['3f170e3343cd784983b019163393f5af']='health.d/nginx.conf'
['64c48f9726ab987baec9c617a9fef7a6']='health.d/nginx.conf'
+ ['66dfe138058ca26a31a118007eb31f35']='health.d/nginx.conf'
['7a985528cc9176564640001aa73e3492']='health.d/nginx.conf'
['81255035f6d53534938085df72cdef23']='health.d/nginx.conf'
['eb5168f0b516bc982aac45e59da6e52e']='health.d/nginx.conf'
+ ['5e6fd588ef6934cf04ddb5e662aa02ea']='health.d/postgres.conf'
['36fdd55665cf10b0db164c2a0cca5e57']='health.d/qos.conf'
['55608bdd908a3806df1468f6ee318b2b']='health.d/qos.conf'
['80f109ff293ac94222bf3959432751bd']='health.d/qos.conf'
['b7d769ce86a7aebba01315da5c0799e6']='health.d/ram.conf'
['cd08e5534c94bf1f2cd28396c76b8bbc']='health.d/ram.conf'
['d55bdb83b9ff606852f6a97c1430258c']='health.d/ram.conf'
+ ['08ff5218f938fc48e09e718821169d14']='health.d/redis.conf'
['325617412a628e3bc776e3fbb777a2a6']='health.d/redis.conf'
['3634d5eddc46fb0d50cf47f370670c2c']='health.d/redis.conf'
['4f6a5b47a13f5912cc89e9286701dd08']='health.d/redis.conf'
['39f9422b0f0c3eec11a31aff79d89514']='health.d/retroshare.conf'
['46ef6c1b638e40a7dfd62defdc5f99a3']='health.d/retroshare.conf'
['6608c6546b3c6bde084fc1d34b1163c1']='health.d/retroshare.conf'
+ ['ee5343881744e6a97e6ee5cdd329cfb8']='health.d/retroshare.conf'
['1c12b678ab65f271a96da1bbd0a1ab1c']='health.d/softnet.conf'
['2472e49550326f7142e2c425ccbca005']='health.d/softnet.conf'
['28df44a90e8ea4c6156314c03e88bf44']='health.d/softnet.conf'
['3cc6255457d4cba881ae0554ae5d9190']='health.d/squid.conf'
['845023f9b4a526aa0e6493756dbe6034']='health.d/squid.conf'
['a4a8660728c6afcb528cc6b378897d6b']='health.d/squid.conf'
+ ['aa4bee249bfc0c4a88ac8c2ffb97aa0d']='health.d/squid.conf'
['d162b7465a56151312e60151c1d74fba']='health.d/squid.conf'
['ef9916ea144878a9f37cbb6b1b29da10']='health.d/squid.conf'
['043f0a35dde85837fabeb85b990a41c1']='health.d/swap.conf'
['c9b792755de59d842ba95f8c315d94c8']='health.d/swap.conf'
['ca08a9b18d38ae0a0f5081a7cdc96863']='health.d/swap.conf'
['da29d2ab1ab7b8fda189960c840e5144']='health.d/swap.conf'
+ ['04138a3d8e907c75329fe60ce2e27c1c']='health.d/tcp_resets.conf'
['4063a01bffb43b0423425d1ba3004967']='health.d/tcp_resets.conf'
['b3fc4749b132e55ac0d3a0f92859237e']='health.d/tcp_resets.conf'
['707a63f53f4b32e01d134ae90ba94aad']='health_email_recipients.conf'
return 0;
}
- if (unlikely(!rc->rrdset->last_collected_time.tv_sec)) {
- debug(D_HEALTH, "Health not running alarm '%s.%s'. Chart is not yet collected.", rc->chart?rc->chart:"NOCHART", rc->name);
+ if (unlikely(!rc->rrdset->last_collected_time.tv_sec || rc->rrdset->counter_done < 2)) {
+ debug(D_HEALTH, "Health not running alarm '%s.%s'. Chart is not fully collected yet.", rc->chart?rc->chart:"NOCHART", rc->name);
return 0;
}
return 0;
}
+ int update_every = rc->rrdset->update_every;
+ time_t first = rrdset_first_entry_t(rc->rrdset);
+ time_t last = rrdset_last_entry_t(rc->rrdset);
+
+ if(now + update_every < first || now - update_every > last) {
+ debug(D_HEALTH
+ , "Health not examining alarm '%s.%s' yet (wanted time is out of bounds - we need %lu but got %lu - %lu)."
+ , rc->chart ? rc->chart : "NOCHART", rc->name, (unsigned long) now, (unsigned long) first
+ , (unsigned long) last);
+ return 0;
+ }
+
+ if (RRDCALC_HAS_DB_LOOKUP(rc)) {
+ time_t needed = now + rc->before + rc->after;
+
+ if(needed + update_every < first || needed - update_every > last) {
+ debug(D_HEALTH
+ , "Health not examining alarm '%s.%s' yet (not enough data yet - we need %lu but got %lu - %lu)."
+ , rc->chart ? rc->chart : "NOCHART", rc->name, (unsigned long) needed, (unsigned long) first
+ , (unsigned long) last);
+ return 0;
+ }
+ }
+
if (unlikely(rc->next_update > now)) {
if (unlikely(*next_run > rc->next_update))
*next_run = rc->next_update;
return 0;
}
- // FIXME
- // we should check that the DB lookup is possible
- // i.e.
- // - the duration of the chart includes the required timeframe
- // we SHOULD NOT check the dimensions - there might be alarms that refer non-existing dimensions (e.g. cpu steal)
-
return 1;
}