- **Sophisticated alarming**<br/>
supports dynamic thresholds, hysteresis, alarm templates,
multiple role-based notification methods (such as email, slack.com,
- pushover.net, pushbullet.com telegram.org, twilio.com, messagebird.com)
+ pushover.net, pushbullet.com, telegram.org, twilio.com, messagebird.com)
- **Extensible**<br/>
you can monitor anything you can get a metric for,
every: 10s
warn: ($1m_requests > 120) ? ($this < (($status >= $WARNING ) ? ( 95 ) : ( 85 )) ) : ( 0 )
crit: ($1m_requests > 120) ? ($this < (($status == $CRITICAL) ? ( 85 ) : ( 75 )) ) : ( 0 )
- delay: down 15m multiplier 1.5 max 1h
+ delay: up 2m down 15m multiplier 1.5 max 1h
info: the ratio of successful HTTP responses (1xx, 2xx, 304) over the last minute
to: webmaster
every: 10s
warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 20 )) ) : ( 0 )
crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 )
- delay: down 15m multiplier 1.5 max 1h
+ delay: up 2m down 15m multiplier 1.5 max 1h
info: the ratio of HTTP redirects (3xx except 304) over the last minute
to: webmaster
every: 10s
warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 10 ) : ( 30 )) ) : ( 0 )
crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 )
- delay: down 15m multiplier 1.5 max 1h
+ delay: up 2m down 15m multiplier 1.5 max 1h
info: the ratio of HTTP bad requests (4xx) over the last minute
to: webmaster
every: 10s
warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 2 )) ) : ( 0 )
crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
- delay: down 15m multiplier 1.5 max 1h
+ delay: up 2m down 15m multiplier 1.5 max 1h
info: the ratio of HTTP internal server errors (5xx), over the last minute
to: webmaster
# enable/disable sending hipchat notifications
SEND_HIPCHAT="YES"
+# define hipchat server
+HIPCHAT_SERVER="api.hipchat.com"
+
# api.hipchat.com authorization token
# Without this, netdata cannot send hipchat notifications.
HIPCHAT_AUTH_TOKEN=""
#
# Additionally to the above, fail2ban also supports the following:
#
-# log_path: 'path to fail2ban.log' # Default: '/var/log/fail2ban.log'
+# log_path: 'path to fail2ban.log' # Default: '/var/log/fail2ban.log'
# conf_path: 'path to jail.local/jail.conf' # Default: '/etc/fail2ban/jail.local'
-# exclude: 'jails you want to exclude from autodetection' # Default: '[]' empty list
+# conf_dir: 'path to jail.d/' # Default: '' empty
+# exclude: 'jails you want to exclude from autodetection' # Default: '[]' empty list
#------------------------------------------------------------------------------------------------------------------
-# IMPORTANT Information
-#
-# fail2ban.log file MUST BE readable by netdata.
-# A good idea is to do this by adding the
-# # create 0640 root netdata
-# to fail2ban conf at logrotate.d
-#
# ------------------------------------------------------------------------------------------------------------------
# AUTO-DETECTION JOBS
# only one of them will run (they have the same name)
-#local:
-# log_path: '/var/log/fail2ban.log'
-# conf_path: '/etc/fail2ban/jail.local'
+local:
+ log_path: '/var/log/fail2ban.log'
+ conf_path: '/etc/fail2ban/jail.local'
+# conf_dir: '/etc/fail2ban/jail.d/'
# exclude: 'dropbear apache'
declare -A role_recipients_twilio=()
# hipchat configs
+HIPCHAT_SERVER=
HIPCHAT_AUTH_TOKEN=
DEFAULT_RECIPIENT_HIPCHAT=
declare -A role_recipients_hipchat=()
send_hipchat() {
local authtoken="${1}" recipients="${2}" message="${3}" httpcode sent=0 room color sender msg_format notify
- if [ "${SEND_HIPCHAT}" = "YES" -a ! -z "${authtoken}" -a ! -z "${recipients}" -a ! -z "${message}" ]
- then
-
+ if [ "${SEND_HIPCHAT}" = "YES" -a ! -z "${HIPCHAT_SERVER}" -a ! -z "${authtoken}" -a ! -z "${recipients}" -a ! -z "${message}" ]
+ then
# A label to be shown in addition to the sender's name
# Valid length range: 0 - 64.
sender="netdata"
# Valid values: html, text.
# Defaults to 'html'.
- msg_format="text"
+ msg_format="html"
# Background color for message. Valid values: yellow, green, red, purple, gray, random. Defaults to 'yellow'.
case "${status}" in
-H "Content-type: application/json" \
-H "Authorization: Bearer ${authtoken}" \
-d "{\"color\": \"${color}\", \"from\": \"${netdata}\", \"message_format\": \"${msg_format}\", \"message\": \"${message}\", \"notify\": \"${notify}\"}" \
- "https://api.hipchat.com/v2/room/${room}/notification")
+ "https://${HIPCHAT_SERVER}/v2/room/${room}/notification")
if [ "${httpcode}" == "204" ]
then
DICT = OrderedDict
msg.info('YAML output is ordered')
except ImportError:
- ORDERED = False
- DICT = dict
- msg.info('YAML output is unordered')
-else:
+ try:
+ from ordereddict import OrderedDict
+ ORDERED = True
+ DICT = OrderedDict
+ msg.info('YAML output is ordered')
+ except ImportError:
+ ORDERED = False
+ DICT = dict
+ msg.info('YAML output is unordered')
+if ORDERED:
def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
class OrderedLoader(Loader):
pass
if not run_rndc.returncode:
# 'rndc' was found, stats file is exist and readable and we can run 'rndc stats'. Lets go!
self.create_charts()
-
+
# BIND APPEND dump on every run 'rndc stats'
# that is why stats file size can be VERY large if update_interval too small
dump_size_24hr = round(86400 / self.update_every * (int(size_after) - int(size_before)) / 1048576, 3)
-
+
# If update_every too small we should WARN user
if self.update_every < 30:
self.info('Update_every %s is NOT recommended for use. Increase the value to > 30' % self.update_every)
-
+
self.info('With current update_interval it will be + %s MB every 24hr. '
'Don\'t forget to create logrotate conf file for %s' % (dump_size_24hr, self.named_stats_path))
named.stats file size
)
"""
-
try:
current_size = getsize(self.named_stats_path)
except OSError:
return None, None
-
+
run_rndc = Popen([self.rndc, 'stats'], shell=False)
run_rndc.wait()
return None
rndc_stats = dict()
-
+
# Result: dict.
# topic = Cache DB RRsets; body = A 178303 NS 86790 ... ; desc = A; value = 178303
# {'Cache DB RRsets': [('A', 178303), ('NS', 286790), ...],
for regex in self.regex_options:
rndc_stats.update({topic: [(desc, int(value)) for value, desc in self.regex_values.findall(body)]
for topic, body in findall(regex, raw_data)})
-
+
nms = dict(rndc_stats.get('Name Server Statistics', []))
- inc_queries = {'i' + k: 0 for k in QUERIES}
- inc_queries.update({'i' + k: v for k, v in rndc_stats.get('Incoming Queries', [])})
- out_queries = {'o' + k: 0 for k in QUERIES}
- out_queries.update({'o' + k: v for k, v in rndc_stats.get('Outgoing Queries', [])})
-
+ inc_queries = dict([('i' + k, 0) for k in QUERIES])
+ inc_queries.update(dict([('i' + k, v) for k, v in rndc_stats.get('Incoming Queries', [])]))
+ out_queries = dict([('o' + k, 0) for k in QUERIES])
+ out_queries.update(dict([('o' + k, v) for k, v in rndc_stats.get('Outgoing Queries', [])]))
+
to_netdata = dict()
to_netdata['requests'] = sum([v for k, v in nms.items() if 'request' in k and 'received' in k])
to_netdata['responses'] = sum([v for k, v in nms.items() if 'responses' in k and 'sent' in k])
to_netdata['duplicate'] = nms.get('duplicate queries received', 0)
to_netdata['rejections'] = nms.get('recursive queries rejected', 0)
to_netdata['stats_size'] = size
-
+
to_netdata.update(inc_queries)
to_netdata.update(out_queries)
return to_netdata
from base import LogService
from re import compile
+
try:
from itertools import filterfalse
except ImportError:
from itertools import ifilterfalse as filterfalse
from os import access as is_accessible, R_OK
+from os.path import isdir
+from glob import glob
priority = 60000
retries = 60
-regex = compile(r'([A-Za-z-]+\]) enabled = ([a-z]+)')
-
+REGEX = compile(r'\[([A-Za-z-_]+)][^\[\]]*?(?<!# )enabled = true')
ORDER = ['jails_group']
self.order = ORDER
self.log_path = self.configuration.get('log_path', '/var/log/fail2ban.log')
self.conf_path = self.configuration.get('conf_path', '/etc/fail2ban/jail.local')
- self.default_jails = ['ssh']
+ self.conf_dir = self.configuration.get('conf_dir', '')
try:
self.exclude = self.configuration['exclude'].split()
except (KeyError, AttributeError):
self.exclude = []
-
def _get_data(self):
"""
Parse new log lines
:return: dict
"""
-
- # If _get_raw_data returns empty list (no new lines in log file) we will send to Netdata this
- self.data = {jail: 0 for jail in self.jails_list}
-
try:
raw = self._get_raw_data()
if raw is None:
# Fail2ban logs looks like
# 2016-12-25 12:36:04,711 fail2ban.actions[2455]: WARNING [ssh] Ban 178.156.32.231
- self.data = dict(
+ data = dict(
zip(
self.jails_list,
[len(list(filterfalse(lambda line: (jail + '] Ban') not in line, raw))) for jail in self.jails_list]
))
+ for jail in data:
+ self.data[jail] += data[jail]
+
return self.data
def check(self):
-
+
# Check "log_path" is accessible.
# If NOT STOP plugin
if not is_accessible(self.log_path, R_OK):
- self.error('Cannot access file %s' % (self.log_path))
+ self.error('Cannot access file %s' % self.log_path)
return False
+ jails_list = list()
+
+ if self.conf_dir:
+ dir_jails, error = parse_conf_dir(self.conf_dir)
+ jails_list.extend(dir_jails)
+ if not dir_jails:
+ self.error(error)
+
+ if self.conf_path:
+ path_jails, error = parse_conf_path(self.conf_path)
+ jails_list.extend(path_jails)
+ if not path_jails:
+ self.error(error)
- # Check "conf_path" is accessible.
- # If "conf_path" is accesible try to parse it to find enabled jails
- if is_accessible(self.conf_path, R_OK):
- with open(self.conf_path, 'rt') as jails_conf:
- jails_list = regex.findall(' '.join(jails_conf.read().split()))
- self.jails_list = [jail[:-1] for jail, status in jails_list if status == 'true']
- else:
- self.jails_list = []
- self.error('Cannot access jail.local file %s.' % (self.conf_path))
-
# If for some reason parse failed we still can START with default jails_list.
- self.jails_list = [jail for jail in self.jails_list if jail not in self.exclude]\
- if self.jails_list else self.default_jails
+ self.jails_list = list(set(jails_list) - set(self.exclude)) or ['ssh']
+ self.data = dict([(jail, 0) for jail in self.jails_list])
self.create_dimensions()
- self.info('Plugin succefully started. Jails: %s' % (self.jails_list))
+ self.info('Plugin successfully started. Jails: %s' % self.jails_list)
return True
def create_dimensions(self):
- self.definitions = {'jails_group':
- {'options':
- [None, "Jails ban statistics", "bans/s", 'Jails', 'jail.ban', 'line'], 'lines': []}}
+ self.definitions = {
+ 'jails_group': {'options': [None, "Jails ban statistics", "bans/s", 'jails', 'jail.ban', 'line'],
+ 'lines': []}}
for jail in self.jails_list:
- self.definitions['jails_group']['lines'].append([jail, jail, 'absolute'])
+ self.definitions['jails_group']['lines'].append([jail, jail, 'incremental'])
+
+
+def parse_conf_dir(conf_dir):
+ if not isdir(conf_dir):
+ return list(), '%s is not a directory' % conf_dir
+
+ jail_local = list(filter(lambda local: is_accessible(local, R_OK), glob(conf_dir + '/*.local')))
+ jail_conf = list(filter(lambda conf: is_accessible(conf, R_OK), glob(conf_dir + '/*.conf')))
+
+ if not (jail_local or jail_conf):
+ return list(), '%s is empty or not readable' % conf_dir
+
+    # According to "man jail.conf", files can be *.local AND *.conf
+ # *.conf files parsed first. Changes in *.local overrides configuration in *.conf
+ if jail_conf:
+ jail_local.extend([conf for conf in jail_conf if conf[:-5] not in [local[:-6] for local in jail_local]])
+ jails_list = list()
+ for conf in jail_local:
+ with open(conf, 'rt') as f:
+ raw_data = f.read()
+
+ data = ' '.join(raw_data.split())
+ jails_list.extend(REGEX.findall(data))
+ jails_list = list(set(jails_list))
+
+ return jails_list, 'can\'t locate any jails in %s. Default jail is [\'ssh\']' % conf_dir
+
+
+def parse_conf_path(conf_path):
+ if not is_accessible(conf_path, R_OK):
+ return list(), '%s is not readable' % conf_path
+
+ with open(conf_path, 'rt') as jails_conf:
+ raw_data = jails_conf.read()
+
+ data = raw_data.split()
+ jails_list = REGEX.findall(' '.join(data))
+ return jails_list, 'can\'t locate any jails in %s. Default jail is [\'ssh\']' % conf_path
except (ValueError, IndexError, AttributeError, SyntaxError) as e:
self.error('Pools configurations is incorrect', str(e))
return False
-
+
# Creating static charts
self.order = ['parse_time', 'leases_size', 'utilization', 'total']
self.definitions = {'utilization':
:return: dict
"""
raw_leases = self._get_raw_data()
-
if not raw_leases:
return None
# Result: {ipaddress: end lease time, ...}
- all_leases = {k[6:len(k)-3]:v[7:len(v)-2] for k, v in raw_leases[0].items()}
+ all_leases = dict([(k[6:len(k)-3], v[7:len(v)-2]) for k, v in raw_leases[0].items()])
# Result: [active binding, active binding....]. (Expire time (ends date;) - current time > 0)
active_leases = [k for k, v in all_leases.items() if is_binding_active(all_leases[k])]
for pool in self.pools}
# Bulding dicts to send to netdata
- final_count = {''.join(['le_', k]): v for k, v in pools_count.items()}
- final_util = {''.join(['ut_', k]): v for k, v in pools_util.items()}
-
+ final_count = dict([(''.join(['le_', k]), v) for k, v in pools_count.items()])
+ final_util = dict([(''.join(['ut_', k]), v) for k, v in pools_util.items()])
+
to_netdata = {'total': len(active_leases)}
to_netdata.update({'lsize': int(stat(self.leases_path)[6] / 1024)})
to_netdata.update({'ptime': int(raw_leases[1])})
if not disks:
self.error('Can\'t locate any smartd log files in %s' % self.log_path)
return False
-
+
# List of namedtuples to track smartd log file size
self.disks = [NAMED_DISKS(name=disks[i], size=0, number=i) for i in range(len(disks))]
break
result = f.readline()
- result = result.decode(encoding='utf-8')
+ result = result.decode()
result = self.regex.findall(result)
queue.put([basename(disk), result])
threads, result = list(), list()
queue = Queue()
to_netdata = dict()
-
+
# If the size has not changed there is no reason to poll log files.
disks = [disk for disk in self.disks if self.size_changed(disk)]
if disks:
for elem in result:
for a, n, r in elem[1]:
to_netdata.update({'_'.join([elem[0], a]): r if self.raw_values else n})
-
+
self.previous_data.update(to_netdata)
return to_netdata or None
except OSError:
# Remove unreadable/nonexisting log files from list of disks and previous_data
self.disks.remove(disk)
- self.previous_data = {k: v for k, v in self.previous_data.items() if basename(disk.name) not in k}
+ self.previous_data = dict([(k, v) for k, v in self.previous_data.items() if basename(disk.name) not in k])
return False
def create_charts(self):
self.definitions = dict()
units = 'raw' if self.raw_values else 'normalized'
- for k, v in {k: v for k, v in SMART_ATTR.items() if k in ORDER}.items():
+ for k, v in dict([(k, v) for k, v in SMART_ATTR.items() if k in ORDER]).items():
self.definitions.update({''.join(['attrid', k]): {
'options': [None, v, units, v, 'smartd.attrid' + k, 'line'],
'lines': create_lines(k)}})
return None
# 1. ALL data from 'varnishstat -1'. t - type(MAIN, MEMPOOL etc)
- to_netdata = {k: int(v) for t, k, v in data_all}
+ to_netdata = dict([(k, int(v)) for t, k, v in data_all])
# 2. ADD backend statistics
- to_netdata.update({'_'.join([n, k]): int(v) for n, k, v in data_backend})
+ to_netdata.update(dict([('_'.join([n, k]), int(v)) for n, k, v in data_backend]))
# 3. ADD additional keys to dict
# 3.1 Cache hit/miss/hitpass OVERALL in percent
can be set on any metric monitored by netdata.
Alarm <a href="https://github.com/firehol/netdata/wiki/health-monitoring#alarm-actions" target="_blank" data-ga-category="Outbound links" data-ga-action="Nav click" data-ga-label=AlarmNotifications>notifications</a>
are role-based and support dynamic thresholds, hysteresis and can be dispatched via multiple methods
- (such as email, slack.com, pushover.net, pushbullet.com telegram.org, twilio.com).
+ (such as email, slack.com, pushover.net, pushbullet.com, telegram.org, twilio.com).
</div>
<div class=grid-cell><h3><span class=star>★</span> In real-time</h3>
<p>netdata collects thousands of metrics per server <strong>per second</strong>,