]> arthur.barton.de Git - netdata.git/commitdiff
Merge pull request #1594 from ktsaou/master
authorCosta Tsaousis <costa@tsaousis.gr>
Sun, 22 Jan 2017 04:57:07 +0000 (06:57 +0200)
committerGitHub <noreply@github.com>
Sun, 22 Jan 2017 04:57:07 +0000 (06:57 +0200)
data collection optimizations

conf.d/Makefile.am
conf.d/health.d/elasticsearch.conf [new file with mode: 0644]
conf.d/python.d/elasticsearch.conf [new file with mode: 0644]
netdata-installer.sh
python.d/Makefile.am
python.d/elasticsearch.chart.py [new file with mode: 0644]
python.d/freeradius.chart.py
python.d/varnish.chart.py
src/freebsd_sysctl.c

index 27777144a3117f85a422ed347b4e964a4d4d029e..a7e8224c257a66421edbd4924bd8063a616be618 100644 (file)
@@ -28,6 +28,7 @@ dist_pythonconfig_DATA = \
     python.d/bind_rndc.conf \
     python.d/cpufreq.conf \
     python.d/dovecot.conf \
+    python.d/elasticsearch.conf \
     python.d/example.conf \
     python.d/exim.conf \
     python.d/fail2ban.conf \
@@ -60,6 +61,7 @@ dist_healthconfig_DATA = \
     health.d/bind_rndc.conf \
     health.d/cpu.conf \
     health.d/disks.conf \
+    health.d/elasticsearch.conf \
     health.d/entropy.conf \
     health.d/haproxy.conf \
     health.d/ipc.conf \
diff --git a/conf.d/health.d/elasticsearch.conf b/conf.d/health.d/elasticsearch.conf
new file mode 100644 (file)
index 0000000..dffd409
--- /dev/null
@@ -0,0 +1,9 @@
+   alarm: elasticsearch_last_collected
+      on: elasticsearch_local.cluster_health_status
+    calc: $now - $last_collected_t
+   units: seconds ago
+   every: 10s
+    warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    info: number of seconds since the last successful data collection
+      to: sysadmin
diff --git a/conf.d/python.d/elasticsearch.conf b/conf.d/python.d/elasticsearch.conf
new file mode 100644 (file)
index 0000000..1faee85
--- /dev/null
@@ -0,0 +1,72 @@
+# netdata python.d.plugin configuration for elasticsearch stats
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+#  - global variables
+#  - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed to run at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+#     name: myname     # the JOB's name as it will appear on the
+#                      # dashboard (defaults to the job_name)
+#                      # JOBs sharing a name are mutually exclusive
+#     update_every: 1  # the JOB's data collection frequency
+#     priority: 60000  # the JOB's order on the dashboard
+#     retries: 5       # the JOB's number of restoration attempts
+#
+# In addition to the above, the elasticsearch plugin also supports the following:
+#
+#     host: 'ipaddress'                # Server ip address or hostname.
+#     port: 'port'                     # Port on which elasticsearch listens.
+#     cluster_health: False/True       # Calls to cluster health elasticsearch API. Enabled by default.
+#     cluster_stats: False/True        # Calls to cluster stats elasticsearch API. Enabled by default.
+#
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+#
+#local:
+# host: '127.0.0.1'
+# port: '9200'
+# cluster_health: True
+# cluster_stats: True
index f6b4013cd4b4df434fd5876df3e5e0e4c0766497..fa69de196d1076dd2a48551639685b4431e334a4 100755 (executable)
@@ -694,6 +694,7 @@ run find ./system/ -type f -a \! -name \*.in -a \! -name Makefile\* -a \! -name
 NETDATA_ADDED_TO_DOCKER=0
 NETDATA_ADDED_TO_NGINX=0
 NETDATA_ADDED_TO_VARNISH=0
+NETDATA_ADDED_TO_HAPROXY=0
 if [ ${UID} -eq 0 ]
     then
     portable_add_group netdata
@@ -701,6 +702,7 @@ if [ ${UID} -eq 0 ]
     portable_add_user_to_group docker netdata && NETDATA_ADDED_TO_DOCKER=1
     portable_add_user_to_group nginx  netdata && NETDATA_ADDED_TO_NGINX=1
     portable_add_user_to_group varnish  netdata && NETDATA_ADDED_TO_VARNISH=1
+    portable_add_user_to_group haproxy  netdata && NETDATA_ADDED_TO_HAPROXY=1
 
     if [ -d /etc/logrotate.d -a ! -f /etc/logrotate.d/netdata ]
         then
@@ -1355,6 +1357,16 @@ if [ $? -eq 0 -a "${NETDATA_ADDED_TO_VARNISH}" = "1" ]
     echo "   gpasswd -d netdata varnish"
 fi
 
+getent group haproxy > /dev/null
+if [ $? -eq 0 -a "${NETDATA_ADDED_TO_HAPROXY}" = "1" ]
+    then
+    echo
+    echo "You may also want to remove the netdata user from the haproxy group"
+    echo "by running:"
+    echo "   gpasswd -d netdata haproxy"
+fi
+
+
 UNINSTALL
 chmod 750 netdata-uninstaller.sh
 
index 436bceeae776c7ce62f0c8480caf4a2c2b80b816..7706acd6b43c95d638682cd4c096a3ae2247059a 100644 (file)
@@ -14,6 +14,7 @@ dist_python_SCRIPTS = \
     cpufreq.chart.py \
     cpuidle.chart.py \
     dovecot.chart.py \
+    elasticsearch.chart.py \
     example.chart.py \
     exim.chart.py \
     fail2ban.chart.py \
diff --git a/python.d/elasticsearch.chart.py b/python.d/elasticsearch.chart.py
new file mode 100644 (file)
index 0000000..ff841f1
--- /dev/null
@@ -0,0 +1,402 @@
+# -*- coding: utf-8 -*-
+# Description: elasticsearch node stats netdata python.d module
+# Author: l2isbad
+
+from base import UrlService
+from requests import get
+from socket import gethostbyname
+try:
+        from queue import Queue
+except ImportError:
+        from Queue import Queue
+from threading import Thread
+
+# default module values (can be overridden per job in `config`)
+# update_every = 2
+update_every = 5
+priority = 60000
+retries = 60
+
+# charts order (can be overridden if you want fewer charts, or a different order)
+ORDER = ['search_perf_total', 'search_perf_time', 'search_latency', 'index_perf_total', 'index_perf_time',
+         'index_latency', 'jvm_mem_heap', 'jvm_gc_count', 'jvm_gc_time', 'host_metrics_file_descriptors',
+         'host_metrics_http', 'host_metrics_transport', 'thread_pool_qr', 'fdata_cache', 'fdata_ev_tr',
+         'cluster_health_status', 'cluster_health_nodes', 'cluster_health_shards', 'cluster_stats_nodes',
+         'cluster_stats_query_cache', 'cluster_stats_docs', 'cluster_stats_store', 'cluster_stats_indices_shards']
+
+CHARTS = {
+    'search_perf_total': {
+        'options': [None, 'Number of queries, fetches', 'queries', 'Search performance', 'es.search_query', 'stacked'],
+        'lines': [
+            ['query_total', 'search_total', 'incremental'],
+            ['fetch_total', 'fetch_total', 'incremental'],
+            ['query_current', 'search_current', 'absolute'],
+            ['fetch_current', 'fetch_current', 'absolute']
+        ]},
+    'search_perf_time': {
+        'options': [None, 'Time spent on queries, fetches', 'seconds', 'Search performance', 'es.search_time', 'stacked'],
+        'lines': [
+            ['query_time_in_millis', 'query', 'incremental', 1, 1000],
+            ['fetch_time_in_millis', 'fetch', 'incremental', 1, 1000]
+        ]},
+    'search_latency': {
+        'options': [None, 'Query and fetch latency', 'ms', 'Search performance', 'es.search_latency', 'stacked'],
+        'lines': [
+            ['query_latency', 'query', 'absolute', 1, 1000],
+            ['fetch_latency', 'fetch', 'absolute', 1, 1000]
+        ]},
+    'index_perf_total': {
+        'options': [None, 'Number of documents indexed, index refreshes, flushes', 'documents/indexes',
+                    'Indexing performance', 'es.index_doc', 'stacked'],
+        'lines': [
+            ['indexing_index_total', 'indexed', 'incremental'],
+            ['refresh_total', 'refreshes', 'incremental'],
+            ['flush_total', 'flushes', 'incremental'],
+            ['indexing_index_current', 'indexed_current', 'absolute'],
+        ]},
+    'index_perf_time': {
+        'options': [None, 'Time spent on indexing, refreshing, flushing', 'seconds', 'Indexing performance',
+                    'es.search_time', 'stacked'],
+        'lines': [
+            ['indexing_index_time_in_millis', 'indexing', 'incremental', 1, 1000],
+            ['refresh_total_time_in_millis', 'refreshing', 'incremental', 1, 1000],
+            ['flush_total_time_in_millis', 'flushing', 'incremental', 1, 1000]
+        ]},
+    'index_latency': {
+        'options': [None, 'Indexing and flushing latency', 'ms', 'Indexing performance',
+                    'es.index_latency', 'stacked'],
+        'lines': [
+            ['indexing_latency', 'indexing', 'absolute', 1, 1000],
+            ['flushing_latency', 'flushing', 'absolute', 1, 1000]
+        ]},
+    'jvm_mem_heap': {
+        'options': [None, 'JVM heap currently in use/committed', 'percent/MB', 'Memory usage and gc',
+                    'es.jvm_heap', 'area'],
+        'lines': [
+            ['jvm_heap_percent', 'inuse', 'absolute'],
+            ['jvm_heap_commit', 'commit', 'absolute', -1, 1048576]
+        ]},
+    'jvm_gc_count': {
+        'options': [None, 'Count of garbage collections', 'counts', 'Memory usage and gc', 'es.gc_count', 'stacked'],
+        'lines': [
+            ['young_collection_count', 'young', 'incremental'],
+            ['old_collection_count', 'old', 'incremental']
+        ]},
+    'jvm_gc_time': {
+        'options': [None, 'Time spent on garbage collections', 'ms', 'Memory usage and gc', 'es.gc_time', 'stacked'],
+        'lines': [
+            ['young_collection_time_in_millis', 'young', 'incremental'],
+            ['old_collection_time_in_millis', 'old', 'incremental']
+        ]},
+    'thread_pool_qr': {
+        'options': [None, 'Number of queued/rejected threads in thread pool', 'threads', 'Queues and rejections',
+                    'es.qr', 'stacked'],
+        'lines': [
+            ['bulk_queue', 'bulk_queue', 'absolute'],
+            ['index_queue', 'index_queue', 'absolute'],
+            ['search_queue', 'search_queue', 'absolute'],
+            ['merge_queue', 'merge_queue', 'absolute'],
+            ['bulk_rejected', 'bulk_rej', 'absolute'],
+            ['index_rejected', 'index_rej', 'absolute'],
+            ['search_rejected', 'search_rej', 'absolute'],
+            ['merge_rejected', 'merge_rej', 'absolute']
+        ]},
+    'fdata_cache': {
+        'options': [None, 'Fielddata cache size', 'MB', 'Fielddata cache', 'es.fdata_cache', 'line'],
+        'lines': [
+            ['index_fdata_mem', 'mem_size', 'absolute', 1, 1048576]
+        ]},
+    'fdata_ev_tr': {
+        'options': [None, 'Fielddata evictions and circuit breaker tripped count', 'number of events',
+                    'Fielddata cache', 'es.fdata_ev_tr', 'line'],
+        'lines': [
+            ['index_fdata_evic', 'evictions', 'incremental'],
+            ['breakers_fdata_trip', 'tripped', 'incremental']
+        ]},
+    'cluster_health_nodes': {
+        'options': [None, 'Nodes and tasks statistics', 'units', 'Cluster health API',
+                    'es.cluster_health', 'stacked'],
+        'lines': [
+            ['health_number_of_nodes', 'nodes', 'absolute'],
+            ['health_number_of_data_nodes', 'data_nodes', 'absolute'],
+            ['health_number_of_pending_tasks', 'pending_tasks', 'absolute'],
+            ['health_number_of_in_flight_fetch', 'inflight_fetch', 'absolute']
+        ]},
+    'cluster_health_status': {
+        'options': [None, 'Cluster status', 'status', 'Cluster health API',
+                    'es.cluster_health_status', 'area'],
+        'lines': [
+            ['status_green', 'green', 'absolute'],
+            ['status_red', 'red', 'absolute'],
+            ['status_foo1', None, 'absolute'],
+            ['status_foo2', None, 'absolute'],
+            ['status_foo3', None, 'absolute'],
+            ['status_yellow', 'yellow', 'absolute']
+        ]},
+    'cluster_health_shards': {
+        'options': [None, 'Shards statistics', 'shards', 'Cluster health API',
+                    'es.cluster_health_sharts', 'stacked'],
+        'lines': [
+            ['health_active_shards', 'active_shards', 'absolute'],
+            ['health_relocating_shards', 'relocating_shards', 'absolute'],
+            ['health_unassigned_shards', 'unassigned', 'absolute'],
+            ['health_delayed_unassigned_shards', 'delayed_unassigned', 'absolute'],
+            ['health_initializing_shards', 'initializing', 'absolute'],
+            ['health_active_shards_percent_as_number', 'active_percent', 'absolute']
+        ]},
+    'cluster_stats_nodes': {
+        'options': [None, 'Nodes statistics', 'nodes', 'Cluster stats API',
+                    'es.cluster_stats_nodes', 'stacked'],
+        'lines': [
+            ['count_data_only', 'data_only', 'absolute'],
+            ['count_master_data', 'master_data', 'absolute'],
+            ['count_total', 'total', 'absolute'],
+            ['count_master_only', 'master_only', 'absolute'],
+            ['count_client', 'client', 'absolute']
+        ]},
+    'cluster_stats_query_cache': {
+        'options': [None, 'Query cache statistics', 'queries', 'Cluster stats API',
+                    'es.cluster_stats_query_cache', 'stacked'],
+        'lines': [
+            ['query_cache_hit_count', 'hit', 'incremental'],
+            ['query_cache_miss_count', 'miss', 'incremental']
+        ]},
+    'cluster_stats_docs': {
+        'options': [None, 'Docs statistics', 'count', 'Cluster stats API',
+                    'es.cluster_stats_docs', 'line'],
+        'lines': [
+            ['docs_count', 'docs', 'absolute']
+        ]},
+    'cluster_stats_store': {
+        'options': [None, 'Store statistics', 'MB', 'Cluster stats API',
+                    'es.cluster_stats_store', 'line'],
+        'lines': [
+            ['store_size_in_bytes', 'size', 'absolute', 1, 1048567]
+        ]},
+    'cluster_stats_indices_shards': {
+        'options': [None, 'Indices and shards statistics', 'count', 'Cluster stats API',
+                    'es.cluster_stats_ind_sha', 'stacked'],
+        'lines': [
+            ['indices_count', 'indices', 'absolute'],
+            ['shards_total', 'shards', 'absolute']
+        ]},
+    'host_metrics_transport': {
+        'options': [None, 'Cluster communication transport metrics', 'kbit/s', 'Host metrics',
+                    'es.host_metrics_transport', 'area'],
+        'lines': [
+            ['transport_rx_size_in_bytes', 'in', 'incremental', 8, 1000],
+            ['transport_tx_size_in_bytes', 'out', 'incremental', -8, 1000]
+        ]},
+    'host_metrics_file_descriptors': {
+        'options': [None, 'Available file descriptors in percent', 'percent', 'Host metrics',
+                    'es.host_metrics_descriptors', 'area'],
+        'lines': [
+            ['file_descriptors_used', 'used', 'absolute', 1, 10]
+        ]},
+    'host_metrics_http': {
+        'options': [None, 'Opened HTTP connections', 'connections', 'Host metrics',
+                    'es.host_metrics_http', 'line'],
+        'lines': [
+            ['http_current_open', 'opened', 'absolute', 1, 1]
+        ]}
+}
+
+
+class Service(UrlService):
+    def __init__(self, configuration=None, name=None):
+        UrlService.__init__(self, configuration=configuration, name=name)
+        self.order = ORDER
+        self.definitions = CHARTS
+        self.host = self.configuration.get('host')
+        self.port = self.configuration.get('port')
+        self.user = self.configuration.get('user')
+        self.password = self.configuration.get('pass')
+        self.latency = dict()
+
+    def check(self):
+        # We can't start unless both <host> and <port> are specified
+        if not all([self.host, self.port]):
+            return False
+
+        # It is a bad idea to use a hostname.
+        # Hostname -> ipaddress
+        try:
+            self.host = gethostbyname(self.host)
+        except Exception as e:
+            self.error(str(e))
+            return False
+
+        # HTTP Auth? NOT TESTED
+        self.auth = self.user and self.password
+
+        # Create URL for every Elasticsearch API
+        url_node_stats = 'http://%s:%s/_nodes/_local/stats' % (self.host, self.port)
+        url_cluster_health = 'http://%s:%s/_cluster/health' % (self.host, self.port)
+        url_cluster_stats = 'http://%s:%s/_cluster/stats' % (self.host, self.port)
+
+        # Create list of enabled API calls
+        user_choice = [bool(self.configuration.get('node_stats', True)),
+                       bool(self.configuration.get('cluster_health', True)),
+                       bool(self.configuration.get('cluster_stats', True))]
+        
+        avail_methods = [(self._get_node_stats, url_node_stats), 
+                        (self._get_cluster_health, url_cluster_health),
+                        (self._get_cluster_stats, url_cluster_stats)]
+
+        # Remove disabled API calls from 'avail methods'
+        self.methods = [avail_methods[_] for _ in range(len(avail_methods)) if user_choice[_]]
+
+        # Run _get_raw_data for ALL active API calls.
+        api_result = {}
+        for method in self.methods:
+            api_result[method[1]] = (bool(self._get_raw_data(method[1])))
+
+        # We can start ONLY if every active API call returned a truthy result
+        if not all(api_result.values()):
+            self.error('Plugin could not get data from all APIs')
+            self.error('%s' % api_result)
+            return False
+        else:
+            self.info('%s' % api_result)
+            self.info('Plugin was started successfully')
+
+            return True
+
+    def _get_raw_data(self, url):
+        try:
+            if not self.auth:
+                raw_data = get(url)
+            else:
+                raw_data = get(url, auth=(self.user, self.password))
+        except Exception:
+            return None
+
+        return raw_data
+
+    def _get_data(self):
+        threads = list()
+        queue = Queue()
+        result = dict()
+
+        for method in self.methods:
+            th = Thread(target=method[0], args=(queue, method[1]))
+            th.start()
+            threads.append(th)
+
+        for thread in threads:
+            thread.join()
+            result.update(queue.get())
+
+        return result or None
+
+    def _get_cluster_health(self, queue, url):
+        """
+        Format data received from http request
+        :return: dict
+        """
+
+        data = self._get_raw_data(url)
+
+        if not data:
+            queue.put({})
+        else:
+            data = data.json()
+
+            to_netdata = dict()
+            to_netdata.update(update_key('health', data))
+            to_netdata.update({'status_green': 0, 'status_red': 0, 'status_yellow': 0,
+                               'status_foo1': 0, 'status_foo2': 0, 'status_foo3': 0})
+            to_netdata[''.join(['status_', to_netdata.get('health_status', '')])] = 1
+
+            queue.put(to_netdata)
+
+    def _get_cluster_stats(self, queue, url):
+        """
+        Format data received from http request
+        :return: dict
+        """
+
+        data = self._get_raw_data(url)
+
+        if not data:
+            queue.put({})
+        else:
+            data = data.json()
+
+            to_netdata = dict()
+            to_netdata.update(update_key('count', data['nodes']['count']))
+            to_netdata.update(update_key('query_cache', data['indices']['query_cache']))
+            to_netdata.update(update_key('docs', data['indices']['docs']))
+            to_netdata.update(update_key('store', data['indices']['store']))
+            to_netdata['indices_count'] = data['indices']['count']
+            to_netdata['shards_total'] = data['indices']['shards']['total']
+
+            queue.put(to_netdata)
+
+    def _get_node_stats(self, queue, url):
+        """
+        Format data received from http request
+        :return: dict
+        """
+
+        data = self._get_raw_data(url)
+
+        if not data:
+            queue.put({})
+        else:
+            data = data.json()
+            node = list(data['nodes'].keys())[0]
+            to_netdata = dict()
+            # Search performance metrics
+            to_netdata.update(data['nodes'][node]['indices']['search'])
+            to_netdata['query_latency'] = self.find_avg(to_netdata['query_total'],
+                                               to_netdata['query_time_in_millis'], 'query_latency')
+            to_netdata['fetch_latency'] = self.find_avg(to_netdata['fetch_total'],
+                                               to_netdata['fetch_time_in_millis'], 'fetch_latency')
+
+            # Indexing performance metrics
+            for key in ['indexing', 'refresh', 'flush']:
+                to_netdata.update(update_key(key, data['nodes'][node]['indices'].get(key, {})))
+            to_netdata['indexing_latency'] = self.find_avg(to_netdata['indexing_index_total'],
+                                               to_netdata['indexing_index_time_in_millis'], 'index_latency')
+            to_netdata['flushing_latency'] = self.find_avg(to_netdata['flush_total'],
+                                               to_netdata['flush_total_time_in_millis'], 'flush_latency')
+            # Memory usage and garbage collection
+            to_netdata.update(update_key('young', data['nodes'][node]['jvm']['gc']['collectors']['young']))
+            to_netdata.update(update_key('old', data['nodes'][node]['jvm']['gc']['collectors']['old']))
+            to_netdata['jvm_heap_percent'] = data['nodes'][node]['jvm']['mem']['heap_used_percent']
+            to_netdata['jvm_heap_commit'] = data['nodes'][node]['jvm']['mem']['heap_committed_in_bytes']
+
+            # Thread pool queues and rejections
+            for key in ['bulk', 'index', 'search', 'merge']:
+                to_netdata.update(update_key(key, data['nodes'][node]['thread_pool'].get(key, {})))
+
+            # Fielddata cache
+            to_netdata['index_fdata_mem'] = data['nodes'][node]['indices']['fielddata']['memory_size_in_bytes']
+            to_netdata['index_fdata_evic'] = data['nodes'][node]['indices']['fielddata']['evictions']
+            to_netdata['breakers_fdata_trip'] = data['nodes'][node]['breakers']['fielddata']['tripped']
+
+            # Host metrics
+            to_netdata.update(update_key('http', data['nodes'][node]['http']))
+            to_netdata.update(update_key('transport', data['nodes'][node]['transport']))
+            to_netdata['file_descriptors_used'] = round(float(data['nodes'][node]['process']['open_file_descriptors'])
+                                                        / data['nodes'][node]['process']['max_file_descriptors'] * 1000)
+            
+            queue.put(to_netdata)
+
+    def find_avg(self, value1, value2, key):
+        if key not in self.latency:
+            self.latency.update({key: [value1, value2]})
+            return 0
+        else:
+            if not self.latency[key][0] == value1:
+                latency = round(float(value2 - self.latency[key][1]) / float(value1 - self.latency[key][0]) * 1000)
+                self.latency.update({key: [value1, value2]})
+                return latency
+            else:
+                self.latency.update({key: [value1, value2]})
+                return 0
+
+
+def update_key(string, dictionary):
+    return {'_'.join([string, k]): v for k, v in dictionary.items()}
index 335127fd4d8956760444f50a4222fe3e27e4faf3..2ac280f0eb3edf6ea7c444dfa02ccc4412196e42 100644 (file)
@@ -101,8 +101,8 @@ class Service(SimpleService):
         :return: str
         """
         try:
-            process_echo = Popen(self.sub_echo,  stdout=PIPE, shell=False)
-            process_rad = Popen(self.sub_radclient, stdin=process_echo.stdout, stdout=PIPE,  shell=False)
+            process_echo = Popen(self.sub_echo, stdout=PIPE, stderr=PIPE, shell=False)
+            process_rad = Popen(self.sub_radclient, stdin=process_echo.stdout, stdout=PIPE, stderr=PIPE, shell=False)
             process_echo.stdout.close()
             raw_result = process_rad.communicate()[0]
         except Exception:
index 9d2c780e973eda08a33c9cc271752e153a5704d2..9b37c93d5cae330daf4830a8f55e7ea5066898c8 100644 (file)
@@ -60,9 +60,11 @@ CHARTS = {'backend_health':
                        ['s0.g_bytes', 'allocated', 'absolute', -1, 1048576]],
               'options': [None, 'Memory usage', 'megabytes', 'Memory usage', 'varnish.memory_usage', 'stacked']},
           'session': 
-             {'lines': [['sess_conn', 'conn', 'incremental', 1, 1],
-                       ['client_req', 'requests', 'incremental', 1, 1],
-                       ['sess_dropped', 'dropped', 'incremental', 1, 1]],
+             {'lines': [['sess_conn', 'sess_conn', 'incremental', 1, 1],
+                       ['client_req', 'client_requests', 'incremental', 1, 1],
+                       ['client_conn', 'client_conn', 'incremental', 1, 1],
+                       ['client_drop', 'client_drop', 'incremental', 1, 1],
+                       ['sess_dropped', 'sess_dropped', 'incremental', 1, 1]],
               'options': [None, 'Sessions', 'units', 'Client metrics', 'varnish.session', 'line']},
           'threads': 
              {'lines': [['threads', None, 'absolute', 1, 1],
@@ -88,7 +90,7 @@ class Service(SimpleService):
                          if is_executable(''.join([directory, 'varnishstat']), X_OK)][0]
         except IndexError:
             self.varnish = False
-        self.rgx_all = compile(r'([A-Z]+\.)([\d\w_.]+)\s+(\d+)')
+        self.rgx_all = compile(r'([A-Z]+\.)?([\d\w_.]+)\s+(\d+)')
         # Could be
         # VBE.boot.super_backend.pipe_hdrbyte (new)
         # or
@@ -114,7 +116,7 @@ class Service(SimpleService):
         # 2. Output is parsable (list is not empty after regex findall)
         is_parsable = self.rgx_all.findall(reply)
         if not is_parsable:
-            self.error('Cant parse output (only varnish version 4+ supported)')
+            self.error('Cant parse output...')
             return False
 
         # We need to find the right regex for backend parse
@@ -123,7 +125,7 @@ class Service(SimpleService):
             self.rgx_bck = self.rgx_bck[0]
         else:
             self.backend_list = self.rgx_bck[1].findall(reply)[::2]
-            self.rgx_bck = self.rgx_back[1]
+            self.rgx_bck = self.rgx_bck[1]
 
         # We are about to start!
         self.create_charts()
@@ -187,7 +189,8 @@ class Service(SimpleService):
         # 3.3 Problems summary chart
         for elem in ['backend_busy', 'backend_unhealthy', 'esi_errors', 'esi_warnings', 'losthdr', 'sess_drop',
                      'sess_fail', 'sess_pipe_overflow', 'threads_destroyed', 'threads_failed', 'threads_limited', 'thread_queue_len']:
-            to_netdata[''.join([elem, '_b'])] = to_netdata.get(elem, 0)
+            if to_netdata.get(elem) is not None:
+                to_netdata[''.join([elem, '_b'])] = to_netdata.get(elem)
 
         # Ready steady go!
         return to_netdata
@@ -207,7 +210,8 @@ class Service(SimpleService):
         #self.order.extend(extra_charts)
 
         # Create static charts
-        self.definitions = {chart: values for chart, values in CHARTS.items() if chart in self.order}
+        #self.definitions = {chart: values for chart, values in CHARTS.items() if chart in self.order}
+        self.definitions = CHARTS
  
         # Create dynamic backend charts
         if self.backend_list:
index 7edaf4f020b796b2db3a503a08428a2ed68c617c..9400089dbe166fde2489d7492ef68c6d65483644 100644 (file)
@@ -163,6 +163,7 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
     int numdevs;
     static void *devstat_data = NULL;
     struct devstat *dstat;
+    char disk[DEVSTAT_NAME_LEN + 10 + 1]; // 10 - maximum number of digits for int
     struct cur_dstat {
         collected_number duration_read_ms;
         collected_number duration_write_ms;
@@ -609,34 +610,35 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
                 error("DISABLED: disk.io");
             } else {
                 dstat = devstat_data + sizeof(long); // skip generation number
-                collected_number total_disk_reads = 0;
-                collected_number total_disk_writes = 0;
+                collected_number total_disk_kbytes_read = 0;
+                collected_number total_disk_kbytes_write = 0;
 
                 for (i = 0; i < numdevs; i++) {
                     if (((dstat[i].device_type & DEVSTAT_TYPE_MASK) == DEVSTAT_TYPE_DIRECT) || ((dstat[i].device_type & DEVSTAT_TYPE_MASK) == DEVSTAT_TYPE_STORARRAY)) {
+                        sprintf(disk, "%s%d", dstat[i].device_name, dstat[i].unit_number);
 
                         // --------------------------------------------------------------------
 
-                        st = rrdset_find_bytype(RRD_TYPE_DISK, dstat[i].device_name);
+                        st = rrdset_find_bytype(RRD_TYPE_DISK, disk);
                         if (unlikely(!st)) {
-                            st = rrdset_create(RRD_TYPE_DISK, dstat[i].device_name, NULL, dstat[i].device_name, "disk.io", "Disk I/O Bandwidth", "kilobytes/s", 2000, update_every, RRDSET_TYPE_AREA);
+                            st = rrdset_create(RRD_TYPE_DISK, disk, NULL, disk, "disk.io", "Disk I/O Bandwidth", "kilobytes/s", 2000, update_every, RRDSET_TYPE_AREA);
 
                             rrddim_add(st, "reads", NULL, 1, 1024, RRDDIM_INCREMENTAL);
                             rrddim_add(st, "writes", NULL, -1, 1024, RRDDIM_INCREMENTAL);
                         }
                         else rrdset_next(st);
 
-                        total_disk_reads += dstat[i].bytes[DEVSTAT_READ];
-                        total_disk_writes += dstat[i].bytes[DEVSTAT_WRITE];
+                        total_disk_kbytes_read += dstat[i].bytes[DEVSTAT_READ]/KILO_FACTOR;
+                        total_disk_kbytes_write += dstat[i].bytes[DEVSTAT_WRITE]/KILO_FACTOR;
                         prev_dstat.bytes_read = rrddim_set(st, "reads", dstat[i].bytes[DEVSTAT_READ]);
                         prev_dstat.bytes_write = rrddim_set(st, "writes", dstat[i].bytes[DEVSTAT_WRITE]);
                         rrdset_done(st);
 
                         // --------------------------------------------------------------------
 
-                        st = rrdset_find_bytype("disk_ops", dstat[i].device_name);
+                        st = rrdset_find_bytype("disk_ops", disk);
                         if (unlikely(!st)) {
-                            st = rrdset_create("disk_ops", dstat[i].device_name, NULL, dstat[i].device_name, "disk.ops", "Disk Completed I/O Operations", "operations/s", 2001, update_every, RRDSET_TYPE_LINE);
+                            st = rrdset_create("disk_ops", disk, NULL, disk, "disk.ops", "Disk Completed I/O Operations", "operations/s", 2001, update_every, RRDSET_TYPE_LINE);
                             st->isdetail = 1;
 
                             rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
@@ -650,9 +652,9 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
 
                         // --------------------------------------------------------------------
 
-                        st = rrdset_find_bytype("disk_qops", dstat[i].device_name);
+                        st = rrdset_find_bytype("disk_qops", disk);
                         if (unlikely(!st)) {
-                            st = rrdset_create("disk_qops", dstat[i].device_name, NULL, dstat[i].device_name, "disk.qops", "Disk Current I/O Operations", "operations", 2002, update_every, RRDSET_TYPE_LINE);
+                            st = rrdset_create("disk_qops", disk, NULL, disk, "disk.qops", "Disk Current I/O Operations", "operations", 2002, update_every, RRDSET_TYPE_LINE);
                             st->isdetail = 1;
 
                             rrddim_add(st, "operations", NULL, 1, 1, RRDDIM_ABSOLUTE);
@@ -664,9 +666,9 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
 
                         // --------------------------------------------------------------------
 
-                        st = rrdset_find_bytype("disk_util", dstat[i].device_name);
+                        st = rrdset_find_bytype("disk_util", disk);
                         if (unlikely(!st)) {
-                            st = rrdset_create("disk_util", dstat[i].device_name, NULL, dstat[i].device_name, "disk.util", "Disk Utilization Time", "% of time working", 2004, update_every, RRDSET_TYPE_AREA);
+                            st = rrdset_create("disk_util", disk, NULL, disk, "disk.util", "Disk Utilization Time", "% of time working", 2004, update_every, RRDSET_TYPE_AREA);
                             st->isdetail = 1;
 
                             rrddim_add(st, "utilization", NULL, 1, 10, RRDDIM_INCREMENTAL);
@@ -679,9 +681,9 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
 
                         // --------------------------------------------------------------------
 
-                        st = rrdset_find_bytype("disk_iotime", dstat[i].device_name);
+                        st = rrdset_find_bytype("disk_iotime", disk);
                         if (unlikely(!st)) {
-                            st = rrdset_create("disk_iotime", dstat[i].device_name, NULL, dstat[i].device_name, "disk.iotime", "Disk Total I/O Time", "milliseconds/s", 2022, update_every, RRDSET_TYPE_LINE);
+                            st = rrdset_create("disk_iotime", disk, NULL, disk, "disk.iotime", "Disk Total I/O Time", "milliseconds/s", 2022, update_every, RRDSET_TYPE_LINE);
                             st->isdetail = 1;
 
                             rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
@@ -703,9 +705,9 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
 
                             // --------------------------------------------------------------------
 
-                            st = rrdset_find_bytype("disk_await", dstat[i].device_name);
+                            st = rrdset_find_bytype("disk_await", disk);
                             if (unlikely(!st)) {
-                                st = rrdset_create("disk_await", dstat[i].device_name, NULL, dstat[i].device_name, "disk.await", "Average Completed I/O Operation Time", "ms per operation", 2005, update_every, RRDSET_TYPE_LINE);
+                                st = rrdset_create("disk_await", disk, NULL, disk, "disk.await", "Average Completed I/O Operation Time", "ms per operation", 2005, update_every, RRDSET_TYPE_LINE);
                                 st->isdetail = 1;
 
                                 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_ABSOLUTE);
@@ -721,9 +723,9 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
 
                             // --------------------------------------------------------------------
 
-                            st = rrdset_find_bytype("disk_avgsz", dstat[i].device_name);
+                            st = rrdset_find_bytype("disk_avgsz", disk);
                             if (unlikely(!st)) {
-                                st = rrdset_create("disk_avgsz", dstat[i].device_name, NULL, dstat[i].device_name, "disk.avgsz", "Average Completed I/O Operation Bandwidth", "kilobytes per operation", 2006, update_every, RRDSET_TYPE_AREA);
+                                st = rrdset_create("disk_avgsz", disk, NULL, disk, "disk.avgsz", "Average Completed I/O Operation Bandwidth", "kilobytes per operation", 2006, update_every, RRDSET_TYPE_AREA);
                                 st->isdetail = 1;
 
                                 rrddim_add(st, "reads", NULL, 1, 1024, RRDDIM_ABSOLUTE);
@@ -739,9 +741,9 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
 
                             // --------------------------------------------------------------------
 
-                            st = rrdset_find_bytype("disk_svctm", dstat[i].device_name);
+                            st = rrdset_find_bytype("disk_svctm", disk);
                             if (unlikely(!st)) {
-                                st = rrdset_create("disk_svctm", dstat[i].device_name, NULL, dstat[i].device_name, "disk.svctm", "Average Service Time", "ms per operation", 2007, update_every, RRDSET_TYPE_LINE);
+                                st = rrdset_create("disk_svctm", disk, NULL, disk, "disk.svctm", "Average Service Time", "ms per operation", 2007, update_every, RRDSET_TYPE_LINE);
                                 st->isdetail = 1;
 
                                 rrddim_add(st, "svctm", NULL, 1, 1, RRDDIM_ABSOLUTE);
@@ -753,21 +755,21 @@ int do_freebsd_sysctl(int update_every, usec_t dt) {
                             rrdset_done(st);
                         }
                     }
+                }
 
-                    // --------------------------------------------------------------------
-
-                    st = rrdset_find_bytype("system", "io");
-                    if (unlikely(!st)) {
-                        st = rrdset_create("system", "io", NULL, "disk", NULL, "Disk I/O", "kilobytes/s", 150, update_every, RRDSET_TYPE_AREA);
-                        rrddim_add(st, "in",  NULL,  1, 1024, RRDDIM_INCREMENTAL);
-                        rrddim_add(st, "out", NULL, -1, 1024, RRDDIM_INCREMENTAL);
-                    }
-                    else rrdset_next(st);
+                // --------------------------------------------------------------------
 
-                    rrddim_set(st, "in", total_disk_reads);
-                    rrddim_set(st, "out", total_disk_writes);
-                    rrdset_done(st);
+                st = rrdset_find_bytype("system", "io");
+                if (unlikely(!st)) {
+                    st = rrdset_create("system", "io", NULL, "disk", NULL, "Disk I/O", "kilobytes/s", 150, update_every, RRDSET_TYPE_AREA);
+                    rrddim_add(st, "in",  NULL,  1, 1, RRDDIM_INCREMENTAL);
+                    rrddim_add(st, "out", NULL, -1, 1, RRDDIM_INCREMENTAL);
                 }
+                else rrdset_next(st);
+
+                rrddim_set(st, "in", total_disk_kbytes_read);
+                rrddim_set(st, "out", total_disk_kbytes_write);
+                rrdset_done(st);
             }
         }
     }