1 # -*- coding: utf-8 -*-
2 # Description: mongodb netdata python.d module
5 from base import SimpleService
6 from copy import deepcopy
7 from datetime import datetime
8 from sys import exc_info
10 from pymongo import MongoClient, ASCENDING, DESCENDING
11 from pymongo.errors import PyMongoError
16 # default module values (can be overridden per job in `config`)
# Display order of the charts (override it to show fewer charts or to rearrange them).
ORDER = [
    'read_operations',
    'write_operations',
    'active_clients',
    'journaling_transactions',
    'journaling_volume',
    'background_flush_average',
    'background_flush_last',
    'background_flush_rate',
    'wiredtiger_read',
    'wiredtiger_write',
    'cursors',
    'connections',
    'memory',
    'page_faults',
    'queued_requests',
    'record_moves',
    'wiredtiger_cache',
    'wiredtiger_pages_evicted',
    'asserts',
    'dbstats_objects',
    'tcmalloc_generic',
    'tcmalloc_metrics',
    'command_total_rate',
    'command_failed_rate',
]
# Chart definitions, keyed by chart name (the same names that appear in ORDER).
#   'options': [name, title, units, family, context, chart type]
#   'lines':   one [dimension id, dimension name, algorithm(, multiplier, divisor)] per dimension
# (field meaning as per the netdata python.d chart format).
# A dimension id must be exactly the key the collector emits for that value, e.g.
# '<command>_failed' / '<command>_total' for serverStatus['metrics']['commands'][<command>].
CHARTS = {
    'read_operations': {
        'options': [None, 'Received read requests', 'requests/s', 'throughput metrics',
                    'mongodb.read_operations', 'line'],
        'lines': [
            ['readWriteOper_query', 'query', 'incremental'],
            ['readWriteOper_getmore', 'getmore', 'incremental']
        ]},
    'write_operations': {
        'options': [None, 'Received write requests', 'requests/s', 'throughput metrics',
                    'mongodb.write_operations', 'line'],
        'lines': [
            ['readWriteOper_insert', 'insert', 'incremental'],
            ['readWriteOper_update', 'update', 'incremental'],
            ['readWriteOper_delete', 'delete', 'incremental']
        ]},
    'active_clients': {
        'options': [None, 'Clients with read or write operations in progress or queued', 'clients',
                    'throughput metrics', 'mongodb.active_clients', 'line'],
        'lines': [
            ['activeClients_readers', 'readers', 'absolute'],
            ['activeClients_writers', 'writers', 'absolute']
        ]},
    'journaling_transactions': {
        'options': [None, 'Transactions that have been written to the journal', 'commits',
                    'database performance', 'mongodb.journaling_transactions', 'line'],
        'lines': [
            ['journalTrans_commits', 'commits', 'absolute']
        ]},
    'journaling_volume': {
        'options': [None, 'Volume of data written to the journal', 'MB', 'database performance',
                    'mongodb.journaling_volume', 'line'],
        'lines': [
            ['journalTrans_journaled', 'volume', 'absolute', 1, 100]
        ]},
    'background_flush_average': {
        'options': [None, 'Average time taken by flushes to execute', 'ms', 'database performance',
                    'mongodb.background_flush_average', 'line'],
        'lines': [
            ['background_flush_average', 'time', 'absolute', 1, 100]
        ]},
    'background_flush_last': {
        'options': [None, 'Time taken by the last flush operation to execute', 'ms', 'database performance',
                    'mongodb.background_flush_last', 'line'],
        'lines': [
            ['background_flush_last', 'time', 'absolute', 1, 100]
        ]},
    'background_flush_rate': {
        'options': [None, 'Flushes rate', 'flushes', 'database performance', 'mongodb.background_flush_rate', 'line'],
        'lines': [
            ['background_flush_rate', 'flushes', 'incremental', 1, 1]
        ]},
    'wiredtiger_read': {
        'options': [None, 'Read tickets in use and remaining', 'tickets', 'database performance',
                    'mongodb.wiredtiger_read', 'stacked'],
        'lines': [
            ['wiredTigerRead_available', 'available', 'absolute', 1, 1],
            ['wiredTigerRead_out', 'inuse', 'absolute', 1, 1]
        ]},
    'wiredtiger_write': {
        'options': [None, 'Write tickets in use and remaining', 'tickets', 'database performance',
                    'mongodb.wiredtiger_write', 'stacked'],
        'lines': [
            ['wiredTigerWrite_available', 'available', 'absolute', 1, 1],
            ['wiredTigerWrite_out', 'inuse', 'absolute', 1, 1]
        ]},
    'cursors': {
        'options': [None, 'Currently opened cursors, cursors with timeout disabled and timed out cursors',
                    'cursors', 'database performance', 'mongodb.cursors', 'stacked'],
        'lines': [
            # The dimension name keeps its historical spelling so that anything
            # already referring to it by name keeps working.
            ['cursor_total', 'openned', 'absolute', 1, 1],
            ['cursor_noTimeout', 'notimeout', 'absolute', 1, 1],
            ['cursor_timedOut', 'timedout', 'incremental', 1, 1]
        ]},
    'connections': {
        'options': [None, 'Currently connected clients and unused connections', 'connections',
                    'resource utilization', 'mongodb.connections', 'stacked'],
        'lines': [
            ['connections_available', 'unused', 'absolute', 1, 1],
            ['connections_current', 'connected', 'absolute', 1, 1]
        ]},
    'memory': {
        'options': [None, 'Memory metrics', 'MB', 'resource utilization', 'mongodb.memory', 'stacked'],
        'lines': [
            ['memory_virtual', 'virtual', 'absolute', 1, 1],
            ['memory_resident', 'resident', 'absolute', 1, 1],
            ['memory_nonmapped', 'nonmapped', 'absolute', 1, 1],
            ['memory_mapped', 'mapped', 'absolute', 1, 1]
        ]},
    'page_faults': {
        'options': [None, 'Number of times MongoDB had to fetch data from disk', 'request/s',
                    'resource utilization', 'mongodb.page_faults', 'line'],
        'lines': [
            ['page_faults', 'page_faults', 'incremental', 1, 1]
        ]},
    'queued_requests': {
        'options': [None, 'Currently queued read and write requests', 'requests', 'resource saturation',
                    'mongodb.queued_requests', 'line'],
        'lines': [
            ['currentQueue_readers', 'readers', 'absolute', 1, 1],
            ['currentQueue_writers', 'writers', 'absolute', 1, 1]
        ]},
    'record_moves': {
        'options': [None, 'Number of times documents had to be moved on-disk', 'number',
                    'resource saturation', 'mongodb.record_moves', 'line'],
        'lines': [
            ['record_moves', 'moves', 'incremental', 1, 1]
        ]},
    'asserts': {
        'options': [None, 'Number of message, warning, regular, corresponding to errors generated'
                          ' by users assertions raised', 'number', 'errors (asserts)', 'mongodb.asserts', 'line'],
        'lines': [
            ['errors_msg', 'msg', 'incremental', 1, 1],
            ['errors_warning', 'warning', 'incremental', 1, 1],
            ['errors_regular', 'regular', 'incremental', 1, 1],
            ['errors_user', 'user', 'incremental', 1, 1]
        ]},
    'wiredtiger_cache': {
        'options': [None, 'The percentage of the wiredTiger cache that is in use and cache with dirty bytes',
                    'percent', 'resource utilization', 'mongodb.wiredtiger_cache', 'stacked'],
        'lines': [
            ['wiredTiger_bytes_in_cache', 'inuse', 'absolute', 1, 1000],
            ['wiredTiger_dirty_in_cache', 'dirty', 'absolute', 1, 1000]
        ]},
    'wiredtiger_pages_evicted': {
        'options': [None, 'Pages evicted from the cache',
                    'pages', 'resource utilization', 'mongodb.wiredtiger_pages_evicted', 'stacked'],
        'lines': [
            ['wiredTiger_unmodified_pages_evicted', 'unmodified', 'absolute', 1, 1],
            ['wiredTiger_modified_pages_evicted', 'modified', 'absolute', 1, 1]
        ]},
    'dbstats_objects': {
        'options': [None, 'Number of documents in the database among all the collections', 'documents',
                    'storage size metrics', 'mongodb.dbstats_objects', 'stacked'],
        # Starts empty: one dimension per database is appended when the charts are created.
        'lines': [
        ]},
    'tcmalloc_generic': {
        'options': [None, 'Tcmalloc generic metrics', 'MB', 'tcmalloc', 'mongodb.tcmalloc_generic', 'stacked'],
        'lines': [
            ['current_allocated_bytes', 'allocated', 'absolute', 1, 1048576],
            ['heap_size', 'heap_size', 'absolute', 1, 1048576]
        ]},
    'tcmalloc_metrics': {
        'options': [None, 'Tcmalloc metrics', 'KB', 'tcmalloc', 'mongodb.tcmalloc_metrics', 'stacked'],
        'lines': [
            ['central_cache_free_bytes', 'central_cache_free', 'absolute', 1, 1024],
            ['current_total_thread_cache_bytes', 'current_total_thread_cache', 'absolute', 1, 1024],
            ['pageheap_free_bytes', 'pageheap_free', 'absolute', 1, 1024],
            ['pageheap_unmapped_bytes', 'pageheap_unmapped', 'absolute', 1, 1024],
            ['thread_cache_free_bytes', 'thread_cache_free', 'absolute', 1, 1024],
            ['transfer_cache_free_bytes', 'transfer_cache_free', 'absolute', 1, 1024]
        ]},
    'command_total_rate': {
        'options': [None, 'Commands total rate', 'commands/s', 'commands', 'mongodb.command_total_rate', 'stacked'],
        'lines': [
            ['count_total', 'count', 'incremental', 1, 1],
            ['createIndexes_total', 'createIndexes', 'incremental', 1, 1],
            ['delete_total', 'delete', 'incremental', 1, 1],
            ['eval_total', 'eval', 'incremental', 1, 1],
            ['findAndModify_total', 'findAndModify', 'incremental', 1, 1],
            ['insert_total', 'insert', 'incremental', 1, 1],
            ['update_total', 'update', 'incremental', 1, 1]
        ]},
    'command_failed_rate': {
        'options': [None, 'Commands failed rate', 'commands/s', 'commands', 'mongodb.command_failed_rate', 'stacked'],
        'lines': [
            ['count_failed', 'count', 'incremental', 1, 1],
            ['createIndexes_failed', 'createIndexes', 'incremental', 1, 1],
            # Was 'delete_dailed': that id never matched the collected 'delete_failed'
            # key, so the dimension stayed empty.
            ['delete_failed', 'delete', 'incremental', 1, 1],
            ['eval_failed', 'eval', 'incremental', 1, 1],
            ['findAndModify_failed', 'findAndModify', 'incremental', 1, 1],
            ['insert_failed', 'insert', 'incremental', 1, 1],
            ['update_failed', 'update', 'incremental', 1, 1]
        ]}
}
# Service: MongoDB collector for netdata python.d, built on SimpleService.
# NOTE(review): this listing appears to omit short source lines (e.g. `try:`,
# `return ...`, and some `def` headers are not visible). The comments below only
# describe what the visible statements do; confirm against the complete file.
218 class Service(SimpleService):
# Constructor: reads connection settings from the job configuration.
# 'host', 'port' and 'timeout' fall back to '127.0.0.1', 27017 and 100;
# 'user' and 'pass' have no default (None when absent).
219 def __init__(self, configuration=None, name=None):
220 SimpleService.__init__(self, configuration=configuration, name=name)
221 self.user = self.configuration.get('user')
222 self.password = self.configuration.get('pass')
223 self.host = self.configuration.get('host', '127.0.0.1')
224 self.port = self.configuration.get('port', 27017)
225 self.timeout = self.configuration.get('timeout', 100)
# NOTE(review): the `def` header that encloses the following statements is not
# visible here. They open the connection, list the databases, record in self.ss
# which optional serverStatus sections exist, and finally build the charts.
229 self.error('Pymongo module is needed to use mongodb.chart.py')
231 self.connection, server_status, error = self._create_connection()
# Listing databases is best-effort: on PyMongoError the list is left empty and
# the failure is only logged at info level.
237 self.databases = self.connection.database_names()
238 except PyMongoError as error:
239 self.databases = list()
240 self.info('Can\'t collect databases: %s' % str(error))
# Optional top-level sections of the serverStatus reply.
243 for elem in ['dur', 'backgroundFlushing', 'wiredTiger', 'tcmalloc', 'repl']:
244 self.ss[elem] = in_server_status(elem, server_status)
# 'commands' and 'cursor' are looked up under serverStatus['metrics'].
245 for elem in ['commands', 'cursor']:
246 self.ss[elem] = in_server_status(elem, server_status['metrics'])
250 except (LookupError, SyntaxError, AttributeError):
251 self.error('Type: %s, error: %s' % (str(exc_info()[0]), str( exc_info()[1])))
254 self.create_charts_(server_status)
# create_charts_: builds self.order and self.definitions from ORDER and CHARTS,
# removes the charts whose serverStatus section was not found (self.ss), then
# adds per-database charts and replica-set charts.
257 def create_charts_(self, server_status):
# Work on copies so the module-level ORDER and CHARTS are not modified.
259 self.order = ORDER[:]
260 self.definitions = deepcopy(CHARTS)
262 if not self.ss['dur']:
263 self.order.remove('journaling_transactions')
264 self.order.remove('journaling_volume')
266 if not self.ss['backgroundFlushing']:
267 self.order.remove('background_flush_average')
268 self.order.remove('background_flush_last')
269 self.order.remove('background_flush_rate')
271 if not self.ss['cursor']:
272 self.order.remove('cursors')
# NOTE(review): 'wiredtiger_pages_evicted' is listed in ORDER but no removal of it
# is visible here next to the other wiredTiger charts - confirm whether it should
# also be removed when the 'wiredTiger' section is absent.
274 if not self.ss['wiredTiger']:
275 self.order.remove('wiredtiger_write')
276 self.order.remove('wiredtiger_read')
277 self.order.remove('wiredtiger_cache')
279 if not self.ss['tcmalloc']:
280 self.order.remove('tcmalloc_generic')
281 self.order.remove('tcmalloc_metrics')
283 if not self.ss['commands']:
284 self.order.remove('command_total_rate')
285 self.order.remove('command_failed_rate')
# One '<database>_dbstats' chart per database, plus one '<database>_objects'
# dimension appended to the shared 'dbstats_objects' chart.
287 for dbase in self.databases:
288 self.order.append('_'.join([dbase, 'dbstats']))
289 self.definitions['_'.join([dbase, 'dbstats'])] = {
290 'options': [None, '%s: size of all documents, indexes, extents' % dbase, 'KB',
291 'storage size metrics', 'mongodb.dbstats', 'line'],
293 ['_'.join([dbase, 'dataSize']), 'documents', 'absolute', 1, 1024],
294 ['_'.join([dbase, 'indexSize']), 'indexes', 'absolute', 1, 1024],
295 ['_'.join([dbase, 'storageSize']), 'extents', 'absolute', 1, 1024]
297 self.definitions['dbstats_objects']['lines'].append(['_'.join([dbase, 'objects']), dbase, 'absolute'])
# Helper: appends a dimension with id '<host>_<string>' and divisor 1000
# (presumably once per entry of `hosts`; the loop header is not visible here).
300 def create_lines(hosts, string):
303 dim_id = '_'.join([host, string])
304 lines.append([dim_id, host, 'absolute', 1, 1000])
# Helper: one dimension per (state, description) pair, id '<host>_state_<state>'.
# `host` is not a parameter of this helper - presumably it is the loop variable
# of the `for host in all_hosts` loop further down; verify.
307 def create_state_lines(states):
309 for state, description in states:
310 dim_id = '_'.join([host, 'state', state])
311 lines.append([dim_id, description, 'absolute', 1, 1])
# Replica-set members come from serverStatus['repl']; 'me' is this node.
314 all_hosts = server_status['repl']['hosts']
315 this_host = server_status['repl']['me']
316 other_hosts = [host for host in all_hosts if host != this_host]
# The oplog window chart is only added when the 'local' database was listed.
318 if 'local' in self.databases:
319 self.order.append('oplog_window')
320 self.definitions['oplog_window'] = {
321 'options': [None, 'Interval of time between the oldest and the latest entries in the oplog',
322 'seconds', 'replication', 'mongodb.oplog_window', 'line'],
323 'lines': [['timeDiff', 'window', 'absolute', 1, 1000]]}
324 # Create "heartbeat delay" chart
325 self.order.append('heartbeat_delay')
326 self.definitions['heartbeat_delay'] = {
327 'options': [None, 'Latency between this node and replica set members (lastHeartbeatRecv)',
328 'seconds', 'replication', 'mongodb.replication_heartbeat_delay', 'stacked'],
329 'lines': create_lines(other_hosts, 'heartbeat_lag')}
330 # Create "optimedate delay" chart
331 self.order.append('optimedate_delay')
332 self.definitions['optimedate_delay'] = {
333 'options': [None, '"optimeDate"(time when last entry from the oplog was applied)'
334 ' diff between all nodes',
335 'seconds', 'replication', 'mongodb.replication_optimedate_delay', 'stacked'],
336 'lines': create_lines(all_hosts, 'optimedate')}
337 # Create "replica set members state" chart
338 for host in all_hosts:
339 chart_name = '_'.join([host, 'state'])
340 self.order.append(chart_name)
341 self.definitions[chart_name] = {
342 'options': [None, '%s state' % host, 'state',
343 'replication', 'mongodb.replication_state', 'line'],
344 'lines': create_state_lines(REPLSET_STATES)}
# _get_raw_data: merges the results of the four collectors below into one dict.
# A falsy collector result is treated as an empty dict; the method returns None
# when the merged dict ends up empty.
346 def _get_raw_data(self):
349 raw_data.update(self.get_serverstatus_() or dict())
350 raw_data.update(self.get_dbstats_() or dict())
351 raw_data.update(self.get_replsetgetstatus_() or dict())
352 raw_data.update(self.get_getreplicationinfo_() or dict())
354 return raw_data or None
# get_serverstatus_: stores the reply of the `serverStatus` admin command under
# the 'serverStatus' key.
356 def get_serverstatus_(self):
359 raw_data['serverStatus'] = self.connection.admin.command('serverStatus')
# get_dbstats_: runs `dbStats` on every database in self.databases and stores each
# reply under raw_data['dbStats'][<database>]. The branch taken when
# self.databases is empty is not visible here.
365 def get_dbstats_(self):
366 if not self.databases:
370 raw_data['dbStats'] = dict()
372 for dbase in self.databases:
373 raw_data['dbStats'][dbase] = self.connection[dbase].command('dbStats')
# get_replsetgetstatus_: stores the reply of the `replSetGetStatus` admin command;
# self.ss['repl'] is checked first (the body of that branch is not visible here).
379 def get_replsetgetstatus_(self):
380 if not self.ss['repl']:
385 raw_data['replSetGetStatus'] = self.connection.admin.command('replSetGetStatus')
# get_getreplicationinfo_: fetches the first and the last document of
# local.oplog.rs (sorted by $natural, ascending and descending). Guarded by the
# 'repl' section being present and the 'local' database having been listed.
391 def get_getreplicationinfo_(self):
392 if not (self.ss['repl'] and 'local' in self.databases):
396 raw_data['getReplicationInfo'] = dict()
398 raw_data['getReplicationInfo']['ASCENDING'] = self.connection.local.oplog.rs.find().sort(
399 "$natural", ASCENDING).limit(1)[0]
400 raw_data['getReplicationInfo']['DESCENDING'] = self.connection.local.oplog.rs.find().sort(
401 "$natural", DESCENDING).limit(1)[0]
# NOTE(review): the `def` header of the method the following statements belong to
# is not visible here. They flatten the raw command replies into `to_netdata`,
# a dict of dimension id -> integer value.
411 raw_data = self._get_raw_data()
# 'serverStatus' is required; the other three replies are optional.
417 serverStatus = raw_data['serverStatus']
418 dbStats = raw_data.get('dbStats')
419 replSetGetStatus = raw_data.get('replSetGetStatus')
420 getReplicationInfo = raw_data.get('getReplicationInfo')
421 utc_now = datetime.utcnow()
# update_dict_key() prefixes every numeric entry of a sub-document, e.g.
# opcounters['query'] becomes 'readWriteOper_query'.
424 to_netdata.update(update_dict_key(serverStatus['opcounters'], 'readWriteOper'))
425 to_netdata.update(update_dict_key(serverStatus['globalLock']['activeClients'], 'activeClients'))
426 to_netdata.update(update_dict_key(serverStatus['connections'], 'connections'))
427 to_netdata.update(update_dict_key(serverStatus['mem'], 'memory'))
# non-mapped memory = virtual - mappedWithJournal (falls back to 'mapped' when
# 'mappedWithJournal' is absent).
428 to_netdata['memory_nonmapped'] = (serverStatus['mem']['virtual']
429 - serverStatus['mem'].get('mappedWithJournal', serverStatus['mem']['mapped']))
430 to_netdata.update(update_dict_key(serverStatus['globalLock']['currentQueue'], 'currentQueue'))
431 to_netdata.update(update_dict_key(serverStatus['asserts'], 'errors'))
432 to_netdata['page_faults'] = serverStatus['extra_info']['page_faults']
433 to_netdata['record_moves'] = serverStatus['metrics']['record']['moves']
# Values multiplied by 100 here are paired with a divisor of 100 in the matching
# chart dimensions, which keeps two decimals.
436 to_netdata['journalTrans_commits'] = serverStatus['dur']['commits']
437 to_netdata['journalTrans_journaled'] = int(serverStatus['dur']['journaledMB'] * 100)
439 if self.ss['backgroundFlushing']:
440 to_netdata['background_flush_average'] = int(serverStatus['backgroundFlushing']['average_ms'] * 100)
441 to_netdata['background_flush_last'] = int(serverStatus['backgroundFlushing']['last_ms'] * 100)
442 to_netdata['background_flush_rate'] = serverStatus['backgroundFlushing']['flushes']
444 if self.ss['cursor']:
445 to_netdata['cursor_timedOut'] = serverStatus['metrics']['cursor']['timedOut']
446 to_netdata.update(update_dict_key(serverStatus['metrics']['cursor']['open'], 'cursor'))
448 if self.ss['wiredTiger']:
449 wired_tiger = serverStatus['wiredTiger']
450 to_netdata.update(update_dict_key(serverStatus['wiredTiger']['concurrentTransactions']['read'],
452 to_netdata.update(update_dict_key(serverStatus['wiredTiger']['concurrentTransactions']['write'],
# Cache usage relative to 'maximum bytes configured' (the tail of each expression
# is not visible in this listing).
454 to_netdata['wiredTiger_bytes_in_cache'] = (int(wired_tiger['cache']['bytes currently in the cache']
455 * 100 / wired_tiger['cache']['maximum bytes configured']
457 to_netdata['wiredTiger_dirty_in_cache'] = (int(wired_tiger['cache']['tracked dirty bytes in the cache']
458 * 100 / wired_tiger['cache']['maximum bytes configured']
460 to_netdata['wiredTiger_unmodified_pages_evicted'] = wired_tiger['cache']['unmodified pages evicted']
461 to_netdata['wiredTiger_modified_pages_evicted'] = wired_tiger['cache']['modified pages evicted']
# tcmalloc: the 'generic' section is copied as-is; from the 'tcmalloc' section
# only int/float values are kept.
463 if self.ss['tcmalloc']:
464 to_netdata.update(serverStatus['tcmalloc']['generic'])
465 to_netdata.update(dict([(k, v) for k, v in serverStatus['tcmalloc']['tcmalloc'].items()
466 if int_or_float(v)]))
# Yields '<command>_<field>' keys (e.g. 'delete_failed', 'delete_total') for each
# listed command.
468 if self.ss['commands']:
469 for elem in ['count', 'createIndexes', 'delete', 'eval', 'findAndModify', 'insert', 'update']:
470 to_netdata.update(update_dict_key(serverStatus['metrics']['commands'][elem], elem))
# Per-database stats are prefixed with the database name.
474 for dbase in dbStats:
475 to_netdata.update(update_dict_key(dbStats[dbase], dbase))
# Replica-set section: dates equal to the Unix epoch are skipped below.
480 members = replSetGetStatus['members']
481 unix_epoch = datetime(1970, 1, 1, 0, 0)
483 for member in members:
484 if not member.get('self'):
485 other_hosts.append(member)
486 # Replica set time diff between current time and time when last entry from the oplog was applied
487 if member['optimeDate'] != unix_epoch:
488 member_optimedate = member['name'] + '_optimedate'
489 to_netdata.update({member_optimedate: int(delta_calculation(delta=utc_now - member['optimeDate'],
491 # Replica set members state
# Every known state dimension of the member is set to 0 first (the assignment of
# `state` is not visible here), then the dimension of the current state is set
# to the numeric state value.
492 member_state = member['name'] + '_state'
493 for elem in REPLSET_STATES:
495 to_netdata.update({'_'.join([member_state, state]): 0})
496 to_netdata.update({'_'.join([member_state, str(member['state'])]): member['state']})
497 # Heartbeat lag calculation
498 for other in other_hosts:
499 if other['lastHeartbeatRecv'] != unix_epoch:
500 node = other['name'] + '_heartbeat_lag'
501 to_netdata[node] = int(delta_calculation(delta=utc_now - other['lastHeartbeatRecv'],
# Oplog window: time between the first and the last oplog entry, in milliseconds
# (multiplier=1000).
504 if getReplicationInfo:
505 first_event = getReplicationInfo['ASCENDING']['ts'].as_datetime()
506 last_event = getReplicationInfo['DESCENDING']['ts'].as_datetime()
507 to_netdata['timeDiff'] = int(delta_calculation(delta=last_event - first_event, multiplier=1000))
# _create_connection: opens a MongoClient, authenticates against the admin database
# when both user and password are set, and runs `serverStatus` once.
# Returns (connection, server_status, None) on success, or
# (None, None, <error string>) when a PyMongoError is raised.
511 def _create_connection(self):
512 conn_vars = {'host': self.host, 'port': self.port}
# The timeout option is only passed when MongoClient exposes
# 'server_selection_timeout'; self.timeout is therefore in milliseconds.
513 if hasattr(MongoClient, 'server_selection_timeout'):
514 conn_vars.update({'serverselectiontimeoutms': self.timeout})
516 connection = MongoClient(**conn_vars)
517 if self.user and self.password:
518 connection.admin.authenticate(name=self.user, password=self.password)
520 # connection.admin.authenticate(name=self.user, mechanism='MONGODB-X509')
521 server_status = connection.admin.command('serverStatus')
522 except PyMongoError as error:
523 return None, None, str(error)
525 return connection, server_status, None
def update_dict_key(collection, string):
    """Return the numeric entries of *collection* under prefixed keys.

    Every value accepted by ``int_or_float`` is rounded to the nearest integer
    and stored under ``'<string>_<key>'``; all other entries are left out.

    :param collection: mapping of names to values
    :param string: prefix joined to every key with an underscore
    :return: new dict with the prefixed keys and integer values
    """
    prefixed = dict()
    for key, value in collection.items():
        if not int_or_float(value):
            continue
        prefixed['_'.join([string, key])] = int(round(value))
    return prefixed
def int_or_float(value):
    """Tell whether *value* is an ``int`` or a ``float`` instance.

    ``bool`` values also qualify, because ``bool`` is a subclass of ``int``.
    """
    numeric_types = (int, float)
    return isinstance(value, numeric_types)
def in_server_status(elem, server_status):
    """Report whether *elem* is contained in *server_status*.

    :param elem: key (section name) to look for
    :param server_status: container to search
    :return: result of the ``in`` membership test
    """
    is_present = elem in server_status
    return is_present
def delta_calculation(delta, multiplier=1):
    """Convert a timedelta-like object to seconds, scaled by *multiplier*.

    :param delta: object exposing ``total_seconds()`` or, failing that, the
        ``days``, ``seconds`` and ``microseconds`` attributes
    :param multiplier: factor applied to the number of seconds
    :return: duration in seconds multiplied by *multiplier*
    """
    if not hasattr(delta, 'total_seconds'):
        # Manual computation for objects without total_seconds()
        # (e.g. timedelta on Python versions older than 2.7).
        whole_seconds = delta.seconds + delta.days * 24 * 3600
        return (delta.microseconds + whole_seconds * 10 ** 6) / 10.0 ** 6 * multiplier
    return delta.total_seconds() * multiplier