1 # -*- coding: utf-8 -*-
2 # Description: smart netdata python.d module
3 # Author: l2isbad, vorph1
6 from os import listdir, access, R_OK
7 from os.path import isfile, join, getsize, basename, isdir
9 from queue import Queue
11 from Queue import Queue
12 from threading import Thread
13 from base import SimpleService
14 from collections import namedtuple
16 # default module values (can be overridden per job in `config`)
20 # charts order (can be overridden if you want less charts, or different order)
# Attribute ids charted by default; extended at runtime with the ids
# listed in the job's `smart_attributes` option (create_charts).
ORDER = ['1', '4', '5', '7', '9', '12', '193', '194', '197', '198', '200']
24 '1': 'Read Error Rate',
25 '2': 'Throughput Performance',
27 '4': 'Start/Stop Count',
28 '5': 'Reallocated Sectors Count',
29 '6': 'Read Channel Margin',
30 '7': 'Seek Error Rate',
31 '8': 'Seek Time Performance',
32 '9': 'Power-On Hours Count',
33 '10': 'Spin-up Retries',
34 '11': 'Calibration Retries',
35 '12': 'Power Cycle Count',
36 '13': 'Soft Read Error Rate',
37 '100': 'Erase/Program Cycles',
38 '103': 'Translation Table Rebuild',
39 '108': 'Unknown (108)',
40 '170': 'Reserved Block Count',
41 '171': 'Program Fail Count',
42 '172': 'Erase Fail Count',
43 '173': 'Wear Leveller Worst Case Erase Count',
44 '174': 'Unexpected Power Loss',
45 '175': 'Program Fail Count',
46 '176': 'Erase Fail Count',
47 '177': 'Wear Leveling Count',
48 '178': 'Used Reserved Block Count',
49 '179': 'Used Reserved Block Count',
50 '180': 'Unused Reserved Block Count',
51 '181': 'Program Fail Count',
52 '182': 'Erase Fail Count',
53 '183': 'SATA Downshifts',
54 '184': 'End-to-End error',
55 '185': 'Head Stability',
56 '186': 'Induced Op-Vibration Detection',
57 '187': 'Reported Uncorrectable Errors',
58 '188': 'Command Timeout',
59 '189': 'High Fly Writes',
61 '191': 'G-Sense Errors',
62 '192': 'Power-Off Retract Cycles',
63 '193': 'Load/Unload Cycles',
65 '195': 'Hardware ECC Recovered',
66 '196': 'Reallocation Events',
67 '197': 'Current Pending Sectors',
68 '198': 'Off-line Uncorrectable',
69 '199': 'UDMA CRC Error Rate',
70 '200': 'Write Error Rate',
71 '201': 'Soft Read Errors',
72 '202': 'Data Address Mark Errors',
73 '203': 'Run Out Cancel',
74 '204': 'Soft ECC Corrections',
75 '205': 'Thermal Asperity Rate',
76 '206': 'Flying Height',
77 '207': 'Spin High Current',
78 '209': 'Offline Seek Performance',
80 '221': 'G-Sense Error Rate',
81 '222': 'Loaded Hours',
82 '223': 'Load/Unload Retries',
83 '224': 'Load Friction',
84 '225': 'Load/Unload Cycles',
85 '226': 'Load-in Time',
86 '227': 'Torque Amplification Count',
87 '228': 'Power-Off Retracts',
88 '230': 'GMR Head Amplitude',
90 '232': 'Available Reserved Space',
91 '233': 'Media Wearout Indicator',
92 '240': 'Head Flying Hours',
93 '241': 'Total LBAs Written',
94 '242': 'Total LBAs Read',
95 '250': 'Read Error Retry Rate'
# Record kept per discovered smartd log file:
#   name   - full path to the csv log file
#   size   - file size seen on the previous poll (0 until first poll)
#   number - index of this record in self.disks (used for _replace updates)
NAMED_DISKS = namedtuple('disks', ['name', 'size', 'number'])
101 class Service(SimpleService):
def __init__(self, configuration=None, name=None):
    """Initialise the smartd log collector job from its configuration."""
    SimpleService.__init__(self, configuration=configuration, name=name)
    conf = self.configuration
    # One smartd csv triplet: "attribute id;normalized value;raw value".
    self.regex = compile(r'(\d+);(\d+);(\d+)')
    self.log_path = conf.get('log_path', '/var/log/smartd')
    self.raw_values = conf.get('raw_values')
    self.attr = conf.get('smart_attributes', [])
    # Values collected on the last successful poll (served when logs are unchanged).
    self.previous_data = {}
# Can't start without smartd readable disk log files
112 disks = find_disks_in_log_path(self.log_path)
114 self.error('Can\'t locate any smartd log files in %s' % self.log_path)
117 # List of namedtuples to track smartd log file size
118 self.disks = [NAMED_DISKS(name=disks[i], size=0, number=i) for i in range(len(disks))]
124 self.error('Can\'t collect any data. Sorry.')
127 def _get_raw_data(self, queue, disk):
128 # The idea is to open a file.
130 # Seek backward until '\n' symbol appears
131 # If '\n' is found or it's the beginning of the file
132 # readline()! (last or first line)
133 with open(disk, 'rb') as f:
135 while f.read(1) != b'\n':
139 result = f.readline()
141 result = result.decode(encoding='utf-8')
142 result = self.regex.findall(result)
144 queue.put([basename(disk), result])
147 threads, result = list(), list()
151 # If the size has not changed there is no reason to poll log files.
152 disks = [disk for disk in self.disks if self.size_changed(disk)]
155 th = Thread(target=self._get_raw_data, args=(queue, disk.name))
159 for thread in threads:
161 result.append(queue.get())
163 # Data from last real poll
164 return self.previous_data or None
167 for a, n, r in elem[1]:
168 to_netdata.update({'_'.join([elem[0], a]): r if self.raw_values else n})
170 self.previous_data.update(to_netdata)
172 return to_netdata or None
def size_changed(self, disk):
    """Track whether *disk*'s smartd log file changed size since last poll.

    NOTE(review): lines are missing from this excerpt — the getsize()
    call is presumably wrapped in try/except OSError with the cleanup
    below as the handler, and the True/False returns are not visible.
    Confirm against the full file before relying on this block.
    """
    # We are not interested in log files:
    # 2. size is not changed since last poll
    size = getsize(disk.name)
    # Non-zero and different from the cached size: remember the new size.
    if size != disk.size and size:
        self.disks[disk.number] = disk._replace(size=size)
    # Remove unreadable/nonexisting log files from list of disks and previous_data
    self.disks.remove(disk)
    # Drop every cached value whose key refers to this log file.
    self.previous_data = {k: v for k, v in self.previous_data.items() if basename(disk.name) not in k}
def create_charts(self):
    """Build self.order and self.definitions from ORDER and SMART_ATTR.

    NOTE(review): several lines are missing from this excerpt — e.g. the
    accumulator init/return inside create_lines and (presumably) an
    `if self.attr:` guard before the split() call below; as shown,
    split() would fail on the [] default. Confirm against the full file.
    """

    def create_lines(attrid):
        # One chart dimension per disk:
        # dimension id '<logname>_<attrid>', display name = log name sans extension.
        for disk in self.disks:
            name = basename(disk.name)
            result.append(['_'.join([name, attrid]), name[:name.index('.')], 'absolute'])

    # Add additional smart attributes to the ORDER. If something goes wrong we don't care.
    ORDER.extend(list(set(self.attr.split()) & SMART_ATTR.keys() - set(ORDER)))

    self.order = [''.join(['attrid', i]) for i in ORDER]
    self.definitions = dict()
    # Chart units depend on whether raw or normalized values are reported.
    units = 'raw' if self.raw_values else 'normalized'

    for k, v in {k: v for k, v in SMART_ATTR.items() if k in ORDER}.items():
        self.definitions.update({''.join(['attrid', k]): {
            'options': [None, v, units, v, 'smartd.attrid' + k, 'line'],
            'lines': create_lines(k)}})
def find_disks_in_log_path(log_path):
    """Return a list of readable smartd csv log paths found in *log_path*.

    A file qualifies when:
      1. `log_path` is a directory
      2. the file name ends with '.csv'
      3. it is a regular file readable by the current process

    Returns None when `log_path` is not a directory, so callers can keep
    using simple truthiness checks (preserves previous behaviour).
    """
    if not isdir(log_path):
        return None
    # Cheap name test first; stat-based checks run only on '.csv'
    # candidates, and join() is computed once per entry (was three times).
    candidates = (join(log_path, name) for name in listdir(log_path) if name.endswith('.csv'))
    return [path for path in candidates if isfile(path) and access(path, R_OK)]