1 # -*- coding: utf-8 -*-
2 # Description: web log netdata python.d module
5 from base import LogService
8 from os import access, R_OK
9 from os.path import getsize
10 from collections import defaultdict, namedtuple
11 from copy import deepcopy
13 from itertools import zip_longest
15 from itertools import izip_longest as zip_longest
# Order in which charts appear on the netdata dashboard.
ORDER = [
    'response_codes',
    'response_time',
    'requests_per_url',
    'http_method',
    'bandwidth',
    'clients',
    'clients_all',
]
# NOTE(review): the 'CHARTS = {' header, the per-chart dict keys
# (e.g. "'response_codes': {") and the "'lines': [" openers are not visible
# in this chunk -- only the chart bodies below survived extraction.
# Dimension format: [id, name, algorithm] or [id, name, algorithm, multiplier, divisor].

# response_codes chart: requests bucketed by status-code family.
'options': [None, 'Response Codes', 'requests/s', 'responses', 'web_log.response_codes', 'stacked'],
['2xx', '2xx', 'absolute'],
['5xx', '5xx', 'absolute'],
['3xx', '3xx', 'absolute'],
['4xx', '4xx', 'absolute'],
['1xx', '1xx', 'absolute'],
# '0xx' collects codes that do not fall into the 1xx-5xx keys -- see the
# KeyError fallback in the data-collection code
['0xx', 'other', 'absolute'],
# lines the log-format regex could not parse at all
['unmatched', 'unmatched', 'absolute']

# bandwidth chart: bytes converted to KB (divisor 1024); 'sent' is drawn
# below the axis (multiplier -1).
'options': [None, 'Bandwidth', 'KB/s', 'bandwidth', 'web_log.bandwidth', 'area'],
['resp_length', 'received', 'absolute', 1, 1024],
['bytes_sent', 'sent', 'absolute', -1, 1024]

# response_time chart: per-poll min/avg/max processing time.
# NOTE(review): the divisor (last element) is rewritten to 1000 for apache
# logs in create_charts().
'options': [None, 'Processing Time', 'milliseconds', 'timings', 'web_log.response_time', 'area'],
['resp_time_min', 'min', 'absolute', 1, 1],
['resp_time_max', 'max', 'absolute', 1, 1],
['resp_time_avg', 'avg', 'absolute', 1, 1]

# clients chart: unique client IPs seen during the current poll only.
'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'unique clients', 'web_log.clients', 'stacked'],
['unique_cur_ipv4', 'ipv4', 'absolute', 1, 1],
['unique_cur_ipv6', 'ipv6', 'absolute', 1, 1]

# clients_all chart: unique client IPs accumulated since the collector started.
'options': [None, 'All Time Unique Client IPs', 'unique ips', 'unique clients', 'web_log.clients_all', 'stacked'],
['unique_tot_ipv4', 'ipv4', 'absolute', 1, 1],
['unique_tot_ipv6', 'ipv6', 'absolute', 1, 1]

# http_method chart: dimensions are added dynamically as methods are seen.
'options': [None, 'Requests Per HTTP Method', 'requests/s', 'requests', 'web_log.http_method', 'stacked'],
# Record type for a user-defined URL category: a human-readable description
# plus the (compiled) pattern used to classify request URLs.
NAMED_URL_PATTERN = namedtuple('URL_PATTERN', 'description pattern')
class Service(LogService):
    def __init__(self, configuration=None, name=None):
        """Read job options from the configuration and set up counters.

        :param configuration: dict: job configuration (from web_log.conf)
        :param name: str: job name
        """
        LogService.__init__(self, configuration=configuration, name=name)
        # Variables from module configuration file
        self.log_path = self.configuration.get('path')
        # per-exact-status-code chart, enabled by default
        self.detailed_response_codes = self.configuration.get('detailed_response_codes', True)
        # all-time unique-clients chart, enabled by default
        self.all_time = self.configuration.get('all_time', True)
        self.url_pattern = self.configuration.get('categories')  # dict
        # NOTE(review): one original line is elided here (between 'categories'
        # and the comment below) -- contents unknown in this chunk.
        # sorted list of unique IPs
        self.unique_all_time = list()
        # dict for values that should not be zeroed every poll
        self.storage = {'unique_tot_ipv4': 0, 'unique_tot_ipv6': 0}
        # if there is no new logs this dict + self.storage returned to netdata
        self.data = {'bytes_sent': 0, 'resp_length': 0, 'resp_time_min': 0,
                     'resp_time_max': 0, 'resp_time_avg': 0, 'unique_cur_ipv4': 0,
                     'unique_cur_ipv6': 0, '2xx': 0, '5xx': 0, '3xx': 0, '4xx': 0,
                     '1xx': 0, '0xx': 0, 'unmatched': 0}
# NOTE(review): the enclosing 'def check(self):' line and several guard lines
# (the 'if not self.log_path:'-style conditions and their 'return False'
# branches) are not visible in this chunk; the statements below are the
# surviving interior of that method.
self.error('log path is not specified')

# log_path must be readable
if not access(self.log_path, R_OK):
    self.error('%s not readable or not exist' % self.log_path)

# log_path file should not be empty
if not getsize(self.log_path):
    self.error('%s is empty' % self.log_path)

# Read last line (or first if there is only one line)
with open(self.log_path, 'rb') as logs:
    # scan until a newline boundary is hit
    while logs.read(1) != b'\n':
        # NOTE(review): the loop body (presumably seeking backwards through
        # the file) is elided in this chunk -- TODO confirm.
    last_line = logs.readline().decode(encoding='utf-8')

# pick a log-format regex using the sampled last line
parsed_line, regex_name = self.find_regex(last_line)
# NOTE(review): the 'no regex matched' guard around this error is elided.
self.error('Can\'t parse output')
# build ORDER/definitions from what the chosen format actually provides
self.create_charts(parsed_line[0], regex_name)
# a 5-field match means the LogFormat lacks response length/time fields
if len(parsed_line[0]) == 5:
    self.info('Not all data collected. You need to modify LogFormat.')
def find_regex(self, last_line):
    """
    :param last_line: str: literally last line from log file
    :return: parsed line, regex name (the one that matches) OR None, None
    It's sad but different web servers has different logs formats
    We need to find appropriate regex for current log file
    All logic is do a regex search through the string for all patterns
    until we find something or fail.
    """
    # REGEX: 1.IPv4 address 2.HTTP method 3. URL 4. Response code
    # 5. Bytes sent 6. Response length 7. Response process time
    # NOTE(review): the continuation lines of the three patterns below are
    # not visible in this chunk -- each re.compile() call is truncated after
    # its first capture group.
    default = re.compile(r'([\da-f.:]+)'
    apache_extended = re.compile(r'([\da-f.:]+)'
    nginx_extended = re.compile(r'([\da-f.:]+)'
    # Pair each pattern with a transform normalising its response-time field
    # and with a name used later by create_charts(). The nginx transform
    # multiplies by 1000 (nginx presumably logs seconds -- TODO confirm);
    # apache values are rescaled later via the chart divisor instead.
    regex_function = zip([apache_extended, nginx_extended, default],
                         [lambda x: x, lambda x: x * 1000, lambda x: x],
                         ['apache_extended', 'nginx_extended', 'default'])
    for regex, function, name in regex_function:
        if regex.search(last_line):
            self.resp_time_func = function
            # NOTE(review): the lines assigning self.regex and regex_name
            # (and presumably a break plus a no-match fallback returning
            # None, None) are elided in this chunk.
    return self.regex.findall(last_line), regex_name
def create_charts(self, parsed_line, regex_name):
    """
    :param parsed_line: list: re.findall result.
    :param regex_name: str: regex name from 'find_regex' method. Ex.: 'apache_extended', 'nginx_extended'
    :return:
    Create additional charts depending on the 'find_regex' result (parsed_line) and configuration file
    1. 'time_response' chart is removed if there is no 'time_response' in logs.
    2. We need to change divisor for 'response_time' chart for apache (time in microseconds in logs)
    3. Other stuff is just remove/add chart depending on yes/no in conf
    """
    def find_job_name(override_name, name):
        """
        :param override_name: str: 'name' var from configuration file
        :param name: str: 'job_name' from configuration file
        :return: str: new job name
        We need this for dynamic charts. Actually same logic as in python.d.plugin.
        """
        add_to_name = override_name or name
        # NOTE(review): a guard for an empty add_to_name (returning a plain
        # 'web_log') is presumably elided here -- TODO confirm.
        return '_'.join(['web_log', re.sub('\s+', '_', add_to_name)])

    # start from the static chart set; definitions is deep-copied so the
    # per-job divisor tweaks below do not mutate the module-level CHARTS
    self.order = ORDER[:]
    self.definitions = deepcopy(CHARTS)

    job_name = find_job_name(self.override_name, self.name)
    # Pre-built CHART protocol lines for the two dynamically-growing charts;
    # DIMENSION lines are appended at runtime by add_new_dimension().
    self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
                          ' "Response Codes" requests/s responses' \
                          ' web_log.detailed_resp stacked 1 %s\n' % (job_name, self.update_every)
    self.http_method_chart = 'CHART %s.http_method' \
                             ' "" "HTTP Methods" requests/s requests' \
                             ' web_log.http_method stacked 2 %s\n' % (job_name, self.update_every)

    # apache logs processing time in microseconds -> divisor 1000 gives ms
    if 'apache' in regex_name:
        self.definitions['response_time']['lines'][0][4] = 1000
        self.definitions['response_time']['lines'][1][4] = 1000
        self.definitions['response_time']['lines'][2][4] = 1000

    # Remove 'request_time' chart from ORDER if request_time not in logs
    if len(parsed_line) < 7:
        self.order.remove('response_time')
    # Remove 'clients_all' chart from ORDER if specified in the configuration
    if not self.all_time:
        self.order.remove('clients_all')
    # Add 'detailed_response_codes' chart if specified in the configuration
    if self.detailed_response_codes:
        self.order.append('detailed_response_codes')
        self.definitions['detailed_response_codes'] = {'options': [None, 'Detailed Response Codes', 'requests/s',
                                                                   'responses', 'web_log.detailed_resp', 'stacked'],
        # NOTE(review): the 'lines' entry and the closing brace of this dict
        # are not visible in this chunk.

    # Add 'requests_per_url' chart if specified in the configuration
    # NOTE(review): an 'if self.url_pattern:' guard is presumably elided
    # before the block below -- TODO confirm.
    # compile the user's category patterns once, up front
    self.url_pattern = [NAMED_URL_PATTERN(description=k, pattern=re.compile(v)) for k, v in self.url_pattern.items()]
    self.definitions['requests_per_url'] = {'options': [None, 'Requests Per Url', 'requests/s',
                                                        'requests', 'web_log.url_pattern', 'stacked'],
                                            'lines': [['other_url', 'other', 'absolute']]}
    for elem in self.url_pattern:
        self.definitions['requests_per_url']['lines'].append([elem.description, elem.description, 'absolute'])
        self.data.update({elem.description: 0})
    self.data.update({'other_url': 0})
    # NOTE(review): an 'else:' branch (no categories configured) presumably
    # precedes the removal below -- TODO confirm.
    self.order.remove('requests_per_url')
def add_new_dimension(self, dimension, line_list, chart_string, key):
    """
    Register a dimension discovered at runtime and rebuild its chart string.

    :param dimension: str: response status code. Ex.: '202', '499'
    :param line_list: list: Ex.: ['202', '202', 'absolute']
    :param chart_string: str: current string we need to pass to netdata to rebuild the chart
    :param key: str: CHARTS dict key (chart name). Ex.: 'response_time'
    :return: str: new chart string = previous + new dimension
    """
    # Remember the dimension so later polls see it as already known.
    self.storage.update({dimension: 0})
    # SET method check if dim in _dimensions
    self._dimensions.append(dimension)
    # UPDATE method do SET only if dim in definitions
    self.definitions[key]['lines'].append(line_list)
    # FIX(review): the visible original used 'chart' before assigning it and
    # never returned -- yet both call sites assign the result back
    # (self.detailed_chart / self.http_method_chart) and the docstring
    # promises the extended chart string. Build it, emit it so netdata
    # re-creates the chart with the new dimension, and return it.
    chart = chart_string
    chart += "%s %s\n" % ('DIMENSION', ' '.join(line_list))
    print(chart)
    return chart
# NOTE(review): the enclosing 'def _get_data(self):' line, the
# 'for line in raw:' loop header and several if/else/try lines are not
# visible in this chunk; comments below mark where they are elided.
"""
:return: dict OR None
None if _get_raw_data method fails.
In all other cases - dict.
"""
raw = self._get_raw_data()
# ('if raw is None: return None' guard elided here)
request_time, unique_current = list(), list()
request_counter = {'count': 0, 'sum': 0}
# ('to_netdata = dict()' initialisation elided here)
to_netdata.update(self.data)
default_dict = defaultdict(lambda: 0)
# ('for line in raw:' elided -- the statements below run once per log line)
match = self.regex.search(line)
# ('if match:' elided -- only parsed lines are counted in detail)
match_dict = dict(zip_longest('address method url code sent resp_length resp_time'.split(), match.groups()))
# ('try:' elided -- bucket the status code into its family, e.g. '2xx')
code = ''.join([match_dict['code'][0], 'xx'])
to_netdata[code] += 1
# ('except KeyError:' elided -- families outside the known keys go to 0xx)
to_netdata['0xx'] += 1
# detailed response code
if self.detailed_response_codes:
    self._get_data_detailed_response_codes(match_dict['code'], default_dict)
# requests per url ('if self.url_pattern:' guard presumably elided)
self._get_data_per_url(match_dict['url'], default_dict)
# requests per http method
self._get_data_http_method(match_dict['method'], default_dict)
# bandwidth sent
to_netdata['bytes_sent'] += int(match_dict['sent'])
# response length/time are only present in the extended log formats
if match_dict['resp_length'] and match_dict['resp_time']:
    to_netdata['resp_length'] += int(match_dict['resp_length'])
    resp_time = self.resp_time_func(float(match_dict['resp_time']))
    # keep request_time sorted so min/max are simply its ends
    bisect.insort_left(request_time, resp_time)
    request_counter['count'] += 1
    request_counter['sum'] += resp_time
# unique clients -- all time (pool size = ipv4 + ipv6 totals so far)
if address_not_in_pool(self.unique_all_time, match_dict['address'],
                       self.storage['unique_tot_ipv4'] + self.storage['unique_tot_ipv6']):
    if '.' in match_dict['address']:
        self.storage['unique_tot_ipv4'] += 1
    # ('else:' elided -- addresses without a dot counted as IPv6)
    self.storage['unique_tot_ipv6'] += 1
# unique clients -- current poll
if address_not_in_pool(unique_current, match_dict['address'],
                       to_netdata['unique_cur_ipv4'] + to_netdata['unique_cur_ipv6']):
    if '.' in match_dict['address']:
        to_netdata['unique_cur_ipv4'] += 1
    # ('else:' elided)
    to_netdata['unique_cur_ipv6'] += 1
# ('else:' elided -- lines the regex could not parse at all)
to_netdata['unmatched'] += 1
# ('if request_time:' guard elided -- summarize response times for the poll)
to_netdata['resp_time_min'] = request_time[0]
to_netdata['resp_time_avg'] = float(request_counter['sum']) / request_counter['count']
to_netdata['resp_time_max'] = request_time[-1]
to_netdata.update(self.storage)
to_netdata.update(default_dict)
# ('return to_netdata' elided)
def _get_data_detailed_response_codes(self, code, default_dict):
    """
    Count one response under its exact status code.

    :param code: str: CODE from parsed line. Ex.: '202', '499'
    :param default_dict: defaultdict
    On the first sighting of a code, a new dimension is added to the
    detailed-response-codes chart via add_new_dimension.
    """
    never_seen = code not in self.storage
    if never_seen:
        # Grow the chart and keep the refreshed chart string for reuse.
        self.detailed_chart = self.add_new_dimension(code,
                                                     [code, code, 'absolute'],
                                                     self.detailed_chart,
                                                     'detailed_response_codes')
    default_dict[code] += 1
def _get_data_http_method(self, method, default_dict):
    """
    Count one request under its HTTP method.

    :param method: str: METHOD from parsed line. Ex.: 'GET', 'POST'
    :param default_dict: defaultdict
    On the first sighting of a method, a new dimension is added to the
    http_method chart via add_new_dimension.
    """
    never_seen = method not in self.storage
    if never_seen:
        # Grow the chart and keep the refreshed chart string for reuse.
        self.http_method_chart = self.add_new_dimension(method,
                                                        [method, method, 'absolute'],
                                                        self.http_method_chart,
                                                        'http_method')
    default_dict[method] += 1
def _get_data_per_url(self, url, default_dict):
    """
    Classify one request URL into the first matching user-defined category.

    :param url: str: URL from parsed line
    :param default_dict: defaultdict
    Scan through string looking for the first location where patterns produce
    a match for all user defined patterns; a URL matching no pattern is
    counted under the catch-all 'other_url' dimension.
    """
    # FIX(review): the visible original incremented 'other_url' on every
    # line and counted a URL once per matching pattern, contradicting the
    # "first location" contract and the 'other' fallback dimension.
    for elem in self.url_pattern:
        if elem.pattern.search(url):
            default_dict[elem.description] += 1
            # stop at the first matching category -- count each request once
            break
    else:
        # no user pattern matched
        default_dict['other_url'] += 1
def address_not_in_pool(pool, address, pool_size):
    """
    Report whether an address is new and, if so, record it.

    :param pool: list of ip addresses (kept sorted)
    :param address: ip address
    :param pool_size: current size of pool
    :return: True if address not pool and False address in pool
    If address not in pool function add address to pool.
    """
    # FIX(review): the visible original had no return statements, while its
    # docstring and both call sites branch on a True/False result.
    index = bisect.bisect_left(pool, address)
    # Only indexes below pool_size can hold previously-seen addresses.
    if index < pool_size and pool[index] == address:
        return False
    # New address: insort keeps the pool sorted so bisect stays valid.
    bisect.insort_left(pool, address)
    return True