1 # -*- coding: utf-8 -*-
2 # Description: web log netdata python.d module
5 from base import LogService
8 from os import access, R_OK
9 from os.path import getsize
10 from collections import namedtuple
11 from copy import deepcopy
13 from itertools import zip_longest
15 from itertools import izip_longest as zip_longest
# Order in which charts are presented to netdata.
ORDER = [
    'response_statuses',
    'response_codes',
    'bandwidth',
    'response_time',
    'requests_per_url',
    'http_method',
    'requests_per_ipproto',
    'clients',
    'clients_all',
]
24 'options': [None, 'Response Codes', 'requests/s', 'responses', 'web_log.response_codes', 'stacked'],
26 ['2xx', '2xx', 'incremental'],
27 ['5xx', '5xx', 'incremental'],
28 ['3xx', '3xx', 'incremental'],
29 ['4xx', '4xx', 'incremental'],
30 ['1xx', '1xx', 'incremental'],
31 ['0xx', 'other', 'incremental'],
32 ['unmatched', 'unmatched', 'incremental']
35 'options': [None, 'Bandwidth', 'KB/s', 'bandwidth', 'web_log.bandwidth', 'area'],
37 ['resp_length', 'received', 'incremental', 1, 1024],
38 ['bytes_sent', 'sent', 'incremental', -1, 1024]
41 'options': [None, 'Processing Time', 'milliseconds', 'timings', 'web_log.response_time', 'area'],
43 ['resp_time_min', 'min', 'incremental', 1, 1000],
44 ['resp_time_max', 'max', 'incremental', 1, 1000],
45 ['resp_time_avg', 'avg', 'incremental', 1, 1000]
48 'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'clients', 'web_log.clients', 'stacked'],
50 ['unique_cur_ipv4', 'ipv4', 'incremental', 1, 1],
51 ['unique_cur_ipv6', 'ipv6', 'incremental', 1, 1]
54 'options': [None, 'All Time Unique Client IPs', 'unique ips', 'clients', 'web_log.clients_all', 'stacked'],
56 ['unique_tot_ipv4', 'ipv4', 'absolute', 1, 1],
57 ['unique_tot_ipv6', 'ipv6', 'absolute', 1, 1]
60 'options': [None, 'Requests Per HTTP Method', 'requests/s', 'http methods', 'web_log.http_method', 'stacked'],
63 'requests_per_ipproto': {
64 'options': [None, 'Requests Per IP Protocol', 'requests/s', 'ip protocols', 'web_log.requests_per_ipproto',
67 ['req_ipv4', 'ipv4', 'incremental', 1, 1],
68 ['req_ipv6', 'ipv6', 'incremental', 1, 1]
70 'response_statuses': {
71 'options': [None, 'Response Statuses', 'requests/s', 'responses', 'web_log.response_statuses',
74 ['successful_requests', 'success', 'incremental', 1, 1],
75 ['server_errors', 'error', 'incremental', 1, 1],
76 ['redirects', 'redirect', 'incremental', 1, 1],
77 ['bad_requests', 'bad', 'incremental', 1, 1],
78 ['other_requests', 'other', 'incremental', 1, 1]
# Pair of (human-readable category name, compiled regex) for the
# user-configured URL categories.
NAMED_URL_PATTERN = namedtuple('URL_PATTERN', 'description pattern')
85 class Service(LogService):
86 def __init__(self, configuration=None, name=None):
87 LogService.__init__(self, configuration=configuration, name=name)
88 # Variables from module configuration file
89 self.log_path = self.configuration.get('path')
90 self.detailed_response_codes = self.configuration.get('detailed_response_codes', True)
91 self.all_time = self.configuration.get('all_time', True)
92 self.url_pattern = self.configuration.get('categories') # dict
93 self.regex = None # will be assigned in 'find_regex' method
94 self.resp_time_func = None # will be assigned in 'find_regex' method
95 self._get_data = None # will be assigned in 'check' method.
96 self.order = None # will be assigned in 'create_*_method' method.
97 self.definitions = None # will be assigned in 'create_*_method' method.
98 self.detailed_chart = None # will be assigned in 'create_*_method' method.
99 self.http_method_chart = None # will be assigned in 'create_*_method' method.
100 # sorted list of unique IPs
101 self.unique_all_time = list()
102 # if there is no new logs this dict returned to netdata
103 self.data = {'bytes_sent': 0, 'resp_length': 0, 'resp_time_min': 0, 'resp_time_max': 0,
104 'resp_time_avg': 0, 'unique_cur_ipv4': 0, 'unique_cur_ipv6': 0, '2xx': 0,
105 '5xx': 0, '3xx': 0, '4xx': 0, '1xx': 0, '0xx': 0, 'unmatched': 0, 'req_ipv4': 0,
106 'req_ipv6': 0, 'unique_tot_ipv4': 0, 'unique_tot_ipv6': 0, 'successful_requests': 0,
107 'redirects': 0, 'bad_requests': 0, 'server_errors': 0, 'other_requests': 0}
110 if not self.log_path:
111 self.error('log path is not specified')
114 # log_path must be readable
115 if not access(self.log_path, R_OK):
116 self.error('%s not readable or not exist' % self.log_path)
119 # log_path file should not be empty
120 if not getsize(self.log_path):
121 self.error('%s is empty' % self.log_path)
124 # Read last line (or first if there is only one line)
125 with open(self.log_path, 'rb') as logs:
127 while logs.read(1) != b'\n':
131 last_line = logs.readline()
134 last_line = last_line.decode()
135 except UnicodeDecodeError:
137 last_line = last_line.decode(encoding='utf-8')
138 except (TypeError, UnicodeDecodeError) as error:
139 self.error(str(error))
143 regex_name = self.find_regex(last_line)
145 self.error('Unknown log format. Can\'t parse %s' % self.log_path)
148 if regex_name.startswith('acs_'):
149 self.create_access_charts(regex_name)
150 if regex_name == 'acs_default':
151 self.info('Not all data collected. You need to modify LogFormat.')
152 self._get_data = self._get_access_data
153 self.info('Used regex: %s' % regex_name)
156 # If it's not access_logs.. Not used at the moment
159 def find_regex(self, last_line):
161 :param last_line: str: literally last line from log file
163 It's sad but different web servers has different logs formats
164 We need to find appropriate regex for current log file
165 All logic is do a regex search through the string for all patterns
166 until we find something or fail.
168 # REGEX: 1.IPv4 address 2.HTTP method 3. URL 4. Response code
169 # 5. Bytes sent 6. Response length 7. Response process time
170 acs_default = re.compile(r'([\da-f.:]+)'
176 acs_apache_ext_insert = re.compile(r'([\da-f.:]+)'
184 acs_apache_ext_append = re.compile(r'([\da-f.:]+)'
194 acs_nginx_ext_insert = re.compile(r'([\da-f.:]+)'
202 acs_nginx_ext_append = re.compile(r'([\da-f.:]+)'
211 r_regex = [acs_apache_ext_insert, acs_apache_ext_append, acs_nginx_ext_insert,
212 acs_nginx_ext_append, acs_default]
213 r_function = [lambda x: x, lambda x: x, lambda x: x * 1000000, lambda x: x * 1000000, lambda x: x]
214 r_name = ['acs_apache_ext_insert', 'acs_apache_ext_append', 'acs_nginx_ext_insert',
215 'acs_nginx_ext_append', 'acs_default']
216 regex_function_name = zip(r_regex, r_function, r_name)
219 for regex, function, name in regex_function_name:
220 if regex.search(last_line):
222 self.resp_time_func = function
227 def create_access_charts(self, regex_name):
229 :param regex_name: str: regex name from 'find_regex' method. Ex.: 'apache_extended', 'nginx_extended'
231 Create additional charts depending on the 'find_regex' result (parsed_line) and configuration file
232 1. 'time_response' chart is removed if there is no 'time_response' in logs.
233 2. Other stuff is just remove/add chart depending on yes/no in conf
235 def find_job_name(override_name, name):
237 :param override_name: str: 'name' var from configuration file
238 :param name: str: 'job_name' from configuration file
239 :return: str: new job name
240 We need this for dynamic charts. Actually same logic as in python.d.plugin.
242 add_to_name = override_name or name
244 return '_'.join(['web_log', re.sub('\s+', '_', add_to_name)])
248 self.order = ORDER[:]
249 self.definitions = deepcopy(CHARTS)
251 job_name = find_job_name(self.override_name, self.name)
252 self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
253 ' "Detailed Response Codes" requests/s responses' \
254 ' web_log.detailed_response_codes stacked 1 %s\n' % (job_name, self.update_every)
255 self.http_method_chart = 'CHART %s.http_method' \
256 ' "" "Requests Per HTTP Method" requests/s "http methods"' \
257 ' web_log.http_method stacked 2 %s\n' % (job_name, self.update_every)
259 # Remove 'request_time' chart from ORDER if request_time not in logs
260 if regex_name == 'acs_default':
261 self.order.remove('response_time')
262 # Remove 'clients_all' chart from ORDER if specified in the configuration
263 if not self.all_time:
264 self.order.remove('clients_all')
265 # Add 'detailed_response_codes' chart if specified in the configuration
266 if self.detailed_response_codes:
267 self.order.append('detailed_response_codes')
268 self.definitions['detailed_response_codes'] = {'options': [None, 'Detailed Response Codes', 'requests/s',
269 'responses', 'web_log.detailed_response_codes',
273 # Add 'requests_per_url' chart if specified in the configuration
275 self.url_pattern = [NAMED_URL_PATTERN(description=k, pattern=re.compile(v)) for k, v
276 in self.url_pattern.items()]
277 self.definitions['requests_per_url'] = {'options': [None, 'Requests Per Url', 'requests/s',
278 'urls', 'web_log.requests_per_url', 'stacked'],
279 'lines': [['other_url', 'other', 'incremental']]}
280 for elem in self.url_pattern:
281 self.definitions['requests_per_url']['lines'].append([elem.description, elem.description,
283 self.data.update({elem.description: 0})
284 self.data.update({'other_url': 0})
286 self.order.remove('requests_per_url')
288 def add_new_dimension(self, dimension, line_list, chart_string, key):
290 :param dimension: str: response status code. Ex.: '202', '499'
291 :param line_list: list: Ex.: ['202', '202', 'incremental']
292 :param chart_string: Current string we need to pass to netdata to rebuild the chart
293 :param key: str: CHARTS dict key (chart name). Ex.: 'response_time'
294 :return: str: new chart string = previous + new dimensions
296 self.data.update({dimension: 0})
297 # SET method check if dim in _dimensions
298 self._dimensions.append(dimension)
299 # UPDATE method do SET only if dim in definitions
300 self.definitions[key]['lines'].append(line_list)
302 chart += "%s %s\n" % ('DIMENSION', ' '.join(line_list))
306 def _get_access_data(self):
309 :return: dict OR None
310 None if _get_raw_data method fails.
311 In all other cases - dict.
313 raw = self._get_raw_data()
317 request_time, unique_current = list(), list()
318 request_counter = {'count': 0, 'sum': 0}
319 ip_address_counter = {'unique_cur_ip': 0}
321 match = self.regex.search(line)
323 match_dict = dict(zip_longest('address method url code sent resp_length resp_time'.split(),
326 code = ''.join([match_dict['code'][0], 'xx'])
329 self.data['0xx'] += 1
330 # detailed response code
331 if self.detailed_response_codes:
332 self._get_data_detailed_response_codes(match_dict['code'])
334 self._get_data_statuses(match_dict['code'])
337 self._get_data_per_url(match_dict['url'])
338 # requests per http method
339 self._get_data_http_method(match_dict['method'])
341 self.data['bytes_sent'] += int(match_dict['sent'] if '-' not in match_dict['sent'] else 0)
342 # request processing time and bandwidth received
343 if match_dict['resp_length'] and match_dict['resp_time']:
344 self.data['resp_length'] += int(match_dict['resp_length'])
345 resp_time = self.resp_time_func(float(match_dict['resp_time']))
346 bisect.insort_left(request_time, resp_time)
347 request_counter['count'] += 1
348 request_counter['sum'] += resp_time
349 # requests per ip proto
350 proto = 'ipv4' if '.' in match_dict['address'] else 'ipv6'
351 self.data['req_' + proto] += 1
353 if address_not_in_pool(self.unique_all_time, match_dict['address'],
354 self.data['unique_tot_ipv4'] + self.data['unique_tot_ipv6']):
355 self.data['unique_tot_' + proto] += 1
356 if address_not_in_pool(unique_current, match_dict['address'], ip_address_counter['unique_cur_ip']):
357 self.data['unique_cur_' + proto] += 1
358 ip_address_counter['unique_cur_ip'] += 1
360 self.data['unmatched'] += 1
364 self.data['resp_time_min'] += int(request_time[0])
365 self.data['resp_time_avg'] += int(round(float(request_counter['sum']) / request_counter['count']))
366 self.data['resp_time_max'] += int(request_time[-1])
369 def _get_data_detailed_response_codes(self, code):
371 :param code: str: CODE from parsed line. Ex.: '202, '499'
373 Calls add_new_dimension method If the value is found for the first time
375 if code not in self.data:
376 chart_string_copy = self.detailed_chart
377 self.detailed_chart = self.add_new_dimension(code, [code, code, 'incremental'],
378 chart_string_copy, 'detailed_response_codes')
381 def _get_data_http_method(self, method):
383 :param method: str: METHOD from parsed line. Ex.: 'GET', 'POST'
385 Calls add_new_dimension method If the value is found for the first time
387 if method not in self.data:
388 chart_string_copy = self.http_method_chart
389 self.http_method_chart = self.add_new_dimension(method, [method, method, 'incremental'],
390 chart_string_copy, 'http_method')
391 self.data[method] += 1
393 def _get_data_per_url(self, url):
395 :param url: str: URL from parsed line
397 Scan through string looking for the first location where patterns produce a match for all user
401 for elem in self.url_pattern:
402 if elem.pattern.search(url):
403 self.data[elem.description] += 1
407 self.data['other_url'] += 1
409 def _get_data_statuses(self, code):
411 :param code: str: response status code. Ex.: '202', '499'
415 if code_class == '2' or code == '304' or code_class == '1':
416 self.data['successful_requests'] += 1
417 elif code_class == '3':
418 self.data['redirects'] += 1
419 elif code_class == '4':
420 self.data['bad_requests'] += 1
421 elif code_class == '5':
422 self.data['server_errors'] += 1
424 self.data['other_requests'] += 1
427 def address_not_in_pool(pool, address, pool_size):
429 :param pool: list of ip addresses
430 :param address: ip address
431 :param pool_size: current size of pool
432 :return: True if address not in pool. False if address in pool
434 index = bisect.bisect_left(pool, address)
435 if index < pool_size:
436 if pool[index] == address:
439 bisect.insort_left(pool, address)
442 bisect.insort_left(pool, address)