1 # -*- coding: utf-8 -*-
2 # Description: web log netdata python.d module
5 from base import LogService
8 from os import access, R_OK
9 from os.path import getsize
10 from collections import namedtuple
11 from copy import deepcopy
13 from itertools import zip_longest
15 from itertools import izip_longest as zip_longest
# Chart draw order on the dashboard.
ORDER = ['response_statuses', 'response_codes', 'bandwidth', 'response_time', 'requests_per_url', 'http_method',
         'requests_per_ipproto', 'clients', 'clients_all']

# Chart definitions in the python.d.plugin format:
#   'options': [name override, title, units, family, context, chart type]
#   'lines':   one [id, name, algorithm, multiplier, divisor] entry per dimension
CHARTS = {
    'response_codes': {
        'options': [None, 'Response Codes', 'requests/s', 'responses', 'web_log.response_codes', 'stacked'],
        'lines': [
            ['2xx', '2xx', 'incremental'],
            ['5xx', '5xx', 'incremental'],
            ['3xx', '3xx', 'incremental'],
            ['4xx', '4xx', 'incremental'],
            ['1xx', '1xx', 'incremental'],
            ['0xx', 'other', 'incremental'],
            ['unmatched', 'unmatched', 'incremental']
        ]},
    'bandwidth': {
        'options': [None, 'Bandwidth', 'KB/s', 'bandwidth', 'web_log.bandwidth', 'area'],
        'lines': [
            ['resp_length', 'received', 'incremental', 1, 1024],
            ['bytes_sent', 'sent', 'incremental', -1, 1024]
        ]},
    'response_time': {
        'options': [None, 'Processing Time', 'milliseconds', 'timings', 'web_log.response_time', 'area'],
        'lines': [
            ['resp_time_min', 'min', 'incremental', 1, 1000],
            ['resp_time_max', 'max', 'incremental', 1, 1000],
            ['resp_time_avg', 'avg', 'incremental', 1, 1000]
        ]},
    'clients': {
        'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'clients', 'web_log.clients', 'stacked'],
        'lines': [
            ['unique_cur_ipv4', 'ipv4', 'incremental', 1, 1],
            ['unique_cur_ipv6', 'ipv6', 'incremental', 1, 1]
        ]},
    'clients_all': {
        'options': [None, 'All Time Unique Client IPs', 'unique ips', 'clients', 'web_log.clients_all', 'stacked'],
        'lines': [
            ['unique_tot_ipv4', 'ipv4', 'absolute', 1, 1],
            ['unique_tot_ipv6', 'ipv6', 'absolute', 1, 1]
        ]},
    'http_method': {
        'options': [None, 'Requests Per HTTP Method', 'requests/s', 'http methods', 'web_log.http_method', 'stacked'],
        # dimensions are added at runtime as new HTTP methods show up in the log
        # (see add_new_dimension / _get_data_http_method)
        'lines': []},
    'requests_per_ipproto': {
        'options': [None, 'Requests Per IP Protocol', 'requests/s', 'ip protocols', 'web_log.requests_per_ipproto',
                    'stacked'],
        'lines': [
            ['req_ipv4', 'ipv4', 'incremental', 1, 1],
            ['req_ipv6', 'ipv6', 'incremental', 1, 1]
        ]},
    'response_statuses': {
        'options': [None, 'Response Statuses', 'requests/s', 'responses', 'web_log.response_statuses',
                    'stacked'],
        'lines': [
            ['successful_requests', 'success', 'incremental', 1, 1],
            ['server_errors', 'error', 'incremental', 1, 1],
            ['redirects', 'redirect', 'incremental', 1, 1],
            ['bad_requests', 'bad', 'incremental', 1, 1],
            ['other_requests', 'other', 'incremental', 1, 1]
        ]}
}

# (description, compiled regex) pair for a user-defined URL category.
NAMED_URL_PATTERN = namedtuple('URL_PATTERN', ['description', 'pattern'])
class Service(LogService):
    def __init__(self, configuration=None, name=None):
        """
        Web server access-log parser job.

        :param configuration: dict: job configuration (path, categories, flags)
        :param name: str: job name
        """
        LogService.__init__(self, configuration=configuration, name=name)
        # Variables from module configuration file
        self.log_path = self.configuration.get('path')
        self.detailed_response_codes = self.configuration.get('detailed_response_codes', True)
        self.all_time = self.configuration.get('all_time', True)
        self.url_pattern = self.configuration.get('categories')  # dict
        self.regex = None  # will be assigned in 'find_regex' method
        self.resp_time_func = None  # will be assigned in 'find_regex' method
        self._get_data = None  # will be assigned in 'check' method.
        self.order = None  # will be assigned in 'create_*_method' method.
        self.definitions = None  # will be assigned in 'create_*_method' method.
        self.detailed_chart = None  # will be assigned in 'create_*_method' method.
        self.http_method_chart = None  # will be assigned in 'create_*_method' method.
        # sorted list of unique IPs
        self.unique_all_time = list()
        # if there is no new logs this dict returned to netdata
        self.data = {'bytes_sent': 0, 'resp_length': 0, 'resp_time_min': 0, 'resp_time_max': 0,
                     'resp_time_avg': 0, 'unique_cur_ipv4': 0, 'unique_cur_ipv6': 0, '2xx': 0,
                     '5xx': 0, '3xx': 0, '4xx': 0, '1xx': 0, '0xx': 0, 'unmatched': 0, 'req_ipv4': 0,
                     'req_ipv6': 0, 'unique_tot_ipv4': 0, 'unique_tot_ipv6': 0, 'successful_requests': 0,
                     'redirects': 0, 'bad_requests': 0, 'server_errors': 0, 'other_requests': 0}
110 if not self.log_path:
111 self.error('log path is not specified')
114 # log_path must be readable
115 if not access(self.log_path, R_OK):
116 self.error('%s not readable or not exist' % self.log_path)
119 # log_path file should not be empty
120 if not getsize(self.log_path):
121 self.error('%s is empty' % self.log_path)
124 # Read last line (or first if there is only one line)
125 with open(self.log_path, 'rb') as logs:
127 while logs.read(1) != b'\n':
131 last_line = logs.readline().decode(encoding='utf-8')
134 regex_name = self.find_regex(last_line)
136 self.error('Can\'t parse %s' % self.log_path)
139 if regex_name.startswith('acs_'):
140 self.create_access_charts(regex_name)
141 if regex_name == 'acs_default':
142 self.info('Not all data collected. You need to modify LogFormat.')
143 self._get_data = self._get_access_data
144 self.info('Used regex: %s' % regex_name)
147 # If it's not access_logs.. Not used at the moment
150 def find_regex(self, last_line):
152 :param last_line: str: literally last line from log file
154 It's sad but different web servers has different logs formats
155 We need to find appropriate regex for current log file
156 All logic is do a regex search through the string for all patterns
157 until we find something or fail.
159 # REGEX: 1.IPv4 address 2.HTTP method 3. URL 4. Response code
160 # 5. Bytes sent 6. Response length 7. Response process time
161 acs_default = re.compile(r'([\da-f.:]+)'
167 acs_apache_ext_insert = re.compile(r'([\da-f.:]+)'
175 acs_apache_ext_append = re.compile(r'([\da-f.:]+)'
185 acs_nginx_ext_insert = re.compile(r'([\da-f.:]+)'
193 acs_nginx_ext_append = re.compile(r'([\da-f.:]+)'
202 r_regex = [acs_apache_ext_insert, acs_apache_ext_append, acs_nginx_ext_insert,
203 acs_nginx_ext_append, acs_default]
204 r_function = [lambda x: x, lambda x: x, lambda x: x * 1000000, lambda x: x * 1000000, lambda x: x]
205 r_name = ['acs_apache_ext_insert', 'acs_apache_ext_append', 'acs_nginx_ext_insert',
206 'acs_nginx_ext_append', 'acs_default']
207 regex_function_name = zip(r_regex, r_function, r_name)
210 for regex, function, name in regex_function_name:
211 if regex.search(last_line):
213 self.resp_time_func = function
218 def create_access_charts(self, regex_name):
220 :param regex_name: str: regex name from 'find_regex' method. Ex.: 'apache_extended', 'nginx_extended'
222 Create additional charts depending on the 'find_regex' result (parsed_line) and configuration file
223 1. 'time_response' chart is removed if there is no 'time_response' in logs.
224 2. Other stuff is just remove/add chart depending on yes/no in conf
226 def find_job_name(override_name, name):
228 :param override_name: str: 'name' var from configuration file
229 :param name: str: 'job_name' from configuration file
230 :return: str: new job name
231 We need this for dynamic charts. Actually same logic as in python.d.plugin.
233 add_to_name = override_name or name
235 return '_'.join(['web_log', re.sub('\s+', '_', add_to_name)])
239 self.order = ORDER[:]
240 self.definitions = deepcopy(CHARTS)
242 job_name = find_job_name(self.override_name, self.name)
243 self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
244 ' "Detailed Response Codes" requests/s responses' \
245 ' web_log.detailed_response_codes stacked 1 %s\n' % (job_name, self.update_every)
246 self.http_method_chart = 'CHART %s.http_method' \
247 ' "" "Requests Per HTTP Method" requests/s "http methods"' \
248 ' web_log.http_method stacked 2 %s\n' % (job_name, self.update_every)
250 # Remove 'request_time' chart from ORDER if request_time not in logs
251 if regex_name == 'acs_default':
252 self.order.remove('response_time')
253 # Remove 'clients_all' chart from ORDER if specified in the configuration
254 if not self.all_time:
255 self.order.remove('clients_all')
256 # Add 'detailed_response_codes' chart if specified in the configuration
257 if self.detailed_response_codes:
258 self.order.append('detailed_response_codes')
259 self.definitions['detailed_response_codes'] = {'options': [None, 'Detailed Response Codes', 'requests/s',
260 'responses', 'web_log.detailed_response_codes',
264 # Add 'requests_per_url' chart if specified in the configuration
266 self.url_pattern = [NAMED_URL_PATTERN(description=k, pattern=re.compile(v)) for k, v
267 in self.url_pattern.items()]
268 self.definitions['requests_per_url'] = {'options': [None, 'Requests Per Url', 'requests/s',
269 'urls', 'web_log.requests_per_url', 'stacked'],
270 'lines': [['other_url', 'other', 'incremental']]}
271 for elem in self.url_pattern:
272 self.definitions['requests_per_url']['lines'].append([elem.description, elem.description,
274 self.data.update({elem.description: 0})
275 self.data.update({'other_url': 0})
277 self.order.remove('requests_per_url')
279 def add_new_dimension(self, dimension, line_list, chart_string, key):
281 :param dimension: str: response status code. Ex.: '202', '499'
282 :param line_list: list: Ex.: ['202', '202', 'incremental']
283 :param chart_string: Current string we need to pass to netdata to rebuild the chart
284 :param key: str: CHARTS dict key (chart name). Ex.: 'response_time'
285 :return: str: new chart string = previous + new dimensions
287 self.data.update({dimension: 0})
288 # SET method check if dim in _dimensions
289 self._dimensions.append(dimension)
290 # UPDATE method do SET only if dim in definitions
291 self.definitions[key]['lines'].append(line_list)
293 chart += "%s %s\n" % ('DIMENSION', ' '.join(line_list))
297 def _get_access_data(self):
300 :return: dict OR None
301 None if _get_raw_data method fails.
302 In all other cases - dict.
304 raw = self._get_raw_data()
308 request_time, unique_current = list(), list()
309 request_counter = {'count': 0, 'sum': 0}
310 ip_address_counter = {'unique_cur_ip': 0}
312 match = self.regex.search(line)
314 match_dict = dict(zip_longest('address method url code sent resp_length resp_time'.split(),
317 code = ''.join([match_dict['code'][0], 'xx'])
320 self.data['0xx'] += 1
321 # detailed response code
322 if self.detailed_response_codes:
323 self._get_data_detailed_response_codes(match_dict['code'])
325 self._get_data_statuses(match_dict['code'])
328 self._get_data_per_url(match_dict['url'])
329 # requests per http method
330 self._get_data_http_method(match_dict['method'])
332 self.data['bytes_sent'] += int(match_dict['sent'] if '-' not in match_dict['sent'] else 0)
333 # request processing time and bandwidth received
334 if match_dict['resp_length'] and match_dict['resp_time']:
335 self.data['resp_length'] += int(match_dict['resp_length'])
336 resp_time = self.resp_time_func(float(match_dict['resp_time']))
337 bisect.insort_left(request_time, resp_time)
338 request_counter['count'] += 1
339 request_counter['sum'] += resp_time
340 # requests per ip proto
341 proto = 'ipv4' if '.' in match_dict['address'] else 'ipv6'
342 self.data['req_' + proto] += 1
344 if address_not_in_pool(self.unique_all_time, match_dict['address'],
345 self.data['unique_tot_ipv4'] + self.data['unique_tot_ipv6']):
346 self.data['unique_tot_' + proto] += 1
347 if address_not_in_pool(unique_current, match_dict['address'], ip_address_counter['unique_cur_ip']):
348 self.data['unique_cur_' + proto] += 1
349 ip_address_counter['unique_cur_ip'] += 1
351 self.data['unmatched'] += 1
355 self.data['resp_time_min'] += int(request_time[0])
356 self.data['resp_time_avg'] += int(round(float(request_counter['sum']) / request_counter['count']))
357 self.data['resp_time_max'] += int(request_time[-1])
360 def _get_data_detailed_response_codes(self, code):
362 :param code: str: CODE from parsed line. Ex.: '202, '499'
364 Calls add_new_dimension method If the value is found for the first time
366 if code not in self.data:
367 chart_string_copy = self.detailed_chart
368 self.detailed_chart = self.add_new_dimension(code, [code, code, 'incremental'],
369 chart_string_copy, 'detailed_response_codes')
372 def _get_data_http_method(self, method):
374 :param method: str: METHOD from parsed line. Ex.: 'GET', 'POST'
376 Calls add_new_dimension method If the value is found for the first time
378 if method not in self.data:
379 chart_string_copy = self.http_method_chart
380 self.http_method_chart = self.add_new_dimension(method, [method, method, 'incremental'],
381 chart_string_copy, 'http_method')
382 self.data[method] += 1
384 def _get_data_per_url(self, url):
386 :param url: str: URL from parsed line
388 Scan through string looking for the first location where patterns produce a match for all user
392 for elem in self.url_pattern:
393 if elem.pattern.search(url):
394 self.data[elem.description] += 1
398 self.data['other_url'] += 1
400 def _get_data_statuses(self, code):
402 :param code: str: response status code. Ex.: '202', '499'
406 if code_class == '2' or code == '304' or code_class == '1':
407 self.data['successful_requests'] += 1
408 elif code_class == '3':
409 self.data['redirects'] += 1
410 elif code_class == '4':
411 self.data['bad_requests'] += 1
412 elif code_class == '5':
413 self.data['server_errors'] += 1
415 self.data['other_requests'] += 1
418 def address_not_in_pool(pool, address, pool_size):
420 :param pool: list of ip addresses
421 :param address: ip address
422 :param pool_size: current size of pool
423 :return: True if address not in pool. False if address in pool
425 index = bisect.bisect_left(pool, address)
426 if index < pool_size:
427 if pool[index] == address:
430 bisect.insort_left(pool, address)
433 bisect.insort_left(pool, address)