1 # -*- coding: utf-8 -*-
2 # Description: web log netdata python.d module
5 from base import LogService
8 from os import access, R_OK
9 from os.path import getsize
10 from collections import namedtuple
11 from copy import deepcopy
13 from itertools import zip_longest
15 from itertools import izip_longest as zip_longest
# Chart drawing order on the netdata dashboard.
ORDER = [
    'response_statuses',
    'response_codes',
    'bandwidth',
    'response_time',
    'requests_per_url',
    'http_method',
    'requests_per_ipproto',
    'clients',
    'clients_all',
]
24 'options': [None, 'Response Codes', 'requests/s', 'responses', 'web_log.response_codes', 'stacked'],
26 ['2xx', '2xx', 'incremental'],
27 ['5xx', '5xx', 'incremental'],
28 ['3xx', '3xx', 'incremental'],
29 ['4xx', '4xx', 'incremental'],
30 ['1xx', '1xx', 'incremental'],
31 ['0xx', 'other', 'incremental'],
32 ['unmatched', 'unmatched', 'incremental']
35 'options': [None, 'Bandwidth', 'KB/s', 'bandwidth', 'web_log.bandwidth', 'area'],
37 ['resp_length', 'received', 'incremental', 1, 1024],
38 ['bytes_sent', 'sent', 'incremental', -1, 1024]
41 'options': [None, 'Processing Time', 'milliseconds', 'timings', 'web_log.response_time', 'area'],
43 ['resp_time_min', 'min', 'incremental', 1, 1000],
44 ['resp_time_max', 'max', 'incremental', 1, 1000],
45 ['resp_time_avg', 'avg', 'incremental', 1, 1000]
48 'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'clients', 'web_log.clients', 'stacked'],
50 ['unique_cur_ipv4', 'ipv4', 'incremental', 1, 1],
51 ['unique_cur_ipv6', 'ipv6', 'incremental', 1, 1]
54 'options': [None, 'All Time Unique Client IPs', 'unique ips', 'clients', 'web_log.clients_all', 'stacked'],
56 ['unique_tot_ipv4', 'ipv4', 'absolute', 1, 1],
57 ['unique_tot_ipv6', 'ipv6', 'absolute', 1, 1]
60 'options': [None, 'Requests Per HTTP Method', 'requests/s', 'http methods', 'web_log.http_method', 'stacked'],
63 'requests_per_ipproto': {
64 'options': [None, 'Requests Per IP Protocol', 'requests/s', 'ip protocols', 'web_log.requests_per_ipproto',
67 ['req_ipv4', 'ipv4', 'incremental', 1, 1],
68 ['req_ipv6', 'ipv6', 'incremental', 1, 1]
70 'response_statuses': {
71 'options': [None, 'Response Statuses', 'requests/s', 'responses', 'web_log.response_statuses',
74 ['successful_requests', 'successful', 'incremental', 1, 1],
75 ['redirects', None, 'incremental', 1, 1],
76 ['bad_requests', 'bad', 'incremental', 1, 1],
77 ['server_errors', None, 'incremental', 1, 1]
81 NAMED_URL_PATTERN = namedtuple('URL_PATTERN', ['description', 'pattern'])
84 class Service(LogService):
85 def __init__(self, configuration=None, name=None):
86 LogService.__init__(self, configuration=configuration, name=name)
87 # Variables from module configuration file
88 self.log_path = self.configuration.get('path')
89 self.detailed_response_codes = self.configuration.get('detailed_response_codes', True)
90 self.all_time = self.configuration.get('all_time', True)
91 self.url_pattern = self.configuration.get('categories') # dict
92 self.regex = None # will be assigned in 'find_regex' method
93 self.resp_time_func = None # will be assigned in 'find_regex' method
94 self._get_data = None # will be assigned in 'check' method.
95 self.order = None # will be assigned in 'create_*_method' method.
96 self.definitions = None # will be assigned in 'create_*_method' method.
97 self.detailed_chart = None # will be assigned in 'create_*_method' method.
98 self.http_method_chart = None # will be assigned in 'create_*_method' method.
99 # sorted list of unique IPs
100 self.unique_all_time = list()
101 # if there is no new logs this dict returned to netdata
102 self.data = {'bytes_sent': 0, 'resp_length': 0, 'resp_time_min': 0, 'resp_time_max': 0,
103 'resp_time_avg': 0, 'unique_cur_ipv4': 0, 'unique_cur_ipv6': 0, '2xx': 0,
104 '5xx': 0, '3xx': 0, '4xx': 0, '1xx': 0, '0xx': 0, 'unmatched': 0, 'req_ipv4': 0,
105 'req_ipv6': 0, 'unique_tot_ipv4': 0, 'unique_tot_ipv6': 0, 'successful_requests': 0,
106 'redirects': 0, 'bad_requests': 0, 'server_errors': 0}
        # NOTE(review): this span is the body of check(); its 'def' line and the
        # 'return False'/'return True' exits are not visible in this copy — confirm
        # against the full file.
        if not self.log_path:
            self.error('log path is not specified')
        # log_path must be readable
        if not access(self.log_path, R_OK):
            self.error('%s not readable or not exist' % self.log_path)
        # log_path file should not be empty
        if not getsize(self.log_path):
            self.error('%s is empty' % self.log_path)
        # Read last line (or first if there is only one line)
        with open(self.log_path, 'rb') as logs:
            # NOTE(review): the loop body (presumably seeking backwards from EOF
            # one byte at a time) is not visible in this copy.
            while logs.read(1) != b'\n':
            last_line = logs.readline().decode(encoding='utf-8')
        # Pick a known log-format regex that matches the sample line.
        regex_name = self.find_regex(last_line)
        # NOTE(review): the guard around this error call (no regex matched?) is
        # not visible in this copy.
        self.error('Can\'t parse %s' % self.log_path)
        # Access-log formats get their charts and data-collection method wired up.
        if regex_name.startswith('acs_'):
            self.create_access_charts(regex_name)
            if regex_name == 'acs_default':
                self.info('Not all data collected. You need to modify LogFormat.')
            self._get_data = self._get_access_data
            self.info('Used regex: %s' % regex_name)
        # If it's not access_logs.. Not used at the moment
    def find_regex(self, last_line):
        """
        Pick the first known log-format pattern that matches the sample line.

        :param last_line: str: literally last line from log file

        It's sad but different web servers has different logs formats
        We need to find appropriate regex for current log file
        All logic is do a regex search through the string for all patterns
        until we find something or fail.
        """
        # REGEX: 1.IPv4 address 2.HTTP method 3. URL 4. Response code
        # 5. Bytes sent 6. Response length 7. Response process time
        # NOTE(review): the continuation lines holding the full patterns are not
        # visible in this copy — each re.compile() below appears truncated.
        acs_default = re.compile(r'([\da-f.:]+)'
        acs_apache_ext_insert = re.compile(r'([\da-f.:]+)'
        acs_apache_ext_append = re.compile(r'([\da-f.:]+)'
        acs_nginx_ext_insert = re.compile(r'([\da-f.:]+)'
        acs_nginx_ext_append = re.compile(r'([\da-f.:]+)'
        # Candidates are tried in order; plain 'acs_default' is the last resort.
        r_regex = [acs_apache_ext_insert, acs_apache_ext_append, acs_nginx_ext_insert,
                   acs_nginx_ext_append, acs_default]
        # Per-pattern conversion for the response-time capture: the two nginx
        # patterns scale the value by 1e6, the others pass it through unchanged
        # (presumably seconds -> microseconds; confirm against the full patterns).
        r_function = [lambda x: x, lambda x: x, lambda x: x * 1000000, lambda x: x * 1000000, lambda x: x]
        r_name = ['acs_apache_ext_insert', 'acs_apache_ext_append', 'acs_nginx_ext_insert',
                  'acs_nginx_ext_append', 'acs_default']
        regex_function_name = zip(r_regex, r_function, r_name)
        # First pattern that matches the sample line wins.
        for regex, function, name in regex_function_name:
            if regex.search(last_line):
                self.resp_time_func = function
                # NOTE(review): the lines storing self.regex and returning 'name'
                # (check() uses the returned name) are not visible in this copy.
    def create_access_charts(self, regex_name):
        """
        Build per-job self.order / self.definitions for the access-log charts.

        :param regex_name: str: regex name from 'find_regex' method. Ex.: 'apache_extended', 'nginx_extended'

        Create additional charts depending on the 'find_regex' result (parsed_line) and configuration file
        1. 'time_response' chart is removed if there is no 'time_response' in logs.
        2. Other stuff is just remove/add chart depending on yes/no in conf
        """
        def find_job_name(override_name, name):
            """
            :param override_name: str: 'name' var from configuration file
            :param name: str: 'job_name' from configuration file
            :return: str: new job name
            We need this for dynamic charts. Actually same logic as in python.d.plugin.
            """
            add_to_name = override_name or name
            # whitespace is collapsed to '_' so the chart id stays valid
            return '_'.join(['web_log', re.sub('\s+', '_', add_to_name)])

        # Work on copies so the module-level ORDER/CHARTS stay pristine for other jobs.
        self.order = ORDER[:]
        self.definitions = deepcopy(CHARTS)
        job_name = find_job_name(self.override_name, self.name)
        # Pre-rendered CHART command strings; add_new_dimension() appends
        # DIMENSION lines to these as new codes/methods appear at runtime.
        self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
                              ' "Detailed Response Codes" requests/s responses' \
                              ' web_log.detailed_response_codes stacked 1 %s\n' % (job_name, self.update_every)
        self.http_method_chart = 'CHART %s.http_method' \
                                 ' "" "Requests Per HTTP Method" requests/s "http methods"' \
                                 ' web_log.http_method stacked 2 %s\n' % (job_name, self.update_every)
        # Remove 'request_time' chart from ORDER if request_time not in logs
        if regex_name == 'acs_default':
            self.order.remove('response_time')
        # Remove 'clients_all' chart from ORDER if specified in the configuration
        if not self.all_time:
            self.order.remove('clients_all')
        # Add 'detailed_response_codes' chart if specified in the configuration
        if self.detailed_response_codes:
            self.order.append('detailed_response_codes')
            # NOTE(review): the tail of this dict literal (its 'lines' entry and
            # closing braces) is not visible in this copy.
            self.definitions['detailed_response_codes'] = {'options': [None, 'Detailed Response Codes', 'requests/s',
                                                           'responses', 'web_log.detailed_response_codes',
        # Add 'requests_per_url' chart if specified in the configuration
        # NOTE(review): the guard here (presumably 'if self.url_pattern:') is not
        # visible in this copy.
            self.url_pattern = [NAMED_URL_PATTERN(description=k, pattern=re.compile(v)) for k, v
                                in self.url_pattern.items()]
            self.definitions['requests_per_url'] = {'options': [None, 'Requests Per Url', 'requests/s',
                                                    'urls', 'web_log.requests_per_url', 'stacked'],
                                                    'lines': [['other_url', 'other', 'incremental']]}
            for elem in self.url_pattern:
                # NOTE(review): the close of this appended list is not visible here.
                self.definitions['requests_per_url']['lines'].append([elem.description, elem.description,
                self.data.update({elem.description: 0})
            self.data.update({'other_url': 0})
        # NOTE(review): presumably an 'else:' branch drops the chart when no
        # categories are configured — confirm against the full file.
            self.order.remove('requests_per_url')
278 def add_new_dimension(self, dimension, line_list, chart_string, key):
280 :param dimension: str: response status code. Ex.: '202', '499'
281 :param line_list: list: Ex.: ['202', '202', 'incremental']
282 :param chart_string: Current string we need to pass to netdata to rebuild the chart
283 :param key: str: CHARTS dict key (chart name). Ex.: 'response_time'
284 :return: str: new chart string = previous + new dimensions
286 self.data.update({dimension: 0})
287 # SET method check if dim in _dimensions
288 self._dimensions.append(dimension)
289 # UPDATE method do SET only if dim in definitions
290 self.definitions[key]['lines'].append(line_list)
292 chart += "%s %s\n" % ('DIMENSION', ' '.join(line_list))
    def _get_access_data(self):
        """
        Parse newly appended log lines and update the self.data counters.

        :return: dict OR None
        None if _get_raw_data method fails.
        In all other cases - dict.
        """
        raw = self._get_raw_data()
        # NOTE(review): the early exit for failed/empty raw data is not visible
        # in this copy.
        # Per-poll scratch state: sorted response times plus running sum/count
        # for the average, and unique-IP bookkeeping for this batch.
        request_time, unique_current = list(), list()
        request_counter = {'count': 0, 'sum': 0}
        ip_address_counter = {'unique_cur_ip': 0}
        # NOTE(review): the 'for line in raw:' header is not visible in this copy.
        match = self.regex.search(line)
        # Pair captured groups with field names; zip_longest pads the optional
        # trailing fields (resp_length/resp_time) with None for shorter formats.
        match_dict = dict(zip_longest('address method url code sent resp_length resp_time'.split(),
        # '2xx'/'3xx'/... bucket derived from the first digit of the code
        code = ''.join([match_dict['code'][0], 'xx'])
        # NOTE(review): the guard selecting between data[code] and the fallback
        # below is not visible in this copy.
        self.data['0xx'] += 1
        # detailed response code
        if self.detailed_response_codes:
            self._get_data_detailed_response_codes(match_dict['code'])
        # response status buckets (successful/redirect/bad/server error)
        self._get_data_statuses(match_dict['code'])
        # requests per user-defined url category
        self._get_data_per_url(match_dict['url'])
        # requests per http method
        self._get_data_http_method(match_dict['method'])
        # bandwidth sent; '-' means size unknown, counted as 0
        self.data['bytes_sent'] += int(match_dict['sent'] if '-' not in match_dict['sent'] else 0)
        # request processing time and bandwidth received
        if match_dict['resp_length'] and match_dict['resp_time']:
            self.data['resp_length'] += int(match_dict['resp_length'])
            resp_time = self.resp_time_func(float(match_dict['resp_time']))
            # keep request_time sorted so min/max are its first/last elements
            bisect.insort_left(request_time, resp_time)
            request_counter['count'] += 1
            request_counter['sum'] += resp_time
        # requests per ip proto
        proto = 'ipv4' if '.' in match_dict['address'] else 'ipv6'
        self.data['req_' + proto] += 1
        # unique clients: all-time pool and per-poll pool are tracked separately
        if address_not_in_pool(self.unique_all_time, match_dict['address'],
                               self.data['unique_tot_ipv4'] + self.data['unique_tot_ipv6']):
            self.data['unique_tot_' + proto] += 1
        if address_not_in_pool(unique_current, match_dict['address'], ip_address_counter['unique_cur_ip']):
            self.data['unique_cur_' + proto] += 1
            ip_address_counter['unique_cur_ip'] += 1
        # NOTE(review): presumably the else-branch for lines the regex rejected.
        self.data['unmatched'] += 1
        # NOTE(review): the 'if request_time:' guard protecting these index/
        # division operations is not visible in this copy.
        self.data['resp_time_min'] += int(request_time[0])
        self.data['resp_time_avg'] += int(round(float(request_counter['sum']) / request_counter['count']))
        self.data['resp_time_max'] += int(request_time[-1])
        # NOTE(review): 'return self.data' is not visible in this copy.
359 def _get_data_detailed_response_codes(self, code):
361 :param code: str: CODE from parsed line. Ex.: '202, '499'
363 Calls add_new_dimension method If the value is found for the first time
365 if code not in self.data:
366 chart_string_copy = self.detailed_chart
367 self.detailed_chart = self.add_new_dimension(code, [code, code, 'incremental'],
368 chart_string_copy, 'detailed_response_codes')
371 def _get_data_http_method(self, method):
373 :param method: str: METHOD from parsed line. Ex.: 'GET', 'POST'
375 Calls add_new_dimension method If the value is found for the first time
377 if method not in self.data:
378 chart_string_copy = self.http_method_chart
379 self.http_method_chart = self.add_new_dimension(method, [method, method, 'incremental'],
380 chart_string_copy, 'http_method')
381 self.data[method] += 1
383 def _get_data_per_url(self, url):
385 :param url: str: URL from parsed line
387 Scan through string looking for the first location where patterns produce a match for all user
391 for elem in self.url_pattern:
392 if elem.pattern.search(url):
393 self.data[elem.description] += 1
397 self.data['other_url'] += 1
399 def _get_data_statuses(self, code):
401 :param code: str: response status code. Ex.: '202', '499'
404 if code[0] == '2' or code == '304' or code[0] == '1':
405 self.data['successful_requests'] += 1
407 self.data['redirects'] += 1
409 self.data['bad_requests'] += 1
411 self.data['server_errors'] += 1
def address_not_in_pool(pool, address, pool_size):
    """
    Membership test with insertion: add *address* to the sorted *pool* unless
    already present.

    :param pool: list of ip addresses (kept sorted via bisect)
    :param address: ip address
    :param pool_size: current size of pool
    :return: True if address not in pool. False if address in pool

    FIX: the reviewed copy had no return statements, so the function always
    returned None — yet both call sites branch on a True/False result.
    """
    index = bisect.bisect_left(pool, address)
    if index < pool_size:
        if pool[index] == address:
            # already tracked — nothing to insert
            return False
        bisect.insort_left(pool, address)
        return True
    # address sorts past every tracked entry — insert it
    bisect.insort_left(pool, address)
    return True