1 # -*- coding: utf-8 -*-
2 # Description: web log netdata python.d module
5 from base import LogService
8 from os import access, R_OK
9 from os.path import getsize
10 from collections import namedtuple
11 from copy import deepcopy
13 from itertools import zip_longest
15 from itertools import izip_longest as zip_longest
20 ORDER = ['response_codes', 'bandwidth', 'response_time', 'requests_per_url', 'http_method', 'requests_per_ipproto',
21 'clients', 'clients_all']
# NOTE(review): this span is only a fragment of the CHARTS template dict — the
# `CHARTS = {` opener, the per-chart key lines (e.g. `'response_codes': {`), the
# `'lines': [` headers and the closing brackets are missing from this view.
# Only 'options'/'lines' entries of each chart definition are visible below.
# -- response_codes: one dimension per status-code class --
    'options': [None, 'Response Codes', 'requests/s', 'responses', 'web_log.response_codes', 'stacked'],
        ['2xx', '2xx', 'incremental'],
        ['5xx', '5xx', 'incremental'],
        ['3xx', '3xx', 'incremental'],
        ['4xx', '4xx', 'incremental'],
        ['1xx', '1xx', 'incremental'],
        ['0xx', 'other', 'incremental'],
        ['unmatched', 'unmatched', 'incremental']
# -- bandwidth: received/sent; 'sent' uses multiplier -1 (drawn below zero),
#    both use divisor 1024 — presumably bytes -> KB; verify against collector.
    'options': [None, 'Bandwidth', 'KB/s', 'bandwidth', 'web_log.bandwidth', 'area'],
        ['resp_length', 'received', 'incremental', 1, 1024],
        ['bytes_sent', 'sent', 'incremental', -1, 1024]
# -- response_time: min/avg/max, divisor 1000 (chart unit is milliseconds) --
    'options': [None, 'Processing Time', 'milliseconds', 'timings', 'web_log.response_time', 'area'],
        ['resp_time_min', 'min', 'incremental', 1, 1000],
        ['resp_time_max', 'max', 'incremental', 1, 1000],
        ['resp_time_avg', 'avg', 'incremental', 1, 1000]
# -- clients: unique IPs seen during the current poll --
    'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'clients', 'web_log.clients', 'stacked'],
        ['unique_cur_ipv4', 'ipv4', 'incremental', 1, 1],
        ['unique_cur_ipv6', 'ipv6', 'incremental', 1, 1]
# -- clients_all: all-time unique IPs ('absolute' — totals, not deltas) --
    'options': [None, 'All Time Unique Client IPs', 'unique ips', 'clients', 'web_log.clients_all', 'stacked'],
        ['unique_tot_ipv4', 'ipv4', 'absolute', 1, 1],
        ['unique_tot_ipv6', 'ipv6', 'absolute', 1, 1]
# -- http_method: no static lines visible; dimensions are added at runtime
#    via add_new_dimension() (see Service._get_data_http_method).
    'options': [None, 'Requests Per HTTP Method', 'requests/s', 'http methods', 'web_log.http_method', 'stacked'],
# -- requests_per_ipproto: ipv4 vs ipv6 request counts --
    'requests_per_ipproto': {
        'options': [None, 'Requests Per IP Protocol', 'requests/s', 'ip protocols', 'web_log.requests_per_ipproto',
            ['req_ipv4', 'ipv4', 'incremental', 1, 1],
            ['req_ipv6', 'ipv6', 'incremental', 1, 1]
# Pairs a user-configured category name with its (compiled) URL regex.
NAMED_URL_PATTERN = namedtuple('URL_PATTERN', 'description pattern')
class Service(LogService):
    """netdata python.d service: tails a web-server access log and converts
    each batch of new lines into chart values (response-code classes,
    bandwidth, processing times, per-url/method/ip-proto counters, unique
    client IPs)."""

    def __init__(self, configuration=None, name=None):
        LogService.__init__(self, configuration=configuration, name=name)
        # Variables from module configuration file
        self.log_path = self.configuration.get('path')
        self.detailed_response_codes = self.configuration.get('detailed_response_codes', True)
        self.all_time = self.configuration.get('all_time', True)
        self.url_pattern = self.configuration.get('categories')  # dict: category name -> regex string
        self.regex = None  # will be assigned in 'find_regex' method
        self.resp_time_func = None  # will be assigned in 'find_regex' method
        self._get_data = None  # will be assigned in 'check' method.
        self.order = None  # will be assigned in 'create_*_method' method.
        self.definitions = None  # will be assigned in 'create_*_method' method.
        self.detailed_chart = None  # will be assigned in 'create_*_method' method.
        self.http_method_chart = None  # will be assigned in 'create_*_method' method.
        # sorted list of unique IPs
        self.unique_all_time = list()
        # if there is no new logs this dict returned to netdata
        self.data = {'bytes_sent': 0, 'resp_length': 0, 'resp_time_min': 0,
                     'resp_time_max': 0, 'resp_time_avg': 0, 'unique_cur_ipv4': 0,
                     'unique_cur_ipv6': 0, '2xx': 0, '5xx': 0, '3xx': 0, '4xx': 0,
                     '1xx': 0, '0xx': 0, 'unmatched': 0, 'req_ipv4': 0, 'req_ipv6': 0,
                     'unique_tot_ipv4': 0, 'unique_tot_ipv6': 0}

        # NOTE(review): the method header that should enclose the following
        # block (presumably `def check(self):`) and its `return False`/`return
        # True` lines are missing from this view — confirm against the full file.
        # A log path must be configured for the job to start.
        if not self.log_path:
            self.error('log path is not specified')
        # log_path must be readable
        if not access(self.log_path, R_OK):
            self.error('%s not readable or not exist' % self.log_path)
        # log_path file should not be empty
        if not getsize(self.log_path):
            self.error('%s is empty' % self.log_path)
        # Read last line (or first if there is only one line)
        with open(self.log_path, 'rb') as logs:
            # Scan byte-by-byte until a newline delimiter is consumed.
            # NOTE(review): the initial seek before this loop and the loop body
            # are missing from this view.
            while logs.read(1) != b'\n':
            last_line = logs.readline().decode(encoding='utf-8')
        # Use the sample line to pick the regex matching this log's format.
        regex_name = self.find_regex(last_line)
        # NOTE(review): the guard around this error (presumably
        # `if not regex_name:`) is missing from this view.
        self.error('Can\'t parse %s' % self.log_path)
        if regex_name.startswith('acs_'):
            self.create_access_charts(regex_name)
            if regex_name == 'acs_default':
                self.info('Not all data collected. You need to modify LogFormat.')
            self._get_data = self._get_access_data
            self.info('Used regex: %s' % regex_name)
        # If it's not access_logs.. Not used at the moment

    def find_regex(self, last_line):
        """
        :param last_line: str: literally last line from log file
        It's sad but different web servers has different logs formats
        We need to find appropriate regex for current log file
        All logic is do a regex search through the string for all patterns
        until we find something or fail.
        """
        # REGEX: 1.IPv4 address 2.HTTP method 3. URL 4. Response code
        # 5. Bytes sent 6. Response length 7. Response process time
        # NOTE(review): each pattern's continuation lines are missing from this
        # view; only the shared leading client-address group is visible.
        acs_default = re.compile(r'([\da-f.:]+)'
        acs_apache_ext_insert = re.compile(r'([\da-f.:]+)'
        acs_apache_ext_append = re.compile(r'([\da-f.:]+)'
        acs_nginx_ext_insert = re.compile(r'([\da-f.:]+)'
        acs_nginx_ext_append = re.compile(r'([\da-f.:]+)'

        # Parallel lists: compiled pattern, response-time conversion, name.
        r_regex = [acs_apache_ext_insert, acs_apache_ext_append, acs_nginx_ext_insert,
                   acs_nginx_ext_append, acs_default]
        # The nginx variants scale the parsed response time by 1e6 — presumably
        # converting seconds to the unit the apache formats already use; the
        # other entries are identity functions. TODO confirm units.
        r_function = [lambda x: x, lambda x: x, lambda x: x * 1000000, lambda x: x * 1000000, lambda x: x]
        r_name = ['acs_apache_ext_insert', 'acs_apache_ext_append', 'acs_nginx_ext_insert',
                  'acs_nginx_ext_append', 'acs_default']
        regex_function_name = zip(r_regex, r_function, r_name)

        # First pattern that matches the sample line wins.
        for regex, function, name in regex_function_name:
            if regex.search(last_line):
                # NOTE(review): the assignment of `self.regex`, the capture of
                # the winning name, the loop break and the method's return
                # statement are missing from this view.
                self.resp_time_func = function

    def create_access_charts(self, regex_name):
        """
        :param regex_name: str: regex name from 'find_regex' method. Ex.: 'apache_extended', 'nginx_extended'
        Create additional charts depending on the 'find_regex' result (parsed_line) and configuration file
        1. 'time_response' chart is removed if there is no 'time_response' in logs.
        2. Other stuff is just remove/add chart depending on yes/no in conf
        """
        def find_job_name(override_name, name):
            """
            :param override_name: str: 'name' var from configuration file
            :param name: str: 'job_name' from configuration file
            :return: str: new job name
            We need this for dynamic charts. Actually same logic as in python.d.plugin.
            """
            add_to_name = override_name or name
            # NOTE(review): a guard on `add_to_name` (with a fallback return for
            # the empty case) appears to be missing from this view.
            return '_'.join(['web_log', re.sub('\s+', '_', add_to_name)])

        # Work on copies so the module-level templates stay untouched.
        self.order = ORDER[:]
        self.definitions = deepcopy(CHARTS)

        job_name = find_job_name(self.override_name, self.name)
        # Raw CHART definition strings emitted to netdata when a dynamic
        # dimension first appears (see add_new_dimension).
        self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
                              ' "Detailed Response Codes" requests/s responses' \
                              ' web_log.detailed_response_codes stacked 1 %s\n' % (job_name, self.update_every)
        self.http_method_chart = 'CHART %s.http_method' \
                                 ' "" "Requests Per HTTP Method" requests/s "http methods"' \
                                 ' web_log.http_method stacked 2 %s\n' % (job_name, self.update_every)

        # Remove 'request_time' chart from ORDER if request_time not in logs
        if regex_name == 'acs_default':
            self.order.remove('response_time')
        # Remove 'clients_all' chart from ORDER if specified in the configuration
        if not self.all_time:
            self.order.remove('clients_all')
        # Add 'detailed_response_codes' chart if specified in the configuration
        if self.detailed_response_codes:
            self.order.append('detailed_response_codes')
            self.definitions['detailed_response_codes'] = {'options': [None, 'Detailed Response Codes', 'requests/s',
                                                                       'responses', 'web_log.detailed_response_codes',
        # Add 'requests_per_url' chart if specified in the configuration
        # NOTE(review): the closing of the dict above and the guard introducing
        # this branch (presumably `if self.url_pattern:`) are missing from this
        # view, as is the `else:` before the final remove below.
            # Compile each configured category regex once, up front.
            self.url_pattern = [NAMED_URL_PATTERN(description=k, pattern=re.compile(v)) for k, v
                                in self.url_pattern.items()]
            self.definitions['requests_per_url'] = {'options': [None, 'Requests Per Url', 'requests/s',
                                                                'urls', 'web_log.requests_per_url', 'stacked'],
                                                    'lines': [['other_url', 'other', 'incremental']]}
            for elem in self.url_pattern:
                self.definitions['requests_per_url']['lines'].append([elem.description, elem.description,
                self.data.update({elem.description: 0})
            self.data.update({'other_url': 0})
            self.order.remove('requests_per_url')

    def add_new_dimension(self, dimension, line_list, chart_string, key):
        """
        :param dimension: str: response status code. Ex.: '202', '499'
        :param line_list: list: Ex.: ['202', '202', 'incremental']
        :param chart_string: Current string we need to pass to netdata to rebuild the chart
        :param key: str: CHARTS dict key (chart name). Ex.: 'response_time'
        :return: str: new chart string = previous + new dimensions
        """
        self.data.update({dimension: 0})
        # SET method check if dim in _dimensions
        self._dimensions.append(dimension)
        # UPDATE method do SET only if dim in definitions
        self.definitions[key]['lines'].append(line_list)
        # NOTE(review): the initialization `chart = chart_string` and the
        # emit/return of the rebuilt chart string appear to be missing from
        # this view.
        chart += "%s %s\n" % ('DIMENSION', ' '.join(line_list))

    def _get_access_data(self):
        """
        :return: dict OR None
        None if _get_raw_data method fails.
        In all other cases - dict.
        """
        raw = self._get_raw_data()
        # NOTE(review): the early `return None` guard for a failed read is
        # missing from this view.

        # Per-poll accumulators: sorted response times, request counters and a
        # count of unique client IPs seen this poll.
        request_time, unique_current = list(), list()
        request_counter = {'count': 0, 'sum': 0}
        ip_address_counter = {'unique_cur_ip': 0}
        # NOTE(review): the `for line in raw:` header and the `if match:` guard
        # are missing from this view; the statements below are the per-line body.
        match = self.regex.search(line)
        match_dict = dict(zip_longest('address method url code sent resp_length resp_time'.split(),
        # Bucket by first digit: '2xx', '4xx', ...; unknown classes land in '0xx'.
        code = ''.join([match_dict['code'][0], 'xx'])
        self.data['0xx'] += 1
        # detailed response code
        if self.detailed_response_codes:
            self._get_data_detailed_response_codes(match_dict['code'])
        # requests per configured url category
        self._get_data_per_url(match_dict['url'])
        # requests per http method
        self._get_data_http_method(match_dict['method'])
        # a '-' in the sent field is counted as 0 bytes
        self.data['bytes_sent'] += int(match_dict['sent'] if '-' not in match_dict['sent'] else 0)
        # request processing time and bandwidth received
        if match_dict['resp_length'] and match_dict['resp_time']:
            self.data['resp_length'] += int(match_dict['resp_length'])
            resp_time = self.resp_time_func(float(match_dict['resp_time']))
            # keep response times sorted so min/max are the list's ends
            bisect.insort_left(request_time, resp_time)
            request_counter['count'] += 1
            request_counter['sum'] += resp_time
        # requests per ip proto
        proto = 'ipv4' if '.' in match_dict['address'] else 'ipv6'
        self.data['req_' + proto] += 1
        # unique client IPs: all-time pool and current-poll pool
        if address_not_in_pool(self.unique_all_time, match_dict['address'],
                               self.data['unique_tot_ipv4'] + self.data['unique_tot_ipv6']):
            self.data['unique_tot_' + proto] += 1
        if address_not_in_pool(unique_current, match_dict['address'], ip_address_counter['unique_cur_ip']):
            self.data['unique_cur_' + proto] += 1
            ip_address_counter['unique_cur_ip'] += 1
        # NOTE(review): the `else:` pairing this with the `if match:` guard is
        # missing from this view — this counts lines the regex did not match.
        self.data['unmatched'] += 1

        # Timings summary; NOTE(review): the `if request_time:` guard and the
        # final `return self.data` appear to be missing from this view.
        self.data['resp_time_min'] += int(request_time[0])
        self.data['resp_time_avg'] += int(round(float(request_counter['sum']) / request_counter['count']))
        self.data['resp_time_max'] += int(request_time[-1])

    def _get_data_detailed_response_codes(self, code):
        """
        :param code: str: CODE from parsed line. Ex.: '202', '499'
        Calls add_new_dimension method If the value is found for the first time
        """
        if code not in self.data:
            chart_string_copy = self.detailed_chart
            self.detailed_chart = self.add_new_dimension(code, [code, code, 'incremental'],
                                                         chart_string_copy, 'detailed_response_codes')
        # NOTE(review): the increment of `self.data[code]` appears to be
        # missing from this view (compare _get_data_http_method below).

    def _get_data_http_method(self, method):
        """
        :param method: str: METHOD from parsed line. Ex.: 'GET', 'POST'
        Calls add_new_dimension method If the value is found for the first time
        """
        if method not in self.data:
            chart_string_copy = self.http_method_chart
            self.http_method_chart = self.add_new_dimension(method, [method, method, 'incremental'],
                                                            chart_string_copy, 'http_method')
        self.data[method] += 1

    def _get_data_per_url(self, url):
        """
        :param url: str: URL from parsed line
        Scan through string looking for the first location where patterns produce a match for all user
        defined patterns
        """
        # NOTE(review): control flow appears to be missing from this view (e.g.
        # a match flag/break and an `if not match:` guard) — as written, every
        # url would also increment 'other_url'.
        for elem in self.url_pattern:
            if elem.pattern.search(url):
                self.data[elem.description] += 1
        self.data['other_url'] += 1
def address_not_in_pool(pool, address, pool_size):
    """
    :param pool: list of ip addresses
    :param address: ip address
    :param pool_size: current size of pool
    :return: True if address not in pool. False if address in pool
    """
    # Binary search for the would-be position of `address` in the sorted pool.
    index = bisect.bisect_left(pool, address)
    if index < pool_size:
        if pool[index] == address:
            # NOTE(review): the function is truncated at the end of this view —
            # the `return False` for the already-present case, the `else:`
            # branches introducing the inserts below and the trailing
            # `return True` statements are missing; confirm against the full file.
            bisect.insort_left(pool, address)
    bisect.insort_left(pool, address)