# -*- coding: utf-8 -*-
# Description: web log netdata python.d module

from base import LogService
import re
import bisect
from os import access, R_OK
from os.path import getsize
from collections import namedtuple
from copy import deepcopy
ORDER = ['response_statuses', 'response_codes', 'bandwidth', 'response_time', 'requests_per_url', 'http_method',
         'http_version', 'requests_per_ipproto', 'clients', 'clients_all']
CHARTS = {
    'response_codes': {
        'options': [None, 'Response Codes', 'requests/s', 'responses', 'web_log.response_codes', 'stacked'],
        'lines': [
            ['2xx', '2xx', 'incremental'],
            ['5xx', '5xx', 'incremental'],
            ['3xx', '3xx', 'incremental'],
            ['4xx', '4xx', 'incremental'],
            ['1xx', '1xx', 'incremental'],
            ['0xx', 'other', 'incremental'],
            ['unmatched', 'unmatched', 'incremental']
        ]},
    'bandwidth': {
        'options': [None, 'Bandwidth', 'KB/s', 'bandwidth', 'web_log.bandwidth', 'area'],
        'lines': [
            ['resp_length', 'received', 'incremental', 1, 1024],
            ['bytes_sent', 'sent', 'incremental', -1, 1024]
        ]},
    'response_time': {
        'options': [None, 'Processing Time', 'milliseconds', 'timings', 'web_log.response_time', 'area'],
        'lines': [
            ['resp_time_min', 'min', 'incremental', 1, 1000],
            ['resp_time_max', 'max', 'incremental', 1, 1000],
            ['resp_time_avg', 'avg', 'incremental', 1, 1000]
        ]},
    'clients': {
        'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'clients', 'web_log.clients', 'stacked'],
        'lines': [
            ['unique_cur_ipv4', 'ipv4', 'incremental', 1, 1],
            ['unique_cur_ipv6', 'ipv6', 'incremental', 1, 1]
        ]},
    'clients_all': {
        'options': [None, 'All Time Unique Client IPs', 'unique ips', 'clients', 'web_log.clients_all', 'stacked'],
        'lines': [
            ['unique_tot_ipv4', 'ipv4', 'absolute', 1, 1],
            ['unique_tot_ipv6', 'ipv6', 'absolute', 1, 1]
        ]},
    'http_method': {
        'options': [None, 'Requests Per HTTP Method', 'requests/s', 'http methods', 'web_log.http_method', 'stacked'],
        'lines': [
            ['GET', 'GET', 'incremental', 1, 1]
        ]},
    'http_version': {
        'options': [None, 'Requests Per HTTP Version', 'requests/s', 'http versions',
                    'web_log.http_version', 'stacked'],
        'lines': []},
    'requests_per_ipproto': {
        'options': [None, 'Requests Per IP Protocol', 'requests/s', 'ip protocols', 'web_log.requests_per_ipproto',
                    'stacked'],
        'lines': [
            ['req_ipv4', 'ipv4', 'incremental', 1, 1],
            ['req_ipv6', 'ipv6', 'incremental', 1, 1]
        ]},
    'response_statuses': {
        'options': [None, 'Response Statuses', 'requests/s', 'responses', 'web_log.response_statuses',
                    'stacked'],
        'lines': [
            ['successful_requests', 'success', 'incremental', 1, 1],
            ['server_errors', 'error', 'incremental', 1, 1],
            ['redirects', 'redirect', 'incremental', 1, 1],
            ['bad_requests', 'bad', 'incremental', 1, 1],
            ['other_requests', 'other', 'incremental', 1, 1]
        ]}
}
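
# Each entry in 'lines' above follows the python.d chart convention:
#   [dimension_id, dimension_name, algorithm, multiplier, divisor]
# e.g. ['bytes_sent', 'sent', 'incremental', -1, 1024] takes the per-interval delta
# of the 'bytes_sent' counter, negates it (so "sent" is drawn below the axis on the
# area chart) and divides by 1024 to plot KB/s.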

NAMED_URL_PATTERN = namedtuple('URL_PATTERN', ['description', 'pattern'])
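# 'description' doubles as the netdata dimension id ('rpu_<category>') and 'pattern'
# is a compiled regex; see check_req_per_url_pattern() below for how these are built.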


class Service(LogService):
    def __init__(self, configuration=None, name=None):
        """
        :param configuration:
        :param name:
        # self._get_data = None  # will be assigned in 'check' method.
        # self.order = None  # will be assigned in 'create_*_method' method.
        # self.definitions = None  # will be assigned in 'create_*_method' method.
        """
        LogService.__init__(self, configuration=configuration, name=name)
        # Variables from module configuration file
        self.type = self.configuration.get('type', 'web_access')
        self.log_path = self.configuration.get('path')
        self.url_pattern = self.configuration.get('categories')  # dict
        self.custom_log_format = self.configuration.get('custom_log_format')  # dict
        self.regex = None  # will be assigned in 'find_regex' or 'find_regex_custom' method
        self.data = {'bytes_sent': 0, 'resp_length': 0, 'resp_time_min': 0, 'resp_time_max': 0,
                     'resp_time_avg': 0, 'unique_cur_ipv4': 0, 'unique_cur_ipv6': 0, '2xx': 0,
                     '5xx': 0, '3xx': 0, '4xx': 0, '1xx': 0, '0xx': 0, 'unmatched': 0, 'req_ipv4': 0,
                     'req_ipv6': 0, 'unique_tot_ipv4': 0, 'unique_tot_ipv6': 0, 'successful_requests': 0,
                     'redirects': 0, 'bad_requests': 0, 'server_errors': 0, 'other_requests': 0, 'GET': 0}

    def check(self):
        """
        :return: bool

        1. "log_path" is specified in the module configuration file
        2. "log_path" must be readable by the netdata user and must exist
        3. "log_path" must not be empty. We need at least 1 line to find an appropriate pattern to parse
        4. other checks depend on the log "type"
        """
        if not self.log_path:
            self.error('log path is not specified')
            return False

        if not access(self.log_path, R_OK):
            self.error('%s is not readable or does not exist' % self.log_path)
            return False

        if not getsize(self.log_path):
            self.error('%s is empty' % self.log_path)
            return False

        # Read last line (or first if there is only one line)
        with open(self.log_path, 'rb') as logs:
            # Walk backwards from the end of the file until the previous newline
            # (or the start of the file) is found, then read that line.
            logs.seek(-2, 2)
            while logs.read(1) != b'\n':
                logs.seek(-2, 1)
                if logs.tell() == 0:
                    break
            last_line = logs.readline()

        try:
            last_line = last_line.decode()
        except UnicodeDecodeError:
            try:
                last_line = last_line.decode(encoding='utf-8')
            except (TypeError, UnicodeDecodeError) as error:
                self.error(str(error))
                return False

        if self.type == 'web_access':
            self.unique_all_time = list()  # sorted list of unique IPs
            self.detailed_response_codes = self.configuration.get('detailed_response_codes', True)
            self.all_time = self.configuration.get('all_time', True)

            # Custom_log_format or predefined log format.
            if self.custom_log_format:
                match_dict, error = self.find_regex_custom(last_line)
            else:
                match_dict, error = self.find_regex(last_line)

            # "match_dict" is None if there are any problems
            if match_dict is None:
                self.error(str(error))
                return False

            # self.url_pattern check
            if self.url_pattern:
                self.url_pattern = check_req_per_url_pattern('rpu', self.url_pattern)

            self.create_access_charts(match_dict)  # Create charts
            self._get_data = self._get_access_data  # _get_data assignment
        else:
            self.error('Not implemented')
            return False

        # Double check
        if not self.regex:
            self.error('That cannot happen, but it happened. "regex" is None')

        self.info('Collected data: %s' % list(match_dict.keys()))
        return True

    def find_regex_custom(self, last_line):
        """
        :param last_line: str: literally the last line from the log file
        :return: tuple where:
        [0]: dict or None: match_dict or None
        [1]: str: error description

        We are here only if "custom_log_format" is specified in the job configuration. We need to make sure:
        1. "custom_log_format" is a dict
        2. "pattern" is in "custom_log_format" and is a <str> instance
        3. if "time_multiplier" is in "custom_log_format" it must be an <int> instance

        If all parameters are ok we need to make sure:
        1. The pattern search succeeds
        2. The pattern contains named subgroups (?P<subgroup_name>) (= "match_dict")

        If the pattern search succeeds we need to make sure:
        1. All mandatory keys ['address', 'code', 'bytes_sent', 'method', 'url'] are in "match_dict"

        If this is True we need to make sure:
        1. All mandatory key values from "match_dict" have the correct format
           ("code" is an integer, "method" is an uppercase word, etc.)

        If non-mandatory keys are in "match_dict" we need to make sure:
        1. All non-mandatory key values from "match_dict" ['resp_length', 'resp_time'] have the correct format
           ("resp_length" is an integer or "-", "resp_time" is an integer or a float)
        """
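        # A minimal, hypothetical job configuration for illustration (the pattern
        # and multiplier below are examples, not defaults):
        #
        # custom_log_format:
        #   pattern: '(?P<address>[\da-f.:]+) .* "(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+|-)'
        #   time_multiplier: 1000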
        if not is_dict(self.custom_log_format):
            return find_regex_return(msg='Custom log: "custom_log_format" is not a <dict>')

        pattern = self.custom_log_format.get('pattern')
        if not (pattern and isinstance(pattern, str)):
            return find_regex_return(msg='Custom log: "pattern" option is not specified or type is not <str>')

        resp_time_func = self.custom_log_format.get('time_multiplier') or 0

        if not isinstance(resp_time_func, int):
            return find_regex_return(msg='Custom log: "time_multiplier" is not an integer')

        try:
            regex = re.compile(pattern)
        except re.error as error:
            return find_regex_return(msg='Pattern compile error: %s' % str(error))

        match = regex.search(last_line)
        if match:
            match_dict = match.groupdict() or None
        else:
            return find_regex_return(msg='Custom log: pattern search FAILED')

        if match_dict is None:
            return find_regex_return(msg='Custom log: search OK but contains no named subgroups'
                                         ' (you need to use ?P<subgroup_name>)')

        mandatory_dict = {'address': r'[\da-f.:]+',
                          'code': r'[1-9]\d{2}',
                          'method': r'[A-Z]+',
                          'bytes_sent': r'\d+|-'}
        optional_dict = {'resp_length': r'\d+',
                         'resp_time': r'[\d.]+',
                         'http_version': r'\d\.\d'}

        mandatory_values = set(mandatory_dict) - set(match_dict)
        if mandatory_values:
            return find_regex_return(msg='Custom log: search OK but some mandatory keys (%s) are missing'
                                         % list(mandatory_values))
        for key in mandatory_dict:
            if not re.search(mandatory_dict[key], match_dict[key]):
                return find_regex_return(msg='Custom log: can\'t parse "%s": %s'
                                             % (key, match_dict[key]))

        optional_values = set(optional_dict) & set(match_dict)
        for key in optional_values:
            if not re.search(optional_dict[key], match_dict[key]):
                return find_regex_return(msg='Custom log: can\'t parse "%s": %s'
                                             % (key, match_dict[key]))

        dot_in_time = '.' in match_dict.get('resp_time', '')
        if dot_in_time:
            # Fractional time (e.g. nginx $request_time is in seconds): default to
            # a 1000000 multiplier to convert to microseconds.
            self.resp_time_func = lambda time: time * (resp_time_func or 1000000)
        else:
            # Integer time (e.g. apache %D is already in microseconds): no scaling by default.
            self.resp_time_func = lambda time: time * (resp_time_func or 1)

        self.regex = regex
        return find_regex_return(match_dict=match_dict)

    def find_regex(self, last_line):
        """
        :param last_line: str: literally the last line from the log file
        :return: tuple where:
        [0]: dict or None: match_dict or None
        [1]: str: error description

        We need to find an appropriate pattern for the current log file.
        All the logic is: do a regex search through the string for all predefined
        patterns until we find something or fail.
        """
        # REGEX: 1. IPv4 address 2. HTTP method 3. URL 4. Response code
        # 5. Bytes sent 6. Response length 7. Response process time
        acs_default = re.compile(r'(?P<address>[\da-f.:]+)'
                                 r' -.*?"(?P<method>[A-Z]+)'
                                 r' (?P<url>.*?)'
                                 r' [A-Z]+/(?P<http_version>\d\.\d)"'
                                 r' (?P<code>[1-9]\d{2})'
                                 r' (?P<bytes_sent>\d+|-)')

        acs_apache_ext_insert = re.compile(r'(?P<address>[\da-f.:]+)'
                                           r' -.*?"(?P<method>[A-Z]+)'
                                           r' (?P<url>.*?)'
                                           r' [A-Z]+/(?P<http_version>\d\.\d)"'
                                           r' (?P<code>[1-9]\d{2})'
                                           r' (?P<bytes_sent>\d+|-)'
                                           r' (?P<resp_length>\d+)'
                                           r' (?P<resp_time>\d+) ')

        acs_apache_ext_append = re.compile(r'(?P<address>[\da-f.:]+)'
                                           r' -.*?"(?P<method>[A-Z]+)'
                                           r' (?P<url>.*?)'
                                           r' [A-Z]+/(?P<http_version>\d\.\d)"'
                                           r' (?P<code>[1-9]\d{2})'
                                           r' (?P<bytes_sent>\d+|-)'
                                           r'.*?'
                                           r' (?P<resp_length>\d+)'
                                           r' (?P<resp_time>\d+)'
                                           r'(?: |$)')

        acs_nginx_ext_insert = re.compile(r'(?P<address>[\da-f.:]+)'
                                          r' -.*?"(?P<method>[A-Z]+)'
                                          r' (?P<url>.*?)'
                                          r' [A-Z]+/(?P<http_version>\d\.\d)"'
                                          r' (?P<code>[1-9]\d{2})'
                                          r' (?P<bytes_sent>\d+)'
                                          r' (?P<resp_length>\d+)'
                                          r' (?P<resp_time>\d\.\d+) ')

        acs_nginx_ext_append = re.compile(r'(?P<address>[\da-f.:]+)'
                                          r' -.*?"(?P<method>[A-Z]+)'
                                          r' (?P<url>.*?)'
                                          r' [A-Z]+/(?P<http_version>\d\.\d)"'
                                          r' (?P<code>[1-9]\d{2})'
                                          r' (?P<bytes_sent>\d+)'
                                          r'.*?'
                                          r' (?P<resp_length>\d+)'
                                          r' (?P<resp_time>\d\.\d+)')
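
        # For illustration (hypothetical line), a default/combined-format entry that
        # acs_default is meant to match looks like:
        #   127.0.0.1 - - [22/Mar/2017:10:10:03 +0300] "GET /index.html HTTP/1.1" 200 3456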

        def func_usec(time):
            # apache %D logs response time in microseconds already
            return time

        def func_sec(time):
            # nginx $request_time logs seconds; convert to microseconds
            return time * 1000000

        r_regex = [acs_apache_ext_insert, acs_apache_ext_append, acs_nginx_ext_insert,
                   acs_nginx_ext_append, acs_default]
        r_function = [func_usec, func_usec, func_sec, func_sec, func_usec]
        regex_function = zip(r_regex, r_function)

        match_dict = None
        for regex, function in regex_function:
            match = regex.search(last_line)
            if match:
                self.regex = regex
                self.resp_time_func = function
                match_dict = match.groupdict()
                break

        return find_regex_return(match_dict=match_dict or None,
                                 msg='Unknown log format. You need to use "custom_log_format" feature.')

    def create_access_charts(self, match_dict):
        """
        :param match_dict: dict: regex.search.groupdict(). Ex. {'address': '127.0.0.1', 'code': '200', 'method': 'GET'}
        :return:

        Create additional charts depending on the 'match_dict' keys and configuration file options:
        1. The 'response_time' chart is removed if there is no 'resp_time' in match_dict.
        2. Other charts are added/removed depending on yes/no in the conf file.
        """
        def find_job_name(override_name, name):
            """
            :param override_name: str: 'name' var from the configuration file
            :param name: str: 'job_name' from the configuration file
            :return: str: new job name
            We need this for dynamic charts. Actually the same logic as in python.d.plugin.
            """
            add_to_name = override_name or name
            if add_to_name:
                return '_'.join(['web_log', re.sub(r'\s+', '_', add_to_name)])
            else:
                return 'web_log'

        self.order = ORDER[:]
        self.definitions = deepcopy(CHARTS)

        job_name = find_job_name(self.override_name, self.name)
        self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
                              ' "Detailed Response Codes" requests/s responses' \
                              ' web_log.detailed_response_codes stacked 1 %s\n' % (job_name, self.update_every)
        self.http_method_chart = 'CHART %s.http_method' \
                                 ' "" "Requests Per HTTP Method" requests/s "http methods"' \
                                 ' web_log.http_method stacked 2 %s\n' \
                                 'DIMENSION GET GET incremental\n' % (job_name, self.update_every)
        self.http_version_chart = 'CHART %s.http_version' \
                                  ' "" "Requests Per HTTP Version" requests/s "http versions"' \
                                  ' web_log.http_version stacked 3 %s\n' % (job_name, self.update_every)

        # Remove the 'response_time' chart from ORDER if 'resp_time' is not in match_dict
        if 'resp_time' not in match_dict:
            self.order.remove('response_time')
        # Remove the 'clients_all' chart from ORDER if specified in the configuration
        if not self.all_time:
            self.order.remove('clients_all')
        # Add the 'detailed_response_codes' chart if specified in the configuration
        if self.detailed_response_codes:
            self.order.append('detailed_response_codes')
            self.definitions['detailed_response_codes'] = {'options': [None, 'Detailed Response Codes', 'requests/s',
                                                                       'responses', 'web_log.detailed_response_codes',
                                                                       'stacked'],
                                                           'lines': []}

        # Add the 'requests_per_url' chart if specified in the configuration
        if self.url_pattern:
            self.definitions['requests_per_url'] = {'options': [None, 'Requests Per Url', 'requests/s',
                                                                'urls', 'web_log.requests_per_url', 'stacked'],
                                                    'lines': [['rpu_other', 'other', 'incremental']]}
            for elem in self.url_pattern:
                self.definitions['requests_per_url']['lines'].append([elem.description, elem.description[4:],
                                                                      'incremental'])
                self.data.update({elem.description: 0})
            self.data.update({'rpu_other': 0})
        else:
            self.order.remove('requests_per_url')

    def add_new_dimension(self, dimension, line_list, chart_string, key):
        """
        :param dimension: str: response status code. Ex.: '202', '499'
        :param line_list: list: Ex.: ['202', '202', 'incremental']
        :param chart_string: str: current string we need to pass to netdata to rebuild the chart
        :param key: str: CHARTS dict key (chart name). Ex.: 'http_method'
        :return: str: new chart string = previous + new dimensions
        """
        self.data.update({dimension: 0})
        # The SET method checks whether the dimension is in _dimensions
        self._dimensions.append(dimension)
        # The UPDATE method does SET only if the dimension is in definitions
        self.definitions[key]['lines'].append(line_list)
        chart = chart_string
        chart += "%s %s\n" % ('DIMENSION', ' '.join(line_list))
        # Emit the updated chart definition on stdout so netdata rebuilds the chart
        print(chart)
        return chart
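
    # Example of the emitted string (job name hypothetical): after adding the '404'
    # dimension to the detailed response codes chart, the method prints something like
    #   CHART web_log_apache.detailed_response_codes "" "Detailed Response Codes" ...
    #   DIMENSION 404 404 incremental
    # Re-sending the full CHART/DIMENSION block on stdout is how a python.d module
    # tells netdata to rebuild a chart with a new dimension.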

    def _get_access_data(self):
        """
        Parse new log lines.
        :return: dict OR None
        None if the _get_raw_data method fails.
        In all other cases - dict.
        """
        raw = self._get_raw_data()
        if raw is None:
            return None

        request_time, unique_current = list(), list()
        request_counter = {'count': 0, 'sum': 0}
        ip_address_counter = {'unique_cur_ip': 0}

        for line in raw:
            match = self.regex.search(line)
            if match:
                match_dict = match.groupdict()
                # response codes
                try:
                    code = ''.join([match_dict['code'][0], 'xx'])
                    self.data[code] += 1
                except KeyError:
                    self.data['0xx'] += 1
                # detailed response code
                if self.detailed_response_codes:
                    self._get_data_detailed_response_codes(match_dict['code'])
                # response statuses
                self._get_data_statuses(match_dict['code'])
                # requests per url
                if self.url_pattern:
                    self._get_data_per_url(match_dict['url'])
                # requests per http method
                self._get_data_http_method(match_dict['method'])
                # requests per http version
                if 'http_version' in match_dict:
                    self._get_data_http_version(match_dict['http_version'])
                # bandwidth sent
                bytes_sent = match_dict['bytes_sent'] if '-' not in match_dict['bytes_sent'] else 0
                self.data['bytes_sent'] += int(bytes_sent)
                # request processing time and bandwidth received
                if 'resp_length' in match_dict:
                    self.data['resp_length'] += int(match_dict['resp_length'])
                if 'resp_time' in match_dict:
                    resp_time = self.resp_time_func(float(match_dict['resp_time']))
                    bisect.insort_left(request_time, resp_time)
                    request_counter['count'] += 1
                    request_counter['sum'] += resp_time
                # requests per ip proto
                proto = 'ipv4' if '.' in match_dict['address'] else 'ipv6'
                self.data['req_' + proto] += 1
                # unique clients ips
                if address_not_in_pool(self.unique_all_time, match_dict['address'],
                                       self.data['unique_tot_ipv4'] + self.data['unique_tot_ipv6']):
                    self.data['unique_tot_' + proto] += 1
                if address_not_in_pool(unique_current, match_dict['address'], ip_address_counter['unique_cur_ip']):
                    self.data['unique_cur_' + proto] += 1
                    ip_address_counter['unique_cur_ip'] += 1
            else:
                self.data['unmatched'] += 1

        # timings
        if request_time:
            self.data['resp_time_min'] += int(request_time[0])
            self.data['resp_time_avg'] += int(round(float(request_counter['sum']) / request_counter['count']))
            self.data['resp_time_max'] += int(request_time[-1])

        return self.data
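
    # The returned dict maps dimension ids to cumulative counters, e.g. (values
    # hypothetical): {'2xx': 1500, '5xx': 2, 'bytes_sent': 5242880, 'GET': 1400, ...}.
    # Dimensions declared 'incremental' are turned into per-second rates by netdata.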

    def _get_data_detailed_response_codes(self, code):
        """
        :param code: str: CODE from parsed line. Ex.: '202', '499'
        :return:
        Calls the add_new_dimension method if the value is found for the first time
        """
        if code not in self.data:
            chart_string_copy = self.detailed_chart
            self.detailed_chart = self.add_new_dimension(code, [code, code, 'incremental'],
                                                         chart_string_copy, 'detailed_response_codes')
        self.data[code] += 1

    def _get_data_http_method(self, method):
        """
        :param method: str: METHOD from parsed line. Ex.: 'GET', 'POST'
        :return:
        Calls the add_new_dimension method if the value is found for the first time
        """
        if method not in self.data:
            chart_string_copy = self.http_method_chart
            self.http_method_chart = self.add_new_dimension(method, [method, method, 'incremental'],
                                                            chart_string_copy, 'http_method')
        self.data[method] += 1

    def _get_data_http_version(self, http_version):
        """
        :param http_version: str: HTTP version from parsed line. Ex.: '1.1', '1.0'
        :return:
        Calls the add_new_dimension method if the value is found for the first time
        """
        http_version_dim_id = http_version.replace('.', '_')
        if http_version_dim_id not in self.data:
            chart_string_copy = self.http_version_chart
            self.http_version_chart = self.add_new_dimension(http_version_dim_id,
                                                             [http_version_dim_id, http_version, 'incremental'],
                                                             chart_string_copy, 'http_version')
        self.data[http_version_dim_id] += 1

    def _get_data_per_url(self, url):
        """
        :param url: str: URL from parsed line
        :return:
        Scans through the URL looking for the first user-defined pattern that produces a match
        """
        match = None
        for elem in self.url_pattern:
            if elem.pattern.search(url):
                self.data[elem.description] += 1
                match = True
                break
        if not match:
            self.data['rpu_other'] += 1

    def _get_data_statuses(self, code):
        """
        :param code: str: response status code. Ex.: '202', '499'
        :return:
        """
        code_class = code[0]
        if code_class == '2' or code == '304' or code_class == '1':
            self.data['successful_requests'] += 1
        elif code_class == '3':
            self.data['redirects'] += 1
        elif code_class == '4':
            self.data['bad_requests'] += 1
        elif code_class == '5':
            self.data['server_errors'] += 1
        else:
            self.data['other_requests'] += 1


def address_not_in_pool(pool, address, pool_size):
    """
    :param pool: list of ip addresses
    :param address: ip address
    :param pool_size: current pool size
    :return: True if the address is not in the pool. False if it is.
    """
    index = bisect.bisect_left(pool, address)
    if index < pool_size:
        if pool[index] == address:
            return False
        else:
            bisect.insort_left(pool, address)
            return True
    else:
        bisect.insort_left(pool, address)
        return True
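
# Example (hypothetical values): with pool = ['10.0.0.1', '10.0.0.3'] and pool_size = 2,
# address_not_in_pool(pool, '10.0.0.2', 2) returns True and inserts the new address,
# keeping the pool sorted so the bisect lookup stays O(log n).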


def find_regex_return(match_dict=None, msg='Generic error message'):
    """
    :param match_dict: dict: re.search.groupdict() or None
    :param msg: str: error description
    :return: tuple: (match_dict, msg)
    """
    return match_dict, msg


def check_req_per_url_pattern(string, url_pattern):
    """
    :param string: str: prefix for the dimension ids. Ex.: 'rpu'
    :param url_pattern: dict: ex. {'dim1': '<pattern1>', 'dim2': '<pattern2>'}
    :return: list of named tuples or None:
    We need to make sure all patterns are valid regular expressions
    """
    if not is_dict(url_pattern):
        return None

    result = list()

    def is_valid_pattern(pattern):
        """
        :param pattern: str
        :return: compiled pattern or False
        """
        if not isinstance(pattern, str):
            return False
        try:
            compile_pattern = re.compile(pattern)
        except re.error:
            return False
        else:
            return compile_pattern

    for dimension, regex in url_pattern.items():
        valid_pattern = is_valid_pattern(regex)
        if isinstance(dimension, str) and valid_pattern:
            result.append(NAMED_URL_PATTERN(description='_'.join([string, dimension]), pattern=valid_pattern))

    return result or None
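
# Example (hypothetical config): check_req_per_url_pattern('rpu', {'static': r'^/static/'})
# returns [URL_PATTERN(description='rpu_static', pattern=re.compile(r'^/static/'))].
# Invalid entries (e.g. the unbalanced pattern '(') are dropped; if nothing valid
# remains, None is returned.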


def is_dict(obj):
    """
    :param obj: object to check
    :return: True or False
    obj can be <dict> or <OrderedDict>
    """
    try:
        obj.keys()
    except AttributeError:
        return False
    else:
        return True