1 # -*- coding: utf-8 -*-
2 # Description: web log netdata python.d module
5 from base import LogService
8 from os import access, R_OK
9 from os.path import getsize
10 from collections import namedtuple
11 from copy import deepcopy
# Chart display order on the netdata dashboard (left-to-right, top-to-bottom).
ORDER = ['response_statuses', 'response_codes', 'bandwidth', 'response_time', 'requests_per_url', 'http_method',
         'requests_per_ipproto', 'clients', 'clients_all']

# Static chart definitions in python.d format:
#   'options': [name override, title, units, family, context, chart type]
#   'lines':   [dimension id, dimension name, algorithm, multiplier, divisor]
# NOTE(review): the dict skeleton below was reconstructed around the surviving
# option/line entries of a garbled extraction -- confirm against upstream.
CHARTS = {
    'response_codes': {
        'options': [None, 'Response Codes', 'requests/s', 'responses', 'web_log.response_codes', 'stacked'],
        'lines': [
            ['2xx', '2xx', 'incremental'],
            ['5xx', '5xx', 'incremental'],
            ['3xx', '3xx', 'incremental'],
            ['4xx', '4xx', 'incremental'],
            ['1xx', '1xx', 'incremental'],
            ['0xx', 'other', 'incremental'],
            ['unmatched', 'unmatched', 'incremental']
        ]},
    'bandwidth': {
        'options': [None, 'Bandwidth', 'KB/s', 'bandwidth', 'web_log.bandwidth', 'area'],
        'lines': [
            ['resp_length', 'received', 'incremental', 1, 1024],
            # negative multiplier draws 'sent' below the axis
            ['bytes_sent', 'sent', 'incremental', -1, 1024]
        ]},
    'response_time': {
        'options': [None, 'Processing Time', 'milliseconds', 'timings', 'web_log.response_time', 'area'],
        'lines': [
            ['resp_time_min', 'min', 'incremental', 1, 1000],
            ['resp_time_max', 'max', 'incremental', 1, 1000],
            ['resp_time_avg', 'avg', 'incremental', 1, 1000]
        ]},
    'clients': {
        'options': [None, 'Current Poll Unique Client IPs', 'unique ips', 'clients', 'web_log.clients', 'stacked'],
        'lines': [
            ['unique_cur_ipv4', 'ipv4', 'incremental', 1, 1],
            ['unique_cur_ipv6', 'ipv6', 'incremental', 1, 1]
        ]},
    'clients_all': {
        'options': [None, 'All Time Unique Client IPs', 'unique ips', 'clients', 'web_log.clients_all', 'stacked'],
        'lines': [
            ['unique_tot_ipv4', 'ipv4', 'absolute', 1, 1],
            ['unique_tot_ipv6', 'ipv6', 'absolute', 1, 1]
        ]},
    'http_method': {
        'options': [None, 'Requests Per HTTP Method', 'requests/s', 'http methods', 'web_log.http_method', 'stacked'],
        # dimensions are added at runtime as new HTTP methods are seen
        'lines': [
        ]},
    'requests_per_ipproto': {
        'options': [None, 'Requests Per IP Protocol', 'requests/s', 'ip protocols', 'web_log.requests_per_ipproto',
                    'stacked'],
        'lines': [
            ['req_ipv4', 'ipv4', 'incremental', 1, 1],
            ['req_ipv6', 'ipv6', 'incremental', 1, 1]
        ]},
    'response_statuses': {
        'options': [None, 'Response Statuses', 'requests/s', 'responses', 'web_log.response_statuses',
                    'stacked'],
        'lines': [
            ['successful_requests', 'success', 'incremental', 1, 1],
            ['server_errors', 'error', 'incremental', 1, 1],
            ['redirects', 'redirect', 'incremental', 1, 1],
            ['bad_requests', 'bad', 'incremental', 1, 1],
            ['other_requests', 'other', 'incremental', 1, 1]
        ]}
}

# (description, compiled pattern) pair used by the per-URL request counter.
NAMED_URL_PATTERN = namedtuple('URL_PATTERN', ['description', 'pattern'])
class Service(LogService):
    def __init__(self, configuration=None, name=None):
        """
        Tail-parse a web-server access log and feed per-poll metrics to netdata.

        :param configuration: dict: job configuration from the module conf file
        :param name: str: job name
        """
        # self._get_data          -- assigned in 'check' method.
        # self.order              -- assigned in 'create_*_method' method.
        # self.definitions        -- assigned in 'create_*_method' method.
        # self.detailed_chart     -- assigned in 'create_*_method' method.
        # self.http_method_chart  -- assigned in 'create_*_method' method.
        LogService.__init__(self, configuration=configuration, name=name)
        # Variables from module configuration file
        self.log_path = self.configuration.get('path')
        self.detailed_response_codes = self.configuration.get('detailed_response_codes', True)
        self.all_time = self.configuration.get('all_time', True)
        self.url_pattern = self.configuration.get('categories')  # dict
        self.custom_log_format = self.configuration.get('custom_log_format')  # dict
        # Instance variables
        self.unique_all_time = list()  # sorted list of unique IPs
        self.regex = None  # will be assigned in 'find_regex' or 'find_regex_custom' method
        self.resp_time_func = None  # will be assigned in 'find_regex' or 'find_regex_custom' method
        # Accumulators reported to netdata. Most dimensions are 'incremental',
        # so these counters only ever grow; netdata derives per-second rates.
        self.data = {'bytes_sent': 0, 'resp_length': 0, 'resp_time_min': 0, 'resp_time_max': 0,
                     'resp_time_avg': 0, 'unique_cur_ipv4': 0, 'unique_cur_ipv6': 0, '2xx': 0,
                     '5xx': 0, '3xx': 0, '4xx': 0, '1xx': 0, '0xx': 0, 'unmatched': 0, 'req_ipv4': 0,
                     'req_ipv6': 0, 'unique_tot_ipv4': 0, 'unique_tot_ipv6': 0, 'successful_requests': 0,
                     'redirects': 0, 'bad_requests': 0, 'server_errors': 0, 'other_requests': 0}
113 We need to make sure:
114 1. "log_path" is specified in the module configuration file
115 2. "log_path" must be readable by netdata user and must exist
116 3. "log_path' must not be empty. We need at least 1 line to find appropriate pattern to parse
117 4. Plugin can work using predefined patterns (OK for nginx, apache default log format) or user defined
118 pattern. So we need to check if we can parse last line from log file with user pattern OR module patterns.
119 5. All patterns for per_url_request_counter feature are valid regex expressions
121 if not self.log_path:
122 self.error('log path is not specified')
125 if not access(self.log_path, R_OK):
126 self.error('%s not readable or not exist' % self.log_path)
129 if not getsize(self.log_path):
130 self.error('%s is empty' % self.log_path)
133 # Read last line (or first if there is only one line)
134 with open(self.log_path, 'rb') as logs:
136 while logs.read(1) != b'\n':
140 last_line = logs.readline()
143 last_line = last_line.decode()
144 except UnicodeDecodeError:
146 last_line = last_line.decode(encoding='utf-8')
147 except (TypeError, UnicodeDecodeError) as error:
148 self.error(str(error))
151 # Custom_log_format or predefined log format.
152 if self.custom_log_format:
153 match_dict, log_name, error = self.find_regex_custom(last_line)
155 match_dict, log_name, error = self.find_regex(last_line)
157 # "match_dict" is None if there are any problems
158 if match_dict is None:
159 self.error(str(error))
162 # self.url_pattern check
164 self.url_pattern = check_req_per_url_pattern(self.url_pattern)
167 if not (self.regex and self.resp_time_func):
168 self.error('That can not happen, but it happened. "regex" or "resp_time_func" is None')
170 # All is ok. We are about to start.
171 if log_name == 'web_access':
172 self.create_access_charts(match_dict) # Create charts
173 self._get_data = self._get_access_data
174 self.info('Collected data: %s' % list(match_dict.keys()))
177 # If it's not access_logs.. Not used at the moment
180 def find_regex_custom(self, last_line):
182 :param last_line: str: literally last line from log file
183 :return: tuple where:
184 [0]: dict or None: match_dict or None
185 [1]: str or None: log_name or None
186 [2]: str: error description
188 We are here only if "custom_log_format" is in logs. We need to make sure:
189 1. "custom_log_format" is a dict
190 2. "pattern" in "custom_log_format" and pattern is <str> instance
191 3. if "time_multiplier" is in "custom_log_format" it must be <int> instance
193 If all parameters is ok we need to make sure:
194 1. Pattern search is success
195 2. Pattern search contains named subgroups (?P<subgroup_name>) (= "match_dict")
197 If pattern search is success we need to make sure:
198 1. All mandatory keys ['address', 'code', 'bytes_sent', 'method', 'url'] are in "match_dict"
200 If this is True we need to make sure:
201 1. All mandatory key values from "match_dict" have the correct format
202 ("code" is integer, "method" is uppercase word, etc)
204 If non mandatory keys in "match_dict" we need to make sure:
205 1. All non mandatory key values from match_dict ['resp_length', 'resp_time'] have the correct format
206 ("resp_length" is integer or "-", "resp_time" is integer or float)
209 if not is_dict(self.custom_log_format):
210 return find_regex_return(msg='Custom log: "custom_log_format" is not a <dict>')
212 pattern = self.custom_log_format.get('pattern')
213 if not (pattern and isinstance(pattern, str)):
214 return find_regex_return(msg='Custom log: "pattern" option is not specified or type is not <str>')
216 resp_time_func = self.custom_log_format.get('time_multiplier') or 0
218 if not isinstance(resp_time_func, int):
219 return find_regex_return(msg='Custom log: "time_multiplier" is not an integer')
222 regex = re.compile(pattern)
223 except re.error as error:
224 return find_regex_return(msg='Pattern compile error: %s' % str(error))
226 match = regex.search(last_line)
228 match_dict = match.groupdict() or None
230 return find_regex_return(msg='Custom log: pattern search FAILED')
232 if match_dict is None:
233 find_regex_return(msg='Custom log: search OK but contains no named subgroups'
234 ' (you need to use ?P<subgroup_name>)')
236 basic_values = {'address', 'method', 'url', 'code', 'bytes_sent'} - set(match_dict)
239 return find_regex_return(msg='Custom log: search OK but some mandatory keys (%s) are missing'
240 % list(basic_values))
242 if not re.search(r'[\da-f.:]+', match_dict['address']):
243 return find_regex_return(msg='Custom log: can\'t parse "address": %s'
244 % match_dict['address'])
245 if not re.search(r'[1-9]\d{2}', match_dict['code']):
246 return find_regex_return(msg='Custom log: can\'t parse "code": %s'
247 % match_dict['code'])
248 if not re.search(r'[A-Z]+', match_dict['method']):
249 return find_regex_return(msg='Custom log: can\'t parse "method": %s'
250 % match_dict['method'])
251 if not re.search(r'\d+|-', match_dict['bytes_sent']):
252 return find_regex_return(msg='Custom log: can\'t parse "bytes_sent": %s'
253 % match_dict['bytes_sent'])
255 if 'resp_length' in match_dict:
256 if not re.search(r'\d+', match_dict['resp_length']):
257 return find_regex_return(msg='Custom log: can\'t parse "resp_length": %s'
258 % match_dict['resp_length'])
260 if 'resp_time' in match_dict:
261 if not re.search(r'[\d.]+', match_dict['resp_length']):
262 return find_regex_return(msg='Custom log: can\'t parse "resp_time": %s'
263 % match_dict['resp_time'])
265 if '.' in match_dict['resp_time']:
266 self.resp_time_func = lambda time: time * (resp_time_func or 1000000)
268 self.resp_time_func = lambda time: time * (resp_time_func or 1)
271 return find_regex_return(match_dict=match_dict,
272 log_name='web_access',
275 def find_regex(self, last_line):
277 :param last_line: str: literally last line from log file
278 :return: tuple where:
279 [0]: dict or None: match_dict or None
280 [1]: str or None: log_name or None
281 [2]: str: error description
282 We need to find appropriate pattern for current log file
283 All logic is do a regex search through the string for all predefined patterns
284 until we find something or fail.
286 # REGEX: 1.IPv4 address 2.HTTP method 3. URL 4. Response code
287 # 5. Bytes sent 6. Response length 7. Response process time
288 acs_default = re.compile(r'(?P<address>[\da-f.:]+)'
289 r' -.*?"(?P<method>[A-Z]+)'
291 r' (?P<code>[1-9]\d{2})'
292 r' (?P<bytes_sent>\d+|-)')
294 acs_apache_ext_insert = re.compile(r'(?P<address>[\da-f.:]+)'
295 r' -.*?"(?P<method>[A-Z]+)'
297 r' (?P<code>[1-9]\d{2})'
298 r' (?P<bytes_sent>\d+|-)'
299 r' (?P<resp_length>\d+)'
300 r' (?P<resp_time>\d+) ')
302 acs_apache_ext_append = re.compile(r'(?P<address>[\da-f.:]+)'
303 r' -.*?"(?P<method>[A-Z]+)'
305 r' (?P<code>[1-9]\d{2})'
306 r' (?P<bytes_sent>\d+|-)'
308 r' (?P<resp_length>\d+)'
309 r' (?P<resp_time>\d+)'
312 acs_nginx_ext_insert = re.compile(r'(?P<address>[\da-f.:]+)'
313 r' -.*?"(?P<method>[A-Z]+)'
315 r' (?P<code>[1-9]\d{2})'
316 r' (?P<bytes_sent>\d+)'
317 r' (?P<resp_length>\d+)'
318 r' (?P<resp_time>\d\.\d+) ')
320 acs_nginx_ext_append = re.compile(r'(?P<address>[\da-f.:]+)'
321 r' -.*?"(?P<method>[A-Z]+)'
323 r' (?P<code>[1-9]\d{2})'
324 r' (?P<bytes_sent>\d+)'
326 r' (?P<resp_length>\d+)'
327 r' (?P<resp_time>\d\.\d+)')
333 return time * 1000000
335 r_regex = [acs_apache_ext_insert, acs_apache_ext_append, acs_nginx_ext_insert,
336 acs_nginx_ext_append, acs_default]
337 r_function = [func_usec, func_usec, func_sec, func_sec, func_usec]
338 regex_function = zip(r_regex, r_function)
341 for regex, function in regex_function:
342 match = regex.search(last_line)
345 self.resp_time_func = function
346 match_dict = match.groupdict()
349 return find_regex_return(match_dict=match_dict or None,
350 log_name='web_access',
351 msg='Unknown log format. You need to use "custom_log_format" feature.')
353 def create_access_charts(self, match_dict):
355 :param match_dict: dict: regex.search.groupdict(). Ex. {'address': '127.0.0.1', 'code': '200', 'method': 'GET'}
357 Create additional charts depending on the 'match_dict' keys and configuration file options
358 1. 'time_response' chart is removed if there is no 'resp_time' in match_dict.
359 2. Other stuff is just remove/add chart depending on yes/no in conf
361 def find_job_name(override_name, name):
363 :param override_name: str: 'name' var from configuration file
364 :param name: str: 'job_name' from configuration file
365 :return: str: new job name
366 We need this for dynamic charts. Actually same logic as in python.d.plugin.
368 add_to_name = override_name or name
370 return '_'.join(['web_log', re.sub('\s+', '_', add_to_name)])
374 self.order = ORDER[:]
375 self.definitions = deepcopy(CHARTS)
377 job_name = find_job_name(self.override_name, self.name)
378 self.detailed_chart = 'CHART %s.detailed_response_codes ""' \
379 ' "Detailed Response Codes" requests/s responses' \
380 ' web_log.detailed_response_codes stacked 1 %s\n' % (job_name, self.update_every)
381 self.http_method_chart = 'CHART %s.http_method' \
382 ' "" "Requests Per HTTP Method" requests/s "http methods"' \
383 ' web_log.http_method stacked 2 %s\n' % (job_name, self.update_every)
385 # Remove 'request_time' chart from ORDER if resp_time not in match_dict
386 if 'resp_time' not in match_dict:
387 self.order.remove('response_time')
388 # Remove 'clients_all' chart from ORDER if specified in the configuration
389 if not self.all_time:
390 self.order.remove('clients_all')
391 # Add 'detailed_response_codes' chart if specified in the configuration
392 if self.detailed_response_codes:
393 self.order.append('detailed_response_codes')
394 self.definitions['detailed_response_codes'] = {'options': [None, 'Detailed Response Codes', 'requests/s',
395 'responses', 'web_log.detailed_response_codes',
399 # Add 'requests_per_url' chart if specified in the configuration
401 self.definitions['requests_per_url'] = {'options': [None, 'Requests Per Url', 'requests/s',
402 'urls', 'web_log.requests_per_url', 'stacked'],
403 'lines': [['other_url', 'other', 'incremental']]}
404 for elem in self.url_pattern:
405 self.definitions['requests_per_url']['lines'].append([elem.description, elem.description,
407 self.data.update({elem.description: 0})
408 self.data.update({'other_url': 0})
410 self.order.remove('requests_per_url')
412 def add_new_dimension(self, dimension, line_list, chart_string, key):
414 :param dimension: str: response status code. Ex.: '202', '499'
415 :param line_list: list: Ex.: ['202', '202', 'incremental']
416 :param chart_string: Current string we need to pass to netdata to rebuild the chart
417 :param key: str: CHARTS dict key (chart name). Ex.: 'response_time'
418 :return: str: new chart string = previous + new dimensions
420 self.data.update({dimension: 0})
421 # SET method check if dim in _dimensions
422 self._dimensions.append(dimension)
423 # UPDATE method do SET only if dim in definitions
424 self.definitions[key]['lines'].append(line_list)
426 chart += "%s %s\n" % ('DIMENSION', ' '.join(line_list))
430 def _get_access_data(self):
433 :return: dict OR None
434 None if _get_raw_data method fails.
435 In all other cases - dict.
437 raw = self._get_raw_data()
441 request_time, unique_current = list(), list()
442 request_counter = {'count': 0, 'sum': 0}
443 ip_address_counter = {'unique_cur_ip': 0}
445 match = self.regex.search(line)
447 match_dict = match.groupdict()
449 code = ''.join([match_dict['code'][0], 'xx'])
452 self.data['0xx'] += 1
453 # detailed response code
454 if self.detailed_response_codes:
455 self._get_data_detailed_response_codes(match_dict['code'])
457 self._get_data_statuses(match_dict['code'])
460 self._get_data_per_url(match_dict['url'])
461 # requests per http method
462 self._get_data_http_method(match_dict['method'])
464 bytes_sent = match_dict['bytes_sent'] if '-' not in match_dict['bytes_sent'] else 0
465 self.data['bytes_sent'] += int(bytes_sent)
466 # request processing time and bandwidth received
467 if 'resp_length' in match_dict:
468 self.data['resp_length'] += int(match_dict['resp_length'])
469 if 'resp_time' in match_dict:
470 resp_time = self.resp_time_func(float(match_dict['resp_time']))
471 bisect.insort_left(request_time, resp_time)
472 request_counter['count'] += 1
473 request_counter['sum'] += resp_time
474 # requests per ip proto
475 proto = 'ipv4' if '.' in match_dict['address'] else 'ipv6'
476 self.data['req_' + proto] += 1
478 if address_not_in_pool(self.unique_all_time, match_dict['address'],
479 self.data['unique_tot_ipv4'] + self.data['unique_tot_ipv6']):
480 self.data['unique_tot_' + proto] += 1
481 if address_not_in_pool(unique_current, match_dict['address'], ip_address_counter['unique_cur_ip']):
482 self.data['unique_cur_' + proto] += 1
483 ip_address_counter['unique_cur_ip'] += 1
485 self.data['unmatched'] += 1
489 self.data['resp_time_min'] += int(request_time[0])
490 self.data['resp_time_avg'] += int(round(float(request_counter['sum']) / request_counter['count']))
491 self.data['resp_time_max'] += int(request_time[-1])
494 def _get_data_detailed_response_codes(self, code):
496 :param code: str: CODE from parsed line. Ex.: '202, '499'
498 Calls add_new_dimension method If the value is found for the first time
500 if code not in self.data:
501 chart_string_copy = self.detailed_chart
502 self.detailed_chart = self.add_new_dimension(code, [code, code, 'incremental'],
503 chart_string_copy, 'detailed_response_codes')
506 def _get_data_http_method(self, method):
508 :param method: str: METHOD from parsed line. Ex.: 'GET', 'POST'
510 Calls add_new_dimension method If the value is found for the first time
512 if method not in self.data:
513 chart_string_copy = self.http_method_chart
514 self.http_method_chart = self.add_new_dimension(method, [method, method, 'incremental'],
515 chart_string_copy, 'http_method')
516 self.data[method] += 1
518 def _get_data_per_url(self, url):
520 :param url: str: URL from parsed line
522 Scan through string looking for the first location where patterns produce a match for all user
526 for elem in self.url_pattern:
527 if elem.pattern.search(url):
528 self.data[elem.description] += 1
532 self.data['other_url'] += 1
534 def _get_data_statuses(self, code):
536 :param code: str: response status code. Ex.: '202', '499'
540 if code_class == '2' or code == '304' or code_class == '1':
541 self.data['successful_requests'] += 1
542 elif code_class == '3':
543 self.data['redirects'] += 1
544 elif code_class == '4':
545 self.data['bad_requests'] += 1
546 elif code_class == '5':
547 self.data['server_errors'] += 1
549 self.data['other_requests'] += 1
def address_not_in_pool(pool, address, pool_size):
    """
    Check membership in a sorted pool and insert the address when it is new.

    :param pool: list of ip addresses (kept sorted)
    :param address: ip address
    :param pool_size: current pool size
    :return: True if address not in pool (address is inserted). False if address in pool.
    """
    index = bisect.bisect_left(pool, address)
    if index < pool_size:
        if pool[index] == address:
            return False
        bisect.insort_left(pool, address)
        return True
    bisect.insort_left(pool, address)
    return True
def find_regex_return(match_dict=None, log_name=None, msg='Generic error message'):
    """
    Uniform return value for the find_regex / find_regex_custom methods.

    :param match_dict: dict: re.search.groupdict() or None
    :param log_name: str: log name
    :param msg: str: error description
    :return: tuple: (match_dict, log_name, msg)
    """
    return match_dict, log_name, msg
def check_req_per_url_pattern(url_pattern):
    """
    Compile the user's per-URL category patterns.

    :param url_pattern: dict: ex. {'dim1': '<pattern1>', 'dim2': '<pattern2>'}
    :return: list of named tuples or None:
    We need to make sure all patterns are valid regular expressions
    """
    if not is_dict(url_pattern):
        return None

    result = list()

    def is_valid_pattern(pattern):
        """
        :param pattern: str
        :return: re.compile(pattern) or False
        """
        if not isinstance(pattern, str):
            return False
        try:
            compile_pattern = re.compile(pattern)
        except re.error:
            return False
        return compile_pattern

    # keep only (str key, valid regex) pairs; invalid entries are dropped
    for dimension, regex in url_pattern.items():
        valid_pattern = is_valid_pattern(regex)
        if isinstance(dimension, str) and valid_pattern:
            result.append(NAMED_URL_PATTERN(description=dimension, pattern=valid_pattern))

    # None (not an empty list) when nothing survived validation
    return result or None
618 :return: True or False
619 obj can be <dict> or <OrderedDict>
623 except AttributeError: