]> arthur.barton.de Git - netdata.git/commitdiff
attempt to fix freezes on socket service
authorCosta Tsaousis (ktsaou) <costa@tsaousis.gr>
Wed, 9 Nov 2016 00:20:00 +0000 (02:20 +0200)
committerCosta Tsaousis (ktsaou) <costa@tsaousis.gr>
Wed, 9 Nov 2016 00:20:00 +0000 (02:20 +0200)
plugins.d/python.d.plugin
python.d/memcached.chart.py
python.d/python_modules/base.py

index 5e81fb263c0489807449b9fdff79a0db0c72a6d7..b729c3214391ebd11247a69f97e7b94c1360dca7 100755 (executable)
@@ -292,8 +292,8 @@ class PythonCharts(object):
         if job.name is not None and len(job.name) != 0:
             prefix += "/" + job.name
         try:
+            msg.error("DISABLED:", prefix)
             self.jobs.remove(job)
-            msg.info("Disabled", prefix)
         except Exception as e:
             msg.debug("This shouldn't happen. NO " + prefix + " IN LIST:" + str(self.jobs) + " ERROR: " + str(e))
 
@@ -335,18 +335,18 @@ class PythonCharts(object):
                     msg.error(job.chart_name, "check function failed.")
                     self._stop(job)
                 else:
-                    msg.debug(job.chart_name, "check succeeded")
+                    msg.info("CHECKED OK:", job.chart_name)
                     i += 1
                     try:
                         if job.override_name is not None:
                             new_name = job.__module__ + '_' + job.override_name
                             if new_name in overridden:
-                                msg.error(job.override_name + " already exists. Stopping '" + job.name + "'")
+                                msg.info("DROPPED:", job.name, ", job '" + job.override_name + "' is already served by another job.")
                                 self._stop(job)
                                 i -= 1
                             else:
                                 job.name = job.override_name
-                                msg.debug(job.chart_name + " changing chart name to: '" + new_name + "'")
+                                msg.info("RENAMED:", new_name, ", from " + job.chart_name)
                                 job.chart_name = new_name
                                 overridden.append(job.chart_name)
                     except Exception:
index 5a1400c992da00ab0ac4e4d38458c263d67e0adb..09ceed2210fc322a917aaf895c3ca87f127ba820 100644 (file)
@@ -131,7 +131,7 @@ class Service(SocketService):
             self.error("no data received")
             return None
         if raw[0].startswith('ERROR'):
-            self.error("Memcached returned ERROR")
+            self.error("memcached returned ERROR")
             return None
         data = {}
         for line in raw:
@@ -160,8 +160,10 @@ class Service(SocketService):
 
     def _check_raw_data(self, data):
         if data.endswith('END\r\n'):
+            self.debug("received response")
             return True
         else:
+            self.debug("waiting response")
             return False
 
     def check(self):
index 70c586f8e411a276b829406cddc75cbcf8a4b7cf..cf2f7847e1529a103a235d67229e6d4fbf03bf57 100644 (file)
@@ -519,6 +519,7 @@ class SocketService(SimpleService):
                 if self.__socket_config is None:
                     # establish ipv6 or ipv4 connection.
                     for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, socket.SOCK_STREAM):
+                        self.debug("connecting socket to host '" + str(self.host) + "', port " + str(self.port))
                         try:
                             # noinspection SpellCheckingInspection
                             af, socktype, proto, canonname, sa = res
@@ -547,6 +548,7 @@ class SocketService(SimpleService):
             else:
                 # connect to unix socket
                 try:
+                    self.debug("connecting unix socket '" + str(self.unix_socket) + "'")
                     self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
                     self._sock.connect(self.unix_socket)
                 except socket.error:
@@ -559,19 +561,27 @@ class SocketService(SimpleService):
                        "port:", str(self.port),
                        "socket:", str(self.unix_socket))
             self._sock = None
-        self._sock.setblocking(0)
+
+        if self._sock is not None:
+            self._sock.setblocking(0)
+            self._sock.settimeout(5)
+            self.debug("connected with timeout " + str(self._sock.gettimeout()))
+        else:
+            self.debug("not connected")
 
     def _disconnect(self):
         """
         Close socket connection
         :return:
         """
-        try:
-            self._sock.shutdown(2)  # 0 - read, 1 - write, 2 - all
-            self._sock.close()
-        except Exception:
-            pass
-        self._sock = None
+        if self._sock is not None:
+            try:
+                self.debug("disconnecting")
+                self._sock.shutdown(2)  # 0 - read, 1 - write, 2 - all
+                self._sock.close()
+            except Exception:
+                pass
+            self._sock = None
 
     def _send(self):
         """
@@ -581,6 +591,7 @@ class SocketService(SimpleService):
         # Send request if it is needed
         if self.request != "".encode():
             try:
+                self.debug("sending request")
                 self._sock.send(self.request)
             except Exception as e:
                 self._disconnect()
@@ -599,14 +610,16 @@ class SocketService(SimpleService):
         data = ""
         while True:
             try:
+                self.debug("receiving response")
                 ready_to_read, _, in_error = select.select([self._sock], [], [], 5)
             except Exception as e:
-                self.debug("SELECT", str(e))
+                self.error("timeout while waiting for response:", str(e))
                 self._disconnect()
                 break
             if len(ready_to_read) > 0:
                 buf = self._sock.recv(4096)
-                if len(buf) == 0 or buf is None:  # handle server disconnect
+                if buf is None or len(buf) == 0:  # handle server disconnect
+                    self._disconnect()
                     break
                 data += buf.decode(errors='ignore')
                 if self._check_raw_data(data):