+ //fprintf(stderr, "\nBACKEND BEGIN:\n%s\nBACKEND END\n", buffer_tostring(b)); // FIXME
+ //fprintf(stderr, "after = %lu, before = %lu\n", after, before);
+
+ // prepare for the next iteration
+ // to add incrementally data to buffer
+ after = before;
+
+ // ------------------------------------------------------------------------
+ // if we are connected, receive a response, without blocking
+
+ if(likely(sock != -1)) {
+ errno = 0;
+
+ // loop through to collect all data
+ while(sock != -1 && errno != EWOULDBLOCK) {
+ buffer_need_bytes(response, 4096);
+
+ ssize_t r = recv(sock, &response->buffer[response->len], response->size - response->len, MSG_DONTWAIT);
+ if(likely(r > 0)) {
+ // we received some data
+ response->len += r;
+ chart_received_bytes += r;
+ chart_receptions++;
+ }
+ else if(r == 0) {
+ error("Backend '%s' closed the socket", destination);
+ close(sock);
+ sock = -1;
+ }
+ else {
+ // failed to receive data
+ if(errno != EAGAIN && errno != EWOULDBLOCK) {
+ error("Cannot receive data from backend '%s'.", destination);
+ }
+ }
+ }
+
+ // if we received data, process them
+ if(buffer_strlen(response))
+ backend_response_checker(response);
+ }
+
+ // ------------------------------------------------------------------------
+ // if we are not connected, connect to a backend server
+
+ if(unlikely(sock == -1)) {
+ usec_t start_ut = now_monotonic_usec();
+ const char *s = destination;
+ while(*s) {
+ const char *e = s;
+
+ // skip separators, moving both s(tart) and e(nd)
+ while(isspace(*e) || *e == ',') s = ++e;
+
+ // move e(nd) to the first separator
+ while(*e && !isspace(*e) && *e != ',') e++;
+
+ // is there anything?
+ if(!*s || s == e) break;
+
+ char buf[e - s + 1];
+ strncpyz(buf, s, e - s);
+ chart_backend_reconnects++;
+ sock = connect_to(buf, default_port, &timeout);
+ if(sock != -1) break;
+ s = e;
+ }
+ chart_backend_latency += now_monotonic_usec() - start_ut;
+ }
+
+ if(unlikely(netdata_exit)) break;
+
+ // ------------------------------------------------------------------------
+ // if we are connected, send our buffer to the backend server
+
+ if(likely(sock != -1)) {
+ size_t len = buffer_strlen(b);
+ usec_t start_ut = now_monotonic_usec();
+ int flags = 0;
+#ifdef MSG_NOSIGNAL
+ flags += MSG_NOSIGNAL;
+#endif
+
+ ssize_t written = send(sock, buffer_tostring(b), len, flags);
+ chart_backend_latency += now_monotonic_usec() - start_ut;
+ if(written != -1 && (size_t)written == len) {
+ // we sent the data successfully
+ chart_transmission_successes++;
+ chart_sent_bytes += written;
+ chart_sent_metrics = chart_buffered_metrics;
+
+ // reset the failures count
+ failures = 0;
+
+ // empty the buffer
+ buffer_flush(b);
+ }
+ else {
+ // oops! we couldn't send (all or some of the) data
+ error("Failed to write data to database backend '%s'. Willing to write %zu bytes, wrote %zd bytes. Will re-connect.", destination, len, written);
+ chart_transmission_failures++;
+
+ if(written != -1)
+ chart_sent_bytes += written;
+
+ // increment the counter we check for data loss
+ failures++;
+
+ // close the socket - we will re-open it next time
+ close(sock);
+ sock = -1;
+ }
+ }
+ else {
+ error("Failed to update database backend '%s'", destination);
+ chart_transmission_failures++;
+
+ // increment the counter we check for data loss
+ failures++;
+ }
+
+ if(failures > buffer_on_failures) {
+ // too bad! we are going to lose data
+ chart_lost_bytes += buffer_strlen(b);
+ error("Reached %d backend failures. Flushing buffers to protect this host - this results in data loss on back-end server '%s'", failures, destination);
+ buffer_flush(b);
+ failures = 0;
+ chart_data_lost_events++;
+ chart_lost_metrics = chart_buffered_metrics;
+ }
+
+ if(unlikely(netdata_exit)) break;
+
+ // ------------------------------------------------------------------------
+ // update the monitoring charts
+
+ if(likely(chart_ops->counter_done)) rrdset_next(chart_ops);
+ rrddim_set(chart_ops, "read", chart_receptions);
+ rrddim_set(chart_ops, "write", chart_transmission_successes);
+ rrddim_set(chart_ops, "discard", chart_data_lost_events);
+ rrddim_set(chart_ops, "failure", chart_transmission_failures);
+ rrddim_set(chart_ops, "reconnect", chart_backend_reconnects);
+ rrdset_done(chart_ops);
+
+ if(likely(chart_metrics->counter_done)) rrdset_next(chart_metrics);
+ rrddim_set(chart_metrics, "buffered", chart_buffered_metrics);
+ rrddim_set(chart_metrics, "lost", chart_lost_metrics);
+ rrddim_set(chart_metrics, "sent", chart_sent_metrics);
+ rrdset_done(chart_metrics);
+
+ if(likely(chart_bytes->counter_done)) rrdset_next(chart_bytes);
+ rrddim_set(chart_bytes, "buffered", chart_buffered_bytes);
+ rrddim_set(chart_bytes, "lost", chart_lost_bytes);
+ rrddim_set(chart_bytes, "sent", chart_sent_bytes);
+ rrddim_set(chart_bytes, "received", chart_received_bytes);
+ rrdset_done(chart_bytes);
+
+ /*
+ if(likely(chart_latency->counter_done)) rrdset_next(chart_latency);
+ rrddim_set(chart_latency, "latency", chart_backend_latency);
+ rrdset_done(chart_latency);
+ */
+
+ getrusage(RUSAGE_THREAD, &thread);
+ if(likely(chart_rusage->counter_done)) rrdset_next(chart_rusage);
+ rrddim_set(chart_rusage, "user", thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec);
+ rrddim_set(chart_rusage, "system", thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec);
+ rrdset_done(chart_rusage);
+
+ if(likely(buffer_strlen(b) == 0))
+ chart_buffered_metrics = 0;
+
+ if(unlikely(netdata_exit)) break;