// Data source selection flags. backends_main() stores one of them in its
// `options` variable according to the "data source" configuration value, and
// backend_duration_average() tests BACKEND_SOURCE_DATA_SUM with a bitwise AND.
// "as collected": the collected_* formatters send rd->last_collected_value
3 #define BACKEND_SOURCE_DATA_AS_COLLECTED 0x00000001
// "average": the default data source
4 #define BACKEND_SOURCE_DATA_AVERAGE 0x00000002
// "sum" or "volume"
5 #define BACKEND_SOURCE_DATA_SUM 0x00000004
// Open a TCP (SOCK_STREAM) socket and connect it to a numeric IPv4 address.
//
// ip   - IPv4 address in text form; it is parsed with inet_pton(AF_INET, ...)
// port - destination port in host byte order (converted with htons())
//
// Failures of socket(), inet_pton() and connect() are reported with error().
// NOTE(review): the return statements and the cleanup on the failure paths are
// not visible in this excerpt. connect_to_one() stores the result in `fd` and
// keeps trying further addresses while it is -1 - confirm the exact contract.
7 int connect_to_socket4(const char *ip, int port) {
10 debug(D_LISTENER, "IPv4 connecting to ip '%s' port %d", ip, port);
// stream socket over IPv4
12 sock = socket(AF_INET, SOCK_STREAM, 0);
14 error("IPv4 socket() on ip '%s' port %d failed.", ip, port);
// build the destination address: family, port (network byte order), address
18 struct sockaddr_in name;
19 memset(&name, 0, sizeof(struct sockaddr_in));
20 name.sin_family = AF_INET;
21 name.sin_port = htons(port);
// text -> binary address; `ret` is checked on a line not visible here
23 int ret = inet_pton(AF_INET, ip, (void *)&name.sin_addr.s_addr);
25 error("Failed to convert '%s' to a valid IPv4 address.", ip);
30 if(connect(sock, (struct sockaddr *) &name, sizeof(name)) < 0) {
32 error("IPv4 failed to connect to '%s', port %d", ip, port);
36 debug(D_LISTENER, "Connected to IPv4 ip '%s' port %d", ip, port);
// Open a TCP (SOCK_STREAM) socket and connect it to a numeric IPv6 address.
//
// ip   - IPv6 address in text form; it is parsed with inet_pton(AF_INET6, ...)
// port - destination port in host byte order (cast to uint16_t, then htons())
//
// Failures of socket(), setsockopt(), inet_pton() and connect() are reported
// with error().
// NOTE(review): the return statements and the cleanup on the failure paths are
// not visible in this excerpt. connect_to_one() stores the result in `fd` and
// keeps trying further addresses while it is -1 - confirm the exact contract.
40 int connect_to_socket6(const char *ip, int port) {
44 debug(D_LISTENER, "IPv6 connecting to ip '%s' port %d", ip, port);
// stream socket over IPv6
46 sock = socket(AF_INET6, SOCK_STREAM, 0);
48 error("IPv6 socket() on ip '%s' port %d failed.", ip, port);
// set the IPV6_V6ONLY option; `ipv6only` is declared on a line not visible
// here, so the value being set cannot be confirmed from this excerpt.
// A failure is only logged.
53 if(setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&ipv6only, sizeof(ipv6only)) != 0)
54 error("Cannot set IPV6_V6ONLY on ip '%s' port's %d.", ip, port);
// build the destination address: family, port (network byte order), address
56 struct sockaddr_in6 name;
57 memset(&name, 0, sizeof(struct sockaddr_in6));
58 name.sin6_family = AF_INET6;
59 name.sin6_port = htons ((uint16_t) port);
// text -> binary address; `ret` is checked on a line not visible here
61 int ret = inet_pton(AF_INET6, ip, (void *)&name.sin6_addr.s6_addr);
63 error("Failed to convert IP '%s' to a valid IPv6 address.", ip);
// no scope (interface) is selected for the destination address
68 name.sin6_scope_id = 0;
70 if(connect(sock, (struct sockaddr *)&name, sizeof(name)) < 0) {
72 error("IPv6 failed to connect to '%s', port %d", ip, port);
76 debug(D_LISTENER, "Connected to IPv6 ip '%s' port %d", ip, port);
// Resolve one destination definition and connect to the first address that
// accepts a connection.
//
// definition   - destination text. The two scans below look for ']' and ':',
//                which suggests "[ipv6]:port" and "host:port" forms - the
//                lines that actually split the string are not visible in this
//                excerpt, so confirm the accepted syntax.
// default_port - port used when the definition carries none; it is formatted
//                into `buffer2`, which is the initial value of `port`.
//
// Every address returned by getaddrinfo() is converted back to text with
// inet_ntop() and handed to connect_to_socket4() or connect_to_socket6(),
// until `fd` is no longer -1 or the list is exhausted.
// NOTE(review): the declaration of `fd` and the return statement are not
// visible here; backends_main() treats a result of -1 as "not connected".
81 static inline int connect_to_one(const char *definition, int default_port) {
82 struct addrinfo hints;
83 struct addrinfo *result = NULL, *rp = NULL;
// private, writable copy of the definition
85 char buffer[strlen(definition) + 1];
86 strcpy(buffer, definition);
// the default port in text form, as getaddrinfo() expects a service string
89 snprintfz(buffer2, 10, "%d", default_port);
91 char *ip = buffer, *port = buffer2;
// advance e to the closing ']' (or to the end of the string)
96 while(*e && *e != ']') e++;
// advance e to the ':' separator (or to the end of the string)
103 while(*e && *e != ':') e++;
117 memset(&hints, 0, sizeof(struct addrinfo));
118 hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
// connect_to_socket4() and connect_to_socket6() create SOCK_STREAM sockets, so
// ask the resolver for stream results: a service name is then looked up for
// the same transport the connection will actually use.
119 hints.ai_socktype = SOCK_STREAM; /* Stream socket, matching the connect_to_socket*() helpers */
120 hints.ai_flags = AI_PASSIVE; /* For wildcard IP address */
121 hints.ai_protocol = 0; /* Any protocol */
122 hints.ai_canonname = NULL;
123 hints.ai_addr = NULL;
124 hints.ai_next = NULL;
126 int r = getaddrinfo(ip, port, &hints, &result);
128 error("Cannot resolve host '%s', port '%s': %s\n", ip, port, gai_strerror(r));
// try the resolved addresses in order, stopping at the first successful connect
133 for (rp = result; rp != NULL && fd == -1; rp = rp->ai_next) {
134 char rip[INET_ADDRSTRLEN + INET6_ADDRSTRLEN] = "INVALID";
137 switch (rp->ai_addr->sa_family) {
// IPv4 result: numeric address and port go to connect_to_socket4()
139 struct sockaddr_in *sin = (struct sockaddr_in *) rp->ai_addr;
140 inet_ntop(AF_INET, &sin->sin_addr, rip, INET_ADDRSTRLEN);
141 rport = ntohs(sin->sin_port);
142 fd = connect_to_socket4(rip, rport);
// IPv6 result: numeric address and port go to connect_to_socket6()
147 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) rp->ai_addr;
148 inet_ntop(AF_INET6, &sin6->sin6_addr, rip, INET6_ADDRSTRLEN);
149 rport = ntohs(sin6->sin6_port);
150 fd = connect_to_socket6(rip, rport);
156 freeaddrinfo(result);
// Aggregate the stored values of dimension `rd` of chart `st` over the
// time-frame (after, before].
//
// st      - the chart; supplies update_every, entries and the time<->slot mapping
// rd      - the dimension whose values[] are read
// after   - start of the time-frame (seconds); aligned to update_every below
// before  - end of the time-frame (seconds); aligned to update_every below
// options - BACKEND_SOURCE_DATA_* flags; BACKEND_SOURCE_DATA_SUM is tested
//
// The last visible statement returns sum / counter, i.e. the average.
// NOTE(review): the bodies of the guard conditions (misaligned iteration,
// clamping to first_t / last_t, after > before, no values found, SUM requested)
// and the accumulation into `sum` / `counter` are not visible in this excerpt -
// confirm what each of them returns or assigns.
161 static inline calculated_number backend_duration_average(RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
// time range currently held in the chart's database
162 time_t first_t = rrdset_first_entry_t(st);
163 time_t last_t = rrdset_last_entry_t(st);
// true when the time-frame is shorter than update_every and `after` is not a
// multiple of update_every
165 if(unlikely(before - after < st->update_every && after != after - after % st->update_every))
166 // when st->update_every is bigger than the frequency we send data to backend
167 // skip the iterations that are not aligned to the database
170 // align the time-frame
171 // for 'after' also skip the first value by adding st->update_every
172 after = after - after % st->update_every + st->update_every;
173 before = before - before % st->update_every;
175 if(unlikely(after < first_t))
178 if(unlikely(after > before))
179 // this can happen when the st->update_every > before - after
182 if(unlikely(before > last_t))
186 calculated_number sum = 0;
// the walk goes backwards in time: from the slot of `before` down to the slot
// of `after`
188 long start_at_slot = rrdset_time2slot(st, before),
189 stop_at_slot = rrdset_time2slot(st, after),
192 for(slot = start_at_slot; !stop_now ; slot--) {
// wrap around to the last slot when the index goes below zero
193 if(unlikely(slot < 0)) slot = st->entries - 1;
// the stop slot itself is still processed; the loop ends after this pass
194 if(unlikely(slot == stop_at_slot)) stop_now = 1;
196 storage_number n = rd->values[slot];
// skip slots for which does_storage_number_exist() reports no value
197 if(unlikely(!does_storage_number_exist(n))) continue;
199 calculated_number value = unpack_storage_number(n);
204 if(unlikely(!counter))
207 if(unlikely(options & BACKEND_SOURCE_DATA_SUM))
210 return sum / (calculated_number)counter;
// Append one dimension to `b` as a graphite plaintext line, using the last
// collected value:
//   "<prefix>.<hostname>.<chart id>.<dimension id> <value> <timestamp>\n"
// The value is rd->last_collected_value and the timestamp is the seconds part
// of rd->last_collected_time.
// host, after, before and options are not used by the visible statement.
// NOTE(review): the return statement is not visible in this excerpt;
// backends_main() adds the result to its buffered-metrics counter.
213 static inline int format_dimension_collected_graphite_plaintext(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
218 buffer_sprintf(b, "%s.%s.%s.%s " COLLECTED_NUMBER_FORMAT " %u\n", prefix, hostname, st->id, rd->id, rd->last_collected_value, (uint32_t)rd->last_collected_time.tv_sec);
// Append one dimension to `b` as a graphite plaintext line, using the value
// computed from the database by backend_duration_average() for the time-frame
// after..before:
//   "<prefix>.<hostname>.<chart id>.<dimension id> <value> <timestamp>\n"
// The timestamp sent is `before`.
// host is not used by the visible statements.
// NOTE(review): a line between the computation and the buffer_sprintf() call,
// and the return statement, are not visible in this excerpt - the write may be
// conditional on the computed value. backends_main() adds the result to its
// buffered-metrics counter.
222 static inline int format_dimension_stored_graphite_plaintext(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
224 calculated_number value = backend_duration_average(st, rd, after, before, options);
226 buffer_sprintf(b, "%s.%s.%s.%s " CALCULATED_NUMBER_FORMAT " %u\n", prefix, hostname, st->id, rd->id, value, (uint32_t) before);
// Append one dimension to `b` as an OpenTSDB telnet-style "put" line, using
// the last collected value:
//   "put <prefix>.<chart id>.<dimension id> <timestamp> <value> host=<hostname>\n"
// The value is rd->last_collected_value and the timestamp is the seconds part
// of rd->last_collected_time.
// host, after, before and options are not used by the visible statement.
// NOTE(review): the return statement is not visible in this excerpt;
// backends_main() adds the result to its buffered-metrics counter.
232 static inline int format_dimension_collected_opentsdb_telnet(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
237 buffer_sprintf(b, "put %s.%s.%s %u " COLLECTED_NUMBER_FORMAT " host=%s\n", prefix, st->id, rd->id, (uint32_t)rd->last_collected_time.tv_sec, rd->last_collected_value, hostname);
// Append one dimension to `b` as an OpenTSDB telnet-style "put" line, using
// the value computed from the database by backend_duration_average() for the
// time-frame after..before:
//   "put <prefix>.<chart id>.<dimension id> <timestamp> <value> host=<hostname>\n"
// The timestamp sent is `before`.
// host is not used by the visible statements.
// NOTE(review): a line between the computation and the buffer_sprintf() call,
// and the return statement, are not visible in this excerpt - the write may be
// conditional on the computed value. backends_main() adds the result to its
// buffered-metrics counter.
241 static inline int format_dimension_stored_opentsdb_telnet(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
243 calculated_number value = backend_duration_average(st, rd, after, before, options);
245 buffer_sprintf(b, "put %s.%s.%s %u " CALCULATED_NUMBER_FORMAT " host=%s\n", prefix, st->id, rd->id, (uint32_t) before, value, hostname);
// Backend exporter thread entry point.
//
// Reads the [backend] configuration section, selects a formatter for the
// configured backend type ("graphite" / "graphite:plaintext" or "opentsdb" /
// "opentsdb:telnet") and data source ("as collected", "average", "sum" /
// "volume"), prepares four self-monitoring charts and then, per iteration:
//   1. sleeps until the next multiple of `frequency` seconds, plus a quarter
//      of the step and a random offset computed once before the loop;
//   2. with thread cancellation disabled and the host / chart read locks held,
//      formats every dimension collected since `after` into buffer `b`;
//   3. when not connected, tries each comma / whitespace separated entry of
//      `destination` with connect_to_one() until one succeeds;
//   4. write()s the buffer to the socket and accounts for success or failure;
//   5. when `failures` exceeds "buffer on failures", counts the buffer as lost;
//   6. publishes the counters on the monitoring charts.
// The loop is left as soon as netdata_exit is set.
//
// ptr - thread argument; its use is not visible in this excerpt.
//
// NOTE(review): many lines of this function (declarations, branch bodies,
// the loop header and the function's end) are not visible in this excerpt;
// comments below describe only the visible statements.
251 void *backends_main(void *ptr) {
// output buffer holding the formatted metrics
254 BUFFER *b = buffer_create(1);
// formatter selected below from the backend type and the data source
255 int (*formatter)(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options);
257 info("BACKEND thread created with task id %d", gettid());
259 if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0)
260 error("Cannot set pthread cancel type to DEFERRED.");
262 if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
263 error("Cannot set pthread cancel state to ENABLE.");
265 // ------------------------------------------------------------------------
266 // collect configuration options
268 int default_port = 0;
270 uint32_t options = BACKEND_SOURCE_DATA_AVERAGE;
271 int enabled = config_get_boolean("backend", "enable", 0);
272 const char *source = config_get("backend", "data source", "average");
273 const char *type = config_get("backend", "type", "graphite");
274 const char *destination = config_get("backend", "destination", "localhost");
275 const char *prefix = config_get("backend", "prefix", "netdata");
276 const char *hostname = config_get("backend", "hostname", localhost.hostname);
277 int frequency = (int)config_get_number("backend", "update every", 10);
278 int buffer_on_failures = (int)config_get_number("backend", "buffer on failures", 10);
280 // ------------------------------------------------------------------------
281 // validate configuration options
282 // and prepare for sending data to our backend
// disabled, or an interval below one second (the action taken is on a line
// not visible in this excerpt)
283 if(!enabled || frequency < 1)
286 if(!strcmp(source, "as collected")) {
287 options = BACKEND_SOURCE_DATA_AS_COLLECTED;
289 else if(!strcmp(source, "average")) {
290 options = BACKEND_SOURCE_DATA_AVERAGE;
292 else if(!strcmp(source, "sum") || !strcmp(source, "volume")) {
293 options = BACKEND_SOURCE_DATA_SUM;
296 error("Invalid data source method '%s' for backend given. Disabling backend.", source);
// "as collected" uses the collected_* formatter, anything else the stored_* one
300 if(!strcmp(type, "graphite") || !strcmp(type, "graphite:plaintext")) {
302 if(options == BACKEND_SOURCE_DATA_AS_COLLECTED)
303 formatter = format_dimension_collected_graphite_plaintext;
305 formatter = format_dimension_stored_graphite_plaintext;
307 else if(!strcmp(type, "opentsdb") || !strcmp(type, "opentsdb:telnet")) {
309 if(options == BACKEND_SOURCE_DATA_AS_COLLECTED)
310 formatter = format_dimension_collected_opentsdb_telnet;
312 formatter = format_dimension_stored_opentsdb_telnet;
315 error("Unknown backend type '%s'", type);
319 // ------------------------------------------------------------------------
320 // prepare the charts for monitoring the backend
// counters published on the monitoring charts at the end of each iteration
323 chart_buffered_metrics = 0,
324 chart_lost_metrics = 0,
325 chart_sent_metrics = 0,
326 chart_buffered_bytes = 0,
327 chart_sent_bytes = 0,
328 chart_transmission_successes = 0,
329 chart_transmission_failures = 0,
330 chart_data_lost_events = 0,
331 chart_lost_bytes = 0,
332 chart_backend_reconnects = 0,
333 chart_backend_latency = 0;
// each chart is looked up first and then created with its dimensions
// (presumably only when the lookup fails - the guard is not visible here)
335 RRDSET *chart_metrics = rrdset_find("netdata.backend_metrics");
337 chart_metrics = rrdset_create("netdata", "backend_metrics", NULL, "backend", NULL, "Netdata Buffered Metrics", "metrics", 130600, frequency, RRDSET_TYPE_LINE);
338 rrddim_add(chart_metrics, "buffered", NULL, 1, 1, RRDDIM_ABSOLUTE);
339 rrddim_add(chart_metrics, "lost", NULL, 1, 1, RRDDIM_ABSOLUTE);
340 rrddim_add(chart_metrics, "sent", NULL, 1, 1, RRDDIM_ABSOLUTE);
343 RRDSET *chart_bytes = rrdset_find("netdata.backend_bytes");
345 chart_bytes = rrdset_create("netdata", "backend_bytes", NULL, "backend", NULL, "Netdata Backend Data Size", "KB", 130610, frequency, RRDSET_TYPE_AREA);
346 rrddim_add(chart_bytes, "buffered", NULL, 1, 1024, RRDDIM_ABSOLUTE);
347 rrddim_add(chart_bytes, "lost", NULL, 1, 1024, RRDDIM_ABSOLUTE);
348 rrddim_add(chart_bytes, "sent", NULL, 1, 1024, RRDDIM_ABSOLUTE);
351 RRDSET *chart_ops = rrdset_find("netdata.backend_ops");
353 chart_ops = rrdset_create("netdata", "backend_ops", NULL, "backend", NULL, "Netdata Backend Operations", "operations", 130630, frequency, RRDSET_TYPE_LINE);
354 rrddim_add(chart_ops, "write", NULL, 1, 1, RRDDIM_ABSOLUTE);
355 rrddim_add(chart_ops, "discard", NULL, 1, 1, RRDDIM_ABSOLUTE);
356 rrddim_add(chart_ops, "reconnect", NULL, 1, 1, RRDDIM_ABSOLUTE);
357 rrddim_add(chart_ops, "failure", NULL, 1, 1, RRDDIM_ABSOLUTE);
360 RRDSET *chart_latency = rrdset_find("netdata.backend_latency");
362 chart_latency = rrdset_create("netdata", "backend_latency", NULL, "backend", NULL, "Netdata Backend Latency", "ms", 130620, frequency, RRDSET_TYPE_AREA);
363 rrddim_add(chart_latency, "latency", NULL, 1, 1000, RRDDIM_ABSOLUTE);
366 // ------------------------------------------------------------------------
367 // prepare the backend main loop
369 info("BACKEND configured ('%s' on '%s' sending '%s' data, every %d seconds, as host '%s', with prefix '%s')", type, destination, source, frequency, hostname, prefix);
// the step in microseconds, and a per-thread offset of up to half a step
371 unsigned long long step_ut = frequency * 1000000ULL;
372 unsigned long long random_ut = time_usec() % (step_ut / 2);
// the first time-frame starts one step in the past; the value is in
// microseconds, so it is divided by 1000000 to get seconds - the same
// conversion that is applied to next_ut inside the loop
373 time_t before = (time_t)((time_usec() - step_ut) / 1000000ULL);
374 time_t after = before;
378 // ------------------------------------------------------------------------
379 // wait for the next iteration point
// next_ut is the next multiple of the step; `before` is that moment in seconds
381 unsigned long long now_ut = time_usec();
382 unsigned long long next_ut = now_ut - (now_ut % step_ut) + step_ut;
383 before = (time_t)(next_ut / 1000000ULL);
385 // add a little delay (1/4 of the step) plus some randomness
386 next_ut += (step_ut / 4) + random_ut;
// keep sleeping until the target time has really been reached
388 while(now_ut < next_ut) {
389 sleep_usec(next_ut - now_ut);
390 now_ut = time_usec();
393 // ------------------------------------------------------------------------
394 // add to the buffer the data we need to send to the backend
397 int pthreadoldcancelstate;
// do not allow cancellation while the locks below are held
399 if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &pthreadoldcancelstate) != 0))
400 error("Cannot set pthread cancel state to DISABLE.");
402 rrdhost_rdlock(&localhost);
403 for(st = localhost.rrdset_root; st ;st = st->next) {
404 pthread_rwlock_rdlock(&st->rwlock);
407 for(rd = st->dimensions; rd ;rd = rd->next) {
// only dimensions collected at or after the start of the time-frame
408 if(rd->last_collected_time.tv_sec >= after)
409 chart_buffered_metrics += formatter(b, prefix, &localhost, hostname, st, rd, after, before, options);
412 pthread_rwlock_unlock(&st->rwlock);
414 rrdhost_unlock(&localhost);
416 if(unlikely(pthread_setcancelstate(pthreadoldcancelstate, NULL) != 0))
417 error("Cannot set pthread cancel state to RESTORE (%d).", pthreadoldcancelstate);
419 chart_buffered_bytes = (collected_number)buffer_strlen(b);
421 // reset the monitoring chart counters
425 chart_transmission_successes =
426 chart_transmission_failures =
427 chart_data_lost_events =
429 chart_backend_reconnects =
430 chart_backend_latency = 0;
432 if(unlikely(netdata_exit)) break;
434 //fprintf(stderr, "\nBACKEND BEGIN:\n%s\nBACKEND END\n", buffer_tostring(b)); // FIXME
435 //fprintf(stderr, "after = %lu, before = %lu\n", after, before);
437 // ------------------------------------------------------------------------
438 // connect to a backend server
440 if(unlikely(sock == -1)) {
441 unsigned long long start_ut = time_usec();
442 const char *s = destination;
446 // skip separators, moving both s(tart) and e(nd)
// isspace() requires a value representable as unsigned char (or EOF), hence
// the cast of the plain char
447 while(isspace((unsigned char)*e) || *e == ',') s = ++e;
449 // move e(nd) to the first separator
450 while(*e && !isspace((unsigned char)*e) && *e != ',') e++;
452 // is there anything?
453 if(!*s || s == e) break;
// copy the current entry and try to connect to it
456 strncpyz(buf, s, e - s);
457 chart_backend_reconnects++;
458 sock = connect_to_one(buf, default_port);
459 if(sock != -1) break;
// the time spent (re)connecting is part of the reported latency
462 chart_backend_latency += time_usec() - start_ut;
465 if(unlikely(netdata_exit)) break;
467 // ------------------------------------------------------------------------
468 // send our buffer to the backend server
470 if(likely(sock != -1)) {
471 size_t len = buffer_strlen(b);
472 unsigned long long start_ut = time_usec();
473 ssize_t written = write(sock, buffer_tostring(b), len);
474 chart_backend_latency += time_usec() - start_ut;
// only a complete write counts as a success
475 if(written != -1 && (size_t)written == len) {
476 // we sent the data successfully
477 chart_transmission_successes++;
478 chart_sent_bytes += written;
479 chart_sent_metrics = chart_buffered_metrics;
481 // reset the failures count
488 // oops! we couldn't send (all or some of the) data
489 error("Failed to write data to database backend '%s'. Willing to write %zu bytes, wrote %zd bytes. Will re-connect.", destination, len, written);
490 chart_transmission_failures++;
// NOTE(review): `written` can be -1 on this path; confirm that the line
// preceding this statement (not visible in this excerpt) guards against it
493 chart_sent_bytes += written;
495 // increment the counter we check for data loss
498 // close the socket - we will re-open it next time
503 // either the buffer is empty
504 // or is holding the data we couldn't send
505 // so, make sure the next iteration will continue
506 // from where we are now
510 error("Failed to update database backend '%s'", destination);
511 chart_transmission_failures++;
513 // increment the counter we check for data loss
517 if(failures > buffer_on_failures) {
518 // too bad! we are going to lose data
519 chart_lost_bytes += buffer_strlen(b);
520 error("Reached %d backend failures. Flushing buffers to protect this host - this results in data loss on back-end server '%s'", failures, destination);
523 chart_data_lost_events++;
524 chart_lost_metrics = chart_buffered_metrics;
527 if(unlikely(netdata_exit)) break;
529 // ------------------------------------------------------------------------
530 // update the monitoring charts
// rrdset_next() is called only once a chart has completed a previous update
// (counter_done is non-zero)
532 if(chart_ops->counter_done) rrdset_next(chart_ops);
533 rrddim_set(chart_ops, "write", chart_transmission_successes);
534 rrddim_set(chart_ops, "discard", chart_data_lost_events);
535 rrddim_set(chart_ops, "failure", chart_transmission_failures);
536 rrddim_set(chart_ops, "reconnect", chart_backend_reconnects);
537 rrdset_done(chart_ops);
539 if(chart_metrics->counter_done) rrdset_next(chart_metrics);
540 rrddim_set(chart_metrics, "buffered", chart_buffered_metrics);
541 rrddim_set(chart_metrics, "lost", chart_lost_metrics);
542 rrddim_set(chart_metrics, "sent", chart_sent_metrics);
543 rrdset_done(chart_metrics);
545 if(chart_bytes->counter_done) rrdset_next(chart_bytes);
546 rrddim_set(chart_bytes, "buffered", chart_buffered_bytes);
547 rrddim_set(chart_bytes, "lost", chart_lost_bytes);
548 rrddim_set(chart_bytes, "sent", chart_sent_bytes);
549 rrdset_done(chart_bytes);
551 if(chart_latency->counter_done) rrdset_next(chart_latency);
552 rrddim_set(chart_latency, "latency", chart_backend_latency);
553 rrdset_done(chart_latency);
// an empty buffer means nothing is pending, so the buffered-metrics count
// restarts from zero
555 if(likely(buffer_strlen(b) == 0))
556 chart_buffered_metrics = 0;
558 if(unlikely(netdata_exit)) break;
565 info("BACKEND thread exiting");