// Data-source selectors for the backend. backends_main() stores exactly one
// of these in its `options` word, according to the "data source" setting.

// "as collected": selects the format_dimension_collected_* formatters
3 #define BACKEND_SOURCE_DATA_AS_COLLECTED 0x00000001
// "average": also the initial value of `options` in backends_main()
4 #define BACKEND_SOURCE_DATA_AVERAGE 0x00000002
// "sum" or "volume": tested as a bit mask by backend_duration_average()
5 #define BACKEND_SOURCE_DATA_SUM 0x00000004
// Create an IPv4 stream socket and connect it to a numeric address.
//
// ip   - IPv4 address in text form; it is parsed with inet_pton(), so a
//        host name is not accepted here.
// port - destination port in host byte order; htons() converts it.
//
// The return statements are not part of this listing. connect_to_one()
// keeps the result in `fd` and treats -1 as "not connected"; presumably the
// connected descriptor is returned on success - TODO confirm.
7 int connect_to_socket4(const char *ip, int port) {
10 debug(D_LISTENER, "IPv4 connecting to ip '%s' port %d", ip, port);
// stream socket; protocol 0 leaves the protocol choice to the system
12 sock = socket(AF_INET, SOCK_STREAM, 0);
// logged for a failed socket(); the check and the bail-out are not shown here
14 error("IPv4 socket() on ip '%s' port %d failed.", ip, port);
// destination address: zeroed first, then family and port are filled in
18 struct sockaddr_in name;
19 memset(&name, 0, sizeof(struct sockaddr_in));
20 name.sin_family = AF_INET;
// NOTE(review): `port` is an int handed to htons() without the explicit
// (uint16_t) cast that connect_to_socket6() uses - confirm callers keep it
// within 0..65535.
21 name.sin_port = htons(port);
// text -> binary address; `ret` is examined in lines not shown here
23 int ret = inet_pton(AF_INET, ip, (void *)&name.sin_addr.s_addr);
// NOTE(review): the cleanup on this failure path is not visible - confirm
// the socket is closed before bailing out.
25 error("Failed to convert '%s' to a valid IPv4 address.", ip);
// connect() reports failure with a negative return value
30 if(connect(sock, (struct sockaddr *) &name, sizeof(name)) < 0) {
32 error("IPv4 failed to connect to '%s', port %d", ip, port);
36 debug(D_LISTENER, "Connected to IPv4 ip '%s' port %d", ip, port);
// Create an IPv6 stream socket and connect it to a numeric address.
//
// ip   - IPv6 address in text form; it is parsed with inet_pton(), so a
//        host name is not accepted here.
// port - destination port in host byte order; it is cast to uint16_t and
//        converted with htons().
//
// The return statements are not part of this listing. connect_to_one()
// keeps the result in `fd` and treats -1 as "not connected"; presumably the
// connected descriptor is returned on success - TODO confirm.
40 int connect_to_socket6(const char *ip, int port) {
44 debug(D_LISTENER, "IPv6 connecting to ip '%s' port %d", ip, port);
// stream socket; protocol 0 leaves the protocol choice to the system
46 sock = socket(AF_INET6, SOCK_STREAM, 0);
// logged for a failed socket(); the check and the bail-out are not shown here
48 error("IPv6 socket() on ip '%s' port %d failed.", ip, port);
// IPV6_V6ONLY is set from `ipv6only`, whose declaration and value are not
// shown in this listing; a setsockopt() failure is logged
53 if(setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&ipv6only, sizeof(ipv6only)) != 0)
54 error("Cannot set IPV6_V6ONLY on ip '%s' port's %d.", ip, port);
// destination address: zeroed first, then family and port are filled in
56 struct sockaddr_in6 name;
57 memset(&name, 0, sizeof(struct sockaddr_in6));
58 name.sin6_family = AF_INET6;
59 name.sin6_port = htons ((uint16_t) port);
// text -> binary address; `ret` is examined in lines not shown here
61 int ret = inet_pton(AF_INET6, ip, (void *)&name.sin6_addr.s6_addr);
// NOTE(review): the cleanup on this failure path is not visible - confirm
// the socket is closed before bailing out.
63 error("Failed to convert IP '%s' to a valid IPv6 address.", ip);
// NOTE(review): the scope id is always 0 (already so after the memset), so
// presumably link-local destinations that need an interface scope cannot be
// reached through this function - confirm.
68 name.sin6_scope_id = 0;
// connect() reports failure with a negative return value
70 if(connect(sock, (struct sockaddr *)&name, sizeof(name)) < 0) {
72 error("IPv6 failed to connect to '%s', port %d", ip, port);
76 debug(D_LISTENER, "Connected to IPv6 ip '%s' port %d", ip, port);
// Resolve one destination definition with getaddrinfo() and try to connect
// to the returned addresses one by one, stopping at the first success.
//
// definition   - destination text; it is copied to a local buffer before it
//                is split (presumably "host", "host:port" or "[ipv6]:port" -
//                the branches that do the splitting are not shown here).
// default_port - rendered as a decimal string and used as the initial
//                service/port argument.
//
// The return statement is not part of this listing; the loop works on `fd`,
// which stays -1 until one of the connect helpers returns something else.
81 static inline int connect_to_one(const char *definition, int default_port) {
82 struct addrinfo hints;
83 struct addrinfo *result = NULL, *rp = NULL;
// private, writable copy of the definition: a variable-length array sized
// to hold the string plus its terminator
85 char buffer[strlen(definition) + 1];
86 strcpy(buffer, definition);
// default port as text; `buffer2` is declared in a line not shown here
89 snprintfz(buffer2, 10, "%d", default_port);
91 char *ip = buffer, *port = buffer2;
// advance `e` to the closing bracket (or the end of the string)
96 while(*e && *e != ']') e++;
// advance `e` to the first colon (or the end of the string)
103 while(*e && *e != ':') e++;
// NOTE(review): the hints request SOCK_DGRAM, while connect_to_socket4()
// and connect_to_socket6() open SOCK_STREAM sockets, and AI_PASSIVE is
// meant for addresses that will be passed to bind(). Only the address and
// port of each result are used below, so this may be harmless, but
// SOCK_STREAM without AI_PASSIVE looks like the intent - confirm.
117 memset(&hints, 0, sizeof(struct addrinfo));
118 hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
119 hints.ai_socktype = SOCK_DGRAM; /* Datagram socket */
120 hints.ai_flags = AI_PASSIVE; /* For wildcard IP address */
121 hints.ai_protocol = 0; /* Any protocol */
122 hints.ai_canonname = NULL;
123 hints.ai_addr = NULL;
124 hints.ai_next = NULL;
126 int r = getaddrinfo(ip, port, &hints, &result);
// resolver failure: gai_strerror() turns the code into readable text
128 error("Cannot resolve host '%s', port '%s': %s\n", ip, port, gai_strerror(r));
// walk the result list until it ends or a connection has been made
133 for (rp = result; rp != NULL && fd == -1; rp = rp->ai_next) {
// big enough for the text form of either family; keeps "INVALID" unless
// inet_ntop() overwrites it (its return value is not checked below)
134 char rip[INET_ADDRSTRLEN + INET6_ADDRSTRLEN] = "INVALID";
137 switch (rp->ai_addr->sa_family) {
// IPv4 result: back to text and host byte order, then connect
139 struct sockaddr_in *sin = (struct sockaddr_in *) rp->ai_addr;
140 inet_ntop(AF_INET, &sin->sin_addr, rip, INET_ADDRSTRLEN);
141 rport = ntohs(sin->sin_port);
142 fd = connect_to_socket4(rip, rport);
// IPv6 result: back to text and host byte order, then connect
147 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) rp->ai_addr;
148 inet_ntop(AF_INET6, &sin6->sin6_addr, rip, INET6_ADDRSTRLEN);
149 rport = ntohs(sin6->sin6_port);
150 fd = connect_to_socket6(rip, rport);
// release the list allocated by getaddrinfo()
156 freeaddrinfo(result);
// Aggregate the stored values of dimension `rd` of chart `st` over the time
// frame (after, before], after aligning the frame to st->update_every.
//
// st      - chart that owns the dimension; supplies update_every, entries
//           and the time <-> slot mapping.
// rd      - dimension whose values[] are read.
// after   - start of the time frame (seconds); see the alignment below.
// before  - end of the time frame (seconds).
// options - BACKEND_SOURCE_DATA_* word; the SUM bit is tested before the
//           final return.
//
// The last visible statement returns sum / counter, i.e. the average. What
// is returned for the early exits, for an empty result and for the SUM
// option is decided in lines that are not part of this listing.
161 static inline calculated_number backend_duration_average(RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
// bounds of the data currently held for this chart
162 time_t first_t = rrdset_first_entry_t(st);
163 time_t last_t = rrdset_last_entry_t(st);
// the second test is true exactly when `after` is not a multiple of
// st->update_every
165 if(unlikely(before - after < st->update_every && after != after - after % st->update_every))
166 // when st->update_every is bigger than the frequency we send data to backend
167 // skip the iterations that are not aligned to the database
170 // align the time-frame
171 // for 'after' also skip the first value by adding st->update_every
172 after = after - after % st->update_every + st->update_every;
173 before = before - before % st->update_every;
// the frame starts before the first stored entry (handling not shown here)
175 if(unlikely(after < first_t))
178 if(unlikely(after > before))
179 // this can happen when the st->update_every > before - after
// the frame ends after the last stored entry (handling not shown here)
182 if(unlikely(before > last_t))
186 calculated_number sum = 0;
// iterate from the slot of `before` back to the slot of `after`
188 long start_at_slot = rrdset_time2slot(st, before),
189 stop_at_slot = rrdset_time2slot(st, after),
// stop_now is raised on reaching stop_at_slot, but the loop condition is
// only re-evaluated after the body, so that slot is still processed
192 for(slot = start_at_slot; !stop_now ; slot--) {
// moving below slot 0 wraps around to the last slot
193 if(unlikely(slot < 0)) slot = st->entries - 1;
194 if(unlikely(slot == stop_at_slot)) stop_now = 1;
// slots that hold no value are skipped
196 storage_number n = rd->values[slot];
197 if(unlikely(!does_storage_number_exist(n))) continue;
// the lines that add `value` to `sum` and count it are not shown here
199 calculated_number value = unpack_storage_number(n);
// nothing was counted (result for this case not shown here)
204 if(unlikely(!counter))
// sum/volume requested (result for this case not shown here)
207 if(unlikely(options & BACKEND_SOURCE_DATA_SUM))
210 return sum / (calculated_number)counter;
// Graphite plaintext formatter for "as collected" data: appends one line
//   <prefix>.<hostname>.<chart id>.<dimension id> <value> <timestamp>
// to `b`, using the dimension's last collected value and the seconds part
// of its last collection time (printed as a 32-bit unsigned number).
//
// host, after, before and options do not appear in the visible statement.
// The return statement is not part of this listing; backends_main() adds the
// returned int to its buffered-metrics counter.
213 static inline int format_dimension_collected_graphite_plaintext(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
218 buffer_sprintf(b, "%s.%s.%s.%s " COLLECTED_NUMBER_FORMAT " %u\n", prefix, hostname, st->id, rd->id, rd->last_collected_value, (uint32_t)rd->last_collected_time.tv_sec);
// Graphite plaintext formatter for stored data: appends one line
//   <prefix>.<hostname>.<chart id>.<dimension id> <value> <timestamp>
// to `b`, where the value comes from backend_duration_average() for the
// given time frame and the timestamp is `before` (printed as a 32-bit
// unsigned number).
//
// host does not appear in the visible statements. Any check on `value`
// before it is printed, and the return statement, are not part of this
// listing; backends_main() adds the returned int to its buffered-metrics
// counter.
222 static inline int format_dimension_stored_graphite_plaintext(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
224 calculated_number value = backend_duration_average(st, rd, after, before, options);
226 buffer_sprintf(b, "%s.%s.%s.%s " CALCULATED_NUMBER_FORMAT " %u\n", prefix, hostname, st->id, rd->id, value, (uint32_t) before);
// OpenTSDB telnet formatter for "as collected" data: appends one line
//   put <prefix>.<chart id>.<dimension id> <timestamp> <value> host=<hostname>
// to `b`, using the dimension's last collected value and the seconds part
// of its last collection time (printed as a 32-bit unsigned number).
//
// host, after, before and options do not appear in the visible statement.
// The return statement is not part of this listing; backends_main() adds the
// returned int to its buffered-metrics counter.
232 static inline int format_dimension_collected_opentsdb_telnet(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
237 buffer_sprintf(b, "put %s.%s.%s %u " COLLECTED_NUMBER_FORMAT " host=%s\n", prefix, st->id, rd->id, (uint32_t)rd->last_collected_time.tv_sec, rd->last_collected_value, hostname);
// OpenTSDB telnet formatter for stored data: appends one line
//   put <prefix>.<chart id>.<dimension id> <timestamp> <value> host=<hostname>
// to `b`, where the value comes from backend_duration_average() for the
// given time frame and the timestamp is `before` (printed as a 32-bit
// unsigned number).
//
// host does not appear in the visible statements. Any check on `value`
// before it is printed, and the return statement, are not part of this
// listing; backends_main() adds the returned int to its buffered-metrics
// counter.
241 static inline int format_dimension_stored_opentsdb_telnet(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options) {
243 calculated_number value = backend_duration_average(st, rd, after, before, options);
245 buffer_sprintf(b, "put %s.%s.%s %u " CALCULATED_NUMBER_FORMAT " host=%s\n", prefix, st->id, rd->id, (uint32_t) before, value, hostname);
// Thread entry point of the metrics backend exporter.
//
// Reads the [backend] configuration, picks a formatter for the configured
// type and data source, prepares three self-monitoring charts and then, per
// iteration: waits for the next aligned time slot, formats the dimensions of
// localhost into buffer `b`, (re)connects to one of the configured
// destinations when there is no socket, writes the buffer and updates the
// monitoring charts. netdata_exit is checked at several points to leave.
//
// ptr - thread argument; its use is not visible in this listing.
// The return statement is not part of this listing either.
251 void *backends_main(void *ptr) {
// output buffer that accumulates the formatted metrics
254 BUFFER *b = buffer_create(1);
// formatter selected below; shares the signature of the format_dimension_*
// functions
255 int (*formatter)(BUFFER *b, const char *prefix, RRDHOST *host, const char *hostname, RRDSET *st, RRDDIM *rd, time_t after, time_t before, uint32_t options);
257 info("BACKEND thread created with task id %d", gettid());
// the thread is cancellable, but only at cancellation points (deferred)
259 if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0)
260 error("Cannot set pthread cancel type to DEFERRED.");
262 if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
263 error("Cannot set pthread cancel state to ENABLE.");
265 // ------------------------------------------------------------------------
266 // collect configuration options
// presumably overwritten per backend type in lines not shown here; it is
// later passed to connect_to_one()
268 int default_port = 0;
270 uint32_t options = BACKEND_SOURCE_DATA_AVERAGE;
271 int enabled = config_get_boolean("backend", "enable", 0);
272 const char *source = config_get("backend", "data source", "average");
273 const char *type = config_get("backend", "type", "graphite");
274 const char *destination = config_get("backend", "destination", "localhost");
275 const char *prefix = config_get("backend", "prefix", "netdata");
276 const char *hostname = config_get("backend", "hostname", localhost.hostname);
// seconds between two transmissions
277 int frequency = (int)config_get_number("backend", "update every", 10);
// compared against the `failures` counter before buffered data is dropped
278 int buffer_on_failures = (int)config_get_number("backend", "buffer on failures", 10);
280 // ------------------------------------------------------------------------
281 // validate configuration options
282 // and prepare for sending data to our backend
// disabled, or a non-positive frequency (the action taken is not shown here)
283 if(!enabled || frequency < 1)
// map the "data source" text to exactly one BACKEND_SOURCE_DATA_* value
286 if(!strcmp(source, "as collected")) {
287 options = BACKEND_SOURCE_DATA_AS_COLLECTED;
289 else if(!strcmp(source, "average")) {
290 options = BACKEND_SOURCE_DATA_AVERAGE;
292 else if(!strcmp(source, "sum") || !strcmp(source, "volume")) {
293 options = BACKEND_SOURCE_DATA_SUM;
// NOTE(review): "backed" in this message looks like a typo for "backend"
296 error("Invalid data source method '%s' for backend given. Disabling backed.", source);
// pick the formatter: per backend type, "as collected" uses the collected
// variant and every other data source the stored variant
300 if(!strcmp(type, "graphite") || !strcmp(type, "graphite:plaintext")) {
302 if(options == BACKEND_SOURCE_DATA_AS_COLLECTED)
303 formatter = format_dimension_collected_graphite_plaintext;
305 formatter = format_dimension_stored_graphite_plaintext;
307 else if(!strcmp(type, "opentsdb") || !strcmp(type, "opentsdb:telnet")) {
309 if(options == BACKEND_SOURCE_DATA_AS_COLLECTED)
310 formatter = format_dimension_collected_opentsdb_telnet;
312 formatter = format_dimension_stored_opentsdb_telnet;
315 error("Unknown backend type '%s'", type);
319 // ------------------------------------------------------------------------
320 // prepare the charts for monitoring the backend
// counters behind the three monitoring charts (the declaration line that
// starts this list is not shown here)
323 chart_buffered_metrics = 0,
324 chart_lost_metrics = 0,
325 chart_sent_metrics = 0,
326 chart_buffered_bytes = 0,
327 chart_sent_bytes = 0,
328 chart_transmission_successes = 0,
329 chart_transmission_failures = 0,
330 chart_data_lost_events = 0,
331 chart_lost_bytes = 0,
332 chart_backend_reconnects = 0;
// each chart is looked up first; the test that guards the creation below is
// in a line not shown here
334 RRDSET *chart_metrics = rrdset_find("netdata.backend_metrics");
336 chart_metrics = rrdset_create("netdata", "backend_metrics", NULL, "backend", NULL, "Netdata Buffered Metrics", "metrics", 130600, frequency, RRDSET_TYPE_LINE);
337 rrddim_add(chart_metrics, "buffered", NULL, 1, 1, RRDDIM_ABSOLUTE);
338 rrddim_add(chart_metrics, "lost", NULL, 1, 1, RRDDIM_ABSOLUTE);
339 rrddim_add(chart_metrics, "sent", NULL, 1, 1, RRDDIM_ABSOLUTE);
// the chart unit is "KB" and the dimensions are added with 1, 1024 -
// presumably multiplier and divisor, turning bytes into KB (TODO confirm)
342 RRDSET *chart_bytes = rrdset_find("netdata.backend_bytes");
344 chart_bytes = rrdset_create("netdata", "backend_bytes", NULL, "backend", NULL, "Netdata Backend Data Size", "KB", 130610, frequency, RRDSET_TYPE_AREA);
345 rrddim_add(chart_bytes, "buffered", NULL, 1, 1024, RRDDIM_ABSOLUTE);
346 rrddim_add(chart_bytes, "lost", NULL, 1, 1024, RRDDIM_ABSOLUTE);
347 rrddim_add(chart_bytes, "sent", NULL, 1, 1024, RRDDIM_ABSOLUTE);
350 RRDSET *chart_ops = rrdset_find("netdata.backend_ops");
352 chart_ops = rrdset_create("netdata", "backend_ops", NULL, "backend", NULL, "Netdata Backend Operations", "operations", 130620, frequency, RRDSET_TYPE_LINE);
353 rrddim_add(chart_ops, "write", NULL, 1, 1, RRDDIM_ABSOLUTE);
354 rrddim_add(chart_ops, "discard", NULL, 1, 1, RRDDIM_ABSOLUTE);
355 rrddim_add(chart_ops, "reconnect", NULL, 1, 1, RRDDIM_ABSOLUTE);
356 rrddim_add(chart_ops, "failure", NULL, 1, 1, RRDDIM_ABSOLUTE);
359 // ------------------------------------------------------------------------
360 // prepare the backend main loop
362 info("BACKEND configured ('%s' on '%s' sending '%s' data, every %d seconds, as host '%s', with prefix '%s')", type, destination, source, frequency, hostname, prefix);
// the transmission period in microseconds
364 unsigned long long step_ut = frequency * 1000000ULL;
// fixed per-thread offset in [0, step_ut / 2), derived from the start time
365 unsigned long long random_ut = time_usec() % (step_ut / 2);
// NOTE(review): this divides by 10000000ULL (ten million), while the other
// microseconds-to-seconds conversion below divides by 1000000ULL - it looks
// like one zero too many, which would make the initial before/after roughly
// a tenth of the intended timestamp. Confirm and fix.
366 time_t before = (time_t)((time_usec() - step_ut) / 10000000ULL);
367 time_t after = before;
371 // ------------------------------------------------------------------------
372 // wait for the next iteration point
// next_ut is the next multiple of step_ut after now; `before` is that
// boundary in seconds
374 unsigned long long now_ut = time_usec();
375 unsigned long long next_ut = now_ut - (now_ut % step_ut) + step_ut;
376 before = (time_t)(next_ut / 1000000ULL);
378 // add a little delay (1/4 of the step) plus some randomness
379 next_ut += (step_ut / 4) + random_ut;
// sleep, re-reading the clock, until the target time has been reached
381 while(now_ut < next_ut) {
382 sleep_usec(next_ut - now_ut);
383 now_ut = time_usec();
386 // ------------------------------------------------------------------------
387 // add to the buffer the data we need to send to the backend
// cancellation is disabled around the section that takes the host and chart
// locks; the previous state is saved and restored afterwards
390 int pthreadoldcancelstate;
392 if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &pthreadoldcancelstate) != 0))
393 error("Cannot set pthread cancel state to DISABLE.");
// read-lock the host, then each chart while its dimensions are formatted
395 rrdhost_rdlock(&localhost);
396 for(st = localhost.rrdset_root; st ;st = st->next) {
397 pthread_rwlock_rdlock(&st->rwlock);
400 for(rd = st->dimensions; rd ;rd = rd->next) {
// only dimensions collected at or after `after` are formatted; the value
// returned by the formatter is added to the buffered-metrics counter
401 if(rd->last_collected_time.tv_sec >= after)
402 chart_buffered_metrics += formatter(b, prefix, &localhost, hostname, st, rd, after, before, options);
405 pthread_rwlock_unlock(&st->rwlock);
407 rrdhost_unlock(&localhost);
409 if(unlikely(pthread_setcancelstate(pthreadoldcancelstate, NULL) != 0))
410 error("Cannot set pthread cancel state to RESTORE (%d).", pthreadoldcancelstate);
// everything currently in the buffer counts as buffered bytes
412 chart_buffered_bytes = (collected_number)buffer_strlen(b);
414 // reset the monitoring chart counters
// one chained assignment ending in "= 0" (parts of the chain are in lines
// not shown here)
418 chart_transmission_successes =
419 chart_transmission_failures =
420 chart_data_lost_events =
422 chart_backend_reconnects = 0;
424 if(unlikely(netdata_exit)) break;
426 //fprintf(stderr, "\nBACKEND BEGIN:\n%s\nBACKEND END\n", buffer_tostring(b)); // FIXME
427 //fprintf(stderr, "after = %lu, before = %lu\n", after, before);
429 // ------------------------------------------------------------------------
430 // connect to a backend server
// no socket yet: split `destination` on whitespace and commas and try each
// entry in order until one connects
432 if(unlikely(sock == -1)) {
433 const char *s = destination;
437 // skip separators, moving both s(tart) and e(nd)
// NOTE(review): if `e` points to plain char (its declaration is not shown),
// the argument of isspace() should be cast to unsigned char - confirm.
438 while(isspace(*e) || *e == ',') s = ++e;
440 // move e(nd) to the first separator
441 while(*e && !isspace(*e) && *e != ',') e++;
443 // is there anything?
444 if(!*s || s == e) break;
// copy the entry [s, e) into `buf` (declared in a line not shown here);
// every attempt is counted as a reconnect
447 strncpyz(buf, s, e - s);
448 chart_backend_reconnects++;
449 sock = connect_to_one(buf, default_port);
450 if(sock != -1) break;
455 if(unlikely(netdata_exit)) break;
457 // ------------------------------------------------------------------------
458 // send our buffer to the backend server
// the whole buffer is handed to a single write(); anything but a complete
// write is handled as a failure below
460 if(likely(sock != -1)) {
461 size_t len = buffer_strlen(b);
462 ssize_t written = write(sock, buffer_tostring(b), len);
463 if(written != -1 && (size_t)written == len) {
464 // we sent the data successfully
465 chart_transmission_successes++;
466 chart_sent_bytes += written;
467 chart_sent_metrics = chart_buffered_metrics;
469 // reset the failures count
476 // oops! we couldn't send (all or some of the) data
477 error("Failed to write data to database backend '%s'. Willing to write %zu bytes, wrote %zd bytes. Will re-connect.", destination, len, written);
478 chart_transmission_failures++;
// NOTE(review): on this failure path `written` may be -1; unless a guard in
// a line not shown here prevents it, this would subtract one from the
// counter - confirm.
481 chart_sent_bytes += written;
483 // increment the counter we check for data loss
486 // close the socket - we will re-open it next time
491 // either the buffer is empty
492 // or is holding the data we couldn't send
493 // so, make sure the next iteration will continue
494 // from where we are now
// reported when there is no connected socket to write to
498 error("Failed to update database backend '%s'", destination);
499 chart_transmission_failures++;
501 // increment the counter we check for data loss
// after more than `buffer_on_failures` failures the buffered data is given
// up: it is accounted as lost bytes / lost metrics and a data-lost event
505 if(failures > buffer_on_failures) {
506 // too bad! we are going to lose data
507 chart_lost_bytes += buffer_strlen(b);
508 error("Reached %d backend failures. Flushing buffers to protect this host - this results in data loss on back-end server '%s'", failures, destination);
511 chart_data_lost_events++;
512 chart_lost_metrics = chart_buffered_metrics;
515 if(unlikely(netdata_exit)) break;
517 // ------------------------------------------------------------------------
518 // update the monitoring charts
// for each chart: rrdset_next() is called only once the chart has completed
// a previous update (counter_done is non-zero), then values are set and the
// update is closed with rrdset_done()
520 if(chart_ops->counter_done) rrdset_next(chart_ops);
521 rrddim_set(chart_ops, "write", chart_transmission_successes);
522 rrddim_set(chart_ops, "discard", chart_data_lost_events);
523 rrddim_set(chart_ops, "failure", chart_transmission_failures);
524 rrddim_set(chart_ops, "reconnect", chart_backend_reconnects);
525 rrdset_done(chart_ops);
527 if(chart_metrics->counter_done) rrdset_next(chart_metrics);
528 rrddim_set(chart_metrics, "buffered", chart_buffered_metrics);
529 rrddim_set(chart_metrics, "lost", chart_lost_metrics);
530 rrddim_set(chart_metrics, "sent", chart_sent_metrics);
531 rrdset_done(chart_metrics);
533 if(chart_bytes->counter_done) rrdset_next(chart_bytes);
534 rrddim_set(chart_bytes, "buffered", chart_buffered_bytes);
535 rrddim_set(chart_bytes, "lost", chart_lost_bytes);
536 rrddim_set(chart_bytes, "sent", chart_sent_bytes);
537 rrdset_done(chart_bytes);
// an empty buffer means nothing is pending, so the buffered-metrics counter
// starts from zero again; otherwise it keeps accumulating
539 if(likely(buffer_strlen(b) == 0))
540 chart_buffered_metrics = 0;
542 if(unlikely(netdata_exit)) break;
549 info("BACKEND thread exiting");