// Global streaming switches. Both are re-read from the [stream] configuration
// section later in this file (options: enabled, exclusive).
3 int rrdpush_enabled = 0;
4 int rrdpush_exclusive = 1;
// Streaming destination and API key. Both are filled by config_get() from the
// [stream] section later in this file.
6 static char *central_netdata = NULL;
7 static char *api_key = NULL;
// Length handed to connect_to_one_of() for the connected_to buffer, which the
// sender thread declares with one extra byte.
9 #define CONNECTED_TO_SIZE 100
11 // data collection happens from multiple threads
12 // each of these threads calls rrdset_done()
13 // which in turn calls rrdset_done_push()
14 // which uses this pipe to notify the streaming thread
15 // that there are more data ready to be sent
// Both ends start at -1 (not open); the sender thread creates the pipe with pipe().
18 int rrdpush_pipe[2] = { -1, -1 };
20 // a buffer used to store data to be sent.
21 // the format is the same as external plugins.
22 static BUFFER *rrdpush_buffer = NULL;
24 // locking to get exclusive access to shared resources
25 // (rrdpush_pipe[PIPE_WRITE], rrdpush_buffer)
26 static pthread_mutex_t rrdpush_mutex = PTHREAD_MUTEX_INITIALIZER;
28 // if the streaming thread is connected to a central netdata
29 // this is set to 1, otherwise 0.
30 static volatile int rrdpush_connected = 0;
32 // to have the remote netdata re-sync the charts
33 // to its current clock, we send for this many
34 // iterations a BEGIN line without microseconds
35 // this is for the first iterations of each chart
// The default of 60 can be replaced by the [stream] option
// initial clock resync iterations, which the sender thread reads at startup.
36 static unsigned int remote_clock_resync_iterations = 60;
// Shorthand for taking and releasing rrdpush_mutex.
38 #define rrdpush_lock() pthread_mutex_lock(&rrdpush_mutex)
39 #define rrdpush_unlock() pthread_mutex_unlock(&rrdpush_mutex)
41 // checks if the current chart definition has been sent
// Walks the dimensions of st with rrddim_foreach_read() and tests each one
// for RRDDIM_FLAG_EXPOSED being unset.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably a non-zero value is returned as soon as one unexposed dimension
// is found and zero otherwise - confirm against the full file.
42 static inline int need_to_send_chart_definition(RRDSET *st) {
44 rrddim_foreach_read(rd, st)
45 if(!rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
51 // sends the current chart definition
// Appends one CHART line for st to rrdpush_buffer, followed by one DIMENSION
// line per dimension of the chart.
// NOTE(review): most of the argument lines of both buffer_sprintf() calls are
// not visible in this excerpt.
52 static inline void send_chart_definition(RRDSET *st) {
53 buffer_sprintf(rrdpush_buffer, "CHART '%s' '%s' '%s' '%s' '%s' '%s' '%s' %ld %d\n"
60 , rrdset_type_name(st->chart_type)
66 rrddim_foreach_read(rd, st) {
// The last quoted field of the DIMENSION line carries the two option words:
// hidden when RRDDIM_FLAG_HIDDEN is set, noreset when
// RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS is set, empty strings otherwise.
67 buffer_sprintf(rrdpush_buffer, "DIMENSION '%s' '%s' '%s' " COLLECTED_NUMBER_FORMAT " " COLLECTED_NUMBER_FORMAT " '%s %s'\n"
70 , rrd_algorithm_name(rd->algorithm)
73 , rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)?"hidden":""
74 , rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":""
// Mark the dimension as exposed so that need_to_send_chart_definition()
// no longer finds it unexposed.
76 rrddim_flag_set(rd, RRDDIM_FLAG_EXPOSED);
80 // sends the current chart dimensions
// Appends a BEGIN / SET ... / END group for st to rrdpush_buffer.
81 static inline void send_chart_metrics(RRDSET *st) {
// The microseconds field is 0 until st->counter_done exceeds
// remote_clock_resync_iterations; after that it is st->usec_since_last_update.
82 buffer_sprintf(rrdpush_buffer, "BEGIN %s %llu\n", st->id, (st->counter_done > remote_clock_resync_iterations)?st->usec_since_last_update:0);
85 rrddim_foreach_read(rd, st) {
// Only dimensions that are both updated and already exposed get a SET line.
86 if(rrddim_flag_check(rd, RRDDIM_FLAG_UPDATED) && rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
87 buffer_sprintf(rrdpush_buffer, "SET %s = " COLLECTED_NUMBER_FORMAT "\n"
93 buffer_strcat(rrdpush_buffer, "END\n");
96 // resets all the charts, so that their definitions
97 // will be resent to the central netdata
// For every host (held under its read lock) and every chart of that host:
// zero counter_done and clear RRDDIM_FLAG_EXPOSED on all dimensions.
// NOTE(review): any per-chart locking is not visible in this excerpt.
98 static void reset_all_charts(void) {
102 rrdhost_foreach_read(host) {
103 rrdhost_rdlock(host);
106 rrdset_foreach_read(st, host) {
108 // make it re-align the current time
109 // on the remote host
110 st->counter_done = 0;
// Clearing the flag makes need_to_send_chart_definition() see the
// dimensions as not yet sent.
115 rrddim_foreach_read(rd, st)
116 rrddim_flag_clear(rd, RRDDIM_FLAG_EXPOSED);
120 rrdhost_unlock(host);
// Queues the definition (when needed) and the current values of chart st into
// rrdpush_buffer and wakes the sender thread through the internal pipe.
// NOTE(review): the early exits and any rrdpush_lock() / rrdpush_unlock()
// calls are not visible in this excerpt - confirm against the full file.
125 void rrdset_done_push(RRDSET *st) {
// NOTE(review): presumably used to log the not-ready error only once per
// outage; the lines that read and write it are not visible here.
126 static int error_shown = 0;
// Charts without RRDSET_FLAG_ENABLED are handled separately (action not visible).
128 if(unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ENABLED)))
// No buffer yet, or the sender is not connected: the metrics are dropped.
133 if(unlikely(!rrdpush_buffer || !rrdpush_connected)) {
135 error("STREAM: not ready - discarding collected metrics.");
145 if(need_to_send_chart_definition(st))
146 send_chart_definition(st);
148 send_chart_metrics(st);
151 // signal the sender there are more data
// A single byte is enough: the sender only polls the read end for POLLIN.
152 if(write(rrdpush_pipe[PIPE_WRITE], " ", 1) == -1)
153 error("STREAM: cannot write to internal pipe");
// Empties rrdpush_buffer. If it still holds data, the number of bytes about
// to be thrown away is logged first.
// NOTE(review): locking around the buffer is not visible in this excerpt.
158 static inline void rrdpush_flush(void) {
160 if(buffer_strlen(rrdpush_buffer))
161 error("STREAM: discarding %zu bytes of metrics data already in the buffer.", buffer_strlen(rrdpush_buffer));
163 buffer_flush(rrdpush_buffer);
// NOTE(review): the header of the enclosing function is not visible in this
// excerpt; the lines below are its visible body.
// Load the [stream] options, using the current globals as defaults for the
// two booleans and empty strings for destination and API key.
169 rrdpush_enabled = config_get_boolean("stream", "enabled", rrdpush_enabled);
170 rrdpush_exclusive = config_get_boolean("stream", "exclusive", rrdpush_exclusive);
171 central_netdata = config_get("stream", "stream metrics to", "");
172 api_key = config_get("stream", "api key", "");
// Streaming is unusable when it is disabled or when destination or API key
// are missing or empty; exclusive mode is then switched off.
// NOTE(review): other statements of this branch are not visible here.
174 if(!rrdpush_enabled || !central_netdata || !*central_netdata || !api_key || !*api_key) {
176 rrdpush_exclusive = 0;
// The caller gets the resulting value of rrdpush_enabled.
179 return rrdpush_enabled;
// Thread entry point for the sending side of streaming.
// ptr is the netdata_static_thread entry of this thread; its enabled field is
// cleared during cleanup.
// Visible flow: read the [stream] settings, create the buffer and the
// notification pipe, then repeatedly (re)connect to central_netdata, send the
// STREAM request, wait for a reply that starts with STREAM, and push the
// contents of rrdpush_buffer over the non-blocking socket while polling the
// pipe and the socket.
// NOTE(review): the loop header, several error branches and the return
// statement are not visible in this excerpt.
182 void *rrdpush_sender_thread(void *ptr) {
183 struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
185 info("STREAM: central netdata push thread created with task id %d", gettid());
// Make the thread cancellable, with deferred cancellation.
187 if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0)
188 error("STREAM: cannot set pthread cancel type to DEFERRED.");
190 if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
191 error("STREAM: cannot set pthread cancel state to ENABLE.");
// Tunables from the [stream] section, with their defaults.
193 int timeout = (int)config_get_number("stream", "timeout seconds", 60);
194 int default_port = (int)config_get_number("stream", "default port", 19999);
195 size_t max_size = (size_t)config_get_number("stream", "buffer size bytes", 1024 * 1024);
196 unsigned int reconnect_delay = (unsigned int)config_get_number("stream", "reconnect delay seconds", 5);
197 remote_clock_resync_iterations = (unsigned int)config_get_number("stream", "initial clock resync iterations", remote_clock_resync_iterations);
// Same not-configured test as in the configuration loader; the action taken
// is not visible in this excerpt.
200 if(!rrdpush_enabled || !central_netdata || !*central_netdata || !api_key || !*api_key)
203 // initialize rrdpush globals
204 rrdpush_buffer = buffer_create(1);
205 rrdpush_connected = 0;
206 if(pipe(rrdpush_pipe) == -1) fatal("STREAM: cannot create required pipe.");
208 // initialize local variables
210 size_t reconnects_counter = 0;
211 size_t sent_bytes = 0;
212 size_t sent_connection = 0;
214 struct timeval tv = {
// ifd and ofd point into fds; ifd is used for the pipe read end and ofd for
// the socket (see their use below).
219 struct pollfd fds[2], *ifd, *ofd;
225 char connected_to[CONNECTED_TO_SIZE + 1];
228 if(netdata_exit) break;
// sock == -1 means there is no connection: establish one and do the handshake.
230 if(unlikely(sock == -1)) {
231 // stop appending data into rrdpush_buffer
232 // they will be lost, so there is no point to do it
233 rrdpush_connected = 0;
235 info("STREAM: connecting to central netdata at: %s", central_netdata);
236 sock = connect_to_one_of(central_netdata, default_port, &tv, &reconnects_counter, connected_to, CONNECTED_TO_SIZE);
238 if(unlikely(sock == -1)) {
239 error("STREAM: failed to connect to central netdata at: %s", central_netdata);
240 sleep(reconnect_delay);
244 info("STREAM: initializing communication to central netdata at: %s", connected_to);
// Build the request line and headers that open the stream.
// NOTE(review): the declaration of http and some arguments are not visible.
247 snprintfz(http, 1000,
248 "STREAM key=%s&hostname=%s&machine_guid=%s&os=%s&update_every=%d HTTP/1.1\r\n"
249 "User-Agent: netdata-push-service/%s\r\n"
250 "Accept: */*\r\n\r\n"
252 , localhost->hostname
253 , localhost->machine_guid
255 , default_rrd_update_every
259 if(send_timeout(sock, http, strlen(http), 0, timeout) == -1) {
262 error("STREAM: failed to send http header to netdata at: %s", connected_to);
263 sleep(reconnect_delay);
267 info("STREAM: Waiting for STREAM from central netdata at: %s", connected_to);
// NOTE(review): the http buffer is reused for the reply; no terminator is
// written in the visible lines before the strncmp() below - confirm.
269 if(recv_timeout(sock, http, 1000, 0, timeout) == -1) {
272 error("STREAM: failed to receive STREAM from netdata at: %s", connected_to);
273 sleep(reconnect_delay);
// The reply must begin with the six characters STREAM.
277 if(strncmp(http, "STREAM", 6)) {
280 error("STREAM: server at %s, did not send STREAM", connected_to);
281 sleep(reconnect_delay);
285 info("STREAM: Established communication with central netdata at: %s - sending metrics...", connected_to);
// NOTE(review): F_SETFL with only O_NONBLOCK replaces the other status flags
// of the socket instead of adding to them - confirm this is intended.
287 if(fcntl(sock, F_SETFL, O_NONBLOCK) < 0)
288 error("STREAM: cannot set non-blocking mode for socket.");
293 // allow appending data into rrdpush_buffer
294 rrdpush_connected = 1;
// Always wait for wake-up bytes on the pipe.
297 ifd->fd = rrdpush_pipe[PIPE_READ];
298 ifd->events = POLLIN;
// Ask for socket writability only while unsent data remain past begin.
303 if(begin < buffer_strlen(rrdpush_buffer)) {
304 ofd->events = POLLOUT;
312 if(netdata_exit) break;
313 int retval = poll(fds, fdmax, timeout * 1000);
314 if(netdata_exit) break;
// poll() failed: EAGAIN and EINTR are tolerated, anything else is logged.
316 if(unlikely(retval == -1)) {
317 if(errno == EAGAIN || errno == EINTR)
320 error("STREAM: Failed to poll().");
// poll() timed out (action not visible in this excerpt).
325 else if(unlikely(!retval)) {
// Drain up to 1000 wake-up bytes; their content is ignored.
330 if(ifd->revents & POLLIN) {
331 char buffer[1000 + 1];
332 if(read(rrdpush_pipe[PIPE_READ], buffer, 1000) == -1)
333 error("STREAM: Cannot read from internal pipe.");
// Socket writable and data pending: send from offset begin.
336 if(ofd->revents & POLLOUT && begin < buffer_strlen(rrdpush_buffer)) {
337 // info("STREAM: send buffer is ready, sending %zu bytes starting at %zu", buffer_strlen(rrdpush_buffer) - begin, begin);
339 // fprintf(stderr, "PUSH BEGIN\n");
340 // fwrite(&rrdpush_buffer->buffer[begin], 1, buffer_strlen(rrdpush_buffer) - begin, stderr);
341 // fprintf(stderr, "\nPUSH END\n");
344 ssize_t ret = send(sock, &rrdpush_buffer->buffer[begin], buffer_strlen(rrdpush_buffer) - begin, MSG_DONTWAIT);
// NOTE(review): the test of ret that guards this errno check is not visible.
346 if(errno != EAGAIN && errno != EINTR) {
347 error("STREAM: failed to send metrics to central netdata at %s. We have sent %zu bytes on this connection.", connected_to, sent_connection);
353 sent_connection += ret;
// Everything was sent: empty the buffer.
356 if(begin == buffer_strlen(rrdpush_buffer)) {
357 buffer_flush(rrdpush_buffer);
364 // protection from overflow
365 if(rrdpush_buffer->len > max_size) {
367 error("STREAM: too many data pending. Buffer is %zu bytes long, %zu unsent. We have sent %zu bytes in total, %zu on this connection. Closing connection to flush the data.", rrdpush_buffer->len, rrdpush_buffer->len - begin, sent_bytes, sent_connection);
// Cleanup after leaving the loop.
376 debug(D_WEB_CLIENT, "STREAM: central netdata push thread exits.");
378 // make sure the data collection threads do not write data
379 rrdpush_connected = 0;
// Close both pipe ends if open.
// NOTE(review): no reset of the descriptors to -1 is visible in this excerpt.
382 if(rrdpush_pipe[PIPE_READ] != -1) close(rrdpush_pipe[PIPE_READ]);
383 if(rrdpush_pipe[PIPE_WRITE] != -1) close(rrdpush_pipe[PIPE_WRITE]);
386 if(sock != -1) close(sock);
// Release the buffer and clear the pointer, which rrdset_done_push() tests
// before appending.
389 buffer_free(rrdpush_buffer);
390 rrdpush_buffer = NULL;
393 static_thread->enabled = 0;
399 // ----------------------------------------------------------------------------
// Receiving side of one streaming connection.
// fd is the client socket; key, hostname, machine_guid, os and update_every
// come from the request; client_ip and client_port are used for logging and
// for labelling the plugins.d structure.
// Visible flow: resolve the per-key and per-machine settings from
// stream_config, find or create the host, reply with STREAM, turn the socket
// into a blocking FILE and let pluginsd_process() consume the metrics.
// NOTE(review): the return statements and some branches are not visible here.
402 int rrdpush_receive(int fd, const char *key, const char *hostname, const char *machine_guid, const char *os, int update_every, char *client_ip, char *client_port) {
// Defaults, overridden below first by the API key section and then by the
// machine GUID section of stream_config.
404 int history = default_rrd_history_entries;
405 RRD_MEMORY_MODE mode = default_rrd_memory_mode;
406 int health_enabled = default_health_enabled;
407 time_t alarms_delay = 60;
409 update_every = (int)appconfig_get_number(&stream_config, machine_guid, "update every", update_every);
// NOTE(review): a value of 0 passes this check unchanged - confirm whether
// zero is an acceptable update frequency.
410 if(update_every < 0) update_every = 1;
412 history = (int)appconfig_get_number(&stream_config, key, "default history", history);
413 history = (int)appconfig_get_number(&stream_config, machine_guid, "history", history);
// Enforce a minimum of 5 history entries.
414 if(history < 5) history = 5;
416 mode = rrd_memory_mode_id(appconfig_get(&stream_config, key, "default memory mode", rrd_memory_mode_name(mode)));
417 mode = rrd_memory_mode_id(appconfig_get(&stream_config, machine_guid, "memory mode", rrd_memory_mode_name(mode)));
419 health_enabled = appconfig_get_boolean_ondemand(&stream_config, key, "health enabled by default", health_enabled);
420 health_enabled = appconfig_get_boolean_ondemand(&stream_config, machine_guid, "health enabled", health_enabled);
422 alarms_delay = appconfig_get_number(&stream_config, key, "default postpone alarms on connect seconds", alarms_delay);
423 alarms_delay = appconfig_get_number(&stream_config, machine_guid, "postpone alarms on connect seconds", alarms_delay);
// A machine GUID equal to localhost is treated specially (action not visible).
425 if(!strcmp(machine_guid, "localhost"))
428 host = rrdhost_find_or_create(hostname, machine_guid, os, update_every, history, mode, health_enabled?1:0);
430 info("STREAM request from client '%s:%s' for host '%s' with machine_guid '%s': update every = %d, history = %d, memory mode = %s, health %s",
431 client_ip, client_port,
432 hostname, machine_guid,
435 rrd_memory_mode_name(mode),
436 (health_enabled == CONFIG_BOOLEAN_NO)?"disabled":((health_enabled == CONFIG_BOOLEAN_YES)?"enabled":"auto")
// Descriptor handed to pluginsd_process() below.
439 struct plugind cd = {
441 .update_every = default_rrd_update_every,
443 .serial_failures = 0,
444 .successful_collections = 0,
446 .started_t = now_realtime_sec(),
450 // put the client IP and port into the buffers used by plugins.d
451 snprintfz(cd.id, CONFIG_MAX_NAME, "%s:%s", client_ip, client_port);
452 snprintfz(cd.filename, FILENAME_MAX, "%s:%s", client_ip, client_port);
453 snprintfz(cd.fullfilename, FILENAME_MAX, "%s:%s", client_ip, client_port);
454 snprintfz(cd.cmd, PLUGINSD_CMD_MAX, "%s:%s", client_ip, client_port);
// Answer the request with the six characters STREAM, allowing 60 seconds;
// anything other than 6 bytes sent counts as failure.
456 info("STREAM [%s]:%s: sending STREAM to initiate streaming...", client_ip, client_port);
457 if(send_timeout(fd, "STREAM", 6, 0, 60) != 6) {
458 error("STREAM [%s]:%s: cannot send STREAM.", client_ip, client_port);
462 // remove the non-blocking flag from the socket
463 if(fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) & ~O_NONBLOCK) == -1)
464 error("STREAM [%s]:%s: cannot remove the non-blocking flag from socket %d", client_ip, client_port, fd);
466 // convert the socket to a FILE *
467 FILE *fp = fdopen(fd, "r");
469 error("STREAM [%s]:%s: failed to get a FILE for FD %d.", client_ip, client_port, fd);
// Under the host write lock: unless health is disabled, postpone alarms
// until alarms_delay seconds from now.
473 rrdhost_wrlock(host);
475 if(health_enabled != CONFIG_BOOLEAN_NO)
476 host->health_delay_up_to = now_realtime_sec() + alarms_delay;
477 rrdhost_unlock(host);
479 // call the plugins.d processor to receive the metrics
480 info("STREAM [%s]:%s: connecting client to plugins.d (host '%s', machine GUID '%s').", client_ip, client_port, host->hostname, host->machine_guid);
481 size_t count = pluginsd_process(host, &cd, fp, 1);
482 error("STREAM [%s]:%s: client disconnected (host '%s', machine GUID '%s', completed updates %zu).", client_ip, client_port, host->hostname, host->machine_guid, count);
// After the client has gone: when health is in auto mode and the host use
// counter is zero, health is switched off for the host.
484 rrdhost_wrlock(host);
486 if(!host->use_counter && health_enabled == CONFIG_BOOLEAN_AUTO)
487 host->health_enabled = 0;
488 rrdhost_unlock(host);
// Arguments handed to a receiver thread. The members are not visible in this
// excerpt; the code in this file reads fd, key, hostname, machine_guid, os,
// update_every, client_ip and client_port from it.
496 struct rrdpush_thread {
// Thread entry point for one incoming stream.
// ptr is a struct rrdpush_thread built by the spawner; its fields are passed
// to rrdpush_receive() and the duplicated strings are released afterwards.
// NOTE(review): only four freez() calls are visible in this excerpt; the
// release of key, os, the structure itself and the return statement are not
// visible - confirm against the full file.
507 void *rrdpush_receiver_thread(void *ptr) {
508 struct rrdpush_thread *rpt = (struct rrdpush_thread *)ptr;
// Make the thread cancellable, with deferred cancellation.
510 if (pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0)
511 error("STREAM: cannot set pthread cancel type to DEFERRED.");
513 if (pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
514 error("STREAM: cannot set pthread cancel state to ENABLE.");
// Runs for the whole lifetime of the connection.
517 info("STREAM [%s]:%s: receive thread created (task id %d)", rpt->client_ip, rpt->client_port, gettid());
518 rrdpush_receive(rpt->fd, rpt->key, rpt->hostname, rpt->machine_guid, rpt->os, rpt->update_every, rpt->client_ip, rpt->client_port);
519 info("STREAM [%s]:%s: receive thread ended (task id %d)", rpt->client_ip, rpt->client_port, gettid());
// Release the copies made with strdupz() by the spawner.
523 freez(rpt->hostname);
524 freez(rpt->machine_guid);
526 freez(rpt->client_ip);
527 freez(rpt->client_port);
// Returns the boolean option enabled from the stream_config section named
// after key; the default passed to the lookup is 0, so a key without that
// option yields 0.
534 static inline int rrdpush_receive_validate_api_key(const char *key) {
535 return appconfig_get_boolean(&stream_config, key, "enabled", 0);
// Handles an incoming STREAM request on web client w.
// url carries the request parameters; it is split in place with mystrsep().
// Visible flow: parse the parameters, reject requests that lack a key,
// hostname or machine GUID or that are disabled in stream_config, then copy
// the values into a struct rrdpush_thread and start a detached receiver
// thread that takes over the socket.
// NOTE(review): the loop header, the return statements and some assignments
// are not visible in this excerpt.
538 int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url) {
541 info("STREAM [%s]:%s: client connection.", w->client_ip, w->client_port);
543 char *key = NULL, *hostname = NULL, *machine_guid = NULL, *os = NULL;
544 int update_every = default_rrd_update_every;
// Take the next name=value pair, separated by ? or &; pairs with an empty
// name or an empty value are skipped.
547 char *value = mystrsep(&url, "?&");
548 if(!value || !*value) continue;
550 char *name = mystrsep(&value, "=");
551 if(!name || !*name) continue;
552 if(!value || !*value) continue;
554 if(!strcmp(name, "key"))
556 else if(!strcmp(name, "hostname"))
558 else if(!strcmp(name, "machine_guid"))
559 machine_guid = value;
// Parsed with base 0 and narrowed to int; no range check is visible here.
560 else if(!strcmp(name, "update_every"))
561 update_every = (int)strtoul(value, NULL, 0);
562 else if(!strcmp(name, "os"))
// Rejections: each one logs the reason and replaces the response body.
567 error("STREAM [%s]:%s: request without an API key. Forbidding access.", w->client_ip, w->client_port);
568 buffer_flush(w->response.data);
569 buffer_sprintf(w->response.data, "You need an API key for this request.");
573 if(!hostname || !*hostname) {
574 error("STREAM [%s]:%s: request without a hostname. Forbidding access.", w->client_ip, w->client_port);
575 buffer_flush(w->response.data);
576 buffer_sprintf(w->response.data, "You need to send a hostname too.");
580 if(!machine_guid || !*machine_guid) {
581 error("STREAM [%s]:%s: request without a machine GUID. Forbidding access.", w->client_ip, w->client_port);
582 buffer_flush(w->response.data);
583 buffer_sprintf(w->response.data, "You need to send a machine GUID too.");
587 if(!rrdpush_receive_validate_api_key(key)) {
588 error("STREAM [%s]:%s: API key '%s' is not allowed. Forbidding access.", w->client_ip, w->client_port, key);
589 buffer_flush(w->response.data);
590 buffer_sprintf(w->response.data, "Your API key is not permitted access.");
// A machine GUID is allowed unless its own section sets enabled to no
// (the lookup default is 1).
594 if(!appconfig_get_boolean(&stream_config, machine_guid, "enabled", 1)) {
595 error("STREAM [%s]:%s: machine GUID '%s' is not allowed. Forbidding access.", w->client_ip, w->client_port, machine_guid);
596 buffer_flush(w->response.data);
// NOTE(review): the word guide in the message below looks like a typo for
// GUID; it is runtime text and is left untouched here.
597 buffer_sprintf(w->response.data, "Your machine guide is not permitted access.");
// Build the argument block for the receiver thread; every string is
// duplicated so that the thread owns its own copies.
601 struct rrdpush_thread *rpt = mallocz(sizeof(struct rrdpush_thread));
603 rpt->key = strdupz(key);
604 rpt->hostname = strdupz(hostname);
605 rpt->machine_guid = strdupz(machine_guid);
// NOTE(review): os is not validated in the visible lines, so it may still be
// NULL here when the request has no os parameter - confirm that strdupz()
// tolerates NULL or add a fallback value.
606 rpt->os = strdupz(os);
607 rpt->client_ip = strdupz(w->client_ip);
608 rpt->client_port = strdupz(w->client_port);
609 rpt->update_every = update_every;
// NOTE(review): no release of this allocation is visible in this excerpt.
611 pthread_t *thread = mallocz(sizeof(pthread_t));
613 debug(D_SYSTEM, "Starting STREAM thread for client [%s]:%s.", w->client_ip, w->client_port);
// NOTE(review): when pthread_create() fails, no release of rpt or of its
// strings is visible in this excerpt.
615 if(pthread_create(thread, NULL, rrdpush_receiver_thread, (void *)rpt))
616 error("failed to create new STREAM thread for client [%s]:%s.", w->client_ip, w->client_port);
618 else if(pthread_detach(*thread))
619 error("Cannot request detach newly created thread for client [%s]:%s.", w->client_ip, w->client_port);
621 // prevent the caller from closing the streaming socket
623 w->ifd = w->ofd = -1;
627 buffer_flush(w->response.data);