]> arthur.barton.de Git - netdata.git/blob - src/rrdpush.c
allow metrics streaming to work in parallel with local database; propagate O/S type...
[netdata.git] / src / rrdpush.c
1 #include "common.h"
2
3 int rrdpush_enabled = 0;
4 int rrdpush_exclusive = 1;
5
6 static char *central_netdata = NULL;
7 static char *api_key = NULL;
8
9 #define CONNECTED_TO_SIZE 100
10
11 // data collection happens from multiple threads
12 // each of these threads calls rrdset_done()
13 // which in turn calls rrdset_done_push()
14 // which uses this pipe to notify the streaming thread
15 // that there are more data ready to be sent
16 #define PIPE_READ 0
17 #define PIPE_WRITE 1
18 int rrdpush_pipe[2] = { -1, -1 };
19
20 // a buffer used to store data to be sent.
21 // the format is the same as external plugins.
22 static BUFFER *rrdpush_buffer = NULL;
23
24 // locking to get exclusive access to shared resources
25 // (rrdpush_pipe[PIPE_WRITE], rrdpush_buffer
26 static pthread_mutex_t rrdpush_mutex = PTHREAD_MUTEX_INITIALIZER;
27
28 // if the streaming thread is connected to a central netdata
29 // this is set to 1, otherwise 0.
30 static volatile int rrdpush_connected = 0;
31
32 // to have the remote netdata re-sync the charts
33 // to its current clock, we send for this many
34 // iterations a BEGIN line without microseconds
35 // this is for the first iterations of each chart
36 static unsigned int remote_clock_resync_iterations = 60;
37
38 #define rrdpush_lock() pthread_mutex_lock(&rrdpush_mutex)
39 #define rrdpush_unlock() pthread_mutex_unlock(&rrdpush_mutex)
40
41 // checks if the current chart definition has been sent
42 static inline int need_to_send_chart_definition(RRDSET *st) {
43     RRDDIM *rd;
44     rrddim_foreach_read(rd, st)
45         if(!rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
46             return 1;
47
48     return 0;
49 }
50
51 // sends the current chart definition
52 static inline void send_chart_definition(RRDSET *st) {
53     buffer_sprintf(rrdpush_buffer, "CHART '%s' '%s' '%s' '%s' '%s' '%s' '%s' %ld %d\n"
54                 , st->id
55                 , st->name
56                 , st->title
57                 , st->units
58                 , st->family
59                 , st->context
60                 , rrdset_type_name(st->chart_type)
61                 , st->priority
62                 , st->update_every
63     );
64
65     RRDDIM *rd;
66     rrddim_foreach_read(rd, st) {
67         buffer_sprintf(rrdpush_buffer, "DIMENSION '%s' '%s' '%s' " COLLECTED_NUMBER_FORMAT " " COLLECTED_NUMBER_FORMAT " '%s %s'\n"
68                        , rd->id
69                        , rd->name
70                        , rrd_algorithm_name(rd->algorithm)
71                        , rd->multiplier
72                        , rd->divisor
73                        , rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)?"hidden":""
74                        , rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":""
75         );
76         rrddim_flag_set(rd, RRDDIM_FLAG_EXPOSED);
77     }
78 }
79
80 // sends the current chart dimensions
81 static inline void send_chart_metrics(RRDSET *st) {
82     buffer_sprintf(rrdpush_buffer, "BEGIN %s %llu\n", st->id, (st->counter_done > remote_clock_resync_iterations)?st->usec_since_last_update:0);
83
84     RRDDIM *rd;
85     rrddim_foreach_read(rd, st) {
86         if(rrddim_flag_check(rd, RRDDIM_FLAG_UPDATED) && rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
87             buffer_sprintf(rrdpush_buffer, "SET %s = " COLLECTED_NUMBER_FORMAT "\n"
88                        , rd->id
89                        , rd->collected_value
90         );
91     }
92
93     buffer_strcat(rrdpush_buffer, "END\n");
94 }
95
96 // resets all the chart, so that their definitions
97 // will be resent to the central netdata
98 static void reset_all_charts(void) {
99     rrd_rdlock();
100
101     RRDHOST *host;
102     rrdhost_foreach_read(host) {
103         rrdhost_rdlock(host);
104
105         RRDSET *st;
106         rrdset_foreach_read(st, host) {
107
108             // make it re-align the current time
109             // on the remote host
110             st->counter_done = 0;
111
112             rrdset_rdlock(st);
113
114             RRDDIM *rd;
115             rrddim_foreach_read(rd, st)
116                 rrddim_flag_clear(rd, RRDDIM_FLAG_EXPOSED);
117
118             rrdset_unlock(st);
119         }
120         rrdhost_unlock(host);
121     }
122     rrd_unlock();
123 }
124
125 void rrdset_done_push(RRDSET *st) {
126     static int error_shown = 0;
127
128     if(unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ENABLED)))
129         return;
130
131     rrdpush_lock();
132
133     if(unlikely(!rrdpush_buffer || !rrdpush_connected)) {
134         if(!error_shown)
135             error("STREAM: not ready - discarding collected metrics.");
136
137         error_shown = 1;
138
139         rrdpush_unlock();
140         return;
141     }
142     error_shown = 0;
143
144     rrdset_rdlock(st);
145     if(need_to_send_chart_definition(st))
146         send_chart_definition(st);
147
148     send_chart_metrics(st);
149     rrdset_unlock(st);
150
151     // signal the sender there are more data
152     if(write(rrdpush_pipe[PIPE_WRITE], " ", 1) == -1)
153         error("STREAM: cannot write to internal pipe");
154
155     rrdpush_unlock();
156 }
157
158 static inline void rrdpush_flush(void) {
159     rrdpush_lock();
160     if(buffer_strlen(rrdpush_buffer))
161         error("STREAM: discarding %zu bytes of metrics data already in the buffer.", buffer_strlen(rrdpush_buffer));
162
163     buffer_flush(rrdpush_buffer);
164     reset_all_charts();
165     rrdpush_unlock();
166 }
167
168 int rrdpush_init() {
169     rrdpush_enabled = config_get_boolean("stream", "enabled", rrdpush_enabled);
170     rrdpush_exclusive = config_get_boolean("stream", "exclusive", rrdpush_exclusive);
171     central_netdata = config_get("stream", "stream metrics to", "");
172     api_key = config_get("stream", "api key", "");
173
174     if(!rrdpush_enabled || !central_netdata || !*central_netdata || !api_key || !*api_key) {
175         rrdpush_enabled = 0;
176         rrdpush_exclusive = 0;
177     }
178
179     return rrdpush_enabled;
180 }
181
182 void *central_netdata_push_thread(void *ptr) {
183     struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
184
185     info("STREAM: central netdata push thread created with task id %d", gettid());
186
187     if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0)
188         error("STREAM: cannot set pthread cancel type to DEFERRED.");
189
190     if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
191         error("STREAM: cannot set pthread cancel state to ENABLE.");
192
193     int timeout = (int)config_get_number("stream", "timeout seconds", 60);
194     int default_port = (int)config_get_number("stream", "default port", 19999);
195     size_t max_size = (size_t)config_get_number("stream", "buffer size bytes", 1024 * 1024);
196     unsigned int reconnect_delay = (unsigned int)config_get_number("stream", "reconnect delay seconds", 5);
197     remote_clock_resync_iterations = (unsigned int)config_get_number("stream", "initial clock resync iterations", remote_clock_resync_iterations);
198     int sock = -1;
199
200     if(!rrdpush_enabled || !central_netdata || !*central_netdata || !api_key || !*api_key)
201         goto cleanup;
202
203     // initialize rrdpush globals
204     rrdpush_buffer = buffer_create(1);
205     rrdpush_connected = 0;
206     if(pipe(rrdpush_pipe) == -1) fatal("STREAM: cannot create required pipe.");
207
208     // initialize local variables
209     size_t begin = 0;
210     size_t reconnects_counter = 0;
211     size_t sent_bytes = 0;
212     size_t sent_connection = 0;
213
214     struct timeval tv = {
215             .tv_sec = timeout,
216             .tv_usec = 0
217     };
218
219     struct pollfd fds[2], *ifd, *ofd;
220     nfds_t fdmax;
221
222     ifd = &fds[0];
223     ofd = &fds[1];
224
225     char connected_to[CONNECTED_TO_SIZE + 1];
226
227     for(;;) {
228         if(netdata_exit) break;
229
230         if(unlikely(sock == -1)) {
231             // stop appending data into rrdpush_buffer
232             // they will be lost, so there is no point to do it
233             rrdpush_connected = 0;
234
235             info("STREAM: connecting to central netdata at: %s", central_netdata);
236             sock = connect_to_one_of(central_netdata, default_port, &tv, &reconnects_counter, connected_to, CONNECTED_TO_SIZE);
237
238             if(unlikely(sock == -1)) {
239                 error("STREAM: failed to connect to central netdata at: %s", central_netdata);
240                 sleep(reconnect_delay);
241                 continue;
242             }
243
244             info("STREAM: initializing communication to central netdata at: %s", connected_to);
245
246             char http[1000 + 1];
247             snprintfz(http, 1000,
248                     "STREAM key=%s&hostname=%s&machine_guid=%s&os=%s&update_every=%d HTTP/1.1\r\n"
249                     "User-Agent: netdata-push-service/%s\r\n"
250                     "Accept: */*\r\n\r\n"
251                       , api_key
252                       , localhost->hostname
253                       , localhost->machine_guid
254                       , localhost->os
255                       , default_rrd_update_every
256                       , program_version
257             );
258
259             if(send_timeout(sock, http, strlen(http), 0, timeout) == -1) {
260                 close(sock);
261                 sock = -1;
262                 error("STREAM: failed to send http header to netdata at: %s", connected_to);
263                 sleep(reconnect_delay);
264                 continue;
265             }
266
267             info("STREAM: Waiting for STREAM from central netdata at: %s", connected_to);
268
269             if(recv_timeout(sock, http, 1000, 0, timeout) == -1) {
270                 close(sock);
271                 sock = -1;
272                 error("STREAM: failed to receive STREAM from netdata at: %s", connected_to);
273                 sleep(reconnect_delay);
274                 continue;
275             }
276
277             if(strncmp(http, "STREAM", 6)) {
278                 close(sock);
279                 sock = -1;
280                 error("STREAM: server at %s, did not send STREAM", connected_to);
281                 sleep(reconnect_delay);
282                 continue;
283             }
284
285             info("STREAM: Established communication with central netdata at: %s - sending metrics...", connected_to);
286
287             if(fcntl(sock, F_SETFL, O_NONBLOCK) < 0)
288                 error("STREAM: cannot set non-blocking mode for socket.");
289
290             rrdpush_flush();
291             sent_connection = 0;
292
293             // allow appending data into rrdpush_buffer
294             rrdpush_connected = 1;
295         }
296
297         ifd->fd = rrdpush_pipe[PIPE_READ];
298         ifd->events = POLLIN;
299         ifd->revents = 0;
300
301         ofd->fd = sock;
302         ofd->revents = 0;
303         if(begin < buffer_strlen(rrdpush_buffer)) {
304             ofd->events = POLLOUT;
305             fdmax = 2;
306         }
307         else {
308             ofd->events = 0;
309             fdmax = 1;
310         }
311
312         if(netdata_exit) break;
313         int retval = poll(fds, fdmax, timeout * 1000);
314         if(netdata_exit) break;
315
316         if(unlikely(retval == -1)) {
317             if(errno == EAGAIN || errno == EINTR)
318                 continue;
319
320             error("STREAM: Failed to poll().");
321             close(sock);
322             sock = -1;
323             break;
324         }
325         else if(unlikely(!retval)) {
326             // timeout
327             continue;
328         }
329
330         if(ifd->revents & POLLIN) {
331             char buffer[1000 + 1];
332             if(read(rrdpush_pipe[PIPE_READ], buffer, 1000) == -1)
333                 error("STREAM: Cannot read from internal pipe.");
334         }
335
336         if(ofd->revents & POLLOUT && begin < buffer_strlen(rrdpush_buffer)) {
337             // info("STREAM: send buffer is ready, sending %zu bytes starting at %zu", buffer_strlen(rrdpush_buffer) - begin, begin);
338
339             // fprintf(stderr, "PUSH BEGIN\n");
340             // fwrite(&rrdpush_buffer->buffer[begin], 1, buffer_strlen(rrdpush_buffer) - begin, stderr);
341             // fprintf(stderr, "\nPUSH END\n");
342
343             rrdpush_lock();
344             ssize_t ret = send(sock, &rrdpush_buffer->buffer[begin], buffer_strlen(rrdpush_buffer) - begin, MSG_DONTWAIT);
345             if(ret == -1) {
346                 if(errno != EAGAIN && errno != EINTR) {
347                     error("STREAM: failed to send metrics to central netdata at %s. We have sent %zu bytes on this connection.", connected_to, sent_connection);
348                     close(sock);
349                     sock = -1;
350                 }
351             }
352             else {
353                 sent_connection += ret;
354                 sent_bytes += ret;
355                 begin += ret;
356                 if(begin == buffer_strlen(rrdpush_buffer)) {
357                     buffer_flush(rrdpush_buffer);
358                     begin = 0;
359                 }
360             }
361             rrdpush_unlock();
362         }
363
364         // protection from overflow
365         if(rrdpush_buffer->len > max_size) {
366             errno = 0;
367             error("STREAM: too many data pending. Buffer is %zu bytes long, %zu unsent. We have sent %zu bytes in total, %zu on this connection. Closing connection to flush the data.", rrdpush_buffer->len, rrdpush_buffer->len - begin, sent_bytes, sent_connection);
368             if(sock != -1) {
369                 close(sock);
370                 sock = -1;
371             }
372         }
373     }
374
375 cleanup:
376     debug(D_WEB_CLIENT, "STREAM: central netdata push thread exits.");
377
378     // make sure the data collection threads do not write data
379     rrdpush_connected = 0;
380
381     // close the pipe
382     if(rrdpush_pipe[PIPE_READ] != -1)  close(rrdpush_pipe[PIPE_READ]);
383     if(rrdpush_pipe[PIPE_WRITE] != -1) close(rrdpush_pipe[PIPE_WRITE]);
384
385     // close the socket
386     if(sock != -1) close(sock);
387
388     rrdpush_lock();
389     buffer_free(rrdpush_buffer);
390     rrdpush_buffer = NULL;
391     rrdpush_unlock();
392
393     static_thread->enabled = 0;
394     pthread_exit(NULL);
395     return NULL;
396 }