]> arthur.barton.de Git - netdata.git/blob - src/rrdpush.c
9d2ad79a9de9c9e2c4a2d69508b79ce9d5575c39
[netdata.git] / src / rrdpush.c
1 #include "common.h"
2
// Indexes into rrdpush_pipe[]: the sender thread reads from PIPE_READ,
// rrdset_done_push() writes to PIPE_WRITE.
#define PIPE_READ 0
#define PIPE_WRITE 1

// Internal pipe used only as a wake-up signal: rrdset_done_push() writes one
// byte per pushed chart and the sender thread poll()s the read end.
int rrdpush_pipe[2];

// Text buffer holding the data that has not been sent yet. It is created by
// the sender thread; until then it is NULL and metrics are discarded.
static BUFFER *rrdpush_buffer = NULL;

// Taken around appends to, sends from and flushes of rrdpush_buffer.
static pthread_mutex_t rrdpush_mutex = PTHREAD_MUTEX_INITIALIZER;

// Host named by the last HOST line written to the buffer. Setting it to NULL
// makes the next rrdset_done_push() emit a HOST line again.
static volatile RRDHOST *last_host = NULL;

// Set to 1 by the sender thread once the handshake with the remote netdata
// has completed, and back to 0 when it has to reconnect.
static volatile int rrdpush_connected = 0;
12
13 static inline void rrdpush_lock() {
14     pthread_mutex_lock(&rrdpush_mutex);
15 }
16
17 static inline void rrdpush_unlock() {
18     pthread_mutex_unlock(&rrdpush_mutex);
19 }
20
21 static inline int need_to_send_chart_definition(RRDSET *st) {
22     RRDDIM *rd;
23     rrddim_foreach_read(rd, st)
24         if(!rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
25             return 1;
26
27
28     // fprintf(stderr, "NOT Sending CHART '%s' '%s'\n", st->id, st->name);
29     return 0;
30 }
31
32 static inline void send_chart_definition(RRDSET *st) {
33     // fprintf(stderr, "Sending CHART '%s' '%s'\n", st->id, st->name);
34
35     buffer_sprintf(rrdpush_buffer, "CHART '%s' '%s' '%s' '%s' '%s' '%s' '%s' %ld %d\n"
36                 , st->id
37                 , st->name
38                 , st->title
39                 , st->units
40                 , st->family
41                 , st->context
42                 , rrdset_type_name(st->chart_type)
43                 , st->priority
44                 , st->update_every
45     );
46
47     RRDDIM *rd;
48     rrddim_foreach_read(rd, st) {
49         buffer_sprintf(rrdpush_buffer, "DIMENSION '%s' '%s' '%s' " COLLECTED_NUMBER_FORMAT " " COLLECTED_NUMBER_FORMAT " '%s %s'\n"
50                        , rd->id
51                        , rd->name
52                        , rrd_algorithm_name(rd->algorithm)
53                        , rd->multiplier
54                        , rd->divisor
55                        , rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)?"hidden":""
56                        , rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":""
57         );
58         rrddim_flag_set(rd, RRDDIM_FLAG_EXPOSED);
59     }
60 }
61
62 static inline void send_chart_metrics(RRDSET *st) {
63     buffer_sprintf(rrdpush_buffer, "BEGIN %s %llu\n", st->id, st->usec_since_last_update);
64
65     RRDDIM *rd;
66     rrddim_foreach_read(rd, st) {
67         if(rrddim_flag_check(rd, RRDDIM_FLAG_UPDATED) && rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
68             buffer_sprintf(rrdpush_buffer, "SET %s = " COLLECTED_NUMBER_FORMAT "\n"
69                        , rd->id
70                        , rd->collected_value
71         );
72     }
73
74     buffer_strcat(rrdpush_buffer, "END\n");
75 }
76
77 static void reset_all_charts(void) {
78     rrd_rdlock();
79
80     RRDHOST *host;
81     rrdhost_foreach_read(host) {
82         rrdhost_rdlock(host);
83
84         RRDSET *st;
85         rrdset_foreach_read(st, host) {
86             rrdset_rdlock(st);
87
88             RRDDIM *rd;
89             rrddim_foreach_read(rd, st)
90                 rrddim_flag_clear(rd, RRDDIM_FLAG_EXPOSED);
91
92             rrdset_unlock(st);
93         }
94         rrdhost_unlock(host);
95     }
96     rrd_unlock();
97
98     last_host = NULL;
99 }
100
101 void rrdset_done_push(RRDSET *st) {
102     static int error_shown = 0;
103
104     if(unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ENABLED)))
105         return;
106
107     if(unlikely(!rrdpush_buffer || !rrdpush_connected)) {
108         if(!error_shown)
109             error("PUSH: not ready - discarding collected metrics.");
110
111         error_shown = 1;
112         return;
113     }
114     error_shown = 0;
115
116     rrdpush_lock();
117     rrdset_rdlock(st);
118
119     if(st->rrdhost != last_host) {
120         buffer_sprintf(rrdpush_buffer, "HOST '%s' '%s'\n", st->rrdhost->machine_guid, st->rrdhost->hostname);
121         last_host = st->rrdhost;
122     }
123
124     if(need_to_send_chart_definition(st))
125         send_chart_definition(st);
126
127     send_chart_metrics(st);
128
129     // signal the sender there are more data
130     if(write(rrdpush_pipe[PIPE_WRITE], " ", 1) == -1)
131         error("Cannot write to internal pipe");
132
133     rrdset_unlock(st);
134     rrdpush_unlock();
135 }
136
137 static inline void rrdpush_flush(void) {
138     rrdpush_lock();
139     if(buffer_strlen(rrdpush_buffer))
140         error("PUSH: discarding %zu bytes of metrics data already in the buffer.", buffer_strlen(rrdpush_buffer));
141
142     buffer_flush(rrdpush_buffer);
143     reset_all_charts();
144     last_host = NULL;
145     rrdpush_unlock();
146 }
147
148 void *central_netdata_push_thread(void *ptr) {
149     struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
150
151     info("Central netdata push thread created with task id %d", gettid());
152
153     if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0)
154         error("Cannot set pthread cancel type to DEFERRED.");
155
156     if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
157         error("Cannot set pthread cancel state to ENABLE.");
158
159
160     rrdpush_buffer = buffer_create(1);
161
162     if(pipe(rrdpush_pipe) == -1)
163         fatal("Cannot create required pipe.");
164
165     struct timeval tv = {
166             .tv_sec = 60,
167             .tv_usec = 0
168     };
169
170     rrdpush_connected = 0;
171     size_t begin = 0;
172     size_t max_size = 1024 * 1024;
173     size_t reconnects_counter = 0;
174     size_t sent_bytes = 0;
175     size_t sent_connection = 0;
176     int sock = -1;
177
178     struct pollfd fds[2], *ifd, *ofd;
179
180     ifd = &fds[0];
181     ofd = &fds[1];
182
183     for(;;) {
184         if(netdata_exit) break;
185
186         if(unlikely(sock == -1)) {
187             rrdpush_connected = 0;
188
189             info("PUSH: connecting to central netdata at: %s", central_netdata_to_push_data);
190             sock = connect_to_one_of(central_netdata_to_push_data, 19999, &tv, &reconnects_counter);
191
192             if(unlikely(sock == -1)) {
193                 error("PUSH: failed to connect to central netdata at: %s", central_netdata_to_push_data);
194                 continue;
195             }
196
197             info("PUSH: connected to central netdata at: %s", central_netdata_to_push_data);
198
199             char http[1000 + 1];
200             snprintfz(http, 1000, "GET /stream?key=%s HTTP/1.1\r\nUser-Agent: netdata-push-service/%s\r\nAccept: */*\r\n\r\n", config_get("global", "central netdata api key", ""), program_version);
201             if(send_timeout(sock, http, strlen(http), 0, 60) == -1) {
202                 close(sock);
203                 sock = -1;
204                 error("PUSH: failed to send http header to netdata at: %s", central_netdata_to_push_data);
205                 sleep(5);
206                 continue;
207             }
208
209             if(recv_timeout(sock, http, 1000, 0, 60) == -1) {
210                 close(sock);
211                 sock = -1;
212                 error("PUSH: failed to receive OK from netdata at: %s", central_netdata_to_push_data);
213                 sleep(5);
214                 continue;
215             }
216
217             if(strncmp(http, "STREAM", 6)) {
218                 close(sock);
219                 sock = -1;
220                 error("PUSH: netdata servers at  %s, did not send STREAM", central_netdata_to_push_data);
221                 sleep(5);
222                 continue;
223             }
224
225             if(fcntl(sock, F_SETFL, O_NONBLOCK) < 0)
226                 error("PUSH: cannot set non-blocking mode for socket.");
227
228             rrdpush_flush();
229             sent_connection = 0;
230             rrdpush_connected = 1;
231         }
232
233         ifd->fd = rrdpush_pipe[PIPE_READ];
234         ifd->events = POLLIN;
235         ifd->revents = 0;
236
237         ofd->fd = sock;
238         ofd->events = POLLOUT;
239         ofd->revents = 0;
240
241         nfds_t fdmax = 2;
242
243         if(begin < buffer_strlen(rrdpush_buffer))
244             ofd->events = POLLOUT;
245         else
246             ofd->events = 0;
247
248         if(netdata_exit) break;
249         int retval = poll(fds, fdmax, 60 * 1000);
250         if(netdata_exit) break;
251
252         if(unlikely(retval == -1)) {
253             if(errno == EAGAIN || errno == EINTR)
254                 continue;
255
256             error("PUSH: Failed to poll().");
257             close(sock);
258             sock = -1;
259             break;
260         }
261         else if(unlikely(!retval)) {
262             // timeout
263             continue;
264         }
265
266         if(ifd->revents & POLLIN) {
267             char buffer[1000 + 1];
268             if(read(rrdpush_pipe[PIPE_READ], buffer, 1000) == -1)
269                 error("PUSH: Cannot read from internal pipe.");
270         }
271
272         if(ofd->revents & POLLOUT && begin < buffer_strlen(rrdpush_buffer)) {
273             // info("PUSH: send buffer is ready, sending %zu bytes starting at %zu", buffer_strlen(rrdpush_buffer) - begin, begin);
274
275             // fprintf(stderr, "PUSH BEGIN\n");
276             // fwrite(&rrdpush_buffer->buffer[begin], 1, buffer_strlen(rrdpush_buffer) - begin, stderr);
277             // fprintf(stderr, "\nPUSH END\n");
278
279             rrdpush_lock();
280             ssize_t ret = send(sock, &rrdpush_buffer->buffer[begin], buffer_strlen(rrdpush_buffer) - begin, MSG_DONTWAIT);
281             if(ret == -1) {
282                 if(errno != EAGAIN && errno != EINTR) {
283                     error("PUSH: failed to send metrics to central netdata at %s. We have sent %zu bytes on this connection.", central_netdata_to_push_data, sent_connection);
284                     close(sock);
285                     sock = -1;
286                 }
287             }
288             else {
289                 sent_connection += ret;
290                 sent_bytes += ret;
291                 begin += ret;
292                 if(begin == buffer_strlen(rrdpush_buffer)) {
293                     buffer_flush(rrdpush_buffer);
294                     begin = 0;
295                 }
296             }
297             rrdpush_unlock();
298         }
299
300         // protection from overflow
301         if(rrdpush_buffer->len > max_size) {
302             errno = 0;
303             error("PUSH: too many data pending. Buffer is %zu bytes long, %zu unsent. We have sent %zu bytes in total, %zu on this connection. Closing connection to flush the data.", rrdpush_buffer->len, rrdpush_buffer->len - begin, sent_bytes, sent_connection);
304             if(sock != -1) {
305                 close(sock);
306                 sock = -1;
307             }
308         }
309     }
310
311     debug(D_WEB_CLIENT, "Central netdata push thread exits.");
312     if(sock != -1) {
313         close(sock);
314     }
315
316     static_thread->enabled = 0;
317     pthread_exit(NULL);
318     return NULL;
319 }