arthur.barton.de Git - netdata.git/blobdiff - src/rrdpush.c
more information about metrics streaming; setting to cleanup orphan hosts on the...
[netdata.git] / src / rrdpush.c
index fe6b477639012b0b4347a00073d2e9d825032229..5b626410c803433aa0efd3958df1619bf9f84e2b 100644 (file)
  *
  */
 
+#define START_STREAMING_PROMPT "Hit me baby, push them over..."
+
 int default_rrdpush_enabled = 0;
-static char *rrdpush_destination = NULL;
-static char *rrdpush_api_key = NULL;
+char *default_rrdpush_destination = NULL;
+char *default_rrdpush_api_key = NULL;
 
 int rrdpush_init() {
-    default_rrdpush_enabled   = config_get_boolean(CONFIG_SECTION_STREAM, "enabled", default_rrdpush_enabled);
-    rrdpush_destination       = config_get(CONFIG_SECTION_STREAM, "destination", "");
-    rrdpush_api_key           = config_get(CONFIG_SECTION_STREAM, "api key", "");
+    default_rrdpush_enabled     = appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "enabled", default_rrdpush_enabled);
+    default_rrdpush_destination = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "destination", "");
+    default_rrdpush_api_key     = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "api key", "");
+    rrdhost_free_orphan_time    = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", rrdhost_free_orphan_time);
 
-    if(default_rrdpush_enabled && (!rrdpush_destination || !*rrdpush_destination || !rrdpush_api_key || !*rrdpush_api_key)) {
+    if(default_rrdpush_enabled && (!default_rrdpush_destination || !*default_rrdpush_destination || !default_rrdpush_api_key || !*default_rrdpush_api_key)) {
         error("STREAM [send]: cannot enable sending thread - information is missing.");
         default_rrdpush_enabled = 0;
     }
@@ -63,7 +66,7 @@ static unsigned int remote_clock_resync_iterations = 60;
 static inline int need_to_send_chart_definition(RRDSET *st) {
     RRDDIM *rd;
     rrddim_foreach_read(rd, st)
-        if(!rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
+        if(!rd->exposed)
             return 1;
 
     return 0;
@@ -94,7 +97,7 @@ static inline void send_chart_definition(RRDSET *st) {
                        , rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)?"hidden":""
                        , rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":""
         );
-        rrddim_flag_set(rd, RRDDIM_FLAG_EXPOSED);
+        rd->exposed = 1;
     }
 }
 
@@ -104,7 +107,7 @@ static inline void send_chart_metrics(RRDSET *st) {
 
     RRDDIM *rd;
     rrddim_foreach_read(rd, st) {
-        if(rrddim_flag_check(rd, RRDDIM_FLAG_UPDATED) && rrddim_flag_check(rd, RRDDIM_FLAG_EXPOSED))
+        if(rd->updated && rd->exposed)
             buffer_sprintf(st->rrdhost->rrdpush_buffer, "SET %s = " COLLECTED_NUMBER_FORMAT "\n"
                        , rd->id
                        , rd->collected_value
@@ -172,7 +175,7 @@ static void rrdpush_sender_thread_reset_all_charts(RRDHOST *host) {
 
         RRDDIM *rd;
         rrddim_foreach_read(rd, st)
-            rrddim_flag_clear(rd, RRDDIM_FLAG_EXPOSED);
+            rd->exposed = 0;
 
         rrdset_unlock(st);
     }
@@ -182,47 +185,55 @@ static void rrdpush_sender_thread_reset_all_charts(RRDHOST *host) {
 
 static inline void rrdpush_sender_thread_data_flush(RRDHOST *host) {
     rrdpush_lock(host);
+
     if(buffer_strlen(host->rrdpush_buffer))
         error("STREAM %s [send]: discarding %zu bytes of metrics already in the buffer.", host->hostname, buffer_strlen(host->rrdpush_buffer));
 
     buffer_flush(host->rrdpush_buffer);
-    rrdpush_sender_thread_reset_all_charts(host);
-    rrdpush_unlock(host);
-}
 
-static inline void rrdpush_sender_thread_lock(RRDHOST *host) {
-    if(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL) != 0)
-        error("STREAM %s [send]: cannot set pthread cancel state to DISABLE.", host->hostname);
-
-    rrdpush_lock(host);
-}
-
-static inline void rrdpush_sender_thread_unlock(RRDHOST *host) {
-    if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
-        error("STREAM %s [send]: cannot set pthread cancel state to DISABLE.", host->hostname);
+    rrdpush_sender_thread_reset_all_charts(host);
 
     rrdpush_unlock(host);
 }
 
-void rrdpush_sender_thread_cleanup(RRDHOST *host) {
-    rrdpush_lock(host);
-
+static void rrdpush_sender_thread_cleanup_locked_all(RRDHOST *host) {
     host->rrdpush_connected = 0;
 
-    if(host->rrdpush_socket != -1) close(host->rrdpush_socket);
+    if(host->rrdpush_socket != -1) {
+        close(host->rrdpush_socket);
+        host->rrdpush_socket = -1;
+    }
 
     // close the pipe
-    if(host->rrdpush_pipe[PIPE_READ] != -1)  close(host->rrdpush_pipe[PIPE_READ]);
-    if(host->rrdpush_pipe[PIPE_WRITE] != -1) close(host->rrdpush_pipe[PIPE_WRITE]);
-    host->rrdpush_pipe[PIPE_READ] = -1;
-    host->rrdpush_pipe[PIPE_WRITE] = -1;
+    if(host->rrdpush_pipe[PIPE_READ] != -1) {
+        close(host->rrdpush_pipe[PIPE_READ]);
+        host->rrdpush_pipe[PIPE_READ] = -1;
+    }
+
+    if(host->rrdpush_pipe[PIPE_WRITE] != -1) {
+        close(host->rrdpush_pipe[PIPE_WRITE]);
+        host->rrdpush_pipe[PIPE_WRITE] = -1;
+    }
 
     buffer_free(host->rrdpush_buffer);
     host->rrdpush_buffer = NULL;
 
     host->rrdpush_spawn = 0;
-    host->rrdpush_enabled = 0;
 
+    rrdhost_flag_set(host, RRDHOST_ORPHAN);
+}
+
+void rrdpush_sender_thread_stop(RRDHOST *host) {
+    rrdpush_lock(host);
+    rrdhost_wrlock(host);
+
+    if(host->rrdpush_spawn) {
+        info("STREAM %s [send]: stopping sending thread...", host->hostname);
+        pthread_cancel(host->rrdpush_thread);
+        rrdpush_sender_thread_cleanup_locked_all(host);
+    }
+
+    rrdhost_unlock(host);
     rrdpush_unlock(host);
 }
 
@@ -237,14 +248,14 @@ void *rrdpush_sender_thread(void *ptr) {
     if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
         error("STREAM %s [send]: cannot set pthread cancel state to ENABLE.", host->hostname);
 
-    int timeout = (int)config_get_number(CONFIG_SECTION_STREAM, "timeout seconds", 60);
-    int default_port = (int)config_get_number(CONFIG_SECTION_STREAM, "default port", 19999);
-    size_t max_size = (size_t)config_get_number(CONFIG_SECTION_STREAM, "buffer size bytes", 1024 * 1024);
-    unsigned int reconnect_delay = (unsigned int)config_get_number(CONFIG_SECTION_STREAM, "reconnect delay seconds", 5);
-    remote_clock_resync_iterations = (unsigned int)config_get_number(CONFIG_SECTION_STREAM, "initial clock resync iterations", remote_clock_resync_iterations);
+    int timeout = (int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "timeout seconds", 60);
+    int default_port = (int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "default port", 19999);
+    size_t max_size = (size_t)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "buffer size bytes", 1024 * 1024);
+    unsigned int reconnect_delay = (unsigned int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "reconnect delay seconds", 5);
+    remote_clock_resync_iterations = (unsigned int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "initial clock resync iterations", remote_clock_resync_iterations);
     char connected_to[CONNECTED_TO_SIZE + 1] = "";
 
-    if(!host->rrdpush_enabled || !rrdpush_destination || !*rrdpush_destination || !rrdpush_api_key || !*rrdpush_api_key)
+    if(!host->rrdpush_enabled || !host->rrdpush_destination || !*host->rrdpush_destination || !host->rrdpush_api_key || !*host->rrdpush_api_key)
         goto cleanup;
 
     // initialize rrdpush globals
@@ -276,11 +287,11 @@ void *rrdpush_sender_thread(void *ptr) {
             // they will be lost, so there is no point to do it
             host->rrdpush_connected = 0;
 
-            info("STREAM %s [send to %s]: connecting...", host->hostname, rrdpush_destination);
-            host->rrdpush_socket = connect_to_one_of(rrdpush_destination, default_port, &tv, &reconnects_counter, connected_to, CONNECTED_TO_SIZE);
+            info("STREAM %s [send to %s]: connecting...", host->hostname, host->rrdpush_destination);
+            host->rrdpush_socket = connect_to_one_of(host->rrdpush_destination, default_port, &tv, &reconnects_counter, connected_to, CONNECTED_TO_SIZE);
 
             if(unlikely(host->rrdpush_socket == -1)) {
-                error("STREAM %s [send to %s]: failed to connect", host->hostname, rrdpush_destination);
+                error("STREAM %s [send to %s]: failed to connect", host->hostname, host->rrdpush_destination);
                 sleep(reconnect_delay);
                 continue;
             }
@@ -292,10 +303,10 @@ void *rrdpush_sender_thread(void *ptr) {
                     "STREAM key=%s&hostname=%s&machine_guid=%s&os=%s&update_every=%d HTTP/1.1\r\n"
                     "User-Agent: netdata-push-service/%s\r\n"
                     "Accept: */*\r\n\r\n"
-                      , rrdpush_api_key
-                      , localhost->hostname
-                      , localhost->machine_guid
-                      , localhost->os
+                      , host->rrdpush_api_key
+                      , host->hostname
+                      , host->machine_guid
+                      , host->os
                       , default_rrd_update_every
                       , program_version
             );
@@ -318,7 +329,7 @@ void *rrdpush_sender_thread(void *ptr) {
                 continue;
             }
 
-            if(strncmp(http, "STREAM", 6)) {
+            if(strncmp(http, START_STREAMING_PROMPT, strlen(START_STREAMING_PROMPT))) {
                 close(host->rrdpush_socket);
                 host->rrdpush_socket = -1;
                 error("STREAM %s [send to %s]: server is not replying properly.", host->hostname, connected_to);
@@ -378,7 +389,19 @@ void *rrdpush_sender_thread(void *ptr) {
         }
 
         if(ofd->revents & POLLOUT && begin < buffer_strlen(host->rrdpush_buffer)) {
-            rrdpush_sender_thread_lock(host);
+
+            // BEGIN RRDPUSH LOCKED SESSION
+
+            // during this session, data collectors
+            // will not be able to append data to our buffer
+            // but the socket is in non-blocking mode
+            // so, we will not block at send()
+
+            if(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL) != 0)
+                error("STREAM %s [send]: cannot set pthread cancel state to DISABLE.", host->hostname);
+
+            rrdpush_lock(host);
+
             ssize_t ret = send(host->rrdpush_socket, &host->rrdpush_buffer->buffer[begin], buffer_strlen(host->rrdpush_buffer) - begin, MSG_DONTWAIT);
             if(ret == -1) {
                 if(errno != EAGAIN && errno != EINTR) {
@@ -392,11 +415,19 @@ void *rrdpush_sender_thread(void *ptr) {
                 sent_bytes += ret;
                 begin += ret;
                 if(begin == buffer_strlen(host->rrdpush_buffer)) {
+                    // we sent it all
+
                     buffer_flush(host->rrdpush_buffer);
                     begin = 0;
                 }
             }
-            rrdpush_sender_thread_unlock(host);
+
+            rrdpush_unlock(host);
+
+            if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
+                error("STREAM %s [send]: cannot set pthread cancel state to ENABLE.", host->hostname);
+
+            // END RRDPUSH LOCKED SESSION
         }
 
         // protection from overflow
@@ -413,7 +444,17 @@ void *rrdpush_sender_thread(void *ptr) {
 cleanup:
     debug(D_WEB_CLIENT, "STREAM %s [send]: sending thread exits.", host->hostname);
 
-    rrdpush_sender_thread_cleanup(host);
+    if(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL) != 0)
+        error("STREAM %s [send]: cannot set pthread cancel state to DISABLE.", host->hostname);
+
+    rrdpush_lock(host);
+    rrdhost_wrlock(host);
+    rrdpush_sender_thread_cleanup_locked_all(host);
+    rrdhost_unlock(host);
+    rrdpush_unlock(host);
+
+    if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0)
+        error("STREAM %s [send]: cannot set pthread cancel state to ENABLE.", host->hostname);
 
     pthread_exit(NULL);
     return NULL;
@@ -428,6 +469,9 @@ int rrdpush_receive(int fd, const char *key, const char *hostname, const char *m
     int history = default_rrd_history_entries;
     RRD_MEMORY_MODE mode = default_rrd_memory_mode;
     int health_enabled = default_health_enabled;
+    int rrdpush_enabled = default_rrdpush_enabled;
+    char *rrdpush_destination = default_rrdpush_destination;
+    char *rrdpush_api_key = default_rrdpush_api_key;
     time_t alarms_delay = 60;
 
     update_every = (int)appconfig_get_number(&stream_config, machine_guid, "update every", update_every);
@@ -446,22 +490,50 @@ int rrdpush_receive(int fd, const char *key, const char *hostname, const char *m
     alarms_delay = appconfig_get_number(&stream_config, key, "default postpone alarms on connect seconds", alarms_delay);
     alarms_delay = appconfig_get_number(&stream_config, machine_guid, "postpone alarms on connect seconds", alarms_delay);
 
+    rrdpush_enabled = appconfig_get_boolean(&stream_config, key, "default proxy enabled", rrdpush_enabled);
+    rrdpush_enabled = appconfig_get_boolean(&stream_config, machine_guid, "proxy enabled", rrdpush_enabled);
+
+    rrdpush_destination = appconfig_get(&stream_config, key, "default proxy destination", rrdpush_destination);
+    rrdpush_destination = appconfig_get(&stream_config, machine_guid, "proxy destination", rrdpush_destination);
+
+    rrdpush_api_key = appconfig_get(&stream_config, key, "default proxy api key", rrdpush_api_key);
+    rrdpush_api_key = appconfig_get(&stream_config, machine_guid, "proxy api key", rrdpush_api_key);
+
     if(!strcmp(machine_guid, "localhost"))
         host = localhost;
     else
-        host = rrdhost_find_or_create(hostname, machine_guid, os, update_every, history, mode, health_enabled?1:0);
+        host = rrdhost_find_or_create(
+                hostname
+                , machine_guid
+                , os
+                , update_every
+                , history
+                , mode
+                , (health_enabled != CONFIG_BOOLEAN_NO)
+                , (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key)
+                , rrdpush_destination
+                , rrdpush_api_key
+        );
 
-    info("STREAM %s [receive from [%s]:%s]: metrics for host '%s' with machine_guid '%s': update every = %d, history = %d, memory mode = %s, health %s"
-         , host->hostname
+    if(!host) {
+        close(fd);
+        error("STREAM %s [receive from [%s]:%s]: failed to find/create host structure.", hostname, client_ip, client_port);
+        return 1;
+    }
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    info("STREAM %s [receive from [%s]:%s]: client willing to stream metrics for host '%s' with machine_guid '%s': update every = %d, history = %d, memory mode = %s, health %s"
+         , hostname
          , client_ip
          , client_port
-         , hostname
-         , machine_guid
-         , update_every
-         , history
-         , rrd_memory_mode_name(mode)
+         , host->hostname
+         , host->machine_guid
+         , host->rrd_update_every
+         , host->rrd_history_entries
+         , rrd_memory_mode_name(host->rrd_memory_mode)
          , (health_enabled == CONFIG_BOOLEAN_NO)?"disabled":((health_enabled == CONFIG_BOOLEAN_YES)?"enabled":"auto")
     );
+#endif // NETDATA_INTERNAL_CHECKS
 
     struct plugind cd = {
             .enabled = 1,
@@ -481,8 +553,9 @@ int rrdpush_receive(int fd, const char *key, const char *hostname, const char *m
     snprintfz(cd.cmd,          PLUGINSD_CMD_MAX, "%s:%s", client_ip, client_port);
 
     info("STREAM %s [receive from [%s]:%s]: initializing communication...", host->hostname, client_ip, client_port);
-    if(send_timeout(fd, "STREAM", 6, 0, 60) != 6) {
-        error("STREAM %s [receive from [%s]:%s]: cannot send STREAM command.", host->hostname, client_ip, client_port);
+    if(send_timeout(fd, START_STREAMING_PROMPT, strlen(START_STREAMING_PROMPT), 0, 60) != strlen(START_STREAMING_PROMPT)) {
+        error("STREAM %s [receive from [%s]:%s]: cannot send ready command.", host->hostname, client_ip, client_port);
+        close(fd);
         return 0;
     }
 
@@ -494,26 +567,33 @@ int rrdpush_receive(int fd, const char *key, const char *hostname, const char *m
     FILE *fp = fdopen(fd, "r");
     if(!fp) {
         error("STREAM %s [receive from [%s]:%s]: failed to get a FILE for FD %d.", host->hostname, client_ip, client_port, fd);
+        close(fd);
         return 0;
     }
 
     rrdhost_wrlock(host);
-    host->use_counter++;
+    host->connected_senders++;
     if(health_enabled != CONFIG_BOOLEAN_NO)
         host->health_delay_up_to = now_realtime_sec() + alarms_delay;
     rrdhost_unlock(host);
 
     // call the plugins.d processor to receive the metrics
-    info("STREAM %s [receive from [%s]:%s]: receiving metrics... (host '%s', machine GUID '%s').", host->hostname, client_ip, client_port, host->hostname, host->machine_guid);
+    info("STREAM %s [receive from [%s]:%s]: receiving metrics...", host->hostname, client_ip, client_port);
     size_t count = pluginsd_process(host, &cd, fp, 1);
-    error("STREAM %s [receive from [%s]:%s]: disconnected (host '%s', machine GUID '%s', completed updates %zu).", host->hostname, client_ip, client_port, host->hostname, host->machine_guid, count);
+    error("STREAM %s [receive from [%s]:%s]: disconnected (completed updates %zu).", host->hostname, client_ip, client_port, count);
 
     rrdhost_wrlock(host);
-    host->use_counter--;
-    if(!host->use_counter && health_enabled == CONFIG_BOOLEAN_AUTO)
-        host->health_enabled = 0;
+    host->connected_senders--;
+    if(!host->connected_senders) {
+        if(health_enabled == CONFIG_BOOLEAN_AUTO)
+            host->health_enabled = 0;
+
+        host->senders_disconnected_time = now_realtime_sec();
+    }
     rrdhost_unlock(host);
 
+    rrdpush_sender_thread_stop(host);
+
     // cleanup
     fclose(fp);
 
@@ -545,7 +625,6 @@ void *rrdpush_receiver_thread(void *ptr) {
     rrdpush_receive(rpt->fd, rpt->key, rpt->hostname, rpt->machine_guid, rpt->os, rpt->update_every, rpt->client_ip, rpt->client_port);
     info("STREAM %s [receive from [%s]:%s]: receive thread ended (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid());
 
-    close(rpt->fd);
     freez(rpt->key);
     freez(rpt->hostname);
     freez(rpt->machine_guid);
@@ -558,18 +637,21 @@ void *rrdpush_receiver_thread(void *ptr) {
     return NULL;
 }
 
-static inline int rrdpush_receive_validate_api_key(const char *key) {
-    return appconfig_get_boolean(&stream_config, key, "enabled", 0);
-}
-
 void rrdpush_sender_thread_spawn(RRDHOST *host) {
-    if(pthread_create(&host->rrdpush_thread, NULL, rrdpush_sender_thread, (void *)host))
-        error("STREAM %s [send]: failed to create new thread for client.", host->hostname);
+    rrdhost_wrlock(host);
+
+    if(!host->rrdpush_spawn) {
+        if(pthread_create(&host->rrdpush_thread, NULL, rrdpush_sender_thread, (void *) host))
+            error("STREAM %s [send]: failed to create new thread for client.", host->hostname);
 
-    else if(pthread_detach(host->rrdpush_thread))
-        error("STREAM %s [send]: cannot request detach newly created thread.", host->hostname);
+        else if(pthread_detach(host->rrdpush_thread))
+            error("STREAM %s [send]: cannot request detach newly created thread.", host->hostname);
+
+        rrdhost_flag_clear(host, RRDHOST_ORPHAN);
+        host->rrdpush_spawn = 1;
+    }
 
-    host->rrdpush_spawn = 1;
+    rrdhost_unlock(host);
 }
 
 int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url) {
@@ -577,8 +659,9 @@ int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url
 
     info("STREAM [receive from [%s]:%s]: new client connection.", w->client_ip, w->client_port);
 
-    char *key = NULL, *hostname = NULL, *machine_guid = NULL, *os = NULL;
+    char *key = NULL, *hostname = NULL, *machine_guid = NULL, *os = "unknown";
     int update_every = default_rrd_update_every;
+    char buf[GUID_LEN + 1];
 
     while(url) {
         char *value = mystrsep(&url, "?&");
@@ -621,8 +704,22 @@ int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url
         return 400;
     }
 
-    if(!rrdpush_receive_validate_api_key(key)) {
-        error("STREAM [receive from [%s]:%s]: API key '%s' is not allowed. Forbidding access.", w->client_ip, w->client_port, key);
+    if(regenerate_guid(key, buf) == -1) {
+        error("STREAM [receive from [%s]:%s]: API key '%s' is not valid GUID. Forbidding access.", w->client_ip, w->client_port, key);
+        buffer_flush(w->response.data);
+        buffer_sprintf(w->response.data, "Your API key is invalid.");
+        return 401;
+    }
+
+    if(regenerate_guid(machine_guid, buf) == -1) {
+        error("STREAM [receive from [%s]:%s]: machine GUID '%s' is not GUID. Forbidding access.", w->client_ip, w->client_port, machine_guid);
+        buffer_flush(w->response.data);
+        buffer_sprintf(w->response.data, "Your machine GUID is invalid.");
+        return 404;
+    }
+
+    if(!appconfig_get_boolean(&stream_config, key, "enabled", 0)) {
+        error("STREAM [receive from [%s]:%s]: API key '%s' is not allowed. Forbidding access.", w->client_ip, w->client_port, key);
         buffer_flush(w->response.data);
         buffer_sprintf(w->response.data, "Your API key is not permitted access.");
         return 401;