]> arthur.barton.de Git - netdata.git/blob - src/rrdhost.c
de342bd3ef29bf472e3b8e640e301f8351c00250
[netdata.git] / src / rrdhost.c
1 #define NETDATA_RRD_INTERNALS 1
2 #include "common.h"
3
4 RRDHOST *localhost = NULL;
5 size_t rrd_hosts_available = 0;
6 pthread_rwlock_t rrd_rwlock = PTHREAD_RWLOCK_INITIALIZER;
7
8 time_t rrdhost_free_orphan_time = 3600;
9
10 // ----------------------------------------------------------------------------
11 // RRDHOST index
12
13 int rrdhost_compare(void* a, void* b) {
14     if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
15     else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
16     else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
17 }
18
19 avl_tree_lock rrdhost_root_index = {
20         .avl_tree = { NULL, rrdhost_compare },
21         .rwlock = AVL_LOCK_INITIALIZER
22 };
23
24 RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
25     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
26
27     RRDHOST tmp;
28     strncpyz(tmp.machine_guid, guid, GUID_LEN);
29     tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
30
31     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
32 }
33
34 RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
35     if(unlikely(!strcmp(hostname, "localhost")))
36         return localhost;
37
38     if(unlikely(!hash)) hash = simple_hash(hostname);
39
40     rrd_rdlock();
41     RRDHOST *host;
42     rrdhost_foreach_read(host) {
43         if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
44             rrd_unlock();
45             return host;
46         }
47     }
48     rrd_unlock();
49
50     return NULL;
51 }
52
53 #define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost))
54 #define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost))
55
56
57 // ----------------------------------------------------------------------------
58 // RRDHOST - internal helpers
59
60 static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
61     freez(host->hostname);
62     host->hostname = strdupz(hostname);
63     host->hash_hostname = simple_hash(host->hostname);
64 }
65
66 static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
67     freez(host->os);
68     host->os = strdupz(os?os:"unknown");
69 }
70
71 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
72     strncpy(host->machine_guid, machine_guid, GUID_LEN);
73     host->machine_guid[GUID_LEN] = '\0';
74     host->hash_machine_guid = simple_hash(host->machine_guid);
75 }
76
77
78 // ----------------------------------------------------------------------------
79 // RRDHOST - add a host
80
81 RRDHOST *rrdhost_create(const char *hostname,
82         const char *guid,
83         const char *os,
84         int update_every,
85         int entries,
86         RRD_MEMORY_MODE memory_mode,
87         int health_enabled,
88         int rrdpush_enabled,
89         char *rrdpush_destination,
90         char *rrdpush_api_key,
91         int is_localhost
92 ) {
93
94     debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
95
96     RRDHOST *host = callocz(1, sizeof(RRDHOST));
97
98     host->rrd_update_every    = update_every;
99     host->rrd_history_entries = entries;
100     host->rrd_memory_mode     = memory_mode;
101     host->health_enabled      = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
102     host->rrdpush_enabled     = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key);
103     host->rrdpush_destination = (host->rrdpush_enabled)?strdupz(rrdpush_destination):NULL;
104     host->rrdpush_api_key     = (host->rrdpush_enabled)?strdupz(rrdpush_api_key):NULL;
105
106     host->rrdpush_pipe[0] = -1;
107     host->rrdpush_pipe[1] = -1;
108     host->rrdpush_socket  = -1;
109
110     pthread_mutex_init(&host->rrdpush_mutex, NULL);
111     pthread_rwlock_init(&host->rrdhost_rwlock, NULL);
112
113     rrdhost_init_hostname(host, hostname);
114     rrdhost_init_machine_guid(host, guid);
115     rrdhost_init_os(host, os);
116
117     avl_init_lock(&(host->rrdset_root_index),      rrdset_compare);
118     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
119     avl_init_lock(&(host->rrdfamily_root_index),   rrdfamily_compare);
120     avl_init_lock(&(host->variables_root_index),   rrdvar_compare);
121
122     // ------------------------------------------------------------------------
123     // initialize health variables
124
125     host->health_log.next_log_id = 1;
126     host->health_log.next_alarm_id = 1;
127     host->health_log.max = 1000;
128     host->health_log.next_log_id =
129     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
130
131     long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
132     if(n < 10) {
133         error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
134         config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
135     }
136     else
137         host->health_log.max = (unsigned int)n;
138
139     pthread_rwlock_init(&(host->health_log.alarm_log_rwlock), NULL);
140
141     char filename[FILENAME_MAX + 1];
142
143     if(is_localhost) {
144
145         host->cache_dir  = strdupz(netdata_configured_cache_dir);
146         host->varlib_dir = strdupz(netdata_configured_varlib_dir);
147
148     }
149     else {
150         // this is not localhost - append our GUID to localhost path
151
152         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
153         host->cache_dir = strdupz(filename);
154
155         if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
156             int r = mkdir(host->cache_dir, 0775);
157             if(r != 0 && errno != EEXIST)
158                 error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
159         }
160
161         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
162         host->varlib_dir = strdupz(filename);
163
164         if(host->health_enabled) {
165             int r = mkdir(host->varlib_dir, 0775);
166             if(r != 0 && errno != EEXIST)
167                 error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
168        }
169
170     }
171
172     if(host->health_enabled) {
173         snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
174         int r = mkdir(filename, 0775);
175         if(r != 0 && errno != EEXIST)
176             error("Host '%s': cannot create directory '%s'", host->hostname, filename);
177     }
178
179     snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
180     host->health_log_filename = strdupz(filename);
181
182     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
183     host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
184     host->health_default_recipient = strdup("root");
185
186
187     // ------------------------------------------------------------------------
188     // load health configuration
189
190     if(host->health_enabled) {
191         health_alarm_log_load(host);
192         health_alarm_log_open(host);
193
194         rrdhost_wrlock(host);
195         health_readdir(host, health_config_dir());
196         rrdhost_unlock(host);
197     }
198
199
200     // ------------------------------------------------------------------------
201     // link it and add it to the index
202
203     rrd_wrlock();
204
205     if(is_localhost) {
206         host->next = localhost;
207         localhost = host;
208     }
209     else {
210         if(localhost) {
211             host->next = localhost->next;
212             localhost->next = host;
213         }
214         else localhost = host;
215     }
216
217     RRDHOST *t = rrdhost_index_add(host);
218
219     if(t != host) {
220         error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
221         rrdhost_free(host);
222         host = NULL;
223     }
224     else {
225         info("Host '%s' with guid '%s' initialized"
226                      ", os %s"
227                      ", update every %d"
228                      ", memory mode %s"
229                      ", history entries %d"
230                      ", streaming %s"
231                      " (to '%s' with api key '%s')"
232                      ", health %s"
233                      ", cache_dir '%s'"
234                      ", varlib_dir '%s'"
235                      ", health_log '%s'"
236                      ", alarms default handler '%s'"
237                      ", alarms default recipient '%s'"
238              , host->hostname
239              , host->machine_guid
240              , host->os
241              , host->rrd_update_every
242              , rrd_memory_mode_name(host->rrd_memory_mode)
243              , host->rrd_history_entries
244              , host->rrdpush_enabled?"enabled":"disabled"
245              , host->rrdpush_destination?host->rrdpush_destination:""
246              , host->rrdpush_api_key?host->rrdpush_api_key:""
247              , host->health_enabled?"enabled":"disabled"
248              , host->cache_dir
249              , host->varlib_dir
250              , host->health_log_filename
251              , host->health_default_exec
252              , host->health_default_recipient
253         );
254     }
255
256     rrd_hosts_available++;
257     rrd_unlock();
258
259     return host;
260 }
261
262 RRDHOST *rrdhost_find_or_create(
263           const char *hostname
264         , const char *guid
265         , const char *os
266         , int update_every
267         , int history
268         , RRD_MEMORY_MODE mode
269         , int health_enabled
270         , int rrdpush_enabled
271         , char *rrdpush_destination
272         , char *rrdpush_api_key
273 ) {
274     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
275
276     RRDHOST *host = rrdhost_find_by_guid(guid, 0);
277     if(!host) {
278         host = rrdhost_create(
279                 hostname
280                 , guid
281                 , os
282                 , update_every
283                 , history
284                 , mode
285                 , health_enabled
286                 , rrdpush_enabled
287                 , rrdpush_destination
288                 , rrdpush_api_key
289                 , 0
290         );
291     }
292     else {
293         host->health_enabled = health_enabled;
294
295         if(strcmp(host->hostname, hostname)) {
296             char *t = host->hostname;
297             char *n = strdupz(hostname);
298             host->hostname = n;
299             freez(t);
300         }
301
302         if(host->rrd_update_every != update_every)
303             error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds.", host->hostname, host->rrd_update_every, update_every);
304
305         if(host->rrd_history_entries != history)
306             error("Host '%s' has history of %d entries, but the wanted one is %d entries.", host->hostname, host->rrd_history_entries, history);
307
308         if(host->rrd_memory_mode != mode)
309             error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
310     }
311
312     rrdhost_cleanup_remote_stale(host);
313
314     return host;
315 }
316
317 void rrdhost_cleanup_remote_stale(RRDHOST *protected) {
318     rrd_wrlock();
319
320     RRDHOST *h;
321     rrdhost_foreach_write(h) {
322         if(h != protected
323            && h != localhost
324            && !h->connected_senders
325            && h->senders_disconnected_time + rrdhost_free_orphan_time > now_realtime_sec()) {
326             info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", h->hostname, h->machine_guid);
327             rrdhost_save(h);
328             rrdhost_free(h);
329             break;
330         }
331     }
332
333     rrd_unlock();
334 }
335
336 // ----------------------------------------------------------------------------
337 // RRDHOST global / startup initialization
338
339 void rrd_init(char *hostname) {
340     health_init();
341     registry_init();
342     rrdpush_init();
343
344     debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
345     localhost = rrdhost_create(
346             hostname
347             , registry_get_this_machine_guid()
348             , os_type
349             , default_rrd_update_every
350             , default_rrd_history_entries
351             , default_rrd_memory_mode
352             , default_health_enabled
353             , default_rrdpush_enabled
354             , default_rrdpush_destination
355             , default_rrdpush_api_key
356             , 1
357     );
358 }
359
360 // ----------------------------------------------------------------------------
361 // RRDHOST - lock validations
// these are only used when NETDATA_INTERNAL_CHECKS is set
363
364 void rrdhost_check_rdlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
365     debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
366
367     int ret = pthread_rwlock_trywrlock(&host->rrdhost_rwlock);
368     if(ret == 0)
369         fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
370 }
371
372 void rrdhost_check_wrlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
373     debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
374
375     int ret = pthread_rwlock_tryrdlock(&host->rrdhost_rwlock);
376     if(ret == 0)
377         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
378 }
379
380 void rrd_check_rdlock_int(const char *file, const char *function, const unsigned long line) {
381     debug(D_RRDHOST, "Checking read lock on all RRDs");
382
383     int ret = pthread_rwlock_trywrlock(&rrd_rwlock);
384     if(ret == 0)
385         fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
386 }
387
388 void rrd_check_wrlock_int(const char *file, const char *function, const unsigned long line) {
389     debug(D_RRDHOST, "Checking write lock on all RRDs");
390
391     int ret = pthread_rwlock_tryrdlock(&rrd_rwlock);
392     if(ret == 0)
393         fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
394 }
395
396 // ----------------------------------------------------------------------------
397 // RRDHOST - free
398
399 void rrdhost_free(RRDHOST *host) {
400     if(!host) return;
401
402     info("Freeing all memory for host '%s'...", host->hostname);
403
404     rrd_check_wrlock();     // make sure the RRDs are write locked
405     rrdhost_wrlock(host);   // lock this RRDHOST
406
407     // ------------------------------------------------------------------------
408     // release its children resources
409
410     while(host->rrdset_root) rrdset_free(host->rrdset_root);
411
412     while(host->alarms) rrdcalc_free(host, host->alarms);
413     while(host->templates) rrdcalctemplate_free(host, host->templates);
414     health_alarm_log_free(host);
415
416
417     // ------------------------------------------------------------------------
418     // remove it from the indexes
419
420     if(rrdhost_index_del(host) != host)
421         error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
422
423
424     // ------------------------------------------------------------------------
425     // unlink it from the host
426
427     if(host == localhost) {
428         localhost = host->next;
429     }
430     else {
431         // find the previous one
432         RRDHOST *h;
433         for(h = localhost; h && h->next != host ; h = h->next) ;
434
435         // bypass it
436         if(h) h->next = host->next;
437         else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
438     }
439
440     // ------------------------------------------------------------------------
441     // free it
442
443     rrdpush_sender_thread_stop(host);
444
445     freez(host->os);
446     freez(host->cache_dir);
447     freez(host->varlib_dir);
448     freez(host->rrdpush_api_key);
449     freez(host->rrdpush_destination);
450     freez(host->health_default_exec);
451     freez(host->health_default_recipient);
452     freez(host->health_log_filename);
453     freez(host->hostname);
454     rrdhost_unlock(host);
455     freez(host);
456
457     rrd_hosts_available--;
458 }
459
460 void rrdhost_free_all(void) {
461     rrd_wrlock();
462     while(localhost) rrdhost_free(localhost);
463     rrd_unlock();
464 }
465
466 // ----------------------------------------------------------------------------
467 // RRDHOST - save
468
469 void rrdhost_save(RRDHOST *host) {
470     if(!host) return;
471
472     info("Saving database of host '%s'...", host->hostname);
473
474     RRDSET *st;
475     RRDDIM *rd;
476
477     // we get a write lock
478     // to ensure only one thread is saving the database
479     rrdhost_wrlock(host);
480
481     rrdset_foreach_write(st, host) {
482         rrdset_rdlock(st);
483
484         if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
485             debug(D_RRD_STATS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename);
486             savememory(st->cache_filename, st, st->memsize);
487         }
488
489         rrddim_foreach_read(rd, st) {
490             if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE)) {
491                 debug(D_RRD_STATS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename);
492                 savememory(rd->cache_filename, rd, rd->memsize);
493             }
494         }
495
496         rrdset_unlock(st);
497     }
498
499     rrdhost_unlock(host);
500 }
501
502 void rrdhost_save_all(void) {
503     info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
504
505     rrd_rdlock();
506
507     RRDHOST *host;
508     rrdhost_foreach_read(host)
509         rrdhost_save(host);
510
511     rrd_unlock();
512 }