arthur.barton.de Git - netdata.git/blob - src/rrdhost.c
Merge branch 'master' into ab-debian
[netdata.git] / src / rrdhost.c
1 #define NETDATA_RRD_INTERNALS 1
2 #include "common.h"
3
4 RRDHOST *localhost = NULL;                                 // host created by rrd_init(); also the head of the ->next linked list of hosts
5 size_t rrd_hosts_available = 0;                            // incremented by rrdhost_create(), decremented by rrdhost_free()
6 netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;  // global lock probed by __rrd_check_rdlock() / __rrd_check_wrlock()
7
8 time_t rrdset_free_obsolete_time = 3600;                   // seconds; overridable via "cleanup obsolete charts after seconds" in rrd_init(); used by rrdhost_cleanup_obsolete()
9 time_t rrdhost_free_orphan_time = 3600;                    // seconds after senders_disconnected_time before rrdhost_should_be_deleted() reports a host as removable
10
11 // ----------------------------------------------------------------------------
12 // RRDHOST index
13
14 int rrdhost_compare(void* a, void* b) {
15     if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
16     else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
17     else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
18 }
19
20 avl_tree_lock rrdhost_root_index = {                // index of all hosts: ordered by rrdhost_compare(), searched by rrdhost_find_by_guid()
21         .avl_tree = { NULL, rrdhost_compare },
22         .rwlock = AVL_LOCK_INITIALIZER
23 };
24
25 RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
26     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
27
28     RRDHOST tmp;
29     strncpyz(tmp.machine_guid, guid, GUID_LEN);
30     tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
31
32     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
33 }
34
35 RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
36     if(unlikely(!strcmp(hostname, "localhost")))
37         return localhost;
38
39     if(unlikely(!hash)) hash = simple_hash(hostname);
40
41     rrd_rdlock();
42     RRDHOST *host;
43     rrdhost_foreach_read(host) {
44         if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
45             rrd_unlock();
46             return host;
47         }
48     }
49     rrd_unlock();
50
51     return NULL;
52 }
53
// Insert a host into / remove a host from rrdhost_root_index.
// rrdhost_index_add() evaluates to the host itself when it was inserted, or
// to the entry already indexed under the same key (see rrdhost_create()).
// rrdhost_index_del() evaluates to the entry that was removed (callers
// compare it against the host they asked to remove).
// The whole expansion is parenthesized so that the cast applies to the call
// result even when the macro is followed by a postfix operator such as '->'.
#define rrdhost_index_add(rrdhost) ((RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
#define rrdhost_index_del(rrdhost) ((RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
56
57
58 // ----------------------------------------------------------------------------
59 // RRDHOST - internal helpers
60
61 static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
62     freez(host->hostname);
63     host->hostname = strdupz(hostname);
64     host->hash_hostname = simple_hash(host->hostname);
65 }
66
67 static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
68     freez(host->os);
69     host->os = strdupz(os?os:"unknown");
70 }
71
72 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
73     strncpy(host->machine_guid, machine_guid, GUID_LEN);
74     host->machine_guid[GUID_LEN] = '\0';
75     host->hash_machine_guid = simple_hash(host->machine_guid);
76 }
77
78
79 // ----------------------------------------------------------------------------
80 // RRDHOST - add a host
81
82 RRDHOST *rrdhost_create(const char *hostname,
83         const char *guid,
84         const char *os,
85         int update_every,
86         long entries,
87         RRD_MEMORY_MODE memory_mode,
88         int health_enabled,
89         int rrdpush_enabled,
90         char *rrdpush_destination,
91         char *rrdpush_api_key,
92         int is_localhost
93 ) {
94     debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
95
96     rrd_check_wrlock();
97
98     RRDHOST *host = callocz(1, sizeof(RRDHOST));
99
100     host->rrd_update_every    = update_every;
101     host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries);
102     host->rrd_memory_mode     = memory_mode;
103     host->health_enabled      = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
104     host->rrdpush_enabled     = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key);
105     host->rrdpush_destination = (host->rrdpush_enabled)?strdupz(rrdpush_destination):NULL;
106     host->rrdpush_api_key     = (host->rrdpush_enabled)?strdupz(rrdpush_api_key):NULL;
107
108     host->rrdpush_pipe[0] = -1;
109     host->rrdpush_pipe[1] = -1;
110     host->rrdpush_socket  = -1;
111
112     netdata_mutex_init(&host->rrdpush_mutex);
113     netdata_rwlock_init(&host->rrdhost_rwlock);
114
115     rrdhost_init_hostname(host, hostname);
116     rrdhost_init_machine_guid(host, guid);
117     rrdhost_init_os(host, os);
118
119     avl_init_lock(&(host->rrdset_root_index),      rrdset_compare);
120     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
121     avl_init_lock(&(host->rrdfamily_root_index),   rrdfamily_compare);
122     avl_init_lock(&(host->variables_root_index),   rrdvar_compare);
123
124     if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1))
125         rrdhost_flag_set(host, RRDHOST_DELETE_OBSOLETE_FILES);
126
127     if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost)
128         rrdhost_flag_set(host, RRDHOST_DELETE_ORPHAN_FILES);
129
130
131     // ------------------------------------------------------------------------
132     // initialize health variables
133
134     host->health_log.next_log_id = 1;
135     host->health_log.next_alarm_id = 1;
136     host->health_log.max = 1000;
137     host->health_log.next_log_id =
138     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
139
140     long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
141     if(n < 10) {
142         error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
143         config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
144     }
145     else
146         host->health_log.max = (unsigned int)n;
147
148     netdata_rwlock_init(&host->health_log.alarm_log_rwlock);
149
150     char filename[FILENAME_MAX + 1];
151
152     if(is_localhost) {
153
154         host->cache_dir  = strdupz(netdata_configured_cache_dir);
155         host->varlib_dir = strdupz(netdata_configured_varlib_dir);
156
157     }
158     else {
159         // this is not localhost - append our GUID to localhost path
160
161         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
162         host->cache_dir = strdupz(filename);
163
164         if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
165             int r = mkdir(host->cache_dir, 0775);
166             if(r != 0 && errno != EEXIST)
167                 error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
168         }
169
170         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
171         host->varlib_dir = strdupz(filename);
172
173         if(host->health_enabled) {
174             int r = mkdir(host->varlib_dir, 0775);
175             if(r != 0 && errno != EEXIST)
176                 error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
177        }
178
179     }
180
181     if(host->health_enabled) {
182         snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
183         int r = mkdir(filename, 0775);
184         if(r != 0 && errno != EEXIST)
185             error("Host '%s': cannot create directory '%s'", host->hostname, filename);
186     }
187
188     snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
189     host->health_log_filename = strdupz(filename);
190
191     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
192     host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
193     host->health_default_recipient = strdup("root");
194
195
196     // ------------------------------------------------------------------------
197     // load health configuration
198
199     if(host->health_enabled) {
200         health_alarm_log_load(host);
201         health_alarm_log_open(host);
202
203         rrdhost_wrlock(host);
204         health_readdir(host, health_config_dir());
205         rrdhost_unlock(host);
206     }
207
208
209     // ------------------------------------------------------------------------
210     // link it and add it to the index
211
212     if(is_localhost) {
213         host->next = localhost;
214         localhost = host;
215     }
216     else {
217         if(localhost) {
218             host->next = localhost->next;
219             localhost->next = host;
220         }
221         else localhost = host;
222     }
223
224     RRDHOST *t = rrdhost_index_add(host);
225
226     if(t != host) {
227         error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
228         rrdhost_free(host);
229         host = NULL;
230     }
231     else {
232         info("Host '%s' with guid '%s' initialized"
233                      ", os %s"
234                      ", update every %d"
235                      ", memory mode %s"
236                      ", history entries %ld"
237                      ", streaming %s"
238                      " (to '%s' with api key '%s')"
239                      ", health %s"
240                      ", cache_dir '%s'"
241                      ", varlib_dir '%s'"
242                      ", health_log '%s'"
243                      ", alarms default handler '%s'"
244                      ", alarms default recipient '%s'"
245              , host->hostname
246              , host->machine_guid
247              , host->os
248              , host->rrd_update_every
249              , rrd_memory_mode_name(host->rrd_memory_mode)
250              , host->rrd_history_entries
251              , host->rrdpush_enabled?"enabled":"disabled"
252              , host->rrdpush_destination?host->rrdpush_destination:""
253              , host->rrdpush_api_key?host->rrdpush_api_key:""
254              , host->health_enabled?"enabled":"disabled"
255              , host->cache_dir
256              , host->varlib_dir
257              , host->health_log_filename
258              , host->health_default_exec
259              , host->health_default_recipient
260         );
261     }
262
263     rrd_hosts_available++;
264
265     return host;
266 }
267
268 RRDHOST *rrdhost_find_or_create(
269           const char *hostname
270         , const char *guid
271         , const char *os
272         , int update_every
273         , long history
274         , RRD_MEMORY_MODE mode
275         , int health_enabled
276         , int rrdpush_enabled
277         , char *rrdpush_destination
278         , char *rrdpush_api_key
279 ) {
280     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
281
282     rrd_wrlock();
283     RRDHOST *host = rrdhost_find_by_guid(guid, 0);
284     if(!host) {
285         host = rrdhost_create(
286                 hostname
287                 , guid
288                 , os
289                 , update_every
290                 , history
291                 , mode
292                 , health_enabled
293                 , rrdpush_enabled
294                 , rrdpush_destination
295                 , rrdpush_api_key
296                 , 0
297         );
298     }
299     else {
300         host->health_enabled = health_enabled;
301
302         if(strcmp(host->hostname, hostname)) {
303             char *t = host->hostname;
304             host->hostname = strdupz(hostname);
305             host->hash_hostname = simple_hash(host->hostname);
306             freez(t);
307         }
308
309         if(host->rrd_update_every != update_every)
310             error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds.", host->hostname, host->rrd_update_every, update_every);
311
312         if(host->rrd_history_entries != history)
313             error("Host '%s' has history of %ld entries, but the wanted one is %ld entries.", host->hostname, host->rrd_history_entries, history);
314
315         if(host->rrd_memory_mode != mode)
316             error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
317     }
318     rrd_unlock();
319
320     rrdhost_cleanup_orphan(host);
321
322     return host;
323 }
324
325 static inline int rrdhost_should_be_deleted(RRDHOST *host, RRDHOST *protected, time_t now) {
326     if(host != protected
327        && host != localhost
328        && !host->connected_senders
329        && host->senders_disconnected_time
330        && host->senders_disconnected_time + rrdhost_free_orphan_time < now)
331         return 1;
332
333     return 0;
334 }
335
336 void rrdhost_cleanup_orphan(RRDHOST *protected) {
337     time_t now = now_realtime_sec();
338
339     rrd_wrlock();
340
341     RRDHOST *host;
342
343 restart_after_removal:
344     rrdhost_foreach_write(host) {
345         if(rrdhost_should_be_deleted(host, protected, now)) {
346             info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid);
347
348             if(rrdset_flag_check(host, RRDHOST_ORPHAN))
349                 rrdhost_delete(host);
350             else
351                 rrdhost_save(host);
352
353             rrdhost_free(host);
354             goto restart_after_removal;
355         }
356     }
357
358     rrd_unlock();
359 }
360
361 // ----------------------------------------------------------------------------
362 // RRDHOST global / startup initialization
363
364 void rrd_init(char *hostname) {
365     rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time);
366
367     health_init();
368     registry_init();
369     rrdpush_init();
370
371     debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
372     rrd_wrlock();
373     localhost = rrdhost_create(
374             hostname
375             , registry_get_this_machine_guid()
376             , os_type
377             , default_rrd_update_every
378             , default_rrd_history_entries
379             , default_rrd_memory_mode
380             , default_health_enabled
381             , default_rrdpush_enabled
382             , default_rrdpush_destination
383             , default_rrdpush_api_key
384             , 1
385     );
386     rrd_unlock();
387 }
388
389 // ----------------------------------------------------------------------------
390 // RRDHOST - lock validations
391 // these are only used when NETDATA_INTERNAL_CHECKS is set
392
393 void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
394     debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
395
396     int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock);
397     if(ret == 0)
398         fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
399 }
400
401 void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
402     debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
403
404     int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock);
405     if(ret == 0)
406         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
407 }
408
409 void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) {
410     debug(D_RRDHOST, "Checking read lock on all RRDs");
411
412     int ret = netdata_rwlock_trywrlock(&rrd_rwlock);
413     if(ret == 0)
414         fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
415 }
416
417 void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) {
418     debug(D_RRDHOST, "Checking write lock on all RRDs");
419
420     int ret = netdata_rwlock_tryrdlock(&rrd_rwlock);
421     if(ret == 0)
422         fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
423 }
424
425 // ----------------------------------------------------------------------------
426 // RRDHOST - free
427
428 void rrdhost_free(RRDHOST *host) {
429     if(!host) return;
430
431     info("Freeing all memory for host '%s'...", host->hostname);
432
433     rrd_check_wrlock();     // make sure the RRDs are write locked
434
435     // stop a possibly running thread
436     rrdpush_sender_thread_stop(host);
437
438     rrdhost_wrlock(host);   // lock this RRDHOST
439
440     // ------------------------------------------------------------------------
441     // release its children resources
442
443     while(host->rrdset_root) rrdset_free(host->rrdset_root);
444
445     while(host->alarms) rrdcalc_free(host, host->alarms);
446     while(host->templates) rrdcalctemplate_free(host, host->templates);
447     health_alarm_log_free(host);
448
449
450     // ------------------------------------------------------------------------
451     // remove it from the indexes
452
453     if(rrdhost_index_del(host) != host)
454         error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
455
456
457     // ------------------------------------------------------------------------
458     // unlink it from the host
459
460     if(host == localhost) {
461         localhost = host->next;
462     }
463     else {
464         // find the previous one
465         RRDHOST *h;
466         for(h = localhost; h && h->next != host ; h = h->next) ;
467
468         // bypass it
469         if(h) h->next = host->next;
470         else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
471     }
472
473     // ------------------------------------------------------------------------
474     // free it
475
476     freez(host->os);
477     freez(host->cache_dir);
478     freez(host->varlib_dir);
479     freez(host->rrdpush_api_key);
480     freez(host->rrdpush_destination);
481     freez(host->health_default_exec);
482     freez(host->health_default_recipient);
483     freez(host->health_log_filename);
484     freez(host->hostname);
485     rrdhost_unlock(host);
486     netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
487     netdata_rwlock_destroy(&host->rrdhost_rwlock);
488     freez(host);
489
490     rrd_hosts_available--;
491 }
492
493 void rrdhost_free_all(void) {
494     rrd_wrlock();
495     while(localhost) rrdhost_free(localhost);
496     rrd_unlock();
497 }
498
499 // ----------------------------------------------------------------------------
500 // RRDHOST - save
501
502 void rrdhost_save(RRDHOST *host) {
503     if(!host) return;
504
505     info("Saving database of host '%s'...", host->hostname);
506
507     RRDSET *st;
508
509     // we get a write lock
510     // to ensure only one thread is saving the database
511     rrdhost_wrlock(host);
512
513     rrdset_foreach_write(st, host) {
514         rrdset_rdlock(st);
515         rrdset_save(st);
516         rrdset_unlock(st);
517     }
518
519     rrdhost_unlock(host);
520 }
521
522 // ----------------------------------------------------------------------------
523 // RRDHOST - delete files
524
525 void rrdhost_delete(RRDHOST *host) {
526     if(!host) return;
527
528     info("Deleting database of host '%s'...", host->hostname);
529
530     RRDSET *st;
531
532     // we get a write lock
533     // to ensure only one thread is saving the database
534     rrdhost_wrlock(host);
535
536     rrdset_foreach_write(st, host) {
537         rrdset_rdlock(st);
538         rrdset_delete(st);
539         rrdset_unlock(st);
540     }
541
542     rrdhost_unlock(host);
543 }
544
545 void rrdhost_save_all(void) {
546     info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
547
548     rrd_rdlock();
549
550     RRDHOST *host;
551     rrdhost_foreach_read(host)
552         rrdhost_save(host);
553
554     rrd_unlock();
555 }
556
557 void rrdhost_cleanup_obsolete(RRDHOST *host) {
558     time_t now = now_realtime_sec();
559
560     RRDSET *st;
561
562 restart_after_removal:
563     rrdset_foreach_write(st, host) {
564         if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
565                     && st->last_accessed_time + rrdset_free_obsolete_time < now
566                     && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
567                     && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
568         )) {
569
570             rrdset_rdlock(st);
571
572             if(rrdhost_flag_check(host, RRDHOST_DELETE_OBSOLETE_FILES))
573                 rrdset_delete(st);
574             else
575                 rrdset_save(st);
576
577             rrdset_unlock(st);
578
579             rrdset_free(st);
580             goto restart_after_removal;
581         }
582     }
583 }