]> arthur.barton.de Git - netdata.git/blob - src/rrdhost.c
added compile option NETDATA_VERIFY_LOCKS to enable the locks
[netdata.git] / src / rrdhost.c
1 #define NETDATA_RRD_INTERNALS 1
2 #include "common.h"
3
// the local host, created by rrd_init(); rrdhost_create() also keeps it
// as the head of the linked list of all hosts
RRDHOST *localhost = NULL;

// number of hosts currently allocated:
// incremented by rrdhost_create(), decremented by rrdhost_free()
size_t rrd_hosts_available = 0;

// the global lock verified by __rrd_check_rdlock() / __rrd_check_wrlock()
// (presumably the lock rrd_rdlock() / rrd_wrlock() take - confirm in the header)
netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;

// seconds an obsolete chart has to stay untouched before rrdhost_cleanup_obsolete()
// removes it; rrd_init() overrides this default from the configuration
time_t rrdset_free_obsolete_time = 3600;

// seconds that have to pass since the senders of a host disconnected
// before rrdhost_cleanup_orphan() removes the host
time_t rrdhost_free_orphan_time = 3600;
10
11 // ----------------------------------------------------------------------------
12 // RRDHOST index
13
14 int rrdhost_compare(void* a, void* b) {
15     if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
16     else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
17     else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
18 }
19
// the index of all hosts, ordered by rrdhost_compare() (i.e. by machine guid);
// it starts empty and carries its own lock
avl_tree_lock rrdhost_root_index = {
        .avl_tree = { NULL, rrdhost_compare },
        .rwlock = AVL_LOCK_INITIALIZER
};
24
25 RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
26     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
27
28     RRDHOST tmp;
29     strncpyz(tmp.machine_guid, guid, GUID_LEN);
30     tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
31
32     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
33 }
34
35 RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
36     if(unlikely(!strcmp(hostname, "localhost")))
37         return localhost;
38
39     if(unlikely(!hash)) hash = simple_hash(hostname);
40
41     rrd_rdlock();
42     RRDHOST *host;
43     rrdhost_foreach_read(host) {
44         if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
45             rrd_unlock();
46             return host;
47         }
48     }
49     rrd_unlock();
50
51     return NULL;
52 }
53
// Index helpers: insert a host into / remove a host from rrdhost_root_index.
// Both evaluate to the value returned by the AVL call, cast to RRDHOST *;
// callers in this file compare that value against the host they passed in.
// The expansions are fully parenthesized, so the cast applies to the call result
// even when the macro is followed by a postfix operator (e.g. ->member).
#define rrdhost_index_add(rrdhost) ((RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
#define rrdhost_index_del(rrdhost) ((RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
56
57
58 // ----------------------------------------------------------------------------
59 // RRDHOST - internal helpers
60
61 static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
62     freez(host->hostname);
63     host->hostname = strdupz(hostname);
64     host->hash_hostname = simple_hash(host->hostname);
65 }
66
67 static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
68     freez(host->os);
69     host->os = strdupz(os?os:"unknown");
70 }
71
72 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
73     strncpy(host->machine_guid, machine_guid, GUID_LEN);
74     host->machine_guid[GUID_LEN] = '\0';
75     host->hash_machine_guid = simple_hash(host->machine_guid);
76 }
77
78
79 // ----------------------------------------------------------------------------
80 // RRDHOST - add a host
81
82 RRDHOST *rrdhost_create(const char *hostname,
83         const char *guid,
84         const char *os,
85         int update_every,
86         int entries,
87         RRD_MEMORY_MODE memory_mode,
88         int health_enabled,
89         int rrdpush_enabled,
90         char *rrdpush_destination,
91         char *rrdpush_api_key,
92         int is_localhost
93 ) {
94
95     debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
96
97     RRDHOST *host = callocz(1, sizeof(RRDHOST));
98
99     host->rrd_update_every    = update_every;
100     host->rrd_history_entries = entries;
101     host->rrd_memory_mode     = memory_mode;
102     host->health_enabled      = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
103     host->rrdpush_enabled     = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key);
104     host->rrdpush_destination = (host->rrdpush_enabled)?strdupz(rrdpush_destination):NULL;
105     host->rrdpush_api_key     = (host->rrdpush_enabled)?strdupz(rrdpush_api_key):NULL;
106
107     host->rrdpush_pipe[0] = -1;
108     host->rrdpush_pipe[1] = -1;
109     host->rrdpush_socket  = -1;
110
111     netdata_mutex_init(&host->rrdpush_mutex);
112     netdata_rwlock_init(&host->rrdhost_rwlock);
113
114     rrdhost_init_hostname(host, hostname);
115     rrdhost_init_machine_guid(host, guid);
116     rrdhost_init_os(host, os);
117
118     avl_init_lock(&(host->rrdset_root_index),      rrdset_compare);
119     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
120     avl_init_lock(&(host->rrdfamily_root_index),   rrdfamily_compare);
121     avl_init_lock(&(host->variables_root_index),   rrdvar_compare);
122
123     if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1))
124         rrdhost_flag_set(host, RRDHOST_DELETE_OBSOLETE_FILES);
125
126     if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost)
127         rrdhost_flag_set(host, RRDHOST_DELETE_ORPHAN_FILES);
128
129
130     // ------------------------------------------------------------------------
131     // initialize health variables
132
133     host->health_log.next_log_id = 1;
134     host->health_log.next_alarm_id = 1;
135     host->health_log.max = 1000;
136     host->health_log.next_log_id =
137     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
138
139     long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
140     if(n < 10) {
141         error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
142         config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
143     }
144     else
145         host->health_log.max = (unsigned int)n;
146
147     netdata_rwlock_init(&host->health_log.alarm_log_rwlock);
148
149     char filename[FILENAME_MAX + 1];
150
151     if(is_localhost) {
152
153         host->cache_dir  = strdupz(netdata_configured_cache_dir);
154         host->varlib_dir = strdupz(netdata_configured_varlib_dir);
155
156     }
157     else {
158         // this is not localhost - append our GUID to localhost path
159
160         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
161         host->cache_dir = strdupz(filename);
162
163         if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
164             int r = mkdir(host->cache_dir, 0775);
165             if(r != 0 && errno != EEXIST)
166                 error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
167         }
168
169         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
170         host->varlib_dir = strdupz(filename);
171
172         if(host->health_enabled) {
173             int r = mkdir(host->varlib_dir, 0775);
174             if(r != 0 && errno != EEXIST)
175                 error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
176        }
177
178     }
179
180     if(host->health_enabled) {
181         snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
182         int r = mkdir(filename, 0775);
183         if(r != 0 && errno != EEXIST)
184             error("Host '%s': cannot create directory '%s'", host->hostname, filename);
185     }
186
187     snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
188     host->health_log_filename = strdupz(filename);
189
190     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
191     host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
192     host->health_default_recipient = strdup("root");
193
194
195     // ------------------------------------------------------------------------
196     // load health configuration
197
198     if(host->health_enabled) {
199         health_alarm_log_load(host);
200         health_alarm_log_open(host);
201
202         rrdhost_wrlock(host);
203         health_readdir(host, health_config_dir());
204         rrdhost_unlock(host);
205     }
206
207
208     // ------------------------------------------------------------------------
209     // link it and add it to the index
210
211     rrd_wrlock();
212
213     if(is_localhost) {
214         host->next = localhost;
215         localhost = host;
216     }
217     else {
218         if(localhost) {
219             host->next = localhost->next;
220             localhost->next = host;
221         }
222         else localhost = host;
223     }
224
225     RRDHOST *t = rrdhost_index_add(host);
226
227     if(t != host) {
228         error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
229         rrdhost_free(host);
230         host = NULL;
231     }
232     else {
233         info("Host '%s' with guid '%s' initialized"
234                      ", os %s"
235                      ", update every %d"
236                      ", memory mode %s"
237                      ", history entries %d"
238                      ", streaming %s"
239                      " (to '%s' with api key '%s')"
240                      ", health %s"
241                      ", cache_dir '%s'"
242                      ", varlib_dir '%s'"
243                      ", health_log '%s'"
244                      ", alarms default handler '%s'"
245                      ", alarms default recipient '%s'"
246              , host->hostname
247              , host->machine_guid
248              , host->os
249              , host->rrd_update_every
250              , rrd_memory_mode_name(host->rrd_memory_mode)
251              , host->rrd_history_entries
252              , host->rrdpush_enabled?"enabled":"disabled"
253              , host->rrdpush_destination?host->rrdpush_destination:""
254              , host->rrdpush_api_key?host->rrdpush_api_key:""
255              , host->health_enabled?"enabled":"disabled"
256              , host->cache_dir
257              , host->varlib_dir
258              , host->health_log_filename
259              , host->health_default_exec
260              , host->health_default_recipient
261         );
262     }
263
264     rrd_hosts_available++;
265     rrd_unlock();
266
267     return host;
268 }
269
270 RRDHOST *rrdhost_find_or_create(
271           const char *hostname
272         , const char *guid
273         , const char *os
274         , int update_every
275         , int history
276         , RRD_MEMORY_MODE mode
277         , int health_enabled
278         , int rrdpush_enabled
279         , char *rrdpush_destination
280         , char *rrdpush_api_key
281 ) {
282     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
283
284     RRDHOST *host = rrdhost_find_by_guid(guid, 0);
285     if(!host) {
286         host = rrdhost_create(
287                 hostname
288                 , guid
289                 , os
290                 , update_every
291                 , history
292                 , mode
293                 , health_enabled
294                 , rrdpush_enabled
295                 , rrdpush_destination
296                 , rrdpush_api_key
297                 , 0
298         );
299     }
300     else {
301         host->health_enabled = health_enabled;
302
303         if(strcmp(host->hostname, hostname)) {
304             char *t = host->hostname;
305             host->hostname = strdupz(hostname);
306             host->hash_hostname = simple_hash(host->hostname);
307             freez(t);
308         }
309
310         if(host->rrd_update_every != update_every)
311             error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds.", host->hostname, host->rrd_update_every, update_every);
312
313         if(host->rrd_history_entries != history)
314             error("Host '%s' has history of %d entries, but the wanted one is %d entries.", host->hostname, host->rrd_history_entries, history);
315
316         if(host->rrd_memory_mode != mode)
317             error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
318     }
319
320     rrdhost_cleanup_orphan(host);
321
322     return host;
323 }
324
325 void rrdhost_cleanup_orphan(RRDHOST *protected) {
326     time_t now = now_realtime_sec();
327
328     rrd_wrlock();
329
330     RRDHOST *host;
331
332 restart_after_removal:
333     rrdhost_foreach_write(host) {
334         if(host != protected
335            && host != localhost
336            && !host->connected_senders
337            && host->senders_disconnected_time + rrdhost_free_orphan_time < now) {
338             info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid);
339
340             if(rrdset_flag_check(host, RRDHOST_ORPHAN))
341                 rrdhost_delete(host);
342             else
343                 rrdhost_save(host);
344
345             rrdhost_free(host);
346             goto restart_after_removal;
347         }
348     }
349
350     rrd_unlock();
351 }
352
353 // ----------------------------------------------------------------------------
354 // RRDHOST global / startup initialization
355
356 void rrd_init(char *hostname) {
357     rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time);
358
359     health_init();
360     registry_init();
361     rrdpush_init();
362
363     debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
364     localhost = rrdhost_create(
365             hostname
366             , registry_get_this_machine_guid()
367             , os_type
368             , default_rrd_update_every
369             , default_rrd_history_entries
370             , default_rrd_memory_mode
371             , default_health_enabled
372             , default_rrdpush_enabled
373             , default_rrdpush_destination
374             , default_rrdpush_api_key
375             , 1
376     );
377 }
378
379 // ----------------------------------------------------------------------------
380 // RRDHOST - lock validations
// these are only used when NETDATA_INTERNAL_CHECKS is set
382
383 void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
384     debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
385
386     int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock);
387     if(ret == 0)
388         fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
389 }
390
391 void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
392     debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
393
394     int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock);
395     if(ret == 0)
396         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
397 }
398
399 void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) {
400     debug(D_RRDHOST, "Checking read lock on all RRDs");
401
402     int ret = netdata_rwlock_trywrlock(&rrd_rwlock);
403     if(ret == 0)
404         fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
405 }
406
407 void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) {
408     debug(D_RRDHOST, "Checking write lock on all RRDs");
409
410     int ret = netdata_rwlock_tryrdlock(&rrd_rwlock);
411     if(ret == 0)
412         fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
413 }
414
415 // ----------------------------------------------------------------------------
416 // RRDHOST - free
417
418 void rrdhost_free(RRDHOST *host) {
419     if(!host) return;
420
421     info("Freeing all memory for host '%s'...", host->hostname);
422
423     rrd_check_wrlock();     // make sure the RRDs are write locked
424
425     // stop a possibly running thread
426     rrdpush_sender_thread_stop(host);
427
428     rrdhost_wrlock(host);   // lock this RRDHOST
429
430     // ------------------------------------------------------------------------
431     // release its children resources
432
433     while(host->rrdset_root) rrdset_free(host->rrdset_root);
434
435     while(host->alarms) rrdcalc_free(host, host->alarms);
436     while(host->templates) rrdcalctemplate_free(host, host->templates);
437     health_alarm_log_free(host);
438
439
440     // ------------------------------------------------------------------------
441     // remove it from the indexes
442
443     if(rrdhost_index_del(host) != host)
444         error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
445
446
447     // ------------------------------------------------------------------------
448     // unlink it from the host
449
450     if(host == localhost) {
451         localhost = host->next;
452     }
453     else {
454         // find the previous one
455         RRDHOST *h;
456         for(h = localhost; h && h->next != host ; h = h->next) ;
457
458         // bypass it
459         if(h) h->next = host->next;
460         else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
461     }
462
463     // ------------------------------------------------------------------------
464     // free it
465
466     freez(host->os);
467     freez(host->cache_dir);
468     freez(host->varlib_dir);
469     freez(host->rrdpush_api_key);
470     freez(host->rrdpush_destination);
471     freez(host->health_default_exec);
472     freez(host->health_default_recipient);
473     freez(host->health_log_filename);
474     freez(host->hostname);
475     rrdhost_unlock(host);
476     netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
477     netdata_rwlock_destroy(&host->rrdhost_rwlock);
478     freez(host);
479
480     rrd_hosts_available--;
481 }
482
483 void rrdhost_free_all(void) {
484     rrd_wrlock();
485     while(localhost) rrdhost_free(localhost);
486     rrd_unlock();
487 }
488
489 // ----------------------------------------------------------------------------
490 // RRDHOST - save
491
492 void rrdhost_save(RRDHOST *host) {
493     if(!host) return;
494
495     info("Saving database of host '%s'...", host->hostname);
496
497     RRDSET *st;
498
499     // we get a write lock
500     // to ensure only one thread is saving the database
501     rrdhost_wrlock(host);
502
503     rrdset_foreach_write(st, host) {
504         rrdset_rdlock(st);
505         rrdset_save(st);
506         rrdset_unlock(st);
507     }
508
509     rrdhost_unlock(host);
510 }
511
512 // ----------------------------------------------------------------------------
513 // RRDHOST - delete files
514
515 void rrdhost_delete(RRDHOST *host) {
516     if(!host) return;
517
518     info("Deleting database of host '%s'...", host->hostname);
519
520     RRDSET *st;
521
522     // we get a write lock
523     // to ensure only one thread is saving the database
524     rrdhost_wrlock(host);
525
526     rrdset_foreach_write(st, host) {
527         rrdset_rdlock(st);
528         rrdset_delete(st);
529         rrdset_unlock(st);
530     }
531
532     rrdhost_unlock(host);
533 }
534
535 void rrdhost_save_all(void) {
536     info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
537
538     rrd_rdlock();
539
540     RRDHOST *host;
541     rrdhost_foreach_read(host)
542         rrdhost_save(host);
543
544     rrd_unlock();
545 }
546
547 void rrdhost_cleanup_obsolete(RRDHOST *host) {
548     time_t now = now_realtime_sec();
549
550     RRDSET *st;
551
552 restart_after_removal:
553     rrdset_foreach_write(st, host) {
554         if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
555                     && st->last_accessed_time + rrdset_free_obsolete_time < now
556                     && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
557                     && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
558         )) {
559
560             rrdset_rdlock(st);
561
562             if(rrdhost_flag_check(host, RRDHOST_DELETE_OBSOLETE_FILES))
563                 rrdset_delete(st);
564             else
565                 rrdset_save(st);
566
567             rrdset_unlock(st);
568
569             rrdset_free(st);
570             goto restart_after_removal;
571         }
572     }
573 }