]> arthur.barton.de Git - netdata.git/blob - src/rrdhost.c
Merge pull request #1858 from ktsaou/master
[netdata.git] / src / rrdhost.c
1 #define NETDATA_RRD_INTERNALS 1
2 #include "common.h"
3
// The host netdata itself runs on. rrdhost_create() and rrdhost_free() also
// treat it as the head of the singly linked list of all hosts (via ->next).
RRDHOST *localhost = NULL;
// Number of hosts currently allocated: incremented by rrdhost_create(),
// decremented by rrdhost_free().
size_t rrd_hosts_available = 0;
// Global lock validated by rrd_check_rdlock_int() / rrd_check_wrlock_int();
// presumably the lock taken by rrd_rdlock() / rrd_wrlock() - confirm in the header.
pthread_rwlock_t rrd_rwlock = PTHREAD_RWLOCK_INITIALIZER;

// Added to a host's senders_disconnected_time and compared with
// now_realtime_sec() by rrdhost_cleanup_remote_stale(), i.e. expressed in seconds.
time_t rrdhost_free_orphan_time = 3600;
9
10 // ----------------------------------------------------------------------------
11 // RRDHOST index
12
13 int rrdhost_compare(void* a, void* b) {
14     if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
15     else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
16     else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
17 }
18
// Global index of all hosts, ordered by rrdhost_compare() (machine guid hash,
// then machine guid string). It is searched by rrdhost_find_by_guid() and
// modified through the rrdhost_index_add() / rrdhost_index_del() macros.
avl_tree_lock rrdhost_root_index = {
        .avl_tree = { NULL, rrdhost_compare },
        .rwlock = AVL_LOCK_INITIALIZER
};
23
24 RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
25     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
26
27     RRDHOST tmp;
28     strncpyz(tmp.machine_guid, guid, GUID_LEN);
29     tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
30
31     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
32 }
33
34 RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
35     if(unlikely(!strcmp(hostname, "localhost")))
36         return localhost;
37
38     if(unlikely(!hash)) hash = simple_hash(hostname);
39
40     rrd_rdlock();
41     RRDHOST *host;
42     rrdhost_foreach_read(host) {
43         if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
44             rrd_unlock();
45             return host;
46         }
47     }
48     rrd_unlock();
49
50     return NULL;
51 }
52
// Insert a host into / remove a host from the global RRDHOST index.
// Both evaluate to an RRDHOST pointer: rrdhost_create() treats a result of
// rrdhost_index_add() different from its argument as "an entry with this key
// already exists", and rrdhost_free() expects rrdhost_index_del() to return
// the host it was given.
// The whole expansion is parenthesized so that the cast applies to the call
// result even when the macro is followed by '->' or another postfix operator.
#define rrdhost_index_add(rrdhost) ((RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
#define rrdhost_index_del(rrdhost) ((RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
55
56
57 // ----------------------------------------------------------------------------
58 // RRDHOST - internal helpers
59
60 static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
61     freez(host->hostname);
62     host->hostname = strdupz(hostname);
63     host->hash_hostname = simple_hash(host->hostname);
64 }
65
66 static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
67     freez(host->os);
68     host->os = strdupz(os?os:"unknown");
69 }
70
71 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
72     strncpy(host->machine_guid, machine_guid, GUID_LEN);
73     host->machine_guid[GUID_LEN] = '\0';
74     host->hash_machine_guid = simple_hash(host->machine_guid);
75 }
76
77
78 // ----------------------------------------------------------------------------
79 // RRDHOST - add a host
80
81 RRDHOST *rrdhost_create(const char *hostname,
82         const char *guid,
83         const char *os,
84         int update_every,
85         int entries,
86         RRD_MEMORY_MODE memory_mode,
87         int health_enabled,
88         int rrdpush_enabled,
89         char *rrdpush_destination,
90         char *rrdpush_api_key,
91         int is_localhost
92 ) {
93
94     debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
95
96     RRDHOST *host = callocz(1, sizeof(RRDHOST));
97
98     host->rrd_update_every    = update_every;
99     host->rrd_history_entries = entries;
100     host->rrd_memory_mode     = memory_mode;
101     host->health_enabled      = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
102     host->rrdpush_enabled     = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key);
103     host->rrdpush_destination = (host->rrdpush_enabled)?strdupz(rrdpush_destination):NULL;
104     host->rrdpush_api_key     = (host->rrdpush_enabled)?strdupz(rrdpush_api_key):NULL;
105
106     host->rrdpush_pipe[0] = -1;
107     host->rrdpush_pipe[1] = -1;
108     host->rrdpush_socket  = -1;
109
110     pthread_mutex_init(&host->rrdpush_mutex, NULL);
111     pthread_rwlock_init(&host->rrdhost_rwlock, NULL);
112
113     rrdhost_init_hostname(host, hostname);
114     rrdhost_init_machine_guid(host, guid);
115     rrdhost_init_os(host, os);
116
117     avl_init_lock(&(host->rrdset_root_index),      rrdset_compare);
118     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
119     avl_init_lock(&(host->rrdfamily_root_index),   rrdfamily_compare);
120     avl_init_lock(&(host->variables_root_index),   rrdvar_compare);
121
122     // ------------------------------------------------------------------------
123     // initialize health variables
124
125     host->health_log.next_log_id = 1;
126     host->health_log.next_alarm_id = 1;
127     host->health_log.max = 1000;
128     host->health_log.next_log_id =
129     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
130
131     long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
132     if(n < 10) {
133         error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
134         config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
135     }
136     else
137         host->health_log.max = (unsigned int)n;
138
139     pthread_rwlock_init(&(host->health_log.alarm_log_rwlock), NULL);
140
141     char filename[FILENAME_MAX + 1];
142
143     if(is_localhost) {
144
145         host->cache_dir  = strdupz(netdata_configured_cache_dir);
146         host->varlib_dir = strdupz(netdata_configured_varlib_dir);
147
148     }
149     else {
150         // this is not localhost - append our GUID to localhost path
151
152         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
153         host->cache_dir = strdupz(filename);
154
155         if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
156             int r = mkdir(host->cache_dir, 0775);
157             if(r != 0 && errno != EEXIST)
158                 error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
159         }
160
161         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
162         host->varlib_dir = strdupz(filename);
163
164         if(host->health_enabled) {
165             int r = mkdir(host->varlib_dir, 0775);
166             if(r != 0 && errno != EEXIST)
167                 error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
168
169             snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
170             r = mkdir(filename, 0775);
171             if(r != 0 && errno != EEXIST)
172                 error("Host '%s': cannot create directory '%s'", host->hostname, filename);
173         }
174
175     }
176
177     snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
178     host->health_log_filename = strdupz(filename);
179
180     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
181     host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
182     host->health_default_recipient = strdup("root");
183
184
185     // ------------------------------------------------------------------------
186     // load health configuration
187
188     if(host->health_enabled) {
189         health_alarm_log_load(host);
190         health_alarm_log_open(host);
191
192         rrdhost_wrlock(host);
193         health_readdir(host, health_config_dir());
194         rrdhost_unlock(host);
195     }
196
197
198     // ------------------------------------------------------------------------
199     // link it and add it to the index
200
201     rrd_wrlock();
202
203     if(is_localhost) {
204         host->next = localhost;
205         localhost = host;
206     }
207     else {
208         if(localhost) {
209             host->next = localhost->next;
210             localhost->next = host;
211         }
212         else localhost = host;
213     }
214
215     RRDHOST *t = rrdhost_index_add(host);
216
217     if(t != host) {
218         error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
219         rrdhost_free(host);
220         host = NULL;
221     }
222     else {
223         info("Host '%s' with guid '%s' initialized"
224                      ", os %s"
225                      ", update every %d"
226                      ", memory mode %s"
227                      ", history entries %d"
228                      ", streaming %s"
229                      " (to '%s' with api key '%s')"
230                      ", health %s"
231                      ", cache_dir '%s'"
232                      ", varlib_dir '%s'"
233                      ", health_log '%s'"
234                      ", alarms default handler '%s'"
235                      ", alarms default recipient '%s'"
236              , host->hostname
237              , host->machine_guid
238              , host->os
239              , host->rrd_update_every
240              , rrd_memory_mode_name(host->rrd_memory_mode)
241              , host->rrd_history_entries
242              , host->rrdpush_enabled?"enabled":"disabled"
243              , host->rrdpush_destination?host->rrdpush_destination:""
244              , host->rrdpush_api_key?host->rrdpush_api_key:""
245              , host->health_enabled?"enabled":"disabled"
246              , host->cache_dir
247              , host->varlib_dir
248              , host->health_log_filename
249              , host->health_default_exec
250              , host->health_default_recipient
251         );
252     }
253
254     rrd_hosts_available++;
255     rrd_unlock();
256
257     return host;
258 }
259
260 RRDHOST *rrdhost_find_or_create(
261           const char *hostname
262         , const char *guid
263         , const char *os
264         , int update_every
265         , int history
266         , RRD_MEMORY_MODE mode
267         , int health_enabled
268         , int rrdpush_enabled
269         , char *rrdpush_destination
270         , char *rrdpush_api_key
271 ) {
272     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
273
274     RRDHOST *host = rrdhost_find_by_guid(guid, 0);
275     if(!host) {
276         host = rrdhost_create(
277                 hostname
278                 , guid
279                 , os
280                 , update_every
281                 , history
282                 , mode
283                 , health_enabled
284                 , rrdpush_enabled
285                 , rrdpush_destination
286                 , rrdpush_api_key
287                 , 0
288         );
289     }
290     else {
291         host->health_enabled = health_enabled;
292
293         if(strcmp(host->hostname, hostname)) {
294             char *t = host->hostname;
295             char *n = strdupz(hostname);
296             host->hostname = n;
297             freez(t);
298         }
299
300         if(host->rrd_update_every != update_every)
301             error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds.", host->hostname, host->rrd_update_every, update_every);
302
303         if(host->rrd_history_entries != history)
304             error("Host '%s' has history of %d entries, but the wanted one is %d entries.", host->hostname, host->rrd_history_entries, history);
305
306         if(host->rrd_memory_mode != mode)
307             error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
308     }
309
310     rrdhost_cleanup_remote_stale(host);
311
312     return host;
313 }
314
315 void rrdhost_cleanup_remote_stale(RRDHOST *protected) {
316     rrd_wrlock();
317
318     RRDHOST *h;
319     rrdhost_foreach_write(h) {
320         if(h != protected
321            && h != localhost
322            && !h->connected_senders
323            && h->senders_disconnected_time + rrdhost_free_orphan_time > now_realtime_sec()) {
324             info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", h->hostname, h->machine_guid);
325             rrdhost_save(h);
326             rrdhost_free(h);
327             break;
328         }
329     }
330
331     rrd_unlock();
332 }
333
334 // ----------------------------------------------------------------------------
335 // RRDHOST global / startup initialization
336
337 void rrd_init(char *hostname) {
338     health_init();
339     registry_init();
340     rrdpush_init();
341
342     debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
343     localhost = rrdhost_create(
344             hostname
345             , registry_get_this_machine_guid()
346             , os_type
347             , default_rrd_update_every
348             , default_rrd_history_entries
349             , default_rrd_memory_mode
350             , default_health_enabled
351             , default_rrdpush_enabled
352             , default_rrdpush_destination
353             , default_rrdpush_api_key
354             , 1
355     );
356 }
357
358 // ----------------------------------------------------------------------------
359 // RRDHOST - lock validations
// these are only used when NETDATA_INTERNAL_CHECKS is set
361
362 void rrdhost_check_rdlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
363     debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
364
365     int ret = pthread_rwlock_trywrlock(&host->rrdhost_rwlock);
366     if(ret == 0)
367         fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
368 }
369
370 void rrdhost_check_wrlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
371     debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
372
373     int ret = pthread_rwlock_tryrdlock(&host->rrdhost_rwlock);
374     if(ret == 0)
375         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
376 }
377
378 void rrd_check_rdlock_int(const char *file, const char *function, const unsigned long line) {
379     debug(D_RRDHOST, "Checking read lock on all RRDs");
380
381     int ret = pthread_rwlock_trywrlock(&rrd_rwlock);
382     if(ret == 0)
383         fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
384 }
385
386 void rrd_check_wrlock_int(const char *file, const char *function, const unsigned long line) {
387     debug(D_RRDHOST, "Checking write lock on all RRDs");
388
389     int ret = pthread_rwlock_tryrdlock(&rrd_rwlock);
390     if(ret == 0)
391         fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
392 }
393
394 // ----------------------------------------------------------------------------
395 // RRDHOST - free
396
397 void rrdhost_free(RRDHOST *host) {
398     if(!host) return;
399
400     info("Freeing all memory for host '%s'...", host->hostname);
401
402     rrd_check_wrlock();     // make sure the RRDs are write locked
403     rrdhost_wrlock(host);   // lock this RRDHOST
404
405     // ------------------------------------------------------------------------
406     // release its children resources
407
408     while(host->rrdset_root) rrdset_free(host->rrdset_root);
409
410     while(host->alarms) rrdcalc_free(host, host->alarms);
411     while(host->templates) rrdcalctemplate_free(host, host->templates);
412     health_alarm_log_free(host);
413
414
415     // ------------------------------------------------------------------------
416     // remove it from the indexes
417
418     if(rrdhost_index_del(host) != host)
419         error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
420
421
422     // ------------------------------------------------------------------------
423     // unlink it from the host
424
425     if(host == localhost) {
426         localhost = host->next;
427     }
428     else {
429         // find the previous one
430         RRDHOST *h;
431         for(h = localhost; h && h->next != host ; h = h->next) ;
432
433         // bypass it
434         if(h) h->next = host->next;
435         else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
436     }
437
438     // ------------------------------------------------------------------------
439     // free it
440
441     rrdpush_sender_thread_stop(host);
442
443     freez(host->os);
444     freez(host->cache_dir);
445     freez(host->varlib_dir);
446     freez(host->rrdpush_api_key);
447     freez(host->rrdpush_destination);
448     freez(host->health_default_exec);
449     freez(host->health_default_recipient);
450     freez(host->health_log_filename);
451     freez(host->hostname);
452     rrdhost_unlock(host);
453     freez(host);
454
455     rrd_hosts_available--;
456 }
457
458 void rrdhost_free_all(void) {
459     rrd_wrlock();
460     while(localhost) rrdhost_free(localhost);
461     rrd_unlock();
462 }
463
464 // ----------------------------------------------------------------------------
465 // RRDHOST - save
466
467 void rrdhost_save(RRDHOST *host) {
468     if(!host) return;
469
470     info("Saving database of host '%s'...", host->hostname);
471
472     RRDSET *st;
473     RRDDIM *rd;
474
475     // we get a write lock
476     // to ensure only one thread is saving the database
477     rrdhost_wrlock(host);
478
479     rrdset_foreach_write(st, host) {
480         rrdset_rdlock(st);
481
482         if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
483             debug(D_RRD_STATS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename);
484             savememory(st->cache_filename, st, st->memsize);
485         }
486
487         rrddim_foreach_read(rd, st) {
488             if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE)) {
489                 debug(D_RRD_STATS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename);
490                 savememory(rd->cache_filename, rd, rd->memsize);
491             }
492         }
493
494         rrdset_unlock(st);
495     }
496
497     rrdhost_unlock(host);
498 }
499
500 void rrdhost_save_all(void) {
501     info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
502
503     rrd_rdlock();
504
505     RRDHOST *host;
506     rrdhost_foreach_read(host)
507         rrdhost_save(host);
508
509     rrd_unlock();
510 }