]> arthur.barton.de Git - netdata.git/blob - src/rrdhost.c
detect duplicate hosts
[netdata.git] / src / rrdhost.c
1 #define NETDATA_RRD_INTERNALS 1
2 #include "common.h"
3
// The host entry created by rrd_init() for this machine. rrdhost_create() and
// rrdhost_free() also treat it as the head of the singly linked list of hosts
// (chained through RRDHOST.next).
RRDHOST *localhost = NULL;

// Global read-write lock. The rrd_check_rdlock_int() / rrd_check_wrlock_int()
// helpers in this file probe it to verify that callers hold it.
// NOTE(review): presumably rrd_rdlock() / rrd_wrlock() / rrd_unlock() operate on this lock - confirm in the header.
pthread_rwlock_t rrd_rwlock = PTHREAD_RWLOCK_INITIALIZER;
7
8
9 // ----------------------------------------------------------------------------
10 // RRDHOST index
11
12 int rrdhost_compare(void* a, void* b) {
13     if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
14     else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
15     else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
16 }
17
// The index of all known hosts, ordered by rrdhost_compare() (machine GUID).
// It starts empty (NULL root) and carries its own lock.
// NOTE(review): presumably the avl_*_lock() calls in this file take .rwlock internally - confirm in avl.h.
avl_tree_lock rrdhost_root_index = {
        .avl_tree = { NULL, rrdhost_compare },
        .rwlock = AVL_LOCK_INITIALIZER
};
22
23 RRDHOST *rrdhost_find(const char *guid, uint32_t hash) {
24     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
25
26     RRDHOST tmp;
27     strncpyz(tmp.machine_guid, guid, GUID_LEN);
28     tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
29
30     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
31 }
32
// Insert a host into / remove a host from the global host index.
// Both evaluate to an RRDHOST *: rrdhost_create() treats a result different
// from the host passed in as "a host with this GUID is already indexed", and
// rrdhost_free() treats a different result as "the wrong entry was deleted".
#define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost))
#define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost))
35
36
37 // ----------------------------------------------------------------------------
38 // RRDHOST - internal helpers
39
40 static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
41     freez(host->hostname);
42     host->hostname = strdupz(hostname);
43     host->hash_hostname = simple_hash(host->hostname);
44 }
45
46 static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
47     freez(host->os);
48     host->os = strdupz(os?os:"unknown");
49 }
50
51 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
52     strncpy(host->machine_guid, machine_guid, GUID_LEN);
53     host->machine_guid[GUID_LEN] = '\0';
54     host->hash_machine_guid = simple_hash(host->machine_guid);
55 }
56
57
58 // ----------------------------------------------------------------------------
59 // RRDHOST - add a host
60
61 RRDHOST *rrdhost_create(const char *hostname,
62         const char *guid,
63         const char *os,
64         int update_every,
65         int entries,
66         RRD_MEMORY_MODE memory_mode,
67         int health_enabled,
68         int is_localhost
69 ) {
70
71     debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
72
73     RRDHOST *host = callocz(1, sizeof(RRDHOST));
74
75     host->rrd_update_every    = update_every;
76     host->rrd_history_entries = entries;
77     host->rrd_memory_mode     = memory_mode;
78     host->health_enabled      = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
79     host->rrdpush_enabled     = default_rrdpush_enabled;
80
81     host->rrdpush_pipe[0] = -1;
82     host->rrdpush_pipe[1] = -1;
83     host->rrdpush_socket = -1;
84
85     pthread_mutex_init(&host->rrdpush_mutex, NULL);
86     pthread_rwlock_init(&host->rrdhost_rwlock, NULL);
87
88     rrdhost_init_hostname(host, hostname);
89     rrdhost_init_machine_guid(host, guid);
90     rrdhost_init_os(host, os);
91
92     avl_init_lock(&(host->rrdset_root_index),      rrdset_compare);
93     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
94     avl_init_lock(&(host->rrdfamily_root_index),   rrdfamily_compare);
95     avl_init_lock(&(host->variables_root_index),   rrdvar_compare);
96
97     // ------------------------------------------------------------------------
98     // initialize health variables
99
100     host->health_log.next_log_id = 1;
101     host->health_log.next_alarm_id = 1;
102     host->health_log.max = 1000;
103     host->health_log.next_log_id =
104     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
105
106     long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
107     if(n < 10) {
108         error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
109         config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
110     }
111     else
112         host->health_log.max = (unsigned int)n;
113
114     pthread_rwlock_init(&(host->health_log.alarm_log_rwlock), NULL);
115
116     char filename[FILENAME_MAX + 1];
117
118     if(is_localhost) {
119
120         host->cache_dir  = strdupz(netdata_configured_cache_dir);
121         host->varlib_dir = strdupz(netdata_configured_varlib_dir);
122
123     }
124     else {
125         // this is not localhost - append our GUID to localhost path
126
127         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
128         host->cache_dir = strdupz(filename);
129
130         if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
131             int r = mkdir(host->cache_dir, 0775);
132             if(r != 0 && errno != EEXIST)
133                 error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
134         }
135
136         snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
137         host->varlib_dir = strdupz(filename);
138
139         if(host->health_enabled) {
140             int r = mkdir(host->varlib_dir, 0775);
141             if(r != 0 && errno != EEXIST)
142                 error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
143
144             snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
145             r = mkdir(filename, 0775);
146             if(r != 0 && errno != EEXIST)
147                 error("Host '%s': cannot create directory '%s'", host->hostname, filename);
148         }
149
150     }
151
152     snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
153     host->health_log_filename = strdupz(filename);
154
155     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
156     host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
157     host->health_default_recipient = strdup("root");
158
159
160     // ------------------------------------------------------------------------
161     // load health configuration
162
163     if(host->health_enabled) {
164         health_alarm_log_load(host);
165         health_alarm_log_open(host);
166
167         rrdhost_wrlock(host);
168         health_readdir(host, health_config_dir());
169         rrdhost_unlock(host);
170     }
171
172
173     // ------------------------------------------------------------------------
174     // link it and add it to the index
175
176     rrd_wrlock();
177
178     if(is_localhost) {
179         host->next = localhost;
180         localhost = host;
181     }
182     else {
183         if(localhost) {
184             host->next = localhost->next;
185             localhost->next = host;
186         }
187         else localhost = host;
188     }
189
190     RRDHOST *t = rrdhost_index_add(host);
191
192     if(t != host) {
193         error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
194         rrdhost_free(host);
195         host = NULL;
196     }
197     else {
198         info("Host '%s' with guid '%s' initialized"
199                      ", os: %s"
200                      ", update every: %d"
201                      ", memory mode: %s"
202                      ", history entries: %d"
203                      ", streaming: %s"
204                      ", health: %s"
205                      ", cache_dir: '%s'"
206                      ", varlib_dir: '%s'"
207                      ", health_log: '%s'"
208                      ", alarms default handler: '%s'"
209                      ", alarms default recipient: '%s'"
210              , host->hostname
211              , host->machine_guid
212              , host->os
213              , host->rrd_update_every
214              , rrd_memory_mode_name(host->rrd_memory_mode)
215              , host->rrd_history_entries
216              , host->rrdpush_enabled?"enabled":"disabled"
217              , host->health_enabled?"enabled":"disabled"
218              , host->cache_dir
219              , host->varlib_dir
220              , host->health_log_filename
221              , host->health_default_exec
222              , host->health_default_recipient
223         );
224     }
225
226     rrd_unlock();
227
228     return host;
229 }
230
231 RRDHOST *rrdhost_find_or_create(const char *hostname, const char *guid, const char *os, int update_every, int history, RRD_MEMORY_MODE mode, int health_enabled) {
232     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
233
234     RRDHOST *host = rrdhost_find(guid, 0);
235     if(!host) {
236         host = rrdhost_create(hostname, guid, os, update_every, history, mode, health_enabled, 0);
237     }
238     else {
239         host->health_enabled = health_enabled;
240
241         if(strcmp(host->hostname, hostname)) {
242             char *t = host->hostname;
243             char *n = strdupz(hostname);
244             host->hostname = n;
245             freez(t);
246         }
247
248         if(host->rrd_update_every != update_every)
249             error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds.", host->hostname, host->rrd_update_every, update_every);
250
251         if(host->rrd_history_entries != history)
252             error("Host '%s' has history of %d entries, but the wanted one is %d entries.", host->hostname, host->rrd_history_entries, history);
253
254         if(host->rrd_memory_mode != mode)
255             error("Host '%s' has memory mode '%s', but the wanted one is '%s'.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
256     }
257
258     return host;
259 }
260
261 // ----------------------------------------------------------------------------
262 // RRDHOST global / startup initialization
263
264 void rrd_init(char *hostname) {
265     health_init();
266     registry_init();
267     rrdpush_init();
268
269     debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
270     localhost = rrdhost_create(hostname,
271             registry_get_this_machine_guid(),
272             os_type,
273             default_rrd_update_every,
274             default_rrd_history_entries,
275             default_rrd_memory_mode,
276             default_health_enabled,
277             1
278     );
279 }
280
281 // ----------------------------------------------------------------------------
282 // RRDHOST - lock validations
// these are only used when NETDATA_INTERNAL_CHECKS is set
284
285 void rrdhost_check_rdlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
286     debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
287
288     int ret = pthread_rwlock_trywrlock(&host->rrdhost_rwlock);
289     if(ret == 0)
290         fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
291 }
292
293 void rrdhost_check_wrlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
294     debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
295
296     int ret = pthread_rwlock_tryrdlock(&host->rrdhost_rwlock);
297     if(ret == 0)
298         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
299 }
300
301 void rrd_check_rdlock_int(const char *file, const char *function, const unsigned long line) {
302     debug(D_RRDHOST, "Checking read lock on all RRDs");
303
304     int ret = pthread_rwlock_trywrlock(&rrd_rwlock);
305     if(ret == 0)
306         fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
307 }
308
309 void rrd_check_wrlock_int(const char *file, const char *function, const unsigned long line) {
310     debug(D_RRDHOST, "Checking write lock on all RRDs");
311
312     int ret = pthread_rwlock_tryrdlock(&rrd_rwlock);
313     if(ret == 0)
314         fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
315 }
316
317 // ----------------------------------------------------------------------------
318 // RRDHOST - free
319
320 void rrdhost_free(RRDHOST *host) {
321     if(!host) return;
322
323     info("Freeing all memory for host '%s'...", host->hostname);
324
325     rrd_check_wrlock();     // make sure the RRDs are write locked
326     rrdhost_wrlock(host);   // lock this RRDHOST
327
328     // ------------------------------------------------------------------------
329     // release its children resources
330
331     while(host->rrdset_root) rrdset_free(host->rrdset_root);
332
333     while(host->alarms) rrdcalc_free(host, host->alarms);
334     while(host->templates) rrdcalctemplate_free(host, host->templates);
335     health_alarm_log_free(host);
336
337
338     // ------------------------------------------------------------------------
339     // remove it from the indexes
340
341     if(rrdhost_index_del(host) != host)
342         error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
343
344
345     // ------------------------------------------------------------------------
346     // unlink it from the host
347
348     if(host == localhost) {
349         localhost = host->next;
350     }
351     else {
352         // find the previous one
353         RRDHOST *h;
354         for(h = localhost; h && h->next != host ; h = h->next) ;
355
356         // bypass it
357         if(h) h->next = host->next;
358         else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
359     }
360
361     // ------------------------------------------------------------------------
362     // free it
363
364     if(host->rrdpush_spawn) {
365         pthread_cancel(host->rrdpush_thread);
366         rrdpush_sender_thread_cleanup(host);
367     }
368
369     freez(host->os);
370     freez(host->cache_dir);
371     freez(host->varlib_dir);
372     freez(host->health_default_exec);
373     freez(host->health_default_recipient);
374     freez(host->health_log_filename);
375     freez(host->hostname);
376     rrdhost_unlock(host);
377     freez(host);
378
379     info("Host memory cleanup completed...");
380 }
381
382 void rrdhost_free_all(void) {
383     rrd_wrlock();
384     while(localhost) rrdhost_free(localhost);
385     rrd_unlock();
386 }
387
388 // ----------------------------------------------------------------------------
389 // RRDHOST - save
390
391 void rrdhost_save(RRDHOST *host) {
392     if(!host) return;
393
394     info("Saving host '%s' database...", host->hostname);
395
396     RRDSET *st;
397     RRDDIM *rd;
398
399     // we get a write lock
400     // to ensure only one thread is saving the database
401     rrdhost_wrlock(host);
402
403     rrdset_foreach_write(st, host) {
404         rrdset_rdlock(st);
405
406         if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
407             debug(D_RRD_STATS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename);
408             savememory(st->cache_filename, st, st->memsize);
409         }
410
411         rrddim_foreach_read(rd, st) {
412             if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE)) {
413                 debug(D_RRD_STATS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename);
414                 savememory(rd->cache_filename, rd, rd->memsize);
415             }
416         }
417
418         rrdset_unlock(st);
419     }
420
421     rrdhost_unlock(host);
422 }
423
424 void rrdhost_save_all(void) {
425     info("Saving database...");
426
427     rrd_rdlock();
428
429     RRDHOST *host;
430     rrdhost_foreach_read(host)
431         rrdhost_save(host);
432
433     rrd_unlock();
434 }