arthur.barton.de Git - netdata.git/blob - src/rrdhost.c
every host has its own health
[netdata.git] / src / rrdhost.c
1 #define NETDATA_RRD_INTERNALS 1
2 #include "common.h"
3
4 RRDHOST *localhost = NULL;
5
6 // ----------------------------------------------------------------------------
7 // RRDHOST index
8
9 int rrdhost_compare(void* a, void* b) {
10     if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
11     else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
12     else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
13 }
14
15 avl_tree_lock rrdhost_root_index = {
16         .avl_tree = { NULL, rrdhost_compare },
17         .rwlock = AVL_LOCK_INITIALIZER
18 };
19
20 RRDHOST *rrdhost_find(const char *guid, uint32_t hash) {
21     debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
22
23     RRDHOST tmp;
24     strncpyz(tmp.machine_guid, guid, GUID_LEN);
25     tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
26
27     return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
28 }
29
// Insert a host into / remove a host from the global host index.
// Each expands to the locked AVL call on rrdhost_root_index, with its result
// cast to RRDHOST *.
#define rrdhost_index_add(host) (RRDHOST *)avl_insert_lock(&rrdhost_root_index, (avl *)(host))
#define rrdhost_index_del(host) (RRDHOST *)avl_remove_lock(&rrdhost_root_index, (avl *)(host))
32
33
34 // ----------------------------------------------------------------------------
35 // RRDHOST - internal helpers
36
37 static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
38     freez(host->hostname);
39     host->hostname = strdupz(hostname);
40     host->hash_hostname = simple_hash(host->hostname);
41 }
42
43 static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
44     strncpy(host->machine_guid, machine_guid, GUID_LEN);
45     host->machine_guid[GUID_LEN] = '\0';
46     host->hash_machine_guid = simple_hash(host->machine_guid);
47 }
48
49 // ----------------------------------------------------------------------------
50 // RRDHOST - add a host
51
52 RRDHOST *rrdhost_create(const char *hostname, const char *guid) {
53     debug(D_RRDHOST, "Adding host '%s' with guid '%s'", hostname, guid);
54
55     RRDHOST *host = callocz(1, sizeof(RRDHOST));
56
57     pthread_rwlock_init(&(host->rrdset_root_rwlock), NULL);
58
59     rrdhost_init_hostname(host, hostname);
60     rrdhost_init_machine_guid(host, guid);
61
62     avl_init_lock(&(host->rrdset_root_index), rrdset_compare);
63     avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
64     avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare);
65     avl_init_lock(&(host->variables_root_index), rrdvar_compare);
66
67
68     // ------------------------------------------------------------------------
69     // initialize health variables
70
71     host->health_log.next_log_id = 1;
72     host->health_log.next_alarm_id = 1;
73     host->health_log.max = 1000;
74     host->health_log.next_log_id =
75     host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
76
77     long n = config_get_number("health", "in memory max health log entries", host->health_log.max);
78     if(n < 10) {
79         error("Health configuration has invalid max log entries %ld. Using default %u", n, host->health_log.max);
80         config_set_number("health", "in memory max health log entries", (long)host->health_log.max);
81     }
82     else
83         host->health_log.max = (unsigned int)n;
84
85     pthread_rwlock_init(&(host->health_log.alarm_log_rwlock), NULL);
86
87     char filename[FILENAME_MAX + 1];
88
89     if(!localhost) {
90         // this is localhost
91         snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", netdata_configured_varlib_dir);
92         host->health_log_filename = strdupz(config_get("health", "health db file", filename));
93     }
94     else {
95         // this is not localhost - append our GUID to localhost path
96         snprintfz(filename, FILENAME_MAX, "%s.%s", localhost->health_log_filename, host->machine_guid);
97         host->health_log_filename = strdupz(filename);
98     }
99
100     snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir);
101     host->health_default_exec = strdupz(config_get("health", "script to execute on alarm", filename));
102     host->health_default_recipient = strdup("root");
103
104
105     // ------------------------------------------------------------------------
106     // load health configuration
107
108     health_alarm_log_load(host);
109     health_alarm_log_open(host);
110
111     rrdhost_rwlock(host);
112     health_readdir(host, health_config_dir());
113     rrdhost_unlock(host);
114
115
116     // ------------------------------------------------------------------------
117     // add it to the index
118
119     if(rrdhost_index_add(host) != host)
120         fatal("Cannot add host '%s' to index. It already exists.", hostname);
121
122     debug(D_RRDHOST, "Added host '%s' with guid '%s'", host->hostname, host->machine_guid);
123     return host;
124 }
125
126 RRDHOST *rrdhost_find_or_create(const char *hostname, const char *guid) {
127     debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
128
129     RRDHOST *host = rrdhost_find(guid, 0);
130     if(!host)
131         host = rrdhost_create(hostname, guid);
132
133     return host;
134 }
135
136 // ----------------------------------------------------------------------------
137 // RRDHOST global / startup initialization
138
139 void rrd_init(char *hostname) {
140     debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
141     localhost = rrdhost_create(hostname, registry_get_this_machine_guid());
142 }
143
144 // ----------------------------------------------------------------------------
145 // RRDHOST - locks
146
147 void rrdhost_rwlock(RRDHOST *host) {
148     debug(D_RRDHOST, "Write lock host '%s'", host->hostname);
149     pthread_rwlock_wrlock(&host->rrdset_root_rwlock);
150 }
151
152 void rrdhost_rdlock(RRDHOST *host) {
153     debug(D_RRDHOST, "Read lock host '%s'", host->hostname);
154     pthread_rwlock_rdlock(&host->rrdset_root_rwlock);
155 }
156
157 void rrdhost_unlock(RRDHOST *host) {
158     debug(D_RRDHOST, "Unlock host '%s'", host->hostname);
159     pthread_rwlock_unlock(&host->rrdset_root_rwlock);
160 }
161
162 void rrdhost_check_rdlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
163     debug(D_RRDHOST, "Read lock host '%s'", host->hostname);
164
165     int ret = pthread_rwlock_trywrlock(&host->rrdset_root_rwlock);
166     if(ret == 0)
167         fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
168 }
169
170 void rrdhost_check_wrlock_int(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
171     debug(D_RRDHOST, "Write lock host '%s'", host->hostname);
172
173     int ret = pthread_rwlock_tryrdlock(&host->rrdset_root_rwlock);
174     if(ret == 0)
175         fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
176 }
177
178 void rrdhost_free(RRDHOST *host) {
179     if(!host) return;
180
181     info("Freeing all memory for host '%s'...", host->hostname);
182
183     rrdhost_rwlock(host);
184
185     RRDSET *st;
186     for(st = host->rrdset_root; st ;) {
187         RRDSET *next = st->next;
188
189         pthread_rwlock_wrlock(&st->rwlock);
190
191         while(st->variables)  rrdsetvar_free(st->variables);
192         while(st->alarms)     rrdsetcalc_unlink(st->alarms);
193         while(st->dimensions) rrddim_free(st, st->dimensions);
194
195         if(unlikely(rrdset_index_del(host, st) != st))
196             error("RRDSET: INTERNAL ERROR: attempt to remove from index chart '%s', removed a different chart.", st->id);
197
198         rrdset_index_del_name(host, st);
199
200         st->rrdfamily->use_count--;
201         if(!st->rrdfamily->use_count)
202             rrdfamily_free(host, st->rrdfamily);
203
204         pthread_rwlock_unlock(&st->rwlock);
205
206         if(st->mapped == RRD_MEMORY_MODE_SAVE || st->mapped == RRD_MEMORY_MODE_MAP) {
207             debug(D_RRD_CALLS, "Unmapping stats '%s'.", st->name);
208             munmap(st, st->memsize);
209         }
210         else
211             freez(st);
212
213         st = next;
214     }
215     host->rrdset_root = NULL;
216
217     freez(host->health_default_exec);
218     freez(host->health_default_recipient);
219     freez(host->health_log_filename);
220     freez(host->hostname);
221     rrdhost_unlock(host);
222     freez(host);
223
224     info("Host memory cleanup completed...");
225 }
226
227 void rrdhost_save(RRDHOST *host) {
228     if(!host) return;
229
230     info("Saving host '%s' database...", host->hostname);
231
232     RRDSET *st;
233     RRDDIM *rd;
234
235     // we get an write lock
236     // to ensure only one thread is saving the database
237     rrdhost_rwlock(host);
238
239     for(st = host->rrdset_root; st ; st = st->next) {
240         pthread_rwlock_rdlock(&st->rwlock);
241
242         if(st->mapped == RRD_MEMORY_MODE_SAVE) {
243             debug(D_RRD_CALLS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename);
244             savememory(st->cache_filename, st, st->memsize);
245         }
246
247         for(rd = st->dimensions; rd ; rd = rd->next) {
248             if(likely(rd->memory_mode == RRD_MEMORY_MODE_SAVE)) {
249                 debug(D_RRD_CALLS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename);
250                 savememory(rd->cache_filename, rd, rd->memsize);
251             }
252         }
253
254         pthread_rwlock_unlock(&st->rwlock);
255     }
256
257     rrdhost_unlock(host);
258 }
259
260 void rrdhost_free_all(void) {
261     RRDHOST *host = localhost;
262
263     // FIXME: lock all hosts
264
265     while(host) {
266         RRDHOST *next = host = host->next;
267         rrdhost_free(host);
268         host = next;
269     }
270
271     localhost = NULL;
272
273     // FIXME: unlock all hosts
274 }
275
276 void rrdhost_save_all(void) {
277     RRDHOST *host;
278     for(host = localhost; host ; host = host->next)
279         rrdhost_save(host);
280 }