]> arthur.barton.de Git - netdata.git/blob - src/procfile.c
Merge remote-tracking branch 'upstream/master' into health
[netdata.git] / src / procfile.c
1 #include "common.h"
2
// prefix of the log messages emitted by this module
#define PF_PREFIX "PROCFILE"

// growth steps:
//  - words array, in entries
//  - lines array, in entries
//  - data buffer, in bytes
#define PFWORDS_INCREASE_STEP 200
#define PFLINES_INCREASE_STEP 10
#define PROCFILE_INCREMENT_BUFFER 512

// when non-zero, initial allocations are sized from the
// maximums below, instead of the fixed steps above
int procfile_adaptive_initial_allocation = 0;

// the biggest values seen so far
// (they start at the fixed steps, so they are usable from the beginning)
uint32_t procfile_max_lines = PFLINES_INCREASE_STEP;
uint32_t procfile_max_words = PFWORDS_INCREASE_STEP;
size_t procfile_max_allocation = PROCFILE_INCREMENT_BUFFER;
16
17 // ----------------------------------------------------------------------------
18 // An array of words
19
20
21 pfwords *pfwords_add(pfwords *fw, char *str) {
22         // debug(D_PROCFILE, PF_PREFIX ":       adding word No %d: '%s'", fw->len, str);
23
24         if(unlikely(fw->len == fw->size)) {
25                 // debug(D_PROCFILE, PF_PREFIX ":       expanding words");
26
27                 fw = reallocz(fw, sizeof(pfwords) + (fw->size + PFWORDS_INCREASE_STEP) * sizeof(char *));
28                 fw->size += PFWORDS_INCREASE_STEP;
29         }
30
31         fw->words[fw->len++] = str;
32
33         return fw;
34 }
35
36 pfwords *pfwords_new(void) {
37         // debug(D_PROCFILE, PF_PREFIX ":       initializing words");
38
39         uint32_t size = (procfile_adaptive_initial_allocation) ? procfile_max_words : PFWORDS_INCREASE_STEP;
40
41         pfwords *new = mallocz(sizeof(pfwords) + size * sizeof(char *));
42         new->len = 0;
43         new->size = size;
44         return new;
45 }
46
47 void pfwords_reset(pfwords *fw) {
48         // debug(D_PROCFILE, PF_PREFIX ":       reseting words");
49         fw->len = 0;
50 }
51
52 void pfwords_free(pfwords *fw) {
53         // debug(D_PROCFILE, PF_PREFIX ":       freeing words");
54
55         freez(fw);
56 }
57
58
59 // ----------------------------------------------------------------------------
60 // An array of lines
61
62 pflines *pflines_add(pflines *fl, uint32_t first_word) {
63         // debug(D_PROCFILE, PF_PREFIX ":       adding line %d at word %d", fl->len, first_word);
64
65         if(unlikely(fl->len == fl->size)) {
66                 // debug(D_PROCFILE, PF_PREFIX ":       expanding lines");
67
68                 fl = reallocz(fl, sizeof(pflines) + (fl->size + PFLINES_INCREASE_STEP) * sizeof(ffline));
69                 fl->size += PFLINES_INCREASE_STEP;
70         }
71
72         fl->lines[fl->len].words = 0;
73         fl->lines[fl->len++].first = first_word;
74
75         return fl;
76 }
77
78 pflines *pflines_new(void) {
79         // debug(D_PROCFILE, PF_PREFIX ":       initializing lines");
80
81         uint32_t size = (unlikely(procfile_adaptive_initial_allocation)) ? procfile_max_words : PFLINES_INCREASE_STEP;
82
83         pflines *new = mallocz(sizeof(pflines) + size * sizeof(ffline));
84         new->len = 0;
85         new->size = size;
86         return new;
87 }
88
89 void pflines_reset(pflines *fl) {
90         // debug(D_PROCFILE, PF_PREFIX ":       reseting lines");
91
92         fl->len = 0;
93 }
94
95 void pflines_free(pflines *fl) {
96         // debug(D_PROCFILE, PF_PREFIX ":       freeing lines");
97
98         freez(fl);
99 }
100
101
102 // ----------------------------------------------------------------------------
103 // The procfile
104
// character classes, stored per byte value in the separators table of a procfile
#define PF_CHAR_IS_SEPARATOR ' ' // ends the current word
#define PF_CHAR_IS_NEWLINE   'N' // ends the current word and the current line
#define PF_CHAR_IS_WORD      'W' // part of a word
#define PF_CHAR_IS_QUOTE     'Q' // opens / closes a quoted word
#define PF_CHAR_IS_OPEN      'O' // opens a (possibly nested) enclosed word
#define PF_CHAR_IS_CLOSE     'C' // closes an enclosed word
111
112 void procfile_close(procfile *ff) {
113         debug(D_PROCFILE, PF_PREFIX ": Closing file '%s'", ff->filename);
114
115         if(likely(ff->lines)) pflines_free(ff->lines);
116         if(likely(ff->words)) pfwords_free(ff->words);
117
118         if(likely(ff->fd != -1)) close(ff->fd);
119         freez(ff);
120 }
121
122 procfile *procfile_parser(procfile *ff) {
123         debug(D_PROCFILE, PF_PREFIX ": Parsing file '%s'", ff->filename);
124
125         char *s = ff->data, *e = &ff->data[ff->len], *t = ff->data, quote = 0;
126         uint32_t l = 0, w = 0;
127         int opened = 0;
128
129         ff->lines = pflines_add(ff->lines, w);
130         if(unlikely(!ff->lines)) goto cleanup;
131
132         while(likely(s < e)) {
133                 // we are not at the end
134
135                 switch(ff->separators[(uint8_t)(*s)]) {
136                         case PF_CHAR_IS_OPEN:
137                                 if(s == t) {
138                                         opened++;
139                                         t = ++s;
140                                 }
141                                 else if(opened) {
142                                         opened++;
143                                         s++;
144                                 }
145                                 else
146                                         s++;
147                                 continue;
148
149                         case PF_CHAR_IS_CLOSE:
150                                 if(opened) {
151                                         opened--;
152
153                                         if(!opened) {
154                                                 *s = '\0';
155                                                 ff->words = pfwords_add(ff->words, t);
156                                                 if(unlikely(!ff->words)) goto cleanup;
157
158                                                 ff->lines->lines[l].words++;
159                                                 w++;
160
161                                                 t = ++s;
162                                         }
163                                         else
164                                                 s++;
165                                 }
166                                 else
167                                         s++;
168                                 continue;
169
170                         case PF_CHAR_IS_QUOTE:
171                                 if(unlikely(!quote && s == t)) {
172                                         // quote opened at the beginning
173                                         quote = *s;
174                                         t = ++s;
175                                 }
176                                 else if(unlikely(quote && quote == *s)) {
177                                         // quote closed
178                                         quote = 0;
179
180                                         *s = '\0';
181                                         ff->words = pfwords_add(ff->words, t);
182                                         if(unlikely(!ff->words)) goto cleanup;
183
184                                         ff->lines->lines[l].words++;
185                                         w++;
186
187                                         t = ++s;
188                                 }
189                                 else
190                                         s++;
191                                 continue;
192
193                         case PF_CHAR_IS_SEPARATOR:
194                                 if(unlikely(quote || opened)) {
195                                         // we are inside a quote
196                                         s++;
197                                         continue;
198                                 }
199
200                                 if(unlikely(s == t)) {
201                                         // skip all leading white spaces
202                                         t = ++s;
203                                         continue;
204                                 }
205
206                                 // end of word
207                                 *s = '\0';
208
209                                 ff->words = pfwords_add(ff->words, t);
210                                 if(unlikely(!ff->words)) goto cleanup;
211
212                                 ff->lines->lines[l].words++;
213                                 w++;
214
215                                 t = ++s;
216                                 continue;
217
218                         case PF_CHAR_IS_NEWLINE:
219                                 // end of line
220                                 *s = '\0';
221
222                                 ff->words = pfwords_add(ff->words, t);
223                                 if(unlikely(!ff->words)) goto cleanup;
224
225                                 ff->lines->lines[l].words++;
226                                 w++;
227
228                                 // debug(D_PROCFILE, PF_PREFIX ":       ended line %d with %d words", l, ff->lines->lines[l].words);
229
230                                 ff->lines = pflines_add(ff->lines, w);
231                                 if(unlikely(!ff->lines)) goto cleanup;
232                                 l++;
233
234                                 t = ++s;
235                                 continue;
236
237                         default:
238                                 s++;
239                                 continue;
240                 }
241         }
242
243         if(likely(s > t && t < e)) {
244                 // the last word
245                 if(likely(ff->len < ff->size))
246                         *s = '\0';
247                 else {
248                         // we are going to loose the last byte
249                         ff->data[ff->size - 1] = '\0';
250                 }
251
252                 ff->words = pfwords_add(ff->words, t);
253                 if(unlikely(!ff->words)) goto cleanup;
254
255                 ff->lines->lines[l].words++;
256                 w++;
257         }
258
259         return ff;
260
261 cleanup:
262         error(PF_PREFIX ": Failed to parse file '%s'", ff->filename);
263         procfile_close(ff);
264         return NULL;
265 }
266
267 procfile *procfile_readall(procfile *ff) {
268         debug(D_PROCFILE, PF_PREFIX ": Reading file '%s'.", ff->filename);
269
270         ssize_t s, r = 1, x;
271         ff->len = 0;
272
273         while(likely(r > 0)) {
274                 s = ff->len;
275                 x = ff->size - s;
276
277                 if(!x) {
278                         debug(D_PROCFILE, PF_PREFIX ": Expanding data buffer for file '%s'.", ff->filename);
279
280                         ff = reallocz(ff, sizeof(procfile) + ff->size + PROCFILE_INCREMENT_BUFFER);
281                         ff->size += PROCFILE_INCREMENT_BUFFER;
282                 }
283
284                 debug(D_PROCFILE, "Reading file '%s', from position %ld with length %lu", ff->filename, s, ff->size - s);
285                 r = read(ff->fd, &ff->data[s], ff->size - s);
286                 if(unlikely(r == -1)) {
287                         if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot read from file '%s'", ff->filename);
288                         procfile_close(ff);
289                         return NULL;
290                 }
291
292                 ff->len += r;
293         }
294
295         debug(D_PROCFILE, "Rewinding file '%s'", ff->filename);
296         if(unlikely(lseek(ff->fd, 0, SEEK_SET) == -1)) {
297                 if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot rewind on file '%s'.", ff->filename);
298                 procfile_close(ff);
299                 return NULL;
300         }
301
302         pflines_reset(ff->lines);
303         pfwords_reset(ff->words);
304
305         ff = procfile_parser(ff);
306
307         if(unlikely(procfile_adaptive_initial_allocation)) {
308                 if(unlikely(ff->len > procfile_max_allocation)) procfile_max_allocation = ff->len;
309                 if(unlikely(ff->lines->len > procfile_max_lines)) procfile_max_lines = ff->lines->len;
310                 if(unlikely(ff->words->len > procfile_max_words)) procfile_max_words = ff->words->len;
311         }
312
313         debug(D_PROCFILE, "File '%s' updated.", ff->filename);
314         return ff;
315 }
316
317 static void procfile_set_separators(procfile *ff, const char *separators) {
318         static char def[256] = { [0 ... 255] = 0 };
319         int i;
320
321         if(unlikely(!def[255])) {
322                 // this is thread safe
323                 // we check that the last byte is non-zero
324                 // if it is zero, multiple threads may be executing this at the same time
325                 // setting in def[] the exact same values
326                 for(i = 0; likely(i < 256) ;i++) {
327                         if(unlikely(i == '\n' || i == '\r')) def[i] = PF_CHAR_IS_NEWLINE;
328                         else if(unlikely(isspace(i) || !isprint(i))) def[i] = PF_CHAR_IS_SEPARATOR;
329                         else def[i] = PF_CHAR_IS_WORD;
330                 }
331         }
332
333         // copy the default
334         char *ffs = ff->separators, *ffd = def, *ffe = &def[256];
335         while(likely(ffd != ffe)) *ffs++ = *ffd++;
336
337         // set the separators
338         if(unlikely(!separators))
339                 separators = " \t=|";
340
341         ffs = ff->separators;
342         const char *s = separators;
343         while(likely(*s))
344                 ffs[(int)*s++] = PF_CHAR_IS_SEPARATOR;
345 }
346
347 void procfile_set_quotes(procfile *ff, const char *quotes) {
348         // remove all quotes
349         int i;
350         for(i = 0; i < 256 ; i++)
351                 if(ff->separators[i] == PF_CHAR_IS_QUOTE)
352                         ff->separators[i] = PF_CHAR_IS_WORD;
353
354         // if nothing given, return
355         if(unlikely(!quotes || !*quotes))
356                 return;
357
358         // set the quotes
359         char *ffs = ff->separators;
360         const char *s = quotes;
361         while(likely(*s))
362                 ffs[(int)*s++] = PF_CHAR_IS_QUOTE;
363 }
364
365 void procfile_set_open_close(procfile *ff, const char *open, const char *close) {
366         // remove all open/close
367         int i;
368         for(i = 0; i < 256 ; i++)
369                 if(ff->separators[i] == PF_CHAR_IS_OPEN || ff->separators[i] == PF_CHAR_IS_CLOSE)
370                         ff->separators[i] = PF_CHAR_IS_WORD;
371
372         // if nothing given, return
373         if(unlikely(!open || !*open || !close || !*close))
374                 return;
375
376         // set the openings
377         char *ffs = ff->separators;
378         const char *s = open;
379         while(likely(*s))
380                 ffs[(int)*s++] = PF_CHAR_IS_OPEN;
381
382         s = close;
383         while(likely(*s))
384                 ffs[(int)*s++] = PF_CHAR_IS_CLOSE;
385 }
386
387 procfile *procfile_open(const char *filename, const char *separators, uint32_t flags) {
388         debug(D_PROCFILE, PF_PREFIX ": Opening file '%s'", filename);
389
390         int fd = open(filename, O_RDONLY, 0666);
391         if(unlikely(fd == -1)) {
392                 if(unlikely(!(flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot open file '%s'", filename);
393                 return NULL;
394         }
395
396         size_t size = (unlikely(procfile_adaptive_initial_allocation)) ? procfile_max_allocation : PROCFILE_INCREMENT_BUFFER;
397         procfile *ff = mallocz(sizeof(procfile) + size);
398         strncpyz(ff->filename, filename, FILENAME_MAX);
399
400         ff->fd = fd;
401         ff->size = size;
402         ff->len = 0;
403         ff->flags = flags;
404
405         ff->lines = pflines_new();
406         ff->words = pfwords_new();
407
408         if(unlikely(!ff->lines || !ff->words)) {
409                 error(PF_PREFIX ": Cannot initialize parser for file '%s'", filename);
410                 procfile_close(ff);
411                 return NULL;
412         }
413
414         procfile_set_separators(ff, separators);
415
416         debug(D_PROCFILE, "File '%s' opened.", filename);
417         return ff;
418 }
419
420 procfile *procfile_reopen(procfile *ff, const char *filename, const char *separators, uint32_t flags) {
421         if(unlikely(!ff)) return procfile_open(filename, separators, flags);
422
423         if(likely(ff->fd != -1)) close(ff->fd);
424
425         ff->fd = open(filename, O_RDONLY, 0666);
426         if(unlikely(ff->fd == -1)) {
427                 procfile_close(ff);
428                 return NULL;
429         }
430
431         strncpyz(ff->filename, filename, FILENAME_MAX);
432
433         ff->flags = flags;
434
435         // do not do the separators again if NULL is given
436         if(likely(separators)) procfile_set_separators(ff, separators);
437
438         return ff;
439 }
440
441 // ----------------------------------------------------------------------------
442 // example parsing of procfile data
443
444 void procfile_print(procfile *ff) {
445         uint32_t lines = procfile_lines(ff), l;
446         uint32_t words, w;
447         char *s;
448
449         debug(D_PROCFILE, "File '%s' with %u lines and %u words", ff->filename, ff->lines->len, ff->words->len);
450
451         for(l = 0; likely(l < lines) ;l++) {
452                 words = procfile_linewords(ff, l);
453
454                 debug(D_PROCFILE, "     line %u starts at word %u and has %u words", l, ff->lines->lines[l].first, ff->lines->lines[l].words);
455
456                 for(w = 0; likely(w < words) ;w++) {
457                         s = procfile_lineword(ff, l, w);
458                         debug(D_PROCFILE, "             [%u.%u] '%s'", l, w, s);
459                 }
460         }
461 }