]> arthur.barton.de Git - netdata.git/commitdiff
faster procfile parser
author Costa Tsaousis (ktsaou) <costa@tsaousis.gr>
Sun, 29 Jan 2017 15:16:42 +0000 (17:16 +0200)
committer Costa Tsaousis (ktsaou) <costa@tsaousis.gr>
Sun, 29 Jan 2017 15:16:42 +0000 (17:16 +0200)
src/procfile.c
src/procfile.h

index 671d5c7029bd1b89e38a7f3a748fce5d3719aca4..dc4a5409b46a54a6efeaa09d23f681118866a30c 100644 (file)
@@ -127,13 +127,6 @@ static inline void pflines_free(pflines *fl) {
 // ----------------------------------------------------------------------------
 // The procfile
 
-#define PF_CHAR_IS_SEPARATOR    ' '
-#define PF_CHAR_IS_NEWLINE      'N'
-#define PF_CHAR_IS_WORD         'W'
-#define PF_CHAR_IS_QUOTE        'Q'
-#define PF_CHAR_IS_OPEN         'O'
-#define PF_CHAR_IS_CLOSE        'C'
-
 void procfile_close(procfile *ff) {
     debug(D_PROCFILE, PF_PREFIX ": Closing file '%s'", procfile_filename(ff));
 
@@ -147,113 +140,119 @@ void procfile_close(procfile *ff) {
 static inline void procfile_parser(procfile *ff) {
     // debug(D_PROCFILE, PF_PREFIX ": Parsing file '%s'", ff->filename);
 
-    register char *s = ff->data, *e = &ff->data[ff->len], *t = ff->data;
-    register char *separators = ff->separators;
-    char quote = 0;
-    size_t l = 0, w = 0, opened = 0;
+    char  *s = ff->data                 // our current position
+        , *e = &ff->data[ff->len]       // the terminating null
+        , *t = ff->data;                // the first character of a quoted or a parenthesized string
+
+                                        // the look up array to find our type of character
+    PF_CHAR_TYPE *separators = ff->separators;
+
+    char quote = 0;                     // the quote character - only when in quoted string
+
+    size_t
+          l = 0                         // counts the number of lines we added
+        , w = 0                         // counts the number of words we added
+        , opened = 0;                   // counts the number of open parenthesis
 
     ff->lines = pflines_add(ff->lines, w);
 
     while(likely(s < e)) {
         // we are not at the end
+        PF_CHAR_TYPE ct = separators[(unsigned char)(*s)];
 
-        switch(separators[(unsigned char)(*s)]) {
-            case PF_CHAR_IS_OPEN:
-                if(s == t) {
-                    opened++;
-                    t = ++s;
-                }
-                else if(opened) {
-                    opened++;
-                    s++;
-                }
-                else
-                    s++;
-                break;
-
-            case PF_CHAR_IS_CLOSE:
-                if(opened) {
-                    opened--;
-
-                    if(!opened) {
-                        *s = '\0';
-                        ff->words = pfwords_add(ff->words, t);
-                        ff->lines->lines[l].words++;
-                        w++;
-
-                        t = ++s;
-                    }
-                    else
-                        s++;
-                }
-                else
-                    s++;
-                break;
+        // this is faster than a switch()
+        if(likely(ct == PF_CHAR_IS_WORD)) {
+            s++;
+        }
+        else if(likely(ct == PF_CHAR_IS_SEPARATOR)) {
+            if(unlikely(quote || opened)) {
+                // we are inside a quote
+                s++;
+                continue;
+            }
 
-            case PF_CHAR_IS_QUOTE:
-                if(unlikely(!quote && s == t)) {
-                    // quote opened at the beginning
-                    quote = *s;
-                    t = ++s;
-                }
-                else if(unlikely(quote && quote == *s)) {
-                    // quote closed
-                    quote = 0;
+            if(unlikely(s == t)) {
+                // skip all leading white spaces
+                t = ++s;
+                continue;
+            }
 
-                    *s = '\0';
-                    ff->words = pfwords_add(ff->words, t);
-                    ff->lines->lines[l].words++;
-                    w++;
+            // end of word
+            *s = '\0';
 
-                    t = ++s;
-                }
-                else
-                    s++;
-                break;
+            ff->words = pfwords_add(ff->words, t);
+            ff->lines->lines[l].words++;
+            w++;
 
-            case PF_CHAR_IS_SEPARATOR:
-                if(unlikely(quote || opened)) {
-                    // we are inside a quote
-                    s++;
-                    break;
-                }
+            t = ++s;
+        }
+        else if(likely(ct == PF_CHAR_IS_NEWLINE)) {
+            // end of line
+            *s = '\0';
 
-                if(unlikely(s == t)) {
-                    // skip all leading white spaces
-                    t = ++s;
-                    break;
-                }
+            ff->words = pfwords_add(ff->words, t);
+            ff->lines->lines[l].words++;
+            w++;
 
-                // end of word
-                *s = '\0';
+            // debug(D_PROCFILE, PF_PREFIX ":   ended line %d with %d words", l, ff->lines->lines[l].words);
 
-                ff->words = pfwords_add(ff->words, t);
-                ff->lines->lines[l].words++;
-                w++;
+            ff->lines = pflines_add(ff->lines, w);
+            l++;
 
+            t = ++s;
+        }
+        else if(likely(ct == PF_CHAR_IS_QUOTE)) {
+            if(unlikely(!quote && s == t)) {
+                // quote opened at the beginning
+                quote = *s;
                 t = ++s;
-                break;
+            }
+            else if(unlikely(quote && quote == *s)) {
+                // quote closed
+                quote = 0;
 
-            case PF_CHAR_IS_NEWLINE:
-                // end of line
                 *s = '\0';
-
                 ff->words = pfwords_add(ff->words, t);
                 ff->lines->lines[l].words++;
                 w++;
 
-                // debug(D_PROCFILE, PF_PREFIX ":   ended line %d with %d words", l, ff->lines->lines[l].words);
-
-                ff->lines = pflines_add(ff->lines, w);
-                l++;
-
                 t = ++s;
-                break;
+            }
+            else
+                s++;
+        }
+        else if(likely(ct == PF_CHAR_IS_OPEN)) {
+            if(s == t) {
+                opened++;
+                t = ++s;
+            }
+            else if(opened) {
+                opened++;
+                s++;
+            }
+            else
+                s++;
+        }
+        else if(likely(ct == PF_CHAR_IS_CLOSE)) {
+            if(opened) {
+                opened--;
+
+                if(!opened) {
+                    *s = '\0';
+                    ff->words = pfwords_add(ff->words, t);
+                    ff->lines->lines[l].words++;
+                    w++;
 
-            default:
+                    t = ++s;
+                }
+                else
+                    s++;
+            }
+            else
                 s++;
-                break;
         }
+        else
+            fatal("Internal Error: procfile_readall() does not handle all the cases.");
     }
 
     if(likely(s > t && t < e)) {
@@ -317,25 +316,33 @@ procfile *procfile_readall(procfile *ff) {
     return ff;
 }
 
-static void procfile_set_separators(procfile *ff, const char *separators) {
-    static char def[256] = { [0 ... 255] = 0 };
+static inline void procfile_set_separators(procfile *ff, const char *separators) {
+    static PF_CHAR_TYPE def[256];
+    static char initilized = 0;
 
-    if(unlikely(!def[255])) {
+    if(unlikely(!initilized)) {
         // this is thread safe
-        // we check that the last byte is non-zero
-        // if it is zero, multiple threads may be executing this at the same time
-        // setting in def[] the exact same values
-        int i;
-        for(i = 0; likely(i < 256) ;i++) {
-            if(unlikely(i == '\n' || i == '\r')) def[i] = PF_CHAR_IS_NEWLINE;
-            else if(unlikely(isspace(i) || !isprint(i))) def[i] = PF_CHAR_IS_SEPARATOR;
-            else def[i] = PF_CHAR_IS_WORD;
+        // if initialized is zero, multiple threads may be executing
+        // this code at the same time, setting in def[] the exact same values
+        int i = 256;
+        while(i--) {
+            if(unlikely(i == '\n' || i == '\r'))
+                def[i] = PF_CHAR_IS_NEWLINE;
+
+            else if(unlikely(isspace(i) || !isprint(i)))
+                def[i] = PF_CHAR_IS_SEPARATOR;
+
+            else
+                def[i] = PF_CHAR_IS_WORD;
         }
+
+        initilized = 1;
     }
 
     // copy the default
-    char *ffs = ff->separators, *ffd = def, *ffe = &def[256];
-    while(likely(ffd != ffe)) *ffs++ = *ffd++;
+    PF_CHAR_TYPE *ffs = ff->separators, *ffd = def, *ffe = &def[256];
+    while(ffd != ffe)
+        *ffs++ = *ffd++;
 
     // set the separators
     if(unlikely(!separators))
@@ -343,47 +350,50 @@ static void procfile_set_separators(procfile *ff, const char *separators) {
 
     ffs = ff->separators;
     const char *s = separators;
-    while(likely(*s))
+    while(*s)
         ffs[(int)*s++] = PF_CHAR_IS_SEPARATOR;
 }
 
 void procfile_set_quotes(procfile *ff, const char *quotes) {
+    PF_CHAR_TYPE *ffs = ff->separators;
+
     // remove all quotes
-    int i;
-    for(i = 0; i < 256 ; i++)
-        if(unlikely(ff->separators[i] == PF_CHAR_IS_QUOTE))
-            ff->separators[i] = PF_CHAR_IS_WORD;
+    int i = 256;
+    while(i--)
+        if(unlikely(ffs[i] == PF_CHAR_IS_QUOTE))
+            ffs[i] = PF_CHAR_IS_WORD;
 
     // if nothing given, return
     if(unlikely(!quotes || !*quotes))
         return;
 
     // set the quotes
-    char *ffs = ff->separators;
     const char *s = quotes;
-    while(likely(*s))
+    while(*s)
         ffs[(int)*s++] = PF_CHAR_IS_QUOTE;
 }
 
 void procfile_set_open_close(procfile *ff, const char *open, const char *close) {
+    PF_CHAR_TYPE *ffs = ff->separators;
+
     // remove all open/close
-    int i;
-    for(i = 0; i < 256 ; i++)
-        if(unlikely(ff->separators[i] == PF_CHAR_IS_OPEN || ff->separators[i] == PF_CHAR_IS_CLOSE))
-            ff->separators[i] = PF_CHAR_IS_WORD;
+    int i = 256;
+    while(i--)
+        if(unlikely(ffs[i] == PF_CHAR_IS_OPEN || ffs[i] == PF_CHAR_IS_CLOSE))
+            ffs[i] = PF_CHAR_IS_WORD;
 
     // if nothing given, return
     if(unlikely(!open || !*open || !close || !*close))
         return;
 
     // set the openings
-    char *ffs = ff->separators;
     const char *s = open;
-    while(likely(*s))
+    while(*s)
         ffs[(int)*s++] = PF_CHAR_IS_OPEN;
 
+    // set the closings
     s = close;
-    while(likely(*s))
+    while(*s)
         ffs[(int)*s++] = PF_CHAR_IS_CLOSE;
 }
 
index dae5a0fc272092deaea95dd565392ae77a9ee5b0..98765697f59ea07dc735a4e6e7a1bdf44a31705c 100644 (file)
@@ -58,6 +58,15 @@ typedef struct {
 #define PROCFILE_FLAG_DEFAULT             0x00000000
 #define PROCFILE_FLAG_NO_ERROR_ON_FILE_IO 0x00000001
 
+typedef enum procfile_separator {   // the class procfile_parser() looks up for every input byte
+    PF_CHAR_IS_SEPARATOR,           // first enumerator (value 0): ends the current word, unless inside a quoted or parenthesized string
+    PF_CHAR_IS_NEWLINE,             // ends the current word and the current line
+    PF_CHAR_IS_WORD,                // ordinary character: the parser just steps over it
+    PF_CHAR_IS_QUOTE,               // opens a quoted string at the start of a word; the same character closes it
+    PF_CHAR_IS_OPEN,                // opens a parenthesized string at the start of a word; nests while one is open
+    PF_CHAR_IS_CLOSE                // closes one nesting level; the word is added when the outermost level closes
+} PF_CHAR_TYPE;
+
 typedef struct {
     char filename[FILENAME_MAX + 1]; // not populated until profile_filename() is called
 
@@ -67,7 +76,7 @@ typedef struct {
     size_t size;          // the bytes we have allocated for data
     pflines *lines;
     pfwords *words;
-    char separators[256];
+    PF_CHAR_TYPE separators[256];
     char data[];          // allocated buffer to keep file contents
 } procfile;