arthur.barton.de Git - netatalk.git/blobdiff - libatalk/unicode/util_unistr.c
Merge branch-2-1
[netatalk.git] / libatalk / unicode / util_unistr.c
index 4f4921b3ebb03923a2de7b149ab4f1ee1dd0db58..ef48f21b27841b3d2920cf19972e7c5b2e95ad18 100644 (file)
@@ -17,7 +17,9 @@
 #include "precompose.h"
 #include "byteorder.h"
 
-
+/*******************************************************************
+ Convert a wide character to upper/lower case.
+********************************************************************/
 ucs2_t toupper_w(ucs2_t val)
 {
        if ( val >= 0x0040 && val <= 0x007F)
@@ -67,16 +69,16 @@ ucs2_t tolower_w(ucs2_t val)
 ********************************************************************/
 int strlower_w(ucs2_t *s)
 {
-        int ret = 0;
-        while (*s) {
-                ucs2_t v = tolower_w(*s);
-                if (v != *s) {
-                        *s = v;
-                        ret = 1;
-                }
-                s++;
-        }
-        return ret;
+       int ret = 0;
+       while (*s) {
+               ucs2_t v = tolower_w(*s);
+               if (v != *s) {
+                       *s = v;
+                       ret = 1;
+               }
+               s++;
+       }
+       return ret;
 }
 
 /*******************************************************************
@@ -85,16 +87,16 @@ int strlower_w(ucs2_t *s)
 ********************************************************************/
 int strupper_w(ucs2_t *s)
 {
-        int ret = 0;
-        while (*s) {
-                ucs2_t v = toupper_w(*s);
-                if (v != *s) {
-                        *s = v;
-                        ret = 1;
-                }
-                s++;
-        }
-        return ret;
+       int ret = 0;
+       while (*s) {
+               ucs2_t v = toupper_w(*s);
+               if (v != *s) {
+                       *s = v;
+                       ret = 1;
+               }
+               s++;
+       }
+       return ret;
 }
 
 
@@ -168,18 +170,18 @@ ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
 
 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
 {
-        while (*b && *a == *b) { a++; b++; }
-        return (*a - *b);
-        /* warning: if *a != *b and both are not 0 we retrun a random
-                greater or lesser than 0 number not realted to which
-                string is longer */
+       while (*b && *a == *b) { a++; b++; }
+       return (*a - *b);
+       /* warning: if *a != *b and neither is 0, we return an arbitrary
+          value greater or less than 0 that is not related to which
+          string is longer */
 }
 
 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
 {
-        size_t n = 0;
-        while ((n < len) && *b && *a == *b) { a++; b++; n++;}
-        return (len - n)?(*a - *b):0;
+       size_t n = 0;
+       while ((n < len) && *b && *a == *b) { a++; b++; n++;}
+       return (len - n)?(*a - *b):0;
 }
 
 /*******************************************************************
@@ -225,8 +227,8 @@ case insensitive string comparison
 ********************************************************************/
 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
 {
-        while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
-        return (tolower_w(*a) - tolower_w(*b));
+       while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
+       return (tolower_w(*a) - tolower_w(*b));
 }
 
 /*******************************************************************
@@ -234,9 +236,9 @@ case insensitive string comparison, length limited
 ********************************************************************/
 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
 {
-        size_t n = 0;
-        while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
-        return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
+       size_t n = 0;
+       while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
+       return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
 }
 
 /*******************************************************************
@@ -245,24 +247,24 @@ duplicate string
 /* if len == 0 then duplicate the whole string */
 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
 {
-        ucs2_t *dest;
+       ucs2_t *dest;
 
-        if (!len) len = strlen_w(src);
-        dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
-        if (!dest) {
-                LOG (log_error, logtype_default, "strdup_w: out of memory!");
-                return NULL;
-        }
+       if (!len) len = strlen_w(src);
+       dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
+       if (!dest) {
+               LOG (log_error, logtype_default, "strdup_w: out of memory!");
+               return NULL;
+       }
 
-        memcpy(dest, src, len * sizeof(ucs2_t));
-        dest[len] = 0;
+       memcpy(dest, src, len * sizeof(ucs2_t));
+       dest[len] = 0;
 
-        return dest;
+       return dest;
 }
 
 ucs2_t *strdup_w(const ucs2_t *src)
 {
-        return strndup_w(src, 0);
+       return strndup_w(src, 0);
 }
 
 /*******************************************************************
@@ -271,16 +273,16 @@ copy a string with max len
 
 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
 {
-        size_t len;
+       size_t len;
 
-        if (!dest || !src) return NULL;
+       if (!dest || !src) return NULL;
 
-        for (len = 0; (src[len] != 0) && (len < max); len++)
-                dest[len] = src[len];
-        while (len < max)
-                dest[len++] = 0;
+       for (len = 0; (src[len] != 0) && (len < max); len++)
+               dest[len] = src[len];
+       while (len < max)
+               dest[len++] = 0;
 
-        return dest;
+       return dest;
 }
 
 
@@ -290,203 +292,383 @@ append a string of len bytes and add a terminator
 
 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
 {
-        size_t start;
-        size_t len;
+       size_t start;
+       size_t len;
 
-        if (!dest || !src) return NULL;
+       if (!dest || !src) return NULL;
 
-        start = strlen_w(dest);
-        len = strnlen_w(src, max);
+       start = strlen_w(dest);
+       len = strnlen_w(src, max);
 
-        memcpy(&dest[start], src, len*sizeof(ucs2_t));
-        dest[start+len] = 0;
+       memcpy(&dest[start], src, len*sizeof(ucs2_t));
+       dest[start+len] = 0;
 
-        return dest;
+       return dest;
 }
 
 
 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
 {
-        size_t start;
-        size_t len;
+       size_t start;
+       size_t len;
 
-        if (!dest || !src) return NULL;
+       if (!dest || !src) return NULL;
 
-        start = strlen_w(dest);
-        len = strlen_w(src);
+       start = strlen_w(dest);
+       len = strlen_w(src);
 
-        memcpy(&dest[start], src, len*sizeof(ucs2_t));
-        dest[start+len] = 0;
+       memcpy(&dest[start], src, len*sizeof(ucs2_t));
+       dest[start+len] = 0;
 
-        return dest;
+       return dest;
 }
 
 
+/*******************************************************************
+binary search for pre|decomposition
+********************************************************************/
+
+static ucs2_t do_precomposition(unsigned int base, unsigned int comb) 
+{
+       int min = 0;
+       int max = PRECOMP_COUNT - 1;
+       int mid;
+       u_int32_t sought = (base << 16) | comb, that;
+
+       /* binary search */
+       while (max >= min) {
+               mid = (min + max) / 2;
+               that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
+               if (that < sought) {
+                       min = mid + 1;
+               } else if (that > sought) {
+                       max = mid - 1;
+               } else {
+                       return precompositions[mid].replacement;
+               }
+       }
+       /* no match */
+       return 0;
+}
+
 /* ------------------------ */
-ucs2_t do_precomposition(unsigned int base, unsigned int comb)
+static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
 {
-       int min = 0;
-       int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1;
-       int mid;
-       u_int32_t sought = (base << 16) | comb, that;
-
-       /* binary search */
-       while (max >= min) {
-               mid = (min + max) / 2;
-               that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
-               if (that < sought) {
-                       min = mid + 1;
-               } else if (that > sought) {
-                       max = mid - 1;
-               } else {
-                       return precompositions[mid].replacement;
-               }
-       }
-       /* no match */
-       return 0;
+       int min = 0;
+       int max = PRECOMP_SP_COUNT - 1;
+       int mid;
+       u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp;
+
+       /* binary search */
+       while (max >= min) {
+               mid = (min + max) / 2;
+               that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp);
+               if (that_sp < sought_sp) {
+                       min = mid + 1;
+               } else if (that_sp > sought_sp) {
+                       max = mid - 1;
+               } else {
+                       return precompositions_sp[mid].replacement_sp;
+               }
+       }
+       /* no match */
+       return 0;
 }
 
 /* -------------------------- */
-u_int32_t do_decomposition(ucs2_t base) 
+static u_int32_t do_decomposition(ucs2_t base) 
 {
-       int min = 0;
-       int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1;
-       int mid;
-       u_int32_t sought = base;
-       u_int32_t result, that;
-
-       /* binary search */
-       while (max >= min) {
-               mid = (min + max) / 2;
-               that = decompositions[mid].replacement;
-               if (that < sought) {
-                       min = mid + 1;
-               } else if (that > sought) {
-                       max = mid - 1;
-               } else {
-                       result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
-                       return result;
-               }
-       }
-       /* no match */
-       return 0;
+       int min = 0;
+       int max = DECOMP_COUNT - 1;
+       int mid;
+       u_int32_t sought = base;
+       u_int32_t result, that;
+
+       /* binary search */
+       while (max >= min) {
+               mid = (min + max) / 2;
+               that = decompositions[mid].replacement;
+               if (that < sought) {
+                       min = mid + 1;
+               } else if (that > sought) {
+                       max = mid - 1;
+               } else {
+                       result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
+                       return result;
+               }
+       }
+       /* no match */
+       return 0;
 }
 
-/* we can't use static, this stuff needs to be reentrant */
-/* static char comp[MAXPATHLEN +1]; */
+/* -------------------------- */
+static u_int64_t do_decomposition_sp(unsigned int base_sp) 
+{
+       int min = 0;
+       int max = DECOMP_SP_COUNT - 1;
+       int mid;
+       u_int32_t sought_sp = base_sp;
+       u_int32_t that_sp;
+       u_int64_t result_sp;
+
+       /* binary search */
+       while (max >= min) {
+               mid = (min + max) / 2;
+               that_sp = decompositions_sp[mid].replacement_sp;
+               if (that_sp < sought_sp) {
+                       min = mid + 1;
+               } else if (that_sp > sought_sp) {
+                       max = mid - 1;
+               } else {
+                       result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp);
+                       return result_sp;
+               }
+       }
+       /* no match */
+       return 0;
+}
+
+/*******************************************************************
+pre|decomposition
+
+   we can't use static, this stuff needs to be reentrant
+   static char comp[MAXPATHLEN +1];
+
+   We don't implement Singleton and Canonical Ordering,
+   and we ignore CompositionExclusions.txt,
+   because they cause round-trip problems
+   such as with Dancing Icon.
+
+   exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
+   in precompose.h from composition, according to the AFP 3.x spec
+********************************************************************/
 
 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
 {
        size_t i;
        ucs2_t base, comb;
+       u_int32_t base_sp, comb_sp;
        ucs2_t *in, *out;
+       ucs2_t lindex, vindex;
        ucs2_t result;
+       u_int32_t result_sp;
        size_t o_len = *outlen;
+       
+       if (!inplen || (inplen & 1) || inplen > o_len)
+               return (size_t)-1;
+       
+       i = 0;
+       in  = name;
+       out = comp;
+       
+       base = *in;
+       while (*outlen > 2) {
+               i += 2;
+               in++;
 
-       if (!inplen || (inplen & 1) || inplen > o_len)
-               return (size_t)-1;
-       i = 0;
-       in  = name;
-       out = (ucs2_t *)comp;
-    
-       base = *in;
-       while (*outlen > 2) {
-               i += 2;
-               in++;
-               if (i == inplen) {
-                       *out = base;
+               if (i == inplen) {
+                       *out = base;
                        out++;
                        *out = 0;
-                       *outlen -= 2;
-                       return o_len - *outlen;
-               }
-               comb = *in;
-               if (comb >= 0x300 && (result = do_precomposition(base, comb))) {
-                       *out = result;
-                       out++;
-                       *outlen -= 2;
-                       i += 2;
-                       in++;
-                       if (i == inplen) {
-                               *out = 0;
-                               return o_len - *outlen;
+                       *outlen -= 2;
+                       return o_len - *outlen;
+               }
+
+               comb = *in;
+               result = 0;
+
+               /* Non-Combination Character */
+               if (comb < 0x300) ;
+               
+               /* Unicode Standard Annex #15 A10.3 Hangul Composition */
+               /* Step 1 <L,V> */
+               else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) {
+                       if ((LBASE <= base) && (base < LBASE + LCOUNT)) {
+                               result = 1;
+                               lindex = base - LBASE;
+                               vindex = comb - VBASE;
+                               base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
                        }
-                       base = *in;
-               }
-               else {
-                       *out = base;
-                       out++;
-                       *outlen -= 2;
-                       base = comb;
-               }
-       }
-       
+               }
+               
+               /* Step 2 <LV,T> */
+               else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) {
+                       if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) {
+                               result = 1;
+                               base += comb - TBASE;
+                       }
+               }
+               
+               /* Binary Search for Surrogate Pair */
+               else if ((0xD800 <= base) && (base < 0xDC00)) {
+                       if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
+                               base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
+                               do {
+                                       comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
+                                       if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
+                                               base_sp = result_sp;
+                                               i += 4;
+                                               in +=2;
+                                       }
+                               } while ((i + 4 <= inplen) && result_sp) ;
+
+                               *out = base_sp >> 16;
+                               out++;
+                               *outlen -= 2;
+
+                               if (*outlen <= 2) {
+                                       errno = E2BIG;
+                                       return (size_t)-1;
+                               }
+
+                               *out = base_sp & 0xFFFF;
+                               out++;
+                               *outlen -= 2;
+
+                               i += 2;
+                               in++;
+                               base = *in;
+
+                               result = 1;
+                       }
+               }
+
+               /* Binary Search for BMP */
+               else if (result = do_precomposition(base, comb)) {
+                       base = result;
+               }
+               
+               if (!result) {
+                       *out = base;
+                       out++;
+                       *outlen -= 2;
+                       base = comb;
+               }
+       }
+
        errno = E2BIG;
        return (size_t)-1;
 }
 
 /* --------------- */
-
 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
 {
        size_t i;
-       ucs2_t base;
+       size_t comblen;
+       ucs2_t base, comb[COMBBUFLEN];
+       u_int32_t base_sp;
+       ucs2_t sindex, tjamo;
        ucs2_t *in, *out;
        unsigned int result;
+       u_int64_t result_sp;
        size_t o_len = *outlen;
 
-       if (!inplen || (inplen & 1))
-               return (size_t)-1;
+       if (!inplen || (inplen & 1))
+               return (size_t)-1;
        i = 0;
        in  = name;
-       out = (ucs2_t *)comp;
-    
-       while (i < inplen) {
-               if (*outlen < 2) {
+       out = comp;
+
+       while (i < inplen) {
+               base = *in;
+               comblen = 0;
+               
+               /* check ASCII first. this is frequent. */
+               if (base <= 0x007f) ;
+               
+               /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
+               else if ((SBASE <= base) && (base < SBASE + SCOUNT)) {
+                       sindex = base - SBASE;
+                       base = LBASE + sindex / NCOUNT;
+                       comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT;
+                       
+                       /* <L,V> */
+                       if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) {
+                               comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
+                               comblen = 1;
+                       }
+                       
+                       /* <L,V,T> */
+                       else {
+                               comb[COMBBUFLEN-1] = tjamo;
+                               comblen = 2;
+                       }
+               }
+               
+               /* Binary Search for Surrogate Pair */
+               else if ((0xD800 <= base) && (base < 0xDC00)) {
+                       if (i + 2 < inplen) {
+                               base_sp =  ((u_int32_t)base << 16) | (u_int32_t)in[1];
+                               do {
+                                       if ( !(result_sp = do_decomposition_sp(base_sp))) break;
+                                       comblen += 2;
+                                       base_sp = result_sp >> 32;
+                                       comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF;  /* hi */
+                                       comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF;        /* lo */
+                               } while (comblen < MAXCOMBSPLEN);
+
+                               if (*outlen < (comblen + 1) << 1) {
+                                       errno = E2BIG;
+                                       return (size_t)-1;
+                               }
+
+                               *out = base_sp >> 16;   /* hi */
+                               out++;
+                               *outlen -= 2;
+                               
+                               base = base_sp & 0xFFFF; /* lo */
+                               
+                               i += 2;
+                               in++;
+                       }
+               }
+                       
+               /* Binary Search for BMP */
+               else {
+                       do {
+                               if ( !(result = do_decomposition(base))) break;
+                               comblen++;
+                               base = result  >> 16;
+                               comb[COMBBUFLEN-comblen] = result & 0xFFFF;
+                       } while ((0x007f < base) && (comblen < MAXCOMBLEN));
+               }
+               
+               if (*outlen < (comblen + 1) << 1) {
                        errno = E2BIG;
-                       return (size_t)-1;
-               }
-               base = *in;
-               if ( (base > 0x1fff && base < 0x3000) || (base > 0xfe2f && base < 0xfe50)) {
-                       /* exclude these ranges from decomposition according to AFP 3.1 spec */
-                       /* page 97 */
-                       *out = base;
+                       return (size_t)-1;
+               }
+               
+               *out = base;
+               out++;
+               *outlen -= 2;
+               
+               while ( comblen > 0 ) {
+                       *out = comb[COMBBUFLEN-comblen];
                        out++;
                        *outlen -= 2;
+                       comblen--;
                }
-               else if ((result = do_decomposition(base))) {
-                       if ( *outlen < 4 ) {
-                               errno = E2BIG;
-                               return (size_t)-1;
-                       }
-                       *out = result  >> 16;
-                       out++;
-                       *outlen -= 2;
-                       *out = result & 0xffff;
-                       out++;
-                       *outlen -= 2;
-               }
-               else {
-                       *out = base;
-                       out++;
-                       *outlen -= 2;
-               }
-               i += 2;
-               in++;
-       }
-
+               
+               i += 2;
+               in++;
+       }
+       
        *out = 0;
        return o_len-*outlen;
 }
 
+/*******************************************************************
+length of UTF-8 character and string
+********************************************************************/
+
 size_t utf8_charlen ( char* utf8 )
 {
-        unsigned char *p;
+       unsigned char *p;
 
-        p = (unsigned char*) utf8;
+       p = (unsigned char*) utf8;
        
        if ( *p < 0x80 )
-               return (1);
+               return (1);
        else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
                return (2);
        else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
@@ -506,43 +688,42 @@ size_t utf8_charlen ( char* utf8 )
 
 size_t utf8_strlen_validate ( char * utf8 )
 {
-        size_t len;
-        unsigned char *p;
+       size_t len;
+       unsigned char *p;
 
-        p = (unsigned char*) utf8;
-        len = 0;
+       p = (unsigned char*) utf8;
+       len = 0;
 
-        /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
+       /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
 
-        while ( *p != '\0')
-        {
-                if ( *p < 0x80 )
-                        p++;
+       while ( *p != '\0')
+       {
+               if ( *p < 0x80 )
+                       p++;
 
-                else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
-                        p += 2;
+               else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
+                       p += 2;
 
-                else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
-                        p += 3;
+               else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
+                       p += 3;
 
-                else if ( *p > 0xe0  && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
-                        p += 3;
+               else if ( *p > 0xe0  && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
+                       p += 3;
 
-                else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
-                        p += 4;
+               else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
+                       p += 4;
 
-                else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
-                        p += 4;
+               else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
+                       p += 4;
 
-                else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
-                        p += 4;
+               else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
+                       p += 4;
 
-                else
-                        return ((size_t) -1);
+               else
+                       return ((size_t) -1);
 
-                len++;
-        }
+               len++;
+       }
 
-        return (len);
+       return (len);
 }
-