]> arthur.barton.de Git - netatalk.git/blob - libatalk/unicode/util_unistr.c
composition of surrogate pair
[netatalk.git] / libatalk / unicode / util_unistr.c
1 #ifdef HAVE_CONFIG_H
2 #include "config.h"
3 #endif /* HAVE_CONFIG_H */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/param.h>
9 #include <sys/stat.h>
10 #include <atalk/logger.h>
11 #include <errno.h>
12
13 #include <netatalk/endian.h>
14
15 #include <atalk/unicode.h>
16 #include "ucs2_casetable.h"
17 #include "precompose.h"
18 #include "byteorder.h"
19
20 #define HANGUL_SBASE 0xAC00
21 #define HANGUL_LBASE 0x1100
22 #define HANGUL_VBASE 0x1161
23 #define HANGUL_TBASE 0x11A7
24 #define HANGUL_LCOUNT 19
25 #define HANGUL_VCOUNT 21
26 #define HANGUL_TCOUNT 28
27 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)   /* 588 */
28 #define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT)   /* 11172 */
29
30 #define MAXCOMBLEN 3
31 #define MAXCOMBSPLEN 2
32 #define COMBBUFLEN 4     /* max(MAXCOMBLEN, MAXCOMBSPLEN*2) */
33
34 /*******************************************************************
35  Convert a wide character to upper/lower case.
36 ********************************************************************/
37 ucs2_t toupper_w(ucs2_t val)
38 {
39         if ( val >= 0x0040 && val <= 0x007F)
40                 return upcase_table_1[val-0x0040];
41         if ( val >= 0x00C0 && val <= 0x02BF)
42                 return upcase_table_2[val-0x00C0];
43         if ( val >= 0x0380 && val <= 0x04FF)
44                 return upcase_table_3[val-0x0380];
45         if ( val >= 0x0540 && val <= 0x05BF)
46                 return upcase_table_4[val-0x0540];
47         if ( val >= 0x1E00 && val <= 0x1FFF)
48                 return upcase_table_5[val-0x1E00];
49         if ( val >= 0x2140 && val <= 0x217F)
50                 return upcase_table_6[val-0x2140];
51         if ( val >= 0x24C0 && val <= 0x24FF)
52                 return upcase_table_7[val-0x24C0];
53         if ( val >= 0xFF40 && val <= 0xFF7F)
54                 return upcase_table_8[val-0xFF40];
55
56         return (val);
57 }
58
59
60 ucs2_t tolower_w(ucs2_t val)
61 {
62         if ( val >= 0x0040 && val <= 0x007F)
63                 return lowcase_table_1[val-0x0040];
64         if ( val >= 0x00C0 && val <= 0x023F)
65                 return lowcase_table_2[val-0x00C0];
66         if ( val >= 0x0380 && val <= 0x057F)
67                 return lowcase_table_3[val-0x0380];
68         if ( val >= 0x1E00 && val <= 0x1FFF)
69                 return lowcase_table_4[val-0x1E00];
70         if ( val >= 0x2140 && val <= 0x217F)
71                 return lowcase_table_5[val-0x2140];
72         if ( val >= 0x2480 && val <= 0x24FF)
73                 return lowcase_table_6[val-0x2480];
74         if ( val >= 0xFF00 && val <= 0xFF3F)
75                 return lowcase_table_7[val-0xFF00];
76
77         return (val);
78 }
79
80 /*******************************************************************
81  Convert a string to lower case.
82  return True if any char is converted
83 ********************************************************************/
84 int strlower_w(ucs2_t *s)
85 {
86         int ret = 0;
87         while (*s) {
88                 ucs2_t v = tolower_w(*s);
89                 if (v != *s) {
90                         *s = v;
91                         ret = 1;
92                 }
93                 s++;
94         }
95         return ret;
96 }
97
98 /*******************************************************************
99  Convert a string to upper case.
100  return True if any char is converted
101 ********************************************************************/
102 int strupper_w(ucs2_t *s)
103 {
104         int ret = 0;
105         while (*s) {
106                 ucs2_t v = toupper_w(*s);
107                 if (v != *s) {
108                         *s = v;
109                         ret = 1;
110                 }
111                 s++;
112         }
113         return ret;
114 }
115
116
117 /*******************************************************************
118 determine if a character is lowercase
119 ********************************************************************/
120 int islower_w(ucs2_t c)
121 {
122         return ( c == tolower_w(c));
123 }
124
125 /*******************************************************************
126 determine if a character is uppercase
127 ********************************************************************/
128 int isupper_w(ucs2_t c)
129 {
130         return ( c == toupper_w(c));
131 }
132
133
134 /*******************************************************************
135  Count the number of characters in a ucs2_t string.
136 ********************************************************************/
137 size_t strlen_w(const ucs2_t *src)
138 {
139         size_t len;
140
141         for(len = 0; *src++; len++) ;
142
143         return len;
144 }
145
146 /*******************************************************************
147  Count up to max number of characters in a ucs2_t string.
148 ********************************************************************/
149 size_t strnlen_w(const ucs2_t *src, size_t max)
150 {
151         size_t len;
152
153         for(len = 0; *src++ && (len < max); len++) ;
154
155         return len;
156 }
157
158 /*******************************************************************
159 wide strchr()
160 ********************************************************************/
161 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
162 {
163         while (*s != 0) {
164                 if (c == *s) return (ucs2_t *)s;
165                 s++;
166         }
167         if (c == *s) return (ucs2_t *)s;
168
169         return NULL;
170 }
171
172 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
173 {
174         while (*s != 0) {
175 /*              LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
176                 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
177                 s++;
178         }
179         if (c == *s) return (ucs2_t *)s;
180
181         return NULL;
182 }
183
184
185 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
186 {
187         while (*b && *a == *b) { a++; b++; }
188         return (*a - *b);
189         /* warning: if *a != *b and both are not 0 we retrun a random
190            greater or lesser than 0 number not realted to which
191            string is longer */
192 }
193
194 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
195 {
196         size_t n = 0;
197         while ((n < len) && *b && *a == *b) { a++; b++; n++;}
198         return (len - n)?(*a - *b):0;
199 }
200
201 /*******************************************************************
202 wide strstr()
203 ********************************************************************/
204 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
205 {
206         ucs2_t *r;
207         size_t slen, inslen;
208
209         if (!s || !*s || !ins || !*ins) return NULL;
210         slen = strlen_w(s);
211         inslen = strlen_w(ins);
212         r = (ucs2_t *)s;
213         while ((r = strchr_w(r, *ins))) {
214                 if (strncmp_w(r, ins, inslen) == 0) return r;
215                 r++;
216         }
217         return NULL;
218 }
219
220 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
221 {
222         ucs2_t *r;
223         size_t slen, inslen;
224
225         if (!s || !*s || !ins || !*ins) return NULL;
226         slen = strlen_w(s);
227         inslen = strlen_w(ins);
228         r = (ucs2_t *)s;
229         while ((r = strcasechr_w(r, *ins))) {
230                 if (strncasecmp_w(r, ins, inslen) == 0) return r;
231                 r++;
232         }
233         return NULL;
234 }
235
236
237
238
239 /*******************************************************************
240 case insensitive string comparison
241 ********************************************************************/
242 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
243 {
244         while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
245         return (tolower_w(*a) - tolower_w(*b));
246 }
247
248 /*******************************************************************
249 case insensitive string comparison, length limited
250 ********************************************************************/
251 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
252 {
253         size_t n = 0;
254         while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
255         return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
256 }
257
258 /*******************************************************************
259 duplicate string
260 ********************************************************************/
261 /* if len == 0 then duplicate the whole string */
262 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
263 {
264         ucs2_t *dest;
265
266         if (!len) len = strlen_w(src);
267         dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
268         if (!dest) {
269                 LOG (log_error, logtype_default, "strdup_w: out of memory!");
270                 return NULL;
271         }
272
273         memcpy(dest, src, len * sizeof(ucs2_t));
274         dest[len] = 0;
275
276         return dest;
277 }
278
279 ucs2_t *strdup_w(const ucs2_t *src)
280 {
281         return strndup_w(src, 0);
282 }
283
284 /*******************************************************************
285 copy a string with max len
286 ********************************************************************/
287
288 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
289 {
290         size_t len;
291
292         if (!dest || !src) return NULL;
293
294         for (len = 0; (src[len] != 0) && (len < max); len++)
295                 dest[len] = src[len];
296         while (len < max)
297                 dest[len++] = 0;
298
299         return dest;
300 }
301
302
303 /*******************************************************************
304 append at most max characters of a string and add a terminator
305 ********************************************************************/
306
307 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
308 {
309         size_t start;
310         size_t len;
311
312         if (!dest || !src) return NULL;
313
314         start = strlen_w(dest);
315         len = strnlen_w(src, max);
316
317         memcpy(&dest[start], src, len*sizeof(ucs2_t));
318         dest[start+len] = 0;
319
320         return dest;
321 }
322
323
324 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
325 {
326         size_t start;
327         size_t len;
328
329         if (!dest || !src) return NULL;
330
331         start = strlen_w(dest);
332         len = strlen_w(src);
333
334         memcpy(&dest[start], src, len*sizeof(ucs2_t));
335         dest[start+len] = 0;
336
337         return dest;
338 }
339
340
341 /*******************************************************************
342 binary search for pre|decomposition
343 ********************************************************************/
344
345 static ucs2_t do_precomposition(unsigned int base, unsigned int comb) 
346 {
347         int min = 0;
348         int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1;
349         int mid;
350         u_int32_t sought = (base << 16) | comb, that;
351
352         /* binary search */
353         while (max >= min) {
354                 mid = (min + max) / 2;
355                 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
356                 if (that < sought) {
357                         min = mid + 1;
358                 } else if (that > sought) {
359                         max = mid - 1;
360                 } else {
361                         return precompositions[mid].replacement;
362                 }
363         }
364         /* no match */
365         return 0;
366 }
367
368 /* ------------------------ */
369 static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp) 
370 {
371         int min = 0;
372         int max = sizeof(precompositions_sp) / sizeof(precompositions_sp[0]) - 1;
373         int mid;
374         u_int64_t sought = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that;
375
376         /* binary search */
377         while (max >= min) {
378                 mid = (min + max) / 2;
379                 that = ((u_int64_t)precompositions_sp[mid].base << 32) | ((u_int64_t)precompositions_sp[mid].comb);
380                 if (that < sought) {
381                         min = mid + 1;
382                 } else if (that > sought) {
383                         max = mid - 1;
384                 } else {
385                         return precompositions_sp[mid].replacement;
386                 }
387         }
388         /* no match */
389         return 0;
390 }
391
392 /* -------------------------- */
393 static u_int32_t do_decomposition(ucs2_t base) 
394 {
395         int min = 0;
396         int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1;
397         int mid;
398         u_int32_t sought = base;
399         u_int32_t result, that;
400
401         /* binary search */
402         while (max >= min) {
403                 mid = (min + max) / 2;
404                 that = decompositions[mid].replacement;
405                 if (that < sought) {
406                         min = mid + 1;
407                 } else if (that > sought) {
408                         max = mid - 1;
409                 } else {
410                         result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
411                         return result;
412                 }
413         }
414         /* no match */
415         return 0;
416 }
417
418 /* -------------------------- */
419 static u_int64_t do_decomposition_sp(unsigned int base) 
420 {
421         int min = 0;
422         int max = sizeof(decompositions_sp) / sizeof(decompositions_sp[0]) - 1;
423         int mid;
424         u_int32_t sought = base;
425         u_int32_t that;
426         u_int64_t result;
427
428         /* binary search */
429         while (max >= min) {
430                 mid = (min + max) / 2;
431                 that = decompositions_sp[mid].replacement;
432                 if (that < sought) {
433                         min = mid + 1;
434                 } else if (that > sought) {
435                         max = mid - 1;
436                 } else {
437                         result = ((u_int64_t)decompositions_sp[mid].base << 32) | ((u_int64_t)decompositions_sp[mid].comb);
438                         return result;
439                 }
440         }
441         /* no match */
442         return 0;
443 }
444
445 /*******************************************************************
446 pre|decomposition
447
448    we can't use static, this stuff needs to be reentrant
449    static char comp[MAXPATHLEN +1];
450
451    exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
452    in decompositions[] from decomposition according to AFP 3.x spec
453
454    We don't implement Singleton and Canonical Ordering
455    because they cause the problem of the roundtrip
456    such as Dancing Icon
457 ********************************************************************/
458
459 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
460 {
461         size_t i;
462         ucs2_t base, comb;
463         u_int32_t base_sp, comb_sp;
464         ucs2_t *in, *out;
465         ucs2_t hangul_lindex, hangul_vindex;
466         ucs2_t result;
467         u_int32_t result_sp;
468         size_t o_len = *outlen;
469         
470         if (!inplen || (inplen & 1) || inplen > o_len)
471                 return (size_t)-1;
472         
473         i = 0;
474         in  = name;
475         out = comp;
476         
477         base = *in;
478         while (*outlen > 2) {
479                 i += 2;
480                 in++;
481
482                 if (i == inplen) {
483                         *out = base;
484                         out++;
485                         *out = 0;
486                         *outlen -= 2;
487                         return o_len - *outlen;
488                 }
489
490                 comb = *in;
491                 result = 0;
492
493                 /* Non-Combination Character */
494                 if (comb < 0x300) ;
495                 
496                 /* Unicode Standard Annex #15 A10.3 Hangul Composition */
497                 /* Step 1 <L,V> */
498                 else if ((HANGUL_VBASE <= comb) && (comb <= HANGUL_VBASE + HANGUL_VCOUNT)) {
499                         if ((HANGUL_LBASE <= base) && (base < HANGUL_LBASE + HANGUL_LCOUNT)) {
500                                 result = 1;
501                                 hangul_lindex = base - HANGUL_LBASE;
502                                 hangul_vindex = comb - HANGUL_VBASE;
503                                 base = HANGUL_SBASE + (hangul_lindex * HANGUL_VCOUNT + hangul_vindex) * HANGUL_TCOUNT;
504                         }
505                 }
506                 
507                 /* Step 2 <LV,T> */
508                 else if ((HANGUL_TBASE < comb) && (comb < HANGUL_TBASE + HANGUL_TCOUNT)) {
509                         if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE +HANGUL_SCOUNT) && (((base - HANGUL_SBASE) % HANGUL_TCOUNT) == 0)) {
510                                 result = 1;
511                                 base += comb - HANGUL_TBASE;
512                         }
513                 }
514                 
515                 /* Binary Search for Surrogate Pair */
516                 else if ((0xD800 <= base) && (base < 0xDC00)) {
517                         if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
518                                 base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
519                                 do {
520                                         comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
521                                         if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
522                                                 base_sp = result_sp;
523                                                 i += 4;
524                                                 in +=2;
525                                         }
526                                 } while ((i + 4 <= inplen) && result_sp) ;
527
528                                 *out = base_sp >> 16;
529                                 out++;
530                                 *outlen -= 2;
531
532                                 if (*outlen <= 2) {
533                                         errno = E2BIG;
534                                         return (size_t)-1;
535                                 }
536
537                                 *out = base_sp & 0xFFFF;
538                                 out++;
539                                 *outlen -= 2;
540
541                                 i += 2;
542                                 in++;
543                                 base = *in;
544
545                                 result = 1;
546                         }
547                 }
548
549                 /* Binary Search for BMP */
550                 else if (result = do_precomposition(base, comb)) {
551                         base = result;
552                 }
553                 
554                 if (!result) {
555                         *out = base;
556                         out++;
557                         *outlen -= 2;
558                         base = comb;
559                 }
560         }
561
562         errno = E2BIG;
563         return (size_t)-1;
564 }
565
566 /* --------------- */
567 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
568 {
569         size_t i;
570         size_t comblen;
571         ucs2_t base, comb[COMBBUFLEN];
572         u_int32_t base_sp;
573         ucs2_t hangul_sindex, tjamo;
574         ucs2_t *in, *out;
575         unsigned int result;
576         u_int64_t result_sp;
577         size_t o_len = *outlen;
578
579         if (!inplen || (inplen & 1))
580                 return (size_t)-1;
581         i = 0;
582         in  = name;
583         out = comp;
584
585         while (i < inplen) {
586                 base = *in;
587                 comblen = 0;
588                 
589                 /* check ASCII first. this is frequent. */
590                 if (base <= 0x007f) ;
591                 
592                 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
593                 else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) {
594                         hangul_sindex = base - HANGUL_SBASE;
595                         base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT;
596                         comb[COMBBUFLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
597                         
598                         /* <L,V> */
599                         if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) {
600                                 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
601                                 comblen = 1;
602                         }
603                         
604                         /* <L,V,T> */
605                         else {
606                                 comb[COMBBUFLEN-1] = tjamo;
607                                 comblen = 2;
608                         }
609                 }
610                 
611                 /* Binary Search for Surrogate Pair */
612                 else if ((0xD800 <= base) && (base < 0xDC00)) {
613                         if (i + 2 < inplen) {
614                                 base_sp =  ((u_int32_t)base << 16) | (u_int32_t)in[1];
615                                 do {
616                                         if ( !(result_sp = do_decomposition_sp(base_sp))) break;
617                                         comblen += 2;
618                                         base_sp = result_sp >> 32;
619                                         comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF;  /* hi */
620                                         comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF;        /* lo */
621                                 } while (comblen < (MAXCOMBSPLEN<<1));
622
623                                 if (*outlen < (comblen + 1) << 1) {
624                                         errno = E2BIG;
625                                         return (size_t)-1;
626                                 }
627
628                                 *out = base_sp >> 16;   /* hi */
629                                 out++;
630                                 *outlen -= 2;
631                                 
632                                 base = base_sp & 0xFFFF; /* lo */
633                                 
634                                 i += 2;
635                                 in++;
636                         }
637                 }
638                         
639                 /* Binary Search for BMP */
640                 else {
641                         do {
642                                 if ( !(result = do_decomposition(base))) break;
643                                 comblen++;
644                                 base = result  >> 16;
645                                 comb[COMBBUFLEN-comblen] = result & 0xFFFF;
646                         } while ((0x007f < base) && (comblen < MAXCOMBLEN));
647                 }
648                 
649                 if (*outlen < (comblen + 1) << 1) {
650                         errno = E2BIG;
651                         return (size_t)-1;
652                 }
653                 
654                 *out = base;
655                 out++;
656                 *outlen -= 2;
657                 
658                 while ( comblen > 0 ) {
659                         *out = comb[COMBBUFLEN-comblen];
660                         out++;
661                         *outlen -= 2;
662                         comblen--;
663                 }
664                 
665                 i += 2;
666                 in++;
667         }
668         
669         *out = 0;
670         return o_len-*outlen;
671 }
672
673 /*******************************************************************
674 length of UTF-8 character and string
675 ********************************************************************/
676
/*
 * Length in bytes (1..4) of the UTF-8 sequence starting at utf8, or
 * (size_t)-1 when the bytes there do not form an accepted sequence.
 *
 * The lead byte fixes the sequence length and the range allowed for the
 * second byte (narrowed for 0xE0, 0xF0 and 0xF4); every further byte
 * must lie in 0x80..0xBF.  Bytes are examined strictly left to right
 * and checking stops at the first bad one, so a 0 terminator is never
 * read past.  A 0 byte itself counts as a one-byte character.
 */
size_t utf8_charlen ( char* utf8 )
{
        const unsigned char *s = (const unsigned char *) utf8;
        const unsigned char lead = s[0];
        unsigned char lo2 = 0x80;       /* inclusive bounds for byte 2 */
        unsigned char hi2 = 0xBF;
        size_t need;
        size_t k;

        if (lead < 0x80)
                return (1);

        if (lead >= 0xC2 && lead <= 0xDF) {
                need = 2;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
                need = 3;
                if (lead == 0xE0)
                        lo2 = 0xA0;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
                need = 4;
                if (lead == 0xF0)
                        lo2 = 0x90;
                else if (lead == 0xF4)
                        hi2 = 0x8F;
        } else {
                return ((size_t) -1);
        }

        if (s[1] < lo2 || s[1] > hi2)
                return ((size_t) -1);

        for (k = 2; k < need; k++) {
                if (s[k] < 0x80 || s[k] > 0xBF)
                        return ((size_t) -1);
        }

        return (need);
}
700
701
/*
 * Count the characters of the 0-terminated UTF-8 string utf8, validating
 * it on the way.  Returns the number of characters, or (size_t)-1 as
 * soon as a byte sequence is not accepted.
 *
 * see http://www.unicode.org/unicode/reports/tr27/ for an explanation
 * of the accepted byte ranges: the lead byte fixes the sequence length
 * and the range allowed for the second byte (narrowed for 0xE0, 0xF0
 * and 0xF4); every further byte must lie in 0x80..0xBF.  Bytes are
 * examined left to right and checking stops at the first bad one, so
 * the terminator is never read past.
 */
size_t utf8_strlen_validate ( char * utf8 )
{
        const unsigned char *cur = (const unsigned char *) utf8;
        size_t count = 0;

        while (*cur != '\0') {
                const unsigned char lead = cur[0];
                unsigned char lo2 = 0x80;       /* inclusive bounds for byte 2 */
                unsigned char hi2 = 0xBF;
                size_t width;
                size_t k;

                if (lead < 0x80) {
                        width = 1;
                } else if (lead >= 0xC2 && lead <= 0xDF) {
                        width = 2;
                } else if (lead >= 0xE0 && lead <= 0xEF) {
                        width = 3;
                        if (lead == 0xE0)
                                lo2 = 0xA0;
                } else if (lead >= 0xF0 && lead <= 0xF4) {
                        width = 4;
                        if (lead == 0xF0)
                                lo2 = 0x90;
                        else if (lead == 0xF4)
                                hi2 = 0x8F;
                } else {
                        return ((size_t) -1);
                }

                if (width > 1 && (cur[1] < lo2 || cur[1] > hi2))
                        return ((size_t) -1);

                for (k = 2; k < width; k++) {
                        if (cur[k] < 0x80 || cur[k] > 0xBF)
                                return ((size_t) -1);
                }

                cur += width;
                count++;
        }

        return (count);
}