]> arthur.barton.de Git - netatalk.git/blob - libatalk/unicode/util_unistr.c
move macros from util_unistr.c to precompose.h
[netatalk.git] / libatalk / unicode / util_unistr.c
1 #ifdef HAVE_CONFIG_H
2 #include "config.h"
3 #endif /* HAVE_CONFIG_H */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/param.h>
9 #include <sys/stat.h>
10 #include <atalk/logger.h>
11 #include <errno.h>
12
13 #include <netatalk/endian.h>
14
15 #include <atalk/unicode.h>
16 #include "ucs2_casetable.h"
17 #include "precompose.h"
18 #include "byteorder.h"
19
20 /*******************************************************************
21  Convert a wide character to upper/lower case.
22 ********************************************************************/
23 ucs2_t toupper_w(ucs2_t val)
24 {
25         if ( val >= 0x0040 && val <= 0x007F)
26                 return upcase_table_1[val-0x0040];
27         if ( val >= 0x00C0 && val <= 0x02BF)
28                 return upcase_table_2[val-0x00C0];
29         if ( val >= 0x0380 && val <= 0x04FF)
30                 return upcase_table_3[val-0x0380];
31         if ( val >= 0x0540 && val <= 0x05BF)
32                 return upcase_table_4[val-0x0540];
33         if ( val >= 0x1E00 && val <= 0x1FFF)
34                 return upcase_table_5[val-0x1E00];
35         if ( val >= 0x2140 && val <= 0x217F)
36                 return upcase_table_6[val-0x2140];
37         if ( val >= 0x24C0 && val <= 0x24FF)
38                 return upcase_table_7[val-0x24C0];
39         if ( val >= 0xFF40 && val <= 0xFF7F)
40                 return upcase_table_8[val-0xFF40];
41
42         return (val);
43 }
44
45
46 ucs2_t tolower_w(ucs2_t val)
47 {
48         if ( val >= 0x0040 && val <= 0x007F)
49                 return lowcase_table_1[val-0x0040];
50         if ( val >= 0x00C0 && val <= 0x023F)
51                 return lowcase_table_2[val-0x00C0];
52         if ( val >= 0x0380 && val <= 0x057F)
53                 return lowcase_table_3[val-0x0380];
54         if ( val >= 0x1E00 && val <= 0x1FFF)
55                 return lowcase_table_4[val-0x1E00];
56         if ( val >= 0x2140 && val <= 0x217F)
57                 return lowcase_table_5[val-0x2140];
58         if ( val >= 0x2480 && val <= 0x24FF)
59                 return lowcase_table_6[val-0x2480];
60         if ( val >= 0xFF00 && val <= 0xFF3F)
61                 return lowcase_table_7[val-0xFF00];
62
63         return (val);
64 }
65
66 /*******************************************************************
67  Convert a string to lower case.
68  return True if any char is converted
69 ********************************************************************/
70 int strlower_w(ucs2_t *s)
71 {
72         int ret = 0;
73         while (*s) {
74                 ucs2_t v = tolower_w(*s);
75                 if (v != *s) {
76                         *s = v;
77                         ret = 1;
78                 }
79                 s++;
80         }
81         return ret;
82 }
83
84 /*******************************************************************
85  Convert a string to upper case.
86  return True if any char is converted
87 ********************************************************************/
88 int strupper_w(ucs2_t *s)
89 {
90         int ret = 0;
91         while (*s) {
92                 ucs2_t v = toupper_w(*s);
93                 if (v != *s) {
94                         *s = v;
95                         ret = 1;
96                 }
97                 s++;
98         }
99         return ret;
100 }
101
102
103 /*******************************************************************
104 determine if a character is lowercase
105 ********************************************************************/
106 int islower_w(ucs2_t c)
107 {
108         return ( c == tolower_w(c));
109 }
110
111 /*******************************************************************
112 determine if a character is uppercase
113 ********************************************************************/
114 int isupper_w(ucs2_t c)
115 {
116         return ( c == toupper_w(c));
117 }
118
119
120 /*******************************************************************
121  Count the number of characters in a ucs2_t string.
122 ********************************************************************/
123 size_t strlen_w(const ucs2_t *src)
124 {
125         size_t len;
126
127         for(len = 0; *src++; len++) ;
128
129         return len;
130 }
131
132 /*******************************************************************
133  Count up to max number of characters in a ucs2_t string.
134 ********************************************************************/
135 size_t strnlen_w(const ucs2_t *src, size_t max)
136 {
137         size_t len;
138
139         for(len = 0; *src++ && (len < max); len++) ;
140
141         return len;
142 }
143
144 /*******************************************************************
145 wide strchr()
146 ********************************************************************/
147 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
148 {
149         while (*s != 0) {
150                 if (c == *s) return (ucs2_t *)s;
151                 s++;
152         }
153         if (c == *s) return (ucs2_t *)s;
154
155         return NULL;
156 }
157
158 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
159 {
160         while (*s != 0) {
161 /*              LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
162                 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
163                 s++;
164         }
165         if (c == *s) return (ucs2_t *)s;
166
167         return NULL;
168 }
169
170
171 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
172 {
173         while (*b && *a == *b) { a++; b++; }
174         return (*a - *b);
175         /* warning: if *a != *b and both are not 0 we retrun a random
176            greater or lesser than 0 number not realted to which
177            string is longer */
178 }
179
180 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
181 {
182         size_t n = 0;
183         while ((n < len) && *b && *a == *b) { a++; b++; n++;}
184         return (len - n)?(*a - *b):0;
185 }
186
187 /*******************************************************************
188 wide strstr()
189 ********************************************************************/
190 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
191 {
192         ucs2_t *r;
193         size_t slen, inslen;
194
195         if (!s || !*s || !ins || !*ins) return NULL;
196         slen = strlen_w(s);
197         inslen = strlen_w(ins);
198         r = (ucs2_t *)s;
199         while ((r = strchr_w(r, *ins))) {
200                 if (strncmp_w(r, ins, inslen) == 0) return r;
201                 r++;
202         }
203         return NULL;
204 }
205
206 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
207 {
208         ucs2_t *r;
209         size_t slen, inslen;
210
211         if (!s || !*s || !ins || !*ins) return NULL;
212         slen = strlen_w(s);
213         inslen = strlen_w(ins);
214         r = (ucs2_t *)s;
215         while ((r = strcasechr_w(r, *ins))) {
216                 if (strncasecmp_w(r, ins, inslen) == 0) return r;
217                 r++;
218         }
219         return NULL;
220 }
221
222
223
224
225 /*******************************************************************
226 case insensitive string comparison
227 ********************************************************************/
228 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
229 {
230         while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
231         return (tolower_w(*a) - tolower_w(*b));
232 }
233
234 /*******************************************************************
235 case insensitive string comparison, lenght limited
236 ********************************************************************/
237 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
238 {
239         size_t n = 0;
240         while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
241         return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
242 }
243
244 /*******************************************************************
245 duplicate string
246 ********************************************************************/
247 /* if len == 0 then duplicate the whole string */
248 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
249 {
250         ucs2_t *dest;
251
252         if (!len) len = strlen_w(src);
253         dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
254         if (!dest) {
255                 LOG (log_error, logtype_default, "strdup_w: out of memory!");
256                 return NULL;
257         }
258
259         memcpy(dest, src, len * sizeof(ucs2_t));
260         dest[len] = 0;
261
262         return dest;
263 }
264
265 ucs2_t *strdup_w(const ucs2_t *src)
266 {
267         return strndup_w(src, 0);
268 }
269
270 /*******************************************************************
271 copy a string with max len
272 ********************************************************************/
273
274 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
275 {
276         size_t len;
277
278         if (!dest || !src) return NULL;
279
280         for (len = 0; (src[len] != 0) && (len < max); len++)
281                 dest[len] = src[len];
282         while (len < max)
283                 dest[len++] = 0;
284
285         return dest;
286 }
287
288
289 /*******************************************************************
290 append a string of len bytes and add a terminator
291 ********************************************************************/
292
293 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
294 {
295         size_t start;
296         size_t len;
297
298         if (!dest || !src) return NULL;
299
300         start = strlen_w(dest);
301         len = strnlen_w(src, max);
302
303         memcpy(&dest[start], src, len*sizeof(ucs2_t));
304         dest[start+len] = 0;
305
306         return dest;
307 }
308
309
310 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
311 {
312         size_t start;
313         size_t len;
314
315         if (!dest || !src) return NULL;
316
317         start = strlen_w(dest);
318         len = strlen_w(src);
319
320         memcpy(&dest[start], src, len*sizeof(ucs2_t));
321         dest[start+len] = 0;
322
323         return dest;
324 }
325
326
327 /*******************************************************************
328 binary search for pre|decomposition
329 ********************************************************************/
330
331 static ucs2_t do_precomposition(unsigned int base, unsigned int comb) 
332 {
333         int min = 0;
334         int max = PRECOMP_COUNT - 1;
335         int mid;
336         u_int32_t sought = (base << 16) | comb, that;
337
338         /* binary search */
339         while (max >= min) {
340                 mid = (min + max) / 2;
341                 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
342                 if (that < sought) {
343                         min = mid + 1;
344                 } else if (that > sought) {
345                         max = mid - 1;
346                 } else {
347                         return precompositions[mid].replacement;
348                 }
349         }
350         /* no match */
351         return 0;
352 }
353
354 /* ------------------------ */
355 static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp) 
356 {
357         int min = 0;
358         int max = PRECOMP_SP_COUNT - 1;
359         int mid;
360         u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp;
361
362         /* binary search */
363         while (max >= min) {
364                 mid = (min + max) / 2;
365                 that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp);
366                 if (that_sp < sought_sp) {
367                         min = mid + 1;
368                 } else if (that_sp > sought_sp) {
369                         max = mid - 1;
370                 } else {
371                         return precompositions_sp[mid].replacement_sp;
372                 }
373         }
374         /* no match */
375         return 0;
376 }
377
378 /* -------------------------- */
379 static u_int32_t do_decomposition(ucs2_t base) 
380 {
381         int min = 0;
382         int max = DECOMP_COUNT - 1;
383         int mid;
384         u_int32_t sought = base;
385         u_int32_t result, that;
386
387         /* binary search */
388         while (max >= min) {
389                 mid = (min + max) / 2;
390                 that = decompositions[mid].replacement;
391                 if (that < sought) {
392                         min = mid + 1;
393                 } else if (that > sought) {
394                         max = mid - 1;
395                 } else {
396                         result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
397                         return result;
398                 }
399         }
400         /* no match */
401         return 0;
402 }
403
404 /* -------------------------- */
405 static u_int64_t do_decomposition_sp(unsigned int base_sp) 
406 {
407         int min = 0;
408         int max = DECOMP_SP_COUNT - 1;
409         int mid;
410         u_int32_t sought_sp = base_sp;
411         u_int32_t that_sp;
412         u_int64_t result_sp;
413
414         /* binary search */
415         while (max >= min) {
416                 mid = (min + max) / 2;
417                 that_sp = decompositions_sp[mid].replacement_sp;
418                 if (that_sp < sought_sp) {
419                         min = mid + 1;
420                 } else if (that_sp > sought_sp) {
421                         max = mid - 1;
422                 } else {
423                         result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp);
424                         return result_sp;
425                 }
426         }
427         /* no match */
428         return 0;
429 }
430
431 /*******************************************************************
432 pre|decomposition
433
434    we can't use static, this stuff needs to be reentrant
435    static char comp[MAXPATHLEN +1];
436
437    We don't implement Singleton and Canonical Ordering.
438    We ignore CompositionExclusions.txt.
439    because they cause the problem of the roundtrip
440    such as Dancing Icon.
441
442    exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
443    in precompose.h from composition according to AFP 3.x spec
444 ********************************************************************/
445
446 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
447 {
448         size_t i;
449         ucs2_t base, comb;
450         u_int32_t base_sp, comb_sp;
451         ucs2_t *in, *out;
452         ucs2_t hangul_lindex, hangul_vindex;
453         ucs2_t result;
454         u_int32_t result_sp;
455         size_t o_len = *outlen;
456         
457         if (!inplen || (inplen & 1) || inplen > o_len)
458                 return (size_t)-1;
459         
460         i = 0;
461         in  = name;
462         out = comp;
463         
464         base = *in;
465         while (*outlen > 2) {
466                 i += 2;
467                 in++;
468
469                 if (i == inplen) {
470                         *out = base;
471                         out++;
472                         *out = 0;
473                         *outlen -= 2;
474                         return o_len - *outlen;
475                 }
476
477                 comb = *in;
478                 result = 0;
479
480                 /* Non-Combination Character */
481                 if (comb < 0x300) ;
482                 
483                 /* Unicode Standard Annex #15 A10.3 Hangul Composition */
484                 /* Step 1 <L,V> */
485                 else if ((HANGUL_VBASE <= comb) && (comb <= HANGUL_VBASE + HANGUL_VCOUNT)) {
486                         if ((HANGUL_LBASE <= base) && (base < HANGUL_LBASE + HANGUL_LCOUNT)) {
487                                 result = 1;
488                                 hangul_lindex = base - HANGUL_LBASE;
489                                 hangul_vindex = comb - HANGUL_VBASE;
490                                 base = HANGUL_SBASE + (hangul_lindex * HANGUL_VCOUNT + hangul_vindex) * HANGUL_TCOUNT;
491                         }
492                 }
493                 
494                 /* Step 2 <LV,T> */
495                 else if ((HANGUL_TBASE < comb) && (comb < HANGUL_TBASE + HANGUL_TCOUNT)) {
496                         if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE +HANGUL_SCOUNT) && (((base - HANGUL_SBASE) % HANGUL_TCOUNT) == 0)) {
497                                 result = 1;
498                                 base += comb - HANGUL_TBASE;
499                         }
500                 }
501                 
502                 /* Binary Search for Surrogate Pair */
503                 else if ((0xD800 <= base) && (base < 0xDC00)) {
504                         if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
505                                 base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
506                                 do {
507                                         comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
508                                         if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
509                                                 base_sp = result_sp;
510                                                 i += 4;
511                                                 in +=2;
512                                         }
513                                 } while ((i + 4 <= inplen) && result_sp) ;
514
515                                 *out = base_sp >> 16;
516                                 out++;
517                                 *outlen -= 2;
518
519                                 if (*outlen <= 2) {
520                                         errno = E2BIG;
521                                         return (size_t)-1;
522                                 }
523
524                                 *out = base_sp & 0xFFFF;
525                                 out++;
526                                 *outlen -= 2;
527
528                                 i += 2;
529                                 in++;
530                                 base = *in;
531
532                                 result = 1;
533                         }
534                 }
535
536                 /* Binary Search for BMP */
537                 else if (result = do_precomposition(base, comb)) {
538                         base = result;
539                 }
540                 
541                 if (!result) {
542                         *out = base;
543                         out++;
544                         *outlen -= 2;
545                         base = comb;
546                 }
547         }
548
549         errno = E2BIG;
550         return (size_t)-1;
551 }
552
553 /* --------------- */
554 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
555 {
556         size_t i;
557         size_t comblen;
558         ucs2_t base, comb[COMBBUFLEN];
559         u_int32_t base_sp;
560         ucs2_t hangul_sindex, tjamo;
561         ucs2_t *in, *out;
562         unsigned int result;
563         u_int64_t result_sp;
564         size_t o_len = *outlen;
565
566         if (!inplen || (inplen & 1))
567                 return (size_t)-1;
568         i = 0;
569         in  = name;
570         out = comp;
571
572         while (i < inplen) {
573                 base = *in;
574                 comblen = 0;
575                 
576                 /* check ASCII first. this is frequent. */
577                 if (base <= 0x007f) ;
578                 
579                 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
580                 else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) {
581                         hangul_sindex = base - HANGUL_SBASE;
582                         base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT;
583                         comb[COMBBUFLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
584                         
585                         /* <L,V> */
586                         if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) {
587                                 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
588                                 comblen = 1;
589                         }
590                         
591                         /* <L,V,T> */
592                         else {
593                                 comb[COMBBUFLEN-1] = tjamo;
594                                 comblen = 2;
595                         }
596                 }
597                 
598                 /* Binary Search for Surrogate Pair */
599                 else if ((0xD800 <= base) && (base < 0xDC00)) {
600                         if (i + 2 < inplen) {
601                                 base_sp =  ((u_int32_t)base << 16) | (u_int32_t)in[1];
602                                 do {
603                                         if ( !(result_sp = do_decomposition_sp(base_sp))) break;
604                                         comblen += 2;
605                                         base_sp = result_sp >> 32;
606                                         comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF;  /* hi */
607                                         comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF;        /* lo */
608                                 } while (comblen < MAXCOMBSPLEN);
609
610                                 if (*outlen < (comblen + 1) << 1) {
611                                         errno = E2BIG;
612                                         return (size_t)-1;
613                                 }
614
615                                 *out = base_sp >> 16;   /* hi */
616                                 out++;
617                                 *outlen -= 2;
618                                 
619                                 base = base_sp & 0xFFFF; /* lo */
620                                 
621                                 i += 2;
622                                 in++;
623                         }
624                 }
625                         
626                 /* Binary Search for BMP */
627                 else {
628                         do {
629                                 if ( !(result = do_decomposition(base))) break;
630                                 comblen++;
631                                 base = result  >> 16;
632                                 comb[COMBBUFLEN-comblen] = result & 0xFFFF;
633                         } while ((0x007f < base) && (comblen < MAXCOMBLEN));
634                 }
635                 
636                 if (*outlen < (comblen + 1) << 1) {
637                         errno = E2BIG;
638                         return (size_t)-1;
639                 }
640                 
641                 *out = base;
642                 out++;
643                 *outlen -= 2;
644                 
645                 while ( comblen > 0 ) {
646                         *out = comb[COMBBUFLEN-comblen];
647                         out++;
648                         *outlen -= 2;
649                         comblen--;
650                 }
651                 
652                 i += 2;
653                 in++;
654         }
655         
656         *out = 0;
657         return o_len-*outlen;
658 }
659
/*******************************************************************
length of UTF-8 character and string
********************************************************************/

/*
 * Length in bytes of the UTF-8 sequence starting at utf8.
 *
 * Returns 1 to 4 for an accepted sequence (a 0 byte counts as a one byte
 * character) and (size_t)-1 when the bytes do not form an accepted sequence.
 * Bytes are examined left to right and checking stops at the first
 * unacceptable one, so nothing behind the terminator of a 0-terminated
 * string is read.
 *
 * NOTE(review): lead bytes 0xE1..0xEF are all treated alike, so 0xED
 * followed by 0xA0..0xBF (an encoded surrogate) is accepted.
 */
size_t utf8_charlen ( char* utf8 )
{
        /* one row per lead byte range: allowed window for the second byte
           and total sequence length; bytes 3 and 4 are always 0x80..0xBF */
        static const struct {
                unsigned char lead_lo, lead_hi;
                unsigned char second_lo, second_hi;
                size_t length;
        } forms[] = {
                { 0xC2, 0xDF, 0x80, 0xBF, 2 },
                { 0xE0, 0xE0, 0xA0, 0xBF, 3 },
                { 0xE1, 0xEF, 0x80, 0xBF, 3 },
                { 0xF0, 0xF0, 0x90, 0xBF, 4 },
                { 0xF1, 0xF3, 0x80, 0xBF, 4 },
                { 0xF4, 0xF4, 0x80, 0x8F, 4 },
        };
        const unsigned char *b = (const unsigned char *) utf8;
        size_t f, k;

        if (b[0] < 0x80)
                return (1);

        for (f = 0; f < sizeof(forms) / sizeof(forms[0]); f++) {
                if (b[0] < forms[f].lead_lo || b[0] > forms[f].lead_hi)
                        continue;

                /* lead byte ranges are disjoint: this row decides */
                if (b[1] < forms[f].second_lo || b[1] > forms[f].second_hi)
                        return ((size_t) -1);

                for (k = 2; k < forms[f].length; k++) {
                        if (b[k] < 0x80 || b[k] > 0xBF)
                                return ((size_t) -1);
                }

                return forms[f].length;
        }

        /* 0x80..0xC1 and 0xF5..0xFF never start a sequence */
        return ((size_t) -1);
}
687
688
/*
 * Count the characters of a 0-terminated UTF-8 string while validating it.
 *
 * Returns the number of characters (not bytes), or (size_t)-1 as soon as a
 * byte sequence is met that is not accepted.
 * see http://www.unicode.org/unicode/reports/tr27/ for an explanation
 *
 * NOTE(review): lead bytes 0xE1..0xEF are all treated alike, so 0xED
 * followed by 0xA0..0xBF (an encoded surrogate) is accepted.
 */
size_t utf8_strlen_validate ( char * utf8 )
{
        const unsigned char *cur = (const unsigned char *) utf8;
        size_t count = 0;

        for (; *cur != '\0'; count++) {
                const unsigned char lead = *cur;
                /* window for the second byte; narrowed for some lead bytes */
                unsigned char second_min = 0x80;
                unsigned char second_max = 0xBF;
                size_t seqlen;
                size_t k;

                if (lead < 0x80) {
                        cur++;
                        continue;
                }

                if (lead > 0xC1 && lead < 0xE0) {
                        seqlen = 2;
                } else if (lead == 0xE0) {
                        seqlen = 3;
                        second_min = 0xA0;
                } else if (lead > 0xE0 && lead < 0xF0) {
                        seqlen = 3;
                } else if (lead == 0xF0) {
                        seqlen = 4;
                        second_min = 0x90;
                } else if (lead > 0xF0 && lead < 0xF4) {
                        seqlen = 4;
                } else if (lead == 0xF4) {
                        seqlen = 4;
                        second_max = 0x8F;
                } else {
                        return ((size_t) -1);
                }

                if (cur[1] < second_min || cur[1] > second_max)
                        return ((size_t) -1);

                /* remaining bytes are plain continuation bytes; stop at the
                   first bad one so the terminator is never stepped over */
                for (k = 2; k < seqlen; k++) {
                        if (cur[k] < 0x80 || cur[k] > 0xBF)
                                return ((size_t) -1);
                }

                cur += seqlen;
        }

        return (count);
}