3 #endif /* HAVE_CONFIG_H */
10 #include <atalk/logger.h>
13 #include <netatalk/endian.h>
15 #include <atalk/unicode.h>
16 #include "ucs2_casetable.h"
17 #include "precompose.h"
18 #include "byteorder.h"
20 /*******************************************************************
21 Convert a wide character to upper/lower case.
22 ********************************************************************/
/*
 * toupper_w: upper-case mapping for a single ucs2_t value, done by table
 * lookup. Every visible test selects one of upcase_table_1..upcase_table_8
 * for a contiguous range of values and indexes it with val minus the first
 * value of that range. What is returned for a value outside all of these
 * ranges is not visible in this excerpt.
 */
23 ucs2_t toupper_w(ucs2_t val)
25 if ( val >= 0x0040 && val <= 0x007F)
26 return upcase_table_1[val-0x0040];
27 if ( val >= 0x00C0 && val <= 0x02BF)
28 return upcase_table_2[val-0x00C0];
29 if ( val >= 0x0380 && val <= 0x04FF)
30 return upcase_table_3[val-0x0380];
31 if ( val >= 0x0540 && val <= 0x05BF)
32 return upcase_table_4[val-0x0540];
33 if ( val >= 0x1E00 && val <= 0x1FFF)
34 return upcase_table_5[val-0x1E00];
35 if ( val >= 0x2140 && val <= 0x217F)
36 return upcase_table_6[val-0x2140];
37 if ( val >= 0x24C0 && val <= 0x24FF)
38 return upcase_table_7[val-0x24C0];
39 if ( val >= 0xFF40 && val <= 0xFF7F)
40 return upcase_table_8[val-0xFF40];
/*
 * tolower_w: lower-case mapping for a single ucs2_t value, done by table
 * lookup. Every visible test selects one of lowcase_table_1..lowcase_table_7
 * for a contiguous range of values and indexes it with val minus the first
 * value of that range. The ranges are not the same as those tested by
 * toupper_w (for example 0xFF00-0xFF3F here versus 0xFF40-0xFF7F there).
 * What is returned for a value outside all of these ranges is not visible
 * in this excerpt.
 */
46 ucs2_t tolower_w(ucs2_t val)
48 if ( val >= 0x0040 && val <= 0x007F)
49 return lowcase_table_1[val-0x0040];
50 if ( val >= 0x00C0 && val <= 0x023F)
51 return lowcase_table_2[val-0x00C0];
52 if ( val >= 0x0380 && val <= 0x057F)
53 return lowcase_table_3[val-0x0380];
54 if ( val >= 0x1E00 && val <= 0x1FFF)
55 return lowcase_table_4[val-0x1E00];
56 if ( val >= 0x2140 && val <= 0x217F)
57 return lowcase_table_5[val-0x2140];
58 if ( val >= 0x2480 && val <= 0x24FF)
59 return lowcase_table_6[val-0x2480];
60 if ( val >= 0xFF00 && val <= 0xFF3F)
61 return lowcase_table_7[val-0xFF00];
66 /*******************************************************************
67 Convert a string to lower case.
68 return True if any char is converted
69 ********************************************************************/
/*
 * strlower_w: operates on the writable ucs2_t string s. The only visible
 * step is the per-character lookup v = tolower_w(*s); the loop over the
 * string, any store back into s and the value returned are outside this
 * excerpt.
 */
70 int strlower_w(ucs2_t *s)
74 ucs2_t v = tolower_w(*s);
84 /*******************************************************************
85 Convert a string to upper case.
86 return True if any char is converted
87 ********************************************************************/
/*
 * strupper_w: operates on the writable ucs2_t string s. The only visible
 * step is the per-character lookup v = toupper_w(*s); the loop over the
 * string, any store back into s and the value returned are outside this
 * excerpt.
 */
88 int strupper_w(ucs2_t *s)
92 ucs2_t v = toupper_w(*s);
103 /*******************************************************************
104 determine if a character is lowercase
105 ********************************************************************/
/*
 * islower_w: returns non-zero when tolower_w(c) yields c itself, i.e. for
 * every value that tolower_w() maps to itself, and zero otherwise.
 */
106 int islower_w(ucs2_t c)
108 return ( c == tolower_w(c));
111 /*******************************************************************
112 determine if a character is uppercase
113 ********************************************************************/
/*
 * isupper_w: returns non-zero when toupper_w(c) yields c itself, i.e. for
 * every value that toupper_w() maps to itself, and zero otherwise.
 */
114 int isupper_w(ucs2_t c)
116 return ( c == toupper_w(c));
120 /*******************************************************************
121 Count the number of characters in a ucs2_t string.
122 ********************************************************************/
/*
 * strlen_w: walks src one ucs2_t element at a time and increments len for
 * every non-zero element, stopping at the first element equal to 0.
 * The declaration of len and the return statement are not visible in this
 * excerpt.
 */
123 size_t strlen_w(const ucs2_t *src)
/* empty loop body: all the work happens in the loop header */
127 for(len = 0; *src++; len++) ;
132 /*******************************************************************
133 Count up to max number of characters in a ucs2_t string.
134 ********************************************************************/
135 size_t strnlen_w(const ucs2_t *src, size_t max)
139 for(len = 0; *src++ && (len < max); len++) ;
144 /*******************************************************************
146 ********************************************************************/
/*
 * strchr_w: returns a pointer (with const cast away) to an element of s
 * that compares equal to c.
 * The same comparison appears twice in the visible lines; the loop around
 * them is not visible. Presumably the first one runs inside the scan loop
 * and the second one runs after it, so that c == 0 matches the terminator
 * -- confirm against the full source.
 */
147 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
150 if (c == *s) return (ucs2_t *)s;
153 if (c == *s) return (ucs2_t *)s;
/*
 * strcasechr_w: case-insensitive variant of the character search. Returns a
 * pointer (with const cast away) to an element of s whose toupper_w() value
 * equals toupper_w(c).
 * The loop around the visible lines is not shown. The second visible test
 * is an exact comparison (no case folding); presumably it runs after the
 * scan loop so that c == 0 matches the terminator -- confirm against the
 * full source.
 */
158 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
161 /* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
162 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
165 if (c == *s) return (ucs2_t *)s;
/*
 * strcmp_w: compare two ucs2_t strings element by element.
 * The visible loop advances a and b together for as long as *b is non-zero
 * and the current elements are equal, so it stops at the first difference
 * or at the end of b. The return statement is not visible in this excerpt.
 */
171 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
173 while (*b && *a == *b) { a++; b++; }
175 /* warning: if *a != *b and both are not 0 we return a random
176 greater or lesser than 0 number not related to which
/*
 * strncmp_w: compare at most len elements of two ucs2_t strings.
 * n counts the elements that matched so far (its declaration is not visible
 * in this excerpt). The loop stops when n reaches len, when b ends, or at
 * the first differing element.
 * Returns 0 when all len elements matched (len - n == 0); otherwise returns
 * the difference *a - *b at the position where the loop stopped.
 */
180 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
183 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
184 return (len - n)?(*a - *b):0;
187 /*******************************************************************
189 ********************************************************************/
/*
 * strstr_w: search the ucs2_t string s for the substring ins.
 * Returns NULL immediately when either pointer is NULL or either string is
 * empty. Otherwise strchr_w() is used to find candidates that start with
 * the first element of ins, and each candidate is checked with strncmp_w()
 * over the full length of ins; the first candidate that matches is returned.
 * The initialisation and advancing of r, and the final return, are not
 * visible in this excerpt.
 */
190 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
195 if (!s || !*s || !ins || !*ins) return NULL;
197 inslen = strlen_w(ins);
199 while ((r = strchr_w(r, *ins))) {
200 if (strncmp_w(r, ins, inslen) == 0) return r;
/*
 * strcasestr_w: case-insensitive search of the ucs2_t string s for the
 * substring ins.
 * Returns NULL immediately when either pointer is NULL or either string is
 * empty. Otherwise strcasechr_w() is used to find candidates that start
 * with the first element of ins, and each candidate is checked with
 * strncasecmp_w() over the full length of ins; the first candidate that
 * matches is returned.
 * The initialisation and advancing of r, and the final return, are not
 * visible in this excerpt.
 */
206 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
211 if (!s || !*s || !ins || !*ins) return NULL;
213 inslen = strlen_w(ins);
215 while ((r = strcasechr_w(r, *ins))) {
216 if (strncasecmp_w(r, ins, inslen) == 0) return r;
225 /*******************************************************************
226 case insensitive string comparison
227 ********************************************************************/
/*
 * strcasecmp_w: compare two ucs2_t strings ignoring case.
 * The loop advances while b has not ended and the toupper_w() values of the
 * current elements are equal. The result is the difference of the
 * tolower_w() values at the position where the loop stopped; note that
 * equality is tested with the upper-case mapping while the returned
 * difference uses the lower-case mapping.
 */
228 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
230 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
231 return (tolower_w(*a) - tolower_w(*b));
234 /*******************************************************************
235 case insensitive string comparison, length limited
236 ********************************************************************/
/*
 * strncasecmp_w: compare at most len elements of two ucs2_t strings
 * ignoring case.
 * n counts the elements that matched so far (its declaration is not visible
 * in this excerpt). Elements match when their toupper_w() values are equal.
 * Returns 0 when all len elements matched (len - n == 0); otherwise returns
 * the difference of the tolower_w() values at the position where the loop
 * stopped.
 */
237 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
240 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
241 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
244 /*******************************************************************
246 ********************************************************************/
247 /* if len == 0 then duplicate the whole string */
/*
 * strndup_w: copy len elements of src into newly malloc'ed storage.
 *
 *   src - string to copy
 *   len - number of elements to copy; 0 means "use strlen_w(src)"
 *
 * Room for len + 1 elements is allocated and len elements are copied with
 * memcpy. The failure check around the LOG call, the store of the
 * terminating element and the return statement are not visible in this
 * excerpt. The log message names strdup_w although it is emitted from
 * strndup_w.
 */
248 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
252 if (!len) len = strlen_w(src);
253 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
255 LOG (log_error, logtype_default, "strdup_w: out of memory!");
259 memcpy(dest, src, len * sizeof(ucs2_t));
/*
 * strdup_w: duplicate a whole ucs2_t string by calling strndup_w() with a
 * length of 0, which strndup_w() replaces with strlen_w(src).
 * Returns whatever strndup_w() returns.
 */
265 ucs2_t *strdup_w(const ucs2_t *src)
267 return strndup_w(src, 0);
270 /*******************************************************************
271 copy a string with max len
272 ********************************************************************/
274 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
278 if (!dest || !src) return NULL;
280 for (len = 0; (src[len] != 0) && (len < max); len++)
281 dest[len] = src[len];
289 /*******************************************************************
290 append at most max characters of a string and add a terminator
291 ********************************************************************/
/*
 * strncat_w: append at most max elements of src to the end of dest.
 * Returns NULL when dest or src is NULL. Otherwise start is the current
 * length of dest (strlen_w), len is the length of src limited to max
 * (strnlen_w), and len elements are copied to dest[start] with memcpy.
 * The caller must provide enough room in dest; no capacity is passed in.
 * The store of the terminating element and the normal return value are not
 * visible in this excerpt.
 */
293 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
298 if (!dest || !src) return NULL;
300 start = strlen_w(dest);
301 len = strnlen_w(src, max);
303 memcpy(&dest[start], src, len*sizeof(ucs2_t));
/*
 * strcat_w: append src to the end of dest.
 * Returns NULL when dest or src is NULL. Otherwise start is the current
 * length of dest (strlen_w) and len elements of src are copied to
 * dest[start] with memcpy; the statement that computes len is not visible
 * in this excerpt, nor are the terminating store and the normal return.
 * The caller must provide enough room in dest; no capacity is passed in.
 */
310 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
315 if (!dest || !src) return NULL;
317 start = strlen_w(dest);
320 memcpy(&dest[start], src, len*sizeof(ucs2_t));
327 /*******************************************************************
328 binary search for pre|decomposition
329 ********************************************************************/
/*
 * do_precomposition: look up the pair (base, comb) in the precompositions[]
 * table and return the table entry's replacement value on a match.
 * The search key is base in the upper 16 bits and comb in the lower 16 bits
 * of a 32-bit value; table entries are turned into the same form before
 * they are compared. The index range starts with max = PRECOMP_COUNT - 1
 * and is probed at its midpoint; the statements that narrow the range, the
 * loop itself and the value returned when nothing matches are not visible
 * in this excerpt.
 */
331 static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
334 int max = PRECOMP_COUNT - 1;
336 u_int32_t sought = (base << 16) | comb, that;
340 mid = (min + max) / 2;
341 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
344 } else if (that > sought) {
347 return precompositions[mid].replacement;
354 /* ------------------------ */
/*
 * do_precomposition_sp: look up the pair (base_sp, comb_sp) in the
 * precompositions_sp[] table and return the entry's replacement_sp value on
 * a match.
 * The search key is base_sp in the upper 32 bits and comb_sp in the lower
 * 32 bits of a 64-bit value; table entries are turned into the same form
 * before they are compared. The index range starts with
 * max = PRECOMP_SP_COUNT - 1 and is probed at its midpoint; the statements
 * that narrow the range, the loop itself and the value returned when
 * nothing matches are not visible in this excerpt.
 */
355 static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
358 int max = PRECOMP_SP_COUNT - 1;
360 u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp;
364 mid = (min + max) / 2;
365 that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp);
366 if (that_sp < sought_sp) {
368 } else if (that_sp > sought_sp) {
371 return precompositions_sp[mid].replacement_sp;
378 /* -------------------------- */
/*
 * do_decomposition: look up base among the replacement fields of the
 * decompositions[] table.
 * On a match the entry's base and comb fields are packed into result, with
 * base in the upper 16 bits and comb in the lower 16 bits. The index range
 * starts with max = DECOMP_COUNT - 1 and is probed at its midpoint; the
 * statements that narrow the range, the loop itself, the return statement
 * and the value produced when nothing matches are not visible in this
 * excerpt.
 */
379 static u_int32_t do_decomposition(ucs2_t base)
382 int max = DECOMP_COUNT - 1;
384 u_int32_t sought = base;
385 u_int32_t result, that;
389 mid = (min + max) / 2;
390 that = decompositions[mid].replacement;
393 } else if (that > sought) {
396 result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
404 /* -------------------------- */
/*
 * do_decomposition_sp: look up base_sp among the replacement_sp fields of
 * the decompositions_sp[] table.
 * On a match the entry's base_sp and comb_sp fields are packed into
 * result_sp, with base_sp in the upper 32 bits and comb_sp in the lower
 * 32 bits. The index range starts with max = DECOMP_SP_COUNT - 1 and is
 * probed at its midpoint; the statements that narrow the range, the loop
 * itself, the return statement and the value produced when nothing matches
 * are not visible in this excerpt.
 */
405 static u_int64_t do_decomposition_sp(unsigned int base_sp)
408 int max = DECOMP_SP_COUNT - 1;
410 u_int32_t sought_sp = base_sp;
416 mid = (min + max) / 2;
417 that_sp = decompositions_sp[mid].replacement_sp;
418 if (that_sp < sought_sp) {
420 } else if (that_sp > sought_sp) {
423 result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp);
431 /*******************************************************************
434 we can't use static, this stuff needs to be reentrant
435 static char comp[MAXPATHLEN +1];
437 We don't implement Singleton and Canonical Ordering,
438 and we ignore CompositionExclusions.txt,
439 because applying them breaks the roundtrip conversion
440 (e.g. the Dancing Icon problem).
442 exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
443 in precompose.h from composition according to AFP 3.x spec
444 ********************************************************************/
/*
 * precompose_w: combine a base character with following combining
 * characters, reading from name and writing to comp.
 *
 *   name   - input ucs2_t string
 *   inplen - input length; must be non-zero, even and not larger than
 *            *outlen (it appears to be a byte count: a surrogate pair plus
 *            its combining pair is tested with i + 4 <= inplen)
 *   comp   - output buffer
 *   outlen - in: space available in comp; it is decreased as output is
 *            produced
 *
 * Returns o_len - *outlen, i.e. the amount of output space consumed, where
 * o_len is the value of *outlen on entry. The value returned when the
 * inplen check fails is not visible in this excerpt.
 *
 * Three kinds of pairs are combined: Hangul jamo (arithmetically),
 * surrogate pairs (via do_precomposition_sp) and BMP characters (via
 * do_precomposition). Many statements of the body are not visible here.
 *
 * Fix: the vowel test for Hangul <L,V> composition now uses a strict upper
 * bound (comb < HANGUL_VBASE + HANGUL_VCOUNT), in line with the L and T
 * tests below and with UAX #15 (0 <= VIndex < VCount). With "<=" the value
 * HANGUL_VBASE + HANGUL_VCOUNT was accepted as vowel index HANGUL_VCOUNT
 * and composed into a syllable belonging to the next leading consonant.
 * The two assignments used as conditions are also parenthesised, as done
 * in strstr_w(), to mark them as intentional.
 */
446 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
450 u_int32_t base_sp, comb_sp;
452 ucs2_t hangul_lindex, hangul_vindex;
/* remember the initial output space to compute the return value */
455 size_t o_len = *outlen;
/* reject empty input, odd lengths and input larger than the output space */
457 if (!inplen || (inplen & 1) || inplen > o_len)
465 while (*outlen > 2) {
474 return o_len - *outlen;
480 /* Non-Combination Character */
483 /* Unicode Standard Annex #15 A10.3 Hangul Composition */
/* <L,V>: comb must be a vowel jamo, i.e. VBASE <= comb < VBASE + VCOUNT */
485 else if ((HANGUL_VBASE <= comb) && (comb < HANGUL_VBASE + HANGUL_VCOUNT)) {
486 if ((HANGUL_LBASE <= base) && (base < HANGUL_LBASE + HANGUL_LCOUNT)) {
488 hangul_lindex = base - HANGUL_LBASE;
489 hangul_vindex = comb - HANGUL_VBASE;
490 base = HANGUL_SBASE + (hangul_lindex * HANGUL_VCOUNT + hangul_vindex) * HANGUL_TCOUNT;
/* <LV,T>: comb is a trailing jamo (TBASE itself excluded) and base is a
   syllable whose offset from SBASE is a multiple of TCOUNT */
495 else if ((HANGUL_TBASE < comb) && (comb < HANGUL_TBASE + HANGUL_TCOUNT)) {
496 if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE +HANGUL_SCOUNT) && (((base - HANGUL_SBASE) % HANGUL_TCOUNT) == 0)) {
498 base += comb - HANGUL_TBASE;
502 /* Binary Search for Surrogate Pair */
/* base in 0xD800..0xDBFF followed by comb in 0xDC00..0xDFFF, with room
   for one more pair in the input */
503 else if ((0xD800 <= base) && (base < 0xDC00)) {
504 if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
/* pack each pair as hi << 16 | lo */
505 base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
507 comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
508 if ((result_sp = do_precomposition_sp(base_sp, comb_sp))) {
513 } while ((i + 4 <= inplen) && result_sp) ;
/* write the packed pair back as two ucs2_t units: hi, then lo */
515 *out = base_sp >> 16;
524 *out = base_sp & 0xFFFF;
536 /* Binary Search for BMP */
537 else if ((result = do_precomposition(base, comb))) {
553 /* --------------- */
/*
 * decompose_w: split characters of name into a base character followed by
 * combining characters, writing the result to comp.
 *
 *   name   - input ucs2_t string
 *   inplen - input length; must be non-zero and even
 *   comp   - output buffer
 *   outlen - in: space available in comp; it is decreased as output is
 *            produced
 *
 * Returns o_len - *outlen, i.e. the amount of output space consumed, where
 * o_len is the value of *outlen on entry. The value returned when the
 * inplen check fails is not visible in this excerpt.
 *
 * Combining characters are collected at the END of comb[]: with comblen
 * entries pending, the next one to emit is comb[COMBBUFLEN - comblen].
 * Many statements of the body are not visible here.
 */
554 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
558 ucs2_t base, comb[COMBBUFLEN];
560 ucs2_t hangul_sindex, tjamo;
/* remember the initial output space to compute the return value */
564 size_t o_len = *outlen;
/* reject empty input and odd lengths */
566 if (!inplen || (inplen & 1))
576 /* check ASCII first. this is frequent. */
/* empty statement: values up to 0x007f skip every branch below */
577 if (base <= 0x007f) ;
579 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
580 else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) {
581 hangul_sindex = base - HANGUL_SBASE;
/* leading consonant becomes the new base; the vowel goes into the
   second-to-last slot of comb[] */
582 base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT;
583 comb[COMBBUFLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
/* tjamo == HANGUL_TBASE means trailing index 0: the vowel is moved to the
   last slot instead of storing a trailing consonant */
586 if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) {
587 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
593 comb[COMBBUFLEN-1] = tjamo;
598 /* Binary Search for Surrogate Pair */
599 else if ((0xD800 <= base) && (base < 0xDC00)) {
600 if (i + 2 < inplen) {
/* pack the pair as hi << 16 | lo */
601 base_sp = ((u_int32_t)base << 16) | (u_int32_t)in[1];
/* stop as soon as the lookup returns 0 */
603 if ( !(result_sp = do_decomposition_sp(base_sp))) break;
/* upper 32 bits: new base pair; lower 32 bits: combining pair (hi, lo) */
605 base_sp = result_sp >> 32;
606 comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */
607 comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */
608 } while (comblen < MAXCOMBSPLEN);
/* output needs room for the pending combining units plus one more unit,
   two bytes each */
610 if (*outlen < (comblen + 1) << 1) {
615 *out = base_sp >> 16; /* hi */
619 base = base_sp & 0xFFFF; /* lo */
626 /* Binary Search for BMP */
/* stop as soon as the lookup returns 0 */
629 if ( !(result = do_decomposition(base))) break;
/* lower 16 bits of the lookup result are stored as a combining character */
632 comb[COMBBUFLEN-comblen] = result & 0xFFFF;
633 } while ((0x007f < base) && (comblen < MAXCOMBLEN));
/* same space check as in the surrogate branch */
636 if (*outlen < (comblen + 1) << 1) {
/* emit the pending combining characters from comb[] */
645 while ( comblen > 0 ) {
646 *out = comb[COMBBUFLEN-comblen];
657 return o_len-*outlen;
660 /*******************************************************************
661 length of UTF-8 character and string
662 ********************************************************************/
/*
 * utf8_charlen: classify the UTF-8 sequence that starts at utf8.
 * Bytes are inspected as unsigned char. Each visible branch accepts one
 * lead-byte class together with the required following bytes:
 *   0xC2..0xDF  + one byte 0x80..0xBF
 *   0xE0        + 0xA0..0xBF + 0x80..0xBF
 *   0xE1..0xEF  + 0x80..0xBF + 0x80..0xBF
 *   0xF0        + 0x90..0xBF + two bytes 0x80..0xBF
 *   0xF1..0xF3  + three bytes 0x80..0xBF
 *   0xF4        + 0x80..0x8F + two bytes 0x80..0xBF
 * The value returned by each branch, and the handling of bytes below the
 * first visible range, are not visible in this excerpt. When no branch
 * matches, (size_t)-1 is returned.
 * A following byte is only read after the previous test succeeded, and a
 * 0 byte fails every "> 0x7f" test, so the checks stop at a terminator.
 * NOTE(review): no separate case for lead byte 0xED is visible, so it is
 * handled by the 0xE1..0xEF branch -- confirm whether that is intended.
 */
664 size_t utf8_charlen ( char* utf8 )
668 p = (unsigned char*) utf8;
672 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
674 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
676 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
678 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
680 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
682 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
685 return ((size_t) -1);
/*
 * utf8_strlen_validate: walk the UTF-8 string utf8 and validate it.
 * Bytes are inspected as unsigned char. The visible branches apply the same
 * lead-byte / following-byte tests as utf8_charlen():
 *   0xC2..0xDF  + one byte 0x80..0xBF
 *   0xE0        + 0xA0..0xBF + 0x80..0xBF
 *   0xE1..0xEF  + 0x80..0xBF + 0x80..0xBF
 *   0xF0        + 0x90..0xBF + two bytes 0x80..0xBF
 *   0xF1..0xF3  + three bytes 0x80..0xBF
 *   0xF4        + 0x80..0x8F + two bytes 0x80..0xBF
 * When a sequence matches none of them, (size_t)-1 is returned. The loop,
 * the statements executed by each branch and the value returned for a valid
 * string are not visible in this excerpt.
 */
689 size_t utf8_strlen_validate ( char * utf8 )
694 p = (unsigned char*) utf8;
697 /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
704 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
707 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
710 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
713 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
716 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
719 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
723 return ((size_t) -1);