3 #endif /* HAVE_CONFIG_H */
10 #include <atalk/logger.h>
13 #include <netatalk/endian.h>
15 #include <atalk/unicode.h>
16 #include "ucs2_casetable.h"
17 #include "precompose.h"
18 #include "byteorder.h"
/*
 * Constants for the arithmetic Hangul syllable handling in precompose_w()
 * and decompose_w() (those functions cite Unicode Standard Annex #15).
 */
/* Start of the precomposed syllable range tested as
 * HANGUL_SBASE <= base < HANGUL_SBASE + HANGUL_SCOUNT. */
20 #define HANGUL_SBASE 0xAC00
/* Subtracted from the base character to obtain hangul_lindex. */
21 #define HANGUL_LBASE 0x1100
/* Subtracted from the combining character to obtain hangul_vindex. */
22 #define HANGUL_VBASE 0x1161
/* Trailing-jamo base: only values strictly greater than this are composed,
 * and a decomposed trailing value equal to it is special-cased. */
23 #define HANGUL_TBASE 0x11A7
/* Sizes of the L, V and T index ranges. */
24 #define HANGUL_LCOUNT 19
25 #define HANGUL_VCOUNT 21
26 #define HANGUL_TCOUNT 28
/* Number of syllables sharing one L index. */
27 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) /* 588 */
/* Total number of precomposed syllables. */
28 #define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT) /* 11172 */
/* decompose_w() stops expanding a surrogate pair once comblen reaches
 * MAXCOMBSPLEN << 1. */
31 #define MAXCOMBSPLEN 2
/* Size of the comb[] scratch array in decompose_w().
 * NOTE(review): MAXCOMBLEN is used here and in decompose_w() but its
 * definition is not among the visible lines. */
32 #define COMBBUFLEN 4 /* max(MAXCOMBLEN, MAXCOMBSPLEN*2) */
34 /*******************************************************************
35 Convert a wide character to upper/lower case.
36 ********************************************************************/
37 ucs2_t toupper_w(ucs2_t val)
39 if ( val >= 0x0040 && val <= 0x007F)
40 return upcase_table_1[val-0x0040];
41 if ( val >= 0x00C0 && val <= 0x02BF)
42 return upcase_table_2[val-0x00C0];
43 if ( val >= 0x0380 && val <= 0x04FF)
44 return upcase_table_3[val-0x0380];
45 if ( val >= 0x0540 && val <= 0x05BF)
46 return upcase_table_4[val-0x0540];
47 if ( val >= 0x1E00 && val <= 0x1FFF)
48 return upcase_table_5[val-0x1E00];
49 if ( val >= 0x2140 && val <= 0x217F)
50 return upcase_table_6[val-0x2140];
51 if ( val >= 0x24C0 && val <= 0x24FF)
52 return upcase_table_7[val-0x24C0];
53 if ( val >= 0xFF40 && val <= 0xFF7F)
54 return upcase_table_8[val-0xFF40];
60 ucs2_t tolower_w(ucs2_t val)
62 if ( val >= 0x0040 && val <= 0x007F)
63 return lowcase_table_1[val-0x0040];
64 if ( val >= 0x00C0 && val <= 0x023F)
65 return lowcase_table_2[val-0x00C0];
66 if ( val >= 0x0380 && val <= 0x057F)
67 return lowcase_table_3[val-0x0380];
68 if ( val >= 0x1E00 && val <= 0x1FFF)
69 return lowcase_table_4[val-0x1E00];
70 if ( val >= 0x2140 && val <= 0x217F)
71 return lowcase_table_5[val-0x2140];
72 if ( val >= 0x2480 && val <= 0x24FF)
73 return lowcase_table_6[val-0x2480];
74 if ( val >= 0xFF00 && val <= 0xFF3F)
75 return lowcase_table_7[val-0xFF00];
80 /*******************************************************************
81 Convert a string to lower case.
82 return True if any char is converted
83 ********************************************************************/
84 int strlower_w(ucs2_t *s)
88 ucs2_t v = tolower_w(*s);
98 /*******************************************************************
99 Convert a string to upper case.
100 return True if any char is converted
101 ********************************************************************/
102 int strupper_w(ucs2_t *s)
106 ucs2_t v = toupper_w(*s);
117 /*******************************************************************
118 determine if a character is lowercase
119 ********************************************************************/
120 int islower_w(ucs2_t c)
122 return ( c == tolower_w(c));
125 /*******************************************************************
126 determine if a character is uppercase
127 ********************************************************************/
128 int isupper_w(ucs2_t c)
130 return ( c == toupper_w(c));
134 /*******************************************************************
135 Count the number of characters in a ucs2_t string.
136 ********************************************************************/
137 size_t strlen_w(const ucs2_t *src)
141 for(len = 0; *src++; len++) ;
146 /*******************************************************************
147 Count up to max number of characters in a ucs2_t string.
148 ********************************************************************/
149 size_t strnlen_w(const ucs2_t *src, size_t max)
153 for(len = 0; *src++ && (len < max); len++) ;
158 /*******************************************************************
160 ********************************************************************/
161 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
164 if (c == *s) return (ucs2_t *)s;
167 if (c == *s) return (ucs2_t *)s;
172 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
175 /* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
176 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
179 if (c == *s) return (ucs2_t *)s;
185 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
187 while (*b && *a == *b) { a++; b++; }
189 /* warning: if *a != *b and both are not 0 we retrun a random
190 greater or lesser than 0 number not realted to which
194 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
197 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
198 return (len - n)?(*a - *b):0;
201 /*******************************************************************
203 ********************************************************************/
204 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
209 if (!s || !*s || !ins || !*ins) return NULL;
211 inslen = strlen_w(ins);
213 while ((r = strchr_w(r, *ins))) {
214 if (strncmp_w(r, ins, inslen) == 0) return r;
220 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
225 if (!s || !*s || !ins || !*ins) return NULL;
227 inslen = strlen_w(ins);
229 while ((r = strcasechr_w(r, *ins))) {
230 if (strncasecmp_w(r, ins, inslen) == 0) return r;
239 /*******************************************************************
240 case insensitive string comparison
241 ********************************************************************/
242 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
244 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
245 return (tolower_w(*a) - tolower_w(*b));
248 /*******************************************************************
249 case insensitive string comparison, lenght limited
250 ********************************************************************/
251 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
254 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
255 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
258 /*******************************************************************
260 ********************************************************************/
261 /* if len == 0 then duplicate the whole string */
262 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
266 if (!len) len = strlen_w(src);
267 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
269 LOG (log_error, logtype_default, "strdup_w: out of memory!");
273 memcpy(dest, src, len * sizeof(ucs2_t));
279 ucs2_t *strdup_w(const ucs2_t *src)
281 return strndup_w(src, 0);
284 /*******************************************************************
285 copy a string with max len
286 ********************************************************************/
288 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
292 if (!dest || !src) return NULL;
294 for (len = 0; (src[len] != 0) && (len < max); len++)
295 dest[len] = src[len];
303 /*******************************************************************
304 append a string of len bytes and add a terminator
305 ********************************************************************/
307 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
312 if (!dest || !src) return NULL;
314 start = strlen_w(dest);
315 len = strnlen_w(src, max);
317 memcpy(&dest[start], src, len*sizeof(ucs2_t));
324 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
329 if (!dest || !src) return NULL;
331 start = strlen_w(dest);
334 memcpy(&dest[start], src, len*sizeof(ucs2_t));
341 /*******************************************************************
342 binary search for pre|decomposition
343 ********************************************************************/
345 static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
348 int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1;
350 u_int32_t sought = (base << 16) | comb, that;
354 mid = (min + max) / 2;
355 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
358 } else if (that > sought) {
361 return precompositions[mid].replacement;
368 /* ------------------------ */
369 static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
372 int max = sizeof(precompositions_sp) / sizeof(precompositions_sp[0]) - 1;
374 u_int64_t sought = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that;
378 mid = (min + max) / 2;
379 that = ((u_int64_t)precompositions_sp[mid].base << 32) | ((u_int64_t)precompositions_sp[mid].comb);
382 } else if (that > sought) {
385 return precompositions_sp[mid].replacement;
392 /* -------------------------- */
393 static u_int32_t do_decomposition(ucs2_t base)
396 int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1;
398 u_int32_t sought = base;
399 u_int32_t result, that;
403 mid = (min + max) / 2;
404 that = decompositions[mid].replacement;
407 } else if (that > sought) {
410 result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
418 /* -------------------------- */
419 static u_int64_t do_decomposition_sp(unsigned int base)
422 int max = sizeof(decompositions_sp) / sizeof(decompositions_sp[0]) - 1;
424 u_int32_t sought = base;
430 mid = (min + max) / 2;
431 that = decompositions_sp[mid].replacement;
434 } else if (that > sought) {
437 result = ((u_int64_t)decompositions_sp[mid].base << 32) | ((u_int64_t)decompositions_sp[mid].comb);
445 /*******************************************************************
448 we can't use static, this stuff needs to be reentrant
449 static char comp[MAXPATHLEN +1];
451 exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
452 in decompositions[] from decomposition according to AFP 3.x spec
454 We don't implement Singleton and Canonical Ordering
455 because they cause the problem of the roundtrip
457 ********************************************************************/
/*
 * precompose_w: read UCS-2 code units from name and write a precomposed
 * form to comp, combining a base character with a following combining
 * character where one of the branches below applies.
 *
 * NOTE(review): only part of this function is visible here (several
 * declarations, branch bodies and closing braces are missing), so the
 * comments describe the visible statements only.
 *
 * inplen - input length; must be non-zero, even, and no larger than the
 *          initial *outlen.  The evenness test suggests a byte count --
 *          TODO confirm against callers.
 * outlen - on entry the capacity of comp; the visible return path reports
 *          o_len - *outlen, i.e. how much of that capacity was consumed.
 */
459 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
/* Surrogate pairs packed into 32 bits (high surrogate in the upper half). */
463 u_int32_t base_sp, comb_sp;
465 ucs2_t hangul_lindex, hangul_vindex;
/* Remember the initial capacity so the consumed amount can be returned. */
468 size_t o_len = *outlen;
/* Reject empty input, an odd length, or input longer than the output. */
470 if (!inplen || (inplen & 1) || inplen > o_len)
478 while (*outlen > 2) {
487 return o_len - *outlen;
493 /* Non-Combination Character */
496 /* Unicode Standard Annex #15 A10.3 Hangul Composition */
/* comb in the V range and base in the L range: build the syllable
 * SBASE + (L * VCOUNT + V) * TCOUNT.
 * NOTE(review): the V upper bound uses "<=" while the L and T range tests
 * use "<"; this admits comb == HANGUL_VBASE + HANGUL_VCOUNT and looks like
 * an off-by-one -- confirm against UAX #15. */
498 else if ((HANGUL_VBASE <= comb) && (comb <= HANGUL_VBASE + HANGUL_VCOUNT)) {
499 if ((HANGUL_LBASE <= base) && (base < HANGUL_LBASE + HANGUL_LCOUNT)) {
501 hangul_lindex = base - HANGUL_LBASE;
502 hangul_vindex = comb - HANGUL_VBASE;
503 base = HANGUL_SBASE + (hangul_lindex * HANGUL_VCOUNT + hangul_vindex) * HANGUL_TCOUNT;
/* comb strictly above TBASE and inside the T range: added to base only when
 * base is a syllable whose offset from SBASE is a multiple of TCOUNT. */
508 else if ((HANGUL_TBASE < comb) && (comb < HANGUL_TBASE + HANGUL_TCOUNT)) {
509 if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE +HANGUL_SCOUNT) && (((base - HANGUL_SBASE) % HANGUL_TCOUNT) == 0)) {
511 base += comb - HANGUL_TBASE;
515 /* Binary Search for Surrogate Pair */
/* base is a high surrogate (0xD800..0xDBFF); comb must be a low surrogate
 * (0xDC00..0xDFFF) and i + 4 must still lie within inplen. */
516 else if ((0xD800 <= base) && (base < 0xDC00)) {
517 if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
518 base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
/* The candidate combining pair is packed from in[1] and in[2]. */
520 comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
/* Assignment inside the condition is intentional: a non-zero lookup result
 * means a precomposed form exists. */
521 if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
/* Repeat while i + 4 is within inplen and the last lookup succeeded. */
526 } while ((i + 4 <= inplen) && result_sp) ;
/* The pair is written out as its upper 16 bits, then its lower 16 bits. */
528 *out = base_sp >> 16;
537 *out = base_sp & 0xFFFF;
549 /* Binary Search for BMP */
/* Assignment inside the condition is intentional (non-zero == found). */
550 else if (result = do_precomposition(base, comb)) {
566 /* --------------- */
/*
 * decompose_w: read UCS-2 code units from name and write a decomposed form
 * to comp, replacing a character by a base character followed by combining
 * characters where one of the branches below applies.
 *
 * NOTE(review): only part of this function is visible here (several
 * declarations, branch bodies and closing braces are missing), so the
 * comments describe the visible statements only.
 *
 * inplen - input length; must be non-zero and even.  The evenness test
 *          suggests a byte count -- TODO confirm against callers.
 * outlen - on entry the capacity of comp; the visible return reports
 *          o_len-*outlen, i.e. how much of that capacity was consumed.
 */
567 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
/* comb[] is filled from its end: new entries are stored at
 * comb[COMBBUFLEN-comblen] and later written out starting from that index. */
571 ucs2_t base, comb[COMBBUFLEN];
573 ucs2_t hangul_sindex, tjamo;
/* Remember the initial capacity so the consumed amount can be returned. */
577 size_t o_len = *outlen;
/* Reject empty input or an odd length. */
579 if (!inplen || (inplen & 1))
589 /* check ASCII first. this is frequent. */
/* Empty statement: code units up to 0x007f skip every branch below. */
590 if (base <= 0x007f) ;
592 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
593 else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) {
594 hangul_sindex = base - HANGUL_SBASE;
/* L part becomes the new base; V part goes into the second-to-last slot. */
595 base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT;
596 comb[COMBBUFLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
/* tjamo == HANGUL_TBASE means the T index is 0: the V part is copied into
 * the last slot instead of a T value.  Otherwise the last slot takes tjamo. */
599 if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) {
600 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
606 comb[COMBBUFLEN-1] = tjamo;
611 /* Binary Search for Surrogate Pair */
/* base is a high surrogate (0xD800..0xDBFF); in[1] is used as the low half
 * only when i + 2 is still below inplen. */
612 else if ((0xD800 <= base) && (base < 0xDC00)) {
613 if (i + 2 < inplen) {
614 base_sp = ((u_int32_t)base << 16) | (u_int32_t)in[1];
/* A zero result means no decomposition exists; stop expanding. */
616 if ( !(result_sp = do_decomposition_sp(base_sp))) break;
/* Upper 32 bits: new base pair.  Lower 32 bits: combining pair, stored as
 * two 16-bit halves. */
618 base_sp = result_sp >> 32;
619 comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */
620 comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */
621 } while (comblen < (MAXCOMBSPLEN<<1));
/* Output space check for comblen + 1 code units; the shift by one suggests
 * *outlen counts bytes -- TODO confirm. */
623 if (*outlen < (comblen + 1) << 1) {
628 *out = base_sp >> 16; /* hi */
632 base = base_sp & 0xFFFF; /* lo */
639 /* Binary Search for BMP */
/* A zero result means no decomposition exists; stop expanding. */
642 if ( !(result = do_decomposition(base))) break;
645 comb[COMBBUFLEN-comblen] = result & 0xFFFF;
/* Keep decomposing while base is above 0x007f and comblen is below
 * MAXCOMBLEN. */
646 } while ((0x007f < base) && (comblen < MAXCOMBLEN));
/* Same output space check as in the surrogate-pair branch. */
649 if (*outlen < (comblen + 1) << 1) {
/* Write out the collected combining characters from the lowest used index. */
658 while ( comblen > 0 ) {
659 *out = comb[COMBBUFLEN-comblen];
670 return o_len-*outlen;
/*******************************************************************
 length of UTF-8 character and string
********************************************************************/

/*
 * Return the length in bytes (1..4) of the well-formed UTF-8 sequence that
 * starts at utf8, or (size_t)-1 if the bytes there do not form one.
 *
 * Rejected: overlong forms (lead bytes 0xC0/0xC1, 0xE0 followed by < 0xA0,
 * 0xF0 followed by < 0x90), encoded surrogates (0xED followed by > 0x9F),
 * values above U+10FFFF (0xF4 followed by > 0x8F, lead bytes > 0xF4),
 * stray continuation bytes and truncated sequences.
 *
 * utf8 must point to a NUL-terminated string.  Bytes are examined strictly
 * left to right and the scan stops at the first invalid byte, so nothing
 * beyond the terminator is read.  An empty string yields 1.
 */
size_t utf8_charlen ( char* utf8 )
{
    const unsigned char *p = (const unsigned char *) utf8;
    /* Allowed range of the second byte; narrowed for some lead bytes. */
    unsigned char second_min = 0x80;
    unsigned char second_max = 0xBF;
    size_t len;
    size_t i;

    if (p[0] < 0x80)
        return 1;

    if (p[0] >= 0xC2 && p[0] <= 0xDF) {
        len = 2;
    } else if (p[0] >= 0xE0 && p[0] <= 0xEF) {
        len = 3;
        if (p[0] == 0xE0)
            second_min = 0xA0;      /* no overlong 3-byte forms */
        else if (p[0] == 0xED)
            second_max = 0x9F;      /* no UTF-16 surrogates */
    } else if (p[0] >= 0xF0 && p[0] <= 0xF4) {
        len = 4;
        if (p[0] == 0xF0)
            second_min = 0x90;      /* no overlong 4-byte forms */
        else if (p[0] == 0xF4)
            second_max = 0x8F;      /* nothing above U+10FFFF */
    } else {
        return ((size_t) -1);
    }

    if (p[1] < second_min || p[1] > second_max)
        return ((size_t) -1);

    /* Remaining bytes are plain continuation bytes. */
    for (i = 2; i < len; i++) {
        if (p[i] < 0x80 || p[i] > 0xBF)
            return ((size_t) -1);
    }

    return len;
}
702 size_t utf8_strlen_validate ( char * utf8 )
707 p = (unsigned char*) utf8;
710 /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
717 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
720 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
723 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
726 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
729 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
732 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
736 return ((size_t) -1);