3 #endif /* HAVE_CONFIG_H */
10 #include <atalk/logger.h>
12 #include <arpa/inet.h>
14 #include <atalk/unicode.h>
15 #include "ucs2_casetable.h"
16 #include "precompose.h"
17 #include "byteorder.h"
19 /*******************************************************************
20 Convert a wide character to upper/lower case.
21 ********************************************************************/
/*
 * Upper-case mapping through range-specific lookup tables.
 * Each visible branch tests whether val lies in one inclusive code-point
 * range and, if so, returns the entry of the matching upcase_table_N at
 * offset (val - first code point of that range).
 * The result for a val outside all eight ranges is not visible in this
 * excerpt.
 */
22 ucs2_t toupper_w(ucs2_t val)
24 if ( val >= 0x0040 && val <= 0x007F)
25 return upcase_table_1[val-0x0040];
26 if ( val >= 0x00C0 && val <= 0x02BF)
27 return upcase_table_2[val-0x00C0];
28 if ( val >= 0x0380 && val <= 0x04FF)
29 return upcase_table_3[val-0x0380];
30 if ( val >= 0x0540 && val <= 0x05BF)
31 return upcase_table_4[val-0x0540];
32 if ( val >= 0x1E00 && val <= 0x1FFF)
33 return upcase_table_5[val-0x1E00];
34 if ( val >= 0x2140 && val <= 0x217F)
35 return upcase_table_6[val-0x2140];
36 if ( val >= 0x24C0 && val <= 0x24FF)
37 return upcase_table_7[val-0x24C0];
38 if ( val >= 0xFF40 && val <= 0xFF7F)
39 return upcase_table_8[val-0xFF40];
/*
 * Lower-case mapping through range-specific lookup tables.
 * Each visible branch tests whether val lies in one inclusive code-point
 * range and, if so, returns the entry of the matching lowcase_table_N at
 * offset (val - first code point of that range).
 * Note that the ranges differ from the ones used by toupper_w.
 * The result for a val outside all seven ranges is not visible in this
 * excerpt.
 */
45 ucs2_t tolower_w(ucs2_t val)
47 if ( val >= 0x0040 && val <= 0x007F)
48 return lowcase_table_1[val-0x0040];
49 if ( val >= 0x00C0 && val <= 0x023F)
50 return lowcase_table_2[val-0x00C0];
51 if ( val >= 0x0380 && val <= 0x057F)
52 return lowcase_table_3[val-0x0380];
53 if ( val >= 0x1E00 && val <= 0x1FFF)
54 return lowcase_table_4[val-0x1E00];
55 if ( val >= 0x2140 && val <= 0x217F)
56 return lowcase_table_5[val-0x2140];
57 if ( val >= 0x2480 && val <= 0x24FF)
58 return lowcase_table_6[val-0x2480];
59 if ( val >= 0xFF00 && val <= 0xFF3F)
60 return lowcase_table_7[val-0xFF00];
65 /*******************************************************************
66 Convert a string to lower case.
67 return True if any char is converted
68 ********************************************************************/
/*
 * The visible statement computes v, the tolower_w() mapping of the
 * character s currently points at. The loop over the string, the
 * write-back and the return statement are not visible in this excerpt.
 */
69 int strlower_w(ucs2_t *s)
73 ucs2_t v = tolower_w(*s);
83 /*******************************************************************
84 Convert a string to upper case.
85 return True if any char is converted
86 ********************************************************************/
/*
 * The visible statement computes v, the toupper_w() mapping of the
 * character s currently points at. The loop over the string, the
 * write-back and the return statement are not visible in this excerpt.
 */
87 int strupper_w(ucs2_t *s)
91 ucs2_t v = toupper_w(*s);
102 /*******************************************************************
103 determine if a character is lowercase
104 ********************************************************************/
/*
 * Returns nonzero when tolower_w() leaves c unchanged, zero otherwise.
 * NOTE(review): the test is "c is its own lower-case mapping", so it is
 * true for every character tolower_w() maps to itself, not only for
 * lower-case letters.
 */
105 int islower_w(ucs2_t c)
107 return ( c == tolower_w(c));
110 /*******************************************************************
111 determine if a character is uppercase
112 ********************************************************************/
/*
 * Returns nonzero when toupper_w() leaves c unchanged, zero otherwise.
 * NOTE(review): the test is "c is its own upper-case mapping", so it is
 * true for every character toupper_w() maps to itself, not only for
 * upper-case letters.
 */
113 int isupper_w(ucs2_t c)
115 return ( c == toupper_w(c));
119 /*******************************************************************
120 Count the number of characters in a ucs2_t string.
121 ********************************************************************/
/*
 * Counts ucs2_t elements (not bytes) in src up to, but not including,
 * the first zero element.
 */
122 size_t strlen_w(const ucs2_t *src)
/* Empty loop body: len is incremented once per non-zero element read. */
126 for(len = 0; *src++; len++) ;
131 /*******************************************************************
132 Count up to max number of characters in a ucs2_t string.
133 ********************************************************************/
/*
 * Counts ucs2_t elements in src up to the first zero element, stopping
 * once the count reaches max.
 */
134 size_t strnlen_w(const ucs2_t *src, size_t max)
/*
 * NOTE(review): the element is dereferenced before (len < max) is tested,
 * so when the first max elements are all non-zero the element at index
 * max is read as well. Callers presumably need src to be readable one
 * element past max, or the two operands should be swapped - confirm.
 */
138 for(len = 0; *src++ && (len < max); len++) ;
143 /*******************************************************************
145 ********************************************************************/
/*
 * Returns a pointer into s (with const cast away) at a position whose
 * element equals c.
 * The scan loop is not visible in this excerpt. The same test appears
 * twice; presumably the first is inside the loop and the second runs
 * after it so that c == 0 can match the terminator - TODO confirm.
 */
146 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
149 if (c == *s) return (ucs2_t *)s;
152 if (c == *s) return (ucs2_t *)s;
/*
 * Case-insensitive counterpart of strchr_w: returns a pointer into s
 * (with const cast away) at a position whose element matches c.
 * The scan loop is not visible in this excerpt.
 */
157 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
160 /* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
/* Case-insensitive match: both sides are folded with toupper_w(). */
161 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
/*
 * Exact (case-sensitive) match; presumably the post-loop check that lets
 * c == 0 match the terminator - TODO confirm.
 */
164 if (c == *s) return (ucs2_t *)s;
/*
 * Compares two zero-terminated ucs2_t strings element by element.
 * The return statement is not visible in this excerpt.
 */
170 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
/*
 * Advance both pointers while the elements are equal and b has not hit
 * its terminator; if a ends first, *a (0) differs from *b and the loop
 * stops as well.
 */
172 while (*b && *a == *b) { a++; b++; }
174 /* warning: if *a != *b and both are not 0 we return a random
175 greater or lesser than 0 number not related to which
/*
 * Compares at most len elements of two ucs2_t strings.
 * Returns 0 when the first len elements are equal, otherwise the
 * difference (*a - *b) at the position where the scan stopped.
 */
179 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
/* n counts the elements that compared equal so far. */
182 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
/*
 * n == len means all len elements matched; only then are *a and *b left
 * unread, so nothing past the compared range is dereferenced.
 */
183 return (len - n)?(*a - *b):0;
186 /*******************************************************************
188 ********************************************************************/
/*
 * Finds an occurrence of the string ins inside s (case-sensitive).
 * Returns a pointer to the match, or NULL when either argument is NULL
 * or empty. How r is initialised and advanced between attempts, and the
 * return on "not found", are not visible in this excerpt.
 */
189 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
194 if (!s || !*s || !ins || !*ins) return NULL;
196 inslen = strlen_w(ins);
/*
 * Jump to each candidate position that starts with the first character
 * of ins, then compare inslen elements there.
 */
198 while ((r = strchr_w(r, *ins))) {
199 if (strncmp_w(r, ins, inslen) == 0) return r;
/*
 * Case-insensitive variant of strstr_w: finds an occurrence of ins
 * inside s using strcasechr_w() and strncasecmp_w().
 * Returns a pointer to the match, or NULL when either argument is NULL
 * or empty. How r is initialised and advanced between attempts, and the
 * return on "not found", are not visible in this excerpt.
 */
205 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
210 if (!s || !*s || !ins || !*ins) return NULL;
212 inslen = strlen_w(ins);
/*
 * Jump to each candidate position whose character matches the first
 * character of ins, then compare inslen elements there.
 */
214 while ((r = strcasechr_w(r, *ins))) {
215 if (strncasecmp_w(r, ins, inslen) == 0) return r;
224 /*******************************************************************
225 case insensitive string comparison
226 ********************************************************************/
/*
 * Compares two zero-terminated ucs2_t strings ignoring case.
 * Returns the difference of the tolower_w() mappings of the elements at
 * the position where the scan stopped (0 when both strings ended there).
 */
227 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
/* Equality is decided on the toupper_w() mappings ... */
229 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
/* ... while the returned ordering is based on the tolower_w() mappings. */
230 return (tolower_w(*a) - tolower_w(*b));
233 /*******************************************************************
234 case insensitive string comparison, length limited
235 ********************************************************************/
/*
 * Compares at most len elements of two ucs2_t strings ignoring case.
 * Returns 0 when the first len elements match, otherwise the difference
 * of the tolower_w() mappings at the position where the scan stopped.
 */
236 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
/* n counts matched elements; equality is decided on toupper_w(). */
239 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
/* n == len: everything matched, *a and *b are not read again. */
240 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
243 /*******************************************************************
245 ********************************************************************/
246 /* if len == 0 then duplicate the whole string */
/*
 * Duplicates len elements of src into a malloc()ed buffer with room for
 * len + 1 elements. The failure test around the LOG call, the write of
 * the terminator and the return statements are not visible in this
 * excerpt.
 */
247 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
/* len == 0 is the "whole string" request: measure src instead. */
251 if (!len) len = strlen_w(src);
/* One extra element, presumably for the terminator - TODO confirm. */
252 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
/* NOTE(review): the message names strdup_w although this is strndup_w. */
254 LOG (log_error, logtype_default, "strdup_w: out of memory!");
/*
 * Copies exactly len elements; for a caller-supplied non-zero len no
 * visible line limits it to the actual length of src.
 */
258 memcpy(dest, src, len * sizeof(ucs2_t));
/*
 * Duplicates the whole string: delegates to strndup_w() with len 0,
 * which strndup_w() treats as "use strlen_w(src)".
 */
264 ucs2_t *strdup_w(const ucs2_t *src)
266 return strndup_w(src, 0);
269 /*******************************************************************
270 copy a string with max len
271 ********************************************************************/
/*
 * Copies elements from src to dest until a zero element is found in src
 * or max elements have been copied. Returns NULL when dest or src is
 * NULL. What happens after the loop (terminator, return value) is not
 * visible in this excerpt.
 */
273 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
277 if (!dest || !src) return NULL;
/*
 * NOTE(review): src[len] is tested before (len < max), so when the first
 * max elements are all non-zero, src[max] is read too - confirm that
 * callers guarantee this is readable.
 */
279 for (len = 0; (src[len] != 0) && (len < max); len++)
280 dest[len] = src[len];
288 /*******************************************************************
289 append at most max characters (ucs2_t elements, not bytes) of src and add a terminator
290 ********************************************************************/
/*
 * Appends up to max elements of src to the end of dest.
 * Returns NULL when dest or src is NULL. The terminator write and the
 * normal return are not visible in this excerpt. No destination size is
 * passed in, so dest must already be large enough.
 */
292 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
297 if (!dest || !src) return NULL;
/* start: index of the current terminator of dest. */
299 start = strlen_w(dest);
/* len: number of elements to append, limited to max. */
300 len = strnlen_w(src, max);
302 memcpy(&dest[start], src, len*sizeof(ucs2_t));
/*
 * Appends src to the end of dest.
 * Returns NULL when dest or src is NULL. The assignment of len, the
 * terminator write and the normal return are not visible in this
 * excerpt. No destination size is passed in, so dest must already be
 * large enough.
 */
309 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
314 if (!dest || !src) return NULL;
/* start: index of the current terminator of dest. */
316 start = strlen_w(dest);
319 memcpy(&dest[start], src, len*sizeof(ucs2_t));
326 /*******************************************************************
327 binary search for pre|decomposition
328 ********************************************************************/
/*
 * Looks up the precomposed character for the pair (base, comb) in
 * precompositions[] by bisection between min and max.
 * Assumes the table is ordered by the packed key (base << 16) | comb -
 * TODO confirm against precompose.h.
 * The loop header, the index updates and the return for "not found" are
 * not visible in this excerpt.
 */
330 static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
333 int max = PRECOMP_COUNT - 1;
/* Search key: base in the upper 16 bits, comb in the lower 16 bits. */
335 u_int32_t sought = (base << 16) | comb, that;
339 mid = (min + max) / 2;
/* Key of the probed table entry, packed the same way as sought. */
340 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
343 } else if (that > sought) {
/* Presumably the exact-match branch: hand back the composed character. */
346 return precompositions[mid].replacement;
353 /* ------------------------ */
/*
 * Surrogate-pair variant of the precomposition lookup: searches
 * precompositions_sp[] by bisection for the pair (base_sp, comb_sp).
 * Assumes the table is ordered by the packed 64-bit key - TODO confirm
 * against precompose.h.
 * The loop header, the index updates and the return for "not found" are
 * not visible in this excerpt.
 */
354 static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
357 int max = PRECOMP_SP_COUNT - 1;
/* Search key: base_sp in the upper 32 bits, comb_sp in the lower 32. */
359 u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp;
363 mid = (min + max) / 2;
/* Key of the probed table entry, packed the same way as sought_sp. */
364 that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp);
365 if (that_sp < sought_sp) {
367 } else if (that_sp > sought_sp) {
/* Neither smaller nor greater: exact match. */
370 return precompositions_sp[mid].replacement_sp;
377 /* -------------------------- */
/*
 * Looks up the decomposition of the character base in decompositions[]
 * by bisection, comparing against each entry's replacement field.
 * Assumes the table is ordered by replacement - TODO confirm against
 * precompose.h.
 * The loop header, the index updates and the return statements are not
 * visible in this excerpt.
 */
378 static u_int32_t do_decomposition(ucs2_t base)
381 int max = DECOMP_COUNT - 1;
383 u_int32_t sought = base;
384 u_int32_t result, that;
388 mid = (min + max) / 2;
389 that = decompositions[mid].replacement;
392 } else if (that > sought) {
/* Packed result: base in the upper 16 bits, comb in the lower 16 bits. */
395 result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
403 /* -------------------------- */
/*
 * Surrogate-pair variant of the decomposition lookup: searches
 * decompositions_sp[] by bisection, comparing against each entry's
 * replacement_sp field.
 * Assumes the table is ordered by replacement_sp - TODO confirm against
 * precompose.h.
 * The loop header, the index updates and the return statements are not
 * visible in this excerpt.
 */
404 static u_int64_t do_decomposition_sp(unsigned int base_sp)
407 int max = DECOMP_SP_COUNT - 1;
409 u_int32_t sought_sp = base_sp;
415 mid = (min + max) / 2;
416 that_sp = decompositions_sp[mid].replacement_sp;
417 if (that_sp < sought_sp) {
419 } else if (that_sp > sought_sp) {
/* Packed result: base_sp in the upper 32 bits, comb_sp in the lower 32. */
422 result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp);
430 /*******************************************************************
433 we can't use static, this stuff needs to be reentrant
434 static char comp[MAXPATHLEN +1];
436 We don't implement Singleton and Canonical Ordering,
437 and we ignore CompositionExclusions.txt,
438 because they cause round-trip problems
439 such as Dancing Icon.
441 exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
442 in precompose.h from composition according to AFP 3.x spec
443 ********************************************************************/
/*
 * Composes base + combining sequences from name into comp.
 * The visible branches handle, in order: Hangul L+V and LV+T jamo
 * composition, surrogate pairs via do_precomposition_sp(), and BMP
 * characters via do_precomposition().
 * inplen and *outlen look like byte counts (an odd inplen is rejected)
 * - TODO confirm. Much of the loop body, including how in, out, i and
 * *outlen advance, is not visible in this excerpt.
 */
445 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
449 u_int32_t base_sp, comb_sp;
451 ucs2_t lindex, vindex;
/* Remember the initial output capacity for the final return value. */
454 size_t o_len = *outlen;
/*
 * Guard: input must be non-empty, even-sized and no larger than the
 * output capacity. The action taken on failure is not visible here.
 */
456 if (!inplen || (inplen & 1) || inplen > o_len)
464 while (*outlen > 2) {
/* Difference between the initial and the current value of *outlen. */
473 return o_len - *outlen;
479 /* Non-Combination Character */
482 /* Unicode Standard Annex #15 A10.3 Hangul Composition */
/*
 * NOTE(review): the upper bound here is inclusive (<=) whereas the L and
 * T range tests below use exclusive bounds (<); comb == VBASE + VCOUNT
 * would yield vindex == VCOUNT. Verify against UAX #15.
 */
484 else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) {
485 if ((LBASE <= base) && (base < LBASE + LCOUNT)) {
/* L + V -> LV syllable. */
487 lindex = base - LBASE;
488 vindex = comb - VBASE;
489 base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
/* comb == TBASE itself is excluded by the strict lower bound. */
494 else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) {
/* base must be an LV syllable, i.e. one without a trailing consonant. */
495 if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) {
/* LV + T -> LVT syllable. */
497 base += comb - TBASE;
501 /* Binary Search for Surrogate Pair */
/* base is a high surrogate ... */
502 else if ((0xD800 <= base) && (base < 0xDC00)) {
/* ... followed by a low surrogate, with at least 4 more input units. */
503 if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
/* Pack the surrogate pair: high unit in the upper 16 bits, low below. */
504 base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
506 comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
/* Assignment is intentional; the branch is taken on a non-zero result. */
507 if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
512 } while ((i + 4 <= inplen) && result_sp) ;
/* Emit the composed pair as two units: high half, then low half. */
514 *out = base_sp >> 16;
523 *out = base_sp & 0xFFFF;
535 /* Binary Search for BMP */
/* Assignment is intentional; the branch is taken on a non-zero result. */
536 else if (result = do_precomposition(base, comb)) {
552 /* --------------- */
/*
 * Decomposes the characters of name into comp.
 * The visible branches handle, in order: ASCII (nothing to do), Hangul
 * syllables (arithmetic decomposition), surrogate pairs via
 * do_decomposition_sp(), and BMP characters via do_decomposition().
 * Combining characters are collected at the tail of comb[] and written
 * out after the base. inplen and *outlen look like byte counts (an odd
 * inplen is rejected, output room is checked as units << 1) - TODO
 * confirm. Much of the loop body is not visible in this excerpt.
 */
553 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
557 ucs2_t base, comb[COMBBUFLEN];
559 ucs2_t sindex, tjamo;
/* Remember the initial output capacity for the final return value. */
563 size_t o_len = *outlen;
/*
 * Guard: input must be non-empty and even-sized. The action taken on
 * failure is not visible here.
 */
565 if (!inplen || (inplen & 1))
575 /* check ASCII first. this is frequent. */
/* Empty statement: ASCII skips the rest of the else-if chain. */
576 if (base <= 0x007f) ;
578 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
579 else if ((SBASE <= base) && (base < SBASE + SCOUNT)) {
580 sindex = base - SBASE;
/* Leading consonant becomes the new base. */
581 base = LBASE + sindex / NCOUNT;
/* Vowel goes into the second-to-last slot of comb[]. */
582 comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT;
/* tjamo == TBASE means the syllable has no trailing consonant. */
585 if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) {
/* Only the vowel follows: move it to the last slot. */
586 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
/* Otherwise the trailing consonant takes the last slot. */
592 comb[COMBBUFLEN-1] = tjamo;
597 /* Binary Search for Surrogate Pair */
598 else if ((0xD800 <= base) && (base < 0xDC00)) {
599 if (i + 2 < inplen) {
/* Pack the surrogate pair: high unit in the upper 16 bits, low below. */
600 base_sp = ((u_int32_t)base << 16) | (u_int32_t)in[1];
/* A zero result ends the repeated decomposition. */
602 if ( !(result_sp = do_decomposition_sp(base_sp))) break;
/* Upper 32 bits: the new base pair; lower 32 bits: the combining pair. */
604 base_sp = result_sp >> 32;
605 comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */
606 comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */
607 } while (comblen < MAXCOMBSPLEN);
/* Needs room for comblen + 1 units; << 1 doubles that count. */
609 if (*outlen < (comblen + 1) << 1) {
614 *out = base_sp >> 16; /* hi */
618 base = base_sp & 0xFFFF; /* lo */
625 /* Binary Search for BMP */
/* A zero result ends the repeated decomposition. */
628 if ( !(result = do_decomposition(base))) break;
/* The lower 16 bits of result hold the combining character. */
631 comb[COMBBUFLEN-comblen] = result & 0xFFFF;
/* Stop once the base is ASCII or the combining buffer limit is hit. */
632 } while ((0x007f < base) && (comblen < MAXCOMBLEN));
/* Needs room for comblen + 1 units; << 1 doubles that count. */
635 if (*outlen < (comblen + 1) << 1) {
/* Write out the collected combining characters from the tail of comb[]. */
644 while ( comblen > 0 ) {
645 *out = comb[COMBBUFLEN-comblen];
/* Difference between the initial and the current value of *outlen. */
656 return o_len-*outlen;
659 /*******************************************************************
660 length of UTF-8 character and string
661 ********************************************************************/
/*
 * Classifies the UTF-8 sequence starting at utf8 by its lead byte and
 * checks that the following bytes are in range. The values returned by
 * the individual branches are not visible in this excerpt; the visible
 * return yields (size_t) -1, presumably when no branch matched - TODO
 * confirm.
 * Every following byte must be greater than 0x7f, so a NUL fails the
 * test and && short-circuits before the next byte is read.
 */
663 size_t utf8_charlen ( char* utf8 )
/* Work on unsigned bytes so that values >= 0x80 compare correctly. */
667 p = (unsigned char*) utf8;
/* Lead 0xC2..0xDF + one continuation byte (0xC0/0xC1 are rejected). */
671 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
/* Lead 0xE0: the second byte must be 0xA0..0xBF. */
673 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
/*
 * Lead 0xE1..0xEF + two continuation bytes.
 * NOTE(review): 0xED followed by 0xA0..0xBF (encoded surrogates) is not
 * excluded here - confirm whether that is intended.
 */
675 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
/* Lead 0xF0: the second byte must be 0x90..0xBF. */
677 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
/* Lead 0xF1..0xF3 + three continuation bytes. */
679 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
/* Lead 0xF4: the second byte must be 0x80..0x8F. */
681 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
684 return ((size_t) -1);
/*
 * Walks a UTF-8 string, classifying each sequence by its lead byte and
 * checking that the following bytes are in range. The loop, the
 * per-branch bookkeeping and the normal return are not visible in this
 * excerpt; the visible return yields (size_t) -1, presumably for an
 * invalid sequence - TODO confirm.
 * Every following byte must be greater than 0x7f, so a NUL fails the
 * test and && short-circuits before the next byte is read.
 */
688 size_t utf8_strlen_validate ( char * utf8 )
/* Work on unsigned bytes so that values >= 0x80 compare correctly. */
693 p = (unsigned char*) utf8;
696 /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
/* Lead 0xC2..0xDF + one continuation byte (0xC0/0xC1 are rejected). */
703 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
/* Lead 0xE0: the second byte must be 0xA0..0xBF. */
706 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
/*
 * Lead 0xE1..0xEF + two continuation bytes.
 * NOTE(review): 0xED followed by 0xA0..0xBF (encoded surrogates) is not
 * excluded here - confirm whether that is intended.
 */
709 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
/* Lead 0xF0: the second byte must be 0x90..0xBF. */
712 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
/* Lead 0xF1..0xF3 + three continuation bytes. */
715 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
/* Lead 0xF4: the second byte must be 0x80..0x8F. */
718 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
722 return ((size_t) -1);