3 #endif /* HAVE_CONFIG_H */
10 #include <atalk/logger.h>
13 #include <netatalk/endian.h>
15 #include <atalk/unicode.h>
16 #include "ucs2_casetable.h"
17 #include "precompose.h"
18 #include "byteorder.h"
/* Constants for the arithmetic Hangul handling in precompose_w() and
   decompose_w() (Unicode Standard Annex #15, Hangul composition and
   decomposition).
   SBASE is the code point the computed syllable index is added to;
   LBASE, VBASE and TBASE are the bases that the leading, vowel and
   trailing jamo indices are measured from; the *COUNT values are the
   sizes of the corresponding index ranges. */
20 #define HANGUL_SBASE 0xAC00
21 #define HANGUL_LBASE 0x1100
22 #define HANGUL_VBASE 0x1161
23 #define HANGUL_TBASE 0x11A7
24 #define HANGUL_LCOUNT 19
25 #define HANGUL_VCOUNT 21
26 #define HANGUL_TCOUNT 28
27 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) /* 588 */
28 #define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT) /* 11172 */
/* Map a UCS-2 value to its upper-case form.
   val is tested against a fixed list of code-point ranges; when it lies
   inside one, the result is read from the matching upcase_table_N,
   indexed by the offset of val from the start of that range.
   The tables are declared outside this block (presumably in
   ucs2_casetable.h -- verify).
   NOTE(review): the return for a value outside every range is not
   visible here; presumably val is returned unchanged -- confirm. */
32 ucs2_t toupper_w(ucs2_t val)
34 if ( val >= 0x0040 && val <= 0x007F)
35 return upcase_table_1[val-0x0040];
36 if ( val >= 0x00C0 && val <= 0x02BF)
37 return upcase_table_2[val-0x00C0];
38 if ( val >= 0x0380 && val <= 0x04FF)
39 return upcase_table_3[val-0x0380];
40 if ( val >= 0x0540 && val <= 0x05BF)
41 return upcase_table_4[val-0x0540];
42 if ( val >= 0x1E00 && val <= 0x1FFF)
43 return upcase_table_5[val-0x1E00];
44 if ( val >= 0x2140 && val <= 0x217F)
45 return upcase_table_6[val-0x2140];
46 if ( val >= 0x24C0 && val <= 0x24FF)
47 return upcase_table_7[val-0x24C0];
48 if ( val >= 0xFF40 && val <= 0xFF7F)
49 return upcase_table_8[val-0xFF40];
/* Map a UCS-2 value to its lower-case form.
   val is tested against a fixed list of code-point ranges; when it lies
   inside one, the result is read from the matching lowcase_table_N,
   indexed by the offset of val from the start of that range.
   The ranges differ from those used by toupper_w().
   NOTE(review): the return for a value outside every range is not
   visible here; presumably val is returned unchanged -- confirm. */
55 ucs2_t tolower_w(ucs2_t val)
57 if ( val >= 0x0040 && val <= 0x007F)
58 return lowcase_table_1[val-0x0040];
59 if ( val >= 0x00C0 && val <= 0x023F)
60 return lowcase_table_2[val-0x00C0];
61 if ( val >= 0x0380 && val <= 0x057F)
62 return lowcase_table_3[val-0x0380];
63 if ( val >= 0x1E00 && val <= 0x1FFF)
64 return lowcase_table_4[val-0x1E00];
65 if ( val >= 0x2140 && val <= 0x217F)
66 return lowcase_table_5[val-0x2140];
67 if ( val >= 0x2480 && val <= 0x24FF)
68 return lowcase_table_6[val-0x2480];
69 if ( val >= 0xFF00 && val <= 0xFF3F)
70 return lowcase_table_7[val-0xFF00];
75 /*******************************************************************
76 Convert a string to lower case.
77 return True if any char is converted
   Each character is passed through tolower_w().
   NOTE(review): the loop, the write-back into s and the return
   statement are not visible here; presumably s is converted in place
   up to its 0 terminator -- confirm.
78 ********************************************************************/
79 int strlower_w(ucs2_t *s)
83 ucs2_t v = tolower_w(*s);
93 /*******************************************************************
94 Convert a string to upper case.
95 return True if any char is converted
   Each character is passed through toupper_w().
   NOTE(review): the loop, the write-back into s and the return
   statement are not visible here; presumably s is converted in place
   up to its 0 terminator -- confirm.
96 ********************************************************************/
97 int strupper_w(ucs2_t *s)
101 ucs2_t v = toupper_w(*s);
112 /*******************************************************************
113 determine if a character is lowercase
    Returns nonzero when tolower_w(c) yields c itself.
    NOTE(review): any character that tolower_w() leaves unchanged is
    reported as lowercase, which presumably includes characters with no
    case at all (digits, punctuation) -- confirm callers expect this.
114 ********************************************************************/
115 int islower_w(ucs2_t c)
117 return ( c == tolower_w(c));
120 /*******************************************************************
121 determine if a character is uppercase
    Returns nonzero when toupper_w(c) yields c itself.
    NOTE(review): any character that toupper_w() leaves unchanged is
    reported as uppercase, which presumably includes characters with no
    case at all (digits, punctuation) -- confirm callers expect this.
122 ********************************************************************/
123 int isupper_w(ucs2_t c)
125 return ( c == toupper_w(c));
129 /*******************************************************************
130 Count the number of characters in a ucs2_t string.
    The count is in ucs2_t units and stops at, and excludes, the first
    unit equal to 0.
    NOTE(review): the declaration of len and the return statement are
    not visible here; presumably len is returned -- confirm.
131 ********************************************************************/
132 size_t strlen_w(const ucs2_t *src)
136 for(len = 0; *src++; len++) ;
141 /*******************************************************************
142 Count up to max number of characters in a ucs2_t string.
    The count is in ucs2_t units and never exceeds max.
    NOTE(review): in the loop condition *src++ is evaluated before
    (len < max), so when no 0 unit occurs in the first max units the
    unit at index max is still read. Testing the bound first would
    avoid touching memory past max units -- confirm and fix.
    NOTE(review): the declaration of len and the return statement are
    not visible here; presumably len is returned -- confirm.
143 ********************************************************************/
144 size_t strnlen_w(const ucs2_t *src, size_t max)
148 for(len = 0; *src++ && (len < max); len++) ;
153 /*******************************************************************
    Search the ucs2_t string s for the value c.
    On a match the address of the matching unit is returned, with the
    const qualifier cast away.
    NOTE(review): two identical comparisons are visible; presumably the
    first sits inside a scan loop and the second runs after it so that
    c == 0 matches the terminator. The loop and the not-found return
    (presumably NULL) are not visible here -- confirm.
155 ********************************************************************/
156 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
159 if (c == *s) return (ucs2_t *)s;
162 if (c == *s) return (ucs2_t *)s;
/* Case-insensitive counterpart of strchr_w(): a unit of s matches when
   toupper_w() of it equals toupper_w(c). On a match the address of the
   matching unit is returned, with the const qualifier cast away.
   NOTE(review): the second comparison is case-sensitive (c == *s);
   presumably it runs after the scan loop to match c == 0 against the
   terminator. The loop and the not-found return (presumably NULL) are
   not visible here -- confirm. */
167 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
170 /* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
171 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
174 if (c == *s) return (ucs2_t *)s;
/* Compare two ucs2_t strings.
   Both pointers advance while *b is nonzero and the current units are
   equal, so the scan stops at the first difference or at the end of b.
   NOTE(review): the return statement is not visible here; presumably
   it is derived from the units a and b point at when the loop stops
   -- confirm. */
180 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
182 while (*b && *a == *b) { a++; b++; }
184 /* warning: if *a != *b and both are not 0 we return a random
185 greater or lesser than 0 number not related to which
/* Compare at most len units of two ucs2_t strings.
   Both pointers advance while fewer than len units have been compared,
   *b is nonzero and the current units are equal.
   Returns 0 when all len units matched (n reached len); otherwise
   returns *a - *b for the units at which the scan stopped.
   NOTE(review): the declaration of n is not visible here; presumably
   it starts at 0 -- confirm. */
189 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
192 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
193 return (len - n)?(*a - *b):0;
196 /*******************************************************************
    Find the first occurrence of the ucs2_t string ins inside s.
    Returns NULL when either pointer is NULL or either string is empty.
    Candidate positions are located with strchr_w() on the first unit
    of ins, and each candidate is checked with strncmp_w() over the
    full length of ins; the first candidate that matches is returned.
    NOTE(review): the initialisation of r, the step past a failed
    candidate and the not-found return (presumably NULL) are not
    visible here -- confirm.
198 ********************************************************************/
199 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
204 if (!s || !*s || !ins || !*ins) return NULL;
206 inslen = strlen_w(ins);
208 while ((r = strchr_w(r, *ins))) {
209 if (strncmp_w(r, ins, inslen) == 0) return r;
/* Case-insensitive counterpart of strstr_w().
   Returns NULL when either pointer is NULL or either string is empty.
   Candidate positions are located with strcasechr_w() on the first
   unit of ins, and each candidate is checked with strncasecmp_w() over
   the full length of ins; the first candidate that matches is returned.
   NOTE(review): the initialisation of r, the step past a failed
   candidate and the not-found return (presumably NULL) are not
   visible here -- confirm. */
215 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
220 if (!s || !*s || !ins || !*ins) return NULL;
222 inslen = strlen_w(ins);
224 while ((r = strcasechr_w(r, *ins))) {
225 if (strncasecmp_w(r, ins, inslen) == 0) return r;
234 /*******************************************************************
235 case insensitive string comparison
    Both pointers advance while *b is nonzero and the current units are
    equal after toupper_w(). The result is the difference of the
    tolower_w() forms of the units at which the scan stopped, so
    equality is decided on upper-case forms and ordering on lower-case
    forms.
236 ********************************************************************/
237 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
239 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
240 return (tolower_w(*a) - tolower_w(*b));
243 /*******************************************************************
244 case insensitive string comparison, length limited
    Both pointers advance while fewer than len units have been
    compared, *b is nonzero and the current units are equal after
    toupper_w(). Returns 0 when all len units matched (n reached len);
    otherwise returns the difference of the tolower_w() forms of the
    units at which the scan stopped.
    NOTE(review): the declaration of n is not visible here; presumably
    it starts at 0 -- confirm.
245 ********************************************************************/
246 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
249 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
250 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
253 /*******************************************************************
    Duplicate a ucs2_t string into a buffer obtained from malloc().
    The buffer holds len + 1 units and the first len units of src are
    copied into it with memcpy().
    NOTE(review): memcpy() copies len units regardless of where the 0
    terminator of src lies, so a nonzero len larger than the string
    reads past its end -- confirm callers never pass that.
    NOTE(review): the log message names strdup_w although it is emitted
    from strndup_w.
    NOTE(review): the allocation-failure test, the terminator store and
    the return are not visible here; presumably the buffer is
    0-terminated, returned, and must be released by the caller
    -- confirm.
255 ********************************************************************/
256 /* if len == 0 then duplicate the whole string */
257 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
261 if (!len) len = strlen_w(src);
262 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
264 LOG (log_error, logtype_default, "strdup_w: out of memory!");
268 memcpy(dest, src, len * sizeof(ucs2_t));
/* Duplicate a whole ucs2_t string: forwards to strndup_w() with len 0,
   which that function treats as "use the full length of src". */
274 ucs2_t *strdup_w(const ucs2_t *src)
276 return strndup_w(src, 0);
279 /*******************************************************************
280 copy a string with max len
    Returns NULL when dest or src is NULL.
    Units are copied from src to dest until a 0 unit is met in src or
    max units have been copied; max is counted in ucs2_t units.
    NOTE(review): whatever follows the copy loop (terminator store,
    return value) is not visible here -- confirm whether dest is always
    0-terminated and how much room the caller must provide.
281 ********************************************************************/
283 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
287 if (!dest || !src) return NULL;
289 for (len = 0; (src[len] != 0) && (len < max); len++)
290 dest[len] = src[len];
298 /*******************************************************************
299 append a string of len bytes and add a terminator
    Returns NULL when dest or src is NULL.
    The amount appended is strnlen_w(src, max), i.e. it is counted in
    ucs2_t units rather than bytes, and it is copied to the position of
    the current terminator of dest.
    NOTE(review): the terminator store and the return are not visible
    here; the caller presumably must provide room for the appended
    units plus a terminator -- confirm.
300 ********************************************************************/
302 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
307 if (!dest || !src) return NULL;
309 start = strlen_w(dest);
310 len = strnlen_w(src, max);
312 memcpy(&dest[start], src, len*sizeof(ucs2_t));
/* Append the ucs2_t string src to dest.
   Returns NULL when dest or src is NULL.
   len units of src are copied to the position of the current
   terminator of dest.
   NOTE(review): the assignment of len (presumably strlen_w(src)), the
   terminator store and the return are not visible here -- confirm. */
319 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
324 if (!dest || !src) return NULL;
326 start = strlen_w(dest);
329 memcpy(&dest[start], src, len*sizeof(ucs2_t));
336 /* ------------------------ */
/* Look up the precomposed character for the pair (base, comb).
   The pair is packed into one 32-bit key, (base << 16) | comb, and the
   precompositions[] table is searched by repeated bisection between
   min and max, comparing against the same packing of each entry's
   base and comb fields. On a match the entry's replacement is
   returned.
   NOTE(review): the bisection requires precompositions[] to be sorted
   by that packed key; the table is defined outside this block
   -- confirm.
   NOTE(review): the initialisation of min, the loop header and the
   not-found return are not visible here; precompose_w() tests the
   result for nonzero, so not-found is presumably 0 -- confirm. */
337 static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
340 int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1;
342 u_int32_t sought = (base << 16) | comb, that;
346 mid = (min + max) / 2;
347 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
350 } else if (that > sought) {
353 return precompositions[mid].replacement;
360 /* -------------------------- */
/* Look up the decomposition of the character base.
   The decompositions[] table is searched by repeated bisection between
   min and max on each entry's replacement field. On a match the
   entry's base and comb fields are packed into one 32-bit result,
   (base << 16) | comb.
   NOTE(review): the bisection requires decompositions[] to be sorted
   by replacement; the table is defined outside this block -- confirm.
   NOTE(review): the initialisation of min, the loop header and the
   returns are not visible here; decompose_w() tests the result for
   nonzero, so not-found is presumably 0 -- confirm. */
361 static u_int32_t do_decomposition(ucs2_t base)
364 int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1;
366 u_int32_t sought = base;
367 u_int32_t result, that;
371 mid = (min + max) / 2;
372 that = decompositions[mid].replacement;
375 } else if (that > sought) {
378 result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
386 /* we can't use static, this stuff needs to be reentrant */
387 /* static char comp[MAXPATHLEN +1]; */
/* Compose base + combining character pairs of name into precomposed
   characters, writing the result to comp.
   name    input ucs2_t string
   inplen  input length; it must be nonzero and even, which suggests it
           is counted in bytes -- confirm
   comp    output buffer supplied by the caller
   outlen  in: room available in comp; it is decreased as output is
           produced
   The visible return yields o_len - *outlen, i.e. the amount of output
   produced, in the same unit as *outlen.
   NOTE(review): large parts of the body are not visible here (local
   declarations, the action taken on the early checks, the read and
   write steps of the loop), so the notes in this block cover only the
   visible lines. */
389 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
394 ucs2_t hangul_lindex, hangul_vindex;
396 size_t o_len = *outlen;
/* Reject empty input, odd lengths, and input longer than the output
   room. NOTE(review): the action taken is not visible here. */
398 if (!inplen || (inplen & 1) || inplen > o_len)
402 /* Decomposition and Canonical Ordering are necessary here. */
404 /* Ex. in = CanonicalOrdering(decompose_w(name)) */
406 /* A new mapping table is needed for CanonicalOrdering. */
413 while (*outlen > 2) {
421 return o_len - *outlen;
426 /* Non-Combination Character */
429 /* Unicode Standard Annex #15 A10.3 Hangul Composition */
/* Leading jamo + vowel jamo -> LV syllable.
   NOTE(review): the upper bound on comb is inclusive (<=), while every
   other range test in this function is exclusive (<). As written,
   comb == HANGUL_VBASE + HANGUL_VCOUNT is accepted, which is one past
   the HANGUL_VCOUNT vowels. Looks like an off-by-one -- confirm
   against UAX #15. */
431 else if ((HANGUL_VBASE <= comb) && (comb <= HANGUL_VBASE + HANGUL_VCOUNT)) {
432 if ((HANGUL_LBASE <= base) && (base < HANGUL_LBASE + HANGUL_LCOUNT)) {
434 hangul_lindex = base - HANGUL_LBASE;
435 hangul_vindex = comb - HANGUL_VBASE;
436 base = HANGUL_SBASE + (hangul_lindex * HANGUL_VCOUNT + hangul_vindex) * HANGUL_TCOUNT;
/* LV syllable + trailing jamo -> LVT syllable. The lower bound on comb
   is exclusive, so HANGUL_TBASE itself is not accepted, and base must
   be a syllable whose offset is a multiple of HANGUL_TCOUNT (one with
   no trailing jamo yet). */
441 else if ((HANGUL_TBASE < comb) && (comb < HANGUL_TBASE + HANGUL_TCOUNT)) {
442 if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE +HANGUL_SCOUNT) && (((base - HANGUL_SBASE) % HANGUL_TCOUNT) == 0)) {
444 base += comb - HANGUL_TBASE;
448 /* Combining Sequence */
/* Table lookup for every other pair; a nonzero result means a
   precomposed form exists. */
449 else if ((result = do_precomposition(base, comb))) {
465 /* --------------- */
467 /* Singleton Decomposition is unsupported. */
468 /* A new mapping table is needed for implementation. */
/* Decompose precomposed characters of name into a base character
   followed by combining characters, writing the result to comp.
   name    input ucs2_t string
   inplen  input length; it must be nonzero and even, which suggests it
           is counted in bytes -- confirm
   comp    output buffer supplied by the caller
   outlen  in: room available in comp; it is decreased as output is
           produced
   Returns o_len - *outlen, i.e. the amount of output produced, in the
   same unit as *outlen.
   The combining characters of one input character are collected at the
   END of comb[] (index MAXCOMBLEN - comblen upwards) and then emitted
   from that index onward.
   NOTE(review): large parts of the body are not visible here (local
   declarations, the action taken on the early checks, the updates of
   comblen and base, the write steps), so the notes in this block cover
   only the visible lines. */
470 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
475 ucs2_t comb[MAXCOMBLEN];
476 ucs2_t hangul_sindex, tjamo;
479 size_t o_len = *outlen;
/* Reject empty input and odd lengths. NOTE(review): the action taken
   is not visible here. */
481 if (!inplen || (inplen & 1))
491 /* check ASCII first. this is frequent. */
492 if (base <= 0x007f) ;
494 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
495 else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) {
496 hangul_sindex = base - HANGUL_SBASE;
497 base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT;
498 comb[MAXCOMBLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
/* tjamo == HANGUL_TBASE means the syllable has no trailing jamo: the
   vowel is then moved into the last slot of comb[]; otherwise the
   trailing jamo takes the last slot and the vowel stays before it. */
501 if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) {
502 comb[MAXCOMBLEN-1] = comb[MAXCOMBLEN-2];
508 comb[MAXCOMBLEN-1] = tjamo;
513 /* Combining Sequence */
514 /* exclude U2000-U2FFF and UFE30-UFE4F ranges in decompositions[] */
515 /* from decomposition according to AFP 3.1 spec */
/* Repeated table decomposition: stop when comb[] is full or base has
   no decomposition; the low 16 bits of each result are the combining
   character. The loop continues while base is above the ASCII range. */
518 if ((comblen >= MAXCOMBLEN) || !(result = do_decomposition(base))) break;
521 comb[MAXCOMBLEN-comblen] = result & 0xffff;
522 } while (0x007f < base) ;
/* Room check: base plus comblen combining characters, two bytes each. */
525 if (*outlen < (comblen + 1) << 1) {
534 while ( comblen > 0 ) {
535 *out = comb[MAXCOMBLEN-comblen];
545 /* Is Canonical Ordering necessary here? */
548 return o_len-*outlen;
/* Classify the UTF-8 sequence starting at utf8 by its lead byte and
   check that the following bytes are valid for that lead byte.
   Returns (size_t) -1 when no accepted pattern matches.
   Patterns visible here, each with continuation bytes in 0x80-0xBF
   unless noted:
     lead 0xC2-0xDF                       one continuation byte
     lead 0xE0, second byte 0xA0-0xBF     two bytes follow the lead
     lead 0xE1-0xEF                       two continuation bytes
     lead 0xF0, second byte 0x90-0xBF     three bytes follow the lead
     lead 0xF1-0xF3                       three continuation bytes
     lead 0xF4, second byte 0x80-0x8F     three bytes follow the lead
   Because && short-circuits and a 0 byte fails every continuation
   test, the checks do not read beyond a terminating 0.
   NOTE(review): the first branch of the chain and the value returned
   by each branch are not visible here; presumably single-byte values
   are handled first and each branch returns the sequence length in
   bytes -- confirm. */
551 size_t utf8_charlen ( char* utf8 )
555 p = (unsigned char*) utf8;
559 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
561 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
563 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
565 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
567 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
569 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
572 return ((size_t) -1);
/* Validate a UTF-8 string using the same lead-byte and continuation
   byte patterns as utf8_charlen().
   Returns (size_t) -1 when a byte sequence matches none of the
   accepted patterns.
   Patterns visible here, each with continuation bytes in 0x80-0xBF
   unless noted:
     lead 0xC2-0xDF                       one continuation byte
     lead 0xE0, second byte 0xA0-0xBF     two bytes follow the lead
     lead 0xE1-0xEF                       two continuation bytes
     lead 0xF0, second byte 0x90-0xBF     three bytes follow the lead
     lead 0xF1-0xF3                       three continuation bytes
     lead 0xF4, second byte 0x80-0x8F     three bytes follow the lead
   NOTE(review): the loop, the first branch of the chain, the
   per-branch pointer advance and the success return are not visible
   here; presumably the function walks the string to its 0 terminator
   and returns a length -- confirm which unit that length is in. */
576 size_t utf8_strlen_validate ( char * utf8 )
581 p = (unsigned char*) utf8;
584 /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
591 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
594 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
597 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
600 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
603 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
606 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
610 return ((size_t) -1);