3 #endif /* HAVE_CONFIG_H */
10 #include <atalk/logger.h>
13 #include <netatalk/endian.h>
15 #include <atalk/unicode.h>
16 #include "precompose.h"
17 #include "byteorder.h"
19 /*******************************************************************
20 Convert a string to lower case.
21 return True if any char is converted
22 ********************************************************************/
23 int strlower_w(ucs2_t *s)
27 ucs2_t v = tolower_w(*s);
37 /*******************************************************************
38 Convert a string to upper case.
39 return True if any char is converted
40 ********************************************************************/
41 int strupper_w(ucs2_t *s)
45 ucs2_t v = toupper_w(*s);
56 /*******************************************************************
57 determine if a character is lowercase
58 ********************************************************************/
59 int islower_w(ucs2_t c)
61 return ( c == tolower_w(c));
64 /*******************************************************************
65 determine if a character is uppercase
66 ********************************************************************/
67 int isupper_w(ucs2_t c)
69 return ( c == toupper_w(c));
73 /*******************************************************************
74 Count the number of characters in a ucs2_t string.
75 ********************************************************************/
76 size_t strlen_w(const ucs2_t *src)
80 for(len = 0; *src++; len++) ;
85 /*******************************************************************
86 Count up to max number of characters in a ucs2_t string.
87 ********************************************************************/
88 size_t strnlen_w(const ucs2_t *src, size_t max)
92 for(len = 0; *src++ && (len < max); len++) ;
97 /*******************************************************************
99 ********************************************************************/
100 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
103 if (c == *s) return (ucs2_t *)s;
106 if (c == *s) return (ucs2_t *)s;
111 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
114 /* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
115 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
118 if (c == *s) return (ucs2_t *)s;
124 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
126 while (*b && *a == *b) { a++; b++; }
128 /* warning: if *a != *b and both are not 0 we return a random
129 greater or lesser than 0 number not related to which
133 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
136 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
137 return (len - n)?(*a - *b):0;
140 /*******************************************************************
142 ********************************************************************/
143 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
148 if (!s || !*s || !ins || !*ins) return NULL;
150 inslen = strlen_w(ins);
152 while ((r = strchr_w(r, *ins))) {
153 if (strncmp_w(r, ins, inslen) == 0) return r;
159 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
164 if (!s || !*s || !ins || !*ins) return NULL;
166 inslen = strlen_w(ins);
168 while ((r = strcasechr_w(r, *ins))) {
169 if (strncasecmp_w(r, ins, inslen) == 0) return r;
178 /*******************************************************************
179 case insensitive string comparison
180 ********************************************************************/
181 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
183 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
184 return (tolower_w(*a) - tolower_w(*b));
187 /*******************************************************************
188 case insensitive string comparison, length limited
189 ********************************************************************/
190 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
193 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
194 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
197 /*******************************************************************
199 ********************************************************************/
200 /* if len == 0 then duplicate the whole string */
201 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
205 if (!len) len = strlen_w(src);
206 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
208 LOG (log_error, logtype_default, "strdup_w: out of memory!");
212 memcpy(dest, src, len * sizeof(ucs2_t));
218 ucs2_t *strdup_w(const ucs2_t *src)
220 return strndup_w(src, 0);
223 /*******************************************************************
224 copy a string with max len
225 ********************************************************************/
227 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
231 if (!dest || !src) return NULL;
233 for (len = 0; (src[len] != 0) && (len < max); len++)
234 dest[len] = src[len];
242 /*******************************************************************
243 append at most max characters (ucs2_t units, not bytes) of src and add a terminator
244 ********************************************************************/
246 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
251 if (!dest || !src) return NULL;
253 start = strlen_w(dest);
254 len = strnlen_w(src, max);
256 memcpy(&dest[start], src, len*sizeof(ucs2_t));
263 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
268 if (!dest || !src) return NULL;
270 start = strlen_w(dest);
273 memcpy(&dest[start], src, len*sizeof(ucs2_t));
280 /*******************************************************************
281 binary search for pre|decomposition
282 ********************************************************************/
284 static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
287 int max = PRECOMP_COUNT - 1;
289 u_int32_t sought = (base << 16) | comb, that;
293 mid = (min + max) / 2;
294 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
297 } else if (that > sought) {
300 return precompositions[mid].replacement;
307 /* ------------------------ */
308 static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
311 int max = PRECOMP_SP_COUNT - 1;
313 u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp;
317 mid = (min + max) / 2;
318 that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp);
319 if (that_sp < sought_sp) {
321 } else if (that_sp > sought_sp) {
324 return precompositions_sp[mid].replacement_sp;
331 /* -------------------------- */
332 static u_int32_t do_decomposition(ucs2_t base)
335 int max = DECOMP_COUNT - 1;
337 u_int32_t sought = base;
338 u_int32_t result, that;
342 mid = (min + max) / 2;
343 that = decompositions[mid].replacement;
346 } else if (that > sought) {
349 result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
357 /* -------------------------- */
358 static u_int64_t do_decomposition_sp(unsigned int base_sp)
361 int max = DECOMP_SP_COUNT - 1;
363 u_int32_t sought_sp = base_sp;
369 mid = (min + max) / 2;
370 that_sp = decompositions_sp[mid].replacement_sp;
371 if (that_sp < sought_sp) {
373 } else if (that_sp > sought_sp) {
376 result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp);
384 /*******************************************************************
387 we can't use static, this stuff needs to be reentrant
388 static char comp[MAXPATHLEN +1];
390 We don't implement Singleton and Canonical Ordering,
391 and we ignore CompositionExclusions.txt,
392 because they cause round-trip problems
393 such as Dancing Icon.
395 exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
396 in precompose.h from composition according to AFP 3.x spec
397 ********************************************************************/
399 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
403 u_int32_t base_sp, comb_sp;
405 ucs2_t lindex, vindex;
408 size_t o_len = *outlen;
410 if (!inplen || (inplen & 1) || inplen > o_len)
418 while (*outlen > 2) {
427 return o_len - *outlen;
433 /* Non-Combination Character */
436 /* Unicode Standard Annex #15 A10.3 Hangul Composition */
438 else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) {
439 if ((LBASE <= base) && (base < LBASE + LCOUNT)) {
441 lindex = base - LBASE;
442 vindex = comb - VBASE;
443 base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
448 else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) {
449 if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) {
451 base += comb - TBASE;
455 /* Binary Search for Surrogate Pair */
456 else if ((0xD800 <= base) && (base < 0xDC00)) {
457 if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
458 base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
460 comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
461 if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
466 } while ((i + 4 <= inplen) && result_sp) ;
468 *out = base_sp >> 16;
477 *out = base_sp & 0xFFFF;
489 /* Binary Search for BMP */
490 else if (result = do_precomposition(base, comb)) {
506 /* --------------- */
507 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
511 ucs2_t base, comb[COMBBUFLEN];
513 ucs2_t sindex, tjamo;
517 size_t o_len = *outlen;
519 if (!inplen || (inplen & 1))
529 /* check ASCII first. this is frequent. */
530 if (base <= 0x007f) ;
532 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
533 else if ((SBASE <= base) && (base < SBASE + SCOUNT)) {
534 sindex = base - SBASE;
535 base = LBASE + sindex / NCOUNT;
536 comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT;
539 if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) {
540 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
546 comb[COMBBUFLEN-1] = tjamo;
551 /* Binary Search for Surrogate Pair */
552 else if ((0xD800 <= base) && (base < 0xDC00)) {
553 if (i + 2 < inplen) {
554 base_sp = ((u_int32_t)base << 16) | (u_int32_t)in[1];
556 if ( !(result_sp = do_decomposition_sp(base_sp))) break;
558 base_sp = result_sp >> 32;
559 comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */
560 comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */
561 } while (comblen < MAXCOMBSPLEN);
563 if (*outlen < (comblen + 1) << 1) {
568 *out = base_sp >> 16; /* hi */
572 base = base_sp & 0xFFFF; /* lo */
579 /* Binary Search for BMP */
582 if ( !(result = do_decomposition(base))) break;
585 comb[COMBBUFLEN-comblen] = result & 0xFFFF;
586 } while ((0x007f < base) && (comblen < MAXCOMBLEN));
589 if (*outlen < (comblen + 1) << 1) {
598 while ( comblen > 0 ) {
599 *out = comb[COMBBUFLEN-comblen];
610 return o_len-*outlen;
613 /*******************************************************************
614 length of UTF-8 character and string
615 ********************************************************************/
617 size_t utf8_charlen ( char* utf8 )
621 p = (unsigned char*) utf8;
625 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
627 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
629 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
631 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
633 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
635 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
638 return ((size_t) -1);
642 size_t utf8_strlen_validate ( char * utf8 )
647 p = (unsigned char*) utf8;
650 /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
657 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
660 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
663 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
666 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
669 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
672 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
676 return ((size_t) -1);