3 #endif /* HAVE_CONFIG_H */
10 #include <atalk/logger.h>
13 #include <netatalk/endian.h>
15 #include <atalk/unicode.h>
16 #include "ucs2_casetable.h"
17 #include "precompose.h"
18 #include "byteorder.h"
21 ucs2_t toupper_w(ucs2_t val)
23 if ( val >= 0x0040 && val <= 0x007F)
24 return upcase_table_1[val-0x0040];
25 if ( val >= 0x00C0 && val <= 0x02BF)
26 return upcase_table_2[val-0x00C0];
27 if ( val >= 0x0380 && val <= 0x04FF)
28 return upcase_table_3[val-0x0380];
29 if ( val >= 0x0540 && val <= 0x05BF)
30 return upcase_table_4[val-0x0540];
31 if ( val >= 0x1E00 && val <= 0x1FFF)
32 return upcase_table_5[val-0x1E00];
33 if ( val >= 0x2140 && val <= 0x217F)
34 return upcase_table_6[val-0x2140];
35 if ( val >= 0x24C0 && val <= 0x24FF)
36 return upcase_table_7[val-0x24C0];
37 if ( val >= 0xFF40 && val <= 0xFF7F)
38 return upcase_table_8[val-0xFF40];
44 ucs2_t tolower_w(ucs2_t val)
46 if ( val >= 0x0040 && val <= 0x007F)
47 return lowcase_table_1[val-0x0040];
48 if ( val >= 0x00C0 && val <= 0x023F)
49 return lowcase_table_2[val-0x00C0];
50 if ( val >= 0x0380 && val <= 0x057F)
51 return lowcase_table_3[val-0x0380];
52 if ( val >= 0x1E00 && val <= 0x1FFF)
53 return lowcase_table_4[val-0x1E00];
54 if ( val >= 0x2140 && val <= 0x217F)
55 return lowcase_table_5[val-0x2140];
56 if ( val >= 0x2480 && val <= 0x24FF)
57 return lowcase_table_6[val-0x2480];
58 if ( val >= 0xFF00 && val <= 0xFF3F)
59 return lowcase_table_7[val-0xFF00];
64 /*******************************************************************
65 Convert a string to lower case.
66 return True if any char is converted
67 ********************************************************************/
68 int strlower_w(ucs2_t *s)
72 ucs2_t v = tolower_w(*s);
82 /*******************************************************************
83 Convert a string to upper case.
84 return True if any char is converted
85 ********************************************************************/
86 int strupper_w(ucs2_t *s)
90 ucs2_t v = toupper_w(*s);
101 /*******************************************************************
102 determine if a character is lowercase
103 ********************************************************************/
104 int islower_w(ucs2_t c)
106 return ( c == tolower_w(c));
109 /*******************************************************************
110 determine if a character is uppercase
111 ********************************************************************/
112 int isupper_w(ucs2_t c)
114 return ( c == toupper_w(c));
118 /*******************************************************************
119 Count the number of characters in a ucs2_t string.
120 ********************************************************************/
121 size_t strlen_w(const ucs2_t *src)
125 for(len = 0; *src++; len++) ;
130 /*******************************************************************
131 Count up to max number of characters in a ucs2_t string.
132 ********************************************************************/
133 size_t strnlen_w(const ucs2_t *src, size_t max)
137 for(len = 0; *src++ && (len < max); len++) ;
142 /*******************************************************************
144 ********************************************************************/
145 ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
148 if (c == *s) return (ucs2_t *)s;
151 if (c == *s) return (ucs2_t *)s;
156 ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
159 /* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
160 if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
163 if (c == *s) return (ucs2_t *)s;
169 int strcmp_w(const ucs2_t *a, const ucs2_t *b)
171 while (*b && *a == *b) { a++; b++; }
173 /* warning: if *a != *b and both are not 0 we retrun a random
174 greater or lesser than 0 number not realted to which
178 int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
181 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
182 return (len - n)?(*a - *b):0;
185 /*******************************************************************
187 ********************************************************************/
188 ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
193 if (!s || !*s || !ins || !*ins) return NULL;
195 inslen = strlen_w(ins);
197 while ((r = strchr_w(r, *ins))) {
198 if (strncmp_w(r, ins, inslen) == 0) return r;
204 ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
209 if (!s || !*s || !ins || !*ins) return NULL;
211 inslen = strlen_w(ins);
213 while ((r = strcasechr_w(r, *ins))) {
214 if (strncasecmp_w(r, ins, inslen) == 0) return r;
223 /*******************************************************************
224 case insensitive string comparison
225 ********************************************************************/
226 int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
228 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
229 return (tolower_w(*a) - tolower_w(*b));
232 /*******************************************************************
233 case insensitive string comparison, lenght limited
234 ********************************************************************/
235 int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
238 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
239 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
242 /*******************************************************************
244 ********************************************************************/
245 /* if len == 0 then duplicate the whole string */
246 ucs2_t *strndup_w(const ucs2_t *src, size_t len)
250 if (!len) len = strlen_w(src);
251 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
253 LOG (log_error, logtype_default, "strdup_w: out of memory!\n");
257 memcpy(dest, src, len * sizeof(ucs2_t));
263 ucs2_t *strdup_w(const ucs2_t *src)
265 return strndup_w(src, 0);
268 /*******************************************************************
269 copy a string with max len
270 ********************************************************************/
272 ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
276 if (!dest || !src) return NULL;
278 for (len = 0; (src[len] != 0) && (len < max); len++)
279 dest[len] = src[len];
287 /*******************************************************************
288 append a string of len bytes and add a terminator
289 ********************************************************************/
291 ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
296 if (!dest || !src) return NULL;
298 start = strlen_w(dest);
299 len = strnlen_w(src, max);
301 memcpy(&dest[start], src, len*sizeof(ucs2_t));
308 ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
313 if (!dest || !src) return NULL;
315 start = strlen_w(dest);
318 memcpy(&dest[start], src, len*sizeof(ucs2_t));
325 /* ------------------------ */
326 ucs2_t do_precomposition(unsigned int base, unsigned int comb)
329 int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1;
331 u_int32_t sought = (base << 16) | comb, that;
335 mid = (min + max) / 2;
336 that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
339 } else if (that > sought) {
342 return precompositions[mid].replacement;
349 /* -------------------------- */
350 u_int32_t do_decomposition(ucs2_t base)
353 int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1;
355 u_int32_t sought = base;
356 u_int32_t result, that;
360 mid = (min + max) / 2;
361 that = decompositions[mid].replacement;
364 } else if (that > sought) {
367 result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
/*
 * precompose_w: read ucs2_t code units from name and write a result to
 * comp, replacing a (base, comb) pair with a single code unit whenever
 * do_precomposition() knows one.
 *
 * *outlen is read on entry as the output budget. The odd-length test
 * on inplen suggests both lengths are byte counts -- TODO confirm
 * against callers.
 *
 * NOTE(review): only part of the body is visible here; the comments
 * below describe the visible statements only.
 */
375 /* we can't use static, this stuff needs to be reentrant */
376 /* static char comp[MAXPATHLEN +1]; */
378 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
/* Remember the initial budget so the amount used can be reported. */
384 size_t o_len = *outlen;
/* Guard: empty input, odd input length, or input larger than the
 * output budget. */
386 if (!inplen || (inplen & 1) || inplen > o_len)
390 out = (ucs2_t *)comp;
/* Keep going while more than 2 of the output budget remain. */
393 while (*outlen > 2) {
/* Result: how much of the output budget has been used. */
400 return o_len - *outlen;
/* A lookup is only attempted when comb is 0x300 or above; a zero
 * result from do_precomposition() means there is no precomposed form. */
403 if (comb >= 0x300 && (result = do_precomposition(base, comb))) {
/* Result: how much of the output budget has been used. */
411 return o_len - *outlen;
427 /* --------------- */
/*
 * decompose_w: read ucs2_t code units from name and write a result to
 * comp, expanding a code unit whenever do_decomposition() knows a
 * decomposition for it.
 *
 * *outlen is read on entry as the output budget. The odd-length test
 * on inplen suggests both lengths are byte counts -- TODO confirm
 * against callers.
 *
 * NOTE(review): only part of the body is visible here; the comments
 * below describe the visible statements only.
 */
429 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
/* Remember the initial budget so the amount used can be reported. */
435 size_t o_len = *outlen;
/* Guard: empty input or odd input length. */
437 if (!inplen || (inplen & 1))
441 out = (ucs2_t *)comp;
/* Values 0x2000-0x2fff and 0xfe30-0xfe4f take this branch instead of
 * the decomposition lookup. */
449 if ( (base > 0x1fff && base < 0x3000) || (base > 0xfe2f && base < 0xfe50)) {
450 /* exclude these ranges from decomposition according to AFP 3.1 spec */
/* A zero result from do_decomposition() means there is no decomposition. */
456 if ((result = do_decomposition(base))) {
/* The low 16 bits of the packed result are the combining mark. */
464 *out = result & 0xffff;
/* Result: how much of the output budget has been used. */
478 return o_len-*outlen;
/*
 * Return the length in bytes (1-4) of the UTF-8 sequence that starts
 * at utf8, or (size_t)-1 if the bytes there do not form a valid
 * sequence.
 *
 * The lead byte selects the sequence length and the allowed range of
 * the second byte; the narrower second-byte ranges after 0xE0, 0xF0
 * and 0xF4 reject overlong forms and values above U+10FFFF. Lead bytes
 * 0xC0, 0xC1 and 0xF5-0xFF are never valid.
 *
 * utf8 must be zero-terminated. Bytes are checked in order and the
 * scan stops at the first invalid one, so nothing past the terminator
 * is read. A terminator as the first byte counts as a 1-byte sequence.
 */
size_t utf8_charlen ( char* utf8 )
{
	const unsigned char *p = (const unsigned char *) utf8;
	unsigned char second_min = 0x80;
	unsigned char second_max = 0xBF;
	size_t len;
	size_t i;

	if (p[0] < 0x80)
		return 1;

	if (p[0] > 0xC1 && p[0] < 0xE0) {
		len = 2;
	} else if (p[0] >= 0xE0 && p[0] < 0xF0) {
		len = 3;
		if (p[0] == 0xE0)
			second_min = 0xA0;
	} else if (p[0] >= 0xF0 && p[0] <= 0xF4) {
		len = 4;
		if (p[0] == 0xF0)
			second_min = 0x90;
		else if (p[0] == 0xF4)
			second_max = 0x8F;
	} else {
		return (size_t) -1;
	}

	if (p[1] < second_min || p[1] > second_max)
		return (size_t) -1;

	/* Any remaining bytes are plain continuation bytes. */
	for (i = 2; i < len; i++) {
		if (p[i] < 0x80 || p[i] > 0xBF)
			return (size_t) -1;
	}

	return len;
}
506 size_t utf8_strlen_validate ( char * utf8 )
511 p = (unsigned char*) utf8;
514 /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */
521 else if ( *p > 0xC1 && *p < 0xe0 && *(p+1) > 0x7f && *(p+1) < 0xC0)
524 else if ( *p == 0xe0 && *(p+1) > 0x9f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
527 else if ( *p > 0xe0 && *p < 0xf0 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0)
530 else if ( *p == 0xf0 && *(p+1) > 0x8f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
533 else if ( *p > 0xf0 && *p < 0xf4 && *(p+1) > 0x7f && *(p+1) < 0xc0 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
536 else if ( *p == 0xf4 && *(p+1) > 0x7f && *(p+1) < 0x90 && *(p+2) > 0x7f && *(p+2) < 0xc0 && *(p+3) > 0x7f && *(p+3) < 0xc0 )
540 return ((size_t) -1);