+/*******************************************************************
+ NOTE:
+ Early netatalk 2.x was based on UCS-2.
+ UCS-2 cannot represent characters at or above U+10000.
+ Recent netatalk is based on UTF-16.
+ UTF-16 represents characters at or above U+10000 with surrogate pairs.
+ However, surrogate pair handling is complex, dirty, filthy and disagreeable.
+ There might still be latent bugs...
+********************************************************************/
+
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */
#include <sys/stat.h>
#include <atalk/logger.h>
#include <errno.h>
-
-#include <netatalk/endian.h>
+#include <arpa/inet.h>
#include <atalk/unicode.h>
-#include "ucs2_casetable.h"
#include "precompose.h"
#include "byteorder.h"
-#define HANGUL_SBASE 0xAC00
-#define HANGUL_LBASE 0x1100
-#define HANGUL_VBASE 0x1161
-#define HANGUL_TBASE 0x11A7
-#define HANGUL_LCOUNT 19
-#define HANGUL_VCOUNT 21
-#define HANGUL_TCOUNT 28
-#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) /* 588 */
-#define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT) /* 11172 */
-
-#define MAXCOMBLEN 3
-#define MAXCOMBSPLEN 2
-#define COMBBUFLEN 4 /* max(MAXCOMBLEN, MAXCOMBSPLEN*2) */
-
-/*******************************************************************
- Convert a wide character to upper/lower case.
-********************************************************************/
-ucs2_t toupper_w(ucs2_t val)
-{
- if ( val >= 0x0040 && val <= 0x007F)
- return upcase_table_1[val-0x0040];
- if ( val >= 0x00C0 && val <= 0x02BF)
- return upcase_table_2[val-0x00C0];
- if ( val >= 0x0380 && val <= 0x04FF)
- return upcase_table_3[val-0x0380];
- if ( val >= 0x0540 && val <= 0x05BF)
- return upcase_table_4[val-0x0540];
- if ( val >= 0x1E00 && val <= 0x1FFF)
- return upcase_table_5[val-0x1E00];
- if ( val >= 0x2140 && val <= 0x217F)
- return upcase_table_6[val-0x2140];
- if ( val >= 0x24C0 && val <= 0x24FF)
- return upcase_table_7[val-0x24C0];
- if ( val >= 0xFF40 && val <= 0xFF7F)
- return upcase_table_8[val-0xFF40];
-
- return (val);
-}
-
-
-ucs2_t tolower_w(ucs2_t val)
-{
- if ( val >= 0x0040 && val <= 0x007F)
- return lowcase_table_1[val-0x0040];
- if ( val >= 0x00C0 && val <= 0x023F)
- return lowcase_table_2[val-0x00C0];
- if ( val >= 0x0380 && val <= 0x057F)
- return lowcase_table_3[val-0x0380];
- if ( val >= 0x1E00 && val <= 0x1FFF)
- return lowcase_table_4[val-0x1E00];
- if ( val >= 0x2140 && val <= 0x217F)
- return lowcase_table_5[val-0x2140];
- if ( val >= 0x2480 && val <= 0x24FF)
- return lowcase_table_6[val-0x2480];
- if ( val >= 0xFF00 && val <= 0xFF3F)
- return lowcase_table_7[val-0xFF00];
-
- return (val);
-}
-
/*******************************************************************
Convert a string to lower case.
return True if any char is converted
********************************************************************/
+/* surrogate pair support */
+
int strlower_w(ucs2_t *s)
{
int ret = 0;
+
while (*s) {
- ucs2_t v = tolower_w(*s);
- if (v != *s) {
- *s = v;
- ret = 1;
+ if ((0xD800 <= *s) && (*s < 0xDC00)) {
+ if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) {
+ uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1];
+ uint32_t v_sp = tolower_sp(s_sp);
+ if (v_sp != s_sp) {
+ *s = v_sp >> 16;
+ s++;
+ *s = v_sp & 0xFFFF;
+ ret = 1;
+ }
+ }
+ } else {
+ ucs2_t v = tolower_w(*s);
+ if (v != *s) {
+ *s = v;
+ ret = 1;
+ }
}
s++;
}
Convert a string to upper case.
return True if any char is converted
********************************************************************/
+/* surrogate pair support */
+
+/*
+ * Upper-cases the NUL-terminated UTF-16 string s in place.
+ * BMP code units go through toupper_w(); a high surrogate followed by a
+ * low surrogate is converted as one 32-bit value by toupper_sp().
+ * Returns 1 if at least one code unit was rewritten, 0 otherwise.
+ */
int strupper_w(ucs2_t *s)
{
int ret = 0;
+
while (*s) {
- ucs2_t v = toupper_w(*s);
- if (v != *s) {
- *s = v;
- ret = 1;
+ /* *s is a high surrogate (0xD800-0xDBFF) */
+ if ((0xD800 <= *s) && (*s < 0xDC00)) {
+ /* convert only when a low surrogate (0xDC00-0xDFFF) follows; an unpaired high surrogate is left untouched */
+ if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) {
+ /* pack the pair as (hi << 16) | lo */
+ uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1];
+ uint32_t v_sp = toupper_sp(s_sp);
+ if (v_sp != s_sp) {
+ /* store the new high half, step to the low half and store it too */
+ *s = v_sp >> 16;
+ s++;
+ *s = v_sp & 0xFFFF;
+ ret = 1;
+ }
+ }
+ } else {
+ ucs2_t v = toupper_w(*s);
+ if (v != *s) {
+ *s = v;
+ ret = 1;
+ }
}
s++;
}
return ret;
}
-
/*******************************************************************
+wide & sp islower()
determine if a character is lowercase
********************************************************************/
+/* These functions are not used. */
+
+/*
+ * Returns non-zero when tolower_w() maps c to itself.
+ * NOTE(review): presumably this is also true for characters that have no
+ * case at all (digits, punctuation), assuming tolower_w() returns them
+ * unchanged - confirm before relying on it as a strict "is lowercase" test.
+ */
int islower_w(ucs2_t c)
{
return ( c == tolower_w(c));
}
+/*
+ * Surrogate-pair variant of islower_w().
+ * c_sp is passed straight to tolower_sp(); the result is non-zero when the
+ * value comes back unchanged.
+ */
+int islower_sp(uint32_t c_sp)
+{
+    uint32_t lowered = tolower_sp(c_sp);
+
+    return lowered == c_sp;
+}
+
/*******************************************************************
+wide & sp isupper()
determine if a character is uppercase
********************************************************************/
+/* These functions are not used. */
+
+/*
+ * Returns non-zero when toupper_w() maps c to itself.
+ * NOTE(review): presumably this is also true for characters that have no
+ * case at all (digits, punctuation), assuming toupper_w() returns them
+ * unchanged - confirm before relying on it as a strict "is uppercase" test.
+ */
int isupper_w(ucs2_t c)
{
return ( c == toupper_w(c));
}
+/*
+ * Surrogate-pair variant of isupper_w().
+ * c_sp is passed straight to toupper_sp(); the result is non-zero when the
+ * value comes back unchanged.
+ */
+int isupper_sp(uint32_t c_sp)
+{
+    uint32_t raised = toupper_sp(c_sp);
+
+    return raised == c_sp;
+}
/*******************************************************************
- Count the number of characters in a ucs2_t string.
+wide strlen()
+ Count the number of characters in a UTF-16 string.
********************************************************************/
+/* NOTE: one surrogate pair is two characters. */
+
size_t strlen_w(const ucs2_t *src)
{
size_t len;
}
/*******************************************************************
- Count up to max number of characters in a ucs2_t string.
+wide strnlen()
+ Count up to max number of characters in a UTF-16 string.
********************************************************************/
+/* NOTE: one surrogate pair is two characters. */
+
size_t strnlen_w(const ucs2_t *src, size_t max)
{
size_t len;
/*******************************************************************
wide strchr()
********************************************************************/
+/* NOTE: hi and lo of surrogate pair are separately processed. */
+
ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
{
while (*s != 0) {
return NULL;
}
+/*******************************************************************
+wide & sp strcasechr()
+********************************************************************/
+/* NOTE: separately process BMP and surrogate pair */
+
+/*
+ * Returns a pointer to the first code unit of s whose tolower_w() mapping
+ * equals that of c, or NULL when there is none.
+ * Each 16-bit code unit is compared on its own; surrogate pairs are handled
+ * by strcasechr_sp().
+ */
ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
{
while (*s != 0) {
-/* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/
- if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s;
+ if (tolower_w(c) == tolower_w(*s)) return (ucs2_t *)s;
s++;
}
+/* s points at the terminator here, so this matches only when c is 0 */
if (c == *s) return (ucs2_t *)s;
return NULL;
}
+/*
+ * Case-insensitive search for the surrogate pair c_sp, packed as
+ * (hi << 16) | lo, in the NUL-terminated UTF-16 string s.
+ * Every two adjacent code units are packed the same way and compared
+ * through tolower_sp().
+ * Returns a pointer to the first code unit of the match, or NULL.
+ */
+ucs2_t *strcasechr_sp(const ucs2_t *s, uint32_t c_sp)
+{
+    const ucs2_t *p;
+
+    /* an empty string cannot hold a two-unit sequence */
+    if (!*s)
+        return NULL;
+
+    /* stop while p[1] is still a real code unit, never reading past the terminator */
+    for (p = s; p[1] != 0; p++) {
+        uint32_t here_sp = ((uint32_t)p[0] << 16) | (uint32_t)p[1];
+
+        if (tolower_sp(c_sp) == tolower_sp(here_sp))
+            return (ucs2_t *)p;
+    }
+
+    return NULL;
+}
+
+
+/*******************************************************************
+wide strcmp()
+********************************************************************/
+/* no problem of surrogate pair */
int strcmp_w(const ucs2_t *a, const ucs2_t *b)
{
string is longer */
}
+/*******************************************************************
+wide strncmp()
+********************************************************************/
+/* no problem of surrogate pair */
+
int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
{
size_t n = 0;
/*******************************************************************
wide strstr()
********************************************************************/
+/* no problem of surrogate pair */
+
ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
{
ucs2_t *r;
return NULL;
}
+/*******************************************************************
+wide strcasestr()
+********************************************************************/
+/* surrogate pair support */
+
ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
{
ucs2_t *r;
slen = strlen_w(s);
inslen = strlen_w(ins);
r = (ucs2_t *)s;
- while ((r = strcasechr_w(r, *ins))) {
- if (strncasecmp_w(r, ins, inslen) == 0) return r;
- r++;
+
+ if ((0xD800 <= *ins) && (*ins < 0xDC00)) {
+ if ((0xDC00 <= ins[1]) && (ins[1] < 0xE000)) {
+ u_int32_t ins_sp = (u_int32_t)*ins << 16 | (u_int32_t)ins[1];
+ while ((r = strcasechr_sp(r, ins_sp))) {
+ if (strncasecmp_w(r, ins, inslen) == 0) return r;
+ r++;
+ }
+ } else {
+ return NULL; /* illegal sequence */
+ }
+ } else {
+ while ((r = strcasechr_w(r, *ins))) {
+ if (strncasecmp_w(r, ins, inslen) == 0) return r;
+ r++;
+ }
}
return NULL;
}
-
-
-
/*******************************************************************
+wide strcasecmp()
case insensitive string comparison
********************************************************************/
+/* surrogate pair support */
+
+/*
+ * Case-insensitively compares two NUL-terminated UTF-16 strings.
+ * BMP code units are compared through tolower_w(); when *a is a high
+ * surrogate, the two code units at a and at b are packed as (hi << 16) | lo
+ * and compared through tolower_sp().
+ * Returns a negative value, 0 or a positive value when a sorts before,
+ * equal to or after b.
+ */
int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
{
- while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
+    while (*a && *b) {
+        if ((0xD800 <= *a) && (*a < 0xDC00)) {
+            /*
+             * Compare the packed values as unsigned numbers.  Subtracting
+             * them and storing the result in an int gives the wrong sign
+             * once the difference exceeds INT_MAX (e.g. a surrogate in a
+             * against a low BMP unit in b).
+             */
+            uint32_t a_sp = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]);
+            uint32_t b_sp = tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1]);
+
+            if (a_sp != b_sp) return (a_sp < b_sp) ? -1 : 1;
+            a++;
+            b++;
+            if (!(*a && *b)) return (tolower_w(*a) - tolower_w(*b)); /* avoid buffer over run */
+        } else {
+            int ret = tolower_w(*a) - tolower_w(*b);
+
+            if (ret) return ret;
+        }
+        a++;
+        b++;
+    }
return (tolower_w(*a) - tolower_w(*b));
}
/*******************************************************************
-case insensitive string comparison, lenght limited
+wide strncasecmp()
+case insensitive string comparison, length limited
********************************************************************/
+/* NOTE: if 'len' ends between the two halves of a surrogate pair, the low half (code unit 'len+1') is compared as well */
+
+/*
+ * Case-insensitively compares at most len code units of two NUL-terminated
+ * UTF-16 strings.
+ * BMP code units are compared through tolower_w(); when *a is a high
+ * surrogate, the two code units at a and at b are packed as (hi << 16) | lo
+ * and compared through tolower_sp(), even if len ends between the halves.
+ * Returns a negative value, 0 or a positive value when a sorts before,
+ * equal to or after b.
+ */
int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
{
size_t n = 0;
- while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
+
+    while ((n < len) && *a && *b) {
+        if ((0xD800 <= *a) && (*a < 0xDC00)) {
+            /*
+             * Compare the packed values as unsigned numbers.  Subtracting
+             * them and storing the result in an int gives the wrong sign
+             * once the difference exceeds INT_MAX.
+             */
+            uint32_t a_sp = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]);
+            uint32_t b_sp = tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1]);
+
+            if (a_sp != b_sp) return (a_sp < b_sp) ? -1 : 1;
+            a++;
+            b++;
+            n++;
+            /*
+             * The limit fell between the halves of a pair that already
+             * compared equal as a whole; the raw low halves may still differ
+             * in case, so do not compare them again.
+             */
+            if (n >= len) return 0;
+            /* one string ends right after its high half: avoid buffer over run */
+            if (!(*a && *b)) return (tolower_w(*a) - tolower_w(*b));
+        } else {
+            int ret = tolower_w(*a) - tolower_w(*b);
+
+            if (ret) return ret;
+        }
+        a++;
+        b++;
+        n++;
+    }
return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
}
/*******************************************************************
+wide strndup()
duplicate string
********************************************************************/
+/* NOTE: does not check for isolated (unpaired) surrogates */
/* if len == 0 then duplicate the whole string */
+
ucs2_t *strndup_w(const ucs2_t *src, size_t len)
{
ucs2_t *dest;
return dest;
}
+/*******************************************************************
+wide strdup()
+duplicate string
+********************************************************************/
+/* no problem of surrogate pair */
+
ucs2_t *strdup_w(const ucs2_t *src)
{
return strndup_w(src, 0);
/*******************************************************************
copy a string with max len
********************************************************************/
+/* This function is not used. */
+/* NOTE: does not check for isolated (unpaired) surrogates */
ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
{
/*******************************************************************
append a string of len bytes and add a terminator
********************************************************************/
+/* These functions are not used. */
+/* NOTE: does not check for isolated (unpaired) surrogates */
ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
{
size_t start;
return dest;
}
-
+/* no problem of surrogate pair */
ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
{
size_t start;
static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
{
int min = 0;
- int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1;
+ int max = PRECOMP_COUNT - 1;
int mid;
- u_int32_t sought = (base << 16) | comb, that;
+ uint32_t sought = (base << 16) | comb, that;
/* binary search */
while (max >= min) {
}
/* ------------------------ */
-static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
+static uint32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
{
int min = 0;
- int max = sizeof(precompositions_sp) / sizeof(precompositions_sp[0]) - 1;
+ int max = PRECOMP_SP_COUNT - 1;
int mid;
- u_int64_t sought = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that;
+ uint64_t sought_sp = ((uint64_t)base_sp << 32) | (uint64_t)comb_sp, that_sp;
/* binary search */
while (max >= min) {
mid = (min + max) / 2;
- that = ((u_int64_t)precompositions_sp[mid].base << 32) | ((u_int64_t)precompositions_sp[mid].comb);
- if (that < sought) {
+ that_sp = ((uint64_t)precompositions_sp[mid].base_sp << 32) | ((uint64_t)precompositions_sp[mid].comb_sp);
+ if (that_sp < sought_sp) {
min = mid + 1;
- } else if (that > sought) {
+ } else if (that_sp > sought_sp) {
max = mid - 1;
} else {
- return precompositions_sp[mid].replacement;
+ return precompositions_sp[mid].replacement_sp;
}
}
/* no match */
}
/* -------------------------- */
-static u_int32_t do_decomposition(ucs2_t base)
+static uint32_t do_decomposition(ucs2_t base)
{
int min = 0;
- int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1;
+ int max = DECOMP_COUNT - 1;
int mid;
- u_int32_t sought = base;
- u_int32_t result, that;
+ uint32_t sought = base;
+ uint32_t result, that;
/* binary search */
while (max >= min) {
}
/* -------------------------- */
-static u_int64_t do_decomposition_sp(unsigned int base)
+static uint64_t do_decomposition_sp(unsigned int base_sp)
{
int min = 0;
- int max = sizeof(decompositions_sp) / sizeof(decompositions_sp[0]) - 1;
+ int max = DECOMP_SP_COUNT - 1;
int mid;
- u_int32_t sought = base;
- u_int32_t that;
- u_int64_t result;
+ uint32_t sought_sp = base_sp;
+ uint32_t that_sp;
+ uint64_t result_sp;
/* binary search */
while (max >= min) {
mid = (min + max) / 2;
- that = decompositions_sp[mid].replacement;
- if (that < sought) {
+ that_sp = decompositions_sp[mid].replacement_sp;
+ if (that_sp < sought_sp) {
min = mid + 1;
- } else if (that > sought) {
+ } else if (that_sp > sought_sp) {
max = mid - 1;
} else {
- result = ((u_int64_t)decompositions_sp[mid].base << 32) | ((u_int64_t)decompositions_sp[mid].comb);
- return result;
+ result_sp = ((uint64_t)decompositions_sp[mid].base_sp << 32) | ((uint64_t)decompositions_sp[mid].comb_sp);
+ return result_sp;
}
}
/* no match */
we can't use static, this stuff needs to be reentrant
static char comp[MAXPATHLEN +1];
- exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
- in decompositions[] from decomposition according to AFP 3.x spec
-
- We don't implement Singleton and Canonical Ordering
+ We don't implement Singleton and Canonical Ordering,
+ and we ignore CompositionExclusions.txt,
because they cause the problem of the roundtrip
- such as Dancing Icon
+ such as Dancing Icon.
+
+ exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
+ in precompose.h from composition according to AFP 3.x spec
********************************************************************/
size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
{
size_t i;
ucs2_t base, comb;
- u_int32_t base_sp, comb_sp;
+ uint32_t base_sp, comb_sp;
ucs2_t *in, *out;
- ucs2_t hangul_lindex, hangul_vindex;
+ ucs2_t lindex, vindex;
ucs2_t result;
- u_int32_t result_sp;
+ uint32_t result_sp;
size_t o_len = *outlen;
if (!inplen || (inplen & 1) || inplen > o_len)
base = *in;
while (*outlen > 2) {
i += 2;
- in++;
-
if (i == inplen) {
*out = base;
out++;
*outlen -= 2;
return o_len - *outlen;
}
-
+ in++;
comb = *in;
result = 0;
/* Unicode Standard Annex #15 A10.3 Hangul Composition */
/* Step 1 <L,V> */
- else if ((HANGUL_VBASE <= comb) && (comb <= HANGUL_VBASE + HANGUL_VCOUNT)) {
- if ((HANGUL_LBASE <= base) && (base < HANGUL_LBASE + HANGUL_LCOUNT)) {
+ else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) {
+ if ((LBASE <= base) && (base < LBASE + LCOUNT)) {
result = 1;
- hangul_lindex = base - HANGUL_LBASE;
- hangul_vindex = comb - HANGUL_VBASE;
- base = HANGUL_SBASE + (hangul_lindex * HANGUL_VCOUNT + hangul_vindex) * HANGUL_TCOUNT;
+ lindex = base - LBASE;
+ vindex = comb - VBASE;
+ base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
}
}
/* Step 2 <LV,T> */
- else if ((HANGUL_TBASE < comb) && (comb < HANGUL_TBASE + HANGUL_TCOUNT)) {
- if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE +HANGUL_SCOUNT) && (((base - HANGUL_SBASE) % HANGUL_TCOUNT) == 0)) {
+ else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) {
+ if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) {
result = 1;
- base += comb - HANGUL_TBASE;
+ base += comb - TBASE;
}
}
/* Binary Search for Surrogate Pair */
else if ((0xD800 <= base) && (base < 0xDC00)) {
- if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
- base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
+ if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 6 <= inplen)) {
+ base_sp = ((uint32_t)base << 16) | (uint32_t)comb;
do {
- comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
+ comb_sp = ((uint32_t)in[1] << 16) | (uint32_t)in[2];
if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
base_sp = result_sp;
i += 4;
in +=2;
}
- } while ((i + 4 <= inplen) && result_sp) ;
+ } while ((i + 6 <= inplen) && result_sp) ;
*out = base_sp >> 16;
out++;
*outlen -= 2;
i += 2;
+ if (i == inplen) {
+ out++;
+ *out = 0;
+ return o_len - *outlen;
+ }
in++;
base = *in;
size_t i;
size_t comblen;
ucs2_t base, comb[COMBBUFLEN];
- u_int32_t base_sp;
- ucs2_t hangul_sindex, tjamo;
+ uint32_t base_sp;
+ ucs2_t sindex, tjamo;
ucs2_t *in, *out;
unsigned int result;
- u_int64_t result_sp;
+ uint64_t result_sp;
size_t o_len = *outlen;
if (!inplen || (inplen & 1))
if (base <= 0x007f) ;
/* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
- else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) {
- hangul_sindex = base - HANGUL_SBASE;
- base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT;
- comb[COMBBUFLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
+ else if ((SBASE <= base) && (base < SBASE + SCOUNT)) {
+ sindex = base - SBASE;
+ base = LBASE + sindex / NCOUNT;
+ comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT;
/* <L,V> */
- if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) {
+ if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) {
comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
comblen = 1;
}
/* Binary Search for Surrogate Pair */
else if ((0xD800 <= base) && (base < 0xDC00)) {
if (i + 2 < inplen) {
- base_sp = ((u_int32_t)base << 16) | (u_int32_t)in[1];
+ base_sp = ((uint32_t)base << 16) | (uint32_t)in[1];
do {
if ( !(result_sp = do_decomposition_sp(base_sp))) break;
comblen += 2;
base_sp = result_sp >> 32;
comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */
comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */
- } while (comblen < (MAXCOMBSPLEN<<1));
+ } while (comblen < MAXCOMBSPLEN);
if (*outlen < (comblen + 1) << 1) {
errno = E2BIG;