X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libatalk%2Funicode%2Futil_unistr.c;h=c163d6f6e3f6366d75d7503ba64787dd3356e255;hb=654f86c6e05414423af719716ad028f7af7a65f5;hp=d08f86262e659c2ca643b596c2d672fd04f02de7;hpb=381bdbb4d06e6a9050050c7351264656f6a5a8df;p=netatalk.git diff --git a/libatalk/unicode/util_unistr.c b/libatalk/unicode/util_unistr.c index d08f8626..c163d6f6 100644 --- a/libatalk/unicode/util_unistr.c +++ b/libatalk/unicode/util_unistr.c @@ -1,3 +1,13 @@ +/******************************************************************* + NOTE: + The early netatalk 2.x was based on UCS-2. + UCS-2 don't support chars above U+10000. + Recent netatalk is based on UTF-16. + UTF-16 can support chars above U+10000, using Surrogate Pair. + However, Surrogate Pair is complex, dirty, filthy and disagreeable. + There might still be latent bugs... +********************************************************************/ + #ifdef HAVE_CONFIG_H #include "config.h" #endif /* HAVE_CONFIG_H */ @@ -9,81 +19,41 @@ #include #include #include - -#include +#include #include -#include "ucs2_casetable.h" -#include "precompose.h" -#include "byteorder.h" - -#define HANGUL_SBASE 0xAC00 -#define HANGUL_LBASE 0x1100 -#define HANGUL_VBASE 0x1161 -#define HANGUL_TBASE 0x11A7 -#define HANGUL_LCOUNT 19 -#define HANGUL_VCOUNT 21 -#define HANGUL_TCOUNT 28 -#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) /* 588 */ -#define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT) /* 11172 */ - -#define MAXCOMBLEN 3 - -ucs2_t toupper_w(ucs2_t val) -{ - if ( val >= 0x0040 && val <= 0x007F) - return upcase_table_1[val-0x0040]; - if ( val >= 0x00C0 && val <= 0x02BF) - return upcase_table_2[val-0x00C0]; - if ( val >= 0x0380 && val <= 0x04FF) - return upcase_table_3[val-0x0380]; - if ( val >= 0x0540 && val <= 0x05BF) - return upcase_table_4[val-0x0540]; - if ( val >= 0x1E00 && val <= 0x1FFF) - return upcase_table_5[val-0x1E00]; - if ( val >= 0x2140 && val <= 0x217F) - return upcase_table_6[val-0x2140]; - if ( val >= 0x24C0 && val <= 0x24FF) - return upcase_table_7[val-0x24C0]; - if ( val >= 0xFF40 && val <= 0xFF7F) - return upcase_table_8[val-0xFF40]; - - return (val); -} +#include - -ucs2_t tolower_w(ucs2_t val) -{ - if ( val >= 0x0040 && val <= 0x007F) - return lowcase_table_1[val-0x0040]; - if ( val >= 0x00C0 && val <= 0x023F) - return lowcase_table_2[val-0x00C0]; - if ( val >= 0x0380 && val <= 0x057F) - return lowcase_table_3[val-0x0380]; - if ( val >= 0x1E00 && val <= 0x1FFF) - return lowcase_table_4[val-0x1E00]; - if ( val >= 0x2140 && val <= 0x217F) - return lowcase_table_5[val-0x2140]; - if ( val >= 0x2480 && val <= 0x24FF) - return lowcase_table_6[val-0x2480]; - if ( val >= 0xFF00 && val <= 0xFF3F) - return lowcase_table_7[val-0xFF00]; - - return (val); -} +#include "precompose.h" /******************************************************************* Convert a string to lower case. return True if any char is converted ********************************************************************/ +/* surrogate pair support */ + int strlower_w(ucs2_t *s) { int ret = 0; + while (*s) { - ucs2_t v = tolower_w(*s); - if (v != *s) { - *s = v; - ret = 1; + if ((0xD800 <= *s) && (*s < 0xDC00)) { + if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) { + uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1]; + uint32_t v_sp = tolower_sp(s_sp); + if (v_sp != s_sp) { + *s = v_sp >> 16; + s++; + *s = v_sp & 0xFFFF; + ret = 1; + } + } + } else { + ucs2_t v = tolower_w(*s); + if (v != *s) { + *s = v; + ret = 1; + } } s++; } @@ -94,41 +64,74 @@ int strlower_w(ucs2_t *s) Convert a string to upper case. return True if any char is converted ********************************************************************/ +/* surrogate pair support */ + int strupper_w(ucs2_t *s) { int ret = 0; + while (*s) { - ucs2_t v = toupper_w(*s); - if (v != *s) { - *s = v; - ret = 1; + if ((0xD800 <= *s) && (*s < 0xDC00)) { + if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) { + uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1]; + uint32_t v_sp = toupper_sp(s_sp); + if (v_sp != s_sp) { + *s = v_sp >> 16; + s++; + *s = v_sp & 0xFFFF; + ret = 1; + } + } + } else { + ucs2_t v = toupper_w(*s); + if (v != *s) { + *s = v; + ret = 1; + } } s++; } return ret; } - /******************************************************************* +wide & sp islower() determine if a character is lowercase ********************************************************************/ +/* These functions are not used. */ + int islower_w(ucs2_t c) { return ( c == tolower_w(c)); } +int islower_sp(uint32_t c_sp) +{ + return ( c_sp == tolower_sp(c_sp)); +} + /******************************************************************* +wide & sp isupper() determine if a character is uppercase ********************************************************************/ +/* These functions are not used. */ + int isupper_w(ucs2_t c) { return ( c == toupper_w(c)); } +int isupper_sp(uint32_t c_sp) +{ + return ( c_sp == toupper_sp(c_sp)); +} /******************************************************************* - Count the number of characters in a ucs2_t string. +wide strlen() + Count the number of characters in a UTF-16 string. ********************************************************************/ +/* NOTE: one surrogate pair is two characters. */ + size_t strlen_w(const ucs2_t *src) { size_t len; @@ -139,8 +142,11 @@ size_t strlen_w(const ucs2_t *src) } /******************************************************************* - Count up to max number of characters in a ucs2_t string. +wide strnlen() + Count up to max number of characters in a UTF-16 string. ********************************************************************/ +/* NOTE: one surrogate pair is two characters. */ + size_t strnlen_w(const ucs2_t *src, size_t max) { size_t len; @@ -153,6 +159,8 @@ size_t strnlen_w(const ucs2_t *src, size_t max) /******************************************************************* wide strchr() ********************************************************************/ +/* NOTE: hi and lo of surrogate pair are separately processed. */ + ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c) { while (*s != 0) { @@ -164,11 +172,15 @@ ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c) return NULL; } +/******************************************************************* +wide & sp strcasechr() +********************************************************************/ +/* NOTE: separately process BMP and surrogate pair */ + ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c) { while (*s != 0) { -/* LOG(log_debug, logtype_default, "Comparing %X to %X (%X - %X)", c, *s, toupper_w(c), toupper_w(*s));*/ - if (toupper_w(c) == toupper_w(*s)) return (ucs2_t *)s; + if (tolower_w(c) == tolower_w(*s)) return (ucs2_t *)s; s++; } if (c == *s) return (ucs2_t *)s; @@ -176,6 +188,21 @@ ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c) return NULL; } +ucs2_t *strcasechr_sp(const ucs2_t *s, uint32_t c_sp) +{ + if (*s == 0) return NULL; + while (s[1] != 0) { + if (tolower_sp(c_sp) == tolower_sp((uint32_t)*s << 16 | (uint32_t)s[1])) return (ucs2_t *)s; + s++; + } + + return NULL; +} + +/******************************************************************* +wide strcmp() +********************************************************************/ +/* no problem of surrogate pair */ int strcmp_w(const ucs2_t *a, const ucs2_t *b) { @@ -186,6 +213,11 @@ int strcmp_w(const ucs2_t *a, const ucs2_t *b) string is longer */ } +/******************************************************************* +wide strncmp() +********************************************************************/ +/* no problem of surrogate pair */ + int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len) { size_t n = 0; @@ -196,6 +228,8 @@ int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len) /******************************************************************* wide strstr() ********************************************************************/ +/* no problem of surrogate pair */ + ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins) { ucs2_t *r; @@ -212,6 +246,11 @@ ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins) return NULL; } +/******************************************************************* +wide strcasestr() +********************************************************************/ +/* surrogate pair support */ + ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins) { ucs2_t *r; @@ -221,39 +260,86 @@ ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins) slen = strlen_w(s); inslen = strlen_w(ins); r = (ucs2_t *)s; - while ((r = strcasechr_w(r, *ins))) { - if (strncasecmp_w(r, ins, inslen) == 0) return r; - r++; + + if ((0xD800 <= *ins) && (*ins < 0xDC00)) { + if ((0xDC00 <= ins[1]) && (ins[1] < 0xE000)) { + uint32_t ins_sp = (uint32_t)*ins << 16 | (uint32_t)ins[1]; + while ((r = strcasechr_sp(r, ins_sp))) { + if (strncasecmp_w(r, ins, inslen) == 0) return r; + r++; + } + } else { + return NULL; /* illegal sequence */ + } + } else { + while ((r = strcasechr_w(r, *ins))) { + if (strncasecmp_w(r, ins, inslen) == 0) return r; + r++; + } } return NULL; } - - - /******************************************************************* +wide strcasecmp() case insensitive string comparison ********************************************************************/ +/* surrogate pair support */ + int strcasecmp_w(const ucs2_t *a, const ucs2_t *b) { - while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; } + int ret; + + while (*a && *b) { + if ((0xD800 <= *a) && (*a < 0xDC00)) { + if (ret = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]) - tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1])) return ret; + a++; + b++; + if (!(*a && *b)) return (tolower_w(*a) - tolower_w(*b)); /* avoid buffer over run */ + } else { + if (ret = tolower_w(*a) - tolower_w(*b)) return ret; + } + a++; + b++; + } return (tolower_w(*a) - tolower_w(*b)); } /******************************************************************* -case insensitive string comparison, lenght limited +wide strncasecmp() +case insensitive string comparison, length limited ********************************************************************/ +/* NOTE: compare up to 'len+1' if 'len' isolate surrogate pair */ + int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len) { size_t n = 0; - while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; } + int ret; + + while ((n < len) && *a && *b) { + if ((0xD800 <= *a) && (*a < 0xDC00)) { + if (ret = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]) - tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1])) return ret; + a++; + b++; + n++; + if (!((n < len) && *a && *b)) return (tolower_w(*a) - tolower_w(*b)); + } else { + if (ret = tolower_w(*a) - tolower_w(*b)) return ret; + } + a++; + b++; + n++; + } return (len - n)?(tolower_w(*a) - tolower_w(*b)):0; } /******************************************************************* +wide strndup() duplicate string ********************************************************************/ +/* NOTE: not check isolation of surrogate pair */ /* if len == 0 then duplicate the whole string */ + ucs2_t *strndup_w(const ucs2_t *src, size_t len) { ucs2_t *dest; @@ -271,6 +357,12 @@ ucs2_t *strndup_w(const ucs2_t *src, size_t len) return dest; } +/******************************************************************* +wide strdup() +duplicate string +********************************************************************/ +/* no problem of surrogate pair */ + ucs2_t *strdup_w(const ucs2_t *src) { return strndup_w(src, 0); @@ -279,6 +371,8 @@ ucs2_t *strdup_w(const ucs2_t *src) /******************************************************************* copy a string with max len ********************************************************************/ +/* This function is not used. */ +/* NOTE: not check isolation of surrogate pair */ ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max) { @@ -298,7 +392,9 @@ ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max) /******************************************************************* append a string of len bytes and add a terminator ********************************************************************/ +/* These functions are not used. */ +/* NOTE: not check isolation of surrogate pair */ ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max) { size_t start; @@ -315,7 +411,7 @@ ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max) return dest; } - +/* no problem of surrogate pair */ ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src) { size_t start; @@ -333,13 +429,16 @@ ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src) } -/* ------------------------ */ +/******************************************************************* +binary search for pre|decomposition +********************************************************************/ + static ucs2_t do_precomposition(unsigned int base, unsigned int comb) { int min = 0; - int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1; + int max = PRECOMP_COUNT - 1; int mid; - u_int32_t sought = (base << 16) | comb, that; + uint32_t sought = (base << 16) | comb, that; /* binary search */ while (max >= min) { @@ -357,14 +456,38 @@ static ucs2_t do_precomposition(unsigned int base, unsigned int comb) return 0; } +/* ------------------------ */ +static uint32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp) +{ + int min = 0; + int max = PRECOMP_SP_COUNT - 1; + int mid; + uint64_t sought_sp = ((uint64_t)base_sp << 32) | (uint64_t)comb_sp, that_sp; + + /* binary search */ + while (max >= min) { + mid = (min + max) / 2; + that_sp = ((uint64_t)precompositions_sp[mid].base_sp << 32) | ((uint64_t)precompositions_sp[mid].comb_sp); + if (that_sp < sought_sp) { + min = mid + 1; + } else if (that_sp > sought_sp) { + max = mid - 1; + } else { + return precompositions_sp[mid].replacement_sp; + } + } + /* no match */ + return 0; +} + /* -------------------------- */ -static u_int32_t do_decomposition(ucs2_t base) +static uint32_t do_decomposition(ucs2_t base) { int min = 0; - int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1; + int max = DECOMP_COUNT - 1; int mid; - u_int32_t sought = base; - u_int32_t result, that; + uint32_t sought = base; + uint32_t result, that; /* binary search */ while (max >= min) { @@ -383,36 +506,69 @@ static u_int32_t do_decomposition(ucs2_t base) return 0; } -/* we can't use static, this stuff needs to be reentrant */ -/* static char comp[MAXPATHLEN +1]; */ +/* -------------------------- */ +static uint64_t do_decomposition_sp(unsigned int base_sp) +{ + int min = 0; + int max = DECOMP_SP_COUNT - 1; + int mid; + uint32_t sought_sp = base_sp; + uint32_t that_sp; + uint64_t result_sp; + + /* binary search */ + while (max >= min) { + mid = (min + max) / 2; + that_sp = decompositions_sp[mid].replacement_sp; + if (that_sp < sought_sp) { + min = mid + 1; + } else if (that_sp > sought_sp) { + max = mid - 1; + } else { + result_sp = ((uint64_t)decompositions_sp[mid].base_sp << 32) | ((uint64_t)decompositions_sp[mid].comb_sp); + return result_sp; + } + } + /* no match */ + return 0; +} + +/******************************************************************* +pre|decomposition + + we can't use static, this stuff needs to be reentrant + static char comp[MAXPATHLEN +1]; + + We don't implement Singleton and Canonical Ordering. + We ignore CompositionExclusions.txt. + because they cause the problem of the roundtrip + such as Dancing Icon. + + exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges + in precompose.h from composition according to AFP 3.x spec +********************************************************************/ size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) { size_t i; ucs2_t base, comb; + uint32_t base_sp, comb_sp; ucs2_t *in, *out; - ucs2_t hangul_lindex, hangul_vindex; + ucs2_t lindex, vindex; ucs2_t result; + uint32_t result_sp; size_t o_len = *outlen; - + if (!inplen || (inplen & 1) || inplen > o_len) return (size_t)-1; - /* Actually, */ - /* Decomposition and Canonical Ordering are necessary here. */ - /* */ - /* Ex. in = CanonicalOrdering(decompose_w(name)) */ - /* */ - /* A new mapping table is needed for CanonicalOrdering. */ - i = 0; in = name; out = comp; - + base = *in; while (*outlen > 2) { i += 2; - in++; if (i == inplen) { *out = base; out++; @@ -420,33 +576,73 @@ size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) *outlen -= 2; return o_len - *outlen; } + in++; comb = *in; result = 0; - + /* Non-Combination Character */ if (comb < 0x300) ; /* Unicode Standard Annex #15 A10.3 Hangul Composition */ /* Step 1 */ - else if ((HANGUL_VBASE <= comb) && (comb <= HANGUL_VBASE + HANGUL_VCOUNT)) { - if ((HANGUL_LBASE <= base) && (base < HANGUL_LBASE + HANGUL_LCOUNT)) { + else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) { + if ((LBASE <= base) && (base < LBASE + LCOUNT)) { result = 1; - hangul_lindex = base - HANGUL_LBASE; - hangul_vindex = comb - HANGUL_VBASE; - base = HANGUL_SBASE + (hangul_lindex * HANGUL_VCOUNT + hangul_vindex) * HANGUL_TCOUNT; + lindex = base - LBASE; + vindex = comb - VBASE; + base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT; } } /* Step 2 */ - else if ((HANGUL_TBASE < comb) && (comb < HANGUL_TBASE + HANGUL_TCOUNT)) { - if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE +HANGUL_SCOUNT) && (((base - HANGUL_SBASE) % HANGUL_TCOUNT) == 0)) { + else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) { + if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) { result = 1; - base += comb - HANGUL_TBASE; + base += comb - TBASE; } } - /* Combining Sequence */ - else if ((result = do_precomposition(base, comb))) { + /* Binary Search for Surrogate Pair */ + else if ((0xD800 <= base) && (base < 0xDC00)) { + if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 6 <= inplen)) { + base_sp = ((uint32_t)base << 16) | (uint32_t)comb; + do { + comb_sp = ((uint32_t)in[1] << 16) | (uint32_t)in[2]; + if (result_sp = do_precomposition_sp(base_sp, comb_sp)) { + base_sp = result_sp; + i += 4; + in +=2; + } + } while ((i + 6 <= inplen) && result_sp) ; + + *out = base_sp >> 16; + out++; + *outlen -= 2; + + if (*outlen <= 2) { + errno = E2BIG; + return (size_t)-1; + } + + *out = base_sp & 0xFFFF; + out++; + *outlen -= 2; + + i += 2; + if (i == inplen) { + out++; + *out = 0; + return o_len - *outlen; + } + in++; + base = *in; + + result = 1; + } + } + + /* Binary Search for BMP */ + else if (result = do_precomposition(base, comb)) { base = result; } @@ -457,25 +653,22 @@ size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) base = comb; } } - + errno = E2BIG; return (size_t)-1; } /* --------------- */ - -/* Singleton Decomposition is unsupported. */ -/* A new mapping table is needed for implementation. */ - size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) { size_t i; size_t comblen; - ucs2_t base; - ucs2_t comb[MAXCOMBLEN]; - ucs2_t hangul_sindex, tjamo; + ucs2_t base, comb[COMBBUFLEN]; + uint32_t base_sp; + ucs2_t sindex, tjamo; ucs2_t *in, *out; unsigned int result; + uint64_t result_sp; size_t o_len = *outlen; if (!inplen || (inplen & 1)) @@ -492,34 +685,60 @@ size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) if (base <= 0x007f) ; /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */ - else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) { - hangul_sindex = base - HANGUL_SBASE; - base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT; - comb[MAXCOMBLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT; + else if ((SBASE <= base) && (base < SBASE + SCOUNT)) { + sindex = base - SBASE; + base = LBASE + sindex / NCOUNT; + comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT; /* */ - if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) { - comb[MAXCOMBLEN-1] = comb[MAXCOMBLEN-2]; + if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) { + comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2]; comblen = 1; } /* */ else { - comb[MAXCOMBLEN-1] = tjamo; + comb[COMBBUFLEN-1] = tjamo; comblen = 2; } } - /* Combining Sequence */ - /* exclude U2000-U2FFF and UFE30-UFE4F ranges in decompositions[] */ - /* from decomposition according to AFP 3.1 spec */ + /* Binary Search for Surrogate Pair */ + else if ((0xD800 <= base) && (base < 0xDC00)) { + if (i + 2 < inplen) { + base_sp = ((uint32_t)base << 16) | (uint32_t)in[1]; + do { + if ( !(result_sp = do_decomposition_sp(base_sp))) break; + comblen += 2; + base_sp = result_sp >> 32; + comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */ + comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */ + } while (comblen < MAXCOMBSPLEN); + + if (*outlen < (comblen + 1) << 1) { + errno = E2BIG; + return (size_t)-1; + } + + *out = base_sp >> 16; /* hi */ + out++; + *outlen -= 2; + + base = base_sp & 0xFFFF; /* lo */ + + i += 2; + in++; + } + } + + /* Binary Search for BMP */ else { do { - if ((comblen >= MAXCOMBLEN) || !(result = do_decomposition(base))) break; + if ( !(result = do_decomposition(base))) break; comblen++; base = result >> 16; - comb[MAXCOMBLEN-comblen] = result & 0xffff; - } while (0x007f < base) ; + comb[COMBBUFLEN-comblen] = result & 0xFFFF; + } while ((0x007f < base) && (comblen < MAXCOMBLEN)); } if (*outlen < (comblen + 1) << 1) { @@ -532,7 +751,7 @@ size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) *outlen -= 2; while ( comblen > 0 ) { - *out = comb[MAXCOMBLEN-comblen]; + *out = comb[COMBBUFLEN-comblen]; out++; *outlen -= 2; comblen--; @@ -541,13 +760,15 @@ size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) i += 2; in++; } - - /* Is Canonical Ordering necessary here? */ *out = 0; return o_len-*outlen; } +/******************************************************************* +length of UTF-8 character and string +********************************************************************/ + size_t utf8_charlen ( char* utf8 ) { unsigned char *p;