X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libatalk%2Funicode%2Fcharcnv.c;h=4e772e165a16e4f60f0f7dd331d176bf542645da;hb=939eb9da6116bd502cdae97f84541993848071b4;hp=7859a228ea1f9756b4a6c5fd72dc585eeff1aadf;hpb=14874ef66d68c5200a9a42a7408d022d58211898;p=netatalk.git diff --git a/libatalk/unicode/charcnv.c b/libatalk/unicode/charcnv.c index 7859a228..4e772e16 100644 --- a/libatalk/unicode/charcnv.c +++ b/libatalk/unicode/charcnv.c @@ -36,12 +36,6 @@ #ifdef HAVE_USABLE_ICONV #include #endif -#if HAVE_LOCALE_H -#include -#endif -#if HAVE_LANGINFO_H -#include -#endif #include #include @@ -78,29 +72,6 @@ static struct charset_functions* charsets[MAX_CHARSETS]; static char hexdig[] = "0123456789abcdef"; #define hextoint( c ) ( isdigit( c ) ? c - '0' : c + 10 - 'a' ) -static char* read_charsets_from_env(charset_t ch) -{ - char *name; - - switch (ch) { - case CH_MAC: - if (( name = getenv( "ATALK_MAC_CHARSET" )) != NULL ) - return name; - else - return "MAC_ROMAN"; - break; - case CH_UNIX: - if (( name = getenv( "ATALK_UNIX_CHARSET" )) != NULL ) - return name; - else - return "LOCALE"; - break; - default: - break; - } - return "ASCII"; -} - /** * Return the name of a charset to give to iconv(). @@ -108,66 +79,30 @@ static char* read_charsets_from_env(charset_t ch) static const char *charset_name(charset_t ch) { const char *ret = NULL; - static int first = 1; - static char macname[128]; - static char unixname[128]; - - if (first) { - memset(macname, 0, sizeof(macname)); - memset(unixname, 0, sizeof(unixname)); - first = 0; - } if (ch == CH_UCS2) ret = "UCS-2"; else if (ch == CH_UTF8) ret = "UTF8"; else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC"; - else if (ch == CH_UNIX) { - if (unixname[0] == '\0') { - ret = read_charsets_from_env(CH_UNIX); - strlcpy(unixname, ret, sizeof(unixname)); - } - else - ret = unixname; - } - else if (ch == CH_MAC) { - if (macname[0] == '\0') { - ret = read_charsets_from_env(CH_MAC); - strlcpy(macname, ret, sizeof(macname)); - } - else - ret = macname; - } + else ret = charset_names[ch]; + return ret; +} - if (!ret) - ret = charset_names[ch]; - -#if defined(CODESET) - if (ret && strcasecmp(ret, "LOCALE") == 0) { - const char *ln = NULL; - - setlocale(LC_ALL, ""); - ln = nl_langinfo(CODESET); - if (ln) { - /* Check whether the charset name is supported - by iconv */ - atalk_iconv_t handle = atalk_iconv_open(ln, "UCS-2"); - if (handle == (atalk_iconv_t) -1) { - LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln); - ln = "ASCII"; - } else { - atalk_iconv_close(handle); - } - if (ch==CH_UNIX) - strlcpy(unixname, ln, sizeof(unixname)); +int set_charset_name(charset_t ch, const char *name) +{ + if (ch >= NUM_CHARSETS) + return -1; + charset_names[ch] = strdup(name); + return 0; +} + +void free_charset_names(void) +{ + for (int ch = 0; ch < MAX_CHARSETS; ch++) { + if (charset_names[ch]) { + free(charset_names[ch]); + charset_names[ch] = NULL; } - ret = ln; } -#else /* system doesn't have LOCALE support */ - if (ch == CH_UNIX) ret = NULL; -#endif - - if (!ret || !*ret) ret = "ASCII"; - return ret; } static struct charset_functions* get_charset_functions (charset_t ch) @@ -768,7 +703,7 @@ char * debug_out ( char * seq, size_t len) * for e.g. HFS cdroms. */ -static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, uint16_t *flags) +static size_t pull_charset_flags (charset_t from_set, charset_t to_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, uint16_t *flags) { const uint16_t option = (flags ? *flags : 0); size_t i_len, o_len; @@ -777,6 +712,7 @@ static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const c char* outbuf = dest; atalk_iconv_t descriptor; atalk_iconv_t descriptor_cap; + char escch; /* 150210: uninitialized OK, depends on j */ if (srclen == (size_t)-1) srclen = strlen(src) + 1; @@ -792,14 +728,32 @@ static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const c i_len=srclen; o_len=destlen; + if ((option & CONV_ESCAPEDOTS) && i_len >= 2 && inbuf[0] == '.') { + if (o_len < 6) { + errno = E2BIG; + goto end; + } + ucs2_t ucs2 = ':'; + memcpy(outbuf, &ucs2, sizeof(ucs2_t)); + ucs2 = '2'; + memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t)); + ucs2 = 'e'; + memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t)); + outbuf += 6; + o_len -= 6; + inbuf++; + i_len--; + *flags |= CONV_REQESCAPE; + } + while (i_len > 0) { - if ((option & CONV_UNESCAPEHEX)) { - for (j = 0; j < i_len; ++j) { - if (inbuf[j] == ':') break; + for (j = 0; j < i_len; ++j) + if (inbuf[j] == ':' || inbuf[j] == '/') { + escch = inbuf[j]; + break; } - j = i_len - j; - i_len -= j; - } + j = i_len - j; + i_len -= j; if (i_len > 0 && atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) { @@ -827,36 +781,108 @@ static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const c } if (j) { - /* we're at the start on an hex encoded ucs2 char */ - char h[MAXPATHLEN]; - size_t hlen = 0; - + /* we have a ':' or '/' */ i_len = j, j = 0; - while (i_len >= 3 && inbuf[0] == ':' && - isxdigit(inbuf[1]) && isxdigit(inbuf[2])) { - h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]); - inbuf += 3; - i_len -= 3; - } - if (hlen) { - const char *h_buf = h; - if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) { - i_len += hlen * 3; - inbuf -= hlen * 3; - if (errno == EILSEQ && (option & CONV_IGNORE)) { - *flags |= CONV_REQMANGLE; - return destlen - o_len; + + if (escch == ':') { + if ((option & CONV_UNESCAPEHEX)) { + /* treat it as a CAP hex encoded char */ + char h[MAXPATHLEN]; + size_t hlen = 0; + + while (i_len >= 3 && inbuf[0] == ':' && + isxdigit(inbuf[1]) && isxdigit(inbuf[2])) { + h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]); + inbuf += 3; + i_len -= 3; + } + if (hlen) { + const char *h_buf = h; + if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) { + i_len += hlen * 3; + inbuf -= hlen * 3; + if (errno == EILSEQ && (option & CONV_IGNORE)) { + *flags |= CONV_REQMANGLE; + return destlen - o_len; + } + goto end; + } + } else { + /* We have an invalid :xx sequence */ + errno = EILSEQ; + if ((option & CONV_IGNORE)) { + *flags |= CONV_REQMANGLE; + return destlen - o_len; + } + goto end; + } + } else if (option & CONV_ESCAPEHEX) { + if (o_len < 6) { + errno = E2BIG; + goto end; } - goto end; + ucs2_t ucs2 = ':'; + memcpy(outbuf, &ucs2, sizeof(ucs2_t)); + ucs2 = '3'; + memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t)); + ucs2 = 'a'; + memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t)); + outbuf += 6; + o_len -= 6; + inbuf++; + i_len--; + } else if (to_set == CH_UTF8_MAC || to_set == CH_MAC) { + /* convert to a '/' */ + ucs2_t slash = 0x002f; + memcpy(outbuf, &slash, sizeof(ucs2_t)); + outbuf += 2; + o_len -= 2; + inbuf++; + i_len--; + } else { + /* keep as ':' */ + ucs2_t ucs2 = 0x003a; + memcpy(outbuf, &ucs2, sizeof(ucs2_t)); + outbuf += 2; + o_len -= 2; + inbuf++; + i_len--; } } else { - /* We have an invalid :xx sequence */ - errno = EILSEQ; - if ((option & CONV_IGNORE)) { - *flags |= CONV_REQMANGLE; - return destlen - o_len; + /* '/' */ + if (option & CONV_ESCAPEHEX) { + if (o_len < 6) { + errno = E2BIG; + goto end; + } + ucs2_t ucs2 = ':'; + memcpy(outbuf, &ucs2, sizeof(ucs2_t)); + ucs2 = '2'; + memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t)); + ucs2 = 'f'; + memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t)); + outbuf += 6; + o_len -= 6; + inbuf++; + i_len--; + } else if ((from_set == CH_UTF8_MAC || from_set == CH_MAC) + && (to_set != CH_UTF8_MAC || to_set != CH_MAC)) { + /* convert to ':' */ + ucs2_t ucs2 = 0x003a; + memcpy(outbuf, &ucs2, sizeof(ucs2_t)); + outbuf += 2; + o_len -= 2; + inbuf++; + i_len--; + } else { + /* keep as '/' */ + ucs2_t ucs2 = 0x002f; + memcpy(outbuf, &ucs2, sizeof(ucs2_t)); + outbuf += 2; + o_len -= 2; + inbuf++; + i_len--; } - goto end; } } } @@ -889,7 +915,6 @@ static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src char* outbuf = (char*)dest; atalk_iconv_t descriptor; atalk_iconv_t descriptor_cap; - char escch; /* 150210: uninitialized OK, depends on j */ descriptor = conv_handles[CH_UCS2][to_set]; descriptor_cap = conv_handles[CH_UCS2][cap_set]; @@ -902,43 +927,7 @@ static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src i_len=srclen; o_len=destlen; - if ((option & CONV_ESCAPEDOTS) && - i_len >= 2 && SVAL(inbuf, 0) == 0x002e) { /* 0x002e = . */ - if (o_len < 3) { - errno = E2BIG; - goto end; - } - *outbuf++ = ':'; - *outbuf++ = '2'; - *outbuf++ = 'e'; - o_len -= 3; - inbuf += 2; - i_len -= 2; - *flags |= CONV_REQESCAPE; - } - while (i_len >= 2) { - if ((option & CONV_ESCAPEHEX)) { - for (i = 0; i < i_len; i += 2) { - ucs2_t c = SVAL(inbuf, i); - switch (c) { - case 0x003a: /* 0x003a = ':' */ - if ( ! (option & CONV_ALLOW_COLON)) { - errno = EILSEQ; - goto end; - } - escch = c; - j = i_len - i; - i_len = i; - break; - case 0x002f: /* 0x002f = '/' */ - escch = c; - j = i_len - i; - i_len = i; - break; - } - } - } while (i_len > 0 && atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) { if (errno == EILSEQ) { @@ -987,39 +976,8 @@ static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src } goto end; } + } /* while (i_len >= 2) */ - if (j) { - i_len = j, j = 0; - if (o_len < 3) { - errno = E2BIG; - goto end; - } - switch (escch) { - case '/': - *outbuf++ = ':'; - *outbuf++ = '2'; - *outbuf++ = 'f'; - break; - case ':': - *outbuf++ = ':'; - *outbuf++ = '3'; - *outbuf++ = 'a'; - break; - default: - /* - * THIS SHOULD NEVER BE REACHED !!! - * As a safety net I put in a ' ' here - */ - *outbuf++ = ':'; - *outbuf++ = '2'; - *outbuf++ = '0'; - break; - } - o_len -= 3; - inbuf += 2; - i_len -= 2; - } - } if (i_len > 0) errno = EINVAL; end: return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1; @@ -1039,7 +997,7 @@ size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_cha lazy_initialize_conv(); /* convert from_set to UCS2 */ - if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len, + if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, to_set, cap_charset, src, src_len, (char *) buffer, sizeof(buffer) -2, flags)) ) { LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set)); return (size_t) -1;