-/*
- Unix SMB/CIFS implementation.
- Character set conversion Extensions
- Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
- Copyright (C) Andrew Tridgell 2001
- Copyright (C) Simo Sorce 2001
- Copyright (C) Martin Pool 2003
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+/*
+ Unix SMB/CIFS implementation.
+ Character set conversion Extensions
+ Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
+ Copyright (C) Andrew Tridgell 2001
+ Copyright (C) Simo Sorce 2001
+ Copyright (C) Martin Pool 2003
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifdef HAVE_CONFIG_H
* @file
*
* @brief Character-set conversion routines built on our iconv.
- *
+ *
* @note Samba's internal character set (at least in the 3.0 series)
* is always the same as the one for the Unix filesystem. It is
* <b>not</b> necessarily UTF-8 and may be different on machines that
*/
-#define MAX_CHARSETS 10
+#define MAX_CHARSETS 20
#define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )
static char hexdig[] = "0123456789abcdef";
#define hextoint( c ) ( isdigit( c ) ? c - '0' : c + 10 - 'a' )
-static char* read_charsets_from_env(charset_t ch)
+static char* read_charsets_from_env(charset_t ch)
{
- char *name;
-
- switch (ch) {
- case CH_MAC:
- if (( name = getenv( "ATALK_MAC_CHARSET" )) != NULL )
- return name;
- else
- return "MAC_ROMAN";
- break;
- case CH_UNIX:
- if (( name = getenv( "ATALK_UNIX_CHARSET" )) != NULL )
- return name;
- else
- return "LOCALE";
- break;
- default:
- break;
- }
- return "ASCII";
-}
-
+ char *name; /* Look up the charset name configured through the environment for
+  * CH_MAC / CH_UNIX, falling back to a built-in default. The result
+  * is either the pointer returned by getenv() or a string literal. */
+
+ switch (ch) {
+ case CH_MAC:
+ if (( name = getenv( "ATALK_MAC_CHARSET" )) != NULL )
+ return name;
+ else
+ return "MAC_ROMAN"; /* default when ATALK_MAC_CHARSET is unset */
+ break; /* not reached: both branches above return */
+ case CH_UNIX:
+ if (( name = getenv( "ATALK_UNIX_CHARSET" )) != NULL )
+ return name;
+ else
+ return "LOCALE"; /* default when ATALK_UNIX_CHARSET is unset; charset_name() compares against this name */
+ break; /* not reached: both branches above return */
+ default:
+ break;
+ }
+ return "ASCII"; /* any other charset_t value */
+}
+
/**
* Return the name of a charset to give to iconv().
**/
static const char *charset_name(charset_t ch)
{
- const char *ret = NULL;
- static int first = 1;
- static char macname[128];
- static char unixname[128];
-
- if (first) {
- memset(macname, 0, sizeof(macname));
- memset(unixname, 0, sizeof(unixname));
- first = 0;
- }
-
- if (ch == CH_UCS2) ret = "UCS-2";
- else if (ch == CH_UTF8) ret = "UTF8";
- else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
- else if (ch == CH_UNIX) {
- if (unixname[0] == '\0') {
- ret = read_charsets_from_env(CH_UNIX);
- strlcpy(unixname, ret, sizeof(unixname));
- }
- else
- ret = unixname;
- }
- else if (ch == CH_MAC) {
- if (macname[0] == '\0') {
- ret = read_charsets_from_env(CH_MAC);
- strlcpy(macname, ret, sizeof(macname));
- }
- else
- ret = macname;
- }
-
- if (!ret)
- ret = charset_names[ch];
-
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
- if (ret && strcasecmp(ret, "LOCALE") == 0) {
- const char *ln = NULL;
-
-#ifdef HAVE_SETLOCALE
- setlocale(LC_ALL, "");
-#endif
- ln = nl_langinfo(CODESET);
- if (ln) {
- /* Check whether the charset name is supported
- by iconv */
- atalk_iconv_t handle = atalk_iconv_open(ln, "UCS-2");
- if (handle == (atalk_iconv_t) -1) {
- LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln);
- ln = "ASCII";
- } else {
- atalk_iconv_close(handle);
- }
- if (ch==CH_UNIX)
- strlcpy(unixname, ln, sizeof(unixname));
- }
- ret = ln;
- }
+ const char *ret = NULL;
+ static int first = 1;
+ static char macname[128]; /* cached CH_MAC name, empty until first lookup */
+ static char unixname[128]; /* cached CH_UNIX name, empty until first lookup */
+
+ if (first) {
+ memset(macname, 0, sizeof(macname));
+ memset(unixname, 0, sizeof(unixname));
+ first = 0;
+ }
+
+ if (ch == CH_UCS2) ret = "UCS-2";
+ else if (ch == CH_UTF8) ret = "UTF8";
+ else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
+ else if (ch == CH_UNIX) {
+ if (unixname[0] == '\0') { /* first call: read the environment and cache the answer */
+ ret = read_charsets_from_env(CH_UNIX);
+ strlcpy(unixname, ret, sizeof(unixname));
+ }
+ else
+ ret = unixname;
+ }
+ else if (ch == CH_MAC) {
+ if (macname[0] == '\0') { /* first call: read the environment and cache the answer */
+ ret = read_charsets_from_env(CH_MAC);
+ strlcpy(macname, ret, sizeof(macname));
+ }
+ else
+ ret = macname;
+ }
+
+ if (!ret) /* none of the fixed charsets matched: use the name stored in charset_names[] */
+ ret = charset_names[ch];
+
+#if defined(CODESET)
+ if (ret && strcasecmp(ret, "LOCALE") == 0) { /* "LOCALE" is replaced by the codeset of the current locale */
+ const char *ln = NULL;
+
+ setlocale(LC_ALL, "");
+ ln = nl_langinfo(CODESET);
+ if (ln) {
+ /* Check whether the charset name is supported
+ by iconv: a conversion handle is opened as a probe
+ and closed again at once when the open succeeds */
+ atalk_iconv_t handle = atalk_iconv_open(ln, "UCS-2");
+ if (handle == (atalk_iconv_t) -1) {
+ LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln);
+ ln = "ASCII";
+ } else {
+ atalk_iconv_close(handle);
+ }
+ if (ch==CH_UNIX) /* overwrite the cached "LOCALE" with the resolved name */
+ strlcpy(unixname, ln, sizeof(unixname));
+ }
+ ret = ln; /* NULL when nl_langinfo() returned NULL */
+ }
#else /* system doesn't have LOCALE support */
-if (ch == CH_UNIX) ret = NULL;
+ if (ch == CH_UNIX) ret = NULL;
#endif
- if (!ret || !*ret) ret = "ASCII";
- return ret;
+ if (!ret || !*ret) ret = "ASCII";
+ return ret;
}
-struct charset_functions* get_charset_functions (charset_t ch)
+static struct charset_functions* get_charset_functions (charset_t ch)
{
- if (charsets[ch] != NULL)
- return charsets[ch];
+ if (charsets[ch] != NULL)
+ return charsets[ch];
- charsets[ch] = find_charset_functions(charset_name(ch));
+ charsets[ch] = find_charset_functions(charset_name(ch));
- return charsets[ch];
+ return charsets[ch];
}
-
-void lazy_initialize_conv(void)
+
+static void lazy_initialize_conv(void)
{
- static int initialized = 0;
+ static int initialized = 0;
- if (!initialized) {
- initialized = 1;
- init_iconv();
- }
+ if (!initialized) {
+ initialized = 1;
+ init_iconv();
+ }
}
-charset_t add_charset(char* name)
+charset_t add_charset(const char* name)
{
- static charset_t max_charset_t = NUM_CHARSETS-1;
- charset_t cur_charset_t = max_charset_t+1;
- unsigned int c1;
-
- lazy_initialize_conv();
-
- for (c1=0; c1<=max_charset_t;c1++) {
- if ( strcasecmp(name, charset_name(c1)) == 0)
- return (c1);
- }
-
- if ( cur_charset_t >= MAX_CHARSETS ) {
- LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
- name, MAX_CHARSETS);
- return (charset_t) -1;
- }
-
- /* First try to setup the required conversions */
-
- conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
- if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
- LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
- name, charset_name(CH_UCS2));
- conv_handles[cur_charset_t][CH_UCS2] = NULL;
- return (charset_t) -1;
- }
-
- conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
- if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
- LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
- charset_name(CH_UCS2), name);
- conv_handles[CH_UCS2][cur_charset_t] = NULL;
- return (charset_t) -1;
- }
-
- /* register the new charset_t name */
- charset_names[cur_charset_t] = strdup(name);
-
- charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
- max_charset_t++;
+    /*
+     * Register `name` as an additional charset and return its handle.
+     * A name that matches an already registered charset (compared
+     * case-insensitively) returns that charset's existing handle.
+     * Returns (charset_t)-1 when the table is full, when iconv cannot
+     * convert between `name` and UCS-2 in both directions, or when the
+     * name cannot be duplicated; in those cases no conversion handle is
+     * left open and the charset count is not advanced.
+     */
+    static charset_t max_charset_t = NUM_CHARSETS-1;
+    charset_t cur_charset_t = max_charset_t+1;
+    unsigned int c1;
+
+    lazy_initialize_conv();
+
+    for (c1=0; c1<=max_charset_t;c1++) {
+        if ( strcasecmp(name, charset_name(c1)) == 0)
+            return (c1);
+    }
+
+    if ( cur_charset_t >= MAX_CHARSETS ) {
+        LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
+             name, MAX_CHARSETS);
+        return (charset_t) -1;
+    }
+
+    /* First try to setup the required conversions */
+
+    conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
+    if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
+        LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
+            name, charset_name(CH_UCS2));
+        conv_handles[cur_charset_t][CH_UCS2] = NULL;
+        return (charset_t) -1;
+    }
+
+    conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
+    if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
+        LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
+            charset_name(CH_UCS2), name);
+        conv_handles[CH_UCS2][cur_charset_t] = NULL;
+        /* The first handle was opened successfully; close it, otherwise the
+         * next add_charset() call reuses this slot and overwrites (leaks) it. */
+        atalk_iconv_close(conv_handles[cur_charset_t][CH_UCS2]);
+        conv_handles[cur_charset_t][CH_UCS2] = NULL;
+        return (charset_t) -1;
+    }
+
+    /* register the new charset_t name */
+    charset_names[cur_charset_t] = strdup(name);
+    if (charset_names[cur_charset_t] == NULL) {
+        /* Without a stored name charset_name() would report "ASCII" for this
+         * handle, so fail cleanly instead of registering a mislabeled charset. */
+        LOG(log_error, logtype_default, "Adding charset %s failed, out of memory", name);
+        atalk_iconv_close(conv_handles[cur_charset_t][CH_UCS2]);
+        conv_handles[cur_charset_t][CH_UCS2] = NULL;
+        atalk_iconv_close(conv_handles[CH_UCS2][cur_charset_t]);
+        conv_handles[CH_UCS2][cur_charset_t] = NULL;
+        return (charset_t) -1;
+    }
+
+    charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
+    max_charset_t++;
#ifdef DEBUG
- LOG(log_debug, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
-#endif /* DEBUG */
- return (cur_charset_t);
+ LOG(log_debug9, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
+#endif
+ return (cur_charset_t);
}
/**
**/
void init_iconv(void)
{
- int c1;
-
- for (c1=0;c1<NUM_CHARSETS;c1++) {
- const char *name = charset_name((charset_t)c1);
-
- conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
- if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
- LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
- name, charset_name(CH_UCS2));
- conv_handles[c1][CH_UCS2] = NULL;
- }
-
- if (c1 != CH_UCS2) { /* avoid lost memory, make valgrind happy */
- conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
- if (conv_handles[CH_UCS2][c1] == (atalk_iconv_t)-1) {
- LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
- charset_name(CH_UCS2), name);
- conv_handles[CH_UCS2][c1] = NULL;
- }
- }
-
- charsets[c1] = get_charset_functions (c1);
- }
+ int c1; /* index over the NUM_CHARSETS built-in charsets */
+
+ for (c1=0;c1<NUM_CHARSETS;c1++) {
+ const char *name = charset_name((charset_t)c1);
+
+ conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name); /* handle for c1 -> UCS-2 */
+ if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
+ LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
+ name, charset_name(CH_UCS2));
+ conv_handles[c1][CH_UCS2] = NULL; /* failure is logged only; the slot is left NULL and setup continues */
+ }
+
+ if (c1 != CH_UCS2) { /* avoid lost memory, make valgrind happy: for c1 == CH_UCS2 this is the slot filled just above */
+ conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2)); /* handle for UCS-2 -> c1 */
+ if (conv_handles[CH_UCS2][c1] == (atalk_iconv_t)-1) {
+ LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
+ charset_name(CH_UCS2), name);
+ conv_handles[CH_UCS2][c1] = NULL;
+ }
+ }
+
+ charsets[c1] = get_charset_functions (c1);
+ }
}
/**
**/
static size_t add_null(charset_t to, char *buf, size_t bytesleft, size_t len)
{
- /* Terminate the string */
- if (to == CH_UCS2 && bytesleft >= 2) {
- buf[len] = 0;
- buf[len+1] = 0;
-
- }
- else if ( to != CH_UCS2 && bytesleft > 0 )
- buf[len] = 0;
- else {
- errno = E2BIG;
- return (size_t)(-1);
- }
-
- return len;
+ /* Terminate the string: write the terminator at buf[len], two zero
+  * bytes for CH_UCS2 and one zero byte for every other charset.
+  * bytesleft is the room still available at buf[len]. Returns len
+  * (the terminator is not counted), or (size_t)-1 with errno set to
+  * E2BIG when the terminator does not fit. */
+ if (to == CH_UCS2 && bytesleft >= 2) {
+ buf[len] = 0;
+ buf[len+1] = 0;
+
+ }
+ else if ( to != CH_UCS2 && bytesleft > 0 )
+ buf[len] = 0;
+ else {
+ errno = E2BIG;
+ return (size_t)(-1);
+ }
+
+ return len;
}
-
+
/**
* Convert string from one encoding to another, making error checking etc
*
* @returns the number of bytes occupied in the destination
**/
static size_t convert_string_internal(charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen)
+ void const *src, size_t srclen,
+ void *dest, size_t destlen)
{
- size_t i_len, o_len;
- size_t retval;
- const char* inbuf = (const char*)src;
- char* outbuf = (char*)dest;
- char* o_save = outbuf;
- atalk_iconv_t descriptor;
-
- /* Fixed based on Samba 3.0.6 */
- if (srclen == (size_t)-1) {
- if (from == CH_UCS2) {
- srclen = (strlen_w((const ucs2_t *)src)+1) * 2;
- } else {
- srclen = strlen((const char *)src)+1;
- }
- }
-
-
- lazy_initialize_conv();
-
- descriptor = conv_handles[from][to];
-
- if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
- return (size_t) -1;
- }
-
- i_len=srclen;
- o_len=destlen;
- retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
- if(retval==(size_t)-1) {
- const char *reason="unknown error";
- switch(errno) {
- case EINVAL:
- reason="Incomplete multibyte sequence";
- break;
- case E2BIG:
- reason="No more room";
- break;
- case EILSEQ:
- reason="Illegal multibyte sequence";
- break;
- }
- LOG(log_debug, logtype_default,"Conversion error: %s",reason);
- return (size_t)-1;
- }
-
- /* Terminate the string */
- return add_null( to, o_save, o_len, destlen -o_len);
+ size_t i_len, o_len; /* bytes of input left / bytes of output room left, updated by atalk_iconv() */
+ size_t retval;
+ const char* inbuf = (const char*)src;
+ char* outbuf = (char*)dest;
+ char* o_save = outbuf; /* start of dest, kept because atalk_iconv() is handed &outbuf */
+ atalk_iconv_t descriptor;
+
+ /* Fixed based on Samba 3.0.6
+  * srclen == (size_t)-1 means "src is NUL-terminated": measure it here,
+  * in bytes, without counting the terminator (UCS-2 input is measured
+  * with strlen_w() and multiplied by 2). */
+ if (srclen == (size_t)-1) {
+ if (from == CH_UCS2) {
+ srclen = (strlen_w((const ucs2_t *)src)) * 2;
+ } else {
+ srclen = strlen((const char *)src);
+ }
+ }
+
+
+ lazy_initialize_conv();
+
+ descriptor = conv_handles[from][to];
+
+ if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { /* no usable handle for this pair */
+ return (size_t) -1;
+ }
+
+ i_len=srclen;
+ o_len=destlen;
+ retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
+ if(retval==(size_t)-1) {
+ const char *reason="unknown error";
+ switch(errno) {
+ case EINVAL:
+ reason="Incomplete multibyte sequence";
+ break;
+ case E2BIG:
+ reason="No more room";
+ break;
+ case EILSEQ:
+ reason="Illegal multibyte sequence";
+ break;
+ }
+ LOG(log_debug, logtype_default,"Conversion error: %s",reason);
+ return (size_t)-1;
+ }
+
+ /* Terminate the string: o_len is the room still free in dest and
+  * destlen - o_len the number of bytes written, which add_null()
+  * returns on success */
+ return add_null( to, o_save, o_len, destlen -o_len);
}
size_t convert_string(charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen)
+ void const *src, size_t srclen,
+ void *dest, size_t destlen)
{
- size_t i_len, o_len;
- ucs2_t *u;
- ucs2_t buffer[MAXPATHLEN];
- ucs2_t buffer2[MAXPATHLEN];
- int composition = 0;
-
- lazy_initialize_conv();
-
- /* convert from_set to UCS2 */
- if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
- (char*) buffer, sizeof(buffer))) ) {
- LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
- return (size_t) -1;
- }
-
- /* Do pre/decomposition */
- if ( ((!(charsets[to]) || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
- (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
- composition = 1;
- if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
- composition = 2;
-
- i_len = sizeof(buffer2);
- u = buffer2;
-
- switch (composition) {
- case 0:
- u = buffer;
- i_len = o_len;
- break;
- case 1:
- if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
- return (size_t)(-1);
- break;
- case 2:
- if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
- return (size_t)(-1);
- break;
- }
-
- /* Convert UCS2 to to_set */
- if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
- LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
- return (size_t) -1;
- }
-
- return o_len;
-}
-
-
+ size_t i_len, o_len;
+ ucs2_t *u; /* UCS-2 text handed to the final conversion step */
+ ucs2_t buffer[MAXPATHLEN]; /* src converted to UCS-2 */
+ ucs2_t buffer2[MAXPATHLEN]; /* output of precompose_w()/decompose_w() */
+
+ /* convert from_set to UCS2; o_len receives the length reported by
+  * convert_string_internal() */
+ if ((size_t)-1 == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
+ (char*) buffer, sizeof(buffer))) ) {
+ LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
+ return (size_t) -1;
+ }
+
+ /* Do pre/decomposition:
+  *  - target charset flagged CHARSET_DECOMPOSED: decompose;
+  *  - otherwise, source charset has no charsets[] entry or is flagged
+  *    CHARSET_DECOMPOSED: precompose;
+  *  - otherwise: pass the UCS-2 text through unchanged.
+  * i_len goes in as sizeof(buffer2) and is then replaced by the
+  * helper's return value. */
+ i_len = sizeof(buffer2);
+ u = buffer2;
+ if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
+ if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
+ return (size_t)-1;
+ }
+ else if (!charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED)) {
+ if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
+ return (size_t)-1;
+ }
+ else {
+ u = buffer;
+ i_len = o_len;
+ }
+ /* Convert UCS2 to to_set, writing at most destlen bytes into dest */
+ if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
+ LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
+ return (size_t) -1;
+ }
+
+ return o_len;
+}
+
+
/**
* Convert between character sets, allocating a new buffer for the result.
**/
static size_t convert_string_allocate_internal(charset_t from, charset_t to,
- void const *src, size_t srclen, char **dest)
+ void const *src, size_t srclen, char **dest)
{
- size_t i_len, o_len, destlen;
- size_t retval;
- const char *inbuf = (const char *)src;
- char *outbuf = NULL, *ob = NULL;
- atalk_iconv_t descriptor;
+ size_t i_len, o_len, destlen;
+ size_t retval;
+ const char *inbuf = (const char *)src;
+ char *outbuf = NULL, *ob = NULL;
+ atalk_iconv_t descriptor;
- *dest = NULL;
+ *dest = NULL;
- if (src == NULL || srclen == (size_t)-1)
- return (size_t)-1;
+ if (src == NULL || srclen == (size_t)-1)
+ return (size_t)-1;
- lazy_initialize_conv();
+ lazy_initialize_conv();
- descriptor = conv_handles[from][to];
+ descriptor = conv_handles[from][to];
- if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
- /* conversion not supported, return -1*/
- LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!");
- return -1;
- }
+ if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
+ /* conversion not supported, return -1*/
+ LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!");
+ return -1;
+ }
- destlen = MAX(srclen, 512);
+ destlen = MAX(srclen, 512);
convert:
- destlen = destlen * 2;
- outbuf = (char *)realloc(ob, destlen);
- if (!outbuf) {
- LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!");
- SAFE_FREE(ob);
- return (size_t)-1;
- } else {
- ob = outbuf;
- }
- inbuf = src; /* this restarts the whole conversion if buffer needed to be increased */
- i_len = srclen;
- o_len = destlen;
- retval = atalk_iconv(descriptor,
- &inbuf, &i_len,
- &outbuf, &o_len);
- if(retval == (size_t)-1) {
- const char *reason="unknown error";
- switch(errno) {
- case EINVAL:
- reason="Incomplete multibyte sequence";
- break;
- case E2BIG:
- goto convert;
- case EILSEQ:
- reason="Illegal multibyte sequence";
- break;
- }
- LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf);
- SAFE_FREE(ob);
- return (size_t)-1;
- }
-
-
- destlen = destlen - o_len;
-
- /* Terminate the string */
- if (to == CH_UCS2 && o_len >= 2) {
- ob[destlen] = 0;
- ob[destlen+1] = 0;
- *dest = (char *)realloc(ob,destlen+2);
- }
- else if ( to != CH_UCS2 && o_len > 0 ) {
- ob[destlen] = 0;
- *dest = (char *)realloc(ob,destlen+1);
- }
- else {
- goto convert; /* realloc */
- }
-
- if (destlen && !*dest) {
- LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!");
- SAFE_FREE(ob);
- return (size_t)-1;
- }
-
- return destlen;
+    /* Grow-and-retry: every pass through the `convert` label doubles the
+     * output buffer and restarts the conversion from the start of src. */
+    destlen = destlen * 2;
+    outbuf = (char *)realloc(ob, destlen);
+    if (!outbuf) {
+        LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!");
+        SAFE_FREE(ob);
+        return (size_t)-1;
+    } else {
+        ob = outbuf;
+    }
+    inbuf = src;   /* this restarts the whole conversion if buffer needed to be increased */
+    i_len = srclen;
+    o_len = destlen;
+    retval = atalk_iconv(descriptor,
+                         &inbuf, &i_len,
+                         &outbuf, &o_len);
+    if(retval == (size_t)-1) {
+        const char *reason="unknown error";
+        switch(errno) {
+        case EINVAL:
+            reason="Incomplete multibyte sequence";
+            break;
+        case E2BIG:
+            goto convert;
+        case EILSEQ:
+            reason="Illegal multibyte sequence";
+            break;
+        }
+        /* The input is srclen bytes long and may be UCS-2 or lack a NUL
+         * terminator, so it must not be printed with %s; report where the
+         * conversion stopped instead. */
+        LOG(log_debug, logtype_default,"Conversion error: %s (at input offset %lu)",
+            reason, (unsigned long)(inbuf - (const char *)src));
+        SAFE_FREE(ob);
+        return (size_t)-1;
+    }
+
+
+    destlen = destlen - o_len;   /* bytes actually written */
+
+    /* Terminate the string */
+    if (to == CH_UCS2 && o_len >= 2) {
+        ob[destlen] = 0;
+        ob[destlen+1] = 0;
+        outbuf = (char *)realloc(ob,destlen+2);
+    }
+    else if ( to != CH_UCS2 && o_len > 0 ) {
+        ob[destlen] = 0;
+        outbuf = (char *)realloc(ob,destlen+1);
+    }
+    else {
+        goto convert; /* realloc */
+    }
+
+    /* Trimming the buffer is only an optimisation: when realloc() fails the
+     * original block is still valid and already terminated, so hand that
+     * one to the caller rather than failing (or leaking it). */
+    *dest = outbuf ? outbuf : ob;
+
+    return destlen;
}
size_t convert_string_allocate(charset_t from, charset_t to,
- void const *src, size_t srclen,
- char ** dest)
+ void const *src, size_t srclen,
+ char ** dest)
{
- size_t i_len, o_len;
- ucs2_t *u;
- ucs2_t buffer[MAXPATHLEN];
- ucs2_t buffer2[MAXPATHLEN];
- int composition = 0;
-
- lazy_initialize_conv();
-
- *dest = NULL;
-
- /* convert from_set to UCS2 */
- if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
- buffer, sizeof(buffer))) ) {
- LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
- return (size_t) -1;
- }
-
- /* Do pre/decomposition */
- if ( ((!(charsets[to]) || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
- (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
- composition = 1;
- if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
- composition = 2;
-
- i_len = sizeof(buffer2);
- u = buffer2;
-
- switch (composition) {
- case 0:
- u = buffer;
- i_len = o_len;
- break;
- case 1:
- if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
- return (size_t)(-1);
- break;
- case 2:
- if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
- return (size_t)(-1);
- break;
- }
-
- /* Convert UCS2 to to_set */
- if ((size_t)(-1) == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
- LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
-
- return o_len;
+ size_t i_len, o_len;
+ ucs2_t *u; /* UCS-2 text handed to the final conversion step */
+ ucs2_t buffer[MAXPATHLEN]; /* src converted to UCS-2 */
+ ucs2_t buffer2[MAXPATHLEN]; /* output of precompose_w()/decompose_w() */
+
+ *dest = NULL; /* callers always get a defined pointer, also on the error paths below */
+
+ /* convert from_set to UCS2 (into the fixed-size stack buffer) */
+ if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
+ buffer, sizeof(buffer))) ) {
+ LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
+ return (size_t) -1;
+ }
+
+ /* Do pre/decomposition:
+  *  - target charset flagged CHARSET_DECOMPOSED: decompose;
+  *  - otherwise, source charset has no charsets[] entry or is flagged
+  *    CHARSET_DECOMPOSED: precompose;
+  *  - otherwise: pass the UCS-2 text through unchanged. */
+ i_len = sizeof(buffer2);
+ u = buffer2;
+ if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
+ if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
+ return (size_t)-1;
+ }
+ else if ( !charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED) ) {
+ if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
+ return (size_t)-1;
+ }
+ else {
+ u = buffer;
+ i_len = o_len;
+ }
+
+ /* Convert UCS2 to to_set into a newly allocated buffer stored in *dest;
+  * a failure is logged and the (size_t)-1 is passed on through o_len */
+ if ((size_t)-1 == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
+ LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
+
+ return o_len;
}
size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
{
- size_t size;
- char *buffer;
-
- size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
- (char**) &buffer);
- if (size == (size_t)-1) {
- SAFE_FREE(buffer);
- return size;
- }
- if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
- free(buffer);
- return srclen;
- }
-
- size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
- free(buffer);
- return size;
+ size_t size;
+ char *buffer; /* UCS-2 copy of src, allocated by convert_string_allocate_internal() */
+
+ size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
+ (char**) &buffer);
+ if (size == (size_t)-1) {
+ SAFE_FREE(buffer);
+ return size;
+ }
+ if (!strupper_w((ucs2_t *)buffer) && (dest == src)) { /* strupper_w() returned 0 (presumably "nothing changed" - confirm) and the call is in place: keep src as it is */
+ free(buffer);
+ return srclen;
+ }
+
+ size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen); /* convert the UCS-2 buffer back into charset ch */
+ free(buffer);
+ return size;
}
size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
{
- size_t size;
- char *buffer;
-
- size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
- (char **) &buffer);
- if (size == (size_t)-1) {
- SAFE_FREE(buffer);
- return size;
- }
- if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
- free(buffer);
- return srclen;
- }
-
- size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
- free(buffer);
- return size;
+ size_t size;
+ char *buffer; /* UCS-2 copy of src, allocated by convert_string_allocate_internal() */
+
+ size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
+ (char **) &buffer);
+ if (size == (size_t)-1) {
+ SAFE_FREE(buffer);
+ return size;
+ }
+ if (!strlower_w((ucs2_t *)buffer) && (dest == src)) { /* strlower_w() returned 0 (presumably "nothing changed" - confirm) and the call is in place: keep src as it is */
+ free(buffer);
+ return srclen;
+ }
+
+ size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen); /* convert the UCS-2 buffer back into charset ch */
+ free(buffer);
+ return size;
}
size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
{
- return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
+ return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
}
size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
{
- return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
+ return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
}
size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
{
- return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
+ return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
}
size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
{
- return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
+ return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
}
/**
* Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
*
- * @param dest always set at least to NULL
+ * @param dest always set at least to NULL
*
* @returns The number of bytes occupied by the string in the destination
* or -1 in case of error.
size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
{
- size_t src_len = strlen(src);
+ size_t src_len = strlen(src);
- *dest = NULL;
- return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
+ *dest = NULL;
+ return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
}
/** -----------------------------------
* Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
*
- * @param dest always set at least to NULL
+ * @param dest always set at least to NULL
*
* @returns The number of bytes occupied by the string in the destination
**/
size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
{
- size_t src_len = strlen(src);
+ size_t src_len = strlen(src);
- *dest = NULL;
- return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
+ *dest = NULL;
+ return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
}
/** -----------------------------------
* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
*
- * @param dest always set at least to NULL
+ * @param dest always set at least to NULL
*
* @returns The number of bytes occupied by the string in the destination
**/
size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen)
{
- size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
- return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
+ size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
+ return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
}
/* --------------------------------- */
size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
{
- size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
- *dest = NULL;
- return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
+ size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
+ *dest = NULL;
+ return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
}
/** ---------------------------------
* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
*
- * @param dest always set at least to NULL
+ * @param dest always set at least to NULL
*
* @returns The number of bytes occupied by the string in the destination
**/
size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
{
- size_t src_len = strlen(src);
- *dest = NULL;
- return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
+ size_t src_len = strlen(src);
+ *dest = NULL;
+ return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
}
size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
{
- char *buffer;
- ucs2_t u[MAXPATHLEN];
- size_t len;
- size_t ilen;
-
- if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
- return len;
-
- ilen=sizeof(u);
-
- if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
- free (buffer);
- return (size_t)(-1);
- }
-
- if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
- free (buffer);
- return (size_t)(-1);
- }
-
- free(buffer);
- return (len);
+ char *buffer; /* UCS-2 copy of src, allocated by convert_string_allocate_internal() */
+ ucs2_t u[MAXPATHLEN]; /* output of precompose_w() */
+ size_t len;
+ size_t ilen;
+
+ if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) ) /* step 1: src (charset ch) -> UCS-2 */
+ return len;
+
+ ilen=sizeof(u); /* goes in as the size of u in bytes, then replaced by precompose_w()'s return value */
+
+ if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) { /* step 2: precompose into u */
+ free (buffer);
+ return (size_t)(-1);
+ }
+
+ if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) { /* step 3: UCS-2 -> charset ch, into dst */
+ free (buffer);
+ return (size_t)(-1);
+ }
+
+ free(buffer);
+ return (len);
}
size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
{
- char *buffer;
- ucs2_t u[MAXPATHLEN];
- size_t len;
- size_t ilen;
+ char *buffer;
+ ucs2_t u[MAXPATHLEN];
+ size_t len;
+ size_t ilen;
- if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
- return len;
+ if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
+ return len;
- ilen=sizeof(u);
+ ilen=sizeof(u);
- if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
- free (buffer);
- return (size_t)(-1);
- }
+ if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
+ free (buffer);
+ return (size_t)(-1);
+ }
- if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
- free (buffer);
- return (size_t)(-1);
- }
+ if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
+ free (buffer);
+ return (size_t)(-1);
+ }
- free(buffer);
- return (len);
+ free(buffer);
+ return (len);
}
size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
{
- return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
+ return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
}
size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
{
- return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
+ return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
}
#if 0
static char debugbuf[ MAXPATHLEN +1 ];
char * debug_out ( char * seq, size_t len)
{
- size_t i = 0;
- unsigned char *p;
- char *q;
-
- p = (unsigned char*) seq;
- q = debugbuf;
-
- for ( i = 0; i<=(len-1); i++)
- {
- sprintf(q, "%2.2x.", *p);
- q += 3;
- p++;
- }
- *q=0;
- q = debugbuf;
- return q;
+ /*
+ * Debug helper (compiled out): formats the first len bytes of seq as
+ * "xx." hex triplets into the static debugbuf and returns debugbuf.
+ * The result is overwritten by the next call. Output is truncated
+ * once debugbuf cannot hold another triplet plus the terminating NUL.
+ */
+ size_t i = 0;
+ unsigned char *p;
+ char *q;
+
+ p = (unsigned char*) seq;
+ q = debugbuf;
+
+ /*
+ * "i < len" instead of "i <= len-1": len is a size_t, so len == 0 used
+ * to wrap to SIZE_MAX. The second condition keeps 3 characters plus the
+ * NUL written by sprintf inside debugbuf.
+ */
+ for ( i = 0; i < len && (size_t)(q - debugbuf) + 3 < sizeof(debugbuf); i++)
+ {
+ sprintf(q, "%2.2x.", *p);
+ q += 3;
+ p++;
+ }
+ *q=0;
+ q = debugbuf;
+ return q;
}
#endif
-/*
- * Convert from MB to UCS2 charset
+/*
+ * Convert from MB to UCS2 charset
* Flags:
- * CONV_UNESCAPEHEX: ':XX' will be converted to an UCS2 character
- * CONV_IGNORE: return the first convertable characters.
- * CONV_FORCE: force convertion
+ * CONV_UNESCAPEHEX: ':XX' will be converted to a UCS2 character
+ * CONV_IGNORE: return the first convertible characters.
+ * CONV_FORCE: force conversion
* FIXME:
- * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
- * The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it
- * for e.g. HFS cdroms.
+ * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
+ * The (un)escape scheme is not compatible with the old cap style escape. This is bad, we need it
+ * for e.g. HFS cdroms.
+ *
+ * Additional flag handled here:
+ * CONV__EILSEQ: an input byte that cannot be converted is replaced by IGNORE_CHAR
+ *
+ * Parameters:
+ * from_set: charset of src; conv_handles[from_set][CH_UCS2] does the main conversion
+ * cap_set: charset of the bytes decoded from ':XX' escapes (conv_handles[cap_set][CH_UCS2])
+ * src/srclen: input; srclen == (size_t)-1 means strlen(src) + 1
+ * dest/destlen: output buffer and its size in bytes
+ * flags: may be NULL (treated as 0); CONV_REQMANGLE is set in *flags when
+ * CONV_IGNORE cuts the conversion short
+ *
+ * Returns the number of bytes written to dest, or (size_t)-1 with errno set
+ * (EINVAL for an unusable conversion handle, EILSEQ, E2BIG, ...). With
+ * CONV_FORCE the byte count is returned even if input was left unconverted.
+ *
+ * conv_handles is read directly; this function does not call
+ * lazy_initialize_conv() itself.
*/
-static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
+static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
{
- const u_int16_t option = (flags ? *flags : 0);
- size_t i_len, o_len;
- size_t j = 0;
- const char* inbuf = (const char*)src;
- char* outbuf = dest;
- atalk_iconv_t descriptor;
- atalk_iconv_t descriptor_cap;
-
- if (srclen == (size_t)-1)
- srclen = strlen(src) + 1;
-
- lazy_initialize_conv();
-
- descriptor = conv_handles[from_set][CH_UCS2];
- descriptor_cap = conv_handles[cap_set][CH_UCS2];
-
- if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
- errno = EINVAL;
- return (size_t)-1;
- }
-
- i_len=srclen;
- o_len=destlen;
-
- while (i_len > 0) {
- if ((option & CONV_UNESCAPEHEX)) {
- for (j = 0; j < i_len; ++j) {
- if (inbuf[j] == ':') break;
- }
- j = i_len - j;
- i_len -= j;
+ const u_int16_t option = (flags ? *flags : 0);
+ size_t i_len, o_len;
+ size_t j = 0; /* bytes held back from the first ':' onwards; 0 = none */
+ const char* inbuf = (const char*)src;
+ char* outbuf = dest;
+ atalk_iconv_t descriptor;
+ atalk_iconv_t descriptor_cap;
+
+ if (srclen == (size_t)-1)
+ srclen = strlen(src) + 1;
+
+ descriptor = conv_handles[from_set][CH_UCS2];
+ descriptor_cap = conv_handles[cap_set][CH_UCS2];
+
+ if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
+ errno = EINVAL;
+ return (size_t)-1;
}
- if (i_len > 0 &&
- atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
- if (errno == EILSEQ || errno == EINVAL) {
- errno = EILSEQ;
- if ((option & CONV_IGNORE)) {
- *flags |= CONV_REQMANGLE;
- return destlen - o_len;
- }
- if ((option & CONV__EILSEQ)) {
- if (o_len < 2) {
- errno = E2BIG;
- goto end;
- }
- *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */
- inbuf++;
- i_len--;
- outbuf += 2;
- o_len -= 2;
- /* FIXME reset stat ? */
- continue;
- }
- }
- goto end;
- }
+ i_len=srclen;
+ o_len=destlen;
- if (j) {
- /* we're at the start on an hex encoded ucs2 char */
- char h[MAXPATHLEN];
- size_t hlen = 0;
-
- i_len = j, j = 0;
- while (i_len >= 3 && inbuf[0] == ':' &&
- isxdigit(inbuf[1]) && isxdigit(inbuf[2])) {
- h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]);
- inbuf += 3;
- i_len -= 3;
- }
- if (hlen) {
- const char *h_buf = h;
- if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) {
- i_len += hlen * 3;
- inbuf -= hlen * 3;
- if (errno == EILSEQ && (option & CONV_IGNORE)) {
- *flags |= CONV_REQMANGLE;
- return destlen - o_len;
- }
- goto end;
- }
- } else {
- /* We have an invalid :xx sequence */
- errno = EILSEQ;
- if ((option & CONV_IGNORE)) {
- *flags |= CONV_REQMANGLE;
- return destlen - o_len;
- }
- goto end;
- }
+ while (i_len > 0) {
+ if ((option & CONV_UNESCAPEHEX)) {
+ /* convert only up to the first ':'; park the remainder in j */
+ for (j = 0; j < i_len; ++j) {
+ if (inbuf[j] == ':') break;
+ }
+ j = i_len - j;
+ i_len -= j;
+ }
+
+ if (i_len > 0 &&
+ atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
+ if (errno == EILSEQ || errno == EINVAL) {
+ errno = EILSEQ;
+ if ((option & CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen - o_len;
+ }
+ if ((option & CONV__EILSEQ)) {
+ if (o_len < 2) {
+ errno = E2BIG;
+ goto end;
+ }
+ *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */
+ inbuf++;
+ i_len--;
+ outbuf += 2;
+ o_len -= 2;
+ /* FIXME reset stat ? */
+ continue;
+ }
+ }
+ goto end;
+ }
+
+ if (j) {
+ /* we're at the start on an hex encoded ucs2 char */
+ char h[MAXPATHLEN];
+ size_t hlen = 0;
+
+ i_len = j, j = 0;
+ /*
+ * Collect consecutive ':XX' escapes as raw bytes. The run is capped
+ * at sizeof(h) so that long escaped input cannot overrun h; what is
+ * left over is picked up by the next pass of the outer loop.
+ * isxdigit() needs an unsigned char value, plain char may be negative.
+ */
+ while (hlen < sizeof(h) && i_len >= 3 && inbuf[0] == ':' &&
+ isxdigit((unsigned char)inbuf[1]) && isxdigit((unsigned char)inbuf[2])) {
+ h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]);
+ inbuf += 3;
+ i_len -= 3;
+ }
+ if (hlen) {
+ const char *h_buf = h;
+ if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) {
+ /* hand the unconverted escapes (3 input chars each) back to the input */
+ i_len += hlen * 3;
+ inbuf -= hlen * 3;
+ if (errno == EILSEQ && (option & CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen - o_len;
+ }
+ goto end;
+ }
+ } else {
+ /* We have an invalid :xx sequence */
+ errno = EILSEQ;
+ if ((option & CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen - o_len;
+ }
+ goto end;
+ }
+ }
}
- }
- end:
- return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
+end:
+ return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
}
-/*
- * Convert from UCS2 to MB charset
+/*
+ * Convert from UCS2 to MB charset
* Flags:
- * CONV_ESCAPEDOTS: escape leading dots
- * CONV_ESCAPEHEX: unconvertable characters and '/' will be escaped to :XX
- * CONV_IGNORE: return the first convertable characters.
- * CONV__EILSEQ: unconvertable characters will be replaced with '_'
- * CONV_FORCE: force convertion
+ * CONV_ESCAPEDOTS: escape a leading dot as ":2e"
+ * CONV_ESCAPEHEX: unconvertible characters and '/' will be escaped to :XX
+ * CONV_ALLOW_COLON: together with CONV_ESCAPEHEX, ':' is escaped to ":3a"
+ * instead of failing with EILSEQ
+ * CONV_IGNORE: return the first convertible characters.
+ * CONV__EILSEQ: unconvertible characters will be replaced with '_'
+ * NOTE(review): this flag is not referenced in the function body - confirm
+ * CONV_FORCE: force conversion
* FIXME:
- * CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ?
- * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
- * The escape scheme is not compatible to the old cap style escape. This is bad, we need it
- * for e.g. HFS cdroms.
+ * CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ?
+ * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
+ * The escape scheme is not compatible with the old cap style escape. This is bad, we need it
+ * for e.g. HFS cdroms.
+ *
+ * Parameters:
+ * to_set: target charset; conv_handles[CH_UCS2][to_set] does the main conversion
+ * cap_set: charset whose bytes are written as ':XX' escapes (conv_handles[CH_UCS2][cap_set])
+ * src/srclen: UCS2 input and its length in bytes
+ * dest/destlen: output buffer and its size in bytes
+ * flags: may be NULL (treated as 0); CONV_REQESCAPE / CONV_REQMANGLE are
+ * set in *flags when a dot or an unconvertible character was escaped,
+ * or when CONV_IGNORE cut the conversion short
+ *
+ * Returns the number of bytes written to dest, or (size_t)-1 with errno set.
+ * With CONV_FORCE the byte count is returned even if input was left unconverted.
+ *
+ * conv_handles is read directly; this function does not call
+ * lazy_initialize_conv() itself.
*/
static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
{
- const u_int16_t option = (flags ? *flags : 0);
- size_t i_len, o_len, i;
- size_t j = 0;
- const char* inbuf = (const char*)src;
- char* outbuf = (char*)dest;
- atalk_iconv_t descriptor;
- atalk_iconv_t descriptor_cap;
-
- lazy_initialize_conv();
-
- descriptor = conv_handles[CH_UCS2][to_set];
- descriptor_cap = conv_handles[CH_UCS2][cap_set];
-
- if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
- errno = EINVAL;
- return (size_t) -1;
- }
-
- i_len=srclen;
- o_len=destlen;
-
- if ((option & CONV_ESCAPEDOTS) &&
- i_len >= 2 && SVAL(inbuf, 0) == 0x002e) { /* 0x002e = . */
- if (o_len < 3) {
- errno = E2BIG;
- goto end;
+ const u_int16_t option = (flags ? *flags : 0); /* snapshot of *flags on entry; 0 when flags is NULL */
+ size_t i_len, o_len, i;
+ size_t j = 0; /* input bytes held back from an escapable '/' or ':' onwards; 0 = none */
+ const char* inbuf = (const char*)src;
+ char* outbuf = (char*)dest;
+ atalk_iconv_t descriptor;
+ atalk_iconv_t descriptor_cap;
+ char escch; /* 150210: uninitialized OK, depends on j */
+
+ descriptor = conv_handles[CH_UCS2][to_set];
+ descriptor_cap = conv_handles[CH_UCS2][cap_set];
+
+ if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
+ errno = EINVAL;
+ return (size_t) -1;
}
- *outbuf++ = ':';
- *outbuf++ = '2';
- *outbuf++ = 'e';
- o_len -= 3;
- inbuf += 2;
- i_len -= 2;
- *flags |= CONV_REQESCAPE;
- }
-
- while (i_len >= 2) {
- if ((option & CONV_ESCAPEHEX)) {
- for (i = 0; i < i_len; i += 2) {
- ucs2_t c = SVAL(inbuf, i);
- if (c == 0x002f) { /* 0x002f = / */
- j = i_len - i;
- i_len = i;
- break;
- } else if (c == 0x003a) { /* 0x003a = : */
- errno = EILSEQ;
- goto end;
- }
- }
+
+ i_len=srclen;
+ o_len=destlen;
+
+ if ((option & CONV_ESCAPEDOTS) &&
+ i_len >= 2 && SVAL(inbuf, 0) == 0x002e) { /* 0x002e = . */
+ if (o_len < 3) {
+ errno = E2BIG;
+ goto end;
+ }
+ *outbuf++ = ':';
+ *outbuf++ = '2';
+ *outbuf++ = 'e';
+ o_len -= 3;
+ inbuf += 2; /* the dot occupied one 2-byte UCS2 unit */
+ i_len -= 2;
+ *flags |= CONV_REQESCAPE; /* option != 0 here, so flags is non-NULL */
}
- while (i_len > 0 &&
- atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
- if (errno == EILSEQ) {
- if ((option & CONV_IGNORE)) {
- *flags |= CONV_REQMANGLE;
- return destlen - o_len;
- }
- if ((option & CONV_ESCAPEHEX)) {
- const size_t bufsiz = o_len / 3 + 1;
- char *buf = malloc(bufsiz);
- size_t buflen;
-
- if (!buf)
- goto end;
- i = i_len;
- for (buflen = 1; buflen <= bufsiz; ++buflen) {
- char *b = buf;
- size_t o = buflen;
- if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) {
- buflen -= o;
- break;
- } else if (errno != E2BIG) {
- SAFE_FREE(buf);
- goto end;
- } else if (o < buflen) {
- buflen -= o;
- break;
+
+ while (i_len >= 2) { /* one pass per run of input up to the next escapable character */
+ if ((option & CONV_ESCAPEHEX)) {
+ for (i = 0; i < i_len; i += 2) { /* scan the UCS2 units for '/' or ':' */
+ ucs2_t c = SVAL(inbuf, i);
+ switch (c) {
+ case 0x003a: /* 0x003a = ':' */
+ if ( ! (option & CONV_ALLOW_COLON)) { /* ':' is an error unless explicitly allowed */
+ errno = EILSEQ;
+ goto end;
+ }
+ escch = c;
+ j = i_len - i; /* hold back everything from this character on */
+ i_len = i; /* also ends the scan: i < i_len fails after the next i += 2 */
+ break;
+ case 0x002f: /* 0x002f = '/' */
+ escch = c;
+ j = i_len - i; /* hold back everything from this character on */
+ i_len = i; /* also ends the scan: i < i_len fails after the next i += 2 */
+ break;
+ }
}
- }
- if (o_len < buflen * 3) {
- SAFE_FREE(buf);
- errno = E2BIG;
- goto end;
- }
- o_len -= buflen * 3;
- i_len = i;
- for (i = 0; i < buflen; ++i) {
- *outbuf++ = ':';
- *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f];
- *outbuf++ = hexdig[buf[i] & 0x0f];
- }
- SAFE_FREE(buf);
- *flags |= CONV_REQESCAPE;
- continue;
- }
- }
- goto end;
- }
+ }
+ while (i_len > 0 &&
+ atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) { /* convert the run before the held-back part */
+ if (errno == EILSEQ) {
+ if ((option & CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen - o_len; /* bytes written so far */
+ }
+ if ((option & CONV_ESCAPEHEX)) {
+ const size_t bufsiz = o_len / 3 + 1; /* every escaped byte costs 3 output chars */
+ char *buf = malloc(bufsiz);
+ size_t buflen;
+
+ if (!buf)
+ goto end;
+ i = i_len;
+ for (buflen = 1; buflen <= bufsiz; ++buflen) { /* retry with a 1, 2, ... byte scratch buffer - presumably to convert a single character; verify */
+ char *b = buf;
+ size_t o = buflen;
+ if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) {
+ buflen -= o; /* bytes actually produced */
+ break;
+ } else if (errno != E2BIG) {
+ SAFE_FREE(buf);
+ goto end;
+ } else if (o < buflen) { /* E2BIG, but some output was produced */
+ buflen -= o;
+ break;
+ }
+ }
+ if (o_len < buflen * 3) {
+ SAFE_FREE(buf);
+ errno = E2BIG;
+ goto end;
+ }
+ o_len -= buflen * 3;
+ i_len = i; /* adopt the input count left by the cap_set conversion */
+ for (i = 0; i < buflen; ++i) { /* write each produced byte as ":xx" */
+ *outbuf++ = ':';
+ *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f];
+ *outbuf++ = hexdig[buf[i] & 0x0f];
+ }
+ SAFE_FREE(buf);
+ *flags |= CONV_REQESCAPE;
+ continue; /* resume the main conversion after the escaped part */
+ }
+ }
+ goto end;
+ }
- if (j) {
- i_len = j, j = 0;
- if (o_len < 3) {
- errno = E2BIG;
- goto end;
- }
- *outbuf++ = ':';
- *outbuf++ = '2';
- *outbuf++ = 'f';
- o_len -= 3;
- inbuf += 2;
- i_len -= 2;
+ if (j) { /* emit the escape for the held-back '/' or ':' */
+ i_len = j, j = 0; /* restore the held-back input */
+ if (o_len < 3) {
+ errno = E2BIG;
+ goto end;
+ }
+ switch (escch) {
+ case '/':
+ *outbuf++ = ':';
+ *outbuf++ = '2';
+ *outbuf++ = 'f';
+ break;
+ case ':':
+ *outbuf++ = ':';
+ *outbuf++ = '3';
+ *outbuf++ = 'a';
+ break;
+ default:
+ /*
+ * THIS SHOULD NEVER BE REACHED !!!
+ * As a safety net I put in a ' ' here
+ */
+ *outbuf++ = ':';
+ *outbuf++ = '2';
+ *outbuf++ = '0';
+ break;
+ }
+ o_len -= 3;
+ inbuf += 2; /* skip the escaped 2-byte UCS2 unit */
+ i_len -= 2;
+ }
}
- }
- if (i_len > 0) errno = EINVAL;
- end:
- return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
+ if (i_len > 0) errno = EINVAL; /* the loop ends with i_len < 2, so exactly one stray byte is left */
+end:
+ return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
}
/*
* FIXME the size is a mess we really need a malloc/free logic
*`dest size must be dest_len +2
-*/
-size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, char* src, size_t src_len, char* dest, size_t dest_len, u_int16_t *flags)
+ *
+ * Convert src from from_set to to_set by way of UCS2:
+ * 1. pull_charset_flags(): from_set -> UCS2
+ * 2. decompose_w() if CONV_DECOMPOSE is set or to_set is flagged
+ * CHARSET_DECOMPOSED; otherwise precompose_w() if CONV_PRECOMPOSE is set,
+ * from_set has no charsets[] entry, or from_set is flagged
+ * CHARSET_DECOMPOSED; otherwise the string is left as is
+ * 3. strupper_w() for CONV_TOUPPER, else strlower_w() for CONV_TOLOWER
+ * 4. push_charset_flags(): UCS2 -> to_set
+ *
+ * cap_charset is handed to both pull_charset_flags() and
+ * push_charset_flags(); flags is handed to them as well and may be updated
+ * by them.
+ *
+ * Returns the number of bytes written to dest (two NUL bytes are appended
+ * after them, hence the dest_len + 2 requirement), 0 without touching dest
+ * when the first conversion produced nothing, or (size_t)-1 on failure.
+ */
+size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, const char *src, size_t src_len, char *dest, size_t dest_len, u_int16_t *flags)
{
- size_t i_len, o_len;
- ucs2_t *u;
- ucs2_t buffer[MAXPATHLEN +2];
- ucs2_t buffer2[MAXPATHLEN +2];
- int composition = 0;
-
- lazy_initialize_conv();
-
- /* convert from_set to UCS2 */
- if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len,
- (char *) buffer, sizeof(buffer) -2, flags)) ) {
- LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
- return (size_t) -1;
- }
-
- if ( o_len == 0)
- return o_len;
-
- /* Do pre/decomposition */
- if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) ||
- ((!(charsets[to_set]) || !(charsets[to_set]->flags & CHARSET_DECOMPOSED)) &&
- (!(charsets[from_set]) || (charsets[from_set]->flags & CHARSET_DECOMPOSED))))
- composition = 1;
- if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && charsets[to_set]->flags & CHARSET_DECOMPOSED) )
- composition = 2;
-
- i_len = sizeof(buffer2) -2;
- u = buffer2;
-
- switch (composition) {
- case 0:
- u = buffer;
- i_len = o_len;
- break;
- case 1:
- if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
- return (size_t)(-1);
- break;
- case 2:
- if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
- return (size_t)(-1);
- break;
- }
- /* null terminate */
- u[i_len] = 0;
- u[i_len +1] = 0;
-
- /* Do case conversions */
- if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
- strupper_w(u);
- }
- if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
- strlower_w(u);
- }
-
- /* Convert UCS2 to to_set */
- if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
- LOG(log_error, logtype_default,
- "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
- return (size_t) -1;
- }
- /* null terminate */
- dest[o_len] = 0;
- dest[o_len +1] = 0;
-
- return o_len;
+ size_t i_len, o_len; /* both are byte counts, not ucs2_t counts */
+ ucs2_t *u;
+ ucs2_t buffer[MAXPATHLEN +2];
+ ucs2_t buffer2[MAXPATHLEN +2];
+
+ lazy_initialize_conv();
+
+ /* convert from_set to UCS2; 2 bytes of buffer are kept back for the terminator */
+ if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len,
+ (char *) buffer, sizeof(buffer) -2, flags)) ) {
+ LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
+ return (size_t) -1;
+ }
+
+ if ( o_len == 0)
+ return o_len;
+
+ /* Do pre/decomposition */
+ i_len = sizeof(buffer2) -2;
+ u = buffer2;
+ if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && (charsets[to_set]->flags & CHARSET_DECOMPOSED)) ) {
+ if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
+ return (size_t)(-1);
+ }
+ else if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) || !charsets[from_set] || (charsets[from_set]->flags & CHARSET_DECOMPOSED)) {
+ if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
+ return (size_t)(-1);
+ }
+ else {
+ u = buffer;
+ i_len = o_len;
+ }
+ /*
+ * null terminate: i_len counts bytes while u is indexed in ucs2_t units,
+ * so the terminator belongs at element i_len / sizeof(ucs2_t). Indexing
+ * with i_len itself put it at twice the intended offset and outside the
+ * MAXPATHLEN + 2 element buffers for long input. With at most
+ * sizeof(buffer) - 2 bytes of data (assuming decompose_w()/precompose_w()
+ * honour the capacity passed to them) this index is always in range.
+ */
+ u[i_len / sizeof(ucs2_t)] = 0;
+
+ /* Do case conversions */
+ if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
+ strupper_w(u);
+ }
+ else if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
+ strlower_w(u);
+ }
+
+ /* Convert UCS2 to to_set */
+ if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
+ LOG(log_error, logtype_default,
+ "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
+ return (size_t) -1;
+ }
+ /* null terminate; this is why dest needs room for dest_len + 2 bytes */
+ dest[o_len] = 0;
+ dest[o_len +1] = 0;
+
+ return o_len;
}