X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libatalk%2Funicode%2Fcharcnv.c;h=9e4ec67acfcd8bfd04b2d98ff8ca3e1ed3415afc;hb=53b0b6204387cf693280781355e0f27700f8eace;hp=67ca07a83e779d9dee95ed2750d9c80182be38fa;hpb=6590d40ae8032ad963288710c826a068039525c3;p=netatalk.git diff --git a/libatalk/unicode/charcnv.c b/libatalk/unicode/charcnv.c index 67ca07a8..9e4ec67a 100644 --- a/libatalk/unicode/charcnv.c +++ b/libatalk/unicode/charcnv.c @@ -1,24 +1,24 @@ -/* - Unix SMB/CIFS implementation. - Character set conversion Extensions - Copyright (C) Igor Vergeichik 2001 - Copyright (C) Andrew Tridgell 2001 - Copyright (C) Simo Sorce 2001 - Copyright (C) Martin Pool 2003 - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +/* + Unix SMB/CIFS implementation. + Character set conversion Extensions + Copyright (C) Igor Vergeichik 2001 + Copyright (C) Andrew Tridgell 2001 + Copyright (C) Simo Sorce 2001 + Copyright (C) Martin Pool 2003 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef HAVE_CONFIG_H @@ -54,7 +54,7 @@ * @file * * @brief Character-set conversion routines built on our iconv. - * + * * @note Samba's internal character set (at least in the 3.0 series) * is always the same as the one for the Unix filesystem. It is * not necessarily UTF-8 and may be different on machines that @@ -66,7 +66,7 @@ */ -#define MAX_CHARSETS 10 +#define MAX_CHARSETS 20 #define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 ) @@ -76,168 +76,168 @@ static struct charset_functions* charsets[MAX_CHARSETS]; static char hexdig[] = "0123456789abcdef"; #define hextoint( c ) ( isdigit( c ) ? c - '0' : c + 10 - 'a' ) -static char* read_charsets_from_env(charset_t ch) +static char* read_charsets_from_env(charset_t ch) { - char *name; - - switch (ch) { - case CH_MAC: - if (( name = getenv( "ATALK_MAC_CHARSET" )) != NULL ) - return name; - else - return "MAC_ROMAN"; - break; - case CH_UNIX: - if (( name = getenv( "ATALK_UNIX_CHARSET" )) != NULL ) - return name; - else - return "LOCALE"; - break; - default: - break; - } - return "ASCII"; -} - + char *name; + + switch (ch) { + case CH_MAC: + if (( name = getenv( "ATALK_MAC_CHARSET" )) != NULL ) + return name; + else + return "MAC_ROMAN"; + break; + case CH_UNIX: + if (( name = getenv( "ATALK_UNIX_CHARSET" )) != NULL ) + return name; + else + return "LOCALE"; + break; + default: + break; + } + return "ASCII"; +} + /** * Return the name of a charset to give to iconv(). **/ static const char *charset_name(charset_t ch) { - const char *ret = NULL; - static int first = 1; - static char macname[128]; - static char unixname[128]; - - if (first) { - memset(macname, 0, sizeof(macname)); - memset(unixname, 0, sizeof(unixname)); - first = 0; - } - - if (ch == CH_UCS2) ret = "UCS-2"; - else if (ch == CH_UTF8) ret = "UTF8"; - else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC"; - else if (ch == CH_UNIX) { - if (unixname[0] == '\0') { - ret = read_charsets_from_env(CH_UNIX); - strlcpy(unixname, ret, sizeof(unixname)); - } - else - ret = unixname; - } - else if (ch == CH_MAC) { - if (macname[0] == '\0') { - ret = read_charsets_from_env(CH_MAC); - strlcpy(macname, ret, sizeof(macname)); - } - else - ret = macname; - } - - if (!ret) - ret = charset_names[ch]; + const char *ret = NULL; + static int first = 1; + static char macname[128]; + static char unixname[128]; + + if (first) { + memset(macname, 0, sizeof(macname)); + memset(unixname, 0, sizeof(unixname)); + first = 0; + } + + if (ch == CH_UCS2) ret = "UCS-2"; + else if (ch == CH_UTF8) ret = "UTF8"; + else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC"; + else if (ch == CH_UNIX) { + if (unixname[0] == '\0') { + ret = read_charsets_from_env(CH_UNIX); + strlcpy(unixname, ret, sizeof(unixname)); + } + else + ret = unixname; + } + else if (ch == CH_MAC) { + if (macname[0] == '\0') { + ret = read_charsets_from_env(CH_MAC); + strlcpy(macname, ret, sizeof(macname)); + } + else + ret = macname; + } + + if (!ret) + ret = charset_names[ch]; #if defined(HAVE_NL_LANGINFO) && defined(CODESET) - if (ret && strcasecmp(ret, "LOCALE") == 0) { - const char *ln = NULL; + if (ret && strcasecmp(ret, "LOCALE") == 0) { + const char *ln = NULL; #ifdef HAVE_SETLOCALE - setlocale(LC_ALL, ""); + setlocale(LC_ALL, ""); #endif - ln = nl_langinfo(CODESET); - if (ln) { - /* Check whether the charset name is supported - by iconv */ - atalk_iconv_t handle = atalk_iconv_open(ln, "UCS-2"); - if (handle == (atalk_iconv_t) -1) { - LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln); - ln = "ASCII"; - } else { - atalk_iconv_close(handle); - } - if (ch==CH_UNIX) - strlcpy(unixname, ln, sizeof(unixname)); - } - ret = ln; - } + ln = nl_langinfo(CODESET); + if (ln) { + /* Check whether the charset name is supported + by iconv */ + atalk_iconv_t handle = atalk_iconv_open(ln, "UCS-2"); + if (handle == (atalk_iconv_t) -1) { + LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln); + ln = "ASCII"; + } else { + atalk_iconv_close(handle); + } + if (ch==CH_UNIX) + strlcpy(unixname, ln, sizeof(unixname)); + } + ret = ln; + } #else /* system doesn't have LOCALE support */ -if (ch == CH_UNIX) ret = NULL; + if (ch == CH_UNIX) ret = NULL; #endif - if (!ret || !*ret) ret = "ASCII"; - return ret; + if (!ret || !*ret) ret = "ASCII"; + return ret; } -struct charset_functions* get_charset_functions (charset_t ch) +static struct charset_functions* get_charset_functions (charset_t ch) { - if (charsets[ch] != NULL) - return charsets[ch]; + if (charsets[ch] != NULL) + return charsets[ch]; - charsets[ch] = find_charset_functions(charset_name(ch)); + charsets[ch] = find_charset_functions(charset_name(ch)); - return charsets[ch]; + return charsets[ch]; } - -void lazy_initialize_conv(void) + +static void lazy_initialize_conv(void) { - static int initialized = 0; + static int initialized = 0; - if (!initialized) { - initialized = 1; - init_iconv(); - } + if (!initialized) { + initialized = 1; + init_iconv(); + } } -charset_t add_charset(char* name) +charset_t add_charset(const char* name) { - static charset_t max_charset_t = NUM_CHARSETS-1; - charset_t cur_charset_t = max_charset_t+1; - unsigned int c1; - - lazy_initialize_conv(); - - for (c1=0; c1<=max_charset_t;c1++) { - if ( strcasecmp(name, charset_name(c1)) == 0) - return (c1); - } - - if ( cur_charset_t >= MAX_CHARSETS ) { - LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)", - name, MAX_CHARSETS); - return (charset_t) -1; - } - - /* First try to setup the required conversions */ - - conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name); - if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) { - LOG(log_error, logtype_default, "Required conversion from %s to %s not supported", - name, charset_name(CH_UCS2)); - conv_handles[cur_charset_t][CH_UCS2] = NULL; - return (charset_t) -1; - } - - conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2)); - if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) { - LOG(log_error, logtype_default, "Required conversion from %s to %s not supported", - charset_name(CH_UCS2), name); - conv_handles[CH_UCS2][cur_charset_t] = NULL; - return (charset_t) -1; - } - - /* register the new charset_t name */ - charset_names[cur_charset_t] = strdup(name); - - charsets[cur_charset_t] = get_charset_functions (cur_charset_t); - max_charset_t++; + static charset_t max_charset_t = NUM_CHARSETS-1; + charset_t cur_charset_t = max_charset_t+1; + unsigned int c1; + + lazy_initialize_conv(); + + for (c1=0; c1<=max_charset_t;c1++) { + if ( strcasecmp(name, charset_name(c1)) == 0) + return (c1); + } + + if ( cur_charset_t >= MAX_CHARSETS ) { + LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)", + name, MAX_CHARSETS); + return (charset_t) -1; + } + + /* First try to setup the required conversions */ + + conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name); + if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) { + LOG(log_error, logtype_default, "Required conversion from %s to %s not supported", + name, charset_name(CH_UCS2)); + conv_handles[cur_charset_t][CH_UCS2] = NULL; + return (charset_t) -1; + } + + conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2)); + if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) { + LOG(log_error, logtype_default, "Required conversion from %s to %s not supported", + charset_name(CH_UCS2), name); + conv_handles[CH_UCS2][cur_charset_t] = NULL; + return (charset_t) -1; + } + + /* register the new charset_t name */ + charset_names[cur_charset_t] = strdup(name); + + charsets[cur_charset_t] = get_charset_functions (cur_charset_t); + max_charset_t++; #ifdef DEBUG - LOG(log_debug, logtype_default, "Added charset %s with handle %u", name, cur_charset_t); -#endif /* DEBUG */ - return (cur_charset_t); + LOG(log_debug9, logtype_default, "Added charset %s with handle %u", name, cur_charset_t); +#endif + return (cur_charset_t); } /** @@ -249,29 +249,29 @@ charset_t add_charset(char* name) **/ void init_iconv(void) { - int c1; - - for (c1=0;c1= 2) { - buf[len] = 0; - buf[len+1] = 0; - - } - else if ( to != CH_UCS2 && bytesleft > 0 ) - buf[len] = 0; - else { - errno = E2BIG; - return (size_t)(-1); - } - - return len; + /* Terminate the string */ + if (to == CH_UCS2 && bytesleft >= 2) { + buf[len] = 0; + buf[len+1] = 0; + + } + else if ( to != CH_UCS2 && bytesleft > 0 ) + buf[len] = 0; + else { + errno = E2BIG; + return (size_t)(-1); + } + + return len; } - + /** * Convert string from one encoding to another, making error checking etc * @@ -306,113 +306,100 @@ static size_t add_null(charset_t to, char *buf, size_t bytesleft, size_t len) * @returns the number of bytes occupied in the destination **/ static size_t convert_string_internal(charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen) + void const *src, size_t srclen, + void *dest, size_t destlen) { - size_t i_len, o_len; - size_t retval; - const char* inbuf = (const char*)src; - char* outbuf = (char*)dest; - char* o_save = outbuf; - atalk_iconv_t descriptor; - - /* Fixed based on Samba 3.0.6 */ - if (srclen == (size_t)-1) { - if (from == CH_UCS2) { - srclen = (strlen_w((const ucs2_t *)src)+1) * 2; - } else { - srclen = strlen((const char *)src)+1; - } - } - - - lazy_initialize_conv(); - - descriptor = conv_handles[from][to]; - - if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { - return (size_t) -1; - } - - i_len=srclen; - o_len=destlen; - retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len); - if(retval==(size_t)-1) { - const char *reason="unknown error"; - switch(errno) { - case EINVAL: - reason="Incomplete multibyte sequence"; - break; - case E2BIG: - reason="No more room"; - break; - case EILSEQ: - reason="Illegal multibyte sequence"; - break; - } - LOG(log_debug, logtype_default,"Conversion error: %s",reason); - return (size_t)-1; - } - - /* Terminate the string */ - return add_null( to, o_save, o_len, destlen -o_len); + size_t i_len, o_len; + size_t retval; + const char* inbuf = (const char*)src; + char* outbuf = (char*)dest; + char* o_save = outbuf; + atalk_iconv_t descriptor; + + /* Fixed based on Samba 3.0.6 */ + if (srclen == (size_t)-1) { + if (from == CH_UCS2) { + srclen = (strlen_w((const ucs2_t *)src)) * 2; + } else { + srclen = strlen((const char *)src); + } + } + + + lazy_initialize_conv(); + + descriptor = conv_handles[from][to]; + + if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { + return (size_t) -1; + } + + i_len=srclen; + o_len=destlen; + retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len); + if(retval==(size_t)-1) { + const char *reason="unknown error"; + switch(errno) { + case EINVAL: + reason="Incomplete multibyte sequence"; + break; + case E2BIG: + reason="No more room"; + break; + case EILSEQ: + reason="Illegal multibyte sequence"; + break; + } + LOG(log_debug, logtype_default,"Conversion error: %s",reason); + return (size_t)-1; + } + + /* Terminate the string */ + return add_null( to, o_save, o_len, destlen -o_len); } size_t convert_string(charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen) + void const *src, size_t srclen, + void *dest, size_t destlen) { - size_t i_len, o_len; - ucs2_t *u; - ucs2_t buffer[MAXPATHLEN]; - ucs2_t buffer2[MAXPATHLEN]; - int composition = 0; - - lazy_initialize_conv(); - - /* convert from_set to UCS2 */ - if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen, - (char*) buffer, sizeof(buffer))) ) { - LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from)); - return (size_t) -1; - } - - /* Do pre/decomposition */ - if ( ((!(charsets[to]) || !(charsets[to]->flags & CHARSET_DECOMPOSED)) && - (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED)))) - composition = 1; - if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) ) - composition = 2; - - i_len = sizeof(buffer2); - u = buffer2; - - switch (composition) { - case 0: - u = buffer; - i_len = o_len; - break; - case 1: - if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) ) - return (size_t)(-1); - break; - case 2: - if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) ) - return (size_t)(-1); - break; - } - - /* Convert UCS2 to to_set */ - if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) { - LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno)); - return (size_t) -1; - } - - return o_len; -} - - + size_t i_len, o_len; + ucs2_t *u; + ucs2_t buffer[MAXPATHLEN]; + ucs2_t buffer2[MAXPATHLEN]; + + /* convert from_set to UCS2 */ + if ((size_t)-1 == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen, + (char*) buffer, sizeof(buffer))) ) { + LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from)); + return (size_t) -1; + } + + /* Do pre/decomposition */ + i_len = sizeof(buffer2); + u = buffer2; + if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) { + if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) ) + return (size_t)-1; + } + else if (!charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED)) { + if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) ) + return (size_t)-1; + } + else { + u = buffer; + i_len = o_len; + } + /* Convert UCS2 to to_set */ + if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) { + LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno)); + return (size_t) -1; + } + + return o_len; +} + + /** * Convert between character sets, allocating a new buffer for the result. @@ -425,211 +412,199 @@ size_t convert_string(charset_t from, charset_t to, **/ static size_t convert_string_allocate_internal(charset_t from, charset_t to, - void const *src, size_t srclen, char **dest) + void const *src, size_t srclen, char **dest) { - size_t i_len, o_len, destlen; - size_t retval; - const char *inbuf = (const char *)src; - char *outbuf = NULL, *ob = NULL; - atalk_iconv_t descriptor; + size_t i_len, o_len, destlen; + size_t retval; + const char *inbuf = (const char *)src; + char *outbuf = NULL, *ob = NULL; + atalk_iconv_t descriptor; - *dest = NULL; + *dest = NULL; - if (src == NULL || srclen == (size_t)-1) - return (size_t)-1; + if (src == NULL || srclen == (size_t)-1) + return (size_t)-1; - lazy_initialize_conv(); + lazy_initialize_conv(); - descriptor = conv_handles[from][to]; + descriptor = conv_handles[from][to]; - if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { - /* conversion not supported, return -1*/ - LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!"); - return -1; - } + if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { + /* conversion not supported, return -1*/ + LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!"); + return -1; + } - destlen = MAX(srclen, 512); + destlen = MAX(srclen, 512); convert: - destlen = destlen * 2; - outbuf = (char *)realloc(ob, destlen); - if (!outbuf) { - LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!"); - SAFE_FREE(ob); - return (size_t)-1; - } else { - ob = outbuf; - } - inbuf = src; /* this restarts the whole conversion if buffer needed to be increased */ - i_len = srclen; - o_len = destlen; - retval = atalk_iconv(descriptor, - &inbuf, &i_len, - &outbuf, &o_len); - if(retval == (size_t)-1) { - const char *reason="unknown error"; - switch(errno) { - case EINVAL: - reason="Incomplete multibyte sequence"; - break; - case E2BIG: - goto convert; - case EILSEQ: - reason="Illegal multibyte sequence"; - break; - } - LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf); - SAFE_FREE(ob); - return (size_t)-1; - } - - - destlen = destlen - o_len; - - /* Terminate the string */ - if (to == CH_UCS2 && o_len >= 2) { - ob[destlen] = 0; - ob[destlen+1] = 0; - *dest = (char *)realloc(ob,destlen+2); - } - else if ( to != CH_UCS2 && o_len > 0 ) { - ob[destlen] = 0; - *dest = (char *)realloc(ob,destlen+1); - } - else { - goto convert; /* realloc */ - } - - if (destlen && !*dest) { - LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!"); - SAFE_FREE(ob); - return (size_t)-1; - } - - return destlen; + destlen = destlen * 2; + outbuf = (char *)realloc(ob, destlen); + if (!outbuf) { + LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!"); + SAFE_FREE(ob); + return (size_t)-1; + } else { + ob = outbuf; + } + inbuf = src; /* this restarts the whole conversion if buffer needed to be increased */ + i_len = srclen; + o_len = destlen; + retval = atalk_iconv(descriptor, + &inbuf, &i_len, + &outbuf, &o_len); + if(retval == (size_t)-1) { + const char *reason="unknown error"; + switch(errno) { + case EINVAL: + reason="Incomplete multibyte sequence"; + break; + case E2BIG: + goto convert; + case EILSEQ: + reason="Illegal multibyte sequence"; + break; + } + LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf); + SAFE_FREE(ob); + return (size_t)-1; + } + + + destlen = destlen - o_len; + + /* Terminate the string */ + if (to == CH_UCS2 && o_len >= 2) { + ob[destlen] = 0; + ob[destlen+1] = 0; + *dest = (char *)realloc(ob,destlen+2); + } + else if ( to != CH_UCS2 && o_len > 0 ) { + ob[destlen] = 0; + *dest = (char *)realloc(ob,destlen+1); + } + else { + goto convert; /* realloc */ + } + + if (destlen && !*dest) { + LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!"); + SAFE_FREE(ob); + return (size_t)-1; + } + + return destlen; } size_t convert_string_allocate(charset_t from, charset_t to, - void const *src, size_t srclen, - char ** dest) + void const *src, size_t srclen, + char ** dest) { - size_t i_len, o_len; - ucs2_t *u; - ucs2_t buffer[MAXPATHLEN]; - ucs2_t buffer2[MAXPATHLEN]; - int composition = 0; - - lazy_initialize_conv(); - - *dest = NULL; - - /* convert from_set to UCS2 */ - if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen, - buffer, sizeof(buffer))) ) { - LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from)); - return (size_t) -1; - } - - /* Do pre/decomposition */ - if ( ((!(charsets[to]) || !(charsets[to]->flags & CHARSET_DECOMPOSED)) && - (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED)))) - composition = 1; - if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) ) - composition = 2; - - i_len = sizeof(buffer2); - u = buffer2; - - switch (composition) { - case 0: - u = buffer; - i_len = o_len; - break; - case 1: - if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) ) - return (size_t)(-1); - break; - case 2: - if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) ) - return (size_t)(-1); - break; - } - - /* Convert UCS2 to to_set */ - if ((size_t)(-1) == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) ) - LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno)); - - return o_len; + size_t i_len, o_len; + ucs2_t *u; + ucs2_t buffer[MAXPATHLEN]; + ucs2_t buffer2[MAXPATHLEN]; + + *dest = NULL; + + /* convert from_set to UCS2 */ + if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen, + buffer, sizeof(buffer))) ) { + LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from)); + return (size_t) -1; + } + + /* Do pre/decomposition */ + i_len = sizeof(buffer2); + u = buffer2; + if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) { + if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) ) + return (size_t)-1; + } + else if ( !charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED) ) { + if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) ) + return (size_t)-1; + } + else { + u = buffer; + i_len = o_len; + } + + /* Convert UCS2 to to_set */ + if ((size_t)-1 == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) ) + LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno)); + + return o_len; } size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen) { - size_t size; - char *buffer; - - size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen, - (char**) &buffer); - if (size == (size_t)-1) { - SAFE_FREE(buffer); - return size; - } - if (!strupper_w((ucs2_t *)buffer) && (dest == src)) { - free(buffer); - return srclen; - } - - size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen); - free(buffer); - return size; + size_t size; + char *buffer; + + size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen, + (char**) &buffer); + if (size == (size_t)-1) { + SAFE_FREE(buffer); + return size; + } + if (!strupper_w((ucs2_t *)buffer) && (dest == src)) { + free(buffer); + return srclen; + } + + size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen); + free(buffer); + return size; } size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen) { - size_t size; - char *buffer; - - size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen, - (char **) &buffer); - if (size == (size_t)-1) { - SAFE_FREE(buffer); - return size; - } - if (!strlower_w((ucs2_t *)buffer) && (dest == src)) { - free(buffer); - return srclen; - } - - size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen); - free(buffer); - return size; + size_t size; + char *buffer; + + size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen, + (char **) &buffer); + if (size == (size_t)-1) { + SAFE_FREE(buffer); + return size; + } + if (!strlower_w((ucs2_t *)buffer) && (dest == src)) { + free(buffer); + return srclen; + } + + size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen); + free(buffer); + return size; } size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen) { - return charset_strupper( CH_UNIX, src, srclen, dest, destlen); + return charset_strupper( CH_UNIX, src, srclen, dest, destlen); } size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen) { - return charset_strlower( CH_UNIX, src, srclen, dest, destlen); + return charset_strlower( CH_UNIX, src, srclen, dest, destlen); } size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen) { - return charset_strupper( CH_UTF8, src, srclen, dest, destlen); + return charset_strupper( CH_UTF8, src, srclen, dest, destlen); } size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen) { - return charset_strlower( CH_UTF8, src, srclen, dest, destlen); + return charset_strlower( CH_UTF8, src, srclen, dest, destlen); } /** * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer * - * @param dest always set at least to NULL + * @param dest always set at least to NULL * * @returns The number of bytes occupied by the string in the destination * or -1 in case of error. @@ -637,456 +612,478 @@ size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen) size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src) { - size_t src_len = strlen(src); + size_t src_len = strlen(src); - *dest = NULL; - return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest); + *dest = NULL; + return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest); } /** ----------------------------------- * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer * - * @param dest always set at least to NULL + * @param dest always set at least to NULL * * @returns The number of bytes occupied by the string in the destination **/ size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src) { - size_t src_len = strlen(src); + size_t src_len = strlen(src); - *dest = NULL; - return convert_string_allocate(ch, CH_UTF8, src, src_len, dest); + *dest = NULL; + return convert_string_allocate(ch, CH_UTF8, src, src_len, dest); } /** ----------------------------------- * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer * - * @param dest always set at least to NULL + * @param dest always set at least to NULL * * @returns The number of bytes occupied by the string in the destination **/ size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen) { - size_t src_len = (strlen_w(src)) * sizeof(ucs2_t); - return convert_string(CH_UCS2, ch, src, src_len, dest, destlen); + size_t src_len = (strlen_w(src)) * sizeof(ucs2_t); + return convert_string(CH_UCS2, ch, src, src_len, dest, destlen); } /* --------------------------------- */ size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src) { - size_t src_len = (strlen_w(src)) * sizeof(ucs2_t); - *dest = NULL; - return convert_string_allocate(CH_UCS2, ch, src, src_len, dest); + size_t src_len = (strlen_w(src)) * sizeof(ucs2_t); + *dest = NULL; + return convert_string_allocate(CH_UCS2, ch, src, src_len, dest); } /** --------------------------------- * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer * - * @param dest always set at least to NULL + * @param dest always set at least to NULL * * @returns The number of bytes occupied by the string in the destination **/ size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src) { - size_t src_len = strlen(src); - *dest = NULL; - return convert_string_allocate(CH_UTF8, ch, src, src_len, dest); + size_t src_len = strlen(src); + *dest = NULL; + return convert_string_allocate(CH_UTF8, ch, src, src_len, dest); } size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen) { - char *buffer; - ucs2_t u[MAXPATHLEN]; - size_t len; - size_t ilen; - - if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) ) - return len; - - ilen=sizeof(u); - - if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) { - free (buffer); - return (size_t)(-1); - } - - if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) { - free (buffer); - return (size_t)(-1); - } - - free(buffer); - return (len); + char *buffer; + ucs2_t u[MAXPATHLEN]; + size_t len; + size_t ilen; + + if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) ) + return len; + + ilen=sizeof(u); + + if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) { + free (buffer); + return (size_t)(-1); + } + + if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) { + free (buffer); + return (size_t)(-1); + } + + free(buffer); + return (len); } size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen) { - char *buffer; - ucs2_t u[MAXPATHLEN]; - size_t len; - size_t ilen; + char *buffer; + ucs2_t u[MAXPATHLEN]; + size_t len; + size_t ilen; - if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) ) - return len; + if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) ) + return len; - ilen=sizeof(u); + ilen=sizeof(u); - if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) { - free (buffer); - return (size_t)(-1); - } + if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) { + free (buffer); + return (size_t)(-1); + } - if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) { - free (buffer); - return (size_t)(-1); - } + if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) { + free (buffer); + return (size_t)(-1); + } - free(buffer); - return (len); + free(buffer); + return (len); } size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen) { - return charset_precompose ( CH_UTF8, src, inlen, dst, outlen); + return charset_precompose ( CH_UTF8, src, inlen, dst, outlen); } size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen) { - return charset_decompose ( CH_UTF8, src, inlen, dst, outlen); + return charset_decompose ( CH_UTF8, src, inlen, dst, outlen); } #if 0 static char debugbuf[ MAXPATHLEN +1 ]; char * debug_out ( char * seq, size_t len) { - size_t i = 0; - unsigned char *p; - char *q; - - p = (unsigned char*) seq; - q = debugbuf; - - for ( i = 0; i<=(len-1); i++) - { - sprintf(q, "%2.2x.", *p); - q += 3; - p++; - } - *q=0; - q = debugbuf; - return q; + size_t i = 0; + unsigned char *p; + char *q; + + p = (unsigned char*) seq; + q = debugbuf; + + for ( i = 0; i<=(len-1); i++) + { + sprintf(q, "%2.2x.", *p); + q += 3; + p++; + } + *q=0; + q = debugbuf; + return q; } #endif -/* - * Convert from MB to UCS2 charset +/* + * Convert from MB to UCS2 charset * Flags: - * CONV_UNESCAPEHEX: ':XX' will be converted to an UCS2 character - * CONV_IGNORE: return the first convertable characters. - * CONV_FORCE: force convertion + * CONV_UNESCAPEHEX: ':XX' will be converted to an UCS2 character + * CONV_IGNORE: return the first convertable characters. + * CONV_FORCE: force convertion * FIXME: - * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail - * The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it - * for e.g. HFS cdroms. + * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail + * The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it + * for e.g. HFS cdroms. */ -static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags) +static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags) { - const u_int16_t option = (flags ? *flags : 0); - size_t i_len, o_len; - size_t j = 0; - const char* inbuf = (const char*)src; - char* outbuf = dest; - atalk_iconv_t descriptor; - atalk_iconv_t descriptor_cap; - - if (srclen == (size_t)-1) - srclen = strlen(src) + 1; - - lazy_initialize_conv(); - - descriptor = conv_handles[from_set][CH_UCS2]; - descriptor_cap = conv_handles[cap_set][CH_UCS2]; - - if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { - errno = EINVAL; - return (size_t)-1; - } - - i_len=srclen; - o_len=destlen; - - while (i_len > 0) { - if ((option & CONV_UNESCAPEHEX)) { - for (j = 0; j < i_len; ++j) { - if (inbuf[j] == ':') break; - } - j = i_len - j; - i_len -= j; + const u_int16_t option = (flags ? *flags : 0); + size_t i_len, o_len; + size_t j = 0; + const char* inbuf = (const char*)src; + char* outbuf = dest; + atalk_iconv_t descriptor; + atalk_iconv_t descriptor_cap; + + if (srclen == (size_t)-1) + srclen = strlen(src) + 1; + + descriptor = conv_handles[from_set][CH_UCS2]; + descriptor_cap = conv_handles[cap_set][CH_UCS2]; + + if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { + errno = EINVAL; + return (size_t)-1; } - if (i_len > 0 && - atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) { - if (errno == EILSEQ || errno == EINVAL) { - errno = EILSEQ; - if ((option & CONV_IGNORE)) { - *flags |= CONV_REQMANGLE; - return destlen - o_len; - } - if ((option & CONV__EILSEQ)) { - if (o_len < 2) { - errno = E2BIG; - goto end; - } - *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */ - inbuf++; - i_len--; - outbuf += 2; - o_len -= 2; - /* FIXME reset stat ? */ - continue; - } - } - goto end; - } + i_len=srclen; + o_len=destlen; - if (j) { - /* we're at the start on an hex encoded ucs2 char */ - char h[MAXPATHLEN]; - size_t hlen = 0; - - i_len = j, j = 0; - while (i_len >= 3 && inbuf[0] == ':' && - isxdigit(inbuf[1]) && isxdigit(inbuf[2])) { - h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]); - inbuf += 3; - i_len -= 3; - } - if (hlen) { - const char *h_buf = h; - if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) { - i_len += hlen * 3; - inbuf -= hlen * 3; - if (errno == EILSEQ && (option & CONV_IGNORE)) { - *flags |= CONV_REQMANGLE; - return destlen - o_len; - } - goto end; - } - } else { - /* We have an invalid :xx sequence */ - errno = EILSEQ; - if ((option & CONV_IGNORE)) { - *flags |= CONV_REQMANGLE; - return destlen - o_len; - } - goto end; - } + while (i_len > 0) { + if ((option & CONV_UNESCAPEHEX)) { + for (j = 0; j < i_len; ++j) { + if (inbuf[j] == ':') break; + } + j = i_len - j; + i_len -= j; + } + + if (i_len > 0 && + atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) { + if (errno == EILSEQ || errno == EINVAL) { + errno = EILSEQ; + if ((option & CONV_IGNORE)) { + *flags |= CONV_REQMANGLE; + return destlen - o_len; + } + if ((option & CONV__EILSEQ)) { + if (o_len < 2) { + errno = E2BIG; + goto end; + } + *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */ + inbuf++; + i_len--; + outbuf += 2; + o_len -= 2; + /* FIXME reset stat ? */ + continue; + } + } + goto end; + } + + if (j) { + /* we're at the start on an hex encoded ucs2 char */ + char h[MAXPATHLEN]; + size_t hlen = 0; + + i_len = j, j = 0; + while (i_len >= 3 && inbuf[0] == ':' && + isxdigit(inbuf[1]) && isxdigit(inbuf[2])) { + h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]); + inbuf += 3; + i_len -= 3; + } + if (hlen) { + const char *h_buf = h; + if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) { + i_len += hlen * 3; + inbuf -= hlen * 3; + if (errno == EILSEQ && (option & CONV_IGNORE)) { + *flags |= CONV_REQMANGLE; + return destlen - o_len; + } + goto end; + } + } else { + /* We have an invalid :xx sequence */ + errno = EILSEQ; + if ((option & CONV_IGNORE)) { + *flags |= CONV_REQMANGLE; + return destlen - o_len; + } + goto end; + } + } } - } - end: - return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1; +end: + return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1; } -/* - * Convert from UCS2 to MB charset +/* + * Convert from UCS2 to MB charset * Flags: - * CONV_ESCAPEDOTS: escape leading dots - * CONV_ESCAPEHEX: unconvertable characters and '/' will be escaped to :XX - * CONV_IGNORE: return the first convertable characters. - * CONV__EILSEQ: unconvertable characters will be replaced with '_' - * CONV_FORCE: force convertion + * CONV_ESCAPEDOTS: escape leading dots + * CONV_ESCAPEHEX: unconvertable characters and '/' will be escaped to :XX + * CONV_IGNORE: return the first convertable characters. + * CONV__EILSEQ: unconvertable characters will be replaced with '_' + * CONV_FORCE: force convertion * FIXME: - * CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ? - * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail - * The escape scheme is not compatible to the old cap style escape. This is bad, we need it - * for e.g. HFS cdroms. + * CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ? + * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail + * The escape scheme is not compatible to the old cap style escape. This is bad, we need it + * for e.g. HFS cdroms. */ static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags) { - const u_int16_t option = (flags ? *flags : 0); - size_t i_len, o_len, i; - size_t j = 0; - const char* inbuf = (const char*)src; - char* outbuf = (char*)dest; - atalk_iconv_t descriptor; - atalk_iconv_t descriptor_cap; - - lazy_initialize_conv(); - - descriptor = conv_handles[CH_UCS2][to_set]; - descriptor_cap = conv_handles[CH_UCS2][cap_set]; - - if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { - errno = EINVAL; - return (size_t) -1; - } - - i_len=srclen; - o_len=destlen; - - if ((option & CONV_ESCAPEDOTS) && - i_len >= 2 && SVAL(inbuf, 0) == 0x002e) { /* 0x002e = . */ - if (o_len < 3) { - errno = E2BIG; - goto end; + const u_int16_t option = (flags ? *flags : 0); + size_t i_len, o_len, i; + size_t j = 0; + const char* inbuf = (const char*)src; + char* outbuf = (char*)dest; + atalk_iconv_t descriptor; + atalk_iconv_t descriptor_cap; + char escch; + + descriptor = conv_handles[CH_UCS2][to_set]; + descriptor_cap = conv_handles[CH_UCS2][cap_set]; + + if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) { + errno = EINVAL; + return (size_t) -1; } - *outbuf++ = ':'; - *outbuf++ = '2'; - *outbuf++ = 'e'; - o_len -= 3; - inbuf += 2; - i_len -= 2; - *flags |= CONV_REQESCAPE; - } - - while (i_len >= 2) { - if ((option & CONV_ESCAPEHEX)) { - for (i = 0; i < i_len; i += 2) { - ucs2_t c = SVAL(inbuf, i); - if (c == 0x002f) { /* 0x002f = / */ - j = i_len - i; - i_len = i; - break; - } else if (c == 0x003a) { /* 0x003a = : */ - errno = EILSEQ; - goto end; - } - } + + i_len=srclen; + o_len=destlen; + + if ((option & CONV_ESCAPEDOTS) && + i_len >= 2 && SVAL(inbuf, 0) == 0x002e) { /* 0x002e = . */ + if (o_len < 3) { + errno = E2BIG; + goto end; + } + *outbuf++ = ':'; + *outbuf++ = '2'; + *outbuf++ = 'e'; + o_len -= 3; + inbuf += 2; + i_len -= 2; + *flags |= CONV_REQESCAPE; } - while (i_len > 0 && - atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) { - if (errno == EILSEQ) { - if ((option & CONV_IGNORE)) { - *flags |= CONV_REQMANGLE; - return destlen - o_len; - } - if ((option & CONV_ESCAPEHEX)) { - const size_t bufsiz = o_len / 3 + 1; - char *buf = malloc(bufsiz); - size_t buflen; - - if (!buf) - goto end; - i = i_len; - for (buflen = 1; buflen <= bufsiz; ++buflen) { - char *b = buf; - size_t o = buflen; - if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) { - buflen -= o; - break; - } else if (errno != E2BIG) { - SAFE_FREE(buf); - goto end; - } else if (o < buflen) { - buflen -= o; - break; + + while (i_len >= 2) { + if ((option & CONV_ESCAPEHEX)) { + for (i = 0; i < i_len; i += 2) { + ucs2_t c = SVAL(inbuf, i); + switch (c) { + case 0x003a: /* 0x003a = ':' */ + if ( ! (option & CONV_ALLOW_COLON)) { + errno = EILSEQ; + goto end; + } + escch = c; + j = i_len - i; + i_len = i; + break; + case 0x002f: /* 0x002f = '/' */ + escch = c; + j = i_len - i; + i_len = i; + break; + } } - } - if (o_len < buflen * 3) { - SAFE_FREE(buf); - errno = E2BIG; - goto end; - } - o_len -= buflen * 3; - i_len = i; - for (i = 0; i < buflen; ++i) { - *outbuf++ = ':'; - *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f]; - *outbuf++ = hexdig[buf[i] & 0x0f]; - } - SAFE_FREE(buf); - *flags |= CONV_REQESCAPE; - continue; - } - } - goto end; - } + } + while (i_len > 0 && + atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) { + if (errno == EILSEQ) { + if ((option & CONV_IGNORE)) { + *flags |= CONV_REQMANGLE; + return destlen - o_len; + } + if ((option & CONV_ESCAPEHEX)) { + const size_t bufsiz = o_len / 3 + 1; + char *buf = malloc(bufsiz); + size_t buflen; + + if (!buf) + goto end; + i = i_len; + for (buflen = 1; buflen <= bufsiz; ++buflen) { + char *b = buf; + size_t o = buflen; + if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) { + buflen -= o; + break; + } else if (errno != E2BIG) { + SAFE_FREE(buf); + goto end; + } else if (o < buflen) { + buflen -= o; + break; + } + } + if (o_len < buflen * 3) { + SAFE_FREE(buf); + errno = E2BIG; + goto end; + } + o_len -= buflen * 3; + i_len = i; + for (i = 0; i < buflen; ++i) { + *outbuf++ = ':'; + *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f]; + *outbuf++ = hexdig[buf[i] & 0x0f]; + } + SAFE_FREE(buf); + *flags |= CONV_REQESCAPE; + continue; + } + } + goto end; + } - if (j) { - i_len = j, j = 0; - if (o_len < 3) { - errno = E2BIG; - goto end; - } - *outbuf++ = ':'; - *outbuf++ = '2'; - *outbuf++ = 'f'; - o_len -= 3; - inbuf += 2; - i_len -= 2; + if (j) { + i_len = j, j = 0; + if (o_len < 3) { + errno = E2BIG; + goto end; + } + switch (escch) { + case '/': + *outbuf++ = ':'; + *outbuf++ = '2'; + *outbuf++ = 'f'; + break; + case ':': + *outbuf++ = ':'; + *outbuf++ = '3'; + *outbuf++ = 'a'; + break; + default: + /* + * THIS SHOULD NEVER BE REACHED !!! + * As a safety net I put in a ' ' here + */ + *outbuf++ = ':'; + *outbuf++ = '2'; + *outbuf++ = '0'; + break; + } + o_len -= 3; + inbuf += 2; + i_len -= 2; + } } - } - if (i_len > 0) errno = EINVAL; - end: - return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1; + if (i_len > 0) errno = EINVAL; +end: + return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1; } -size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, char* src, size_t src_len, char* dest, size_t dest_len, u_int16_t *flags) +/* + * FIXME the size is a mess we really need a malloc/free logic + *`dest size must be dest_len +2 + */ +size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, const char *src, size_t src_len, char *dest, size_t dest_len, u_int16_t *flags) { - size_t i_len, o_len; - ucs2_t *u; - ucs2_t buffer[MAXPATHLEN]; - ucs2_t buffer2[MAXPATHLEN]; - int composition = 0; - - lazy_initialize_conv(); - - /* convert from_set to UCS2 */ - if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len, - (char *) buffer, sizeof(buffer), flags)) ) { - LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set)); - return (size_t) -1; - } - - if ( o_len == 0) - return o_len; - - /* Do pre/decomposition */ - if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) || - ((!(charsets[to_set]) || !(charsets[to_set]->flags & CHARSET_DECOMPOSED)) && - (!(charsets[from_set]) || (charsets[from_set]->flags & CHARSET_DECOMPOSED)))) - composition = 1; - if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && charsets[to_set]->flags & CHARSET_DECOMPOSED) ) - composition = 2; - - i_len = sizeof(buffer2); - u = buffer2; - - switch (composition) { - case 0: - u = buffer; - i_len = o_len; - break; - case 1: - if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) ) - return (size_t)(-1); - break; - case 2: - if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) ) - return (size_t)(-1); - break; - } - - /* Do case conversions */ - if (CHECK_FLAGS(flags, CONV_TOUPPER)) { - strupper_w(u); - } - if (CHECK_FLAGS(flags, CONV_TOLOWER)) { - strlower_w(u); - } - - /* Convert UCS2 to to_set */ - if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) { - LOG(log_error, logtype_default, - "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno)); - return (size_t) -1; - } - - return o_len; + size_t i_len, o_len; + ucs2_t *u; + ucs2_t buffer[MAXPATHLEN +2]; + ucs2_t buffer2[MAXPATHLEN +2]; + + lazy_initialize_conv(); + + /* convert from_set to UCS2 */ + if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len, + (char *) buffer, sizeof(buffer) -2, flags)) ) { + LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set)); + return (size_t) -1; + } + + if ( o_len == 0) + return o_len; + + /* Do pre/decomposition */ + i_len = sizeof(buffer2) -2; + u = buffer2; + if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && (charsets[to_set]->flags & CHARSET_DECOMPOSED)) ) { + if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) ) + return (size_t)(-1); + } + else if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) || !charsets[from_set] || (charsets[from_set]->flags & CHARSET_DECOMPOSED)) { + if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) ) + return (size_t)(-1); + } + else { + u = buffer; + i_len = o_len; + } + /* null terminate */ + u[i_len] = 0; + u[i_len +1] = 0; + + /* Do case conversions */ + if (CHECK_FLAGS(flags, CONV_TOUPPER)) { + strupper_w(u); + } + else if (CHECK_FLAGS(flags, CONV_TOLOWER)) { + strlower_w(u); + } + + /* Convert UCS2 to to_set */ + if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) { + LOG(log_error, logtype_default, + "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno)); + return (size_t) -1; + } + /* null terminate */ + dest[o_len] = 0; + dest[o_len +1] = 0; + + return o_len; }