#include <unistd.h>
#include <string.h>
#include <sys/param.h>
+#include <ctype.h>
#include <sys/stat.h>
#include <atalk/logger.h>
#include <errno.h>
#include <iconv.h>
#endif
+#if HAVE_LOCALE_H
+#include <locale.h>
+#endif
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+#include "byteorder.h"
+
/**
* @file
#define MAX_CHARSETS 10
-static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
+#define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )
+static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
static char* charset_names[MAX_CHARSETS];
-
-struct charset {
- const char *name;
- charset_t ch_charset_t;
- struct charset *prev, *next;
-};
+static struct charset_functions* charsets[MAX_CHARSETS];
+static char hexdig[] = "0123456789abcdef";
+/* Value of a single hex digit in either case. The argument is evaluated more
+ * than once; the unsigned char casts keep the <ctype.h> calls well defined. */
+#define hextoint( c ) ( isdigit( (unsigned char)(c) ) ? (c) - '0' : tolower( (unsigned char)(c) ) - 'a' + 10 )
/**
* Return the name of a charset to give to iconv().
{
const char *ret = NULL;
- if (ch == CH_UCS2) ret = "UCS-2LE";
- else if (ch == CH_UNIX) ret = "ASCII"; /*lp_unix_charset();*/
- else if (ch == CH_MAC) ret = "MAC"; /*lp_display_charset();*/
+ if (ch == CH_UCS2) ret = "UCS-2";
+ else if (ch == CH_UNIX) ret = "LOCALE"; /*lp_unix_charset();*/
+ else if (ch == CH_MAC) ret = "MAC_ROMAN"; /*lp_display_charset();*/
else if (ch == CH_UTF8) ret = "UTF8";
+ else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
if (!ret)
ret = charset_names[ch];
+#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
+ if (ret && strcasecmp(ret, "LOCALE") == 0) {
+ const char *ln = NULL;
+
+#ifdef HAVE_SETLOCALE
+ setlocale(LC_ALL, "");
+#endif
+ ln = nl_langinfo(CODESET);
+ if (ln) {
+ /* Check whether the charset name is supported
+ by iconv */
+ atalk_iconv_t handle = atalk_iconv_open(ln,"UCS-2");
+ if (handle == (atalk_iconv_t) -1) {
+ LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln);
+ ln = NULL;
+ } else {
+ atalk_iconv_close(handle);
+ }
+ }
+ ret = ln;
+ }
+#else /* system doesn't have LOCALE support */
+if (ch == CH_UNIX) ret = NULL;
+#endif
+
if (!ret || !*ret) ret = "ASCII";
return ret;
}
+/**
+ * Look up the function table of a charset, caching it in charsets[].
+ *
+ * @param ch charset handle, used as index into the charsets[] cache
+ * @returns the cached entry; on first use the result of
+ *          find_charset_functions() for charset_name(ch); NULL if ch is
+ *          outside the table (e.g. the (charset_t)-1 failure value used
+ *          elsewhere in this file)
+ **/
+struct charset_functions* get_charset_functions (charset_t ch)
+{
+	/* never index charsets[] / charset_names[] out of bounds */
+	if ((unsigned int) ch >= MAX_CHARSETS)
+		return NULL;
+
+	if (charsets[ch] == NULL)
+		charsets[ch] = find_charset_functions(charset_name(ch));
+
+	return charsets[ch];
+}
+
+
void lazy_initialize_conv(void)
{
static int initialized = 0;
{
static charset_t max_charset_t = NUM_CHARSETS-1;
charset_t cur_charset_t = max_charset_t+1;
- int c1, c2;
+ unsigned int c1;
+
+ lazy_initialize_conv();
for (c1=0; c1<=max_charset_t;c1++) {
- if ( strcmp(name, charset_name(c1)) == 0)
+ if ( strcasecmp(name, charset_name(c1)) == 0)
return (c1);
}
if ( cur_charset_t >= MAX_CHARSETS ) {
LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
name, MAX_CHARSETS);
- return 0;
+ return (charset_t) -1;
}
/* First try to setup the required conversions */
conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
- LOG(log_error, logtype_default, "Required conversion from %s to %s not supported\n",
+ LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
name, charset_name(CH_UCS2));
conv_handles[cur_charset_t][CH_UCS2] = NULL;
- return 0;
+ return (charset_t) -1;
}
conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
- LOG(log_error, logtype_default, "Required conversion from %s to %s not supported\n",
+ LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
charset_name(CH_UCS2), name);
conv_handles[CH_UCS2][cur_charset_t] = NULL;
- return 0;
+ return (charset_t) -1;
}
/* register the new charset_t name */
charset_names[cur_charset_t] = strdup(name);
-
- for (c1=0;c1<=cur_charset_t;c1++) {
- for (c2=0;c2<=cur_charset_t;c2++) {
- const char *n1 = charset_name((charset_t)c1);
- const char *n2 = charset_name((charset_t)c2);
- if (conv_handles[c1][c2] &&
- strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
- strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
- continue;
-
- if (conv_handles[c1][c2])
- atalk_iconv_close(conv_handles[c1][c2]);
-
- conv_handles[c1][c2] = atalk_iconv_open(n2,n1);
- if (conv_handles[c1][c2] == (atalk_iconv_t)-1) {
- LOG(log_debug, logtype_default, "Conversion from %s to %s not supported\n",
- charset_name((charset_t)c1), charset_name((charset_t)c2));
- conv_handles[c1][c2] = NULL;
- }
- }
- }
-
+ charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
max_charset_t++;
+#ifdef DEBUG
LOG(log_debug, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
+#endif /* DEBUG */
return (cur_charset_t);
}
**/
void init_iconv(void)
{
- int c1, c2;
+ int c1;
/* so that charset_name() works we need to get the UNIX<->UCS2 going
first */
if (!conv_handles[CH_UNIX][CH_UCS2])
- conv_handles[CH_UNIX][CH_UCS2] = atalk_iconv_open("UCS-2LE", "ASCII");
+ conv_handles[CH_UNIX][CH_UCS2] = atalk_iconv_open("UCS-2", "ASCII");
if (!conv_handles[CH_UCS2][CH_UNIX])
- conv_handles[CH_UCS2][CH_UNIX] = atalk_iconv_open("ASCII", "UCS-2LE");
+ conv_handles[CH_UCS2][CH_UNIX] = atalk_iconv_open("ASCII", "UCS-2");
for (c1=0;c1<NUM_CHARSETS;c1++) {
- for (c2=0;c2<NUM_CHARSETS;c2++) {
- const char *n1 = charset_name((charset_t)c1);
- const char *n2 = charset_name((charset_t)c2);
- if (conv_handles[c1][c2] &&
- strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
- strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
- continue;
-
- if (conv_handles[c1][c2])
- atalk_iconv_close(conv_handles[c1][c2]);
-
- conv_handles[c1][c2] = atalk_iconv_open(n2,n1);
- if (conv_handles[c1][c2] == (atalk_iconv_t)-1) {
- LOG(log_debug, logtype_default, "Conversion from %s to %s not supported\n",
- charset_name((charset_t)c1), charset_name((charset_t)c2));
- conv_handles[c1][c2] = NULL;
- }
+ const char *name = charset_name((charset_t)c1);
+
+ /* handle converting charset c1 to UCS2; a failed open is stored as NULL */
+ conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
+ if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
+ LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
+ name, charset_name(CH_UCS2));
+ conv_handles[c1][CH_UCS2] = NULL;
+ }
+
+ /* handle converting UCS2 to charset c1; check and reset the very slot
+ * that was just opened, not a fixed or diagonal entry */
+ conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
+ if (conv_handles[CH_UCS2][c1] == (atalk_iconv_t)-1) {
+ LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
+ charset_name(CH_UCS2), name);
+ conv_handles[CH_UCS2][c1] = NULL;
}
+
+ charsets[c1] = get_charset_functions (c1);
}
}
* @param destlen maximal length allowed for string
* @returns the number of bytes occupied in the destination
**/
-size_t convert_string(charset_t from, charset_t to,
+static size_t convert_string_internal(charset_t from, charset_t to,
void const *src, size_t srclen,
void *dest, size_t destlen)
{
size_t retval;
const char* inbuf = (const char*)src;
char* outbuf = (char*)dest;
+ char* o_save = outbuf;
atalk_iconv_t descriptor;
if (srclen == (size_t)-1)
descriptor = conv_handles[from][to];
if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
- /* conversion not supported, use as is */
- size_t len = MIN(srclen,destlen);
- memcpy(dest,src,len);
- return len;
+ return (size_t) -1;
}
i_len=srclen;
break;
case E2BIG:
reason="No more room";
- LOG(log_debug, logtype_default, "convert_string: Required %d, available %d\n",
- srclen, destlen);
- /* we are not sure we need srclen bytes,
- may be more, may be less.
- We only know we need more than destlen
- bytes ---simo */
break;
case EILSEQ:
reason="Illegal multibyte sequence";
break;
}
+ LOG(log_debug, logtype_default,"Conversion error: %s(%s)\n",reason,inbuf);
return (size_t)-1;
- /* smb_panic(reason); */
}
+
+ /* Terminate the string */
+ if (to == CH_UCS2 && destlen-o_len >= 2) {
+ o_save[destlen-o_len] = 0;
+ o_save[destlen-o_len+1] = 0;
+ }
+ else if ( destlen-o_len > 0)
+ o_save[destlen-o_len] = 0;
+
return destlen-o_len;
}
+
+/**
+ * Convert a string from one charset to another, going through UCS2.
+ *
+ * The source is first converted to UCS2, then precomposed or decomposed
+ * depending on the CHARSET_DECOMPOSED flag of the two charsets, and
+ * finally converted to the target charset.
+ *
+ * @param from    charset of src
+ * @param to      charset wanted in dest
+ * @param src     source string
+ * @param srclen  length of src in bytes, passed on to convert_string_internal()
+ * @param dest    destination buffer
+ * @param destlen size of dest in bytes
+ * @returns number of bytes stored in dest, or (size_t)-1 on failure
+ **/
+size_t convert_string(charset_t from, charset_t to,
+		      void const *src, size_t srclen,
+		      void *dest, size_t destlen)
+{
+	char ucs2_raw[MAXPATHLEN];	/* src as UCS2 */
+	char ucs2_norm[MAXPATHLEN];	/* (de)composed UCS2 */
+	char *ucs2 = ucs2_raw;
+	size_t raw_len, ucs2_len, norm_cap, written;
+	int to_decomposed, from_maybe_decomposed;
+
+	lazy_initialize_conv();
+
+	/* step 1: source charset -> UCS2 */
+	raw_len = convert_string_internal( from, CH_UCS2, src, srclen, ucs2_raw, MAXPATHLEN);
+	if (raw_len == (size_t) -1) {
+		LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
+		return (size_t) -1;
+	}
+
+	/* step 2: a decomposed target always gets decomposed text; otherwise
+	 * precompose when the source is decomposed or has no function table */
+	to_decomposed = charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED);
+	from_maybe_decomposed = !charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED);
+	ucs2_len = raw_len;
+
+	if (to_decomposed) {
+		norm_cap = MAXPATHLEN;
+		ucs2 = ucs2_norm;
+		ucs2_len = decompose_w((ucs2_t *)ucs2_raw, raw_len, (ucs2_t *)ucs2_norm, &norm_cap);
+		if (ucs2_len == (size_t) -1)
+			return (size_t)(-1);
+	}
+	else if (from_maybe_decomposed) {
+		norm_cap = MAXPATHLEN;
+		ucs2 = ucs2_norm;
+		ucs2_len = precompose_w((ucs2_t *)ucs2_raw, raw_len, (ucs2_t *)ucs2_norm, &norm_cap);
+		if (ucs2_len == (size_t) -1)
+			return (size_t)(-1);
+	}
+
+	/* step 3: UCS2 -> target charset */
+	written = convert_string_internal( CH_UCS2, to, ucs2, ucs2_len, dest, destlen);
+	if (written == (size_t) -1) {
+		LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
+		return (size_t) -1;
+	}
+
+	return written;
+}
+
+
+
+
+
+
/**
* Convert between character sets, allocating a new buffer for the result.
*
* @returns Size in bytes of the converted string; or -1 in case of error.
**/
-size_t convert_string_allocate(charset_t from, charset_t to,
- void const *src, size_t srclen, void **dest)
+static size_t convert_string_allocate_internal(charset_t from, charset_t to,
+ void const *src, size_t srclen, char **dest)
{
size_t i_len, o_len, destlen;
size_t retval;
/* smb_panic(reason); */
return (size_t)-1;
}
+
destlen = destlen - o_len;
- *dest = (char *)realloc(ob,destlen);
+
+ /* Terminate the string */
+ if (to == CH_UCS2 && destlen-o_len >= 2) {
+ ob[destlen] = 0;
+ ob[destlen+1] = 0;
+ *dest = (char *)realloc(ob,destlen+2);
+ }
+ else if ( destlen-o_len > 0) {
+ ob[destlen] = 0;
+ *dest = (char *)realloc(ob,destlen+1);
+ }
+
if (destlen && !*dest) {
LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!\n");
SAFE_FREE(ob);
}
-size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
+size_t convert_string_allocate(charset_t from, charset_t to,
+ void const *src, size_t srclen,
+ char ** dest) /* converts src from charset 'from' to 'to' via UCS2; the result is handed back through dest */
{
- size_t size;
- ucs2_t *buffer;
-
- size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
- (void **) &buffer);
- if (size == -1) {
- free(buffer);
- return size;
+ size_t i_len, o_len; /* byte counts of the UCS2 data handed between the stages */
+ char *u; /* UCS2 text that is finally converted to the target charset */
+ char buffer[MAXPATHLEN]; /* src converted to UCS2 */
+ char buffer2[MAXPATHLEN]; /* output of precompose_w() / decompose_w() */
+ int composition = 0; /* 0 = leave as is, 1 = precompose, 2 = decompose */
+
+ lazy_initialize_conv();
+
+ /* Stage 1: convert src from the source charset to UCS2 in buffer */
+ if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen, buffer, MAXPATHLEN)) ) {
+ LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
+ return (size_t) -1;
}
- if (!strupper_w(buffer) && (dest == src)) {
- free(buffer);
- return srclen;
+
+ /* Stage 2: pick pre/decomposition from the CHARSET_DECOMPOSED flag of both
+ * charsets; a decomposed target charset takes precedence */
+ if ( ((!(charsets[to]) || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
+ (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
+ composition = 1;
+ if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
+ composition = 2;
+
+ i_len = MAXPATHLEN; /* capacity of buffer2, passed by address to the (de)compose call */
+ u = buffer2;
+
+ switch (composition) {
+ case 0: /* no (de)composition: convert buffer directly */
+ u = buffer;
+ i_len = o_len;
+ break;
+ case 1:
+ if ( (size_t)-1 == (i_len = precompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
+ return (size_t)(-1);
+ break;
+ case 2:
+ if ( (size_t)-1 == (i_len = decompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
+ return (size_t)(-1);
+ break;
}
-
- size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
- free(buffer);
- return size;
+
+ /* Stage 3: convert UCS2 to the target charset; a failure is only logged here */
+ if ((size_t)(-1) == ( o_len = convert_string_allocate_internal( CH_UCS2, to, u, i_len, dest)) )
+ LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
+
+ return o_len; /* still (size_t)-1 when stage 3 failed */
+
}
-size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
+size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
{
size_t size;
- ucs2_t *buffer;
+ /* Upper-cases src (charset ch) into dest via a temporary UCS2 copy.
+ * buffer starts out NULL so the free() on the error path below never
+ * sees an uninitialized pointer. */
+ char *buffer = NULL;
- size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
- (void **) &buffer);
- if (size == -1) {
+ size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
+ (char**) &buffer);
+ if (size == (size_t)-1) {
free(buffer);
return size;
- /* smb_panic("failed to create UCS2 buffer");*/
}
- if (!strlower_w(buffer) && (dest == src)) {
+ /* nothing changed and the caller converts in place: src is the result */
+ if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
free(buffer);
return srclen;
}
- size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
+
+ size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
free(buffer);
return size;
}
-size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
+size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
{
size_t size;
- ucs2_t *buffer;
+ /* Lower-cases src (charset ch) into dest via a temporary UCS2 copy.
+ * buffer starts out NULL so the free() on the error path below never
+ * sees an uninitialized pointer. */
+ char *buffer = NULL;
- size = convert_string_allocate(CH_UTF8, CH_UCS2, src, srclen,
- (void **) &buffer);
- if (size == -1) {
+ size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
+ (char **) &buffer);
+ if (size == (size_t)-1) {
free(buffer);
return size;
}
- if (!strupper_w(buffer) && (dest == src)) {
+ /* nothing changed and the caller converts in place: src is the result */
+ if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
free(buffer);
return srclen;
}
- size = convert_string(CH_UCS2, CH_UTF8, buffer, size, dest, destlen);
+ size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
free(buffer);
return size;
}
-size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
+
+size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
{
- size_t size;
- ucs2_t *buffer;
-
- size = convert_string_allocate(CH_UTF8, CH_UCS2, src, srclen,
- (void **) &buffer);
- if (size == -1) {
- free(buffer);
- return size;
- }
- if (!strlower_w(buffer) && (dest == src)) {
- free(buffer);
- return srclen;
- }
-
- size = convert_string(CH_UCS2, CH_UTF8, buffer, size, dest, destlen);
- free(buffer);
- return size;
+ return charset_strupper( CH_UNIX, src, srclen, dest, destlen); /* upper-case a CH_UNIX string; all work is done by charset_strupper() */
}
-size_t mac_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
+size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
{
- size_t size;
- ucs2_t *buffer;
-
- size = convert_string_allocate(CH_MAC, CH_UCS2, src, srclen,
- (void **) &buffer);
- if (size == -1) {
- free(buffer);
- return size;
- }
- if (!strupper_w(buffer) && (dest == src)) {
- free(buffer);
- return srclen;
- }
-
- size = convert_string(CH_UCS2, CH_MAC, buffer, size, dest, destlen);
- free(buffer);
- return size;
+ return charset_strlower( CH_UNIX, src, srclen, dest, destlen); /* lower-case a CH_UNIX string; all work is done by charset_strlower() */
}
-size_t mac_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
+size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
{
- size_t size;
- ucs2_t *buffer;
-
- size = convert_string_allocate(CH_MAC, CH_UCS2, src, srclen,
- (void **) &buffer);
- if (size == -1) {
- free(buffer);
- return size;
- }
- if (!strlower_w(buffer) && (dest == src)) {
- free(buffer);
- return srclen;
- }
-
- size = convert_string(CH_UCS2, CH_MAC, buffer, size, dest, destlen);
- free(buffer);
- return size;
+ return charset_strupper( CH_UTF8, src, srclen, dest, destlen); /* upper-case a CH_UTF8 string; all work is done by charset_strupper() */
+}
+
+size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
+{
+ return charset_strlower( CH_UTF8, src, srclen, dest, destlen); /* lower-case a CH_UTF8 string; all work is done by charset_strlower() */
}
/**
- * Copy a string from a mac char* src to a UCS2 destination, allocating a buffer
+ * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
*
* @param dest always set at least to NULL
*
* or -1 in case of error.
**/
-size_t mac_to_ucs2_allocate(ucs2_t **dest, const char *src)
+size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
{
size_t src_len = strlen(src)+1;
*dest = NULL;
+ return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest); /* src_len counts the terminating NUL, so it is converted as well */
}
- return convert_string_allocate(CH_MAC, CH_UCS2, src, src_len, (void **)dest);
/**
- * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
+ * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
*
* @param dest always set at least to NULL
*
* @returns The number of bytes occupied by the string in the destination
**/
-size_t mac_to_utf8_allocate(char **dest, const char *src)
+size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
{
size_t src_len = strlen(src)+1;
*dest = NULL;
- return convert_string_allocate(CH_MAC, CH_UTF8, src, src_len, (void **)dest);
+ return convert_string_allocate(ch, CH_UTF8, src, src_len, dest); /* src_len counts the terminating NUL, so it is converted as well */
}
/**
* @returns The number of bytes occupied by the string in the destination
**/
-size_t ucs2_to_mac_allocate(char **dest, const ucs2_t *src)
+size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
{
size_t src_len = (strlen_w(src)+1) * sizeof(ucs2_t);
*dest = NULL;
- return convert_string_allocate(CH_UCS2, CH_MAC, src, src_len, (void **)dest);
+ return convert_string_allocate(CH_UCS2, ch, src, src_len, dest); /* src_len is a byte count and includes the UCS2 terminator */
}
/**
* @returns The number of bytes occupied by the string in the destination
**/
-static char convbuf[MAXPATHLEN+1];
-size_t utf8_to_mac_allocate(void **dest, const char *src)
+size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
{
size_t src_len = strlen(src)+1;
*dest = NULL;
+ return convert_string_allocate(CH_UTF8, ch, src, src_len, dest); /* src_len counts the terminating NUL, so it is converted as well */
+}
+
+/**
+ * Precompose a string without changing its charset:
+ * ch -> UCS2 -> precompose_w() -> ch.
+ *
+ * @param ch     charset of both src and dst
+ * @param src    source string
+ * @param inlen  length of src in bytes
+ * @param dst    destination buffer
+ * @param outlen size of dst in bytes
+ * @returns number of bytes stored in dst, or (size_t)-1 on failure.
+ *          dst is NUL terminated only when there is room left for it.
+ **/
+size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
+{
+	char *buffer = NULL;
+	char u[MAXPATHLEN];
+	size_t len;
+	size_t ilen;
+	size_t ucap = MAXPATHLEN;	/* capacity of u, handed to precompose_w() */
+
+	if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
+		return len;
+
+	ilen = precompose_w((ucs2_t *)buffer, len, (ucs2_t *)u, &ucap);
+	free(buffer);	/* the UCS2 copy is not needed past this point */
+	if (ilen == (size_t)-1)
+		return (size_t)(-1);
+
+	if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, u, ilen, dst, outlen)) )
+		return (size_t)(-1);
+
+	/* len == outlen means dst is completely full: do not write past it */
+	if (len < outlen)
+		dst[len] = 0;
+	return (len);
+}
+
+
+size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
+{
+ /* Decompose a string without changing its charset:
+ * ch -> UCS2 -> decompose_w() -> ch.
+ * Returns the number of bytes stored in dst or (size_t)-1 on failure. */
+ char *buffer;
+ char u[MAXPATHLEN];
+ size_t len;
+ size_t ilen;
+
+ if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
+ return len;
+
+ ilen=MAXPATHLEN;
- src_len = utf8_precompose ( (char *) src, src_len, convbuf, MAXPATHLEN);
- return convert_string_allocate(CH_UTF8, CH_MAC, convbuf, src_len, dest);
+ if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, (ucs2_t *)u, &ilen)) ) {
+ free (buffer);
+ return (size_t)(-1);
+ }
+
+ if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, u, ilen, dst, outlen)) ) {
+ free (buffer);
+ return (size_t)(-1);
+ }
+
+ free(buffer);
+ /* len == outlen means dst is completely full: do not write past it */
+ if (len < outlen)
+ dst[len] = 0;
+ return (len);
}
-size_t utf8_to_mac ( char* src, size_t src_len, char* dest, size_t dest_len)
+size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
{
- src_len = utf8_precompose ( (char *) src, src_len, convbuf, MAXPATHLEN);
- return convert_string(CH_UTF8, CH_MAC, convbuf, src_len, dest, dest_len);
+ return charset_precompose ( CH_UTF8, src, inlen, dst, outlen); /* CH_UTF8 convenience wrapper around charset_precompose() */
+}
+
+size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
+{
+ return charset_decompose ( CH_UTF8, src, inlen, dst, outlen); /* CH_UTF8 convenience wrapper around charset_decompose() */
}
static char debugbuf[ MAXPATHLEN +1 ];
return q;
}
+/*
+ * Convert from MB to UCS2 charset
+ * Flags:
+ * CONV_UNESCAPEHEX: ':XX' will be converted to a UCS2 character
+ * CONV_IGNORE: return the first convertible characters.
+ * FIXME:
+ * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
+ * The (un)escape scheme is not compatible with the old cap style escape. This is bad, we need it
+ * for e.g. HFS cdroms.
+ */
-size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
+static size_t pull_charset_flags (charset_t from_set, charset_t cap_charset, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
{
- char *u;
- size_t len;
- size_t ilen;
-
- if ((size_t)(-1) == (len = convert_string(CH_UTF8, CH_UCS2, src, inlen, convbuf, MAXPATHLEN)) )
- return len;
+ size_t i_len, o_len, hlen;
+ size_t retval, j = 0; /* j: input bytes left from the next ':' onwards, 0 if none pending */
+ const char* inbuf = (const char*)src;
+ char* outbuf = (char*)dest;
+ atalk_iconv_t descriptor;
+ atalk_iconv_t descriptor_cap;
+ char *o_save, *s;
+ char h[MAXPATHLEN]; /* bytes decoded from a run of ':XX' escapes */
+ const char *h_buf;
- if ( NULL == (u = precompose_w((ucs2_t *)convbuf, len, &ilen)) )
- return (size_t)(-1);
+ if (srclen == (size_t)-1)
+ srclen = strlen(src)+1;
- if ((size_t)(-1) == (len = convert_string( CH_UCS2, CH_UTF8, u, ilen, dst, outlen)) )
- return (size_t)(-1);
+ lazy_initialize_conv();
- dst[len] = 0;
- return (len);
-}
+ descriptor = conv_handles[from_set][CH_UCS2];
+ descriptor_cap = conv_handles[cap_charset][CH_UCS2];
-size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
-{
- char *u;
- size_t len;
- size_t ilen;
+ if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
+ return (size_t) -1;
+ }
- if ((size_t)(-1) == (len = convert_string(CH_UTF8, CH_UCS2, src, inlen, convbuf, MAXPATHLEN)) )
- return len;
+ i_len=srclen;
+ o_len=destlen;
+ o_save=outbuf;
+
+conversion_loop:
+ if ( flags && (*flags & CONV_UNESCAPEHEX)) {
+ /* search only the i_len bytes that really belong to the input, so the
+ * scan cannot run past the buffer and j cannot underflow */
+ if ( NULL != (s = memchr ( inbuf, ':', i_len))) {
+ j = i_len - (s - inbuf);
+ if ( 0 == (i_len = (s - inbuf)))
+ goto unhex_char;
+ }
+ }
+
+ /* convert everything up to the next ':' (or the whole remaining input) */
+ retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
+ if(retval==(size_t)-1) {
+ if (errno == EILSEQ && flags && (*flags & CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen-o_len;
+ }
+ else
+ return (size_t) -1;
+ }
+
+unhex_char:
+ if (j && flags && (*flags & CONV_UNESCAPEHEX )) {
+ /* we're at the start of a hex encoded ucs2 char */
+ if (o_len < 2) {
+ errno = E2BIG;
+ return (size_t) -1;
+ }
+ if ( j >= 3 &&
+ isxdigit( (unsigned char) *(inbuf+1)) && isxdigit( (unsigned char) *(inbuf+2)) ) {
+ hlen = 0;
+ /* collect consecutive ':XX' escapes; stop when h[] is full, the rest
+ * is picked up by the next pass through conversion_loop */
+ while ( hlen < sizeof(h) && *inbuf == ':' && j >=3 &&
+ isxdigit( (unsigned char) *(inbuf+1)) && isxdigit( (unsigned char) *(inbuf+2)) ) {
+ inbuf++;
+ h[hlen] = hextoint( *inbuf ) << 4;
+ inbuf++;
+ h[hlen++] |= hextoint( *inbuf );
+ inbuf++;
+ j -= 3;
+ }
+ h_buf = (const char*) h;
+ /* the decoded bytes are in cap_charset, convert them to UCS2 */
+ if ((size_t) -1 == (retval = atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len)) ) {
+ if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen-o_len;
+ }
+ else {
+ return retval;
+ }
+ }
+ }
+ else {
+ /* We have an invalid :xx sequence */
+ if (CHECK_FLAGS(flags, CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen-o_len;
+ }
+ else {
+ errno=EILSEQ;
+ return (size_t) -1;
+ }
+ }
+ /* continue with whatever follows the escape sequence(s) */
+ i_len = j;
+ j = 0;
+ if (i_len > 0)
+ goto conversion_loop;
+ }
- if ( NULL == (u = decompose_w((ucs2_t *)convbuf, len, &ilen)) )
- return (size_t)(-1);
- if ((size_t)(-1) == (len = convert_string( CH_UCS2, CH_UTF8, u, ilen, dst, outlen)) )
- return (size_t)(-1);
- dst[len] = 0;
- return (len);
+ return destlen-o_len;
}
+/*
+ * Convert from UCS2 to MB charset
+ * Flags:
+ * CONV_ESCAPEDOTS: escape leading dots
+ * CONV_ESCAPEHEX: unconvertible characters and '/' will be escaped to :XX
+ * CONV_IGNORE: unconvertible characters will be replaced with '_'
+ * FIXME:
+ * CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this?
+ * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
+ * The escape scheme is not compatible with the old cap style escape. This is bad, we need it
+ * for e.g. HFS cdroms.
+ */
+
-size_t utf8_to_mac_charset ( charset_t ch, char* src, size_t src_len, char* dest, size_t dest_len, int* mangle)
+static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
{
- size_t i_len, o_len;
- size_t retval;
- const char* inbuf;
- char* outbuf = (char*)dest;
- atalk_iconv_t descriptor;
+ size_t i_len, o_len, i;
+ size_t retval, j = 0; /* j: input bytes left from the next '/' onwards, 0 if none pending */
+ const char* inbuf = (const char*)src;
+ char* outbuf = (char*)dest;
+ atalk_iconv_t descriptor;
+ char *o_save;
+ char *buf, *buf_save;
+ size_t buflen;
+
+ lazy_initialize_conv();
+
+ descriptor = conv_handles[CH_UCS2][to_set];
+
+ if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
+ return (size_t) -1;
+ }
+
+ i_len=srclen;
+ o_len=destlen;
+ o_save=outbuf;
+
+ if ( SVAL(inbuf,0) == 0x002e && flags && (*flags & CONV_ESCAPEDOTS)) { /* 0x002e = . */
+ if (o_len < 3) {
+ errno = E2BIG;
+ return (size_t) -1;
+ }
+ o_save[0] = ':';
+ o_save[1] = '2';
+ o_save[2] = 'e';
+ o_len -= 3;
+ inbuf += 2;
+ i_len -= 2;
+ outbuf = o_save + 3;
+ if (flags) *flags |= CONV_REQESCAPE;
+ }
+
+conversion_loop:
+ if ( flags && (*flags & CONV_ESCAPEHEX)) {
+ /* cut the input at the next '/'; a literal ':' cannot be represented */
+ for ( i = 0; i < i_len; i+=2) {
+ if ( SVAL((inbuf+i),0) == 0x002f) { /* 0x002f = / */
+ j = i_len - i;
+ if ( 0 == ( i_len = i))
+ goto escape_slash;
+ break;
+ } else if ( SVAL(inbuf+i,0) == 0x003a) { /* 0x003a = : */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
+ }
+ }
+
+ retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
+ if (retval==(size_t)-1) {
+ if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
+ *flags |= CONV_REQMANGLE;
+ return destlen -o_len;
+ }
+ else if ( errno == EILSEQ && flags && (*flags & CONV_ESCAPEHEX)) {
+ if (o_len < 3) {
+ errno = E2BIG;
+ return (size_t) -1;
+ }
+ /* convert the offending UCS2 character to cap_set and emit its bytes as :xx */
+ if ((size_t) -1 == (buflen = convert_string_allocate_internal(CH_UCS2, cap_set, inbuf, 2, &buf)) )
+ return buflen;
+ buf_save = buf;
+ while (buflen > 0) {
+ if ( o_len < 3) {
+ /* release the temporary buffer first, then set errno for the caller */
+ SAFE_FREE(buf_save);
+ errno = E2BIG;
+ return (size_t) -1;
+ }
+ *outbuf++ = ':';
+ *outbuf++ = hexdig[ ( *buf & 0xf0 ) >> 4 ];
+ *outbuf++ = hexdig[ *buf & 0x0f ];
+ buf++;
+ buflen--;
+ o_len -= 3;
+ }
+ SAFE_FREE(buf_save);
+ buflen = 0;
+ i_len -= 2;
+ inbuf += 2;
+ if (flags) *flags |= CONV_REQESCAPE;
+ if ( i_len > 0)
+ goto conversion_loop;
+ }
+ else
+ return (size_t)(-1);
+ }
+
+escape_slash:
+ if (j && flags && (*flags & CONV_ESCAPEHEX)) {
+ if (o_len < 3) {
+ errno = E2BIG;
+ return (size_t) -1;
+ }
+ o_save[destlen -o_len] = ':';
+ o_save[destlen -o_len+1] = '2';
+ o_save[destlen -o_len+2] = 'f';
+ inbuf += 2;
+ i_len = j-2;
+ o_len -= 3;
+ outbuf += 3;
+ j = 0;
+ if ( i_len > 0)
+ goto conversion_loop;
+ }
+ return destlen -o_len;
+}
+size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, char* src, size_t src_len, char* dest, size_t dest_len, u_int16_t *flags)
+{
+ /* Convert src from from_set to to_set via UCS2, honouring the CONV_* bits
+ * in *flags (flags may be NULL). Returns the number of bytes stored in
+ * dest or (size_t)-1 on failure. */
+ size_t i_len, o_len;
+ char *u;
+ char buffer[MAXPATHLEN];
+ char buffer2[MAXPATHLEN];
+ int composition = 0;
+
lazy_initialize_conv();
- src_len = utf8_precompose ( (char *) src, src_len+1, convbuf, MAXPATHLEN);
-
- descriptor = conv_handles[CH_UTF8][ch];
-
- if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
- LOG(log_error, logtype_default, "Conversion not supported ( UTF8 to %s )", charset_name(ch));
- return (size_t)(-1);
+ /* convert from_set to UCS2 */
+ if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len, buffer, MAXPATHLEN, flags)) ) {
+ LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
+ return (size_t) -1;
}
- inbuf = (const char*) convbuf;
- i_len=src_len;
- o_len=dest_len;
-
- retval = atalk_iconv_ignore(descriptor, &inbuf, &i_len, &outbuf, &o_len, mangle);
+ if ( o_len == 0)
+ return o_len;
+
+ /* Do pre/decomposition */
+ if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) ||
+ ((!(charsets[to_set]) || !(charsets[to_set]->flags & CHARSET_DECOMPOSED)) &&
+ (!(charsets[from_set]) || (charsets[from_set]->flags & CHARSET_DECOMPOSED))))
+ composition = 1;
+ if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && charsets[to_set]->flags & CHARSET_DECOMPOSED) )
+ composition = 2;
+
+ i_len = MAXPATHLEN;
+ u = buffer2;
+
+ switch (composition) {
+ case 0:
+ u = buffer;
+ i_len = o_len;
+ break;
+ case 1:
+ if ( (size_t)-1 == (i_len = precompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
+ return (size_t)(-1);
+ break;
+ case 2:
+ if ( (size_t)-1 == (i_len = decompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
+ return (size_t)(-1);
+ break;
+ }
+
+ /* Do case conversions. A zero result from strupper_w()/strlower_w() is
+ * what charset_strupper()/charset_strlower() treat as "nothing had to be
+ * changed", so it must not abort the conversion. */
+ if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
+ strupper_w((ucs2_t *) u);
+ }
+ if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
+ strlower_w((ucs2_t *) u);
+ }
+
+ /* Convert UCS2 to to_set */
+ if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, u, i_len, dest, dest_len, flags )) ) {
+ LOG(log_error, logtype_default,
+ "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
+ return (size_t) -1;
+ }
- if(retval==(size_t)-1)
- return (size_t)(-1);
-
- dest[dest_len-o_len] = 0;
- return dest_len-o_len;
+ return o_len;
}
-