2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 #endif /* HAVE_CONFIG_H */
35 #include <sys/param.h>
36 #ifdef HAVE_USABLE_ICONV
46 #include <netatalk/endian.h>
47 #include <atalk/logger.h>
48 #include <atalk/unicode.h>
49 #include "byteorder.h"
55 * @brief Character-set conversion routines built on our iconv.
57 * @note Samba's internal character set (at least in the 3.0 series)
58 * is always the same as the one for the Unix filesystem. It is
59 * <b>not</b> necessarily UTF-8 and may be different on machines that
60 * need i18n filenames to be compatible with Unix software. It does
61 * have to be a superset of ASCII. All multibyte sequences must start
62 * with a byte with the high bit set.
68 #define MAX_CHARSETS 10
70 #define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )
72 static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
73 static char* charset_names[MAX_CHARSETS];
74 static struct charset_functions* charsets[MAX_CHARSETS];
75 static char hexdig[] = "0123456789abcdef";
/*
 * Map one hexadecimal digit character to its numeric value (0-15).
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. The upper-case range matters because
 * the unescape code guards its calls with isxdigit(), which admits it.
 * The argument is cast to unsigned char before it reaches <ctype.h>, so a
 * negative plain char cannot trigger undefined behaviour.
 *
 * The argument is evaluated more than once; pass an expression without side
 * effects.
 */
#define hextoint( c ) ( isdigit( (unsigned char)(c) ) ? (c) - '0' : tolower( (unsigned char)(c) ) - 'a' + 10 )
79 * Return the name of a charset to give to iconv().
81 static const char *charset_name(charset_t ch)
83 const char *ret = NULL;
85 if (ch == CH_UCS2) ret = "UCS-2";
86 else if (ch == CH_UNIX) ret = "LOCALE"; /*lp_unix_charset();*/
87 else if (ch == CH_MAC) ret = "MAC_ROMAN"; /*lp_display_charset();*/
88 else if (ch == CH_UTF8) ret = "UTF8";
89 else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
92 ret = charset_names[ch];
94 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
95 if (ret && strcasecmp(ret, "LOCALE") == 0) {
96 const char *ln = NULL;
99 setlocale(LC_ALL, "");
101 ln = nl_langinfo(CODESET);
103 /* Check whether the charset name is supported
105 atalk_iconv_t handle = atalk_iconv_open(ln,"UCS-2");
106 if (handle == (atalk_iconv_t) -1) {
107 LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln);
110 atalk_iconv_close(handle);
115 #else /* system doesn't have LOCALE support */
116 if (ch == CH_UNIX) ret = NULL;
119 if (!ret || !*ret) ret = "ASCII";
123 struct charset_functions* get_charset_functions (charset_t ch)
125 if (charsets[ch] != NULL)
128 charsets[ch] = find_charset_functions(charset_name(ch));
134 void lazy_initialize_conv(void)
136 static int initialized = 0;
144 charset_t add_charset(char* name)
146 static charset_t max_charset_t = NUM_CHARSETS-1;
147 charset_t cur_charset_t = max_charset_t+1;
150 lazy_initialize_conv();
152 for (c1=0; c1<=max_charset_t;c1++) {
153 if ( strcasecmp(name, charset_name(c1)) == 0)
157 if ( cur_charset_t >= MAX_CHARSETS ) {
158 LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
160 return (charset_t) -1;
163 /* First try to setup the required conversions */
165 conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
166 if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
167 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
168 name, charset_name(CH_UCS2));
169 conv_handles[cur_charset_t][CH_UCS2] = NULL;
170 return (charset_t) -1;
173 conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
174 if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
175 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
176 charset_name(CH_UCS2), name);
177 conv_handles[CH_UCS2][cur_charset_t] = NULL;
178 return (charset_t) -1;
181 /* register the new charset_t name */
182 charset_names[cur_charset_t] = strdup(name);
184 charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
188 LOG(log_debug, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
190 return (cur_charset_t);
194 * Initialize iconv conversion descriptors.
196 * This is called the first time it is needed, and also called again
197 * every time the configuration is reloaded, because the charset or
198 * codepage might have changed.
200 void init_iconv(void)
204 /* so that charset_name() works we need to get the UNIX<->UCS2 going
207 if (!conv_handles[CH_UNIX][CH_UCS2])
208 conv_handles[CH_UNIX][CH_UCS2] = atalk_iconv_open("UCS-2", "ASCII");
210 if (!conv_handles[CH_UCS2][CH_UNIX])
211 conv_handles[CH_UCS2][CH_UNIX] = atalk_iconv_open("ASCII", "UCS-2");
214 for (c1=0;c1<NUM_CHARSETS;c1++) {
215 const char *name = charset_name((charset_t)c1);
217 conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
218 if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
219 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
220 name, charset_name(CH_UCS2));
221 conv_handles[c1][CH_UCS2] = NULL;
224 conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
225 if (conv_handles[CH_UCS2][1] == (atalk_iconv_t)-1) {
226 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
227 charset_name(CH_UCS2), name);
228 conv_handles[c1][c1] = NULL;
231 charsets[c1] = get_charset_functions (c1);
236 * Convert string from one encoding to another, making error checking etc
238 * @param src pointer to source string (multibyte or singlebyte)
239 * @param srclen length of the source string in bytes
240 * @param dest pointer to destination string (multibyte or singlebyte)
241 * @param destlen maximal length allowed for string
242 * @returns the number of bytes occupied in the destination
244 static size_t convert_string_internal(charset_t from, charset_t to,
245 void const *src, size_t srclen,
246 void *dest, size_t destlen)
250 const char* inbuf = (const char*)src;
251 char* outbuf = (char*)dest;
252 char* o_save = outbuf;
253 atalk_iconv_t descriptor;
255 if (srclen == (size_t)-1)
256 srclen = strlen(src)+1;
258 lazy_initialize_conv();
260 descriptor = conv_handles[from][to];
262 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
268 retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
269 if(retval==(size_t)-1) {
270 const char *reason="unknown error";
273 reason="Incomplete multibyte sequence";
276 reason="No more room";
279 reason="Illegal multibyte sequence";
282 LOG(log_debug, logtype_default,"Conversion error: %s",reason);
286 /* Terminate the string */
287 if (to == CH_UCS2 && destlen-o_len >= 2) {
288 o_save[destlen-o_len] = 0;
289 o_save[destlen-o_len+1] = 0;
291 else if ( to != CH_UCS2 && destlen-o_len > 0 )
292 o_save[destlen-o_len] = 0;
294 /* FIXME: what should we do here, string *might* be unterminated. E2BIG? */
297 return destlen-o_len;
301 size_t convert_string(charset_t from, charset_t to,
302 void const *src, size_t srclen,
303 void *dest, size_t destlen)
307 ucs2_t buffer[MAXPATHLEN];
308 ucs2_t buffer2[MAXPATHLEN];
311 lazy_initialize_conv();
313 /* convert from_set to UCS2 */
314 if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
315 (char*) buffer, sizeof(buffer))) ) {
316 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
320 /* Do pre/decomposition */
321 if ( ((!(charsets[to]) || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
322 (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
324 if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
327 i_len = sizeof(buffer2);
330 switch (composition) {
336 if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
340 if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
345 /* Convert UCS2 to to_set */
346 if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
347 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
357 * Convert between character sets, allocating a new buffer for the result.
359 * @param srclen length of source buffer.
360 * @param dest always set at least to NULL
361 * @note -1 is not accepted for srclen.
363 * @returns Size in bytes of the converted string; or -1 in case of error.
366 static size_t convert_string_allocate_internal(charset_t from, charset_t to,
367 void const *src, size_t srclen, char **dest)
369 size_t i_len, o_len, destlen;
371 const char *inbuf = (const char *)src;
372 char *outbuf = NULL, *ob = NULL;
373 atalk_iconv_t descriptor;
377 if (src == NULL || srclen == (size_t)-1)
380 lazy_initialize_conv();
382 descriptor = conv_handles[from][to];
384 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
385 /* conversion not supported, return -1*/
386 LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!\n");
390 destlen = MAX(srclen, 512);
392 destlen = destlen * 2;
393 ob = (char *)realloc(ob, destlen);
395 LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!\n");
401 inbuf = src; /* this restarts the whole conversion if buffer needed to be increased */
404 retval = atalk_iconv(descriptor,
407 if(retval == (size_t)-1) {
408 const char *reason="unknown error";
411 reason="Incomplete multibyte sequence";
416 reason="Illegal multibyte sequence";
419 LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf);
424 destlen = destlen - o_len;
426 /* Terminate the string */
427 if (to == CH_UCS2 && o_len >= 2) {
430 *dest = (char *)realloc(ob,destlen+2);
432 else if ( to != CH_UCS2 && o_len > 0 ) {
434 *dest = (char *)realloc(ob,destlen+1);
437 goto convert; /* realloc */
440 if (destlen && !*dest) {
441 LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!\n");
450 size_t convert_string_allocate(charset_t from, charset_t to,
451 void const *src, size_t srclen,
456 ucs2_t buffer[MAXPATHLEN];
457 ucs2_t buffer2[MAXPATHLEN];
460 lazy_initialize_conv();
464 /* convert from_set to UCS2 */
465 if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
466 buffer, sizeof(buffer))) ) {
467 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
471 /* Do pre/decomposition */
472 if ( ((!(charsets[to]) || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
473 (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
475 if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
478 i_len = sizeof(buffer2);
481 switch (composition) {
487 if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
491 if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
496 /* Convert UCS2 to to_set */
497 if ((size_t)(-1) == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
498 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
504 size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
509 size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
511 if (size == (size_t)-1) {
515 if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
520 size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
525 size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
530 size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
532 if (size == (size_t)-1) {
536 if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
541 size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
547 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
549 return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
552 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
554 return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
557 size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
559 return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
562 size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
564 return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
568 * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
570 * @param dest always set at least to NULL
572 * @returns The number of bytes occupied by the string in the destination
573 * or -1 in case of error.
576 size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
578 size_t src_len = strlen(src);
581 return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
585 * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
587 * @param dest always set at least to NULL
589 * @returns The number of bytes occupied by the string in the destination
592 size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
594 size_t src_len = strlen(src);
597 return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
601 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
603 * @param dest always set at least to NULL
605 * @returns The number of bytes occupied by the string in the destination
608 size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen)
610 size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
611 return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
615 size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
617 size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
619 return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
623 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
625 * @param dest always set at least to NULL
627 * @returns The number of bytes occupied by the string in the destination
630 size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
632 size_t src_len = strlen(src);
634 return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
637 size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
640 ucs2_t u[MAXPATHLEN];
644 if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
649 if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
654 if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
664 size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
667 ucs2_t u[MAXPATHLEN];
671 if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
676 if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
681 if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
691 size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
693 return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
696 size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
698 return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
/* Scratch buffer shared by all debug_out() calls; each call overwrites it. */
static char debugbuf[ MAXPATHLEN +1 ];

/**
 * Render a byte sequence as lower-case hex, each byte followed by a dot
 * (e.g. "41.42."), for use in debug messages.
 *
 * The text is written to a static buffer, so the returned pointer is only
 * valid until the next call. Output is truncated to the whole bytes that fit
 * in the buffer; an empty or NULL input yields an empty string.
 *
 * @param seq bytes to dump (may be NULL when len is 0)
 * @param len number of bytes in seq
 * @returns pointer to the static, NUL-terminated hex dump
 **/
char * debug_out ( char * seq, size_t len)
{
	const unsigned char *in = (const unsigned char *) seq;
	char *out = debugbuf;
	size_t room = sizeof(debugbuf);
	size_t i;

	if (seq == NULL)
		len = 0;

	/* every input byte takes three characters, and the terminating NUL
	   must still fit afterwards */
	for (i = 0; i < len && room > 3; i++) {
		snprintf(out, room, "%2.2x.", (unsigned int) in[i]);
		out += 3;
		room -= 3;
	}
	*out = '\0';

	return debugbuf;
}
723 * Convert from MB to UCS2 charset
725 * CONV_UNESCAPEHEX: ':XX' will be converted to an UCS2 character
726 * CONV_IGNORE: return the first convertible characters.
728 * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
729 * The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it
730 * for e.g. HFS cdroms.
733 static size_t pull_charset_flags (charset_t from_set, charset_t cap_charset, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
735 size_t i_len, o_len, hlen;
736 size_t retval, j = 0;
737 const char* inbuf = (const char*)src;
738 char* outbuf = (char*)dest;
739 atalk_iconv_t descriptor;
740 atalk_iconv_t descriptor_cap;
745 if (srclen == (size_t)-1)
746 srclen = strlen(src)+1;
748 lazy_initialize_conv();
750 descriptor = conv_handles[from_set][CH_UCS2];
751 descriptor_cap = conv_handles[cap_charset][CH_UCS2];
753 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
762 if ( flags && (*flags & CONV_UNESCAPEHEX)) {
763 if ( NULL != (s = strchr ( inbuf, ':'))) {
764 j = i_len - (s - inbuf);
765 if ( 0 == (i_len = (s - inbuf)))
770 retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
771 if(retval==(size_t)-1) {
772 if (errno == EILSEQ && flags && (*flags & CONV_IGNORE)) {
773 *flags |= CONV_REQMANGLE;
774 return destlen-o_len;
781 if (j && flags && (*flags & CONV_UNESCAPEHEX )) {
782 /* we're at the start on an hex encoded ucs2 char */
788 isxdigit( *(inbuf+1)) && isxdigit( *(inbuf+2)) ) {
790 while ( *inbuf == ':' && j >=3 &&
791 isxdigit( *(inbuf+1)) && isxdigit( *(inbuf+2)) ) {
793 h[hlen] = hextoint( *inbuf ) << 4;
795 h[hlen++] |= hextoint( *inbuf );
799 h_buf = (const char*) h;
800 if ((size_t) -1 == (retval = atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len)) ) {
801 if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
802 *flags |= CONV_REQMANGLE;
803 return destlen-o_len;
811 /* We have an invalid :xx sequence */
812 if (CHECK_FLAGS(flags, CONV_IGNORE)) {
813 *flags |= CONV_REQMANGLE;
814 return destlen-o_len;
824 goto conversion_loop;
829 return destlen-o_len;
833 * Convert from UCS2 to MB charset
835 * CONV_ESCAPEDOTS: escape leading dots
836 * CONV_ESCAPEHEX: unconvertible characters and '/' will be escaped to :XX
837 * CONV_IGNORE: unconvertible characters will be replaced with '_'
839 * CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ?
840 * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
841 * The escape scheme is not compatible to the old cap style escape. This is bad, we need it
842 * for e.g. HFS cdroms.
846 static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
848 size_t i_len, o_len, i;
849 size_t retval, j = 0;
850 const char* inbuf = (const char*)src;
851 char* outbuf = (char*)dest;
852 atalk_iconv_t descriptor;
854 char *buf, *buf_save;
857 lazy_initialize_conv();
859 descriptor = conv_handles[CH_UCS2][to_set];
861 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
869 if ( SVAL(inbuf,0) == 0x002e && flags && (*flags & CONV_ESCAPEDOTS)) { /* 0x002e = . */
881 if (flags) *flags |= CONV_REQESCAPE;
885 if ( flags && (*flags & CONV_ESCAPEHEX)) {
886 for ( i = 0; i < i_len; i+=2) {
887 if ( SVAL((inbuf+i),0) == 0x002f) { /* 0x002f = / */
889 if ( 0 == ( i_len = i))
892 } else if ( SVAL(inbuf+i,0) == 0x003a) { /* 0x003a = : */
899 retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
900 if (retval==(size_t)-1) {
901 if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
902 *flags |= CONV_REQMANGLE;
903 return destlen -o_len;
905 else if ( errno == EILSEQ && flags && (*flags & CONV_ESCAPEHEX)) {
910 if ((size_t) -1 == (buflen = convert_string_allocate_internal(CH_UCS2, cap_set, inbuf, 2, &buf)) )
919 *outbuf++ = hexdig[ ( *buf & 0xf0 ) >> 4 ];
920 *outbuf++ = hexdig[ *buf & 0x0f ];
929 if (flags) *flags |= CONV_REQESCAPE;
931 goto conversion_loop;
938 if (j && flags && (*flags & CONV_ESCAPEHEX)) {
943 o_save[destlen -o_len] = ':';
944 o_save[destlen -o_len+1] = '2';
945 o_save[destlen -o_len+2] = 'f';
952 goto conversion_loop;
954 return destlen -o_len;
957 size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, char* src, size_t src_len, char* dest, size_t dest_len, u_int16_t *flags)
961 ucs2_t buffer[MAXPATHLEN];
962 ucs2_t buffer2[MAXPATHLEN];
965 lazy_initialize_conv();
967 /* convert from_set to UCS2 */
968 if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len,
969 (char *) buffer, sizeof(buffer), flags)) ) {
970 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
977 /* Do pre/decomposition */
978 if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) ||
979 ((!(charsets[to_set]) || !(charsets[to_set]->flags & CHARSET_DECOMPOSED)) &&
980 (!(charsets[from_set]) || (charsets[from_set]->flags & CHARSET_DECOMPOSED))))
982 if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && charsets[to_set]->flags & CHARSET_DECOMPOSED) )
985 i_len = sizeof(buffer2);
988 switch (composition) {
994 if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
998 if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
1003 /* Do case conversions */
1004 if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
1006 return (size_t)(-1);
1008 if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
1010 return (size_t)(-1);
1013 /* Convert UCS2 to to_set */
1014 if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
1015 LOG(log_error, logtype_default,
1016 "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));