2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 #endif /* HAVE_CONFIG_H */
35 #include <sys/param.h>
36 #ifdef HAVE_USABLE_ICONV
39 #include <arpa/inet.h>
41 #include <atalk/logger.h>
42 #include <atalk/unicode.h>
43 #include <atalk/util.h>
44 #include <atalk/compat.h>
45 #include <atalk/byteorder.h>
51 * @brief Character-set conversion routines built on our iconv.
53 * @note Samba's internal character set (at least in the 3.0 series)
54 * is always the same as the one for the Unix filesystem. It is
55 * <b>not</b> necessarily UTF-8 and may be different on machines that
56 * need i18n filenames to be compatible with Unix software. It does
57 * have to be a superset of ASCII. All multibyte sequences must start
58 * with a byte with the high bit set.
/* Capacity of the per-charset tables below; add_charset() refuses to hand out a slot at or beyond it. */
64 #define MAX_CHARSETS 20
/* Test the bits b in *a; evaluates to 0 when the flags pointer a is NULL. */
66 #define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )
/* iconv descriptors, indexed [from][to]. */
68 static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
/* Name string per charset slot; entries are strdup()ed copies released by free_charset_names(). */
69 static char* charset_names[MAX_CHARSETS];
/* Function table per charset slot, obtained from find_charset_functions(). */
70 static struct charset_functions* charsets[MAX_CHARSETS];
/* Lower-case hex digits, used when emitting hex-escaped bytes. */
71 static char hexdig[] = "0123456789abcdef";
/*
 * hextoint(c): numeric value (0..15) of the hexadecimal digit character c.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Upper-case digits must be handled
 * because the ":XX" unescape code validates its input with isxdigit(),
 * which lets both cases through.
 * The argument is parenthesised and cast to unsigned char before it reaches
 * <ctype.h>, since passing a negative char value there is undefined.
 * c is evaluated more than once, so it must not have side effects.
 */
#define hextoint( c ) ( isdigit( (unsigned char)(c) ) ? (c) - '0' : tolower( (unsigned char)(c) ) + 10 - 'a' )
76 * Return the name of a charset to give to iconv().
/*
 * Map a charset_t to a name string: fixed literals for CH_UCS2, CH_UTF8 and
 * CH_UTF8_MAC, otherwise whatever is stored in charset_names[ch]. That table
 * is a zero-initialised static, so the result can be NULL for a slot nobody
 * registered. ch is used as an array index with no bounds check in the lines
 * visible here.
 */
78 static const char *charset_name(charset_t ch)
80 const char *ret = NULL;
82 if (ch == CH_UCS2) ret = "UCS-2";
83 else if (ch == CH_UTF8) ret = "UTF8";
84 else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
85 else ret = charset_names[ch];
/*
 * Store a private strdup() copy of name as the name of charset slot ch.
 * The ch >= NUM_CHARSETS guard controls a statement that is not visible in
 * this excerpt (presumably an error return -- confirm).
 * NOTE(review): an existing charset_names[ch] is overwritten without being
 * freed, and no check of the strdup() result is visible here.
 */
89 int set_charset_name(charset_t ch, const char *name)
91 if (ch >= NUM_CHARSETS)
93 charset_names[ch] = strdup(name);
/*
 * Release every registered charset name: each non-NULL entry of
 * charset_names[] is freed and reset to NULL, so a second call is harmless.
 */
97 void free_charset_names(void)
99 for (int ch = 0; ch < MAX_CHARSETS; ch++) {
100 if (charset_names[ch]) {
101 free(charset_names[ch]);
102 charset_names[ch] = NULL;
/*
 * Look up the function table for charset ch, caching it in charsets[ch].
 * When the slot is already filled the cached-entry branch is taken (its body
 * is not visible in this excerpt); otherwise the slot is filled from
 * find_charset_functions(), keyed by charset_name(ch).
 */
107 static struct charset_functions* get_charset_functions (charset_t ch)
109 if (charsets[ch] != NULL)
112 charsets[ch] = find_charset_functions(charset_name(ch));
/*
 * One-time setup hook called at the start of the conversion entry points.
 * It keeps a function-local static flag; the code that tests and sets the
 * flag is not visible in this excerpt (presumably it runs init_iconv() on
 * the first call only -- confirm).
 */
118 static void lazy_initialize_conv(void)
120 static int initialized = 0;
/*
 * Register an additional charset by name and return its charset_t handle.
 *
 *  - Slots 0..max_charset_t are first compared case-insensitively against
 *    name; the action on a match is not visible here (presumably the
 *    existing handle is returned -- confirm).
 *  - The candidate slot is max_charset_t + 1; if that reaches MAX_CHARSETS
 *    the failure is logged and (charset_t)-1 is returned.
 *  - Both the name -> UCS-2 and the UCS-2 -> name descriptors must open.
 *    Either failure is logged, the slot is set to NULL and (charset_t)-1 is
 *    returned.
 *  - On success the name is strdup()ed into charset_names[] and the
 *    function table is cached in charsets[].
 *
 * NOTE(review): when the second atalk_iconv_open() fails, the descriptor
 * opened first stays in conv_handles[cur_charset_t][CH_UCS2]; no close is
 * visible before the return.
 * NOTE(review): charset_name(c1) can be NULL for a slot without a registered
 * name, in which case strcasecmp() would receive NULL -- confirm that all
 * slots up to max_charset_t are always named.
 */
128 charset_t add_charset(const char* name)
130 static charset_t max_charset_t = NUM_CHARSETS-1;
131 charset_t cur_charset_t = max_charset_t+1;
134 lazy_initialize_conv();
136 for (c1=0; c1<=max_charset_t;c1++) {
137 if ( strcasecmp(name, charset_name(c1)) == 0)
141 if ( cur_charset_t >= MAX_CHARSETS ) {
142 LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
144 return (charset_t) -1;
147 /* First try to setup the required conversions */
149 conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
150 if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
151 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
152 name, charset_name(CH_UCS2));
153 conv_handles[cur_charset_t][CH_UCS2] = NULL;
154 return (charset_t) -1;
157 conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
158 if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
159 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
160 charset_name(CH_UCS2), name);
161 conv_handles[CH_UCS2][cur_charset_t] = NULL;
162 return (charset_t) -1;
165 /* register the new charset_t name */
166 charset_names[cur_charset_t] = strdup(name);
168 charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
172 LOG(log_debug9, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
174 return (cur_charset_t);
178 * Initialize iconv conversion descriptors.
180 * This is called the first time it is needed, and also called again
181 * every time the configuration is reloaded, because the charset or
182 * codepage might have changed.
/*
 * Open the iconv descriptors for every built-in charset c1 < NUM_CHARSETS:
 * always c1 -> UCS-2, and UCS-2 -> c1 for every c1 other than CH_UCS2
 * itself. A failed open is logged at error level and the slot is set to
 * NULL. The charset's function table is then cached in charsets[c1].
 */
184 void init_iconv(void)
188 for (c1=0;c1<NUM_CHARSETS;c1++) {
189 const char *name = charset_name((charset_t)c1);
191 conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
192 if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
193 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
194 name, charset_name(CH_UCS2));
195 conv_handles[c1][CH_UCS2] = NULL;
198 if (c1 != CH_UCS2) { /* avoid lost memory, make valgrind happy */
199 conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
200 if (conv_handles[CH_UCS2][c1] == (atalk_iconv_t)-1) {
201 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
202 charset_name(CH_UCS2), name);
203 conv_handles[CH_UCS2][c1] = NULL;
207 charsets[c1] = get_charset_functions (c1);
/*
 * Terminate a converted string when room remains in the output buffer.
 *
 * to         destination charset; decides the terminator width
 * buf        start of the output buffer
 * bytesleft  unused bytes remaining in buf
 * len        bytes already produced (the caller passes destlen - o_len)
 *
 * A CH_UCS2 destination needs at least 2 spare bytes, any other charset at
 * least 1. The writes and the return statements are not visible in this
 * excerpt.
 */
214 static size_t add_null(charset_t to, char *buf, size_t bytesleft, size_t len)
216 /* Terminate the string */
217 if (to == CH_UCS2 && bytesleft >= 2) {
222 else if ( to != CH_UCS2 && bytesleft > 0 )
234 * Convert string from one encoding to another, making error checking etc
236 * @param src pointer to source string (multibyte or singlebyte)
237 * @param srclen length of the source string in bytes
238 * @param dest pointer to destination string (multibyte or singlebyte)
239 * @param destlen maximal length allowed for string
240 * @returns the number of bytes occupied in the destination
/*
 * Single-step conversion from charset `from` to charset `to`, using the
 * cached descriptor conv_handles[from][to].
 *
 *  - srclen == (size_t)-1 means "measure src": strlen_w() * 2 bytes for
 *    UCS-2 input, strlen() otherwise.
 *  - A descriptor equal to -1 or 0 is treated as "pair unavailable" (the
 *    handling itself is not visible in this excerpt).
 *  - When atalk_iconv() returns (size_t)-1 a reason string is picked and
 *    logged at debug level; the errno tests that pick it are not visible.
 *  - The result is terminated through add_null(), which receives the start
 *    of dest, the unused byte count o_len and the used byte count
 *    destlen - o_len.
 */
242 static size_t convert_string_internal(charset_t from, charset_t to,
243 void const *src, size_t srclen,
244 void *dest, size_t destlen)
248 const char* inbuf = (const char*)src;
249 char* outbuf = (char*)dest;
250 char* o_save = outbuf;
251 atalk_iconv_t descriptor;
253 /* Fixed based on Samba 3.0.6 */
254 if (srclen == (size_t)-1) {
255 if (from == CH_UCS2) {
256 srclen = (strlen_w((const ucs2_t *)src)) * 2;
258 srclen = strlen((const char *)src);
263 lazy_initialize_conv();
265 descriptor = conv_handles[from][to];
267 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
273 retval = atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
274 if(retval==(size_t)-1) {
275 const char *reason="unknown error";
278 reason="Incomplete multibyte sequence";
281 reason="No more room";
284 reason="Illegal multibyte sequence";
287 LOG(log_debug, logtype_default,"Conversion error: %s",reason);
291 /* Terminate the string */
292 return add_null( to, o_save, o_len, destlen -o_len);
/*
 * Convert src from charset `from` to charset `to` through a UCS-2
 * intermediate, writing into the caller's dest buffer:
 *
 *  1. from -> UCS-2 into a stack buffer of MAXPATHLEN ucs2_t;
 *  2. decompose_w() when the target charset is flagged CHARSET_DECOMPOSED,
 *     otherwise precompose_w() when the source charset has no function
 *     table or is flagged CHARSET_DECOMPOSED;
 *  3. UCS-2 -> to into dest, limited by destlen.
 *
 * Failures of steps 1 and 3 are logged at error level. The setup of `u` and
 * the return statements are not visible in this excerpt.
 */
296 size_t convert_string(charset_t from, charset_t to,
297 void const *src, size_t srclen,
298 void *dest, size_t destlen)
302 ucs2_t buffer[MAXPATHLEN];
303 ucs2_t buffer2[MAXPATHLEN];
305 /* convert from_set to UCS2 */
306 if ((size_t)-1 == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
307 (char*) buffer, sizeof(buffer))) ) {
308 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
312 /* Do pre/decomposition */
313 i_len = sizeof(buffer2);
315 if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
316 if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
319 else if (!charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED)) {
320 if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
327 /* Convert UCS2 to to_set */
328 if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
329 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
339 * Convert between character sets, allocating a new buffer for the result.
341 * @param srclen length of source buffer.
342 * @param dest always set at least to NULL
343 * @note -1 is not accepted for srclen.
345 * @returns Size in bytes of the converted string; or -1 in case of error.
/*
 * Single-step conversion from `from` to `to` into a heap buffer that is
 * grown with realloc() and handed back through *dest.
 *
 *  - A NULL src or srclen == (size_t)-1 is rejected (the action is not
 *    visible in this excerpt).
 *  - A descriptor equal to -1 or 0 is logged as unsupported.
 *  - The working size starts at MAX(srclen, 512) and is doubled before each
 *    realloc(); inbuf is reset to src so that a retry converts from the
 *    beginning again.
 *  - After conversion destlen is reduced to the number of bytes produced
 *    (destlen - o_len). The buffer is then realloc()ed to that size plus a
 *    2-byte (UCS-2) or 1-byte terminator. If no room was left for the
 *    terminator, control jumps back to `convert` to grow the buffer (the
 *    label itself is not visible here).
 *
 * NOTE(review): the error log prints inbuf with %s; inbuf may be UCS-2 or
 * lack a terminator at that point -- confirm this is safe.
 * NOTE(review): *dest receives the realloc() result directly; whether ob is
 * freed when that realloc() fails is not visible here.
 */
348 static size_t convert_string_allocate_internal(charset_t from, charset_t to,
349 void const *src, size_t srclen, char **dest)
351 size_t i_len, o_len, destlen;
353 const char *inbuf = (const char *)src;
354 char *outbuf = NULL, *ob = NULL;
355 atalk_iconv_t descriptor;
359 if (src == NULL || srclen == (size_t)-1)
362 lazy_initialize_conv();
364 descriptor = conv_handles[from][to];
366 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
367 /* conversion not supported, return -1*/
368 LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!");
372 destlen = MAX(srclen, 512);
374 destlen = destlen * 2;
375 outbuf = (char *)realloc(ob, destlen);
377 LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!");
383 inbuf = src; /* this restarts the whole conversion if buffer needed to be increased */
386 retval = atalk_iconv(descriptor,
389 if(retval == (size_t)-1) {
390 const char *reason="unknown error";
393 reason="Incomplete multibyte sequence";
398 reason="Illegal multibyte sequence";
401 LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf);
407 destlen = destlen - o_len;
409 /* Terminate the string */
410 if (to == CH_UCS2 && o_len >= 2) {
413 *dest = (char *)realloc(ob,destlen+2);
415 else if ( to != CH_UCS2 && o_len > 0 ) {
417 *dest = (char *)realloc(ob,destlen+1);
420 goto convert; /* realloc */
423 if (destlen && !*dest) {
424 LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!");
/*
 * Convert src from charset `from` to charset `to` through a UCS-2
 * intermediate, returning the result in a newly allocated buffer:
 *
 *  1. from -> UCS-2 into a stack buffer of MAXPATHLEN ucs2_t;
 *  2. decompose_w() when the target charset is flagged CHARSET_DECOMPOSED,
 *     otherwise precompose_w() when the source charset has no function
 *     table or is flagged CHARSET_DECOMPOSED;
 *  3. UCS-2 -> to via convert_string_allocate_internal(), which allocates
 *     *dest.
 *
 * Failures of steps 1 and 3 are logged at error level. The rest of the
 * parameter list, the setup of `u` and the return statements are not
 * visible in this excerpt.
 */
433 size_t convert_string_allocate(charset_t from, charset_t to,
434 void const *src, size_t srclen,
439 ucs2_t buffer[MAXPATHLEN];
440 ucs2_t buffer2[MAXPATHLEN];
444 /* convert from_set to UCS2 */
445 if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
446 buffer, sizeof(buffer))) ) {
447 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
451 /* Do pre/decomposition */
452 i_len = sizeof(buffer2);
454 if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
455 if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
458 else if ( !charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED) ) {
459 if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
467 /* Convert UCS2 to to_set */
468 if ((size_t)-1 == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
469 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
/*
 * Upper-case a string in charset ch: src is converted to an allocated UCS-2
 * buffer, strupper_w() is applied to it, and the result is converted back
 * into dest (limit destlen). A (size_t)-1 from the first conversion is
 * handled as an error. When strupper_w() returns 0 and dest == src a
 * shortcut branch is taken; its body is not visible in this excerpt
 * (presumably nothing changed, so no write-back is needed -- confirm).
 */
475 size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
480 size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
482 if (size == (size_t)-1) {
486 if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
491 size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
/*
 * Lower-case a string in charset ch: src is converted to an allocated UCS-2
 * buffer, strlower_w() is applied to it, and the result is converted back
 * into dest (limit destlen). A (size_t)-1 from the first conversion is
 * handled as an error. When strlower_w() returns 0 and dest == src a
 * shortcut branch is taken; its body is not visible in this excerpt
 * (presumably nothing changed, so no write-back is needed -- confirm).
 */
496 size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
501 size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
503 if (size == (size_t)-1) {
507 if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
512 size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
/* Upper-case a CH_UNIX string; forwards to charset_strupper() and returns its result. */
518 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
520 return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
/* Lower-case a CH_UNIX string; forwards to charset_strlower() and returns its result. */
523 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
525 return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
/* Upper-case a CH_UTF8 string; forwards to charset_strupper() and returns its result. */
528 size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
530 return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
/* Lower-case a CH_UTF8 string; forwards to charset_strlower() and returns its result. */
533 size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
535 return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
539 * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
541 * @param dest always set at least to NULL
543 * @returns The number of bytes occupied by the string in the destination
544 * or -1 in case of error.
/*
 * Convert the NUL-terminated string src from charset ch to UCS-2 in a newly
 * allocated buffer. The source length is taken with strlen(), so src must
 * not be NULL and the terminator is not part of the converted input.
 * Returns whatever convert_string_allocate() returns.
 */
547 size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
549 size_t src_len = strlen(src);
552 return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
555 /** -----------------------------------
556 * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
558 * @param dest always set at least to NULL
560 * @returns The number of bytes occupied by the string in the destination
/*
 * Convert the NUL-terminated string src from charset ch to UTF-8 in a newly
 * allocated buffer. The source length is taken with strlen(), so src must
 * not be NULL and the terminator is not part of the converted input.
 * Returns whatever convert_string_allocate() returns.
 */
563 size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
565 size_t src_len = strlen(src);
568 return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
571 /** -----------------------------------
572 * Copy a string from a UCS2 src to a charset_t char * destination buffer supplied by the caller
574 * @param dest caller-provided output buffer (destlen is its size limit)
576 * @returns The number of bytes occupied by the string in the destination
/*
 * Convert the terminated UCS-2 string src to charset ch, writing into the
 * caller-supplied dest buffer (limit destlen). The source length in bytes is
 * strlen_w(src) * sizeof(ucs2_t). Returns whatever convert_string() returns.
 */
579 size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen)
581 size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
582 return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
585 /* --------------------------------- */
/*
 * Convert the terminated UCS-2 string src to charset ch in a newly allocated
 * buffer. The source length in bytes is strlen_w(src) * sizeof(ucs2_t).
 * Returns whatever convert_string_allocate() returns.
 */
586 size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
588 size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
590 return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
593 /** ---------------------------------
594 * Copy a string from a UTF-8 src to a charset_t char * destination, allocating a buffer
596 * @param dest always set at least to NULL
598 * @returns The number of bytes occupied by the string in the destination
/*
 * Convert the NUL-terminated UTF-8 string src to charset ch in a newly
 * allocated buffer. The source length is taken with strlen(), so src must
 * not be NULL. Returns whatever convert_string_allocate() returns.
 */
601 size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
603 size_t src_len = strlen(src);
605 return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
/*
 * Precompose a string in charset ch: src (inlen bytes) is converted to an
 * allocated UCS-2 buffer, precompose_w() writes the composed form into the
 * MAXPATHLEN-element stack array u, and that is converted back to ch into
 * dst (limit outlen). Each step is checked against (size_t)-1; the error
 * handling and the release of the temporary buffer are not visible in this
 * excerpt.
 */
608 size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
611 ucs2_t u[MAXPATHLEN];
615 if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
620 if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
625 if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
/*
 * Decompose a string in charset ch: src (inlen bytes) is converted to an
 * allocated UCS-2 buffer, decompose_w() writes the decomposed form into the
 * MAXPATHLEN-element stack array u, and that is converted back to ch into
 * dst (limit outlen). Each step is checked against (size_t)-1; the error
 * handling and the release of the temporary buffer are not visible in this
 * excerpt.
 */
634 size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
637 ucs2_t u[MAXPATHLEN];
641 if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
646 if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
651 if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
/* Precompose a CH_UTF8 string; forwards to charset_precompose() and returns its result. */
660 size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
662 return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
/* Decompose a CH_UTF8 string; forwards to charset_decompose() and returns its result. */
665 size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
667 return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
/*
 * Debug helper: formats the len bytes at seq as two-digit lower-case hex
 * values, each followed by '.', through the cursor q (presumably walking the
 * static debugbuf below -- the assignment is not visible in this excerpt).
 *
 * NOTE(review): len is a size_t, so with len == 0 the bound len-1 wraps
 * around and the loop does not terminate as intended.
 * NOTE(review): every input byte produces 3 output characters via an
 * unbounded sprintf(); unless the lines not visible here cap len, inputs
 * longer than about MAXPATHLEN/3 bytes would overrun debugbuf.
 */
671 static char debugbuf[ MAXPATHLEN +1 ];
672 char * debug_out ( char * seq, size_t len)
678 p = (unsigned char*) seq;
681 for ( i = 0; i<=(len-1); i++)
683 sprintf(q, "%2.2x.", *p);
694 * Convert from MB to UCS2 charset
696 * CONV_UNESCAPEHEX: ':XX' will be converted to a UCS2 character
697 * CONV_IGNORE: return the first convertible characters.
698 * CONV_FORCE: force conversion
700 * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
701 * The (un)escape scheme is not compatible with the old cap style escape. This is bad, we need it
702 * for e.g. HFS cdroms.
/*
 * Convert src (charset from_set) to UCS-2 in dest, applying the escaping
 * rules selected by *flags. Only what the lines visible in this excerpt
 * show is described; several statements between them are not visible.
 *
 *  - option is a snapshot of *flags, or 0 when flags is NULL.
 *  - srclen == (size_t)-1 means "measure src"; strlen(src) + 1 is used, so
 *    the terminator is included.
 *  - Two descriptors are used: from_set -> UCS-2 for ordinary text and
 *    cap_set -> UCS-2 for bytes recovered from ":XX" escapes.
 *  - CONV_ESCAPEDOTS with a leading '.' and at least 2 input bytes: three
 *    UCS-2 units are written and CONV_REQESCAPE is set.
 *  - The input is scanned for the next ':' or '/' (index j) and the text up
 *    to it is run through atalk_iconv(). On EILSEQ/EINVAL: CONV_IGNORE sets
 *    CONV_REQMANGLE and returns the bytes written so far; CONV__EILSEQ
 *    stores IGNORE_CHAR in the output instead.
 *  - First special-character group (presumably the ':' case -- the test on
 *    escch is not visible): with CONV_UNESCAPEHEX, consecutive ":XX" groups
 *    are decoded into h[] and converted with the cap descriptor; failures
 *    and invalid sequences honour CONV_IGNORE as above. Otherwise, with
 *    CONV_ESCAPEHEX a three-unit escape is written; else for a CH_UTF8_MAC
 *    or CH_MAC target 0x002f ('/') is written; else 0x003a (':').
 *  - Second group (presumably the '/' case): with CONV_ESCAPEHEX a
 *    three-unit escape is written; else when coming from a MAC charset
 *    0x003a (':') is written; else 0x002f ('/').
 *  - Returns destlen - o_len (bytes produced) when all input was consumed
 *    (i_len + j == 0) or CONV_FORCE is set, otherwise (size_t)-1.
 */
705 static size_t pull_charset_flags (charset_t from_set, charset_t to_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, uint16_t *flags)
707 const uint16_t option = (flags ? *flags : 0);
710 const char* inbuf = (const char*)src;
712 atalk_iconv_t descriptor;
713 atalk_iconv_t descriptor_cap;
714 char escch; /* 150210: uninitialized OK, depends on j */
716 if (srclen == (size_t)-1)
717 srclen = strlen(src) + 1;
719 descriptor = conv_handles[from_set][CH_UCS2];
720 descriptor_cap = conv_handles[cap_set][CH_UCS2];
722 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
730 if ((option & CONV_ESCAPEDOTS) && i_len >= 2 && inbuf[0] == '.') {
736 memcpy(outbuf, &ucs2, sizeof(ucs2_t));
738 memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
740 memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
745 *flags |= CONV_REQESCAPE;
749 for (j = 0; j < i_len; ++j)
750 if (inbuf[j] == ':' || inbuf[j] == '/') {
758 atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
759 if (errno == EILSEQ || errno == EINVAL) {
761 if ((option & CONV_IGNORE)) {
762 *flags |= CONV_REQMANGLE;
763 return destlen - o_len;
765 if ((option & CONV__EILSEQ)) {
770 *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */
775 /* FIXME reset stat ? */
783 /* we have a ':' or '/' */
787 if ((option & CONV_UNESCAPEHEX)) {
788 /* treat it as a CAP hex encoded char */
792 while (i_len >= 3 && inbuf[0] == ':' &&
793 isxdigit(inbuf[1]) && isxdigit(inbuf[2])) {
794 h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]);
799 const char *h_buf = h;
800 if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) {
803 if (errno == EILSEQ && (option & CONV_IGNORE)) {
804 *flags |= CONV_REQMANGLE;
805 return destlen - o_len;
810 /* We have an invalid :xx sequence */
812 if ((option & CONV_IGNORE)) {
813 *flags |= CONV_REQMANGLE;
814 return destlen - o_len;
818 } else if (option & CONV_ESCAPEHEX) {
824 memcpy(outbuf, &ucs2, sizeof(ucs2_t));
826 memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
828 memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
833 } else if (to_set == CH_UTF8_MAC || to_set == CH_MAC) {
834 /* convert to a '/' */
835 ucs2_t slash = 0x002f;
836 memcpy(outbuf, &slash, sizeof(ucs2_t));
843 ucs2_t ucs2 = 0x003a;
844 memcpy(outbuf, &ucs2, sizeof(ucs2_t));
852 if (option & CONV_ESCAPEHEX) {
858 memcpy(outbuf, &ucs2, sizeof(ucs2_t));
860 memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
862 memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
867 } else if ((from_set == CH_UTF8_MAC || from_set == CH_MAC)
/* NOTE(review): (to_set != CH_UTF8_MAC || to_set != CH_MAC) is true for every to_set unless the two constants are equal; '&&' was probably intended -- confirm. */
868 && (to_set != CH_UTF8_MAC || to_set != CH_MAC)) {
870 ucs2_t ucs2 = 0x003a;
871 memcpy(outbuf, &ucs2, sizeof(ucs2_t));
878 ucs2_t ucs2 = 0x002f;
879 memcpy(outbuf, &ucs2, sizeof(ucs2_t));
889 return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
893 * Convert from UCS2 to MB charset
895 * CONV_ESCAPEDOTS: escape leading dots
896 * CONV_ESCAPEHEX: unconvertible characters and '/' will be escaped to :XX
897 * CONV_IGNORE: return the first convertible characters.
898 * CONV__EILSEQ: unconvertible characters will be replaced with '_'
899 * CONV_FORCE: force conversion
901 * CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this?
902 * This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
903 * The escape scheme is not compatible with the old cap style escape. This is bad, we need it
904 * for e.g. HFS cdroms.
/*
 * Convert the UCS-2 data in src to charset to_set in dest, applying the
 * escaping rules selected by *flags. Only what the lines visible in this
 * excerpt show is described.
 *
 *  - option is a snapshot of *flags, or 0 when flags is NULL.
 *  - Two descriptors are used: UCS-2 -> to_set for ordinary text and
 *    UCS-2 -> cap_set for characters that have to be hex-escaped.
 *  - The main loop runs while at least 2 input bytes remain and calls
 *    atalk_iconv(). On EILSEQ:
 *      CONV_IGNORE     sets CONV_REQMANGLE and returns the bytes written so
 *                      far;
 *      CONV_ESCAPEHEX  allocates a scratch buffer of o_len / 3 + 1 bytes
 *                      and retries with the cap descriptor, trying output
 *                      sizes buflen = 1..bufsiz. The escaped form needs
 *                      buflen * 3 output bytes. Each scratch byte is then
 *                      emitted as two digits from hexdig[] (the escape
 *                      character before them is presumably written on a
 *                      line not visible here) and CONV_REQESCAPE is set.
 *  - Input bytes left over after the loop set errno to EINVAL.
 *  - Returns destlen - o_len (bytes produced) when all input was consumed
 *    (i_len + j == 0) or CONV_FORCE is set, otherwise (size_t)-1.
 *
 * NOTE(review): the malloc() result check and the matching free() are not
 * visible in this excerpt -- confirm both exist on every path.
 */
908 static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, uint16_t *flags)
910 const uint16_t option = (flags ? *flags : 0);
911 size_t i_len, o_len, i;
913 const char* inbuf = (const char*)src;
914 char* outbuf = (char*)dest;
915 atalk_iconv_t descriptor;
916 atalk_iconv_t descriptor_cap;
918 descriptor = conv_handles[CH_UCS2][to_set];
919 descriptor_cap = conv_handles[CH_UCS2][cap_set];
921 if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
931 atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
932 if (errno == EILSEQ) {
933 if ((option & CONV_IGNORE)) {
934 *flags |= CONV_REQMANGLE;
935 return destlen - o_len;
937 if ((option & CONV_ESCAPEHEX)) {
938 const size_t bufsiz = o_len / 3 + 1;
939 char *buf = malloc(bufsiz);
945 for (buflen = 1; buflen <= bufsiz; ++buflen) {
948 if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) {
951 } else if (errno != E2BIG) {
954 } else if (o < buflen) {
959 if (o_len < buflen * 3) {
966 for (i = 0; i < buflen; ++i) {
968 *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f];
969 *outbuf++ = hexdig[buf[i] & 0x0f];
972 *flags |= CONV_REQESCAPE;
978 } /* while (i_len >= 2) */
980 if (i_len > 0) errno = EINVAL;
982 return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
986 * FIXME: the size handling is a mess; we really need malloc/free logic.
987 * dest size must be dest_len + 2
989 size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, const char *src, size_t src_len, char *dest, size_t dest_len, uint16_t *flags)
993 ucs2_t buffer[MAXPATHLEN +2];
994 ucs2_t buffer2[MAXPATHLEN +2];
996 lazy_initialize_conv();
998 /* convert from_set to UCS2 */
999 if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, to_set, cap_charset, src, src_len,
1000 (char *) buffer, sizeof(buffer) -2, flags)) ) {
1001 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
1008 /* Do pre/decomposition */
1009 i_len = sizeof(buffer2) -2;
1011 if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && (charsets[to_set]->flags & CHARSET_DECOMPOSED)) ) {
1012 if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
1013 return (size_t)(-1);
1015 else if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) || !charsets[from_set] || (charsets[from_set]->flags & CHARSET_DECOMPOSED)) {
1016 if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
1017 return (size_t)(-1);
1023 /* null terminate */
1027 /* Do case conversions */
1028 if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
1031 else if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
1035 /* Convert UCS2 to to_set */
1036 if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
1037 LOG(log_error, logtype_default,
1038 "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
1041 /* null terminate */