libatalk/unicode/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif /* HAVE_CONFIG_H */
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <unistd.h>
  31 #include <string.h>
  32 #include <ctype.h>
  33 #include <errno.h>
  34 #include <sys/stat.h>
  35 #include <sys/param.h>
  36 #ifdef HAVE_USABLE_ICONV
  37 #include <iconv.h>
  38 #endif
  39 #if HAVE_LOCALE_H
  40 #include <locale.h>
  41 #endif
  42 #if HAVE_LANGINFO_H
  43 #include <langinfo.h>
  44 #endif
  45
  46 #include <netatalk/endian.h>
  47 #include <atalk/logger.h>
  48 #include <atalk/unicode.h>
  49 #include "byteorder.h"
  50
  51
  52 /**
  53  * @file
  54  *
  55  * @brief Character-set conversion routines built on our iconv.
  56  *
  57  * @note Samba's internal character set (at least in the 3.0 series)
  58  * is always the same as the one for the Unix filesystem.  It is
  59  * <b>not</b> necessarily UTF-8 and may be different on machines that
  60  * need i18n filenames to be compatible with Unix software.  It does
  61  * have to be a superset of ASCII.  All multibyte sequences must start
  62  * with a byte with the high bit set.
  63  *
  64  * @sa lib/iconv.c
  65  */
  66
  67
  68 #define MAX_CHARSETS 10
  69
  70 #define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )
  71
  72 static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
  73 static char* charset_names[MAX_CHARSETS];
  74 static struct charset_functions* charsets[MAX_CHARSETS];
  75 static char hexdig[] = "0123456789abcdef";
  76 #define hextoint( c )   ( isdigit( c ) ? c - '0' : c + 10 - 'a' )
  77
  78 /**
  79  * Return the name of a charset to give to iconv().
  80  **/
  81 static const char *charset_name(charset_t ch)
  82 {
  83         const char *ret = NULL;
  84
  85         if (ch == CH_UCS2) ret = "UCS-2";
  86         else if (ch == CH_UNIX) ret = "LOCALE"; /*lp_unix_charset();*/
  87         else if (ch == CH_MAC) ret = "MAC_ROMAN"; /*lp_display_charset();*/
  88         else if (ch == CH_UTF8) ret = "UTF8";
  89         else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
  90
  91         if (!ret)
  92                 ret = charset_names[ch];
  93
  94 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  95         if (ret && strcasecmp(ret, "LOCALE") == 0) {
  96                 const char *ln = NULL;
  97
  98 #ifdef HAVE_SETLOCALE
  99                 setlocale(LC_ALL, "");
 100 #endif
 101                 ln = nl_langinfo(CODESET);
 102                 if (ln) {
 103                         /* Check whether the charset name is supported
 104                            by iconv */
 105                         atalk_iconv_t handle = atalk_iconv_open(ln,"UCS-2");
 106                         if (handle == (atalk_iconv_t) -1) {
 107                                 LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln);
 108                                 ln = NULL;
 109                         } else {
 110                                 atalk_iconv_close(handle);
 111                         }
 112                 }
 113                 ret = ln;
 114         }
 115 #else /* system doesn't have LOCALE support */
 116 if (ch == CH_UNIX) ret = NULL;
 117 #endif
 118
 119         if (!ret || !*ret) ret = "ASCII";
 120         return ret;
 121 }
 122
 123 struct charset_functions* get_charset_functions (charset_t ch)
 124 {
 125         if (charsets[ch] != NULL)
 126                 return charsets[ch];
 127
 128         charsets[ch] = find_charset_functions(charset_name(ch));
 129
 130         return charsets[ch];
 131 }
 132
 133
 134 void lazy_initialize_conv(void)
 135 {
 136         static int initialized = 0;
 137
 138         if (!initialized) {
 139                 initialized = 1;
 140                 init_iconv();
 141         }
 142 }
 143
 144 charset_t add_charset(char* name)
 145 {
 146         static charset_t max_charset_t = NUM_CHARSETS-1;
 147         charset_t cur_charset_t = max_charset_t+1;
 148         unsigned int c1;
 149
 150         lazy_initialize_conv();
 151
 152         for (c1=0; c1<=max_charset_t;c1++) {
 153                 if ( strcasecmp(name, charset_name(c1)) == 0)
 154                         return (c1);
 155         }
 156
 157         if ( cur_charset_t >= MAX_CHARSETS )  {
 158                 LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
 159                         name, MAX_CHARSETS);
 160                 return (charset_t) -1;
 161         }
 162
 163         /* First try to setup the required conversions */
 164
 165         conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 166         if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
 167                 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 168                         name,  charset_name(CH_UCS2));
 169                 conv_handles[cur_charset_t][CH_UCS2] = NULL;
 170                 return (charset_t) -1;
 171         }
 172
 173         conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
 174         if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
 175                 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 176                         charset_name(CH_UCS2), name);
 177                 conv_handles[CH_UCS2][cur_charset_t] = NULL;
 178                 return (charset_t) -1;
 179         }
 180
 181         /* register the new charset_t name */
 182         charset_names[cur_charset_t] = strdup(name);
 183
 184         charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
 185         max_charset_t++;
 186
 187 #ifdef DEBUG
 188         LOG(log_debug, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
 189 #endif /* DEBUG */
 190         return (cur_charset_t);
 191 }
 192
 193 /**
 194  * Initialize iconv conversion descriptors.
 195  *
 196  * This is called the first time it is needed, and also called again
 197  * every time the configuration is reloaded, because the charset or
 198  * codepage might have changed.
 199  **/
 200 void init_iconv(void)
 201 {
 202         int c1;
 203
 204         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 205            first */
 206 #if 0
 207         if (!conv_handles[CH_UNIX][CH_UCS2])
 208                 conv_handles[CH_UNIX][CH_UCS2] = atalk_iconv_open("UCS-2", "ASCII");
 209
 210         if (!conv_handles[CH_UCS2][CH_UNIX])
 211                 conv_handles[CH_UCS2][CH_UNIX] = atalk_iconv_open("ASCII", "UCS-2");
 212 #endif
 213
 214         for (c1=0;c1<NUM_CHARSETS;c1++) {
 215                 const char *name = charset_name((charset_t)c1);
 216
 217                 conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 218                 if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
 219                         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 220                                 name,  charset_name(CH_UCS2));
 221                         conv_handles[c1][CH_UCS2] = NULL;
 222                 }
 223
 224                 conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
 225                 if (conv_handles[CH_UCS2][1] == (atalk_iconv_t)-1) {
 226                         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 227                                 charset_name(CH_UCS2), name);
 228                         conv_handles[c1][c1] = NULL;
 229                 }
 230
 231                 charsets[c1] = get_charset_functions (c1);
 232         }
 233 }
 234
 235 /**
 236  * Convert string from one encoding to another, making error checking etc
 237  *
 238  * @param src pointer to source string (multibyte or singlebyte)
 239  * @param srclen length of the source string in bytes
 240  * @param dest pointer to destination string (multibyte or singlebyte)
 241  * @param destlen maximal length allowed for string
 242  * @returns the number of bytes occupied in the destination
 243  **/
 244 static size_t convert_string_internal(charset_t from, charset_t to,
 245                       void const *src, size_t srclen,
 246                       void *dest, size_t destlen)
 247 {
 248         size_t i_len, o_len;
 249         size_t retval;
 250         const char* inbuf = (const char*)src;
 251         char* outbuf = (char*)dest;
 252         char* o_save = outbuf;
 253         atalk_iconv_t descriptor;
 254
 255         if (srclen == (size_t)-1)
 256                 srclen = strlen(src)+1;
 257
 258         lazy_initialize_conv();
 259
 260         descriptor = conv_handles[from][to];
 261
 262         if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 263                 return (size_t) -1;
 264         }
 265
 266         i_len=srclen;
 267         o_len=destlen;
 268         retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 269         if(retval==(size_t)-1) {
 270                 const char *reason="unknown error";
 271                 switch(errno) {
 272                         case EINVAL:
 273                                 reason="Incomplete multibyte sequence";
 274                                 break;
 275                         case E2BIG:
 276                                 reason="No more room";
 277                                break;
 278                         case EILSEQ:
 279                                reason="Illegal multibyte sequence";
 280                                break;
 281                 }
 282                 LOG(log_debug, logtype_default,"Conversion error: %s",reason);
 283                 return (size_t)-1;
 284         }
 285
 286         /* Terminate the string */
 287         if (to == CH_UCS2 && destlen-o_len >= 2) {
 288                 o_save[destlen-o_len]   = 0;
 289                 o_save[destlen-o_len+1] = 0;
 290         }
 291         else if ( to != CH_UCS2 && destlen-o_len > 0 )
 292                 o_save[destlen-o_len] = 0;
 293         else {
 294                 /* FIXME: what should we do here, string *might* be unterminated. E2BIG? */
 295         }
 296
 297         return destlen-o_len;
 298 }
 299
 300
 301 size_t convert_string(charset_t from, charset_t to,
 302                       void const *src, size_t srclen,
 303                       void *dest, size_t destlen)
 304 {
 305         size_t i_len, o_len;
 306         ucs2_t *u;
 307         ucs2_t buffer[MAXPATHLEN];
 308         ucs2_t buffer2[MAXPATHLEN];
 309         int composition = 0;
 310
 311         lazy_initialize_conv();
 312
 313         /* convert from_set to UCS2 */
 314         if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
 315                                                                (char*) buffer, sizeof(buffer))) ) {
 316                 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 317                 return (size_t) -1;
 318         }
 319
 320         /* Do pre/decomposition */
 321         if ( ((!(charsets[to])   || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
 322                 (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
 323             composition = 1;
 324         if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
 325             composition = 2;
 326
 327         i_len = sizeof(buffer2);
 328         u = buffer2;
 329
 330         switch (composition) {
 331         case 0:
 332             u = buffer;
 333             i_len = o_len;
 334             break;
 335         case 1:
 336             if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
 337                 return (size_t)(-1);
 338             break;
 339         case 2:
 340             if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
 341                 return (size_t)(-1);
 342             break;
 343         }
 344
 345         /* Convert UCS2 to to_set */
 346         if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
 347                 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 348                 return (size_t) -1;
 349         }
 350
 351         return o_len;
 352 }
 353
 354
 355
 356 /**
 357  * Convert between character sets, allocating a new buffer for the result.
 358  *
 359  * @param srclen length of source buffer.
 360  * @param dest always set at least to NULL
 361  * @note -1 is not accepted for srclen.
 362  *
 363  * @returns Size in bytes of the converted string; or -1 in case of error.
 364  **/
 365
 366 static size_t convert_string_allocate_internal(charset_t from, charset_t to,
 367                                void const *src, size_t srclen, char **dest)
 368 {
 369         size_t i_len, o_len, destlen;
 370         size_t retval;
 371         const char *inbuf = (const char *)src;
 372         char *outbuf = NULL, *ob = NULL;
 373         atalk_iconv_t descriptor;
 374
 375         *dest = NULL;
 376
 377         if (src == NULL || srclen == (size_t)-1)
 378                 return (size_t)-1;
 379
 380         lazy_initialize_conv();
 381
 382         descriptor = conv_handles[from][to];
 383
 384         if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 385                 /* conversion not supported, return -1*/
 386                 LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!\n");
 387                 return -1;
 388         }
 389
 390         destlen = MAX(srclen, 512);
 391 convert:
 392         destlen = destlen * 2;
 393         ob = (char *)realloc(ob, destlen);
 394         if (!ob) {
 395                 LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!\n");
 396                 SAFE_FREE(outbuf);
 397                 return (size_t)-1;
 398         } else {
 399                 outbuf = ob;
 400         }
 401         inbuf = src;   /* this restarts the whole conversion if buffer needed to be increased */
 402         i_len = srclen;
 403         o_len = destlen;
 404         retval = atalk_iconv(descriptor,
 405                            &inbuf, &i_len,
 406                            &outbuf, &o_len);
 407         if(retval == (size_t)-1)                {
 408                 const char *reason="unknown error";
 409                 switch(errno) {
 410                         case EINVAL:
 411                                 reason="Incomplete multibyte sequence";
 412                                 break;
 413                         case E2BIG:
 414                                 goto convert;
 415                         case EILSEQ:
 416                                 reason="Illegal multibyte sequence";
 417                                 break;
 418                 }
 419                 LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf);
 420                 return (size_t)-1;
 421         }
 422
 423
 424         destlen = destlen - o_len;
 425
 426         /* Terminate the string */
 427         if (to == CH_UCS2 && o_len >= 2) {
 428                 ob[destlen] = 0;
 429                 ob[destlen+1] = 0;
 430                 *dest = (char *)realloc(ob,destlen+2);
 431         }
 432         else if ( to != CH_UCS2 && o_len > 0 ) {
 433                 ob[destlen] = 0;
 434                 *dest = (char *)realloc(ob,destlen+1);
 435         }
 436         else {
 437                 goto convert; /* realloc */
 438         }
 439
 440         if (destlen && !*dest) {
 441                 LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!\n");
 442                 SAFE_FREE(ob);
 443                 return (size_t)-1;
 444         }
 445
 446         return destlen;
 447 }
 448
 449
 450 size_t convert_string_allocate(charset_t from, charset_t to,
 451                       void const *src, size_t srclen,
 452                       char ** dest)
 453 {
 454         size_t i_len, o_len;
 455         ucs2_t *u;
 456         ucs2_t buffer[MAXPATHLEN];
 457         ucs2_t buffer2[MAXPATHLEN];
 458         int composition = 0;
 459
 460         lazy_initialize_conv();
 461
 462         *dest = NULL;
 463
 464         /* convert from_set to UCS2 */
 465         if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
 466                                                                buffer, sizeof(buffer))) ) {
 467                 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 468                 return (size_t) -1;
 469         }
 470
 471         /* Do pre/decomposition */
 472         if ( ((!(charsets[to])   || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
 473                 (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
 474             composition = 1;
 475         if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
 476             composition = 2;
 477
 478         i_len = sizeof(buffer2);
 479         u = buffer2;
 480
 481         switch (composition) {
 482         case 0:
 483             u = buffer;
 484             i_len = o_len;
 485             break;
 486         case 1:
 487             if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
 488                 return (size_t)(-1);
 489             break;
 490         case 2:
 491             if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
 492                 return (size_t)(-1);
 493             break;
 494         }
 495
 496         /* Convert UCS2 to to_set */
 497         if ((size_t)(-1) == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
 498                 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 499
 500         return o_len;
 501
 502 }
 503
 504 size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 505 {
 506         size_t size;
 507         char *buffer;
 508
 509         size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 510                                        (char**) &buffer);
 511         if (size == (size_t)-1) {
 512                 SAFE_FREE(buffer);
 513                 return size;
 514         }
 515         if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
 516                 free(buffer);
 517                 return srclen;
 518         }
 519
 520         size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 521         free(buffer);
 522         return size;
 523 }
 524
 525 size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 526 {
 527         size_t size;
 528         char *buffer;
 529
 530         size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 531                                        (char **) &buffer);
 532         if (size == (size_t)-1) {
 533                 SAFE_FREE(buffer);
 534                 return size;
 535         }
 536         if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
 537                 free(buffer);
 538                 return srclen;
 539         }
 540
 541         size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 542         free(buffer);
 543         return size;
 544 }
 545
 546
 547 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 548 {
 549         return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
 550 }
 551
 552 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 553 {
 554         return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
 555 }
 556
 557 size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 558 {
 559         return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
 560 }
 561
 562 size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 563 {
 564         return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
 565 }
 566
 567 /**
 568  * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
 569  *
 570  * @param dest always set at least to NULL
 571  *
 572  * @returns The number of bytes occupied by the string in the destination
 573  *         or -1 in case of error.
 574  **/
 575
 576 size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
 577 {
 578         size_t src_len = strlen(src);
 579
 580         *dest = NULL;
 581         return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
 582 }
 583
 584 /**
 585  * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
 586  *
 587  * @param dest always set at least to NULL
 588  *
 589  * @returns The number of bytes occupied by the string in the destination
 590  **/
 591
 592 size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
 593 {
 594         size_t src_len = strlen(src);
 595
 596         *dest = NULL;
 597         return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
 598 }
 599
 600 /**
 601  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
 602  *
 603  * @param dest always set at least to NULL
 604  *
 605  * @returns The number of bytes occupied by the string in the destination
 606  **/
 607
 608 size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen)
 609 {
 610         size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
 611         return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
 612 }
 613
 614
 615 size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
 616 {
 617         size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
 618         *dest = NULL;
 619         return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
 620 }
 621
 622 /**
 623  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
 624  *
 625  * @param dest always set at least to NULL
 626  *
 627  * @returns The number of bytes occupied by the string in the destination
 628  **/
 629
 630 size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
 631 {
 632         size_t src_len = strlen(src);
 633         *dest = NULL;
 634         return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
 635 }
 636
 637 size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 638 {
 639         char *buffer;
 640         ucs2_t u[MAXPATHLEN];
 641         size_t len;
 642         size_t ilen;
 643
 644         if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 645             return len;
 646
 647         ilen=sizeof(u);
 648
 649         if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
 650             free (buffer);
 651             return (size_t)(-1);
 652         }
 653
 654         if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
 655             free (buffer);
 656             return (size_t)(-1);
 657         }
 658
 659         free(buffer);
 660         dst[len] = 0;
 661         return (len);
 662 }
 663
 664 size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 665 {
 666         char *buffer;
 667         ucs2_t u[MAXPATHLEN];
 668         size_t len;
 669         size_t ilen;
 670
 671         if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 672             return len;
 673
 674         ilen=sizeof(u);
 675
 676         if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
 677             free (buffer);
 678             return (size_t)(-1);
 679         }
 680
 681         if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
 682             free (buffer);
 683             return (size_t)(-1);
 684         }
 685
 686         free(buffer);
 687         dst[len] = 0;
 688         return (len);
 689 }
 690
 691 size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
 692 {
 693         return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
 694 }
 695
 696 size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
 697 {
 698         return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
 699 }
 700
 701 static char  debugbuf[ MAXPATHLEN +1 ];
 702 char * debug_out ( char * seq, size_t len)
 703 {
 704         size_t i = 0;
 705         unsigned char *p;
 706         char *q;
 707
 708         p = (unsigned char*) seq;
 709         q = debugbuf;
 710
 711         for ( i = 0; i<=(len-1); i++)
 712         {
 713                 sprintf(q, "%2.2x.", *p);
 714                 q += 3;
 715                 p++;
 716         }
 717         *q=0;
 718         q = debugbuf;
 719         return q;
 720 }
 721
 722 /*
 723  * Convert from MB to UCS2 charset
 724  * Flags:
 725  *              CONV_UNESCAPEHEX:        ':XX' will be converted to an UCS2 character
 726  *              CONV_IGNORE:             return the first convertable characters.
 727  * FIXME:
 728  *              This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 729  *              The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it
 730  *              for e.g. HFS cdroms.
 731  */
 732
 733 static size_t pull_charset_flags (charset_t from_set, charset_t cap_charset, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
 734 {
 735         size_t i_len, o_len, hlen;
 736         size_t retval, j = 0;
 737         const char* inbuf = (const char*)src;
 738         char* outbuf = (char*)dest;
 739         atalk_iconv_t descriptor;
 740         atalk_iconv_t descriptor_cap;
 741         char *o_save, *s;
 742         char h[MAXPATHLEN];
 743         const char *h_buf;
 744
 745         if (srclen == (size_t)-1)
 746                 srclen = strlen(src)+1;
 747
 748         lazy_initialize_conv();
 749
 750         descriptor = conv_handles[from_set][CH_UCS2];
 751         descriptor_cap = conv_handles[cap_charset][CH_UCS2];
 752
 753         if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 754                 return (size_t) -1;
 755         }
 756
 757         i_len=srclen;
 758         o_len=destlen;
 759         o_save=outbuf;
 760
 761 conversion_loop:
 762         if ( flags && (*flags & CONV_UNESCAPEHEX)) {
 763                 if ( NULL != (s = strchr ( inbuf, ':'))) {
 764                         j = i_len - (s - inbuf);
 765                         if ( 0 == (i_len = (s - inbuf)))
 766                                 goto unhex_char;
 767         }
 768         }
 769
 770         retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 771         if(retval==(size_t)-1) {
 772             if (errno == EILSEQ && flags && (*flags & CONV_IGNORE)) {
 773                                 *flags |= CONV_REQMANGLE;
 774                                 return destlen-o_len;
 775             }
 776             else
 777                 return (size_t) -1;
 778     }
 779
 780 unhex_char:
 781         if (j && flags && (*flags & CONV_UNESCAPEHEX )) {
 782                 /* we're at the start on an hex encoded ucs2 char */
 783                 if (o_len < 2) {
 784                         errno = E2BIG;
 785                         return (size_t) -1;
 786                 }
 787                 if ( j >= 3 &&
 788                         isxdigit( *(inbuf+1)) && isxdigit( *(inbuf+2)) ) {
 789                         hlen = 0;
 790                         while ( *inbuf == ':' && j >=3 &&
 791                                 isxdigit( *(inbuf+1)) && isxdigit( *(inbuf+2)) ) {
 792                                 inbuf++;
 793                                 h[hlen]   = hextoint( *inbuf ) << 4;
 794                                 inbuf++;
 795                                 h[hlen++] |= hextoint( *inbuf );
 796                                 inbuf++;
 797                                 j -= 3;
 798                         }
 799                         h_buf = (const char*) h;
 800                         if ((size_t) -1 == (retval = atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len)) ) {
 801                                 if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
 802                                         *flags |= CONV_REQMANGLE;
 803                                         return destlen-o_len;
 804                                 }
 805                                 else {
 806                                         return retval;
 807                                 }
 808                         }
 809                 }
 810                 else {
 811                         /* We have an invalid :xx sequence */
 812                         if (CHECK_FLAGS(flags, CONV_IGNORE)) {
 813                                 *flags |= CONV_REQMANGLE;
 814                                 return destlen-o_len;
 815                         }
 816                         else {
 817                                 errno=EILSEQ;
 818                                 return (size_t) -1;
 819                         }
 820                 }
 821                 i_len = j;
 822                 j = 0;
 823                 if (i_len > 0)
 824                         goto conversion_loop;
 825         }
 826
 827
 828
 829         return destlen-o_len;
 830 }
 831
 832 /*
 833  * Convert from UCS2 to MB charset
 834  * Flags:
 835  *              CONV_ESCAPEDOTS: escape leading dots
 836  *              CONV_ESCAPEHEX:  unconvertable characters and '/' will be escaped to :XX
 837  *              CONV_IGNORE:     unconvertable characters will be replaced with '_'
 838  * FIXME:
 839  *              CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ?
 840  *              This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 841  *              The escape scheme is not compatible to the old cap style escape. This is bad, we need it
 842  *              for e.g. HFS cdroms.
 843  */
 844
 845
 846 static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
 847 {
 848     size_t i_len, o_len, i;
 849     size_t retval, j = 0;
 850     const char* inbuf = (const char*)src;
 851     char* outbuf = (char*)dest;
 852     atalk_iconv_t descriptor;
 853     char *o_save;
 854     char *buf, *buf_save;
 855     size_t buflen;
 856
 857     lazy_initialize_conv();
 858
 859     descriptor = conv_handles[CH_UCS2][to_set];
 860
 861     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 862         return (size_t) -1;
 863     }
 864
 865     i_len=srclen;
 866     o_len=destlen;
 867     o_save=outbuf;
 868
 869     if ( SVAL(inbuf,0) == 0x002e && flags && (*flags & CONV_ESCAPEDOTS)) { /* 0x002e = . */
 870         if (o_len < 3) {
 871             errno = E2BIG;
 872             return (size_t) -1;
 873         }
 874         o_save[0] = ':';
 875         o_save[1] = '2';
 876         o_save[2] = 'e';
 877         o_len -= 3;
 878         inbuf += 2;
 879         i_len -= 2;
 880         outbuf = o_save + 3;
 881         if (flags) *flags |= CONV_REQESCAPE;
 882     }
 883
 884 conversion_loop:
 885     if ( flags && (*flags & CONV_ESCAPEHEX)) {
 886         for ( i = 0; i < i_len; i+=2) {
 887             if ( SVAL((inbuf+i),0) == 0x002f) { /* 0x002f = / */
 888                 j = i_len - i;
 889                 if ( 0 == ( i_len = i))
 890                     goto escape_slash;
 891                 break;
 892             } else if ( SVAL(inbuf+i,0) == 0x003a) { /* 0x003a = : */
 893                 errno = EILSEQ;
 894                 return (size_t) -1;
 895             }
 896         }
 897     }
 898
 899     retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 900     if (retval==(size_t)-1) {
 901         if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
 902             *flags |= CONV_REQMANGLE;
 903             return destlen -o_len;
 904         }
 905         else if ( errno == EILSEQ && flags && (*flags & CONV_ESCAPEHEX)) {
 906             if (o_len < 3) {
 907                 errno = E2BIG;
 908                 return (size_t) -1;
 909             }
 910             if ((size_t) -1 == (buflen = convert_string_allocate_internal(CH_UCS2, cap_set, inbuf, 2, &buf)) )
 911                 return buflen;
 912             buf_save = buf;
 913             while (buflen > 0) {
 914                 if ( o_len < 3) {
 915                         errno = E2BIG;
 916                         return (size_t) -1;
 917                 }
 918                 *outbuf++ = ':';
 919                 *outbuf++ = hexdig[ ( *buf & 0xf0 ) >> 4 ];
 920                 *outbuf++ = hexdig[ *buf & 0x0f ];
 921                 buf++;
 922                 buflen--;
 923                 o_len -= 3;
 924             }
 925             SAFE_FREE(buf_save);
 926             buflen = 0;
 927             i_len -= 2;
 928             inbuf += 2;
 929             if (flags) *flags |= CONV_REQESCAPE;
 930             if ( i_len > 0)
 931                 goto conversion_loop;
 932         }
 933         else
 934            return (size_t)(-1);
 935     }
 936
 937 escape_slash:
 938     if (j && flags && (*flags & CONV_ESCAPEHEX)) {
 939         if (o_len < 3) {
 940             errno = E2BIG;
 941             return (size_t) -1;
 942         }
 943         o_save[destlen -o_len]   = ':';
 944         o_save[destlen -o_len+1] = '2';
 945         o_save[destlen -o_len+2] = 'f';
 946         inbuf  += 2;
 947         i_len   = j-2;
 948         o_len  -= 3;
 949         outbuf += 3;
 950         j = 0;
 951         if ( i_len > 0)
 952                 goto conversion_loop;
 953     }
 954     return destlen -o_len;
 955 }
 956
 957 size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, char* src, size_t src_len, char* dest, size_t dest_len, u_int16_t *flags)
 958 {
 959         size_t i_len, o_len;
 960         ucs2_t *u;
 961         ucs2_t buffer[MAXPATHLEN];
 962         ucs2_t buffer2[MAXPATHLEN];
 963         int composition = 0;
 964
 965         lazy_initialize_conv();
 966
 967         /* convert from_set to UCS2 */
 968         if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len,
 969                                                           (char *) buffer, sizeof(buffer), flags)) ) {
 970                 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
 971                 return (size_t) -1;
 972         }
 973
 974         if ( o_len == 0)
 975                 return o_len;
 976
 977         /* Do pre/decomposition */
 978         if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) ||
 979                 ((!(charsets[to_set])   || !(charsets[to_set]->flags & CHARSET_DECOMPOSED)) &&
 980                 (!(charsets[from_set]) || (charsets[from_set]->flags & CHARSET_DECOMPOSED))))
 981             composition = 1;
 982         if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && charsets[to_set]->flags & CHARSET_DECOMPOSED) )
 983             composition = 2;
 984
 985         i_len = sizeof(buffer2);
 986         u = buffer2;
 987
 988         switch (composition) {
 989         case 0:
 990             u = buffer;
 991             i_len = o_len;
 992             break;
 993         case 1:
 994             if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
 995                 return (size_t)(-1);
 996             break;
 997         case 2:
 998             if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
 999                 return (size_t)(-1);
1000             break;
1001         }
1002
1003         /* Do case conversions */
1004         if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
1005             if (!strupper_w(u))
1006                 return (size_t)(-1);
1007         }
1008         if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
1009             if (!strlower_w(u))
1010                 return (size_t)(-1);
1011         }
1012
1013         /* Convert UCS2 to to_set */
1014         if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
1015                 LOG(log_error, logtype_default,
1016                        "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
1017                 return (size_t) -1;
1018         }
1019
1020         return o_len;
1021 }