libatalk/unicode/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif /* HAVE_CONFIG_H */
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <unistd.h>
  31 #include <string.h>
  32 #include <sys/param.h>
  33 #include <ctype.h>
  34 #include <sys/stat.h>
  35 #include <atalk/logger.h>
  36 #include <errno.h>
  37
  38 #include <netatalk/endian.h>
  39 #include <atalk/unicode.h>
  40
  41 #ifdef HAVE_USABLE_ICONV
  42 #include <iconv.h>
  43 #endif
  44
  45 #if HAVE_LOCALE_H
  46 #include <locale.h>
  47 #endif
  48
  49 #if HAVE_LANGINFO_H
  50 #include <langinfo.h>
  51 #endif
  52 #include "byteorder.h"
  53
  54
  55 /**
  56  * @file
  57  *
  58  * @brief Character-set conversion routines built on our iconv.
  59  *
  60  * @note Samba's internal character set (at least in the 3.0 series)
  61  * is always the same as the one for the Unix filesystem.  It is
  62  * <b>not</b> necessarily UTF-8 and may be different on machines that
  63  * need i18n filenames to be compatible with Unix software.  It does
  64  * have to be a superset of ASCII.  All multibyte sequences must start
  65  * with a byte with the high bit set.
  66  *
  67  * @sa lib/iconv.c
  68  */
  69
  70
  /* Capacity of each per-charset table declared below. */
  #define MAX_CHARSETS 10

  /* Yields (*a & b) when the flag pointer a is non-NULL, and 0 otherwise. */
  #define CHECK_FLAGS(a,b) ((a) ? (*(a) & (b)) : 0)
  74
  75 static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
  76 static char* charset_names[MAX_CHARSETS];
  77 static struct charset_functions* charsets[MAX_CHARSETS];
  /* Lower-case hex digits used when a byte is escaped as ":xx". */
  static const char hexdig[] = "0123456789abcdef";

  /*
   * Numeric value (0..15) of the hexadecimal digit character c.
   *
   * Accepts both upper- and lower-case digits, so every character that
   * passes isxdigit() decodes correctly.  The argument is converted to
   * unsigned char before it reaches the <ctype.h> functions, as they
   * require.  Note: c is evaluated more than once.
   */
  #define hextoint( c )   ( isdigit( (unsigned char)(c) ) \
                            ? (c) - '0' \
                            : tolower( (unsigned char)(c) ) - 'a' + 10 )
  80
  81 /**
  82  * Return the name of a charset to give to iconv().
  83  **/
  84 static const char *charset_name(charset_t ch)
  85 {
  86         const char *ret = NULL;
  87
  88         if (ch == CH_UCS2) ret = "UCS-2";
  89         else if (ch == CH_UNIX) ret = "LOCALE"; /*lp_unix_charset();*/
  90         else if (ch == CH_MAC) ret = "MAC_ROMAN"; /*lp_display_charset();*/
  91         else if (ch == CH_UTF8) ret = "UTF8";
  92         else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
  93
  94         if (!ret)
  95                 ret = charset_names[ch];
  96
  97 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  98         if (ret && strcasecmp(ret, "LOCALE") == 0) {
  99                 const char *ln = NULL;
 100
 101 #ifdef HAVE_SETLOCALE
 102                 setlocale(LC_ALL, "");
 103 #endif
 104                 ln = nl_langinfo(CODESET);
 105                 if (ln) {
 106                         /* Check whether the charset name is supported
 107                            by iconv */
 108                         atalk_iconv_t handle = atalk_iconv_open(ln,"UCS-2");
 109                         if (handle == (atalk_iconv_t) -1) {
 110                                 LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln);
 111                                 ln = NULL;
 112                         } else {
 113                                 atalk_iconv_close(handle);
 114                         }
 115                 }
 116                 ret = ln;
 117         }
 118 #else /* system doesn't have LOCALE support */
 119 if (ch == CH_UNIX) ret = NULL;
 120 #endif
 121
 122         if (!ret || !*ret) ret = "ASCII";
 123         return ret;
 124 }
 125
 126 struct charset_functions* get_charset_functions (charset_t ch)
 127 {
 128         if (charsets[ch] != NULL)
 129                 return charsets[ch];
 130
 131         charsets[ch] = find_charset_functions(charset_name(ch));
 132
 133         return charsets[ch];
 134 }
 135
 136
 /* Run init_iconv() on the first call only; later calls do nothing. */
 void lazy_initialize_conv(void)
 {
         static int initialized = 0;

         if (initialized)
                 return;

         initialized = 1;
         init_iconv();
 }
 146
 147 charset_t add_charset(char* name)
 148 {
 149         static charset_t max_charset_t = NUM_CHARSETS-1;
 150         charset_t cur_charset_t = max_charset_t+1;
 151         unsigned int c1;
 152
 153         lazy_initialize_conv();
 154
 155         for (c1=0; c1<=max_charset_t;c1++) {
 156                 if ( strcasecmp(name, charset_name(c1)) == 0)
 157                         return (c1);
 158         }
 159
 160         if ( cur_charset_t >= MAX_CHARSETS )  {
 161                 LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
 162                         name, MAX_CHARSETS);
 163                 return (charset_t) -1;
 164         }
 165
 166         /* First try to setup the required conversions */
 167
 168         conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 169         if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
 170                 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 171                         name,  charset_name(CH_UCS2));
 172                 conv_handles[cur_charset_t][CH_UCS2] = NULL;
 173                 return (charset_t) -1;
 174         }
 175
 176         conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
 177         if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
 178                 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 179                         charset_name(CH_UCS2), name);
 180                 conv_handles[CH_UCS2][cur_charset_t] = NULL;
 181                 return (charset_t) -1;
 182         }
 183
 184         /* register the new charset_t name */
 185         charset_names[cur_charset_t] = strdup(name);
 186
 187         charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
 188         max_charset_t++;
 189
 190 #ifdef DEBUG
 191         LOG(log_debug, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
 192 #endif /* DEBUG */
 193         return (cur_charset_t);
 194 }
 195
 196 /**
 197  * Initialize iconv conversion descriptors.
 198  *
 199  * This is called the first time it is needed, and also called again
 200  * every time the configuration is reloaded, because the charset or
 201  * codepage might have changed.
 202  **/
 203 void init_iconv(void)
 204 {
 205         int c1;
 206
 207         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 208            first */
 209         if (!conv_handles[CH_UNIX][CH_UCS2])
 210                 conv_handles[CH_UNIX][CH_UCS2] = atalk_iconv_open("UCS-2", "ASCII");
 211
 212         if (!conv_handles[CH_UCS2][CH_UNIX])
 213                 conv_handles[CH_UCS2][CH_UNIX] = atalk_iconv_open("ASCII", "UCS-2");
 214
 215         for (c1=0;c1<NUM_CHARSETS;c1++) {
 216                 const char *name = charset_name((charset_t)c1);
 217
 218                 conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 219                 if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
 220                         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 221                                 name,  charset_name(CH_UCS2));
 222                         conv_handles[c1][CH_UCS2] = NULL;
 223                 }
 224
 225                 conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
 226                 if (conv_handles[CH_UCS2][1] == (atalk_iconv_t)-1) {
 227                         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 228                                 charset_name(CH_UCS2), name);
 229                         conv_handles[c1][c1] = NULL;
 230                 }
 231
 232                 charsets[c1] = get_charset_functions (c1);
 233         }
 234 }
 235
 236 /**
 237  * Convert string from one encoding to another, making error checking etc
 238  *
 239  * @param src pointer to source string (multibyte or singlebyte)
 240  * @param srclen length of the source string in bytes
 241  * @param dest pointer to destination string (multibyte or singlebyte)
 242  * @param destlen maximal length allowed for string
 243  * @returns the number of bytes occupied in the destination
 244  **/
 245 static size_t convert_string_internal(charset_t from, charset_t to,
 246                       void const *src, size_t srclen,
 247                       void *dest, size_t destlen)
 248 {
 249         size_t i_len, o_len;
 250         size_t retval;
 251         const char* inbuf = (const char*)src;
 252         char* outbuf = (char*)dest;
 253         char* o_save = outbuf;
 254         atalk_iconv_t descriptor;
 255
 256         if (srclen == (size_t)-1)
 257                 srclen = strlen(src)+1;
 258
 259         lazy_initialize_conv();
 260
 261         descriptor = conv_handles[from][to];
 262
 263         if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 264                 return (size_t) -1;
 265         }
 266
 267         i_len=srclen;
 268         o_len=destlen;
 269         retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 270         if(retval==(size_t)-1) {
 271                 const char *reason="unknown error";
 272                 switch(errno) {
 273                         case EINVAL:
 274                                 reason="Incomplete multibyte sequence";
 275                                 break;
 276                         case E2BIG:
 277                                 reason="No more room";
 278                                break;
 279                         case EILSEQ:
 280                                reason="Illegal multibyte sequence";
 281                                break;
 282                 }
 283                 LOG(log_debug, logtype_default,"Conversion error: %s(%s)\n",reason,inbuf);
 284                 return (size_t)-1;
 285         }
 286
 287         /* Terminate the string */
 288         if (to == CH_UCS2 && destlen-o_len >= 2) {
 289                 o_save[destlen-o_len]   = 0;
 290                 o_save[destlen-o_len+1] = 0;
 291         }
 292         else if ( destlen-o_len > 0)
 293                 o_save[destlen-o_len] = 0;
 294
 295         return destlen-o_len;
 296 }
 297
 298
 299 size_t convert_string(charset_t from, charset_t to,
 300                       void const *src, size_t srclen,
 301                       void *dest, size_t destlen)
 302 {
 303         size_t i_len, o_len;
 304         char *u;
 305         char buffer[MAXPATHLEN];
 306         char buffer2[MAXPATHLEN];
 307         int composition = 0;
 308
 309         lazy_initialize_conv();
 310
 311         /* convert from_set to UCS2 */
 312         if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen, buffer, MAXPATHLEN)) ) {
 313                 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 314                 return (size_t) -1;
 315         }
 316
 317         /* Do pre/decomposition */
 318         if ( ((!(charsets[to])   || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
 319                 (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
 320             composition = 1;
 321         if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
 322             composition = 2;
 323
 324         i_len = MAXPATHLEN;
 325         u = buffer2;
 326
 327         switch (composition) {
 328         case 0:
 329             u = buffer;
 330             i_len = o_len;
 331             break;
 332         case 1:
 333             if ( (size_t)-1 == (i_len = precompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
 334                 return (size_t)(-1);
 335             break;
 336         case 2:
 337             if ( (size_t)-1 == (i_len = decompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
 338                 return (size_t)(-1);
 339             break;
 340         }
 341
 342         /* Convert UCS2 to to_set */
 343         if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, u, i_len, dest, destlen)) ) {
 344                 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 345                 return (size_t) -1;
 346         }
 347
 348         return o_len;
 349 }
 350
 351
 352
 353 /**
 354  * Convert between character sets, allocating a new buffer for the result.
 355  *
 356  * @param srclen length of source buffer.
 357  * @param dest always set at least to NULL
 358  * @note -1 is not accepted for srclen.
 359  *
 360  * @returns Size in bytes of the converted string; or -1 in case of error.
 361  **/
 362
 363 static size_t convert_string_allocate_internal(charset_t from, charset_t to,
 364                                void const *src, size_t srclen, char **dest)
 365 {
 366         size_t i_len, o_len, destlen;
 367         size_t retval;
 368         const char *inbuf = (const char *)src;
 369         char *outbuf, *ob;
 370         atalk_iconv_t descriptor;
 371
 372         *dest = NULL;
 373
 374         if (src == NULL || srclen == (size_t)-1)
 375                 return (size_t)-1;
 376
 377         lazy_initialize_conv();
 378
 379         descriptor = conv_handles[from][to];
 380
 381         if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 382                 /* conversion not supported, return -1*/
 383                 LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!\n");
 384                 return -1;
 385         }
 386
 387         destlen = MAX(srclen, 512);
 388         outbuf = NULL;
 389 convert:
 390         destlen = destlen * 2;
 391         ob = (char *)realloc(outbuf, destlen);
 392         if (!ob) {
 393                 LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!\n");
 394                 SAFE_FREE(outbuf);
 395                 return (size_t)-1;
 396         } else {
 397                 outbuf = ob;
 398         }
 399         i_len = srclen;
 400         o_len = destlen;
 401         retval = atalk_iconv(descriptor,
 402                            &inbuf, &i_len,
 403                            &outbuf, &o_len);
 404         if(retval == (size_t)-1)                {
 405                 const char *reason="unknown error";
 406                 switch(errno) {
 407                         case EINVAL:
 408                                 reason="Incomplete multibyte sequence";
 409                                 break;
 410                         case E2BIG:
 411                                 goto convert;
 412                         case EILSEQ:
 413                                 reason="Illegal multibyte sequence";
 414                                 break;
 415                 }
 416                 LOG(log_debug, logtype_default,"Conversion error: %s(%s)\n",reason,inbuf);
 417                 /* smb_panic(reason); */
 418                 return (size_t)-1;
 419         }
 420
 421
 422         destlen = destlen - o_len;
 423
 424         /* Terminate the string */
 425         if (to == CH_UCS2 && destlen-o_len >= 2) {
 426                 ob[destlen] = 0;
 427                 ob[destlen+1] = 0;
 428                 *dest = (char *)realloc(ob,destlen+2);
 429         }
 430         else if ( destlen-o_len > 0) {
 431                 ob[destlen] = 0;
 432                 *dest = (char *)realloc(ob,destlen+1);
 433         }
 434
 435         if (destlen && !*dest) {
 436                 LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!\n");
 437                 SAFE_FREE(ob);
 438                 return (size_t)-1;
 439         }
 440
 441         return destlen;
 442 }
 443
 444
 445 size_t convert_string_allocate(charset_t from, charset_t to,
 446                       void const *src, size_t srclen,
 447                       char ** dest)
 448 {
 449         size_t i_len, o_len;
 450         char *u;
 451         char buffer[MAXPATHLEN];
 452         char buffer2[MAXPATHLEN];
 453         int composition = 0;
 454
 455         lazy_initialize_conv();
 456
 457         /* convert from_set to UCS2 */
 458         if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen, buffer, MAXPATHLEN)) ) {
 459                 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 460                 return (size_t) -1;
 461         }
 462
 463         /* Do pre/decomposition */
 464         if ( ((!(charsets[to])   || !(charsets[to]->flags & CHARSET_DECOMPOSED)) &&
 465                 (!(charsets[from]) || (charsets[from]->flags & CHARSET_DECOMPOSED))))
 466             composition = 1;
 467         if ((charsets[to] && charsets[to]->flags & CHARSET_DECOMPOSED) )
 468             composition = 2;
 469
 470         i_len = MAXPATHLEN;
 471         u = buffer2;
 472
 473         switch (composition) {
 474         case 0:
 475             u = buffer;
 476             i_len = o_len;
 477             break;
 478         case 1:
 479             if ( (size_t)-1 == (i_len = precompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
 480                 return (size_t)(-1);
 481             break;
 482         case 2:
 483             if ( (size_t)-1 == (i_len = decompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
 484                 return (size_t)(-1);
 485             break;
 486         }
 487
 488         /* Convert UCS2 to to_set */
 489         if ((size_t)(-1) == ( o_len = convert_string_allocate_internal( CH_UCS2, to, u, i_len, dest)) )
 490                 LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 491
 492         return o_len;
 493
 494 }
 495
 496 size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 497 {
 498         size_t size;
 499         char *buffer;
 500
 501         size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 502                                        (char**) &buffer);
 503         if (size == (size_t)-1) {
 504                 free(buffer);
 505                 return size;
 506         }
 507         if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
 508                 free(buffer);
 509                 return srclen;
 510         }
 511
 512         size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 513         free(buffer);
 514         return size;
 515 }
 516
 517 size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 518 {
 519         size_t size;
 520         char *buffer;
 521
 522         size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 523                                        (char **) &buffer);
 524         if (size == (size_t)-1) {
 525                 free(buffer);
 526                 return size;
 527         }
 528         if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
 529                 free(buffer);
 530                 return srclen;
 531         }
 532
 533         size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 534         free(buffer);
 535         return size;
 536 }
 537
 538
 539 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 540 {
 541         return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
 542 }
 543
 544 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 545 {
 546         return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
 547 }
 548
 549 size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 550 {
 551         return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
 552 }
 553
 554 size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 555 {
 556         return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
 557 }
 558
 559 /**
 560  * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
 561  *
 562  * @param dest always set at least to NULL
 563  *
 564  * @returns The number of bytes occupied by the string in the destination
 565  *         or -1 in case of error.
 566  **/
 567
 568 size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
 569 {
 570         size_t src_len = strlen(src)+1;
 571
 572         *dest = NULL;
 573         return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
 574 }
 575
 576 /**
 577  * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
 578  *
 579  * @param dest always set at least to NULL
 580  *
 581  * @returns The number of bytes occupied by the string in the destination
 582  **/
 583
 584 size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
 585 {
 586         size_t src_len = strlen(src)+1;
 587
 588         *dest = NULL;
 589         return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
 590 }
 591
 592 /**
 593  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
 594  *
 595  * @param dest always set at least to NULL
 596  *
 597  * @returns The number of bytes occupied by the string in the destination
 598  **/
 599
 600 size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
 601 {
 602         size_t src_len = (strlen_w(src)+1) * sizeof(ucs2_t);
 603         *dest = NULL;
 604         return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
 605 }
 606
 607 /**
 608  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
 609  *
 610  * @param dest always set at least to NULL
 611  *
 612  * @returns The number of bytes occupied by the string in the destination
 613  **/
 614
 615 size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
 616 {
 617         size_t src_len = strlen(src)+1;
 618         *dest = NULL;
 619         return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
 620 }
 621
 622 size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 623 {
 624         char *buffer;
 625         char u[MAXPATHLEN];
 626         size_t len;
 627         size_t ilen;
 628
 629         if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 630             return len;
 631
 632         ilen=MAXPATHLEN;
 633
 634         if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, (ucs2_t *)u, &ilen)) ) {
 635             free (buffer);
 636             return (size_t)(-1);
 637         }
 638
 639         if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, u, ilen, dst, outlen)) ) {
 640             free (buffer);
 641             return (size_t)(-1);
 642         }
 643
 644         free(buffer);
 645         dst[len] = 0;
 646         return (len);
 647 }
 648
 649 size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 650 {
 651         char *buffer;
 652         char u[MAXPATHLEN];
 653         size_t len;
 654         size_t ilen;
 655
 656         if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 657             return len;
 658
 659         ilen=MAXPATHLEN;
 660
 661         if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, (ucs2_t *)u, &ilen)) ) {
 662             free (buffer);
 663             return (size_t)(-1);
 664         }
 665
 666         if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, u, ilen, dst, outlen)) ) {
 667             free (buffer);
 668             return (size_t)(-1);
 669         }
 670
 671         free(buffer);
 672         dst[len] = 0;
 673         return (len);
 674 }
 675
 676 size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
 677 {
 678         return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
 679 }
 680
 681 size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
 682 {
 683         return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
 684 }
 685
 static char  debugbuf[ MAXPATHLEN +1 ];

 /*
  * Format the first len bytes of seq as "xx." hex triplets for debugging.
  *
  * The text is written to a single static buffer, so every call
  * overwrites the previous result.  Input that does not fit is cut off
  * after the last complete triplet; len == 0 or seq == NULL yields an
  * empty string.
  *
  * @param seq bytes to dump
  * @param len number of bytes in seq
  * @returns pointer to the static, NUL-terminated dump buffer
  */
 char * debug_out ( char * seq, size_t len)
 {
         /* each byte takes three characters; keep one for the NUL */
         const size_t max_bytes = (sizeof(debugbuf) - 1) / 3;
         const unsigned char *p = (const unsigned char *) seq;
         char *q = debugbuf;
         size_t i;

         if (seq == NULL)
                 len = 0;
         if (len > max_bytes)
                 len = max_bytes;

         for ( i = 0; i < len; i++)
         {
                 snprintf(q, 4, "%2.2x.", (unsigned int) p[i]);
                 q += 3;
         }
         *q=0;
         return debugbuf;
 }
 706
 707 /*
 708  * Convert from MB to UCS2 charset
 709  * Flags:
 710  *              CONV_UNESCAPEHEX:        ':XX' will be converted to an UCS2 character
 711  *              CONV_IGNORE:             return the first convertable characters.
 712  * FIXME:
 713  *              This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 714  *              The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it
 715  *              for e.g. HFS cdroms.
 716  */
 717
 718 static size_t pull_charset_flags (charset_t from_set, charset_t cap_charset, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
 719 {
 720         size_t i_len, o_len, hlen;
 721         size_t retval, j = 0;
 722         const char* inbuf = (const char*)src;
 723         char* outbuf = (char*)dest;
 724         atalk_iconv_t descriptor;
 725         atalk_iconv_t descriptor_cap;
 726         char *o_save, *s;
 727         char h[MAXPATHLEN];
 728         const char *h_buf;
 729
 730         if (srclen == (size_t)-1)
 731                 srclen = strlen(src)+1;
 732
 733         lazy_initialize_conv();
 734
 735         descriptor = conv_handles[from_set][CH_UCS2];
 736         descriptor_cap = conv_handles[cap_charset][CH_UCS2];
 737
 738         if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 739                 return (size_t) -1;
 740         }
 741
 742         i_len=srclen;
 743         o_len=destlen;
 744         o_save=outbuf;
 745
 746 conversion_loop:
 747         if ( flags && (*flags & CONV_UNESCAPEHEX)) {
 748                 if ( NULL != (s = strchr ( inbuf, ':'))) {
 749                         j = i_len - (s - inbuf);
 750                         if ( 0 == (i_len = (s - inbuf)))
 751                                 goto unhex_char;
 752         }
 753         }
 754
 755         retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 756         if(retval==(size_t)-1) {
 757             if (errno == EILSEQ && flags && (*flags & CONV_IGNORE)) {
 758                                 *flags |= CONV_REQMANGLE;
 759                                 return destlen-o_len;
 760             }
 761             else
 762                 return (size_t) -1;
 763     }
 764
 765 unhex_char:
 766         if (j && flags && (*flags & CONV_UNESCAPEHEX )) {
 767                 /* we're at the start on an hex encoded ucs2 char */
 768                 if (o_len < 2) {
 769                         errno = E2BIG;
 770                         return (size_t) -1;
 771                 }
 772                 if ( j >= 3 &&
 773                         isxdigit( *(inbuf+1)) && isxdigit( *(inbuf+2)) ) {
 774                         hlen = 0;
 775                         while ( *inbuf == ':' && j >=3 &&
 776                                 isxdigit( *(inbuf+1)) && isxdigit( *(inbuf+2)) ) {
 777                                 inbuf++;
 778                                 h[hlen]   = hextoint( *inbuf ) << 4;
 779                                 inbuf++;
 780                                 h[hlen++] |= hextoint( *inbuf );
 781                                 inbuf++;
 782                                 j -= 3;
 783                         }
 784                         h_buf = (const char*) h;
 785                         if ((size_t) -1 == (retval = atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len)) ) {
 786                                 if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
 787                                         *flags |= CONV_REQMANGLE;
 788                                         return destlen-o_len;
 789                                 }
 790                                 else {
 791                                         return retval;
 792                                 }
 793                         }
 794                 }
 795                 else {
 796                         /* We have an invalid :xx sequence */
 797                         if (CHECK_FLAGS(flags, CONV_IGNORE)) {
 798                                 *flags |= CONV_REQMANGLE;
 799                                 return destlen-o_len;
 800                         }
 801                         else {
 802                                 errno=EILSEQ;
 803                                 return (size_t) -1;
 804                         }
 805                 }
 806                 i_len = j;
 807                 j = 0;
 808                 if (i_len > 0)
 809                         goto conversion_loop;
 810         }
 811
 812
 813
 814         return destlen-o_len;
 815 }
 816
 817 /*
 818  * Convert from UCS2 to MB charset
 819  * Flags:
 820  *              CONV_ESCAPEDOTS: escape leading dots
 821  *              CONV_ESCAPEHEX:  unconvertable characters and '/' will be escaped to :XX
 822  *              CONV_IGNORE:     unconvertable characters will be replaced with '_'
 823  * FIXME:
 824  *              CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ?
 825  *              This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 826  *              The escape scheme is not compatible to the old cap style escape. This is bad, we need it
 827  *              for e.g. HFS cdroms.
 828  */
 829
 830
 831 static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
 832 {
 833     size_t i_len, o_len, i;
 834     size_t retval, j = 0;
 835     const char* inbuf = (const char*)src;
 836     char* outbuf = (char*)dest;
 837     atalk_iconv_t descriptor;
 838     char *o_save;
 839     char *buf, *buf_save;
 840     size_t buflen;
 841
 842     lazy_initialize_conv();
 843
 844     descriptor = conv_handles[CH_UCS2][to_set];
 845
 846     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 847         return (size_t) -1;
 848     }
 849
 850     i_len=srclen;
 851     o_len=destlen;
 852     o_save=outbuf;
 853
 854     if ( SVAL(inbuf,0) == 0x002e && flags && (*flags & CONV_ESCAPEDOTS)) { /* 0x002e = . */
 855         if (o_len < 3) {
 856             errno = E2BIG;
 857             return (size_t) -1;
 858         }
 859         o_save[0] = ':';
 860         o_save[1] = '2';
 861         o_save[2] = 'e';
 862         o_len -= 3;
 863         inbuf += 2;
 864         i_len -= 2;
 865         outbuf = o_save + 3;
 866         if (flags) *flags |= CONV_REQESCAPE;
 867     }
 868
 869 conversion_loop:
 870     if ( flags && (*flags & CONV_ESCAPEHEX)) {
 871         for ( i = 0; i < i_len; i+=2) {
 872             if ( SVAL((inbuf+i),0) == 0x002f) { /* 0x002f = / */
 873                 j = i_len - i;
 874                 if ( 0 == ( i_len = i))
 875                     goto escape_slash;
 876                 break;
 877             } else if ( SVAL(inbuf+i,0) == 0x003a) { /* 0x003a = : */
 878                 errno = EILSEQ;
 879                 return (size_t) -1;
 880             }
 881         }
 882     }
 883
 884     retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 885     if (retval==(size_t)-1) {
 886         if (errno == EILSEQ && CHECK_FLAGS(flags, CONV_IGNORE)) {
 887             *flags |= CONV_REQMANGLE;
 888             return destlen -o_len;
 889         }
 890         else if ( errno == EILSEQ && flags && (*flags & CONV_ESCAPEHEX)) {
 891             if (o_len < 3) {
 892                 errno = E2BIG;
 893                 return (size_t) -1;
 894             }
 895             if ((size_t) -1 == (buflen = convert_string_allocate_internal(CH_UCS2, cap_set, inbuf, 2, &buf)) )
 896                 return buflen;
 897             buf_save = buf;
 898             while (buflen > 0) {
 899                 if ( o_len < 3) {
 900                         errno = E2BIG;
 901                         return (size_t) -1;
 902                 }
 903                 *outbuf++ = ':';
 904                 *outbuf++ = hexdig[ ( *buf & 0xf0 ) >> 4 ];
 905                 *outbuf++ = hexdig[ *buf & 0x0f ];
 906                 buf++;
 907                 buflen--;
 908                 o_len -= 3;
 909             }
 910             SAFE_FREE(buf_save);
 911             buflen = 0;
 912             i_len -= 2;
 913             inbuf += 2;
 914             if (flags) *flags |= CONV_REQESCAPE;
 915             if ( i_len > 0)
 916                 goto conversion_loop;
 917         }
 918         else
 919            return (size_t)(-1);
 920     }
 921
 922 escape_slash:
 923     if (j && flags && (*flags & CONV_ESCAPEHEX)) {
 924         if (o_len < 3) {
 925             errno = E2BIG;
 926             return (size_t) -1;
 927         }
 928         o_save[destlen -o_len]   = ':';
 929         o_save[destlen -o_len+1] = '2';
 930         o_save[destlen -o_len+2] = 'f';
 931         inbuf  += 2;
 932         i_len   = j-2;
 933         o_len  -= 3;
 934         outbuf += 3;
 935         j = 0;
 936         if ( i_len > 0)
 937                 goto conversion_loop;
 938     }
 939     return destlen -o_len;
 940 }
 941
 942 size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, char* src, size_t src_len, char* dest, size_t dest_len, u_int16_t *flags)
 943 {
 944         size_t i_len, o_len;
 945         char *u;
 946         char buffer[MAXPATHLEN];
 947         char buffer2[MAXPATHLEN];
 948         int composition = 0;
 949
 950         lazy_initialize_conv();
 951
 952         /* convert from_set to UCS2 */
 953         if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len, buffer, MAXPATHLEN, flags)) ) {
 954                 LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
 955                 return (size_t) -1;
 956         }
 957
 958         if ( o_len == 0)
 959                 return o_len;
 960
 961         /* Do pre/decomposition */
 962         if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) ||
 963                 ((!(charsets[to_set])   || !(charsets[to_set]->flags & CHARSET_DECOMPOSED)) &&
 964                 (!(charsets[from_set]) || (charsets[from_set]->flags & CHARSET_DECOMPOSED))))
 965             composition = 1;
 966         if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && charsets[to_set]->flags & CHARSET_DECOMPOSED) )
 967             composition = 2;
 968
 969         i_len = MAXPATHLEN;
 970         u = buffer2;
 971
 972         switch (composition) {
 973         case 0:
 974             u = buffer;
 975             i_len = o_len;
 976             break;
 977         case 1:
 978             if ( (size_t)-1 == (i_len = precompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
 979                 return (size_t)(-1);
 980             break;
 981         case 2:
 982             if ( (size_t)-1 == (i_len = decompose_w((ucs2_t *)buffer, o_len, (ucs2_t *)u, &i_len)) )
 983                 return (size_t)(-1);
 984             break;
 985         }
 986
 987         /* Do case conversions */
 988         if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
 989             if (!strupper_w((ucs2_t *) u))
 990                 return (size_t)(-1);
 991         }
 992         if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
 993             if (!strlower_w((ucs2_t *) u))
 994                 return (size_t)(-1);
 995         }
 996
 997         /* Convert UCS2 to to_set */
 998         if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, u, i_len, dest, dest_len, flags )) ) {
 999                 LOG(log_error, logtype_default,
1000                        "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
1001                 return (size_t) -1;
1002         }
1003
1004         return o_len;
1005 }