libatalk/unicode/charcnv.c

   1 /*
   2   Unix SMB/CIFS implementation.
   3   Character set conversion Extensions
   4   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5   Copyright (C) Andrew Tridgell 2001
   6   Copyright (C) Simo Sorce 2001
   7   Copyright (C) Martin Pool 2003
   8
   9   This program is free software; you can redistribute it and/or modify
  10   it under the terms of the GNU General Public License as published by
  11   the Free Software Foundation; either version 2 of the License, or
  12   (at your option) any later version.
  13
  14   This program is distributed in the hope that it will be useful,
  15   but WITHOUT ANY WARRANTY; without even the implied warranty of
  16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17   GNU General Public License for more details.
  18
  19   You should have received a copy of the GNU General Public License
  20   along with this program; if not, write to the Free Software
  21   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif /* HAVE_CONFIG_H */
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <unistd.h>
  31 #include <string.h>
  32 #include <ctype.h>
  33 #include <errno.h>
  34 #include <sys/stat.h>
  35 #include <sys/param.h>
  36 #ifdef HAVE_USABLE_ICONV
  37 #include <iconv.h>
  38 #endif
  39 #include <arpa/inet.h>
  40
  41 #include <atalk/logger.h>
  42 #include <atalk/unicode.h>
  43 #include <atalk/util.h>
  44 #include <atalk/compat.h>
  45 #include <atalk/byteorder.h>
  46
  47
  48 /**
  49  * @file
  50  *
  51  * @brief Character-set conversion routines built on our iconv.
  52  *
  53  * @note Samba's internal character set (at least in the 3.0 series)
  54  * is always the same as the one for the Unix filesystem.  It is
  55  * <b>not</b> necessarily UTF-8 and may be different on machines that
  56  * need i18n filenames to be compatible with Unix software.  It does
  57  * have to be a superset of ASCII.  All multibyte sequences must start
  58  * with a byte with the high bit set.
  59  *
  60  * @sa lib/iconv.c
  61  */
  62
  63
/* Capacity of the per-charset tables below; add_charset() refuses to go beyond it. */
#define MAX_CHARSETS 20

/* Test the bits b through the optional flags pointer a; yields 0 when a is NULL. */
#define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )

/* conv_handles[from][to] is the iconv descriptor converting charset `from` into charset `to`. */
static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
/* Charset names indexed by charset_t; filled by set_charset_name() and add_charset(). */
static char* charset_names[MAX_CHARSETS];
/* Per-charset function tables, cached by get_charset_functions(). */
static struct charset_functions* charsets[MAX_CHARSETS];
  71 static char hexdig[] = "0123456789abcdef";
  72 #define hextoint( c )   ( isdigit( c ) ? c - '0' : c + 10 - 'a' )
  73
  74
  75 /**
  76  * Return the name of a charset to give to iconv().
  77  **/
  78 static const char *charset_name(charset_t ch)
  79 {
  80     const char *ret = NULL;
  81
  82     if (ch == CH_UCS2) ret = "UCS-2";
  83     else if (ch == CH_UTF8) ret = "UTF8";
  84     else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
  85     else ret = charset_names[ch];
  86     return ret;
  87 }
  88
  89 int set_charset_name(charset_t ch, const char *name)
  90 {
  91     if (ch >= NUM_CHARSETS)
  92         return -1;
  93     charset_names[ch] = strdup(name);
  94     return 0;
  95 }
  96
  97 static struct charset_functions* get_charset_functions (charset_t ch)
  98 {
  99     if (charsets[ch] != NULL)
 100         return charsets[ch];
 101
 102     charsets[ch] = find_charset_functions(charset_name(ch));
 103
 104     return charsets[ch];
 105 }
 106
 107
 108 static void lazy_initialize_conv(void)
 109 {
 110     static int initialized = 0;
 111
 112     if (!initialized) {
 113         initialized = 1;
 114         init_iconv();
 115     }
 116 }
 117
 118 charset_t add_charset(const char* name)
 119 {
 120     static charset_t max_charset_t = NUM_CHARSETS-1;
 121     charset_t cur_charset_t = max_charset_t+1;
 122     unsigned int c1;
 123
 124     lazy_initialize_conv();
 125
 126     for (c1=0; c1<=max_charset_t;c1++) {
 127         if ( strcasecmp(name, charset_name(c1)) == 0)
 128             return (c1);
 129     }
 130
 131     if ( cur_charset_t >= MAX_CHARSETS )  {
 132         LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
 133              name, MAX_CHARSETS);
 134         return (charset_t) -1;
 135     }
 136
 137     /* First try to setup the required conversions */
 138
 139     conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 140     if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
 141         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 142             name,  charset_name(CH_UCS2));
 143         conv_handles[cur_charset_t][CH_UCS2] = NULL;
 144         return (charset_t) -1;
 145     }
 146
 147     conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
 148     if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
 149         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 150             charset_name(CH_UCS2), name);
 151         conv_handles[CH_UCS2][cur_charset_t] = NULL;
 152         return (charset_t) -1;
 153     }
 154
 155     /* register the new charset_t name */
 156     charset_names[cur_charset_t] = strdup(name);
 157
 158     charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
 159     max_charset_t++;
 160
 161 #ifdef DEBUG
 162     LOG(log_debug9, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
 163 #endif
 164     return (cur_charset_t);
 165 }
 166
 167 /**
 168  * Initialize iconv conversion descriptors.
 169  *
 170  * This is called the first time it is needed, and also called again
 171  * every time the configuration is reloaded, because the charset or
 172  * codepage might have changed.
 173  **/
 174 void init_iconv(void)
 175 {
 176     int c1;
 177
 178     for (c1=0;c1<NUM_CHARSETS;c1++) {
 179         const char *name = charset_name((charset_t)c1);
 180
 181         conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 182         if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
 183             LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 184                 name,  charset_name(CH_UCS2));
 185             conv_handles[c1][CH_UCS2] = NULL;
 186         }
 187
 188         if (c1 != CH_UCS2) { /* avoid lost memory, make valgrind happy */
 189             conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
 190             if (conv_handles[CH_UCS2][c1] == (atalk_iconv_t)-1) {
 191                 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 192                     charset_name(CH_UCS2), name);
 193                 conv_handles[CH_UCS2][c1] = NULL;
 194             }
 195         }
 196
 197         charsets[c1] = get_charset_functions (c1);
 198     }
 199 }
 200
 201 /**
 202  *
 203  **/
 204 static size_t add_null(charset_t to, char *buf, size_t bytesleft, size_t len)
 205 {
 206     /* Terminate the string */
 207     if (to == CH_UCS2 && bytesleft >= 2) {
 208         buf[len]   = 0;
 209         buf[len+1] = 0;
 210
 211     }
 212     else if ( to != CH_UCS2 && bytesleft > 0 )
 213         buf[len]   = 0;
 214     else {
 215         errno = E2BIG;
 216         return (size_t)(-1);
 217     }
 218
 219     return len;
 220 }
 221
 222
 223 /**
 224  * Convert string from one encoding to another, making error checking etc
 225  *
 226  * @param src pointer to source string (multibyte or singlebyte)
 227  * @param srclen length of the source string in bytes
 228  * @param dest pointer to destination string (multibyte or singlebyte)
 229  * @param destlen maximal length allowed for string
 230  * @returns the number of bytes occupied in the destination
 231  **/
 232 static size_t convert_string_internal(charset_t from, charset_t to,
 233                                       void const *src, size_t srclen,
 234                                       void *dest, size_t destlen)
 235 {
 236     size_t i_len, o_len;
 237     size_t retval;
 238     const char* inbuf = (const char*)src;
 239     char* outbuf = (char*)dest;
 240     char* o_save = outbuf;
 241     atalk_iconv_t descriptor;
 242
 243     /* Fixed based on Samba 3.0.6 */
 244     if (srclen == (size_t)-1) {
 245         if (from == CH_UCS2) {
 246             srclen = (strlen_w((const ucs2_t *)src)) * 2;
 247         } else {
 248             srclen = strlen((const char *)src);
 249         }
 250     }
 251
 252
 253     lazy_initialize_conv();
 254
 255     descriptor = conv_handles[from][to];
 256
 257     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 258         return (size_t) -1;
 259     }
 260
 261     i_len=srclen;
 262     o_len=destlen;
 263     retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 264     if(retval==(size_t)-1) {
 265         const char *reason="unknown error";
 266         switch(errno) {
 267         case EINVAL:
 268             reason="Incomplete multibyte sequence";
 269             break;
 270         case E2BIG:
 271             reason="No more room";
 272             break;
 273         case EILSEQ:
 274             reason="Illegal multibyte sequence";
 275             break;
 276         }
 277         LOG(log_debug, logtype_default,"Conversion error: %s",reason);
 278         return (size_t)-1;
 279     }
 280
 281     /* Terminate the string */
 282     return add_null( to, o_save, o_len, destlen -o_len);
 283 }
 284
 285
 286 size_t convert_string(charset_t from, charset_t to,
 287                       void const *src, size_t srclen,
 288                       void *dest, size_t destlen)
 289 {
 290     size_t i_len, o_len;
 291     ucs2_t *u;
 292     ucs2_t buffer[MAXPATHLEN];
 293     ucs2_t buffer2[MAXPATHLEN];
 294
 295     /* convert from_set to UCS2 */
 296     if ((size_t)-1 == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
 297                                                            (char*) buffer, sizeof(buffer))) ) {
 298         LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 299         return (size_t) -1;
 300     }
 301
 302     /* Do pre/decomposition */
 303     i_len = sizeof(buffer2);
 304     u = buffer2;
 305     if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
 306         if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
 307             return (size_t)-1;
 308     }
 309     else if (!charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED)) {
 310         if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
 311             return (size_t)-1;
 312     }
 313     else {
 314         u = buffer;
 315         i_len = o_len;
 316     }
 317     /* Convert UCS2 to to_set */
 318     if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
 319         LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 320         return (size_t) -1;
 321     }
 322
 323     return o_len;
 324 }
 325
 326
 327
 328 /**
 329  * Convert between character sets, allocating a new buffer for the result.
 330  *
 331  * @param srclen length of source buffer.
 332  * @param dest always set at least to NULL
 333  * @note -1 is not accepted for srclen.
 334  *
 335  * @returns Size in bytes of the converted string; or -1 in case of error.
 336  **/
 337
 338 static size_t convert_string_allocate_internal(charset_t from, charset_t to,
 339                                                void const *src, size_t srclen, char **dest)
 340 {
 341     size_t i_len, o_len, destlen;
 342     size_t retval;
 343     const char *inbuf = (const char *)src;
 344     char *outbuf = NULL, *ob = NULL;
 345     atalk_iconv_t descriptor;
 346
 347     *dest = NULL;
 348
 349     if (src == NULL || srclen == (size_t)-1)
 350         return (size_t)-1;
 351
 352     lazy_initialize_conv();
 353
 354     descriptor = conv_handles[from][to];
 355
 356     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 357         /* conversion not supported, return -1*/
 358         LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!");
 359         return -1;
 360     }
 361
 362     destlen = MAX(srclen, 512);
 363 convert:
 364     destlen = destlen * 2;
 365     outbuf = (char *)realloc(ob, destlen);
 366     if (!outbuf) {
 367         LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!");
 368         SAFE_FREE(ob);
 369         return (size_t)-1;
 370     } else {
 371         ob = outbuf;
 372     }
 373     inbuf = src;   /* this restarts the whole conversion if buffer needed to be increased */
 374     i_len = srclen;
 375     o_len = destlen;
 376     retval = atalk_iconv(descriptor,
 377                          &inbuf, &i_len,
 378                          &outbuf, &o_len);
 379     if(retval == (size_t)-1)        {
 380         const char *reason="unknown error";
 381         switch(errno) {
 382         case EINVAL:
 383             reason="Incomplete multibyte sequence";
 384             break;
 385         case E2BIG:
 386             goto convert;
 387         case EILSEQ:
 388             reason="Illegal multibyte sequence";
 389             break;
 390         }
 391         LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf);
 392         SAFE_FREE(ob);
 393         return (size_t)-1;
 394     }
 395
 396
 397     destlen = destlen - o_len;
 398
 399     /* Terminate the string */
 400     if (to == CH_UCS2 && o_len >= 2) {
 401         ob[destlen] = 0;
 402         ob[destlen+1] = 0;
 403         *dest = (char *)realloc(ob,destlen+2);
 404     }
 405     else if ( to != CH_UCS2 && o_len > 0 ) {
 406         ob[destlen] = 0;
 407         *dest = (char *)realloc(ob,destlen+1);
 408     }
 409     else {
 410         goto convert; /* realloc */
 411     }
 412
 413     if (destlen && !*dest) {
 414         LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!");
 415         SAFE_FREE(ob);
 416         return (size_t)-1;
 417     }
 418
 419     return destlen;
 420 }
 421
 422
 423 size_t convert_string_allocate(charset_t from, charset_t to,
 424                                void const *src, size_t srclen,
 425                                char ** dest)
 426 {
 427     size_t i_len, o_len;
 428     ucs2_t *u;
 429     ucs2_t buffer[MAXPATHLEN];
 430     ucs2_t buffer2[MAXPATHLEN];
 431
 432     *dest = NULL;
 433
 434     /* convert from_set to UCS2 */
 435     if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
 436                                                            buffer, sizeof(buffer))) ) {
 437         LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 438         return (size_t) -1;
 439     }
 440
 441     /* Do pre/decomposition */
 442     i_len = sizeof(buffer2);
 443     u = buffer2;
 444     if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
 445         if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
 446             return (size_t)-1;
 447     }
 448     else if ( !charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED) ) {
 449         if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
 450             return (size_t)-1;
 451     }
 452     else {
 453         u = buffer;
 454         i_len = o_len;
 455     }
 456
 457     /* Convert UCS2 to to_set */
 458     if ((size_t)-1 == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
 459         LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 460
 461     return o_len;
 462
 463 }
 464
 465 size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 466 {
 467     size_t size;
 468     char *buffer;
 469
 470     size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 471                                             (char**) &buffer);
 472     if (size == (size_t)-1) {
 473         SAFE_FREE(buffer);
 474         return size;
 475     }
 476     if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
 477         free(buffer);
 478         return srclen;
 479     }
 480
 481     size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 482     free(buffer);
 483     return size;
 484 }
 485
 486 size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 487 {
 488     size_t size;
 489     char *buffer;
 490
 491     size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 492                                             (char **) &buffer);
 493     if (size == (size_t)-1) {
 494         SAFE_FREE(buffer);
 495         return size;
 496     }
 497     if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
 498         free(buffer);
 499         return srclen;
 500     }
 501
 502     size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 503     free(buffer);
 504     return size;
 505 }
 506
 507
 508 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 509 {
 510     return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
 511 }
 512
 513 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 514 {
 515     return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
 516 }
 517
 518 size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 519 {
 520     return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
 521 }
 522
 523 size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 524 {
 525     return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
 526 }
 527
 528 /**
 529  * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
 530  *
 531  * @param dest always set at least to NULL
 532  *
 533  * @returns The number of bytes occupied by the string in the destination
 534  *         or -1 in case of error.
 535  **/
 536
 537 size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
 538 {
 539     size_t src_len = strlen(src);
 540
 541     *dest = NULL;
 542     return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
 543 }
 544
 545 /** -----------------------------------
 546  * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
 547  *
 548  * @param dest always set at least to NULL
 549  *
 550  * @returns The number of bytes occupied by the string in the destination
 551  **/
 552
 553 size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
 554 {
 555     size_t src_len = strlen(src);
 556
 557     *dest = NULL;
 558     return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
 559 }
 560
 561 /** -----------------------------------
 562  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
 563  *
 564  * @param dest always set at least to NULL
 565  *
 566  * @returns The number of bytes occupied by the string in the destination
 567  **/
 568
 569 size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen)
 570 {
 571     size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
 572     return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
 573 }
 574
 575 /* --------------------------------- */
 576 size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
 577 {
 578     size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
 579     *dest = NULL;
 580     return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
 581 }
 582
 583 /** ---------------------------------
 584  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
 585  *
 586  * @param dest always set at least to NULL
 587  *
 588  * @returns The number of bytes occupied by the string in the destination
 589  **/
 590
 591 size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
 592 {
 593     size_t src_len = strlen(src);
 594     *dest = NULL;
 595     return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
 596 }
 597
 598 size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 599 {
 600     char *buffer;
 601     ucs2_t u[MAXPATHLEN];
 602     size_t len;
 603     size_t ilen;
 604
 605     if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 606         return len;
 607
 608     ilen=sizeof(u);
 609
 610     if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
 611         free (buffer);
 612         return (size_t)(-1);
 613     }
 614
 615     if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
 616         free (buffer);
 617         return (size_t)(-1);
 618     }
 619
 620     free(buffer);
 621     return (len);
 622 }
 623
 624 size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 625 {
 626     char *buffer;
 627     ucs2_t u[MAXPATHLEN];
 628     size_t len;
 629     size_t ilen;
 630
 631     if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 632         return len;
 633
 634     ilen=sizeof(u);
 635
 636     if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
 637         free (buffer);
 638         return (size_t)(-1);
 639     }
 640
 641     if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
 642         free (buffer);
 643         return (size_t)(-1);
 644     }
 645
 646     free(buffer);
 647     return (len);
 648 }
 649
 650 size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
 651 {
 652     return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
 653 }
 654
 655 size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
 656 {
 657     return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
 658 }
 659
 660 #if 0
 661 static char  debugbuf[ MAXPATHLEN +1 ];
 662 char * debug_out ( char * seq, size_t len)
 663 {
 664     size_t i = 0;
 665     unsigned char *p;
 666     char *q;
 667
 668     p = (unsigned char*) seq;
 669     q = debugbuf;
 670
 671     for ( i = 0; i<=(len-1); i++)
 672     {
 673         sprintf(q, "%2.2x.", *p);
 674         q += 3;
 675         p++;
 676     }
 677     *q=0;
 678     q = debugbuf;
 679     return q;
 680 }
 681 #endif
 682
 683 /*
 684  * Convert from MB to UCS2 charset
 685  * Flags:
 686  *      CONV_UNESCAPEHEX:    ':XX' will be converted to an UCS2 character
 687  *      CONV_IGNORE:         return the first convertable characters.
 688  *      CONV_FORCE:  force convertion
 689  * FIXME:
 690  *      This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 691  *      The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it
 692  *      for e.g. HFS cdroms.
 693  */
 694
 695 static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, uint16_t *flags)
 696 {
 697     const uint16_t option = (flags ? *flags : 0);
 698     size_t i_len, o_len;
 699     size_t j = 0;
 700     const char* inbuf = (const char*)src;
 701     char* outbuf = dest;
 702     atalk_iconv_t descriptor;
 703     atalk_iconv_t descriptor_cap;
 704
 705     if (srclen == (size_t)-1)
 706         srclen = strlen(src) + 1;
 707
 708     descriptor = conv_handles[from_set][CH_UCS2];
 709     descriptor_cap = conv_handles[cap_set][CH_UCS2];
 710
 711     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 712         errno = EINVAL;
 713         return (size_t)-1;
 714     }
 715
 716     i_len=srclen;
 717     o_len=destlen;
 718
 719     while (i_len > 0) {
 720         for (j = 0; j < i_len; ++j)
 721             if (inbuf[j] == ':')
 722                 break;
 723         j = i_len - j;
 724         i_len -= j;
 725
 726         if (i_len > 0 &&
 727             atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
 728             if (errno == EILSEQ || errno == EINVAL) {
 729                 errno = EILSEQ;
 730                 if ((option & CONV_IGNORE)) {
 731                     *flags |= CONV_REQMANGLE;
 732                     return destlen - o_len;
 733                 }
 734                 if ((option & CONV__EILSEQ)) {
 735                     if (o_len < 2) {
 736                         errno = E2BIG;
 737                         goto end;
 738                     }
 739                     *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */
 740                     inbuf++;
 741                     i_len--;
 742                     outbuf += 2;
 743                     o_len -= 2;
 744                     /* FIXME reset stat ? */
 745                     continue;
 746                 }
 747             }
 748             goto end;
 749         }
 750
 751         if (j) {
 752             /* we have a ':' */
 753             i_len = j, j = 0;
 754
 755             if ((option & CONV_UNESCAPEHEX)) {
 756                 /* treat it as a CAP hex encoded char */
 757                 char h[MAXPATHLEN];
 758                 size_t hlen = 0;
 759
 760                 while (i_len >= 3 && inbuf[0] == ':' &&
 761                        isxdigit(inbuf[1]) && isxdigit(inbuf[2])) {
 762                     h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]);
 763                     inbuf += 3;
 764                     i_len -= 3;
 765                 }
 766                 if (hlen) {
 767                     const char *h_buf = h;
 768                     if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) {
 769                         i_len += hlen * 3;
 770                         inbuf -= hlen * 3;
 771                         if (errno == EILSEQ && (option & CONV_IGNORE)) {
 772                             *flags |= CONV_REQMANGLE;
 773                             return destlen - o_len;
 774                         }
 775                         goto end;
 776                     }
 777                 } else {
 778                     /* We have an invalid :xx sequence */
 779                     errno = EILSEQ;
 780                     if ((option & CONV_IGNORE)) {
 781                         *flags |= CONV_REQMANGLE;
 782                         return destlen - o_len;
 783                     }
 784                     goto end;
 785                 }
 786             } else {
 787                 /* a ':' that we just convert to a '/' */
 788                 ucs2_t slash = 0x002f;
 789                 memcpy(outbuf, &slash, sizeof(ucs2_t));
 790                 outbuf += 2;
 791                 o_len -= 2;
 792                 inbuf++;
 793                 i_len--;
 794             }
 795         }
 796     }
 797 end:
 798     return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
 799 }
 800
/*
 * Convert from UCS2 to MB charset
 * Flags:
 *      CONV_ESCAPEDOTS: escape leading dots
 *      CONV_ESCAPEHEX:  unconvertible characters and '/' will be escaped to :XX
 *      CONV_IGNORE:     return the first convertible characters.
 *      CONV__EILSEQ:    unconvertible characters will be replaced with '_'
 *      CONV_FORCE:      force conversion
 * FIXME:
 *      CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this?
 *      This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 *      The escape scheme is not compatible with the old CAP-style escape. This is bad, we need it
 *      for e.g. HFS cdroms.
 */
 815
 816
 817 static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, uint16_t *flags)
 818 {
 819     const uint16_t option = (flags ? *flags : 0);
 820     size_t i_len, o_len, i;
 821     size_t j = 0;
 822     const char* inbuf = (const char*)src;
 823     char* outbuf = (char*)dest;
 824     atalk_iconv_t descriptor;
 825     atalk_iconv_t descriptor_cap;
 826     char escch;                 /* 150210: uninitialized OK, depends on j */
 827
 828     descriptor = conv_handles[CH_UCS2][to_set];
 829     descriptor_cap = conv_handles[CH_UCS2][cap_set];
 830
 831     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 832         errno = EINVAL;
 833         return (size_t) -1;
 834     }
 835
 836     i_len=srclen;
 837     o_len=destlen;
 838
 839     if ((option & CONV_ESCAPEDOTS) &&
 840         i_len >= 2 && SVAL(inbuf, 0) == 0x002e) { /* 0x002e = . */
 841         if (o_len < 3) {
 842             errno = E2BIG;
 843             goto end;
 844         }
 845         *outbuf++ = ':';
 846         *outbuf++ = '2';
 847         *outbuf++ = 'e';
 848         o_len -= 3;
 849         inbuf += 2;
 850         i_len -= 2;
 851         *flags |= CONV_REQESCAPE;
 852     }
 853
 854     while (i_len >= 2) {
 855         for (i = 0; i < i_len; i += 2) {
 856             ucs2_t c = SVAL(inbuf, i);
 857             switch (c) {
 858             case 0x003a: /* 0x003a = ':' */
 859                 if ( ! (option & CONV_ALLOW_COLON)) {
 860                     errno = EILSEQ;
 861                     goto end;
 862                 }
 863                 escch = c;
 864                 j = i_len - i;
 865                 i_len = i;
 866                 break;
 867             case 0x002f: /* 0x002f = '/' */
 868                 if (option & CONV_ALLOW_SLASH) break;
 869                 escch = c;
 870                 j = i_len - i;
 871                 i_len = i;
 872                 break;
 873             }
 874         }
 875         while (i_len > 0 &&
 876                atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
 877             if (errno == EILSEQ) {
 878                 if ((option & CONV_IGNORE)) {
 879                     *flags |= CONV_REQMANGLE;
 880                     return destlen - o_len;
 881                 }
 882                 if ((option & CONV_ESCAPEHEX)) {
 883                     const size_t bufsiz = o_len / 3 + 1;
 884                     char *buf = malloc(bufsiz);
 885                     size_t buflen;
 886
 887                     if (!buf)
 888                         goto end;
 889                     i = i_len;
 890                     for (buflen = 1; buflen <= bufsiz; ++buflen) {
 891                         char *b = buf;
 892                         size_t o = buflen;
 893                         if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) {
 894                             buflen -= o;
 895                             break;
 896                         } else if (errno != E2BIG) {
 897                             SAFE_FREE(buf);
 898                             goto end;
 899                         } else if (o < buflen) {
 900                             buflen -= o;
 901                             break;
 902                         }
 903                     }
 904                     if (o_len < buflen * 3) {
 905                         SAFE_FREE(buf);
 906                         errno = E2BIG;
 907                         goto end;
 908                     }
 909                     o_len -= buflen * 3;
 910                     i_len = i;
 911                     for (i = 0; i < buflen; ++i) {
 912                         *outbuf++ = ':';
 913                         *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f];
 914                         *outbuf++ = hexdig[buf[i] & 0x0f];
 915                     }
 916                     SAFE_FREE(buf);
 917                     *flags |= CONV_REQESCAPE;
 918                     continue;
 919                 }
 920             }
 921             goto end;
 922         }
 923
 924         if (j) {
 925             /* we have a ':' or '/' */
 926             i_len = j, j = 0;
 927
 928             if ((option & CONV_ESCAPEHEX)) {
 929                 /* CAP hex encode it */
 930                 if (o_len < 3) {
 931                     errno = E2BIG;
 932                     goto end;
 933                 }
 934                 switch (escch) {
 935                 case '/':
 936                     *outbuf++ = ':';
 937                     *outbuf++ = '2';
 938                     *outbuf++ = 'f';
 939                     break;
 940                 case ':':
 941                     *outbuf++ = ':';
 942                     *outbuf++ = '3';
 943                     *outbuf++ = 'a';
 944                     break;
 945                 default:
 946                     /*
 947                      *  THIS SHOULD NEVER BE REACHED !!!
 948                      *  As a safety net I put in a ' ' here
 949                      */
 950                     *outbuf++ = ':';
 951                     *outbuf++ = '2';
 952                     *outbuf++ = '0';
 953                     break;
 954                 }
 955                 o_len -= 3;
 956                 inbuf += 2;
 957                 i_len -= 2;
 958             } else {
 959                 switch (escch) {
 960                 case '/':
 961                 case ':':
 962                     *outbuf++ = ':';
 963                     break;
 964                 default: /* should never be reached */
 965                     *outbuf++ = ' ';
 966                     break;
 967                 }
 968                 o_len--;
 969                 inbuf += 2;
 970                 i_len -= 2;
 971             }
 972         }
 973     }
 974     if (i_len > 0) errno = EINVAL;
 975 end:
 976     return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
 977 }
 978
 979 /*
 980  * FIXME the size is a mess we really need a malloc/free logic
 981  *`dest size must be dest_len +2
 982  */
 983 size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, const char *src, size_t src_len, char *dest, size_t dest_len, uint16_t *flags)
 984 {
 985     size_t i_len, o_len;
 986     ucs2_t *u;
 987     ucs2_t buffer[MAXPATHLEN +2];
 988     ucs2_t buffer2[MAXPATHLEN +2];
 989
 990     lazy_initialize_conv();
 991
 992     /* convert from_set to UCS2 */
 993     if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len,
 994                                                       (char *) buffer, sizeof(buffer) -2, flags)) ) {
 995         LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
 996         return (size_t) -1;
 997     }
 998
 999     if ( o_len == 0)
1000         return o_len;
1001
1002     /* Do pre/decomposition */
1003     i_len = sizeof(buffer2) -2;
1004     u = buffer2;
1005     if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && (charsets[to_set]->flags & CHARSET_DECOMPOSED)) ) {
1006         if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
1007             return (size_t)(-1);
1008     }
1009     else if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) || !charsets[from_set] || (charsets[from_set]->flags & CHARSET_DECOMPOSED)) {
1010         if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
1011             return (size_t)(-1);
1012     }
1013     else {
1014         u = buffer;
1015         i_len = o_len;
1016     }
1017     /* null terminate */
1018     u[i_len] = 0;
1019     u[i_len +1] = 0;
1020
1021     /* Do case conversions */
1022     if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
1023         strupper_w(u);
1024     }
1025     else if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
1026         strlower_w(u);
1027     }
1028
1029     /* Convert UCS2 to to_set */
1030     if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
1031         LOG(log_error, logtype_default,
1032             "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
1033         return (size_t) -1;
1034     }
1035     /* null terminate */
1036     dest[o_len] = 0;
1037     dest[o_len +1] = 0;
1038
1039     return o_len;
1040 }