libatalk/unicode/charcnv.c

   1 /*
   2   Unix SMB/CIFS implementation.
   3   Character set conversion Extensions
   4   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5   Copyright (C) Andrew Tridgell 2001
   6   Copyright (C) Simo Sorce 2001
   7   Copyright (C) Martin Pool 2003
   8
   9   This program is free software; you can redistribute it and/or modify
  10   it under the terms of the GNU General Public License as published by
  11   the Free Software Foundation; either version 2 of the License, or
  12   (at your option) any later version.
  13
  14   This program is distributed in the hope that it will be useful,
  15   but WITHOUT ANY WARRANTY; without even the implied warranty of
  16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17   GNU General Public License for more details.
  18
  19   You should have received a copy of the GNU General Public License
  20   along with this program; if not, write to the Free Software
  21   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif /* HAVE_CONFIG_H */
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <unistd.h>
  31 #include <string.h>
  32 #include <ctype.h>
  33 #include <errno.h>
  34 #include <sys/stat.h>
  35 #include <sys/param.h>
  36 #ifdef HAVE_USABLE_ICONV
  37 #include <iconv.h>
  38 #endif
  39 #include <arpa/inet.h>
  40
  41 #include <atalk/logger.h>
  42 #include <atalk/unicode.h>
  43 #include <atalk/util.h>
  44 #include <atalk/compat.h>
  45 #include <atalk/byteorder.h>
  46
  47
  48 /**
  49  * @file
  50  *
  51  * @brief Character-set conversion routines built on our iconv.
  52  *
  53  * @note Samba's internal character set (at least in the 3.0 series)
  54  * is always the same as the one for the Unix filesystem.  It is
  55  * <b>not</b> necessarily UTF-8 and may be different on machines that
  56  * need i18n filenames to be compatible with Unix software.  It does
  57  * have to be a superset of ASCII.  All multibyte sequences must start
  58  * with a byte with the high bit set.
  59  *
  60  * @sa lib/iconv.c
  61  */
  62
  63
  64 #define MAX_CHARSETS 20
  65
  66 #define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )
  67
  68 static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
  69 static char* charset_names[MAX_CHARSETS];
  70 static struct charset_functions* charsets[MAX_CHARSETS];
  71 static char hexdig[] = "0123456789abcdef";
  72 #define hextoint( c )   ( isdigit( c ) ? c - '0' : c + 10 - 'a' )
  73
  74
  75 /**
  76  * Return the name of a charset to give to iconv().
  77  **/
  78 static const char *charset_name(charset_t ch)
  79 {
  80     const char *ret = NULL;
  81
  82     if (ch == CH_UCS2) ret = "UCS-2";
  83     else if (ch == CH_UTF8) ret = "UTF8";
  84     else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
  85     else ret = charset_names[ch];
  86     return ret;
  87 }
  88
  89 int set_charset_name(charset_t ch, const char *name)
  90 {
  91     if (ch >= NUM_CHARSETS)
  92         return -1;
  93     charset_names[ch] = strdup(name);
  94     return 0;
  95 }
  96
  97 void free_charset_names(void)
  98 {
  99     for (int ch = 0; ch < MAX_CHARSETS; ch++) {
 100         if (charset_names[ch]) {
 101             free(charset_names[ch]);
 102             charset_names[ch] = NULL;
 103         }
 104     }
 105 }
 106
 107 static struct charset_functions* get_charset_functions (charset_t ch)
 108 {
 109     if (charsets[ch] != NULL)
 110         return charsets[ch];
 111
 112     charsets[ch] = find_charset_functions(charset_name(ch));
 113
 114     return charsets[ch];
 115 }
 116
 117
 118 static void lazy_initialize_conv(void)
 119 {
 120     static int initialized = 0;
 121
 122     if (!initialized) {
 123         initialized = 1;
 124         init_iconv();
 125     }
 126 }
 127
 128 charset_t add_charset(const char* name)
 129 {
 130     static charset_t max_charset_t = NUM_CHARSETS-1;
 131     charset_t cur_charset_t = max_charset_t+1;
 132     unsigned int c1;
 133
 134     lazy_initialize_conv();
 135
 136     for (c1=0; c1<=max_charset_t;c1++) {
 137         if ( strcasecmp(name, charset_name(c1)) == 0)
 138             return (c1);
 139     }
 140
 141     if ( cur_charset_t >= MAX_CHARSETS )  {
 142         LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
 143              name, MAX_CHARSETS);
 144         return (charset_t) -1;
 145     }
 146
 147     /* First try to setup the required conversions */
 148
 149     conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 150     if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
 151         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 152             name,  charset_name(CH_UCS2));
 153         conv_handles[cur_charset_t][CH_UCS2] = NULL;
 154         return (charset_t) -1;
 155     }
 156
 157     conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
 158     if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
 159         LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 160             charset_name(CH_UCS2), name);
 161         conv_handles[CH_UCS2][cur_charset_t] = NULL;
 162         return (charset_t) -1;
 163     }
 164
 165     /* register the new charset_t name */
 166     charset_names[cur_charset_t] = strdup(name);
 167
 168     charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
 169     max_charset_t++;
 170
 171 #ifdef DEBUG
 172     LOG(log_debug9, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
 173 #endif
 174     return (cur_charset_t);
 175 }
 176
 177 /**
 178  * Initialize iconv conversion descriptors.
 179  *
 180  * This is called the first time it is needed, and also called again
 181  * every time the configuration is reloaded, because the charset or
 182  * codepage might have changed.
 183  **/
 184 void init_iconv(void)
 185 {
 186     int c1;
 187
 188     for (c1=0;c1<NUM_CHARSETS;c1++) {
 189         const char *name = charset_name((charset_t)c1);
 190
 191         conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
 192         if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
 193             LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 194                 name,  charset_name(CH_UCS2));
 195             conv_handles[c1][CH_UCS2] = NULL;
 196         }
 197
 198         if (c1 != CH_UCS2) { /* avoid lost memory, make valgrind happy */
 199             conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
 200             if (conv_handles[CH_UCS2][c1] == (atalk_iconv_t)-1) {
 201                 LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
 202                     charset_name(CH_UCS2), name);
 203                 conv_handles[CH_UCS2][c1] = NULL;
 204             }
 205         }
 206
 207         charsets[c1] = get_charset_functions (c1);
 208     }
 209 }
 210
 211 /**
 212  *
 213  **/
 214 static size_t add_null(charset_t to, char *buf, size_t bytesleft, size_t len)
 215 {
 216     /* Terminate the string */
 217     if (to == CH_UCS2 && bytesleft >= 2) {
 218         buf[len]   = 0;
 219         buf[len+1] = 0;
 220
 221     }
 222     else if ( to != CH_UCS2 && bytesleft > 0 )
 223         buf[len]   = 0;
 224     else {
 225         errno = E2BIG;
 226         return (size_t)(-1);
 227     }
 228
 229     return len;
 230 }
 231
 232
 233 /**
 234  * Convert string from one encoding to another, making error checking etc
 235  *
 236  * @param src pointer to source string (multibyte or singlebyte)
 237  * @param srclen length of the source string in bytes
 238  * @param dest pointer to destination string (multibyte or singlebyte)
 239  * @param destlen maximal length allowed for string
 240  * @returns the number of bytes occupied in the destination
 241  **/
 242 static size_t convert_string_internal(charset_t from, charset_t to,
 243                                       void const *src, size_t srclen,
 244                                       void *dest, size_t destlen)
 245 {
 246     size_t i_len, o_len;
 247     size_t retval;
 248     const char* inbuf = (const char*)src;
 249     char* outbuf = (char*)dest;
 250     char* o_save = outbuf;
 251     atalk_iconv_t descriptor;
 252
 253     /* Fixed based on Samba 3.0.6 */
 254     if (srclen == (size_t)-1) {
 255         if (from == CH_UCS2) {
 256             srclen = (strlen_w((const ucs2_t *)src)) * 2;
 257         } else {
 258             srclen = strlen((const char *)src);
 259         }
 260     }
 261
 262
 263     lazy_initialize_conv();
 264
 265     descriptor = conv_handles[from][to];
 266
 267     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 268         return (size_t) -1;
 269     }
 270
 271     i_len=srclen;
 272     o_len=destlen;
 273     retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
 274     if(retval==(size_t)-1) {
 275         const char *reason="unknown error";
 276         switch(errno) {
 277         case EINVAL:
 278             reason="Incomplete multibyte sequence";
 279             break;
 280         case E2BIG:
 281             reason="No more room";
 282             break;
 283         case EILSEQ:
 284             reason="Illegal multibyte sequence";
 285             break;
 286         }
 287         LOG(log_debug, logtype_default,"Conversion error: %s",reason);
 288         return (size_t)-1;
 289     }
 290
 291     /* Terminate the string */
 292     return add_null( to, o_save, o_len, destlen -o_len);
 293 }
 294
 295
 296 size_t convert_string(charset_t from, charset_t to,
 297                       void const *src, size_t srclen,
 298                       void *dest, size_t destlen)
 299 {
 300     size_t i_len, o_len;
 301     ucs2_t *u;
 302     ucs2_t buffer[MAXPATHLEN];
 303     ucs2_t buffer2[MAXPATHLEN];
 304
 305     /* convert from_set to UCS2 */
 306     if ((size_t)-1 == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
 307                                                            (char*) buffer, sizeof(buffer))) ) {
 308         LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 309         return (size_t) -1;
 310     }
 311
 312     /* Do pre/decomposition */
 313     i_len = sizeof(buffer2);
 314     u = buffer2;
 315     if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
 316         if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
 317             return (size_t)-1;
 318     }
 319     else if (!charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED)) {
 320         if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
 321             return (size_t)-1;
 322     }
 323     else {
 324         u = buffer;
 325         i_len = o_len;
 326     }
 327     /* Convert UCS2 to to_set */
 328     if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
 329         LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 330         return (size_t) -1;
 331     }
 332
 333     return o_len;
 334 }
 335
 336
 337
 338 /**
 339  * Convert between character sets, allocating a new buffer for the result.
 340  *
 341  * @param srclen length of source buffer.
 342  * @param dest always set at least to NULL
 343  * @note -1 is not accepted for srclen.
 344  *
 345  * @returns Size in bytes of the converted string; or -1 in case of error.
 346  **/
 347
 348 static size_t convert_string_allocate_internal(charset_t from, charset_t to,
 349                                                void const *src, size_t srclen, char **dest)
 350 {
 351     size_t i_len, o_len, destlen;
 352     size_t retval;
 353     const char *inbuf = (const char *)src;
 354     char *outbuf = NULL, *ob = NULL;
 355     atalk_iconv_t descriptor;
 356
 357     *dest = NULL;
 358
 359     if (src == NULL || srclen == (size_t)-1)
 360         return (size_t)-1;
 361
 362     lazy_initialize_conv();
 363
 364     descriptor = conv_handles[from][to];
 365
 366     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 367         /* conversion not supported, return -1*/
 368         LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!");
 369         return -1;
 370     }
 371
 372     destlen = MAX(srclen, 512);
 373 convert:
 374     destlen = destlen * 2;
 375     outbuf = (char *)realloc(ob, destlen);
 376     if (!outbuf) {
 377         LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!");
 378         SAFE_FREE(ob);
 379         return (size_t)-1;
 380     } else {
 381         ob = outbuf;
 382     }
 383     inbuf = src;   /* this restarts the whole conversion if buffer needed to be increased */
 384     i_len = srclen;
 385     o_len = destlen;
 386     retval = atalk_iconv(descriptor,
 387                          &inbuf, &i_len,
 388                          &outbuf, &o_len);
 389     if(retval == (size_t)-1)        {
 390         const char *reason="unknown error";
 391         switch(errno) {
 392         case EINVAL:
 393             reason="Incomplete multibyte sequence";
 394             break;
 395         case E2BIG:
 396             goto convert;
 397         case EILSEQ:
 398             reason="Illegal multibyte sequence";
 399             break;
 400         }
 401         LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf);
 402         SAFE_FREE(ob);
 403         return (size_t)-1;
 404     }
 405
 406
 407     destlen = destlen - o_len;
 408
 409     /* Terminate the string */
 410     if (to == CH_UCS2 && o_len >= 2) {
 411         ob[destlen] = 0;
 412         ob[destlen+1] = 0;
 413         *dest = (char *)realloc(ob,destlen+2);
 414     }
 415     else if ( to != CH_UCS2 && o_len > 0 ) {
 416         ob[destlen] = 0;
 417         *dest = (char *)realloc(ob,destlen+1);
 418     }
 419     else {
 420         goto convert; /* realloc */
 421     }
 422
 423     if (destlen && !*dest) {
 424         LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!");
 425         SAFE_FREE(ob);
 426         return (size_t)-1;
 427     }
 428
 429     return destlen;
 430 }
 431
 432
 433 size_t convert_string_allocate(charset_t from, charset_t to,
 434                                void const *src, size_t srclen,
 435                                char ** dest)
 436 {
 437     size_t i_len, o_len;
 438     ucs2_t *u;
 439     ucs2_t buffer[MAXPATHLEN];
 440     ucs2_t buffer2[MAXPATHLEN];
 441
 442     *dest = NULL;
 443
 444     /* convert from_set to UCS2 */
 445     if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
 446                                                            buffer, sizeof(buffer))) ) {
 447         LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
 448         return (size_t) -1;
 449     }
 450
 451     /* Do pre/decomposition */
 452     i_len = sizeof(buffer2);
 453     u = buffer2;
 454     if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
 455         if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
 456             return (size_t)-1;
 457     }
 458     else if ( !charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED) ) {
 459         if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
 460             return (size_t)-1;
 461     }
 462     else {
 463         u = buffer;
 464         i_len = o_len;
 465     }
 466
 467     /* Convert UCS2 to to_set */
 468     if ((size_t)-1 == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
 469         LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
 470
 471     return o_len;
 472
 473 }
 474
 475 size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 476 {
 477     size_t size;
 478     char *buffer;
 479
 480     size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 481                                             (char**) &buffer);
 482     if (size == (size_t)-1) {
 483         SAFE_FREE(buffer);
 484         return size;
 485     }
 486     if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
 487         free(buffer);
 488         return srclen;
 489     }
 490
 491     size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 492     free(buffer);
 493     return size;
 494 }
 495
 496 size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
 497 {
 498     size_t size;
 499     char *buffer;
 500
 501     size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
 502                                             (char **) &buffer);
 503     if (size == (size_t)-1) {
 504         SAFE_FREE(buffer);
 505         return size;
 506     }
 507     if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
 508         free(buffer);
 509         return srclen;
 510     }
 511
 512     size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
 513     free(buffer);
 514     return size;
 515 }
 516
 517
 518 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 519 {
 520     return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
 521 }
 522
 523 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 524 {
 525     return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
 526 }
 527
 528 size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 529 {
 530     return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
 531 }
 532
 533 size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 534 {
 535     return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
 536 }
 537
 538 /**
 539  * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
 540  *
 541  * @param dest always set at least to NULL
 542  *
 543  * @returns The number of bytes occupied by the string in the destination
 544  *         or -1 in case of error.
 545  **/
 546
 547 size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
 548 {
 549     size_t src_len = strlen(src);
 550
 551     *dest = NULL;
 552     return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
 553 }
 554
 555 /** -----------------------------------
 556  * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
 557  *
 558  * @param dest always set at least to NULL
 559  *
 560  * @returns The number of bytes occupied by the string in the destination
 561  **/
 562
 563 size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
 564 {
 565     size_t src_len = strlen(src);
 566
 567     *dest = NULL;
 568     return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
 569 }
 570
 571 /** -----------------------------------
 572  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
 573  *
 574  * @param dest always set at least to NULL
 575  *
 576  * @returns The number of bytes occupied by the string in the destination
 577  **/
 578
 579 size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen)
 580 {
 581     size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
 582     return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
 583 }
 584
 585 /* --------------------------------- */
 586 size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
 587 {
 588     size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
 589     *dest = NULL;
 590     return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
 591 }
 592
 593 /** ---------------------------------
 594  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
 595  *
 596  * @param dest always set at least to NULL
 597  *
 598  * @returns The number of bytes occupied by the string in the destination
 599  **/
 600
 601 size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
 602 {
 603     size_t src_len = strlen(src);
 604     *dest = NULL;
 605     return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
 606 }
 607
 608 size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 609 {
 610     char *buffer;
 611     ucs2_t u[MAXPATHLEN];
 612     size_t len;
 613     size_t ilen;
 614
 615     if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 616         return len;
 617
 618     ilen=sizeof(u);
 619
 620     if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
 621         free (buffer);
 622         return (size_t)(-1);
 623     }
 624
 625     if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
 626         free (buffer);
 627         return (size_t)(-1);
 628     }
 629
 630     free(buffer);
 631     return (len);
 632 }
 633
 634 size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
 635 {
 636     char *buffer;
 637     ucs2_t u[MAXPATHLEN];
 638     size_t len;
 639     size_t ilen;
 640
 641     if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
 642         return len;
 643
 644     ilen=sizeof(u);
 645
 646     if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
 647         free (buffer);
 648         return (size_t)(-1);
 649     }
 650
 651     if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
 652         free (buffer);
 653         return (size_t)(-1);
 654     }
 655
 656     free(buffer);
 657     return (len);
 658 }
 659
 660 size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
 661 {
 662     return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
 663 }
 664
 665 size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
 666 {
 667     return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
 668 }
 669
 670 #if 0
 671 static char  debugbuf[ MAXPATHLEN +1 ];
 672 char * debug_out ( char * seq, size_t len)
 673 {
 674     size_t i = 0;
 675     unsigned char *p;
 676     char *q;
 677
 678     p = (unsigned char*) seq;
 679     q = debugbuf;
 680
 681     for ( i = 0; i<=(len-1); i++)
 682     {
 683         sprintf(q, "%2.2x.", *p);
 684         q += 3;
 685         p++;
 686     }
 687     *q=0;
 688     q = debugbuf;
 689     return q;
 690 }
 691 #endif
 692
 693 /*
 694  * Convert from MB to UCS2 charset
 695  * Flags:
 696  *      CONV_UNESCAPEHEX:    ':XX' will be converted to an UCS2 character
 697  *      CONV_IGNORE:         return the first convertable characters.
 698  *      CONV_FORCE:  force convertion
 699  * FIXME:
 700  *      This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 701  *      The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it
 702  *      for e.g. HFS cdroms.
 703  */
 704
 705 static size_t pull_charset_flags (charset_t from_set, charset_t to_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, uint16_t *flags)
 706 {
 707     const uint16_t option = (flags ? *flags : 0);
 708     size_t i_len, o_len;
 709     size_t j = 0;
 710     const char* inbuf = (const char*)src;
 711     char* outbuf = dest;
 712     atalk_iconv_t descriptor;
 713     atalk_iconv_t descriptor_cap;
 714     char escch;                 /* 150210: uninitialized OK, depends on j */
 715
 716     if (srclen == (size_t)-1)
 717         srclen = strlen(src) + 1;
 718
 719     descriptor = conv_handles[from_set][CH_UCS2];
 720     descriptor_cap = conv_handles[cap_set][CH_UCS2];
 721
 722     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 723         errno = EINVAL;
 724         return (size_t)-1;
 725     }
 726
 727     i_len=srclen;
 728     o_len=destlen;
 729
 730     if ((option & CONV_ESCAPEDOTS) && i_len >= 2 && inbuf[0] == '.') {
 731         if (o_len < 6) {
 732             errno = E2BIG;
 733             goto end;
 734         }
 735         ucs2_t ucs2 = ':';
 736         memcpy(outbuf, &ucs2, sizeof(ucs2_t));
 737         ucs2 = '2';
 738         memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
 739         ucs2 = 'e';
 740         memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
 741         outbuf += 6;
 742         o_len -= 6;
 743         inbuf++;
 744         i_len--;
 745         *flags |= CONV_REQESCAPE;
 746     }
 747
 748     while (i_len > 0) {
 749         for (j = 0; j < i_len; ++j)
 750             if (inbuf[j] == ':' || inbuf[j] == '/') {
 751                 escch = inbuf[j];
 752                 break;
 753             }
 754         j = i_len - j;
 755         i_len -= j;
 756
 757         if (i_len > 0 &&
 758             atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
 759             if (errno == EILSEQ || errno == EINVAL) {
 760                 errno = EILSEQ;
 761                 if ((option & CONV_IGNORE)) {
 762                     *flags |= CONV_REQMANGLE;
 763                     return destlen - o_len;
 764                 }
 765                 if ((option & CONV__EILSEQ)) {
 766                     if (o_len < 2) {
 767                         errno = E2BIG;
 768                         goto end;
 769                     }
 770                     *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */
 771                     inbuf++;
 772                     i_len--;
 773                     outbuf += 2;
 774                     o_len -= 2;
 775                     /* FIXME reset stat ? */
 776                     continue;
 777                 }
 778             }
 779             goto end;
 780         }
 781
 782         if (j) {
 783             /* we have a ':' or '/' */
 784             i_len = j, j = 0;
 785
 786             if (escch == ':') {
 787                 if ((option & CONV_UNESCAPEHEX)) {
 788                     /* treat it as a CAP hex encoded char */
 789                     char h[MAXPATHLEN];
 790                     size_t hlen = 0;
 791
 792                     while (i_len >= 3 && inbuf[0] == ':' &&
 793                            isxdigit(inbuf[1]) && isxdigit(inbuf[2])) {
 794                         h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]);
 795                         inbuf += 3;
 796                         i_len -= 3;
 797                     }
 798                     if (hlen) {
 799                         const char *h_buf = h;
 800                         if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) {
 801                             i_len += hlen * 3;
 802                             inbuf -= hlen * 3;
 803                             if (errno == EILSEQ && (option & CONV_IGNORE)) {
 804                                 *flags |= CONV_REQMANGLE;
 805                                 return destlen - o_len;
 806                             }
 807                             goto end;
 808                         }
 809                     } else {
 810                         /* We have an invalid :xx sequence */
 811                         errno = EILSEQ;
 812                         if ((option & CONV_IGNORE)) {
 813                             *flags |= CONV_REQMANGLE;
 814                             return destlen - o_len;
 815                         }
 816                         goto end;
 817                     }
 818                 } else if (option & CONV_ESCAPEHEX) {
 819                     if (o_len < 6) {
 820                         errno = E2BIG;
 821                         goto end;
 822                     }
 823                     ucs2_t ucs2 = ':';
 824                     memcpy(outbuf, &ucs2, sizeof(ucs2_t));
 825                     ucs2 = '3';
 826                     memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
 827                     ucs2 = 'a';
 828                     memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
 829                     outbuf += 6;
 830                     o_len -= 6;
 831                     inbuf++;
 832                     i_len--;
 833                 } else if (to_set == CH_UTF8_MAC || to_set == CH_MAC) {
 834                     /* convert to a '/' */
 835                     ucs2_t slash = 0x002f;
 836                     memcpy(outbuf, &slash, sizeof(ucs2_t));
 837                     outbuf += 2;
 838                     o_len -= 2;
 839                     inbuf++;
 840                     i_len--;
 841                 } else {
 842                     /* keep as ':' */
 843                     ucs2_t ucs2 = 0x003a;
 844                     memcpy(outbuf, &ucs2, sizeof(ucs2_t));
 845                     outbuf += 2;
 846                     o_len -= 2;
 847                     inbuf++;
 848                     i_len--;
 849                 }
 850             } else {
 851                 /* '/' */
 852                 if (option & CONV_ESCAPEHEX) {
 853                     if (o_len < 6) {
 854                         errno = E2BIG;
 855                         goto end;
 856                     }
 857                     ucs2_t ucs2 = ':';
 858                     memcpy(outbuf, &ucs2, sizeof(ucs2_t));
 859                     ucs2 = '2';
 860                     memcpy(outbuf + sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
 861                     ucs2 = 'f';
 862                     memcpy(outbuf + 2 * sizeof(ucs2_t), &ucs2, sizeof(ucs2_t));
 863                     outbuf += 6;
 864                     o_len -= 6;
 865                     inbuf++;
 866                     i_len--;
 867                 } else if ((from_set == CH_UTF8_MAC || from_set == CH_MAC)
 868                            && (to_set != CH_UTF8_MAC  || to_set != CH_MAC)) {
 869                     /* convert to ':' */
 870                     ucs2_t ucs2 = 0x003a;
 871                     memcpy(outbuf, &ucs2, sizeof(ucs2_t));
 872                     outbuf += 2;
 873                     o_len -= 2;
 874                     inbuf++;
 875                     i_len--;
 876                 } else {
 877                     /* keep as '/' */
 878                     ucs2_t ucs2 = 0x002f;
 879                     memcpy(outbuf, &ucs2, sizeof(ucs2_t));
 880                     outbuf += 2;
 881                     o_len -= 2;
 882                     inbuf++;
 883                     i_len--;
 884                 }
 885             }
 886         }
 887     }
 888 end:
 889     return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
 890 }
 891
/*
 * Convert from UCS2 to MB charset
 * Flags:
 *      CONV_ESCAPEDOTS: escape leading dots
 *      CONV_ESCAPEHEX:  unconvertible characters and '/' will be escaped to :XX
 *      CONV_IGNORE:     return the first convertible characters.
 *      CONV__EILSEQ:    unconvertible characters will be replaced with '_'
 *      CONV_FORCE:  force conversion
 * FIXME:
 *      CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ?
 *      This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
 *      The escape scheme is not compatible to the old cap style escape. This is bad, we need it
 *      for e.g. HFS cdroms.
 */

 906
 907
 908 static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, uint16_t *flags)
 909 {
 910     const uint16_t option = (flags ? *flags : 0);
 911     size_t i_len, o_len, i;
 912     size_t j = 0;
 913     const char* inbuf = (const char*)src;
 914     char* outbuf = (char*)dest;
 915     atalk_iconv_t descriptor;
 916     atalk_iconv_t descriptor_cap;
 917
 918     descriptor = conv_handles[CH_UCS2][to_set];
 919     descriptor_cap = conv_handles[CH_UCS2][cap_set];
 920
 921     if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
 922         errno = EINVAL;
 923         return (size_t) -1;
 924     }
 925
 926     i_len=srclen;
 927     o_len=destlen;
 928
 929     while (i_len >= 2) {
 930         while (i_len > 0 &&
 931                atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
 932             if (errno == EILSEQ) {
 933                 if ((option & CONV_IGNORE)) {
 934                     *flags |= CONV_REQMANGLE;
 935                     return destlen - o_len;
 936                 }
 937                 if ((option & CONV_ESCAPEHEX)) {
 938                     const size_t bufsiz = o_len / 3 + 1;
 939                     char *buf = malloc(bufsiz);
 940                     size_t buflen;
 941
 942                     if (!buf)
 943                         goto end;
 944                     i = i_len;
 945                     for (buflen = 1; buflen <= bufsiz; ++buflen) {
 946                         char *b = buf;
 947                         size_t o = buflen;
 948                         if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) {
 949                             buflen -= o;
 950                             break;
 951                         } else if (errno != E2BIG) {
 952                             SAFE_FREE(buf);
 953                             goto end;
 954                         } else if (o < buflen) {
 955                             buflen -= o;
 956                             break;
 957                         }
 958                     }
 959                     if (o_len < buflen * 3) {
 960                         SAFE_FREE(buf);
 961                         errno = E2BIG;
 962                         goto end;
 963                     }
 964                     o_len -= buflen * 3;
 965                     i_len = i;
 966                     for (i = 0; i < buflen; ++i) {
 967                         *outbuf++ = ':';
 968                         *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f];
 969                         *outbuf++ = hexdig[buf[i] & 0x0f];
 970                     }
 971                     SAFE_FREE(buf);
 972                     *flags |= CONV_REQESCAPE;
 973                     continue;
 974                 }
 975             }
 976             goto end;
 977         }
 978     } /* while (i_len >= 2) */
 979
 980     if (i_len > 0) errno = EINVAL;
 981 end:
 982     return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
 983 }
 984
 985 /*
 986  * FIXME the size is a mess we really need a malloc/free logic
 987  *`dest size must be dest_len +2
 988  */
 989 size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, const char *src, size_t src_len, char *dest, size_t dest_len, uint16_t *flags)
 990 {
 991     size_t i_len, o_len;
 992     ucs2_t *u;
 993     ucs2_t buffer[MAXPATHLEN +2];
 994     ucs2_t buffer2[MAXPATHLEN +2];
 995
 996     lazy_initialize_conv();
 997
 998     /* convert from_set to UCS2 */
 999     if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, to_set, cap_charset, src, src_len,
1000                                                       (char *) buffer, sizeof(buffer) -2, flags)) ) {
1001         LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
1002         return (size_t) -1;
1003     }
1004
1005     if ( o_len == 0)
1006         return o_len;
1007
1008     /* Do pre/decomposition */
1009     i_len = sizeof(buffer2) -2;
1010     u = buffer2;
1011     if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && (charsets[to_set]->flags & CHARSET_DECOMPOSED)) ) {
1012         if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
1013             return (size_t)(-1);
1014     }
1015     else if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) || !charsets[from_set] || (charsets[from_set]->flags & CHARSET_DECOMPOSED)) {
1016         if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
1017             return (size_t)(-1);
1018     }
1019     else {
1020         u = buffer;
1021         i_len = o_len;
1022     }
1023     /* null terminate */
1024     u[i_len] = 0;
1025     u[i_len +1] = 0;
1026
1027     /* Do case conversions */
1028     if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
1029         strupper_w(u);
1030     }
1031     else if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
1032         strlower_w(u);
1033     }
1034
1035     /* Convert UCS2 to to_set */
1036     if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
1037         LOG(log_error, logtype_default,
1038             "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
1039         return (size_t) -1;
1040     }
1041     /* null terminate */
1042     dest[o_len] = 0;
1043     dest[o_len +1] = 0;
1044
1045     return o_len;
1046 }