arthur.barton.de Git - netatalk.git/blobdiff - libatalk/unicode/iconv.c
libiconv warning fix
[netatalk.git] / libatalk / unicode / iconv.c
index e415be8dfe80593e2c83a9bbec6704a0a5277198..48f903bc7e3482225ca3cfeaf06c2b4e8de2e4aa 100644 (file)
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
+#include <ctype.h>
+#include <errno.h>
 #include <sys/param.h>
 #include <sys/stat.h>
-#include <atalk/logger.h>
-#include <errno.h>
-
-#include <netatalk/endian.h>
-#include <atalk/unicode.h>
-
 #ifdef HAVE_USABLE_ICONV
 #include <iconv.h>
 #endif
 
+#include <netatalk/endian.h>
+#include <atalk/unicode.h>
+#include <atalk/logger.h>
+#include "byteorder.h"
 
-#include "mac_roman.h"
-#include "mac_hebrew.h"
 
 /**
  * @file
  *
  * @sa Samba Developers Guide
  **/
+#define CHARSET_WIDECHAR    32
+
+#ifdef WITH_LIBICONV
+#define UCS2ICONV "UCS-2-INTERNAL"
+#else
+#if BYTE_ORDER==LITTLE_ENDIAN
+#define UCS2ICONV "UCS-2LE"
+#else
+#define UCS2ICONV "UCS-2BE"
+#endif
+#endif
+
 
 static size_t ascii_pull(void *,char **, size_t *, char **, size_t *);
 static size_t ascii_push(void *,char **, size_t *, char **, size_t *);
-static size_t  utf8_pull(void *,char **, size_t *, char **, size_t *);
-static size_t  utf8_push(void *,char **, size_t *, char **, size_t *);
 static size_t iconv_copy(void *,char **, size_t *, char **, size_t *);
 
-static size_t   mac_pull(void *,char **, size_t *, char **, size_t *);
-static size_t   mac_push(void *,char **, size_t *, char **, size_t *);
+struct charset_functions charset_ucs2 =
+{
+        "UCS-2",
+        0,
+        iconv_copy,
+        iconv_copy,
+        CHARSET_WIDECHAR | CHARSET_PRECOMPOSED
+};
+
+struct charset_functions charset_ascii =
+{
+        "ASCII",
+        0,
+        ascii_pull,
+        ascii_push,
+        CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED 
+};
+
+struct charset_functions charset_iconv =
+{
+       NULL,
+       0,
+       NULL,
+       NULL,
+       CHARSET_ICONV | CHARSET_PRECOMPOSED
+};
+
+
+extern  struct charset_functions charset_mac_roman;
+extern  struct charset_functions charset_mac_hebrew;
+extern  struct charset_functions charset_mac_centraleurope;
+extern  struct charset_functions charset_mac_cyrillic;
+extern  struct charset_functions charset_mac_turkish;
+extern  struct charset_functions charset_utf8;
+extern  struct charset_functions charset_utf8_mac;
 
-static size_t   mac_hebrew_pull(void *,char **, size_t *, char **, size_t *);
-static size_t   mac_hebrew_push(void *,char **, size_t *, char **, size_t *);
 
 static struct charset_functions builtin_functions[] = {
-       {"UCS-2LE",   iconv_copy, iconv_copy},
-       {"UTF8",      utf8_pull,  utf8_push},
-       {"UTF-8",     utf8_pull,  utf8_push},
-       {"ASCII",     ascii_pull, ascii_push},
-       {"MAC",       mac_pull,  mac_push},
-       {"MAC-HEBR",  mac_hebrew_pull,  mac_hebrew_push},
-       {NULL, NULL, NULL}
+       {"UCS-2",   0, iconv_copy, iconv_copy, CHARSET_WIDECHAR},
+       {"ASCII",     0, ascii_pull, ascii_push, CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED},
+       {NULL, 0, NULL, NULL, 0}
 };
 
+
 #define DLIST_ADD(list, p) \
 { \
         if (!(list)) { \
@@ -106,11 +142,9 @@ static struct charset_functions builtin_functions[] = {
         }\
 }
 
-
-
 static struct charset_functions *charsets = NULL;
 
-static struct charset_functions *find_charset_functions(const char *name) 
+struct charset_functions *find_charset_functions(const char *name) 
 {
        struct charset_functions *c = charsets;
 
@@ -130,7 +164,6 @@ int atalk_register_charset(struct charset_functions *funcs)
                return -1;
        }
 
-       LOG(log_debug, logtype_default, "Attempting to register new charset %s", funcs->name);
        /* Check whether we already have this charset... */
        if (find_charset_functions(funcs->name)) {
                LOG (log_debug, logtype_default, "Duplicate charset %s, not registering", funcs->name);
@@ -138,7 +171,6 @@ int atalk_register_charset(struct charset_functions *funcs)
        }
 
        funcs->next = funcs->prev = NULL;
-       LOG(log_debug, logtype_default, "Registered charset %s", funcs->name);
        DLIST_ADD(charsets, funcs);
        return 0;
 }
@@ -152,6 +184,15 @@ void lazy_initialize_iconv(void)
                initialized = 1;
                for(i = 0; builtin_functions[i].name; i++) 
                        atalk_register_charset(&builtin_functions[i]);
+
+               /* register additional charsets */
+               atalk_register_charset(&charset_utf8);
+               atalk_register_charset(&charset_utf8_mac);
+               atalk_register_charset(&charset_mac_roman);
+               atalk_register_charset(&charset_mac_hebrew);
+               atalk_register_charset(&charset_mac_turkish);
+               atalk_register_charset(&charset_mac_centraleurope);
+               atalk_register_charset(&charset_mac_cyrillic);
        }
 }
 
@@ -164,7 +205,7 @@ static size_t sys_iconv(void *cd,
 {
 #ifdef HAVE_USABLE_ICONV
        size_t ret = iconv((iconv_t)cd, 
-                          inbuf, inbytesleft, 
+                          (ICONV_CONST char**)inbuf, inbytesleft, 
                           outbuf, outbytesleft);
        if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
        return ret;
@@ -217,51 +258,6 @@ size_t atalk_iconv(atalk_iconv_t cd,
 }
 
 
-size_t atalk_iconv_ignore(atalk_iconv_t cd, 
-                const char **inbuf, size_t *inbytesleft,
-                char **outbuf, size_t *outbytesleft, int *ignore)
-{
-       char cvtbuf[2048];
-       char *bufp = cvtbuf;
-       size_t bufsize;
-       size_t outlen = *outbytesleft;
-       char *o_save;
-       
-       /* we have to do it chunks at a time */
-       while (*inbytesleft > 0) {
-               bufp = cvtbuf;
-               bufsize = sizeof(cvtbuf);
-               
-               if (cd->pull(cd->cd_pull, (char **)inbuf, inbytesleft, &bufp, &bufsize) == (size_t)-1
-                       && errno != E2BIG) {
-                   return -1;
-               }
-
-               bufp = cvtbuf;
-               bufsize = sizeof(cvtbuf) - bufsize;
-
-               o_save = *outbuf;
-convert_push:
-               if (cd->push(cd->cd_push, 
-                            &bufp, &bufsize, 
-                            outbuf, outbytesleft) == (size_t)-1) {
-                   if (errno == EILSEQ) {
-                       o_save[outlen-*outbytesleft] = '_';
-                       (*outbuf) = o_save + outlen-*outbytesleft+1;
-                       (*outbytesleft) -=1;
-                       bufp += 2;
-                       bufsize -= 2;
-                       //outlen=*outbytesleft;
-                       *ignore = 1;
-                       goto convert_push;
-                   }
-                   else
-                       return (size_t)(-1);
-               }
-       }
-       return 0;
-}
-
 /*
   simple iconv_open() wrapper
  */
@@ -301,13 +297,13 @@ atalk_iconv_t atalk_iconv_open(const char *tocode, const char *fromcode)
        /* check if we can use iconv for this conversion */
 #ifdef HAVE_USABLE_ICONV
        if (!ret->pull) {
-               ret->cd_pull = iconv_open("UCS-2LE", fromcode);
+               ret->cd_pull = iconv_open(UCS2ICONV, fromcode);
                if (ret->cd_pull != (iconv_t)-1)
                        ret->pull = sys_iconv;
        }
 
        if (!ret->push) {
-               ret->cd_push = iconv_open(tocode, "UCS-2LE");
+               ret->cd_push = iconv_open(tocode, UCS2ICONV);
                if (ret->cd_push != (iconv_t)-1)
                        ret->push = sys_iconv;
        }
@@ -322,13 +318,13 @@ atalk_iconv_t atalk_iconv_open(const char *tocode, const char *fromcode)
        }
 
        /* check for conversion to/from ucs2 */
-       if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
+       if (strcasecmp(fromcode, "UCS-2") == 0 && to) {
                ret->direct = to->push;
                ret->push = ret->pull = NULL;
                return ret;
        }
 
-       if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
+       if (strcasecmp(tocode, "UCS-2") == 0 && from) {
                ret->direct = from->pull;
                ret->push = ret->pull = NULL;
                return ret;
@@ -336,13 +332,13 @@ atalk_iconv_t atalk_iconv_open(const char *tocode, const char *fromcode)
 
        /* Check if we can do the conversion direct */
 #ifdef HAVE_USABLE_ICONV
-       if (strcasecmp(fromcode, "UCS-2LE") == 0) {
+       if (strcasecmp(fromcode, "UCS-2") == 0) {
                ret->direct = sys_iconv;
                ret->cd_direct = ret->cd_push;
                ret->cd_push = NULL;
                return ret;
        }
-       if (strcasecmp(tocode, "UCS-2LE") == 0) {
+       if (strcasecmp(tocode, "UCS-2") == 0) {
                ret->direct = sys_iconv;
                ret->cd_direct = ret->cd_pull;
                ret->cd_pull = NULL;
@@ -380,9 +376,17 @@ int atalk_iconv_close (atalk_iconv_t cd)
 static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
+       ucs2_t curchar;
+
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-               (*outbuf)[0] = (*inbuf)[0];
-               (*outbuf)[1] = 0;
+               if ((unsigned char)(*inbuf)[0] < 0x80) {
+                       curchar = (ucs2_t) (*inbuf)[0];
+                       SSVAL((*outbuf),0,curchar);
+               }
+               else {
+                       errno = EILSEQ;
+                       return -1;
+               }
                (*inbytesleft)  -= 1;
                (*outbytesleft) -= 2;
                (*inbuf)  += 1;
@@ -401,10 +405,17 @@ static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        int ir_count=0;
+       ucs2_t curchar;
 
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-               (*outbuf)[0] = (*inbuf)[0] & 0x7F;
-               if ((*inbuf)[1]) ir_count++;
+               curchar = SVAL((*inbuf), 0);
+               if (curchar < 0x0080) {
+                       (*outbuf)[0] = curchar;
+               }
+               else {
+                       errno = EILSEQ;
+                       return -1;
+               }       
                (*inbytesleft)  -= 2;
                (*outbytesleft) -= 1;
                (*inbuf)  += 2;
@@ -448,391 +459,3 @@ static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
 }
 
 /* ------------------------ */
-static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
-                        char **outbuf, size_t *outbytesleft)
-{
-       while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-               unsigned char *c = (unsigned char *)*inbuf;
-               unsigned char *uc = (unsigned char *)*outbuf;
-               int len = 1;
-
-               if ((c[0] & 0x80) == 0) {
-                       uc[0] = c[0];
-                       uc[1] = 0;
-               } else if ((c[0] & 0xf0) == 0xe0) {
-                       if (*inbytesleft < 3) {
-                               LOG(log_debug, logtype_default, "short utf8 char\n");
-                               goto badseq;
-                       }
-                       uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
-                       uc[0] = (c[1]<<6) | (c[2]&0x3f);
-                       len = 3;
-               } else if ((c[0] & 0xe0) == 0xc0) {
-                       if (*inbytesleft < 2) {
-                               LOG(log_debug, logtype_default, "short utf8 char\n");
-                               goto badseq;
-                       }
-                       uc[1] = (c[0]>>2) & 0x7;
-                       uc[0] = (c[0]<<6) | (c[1]&0x3f);
-                       len = 2;
-               }
-
-               (*inbuf)  += len;
-               (*inbytesleft)  -= len;
-               (*outbytesleft) -= 2;
-               (*outbuf) += 2;
-       }
-
-       if (*inbytesleft > 0) {
-               errno = E2BIG;
-               return -1;
-       }
-       
-       return 0;
-
-badseq:
-       errno = EINVAL;
-       return -1;
-}
-
-/* ------------------------ */
-static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
-                        char **outbuf, size_t *outbytesleft)
-{
-       while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-               unsigned char *c = (unsigned char *)*outbuf;
-               unsigned char *uc = (unsigned char *)*inbuf;
-               int len=1;
-
-               if (uc[1] & 0xf8) {
-                       if (*outbytesleft < 3) {
-                               LOG(log_debug, logtype_default, "short utf8 write\n");
-                               goto toobig;
-                       }
-                       c[0] = 0xe0 | (uc[1]>>4);
-                       c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
-                       c[2] = 0x80 | (uc[0]&0x3f);
-                       len = 3;
-               } else if (uc[1] | (uc[0] & 0x80)) {
-                       if (*outbytesleft < 2) {
-                               LOG(log_debug, logtype_default, "short utf8 write\n");
-                               goto toobig;
-                       }
-                       c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
-                       c[1] = 0x80 | (uc[0]&0x3f);
-                       len = 2;
-               } else {
-                       c[0] = uc[0];
-               }
-
-
-               (*inbytesleft)  -= 2;
-               (*outbytesleft) -= len;
-               (*inbuf)  += 2;
-               (*outbuf) += len;
-       }
-
-       if (*inbytesleft == 1) {
-               errno = EINVAL;
-               return -1;
-       }
-
-       if (*inbytesleft > 1) {
-               errno = E2BIG;
-               return -1;
-       }
-       
-       return 0;
-
-toobig:
-       errno = E2BIG;
-       return -1;
-}
-
-/* ------------------------ */
-static int
-char_ucs2_to_mac_roman ( unsigned char *r, ucs2_t wc)
-{
-       unsigned char c = 0;
-       if (wc < 0x0080) {
-               *r = wc;
-               return 1;
-       }
-       else if (wc >= 0x00a0 && wc < 0x0100)
-               c = mac_roman_page00[wc-0x00a0];
-       else if (wc >= 0x0130 && wc < 0x0198)
-               c = mac_roman_page01[wc-0x0130];
-       else if (wc >= 0x02c0 && wc < 0x02e0)
-               c = mac_roman_page02[wc-0x02c0];
-       else if (wc == 0x03c0)
-               c = 0xb9;
-       else if (wc >= 0x2010 && wc < 0x2048)
-               c = mac_roman_page20[wc-0x2010];
-       else if (wc >= 0x2120 && wc < 0x2128)
-               c = mac_roman_page21[wc-0x2120];
-       else if (wc >= 0x2200 && wc < 0x2268)
-               c = mac_roman_page22[wc-0x2200];
-       else if (wc == 0x25ca)
-               c = 0xd7;
-       else if (wc >= 0xfb00 && wc < 0xfb08)
-               c = mac_roman_pagefb[wc-0xfb00];
-       else if (wc == 0xf8ff)
-               c = 0xf0;
-
-       if (c != 0) {
-               *r = c;
-               return 1;
-       }
-       return 0;
-}
-
-static size_t mac_push( void *cd, char **inbuf, size_t *inbytesleft,
-                         char **outbuf, size_t *outbytesleft)
-{
-        int len = 0;
-       unsigned char *tmpptr = (unsigned char *) *outbuf;
-
-        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-
-               ucs2_t *inptr = (ucs2_t *) *inbuf;
-               if (char_ucs2_to_mac_roman ( tmpptr, *inptr)) {
-                       (*inbuf) += 2;
-                       tmpptr++;
-                       len++;
-                       (*inbytesleft)  -= 2;
-                       (*outbytesleft) -= 1;
-               }
-               else    
-               {
-                       errno = EILSEQ;
-                       return (size_t) -1;     
-               }
-        }
-
-        if (*inbytesleft > 0) {
-                errno = E2BIG;
-                return -1;
-        }
-
-        return len;
-}
-
-/* ------------------------ */
-static int
-char_mac_roman_to_ucs2 (ucs2_t *pwc, const unsigned char *s)
-{
-       unsigned char c = *s;
-       if (c < 0x80) {
-               *pwc = (ucs2_t) c;
-               return 1;
-       }
-       else {
-               unsigned short wc = mac_roman_2uni[c-0x80];
-               *pwc = (ucs2_t) wc;
-               return 1;
-       }
-       return 0;
-}
-
-static size_t mac_pull ( void *cd, char **inbuf, size_t *inbytesleft,
-                         char **outbuf, size_t *outbytesleft)
-{
-       ucs2_t          *temp;
-       unsigned char   *inptr;
-        size_t  len = 0;
-
-        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-
-               inptr = (unsigned char *) *inbuf;
-               temp  = (ucs2_t*) *outbuf;      
-               if (char_mac_roman_to_ucs2 ( temp, inptr)) {
-                       (*inbuf)        +=1;
-                       (*outbuf)       +=2;
-                       (*inbytesleft) -=1;
-                       (*outbytesleft)-=2;
-                       len++;
-                       
-               }
-               else    
-               {
-                       errno = EILSEQ;
-                       return (size_t) -1;     
-               }
-        }
-
-        if (*inbytesleft > 0) {
-                errno = E2BIG;
-                return (size_t) -1;
-        }
-
-        return len;
-
-}
-
-/* ------------------------ 
- * from unicode to mac hebrew code page
-*/
-static int
-char_ucs2_to_mac_hebrew ( unsigned char *r, ucs2_t wc)
-{
-    unsigned char c = 0;
-    if (wc < 0x0080) {
-       *r = wc;
-       return 1;
-    }
-    else if (wc >= 0x00a0 && wc < 0x0100)
-        c = mac_hebrew_page00[wc-0x00a0];
-    else if (wc >= 0x05b0 && wc < 0x05f0)
-        c = mac_hebrew_page05[wc-0x05b0];
-    else if (wc >= 0x2010 && wc < 0x2028)
-        c = mac_hebrew_page20[wc-0x2010];
-    else if (wc == 0x20aa)
-        c = 0xa6;
-    else if (wc >= 0xfb18 && wc < 0xfb50)
-        c = mac_hebrew_pagefb[wc-0xfb18];
-    if (c != 0) {
-       *r = c;
-       return 1;
-    }
-    return 0;
-}
-
-static size_t mac_hebrew_push( void *cd, char **inbuf, size_t *inbytesleft,
-                         char **outbuf, size_t *outbytesleft)
-{
-    unsigned char c = 0;
-    int len = 0;
-    unsigned char *tmpptr = (unsigned char *) *outbuf;
-
-    while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-        ucs2_t *inptr = (ucs2_t *) *inbuf;
-       if (*inptr == 0x05b8) {
-           (*inbuf) += 2;
-           (*inbytesleft)  -= 2;
-           if (*inbytesleft >= 2 && *((ucs2_t *)*inbuf) == 0xf87f ) {
-               (*inbuf) += 2;
-               (*inbytesleft)  -= 2;
-               c = 0xde;
-           }
-           else {
-               c = 0xcb;
-           }
-           *tmpptr = c; 
-       }
-       else if (*inptr == 0x05f2 && *inbytesleft >= 4 && *(inptr +1) == 0x05b7) {
-           (*inbuf) += 4;
-           (*inbytesleft)  -= 4;
-           *tmpptr = 0x81;
-       }
-       else if (*inptr == 0xf86a && *inbytesleft >= 6 && *(inptr +1) == 0x05dc && *(inptr +2) == 0x05b9) {
-           (*inbuf) += 6;
-           (*inbytesleft)  -= 6;
-           *tmpptr = 0xc0;
-       }
-       else if (char_ucs2_to_mac_hebrew ( tmpptr, *inptr)) {
-           (*inbuf) += 2;
-           (*inbytesleft)  -= 2;
-       }
-       else {
-           errno = EILSEQ;
-           return (size_t) -1;
-       }
-       (*outbytesleft) -= 1;
-       tmpptr++;
-       len++;
-    }
-
-    if (*inbytesleft > 0) {
-        errno = E2BIG;
-        return -1;
-    }
-
-    return len;
-}
-
-/* ------------------------ */
-static int
-char_mac_hebrew_to_ucs2 (ucs2_t *pwc, const unsigned char *s)
-{
-       unsigned char c = *s;
-       if (c < 0x80) {
-               *pwc = (ucs2_t) c;
-               return 1;
-       }
-       else {
-               unsigned short wc = mac_hebrew_2uni[c-0x80];
-               if (wc != 0xfffd) {
-                   *pwc = (ucs2_t) wc;
-                   return 1;
-               }
-       }
-       return 0;
-}
-
-static size_t mac_hebrew_pull ( void *cd, char **inbuf, size_t *inbytesleft,
-                         char **outbuf, size_t *outbytesleft)
-{
-    ucs2_t         *temp;
-    unsigned char  *inptr;
-    size_t         len = 0;
-
-    while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-        inptr = (unsigned char *) *inbuf;
-       temp  = (ucs2_t*) *outbuf;      
-       if (char_mac_hebrew_to_ucs2 ( temp, inptr)) {
-           if (*temp == 1) {       /* 0x81 --> 0x05f2+0x05b7 */
-               if (*outbytesleft < 4) {
-                   errno = EILSEQ;
-                   return (size_t) -1; 
-               }
-               *temp = 0x05f2;
-               *(temp +1) = 0x05b7;
-               (*outbuf)      +=4;
-               (*outbytesleft)-=4;
-               len += 2;
-           }
-           else if (*temp == 2) { /* 0xc0 -> 0xf86a 0x05dc 0x05b9*/
-               if (*outbytesleft < 6) {
-                   errno = EILSEQ;
-                   return (size_t) -1; 
-               }
-               *temp = 0xf86a;
-               *(temp +1) = 0x05dc;
-               *(temp +2) = 0x05b9;
-               (*outbuf)      +=6;
-               (*outbytesleft)-=6;
-               len += 3;
-           }
-           else if (*temp == 3) { /* 0xde --> 0x05b8 0xf87f */
-               if (*outbytesleft < 4) {
-                   errno = EILSEQ;
-                   return (size_t) -1; 
-               }
-               *temp = 0x05b8;
-               *(temp +1) = 0xf87f;
-               (*outbuf)      +=4;
-               (*outbytesleft)-=4;
-               len += 2;
-           }
-           else {
-               (*outbuf)      +=2;
-               (*outbytesleft)-=2;
-               len++;
-           }
-           (*inbuf)        +=1;
-           (*inbytesleft) -=1;
-       }
-       else    
-       {
-           errno = EILSEQ;
-           return (size_t) -1; 
-       }
-    }
-
-    if (*inbytesleft > 0) {
-        errno = E2BIG;
-        return (size_t) -1;
-    }
-    return len;
-}
-