2 Unix SMB/CIFS implementation.
3 minimal iconv implementation
4 Copyright (C) Andrew Tridgell 2001
5 Copyright (C) Jelmer Vernooij 2002,2003
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 From samba 3.0 beta and GNU libiconv-1.8
22 It's bad, but most of the time we can't use the libc iconv service:
23 - it doesn't round-trip for most encodings
24 - it doesn't know about Apple extensions
29 #endif /* HAVE_CONFIG_H */
33 #include <netatalk/endian.h>
34 #include <atalk/unicode.h>
35 #include <atalk/logger.h>
36 #include <atalk/unicode.h>
37 #include "byteorder.h"
/* Forward declarations of the two file-local converters defined further down.
 * Both share one signature: an opaque handle, then input buffer / input bytes
 * left, then output buffer / output bytes left (all passed by pointer). */
40 static size_t utf8_pull(void *,char **, size_t *, char **, size_t *);
41 static size_t utf8_push(void *,char **, size_t *, char **, size_t *);
/* Charset descriptor for plain UTF-8. Only the flags member of the initializer
 * is visible in this excerpt: CHARSET_VOLUME, CHARSET_MULTIBYTE and
 * CHARSET_PRECOMPOSED are set.
 * NOTE(review): the flag names suggest a precomposed, volume-side encoding --
 * confirm their exact meaning against atalk/unicode.h. */
43 struct charset_functions charset_utf8 =
49 CHARSET_VOLUME | CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED,
/* Charset descriptor for the Mac flavour of UTF-8. Only the flags member of
 * the initializer is visible in this excerpt: CHARSET_VOLUME, CHARSET_CLIENT,
 * CHARSET_MULTIBYTE and CHARSET_DECOMPOSED are set, i.e. compared with
 * charset_utf8 it adds CHARSET_CLIENT and uses DECOMPOSED instead of
 * PRECOMPOSED.
 * NOTE(review): presumably this marks the decomposed form used by Mac
 * clients -- confirm against atalk/unicode.h. */
53 struct charset_functions charset_utf8_mac =
59 CHARSET_VOLUME | CHARSET_CLIENT | CHARSET_MULTIBYTE | CHARSET_DECOMPOSED,
63 /* ------------------------ */
/*
 * utf8_pull: decode UTF-8 input into ucs2_t code units.
 *
 * cd           - conversion handle; tagged _U_ (presumably "unused" -- confirm
 *                the macro definition).
 * inbuf        - address of the read pointer into the UTF-8 input.
 * inbytesleft  - address of the count of unread input bytes; reduced by the
 *                length of each decoded sequence.
 * outbuf       - address of the write pointer into the output.
 * outbytesleft - address of the count of free output bytes.
 *
 * Only one-, two- and three-byte sequences are handled in the visible code.
 * NOTE(review): the stores to *outbuf, the pointer advances, the error path
 * and the return statements are not visible in this excerpt.
 */
64 static size_t utf8_pull(void *cd _U_, char **inbuf, size_t *inbytesleft,
65 char **outbuf, size_t *outbytesleft)
/* Keep going while at least one input byte and two bytes of output space
 * remain (two bytes presumably being one ucs2_t -- confirm). */
70 while (*inbytesleft >= 1 && *outbytesleft >= 2) {
/* View the input as unsigned bytes so the bit tests below are well defined. */
71 unsigned char *c = (unsigned char *)*inbuf;
/* High bit clear: a single-byte (7-bit) character. */
74 if ((c[0] & 0x80) == 0) {
/* Lead byte 1110xxxx: a three-byte sequence. */
76 } else if ((c[0] & 0xf0) == 0xe0) {
/* The input ends before the sequence is complete. */
77 if (*inbytesleft < 3) {
78 LOG(log_debug, logtype_default, "short utf8 char");
/* 4 payload bits from the lead byte, then 6 from each following byte.
 * XOR with 0x80 clears the top bit of a well-formed 10xxxxxx byte.
 * NOTE(review): no check that c[1]/c[2] really are 10xxxxxx continuation
 * bytes is visible here -- confirm whether malformed input is rejected. */
81 uc = ((ucs2_t) (c[0] & 0x0f) << 12) | ((ucs2_t) (c[1] ^ 0x80) << 6) | (ucs2_t) (c[2] ^ 0x80);
/* Lead byte 110xxxxx: a two-byte sequence. */
83 } else if ((c[0] & 0xe0) == 0xc0) {
/* The input ends before the sequence is complete. */
84 if (*inbytesleft < 2) {
85 LOG(log_debug, logtype_default, "short utf8 char");
/* 5 payload bits from the lead byte, 6 from the following byte. */
88 uc = ((ucs2_t) (c[0] & 0x1f) << 6) | (ucs2_t) (c[1] ^ 0x80);
/* Consume the bytes of the sequence just decoded. */
98 (*inbytesleft) -= len;
/* Input is left over after the loop; presumably the output buffer filled up
 * first -- the handling is not visible in this excerpt. */
103 if (*inbytesleft > 0) {
115 /* ------------------------ */
/*
 * utf8_push: encode 16-bit input code units as UTF-8.
 *
 * cd           - conversion handle; tagged _U_ (presumably "unused" -- confirm
 *                the macro definition).
 * inbuf        - address of the read pointer into the input.
 * inbytesleft  - address of the count of unread input bytes.
 * outbuf       - address of the write pointer into the UTF-8 output.
 * outbytesleft - address of the count of free output bytes; reduced by the
 *                length of each encoded sequence.
 *
 * NOTE(review): the lead-byte stores, the pointer advances, the error path and
 * the return statements are not visible in this excerpt.
 */
116 static size_t utf8_push(void *cd _U_, char **inbuf, size_t *inbytesleft,
117 char **outbuf, size_t *outbytesleft)
/* Keep going while at least two input bytes and one byte of output space
 * remain. */
122 while (*inbytesleft >= 2 && *outbytesleft >= 1) {
/* c addresses the output as unsigned bytes; the encoded bytes are stored
 * through it. */
123 unsigned char *c = (unsigned char *)*outbuf;
/* Fetch the next code unit from the start of the input (SVAL is presumably a
 * 16-bit load macro from byteorder.h -- confirm its byte order). */
124 uc = SVAL((*inbuf),0);
/* U+202A..U+202E get special treatment. */
128 if ( uc >= 0x202a && uc <= 0x202e ) {
129 /* ignore bidi hint characters */
/* Three-byte encoding: needs three free output bytes. */
133 if (*outbytesleft < 3) {
134 LOG(log_debug, logtype_default, "short utf8 write");
/* Last byte: 10xxxxxx carrying the low six bits of uc. */
137 c[2] = 0x80 | (uc & 0x3f);
/* Middle byte: 10xxxxxx with the next six bits (uc is presumably shifted
 * right between these stores; the shift is not visible in this excerpt). */
140 c[1] = 0x80 | (uc&0x3f);
/* Values of 0x80 and above that did not take an earlier branch: two-byte
 * encoding. */
146 } else if (uc >= 0x80) {
/* Needs two free output bytes. */
147 if (*outbytesleft < 2) {
148 LOG(log_debug, logtype_default, "short utf8 write");
/* Trailing byte: 10xxxxxx carrying the low six bits of uc. */
151 c[1] = 0x80 | (uc&0x3f);
/* Account for the bytes just written. */
161 (*outbytesleft) -= len;
/* Exactly one input byte left over, i.e. half a code unit; the handling is
 * not visible in this excerpt. */
166 if (*inbytesleft == 1) {
/* More than one input byte left over; presumably the output buffer filled up
 * first -- the handling is not visible in this excerpt. */
171 if (*inbytesleft > 1) {