]> arthur.barton.de Git - netatalk.git/blob - libatalk/unicode/utf8.c
Fix compilation with Solaris 10 on x86, from HEAD
[netatalk.git] / libatalk / unicode / utf8.c
1 /* 
2    Unix SMB/CIFS implementation.
3    minimal iconv implementation
4    Copyright (C) Andrew Tridgell 2001
5    Copyright (C) Jelmer Vernooij 2002,2003
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20    
21    From samba 3.0 beta and GNU libiconv-1.8
22    It's bad but most of the time we can't use libc iconv service:
23    - it doesn't round trip for most encoding
24    - it doesn't know about Apple extension
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif /* HAVE_CONFIG_H */
30 #include <stdlib.h>
31 #include <errno.h>
32
33 #include <netatalk/endian.h>
34 #include <atalk/unicode.h>
35 #include <atalk/logger.h>
36 #include <atalk/unicode.h>
37 #include "byteorder.h"
38
39
/* Converters defined later in this file; declared up front so the charset
 * tables below can reference them. */
static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
42
43 struct charset_functions charset_utf8 =
44 {
45         "UTF8",
46         0x08000103,
47         utf8_pull,
48         utf8_push,
49         CHARSET_VOLUME | CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED,
50         NULL, NULL
51 };
52
53 struct charset_functions charset_utf8_mac =
54 {
55         "UTF8-MAC",
56         0x08000103,
57         utf8_pull,
58         utf8_push,
59         CHARSET_VOLUME | CHARSET_CLIENT | CHARSET_MULTIBYTE | CHARSET_DECOMPOSED,
60         NULL, NULL
61 };
62
63 /* ------------------------ */
64 static size_t utf8_pull(void *cd _U_, char **inbuf, size_t *inbytesleft,
65                          char **outbuf, size_t *outbytesleft)
66 {
67         ucs2_t uc = 0;
68         int len;
69
70         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
71                 unsigned char *c = (unsigned char *)*inbuf;
72                 len = 1;
73
74                 if ((c[0] & 0x80) == 0) {
75                         uc = c[0];
76                 } else if ((c[0] & 0xf0) == 0xe0) {
77                         if (*inbytesleft < 3) {
78                                 LOG(log_debug, logtype_default, "short utf8 char");
79                                 goto badseq;
80                         }
81                         uc = ((ucs2_t) (c[0] & 0x0f) << 12) | ((ucs2_t) (c[1] ^ 0x80) << 6) | (ucs2_t) (c[2] ^ 0x80);
82                         len = 3;
83                 } else if ((c[0] & 0xe0) == 0xc0) {
84                         if (*inbytesleft < 2) {
85                                 LOG(log_debug, logtype_default, "short utf8 char");
86                                 goto badseq;
87                         }
88                         uc = ((ucs2_t) (c[0] & 0x1f) << 6) | (ucs2_t) (c[1] ^ 0x80);
89                         len = 2;
90                 }
91                 else {
92                         errno = EINVAL;
93                         return -1;
94                 }
95
96                 SSVAL(*outbuf,0,uc);
97                 (*inbuf)  += len;
98                 (*inbytesleft)  -= len;
99                 (*outbytesleft) -= 2;
100                 (*outbuf) += 2;
101         }
102
103         if (*inbytesleft > 0) {
104                 errno = E2BIG;
105                 return -1;
106         }
107         
108         return 0;
109
110 badseq:
111         errno = EINVAL;
112         return -1;
113 }
114
115 /* ------------------------ */
116 static size_t utf8_push(void *cd _U_, char **inbuf, size_t *inbytesleft,
117                          char **outbuf, size_t *outbytesleft)
118 {
119         ucs2_t uc=0;
120         int len;
121
122         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
123                 unsigned char *c = (unsigned char *)*outbuf;
124                 uc = SVAL((*inbuf),0);
125                 len=1;
126
127                 if ( uc >= 0x800 ) {
128                         if ( uc >= 0x202a && uc <= 0x202e ) {
129                                 /* ignore bidi hint characters */
130                            len = 0;
131                         }
132                         else {
133                                 if (*outbytesleft < 3) {
134                                         LOG(log_debug, logtype_default, "short utf8 write");
135                                         goto toobig;
136                                 }
137                                 c[2] = 0x80 | (uc & 0x3f);
138                                 uc = uc >> 6;
139                                 uc |= 0x800;
140                                 c[1] = 0x80 | (uc&0x3f);
141                                 uc = uc >> 6;
142                                 uc |= 0xc0;
143                                 c[0] = uc;
144                                 len = 3;
145                         }
146                 } else if (uc >= 0x80) {
147                         if (*outbytesleft < 2) {
148                                 LOG(log_debug, logtype_default, "short utf8 write");
149                                 goto toobig;
150                         }
151                         c[1] = 0x80 | (uc&0x3f);
152                         uc = uc >> 6;
153                         uc |= 0xc0;
154                         c[0] = uc;
155                         len = 2;
156                 } else {
157                         c[0] = uc;
158                 }
159
160                 (*inbytesleft)  -= 2;
161                 (*outbytesleft) -= len;
162                 (*inbuf)  += 2;
163                 (*outbuf) += len;
164         }
165
166         if (*inbytesleft == 1) {
167                 errno = EINVAL;
168                 return -1;
169         }
170
171         if (*inbytesleft > 1) {
172                 errno = E2BIG;
173                 return -1;
174         }
175         
176         return 0;
177
178 toobig:
179         errno = E2BIG;
180         return -1;
181 }