]> arthur.barton.de Git - netatalk.git/blob - libatalk/unicode/utf8.c
68e967470585fd056f2227fd7f69ef2697821a48
[netatalk.git] / libatalk / unicode / utf8.c
1 /* 
2    Unix SMB/CIFS implementation.
3    minimal iconv implementation
4    Copyright (C) Andrew Tridgell 2001
5    Copyright (C) Jelmer Vernooij 2002,2003
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20    
21    From samba 3.0 beta and GNU libiconv-1.8
22    It's bad but most of the time we can't use libc iconv service:
23    - it doesn't round trip for most encoding
24    - it doesn't know about Apple extension
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif /* HAVE_CONFIG_H */
30 #include <stdlib.h>
31 #include <errno.h>
32
33 #include <netatalk/endian.h>
34 #include <atalk/unicode.h>
35 #include <atalk/logger.h>
36 #include <atalk/unicode.h>
37 #include "byteorder.h"
38
39
/* Forward declarations of the two converters defined later in this file. */
static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
42
43 struct charset_functions charset_utf8 =
44 {
45         "UTF8",
46         0x08000103,
47         utf8_pull,
48         utf8_push,
49         CHARSET_VOLUME | CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED
50 };
51
52 struct charset_functions charset_utf8_mac =
53 {
54         "UTF8-MAC",
55         0x08000103,
56         utf8_pull,
57         utf8_push,
58         CHARSET_VOLUME | CHARSET_CLIENT | CHARSET_MULTIBYTE | CHARSET_DECOMPOSED
59 };
60
61 /* ------------------------ */
62 static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
63                          char **outbuf, size_t *outbytesleft)
64 {
65         ucs2_t uc = 0;
66         int len;
67
68         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
69                 unsigned char *c = (unsigned char *)*inbuf;
70                 len = 1;
71
72                 if ((c[0] & 0x80) == 0) {
73                         uc = c[0];
74                 } else if ((c[0] & 0xf0) == 0xe0) {
75                         if (*inbytesleft < 3) {
76                                 LOG(log_debug, logtype_default, "short utf8 char");
77                                 goto badseq;
78                         }
79                         uc = ((ucs2_t) (c[0] & 0x0f) << 12) | ((ucs2_t) (c[1] ^ 0x80) << 6) | (ucs2_t) (c[2] ^ 0x80);
80                         len = 3;
81                 } else if ((c[0] & 0xe0) == 0xc0) {
82                         if (*inbytesleft < 2) {
83                                 LOG(log_debug, logtype_default, "short utf8 char");
84                                 goto badseq;
85                         }
86                         uc = ((ucs2_t) (c[0] & 0x1f) << 6) | (ucs2_t) (c[1] ^ 0x80);
87                         len = 2;
88                 }
89                 else {
90                         errno = EINVAL;
91                         return -1;
92                 }
93
94                 SSVAL(*outbuf,0,uc);
95                 (*inbuf)  += len;
96                 (*inbytesleft)  -= len;
97                 (*outbytesleft) -= 2;
98                 (*outbuf) += 2;
99         }
100
101         if (*inbytesleft > 0) {
102                 errno = E2BIG;
103                 return -1;
104         }
105         
106         return 0;
107
108 badseq:
109         errno = EINVAL;
110         return -1;
111 }
112
113 /* ------------------------ */
114 static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
115                          char **outbuf, size_t *outbytesleft)
116 {
117         ucs2_t uc=0;
118         int len;
119
120         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
121                 unsigned char *c = (unsigned char *)*outbuf;
122                 uc = SVAL((*inbuf),0);
123                 len=1;
124
125                 if ( uc >= 0x800 ) {
126                         if (*outbytesleft < 3) {
127                                 LOG(log_debug, logtype_default, "short utf8 write");
128                                 goto toobig;
129                         }
130                         c[2] = 0x80 | (uc & 0x3f);
131                         uc = uc >> 6;
132                         uc |= 0x800;
133                         c[1] = 0x80 | (uc&0x3f);
134                         uc = uc >> 6;
135                         uc |= 0xc0;
136                         c[0] = uc;
137                         len = 3;
138                 } else if (uc >= 0x80) {
139                         if (*outbytesleft < 2) {
140                                 LOG(log_debug, logtype_default, "short utf8 write");
141                                 goto toobig;
142                         }
143                         c[1] = 0x80 | (uc&0x3f);
144                         uc = uc >> 6;
145                         uc |= 0xc0;
146                         c[0] = uc;
147                         len = 2;
148                 } else {
149                         c[0] = uc;
150                 }
151
152
153                 (*inbytesleft)  -= 2;
154                 (*outbytesleft) -= len;
155                 (*inbuf)  += 2;
156                 (*outbuf) += len;
157         }
158
159         if (*inbytesleft == 1) {
160                 errno = EINVAL;
161                 return -1;
162         }
163
164         if (*inbytesleft > 1) {
165                 errno = E2BIG;
166                 return -1;
167         }
168         
169         return 0;
170
171 toobig:
172         errno = E2BIG;
173         return -1;
174 }