]> arthur.barton.de Git - netatalk.git/blob - libatalk/unicode/utf8.c
big merge for db frontend and unicode.
[netatalk.git] / libatalk / unicode / utf8.c
1 /* 
2    Unix SMB/CIFS implementation.
3    minimal iconv implementation
4    Copyright (C) Andrew Tridgell 2001
5    Copyright (C) Jelmer Vernooij 2002,2003
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20    
21    From samba 3.0 beta and GNU libiconv-1.8
22    Unfortunately, most of the time we can't use the libc iconv service:
23    - it doesn't round-trip for most encodings
24    - it doesn't know about the Apple extensions
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif /* HAVE_CONFIG_H */
30 #include <stdlib.h>
31
32 #include <netatalk/endian.h>
33 #include <atalk/unicode.h>
34
35
36 #include <atalk/logger.h>
37 #include <errno.h>
38
39 #include <atalk/unicode.h>
40
41
/* Forward declarations of the two converters defined later in this file. */
static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
44
45 struct charset_functions charset_utf8 =
46 {
47         "UTF8",
48         0x08000103,
49         utf8_pull,
50         utf8_push,
51         CHARSET_VOLUME | CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED
52 };
53
54 struct charset_functions charset_utf8_mac =
55 {
56         "UTF8-MAC",
57         0x08000103,
58         utf8_pull,
59         utf8_push,
60         CHARSET_VOLUME | CHARSET_CLIENT | CHARSET_MULTIBYTE | CHARSET_DECOMPOSED
61 };
62
63 /* ------------------------ */
64 static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
65                          char **outbuf, size_t *outbytesleft)
66 {
67         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
68                 unsigned char *c = (unsigned char *)*inbuf;
69                 unsigned char *uc = (unsigned char *)*outbuf;
70                 int len = 1;
71
72                 if ((c[0] & 0x80) == 0) {
73                         uc[0] = c[0];
74                         uc[1] = 0;
75                 } else if ((c[0] & 0xf0) == 0xe0) {
76                         if (*inbytesleft < 3) {
77                                 LOG(log_debug, logtype_default, "short utf8 char\n");
78                                 goto badseq;
79                         }
80                         uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
81                         uc[0] = (c[1]<<6) | (c[2]&0x3f);
82                         len = 3;
83                 } else if ((c[0] & 0xe0) == 0xc0) {
84                         if (*inbytesleft < 2) {
85                                 LOG(log_debug, logtype_default, "short utf8 char\n");
86                                 goto badseq;
87                         }
88                         uc[1] = (c[0]>>2) & 0x7;
89                         uc[0] = (c[0]<<6) | (c[1]&0x3f);
90                         len = 2;
91                 }
92
93                 (*inbuf)  += len;
94                 (*inbytesleft)  -= len;
95                 (*outbytesleft) -= 2;
96                 (*outbuf) += 2;
97         }
98
99         if (*inbytesleft > 0) {
100                 errno = E2BIG;
101                 return -1;
102         }
103         
104         return 0;
105
106 badseq:
107         errno = EINVAL;
108         return -1;
109 }
110
111 /* ------------------------ */
112 static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
113                          char **outbuf, size_t *outbytesleft)
114 {
115         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
116                 unsigned char *c = (unsigned char *)*outbuf;
117                 unsigned char *uc = (unsigned char *)*inbuf;
118                 int len=1;
119
120                 if (uc[1] & 0xf8) {
121                         if (*outbytesleft < 3) {
122                                 LOG(log_debug, logtype_default, "short utf8 write\n");
123                                 goto toobig;
124                         }
125                         c[0] = 0xe0 | (uc[1]>>4);
126                         c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
127                         c[2] = 0x80 | (uc[0]&0x3f);
128                         len = 3;
129                 } else if (uc[1] | (uc[0] & 0x80)) {
130                         if (*outbytesleft < 2) {
131                                 LOG(log_debug, logtype_default, "short utf8 write\n");
132                                 goto toobig;
133                         }
134                         c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
135                         c[1] = 0x80 | (uc[0]&0x3f);
136                         len = 2;
137                 } else {
138                         c[0] = uc[0];
139                 }
140
141
142                 (*inbytesleft)  -= 2;
143                 (*outbytesleft) -= len;
144                 (*inbuf)  += 2;
145                 (*outbuf) += len;
146         }
147
148         if (*inbytesleft == 1) {
149                 errno = EINVAL;
150                 return -1;
151         }
152
153         if (*inbytesleft > 1) {
154                 errno = E2BIG;
155                 return -1;
156         }
157         
158         return 0;
159
160 toobig:
161         errno = E2BIG;
162         return -1;
163 }