1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this files was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 20 dec 2002.
27  *
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "mbfilter.h"
35 #include "mbfilter_utf32.h"
36 
/* Alias list referenced by mbfl_encoding_utf32; the final NULL ends the list. */
static const char *mbfl_encoding_utf32_aliases[] = {
	"utf32",
	NULL
};
38 
39 const mbfl_encoding mbfl_encoding_utf32 = {
40 	mbfl_no_encoding_utf32,
41 	"UTF-32",
42 	"UTF-32",
43 	(const char *(*)[])&mbfl_encoding_utf32_aliases,
44 	NULL,
45 	MBFL_ENCTYPE_WCS4BE,
46 	&vtbl_utf32_wchar,
47 	&vtbl_wchar_utf32
48 };
49 
50 const mbfl_encoding mbfl_encoding_utf32be = {
51 	mbfl_no_encoding_utf32be,
52 	"UTF-32BE",
53 	"UTF-32BE",
54 	NULL,
55 	NULL,
56 	MBFL_ENCTYPE_WCS4BE,
57 	&vtbl_utf32be_wchar,
58 	&vtbl_wchar_utf32be
59 };
60 
61 const mbfl_encoding mbfl_encoding_utf32le = {
62 	mbfl_no_encoding_utf32le,
63 	"UTF-32LE",
64 	"UTF-32LE",
65 	NULL,
66 	NULL,
67 	MBFL_ENCTYPE_WCS4LE,
68 	&vtbl_utf32le_wchar,
69 	&vtbl_wchar_utf32le
70 };
71 
72 const struct mbfl_convert_vtbl vtbl_utf32_wchar = {
73 	mbfl_no_encoding_utf32,
74 	mbfl_no_encoding_wchar,
75 	mbfl_filt_conv_common_ctor,
76 	mbfl_filt_conv_common_dtor,
77 	mbfl_filt_conv_utf32_wchar,
78 	mbfl_filt_conv_common_flush
79 };
80 
81 const struct mbfl_convert_vtbl vtbl_wchar_utf32 = {
82 	mbfl_no_encoding_wchar,
83 	mbfl_no_encoding_utf32,
84 	mbfl_filt_conv_common_ctor,
85 	mbfl_filt_conv_common_dtor,
86 	mbfl_filt_conv_wchar_utf32be,
87 	mbfl_filt_conv_common_flush
88 };
89 
90 const struct mbfl_convert_vtbl vtbl_utf32be_wchar = {
91 	mbfl_no_encoding_utf32be,
92 	mbfl_no_encoding_wchar,
93 	mbfl_filt_conv_common_ctor,
94 	mbfl_filt_conv_common_dtor,
95 	mbfl_filt_conv_utf32be_wchar,
96 	mbfl_filt_conv_common_flush
97 };
98 
99 const struct mbfl_convert_vtbl vtbl_wchar_utf32be = {
100 	mbfl_no_encoding_wchar,
101 	mbfl_no_encoding_utf32be,
102 	mbfl_filt_conv_common_ctor,
103 	mbfl_filt_conv_common_dtor,
104 	mbfl_filt_conv_wchar_utf32be,
105 	mbfl_filt_conv_common_flush
106 };
107 
108 const struct mbfl_convert_vtbl vtbl_utf32le_wchar = {
109 	mbfl_no_encoding_utf32le,
110 	mbfl_no_encoding_wchar,
111 	mbfl_filt_conv_common_ctor,
112 	mbfl_filt_conv_common_dtor,
113 	mbfl_filt_conv_utf32le_wchar,
114 	mbfl_filt_conv_common_flush
115 };
116 
117 const struct mbfl_convert_vtbl vtbl_wchar_utf32le = {
118 	mbfl_no_encoding_wchar,
119 	mbfl_no_encoding_utf32le,
120 	mbfl_filt_conv_common_ctor,
121 	mbfl_filt_conv_common_dtor,
122 	mbfl_filt_conv_wchar_utf32le,
123 	mbfl_filt_conv_common_flush
124 };
125 
/*
 * Evaluate `statement` once; if its value is negative, make the enclosing
 * function return -1.  Only usable inside functions returning int.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
127 
128 /*
129  * UTF-32 => wchar
130  */
/*
 * Decode one byte of a UTF-32 stream whose byte order may change mid-stream.
 *
 * filter->status layout:
 *   low byte  (0xff)   - index of the next byte within the 4-byte unit
 *   high bits (0xff00) - zero: big-endian input, non-zero: little-endian
 * filter->cache accumulates the bytes of the unit read so far.
 *
 * When a completed unit reads as 0xFFFE0000 (a byte-swapped BOM), the byte
 * order is flipped and U+FEFF is emitted.  Any other completed unit is
 * emitted as-is when it lies in [0, MBFL_WCSPLANE_UTF32MAX) and is not a
 * surrogate (0xD800..0xDFFF); otherwise it is masked with MBFL_WCSGROUP_MASK
 * and tagged with MBFL_WCSGROUP_THROUGH before being emitted.
 *
 * Returns c, or -1 when the output function reports an error.
 */
int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter)
{
	int n, endian, pos, shift;
	unsigned int bits;

	endian = filter->status & 0xff00;
	pos = filter->status & 0xff;
	if (pos > 3) {
		pos = 3;	/* any later index is treated as the final byte */
	}

	/* Byte `pos` lands at the bottom first in LE mode, at the top first in BE mode. */
	shift = endian ? (8 * pos) : (24 - 8 * pos);
	n = (int)((c & 0xffu) << shift);

	if (pos < 3) {
		if (pos == 0) {
			filter->cache = n;	/* first byte starts a fresh unit */
		} else {
			filter->cache |= n;
		}
		filter->status++;
		return c;
	}

	n |= filter->cache;
	bits = (unsigned int)n;	/* inspect the raw bit pattern without signed shifts */

	if ((bits & 0xffffu) == 0 && ((bits >> 16) & 0xffffu) == 0xfffeu) {
		/* Byte-swapped BOM: switch to the opposite byte order. */
		if (endian) {
			filter->status = 0;		/* big-endian */
		} else {
			filter->status = 0x100;		/* little-endian */
		}
		CK((*filter->output_function)(0xfeff, filter->data));
		return c;
	}

	filter->status &= ~0xff;	/* restart at byte 0, keep the byte order */

	/*
	 * n is a signed int, so a unit whose most significant byte is >= 0x80
	 * is negative here; without the n >= 0 test it would slip through the
	 * upper-bound checks and be emitted as if it were a valid code point.
	 */
	if (!(n >= 0 && n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff))) {
		n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	}
	CK((*filter->output_function)(n, filter->data));

	return c;
}
192 
193 /*
194  * UTF-32BE => wchar
195  */
/*
 * Decode one byte of a big-endian UTF-32 stream.
 *
 * filter->status counts the bytes of the current 4-byte unit already seen
 * (0..3) and filter->cache accumulates them, most significant byte first.
 * On the fourth byte the unit is emitted through filter->output_function:
 * unchanged when it lies in [0, MBFL_WCSPLANE_UTF32MAX) and is not a
 * surrogate (0xD800..0xDFFF); otherwise masked with MBFL_WCSGROUP_MASK and
 * tagged with MBFL_WCSGROUP_THROUGH.
 *
 * Returns c, or -1 when the output function reports an error.
 */
int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter)
{
	int n;

	switch (filter->status) {
	case 0:
		filter->status = 1;
		filter->cache = (int)((c & 0xffu) << 24);
		break;
	case 1:
		filter->status = 2;
		filter->cache |= (c & 0xff) << 16;
		break;
	case 2:
		filter->status = 3;
		filter->cache |= (c & 0xff) << 8;
		break;
	default:
		filter->status = 0;
		n = (c & 0xff) | filter->cache;
		/*
		 * n is a signed int, so a unit whose first byte is >= 0x80 is
		 * negative here; without the n >= 0 test it would pass the
		 * upper-bound checks and be emitted as a valid code point.
		 */
		if (!(n >= 0 && n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff))) {
			n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
		}
		CK((*filter->output_function)(n, filter->data));
		break;
	}

	return c;
}
224 
225 /*
226  * wchar => UTF-32BE
227  */
/*
 * Encode one wide character as four big-endian bytes.
 *
 * Values outside [0, MBFL_WCSPLANE_UTF32MAX) are passed to
 * mbfl_filt_conv_illegal_output() instead of being encoded.
 *
 * Returns c, or -1 when an output call reports an error.
 */
int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter)
{
	int shift;

	if (c < 0 || c >= MBFL_WCSPLANE_UTF32MAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return c;
	}

	/* Most significant byte first. */
	for (shift = 24; shift >= 0; shift -= 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return c;
}
241 
242 /*
243  * UTF-32LE => wchar
244  */
/*
 * Decode one byte of a little-endian UTF-32 stream.
 *
 * filter->status counts the bytes of the current 4-byte unit already seen
 * (0..3) and filter->cache accumulates them, least significant byte first.
 * On the fourth byte the unit is emitted through filter->output_function:
 * unchanged when it lies in [0, MBFL_WCSPLANE_UTF32MAX) and is not a
 * surrogate (0xD800..0xDFFF); otherwise masked with MBFL_WCSGROUP_MASK and
 * tagged with MBFL_WCSGROUP_THROUGH.
 *
 * Returns c, or -1 when the output function reports an error.
 */
int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter)
{
	int n;

	switch (filter->status) {
	case 0:
		filter->status = 1;
		filter->cache = (c & 0xff);
		break;
	case 1:
		filter->status = 2;
		filter->cache |= (c & 0xff) << 8;
		break;
	case 2:
		filter->status = 3;
		filter->cache |= (c & 0xff) << 16;
		break;
	default:
		filter->status = 0;
		n = (int)((c & 0xffu) << 24) | filter->cache;
		/*
		 * n is a signed int, so a unit whose last byte is >= 0x80 is
		 * negative here; without the n >= 0 test it would pass the
		 * upper-bound checks and be emitted as a valid code point.
		 */
		if (!(n >= 0 && n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff))) {
			n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
		}
		CK((*filter->output_function)(n, filter->data));
		break;
	}

	return c;
}
273 
274 /*
275  * wchar => UTF-32LE
276  */
/*
 * Encode one wide character as four little-endian bytes.
 *
 * Values outside [0, MBFL_WCSPLANE_UTF32MAX) are passed to
 * mbfl_filt_conv_illegal_output() instead of being encoded.
 *
 * Returns c, or -1 when an output call reports an error.
 */
int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter)
{
	int shift;

	if (c < 0 || c >= MBFL_WCSPLANE_UTF32MAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return c;
	}

	/* Least significant byte first. */
	for (shift = 0; shift <= 24; shift += 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return c;
}
290