1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_ucs4.h"
32 
/* Alternative names for the generic "UCS-4" encoding; the list is
 * NULL-terminated and referenced by the mbfl_encoding_ucs4 descriptor. */
33 static const char *mbfl_encoding_ucs4_aliases[] = {"ISO-10646-UCS-4", "UCS4", NULL};
34 
35 /* This library historically had encodings called 'byte4be' and 'byte4le',
36  * which were almost identical to UCS-4.
37  * Minimal support is maintained by aliasing them to UCS-4BE and UCS-4LE. */
38 static const char *mbfl_encoding_ucs4be_aliases[] = {"byte4be", NULL};
39 static const char *mbfl_encoding_ucs4le_aliases[] = {"byte4le", NULL};
40 
/* Forward declaration: the flush routine is defined at the end of this file
 * but is referenced by the conversion tables that precede it. */
41 static int mbfl_filt_conv_ucs4_wchar_flush(mbfl_convert_filter *filter);
42 
/*
 * Encoding descriptor for the generic "UCS-4" encoding.
 * The initializer is positional; the member notes below are inferred from
 * the values and should be confirmed against the mbfl_encoding declaration
 * (not visible in this file).
 */
43 const mbfl_encoding mbfl_encoding_ucs4 = {
44 	mbfl_no_encoding_ucs4,	/* encoding identifier */
45 	"UCS-4",	/* presumably the canonical name -- confirm */
46 	"UCS-4",	/* presumably the MIME name -- confirm */
47 	mbfl_encoding_ucs4_aliases,	/* NULL-terminated alias list defined above */
48 	NULL,
49 	MBFL_ENCTYPE_WCS4,
50 	&vtbl_ucs4_wchar,	/* conversion table UCS-4 => wchar */
51 	&vtbl_wchar_ucs4,	/* conversion table wchar => UCS-4 */
52 	NULL
53 };
54 
/*
 * Encoding descriptor for "UCS-4BE" (also reachable through the legacy
 * alias "byte4be").
 * The initializer is positional; the member notes below are inferred from
 * the values and should be confirmed against the mbfl_encoding declaration
 * (not visible in this file).
 */
55 const mbfl_encoding mbfl_encoding_ucs4be = {
56 	mbfl_no_encoding_ucs4be,	/* encoding identifier */
57 	"UCS-4BE",	/* presumably the canonical name -- confirm */
58 	"UCS-4BE",	/* presumably the MIME name -- confirm */
59 	mbfl_encoding_ucs4be_aliases,	/* NULL-terminated alias list defined above */
60 	NULL,
61 	MBFL_ENCTYPE_WCS4,
62 	&vtbl_ucs4be_wchar,	/* conversion table UCS-4BE => wchar */
63 	&vtbl_wchar_ucs4be,	/* conversion table wchar => UCS-4BE */
64 	NULL
65 };
66 
/*
 * Encoding descriptor for "UCS-4LE" (also reachable through the legacy
 * alias "byte4le").
 * The initializer is positional; the member notes below are inferred from
 * the values and should be confirmed against the mbfl_encoding declaration
 * (not visible in this file).
 */
67 const mbfl_encoding mbfl_encoding_ucs4le = {
68 	mbfl_no_encoding_ucs4le,	/* encoding identifier */
69 	"UCS-4LE",	/* presumably the canonical name -- confirm */
70 	"UCS-4LE",	/* presumably the MIME name -- confirm */
71 	mbfl_encoding_ucs4le_aliases,	/* NULL-terminated alias list defined above */
72 	NULL,
73 	MBFL_ENCTYPE_WCS4,
74 	&vtbl_ucs4le_wchar,	/* conversion table UCS-4LE => wchar */
75 	&vtbl_wchar_ucs4le,	/* conversion table wchar => UCS-4LE */
76 	NULL
77 };
78 
/*
 * Conversion table for UCS-4 => wchar. It wires in the decoder that reacts
 * to byte order marks (mbfl_filt_conv_ucs4_wchar) and the flush routine that
 * reports truncated input.
 * Positional initializer: the member roles noted below are inferred from the
 * values; confirm against the declaration of struct mbfl_convert_vtbl.
 */
79 const struct mbfl_convert_vtbl vtbl_ucs4_wchar = {
80 	mbfl_no_encoding_ucs4,	/* source encoding */
81 	mbfl_no_encoding_wchar,	/* destination encoding */
82 	mbfl_filt_conv_common_ctor,
83 	NULL,
84 	mbfl_filt_conv_ucs4_wchar,	/* called once per input byte */
85 	mbfl_filt_conv_ucs4_wchar_flush,	/* called at end of input */
86 	NULL,
87 };
88 
/*
 * Conversion table for wchar => UCS-4. The encoder used here is
 * mbfl_filt_conv_wchar_ucs4be, so generic UCS-4 output is written most
 * significant byte first.
 * Positional initializer: the member roles noted below are inferred from the
 * values; confirm against the declaration of struct mbfl_convert_vtbl.
 */
89 const struct mbfl_convert_vtbl vtbl_wchar_ucs4 = {
90 	mbfl_no_encoding_wchar,	/* source encoding */
91 	mbfl_no_encoding_ucs4,	/* destination encoding */
92 	mbfl_filt_conv_common_ctor,
93 	NULL,
94 	mbfl_filt_conv_wchar_ucs4be,	/* called once per code point */
95 	mbfl_filt_conv_common_flush,
96 	NULL,
97 };
98 
/*
 * Conversion table for UCS-4BE => wchar. It uses the fixed big-endian
 * decoder and the flush routine that reports truncated input.
 * Positional initializer: the member roles noted below are inferred from the
 * values; confirm against the declaration of struct mbfl_convert_vtbl.
 */
99 const struct mbfl_convert_vtbl vtbl_ucs4be_wchar = {
100 	mbfl_no_encoding_ucs4be,	/* source encoding */
101 	mbfl_no_encoding_wchar,	/* destination encoding */
102 	mbfl_filt_conv_common_ctor,
103 	NULL,
104 	mbfl_filt_conv_ucs4be_wchar,	/* called once per input byte */
105 	mbfl_filt_conv_ucs4_wchar_flush,	/* called at end of input */
106 	NULL,
107 };
108 
/*
 * Conversion table for wchar => UCS-4BE.
 * Positional initializer: the member roles noted below are inferred from the
 * values; confirm against the declaration of struct mbfl_convert_vtbl.
 */
109 const struct mbfl_convert_vtbl vtbl_wchar_ucs4be = {
110 	mbfl_no_encoding_wchar,	/* source encoding */
111 	mbfl_no_encoding_ucs4be,	/* destination encoding */
112 	mbfl_filt_conv_common_ctor,
113 	NULL,
114 	mbfl_filt_conv_wchar_ucs4be,	/* called once per code point */
115 	mbfl_filt_conv_common_flush,
116 	NULL,
117 };
118 
/*
 * Conversion table for UCS-4LE => wchar. It uses the fixed little-endian
 * decoder and the flush routine that reports truncated input.
 * Positional initializer: the member roles noted below are inferred from the
 * values; confirm against the declaration of struct mbfl_convert_vtbl.
 */
119 const struct mbfl_convert_vtbl vtbl_ucs4le_wchar = {
120 	mbfl_no_encoding_ucs4le,	/* source encoding */
121 	mbfl_no_encoding_wchar,	/* destination encoding */
122 	mbfl_filt_conv_common_ctor,
123 	NULL,
124 	mbfl_filt_conv_ucs4le_wchar,	/* called once per input byte */
125 	mbfl_filt_conv_ucs4_wchar_flush,	/* called at end of input */
126 	NULL,
127 };
128 
/*
 * Conversion table for wchar => UCS-4LE.
 * Positional initializer: the member roles noted below are inferred from the
 * values; confirm against the declaration of struct mbfl_convert_vtbl.
 */
129 const struct mbfl_convert_vtbl vtbl_wchar_ucs4le = {
130 	mbfl_no_encoding_wchar,	/* source encoding */
131 	mbfl_no_encoding_ucs4le,	/* destination encoding */
132 	mbfl_filt_conv_common_ctor,
133 	NULL,
134 	mbfl_filt_conv_wchar_ucs4le,	/* called once per code point */
135 	mbfl_filt_conv_common_flush,
136 	NULL,
137 };
138 
139 
/*
 * Evaluate `expr` exactly once; if the result is negative, make the
 * enclosing function return -1 immediately. Otherwise fall through.
 */
#define CK(expr) \
	do { \
		if ((expr) < 0) { \
			return (-1); \
		} \
	} while (0)
141 
142 /*
143  * UCS-4 => wchar
144  */
mbfl_filt_conv_ucs4_wchar(int c,mbfl_convert_filter * filter)145 int mbfl_filt_conv_ucs4_wchar(int c, mbfl_convert_filter *filter)
146 {
147 	int n, endian;
148 
149 	endian = filter->status & 0xff00;
150 	switch (filter->status & 0xff) {
151 	case 0:
152 		if (endian) {
153 			n = c & 0xff;
154 		} else {
155 			n = (c & 0xffu) << 24;
156 		}
157 		filter->cache = n;
158 		filter->status++;
159 		break;
160 	case 1:
161 		if (endian) {
162 			n = (c & 0xff) << 8;
163 		} else {
164 			n = (c & 0xff) << 16;
165 		}
166 		filter->cache |= n;
167 		filter->status++;
168 		break;
169 	case 2:
170 		if (endian) {
171 			n = (c & 0xff) << 16;
172 		} else {
173 			n = (c & 0xff) << 8;
174 		}
175 		filter->cache |= n;
176 		filter->status++;
177 		break;
178 	default:
179 		if (endian) {
180 			n = (c & 0xffu) << 24;
181 		} else {
182 			n = c & 0xff;
183 		}
184 		n |= filter->cache;
185 		if ((n & 0xffff) == 0 && ((n >> 16) & 0xffff) == 0xfffe) {
186 			if (endian) {
187 				filter->status = 0;		/* big-endian */
188 			} else {
189 				filter->status = 0x100;		/* little-endian */
190 			}
191 		} else if (n != 0xfeff) {
192 			CK((*filter->output_function)(n, filter->data));
193 		}
194 		filter->status &= ~0xff;
195 		break;
196 	}
197 
198 	return 0;
199 }
200 
201 /*
202  * UCS-4BE => wchar
203  */
mbfl_filt_conv_ucs4be_wchar(int c,mbfl_convert_filter * filter)204 int mbfl_filt_conv_ucs4be_wchar(int c, mbfl_convert_filter *filter)
205 {
206 	int n;
207 
208 	if (filter->status == 0) {
209 		filter->status = 1;
210 		n = (c & 0xffu) << 24;
211 		filter->cache = n;
212 	} else if (filter->status == 1) {
213 		filter->status = 2;
214 		n = (c & 0xff) << 16;
215 		filter->cache |= n;
216 	} else if (filter->status == 2) {
217 		filter->status = 3;
218 		n = (c & 0xff) << 8;
219 		filter->cache |= n;
220 	} else {
221 		filter->status = 0;
222 		n = (c & 0xff) | filter->cache;
223 		CK((*filter->output_function)(n, filter->data));
224 	}
225 	return 0;
226 }
227 
228 /*
229  * wchar => UCS-4BE
230  */
mbfl_filt_conv_wchar_ucs4be(int c,mbfl_convert_filter * filter)231 int mbfl_filt_conv_wchar_ucs4be(int c, mbfl_convert_filter *filter)
232 {
233 	if (c != MBFL_BAD_INPUT) {
234 		CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
235 		CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
236 		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
237 		CK((*filter->output_function)(c & 0xff, filter->data));
238 	} else {
239 		CK(mbfl_filt_conv_illegal_output(c, filter));
240 	}
241 
242 	return 0;
243 }
244 
245 /*
246  * UCS-4LE => wchar
247  */
mbfl_filt_conv_ucs4le_wchar(int c,mbfl_convert_filter * filter)248 int mbfl_filt_conv_ucs4le_wchar(int c, mbfl_convert_filter *filter)
249 {
250 	int n;
251 
252 	if (filter->status == 0) {
253 		filter->status = 1;
254 		n = (c & 0xff);
255 		filter->cache = n;
256 	} else if (filter->status == 1) {
257 		filter->status = 2;
258 		n = (c & 0xff) << 8;
259 		filter->cache |= n;
260 	} else if (filter->status == 2) {
261 		filter->status = 3;
262 		n = (c & 0xff) << 16;
263 		filter->cache |= n;
264 	} else {
265 		filter->status = 0;
266 		n = ((c & 0xffu) << 24) | filter->cache;
267 		CK((*filter->output_function)(n, filter->data));
268 	}
269 	return 0;
270 }
271 
272 /*
273  * wchar => UCS-4LE
274  */
mbfl_filt_conv_wchar_ucs4le(int c,mbfl_convert_filter * filter)275 int mbfl_filt_conv_wchar_ucs4le(int c, mbfl_convert_filter *filter)
276 {
277 	if (c != MBFL_BAD_INPUT) {
278 		CK((*filter->output_function)(c & 0xff, filter->data));
279 		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
280 		CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
281 		CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
282 	} else {
283 		CK(mbfl_filt_conv_illegal_output(c, filter));
284 	}
285 
286 	return 0;
287 }
288 
/*
 * Flush routine shared by the UCS-4, UCS-4BE and UCS-4LE decoders.
 *
 * If the low four bits of filter->status are non-zero, the input stopped
 * part-way through a 4-byte unit, so MBFL_BAD_INPUT is sent to
 * filter->output_function. The status is then cleared and, when one is
 * set, filter->flush_function is invoked (its result is not examined).
 *
 * Returns 0, or -1 if the output function returned a negative value; in
 * that case the status is left untouched and the downstream flush is
 * not called.
 */
static int mbfl_filt_conv_ucs4_wchar_flush(mbfl_convert_filter *filter)
{
	int pending = filter->status & 0xF;

	if (pending != 0) {
		/* Input string was truncated */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}
	filter->status = 0;

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);

	return 0;
}
303