/*
 * "streamable kanji code filter and converter"
 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
 *
 * LICENSE NOTICES
 *
 * This file is part of "streamable kanji code filter and converter",
 * which is distributed under the terms of GNU Lesser General Public
 * License (version 2) as published by the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with "streamable kanji code filter and converter";
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 * Suite 330, Boston, MA  02111-1307  USA
 *
 * The author of this file:
 *
 */
/*
 * The source code included in this file was separated from mbfilter.c
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 *
 */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "mbfilter.h"
35 #include "mbfilter_utf16.h"
36 
/* Alternative names for the UTF-16 encoding; the list is NULL-terminated. */
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
38 
39 const mbfl_encoding mbfl_encoding_utf16 = {
40 	mbfl_no_encoding_utf16,
41 	"UTF-16",
42 	"UTF-16",
43 	(const char *(*)[])&mbfl_encoding_utf16_aliases,
44 	NULL,
45 	MBFL_ENCTYPE_MWC2BE
46 };
47 
48 const mbfl_encoding mbfl_encoding_utf16be = {
49 	mbfl_no_encoding_utf16be,
50 	"UTF-16BE",
51 	"UTF-16BE",
52 	NULL,
53 	NULL,
54 	MBFL_ENCTYPE_MWC2BE
55 };
56 
57 const mbfl_encoding mbfl_encoding_utf16le = {
58 	mbfl_no_encoding_utf16le,
59 	"UTF-16LE",
60 	"UTF-16LE",
61 	NULL,
62 	NULL,
63 	MBFL_ENCTYPE_MWC2LE
64 };
65 
66 const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
67 	mbfl_no_encoding_utf16,
68 	mbfl_no_encoding_wchar,
69 	mbfl_filt_conv_common_ctor,
70 	mbfl_filt_conv_common_dtor,
71 	mbfl_filt_conv_utf16_wchar,
72 	mbfl_filt_conv_common_flush
73 };
74 
75 const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
76 	mbfl_no_encoding_wchar,
77 	mbfl_no_encoding_utf16,
78 	mbfl_filt_conv_common_ctor,
79 	mbfl_filt_conv_common_dtor,
80 	mbfl_filt_conv_wchar_utf16be,
81 	mbfl_filt_conv_common_flush
82 };
83 
84 const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
85 	mbfl_no_encoding_utf16be,
86 	mbfl_no_encoding_wchar,
87 	mbfl_filt_conv_common_ctor,
88 	mbfl_filt_conv_common_dtor,
89 	mbfl_filt_conv_utf16be_wchar,
90 	mbfl_filt_conv_common_flush
91 };
92 
93 const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
94 	mbfl_no_encoding_wchar,
95 	mbfl_no_encoding_utf16be,
96 	mbfl_filt_conv_common_ctor,
97 	mbfl_filt_conv_common_dtor,
98 	mbfl_filt_conv_wchar_utf16be,
99 	mbfl_filt_conv_common_flush
100 };
101 
102 const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
103 	mbfl_no_encoding_utf16le,
104 	mbfl_no_encoding_wchar,
105 	mbfl_filt_conv_common_ctor,
106 	mbfl_filt_conv_common_dtor,
107 	mbfl_filt_conv_utf16le_wchar,
108 	mbfl_filt_conv_common_flush
109 };
110 
111 const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
112 	mbfl_no_encoding_wchar,
113 	mbfl_no_encoding_utf16le,
114 	mbfl_filt_conv_common_ctor,
115 	mbfl_filt_conv_common_dtor,
116 	mbfl_filt_conv_wchar_utf16le,
117 	mbfl_filt_conv_common_flush
118 };
119 
/*
 * Evaluate `statement` once; if its value is negative, make the
 * enclosing function return -1 immediately.
 */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
121 
122 /*
123  * UTF-16 => wchar
124  */
mbfl_filt_conv_utf16_wchar(int c,mbfl_convert_filter * filter)125 int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
126 {
127 	int n, endian;
128 
129 	endian = filter->status & 0xff00;
130 	switch (filter->status & 0x0f) {
131 	case 0:
132 		if (endian) {
133 			n = c & 0xff;
134 		} else {
135 			n = (c & 0xff) << 8;
136 		}
137 		filter->cache |= n;
138 		filter->status++;
139 		break;
140 	default:
141 		if (endian) {
142 			n = (c & 0xff) << 8;
143 		} else {
144 			n = c & 0xff;
145 		}
146 		n |= filter->cache & 0xffff;
147 		filter->status &= ~0x0f;
148 		if (n >= 0xd800 && n < 0xdc00) {
149 			filter->cache = ((n & 0x3ff) << 16) + 0x400000;
150 		} else if (n >= 0xdc00 && n < 0xe000) {
151 			n &= 0x3ff;
152 			n |= (filter->cache & 0xfff0000) >> 6;
153 			filter->cache = 0;
154 			if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
155 				CK((*filter->output_function)(n, filter->data));
156 			} else {		/* illegal character */
157 				n &= MBFL_WCSGROUP_MASK;
158 				n |= MBFL_WCSGROUP_THROUGH;
159 				CK((*filter->output_function)(n, filter->data));
160 			}
161 		} else {
162 			int is_first = filter->status & 0x10;
163 			filter->cache = 0;
164 			filter->status |= 0x10;
165 			if (!is_first) {
166 				if (n == 0xfffe) {
167 					if (endian) {
168 						filter->status &= ~0x100;		/* big-endian */
169 					} else {
170 						filter->status |= 0x100;		/* little-endian */
171 					}
172 					break;
173 				} else if (n == 0xfeff) {
174 					break;
175 				}
176 			}
177 			CK((*filter->output_function)(n, filter->data));
178 		}
179 		break;
180 	}
181 
182 	return c;
183 }
184 
185 /*
186  * UTF-16BE => wchar
187  */
mbfl_filt_conv_utf16be_wchar(int c,mbfl_convert_filter * filter)188 int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
189 {
190 	int n;
191 
192 	switch (filter->status) {
193 	case 0:
194 		filter->status = 1;
195 		n = (c & 0xff) << 8;
196 		filter->cache |= n;
197 		break;
198 	default:
199 		filter->status = 0;
200 		n = (filter->cache & 0xff00) | (c & 0xff);
201 		if (n >= 0xd800 && n < 0xdc00) {
202 			filter->cache = ((n & 0x3ff) << 16) + 0x400000;
203 		} else if (n >= 0xdc00 && n < 0xe000) {
204 			n &= 0x3ff;
205 			n |= (filter->cache & 0xfff0000) >> 6;
206 			filter->cache = 0;
207 			if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
208 				CK((*filter->output_function)(n, filter->data));
209 			} else {		/* illegal character */
210 				n &= MBFL_WCSGROUP_MASK;
211 				n |= MBFL_WCSGROUP_THROUGH;
212 				CK((*filter->output_function)(n, filter->data));
213 			}
214 		} else {
215 			filter->cache = 0;
216 			CK((*filter->output_function)(n, filter->data));
217 		}
218 		break;
219 	}
220 
221 	return c;
222 }
223 
224 /*
225  * wchar => UTF-16BE
226  */
mbfl_filt_conv_wchar_utf16be(int c,mbfl_convert_filter * filter)227 int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
228 {
229 	int n;
230 
231 	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
232 		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
233 		CK((*filter->output_function)(c & 0xff, filter->data));
234 	} else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
235 		n = ((c >> 10) - 0x40) | 0xd800;
236 		CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
237 		CK((*filter->output_function)(n & 0xff, filter->data));
238 		n = (c & 0x3ff) | 0xdc00;
239 		CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
240 		CK((*filter->output_function)(n & 0xff, filter->data));
241 	} else {
242 		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
243 			CK(mbfl_filt_conv_illegal_output(c, filter));
244 		}
245 	}
246 
247 	return c;
248 }
249 
250 /*
251  * UTF-16LE => wchar
252  */
mbfl_filt_conv_utf16le_wchar(int c,mbfl_convert_filter * filter)253 int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
254 {
255 	int n;
256 
257 	switch (filter->status) {
258 	case 0:
259 		filter->status = 1;
260 		n = c & 0xff;
261 		filter->cache |= n;
262 		break;
263 	default:
264 		filter->status = 0;
265 		n = (filter->cache & 0xff) | ((c & 0xff) << 8);
266 		if (n >= 0xd800 && n < 0xdc00) {
267 			filter->cache = ((n & 0x3ff) << 16) + 0x400000;
268 		} else if (n >= 0xdc00 && n < 0xe000) {
269 			n &= 0x3ff;
270 			n |= (filter->cache & 0xfff0000) >> 6;
271 			filter->cache = 0;
272 			if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
273 				CK((*filter->output_function)(n, filter->data));
274 			} else {		/* illegal character */
275 				n &= MBFL_WCSGROUP_MASK;
276 				n |= MBFL_WCSGROUP_THROUGH;
277 				CK((*filter->output_function)(n, filter->data));
278 			}
279 		} else {
280 			filter->cache = 0;
281 			CK((*filter->output_function)(n, filter->data));
282 		}
283 		break;
284 	}
285 
286 	return c;
287 }
288 
289 /*
290  * wchar => UTF-16LE
291  */
mbfl_filt_conv_wchar_utf16le(int c,mbfl_convert_filter * filter)292 int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
293 {
294 	int n;
295 
296 	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
297 		CK((*filter->output_function)(c & 0xff, filter->data));
298 		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
299 	} else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
300 		n = ((c >> 10) - 0x40) | 0xd800;
301 		CK((*filter->output_function)(n & 0xff, filter->data));
302 		CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
303 		n = (c & 0x3ff) | 0xdc00;
304 		CK((*filter->output_function)(n & 0xff, filter->data));
305 		CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
306 	} else {
307 		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
308 			CK(mbfl_filt_conv_illegal_output(c, filter));
309 		}
310 	}
311 
312 	return c;
313 }
314 
315 
316 
317