1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this files was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "mbfilter.h"
35 #include "mbfilter_ucs4.h"
36 
/* Alias names referenced by the mbfl_encoding_ucs4 initializer; the list is NULL-terminated. */
37 static const char *mbfl_encoding_ucs4_aliases[] = {"ISO-10646-UCS-4", "UCS4", NULL};
38 
/*
 * Encoding descriptor for "UCS-4" (no explicit byte order in the name).
 * Positional initializer: encoding id, two name strings, a pointer to the
 * alias list mbfl_encoding_ucs4_aliases, a NULL entry, and the type flag
 * MBFL_ENCTYPE_WCS4BE (the same flag mbfl_encoding_ucs4be uses).
 * NOTE(review): the field names of mbfl_encoding are declared elsewhere;
 * the meaning of each position should be confirmed against that header.
 */
39 const mbfl_encoding mbfl_encoding_ucs4 = {
40 	mbfl_no_encoding_ucs4,
41 	"UCS-4",
42 	"UCS-4",
43 	(const char *(*)[])&mbfl_encoding_ucs4_aliases,
44 	NULL,
45 	MBFL_ENCTYPE_WCS4BE
46 };
47 
/*
 * Encoding descriptor for "UCS-4BE".
 * Positional initializer: encoding id, two name strings, no alias list
 * (NULL), a further NULL entry, and the type flag MBFL_ENCTYPE_WCS4BE.
 * NOTE(review): the field names of mbfl_encoding are declared elsewhere;
 * the meaning of each position should be confirmed against that header.
 */
48 const mbfl_encoding mbfl_encoding_ucs4be = {
49 	mbfl_no_encoding_ucs4be,
50 	"UCS-4BE",
51 	"UCS-4BE",
52 	NULL,
53 	NULL,
54 	MBFL_ENCTYPE_WCS4BE
55 };
56 
/*
 * Encoding descriptor for "UCS-4LE".
 * Positional initializer: encoding id, two name strings, no alias list
 * (NULL), a further NULL entry, and the type flag MBFL_ENCTYPE_WCS4LE.
 * NOTE(review): the field names of mbfl_encoding are declared elsewhere;
 * the meaning of each position should be confirmed against that header.
 */
57 const mbfl_encoding mbfl_encoding_ucs4le = {
58 	mbfl_no_encoding_ucs4le,
59 	"UCS-4LE",
60 	"UCS-4LE",
61 	NULL,
62 	NULL,
63 	MBFL_ENCTYPE_WCS4LE
64 };
65 
/*
 * Conversion table entry for UCS-4 => wchar.
 * The initializer lists, in order: the two encoding ids (ucs4, wchar), the
 * shared common constructor and destructor, the conversion routine
 * mbfl_filt_conv_ucs4_wchar, and the shared common flush routine.
 * NOTE(review): field roles are inferred from the values; struct
 * mbfl_convert_vtbl itself is declared elsewhere.
 */
66 const struct mbfl_convert_vtbl vtbl_ucs4_wchar = {
67 	mbfl_no_encoding_ucs4,
68 	mbfl_no_encoding_wchar,
69 	mbfl_filt_conv_common_ctor,
70 	mbfl_filt_conv_common_dtor,
71 	mbfl_filt_conv_ucs4_wchar,
72 	mbfl_filt_conv_common_flush
73 };
74 
/*
 * Conversion table entry for wchar => UCS-4.
 * The initializer lists, in order: the two encoding ids (wchar, ucs4), the
 * shared common constructor and destructor, the conversion routine, and
 * the shared common flush routine.
 * The conversion routine is mbfl_filt_conv_wchar_ucs4be, the same one used
 * by vtbl_wchar_ucs4be, so plain "UCS-4" output is written most
 * significant byte first.
 * NOTE(review): field roles are inferred from the values; struct
 * mbfl_convert_vtbl itself is declared elsewhere.
 */
75 const struct mbfl_convert_vtbl vtbl_wchar_ucs4 = {
76 	mbfl_no_encoding_wchar,
77 	mbfl_no_encoding_ucs4,
78 	mbfl_filt_conv_common_ctor,
79 	mbfl_filt_conv_common_dtor,
80 	mbfl_filt_conv_wchar_ucs4be,
81 	mbfl_filt_conv_common_flush
82 };
83 
/*
 * Conversion table entry for UCS-4BE => wchar.
 * The initializer lists, in order: the two encoding ids (ucs4be, wchar),
 * the shared common constructor and destructor, the conversion routine
 * mbfl_filt_conv_ucs4be_wchar, and the shared common flush routine.
 * NOTE(review): field roles are inferred from the values; struct
 * mbfl_convert_vtbl itself is declared elsewhere.
 */
84 const struct mbfl_convert_vtbl vtbl_ucs4be_wchar = {
85 	mbfl_no_encoding_ucs4be,
86 	mbfl_no_encoding_wchar,
87 	mbfl_filt_conv_common_ctor,
88 	mbfl_filt_conv_common_dtor,
89 	mbfl_filt_conv_ucs4be_wchar,
90 	mbfl_filt_conv_common_flush
91 };
92 
/*
 * Conversion table entry for wchar => UCS-4BE.
 * The initializer lists, in order: the two encoding ids (wchar, ucs4be),
 * the shared common constructor and destructor, the conversion routine
 * mbfl_filt_conv_wchar_ucs4be, and the shared common flush routine.
 * NOTE(review): field roles are inferred from the values; struct
 * mbfl_convert_vtbl itself is declared elsewhere.
 */
93 const struct mbfl_convert_vtbl vtbl_wchar_ucs4be = {
94 	mbfl_no_encoding_wchar,
95 	mbfl_no_encoding_ucs4be,
96 	mbfl_filt_conv_common_ctor,
97 	mbfl_filt_conv_common_dtor,
98 	mbfl_filt_conv_wchar_ucs4be,
99 	mbfl_filt_conv_common_flush
100 };
101 
/*
 * Conversion table entry for UCS-4LE => wchar.
 * The initializer lists, in order: the two encoding ids (ucs4le, wchar),
 * the shared common constructor and destructor, the conversion routine
 * mbfl_filt_conv_ucs4le_wchar, and the shared common flush routine.
 * NOTE(review): field roles are inferred from the values; struct
 * mbfl_convert_vtbl itself is declared elsewhere.
 */
102 const struct mbfl_convert_vtbl vtbl_ucs4le_wchar = {
103 	mbfl_no_encoding_ucs4le,
104 	mbfl_no_encoding_wchar,
105 	mbfl_filt_conv_common_ctor,
106 	mbfl_filt_conv_common_dtor,
107 	mbfl_filt_conv_ucs4le_wchar,
108 	mbfl_filt_conv_common_flush
109 };
110 
/*
 * Conversion table entry for wchar => UCS-4LE.
 * The initializer lists, in order: the two encoding ids (wchar, ucs4le),
 * the shared common constructor and destructor, the conversion routine
 * mbfl_filt_conv_wchar_ucs4le, and the shared common flush routine.
 * NOTE(review): field roles are inferred from the values; struct
 * mbfl_convert_vtbl itself is declared elsewhere.
 */
111 const struct mbfl_convert_vtbl vtbl_wchar_ucs4le = {
112 	mbfl_no_encoding_wchar,
113 	mbfl_no_encoding_ucs4le,
114 	mbfl_filt_conv_common_ctor,
115 	mbfl_filt_conv_common_dtor,
116 	mbfl_filt_conv_wchar_ucs4le,
117 	mbfl_filt_conv_common_flush
118 };
119 
120 
/*
 * CK(statement): evaluate `statement` once; if its value is negative, make
 * the enclosing function return -1 immediately.  Only usable inside
 * functions that return int.  Wrapped in do { } while (0) so it behaves as
 * a single statement.
 */
121 #define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
122 
123 /*
124  * UCS-4 => wchar
125  */
/*
 * Incremental UCS-4 decoder with byte-order switching.
 *
 * Called once per input byte; only the low 8 bits of c are used.
 * filter->status packs two things:
 *   - bits 0-7:  how many bytes (0..3) are already collected in
 *                filter->cache
 *   - bits 8-15: byte order; zero means the first byte is the most
 *                significant one (big-endian), non-zero means it is the
 *                least significant one (little-endian)
 * On the fourth byte the assembled value is handed to
 * filter->output_function.  If that value is 0xfffe0000 (U+FEFF read with
 * the wrong byte order), the byte order is toggled for the following input
 * and 0xfeff is emitted instead.
 *
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_ucs4_wchar(int c, mbfl_convert_filter *filter)
{
	/*
	 * All shifts are performed on an unsigned value: (c & 0xff) << 24 on a
	 * signed int overflows (undefined behaviour) whenever the byte is
	 * >= 0x80.  The result is converted back to int afterwards.
	 */
	unsigned int byte = (unsigned int)(c & 0xff);
	unsigned int bits;
	int n, endian;

	endian = filter->status & 0xff00;
	switch (filter->status & 0xff) {
	case 0:
		if (endian) {
			n = (int)byte;
		} else {
			n = (int)(byte << 24);
		}
		filter->cache = n;
		filter->status++;
		break;
	case 1:
		if (endian) {
			n = (int)(byte << 8);
		} else {
			n = (int)(byte << 16);
		}
		filter->cache |= n;
		filter->status++;
		break;
	case 2:
		if (endian) {
			n = (int)(byte << 16);
		} else {
			n = (int)(byte << 8);
		}
		filter->cache |= n;
		filter->status++;
		break;
	default:
		if (endian) {
			n = (int)(byte << 24);
		} else {
			n = (int)byte;
		}
		n |= filter->cache;

		/* Inspect the bit pattern as unsigned so that no negative value is
		 * right-shifted. */
		bits = (unsigned int)n;
		if ((bits & 0xffff) == 0 && ((bits >> 16) & 0xffff) == 0xfffe) {
			/* Byte-swapped BOM: flip the byte order and restart the
			 * byte counter. */
			if (endian) {
				filter->status = 0;		/* big-endian */
			} else {
				filter->status = 0x100;		/* little-endian */
			}
			CK((*filter->output_function)(0xfeff, filter->data));
		} else {
			/* Keep the byte-order bits, reset the byte counter. */
			filter->status &= ~0xff;
			CK((*filter->output_function)(n, filter->data));
		}
		break;
	}

	return c;
}
182 
183 /*
184  * UCS-4BE => wchar
185  */
/*
 * Incremental UCS-4BE decoder.
 *
 * Called once per input byte; only the low 8 bits of c are used.
 * filter->status counts the bytes collected so far (0..3) and
 * filter->cache holds the partially assembled value, most significant
 * byte first.  On the fourth byte the complete value is passed to
 * filter->output_function and the counter is reset to 0.
 *
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_ucs4be_wchar(int c, mbfl_convert_filter *filter)
{
	int n;

	if (filter->status == 0) {
		filter->status = 1;
		/*
		 * Shift as unsigned: (c & 0xff) << 24 on a signed int overflows
		 * (undefined behaviour) whenever the byte is >= 0x80.
		 */
		n = (int)((unsigned int)(c & 0xff) << 24);
		filter->cache = n;
	} else if (filter->status == 1) {
		filter->status = 2;
		n = (c & 0xff) << 16;
		filter->cache |= n;
	} else if (filter->status == 2) {
		filter->status = 3;
		n = (c & 0xff) << 8;
		filter->cache |= n;
	} else {
		/* Fourth byte: reset the counter before emitting the value. */
		filter->status = 0;
		n = (c & 0xff) | filter->cache;
		CK((*filter->output_function)(n, filter->data));
	}
	return c;
}
209 
210 /*
211  * wchar => UCS-4BE
212  */
/*
 * Encode one wchar value as four UCS-4BE bytes.
 *
 * Values in the range [0, MBFL_WCSGROUP_UCS4MAX) are written to
 * filter->output_function one byte at a time, most significant byte
 * first.  Any other value is passed to mbfl_filt_conv_illegal_output()
 * unless filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE, in
 * which case nothing is emitted.
 *
 * Returns c, or -1 when an output call returns a negative value.
 */
int mbfl_filt_conv_wchar_ucs4be(int c, mbfl_convert_filter *filter)
{
	int shift;

	/* Out-of-range input: report it (if configured to) and stop here. */
	if (c < 0 || c >= MBFL_WCSGROUP_UCS4MAX) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	/* Bits 31..24 first, bits 7..0 last. */
	for (shift = 24; shift >= 0; shift -= 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return c;
}
228 
229 /*
230  * UCS-4LE => wchar
231  */
/*
 * Incremental UCS-4LE decoder.
 *
 * Called once per input byte; only the low 8 bits of c are used.
 * filter->status counts the bytes collected so far (0..3) and
 * filter->cache holds the partially assembled value, least significant
 * byte first.  On the fourth byte the complete value is passed to
 * filter->output_function and the counter is reset to 0.
 *
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_ucs4le_wchar(int c, mbfl_convert_filter *filter)
{
	int n;

	if (filter->status == 0) {
		filter->status = 1;
		n = (c & 0xff);
		filter->cache = n;
	} else if (filter->status == 1) {
		filter->status = 2;
		n = (c & 0xff) << 8;
		filter->cache |= n;
	} else if (filter->status == 2) {
		filter->status = 3;
		n = (c & 0xff) << 16;
		filter->cache |= n;
	} else {
		/* Fourth byte: reset the counter before emitting the value. */
		filter->status = 0;
		/*
		 * Shift as unsigned: (c & 0xff) << 24 on a signed int overflows
		 * (undefined behaviour) whenever the byte is >= 0x80.
		 */
		n = (int)((unsigned int)(c & 0xff) << 24) | filter->cache;
		CK((*filter->output_function)(n, filter->data));
	}
	return c;
}
255 
256 /*
257  * wchar => UCS-4LE
258  */
/*
 * Encode one wchar value as four UCS-4LE bytes.
 *
 * Values in the range [0, MBFL_WCSGROUP_UCS4MAX) are written to
 * filter->output_function one byte at a time, least significant byte
 * first.  Any other value is passed to mbfl_filt_conv_illegal_output()
 * unless filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE, in
 * which case nothing is emitted.
 *
 * Returns c, or -1 when an output call returns a negative value.
 */
int mbfl_filt_conv_wchar_ucs4le(int c, mbfl_convert_filter *filter)
{
	int shift;

	/* Out-of-range input: report it (if configured to) and stop here. */
	if (c < 0 || c >= MBFL_WCSGROUP_UCS4MAX) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	/* Bits 7..0 first, bits 31..24 last. */
	for (shift = 0; shift <= 24; shift += 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return c;
}
274