1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 20 dec 2002.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_utf32.h"
32 
33 static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter);
34 
/*
 * Alias list used by the mbfl_encoding_utf32 descriptor: a single entry,
 * "utf32", followed by a NULL terminator.
 */
static const char *mbfl_encoding_utf32_aliases[] = {
	"utf32",
	NULL
};
36 
37 const mbfl_encoding mbfl_encoding_utf32 = {
38 	mbfl_no_encoding_utf32,
39 	"UTF-32",
40 	"UTF-32",
41 	mbfl_encoding_utf32_aliases,
42 	NULL,
43 	MBFL_ENCTYPE_WCS4,
44 	&vtbl_utf32_wchar,
45 	&vtbl_wchar_utf32,
46 	NULL
47 };
48 
49 const mbfl_encoding mbfl_encoding_utf32be = {
50 	mbfl_no_encoding_utf32be,
51 	"UTF-32BE",
52 	"UTF-32BE",
53 	NULL,
54 	NULL,
55 	MBFL_ENCTYPE_WCS4,
56 	&vtbl_utf32be_wchar,
57 	&vtbl_wchar_utf32be,
58 	NULL
59 };
60 
61 const mbfl_encoding mbfl_encoding_utf32le = {
62 	mbfl_no_encoding_utf32le,
63 	"UTF-32LE",
64 	"UTF-32LE",
65 	NULL,
66 	NULL,
67 	MBFL_ENCTYPE_WCS4,
68 	&vtbl_utf32le_wchar,
69 	&vtbl_wchar_utf32le,
70 	NULL
71 };
72 
73 const struct mbfl_convert_vtbl vtbl_utf32_wchar = {
74 	mbfl_no_encoding_utf32,
75 	mbfl_no_encoding_wchar,
76 	mbfl_filt_conv_common_ctor,
77 	NULL,
78 	mbfl_filt_conv_utf32_wchar,
79 	mbfl_filt_conv_utf32_wchar_flush,
80 	NULL,
81 };
82 
83 const struct mbfl_convert_vtbl vtbl_wchar_utf32 = {
84 	mbfl_no_encoding_wchar,
85 	mbfl_no_encoding_utf32,
86 	mbfl_filt_conv_common_ctor,
87 	NULL,
88 	mbfl_filt_conv_wchar_utf32be,
89 	mbfl_filt_conv_common_flush,
90 	NULL,
91 };
92 
93 const struct mbfl_convert_vtbl vtbl_utf32be_wchar = {
94 	mbfl_no_encoding_utf32be,
95 	mbfl_no_encoding_wchar,
96 	mbfl_filt_conv_common_ctor,
97 	NULL,
98 	mbfl_filt_conv_utf32be_wchar,
99 	mbfl_filt_conv_utf32_wchar_flush,
100 	NULL,
101 };
102 
103 const struct mbfl_convert_vtbl vtbl_wchar_utf32be = {
104 	mbfl_no_encoding_wchar,
105 	mbfl_no_encoding_utf32be,
106 	mbfl_filt_conv_common_ctor,
107 	NULL,
108 	mbfl_filt_conv_wchar_utf32be,
109 	mbfl_filt_conv_common_flush,
110 	NULL,
111 };
112 
113 const struct mbfl_convert_vtbl vtbl_utf32le_wchar = {
114 	mbfl_no_encoding_utf32le,
115 	mbfl_no_encoding_wchar,
116 	mbfl_filt_conv_common_ctor,
117 	NULL,
118 	mbfl_filt_conv_utf32le_wchar,
119 	mbfl_filt_conv_utf32_wchar_flush,
120 	NULL,
121 };
122 
123 const struct mbfl_convert_vtbl vtbl_wchar_utf32le = {
124 	mbfl_no_encoding_wchar,
125 	mbfl_no_encoding_utf32le,
126 	mbfl_filt_conv_common_ctor,
127 	NULL,
128 	mbfl_filt_conv_wchar_utf32le,
129 	mbfl_filt_conv_common_flush,
130 	NULL,
131 };
132 
/*
 * Error-propagation helper: evaluate `statement` exactly once and, if the
 * result is negative, make the enclosing function return -1 immediately.
 * A non-negative result lets execution continue after the macro.
 * Only usable inside functions whose return type accepts -1.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
134 
/*
 * Pass one decoded 32-bit value on to the filter's output function.
 *
 * The value is forwarded unchanged only when it is non-negative, below
 * MBFL_WCSPLANE_UTF32MAX, and outside the surrogate range 0xD800..0xDFFF.
 * Anything else is replaced by MBFL_BAD_INPUT before being forwarded.
 *
 * Returns 0 on success, or -1 if the output function returns a negative
 * value.
 */
static int emit_char_if_valid(int n, mbfl_convert_filter *filter)
{
	int code = MBFL_BAD_INPUT;

	if (n >= 0 && n < MBFL_WCSPLANE_UTF32MAX && !(n >= 0xD800 && n <= 0xDFFF)) {
		code = n;
	}

	CK((*filter->output_function)(code, filter->data));
	return 0;
}
144 
/*
 * UTF-32 (byte order not yet known) -> wchar, fed one byte per call.
 *
 * The first three bytes are accumulated in filter->cache, most significant
 * first, with filter->status counting them. On the fourth byte the unit is
 * assembled as if big-endian and inspected:
 *   - 0xFFFE0000 (bytes FF FE 00 00) is a little-endian byte order mark:
 *     filter->filter_function is switched to mbfl_filt_conv_utf32le_wchar
 *     and nothing is emitted.
 *   - Otherwise filter->filter_function is switched to
 *     mbfl_filt_conv_utf32be_wchar. A value of 0xFEFF (big-endian byte
 *     order mark) is dropped; any other value goes through
 *     emit_char_if_valid().
 * Either way this function replaces itself in the filter after the first
 * complete unit.
 *
 * Returns 0 on success, or -1 if emitting the character fails.
 */
int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status < 3) {
		/* Still collecting: shift earlier bytes up and append this one. */
		filter->cache = (filter->cache << 8) | (c & 0xFF);
		filter->status++;
		return 0;
	}

	/* Fourth byte: complete the unit and reset the accumulator. */
	unit = ((unsigned int)filter->cache << 8) | (c & 0xFF);
	filter->status = 0;
	filter->cache = 0;

	if (unit == 0xFFFE0000) {
		/* Little-endian byte order mark: consume it and switch decoders. */
		filter->filter_function = mbfl_filt_conv_utf32le_wchar;
		return 0;
	}

	/* No little-endian mark seen, so treat the stream as big-endian. */
	filter->filter_function = mbfl_filt_conv_utf32be_wchar;
	if (unit == 0xFEFF) {
		/* Big-endian byte order mark: consume it without output. */
		return 0;
	}

	CK(emit_char_if_valid(unit, filter));
	return 0;
}
167 
/*
 * UTF-32BE -> wchar, fed one byte per call.
 *
 * Bytes are accumulated in filter->cache most significant first, with
 * filter->status counting how many have been stored (0..3). The fourth byte
 * completes the unit, the accumulator is reset, and the value is handed to
 * emit_char_if_valid().
 *
 * Returns 0 on success, or -1 if emitting the character fails.
 */
int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status < 3) {
		/* Still collecting: shift earlier bytes up and append this one. */
		filter->cache = (filter->cache << 8) | (c & 0xFF);
		filter->status++;
		return 0;
	}

	/* The cast keeps the final shift into the top byte in unsigned arithmetic. */
	unit = ((unsigned int)filter->cache << 8) | (c & 0xFF);
	filter->status = 0;
	filter->cache = 0;

	CK(emit_char_if_valid(unit, filter));
	return 0;
}
180 
/*
 * wchar -> UTF-32BE.
 *
 * A value in the range 0 <= c < MBFL_WCSPLANE_UTF32MAX is written to the
 * output function as four bytes, most significant byte first. Any other
 * value is passed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on success, or -1 as soon as any downstream call returns a
 * negative value.
 */
int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter)
{
	int shift;

	if (c < 0 || c >= MBFL_WCSPLANE_UTF32MAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	/* Emit bits 31..24 first, then 23..16, 15..8 and finally 7..0. */
	for (shift = 24; shift >= 0; shift -= 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return 0;
}
194 
/*
 * UTF-32LE -> wchar, fed one byte per call.
 *
 * The first three bytes are OR-ed into filter->cache at bit offsets 0, 8
 * and 16 (selected by filter->status, which counts stored bytes). The
 * fourth byte supplies bits 24..31, the accumulator is reset, and the
 * assembled value is handed to emit_char_if_valid().
 *
 * Returns 0 on success, or -1 if emitting the character fails.
 */
int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status < 3) {
		/* Place this byte at its little-endian position within the unit. */
		filter->cache |= ((c & 0xFFU) << (8 * filter->status));
		filter->status++;
		return 0;
	}

	/* Last byte is the most significant one. */
	unit = ((c & 0xFFU) << 24) | filter->cache;
	filter->status = 0;
	filter->cache = 0;

	CK(emit_char_if_valid(unit, filter));
	return 0;
}
207 
/*
 * wchar -> UTF-32LE.
 *
 * A value in the range 0 <= c < MBFL_WCSPLANE_UTF32MAX is written to the
 * output function as four bytes, least significant byte first. Any other
 * value is passed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on success, or -1 as soon as any downstream call returns a
 * negative value.
 */
int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter)
{
	int shift;

	if (c < 0 || c >= MBFL_WCSPLANE_UTF32MAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	/* Emit bits 7..0 first, then 15..8, 23..16 and finally 31..24. */
	for (shift = 0; shift <= 24; shift += 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return 0;
}
221 
/*
 * Flush handler shared by the UTF-32, UTF-32BE and UTF-32LE decoders.
 *
 * A non-zero filter->status means one to three bytes of an unfinished unit
 * are still buffered, i.e. the input ended mid-character; MBFL_BAD_INPUT is
 * emitted for it. The accumulator is then cleared and, if the filter has a
 * flush_function, it is called with filter->data (its return value is not
 * examined).
 *
 * Returns 0 on success. Returns -1 if emitting MBFL_BAD_INPUT fails; in
 * that case the accumulator is not cleared and flush_function is not called.
 */
static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Leftover bytes: the final code unit was cut short. */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	filter->status = 0;
	filter->cache = 0;

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
236