1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
/*
 * The source code included in this file was separated from mbfilter.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002.
 *
 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_utf32.h"
32
/*
 * Forward declaration: the flush handler is defined at the end of this file
 * but is already referenced by the *_wchar conversion tables below.
 */
static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter);
34
/* NULL-terminated list of alternate names, referenced by mbfl_encoding_utf32. */
static const char *mbfl_encoding_utf32_aliases[] = {"utf32", NULL};
36
/*
 * Encoding descriptor for "UTF-32" (byte order decided while decoding).
 * It points at the vtbl_utf32_wchar and vtbl_wchar_utf32 conversion tables
 * defined in this file.
 * NOTE(review): the initializer is positional; the meaning of each slot
 * (in particular the two NULL entries) should be confirmed against the
 * mbfl_encoding declaration, which is not visible here.
 */
const mbfl_encoding mbfl_encoding_utf32 = {
	mbfl_no_encoding_utf32,
	"UTF-32",
	"UTF-32",
	mbfl_encoding_utf32_aliases, /* {"utf32", NULL} */
	NULL,
	MBFL_ENCTYPE_WCS4,
	&vtbl_utf32_wchar, /* UTF-32 -> wchar */
	&vtbl_wchar_utf32, /* wchar -> UTF-32 */
	NULL
};
48
/*
 * Encoding descriptor for "UTF-32BE". Unlike mbfl_encoding_utf32 it supplies
 * no alias list (NULL in that position) and points at the big-endian
 * conversion tables vtbl_utf32be_wchar and vtbl_wchar_utf32be.
 * NOTE(review): the initializer is positional; confirm slot meanings against
 * the mbfl_encoding declaration.
 */
const mbfl_encoding mbfl_encoding_utf32be = {
	mbfl_no_encoding_utf32be,
	"UTF-32BE",
	"UTF-32BE",
	NULL,
	NULL,
	MBFL_ENCTYPE_WCS4,
	&vtbl_utf32be_wchar, /* UTF-32BE -> wchar */
	&vtbl_wchar_utf32be, /* wchar -> UTF-32BE */
	NULL
};
60
/*
 * Encoding descriptor for "UTF-32LE". It supplies no alias list (NULL in
 * that position) and points at the little-endian conversion tables
 * vtbl_utf32le_wchar and vtbl_wchar_utf32le.
 * NOTE(review): the initializer is positional; confirm slot meanings against
 * the mbfl_encoding declaration.
 */
const mbfl_encoding mbfl_encoding_utf32le = {
	mbfl_no_encoding_utf32le,
	"UTF-32LE",
	"UTF-32LE",
	NULL,
	NULL,
	MBFL_ENCTYPE_WCS4,
	&vtbl_utf32le_wchar, /* UTF-32LE -> wchar */
	&vtbl_wchar_utf32le, /* wchar -> UTF-32LE */
	NULL
};
72
/*
 * Conversion table: mbfl_no_encoding_utf32 -> mbfl_no_encoding_wchar.
 * The per-byte handler is mbfl_filt_conv_utf32_wchar, which inspects the
 * first four bytes for a byte order mark and then hands over to the BE or LE
 * decoder. mbfl_filt_conv_utf32_wchar_flush reports truncated input.
 * NOTE(review): slot roles (constructor, destructor, filter, flush, ...) are
 * inferred from the function names; confirm against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_utf32_wchar = {
	mbfl_no_encoding_utf32,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf32_wchar,
	mbfl_filt_conv_utf32_wchar_flush,
	NULL,
};
82
/*
 * Conversion table: mbfl_no_encoding_wchar -> mbfl_no_encoding_utf32.
 * It reuses mbfl_filt_conv_wchar_utf32be, so plain "UTF-32" output is written
 * in big-endian byte order; that function does not write a byte order mark.
 * NOTE(review): slot roles are inferred from the function names; confirm
 * against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_wchar_utf32 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf32,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf32be, /* same encoder as vtbl_wchar_utf32be */
	mbfl_filt_conv_common_flush,
	NULL,
};
92
/*
 * Conversion table: mbfl_no_encoding_utf32be -> mbfl_no_encoding_wchar.
 * Decoding is done by mbfl_filt_conv_utf32be_wchar; truncated input is
 * reported by mbfl_filt_conv_utf32_wchar_flush.
 * NOTE(review): slot roles are inferred from the function names; confirm
 * against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_utf32be_wchar = {
	mbfl_no_encoding_utf32be,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf32be_wchar,
	mbfl_filt_conv_utf32_wchar_flush,
	NULL,
};
102
/*
 * Conversion table: mbfl_no_encoding_wchar -> mbfl_no_encoding_utf32be.
 * Encoding is done by mbfl_filt_conv_wchar_utf32be.
 * NOTE(review): slot roles are inferred from the function names; confirm
 * against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_wchar_utf32be = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf32be,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf32be,
	mbfl_filt_conv_common_flush,
	NULL,
};
112
/*
 * Conversion table: mbfl_no_encoding_utf32le -> mbfl_no_encoding_wchar.
 * Decoding is done by mbfl_filt_conv_utf32le_wchar; truncated input is
 * reported by mbfl_filt_conv_utf32_wchar_flush.
 * NOTE(review): slot roles are inferred from the function names; confirm
 * against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_utf32le_wchar = {
	mbfl_no_encoding_utf32le,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf32le_wchar,
	mbfl_filt_conv_utf32_wchar_flush,
	NULL,
};
122
/*
 * Conversion table: mbfl_no_encoding_wchar -> mbfl_no_encoding_utf32le.
 * Encoding is done by mbfl_filt_conv_wchar_utf32le.
 * NOTE(review): slot roles are inferred from the function names; confirm
 * against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_wchar_utf32le = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf32le,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf32le,
	mbfl_filt_conv_common_flush,
	NULL,
};
132
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
134
/*
 * Hand one decoded 32-bit value to filter->output_function.
 *
 * The value is passed through unchanged when it lies in
 * [0, MBFL_WCSPLANE_UTF32MAX) and is not in the surrogate range
 * 0xD800..0xDFFF; otherwise MBFL_BAD_INPUT is passed instead.
 *
 * Returns 0 on success, or -1 if the output function returned a negative
 * value.
 */
static int emit_char_if_valid(int n, mbfl_convert_filter *filter)
{
	int out_of_range = (n < 0 || n >= MBFL_WCSPLANE_UTF32MAX);
	int is_surrogate = (n >= 0xD800 && n <= 0xDFFF);
	int w = n;

	if (out_of_range || is_surrogate) {
		w = MBFL_BAD_INPUT;
	}

	if ((*filter->output_function)(w, filter->data) < 0) {
		return -1;
	}
	return 0;
}
144
/*
 * Byte handler for "UTF-32" input whose byte order is not yet known.
 *
 * The low 8 bits of each of the first three calls' `c` are shifted into
 * filter->cache (most significant byte first), with filter->status counting
 * the bytes received. On the fourth byte the assembled word is examined:
 *   - 0xFFFE0000 (bytes FF FE 00 00): little-endian byte order mark;
 *     filter->filter_function is set to mbfl_filt_conv_utf32le_wchar and
 *     nothing is emitted.
 *   - anything else: filter->filter_function is set to
 *     mbfl_filt_conv_utf32be_wchar; the word is emitted via
 *     emit_char_if_valid() unless it is 0xFEFF (big-endian byte order mark).
 *
 * Returns 0 on success, or -1 if emitting the character failed.
 */
int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter)
{
	int octet = c & 0xFF;
	int n;

	if (filter->status < 3) {
		/* Still collecting the leading three bytes of the first word */
		filter->cache = (filter->cache << 8) | octet;
		filter->status++;
		return 0;
	}

	/* Fourth byte: complete the word and reset the accumulator */
	n = ((unsigned int)filter->cache << 8) | octet;
	filter->status = 0;
	filter->cache = 0;

	if (n == 0xFFFE0000) {
		/* Found a little-endian byte order mark */
		filter->filter_function = mbfl_filt_conv_utf32le_wchar;
		return 0;
	}

	filter->filter_function = mbfl_filt_conv_utf32be_wchar;
	if (n == 0xFEFF) {
		/* Big-endian byte order mark: consume it without emitting */
		return 0;
	}

	if (emit_char_if_valid(n, filter) < 0) {
		return -1;
	}
	return 0;
}
167
/*
 * Byte handler for big-endian UTF-32 input.
 *
 * Accumulates the low 8 bits of `c` into filter->cache, most significant
 * byte first, using filter->status as the count of bytes already received.
 * On every fourth byte the completed word is passed to emit_char_if_valid()
 * and the accumulator is cleared.
 *
 * Returns 0 on success, or -1 if emitting the character failed.
 */
int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter)
{
	int octet = c & 0xFF;
	int n;

	if (filter->status < 3) {
		filter->cache = (filter->cache << 8) | octet;
		filter->status++;
		return 0;
	}

	/* Cast avoids left-shifting a signed value into the sign bit */
	n = ((unsigned int)filter->cache << 8) | octet;
	filter->status = 0;
	filter->cache = 0;

	if (emit_char_if_valid(n, filter) < 0) {
		return -1;
	}
	return 0;
}
180
/*
 * Write one wide character as four bytes, most significant byte first.
 *
 * Values outside [0, MBFL_WCSPLANE_UTF32MAX) are not written; they are
 * handed to mbfl_filt_conv_illegal_output() instead.
 *
 * Returns 0 on success, or -1 as soon as any callee returns a negative value.
 */
int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter)
{
	int shift;

	if (c < 0 || c >= MBFL_WCSPLANE_UTF32MAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	/* Bits 31..24 first, bits 7..0 last */
	for (shift = 24; shift >= 0; shift -= 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return 0;
}
194
/*
 * Byte handler for little-endian UTF-32 input.
 *
 * Each of the first three bytes is OR-ed into filter->cache at bit position
 * 8 * filter->status (least significant byte first). The fourth byte supplies
 * bits 31..24; the completed word is then passed to emit_char_if_valid() and
 * the accumulator is cleared.
 *
 * Returns 0 on success, or -1 if emitting the character failed.
 */
int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter)
{
	unsigned int octet = c & 0xFFU;
	int n;

	if (filter->status < 3) {
		filter->cache |= (octet << (8 * filter->status));
		filter->status++;
		return 0;
	}

	n = (octet << 24) | filter->cache;
	filter->status = 0;
	filter->cache = 0;

	if (emit_char_if_valid(n, filter) < 0) {
		return -1;
	}
	return 0;
}
207
/*
 * Write one wide character as four bytes, least significant byte first.
 *
 * Values outside [0, MBFL_WCSPLANE_UTF32MAX) are not written; they are
 * handed to mbfl_filt_conv_illegal_output() instead.
 *
 * Returns 0 on success, or -1 as soon as any callee returns a negative value.
 */
int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter)
{
	int shift;

	if (c < 0 || c >= MBFL_WCSPLANE_UTF32MAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	/* Bits 7..0 first, bits 31..24 last */
	for (shift = 0; shift <= 24; shift += 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}

	return 0;
}
221
/*
 * End-of-input handler shared by the UTF-32, UTF-32BE and UTF-32LE decoders.
 *
 * A non-zero filter->status means a 4-byte unit was left incomplete, so
 * MBFL_BAD_INPUT is sent to filter->output_function. If that call returns a
 * negative value this function returns -1 immediately, without clearing the
 * state or calling filter->flush_function. Otherwise the accumulator is
 * cleared and filter->flush_function, when set, is invoked (its return value
 * is ignored).
 *
 * Returns 0 on success, -1 if reporting the truncated input failed.
 */
static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter)
{
	/* Input string was truncated if bytes are still pending */
	if (filter->status != 0
			&& (*filter->output_function)(MBFL_BAD_INPUT, filter->data) < 0) {
		return -1;
	}

	filter->status = 0;
	filter->cache = 0;

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
236