1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
/*
 * The source code included in this file was separated from mbfilter.c
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 *
 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_ucs2.h"
32
33 static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter);
34
/* NULL-terminated list of alternative names for plain UCS-2. */
static const char *mbfl_encoding_ucs2_aliases[] = {
	"ISO-10646-UCS-2",
	"UCS2",
	"UNICODE",
	NULL
};
36
/* Historically this library offered encodings named 'byte2be' and 'byte2le'.
 * They were almost identical to UCS-2, except that Unicode codepoints above
 * 0xFFFF were quietly truncated.
 * Minimal support is kept by making those names aliases of UCS-2BE/UCS-2LE. */
static const char *mbfl_encoding_ucs2be_aliases[] = {
	"byte2be",
	NULL
};
static const char *mbfl_encoding_ucs2le_aliases[] = {
	"byte2le",
	NULL
};
43
44 const mbfl_encoding mbfl_encoding_ucs2 = {
45 mbfl_no_encoding_ucs2,
46 "UCS-2",
47 "UCS-2",
48 mbfl_encoding_ucs2_aliases,
49 NULL,
50 MBFL_ENCTYPE_WCS2,
51 &vtbl_ucs2_wchar,
52 &vtbl_wchar_ucs2,
53 NULL
54 };
55
56 const mbfl_encoding mbfl_encoding_ucs2be = {
57 mbfl_no_encoding_ucs2be,
58 "UCS-2BE",
59 "UCS-2BE",
60 mbfl_encoding_ucs2be_aliases,
61 NULL,
62 MBFL_ENCTYPE_WCS2,
63 &vtbl_ucs2be_wchar,
64 &vtbl_wchar_ucs2be,
65 NULL
66 };
67
68 const mbfl_encoding mbfl_encoding_ucs2le = {
69 mbfl_no_encoding_ucs2le,
70 "UCS-2LE",
71 "UCS-2LE",
72 mbfl_encoding_ucs2le_aliases,
73 NULL,
74 MBFL_ENCTYPE_WCS2,
75 &vtbl_ucs2le_wchar,
76 &vtbl_wchar_ucs2le,
77 NULL
78 };
79
80 const struct mbfl_convert_vtbl vtbl_ucs2_wchar = {
81 mbfl_no_encoding_ucs2,
82 mbfl_no_encoding_wchar,
83 mbfl_filt_conv_common_ctor,
84 NULL,
85 mbfl_filt_conv_ucs2_wchar,
86 mbfl_filt_conv_ucs2_wchar_flush,
87 NULL,
88 };
89
90 const struct mbfl_convert_vtbl vtbl_wchar_ucs2 = {
91 mbfl_no_encoding_wchar,
92 mbfl_no_encoding_ucs2,
93 mbfl_filt_conv_common_ctor,
94 NULL,
95 mbfl_filt_conv_wchar_ucs2be,
96 mbfl_filt_conv_common_flush,
97 NULL,
98 };
99
100 const struct mbfl_convert_vtbl vtbl_ucs2be_wchar = {
101 mbfl_no_encoding_ucs2be,
102 mbfl_no_encoding_wchar,
103 mbfl_filt_conv_common_ctor,
104 NULL,
105 mbfl_filt_conv_ucs2be_wchar,
106 mbfl_filt_conv_ucs2_wchar_flush,
107 NULL,
108 };
109
110 const struct mbfl_convert_vtbl vtbl_wchar_ucs2be = {
111 mbfl_no_encoding_wchar,
112 mbfl_no_encoding_ucs2be,
113 mbfl_filt_conv_common_ctor,
114 NULL,
115 mbfl_filt_conv_wchar_ucs2be,
116 mbfl_filt_conv_common_flush,
117 NULL,
118 };
119
120 const struct mbfl_convert_vtbl vtbl_ucs2le_wchar = {
121 mbfl_no_encoding_ucs2le,
122 mbfl_no_encoding_wchar,
123 mbfl_filt_conv_common_ctor,
124 NULL,
125 mbfl_filt_conv_ucs2le_wchar,
126 mbfl_filt_conv_ucs2_wchar_flush,
127 NULL,
128 };
129
130 const struct mbfl_convert_vtbl vtbl_wchar_ucs2le = {
131 mbfl_no_encoding_wchar,
132 mbfl_no_encoding_ucs2le,
133 mbfl_filt_conv_common_ctor,
134 NULL,
135 mbfl_filt_conv_wchar_ucs2le,
136 mbfl_filt_conv_common_flush,
137 NULL,
138 };
139
/* Evaluate `expr` exactly once; if the result is negative, make the
 * enclosing function return -1. Otherwise execution simply continues. */
#define CK(expr) \
	do { \
		if ((expr) < 0) \
			return (-1); \
	} while (0)
141
/*
 * UCS-2 (byte order not declared) -> wchar, one input byte per call.
 *
 * The first byte of a pair is parked in filter->cache. When the second byte
 * arrives the pair is read as a big-endian 16-bit value and inspected:
 *   - 0xFFFE: a byte-swapped byte order mark, so the input is little-endian;
 *             the filter function is switched to the LE handler and nothing
 *             is emitted.
 *   - 0xFEFF: a big-endian byte order mark; the filter function is switched
 *             to the BE handler and nothing is emitted.
 *   - other:  no mark present; the BE handler is installed and the value is
 *             passed to the output function as the first character.
 * In every case filter->filter_function is replaced after the first pair.
 *
 * Returns 0 on success, -1 if the output function reports a negative result.
 */
int mbfl_filt_conv_ucs2_wchar(int c, mbfl_convert_filter *filter)
{
	const int byte = c & 0xFF;

	if (filter->status == 0) {
		/* First byte of the pair: remember it and wait for the second */
		filter->cache = byte;
		filter->status = 1;
		return 0;
	}

	int unit = (filter->cache << 8) | byte;
	filter->status = 0;

	if (unit == 0xFFFE) {
		/* Found little-endian byte order mark */
		filter->filter_function = mbfl_filt_conv_ucs2le_wchar;
		return 0;
	}

	filter->filter_function = mbfl_filt_conv_ucs2be_wchar;
	if (unit == 0xFEFF) {
		/* Big-endian byte order mark: consumed, not forwarded */
		return 0;
	}

	CK((*filter->output_function)(unit, filter->data));
	return 0;
}
162
/*
 * UCS-2BE -> wchar, one input byte per call.
 *
 * The first byte of each pair is stored in filter->cache, already shifted
 * into the high 8 bits. The second byte is OR-ed into the low 8 bits and the
 * resulting 16-bit value is handed to the output function.
 *
 * Returns 0 on success, -1 if the output function reports a negative result.
 */
int mbfl_filt_conv_ucs2be_wchar(int c, mbfl_convert_filter *filter)
{
	const int byte = c & 0xFF;

	if (filter->status != 0) {
		/* Second (low) byte: the pair is complete */
		filter->status = 0;
		CK((*filter->output_function)(byte | filter->cache, filter->data));
		return 0;
	}

	/* First (high) byte */
	filter->cache = byte << 8;
	filter->status = 1;
	return 0;
}
174
/*
 * wchar -> UCS-2BE.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are written as two bytes, high byte
 * first. Any other value is passed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on success, -1 if any callee reports a negative result.
 */
int mbfl_filt_conv_wchar_ucs2be(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSPLANE_UCS2MAX) {
		/* Outside the range this encoder accepts */
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	const int high = (c >> 8) & 0xFF;
	const int low = c & 0xFF;

	CK((*filter->output_function)(high, filter->data));
	CK((*filter->output_function)(low, filter->data));
	return 0;
}
185
/*
 * UCS-2LE -> wchar, one input byte per call.
 *
 * The first byte of each pair (the low 8 bits) is stored in filter->cache.
 * The second byte supplies the high 8 bits, and the combined 16-bit value is
 * handed to the output function.
 *
 * Returns 0 on success, -1 if the output function reports a negative result.
 */
int mbfl_filt_conv_ucs2le_wchar(int c, mbfl_convert_filter *filter)
{
	const int byte = c & 0xFF;

	if (filter->status != 0) {
		/* Second (high) byte: the pair is complete */
		filter->status = 0;
		CK((*filter->output_function)((byte << 8) | filter->cache, filter->data));
		return 0;
	}

	/* First (low) byte */
	filter->cache = byte;
	filter->status = 1;
	return 0;
}
197
/*
 * wchar -> UCS-2LE.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are written as two bytes, low byte
 * first. Any other value is passed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on success, -1 if any callee reports a negative result.
 */
int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSPLANE_UCS2MAX) {
		/* Outside the range this encoder accepts */
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	const int low = c & 0xFF;
	const int high = (c >> 8) & 0xFF;

	CK((*filter->output_function)(low, filter->data));
	CK((*filter->output_function)(high, filter->data));
	return 0;
}
208
/*
 * Flush handler shared by the three UCS-2 -> wchar filters.
 *
 * A non-zero status means a first byte is still waiting for its partner, i.e.
 * the input ended in the middle of a 2-byte unit. In that case the state is
 * reset and MBFL_BAD_INPUT is sent to the output function. Afterwards the
 * filter's flush_function, if set, is invoked; its result is not inspected.
 *
 * Returns 0 on success, -1 if emitting MBFL_BAD_INPUT reports a negative
 * result (in which case flush_function is not called).
 */
static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Input string was truncated */
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
223