1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_ucs2.h"
32 
33 static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter);
34 
/* Alternative names accepted for plain "UCS-2"; the list is NULL-terminated. */
static const char *mbfl_encoding_ucs2_aliases[] = {
	"ISO-10646-UCS-2",
	"UCS2",
	"UNICODE",
	NULL
};
36 
/*
 * Legacy names.
 *
 * This library historically had encodings called 'byte2be' and 'byte2le'.
 * They were almost identical to UCS-2, except that they would quietly
 * truncate Unicode codepoints higher than 0xFFFF.
 * Minimal support is maintained by aliasing those names to UCS-2.
 * Each list is NULL-terminated.
 */
static const char *mbfl_encoding_ucs2be_aliases[] = {
	"byte2be",
	NULL
};
static const char *mbfl_encoding_ucs2le_aliases[] = {
	"byte2le",
	NULL
};
43 
44 const mbfl_encoding mbfl_encoding_ucs2 = {
45 	mbfl_no_encoding_ucs2,
46 	"UCS-2",
47 	"UCS-2",
48 	mbfl_encoding_ucs2_aliases,
49 	NULL,
50 	MBFL_ENCTYPE_WCS2,
51 	&vtbl_ucs2_wchar,
52 	&vtbl_wchar_ucs2,
53 	NULL
54 };
55 
56 const mbfl_encoding mbfl_encoding_ucs2be = {
57 	mbfl_no_encoding_ucs2be,
58 	"UCS-2BE",
59 	"UCS-2BE",
60 	mbfl_encoding_ucs2be_aliases,
61 	NULL,
62 	MBFL_ENCTYPE_WCS2,
63 	&vtbl_ucs2be_wchar,
64 	&vtbl_wchar_ucs2be,
65 	NULL
66 };
67 
68 const mbfl_encoding mbfl_encoding_ucs2le = {
69 	mbfl_no_encoding_ucs2le,
70 	"UCS-2LE",
71 	"UCS-2LE",
72 	mbfl_encoding_ucs2le_aliases,
73 	NULL,
74 	MBFL_ENCTYPE_WCS2,
75 	&vtbl_ucs2le_wchar,
76 	&vtbl_wchar_ucs2le,
77 	NULL
78 };
79 
80 const struct mbfl_convert_vtbl vtbl_ucs2_wchar = {
81 	mbfl_no_encoding_ucs2,
82 	mbfl_no_encoding_wchar,
83 	mbfl_filt_conv_common_ctor,
84 	NULL,
85 	mbfl_filt_conv_ucs2_wchar,
86 	mbfl_filt_conv_ucs2_wchar_flush,
87 	NULL,
88 };
89 
90 const struct mbfl_convert_vtbl vtbl_wchar_ucs2 = {
91 	mbfl_no_encoding_wchar,
92 	mbfl_no_encoding_ucs2,
93 	mbfl_filt_conv_common_ctor,
94 	NULL,
95 	mbfl_filt_conv_wchar_ucs2be,
96 	mbfl_filt_conv_common_flush,
97 	NULL,
98 };
99 
100 const struct mbfl_convert_vtbl vtbl_ucs2be_wchar = {
101 	mbfl_no_encoding_ucs2be,
102 	mbfl_no_encoding_wchar,
103 	mbfl_filt_conv_common_ctor,
104 	NULL,
105 	mbfl_filt_conv_ucs2be_wchar,
106 	mbfl_filt_conv_ucs2_wchar_flush,
107 	NULL,
108 };
109 
110 const struct mbfl_convert_vtbl vtbl_wchar_ucs2be = {
111 	mbfl_no_encoding_wchar,
112 	mbfl_no_encoding_ucs2be,
113 	mbfl_filt_conv_common_ctor,
114 	NULL,
115 	mbfl_filt_conv_wchar_ucs2be,
116 	mbfl_filt_conv_common_flush,
117 	NULL,
118 };
119 
120 const struct mbfl_convert_vtbl vtbl_ucs2le_wchar = {
121 	mbfl_no_encoding_ucs2le,
122 	mbfl_no_encoding_wchar,
123 	mbfl_filt_conv_common_ctor,
124 	NULL,
125 	mbfl_filt_conv_ucs2le_wchar,
126 	mbfl_filt_conv_ucs2_wchar_flush,
127 	NULL,
128 };
129 
130 const struct mbfl_convert_vtbl vtbl_wchar_ucs2le = {
131 	mbfl_no_encoding_wchar,
132 	mbfl_no_encoding_ucs2le,
133 	mbfl_filt_conv_common_ctor,
134 	NULL,
135 	mbfl_filt_conv_wchar_ucs2le,
136 	mbfl_filt_conv_common_flush,
137 	NULL,
138 };
139 
/*
 * CK(statement): evaluate `statement` exactly once; when the result is
 * negative, return -1 from the enclosing function. Otherwise execution
 * continues with the next statement.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
141 
/*
 * UCS-2 (byte order not yet known) -> wchar, one input byte per call.
 *
 * The first two bytes are combined big-endian-wise into a 16-bit unit:
 *   - 0xFFFE: a little-endian byte order mark; installs
 *             mbfl_filt_conv_ucs2le_wchar as filter->filter_function and
 *             emits nothing.
 *   - 0xFEFF: installs mbfl_filt_conv_ucs2be_wchar and emits nothing.
 *   - other : installs mbfl_filt_conv_ucs2be_wchar and passes the unit to
 *             filter->output_function.
 *
 * c       input byte; only the low 8 bits are used
 * filter  conversion state (status, cache, filter_function are updated)
 *
 * Returns 0, or -1 when the output function reports a negative result.
 */
int mbfl_filt_conv_ucs2_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		/* First byte of the pair: remember it and wait for the second. */
		filter->cache = c & 0xFF;
		filter->status = 1;
		return 0;
	}

	/* Second byte: the pair is complete. */
	filter->status = 0;
	unit = (filter->cache << 8) | (c & 0xFF);

	if (unit == 0xFFFE) {
		/* Found little-endian byte order mark */
		filter->filter_function = mbfl_filt_conv_ucs2le_wchar;
		return 0;
	}

	filter->filter_function = mbfl_filt_conv_ucs2be_wchar;
	if (unit == 0xFEFF) {
		/* Big-endian byte order mark: consumed, nothing is emitted. */
		return 0;
	}

	CK((*filter->output_function)(unit, filter->data));
	return 0;
}
162 
/*
 * UCS-2BE -> wchar, one input byte per call.
 *
 * The first byte of each pair is stored (already shifted into the high
 * position) in filter->cache; the second byte is OR-ed in as the low byte
 * and the resulting 16-bit value is handed to filter->output_function.
 *
 * c       input byte; only the low 8 bits are used
 * filter  conversion state (status and cache are updated)
 *
 * Returns 0, or -1 when the output function reports a negative result.
 */
int mbfl_filt_conv_ucs2be_wchar(int c, mbfl_convert_filter *filter)
{
	const int octet = c & 0xFF;

	if (filter->status != 0) {
		/* Low byte arrived: emit the completed unit. */
		filter->status = 0;
		CK((*filter->output_function)(octet | filter->cache, filter->data));
		return 0;
	}

	/* High byte arrived: keep it until its partner shows up. */
	filter->cache = octet << 8;
	filter->status = 1;
	return 0;
}
174 
/*
 * wchar -> UCS-2BE.
 *
 * A code point in [0, MBFL_WCSPLANE_UCS2MAX) is written as two bytes, high
 * byte first. Any other value is forwarded to
 * mbfl_filt_conv_illegal_output().
 *
 * c       code point to encode
 * filter  conversion filter supplying output_function and data
 *
 * Returns 0, or -1 when a callee reports a negative result.
 */
int mbfl_filt_conv_wchar_ucs2be(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSPLANE_UCS2MAX) {
		/* Not representable in UCS-2. */
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	CK((*filter->output_function)((c >> 8) & 0xFF, filter->data));
	CK((*filter->output_function)(c & 0xFF, filter->data));
	return 0;
}
185 
/*
 * UCS-2LE -> wchar, one input byte per call.
 *
 * The first byte of each pair is stored in filter->cache as the low byte;
 * the second byte becomes the high byte, and the resulting 16-bit value is
 * handed to filter->output_function.
 *
 * c       input byte; only the low 8 bits are used
 * filter  conversion state (status and cache are updated)
 *
 * Returns 0, or -1 when the output function reports a negative result.
 */
int mbfl_filt_conv_ucs2le_wchar(int c, mbfl_convert_filter *filter)
{
	const int octet = c & 0xFF;

	if (filter->status != 0) {
		/* High byte arrived: emit the completed unit. */
		filter->status = 0;
		CK((*filter->output_function)((octet << 8) | filter->cache, filter->data));
		return 0;
	}

	/* Low byte arrived: keep it until its partner shows up. */
	filter->cache = octet;
	filter->status = 1;
	return 0;
}
197 
/*
 * wchar -> UCS-2LE.
 *
 * A code point in [0, MBFL_WCSPLANE_UCS2MAX) is written as two bytes, low
 * byte first. Any other value is forwarded to
 * mbfl_filt_conv_illegal_output().
 *
 * c       code point to encode
 * filter  conversion filter supplying output_function and data
 *
 * Returns 0, or -1 when a callee reports a negative result.
 */
int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSPLANE_UCS2MAX) {
		/* Not representable in UCS-2. */
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	CK((*filter->output_function)(c & 0xFF, filter->data));
	CK((*filter->output_function)((c >> 8) & 0xFF, filter->data));
	return 0;
}
208 
/*
 * End-of-input handler shared by the UCS-2 / UCS-2BE / UCS-2LE decoders.
 *
 * If a byte is still pending (non-zero filter->status), the input ended in
 * the middle of a 16-bit unit: the state is reset and MBFL_BAD_INPUT is sent
 * to filter->output_function. Afterwards filter->flush_function, when set,
 * is invoked; its return value is not inspected.
 *
 * Returns 0, or -1 when emitting MBFL_BAD_INPUT reports a negative result
 * (in which case flush_function is not called).
 */
static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Input string was truncated */
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
223