1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file: Rui Hirokawa <hirokawa@php.net>
22 *
23 */
/*
 * The source code included in this file was separated from mbfilter_tw.c
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 *
 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_euc_tw.h"
32
33 #include "unicode_table_cns11643.h"
34
/* Forward declaration: the vtbl_euctw_wchar initializer further down refers to
 * this flush handler before its definition at the end of the file. */
static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter);
36
/*
 * Byte-length table for EUC-TW with one entry per possible byte value
 * (256 entries, index 0x00-0xFF):
 *   0x8E       -> 4  (prefix of a four-byte sequence)
 *   0xA1-0xFE  -> 2  (first byte of a two-byte sequence)
 *   all others -> 1
 */
static const unsigned char mblen_table_euctw[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xA0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
55
56
/* Alias names for EUC-TW; the list is terminated by a NULL entry. */
static const char *mbfl_encoding_euc_tw_aliases[] = {
	"EUC_TW",
	"eucTW",
	"x-euc-tw",
	NULL
};
58
/*
 * Encoding descriptor for EUC-TW. It bundles the encoding id, the name
 * strings, the alias list, the byte-length table and the two conversion
 * vtables defined in this file.
 *
 * NOTE(review): the initializer is positional, so the meaning of each slot is
 * fixed by the mbfl_encoding declaration (not visible here) -- confirm against
 * that declaration before adding or reordering entries.
 */
const mbfl_encoding mbfl_encoding_euc_tw = {
	mbfl_no_encoding_euc_tw,
	"EUC-TW",
	"EUC-TW",
	mbfl_encoding_euc_tw_aliases,
	mblen_table_euctw,
	0,
	&vtbl_euctw_wchar, /* EUC-TW => wchar conversion */
	&vtbl_wchar_euctw, /* wchar => EUC-TW conversion */
	NULL
};
70
/*
 * Conversion vtable for EUC-TW => wchar. Uses the common constructor, the
 * byte-wise decoder mbfl_filt_conv_euctw_wchar and a dedicated flush handler
 * that reports a truncated multi-byte sequence.
 *
 * NOTE(review): slots are positional; the two NULL entries are presumably
 * optional callbacks of struct mbfl_convert_vtbl -- confirm in mbfilter.h.
 */
const struct mbfl_convert_vtbl vtbl_euctw_wchar = {
	mbfl_no_encoding_euc_tw, /* source encoding */
	mbfl_no_encoding_wchar,  /* destination encoding */
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_euctw_wchar,
	mbfl_filt_conv_euctw_wchar_flush,
	NULL,
};
80
/*
 * Conversion vtable for wchar => EUC-TW. Uses the common constructor, the
 * encoder mbfl_filt_conv_wchar_euctw and the common flush handler.
 *
 * NOTE(review): slots are positional; the two NULL entries are presumably
 * optional callbacks of struct mbfl_convert_vtbl -- confirm in mbfilter.h.
 */
const struct mbfl_convert_vtbl vtbl_wchar_euctw = {
	mbfl_no_encoding_wchar,  /* source encoding */
	mbfl_no_encoding_euc_tw, /* destination encoding */
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_euctw,
	mbfl_filt_conv_common_flush,
	NULL,
};
90
/*
 * Evaluate `expr` once and, if the result is negative, make the *enclosing
 * function* return -1 immediately. Note the hidden `return`: this macro is
 * only usable inside functions that return int.
 */
#define CK(expr) \
	do { \
		if ((expr) < 0) { \
			return -1; \
		} \
	} while (0)
92
/*
 * Returns non-zero when `c` is accepted as the row byte of a CNS 11643
 * plane 1 character: 0xA1-0xA6, 0xC2 or 0xC4-0xFD.
 */
static int euctw_is_plane1_row_byte(int c)
{
	if (c == 0xC3) {
		return 0;
	}
	return (c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD);
}

/*
 * EUC-TW => wchar, one input byte per call.
 *
 * Byte sequences handled:
 *   - 0x00-0x7F                         : passed through unchanged
 *   - row, column                       : two bytes, looked up in the plane 1 table
 *   - 0x8E, plane byte, row, column     : four bytes; plane byte is 0xA1, 0xA2 or 0xAE
 *
 * filter->status holds the decoder state (0 = idle, 1 = second byte of a
 * two-byte sequence expected, 2/3/4 = second/third/fourth byte of a four-byte
 * sequence expected); filter->cache carries the bytes seen so far.
 *
 * Every invalid or unmapped sequence is reported by sending MBFL_BAD_INPUT to
 * the output function. The offending byte is consumed, not re-examined.
 *
 * Returns 0, or -1 as soon as the output function returns a negative value.
 */
int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter)
{
	const int is_trail_byte = (c > 0xA0 && c < 0xFF);
	int saved, index, wchar, plane_index, row_ok;

	switch (filter->status) {
	case 0: /* idle: expecting the first byte of a character */
		if (c >= 0 && c < 0x80) {
			/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c == 0x8E) {
			/* 4-byte character, first byte */
			filter->status = 2;
		} else if (euctw_is_plane1_row_byte(c)) {
			/* 2-byte character, first byte: keep it for the next call */
			filter->cache = c;
			filter->status = 1;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* 2-byte character, second byte */
		saved = filter->cache;
		filter->status = 0;

		if (!is_trail_byte) {
			filter->cache = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		/* 94 columns per row; both bytes are offset by 0xA1 */
		index = (saved - 0xA1) * 94 + (c - 0xA1);
		if (index >= 0 && index < cns11643_1_ucs_table_size) {
			wchar = cns11643_1_ucs_table[index];
		} else {
			wchar = 0;
		}
		if (wchar <= 0) {
			/* empty table slot: no Unicode mapping */
			wchar = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(wchar, filter->data));
		break;

	case 2: /* got 0x8e, second byte selects the plane */
		switch (c) {
		case 0xA1:
		case 0xA2:
		case 0xAE:
			/* cache the plane number minus one (0, 1 or 13) */
			filter->cache = c - 0xA1;
			filter->status = 3;
			break;
		default:
			filter->cache = 0;
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}
		break;

	case 3: /* got 0x8e, third byte (row); valid range depends on the plane */
		saved = filter->cache;
		switch (saved) {
		case 0:
			row_ok = euctw_is_plane1_row_byte(c);
			break;
		case 1:
			row_ok = (c >= 0xA1 && c <= 0xF2);
			break;
		case 13:
			row_ok = (c >= 0xA1 && c <= 0xE7);
			break;
		default:
			row_ok = 0;
			break;
		}

		if (row_ok) {
			/* pack plane index (high byte) and row offset (low byte) */
			filter->cache = (saved << 8) + c - 0xA1;
			filter->status = 4;
		} else {
			filter->cache = 0;
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 4: /* multi-byte character, fourth byte (column) */
		saved = filter->cache;
		filter->status = 0;

		if (saved > 0xDFF || !is_trail_byte) {
			filter->cache = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		/* This is actually the CNS-11643 plane minus one */
		plane_index = (saved & 0xF00) >> 8;
		index = (saved & 0xFF) * 94 + c - 0xA1;
		wchar = 0;

		if (index >= 0) {
			/* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3",
			 * and added tens of thousands more characters in planes 4, 5, 6, and 7
			 * We only support the older version of CNS-11643
			 * This is the same as iconv from glibc 2.2 */
			switch (plane_index) {
			case 0:
				if (index < cns11643_1_ucs_table_size) {
					wchar = cns11643_1_ucs_table[index];
				}
				break;
			case 1:
				if (index < cns11643_2_ucs_table_size) {
					wchar = cns11643_2_ucs_table[index];
				}
				break;
			case 13:
				if (index < cns11643_14_ucs_table_size) {
					wchar = cns11643_14_ucs_table[index];
				}
				break;
			default:
				break;
			}
		}

		if (wchar <= 0) {
			wchar = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(wchar, filter->data));
		break;

	default: /* unknown state: recover by going back to idle */
		filter->status = 0;
		break;
	}

	return 0;
}
195
/*
 * wchar => EUC-TW, one code point per call.
 *
 * `c` is looked up in whichever of the ucs_*_cns11643 tables covers its range.
 * As used below, a table entry carries the CNS 11643 plane number in bits
 * 16-20 and the two 7-bit row/column bytes in the low 16 bits; values below
 * 0x80 are emitted as a single byte.
 *
 * Output:
 *   - value < 0x80 in plane 0 or 1 : one byte
 *   - plane 0 or 1                 : two bytes (row | 0x80, column | 0x80)
 *   - any other plane              : 0x8E, 0xA0 + plane, row | 0x80, column | 0x80
 *   - no mapping                   : `c` is handed to mbfl_filt_conv_illegal_output
 *
 * Returns 0, or -1 as soon as an output call returns a negative value.
 */
int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter)
{
	int s = 0;
	int plane, row_byte, col_byte;

	if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) {
		s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min];
	} else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) {
		s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min];
	} else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) {
		s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min];
	} else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) {
		s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min];
	} else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) {
		s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min];
	}

	/* A non-positive entry means "no mapping", except for U+0000, which is
	 * legitimately encoded as the single byte 0x00. */
	if (s <= 0) {
		s = (c == 0) ? 0 : -1;
	}

	if (s < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	plane = (s & 0x1F0000) >> 16;
	/* Setting the top bit of each byte turns the 7-bit CNS 11643 row/column
	 * codes into their EUC form. */
	row_byte = ((s >> 8) & 0xFF) | 0x80;
	col_byte = (s & 0xFF) | 0x80;

	if (plane <= 1) {
		if (s < 0x80) { /* latin */
			CK((*filter->output_function)(s, filter->data));
		} else {
			CK((*filter->output_function)(row_byte, filter->data));
			CK((*filter->output_function)(col_byte, filter->data));
		}
	} else {
		/* Four-byte form. The plane byte is computed directly instead of
		 * assembling 0x8EA00000 + ... in an int: that constant does not fit
		 * in a 32-bit signed int, so the old code depended on an
		 * implementation-defined conversion and on right-shifting a negative
		 * value. plane <= 0x1F, so 0xA0 + plane always fits in one byte. */
		CK((*filter->output_function)(0x8E, filter->data));
		CK((*filter->output_function)(0xA0 + plane, filter->data));
		CK((*filter->output_function)(row_byte, filter->data));
		CK((*filter->output_function)(col_byte, filter->data));
	}

	return 0;
}
242
/*
 * Flush handler for the EUC-TW => wchar decoder.
 *
 * A non-zero filter->status at this point means the input ended in the middle
 * of a 2-byte or 4-byte character. The state is reset and MBFL_BAD_INPUT is
 * sent to the output function; if that call returns a negative value, -1 is
 * returned right away and the downstream flush function is not called.
 *
 * Otherwise the downstream flush function is invoked when one is set (its
 * return value is ignored) and 0 is returned.
 */
static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter)
{
	const int truncated = (filter->status != 0);

	if (truncated) {
		filter->status = 0;
		if ((*filter->output_function)(MBFL_BAD_INPUT, filter->data) < 0) {
			return -1;
		}
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
257