1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this file was separated from mbfilter_ja.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_euc_jp_win.h"
32
33 #include "unicode_table_cp932_ext.h"
34 #include "unicode_table_jis.h"
35 #include "cp932_table.h"
36
37 static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter);
38
/*
 * Byte length of an eucJP-win character, indexed by the value of its first
 * byte. The table covers every byte value (0x00-0xFF):
 *
 *   0x8E        -> 2  (0x8E followed by one kana byte)
 *   0x8F        -> 3  (0x8F followed by two JIS X 0212 bytes)
 *   0xA1-0xFE   -> 2  (first byte of a two-byte character)
 *   anything else -> 1
 */
static const unsigned char mblen_table_eucjp[256] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xA0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
57
/* Alias names listed in the eucJP-win encoding descriptor; NULL-terminated. */
static const char *mbfl_encoding_eucjp_win_aliases[] = {
	"eucJP-open",
	"eucJP-ms",
	NULL
};
59
60 const mbfl_encoding mbfl_encoding_eucjp_win = {
61 mbfl_no_encoding_eucjp_win,
62 "eucJP-win",
63 "EUC-JP",
64 mbfl_encoding_eucjp_win_aliases,
65 mblen_table_eucjp,
66 0,
67 &vtbl_eucjpwin_wchar,
68 &vtbl_wchar_eucjpwin,
69 NULL
70 };
71
72 const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = {
73 mbfl_no_encoding_eucjp_win,
74 mbfl_no_encoding_wchar,
75 mbfl_filt_conv_common_ctor,
76 NULL,
77 mbfl_filt_conv_eucjpwin_wchar,
78 mbfl_filt_conv_eucjpwin_wchar_flush,
79 NULL,
80 };
81
82 const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = {
83 mbfl_no_encoding_wchar,
84 mbfl_no_encoding_eucjp_win,
85 mbfl_filt_conv_common_ctor,
86 NULL,
87 mbfl_filt_conv_wchar_eucjpwin,
88 mbfl_filt_conv_common_flush,
89 NULL,
90 };
91
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions that return int.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
93
/*
 * eucJP-win => wchar
 *
 * Consumes one input byte `c` per call and keeps the decoder state in
 * filter->status:
 *   0  waiting for the first byte of a character
 *   1  got the first byte (0xA1-0xFE) of a two-byte character; it is in filter->cache
 *   2  got 0x8E, waiting for the kana byte
 *   3  got 0x8F, waiting for the first JIS X 0212 byte
 *   4  got 0x8F and the first JIS X 0212 byte (in filter->cache)
 *
 * Each completed character is passed to filter->output_function as a code
 * point; malformed or unmapped input is reported as MBFL_BAD_INPUT.
 *
 * Returns 0, or -1 as soon as the output function returns a negative value.
 */
int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, pos, wc, i;

	switch (filter->status) {
	case 0: /* first byte of a character */
		if (c == 0x8e) { /* kana first char */
			filter->status = 2;
		} else if (c == 0x8f) { /* X 0212 first char */
			filter->status = 3;
		} else if (c >= 0xa1 && c <= 0xfe) { /* CP932 first char */
			filter->cache = c;
			filter->status = 1;
		} else if (c >= 0 && c < 0x80) { /* latin */
			CK((*filter->output_function)(c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* second byte of a two-byte character */
		filter->status = 0;
		lead = filter->cache;
		if (c <= 0xa0 || c >= 0xff) {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		/* Linear index into the 94x94 grid */
		pos = (lead - 0xa1)*94 + c - 0xa1;

		switch (pos) {
		/* Positions whose code point is fixed here rather than read from the tables */
		case 31:
			wc = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
			break;
		case 32:
			wc = 0xff5e; /* FULLWIDTH TILDE */
			break;
		case 33:
			wc = 0x2225; /* PARALLEL TO */
			break;
		case 60:
			wc = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
			break;
		case 80:
			wc = 0xffe0; /* FULLWIDTH CENT SIGN */
			break;
		case 81:
			wc = 0xffe1; /* FULLWIDTH POUND SIGN */
			break;
		case 137:
			wc = 0xffe2; /* FULLWIDTH NOT SIGN */
			break;
		default:
			wc = 0;
			if (pos >= cp932ext1_ucs_table_min && pos < cp932ext1_ucs_table_max) {
				/* vendor ext1 (13ku) */
				wc = cp932ext1_ucs_table[pos - cp932ext1_ucs_table_min];
			} else if (pos >= 0 && pos < jisx0208_ucs_table_size) {
				/* X 0208 */
				wc = jisx0208_ucs_table[pos];
			} else if (pos >= (84 * 94)) {
				/* user (85ku - 94ku) */
				wc = pos - (84 * 94) + 0xe000;
			}
			break;
		}

		if (wc <= 0) {
			wc = MBFL_BAD_INPUT; /* no mapping found */
		}
		CK((*filter->output_function)(wc, filter->data));
		break;

	case 2: /* got 0x8e, X0201 kana */
		filter->status = 0;
		wc = (c > 0xa0 && c < 0xe0) ? (0xfec0 + c) : MBFL_BAD_INPUT;
		CK((*filter->output_function)(wc, filter->data));
		break;

	case 3: /* got 0x8f, X 0212 first char */
		/* The byte is stored unchecked; it is validated once the next byte arrives */
		filter->cache = c;
		filter->status = 4;
		break;

	case 4: /* got 0x8f, X 0212 second char */
		filter->status = 0;
		lead = filter->cache;
		if (lead <= 0xa0 || lead >= 0xff || c <= 0xa0 || c >= 0xff) {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		pos = (lead - 0xa1)*94 + c - 0xa1;
		wc = 0;

		if (pos >= 0 && pos < jisx0212_ucs_table_size) {
			wc = jisx0212_ucs_table[pos];
			if (wc == 0x007e) {
				wc = 0xff5e; /* FULLWIDTH TILDE */
			}
		} else if (pos >= (82*94) && pos < (84*94)) {
			/* vendor ext3 (83ku - 84ku) <-> CP932 (115ku - 120ku) */
			const int euc = (lead << 8) | c;

			/* Find the two-byte code in the ext3 list; the code point sits at the same index */
			for (i = 0; i < cp932ext3_eucjp_table_size; i++) {
				if (euc == cp932ext3_eucjp_table[i]) {
					if (i < (cp932ext3_ucs_table_max - cp932ext3_ucs_table_min)) {
						wc = cp932ext3_ucs_table[i];
					}
					break;
				}
			}
		} else if (pos >= (84*94)) {
			/* user (85ku - 94ku) */
			wc = pos - (84*94) + (0xe000 + (94*10));
		}

		if (wc == 0x00A6) {
			wc = 0xFFE4; /* FULLWIDTH BROKEN BAR */
		}
		if (wc <= 0) {
			wc = MBFL_BAD_INPUT; /* no mapping found */
		}
		CK((*filter->output_function)(wc, filter->data));
		break;

	default: /* unknown state: reset */
		filter->status = 0;
		break;
	}

	return 0;
}
222
/*
 * Flush handler for the eucJP-win => wchar decoder.
 *
 * A non-zero filter->status means input ended in the middle of a multi-byte
 * character: MBFL_BAD_INPUT is emitted and the state is reset. The filter's
 * flush_function, when set, is then called with filter->data.
 *
 * Always returns 0; the results of both calls are not inspected.
 */
static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Truncated character at end of input */
		(void)(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
		filter->status = 0;
	}

	if (!filter->flush_function) {
		return 0;
	}

	(void)(*filter->flush_function)(filter->data);
	return 0;
}
236
/*
 * wchar => eucJP-win
 *
 * Maps one code point `c` to an intermediate code and writes it through
 * filter->output_function as:
 *   code <  0x80    one byte
 *   code <  0x100   0x8E followed by the code (kana)
 *   code <  0x8080  two bytes, each with the high bit set (X 0208)
 *   otherwise       0x8F followed by two bytes with the high bit set (X 0212)
 *
 * Code points with no mapping are handed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0, or -1 as soon as a downstream call returns a negative value.
 */
int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
{
	int i, limit, code = 0;

	/* Step 1: fixed mappings, the UCS->JIS tables, and the user-defined ranges */
	if (c == 0xAF) { /* U+00AF is MACRON */
		code = 0xA2B4; /* Use JIS X 0212 overline */
	} else if (c == 0x203E) {
		code = 0x7E;
	} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		code = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		code = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		code = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		code = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xe000 && c < (0xe000 + 10*94)) {
		/* user (X0208 85ku - 94ku) */
		const int off = c - 0xe000;

		code = ((off/94 + 0x75) << 8) | (off%94 + 0x21);
	} else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) {
		/* user (X0212 85ku - 94ku) */
		const int off = c - (0xe000 + 10*94);

		code = ((off/94 + 0xf5) << 8) | (off%94 + 0xa1);
	}

	if (code == 0xa2f1) {
		code = 0x2d62; /* NUMERO SIGN */
	}

	/* Step 2: nothing found yet; try the fixed fallbacks, then the CP932 extension tables */
	if (code <= 0) {
		switch (c) {
		case 0xa5: /* YEN SIGN */
			code = 0x5C;
			break;
		case 0x2014:
			code = 0x213D;
			break;
		case 0xff3c: /* FULLWIDTH REVERSE SOLIDUS */
			code = 0x2140;
			break;
		case 0xff5e: /* FULLWIDTH TILDE */
			code = 0x2141;
			break;
		case 0x2225: /* PARALLEL TO */
			code = 0x2142;
			break;
		case 0xff0d: /* FULLWIDTH HYPHEN-MINUS */
			code = 0x215d;
			break;
		case 0xffe0: /* FULLWIDTH CENT SIGN */
			code = 0x2171;
			break;
		case 0xffe1: /* FULLWIDTH POUND SIGN */
			code = 0x2172;
			break;
		case 0xffe2: /* FULLWIDTH NOT SIGN */
			code = 0x224c;
			break;
		default: {
			/* Row offset of the first ext1 entry within the 94x94 grid */
			const int row_base = cp932ext1_ucs_table_min / 94;

			code = -1;

			/* CP932 vendor ext1 (13ku): reverse lookup by linear scan */
			limit = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
			for (i = 0; i < limit; i++) {
				if (c == cp932ext1_ucs_table[i]) {
					code = ((i / 94 + row_base + 0x21) << 8) + (i % 94 + 0x21);
					break;
				}
			}

			if (code < 0) {
				/* CP932 vendor ext3 (115ku - 119ku): reverse lookup by linear scan */
				limit = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
				for (i = 0; i < limit; i++) {
					if (c == cp932ext3_ucs_table[i]) {
						if (i < cp932ext3_eucjp_table_size) {
							code = cp932ext3_eucjp_table[i];
						}
						break;
					}
				}
			}
			break;
		}
		}

		/* U+0000 always encodes as byte 0, whatever the scans above matched */
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	/* Step 3: emit the bytes, or report the code point as unconvertible */
	if (code < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (code < 0x80) { /* latin */
		CK((*filter->output_function)(code, filter->data));
	} else if (code < 0x100) { /* kana */
		CK((*filter->output_function)(0x8e, filter->data));
		CK((*filter->output_function)(code, filter->data));
	} else if (code < 0x8080) { /* X 0208 */
		CK((*filter->output_function)(((code >> 8) & 0xff) | 0x80, filter->data));
		CK((*filter->output_function)((code & 0xff) | 0x80, filter->data));
	} else { /* X 0212 */
		CK((*filter->output_function)(0x8f, filter->data));
		CK((*filter->output_function)(((code >> 8) & 0xff) | 0x80, filter->data));
		CK((*filter->output_function)((code & 0xff) | 0x80, filter->data));
	}

	return 0;
}
343