1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp>
22  *
23  */
24 
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28 
29 #include "mbfilter.h"
30 #include "mbfilter_cp5022x.h"
31 #include "mbfilter_jis.h"
32 #include "mbfilter_tl_jisx0201_jisx0208.h"
33 
34 #include "unicode_table_cp932_ext.h"
35 #include "unicode_table_jis.h"
36 #include "cp932_table.h"
37 
38 typedef struct _mbfl_filt_conv_wchar_cp50220_ctx {
39 	mbfl_filt_tl_jisx0201_jisx0208_param tl_param;
40 	mbfl_convert_filter last;
41 } mbfl_filt_conv_wchar_cp50220_ctx;
42 
43 static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter);
44 static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter);
45 static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter);
46 static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter);
47 static void mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt);
48 static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt);
49 static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest);
50 
51 const mbfl_encoding mbfl_encoding_jis_ms = {
52 	mbfl_no_encoding_jis_ms,
53 	"JIS-ms",
54 	"ISO-2022-JP",
55 	NULL,
56 	NULL,
57 	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
58 };
59 
60 const mbfl_encoding mbfl_encoding_cp50220 = {
61 	mbfl_no_encoding_cp50220,
62 	"CP50220",
63 	"ISO-2022-JP",
64 	(const char *(*)[])NULL,
65 	NULL,
66 	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
67 };
68 
69 const mbfl_encoding mbfl_encoding_cp50220raw = {
70 	mbfl_no_encoding_cp50220raw,
71 	"CP50220raw",
72 	"ISO-2022-JP",
73 	(const char *(*)[])NULL,
74 	NULL,
75 	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
76 };
77 
78 const mbfl_encoding mbfl_encoding_cp50221 = {
79 	mbfl_no_encoding_cp50221,
80 	"CP50221",
81 	"ISO-2022-JP",
82 	NULL,
83 	NULL,
84 	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
85 };
86 
87 const mbfl_encoding mbfl_encoding_cp50222 = {
88 	mbfl_no_encoding_cp50222,
89 	"CP50222",
90 	"ISO-2022-JP",
91 	NULL,
92 	NULL,
93 	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
94 };
95 
96 const struct mbfl_identify_vtbl vtbl_identify_jis_ms = {
97 	mbfl_no_encoding_jis_ms,
98 	mbfl_filt_ident_common_ctor,
99 	mbfl_filt_ident_common_dtor,
100 	mbfl_filt_ident_jis_ms
101 };
102 
103 const struct mbfl_identify_vtbl vtbl_identify_cp50220 = {
104 	mbfl_no_encoding_cp50220,
105 	mbfl_filt_ident_common_ctor,
106 	mbfl_filt_ident_common_dtor,
107 	mbfl_filt_ident_cp50220
108 };
109 
110 const struct mbfl_identify_vtbl vtbl_identify_cp50220raw = {
111 	mbfl_no_encoding_cp50220raw,
112 	mbfl_filt_ident_common_ctor,
113 	mbfl_filt_ident_common_dtor,
114 	mbfl_filt_ident_cp50220
115 };
116 
117 const struct mbfl_identify_vtbl vtbl_identify_cp50221 = {
118 	mbfl_no_encoding_cp50221,
119 	mbfl_filt_ident_common_ctor,
120 	mbfl_filt_ident_common_dtor,
121 	mbfl_filt_ident_cp50221
122 };
123 
124 const struct mbfl_identify_vtbl vtbl_identify_cp50222 = {
125 	mbfl_no_encoding_cp50222,
126 	mbfl_filt_ident_common_ctor,
127 	mbfl_filt_ident_common_dtor,
128 	mbfl_filt_ident_cp50222
129 };
130 
131 const struct mbfl_convert_vtbl vtbl_jis_ms_wchar = {
132 	mbfl_no_encoding_jis_ms,
133 	mbfl_no_encoding_wchar,
134 	mbfl_filt_conv_common_ctor,
135 	mbfl_filt_conv_common_dtor,
136 	mbfl_filt_conv_jis_ms_wchar,
137 	mbfl_filt_conv_common_flush,
138 };
139 
140 const struct mbfl_convert_vtbl vtbl_wchar_jis_ms = {
141 	mbfl_no_encoding_wchar,
142 	mbfl_no_encoding_jis_ms,
143 	mbfl_filt_conv_common_ctor,
144 	mbfl_filt_conv_common_dtor,
145 	mbfl_filt_conv_wchar_jis_ms,
146 	mbfl_filt_conv_any_jis_flush
147 };
148 
149 const struct mbfl_convert_vtbl vtbl_cp50220_wchar = {
150 	mbfl_no_encoding_cp50220,
151 	mbfl_no_encoding_wchar,
152 	mbfl_filt_conv_common_ctor,
153 	mbfl_filt_conv_common_dtor,
154 	mbfl_filt_conv_jis_ms_wchar,
155 	mbfl_filt_conv_common_flush
156 };
157 
158 const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = {
159 	mbfl_no_encoding_wchar,
160 	mbfl_no_encoding_cp50220,
161 	mbfl_filt_conv_wchar_cp50220_ctor,
162 	mbfl_filt_conv_wchar_cp50220_dtor,
163 	mbfl_filt_conv_wchar_cp50221,
164 	mbfl_filt_conv_any_jis_flush,
165 	mbfl_filt_conv_wchar_cp50220_copy
166 };
167 
168 const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar = {
169 	mbfl_no_encoding_cp50220raw,
170 	mbfl_no_encoding_wchar,
171 	mbfl_filt_conv_common_ctor,
172 	mbfl_filt_conv_common_dtor,
173 	mbfl_filt_conv_jis_ms_wchar,
174 	mbfl_filt_conv_common_flush
175 };
176 
177 const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw = {
178 	mbfl_no_encoding_wchar,
179 	mbfl_no_encoding_cp50220raw,
180 	mbfl_filt_conv_wchar_cp50220_ctor,
181 	mbfl_filt_conv_wchar_cp50220_dtor,
182 	mbfl_filt_conv_wchar_cp50220raw,
183 	mbfl_filt_conv_any_jis_flush,
184 	mbfl_filt_conv_wchar_cp50220_copy
185 };
186 
187 const struct mbfl_convert_vtbl vtbl_cp50221_wchar = {
188 	mbfl_no_encoding_cp50221,
189 	mbfl_no_encoding_wchar,
190 	mbfl_filt_conv_common_ctor,
191 	mbfl_filt_conv_common_dtor,
192 	mbfl_filt_conv_jis_ms_wchar,
193 	mbfl_filt_conv_common_flush
194 };
195 
196 const struct mbfl_convert_vtbl vtbl_wchar_cp50221 = {
197 	mbfl_no_encoding_wchar,
198 	mbfl_no_encoding_cp50221,
199 	mbfl_filt_conv_common_ctor,
200 	mbfl_filt_conv_common_dtor,
201 	mbfl_filt_conv_wchar_cp50221,
202 	mbfl_filt_conv_any_jis_flush
203 };
204 
205 const struct mbfl_convert_vtbl vtbl_cp50222_wchar = {
206 	mbfl_no_encoding_cp50222,
207 	mbfl_no_encoding_wchar,
208 	mbfl_filt_conv_common_ctor,
209 	mbfl_filt_conv_common_dtor,
210 	mbfl_filt_conv_jis_ms_wchar,
211 	mbfl_filt_conv_common_flush
212 };
213 
214 const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = {
215 	mbfl_no_encoding_wchar,
216 	mbfl_no_encoding_cp50222,
217 	mbfl_filt_conv_common_ctor,
218 	mbfl_filt_conv_common_dtor,
219 	mbfl_filt_conv_wchar_cp50222,
220 	mbfl_filt_conv_wchar_cp50222_flush
221 };
222 
/*
 * Evaluate `statement` once; when it yields a negative value, return -1 from
 * the enclosing function.  Only usable inside functions returning int.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) \
			return (-1); \
	} while (0)
224 
225 /*
226  * JIS-ms => wchar
227  */
228 int
mbfl_filt_conv_jis_ms_wchar(int c,mbfl_convert_filter * filter)229 mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter)
230 {
231 	int c1, s, w;
232 
233 retry:
234 	switch (filter->status & 0xf) {
235 /*	case 0x00:	 ASCII */
236 /*	case 0x10:	 X 0201 latin */
237 /*	case 0x20:	 X 0201 kana */
238 /*	case 0x80:	 X 0208 */
239 /*	case 0x90:	 X 0212 */
240 	case 0:
241 		if (c == 0x1b) {
242 			filter->status += 2;
243 		} else if (c == 0x0e) {		/* "kana in" */
244 			filter->status = 0x20;
245 		} else if (c == 0x0f) {		/* "kana out" */
246 			filter->status = 0;
247 		} else if (filter->status == 0x10 && c == 0x5c) {	/* YEN SIGN */
248 			CK((*filter->output_function)(0xa5, filter->data));
249 		} else if (filter->status == 0x10 && c == 0x7e) {	/* OVER LINE */
250 			CK((*filter->output_function)(0x203e, filter->data));
251 		} else if (filter->status == 0x20 && c > 0x20 && c < 0x60) {		/* kana */
252 			CK((*filter->output_function)(0xff40 + c, filter->data));
253 		} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x93) {		/* kanji first char */
254 			filter->cache = c;
255 			filter->status += 1;
256 		} else if (c >= 0 && c < 0x80) {		/* latin, CTLs */
257 			CK((*filter->output_function)(c, filter->data));
258 		} else if (c > 0xa0 && c < 0xe0) {	/* GR kana */
259 			CK((*filter->output_function)(0xfec0 + c, filter->data));
260 		} else {
261 			w = c & MBFL_WCSGROUP_MASK;
262 			w |= MBFL_WCSGROUP_THROUGH;
263 			CK((*filter->output_function)(w, filter->data));
264 		}
265 		break;
266 
267 /*	case 0x81:	 X 0208 second char */
268 /*	case 0x91:	 X 0212 second char */
269 	case 1:
270 		filter->status &= ~0xf;
271 		c1 = filter->cache;
272 		if (c > 0x20 && c < 0x7f) {
273 			s = (c1 - 0x21)*94 + c - 0x21;
274 			if (filter->status == 0x80) {
275 				if (s >= 0 && s < jisx0208_ucs_table_size) {
276 					w = jisx0208_ucs_table[s];
277 				} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
278 					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
279 				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
280 					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
281 				} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
282 					w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
283 				} else if (s >= 94 * 94 && s < 114 * 94) {
284 					/* user-defined => PUA (Microsoft extended) */
285 					w = s - 94*94 + 0xe000;
286 				} else {
287 					w = 0;
288 				}
289 				if (w <= 0) {
290 					w = (c1 << 8) | c;
291 					w &= MBFL_WCSPLANE_MASK;
292 					w |= MBFL_WCSPLANE_JIS0208;
293 				}
294 			} else {
295 				if (s >= 0 && s < jisx0212_ucs_table_size) {
296 					w = jisx0212_ucs_table[s];
297 				} else {
298 					w = 0;
299 				}
300 				if (w <= 0) {
301 					w = (c1 << 8) | c;
302 					w &= MBFL_WCSPLANE_MASK;
303 					w |= MBFL_WCSPLANE_JIS0212;
304 				}
305 			}
306 			CK((*filter->output_function)(w, filter->data));
307 		} else if (c == 0x1b) {
308 			filter->status += 2;
309 		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
310 			CK((*filter->output_function)(c, filter->data));
311 		} else {
312 			w = (c1 << 8) | c;
313 			w &= MBFL_WCSGROUP_MASK;
314 			w |= MBFL_WCSGROUP_THROUGH;
315 			CK((*filter->output_function)(w, filter->data));
316 		}
317 		break;
318 
319 	/* ESC */
320 /*	case 0x02:	*/
321 /*	case 0x12:	*/
322 /*	case 0x22:	*/
323 /*	case 0x82:	*/
324 /*	case 0x92:	*/
325 	case 2:
326 		if (c == 0x24) {		/* '$' */
327 			filter->status++;
328 		} else if (c == 0x28) {		/* '(' */
329 			filter->status += 3;
330 		} else {
331 			filter->status &= ~0xf;
332 			CK((*filter->output_function)(0x1b, filter->data));
333 			goto retry;
334 		}
335 		break;
336 
337 	/* ESC $ */
338 /*	case 0x03:	*/
339 /*	case 0x13:	*/
340 /*	case 0x23:	*/
341 /*	case 0x83:	*/
342 /*	case 0x93:	*/
343 	case 3:
344 		if (c == 0x40 || c == 0x42) {	/* '@' or 'B' */
345 			filter->status = 0x80;
346 		} else if (c == 0x28) {			/* '(' */
347 			filter->status++;
348 		} else {
349 			filter->status &= ~0xf;
350 			CK((*filter->output_function)(0x1b, filter->data));
351 			CK((*filter->output_function)(0x24, filter->data));
352 			goto retry;
353 		}
354 		break;
355 
356 	/* ESC $ ( */
357 /*	case 0x04:	*/
358 /*	case 0x14:	*/
359 /*	case 0x24:	*/
360 /*	case 0x84:	*/
361 /*	case 0x94:	*/
362 	case 4:
363 		if (c == 0x40 || c == 0x42) {	/* '@' or 'B' */
364 			filter->status = 0x80;
365 		} else if (c == 0x44) {			/* 'D' */
366 			filter->status = 0x90;
367 		} else {
368 			filter->status &= ~0xf;
369 			CK((*filter->output_function)(0x1b, filter->data));
370 			CK((*filter->output_function)(0x24, filter->data));
371 			CK((*filter->output_function)(0x28, filter->data));
372 			goto retry;
373 		}
374 		break;
375 
376 	/* ESC ( */
377 /*	case 0x05:	*/
378 /*	case 0x15:	*/
379 /*	case 0x25:	*/
380 /*	case 0x85:	*/
381 /*	case 0x95:	*/
382 	case 5:
383 		if (c == 0x42 || c == 0x48) {		/* 'B' or 'H' */
384 			filter->status = 0;
385 		} else if (c == 0x4a) {		/* 'J' */
386 			filter->status = 0x10;
387 		} else if (c == 0x49) {		/* 'I' */
388 			filter->status = 0x20;
389 		} else {
390 			filter->status &= ~0xf;
391 			CK((*filter->output_function)(0x1b, filter->data));
392 			CK((*filter->output_function)(0x28, filter->data));
393 			goto retry;
394 		}
395 		break;
396 
397 	default:
398 		filter->status = 0;
399 		break;
400 	}
401 
402 	return c;
403 }
404 
405 /*
406  * wchar => JIS
407  */
408 int
mbfl_filt_conv_wchar_jis_ms(int c,mbfl_convert_filter * filter)409 mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter)
410 {
411 	int c1, s;
412 
413 	s = 0;
414 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
415 		s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
416 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
417 		s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
418 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
419 		s = ucs_i_jis_table[c - ucs_i_jis_table_min];
420 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
421 		s = ucs_r_jis_table[c - ucs_r_jis_table_min];
422 	} else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) {
423 		/* PUE => Microsoft extended (pseudo 95ku - 114ku) */
424 		/* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
425 		s = c - 0xe000;
426 		s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21);
427 	} else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) {
428 		/* PUE => JISX0212 user-defined (G3 85ku - 94ku) */
429 		/* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
430 		s = c - (0xe000 + 10 * 94);
431 		s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1);
432 	}
433 
434 	/* do some transliteration */
435 	if (s <= 0) {
436 		c1 = c & ~MBFL_WCSPLANE_MASK;
437 		if (c1 == MBFL_WCSPLANE_JIS0208) {
438 			s = c & MBFL_WCSPLANE_MASK;
439 		} else if (c1 == MBFL_WCSPLANE_JIS0212) {
440 			s = c & MBFL_WCSPLANE_MASK;
441 			s |= 0x8080;
442 		} else if (c == 0xa5) {		/* YEN SIGN */
443 			s = 0x1005c;
444 		} else if (c == 0x203e) {	/* OVER LINE */
445 			s = 0x1007e;
446 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
447 			s = 0x2140;
448 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
449 			s = 0x2141;
450 		} else if (c == 0x2225) {	/* PARALLEL TO */
451 			s = 0x2142;
452 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
453 			s = 0x215d;
454 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
455 			s = 0x2171;
456 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
457 			s = 0x2172;
458 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
459 			s = 0x224c;
460 		}
461 	}
462 	if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
463 		int i;
464 		s = -1;
465 
466 		for (i = 0;
467 				i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
468 			const int oh = cp932ext1_ucs_table_min / 94;
469 
470 			if (c == cp932ext1_ucs_table[i]) {
471 				s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
472 				break;
473 			}
474 		}
475 
476 		if (s < 0) {
477 			const int oh = cp932ext2_ucs_table_min / 94;
478 			const int cp932ext2_ucs_table_size =
479 					cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
480 			for (i = 0; i < cp932ext2_ucs_table_size; i++) {
481 				if (c == cp932ext2_ucs_table[i]) {
482 					s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
483 					break;
484 				}
485 			}
486 		}
487 
488 		if (s < 0) {
489 			const int cp932ext3_ucs_table_size =
490 					cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
491 			const int limit = cp932ext3_ucs_table_size >
492 					cp932ext3_eucjp_table_size ?
493 						cp932ext3_eucjp_table_size:
494 						cp932ext3_ucs_table_size;
495 			for (i = 0; i < limit; i++) {
496 				if (c == cp932ext3_ucs_table[i]) {
497 					s = cp932ext3_eucjp_table[i];
498 					break;
499 				}
500 			}
501 		}
502 
503 		if (c == 0) {
504 			s = 0;
505 		} else if (s <= 0) {
506 			s = -1;
507 		}
508 	}
509 
510 	if (s >= 0) {
511 		if (s < 0x80) { /* ASCII */
512 			if ((filter->status & 0xff00) != 0) {
513 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
514 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
515 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
516 			}
517 			filter->status = 0;
518 			CK((*filter->output_function)(s, filter->data));
519 		} else if (s < 0x100) { /* kana */
520 			if ((filter->status & 0xff00) != 0x100) {
521 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
522 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
523 				CK((*filter->output_function)(0x49, filter->data));		/* 'I' */
524 			}
525 			filter->status = 0x100;
526 			CK((*filter->output_function)(s & 0x7f, filter->data));
527 		} else if (s < 0x8080) { /* X 0208 */
528 			if ((filter->status & 0xff00) != 0x200) {
529 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
530 				CK((*filter->output_function)(0x24, filter->data));		/* '$' */
531 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
532 			}
533 			filter->status = 0x200;
534 			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
535 			CK((*filter->output_function)(s & 0x7f, filter->data));
536 		} else if (s < 0x10000) { /* X 0212 */
537 			if ((filter->status & 0xff00) != 0x300) {
538 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
539 				CK((*filter->output_function)(0x24, filter->data));		/* '$' */
540 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
541 				CK((*filter->output_function)(0x44, filter->data));		/* 'D' */
542 			}
543 			filter->status = 0x300;
544 			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
545 			CK((*filter->output_function)(s & 0x7f, filter->data));
546 		} else { /* X 0201 latin */
547 			if ((filter->status & 0xff00) != 0x400) {
548 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
549 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
550 				CK((*filter->output_function)(0x4a, filter->data));		/* 'J' */
551 			}
552 			filter->status = 0x400;
553 			CK((*filter->output_function)(s & 0x7f, filter->data));
554 		}
555 	} else {
556 		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
557 			CK(mbfl_filt_conv_illegal_output(c, filter));
558 		}
559 	}
560 
561 	return c;
562 }
563 
564 /*
565  * wchar => CP50220
566  */
567 static void
mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter * filt)568 mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt)
569 {
570 	mbfl_filt_conv_wchar_cp50220_ctx *ctx;
571 
572 	mbfl_filt_conv_common_ctor(filt);
573 
574 	ctx = mbfl_malloc(sizeof(mbfl_filt_conv_wchar_cp50220_ctx));
575 	if (ctx == NULL) {
576 		mbfl_filt_conv_common_dtor(filt);
577 		return;
578 	}
579 
580 	ctx->tl_param.mode = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE;
581 
582 	ctx->last = *filt;
583 	ctx->last.opaque = ctx;
584 	ctx->last.data = filt->data;
585 	filt->filter_function = vtbl_tl_jisx0201_jisx0208.filter_function;
586 	filt->filter_flush = vtbl_tl_jisx0201_jisx0208.filter_flush;
587 	filt->output_function = (int(*)(int, void *))ctx->last.filter_function;
588 	filt->flush_function = (int(*)(void *))ctx->last.filter_flush;
589 	filt->data = &ctx->last;
590 	filt->opaque = ctx;
591 	vtbl_tl_jisx0201_jisx0208.filter_ctor(filt);
592 }
593 
594 static void
mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter * src,mbfl_convert_filter * dest)595 mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest)
596 {
597 	mbfl_filt_conv_wchar_cp50220_ctx *ctx;
598 
599 	*dest = *src;
600 	ctx = mbfl_malloc(sizeof(mbfl_filt_conv_wchar_cp50220_ctx));
601 	if (ctx != NULL) {
602 		*ctx = *(mbfl_filt_conv_wchar_cp50220_ctx*)src->opaque;
603 	}
604 
605 	dest->opaque = ctx;
606 	dest->data = &ctx->last;
607 }
608 
609 static void
mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter * filt)610 mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt)
611 {
612 	vtbl_tl_jisx0201_jisx0208.filter_dtor(filt);
613 
614 	if (filt->opaque != NULL) {
615 		mbfl_free(filt->opaque);
616 	}
617 
618 	mbfl_filt_conv_common_dtor(filt);
619 }
620 
621 /*
622  * wchar => cp50220raw
623  */
624 int
mbfl_filt_conv_wchar_cp50220raw(int c,mbfl_convert_filter * filter)625 mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter)
626 {
627 	if (c & MBFL_WCSPLANE_JIS0208) {
628 		const int s = c & MBFL_WCSPLANE_MASK;
629 
630 		if ((filter->status & 0xff00) != 0x200) {
631 			CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
632 			CK((*filter->output_function)(0x24, filter->data));		/* '$' */
633 			CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
634 			filter->status = 0x200;
635 		}
636 		CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
637 		CK((*filter->output_function)(s & 0x7f, filter->data));
638 		return c;
639 	} else {
640 		return mbfl_filt_conv_wchar_cp50221(c, filter);
641 	}
642 }
643 
644 /*
645  * wchar => CP50221
646  */
647 int
mbfl_filt_conv_wchar_cp50221(int c,mbfl_convert_filter * filter)648 mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
649 {
650 	int s = 0;
651 
652 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
653 		s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
654 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
655 		s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
656 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
657 		s = ucs_i_jis_table[c - ucs_i_jis_table_min];
658 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
659 		s = ucs_r_jis_table[c - ucs_r_jis_table_min];
660 	} else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) {
661 		/* PUE => Microsoft extended */
662 		/* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
663 		s = c - 0xe000;
664 		s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21);
665 	} else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) {
666 		/* PUE => JISX0212 user-defined (G3 85ku - 94ku) */
667 		/* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
668 		s = c - (0xe000 + 10 * 94);
669 		s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1);
670 	}
671 
672 	if (s <= 0) {
673 		if (c == 0xa5) {			/* YEN SIGN */
674 			s = 0x1005c;
675 		} else if (c == 0x203e) {	/* OVER LINE */
676 			s = 0x1007e;
677 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
678 			s = 0x2140;
679 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
680 			s = 0x2141;
681 		} else if (c == 0x2225) {	/* PARALLEL TO */
682 			s = 0x2142;
683 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
684 			s = 0x215d;
685 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
686 			s = 0x2171;
687 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
688 			s = 0x2172;
689 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
690 			s = 0x224c;
691 		}
692 	}
693 	if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
694 		int i;
695 		s = -1;
696 
697 		for (i = 0;
698 				i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
699 				i++) {
700 			const int oh = cp932ext1_ucs_table_min / 94;
701 
702 			if (c == cp932ext1_ucs_table[i]) {
703 				s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
704 				break;
705 			}
706 		}
707 
708 		if (s < 0) {
709 			const int oh = cp932ext2_ucs_table_min / 94;
710 			const int cp932ext2_ucs_table_size =
711 					cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
712 			for (i = 0; i < cp932ext2_ucs_table_size; i++) {
713 				if (c == cp932ext2_ucs_table[i]) {
714 					s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
715 					break;
716 				}
717 			}
718 		}
719 
720 		if (s < 0) {
721 			const int cp932ext3_ucs_table_size =
722 					cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
723 			const int limit = cp932ext3_ucs_table_size >
724 					cp932ext3_eucjp_table_size ?
725 						cp932ext3_eucjp_table_size:
726 						cp932ext3_ucs_table_size;
727 			for (i = 0; i < limit; i++) {
728 				if (c == cp932ext3_ucs_table[i]) {
729 					s = cp932ext3_eucjp_table[i];
730 					break;
731 				}
732 			}
733 		}
734 
735 		if (c == 0) {
736 			s = 0;
737 		} else if (s <= 0) {
738 			s = -1;
739 		}
740 	}
741 
742 	if (s >= 0) {
743 		if (s < 0x80) { /* ASCII */
744 			if ((filter->status & 0xff00) != 0) {
745 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
746 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
747 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
748 				filter->status = 0;
749 			}
750 			CK((*filter->output_function)(s, filter->data));
751 		} else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */
752 			if ((filter->status & 0xff00) != 0x500) {
753 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
754 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
755 				CK((*filter->output_function)(0x49, filter->data));		/* 'I' */
756 				filter->status = 0x500;
757 			}
758 			CK((*filter->output_function)(s - 0x80, filter->data));
759 		} else if (s < 0x8080) { /* X 0208 */
760 			if ((filter->status & 0xff00) != 0x200) {
761 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
762 				CK((*filter->output_function)(0x24, filter->data));		/* '$' */
763 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
764 				filter->status = 0x200;
765 			}
766 			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
767 			CK((*filter->output_function)(s & 0x7f, filter->data));
768 		} else if (s < 0x10000) { /* X0212 */
769 			if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
770 				CK(mbfl_filt_conv_illegal_output(c, filter));
771 			}
772 		} else { /* X 0201 latin */
773 			if ((filter->status & 0xff00) != 0x400) {
774 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
775 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
776 				CK((*filter->output_function)(0x4a, filter->data));		/* 'J' */
777 			}
778 			filter->status = 0x400;
779 			CK((*filter->output_function)(s & 0x7f, filter->data));
780 		}
781 	} else {
782 		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
783 			CK(mbfl_filt_conv_illegal_output(c, filter));
784 		}
785 	}
786 
787 	return c;
788 }
789 
790 /*
791  * wchar => CP50222
792  */
793 int
mbfl_filt_conv_wchar_cp50222(int c,mbfl_convert_filter * filter)794 mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
795 {
796 	int s;
797 
798 	s = 0;
799 
800 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
801 		s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
802 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
803 		s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
804 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
805 		s = ucs_i_jis_table[c - ucs_i_jis_table_min];
806 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
807 		s = ucs_r_jis_table[c - ucs_r_jis_table_min];
808 	} else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) {
809 		/* PUE => Microsoft extended */
810 		/* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
811 		s = c - 0xe000;
812 		s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21);
813 	} else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) {
814 		/* PUE => JISX0212 user-defined (G3 85ku - 94ku) */
815 		/* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
816 		s = c - (0xe000 + 10 * 94);
817 		s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1);
818 	}
819 
820 	if (s <= 0) {
821 		if (c == 0xa5) {			/* YEN SIGN */
822 			s = 0x1005c;
823 		} else if (c == 0x203e) {	/* OVER LINE */
824 			s = 0x1007e;
825 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
826 			s = 0x2140;
827 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
828 			s = 0x2141;
829 		} else if (c == 0x2225) {	/* PARALLEL TO */
830 			s = 0x2142;
831 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
832 			s = 0x215d;
833 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
834 			s = 0x2171;
835 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
836 			s = 0x2172;
837 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
838 			s = 0x224c;
839 		}
840 	}
841 	if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
842 		int i;
843 		s = -1;
844 
845 		for (i = 0;
846 				i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
847 			const int oh = cp932ext1_ucs_table_min / 94;
848 
849 			if (c == cp932ext1_ucs_table[i]) {
850 				s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
851 				break;
852 			}
853 		}
854 
855 		if (s <= 0) {
856 			const int oh = cp932ext2_ucs_table_min / 94;
857 			const int cp932ext2_ucs_table_size =
858 					cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
859 			for (i = 0; i < cp932ext2_ucs_table_size; i++) {
860 				if (c == cp932ext2_ucs_table[i]) {
861 					s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
862 					break;
863 				}
864 			}
865 		}
866 
867 		if (s <= 0) {
868 			const int cp932ext3_ucs_table_size =
869 					cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
870 			const int limit = cp932ext3_ucs_table_size >
871 					cp932ext3_eucjp_table_size ?
872 						cp932ext3_eucjp_table_size:
873 						cp932ext3_ucs_table_size;
874 			for (i = 0; i < limit; i++) {
875 				if (c == cp932ext3_ucs_table[i]) {
876 					s = cp932ext3_eucjp_table[i];
877 					break;
878 				}
879 			}
880 		}
881 
882 		if (c == 0) {
883 			s = 0;
884 		} else if (s <= 0) {
885 			s = -1;
886 		}
887 	}
888 
889 	if (s >= 0) {
890 		if (s < 0x80) { /* ASCII */
891 			if ((filter->status & 0xff00) == 0x500) {
892 				CK((*filter->output_function)(0x0f, filter->data));		/* SO */
893 				filter->status = 0;
894 			} else if ((filter->status & 0xff00) != 0) {
895 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
896 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
897 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
898 				filter->status = 0;
899 			}
900 			CK((*filter->output_function)(s, filter->data));
901 		} else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */
902 			if ((filter->status & 0xff00) != 0x500) {
903 				CK((*filter->output_function)(0x0e, filter->data));		/* SI */
904 				filter->status = 0x500;
905 			}
906 			CK((*filter->output_function)(s - 0x80, filter->data));
907 		} else if (s < 0x8080) { /* X 0208 */
908 			if ((filter->status & 0xff00) == 0x500) {
909 				CK((*filter->output_function)(0x0f, filter->data));		/* SO */
910 				filter->status = 0;
911 			}
912 			if ((filter->status & 0xff00) != 0x200) {
913 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
914 				CK((*filter->output_function)(0x24, filter->data));		/* '$' */
915 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
916 				filter->status = 0x200;
917 			}
918 			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
919 			CK((*filter->output_function)(s & 0x7f, filter->data));
920 		} else if (s < 0x10000) { /* X0212 */
921 			if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
922 				CK(mbfl_filt_conv_illegal_output(c, filter));
923 			}
924 		} else { /* X 0201 latin */
925 			if ((filter->status & 0xff00) == 0x500) {
926 				CK((*filter->output_function)(0x0f, filter->data));		/* SO */
927 				filter->status = 0;
928 			}
929 			if ((filter->status & 0xff00) != 0x400) {
930 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
931 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
932 				CK((*filter->output_function)(0x4a, filter->data));		/* 'J' */
933 			}
934 			filter->status = 0x400;
935 			CK((*filter->output_function)(s & 0x7f, filter->data));
936 		}
937 	} else {
938 		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
939 			CK(mbfl_filt_conv_illegal_output(c, filter));
940 		}
941 	}
942 
943 	return c;
944 }
945 
946 int
mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter * filter)947 mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter)
948 {
949 	/* back to latin */
950 	if ((filter->status & 0xff00) == 0x500) {
951 		CK((*filter->output_function)(0x0f, filter->data));		/* SO */
952 	} else if ((filter->status & 0xff00) != 0) {
953 		CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
954 		CK((*filter->output_function)(0x28, filter->data));		/* '(' */
955 		CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
956 	}
957 	filter->status &= 0xff;
958 
959 	if (filter->flush_function != NULL) {
960 		return (*filter->flush_function)(filter->data);
961 	}
962 
963 	return 0;
964 }
965 
966 
/*
 * Identify filter for the jis_ms encoding: consumes one byte per call and
 * sets filter->flag to 1 as soon as a byte does not fit.
 *
 * filter->status holds two pieces of state:
 *   high nibble - active character set
 *       0x00 ASCII, 0x10 X 0201 latin, 0x20 X 0201 kana,
 *       0x80 X 0208, 0x90 X 0212
 *   low nibble  - position inside a multi-byte unit
 *       0 idle, 1 second byte of a double-byte character pending,
 *       2 after ESC, 3 after ESC $, 4 after ESC $ (, 5 after ESC (
 *
 * When an escape sequence turns out to be malformed, the filter is flagged,
 * the low nibble is reset, and the same byte is examined again in the idle
 * state (the `continue` statements below).
 *
 * Always returns c.
 */
static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter)
{
	for (;;) {
		switch (filter->status & 0xf) {
		case 0:		/* idle */
			switch (c) {
			case 0x1b:		/* ESC */
				filter->status += 2;
				break;
			case 0x0e:		/* "kana in" */
				filter->status = 0x20;
				break;
			case 0x0f:		/* "kana out" */
				filter->status = 0;
				break;
			default:
				if (c < 0 || c >= 0x80) {
					filter->flag = 1;	/* bad */
				} else if (c > 0x20 && c < 0x7f
						&& (filter->status == 0x80 || filter->status == 0x90)) {
					/* first byte of a double-byte character */
					filter->status += 1;
				}
				/* anything else is a plain 7-bit character or control */
				break;
			}
			break;

		case 1:		/* second byte of an X 0208 / X 0212 character */
			filter->status &= ~0xf;
			if (c == 0x1b) {
				continue;	/* let the idle state pick up the ESC */
			}
			if (c < 0x21 || c > 0x7e) {
				filter->flag = 1;	/* bad */
			}
			break;

		case 2:		/* ESC */
			switch (c) {
			case 0x24:		/* '$' */
				filter->status++;
				break;
			case 0x28:		/* '(' */
				filter->status += 3;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 3:		/* ESC $ */
			switch (c) {
			case 0x40:		/* '@' */
			case 0x42:		/* 'B' */
				filter->status = 0x80;
				break;
			case 0x28:		/* '(' */
				filter->status++;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 4:		/* ESC $ ( */
			switch (c) {
			case 0x40:		/* '@' */
			case 0x42:		/* 'B' */
				filter->status = 0x80;
				break;
			case 0x44:		/* 'D' */
				filter->status = 0x90;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 5:		/* ESC ( */
			switch (c) {
			case 0x42:		/* 'B' */
			case 0x48:		/* 'H' */
				filter->status = 0;
				break;
			case 0x4a:		/* 'J' */
				filter->status = 0x10;
				break;
			case 0x49:		/* 'I' */
				filter->status = 0x20;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		default:	/* unknown sub-state: start over */
			filter->status = 0;
			break;
		}

		return c;
	}
}
1064 
/*
 * Identify filter for CP50220: consumes one byte per call and sets
 * filter->flag to 1 as soon as a byte does not fit.
 *
 * filter->status layout:
 *   high nibble - active character set
 *       0x00 ASCII, 0x10 X 0201 latin, 0x80 X 0208
 *   low nibble  - position inside a multi-byte unit
 *       0 idle, 1 second byte of an X 0208 character pending,
 *       2 after ESC, 3 after ESC $, 5 after ESC (
 *
 * Accepted escape sequences: ESC $ @, ESC $ B, ESC ( B, ESC ( J.
 * On a malformed sequence the filter is flagged, the low nibble is reset
 * and the same byte is examined again in the idle state.
 *
 * Always returns c.
 */
static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter)
{
	for (;;) {
		switch (filter->status & 0xf) {
		case 0:		/* idle */
			if (c == 0x1b) {
				filter->status += 2;
			} else if (c < 0 || c >= 0x80) {
				filter->flag = 1;	/* bad */
			} else if (c > 0x20 && c < 0x7f && filter->status == 0x80) {
				/* first byte of an X 0208 character */
				filter->status += 1;
			}
			/* anything else is a plain 7-bit character or control */
			break;

		case 1:		/* second byte of an X 0208 character */
			if (c == 0x1b) {
				/* low nibble 1 -> 2: treat as the start of an escape sequence */
				filter->status++;
				break;
			}
			filter->status &= ~0xf;
			if (c < 0x21 || c > 0x7e) {
				filter->flag = 1;	/* bad */
			}
			break;

		case 2:		/* ESC */
			switch (c) {
			case 0x24:		/* '$' */
				filter->status++;
				break;
			case 0x28:		/* '(' */
				filter->status += 3;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 3:		/* ESC $ */
			switch (c) {
			case 0x40:		/* '@' */
			case 0x42:		/* 'B' */
				filter->status = 0x80;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 5:		/* ESC ( */
			switch (c) {
			case 0x42:		/* 'B' */
				filter->status = 0;
				break;
			case 0x4a:		/* 'J' */
				filter->status = 0x10;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		default:	/* unknown sub-state: start over */
			filter->status = 0;
			break;
		}

		return c;
	}
}
1140 
/*
 * Identify filter for CP50221: consumes one byte per call and sets
 * filter->flag to 1 as soon as a byte does not fit.
 *
 * filter->status layout:
 *   high nibble - active character set
 *       0x00 ASCII, 0x10 X 0201 latin, 0x20 (selected by ESC ( I),
 *       0x80 X 0208
 *   low nibble  - position inside a multi-byte unit
 *       0 idle, 1 second byte of an X 0208 character pending,
 *       2 after ESC, 3 after ESC $, 5 after ESC (
 *
 * Accepted escape sequences: ESC $ @, ESC $ B, ESC ( B, ESC ( J, ESC ( I.
 * On a malformed sequence the filter is flagged, the low nibble is reset
 * and the same byte is examined again in the idle state.
 *
 * Always returns c.
 */
static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter)
{
	for (;;) {
		switch (filter->status & 0xf) {
		case 0:		/* idle */
			if (c == 0x1b) {
				filter->status += 2;
			} else if (c < 0 || c >= 0x80) {
				filter->flag = 1;	/* bad */
			} else if (c > 0x20 && c < 0x7f && filter->status == 0x80) {
				/* first byte of an X 0208 character */
				filter->status += 1;
			}
			/* anything else is a plain 7-bit character or control */
			break;

		case 1:		/* second byte of an X 0208 character */
			if (c == 0x1b) {
				/* low nibble 1 -> 2: treat as the start of an escape sequence */
				filter->status++;
				break;
			}
			filter->status &= ~0xf;
			if (c < 0x21 || c > 0x7e) {
				filter->flag = 1;	/* bad */
			}
			break;

		case 2:		/* ESC */
			switch (c) {
			case 0x24:		/* '$' */
				filter->status++;
				break;
			case 0x28:		/* '(' */
				filter->status += 3;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 3:		/* ESC $ */
			switch (c) {
			case 0x40:		/* '@' */
			case 0x42:		/* 'B' */
				filter->status = 0x80;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 5:		/* ESC ( */
			switch (c) {
			case 0x42:		/* 'B' */
				filter->status = 0;
				break;
			case 0x4a:		/* 'J' */
				filter->status = 0x10;
				break;
			case 0x49:		/* 'I' */
				filter->status = 0x20;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		default:	/* unknown sub-state: start over */
			filter->status = 0;
			break;
		}

		return c;
	}
}
1218 
/*
 * Identify filter for CP50222: consumes one byte per call and sets
 * filter->flag to 1 as soon as a byte does not fit.
 *
 * filter->status layout:
 *   high nibble - active character set
 *       0x00 ASCII, 0x10 X 0201 latin, 0x80 X 0208
 *   low nibble  - position inside a multi-byte unit
 *       0 idle, 1 second byte of an X 0208 character pending,
 *       2 after ESC, 3 after ESC $, 5 after ESC (
 *
 * Accepted escape sequences: ESC $ @, ESC $ B, ESC ( B, ESC ( J.
 * The bytes 0x0e / 0x0f get no special treatment here; they pass as
 * ordinary 7-bit control characters.
 * On a malformed sequence the filter is flagged, the low nibble is reset
 * and the same byte is examined again in the idle state.
 *
 * Always returns c.
 */
static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter)
{
	for (;;) {
		switch (filter->status & 0xf) {
		case 0:		/* idle */
			if (c == 0x1b) {
				filter->status += 2;
			} else if (c < 0 || c >= 0x80) {
				filter->flag = 1;	/* bad */
			} else if (c > 0x20 && c < 0x7f && filter->status == 0x80) {
				/* first byte of an X 0208 character */
				filter->status += 1;
			}
			/* anything else is a plain 7-bit character or control */
			break;

		case 1:		/* second byte of an X 0208 character */
			if (c == 0x1b) {
				/* low nibble 1 -> 2: treat as the start of an escape sequence */
				filter->status++;
				break;
			}
			filter->status &= ~0xf;
			if (c < 0x21 || c > 0x7e) {
				filter->flag = 1;	/* bad */
			}
			break;

		case 2:		/* ESC */
			switch (c) {
			case 0x24:		/* '$' */
				filter->status++;
				break;
			case 0x28:		/* '(' */
				filter->status += 3;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 3:		/* ESC $ */
			switch (c) {
			case 0x40:		/* '@' */
			case 0x42:		/* 'B' */
				filter->status = 0x80;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		case 5:		/* ESC ( */
			switch (c) {
			case 0x42:		/* 'B' */
				filter->status = 0;
				break;
			case 0x4a:		/* 'J' */
				filter->status = 0x10;
				break;
			default:
				filter->flag = 1;	/* bad */
				filter->status &= ~0xf;
				continue;
			}
			break;

		default:	/* unknown sub-state: start over */
			filter->status = 0;
			break;
		}

		return c;
	}
}
1294