1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp>
22  *
23  */
24 
25 #include "mbfilter.h"
26 #include "mbfilter_cp5022x.h"
27 #include "mbfilter_jis.h"
28 #include "mbfilter_tl_jisx0201_jisx0208.h"
29 
30 #include "unicode_table_cp932_ext.h"
31 #include "unicode_table_jis.h"
32 #include "cp932_table.h"
33 
34 static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter);
35 static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter);
36 static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter);
37 
38 /* Previously, a dubious 'encoding' called 'cp50220raw' was supported
39  * This was just CP50220, but the implementation was less strict regarding
40  * invalid characters; it would silently pass some through
41  * This 'encoding' only existed in mbstring. In case some poor, lost soul is
42  * still using it, retain minimal support by aliasing it to CP50220
43  *
44  * Further, mbstring also had a made-up encoding called "JIS-ms"
45  * This was the same as CP5022{0,1,2}, but without their special ways of
46  * handling conversion of Unicode half-width katakana */
/* NULL-terminated list of the legacy names described above; it is referenced
 * from the CP50220 encoding descriptor below. */
47 static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", "JIS-ms", NULL};
48 
/* Encoding descriptor for CP50220. The initializers are positional: encoding
 * id, encoding name, a second name string ("ISO-2022-JP" -- presumably the
 * MIME name; confirm against the mbfl_encoding declaration), the alias list,
 * the MBFL_ENCTYPE_GL_UNSAFE flag, and the conversion tables for
 * CP50220 => wchar and wchar => CP50220. The purpose of the NULL slots is not
 * visible in this file. */
49 const mbfl_encoding mbfl_encoding_cp50220 = {
50 	mbfl_no_encoding_cp50220,
51 	"CP50220",
52 	"ISO-2022-JP",
53 	cp50220_aliases,
54 	NULL,
55 	MBFL_ENCTYPE_GL_UNSAFE,
56 	&vtbl_cp50220_wchar,
57 	&vtbl_wchar_cp50220,
58 	NULL
59 };
60 
/* Encoding descriptor for CP50221. Same positional layout as the CP50220
 * descriptor, except that the slot which holds the alias list there is NULL
 * here. It references the conversion tables for CP50221 => wchar and
 * wchar => CP50221. */
61 const mbfl_encoding mbfl_encoding_cp50221 = {
62 	mbfl_no_encoding_cp50221,
63 	"CP50221",
64 	"ISO-2022-JP",
65 	NULL,
66 	NULL,
67 	MBFL_ENCTYPE_GL_UNSAFE,
68 	&vtbl_cp50221_wchar,
69 	&vtbl_wchar_cp50221,
70 	NULL
71 };
72 
/* Encoding descriptor for CP50222. Same positional layout as the CP50220
 * descriptor, except that the slot which holds the alias list there is NULL
 * here. It references the conversion tables for CP50222 => wchar and
 * wchar => CP50222. */
73 const mbfl_encoding mbfl_encoding_cp50222 = {
74 	mbfl_no_encoding_cp50222,
75 	"CP50222",
76 	"ISO-2022-JP",
77 	NULL,
78 	NULL,
79 	MBFL_ENCTYPE_GL_UNSAFE,
80 	&vtbl_cp50222_wchar,
81 	&vtbl_wchar_cp50222,
82 	NULL
83 };
84 
/* Conversion table for CP50220 => wchar: source and target encoding ids, the
 * common constructor, the decoding function shared by all three CP5022x
 * variants, and that decoder's flush handler. The purpose of the two NULL
 * slots is not visible in this file. */
85 const struct mbfl_convert_vtbl vtbl_cp50220_wchar = {
86 	mbfl_no_encoding_cp50220,
87 	mbfl_no_encoding_wchar,
88 	mbfl_filt_conv_common_ctor,
89 	NULL,
90 	mbfl_filt_conv_cp5022x_wchar,
91 	mbfl_filt_conv_cp5022x_wchar_flush,
92 	NULL,
93 };
94 
/* Conversion table for wchar => CP50220: source and target encoding ids, the
 * common constructor, the CP50220 encoding function (which buffers one
 * codepoint in filter->cache), and the matching flush handler that drains
 * that buffer. The purpose of the two NULL slots is not visible in this
 * file. */
95 const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = {
96 	mbfl_no_encoding_wchar,
97 	mbfl_no_encoding_cp50220,
98 	mbfl_filt_conv_common_ctor,
99 	NULL,
100 	mbfl_filt_conv_wchar_cp50220,
101 	mbfl_filt_conv_wchar_cp50220_flush,
102 	NULL,
103 };
104 
/* Conversion table for CP50221 => wchar: source and target encoding ids, the
 * common constructor, the decoding function shared by all three CP5022x
 * variants, and that decoder's flush handler. The purpose of the two NULL
 * slots is not visible in this file. */
105 const struct mbfl_convert_vtbl vtbl_cp50221_wchar = {
106 	mbfl_no_encoding_cp50221,
107 	mbfl_no_encoding_wchar,
108 	mbfl_filt_conv_common_ctor,
109 	NULL,
110 	mbfl_filt_conv_cp5022x_wchar,
111 	mbfl_filt_conv_cp5022x_wchar_flush,
112 	NULL,
113 };
114 
/* Conversion table for wchar => CP50221: source and target encoding ids, the
 * common constructor, the CP50221 encoding function, and
 * mbfl_filt_conv_any_jis_flush (defined outside this file) as the flush
 * handler. The purpose of the two NULL slots is not visible in this file. */
115 const struct mbfl_convert_vtbl vtbl_wchar_cp50221 = {
116 	mbfl_no_encoding_wchar,
117 	mbfl_no_encoding_cp50221,
118 	mbfl_filt_conv_common_ctor,
119 	NULL,
120 	mbfl_filt_conv_wchar_cp50221,
121 	mbfl_filt_conv_any_jis_flush,
122 	NULL,
123 };
124 
/* Conversion table for CP50222 => wchar: source and target encoding ids, the
 * common constructor, the decoding function shared by all three CP5022x
 * variants, and that decoder's flush handler. The purpose of the two NULL
 * slots is not visible in this file. */
125 const struct mbfl_convert_vtbl vtbl_cp50222_wchar = {
126 	mbfl_no_encoding_cp50222,
127 	mbfl_no_encoding_wchar,
128 	mbfl_filt_conv_common_ctor,
129 	NULL,
130 	mbfl_filt_conv_cp5022x_wchar,
131 	mbfl_filt_conv_cp5022x_wchar_flush,
132 	NULL,
133 };
134 
/* Conversion table for wchar => CP50222: source and target encoding ids, the
 * common constructor, the CP50222 encoding function, and the CP50222 flush
 * handler. The purpose of the two NULL slots is not visible in this file. */
135 const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = {
136 	mbfl_no_encoding_wchar,
137 	mbfl_no_encoding_cp50222,
138 	mbfl_filt_conv_common_ctor,
139 	NULL,
140 	mbfl_filt_conv_wchar_cp50222,
141 	mbfl_filt_conv_wchar_cp50222_flush,
142 	NULL,
143 };
144 
/* Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function. Used in this file to stop a filter as soon as an output
 * call reports failure. */
145 #define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
146 
mbfl_filt_conv_cp5022x_wchar(int c,mbfl_convert_filter * filter)147 int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter)
148 {
149 	int c1, s, w;
150 
151 retry:
152 	switch (filter->status & 0xf) {
153 /*	case 0x00:	 ASCII */
154 /*	case 0x10:	 X 0201 latin */
155 /*	case 0x20:	 X 0201 kana */
156 /*	case 0x80:	 X 0208 */
157 /*	case 0x90:	 X 0212 */
158 	case 0:
159 		if (c == 0x1b) {
160 			filter->status += 2;
161 		} else if (c == 0x0e) {		/* "kana in" */
162 			filter->status = 0x20;
163 		} else if (c == 0x0f) {		/* "kana out" */
164 			filter->status = 0;
165 		} else if (filter->status == 0x10 && c == 0x5c) {	/* YEN SIGN */
166 			CK((*filter->output_function)(0xa5, filter->data));
167 		} else if (filter->status == 0x10 && c == 0x7e) {	/* OVER LINE */
168 			CK((*filter->output_function)(0x203e, filter->data));
169 		} else if (filter->status == 0x20 && c > 0x20 && c < 0x60) {		/* kana */
170 			CK((*filter->output_function)(0xff40 + c, filter->data));
171 		} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c <= 0x97) { /* kanji first char */
172 			filter->cache = c;
173 			filter->status += 1;
174 		} else if (c >= 0 && c < 0x80) {		/* latin, CTLs */
175 			CK((*filter->output_function)(c, filter->data));
176 		} else if (c > 0xa0 && c < 0xe0) {	/* GR kana */
177 			CK((*filter->output_function)(0xfec0 + c, filter->data));
178 		} else {
179 			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
180 		}
181 		break;
182 
183 /*	case 0x81:	 X 0208 second char */
184 /*	case 0x91:	 X 0212 second char */
185 	case 1:
186 		filter->status &= ~0xf;
187 		c1 = filter->cache;
188 		if (c > 0x20 && c < 0x7f) {
189 			s = (c1 - 0x21)*94 + c - 0x21;
190 			if (filter->status == 0x80) {
191 				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
192 					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
193 				} else if (s >= 0 && s < jisx0208_ucs_table_size) {
194 					w = jisx0208_ucs_table[s];
195 				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
196 					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
197 				} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
198 					w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
199 				} else if (s >= 94 * 94 && s < 114 * 94) {
200 					/* user-defined => PUA (Microsoft extended) */
201 					w = s - 94*94 + 0xe000;
202 				} else {
203 					w = 0;
204 				}
205 
206 				if (w <= 0) {
207 					w = MBFL_BAD_INPUT;
208 				}
209 			} else {
210 				if (s >= 0 && s < jisx0212_ucs_table_size) {
211 					w = jisx0212_ucs_table[s];
212 				} else {
213 					w = 0;
214 				}
215 
216 				if (w <= 0) {
217 					w = MBFL_BAD_INPUT;
218 				}
219 			}
220 			CK((*filter->output_function)(w, filter->data));
221 		} else {
222 			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
223 		}
224 		break;
225 
226 	/* ESC */
227 /*	case 0x02:	*/
228 /*	case 0x12:	*/
229 /*	case 0x22:	*/
230 /*	case 0x82:	*/
231 /*	case 0x92:	*/
232 	case 2:
233 		if (c == 0x24) {		/* '$' */
234 			filter->status++;
235 		} else if (c == 0x28) {		/* '(' */
236 			filter->status += 3;
237 		} else {
238 			filter->status &= ~0xf;
239 			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
240 			goto retry;
241 		}
242 		break;
243 
244 	/* ESC $ */
245 /*	case 0x03:	*/
246 /*	case 0x13:	*/
247 /*	case 0x23:	*/
248 /*	case 0x83:	*/
249 /*	case 0x93:	*/
250 	case 3:
251 		if (c == 0x40 || c == 0x42) {	/* '@' or 'B' */
252 			filter->status = 0x80;
253 		} else if (c == 0x28) {			/* '(' */
254 			filter->status++;
255 		} else {
256 			filter->status &= ~0xf;
257 			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
258 			CK((*filter->output_function)(0x24, filter->data));
259 			goto retry;
260 		}
261 		break;
262 
263 	/* ESC $ ( */
264 /*	case 0x04:	*/
265 /*	case 0x14:	*/
266 /*	case 0x24:	*/
267 /*	case 0x84:	*/
268 /*	case 0x94:	*/
269 	case 4:
270 		if (c == 0x40 || c == 0x42) {	/* '@' or 'B' */
271 			filter->status = 0x80;
272 		} else if (c == 0x44) {			/* 'D' */
273 			filter->status = 0x90;
274 		} else {
275 			filter->status &= ~0xf;
276 			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
277 			CK((*filter->output_function)(0x24, filter->data));
278 			CK((*filter->output_function)(0x28, filter->data));
279 			goto retry;
280 		}
281 		break;
282 
283 	/* ESC ( */
284 /*	case 0x05:	*/
285 /*	case 0x15:	*/
286 /*	case 0x25:	*/
287 /*	case 0x85:	*/
288 /*	case 0x95:	*/
289 	case 5:
290 		if (c == 0x42 || c == 0x48) {		/* 'B' or 'H' */
291 			filter->status = 0;
292 		} else if (c == 0x4a) {		/* 'J' */
293 			filter->status = 0x10;
294 		} else if (c == 0x49) {		/* 'I' */
295 			filter->status = 0x20;
296 		} else {
297 			filter->status &= ~0xf;
298 			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
299 			CK((*filter->output_function)(0x28, filter->data));
300 			goto retry;
301 		}
302 		break;
303 
304 	default:
305 		filter->status = 0;
306 		break;
307 	}
308 
309 	return 0;
310 }
311 
/*
 * Flush handler for CP5022x => wchar.
 *
 * The low nibble of filter->status is the decoder's parser position: 0 means
 * idle, 1 means the second byte of a 2-byte character is still expected, and
 * 2..5 mean an escape sequence (ESC, ESC $, ESC $ (, ESC () has been started
 * but not completed. Any non-zero value at end of input therefore means the
 * input was truncated, so an error marker is emitted instead of silently
 * dropping the pending bytes.
 *
 * The decoder state is then reset and the downstream flush function, if any,
 * is invoked. Returns 0 on success, -1 if emitting the error marker fails.
 */
static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter)
{
	if ((filter->status & 0xF) != 0) {
		/* 2-byte (JIS X 0208 or 0212) character was truncated, or else
		 * an escape sequence was truncated */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}
	filter->status = 0;

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
326 
/*
 * wchar => CP50220
 *
 * CP50220 output is produced by passing each codepoint through
 * mbfl_convert_kana (half-width => full-width katakana, with gluing of a
 * following mark) and then encoding the result as CP50221. Because the
 * conversion of one codepoint can depend on the next one, the most recent
 * codepoint is held back in filter->cache until its successor (or the flush)
 * arrives. A cache value of 0 means "nothing cached".
 *
 * Returns 0 on success, -1 if any nested filter/output call fails.
 */
static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter)
{
	int mode = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE, second = 0;
	bool consumed = false;

	if (filter->cache) {
		int s = mbfl_convert_kana(filter->cache, c, &consumed, &second, mode);
		/* If `c` was merged into the cached codepoint, nothing is pending;
		 * otherwise `c` becomes the new pending codepoint */
		filter->cache = consumed ? 0 : c;
		CK(mbfl_filt_conv_wchar_cp50221(s, filter));
		if (second) {
			CK(mbfl_filt_conv_wchar_cp50221(second, filter));
		}
		if (c == 0 && !consumed) {
			/* A NUL cannot be held in the cache (0 means "empty"), so it would
			 * be lost; emit it right away, the same way as when nothing is
			 * cached */
			CK((*filter->output_function)(0, filter->data));
		}
	} else if (c == 0) {
		/* This case has to be handled separately, since `filter->cache == 0` means
		 * no codepoint is cached */
		CK((*filter->output_function)(0, filter->data));
	} else {
		filter->cache = c;
	}

	return 0;
}
349 
/*
 * Flush handler for wchar => CP50220.
 *
 * If a codepoint is still held in filter->cache, it is converted with
 * mbfl_convert_kana (with 0 as the "next" codepoint, since there is none) and
 * encoded through the CP50221 filter. When the kana conversion yields a
 * second codepoint, that second codepoint is encoded as well. Finally the
 * generic JIS flush handler is run and its result is returned.
 *
 * Returns -1 if encoding the pending codepoint(s) fails.
 */
static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter)
{
	int mode = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE, second = 0;

	if (filter->cache) {
		int s = mbfl_convert_kana(filter->cache, 0, NULL, &second, mode);
		/* Clear the cache first so the state is reset even on early return */
		filter->cache = 0;
		CK(mbfl_filt_conv_wchar_cp50221(s, filter));
		if (second) {
			/* Emit the second codepoint, not `s` a second time */
			CK(mbfl_filt_conv_wchar_cp50221(second, filter));
		}
	}

	return mbfl_filt_conv_any_jis_flush(filter);
}
365 
mbfl_filt_conv_wchar_cp50221(int c,mbfl_convert_filter * filter)366 int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
367 {
368 	int s = 0;
369 
370 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
371 		s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
372 	} else if (c == 0x203E) { /* OVERLINE */
373 		s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
374 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
375 		s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
376 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
377 		s = ucs_i_jis_table[c - ucs_i_jis_table_min];
378 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
379 		s = ucs_r_jis_table[c - ucs_r_jis_table_min];
380 	} else if (c >= 0xE000 && c <= 0xE757) {
381 		/* 'private'/'user' codepoints */
382 		s = c - 0xE000;
383 		s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21);
384 	}
385 
386 	if (s <= 0) {
387 		if (c == 0xa5) {			/* YEN SIGN */
388 			s = 0x1005c;
389 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
390 			s = 0x2140;
391 		} else if (c == 0x2225) {	/* PARALLEL TO */
392 			s = 0x2142;
393 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
394 			s = 0x215d;
395 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
396 			s = 0x2171;
397 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
398 			s = 0x2172;
399 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
400 			s = 0x224c;
401 		}
402 	}
403 
404 	/* Above, we do a series of lookups in `ucs_*_jis_table` to find a
405 	 * corresponding kuten code for this Unicode codepoint
406 	 * If we get zero, that means the codepoint is not in JIS X 0208
407 	 * On the other hand, if we get a result with the high bits set on both
408 	 * upper and lower bytes, that is not a code in JIS X 0208 but rather
409 	 * in JIS X 0213
410 	 * In either case, check if this codepoint is one of the extensions added
411 	 * to JIS X 0208 by MicroSoft (to make CP932) */
412 	if (s == 0 || ((s & 0x8000) && (s & 0x80))) {
413 		int i;
414 		s = -1;
415 
416 		for (i = 0;
417 				i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
418 				i++) {
419 			const int oh = cp932ext1_ucs_table_min / 94;
420 
421 			if (c == cp932ext1_ucs_table[i]) {
422 				s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
423 				break;
424 			}
425 		}
426 
427 		if (s < 0) {
428 			const int oh = cp932ext2_ucs_table_min / 94;
429 			const int cp932ext2_ucs_table_size =
430 					cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
431 			for (i = 0; i < cp932ext2_ucs_table_size; i++) {
432 				if (c == cp932ext2_ucs_table[i]) {
433 					s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
434 					break;
435 				}
436 			}
437 		}
438 
439 		if (s < 0) {
440 			const int cp932ext3_ucs_table_size =
441 					cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
442 			const int limit = cp932ext3_ucs_table_size >
443 					cp932ext3_eucjp_table_size ?
444 						cp932ext3_eucjp_table_size:
445 						cp932ext3_ucs_table_size;
446 			for (i = 0; i < limit; i++) {
447 				if (c == cp932ext3_ucs_table[i]) {
448 					s = cp932ext3_eucjp_table[i];
449 					break;
450 				}
451 			}
452 		}
453 
454 		if (c == 0) {
455 			s = 0;
456 		} else if (s <= 0) {
457 			s = -1;
458 		}
459 	}
460 
461 	if (s >= 0) {
462 		if (s < 0x80) { /* ASCII */
463 			if ((filter->status & 0xff00) != 0) {
464 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
465 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
466 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
467 				filter->status = 0;
468 			}
469 			CK((*filter->output_function)(s, filter->data));
470 		} else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */
471 			if ((filter->status & 0xff00) != 0x500) {
472 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
473 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
474 				CK((*filter->output_function)(0x49, filter->data));		/* 'I' */
475 				filter->status = 0x500;
476 			}
477 			CK((*filter->output_function)(s - 0x80, filter->data));
478 		} else if (s <= 0x927E) { /* X 0208 + extensions */
479 			if ((filter->status & 0xff00) != 0x200) {
480 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
481 				CK((*filter->output_function)(0x24, filter->data));		/* '$' */
482 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
483 				filter->status = 0x200;
484 			}
485 			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
486 			CK((*filter->output_function)(s & 0xff, filter->data));
487 		} else if (s < 0x10000) { /* X0212 */
488 			CK(mbfl_filt_conv_illegal_output(c, filter));
489 		} else { /* X 0201 latin */
490 			if ((filter->status & 0xff00) != 0x400) {
491 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
492 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
493 				CK((*filter->output_function)(0x4a, filter->data));		/* 'J' */
494 			}
495 			filter->status = 0x400;
496 			CK((*filter->output_function)(s & 0x7f, filter->data));
497 		}
498 	} else {
499 		CK(mbfl_filt_conv_illegal_output(c, filter));
500 	}
501 
502 	return 0;
503 }
504 
505 /*
506  * wchar => CP50222
507  */
mbfl_filt_conv_wchar_cp50222(int c,mbfl_convert_filter * filter)508 int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
509 {
510 	int s = 0;
511 
512 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
513 		s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
514 	} else if (c == 0x203E) { /* OVERLINE */
515 		s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
516 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
517 		s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
518 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
519 		s = ucs_i_jis_table[c - ucs_i_jis_table_min];
520 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
521 		s = ucs_r_jis_table[c - ucs_r_jis_table_min];
522 	} else if (c >= 0xE000 && c <= 0xE757) {
523 		/* 'private'/'user' codepoints */
524 		s = c - 0xE000;
525 		s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21);
526 	}
527 
528 	if (s <= 0) {
529 		if (c == 0xa5) {			/* YEN SIGN */
530 			s = 0x1005c;
531 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
532 			s = 0x2140;
533 		} else if (c == 0x2225) {	/* PARALLEL TO */
534 			s = 0x2142;
535 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
536 			s = 0x215d;
537 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
538 			s = 0x2171;
539 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
540 			s = 0x2172;
541 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
542 			s = 0x224c;
543 		}
544 	}
545 	if (s == 0 || ((s & 0x8000) && (s & 0x80))) {
546 		int i;
547 		s = -1;
548 
549 		for (i = 0;
550 				i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
551 			const int oh = cp932ext1_ucs_table_min / 94;
552 
553 			if (c == cp932ext1_ucs_table[i]) {
554 				s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
555 				break;
556 			}
557 		}
558 
559 		if (s <= 0) {
560 			const int oh = cp932ext2_ucs_table_min / 94;
561 			const int cp932ext2_ucs_table_size =
562 					cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
563 			for (i = 0; i < cp932ext2_ucs_table_size; i++) {
564 				if (c == cp932ext2_ucs_table[i]) {
565 					s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
566 					break;
567 				}
568 			}
569 		}
570 
571 		if (s <= 0) {
572 			const int cp932ext3_ucs_table_size =
573 					cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
574 			const int limit = cp932ext3_ucs_table_size >
575 					cp932ext3_eucjp_table_size ?
576 						cp932ext3_eucjp_table_size:
577 						cp932ext3_ucs_table_size;
578 			for (i = 0; i < limit; i++) {
579 				if (c == cp932ext3_ucs_table[i]) {
580 					s = cp932ext3_eucjp_table[i];
581 					break;
582 				}
583 			}
584 		}
585 
586 		if (c == 0) {
587 			s = 0;
588 		} else if (s <= 0) {
589 			s = -1;
590 		}
591 	}
592 
593 	if (s >= 0) {
594 		if (s < 0x80) { /* ASCII */
595 			if ((filter->status & 0xff00) == 0x500) {
596 				CK((*filter->output_function)(0x0f, filter->data));		/* SO */
597 				filter->status = 0;
598 			} else if ((filter->status & 0xff00) != 0) {
599 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
600 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
601 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
602 				filter->status = 0;
603 			}
604 			CK((*filter->output_function)(s, filter->data));
605 		} else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */
606 			if ((filter->status & 0xff00) != 0x500) {
607 				CK((*filter->output_function)(0x0e, filter->data));		/* SI */
608 				filter->status = 0x500;
609 			}
610 			CK((*filter->output_function)(s - 0x80, filter->data));
611 		} else if (s <= 0x927E) { /* X 0208 */
612 			if ((filter->status & 0xff00) == 0x500) {
613 				CK((*filter->output_function)(0x0f, filter->data));		/* SO */
614 				filter->status = 0;
615 			}
616 			if ((filter->status & 0xff00) != 0x200) {
617 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
618 				CK((*filter->output_function)(0x24, filter->data));		/* '$' */
619 				CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
620 				filter->status = 0x200;
621 			}
622 			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
623 			CK((*filter->output_function)(s & 0xff, filter->data));
624 		} else if (s < 0x10000) { /* X0212 */
625 			CK(mbfl_filt_conv_illegal_output(c, filter));
626 		} else { /* X 0201 latin */
627 			if ((filter->status & 0xff00) == 0x500) {
628 				CK((*filter->output_function)(0x0f, filter->data));		/* SO */
629 				filter->status = 0;
630 			}
631 			if ((filter->status & 0xff00) != 0x400) {
632 				CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
633 				CK((*filter->output_function)(0x28, filter->data));		/* '(' */
634 				CK((*filter->output_function)(0x4a, filter->data));		/* 'J' */
635 			}
636 			filter->status = 0x400;
637 			CK((*filter->output_function)(s & 0x7f, filter->data));
638 		}
639 	} else {
640 		CK(mbfl_filt_conv_illegal_output(c, filter));
641 	}
642 
643 	return 0;
644 }
645 
646 int
mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter * filter)647 mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter)
648 {
649 	/* back to latin */
650 	if ((filter->status & 0xff00) == 0x500) {
651 		CK((*filter->output_function)(0x0f, filter->data));		/* SO */
652 	} else if ((filter->status & 0xff00) != 0) {
653 		CK((*filter->output_function)(0x1b, filter->data));		/* ESC */
654 		CK((*filter->output_function)(0x28, filter->data));		/* '(' */
655 		CK((*filter->output_function)(0x42, filter->data));		/* 'B' */
656 	}
657 	filter->status = 0;
658 
659 	if (filter->flush_function) {
660 		(*filter->flush_function)(filter->data);
661 	}
662 
663 	return 0;
664 }
665