1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
 * The source code included in this file was separated from mbfilter_sjis_open.c
26 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27 *
28 */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include "mbfilter.h"
35 #include "mbfilter_sjis_mac.h"
36
37 #include "unicode_table_cp932_ext.h"
38 #include "unicode_table_jis.h"
39
40 #include "sjis_mac2uni.h"
41
/* Identify callback declared here and installed in vtbl_identify_sjis_mac below;
 * its definition is not in this file. */
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
/* Table declared here and referenced by mbfl_encoding_sjis_mac below;
 * presumably a per-lead-byte length table for Shift_JIS -- confirm at its definition. */
extern const unsigned char mblen_table_sjis[];

/* Flush handler for the wchar => SJIS-mac direction; defined at the end of this file. */
static int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter);
46
/* NULL-terminated list of alternative names, referenced by mbfl_encoding_sjis_mac. */
static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL};
48
/*
 * Encoding descriptor for SJIS-mac.
 * NOTE(review): the per-field remarks below are inferred from the values
 * supplied here; confirm against the mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_sjis_mac = {
	mbfl_no_encoding_sjis_mac,                           /* encoding identifier */
	"SJIS-mac",                                          /* presumably the primary name */
	"Shift_JIS",                                         /* presumably the MIME name */
	(const char *(*)[])&mbfl_encoding_sjis_mac_aliases,  /* alias list defined above */
	mblen_table_sjis,                                    /* table shared with SJIS (declared extern above) */
	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,          /* encoding type flags */
	&vtbl_sjis_mac_wchar,                                /* SJIS-mac => wchar converter table */
	&vtbl_wchar_sjis_mac                                 /* wchar => SJIS-mac converter table */
};
59
/*
 * Identify-filter table for SJIS-mac: common constructor/destructor plus the
 * identify callback mbfl_filt_ident_sjis declared extern above.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_mac = {
	mbfl_no_encoding_sjis_mac,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_common_dtor,
	mbfl_filt_ident_sjis
};
66
/*
 * Converter table for the SJIS-mac => wchar direction.
 * Uses the common constructor, destructor and flush routines; only the
 * per-character filter function is specific to this encoding.
 */
const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = {
	mbfl_no_encoding_sjis_mac,       /* source encoding */
	mbfl_no_encoding_wchar,          /* destination encoding */
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_sjis_mac_wchar,
	mbfl_filt_conv_common_flush
};
75
/*
 * Converter table for the wchar => SJIS-mac direction.
 * Unlike the opposite direction it installs its own flush routine,
 * mbfl_filt_conv_sjis_mac_flush, which emits a character still held in
 * the filter cache when the stream ends.
 */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = {
	mbfl_no_encoding_wchar,          /* source encoding */
	mbfl_no_encoding_sjis_mac,       /* destination encoding */
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_wchar_sjis_mac,
	mbfl_filt_conv_sjis_mac_flush
};
84
/*
 * CK(statement): evaluate `statement` exactly once and, when its value is
 * negative, return -1 from the enclosing function.
 */
#define CK(statement)				\
	do {							\
		if ((statement) < 0) {		\
			return (-1);			\
		}							\
	} while (0)
86
/*
 * SJIS_ENCODE(c1, c2, s1, s2)
 *
 * Convert a two-byte JIS code (c1 = first byte, c2 = second byte; the callers
 * in this file pass values built as "index / 94 + 0x21") into Shift_JIS
 * lead/trail bytes, stored into the lvalues s1 and s2.
 *
 *   - lead byte:  ((c1 - 1) >> 1) + 0x71 when c1 < 0x5f, otherwise + 0xb1
 *   - trail byte: for odd c1, c2 + 0x1f (c2 < 0x60) or c2 + 0x20, which skips
 *                 0x7f; for even c1, c2 + 0x7e
 *
 * c1 and c2 are evaluated more than once, so they must be free of side
 * effects, and s1/s2 must not alias c1/c2. Every argument is parenthesized so
 * that expressions such as `*p` can be used as outputs without `(s1)--`
 * binding to the pointer instead of the pointee.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
	do {							\
		(s1) = (c1);				\
		(s1)--;						\
		(s1) >>= 1;					\
		if ((c1) < 0x5f) {			\
			(s1) += 0x71;			\
		} else {					\
			(s1) += 0xb1;			\
		}							\
		(s2) = (c2);				\
		if ((c1) & 1) {				\
			if ((c2) < 0x60) {		\
				(s2)--;				\
			}						\
			(s2) += 0x20;			\
		} else {					\
			(s2) += 0x7e;			\
		}							\
	} while (0)
107
/*
 * SJIS_DECODE(c1, c2, s1, s2)
 *
 * Inverse of SJIS_ENCODE: convert Shift_JIS lead byte c1 and trail byte c2
 * into a two-byte JIS code stored into the lvalues s1 (first byte) and s2
 * (second byte).
 *
 *   - first byte:  (c1 - 0x81) * 2 + 0x21 when c1 < 0xa0, otherwise
 *                  (c1 - 0xc1) * 2 + 0x21; incremented by one when the trail
 *                  byte is 0x9f or above
 *   - second byte: c2 - 0x1f (c2 < 0x7f), c2 - 0x20 (0x7f <= c2 < 0x9f),
 *                  or c2 - 0x7e (c2 >= 0x9f)
 *
 * No range validation is done here; callers check the bytes beforehand.
 * Every argument is parenthesized so that expressions such as `*p` can be
 * used as outputs without `(s1)++` binding to the pointer instead of the
 * pointee. s1/s2 must not alias c1/c2.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
	do {							\
		(s1) = (c1);				\
		if ((s1) < 0xa0) {			\
			(s1) -= 0x81;			\
		} else {					\
			(s1) -= 0xc1;			\
		}							\
		(s1) <<= 1;					\
		(s1) += 0x21;				\
		(s2) = (c2);				\
		if ((s2) < 0x9f) {			\
			if ((s2) < 0x7f) {		\
				(s2)++;				\
			}						\
			(s2) -= 0x20;			\
		} else {					\
			(s1)++;					\
			(s2) -= 0x7e;			\
		}							\
	} while (0)
129
130 /*
131 * SJIS-mac => wchar
132 */
133 int
mbfl_filt_conv_sjis_mac_wchar(int c,mbfl_convert_filter * filter)134 mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
135 {
136 int i, j, n;
137 int c1, s, s1, s2, w;
138
139 switch (filter->status) {
140 case 0:
141 if (c >= 0 && c < 0x80 && c != 0x5c) { /* latin */
142 CK((*filter->output_function)(c, filter->data));
143 } else if (c > 0xa0 && c < 0xe0) { /* kana */
144 CK((*filter->output_function)(0xfec0 + c, filter->data));
145 } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
146 filter->status = 1;
147 filter->cache = c;
148 } else if (c == 0x5c) {
149 CK((*filter->output_function)(0x00a5, filter->data));
150 } else if (c == 0x80) {
151 CK((*filter->output_function)(0x005c, filter->data));
152 } else if (c == 0xa0) {
153 CK((*filter->output_function)(0x00a0, filter->data));
154 } else if (c == 0xfd) {
155 CK((*filter->output_function)(0x00a9, filter->data));
156 } else if (c == 0xfe) {
157 CK((*filter->output_function)(0x2122, filter->data));
158 } else if (c == 0xff) {
159 CK((*filter->output_function)(0x2026, filter->data));
160 CK((*filter->output_function)(0xf87f, filter->data));
161 } else {
162 w = c & MBFL_WCSGROUP_MASK;
163 w |= MBFL_WCSGROUP_THROUGH;
164 CK((*filter->output_function)(w, filter->data));
165 }
166 break;
167
168 case 1: /* kanji second char */
169 filter->status = 0;
170 c1 = filter->cache;
171 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
172 w = 0;
173 SJIS_DECODE(c1, c, s1, s2);
174 s = (s1 - 0x21)*94 + s2 - 0x21;
175 if (s <= 0x89) {
176 if (s == 0x1c) {
177 w = 0x2014; /* EM DASH */
178 } else if (s == 0x1f) {
179 w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
180 } else if (s == 0x20) {
181 w = 0x301c; /* FULLWIDTH TILDE */
182 } else if (s == 0x21) {
183 w = 0x2016; /* PARALLEL TO */
184 } else if (s == 0x3c) {
185 w = 0x2212; /* FULLWIDTH HYPHEN-MINUS */
186 } else if (s == 0x50) {
187 w = 0x00a2; /* FULLWIDTH CENT SIGN */
188 } else if (s == 0x51) {
189 w = 0x00a3; /* FULLWIDTH POUND SIGN */
190 } else if (s == 0x89) {
191 w = 0x00ac; /* FULLWIDTH NOT SIGN */
192 }
193 }
194
195 /* apple gaiji area 0x8540 - 0x886d */
196 if (w == 0) {
197 for (i=0; i<7; i++) {
198 if (s >= code_tbl[i][0] && s <= code_tbl[i][1]) {
199 w = s - code_tbl[i][0] + code_tbl[i][2];
200 break;
201 }
202 }
203 }
204
205 if (w == 0) {
206
207 for (i=0; i<code_tbl_m_len; i++) {
208 if (s == code_tbl_m[i][0]) {
209 if (code_tbl_m[i][1] == 0xf860) {
210 n = 4;
211 } else if (code_tbl_m[i][1] == 0xf861) {
212 n = 5;
213 } else {
214 n = 6;
215 }
216 for (j=1; j<n-1; j++) {
217 CK((*filter->output_function)(code_tbl_m[i][j], filter->data));
218 }
219 w = code_tbl_m[i][n-1];
220 break;
221 }
222 }
223 }
224
225 if (w == 0) {
226 for (i=0; i<8; i++) {
227 if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) {
228 w = code_map[i][s - code_ofst_tbl[i][0]];
229 s2 = 0;
230 if (s >= 0x043e && s <= 0x0441) {
231 s2 = 0xf87a;
232 } else if (s == 0x03b1 || s == 0x03b7) {
233 s2 = 0xf87f;
234 } else if (s == 0x04b8 || s == 0x04b9 || s == 0x04c4) {
235 s2 = 0x20dd;
236 } else if (s == 0x1ed9 || s == 0x1eda || s == 0x1ee8 || s == 0x1ef3 ||
237 (s >= 0x1ef5 && s <= 0x1efb) || s == 0x1f05 || s == 0x1f06 ||
238 s == 0x1f18 || (s >= 0x1ff2 && s <= 0x20a5)) {
239 s2 = 0xf87e;
240 }
241 if (s2 > 0) {
242 CK((*filter->output_function)(w, filter->data));
243 w = s2;
244 }
245 break;
246 }
247 }
248 }
249
250 if (w == 0 && s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
251 w = jisx0208_ucs_table[s];
252 }
253
254 if (w <= 0) {
255 w = (s1 << 8) | s2;
256 w &= MBFL_WCSPLANE_MASK;
257 w |= MBFL_WCSPLANE_WINCP932;
258 }
259 CK((*filter->output_function)(w, filter->data));
260 } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
261 CK((*filter->output_function)(c, filter->data));
262 } else {
263 w = (c1 << 8) | c;
264 w &= MBFL_WCSGROUP_MASK;
265 w |= MBFL_WCSGROUP_THROUGH;
266 CK((*filter->output_function)(w, filter->data));
267 }
268 break;
269
270 default:
271 filter->status = 0;
272 break;
273 }
274
275 return c;
276 }
277
278 /*
279 * wchar => SJIS-mac
280 */
281 int
mbfl_filt_conv_wchar_sjis_mac(int c,mbfl_convert_filter * filter)282 mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
283 {
284 int i;
285 int c1, c2, s1, s2, mode;
286
287 s1 = 0;
288 s2 = 0;
289
290 // a1: U+0000 -> U+046F
291 // a2: U+2000 -> U+30FF
292 // i: U+4E00 -> U+9FFF
293 // r: U+FF00 -> U+FFFF
294
295 switch (filter->status) {
296
297 case 1:
298 c1 = filter->cache;
299 filter->cache = 0;
300 filter->status = 0;
301
302 s1 = 0;
303 s2 = 0;
304
305 if (c == 0xf87a) {
306 for (i=0;i<4;i++) {
307 if (c1 == s_form_tbl[i+34+3+3]) {
308 s1 = s_form_sjis_tbl[i+34+3+3];
309 break;
310 }
311 }
312 if (s1 <= 0) {
313 s2 = c1;
314 }
315 } else if (c == 0x20dd) {
316 for (i=0;i<3;i++) {
317 if (c1 == s_form_tbl[i+34+3]) {
318 s1 = s_form_sjis_tbl[i+34+3];
319 break;
320 }
321 }
322 if (s1 <= 0) {
323 s2 = c1;
324 }
325 } else if (c == 0xf87f) {
326 for (i=0;i<3;i++) {
327 if (c1 == s_form_tbl[i+34]) {
328 s1 = s_form_sjis_tbl[i+34];
329 break;
330 }
331 }
332 if (s1 <= 0) {
333 s2 = c1; s1 = -1;
334 }
335 } else if (c == 0xf87e) {
336 for (i=0;i<34;i++) {
337 if (c1 == s_form_tbl[i]) {
338 s1 = s_form_sjis_tbl[i];
339 break;
340 }
341 }
342 if (s1 <= 0) {
343 s2 = c1; s1 = -1;
344 }
345 } else {
346 s2 = c1;
347 s1 = c;
348 }
349
350 if (s2 > 0) {
351 for (i=0;i<s_form_tbl_len;i++) {
352 if (c1 == s_form_tbl[i]) {
353 s1 = s_form_sjis_fallback_tbl[i];
354 break;
355 }
356 }
357 }
358
359 if (s1 >= 0) {
360 if (s1 < 0x100) {
361 CK((*filter->output_function)(s1, filter->data));
362 } else {
363 CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
364 CK((*filter->output_function)(s1 & 0xff, filter->data));
365 }
366 } else {
367 CK(mbfl_filt_conv_illegal_output(c, filter));
368 }
369
370 if (s2 <= 0 || s1 == -1) {
371 break;
372 }
373
374 case 0:
375
376 if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
377 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
378 if (c == 0x5c) {
379 s1 = 0x80;
380 } else if (c == 0xa9) {
381 s1 = 0xfd;
382 }
383 } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
384 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
385 if (c == 0x2122) {
386 s1 = 0xfe;
387 } else if (c == 0x2014) {
388 s1 = 0x213d;
389 } else if (c == 0x2116) {
390 s1 = 0x2c1d;
391 }
392 } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
393 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
394 } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
395 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
396 }
397
398 if (c >= 0x2000) {
399 for (i=0;i<s_form_tbl_len;i++) {
400 if (c == s_form_tbl[i]) {
401 filter->status = 1;
402 filter->cache = c;
403 return c;
404 }
405 }
406
407 if (c == 0xf860 || c == 0xf861 || c == 0xf862) {
408 filter->status = 2;
409 filter->cache = c;
410 return c;
411 }
412 }
413
414 if (s1 <= 0) {
415 c1 = c & ~MBFL_WCSPLANE_MASK;
416 if (c1 == MBFL_WCSPLANE_WINCP932) {
417 s1 = c & MBFL_WCSPLANE_MASK;
418 s2 = 1;
419 } else if (c1 == MBFL_WCSPLANE_JIS0208) {
420 s1 = c & MBFL_WCSPLANE_MASK;
421 } else if (c1 == MBFL_WCSPLANE_JIS0212) {
422 s1 = c & MBFL_WCSPLANE_MASK;
423 s1 |= 0x8080;
424 } else if (c == 0xa0) {
425 s1 = 0x00a0;
426 } else if (c == 0xa5) { /* YEN SIGN */
427 s1 = 0x216f; /* FULLWIDTH YEN SIGN */
428 } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
429 s1 = 0x2140;
430 }
431 }
432
433 if (s1 <= 0) {
434 for (i=0; i<wchar2sjis_mac_r_tbl_len; i++) {
435 if (c >= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) {
436 s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2];
437 break;
438 }
439 }
440
441 if (s1 <= 0) {
442 for (i=0; i<wchar2sjis_mac_r_map_len; i++) {
443 if (c >= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) {
444 s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]];
445 break;
446 }
447 }
448 }
449
450 if (s1 <= 0) {
451 for (i=0; i<wchar2sjis_mac_wchar_tbl_len ; i++) {
452 if ( c == wchar2sjis_mac_wchar_tbl[i][0]) {
453 s1 = wchar2sjis_mac_wchar_tbl[i][1] & 0xffff;
454 break;
455 }
456 }
457 }
458
459 if (s1 > 0) {
460 c1 = s1/94+0x21;
461 c2 = s1-94*(c1-0x21)+0x21;
462 s1 = (c1 << 8) | c2;
463 s2 = 1;
464 }
465 }
466
467 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
468 s1 = -1;
469 c1 = 0;
470
471 if (c == 0) {
472 s1 = 0;
473 } else if (s1 <= 0) {
474 s1 = -1;
475 }
476 }
477
478 if (s1 >= 0) {
479 if (s1 < 0x100) { /* latin or kana */
480 CK((*filter->output_function)(s1, filter->data));
481 } else { /* kanji */
482 c1 = (s1 >> 8) & 0xff;
483 c2 = s1 & 0xff;
484 SJIS_ENCODE(c1, c2, s1, s2);
485 CK((*filter->output_function)(s1, filter->data));
486 CK((*filter->output_function)(s2, filter->data));
487 }
488 } else {
489 CK(mbfl_filt_conv_illegal_output(c, filter));
490 }
491 break;
492
493
494 case 2:
495 c1 = filter->cache;
496 filter->cache = 0;
497 filter->status = 0;
498 if (c1 == 0xf860) {
499 for (i=0; i<5; i++) {
500 if (c == code_tbl_m[i][2]) {
501 filter->cache = c | 0x10000;
502 filter->status = 3;
503 break;
504 }
505 }
506 } else if (c1 == 0xf861) {
507 for (i=0; i<3; i++) {
508 if (c == code_tbl_m[i+5][2]) {
509 filter->cache = c | 0x20000;
510 filter->status = 3;
511 break;
512 }
513 }
514 } else if (c1 == 0xf862) {
515 for (i=0; i<4; i++) {
516 if (c == code_tbl_m[i+5+3][2]) {
517 filter->cache = c | 0x40000;
518 filter->status = 3;
519 break;
520 }
521 }
522 }
523
524 if (filter->status == 0) {
525 CK(mbfl_filt_conv_illegal_output(c1, filter));
526 CK(mbfl_filt_conv_illegal_output(c, filter));
527 }
528
529 break;
530
531 case 3:
532 s1 = 0;
533 c1 = filter->cache & 0xffff;
534 mode = (filter->cache & 0xf0000) >> 16;
535
536 filter->cache = 0;
537 filter->status = 0;
538
539 if (mode == 0x1) {
540 for (i=0; i<5; i++) {
541 if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) {
542 s1 = code_tbl_m[i][0];
543 break;
544 }
545 }
546
547 if (s1 > 0) {
548 c1 = s1/94+0x21;
549 c2 = s1-94*(c1-0x21)+0x21;
550 SJIS_ENCODE(c1, c2, s1, s2);
551 CK((*filter->output_function)(s1, filter->data));
552 CK((*filter->output_function)(s2, filter->data));
553 }
554
555 if (s1 <= 0) {
556 CK(mbfl_filt_conv_illegal_output(0xf860, filter));
557 CK(mbfl_filt_conv_illegal_output(c1, filter));
558 CK(mbfl_filt_conv_illegal_output(c, filter));
559 }
560
561 } else if (mode == 0x2) {
562 for (i=0; i<3; i++) {
563 if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) {
564 filter->cache = c | 0x20000;
565 filter->status = 4;
566 break;
567 }
568 }
569 } else if (mode == 0x4) {
570 for (i=0; i<4; i++) {
571 if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) {
572 filter->cache = c | 0x40000;
573 filter->status = 4;
574 break;
575 }
576 }
577 }
578 break;
579
580 case 4:
581 s1 = 0;
582 c1 = filter->cache & 0xffff;
583 mode = (filter->cache & 0xf0000) >> 16;
584
585 filter->cache = 0;
586 filter->status = 0;
587
588 if (mode == 0x2) {
589 for (i=0; i<3; i++) {
590 if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) {
591 s1 = code_tbl_m[i+5][0];
592 break;
593 }
594 }
595
596 if (s1 > 0) {
597 c1 = s1/94+0x21;
598 c2 = s1-94*(c1-0x21)+0x21;
599 SJIS_ENCODE(c1, c2, s1, s2);
600 CK((*filter->output_function)(s1, filter->data));
601 CK((*filter->output_function)(s2, filter->data));
602 }
603
604 if (s1 <= 0) {
605 CK(mbfl_filt_conv_illegal_output(0xf861, filter));
606 for (i=0; i<3; i++) {
607 if (c1 == code_tbl_m[i+5][3]) {
608 CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+5][2], filter));
609 break;
610 }
611 }
612 CK(mbfl_filt_conv_illegal_output(c1, filter));
613 CK(mbfl_filt_conv_illegal_output(c, filter));
614 }
615 } else if (mode == 0x4) {
616 for (i=0; i<4; i++) {
617 if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) {
618 filter->cache = c | 0x40000;
619 filter->status = 5;
620 break;
621 }
622 }
623 }
624 break;
625
626 case 5:
627 s1 = 0;
628 c1 = filter->cache & 0xffff;
629 mode = (filter->cache & 0xf0000) >> 16;
630
631 filter->cache = 0;
632 filter->status = 0;
633
634 if (mode == 0x4) {
635 for (i=0; i<4; i++) {
636 if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) {
637 s1 = code_tbl_m[i+8][0];
638 break;
639 }
640 }
641
642 if (s1 > 0) {
643 c1 = s1/94+0x21;
644 c2 = s1-94*(c1-0x21)+0x21;
645 SJIS_ENCODE(c1, c2, s1, s2);
646 CK((*filter->output_function)(s1, filter->data));
647 CK((*filter->output_function)(s2, filter->data));
648 }
649
650 if (s1 <= 0) {
651 CK(mbfl_filt_conv_illegal_output(0xf862, filter));
652 for (i=0; i<4; i++) {
653 if (c1 == code_tbl_m[i+8][4]) {
654 CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][2], filter));
655 CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][3], filter));
656 break;
657 }
658 }
659 CK(mbfl_filt_conv_illegal_output(c1, filter));
660 CK(mbfl_filt_conv_illegal_output(c, filter));
661 }
662 }
663 break;
664
665 default:
666 filter->status = 0;
667 break;
668 }
669 return c;
670 }
671
672 static int
mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter * filter)673 mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter)
674 {
675 int i, c1, s1 = 0;
676 if (filter->status == 1 && filter->cache > 0) {
677 c1 = filter->cache;
678 for (i=0;i<s_form_tbl_len;i++) {
679 if (c1 == s_form_tbl[i]) {
680 s1 = s_form_sjis_fallback_tbl[i];
681 break;
682 }
683 }
684 if (s1 > 0) {
685 CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
686 CK((*filter->output_function)(s1 & 0xff, filter->data));
687 }
688 }
689 filter->cache = 0;
690 filter->status = 0;
691
692 if (filter->flush_function != NULL) {
693 return (*filter->flush_function)(filter->data);
694 }
695
696 return 0;
697 }
698