1
/*
 * "streamable kanji code filter and converter"
 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
 *
 * LICENSE NOTICES
 *
 * This file is part of "streamable kanji code filter and converter",
 * which is distributed under the terms of GNU Lesser General Public
 * License (version 2) as published by the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with "streamable kanji code filter and converter";
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 * Suite 330, Boston, MA 02111-1307 USA
 *
 * The author of this file:
 *
 */
/*
 * The source code included in this file was separated from mbfilter_sjis_open.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
 *
 */
30
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "mbfilter.h"
#include "mbfilter_sjis_mobile.h"

#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"

#include "emoji2uni.h"
42
43 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
44 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
45 extern const unsigned char mblen_table_sjis[];
46
/* NULL-terminated alias lists, one per carrier-specific Shift_JIS variant. */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {
	"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL
};
static const char *mbfl_encoding_sjis_kddi_aliases[] = {
	"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL
};
static const char *mbfl_encoding_sjis_sb_aliases[] = {
	"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL
};
50
51 const mbfl_encoding mbfl_encoding_sjis_docomo = {
52 mbfl_no_encoding_sjis_docomo,
53 "SJIS-Mobile#DOCOMO",
54 "Shift_JIS",
55 (const char *(*)[])&mbfl_encoding_sjis_docomo_aliases,
56 mblen_table_sjis,
57 MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
58 };
59
60 const mbfl_encoding mbfl_encoding_sjis_kddi = {
61 mbfl_no_encoding_sjis_kddi,
62 "SJIS-Mobile#KDDI",
63 "Shift_JIS",
64 (const char *(*)[])&mbfl_encoding_sjis_kddi_aliases,
65 mblen_table_sjis,
66 MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
67 };
68
69 const mbfl_encoding mbfl_encoding_sjis_sb = {
70 mbfl_no_encoding_sjis_sb,
71 "SJIS-Mobile#SOFTBANK",
72 "Shift_JIS",
73 (const char *(*)[])&mbfl_encoding_sjis_sb_aliases,
74 mblen_table_sjis,
75 MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
76 };
77
78 const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
79 mbfl_no_encoding_sjis_docomo,
80 mbfl_filt_ident_common_ctor,
81 mbfl_filt_ident_common_dtor,
82 mbfl_filt_ident_sjis
83 };
84
85 const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
86 mbfl_no_encoding_sjis_kddi,
87 mbfl_filt_ident_common_ctor,
88 mbfl_filt_ident_common_dtor,
89 mbfl_filt_ident_sjis
90 };
91
92 const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
93 mbfl_no_encoding_sjis_sb,
94 mbfl_filt_ident_common_ctor,
95 mbfl_filt_ident_common_dtor,
96 mbfl_filt_ident_sjis
97 };
98
99 const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
100 mbfl_no_encoding_sjis_docomo,
101 mbfl_no_encoding_wchar,
102 mbfl_filt_conv_common_ctor,
103 mbfl_filt_conv_common_dtor,
104 mbfl_filt_conv_sjis_mobile_wchar,
105 mbfl_filt_conv_common_flush
106 };
107
108 const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
109 mbfl_no_encoding_wchar,
110 mbfl_no_encoding_sjis_docomo,
111 mbfl_filt_conv_common_ctor,
112 mbfl_filt_conv_common_dtor,
113 mbfl_filt_conv_wchar_sjis_mobile,
114 mbfl_filt_conv_sjis_mobile_flush
115 };
116
117 const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
118 mbfl_no_encoding_sjis_kddi,
119 mbfl_no_encoding_wchar,
120 mbfl_filt_conv_common_ctor,
121 mbfl_filt_conv_common_dtor,
122 mbfl_filt_conv_sjis_mobile_wchar,
123 mbfl_filt_conv_common_flush
124 };
125
126 const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
127 mbfl_no_encoding_wchar,
128 mbfl_no_encoding_sjis_kddi,
129 mbfl_filt_conv_common_ctor,
130 mbfl_filt_conv_common_dtor,
131 mbfl_filt_conv_wchar_sjis_mobile,
132 mbfl_filt_conv_sjis_mobile_flush
133 };
134
135 const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
136 mbfl_no_encoding_sjis_sb,
137 mbfl_no_encoding_wchar,
138 mbfl_filt_conv_common_ctor,
139 mbfl_filt_conv_common_dtor,
140 mbfl_filt_conv_sjis_mobile_wchar,
141 mbfl_filt_conv_common_flush
142 };
143
144 const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
145 mbfl_no_encoding_wchar,
146 mbfl_no_encoding_sjis_sb,
147 mbfl_filt_conv_common_ctor,
148 mbfl_filt_conv_common_dtor,
149 mbfl_filt_conv_wchar_sjis_mobile,
150 mbfl_filt_conv_sjis_mobile_flush
151 };
152
/*
 * National-flag tables, all indexed by the same country index 0..9.
 *
 * nflags_s[i]         - the two capital letters of country i.  Each row is
 *                       exactly two chars; there is no NUL terminator.
 * nflags_code_kddi[i] - code stored by mbfilter_unicode2sjis_emoji_kddi when
 *                       the letter pair of country i is matched.
 * nflags_code_sb[i]   - same, for mbfilter_unicode2sjis_emoji_sb.
 */
static const char nflags_s[10][2] = {
	"CN", "DE", "ES", "FR", "GB", "IT", "JP", "KR", "RU", "US"
};
static const int nflags_code_kddi[10] = {
	0x2549, 0x2546, 0x24c0, 0x2545, 0x2548,
	0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7
};
static const int nflags_code_sb[10] = {
	0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07,
	0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03
};
156
/*
 * Range tables of the form { first, last, base }.
 *
 * The row layout is the one mbfilter_conv_map_tbl() and
 * mbfilter_conv_r_map_tbl() operate on: values first..last map onto
 * base..base+(last-first).  No use of these four tables is visible in this
 * file; they are exported for other translation units.
 */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{0x28c2, 0x292f, 0xe63e},
	{0x2930, 0x2934, 0xe6ac},
	{0x2935, 0x2951, 0xe6b1},
	{0x2952, 0x29db, 0xe6ce},
};

const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{0x26ec, 0x2838, 0xe468},
	{0x284c, 0x2863, 0xe5b5},
	{0x24b8, 0x24ca, 0xe5cd},
	{0x24cb, 0x2545, 0xea80},
	{0x2839, 0x284b, 0xeafb},
	{0x2546, 0x25c0, 0xeb0e},
	{0x25c1, 0x25c6, 0xeb89},
};

const unsigned short mbfl_sb2uni_pua[6][3] = {
	{0x27a9, 0x2802, 0xe101},
	{0x2808, 0x2861, 0xe201},
	{0x2921, 0x297a, 0xe001},
	{0x2980, 0x29cc, 0xe301},
	{0x2a99, 0x2ae4, 0xe401},
	{0x2af8, 0x2b35, 0xe501},
};

const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{0x24b8, 0x24f6, 0xec40},
	{0x24f7, 0x2573, 0xec80},
	{0x2574, 0x25b2, 0xed40},
	{0x25b3, 0x25c6, 0xed80},
	{0x26ec, 0x272a, 0xef40},
	{0x272b, 0x27a7, 0xef80},
	{0x27a8, 0x27e6, 0xf040},
	{0x27e7, 0x2863, 0xf080},
};
193
/*
 * Map an ASCII capital letter (0x41..0x5A) to a code point by adding
 * 0x1F1A5, so 'A' (0x41) becomes 0x1F1E6.
 */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Propagate failure: return -1 from the enclosing function if statement < 0. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/*
 * Convert a two-byte JIS code (c1 = row byte, c2 = cell byte) into the two
 * Shift_JIS bytes s1, s2.  s1 and s2 must be modifiable lvalues; c1 and c2
 * are evaluated more than once.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
		do {						\
			s1 = (c1);				\
			s1--;					\
			s1 >>= 1;				\
			if ((c1) < 0x5f) {		\
				s1 += 0x71;			\
			} else {				\
				s1 += 0xb1;			\
			}						\
			s2 = (c2);				\
			if ((c1) & 1) {			\
				if ((c2) < 0x60) {	\
					s2--;			\
				}					\
				s2 += 0x20;			\
			} else {				\
				s2 += 0x7e;			\
			}						\
		} while (0)

/*
 * Inverse of SJIS_ENCODE: convert the Shift_JIS byte pair (c1, c2) into the
 * JIS row byte s1 and cell byte s2.  s1 and s2 must be modifiable lvalues.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
		do {						\
			s1 = (c1);				\
			if (s1 < 0xa0) {		\
				s1 -= 0x81;			\
			} else {				\
				s1 -= 0xc1;			\
			}						\
			s1 <<= 1;				\
			s1 += 0x21;				\
			s2 = (c2);				\
			if (s2 < 0x9f) {		\
				if (s2 < 0x7f) {	\
					s2++;			\
				}					\
				s2 -= 0x20;			\
			} else {				\
				s1++;				\
				s2 -= 0x7e;			\
			}						\
		} while (0)

/*
 * Turn the linear code held in s1 (row*94 + cell, both zero-based) into a
 * packed JIS code (row byte << 8 | cell byte) stored back into s1, and set
 * s2 to 1.  c1 and c2 receive the row and cell bytes.  All four arguments
 * must be modifiable lvalues.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and stays correct under an unbraced if/else.
 */
#define CODE2JIS(c1,c2,s1,s2)						\
		do {										\
			c1 = (s1)/94+0x21;						\
			c2 = (s1)-94*((c1)-0x21)+0x21;			\
			s1 = ((c1) << 8) | (c2);				\
			s2 = 1;									\
		} while (0)
246
/*
 * Forward range mapping.
 *
 * Scans the first n rows of map, each { first, last, base }.  For the first
 * row with first <= c <= last, stores base + (c - first) in *w.
 *
 * Returns 1 if a row matched, 0 otherwise; *w is left untouched when nothing
 * matches.
 */
int
mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int i, match = 0;

	for (i = 0; i < n; i++) {
		if (map[i][0] <= c && c <= map[i][1]) {
			*w = c - map[i][0] + map[i][2];
			match = 1;
			break;
		}
	}
	return match;
}
261
/*
 * Reverse range mapping (inverse of mbfilter_conv_map_tbl).
 *
 * Scans the first n rows of map, each { first, last, base }.  For the first
 * row with base <= c <= base + (last - first), stores first + (c - base)
 * in *w.
 *
 * Returns 1 if a row matched, 0 otherwise; *w is left untouched when nothing
 * matches.
 */
int
mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int i, match = 0;

	for (i = 0; i < n; i++) {
		if (map[i][2] <= c && c <= map[i][2] - map[i][0] + map[i][1]) {
			*w = c + map[i][0] - map[i][2];
			match = 1;
			break;
		}
	}
	return match;
}
276
277 int
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)278 mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
279 {
280 int w = s;
281 if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
282 if (s >= mb_tbl_code2uni_docomo1_min + 0x00a2 &&
283 s <= mb_tbl_code2uni_docomo1_min + 0x00ad &&
284 s != mb_tbl_code2uni_docomo1_min + 0x00a3) {
285 w = 0x20E3;
286 *snd = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
287 if (*snd > 0xf000) {
288 *snd += 0x10000;
289 }
290 } else {
291 w = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
292 if (w > 0xf000) {
293 w += 0x10000;
294 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
295 w += 0xf0000;
296 }
297 *snd = 0;
298 if (!w) {
299 w = s;
300 }
301 }
302 }
303
304 return w;
305 }
306
307 int
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)308 mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
309 {
310 int w = s, si, c;
311 const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
312
313 *snd = 0;
314 if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
315 si = s - mb_tbl_code2uni_kddi1_min;
316 if (si == 0x0008) { /* ES */
317 *snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
318 } else if (si == 0x0009) { /* RU */
319 *snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
320 } else if (si >= 0x008d && si <= 0x0092) {
321 c = nflags_order_kddi[si-0x008d];
322 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
323 } else if (si == 0x0104) {
324 *snd = 0x0023; w = 0x20E3;
325 } else {
326 w = mb_tbl_code2uni_kddi1[si];
327 if (w > 0xf000) {
328 w += 0x10000;
329 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
330 w += 0xf0000;
331 }
332 }
333 } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
334 si = s - mb_tbl_code2uni_kddi2_min;
335 if (si == 100) { /* JP */
336 *snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
337 } else if (si >= 0x00ba && si <= 0x00c2) {
338 *snd = si-0x00ba+0x0031; w = 0x20E3;
339 } else if (si == 0x010b) { /* US */
340 *snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
341 } else if (si == 0x0144) {
342 *snd = 0x0030; w = 0x20E3;
343 } else {
344 w = mb_tbl_code2uni_kddi2[si];
345 if (w > 0xf000) {
346 w += 0x10000;
347 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
348 w += 0xf0000;
349 }
350 }
351 }
352 return w;
353 }
354
355 int
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)356 mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
357 {
358 int w = s, si, c;
359 const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
360
361 *snd = 0;
362 if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
363 si = s - mb_tbl_code2uni_sb1_min;
364 if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
365 *snd = mb_tbl_code2uni_sb1[si];
366 if (*snd > 0xf000) {
367 *snd += 0x10000;
368 }
369 w = 0x20E3;
370 } else {
371 w = mb_tbl_code2uni_sb1[si];
372 if (w > 0xf000) {
373 w += 0x10000;
374 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
375 w += 0xf0000;
376 }
377 }
378 } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
379 si = s - mb_tbl_code2uni_sb2_min;
380 w = mb_tbl_code2uni_sb2[si];
381 if (w > 0xf000) {
382 w += 0x10000;
383 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
384 w += 0xf0000;
385 }
386 } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
387 si = s - mb_tbl_code2uni_sb3_min;
388 if (si >= 0x0069 && si <= 0x0072) {
389 c = nflags_order_sb[si-0x0069];
390 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
391 } else {
392 w = mb_tbl_code2uni_sb3[si];
393 if (w > 0xf000) {
394 w += 0x10000;
395 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
396 w += 0xf0000;
397 }
398 }
399 }
400 return w;
401 }
402
403 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)404 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
405 {
406 int i, match = 0, c1s;
407
408 if (filter->status == 1) {
409 c1s = filter->cache;
410 filter->cache = 0;
411 filter->status = 0;
412 if (c == 0x20E3) {
413 if (c1s == 0x0023) {
414 *s1 = 0x2964;
415 match = 1;
416 } else if (c1s == 0x0030) {
417 *s1 = 0x296f;
418 match = 1;
419 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
420 *s1 = 0x2966 + (c1s - 0x0031);
421 match = 1;
422 }
423 } else {
424 CK((*filter->output_function)(c1s, filter->data));
425 }
426 } else {
427 if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
428 filter->status = 1;
429 filter->cache = c;
430 *s1 = -1;
431 return match;
432 }
433
434 if (c == 0x00A9) {
435 *s1 = 0x29b5; match = 1;
436 } else if (c == 0x00AE) {
437 *s1 = 0x29ba; match = 1;
438 } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
439 i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
440 if (i >= 0) {
441 *s1 = mb_tbl_uni_docomo2code2_value[i];
442 match = 1;
443 }
444 } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
445 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
446 if (i >= 0) {
447 *s1 = mb_tbl_uni_docomo2code3_value[i];
448 match = 1;
449 }
450 } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
451 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
452 if (i >= 0) {
453 *s1 = mb_tbl_uni_docomo2code5_val[i];
454 match = 1;
455 }
456 }
457 }
458
459 return match;
460 }
461
462 int
mbfilter_unicode2sjis_emoji_kddi(int c,int * s1,mbfl_convert_filter * filter)463 mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
464 {
465 int i, match = 0, c1s;
466
467 if (filter->status == 1) {
468 c1s = filter->cache;
469 filter->cache = 0;
470 filter->status = 0;
471 if (c == 0x20E3) {
472 if (c1s == 0x0023) {
473 *s1 = 0x25bc;
474 match = 1;
475 } else if (c1s == 0x0030) {
476 *s1 = 0x2830;
477 match = 1;
478 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
479 *s1 = 0x27a6 + (c1s - 0x0031);
480 match = 1;
481 }
482 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
483 for (i=0; i<10; i++) {
484 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
485 *s1 = nflags_code_kddi[i];
486 match = 1;
487 break;
488 }
489 }
490 } else {
491 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
492 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
493 CK((*filter->output_function)(c1s, filter->data));
494 }
495 }
496 } else {
497 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
498 (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
499 filter->status = 1;
500 filter->cache = c;
501 *s1 = -1;
502 return match;
503 }
504
505 if (c == 0x00A9) {
506 *s1 = 0x27dc; match = 1;
507 } else if (c == 0x00AE) {
508 *s1 = 0x27dd; match = 1;
509 } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
510 i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
511 if (i >= 0) {
512 *s1 = mb_tbl_uni_kddi2code2_value[i];
513 match = 1;
514 }
515 } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
516 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
517 if (i >= 0) {
518 *s1 = mb_tbl_uni_kddi2code3_value[i];
519 match = 1;
520 }
521 } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
522 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
523 if (i >= 0) {
524 *s1 = mb_tbl_uni_kddi2code5_val[i];
525 match = 1;
526 }
527 }
528 }
529
530 return match;
531 }
532
533 int
mbfilter_unicode2sjis_emoji_sb(int c,int * s1,mbfl_convert_filter * filter)534 mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
535 {
536 int i, match = 0, c1s;
537
538 if (filter->status == 1) {
539 filter->status = 0;
540 c1s = filter->cache;
541 filter->cache = 0;
542 if (c == 0x20E3) {
543 if (c1s == 0x0023) {
544 *s1 = 0x2817;
545 match = 1;
546 } else if (c1s == 0x0030) {
547 *s1 = 0x282c;
548 match = 1;
549 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
550 *s1 = 0x2823 + (c1s - 0x0031);
551 match = 1;
552 }
553 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
554 for (i=0; i<10; i++) {
555 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
556 *s1 = nflags_code_sb[i];
557 match = 1;
558 break;
559 }
560 }
561 } else {
562 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
563 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
564 CK((*filter->output_function)(c1s, filter->data));
565 }
566 }
567 } else {
568 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
569 filter->status = 1;
570 filter->cache = c;
571 *s1 = -1;
572 return match;
573 }
574
575 if (c == 0x00A9) {
576 *s1 = 0x2855; match = 1;
577 } else if (c == 0x00AE) {
578 *s1 = 0x2856; match = 1;
579 } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
580 i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
581 if (i >= 0) {
582 *s1 = mb_tbl_uni_sb2code2_value[i];
583 match = 1;
584 }
585 } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
586 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
587 if (i >= 0) {
588 *s1 = mb_tbl_uni_sb2code3_value[i];
589 match = 1;
590 }
591 } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
592 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
593 if (i >= 0) {
594 *s1 = mb_tbl_uni_sb2code5_val[i];
595 match = 1;
596 }
597 }
598 }
599 return match;
600 }
601
602 /*
603 * SJIS-win => wchar
604 */
605 int
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)606 mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
607 {
608 int c1, s, s1 = 0, s2 = 0, w;
609 int snd = 0;
610
611 retry:
612 switch (filter->status) {
613 case 0:
614 if (c >= 0 && c < 0x80) { /* latin */
615 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb && c == 0x1b) {
616 filter->cache = c;
617 filter->status = 2;
618 } else {
619 CK((*filter->output_function)(c, filter->data));
620 }
621 } else if (c > 0xa0 && c < 0xe0) { /* kana */
622 CK((*filter->output_function)(0xfec0 + c, filter->data));
623 } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
624 filter->status = 1;
625 filter->cache = c;
626 } else {
627 w = c & MBFL_WCSGROUP_MASK;
628 w |= MBFL_WCSGROUP_THROUGH;
629 CK((*filter->output_function)(w, filter->data));
630 }
631 break;
632
633 case 1: /* kanji second char */
634 filter->status = 0;
635 c1 = filter->cache;
636 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
637 w = 0;
638 SJIS_DECODE(c1, c, s1, s2);
639 s = (s1 - 0x21)*94 + s2 - 0x21;
640 if (s <= 137) {
641 if (s == 31) {
642 w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
643 } else if (s == 32) {
644 w = 0xff5e; /* FULLWIDTH TILDE */
645 } else if (s == 33) {
646 w = 0x2225; /* PARALLEL TO */
647 } else if (s == 60) {
648 w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
649 } else if (s == 80) {
650 w = 0xffe0; /* FULLWIDTH CENT SIGN */
651 } else if (s == 81) {
652 w = 0xffe1; /* FULLWIDTH POUND SIGN */
653 } else if (s == 137) {
654 w = 0xffe2; /* FULLWIDTH NOT SIGN */
655 }
656 }
657 if (w == 0) {
658 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
659 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
660 } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
661 w = jisx0208_ucs_table[s];
662 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
663 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
664 } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
665 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
666 } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
667 w = s - (94*94) + 0xe000;
668 }
669
670 if (s >= (94*94) && s < 119*94) {
671 if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
672 w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
673 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
674 w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
675 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
676 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
677 }
678
679 if (w > 0 && snd > 0) {
680 CK((*filter->output_function)(snd, filter->data));
681 }
682 }
683 }
684 if (w <= 0) {
685 w = (s1 << 8) | s2;
686 w &= MBFL_WCSPLANE_MASK;
687 w |= MBFL_WCSPLANE_WINCP932;
688 }
689 CK((*filter->output_function)(w, filter->data));
690 } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
691 CK((*filter->output_function)(c, filter->data));
692 } else {
693 w = (c1 << 8) | c;
694 w &= MBFL_WCSGROUP_MASK;
695 w |= MBFL_WCSGROUP_THROUGH;
696 CK((*filter->output_function)(w, filter->data));
697 }
698 break;
699 /* ESC : Softbank Emoji */
700 case 2:
701 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
702 c == 0x24) {
703 filter->cache = c;
704 filter->status++;
705 } else {
706 filter->cache = 0;
707 filter->status = 0;
708 CK((*filter->output_function)(0x1b, filter->data));
709 goto retry;
710 }
711 break;
712
713 /* ESC $ : Softbank Emoji */
714 case 3:
715 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
716 ((c >= 0x45 && c <= 0x47) || (c >= 0x4f && c <= 0x51))) {
717 filter->cache = c;
718 filter->status++;
719 } else {
720 filter->cache = 0;
721 filter->status = 0;
722 CK((*filter->output_function)(0x1b, filter->data));
723 CK((*filter->output_function)(0x24, filter->data));
724 goto retry;
725 }
726 break;
727
728 /* ESC [GEFOPQ] : Softbank Emoji */
729 case 4:
730 w = 0;
731 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
732 c1 = filter->cache;
733
734 if (c == 0x0f) {
735 w = c;
736 filter->cache = 0;
737 filter->status = 0;
738 } else {
739 if (c1 == 0x47 && c >= 0x21 && c <= 0x7a) {
740 s1 = 0x91; s2 = c;
741 } else if (c1 == 0x45 && c >= 0x21 && c <= 0x7a) {
742 s1 = 0x8d; s2 = c;
743 } else if (c1 == 0x46 && c >= 0x21 && c <= 0x7a) {
744 s1 = 0x8e; s2 = c;
745 } else if (c1 == 0x4f && c >= 0x21 && c <= 0x6d) {
746 s1 = 0x92; s2 = c;
747 } else if (c1 == 0x50 && c >= 0x21 && c <= 0x6c) {
748 s1 = 0x95; s2 = c;
749 } else if (c1 == 0x51 && c >= 0x21 && c <= 0x5e) {
750 s1 = 0x96; s2 = c;
751 }
752 s = (s1 - 0x21)*94 + s2 - 0x21;
753 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
754 if (w > 0) {
755 if (snd > 0) {
756 CK((*filter->output_function)(snd, filter->data));
757 }
758 CK((*filter->output_function)(w, filter->data));
759 }
760 }
761 }
762
763 if (w <= 0) {
764 c1 = filter->cache;
765 filter->cache = 0;
766 filter->status = 0;
767 CK((*filter->output_function)(0x1b, filter->data));
768 CK((*filter->output_function)(0x24, filter->data));
769 CK((*filter->output_function)(c1 & 0xff, filter->data));
770 goto retry;
771 }
772 break;
773
774 default:
775 filter->status = 0;
776 break;
777 }
778
779 return c;
780 }
781
782 /*
783 * wchar => SJIS-win
784 */
785 int
mbfl_filt_conv_wchar_sjis_mobile(int c,mbfl_convert_filter * filter)786 mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
787 {
788 int c1, c2, s1, s2;
789
790 s1 = 0;
791 s2 = 0;
792 if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
793 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
794 } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
795 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
796 } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
797 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
798 } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
799 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
800 } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
801 s1 = c - 0xe000;
802 c1 = s1/94 + 0x7f;
803 c2 = s1%94 + 0x21;
804 s1 = (c1 << 8) | c2;
805 s2 = 1;
806 }
807 if (s1 <= 0) {
808 c1 = c & ~MBFL_WCSPLANE_MASK;
809 if (c1 == MBFL_WCSPLANE_WINCP932) {
810 s1 = c & MBFL_WCSPLANE_MASK;
811 s2 = 1;
812 } else if (c1 == MBFL_WCSPLANE_JIS0208) {
813 s1 = c & MBFL_WCSPLANE_MASK;
814 } else if (c1 == MBFL_WCSPLANE_JIS0212) {
815 s1 = c & MBFL_WCSPLANE_MASK;
816 s1 |= 0x8080;
817 } else if (c == 0xa5) { /* YEN SIGN */
818 s1 = 0x216f; /* FULLWIDTH YEN SIGN */
819 } else if (c == 0x203e) { /* OVER LINE */
820 s1 = 0x2131; /* FULLWIDTH MACRON */
821 } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
822 s1 = 0x2140;
823 } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
824 s1 = 0x2141;
825 } else if (c == 0x2225) { /* PARALLEL TO */
826 s1 = 0x2142;
827 } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
828 s1 = 0x215d;
829 } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
830 s1 = 0x2171;
831 } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
832 s1 = 0x2172;
833 } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
834 s1 = 0x224c;
835 }
836 }
837
838 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
839 s1 = -1;
840 c1 = 0;
841 c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
842 while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
843 if (c == cp932ext1_ucs_table[c1]) {
844 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
845 break;
846 }
847 c1++;
848 }
849 if (s1 <= 0) {
850 c1 = 0;
851 c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
852 while (c1 < c2) { /* CP932 vendor ext2 (115ku - 119ku) */
853 if (c == cp932ext2_ucs_table[c1]) {
854 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
855 break;
856 }
857 c1++;
858 }
859 }
860
861 if (s1 <= 0) {
862 c1 = 0;
863 c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
864 while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
865 if (c == cp932ext3_ucs_table[c1]) {
866 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
867 break;
868 }
869 c1++;
870 }
871 }
872 if (c == 0) {
873 s1 = 0;
874 } else if (s1 <= 0) {
875 s1 = -1;
876 }
877 }
878
879 if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
880 mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
881 (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
882 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
883 (filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
884 mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 )) {
885 CODE2JIS(c1,c2,s1,s2);
886 }
887
888 if (filter->status == 1 && filter->cache > 0) {
889 return c;
890 }
891
892 if (s1 >= 0) {
893 if (s1 < 0x100) { /* latin or kana */
894 CK((*filter->output_function)(s1, filter->data));
895 } else { /* kanji */
896 c1 = (s1 >> 8) & 0xff;
897 c2 = s1 & 0xff;
898 SJIS_ENCODE(c1, c2, s1, s2);
899 CK((*filter->output_function)(s1, filter->data));
900 CK((*filter->output_function)(s2, filter->data));
901 }
902 } else {
903 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
904 CK(mbfl_filt_conv_illegal_output(c, filter));
905 }
906 }
907
908 return c;
909 }
910
911 int
mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter * filter)912 mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
913 {
914 int c1 = filter->cache;
915 if (filter->status == 1 && (c1 == 0x0023 || (c1 >= 0x0030 && c1<=0x0039))) {
916 CK((*filter->output_function)(c1, filter->data));
917 }
918 filter->status = 0;
919 filter->cache = 0;
920
921 if (filter->flush_function != NULL) {
922 return (*filter->flush_function)(filter->data);
923 }
924
925 return 0;
926 }
927
928