1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
 * The source code in this file was split out of mbfilter_sjis_open.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27 *
28 */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include "mbfilter.h"
35 #include "mbfilter_sjis_mobile.h"
36
37 #include "unicode_table_cp932_ext.h"
38 #include "unicode_table_jis.h"
39
40 #include "emoji2uni.h"
41
/* Helpers and tables that are defined outside this file. */

/* Looks up w in tbl (n entries). The callers in this file treat a
 * non-negative result as the index of the matching entry.
 * NOTE(review): presumably a binary search over a sorted key table -- confirm. */
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
/* Identify-filter callback installed in the three identify vtables below. */
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
/* Table referenced by the three encoding descriptors below. */
extern const unsigned char mblen_table_sjis[];
45
/* NULL-terminated alias lists, one per carrier variant. Each list is referenced
 * by the encoding descriptor of the same carrier. */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL};
static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL};
static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL};
49
/*
 * Encoding descriptors for the three carrier-specific Shift_JIS variants.
 * All three share the "Shift_JIS" string, mblen_table_sjis and the same flag
 * set; they differ in the encoding id, the display name and the alias list.
 * NOTE(review): the initialisers are positional -- the meaning of each slot is
 * defined by mbfl_encoding in mbfilter.h, which is not visible here.
 */
const mbfl_encoding mbfl_encoding_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	"SJIS-Mobile#DOCOMO",
	"Shift_JIS",
	(const char *(*)[])&mbfl_encoding_sjis_docomo_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};

const mbfl_encoding mbfl_encoding_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	"SJIS-Mobile#KDDI",
	"Shift_JIS",
	(const char *(*)[])&mbfl_encoding_sjis_kddi_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};

const mbfl_encoding mbfl_encoding_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	"SJIS-Mobile#SOFTBANK",
	"Shift_JIS",
	(const char *(*)[])&mbfl_encoding_sjis_sb_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
76
/*
 * Identify vtables for the three carrier variants. They differ only in the
 * encoding id; the constructor, destructor and the per-character callback
 * (mbfl_filt_ident_sjis) are the same for all three.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_common_dtor,
	mbfl_filt_ident_sjis
};

const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_common_dtor,
	mbfl_filt_ident_sjis
};

const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_common_dtor,
	mbfl_filt_ident_sjis
};
97
/*
 * Conversion vtables, one pair (to wchar / from wchar) per carrier.
 *
 * Every "=> wchar" table uses mbfl_filt_conv_sjis_mobile_wchar with the common
 * flush routine. Every "wchar =>" table uses mbfl_filt_conv_wchar_sjis_mobile
 * together with mbfl_filt_conv_sjis_mobile_flush, which emits a character that
 * is still held in filter->cache when the input ends.
 */
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_common_flush
};

const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush
};

const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_common_flush
};

const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush
};

const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
	mbfl_no_encoding_sjis_sb,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_common_flush
};

const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush
};
151
/*
 * National-flag emoji support. The three tables are parallel (same index =
 * same country).
 *
 * nflags_s holds the two letters of each country code. Every entry is exactly
 * two chars with no room for a terminating NUL, so entries must be read as
 * nflags_s[i][0] / nflags_s[i][1] and never passed to string functions.
 */
static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"};
/* Code stored in *s1 by mbfilter_unicode2sjis_emoji_kddi for flag i. */
static const int nflags_code_kddi[10] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7};
/* Code stored in *s1 by mbfilter_unicode2sjis_emoji_sb for flag i. */
static const int nflags_code_sb[10] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03};
155
/*
 * Range tables in the three-column layout consumed by mbfilter_conv_map_tbl()
 * and mbfilter_conv_r_map_tbl(): { first source code, last source code,
 * target value for the first source code }. A code inside a row's range maps
 * to target + (code - first).
 * NOTE(review): the names suggest carrier emoji code => Unicode private-use
 * area; the users of these tables are outside this file -- confirm there.
 */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{0x28c2, 0x292f, 0xe63e},
	{0x2930, 0x2934, 0xe6ac},
	{0x2935, 0x2951, 0xe6b1},
	{0x2952, 0x29db, 0xe6ce},
};

const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{0x26ec, 0x2838, 0xe468},
	{0x284c, 0x2863, 0xe5b5},
	{0x24b8, 0x24ca, 0xe5cd},
	{0x24cb, 0x2545, 0xea80},
	{0x2839, 0x284b, 0xeafb},
	{0x2546, 0x25c0, 0xeb0e},
	{0x25c1, 0x25c6, 0xeb89},
};

const unsigned short mbfl_sb2uni_pua[6][3] = {
	{0x27a9, 0x2802, 0xe101},
	{0x2808, 0x2861, 0xe201},
	{0x2921, 0x297a, 0xe001},
	{0x2980, 0x29cc, 0xe301},
	{0x2a99, 0x2ae4, 0xe401},
	{0x2af8, 0x2b35, 0xe501},
};

/* Second KDDI table over the same source codes with different target values. */
const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{0x24b8, 0x24f6, 0xec40},
	{0x24f7, 0x2573, 0xec80},
	{0x2574, 0x25b2, 0xed40},
	{0x25b3, 0x25c6, 0xed80},
	{0x26ec, 0x272a, 0xef40},
	{0x272b, 0x27a7, 0xef80},
	{0x27a8, 0x27e6, 0xf040},
	{0x27e7, 0x2863, 0xf080},
};
192
/* Adds 0x1F1A5 to a character value, so 'A' (0x41) becomes 0x1F1E6 and 'Z'
 * (0x5A) becomes 0x1F1FF. Used to build the two code points of a flag from
 * the letters in nflags_s. */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Evaluates statement once and makes the enclosing function return -1 when
 * the result is negative. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
196
/*
 * Turn the two 7-bit bytes of a double-byte code (c1 = high byte, c2 = low
 * byte) into a Shift_JIS byte pair: s1 receives the lead byte, s2 the trail
 * byte. s1 and s2 must be assignable; c1 and c2 are evaluated several times.
 */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		s1 = ((c1) - 1) >> 1; \
		s1 += ((c1) < 0x5f) ? 0x71 : 0xb1; \
		if ((c1) & 1) { \
			/* odd row: first half of the trail-byte range, skipping 0x7f */ \
			s2 = ((c2) < 0x60) ? (c2) - 1 : (c2); \
			s2 += 0x20; \
		} else { \
			/* even row: second half of the trail-byte range */ \
			s2 = (c2) + 0x7e; \
		} \
	} while (0)
217
/*
 * Inverse of SJIS_ENCODE: split a Shift_JIS byte pair (c1 = lead byte,
 * c2 = trail byte) into the two 7-bit bytes of the double-byte code,
 * s1 = high byte and s2 = low byte. s1 and s2 must be assignable.
 */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		s1 = (c1); \
		s1 -= (s1 < 0xa0) ? 0x81 : 0xc1; \
		s1 = (s1 << 1) + 0x21; \
		s2 = (c2); \
		if (s2 >= 0x9f) { \
			/* upper trail bytes belong to the following (even) row */ \
			s1++; \
			s2 -= 0x7e; \
		} else { \
			if (s2 < 0x7f) { \
				s2++; \
			} \
			s2 -= 0x20; \
		} \
	} while (0)
239
/*
 * Convert a linear code index held in s1 (row * 94 + cell, both zero-based)
 * into a two-byte code: c1 receives the high byte (row + 0x21), c2 the low
 * byte (cell + 0x21), s1 is overwritten with (c1 << 8) | c2 and s2 is set
 * to 1.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, including after an unbraced if/else.
 */
#define CODE2JIS(c1,c2,s1,s2) \
	do { \
		c1 = (s1)/94+0x21; \
		c2 = (s1)-94*((c1)-0x21)+0x21; \
		s1 = ((c1) << 8) | (c2); \
		s2 = 1; \
	} while (0)
245
/*
 * Forward range lookup.
 *
 * Each row of map is { first, last, target }. The first row whose inclusive
 * range [first, last] contains c wins, and *w receives
 * target + (c - first).
 *
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 */
int
mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int k;

	for (k = 0; k < n; k++) {
		const unsigned short *range = map[k];

		if (c < range[0] || c > range[1]) {
			continue;
		}
		*w = c - range[0] + range[2];
		return 1;
	}
	return 0;
}
260
/*
 * Reverse range lookup: the inverse of mbfilter_conv_map_tbl().
 *
 * Each row of map is { first, last, target }. c is matched against the
 * target side of a row, i.e. the inclusive range
 * [target, target + (last - first)], and *w receives the corresponding
 * source code first + (c - target).
 *
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 */
int
mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int k;

	for (k = 0; k < n; k++) {
		const int src_first = map[k][0];
		const int src_last = map[k][1];
		const int dst_first = map[k][2];

		if (c >= dst_first && c <= dst_first - src_first + src_last) {
			*w = c + src_first - dst_first;
			return 1;
		}
	}
	return 0;
}
275
276 int
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)277 mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
278 {
279 int w = s;
280 if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
281 if (s >= mb_tbl_code2uni_docomo1_min + 0x00a2 &&
282 s <= mb_tbl_code2uni_docomo1_min + 0x00ad &&
283 s != mb_tbl_code2uni_docomo1_min + 0x00a3) {
284 w = 0x20E3;
285 *snd = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
286 if (*snd > 0xf000) {
287 *snd += 0x10000;
288 }
289 } else {
290 w = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
291 if (w > 0xf000) {
292 w += 0x10000;
293 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
294 w += 0xf0000;
295 }
296 *snd = 0;
297 if (!w) {
298 w = s;
299 }
300 }
301 }
302
303 return w;
304 }
305
306 int
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)307 mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
308 {
309 int w = s, si, c;
310 const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
311
312 *snd = 0;
313 if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
314 si = s - mb_tbl_code2uni_kddi1_min;
315 if (si == 0x0008) { /* ES */
316 *snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
317 } else if (si == 0x0009) { /* RU */
318 *snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
319 } else if (si >= 0x008d && si <= 0x0092) {
320 c = nflags_order_kddi[si-0x008d];
321 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
322 } else if (si == 0x0104) {
323 *snd = 0x0023; w = 0x20E3;
324 } else {
325 w = mb_tbl_code2uni_kddi1[si];
326 if (w > 0xf000) {
327 w += 0x10000;
328 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
329 w += 0xf0000;
330 }
331 }
332 } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
333 si = s - mb_tbl_code2uni_kddi2_min;
334 if (si == 100) { /* JP */
335 *snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
336 } else if (si >= 0x00ba && si <= 0x00c2) {
337 *snd = si-0x00ba+0x0031; w = 0x20E3;
338 } else if (si == 0x010b) { /* US */
339 *snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
340 } else if (si == 0x0144) {
341 *snd = 0x0030; w = 0x20E3;
342 } else {
343 w = mb_tbl_code2uni_kddi2[si];
344 if (w > 0xf000) {
345 w += 0x10000;
346 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
347 w += 0xf0000;
348 }
349 }
350 }
351 return w;
352 }
353
354 int
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)355 mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
356 {
357 int w = s, si, c;
358 const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
359
360 *snd = 0;
361 if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
362 si = s - mb_tbl_code2uni_sb1_min;
363 if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
364 *snd = mb_tbl_code2uni_sb1[si];
365 if (*snd > 0xf000) {
366 *snd += 0x10000;
367 }
368 w = 0x20E3;
369 } else {
370 w = mb_tbl_code2uni_sb1[si];
371 if (w > 0xf000) {
372 w += 0x10000;
373 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
374 w += 0xf0000;
375 }
376 }
377 } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
378 si = s - mb_tbl_code2uni_sb2_min;
379 w = mb_tbl_code2uni_sb2[si];
380 if (w > 0xf000) {
381 w += 0x10000;
382 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
383 w += 0xf0000;
384 }
385 } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
386 si = s - mb_tbl_code2uni_sb3_min;
387 if (si >= 0x0069 && si <= 0x0072) {
388 c = nflags_order_sb[si-0x0069];
389 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
390 } else {
391 w = mb_tbl_code2uni_sb3[si];
392 if (w > 0xf000) {
393 w += 0x10000;
394 } else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
395 w += 0xf0000;
396 }
397 }
398 }
399 return w;
400 }
401
402 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)403 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
404 {
405 int i, match = 0, c1s;
406
407 if (filter->status == 1) {
408 c1s = filter->cache;
409 filter->cache = 0;
410 filter->status = 0;
411 if (c == 0x20E3) {
412 if (c1s == 0x0023) {
413 *s1 = 0x2964;
414 match = 1;
415 } else if (c1s == 0x0030) {
416 *s1 = 0x296f;
417 match = 1;
418 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
419 *s1 = 0x2966 + (c1s - 0x0031);
420 match = 1;
421 }
422 } else {
423 CK((*filter->output_function)(c1s, filter->data));
424 }
425 } else {
426 if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
427 filter->status = 1;
428 filter->cache = c;
429 *s1 = -1;
430 return match;
431 }
432
433 if (c == 0x00A9) {
434 *s1 = 0x29b5; match = 1;
435 } else if (c == 0x00AE) {
436 *s1 = 0x29ba; match = 1;
437 } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
438 i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
439 if (i >= 0) {
440 *s1 = mb_tbl_uni_docomo2code2_value[i];
441 match = 1;
442 }
443 } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
444 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
445 if (i >= 0) {
446 *s1 = mb_tbl_uni_docomo2code3_value[i];
447 match = 1;
448 }
449 } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
450 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
451 if (i >= 0) {
452 *s1 = mb_tbl_uni_docomo2code5_val[i];
453 match = 1;
454 }
455 }
456 }
457
458 return match;
459 }
460
461 int
mbfilter_unicode2sjis_emoji_kddi(int c,int * s1,mbfl_convert_filter * filter)462 mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
463 {
464 int i, match = 0, c1s;
465
466 if (filter->status == 1) {
467 c1s = filter->cache;
468 filter->cache = 0;
469 filter->status = 0;
470 if (c == 0x20E3) {
471 if (c1s == 0x0023) {
472 *s1 = 0x25bc;
473 match = 1;
474 } else if (c1s == 0x0030) {
475 *s1 = 0x2830;
476 match = 1;
477 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
478 *s1 = 0x27a6 + (c1s - 0x0031);
479 match = 1;
480 }
481 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
482 for (i=0; i<10; i++) {
483 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
484 *s1 = nflags_code_kddi[i];
485 match = 1;
486 break;
487 }
488 }
489 } else {
490 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
491 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
492 CK((*filter->output_function)(c1s, filter->data));
493 }
494 }
495 } else {
496 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
497 (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
498 filter->status = 1;
499 filter->cache = c;
500 *s1 = -1;
501 return match;
502 }
503
504 if (c == 0x00A9) {
505 *s1 = 0x27dc; match = 1;
506 } else if (c == 0x00AE) {
507 *s1 = 0x27dd; match = 1;
508 } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
509 i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
510 if (i >= 0) {
511 *s1 = mb_tbl_uni_kddi2code2_value[i];
512 match = 1;
513 }
514 } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
515 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
516 if (i >= 0) {
517 *s1 = mb_tbl_uni_kddi2code3_value[i];
518 match = 1;
519 }
520 } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
521 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
522 if (i >= 0) {
523 *s1 = mb_tbl_uni_kddi2code5_val[i];
524 match = 1;
525 }
526 }
527 }
528
529 return match;
530 }
531
532 int
mbfilter_unicode2sjis_emoji_sb(int c,int * s1,mbfl_convert_filter * filter)533 mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
534 {
535 int i, match = 0, c1s;
536
537 if (filter->status == 1) {
538 filter->status = 0;
539 c1s = filter->cache;
540 filter->cache = 0;
541 if (c == 0x20E3) {
542 if (c1s == 0x0023) {
543 *s1 = 0x2817;
544 match = 1;
545 } else if (c1s == 0x0030) {
546 *s1 = 0x282c;
547 match = 1;
548 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
549 *s1 = 0x2823 + (c1s - 0x0031);
550 match = 1;
551 }
552 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
553 for (i=0; i<10; i++) {
554 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
555 *s1 = nflags_code_sb[i];
556 match = 1;
557 break;
558 }
559 }
560 } else {
561 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
562 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
563 CK((*filter->output_function)(c1s, filter->data));
564 }
565 }
566 } else {
567 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
568 filter->status = 1;
569 filter->cache = c;
570 *s1 = -1;
571 return match;
572 }
573
574 if (c == 0x00A9) {
575 *s1 = 0x2855; match = 1;
576 } else if (c == 0x00AE) {
577 *s1 = 0x2856; match = 1;
578 } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
579 i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
580 if (i >= 0) {
581 *s1 = mb_tbl_uni_sb2code2_value[i];
582 match = 1;
583 }
584 } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
585 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
586 if (i >= 0) {
587 *s1 = mb_tbl_uni_sb2code3_value[i];
588 match = 1;
589 }
590 } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
591 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
592 if (i >= 0) {
593 *s1 = mb_tbl_uni_sb2code5_val[i];
594 match = 1;
595 }
596 }
597 }
598 return match;
599 }
600
601 /*
602 * SJIS-win => wchar
603 */
604 int
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)605 mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
606 {
607 int c1, s, s1 = 0, s2 = 0, w;
608 int snd = 0;
609
610 retry:
611 switch (filter->status) {
612 case 0:
613 if (c >= 0 && c < 0x80) { /* latin */
614 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb && c == 0x1b) {
615 filter->cache = c;
616 filter->status = 2;
617 } else {
618 CK((*filter->output_function)(c, filter->data));
619 }
620 } else if (c > 0xa0 && c < 0xe0) { /* kana */
621 CK((*filter->output_function)(0xfec0 + c, filter->data));
622 } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
623 filter->status = 1;
624 filter->cache = c;
625 } else {
626 w = c & MBFL_WCSGROUP_MASK;
627 w |= MBFL_WCSGROUP_THROUGH;
628 CK((*filter->output_function)(w, filter->data));
629 }
630 break;
631
632 case 1: /* kanji second char */
633 filter->status = 0;
634 c1 = filter->cache;
635 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
636 w = 0;
637 SJIS_DECODE(c1, c, s1, s2);
638 s = (s1 - 0x21)*94 + s2 - 0x21;
639 if (s <= 137) {
640 if (s == 31) {
641 w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
642 } else if (s == 32) {
643 w = 0xff5e; /* FULLWIDTH TILDE */
644 } else if (s == 33) {
645 w = 0x2225; /* PARALLEL TO */
646 } else if (s == 60) {
647 w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
648 } else if (s == 80) {
649 w = 0xffe0; /* FULLWIDTH CENT SIGN */
650 } else if (s == 81) {
651 w = 0xffe1; /* FULLWIDTH POUND SIGN */
652 } else if (s == 137) {
653 w = 0xffe2; /* FULLWIDTH NOT SIGN */
654 }
655 }
656 if (w == 0) {
657 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
658 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
659 } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
660 w = jisx0208_ucs_table[s];
661 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
662 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
663 } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
664 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
665 } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
666 w = s - (94*94) + 0xe000;
667 }
668
669 if (s >= (94*94) && s < 119*94) {
670 if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
671 w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
672 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
673 w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
674 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
675 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
676 }
677
678 if (w > 0 && snd > 0) {
679 CK((*filter->output_function)(snd, filter->data));
680 }
681 }
682 }
683 if (w <= 0) {
684 w = (s1 << 8) | s2;
685 w &= MBFL_WCSPLANE_MASK;
686 w |= MBFL_WCSPLANE_WINCP932;
687 }
688 CK((*filter->output_function)(w, filter->data));
689 } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
690 CK((*filter->output_function)(c, filter->data));
691 } else {
692 w = (c1 << 8) | c;
693 w &= MBFL_WCSGROUP_MASK;
694 w |= MBFL_WCSGROUP_THROUGH;
695 CK((*filter->output_function)(w, filter->data));
696 }
697 break;
698 /* ESC : Softbank Emoji */
699 case 2:
700 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
701 c == 0x24) {
702 filter->cache = c;
703 filter->status++;
704 } else {
705 filter->cache = 0;
706 filter->status = 0;
707 CK((*filter->output_function)(0x1b, filter->data));
708 goto retry;
709 }
710 break;
711
712 /* ESC $ : Softbank Emoji */
713 case 3:
714 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
715 ((c >= 0x45 && c <= 0x47) || (c >= 0x4f && c <= 0x51))) {
716 filter->cache = c;
717 filter->status++;
718 } else {
719 filter->cache = 0;
720 filter->status = 0;
721 CK((*filter->output_function)(0x1b, filter->data));
722 CK((*filter->output_function)(0x24, filter->data));
723 goto retry;
724 }
725 break;
726
727 /* ESC [GEFOPQ] : Softbank Emoji */
728 case 4:
729 w = 0;
730 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
731 c1 = filter->cache;
732
733 if (c == 0x0f) {
734 w = c;
735 filter->cache = 0;
736 filter->status = 0;
737 } else {
738 if (c1 == 0x47 && c >= 0x21 && c <= 0x7a) {
739 s1 = 0x91; s2 = c;
740 } else if (c1 == 0x45 && c >= 0x21 && c <= 0x7a) {
741 s1 = 0x8d; s2 = c;
742 } else if (c1 == 0x46 && c >= 0x21 && c <= 0x7a) {
743 s1 = 0x8e; s2 = c;
744 } else if (c1 == 0x4f && c >= 0x21 && c <= 0x6d) {
745 s1 = 0x92; s2 = c;
746 } else if (c1 == 0x50 && c >= 0x21 && c <= 0x6c) {
747 s1 = 0x95; s2 = c;
748 } else if (c1 == 0x51 && c >= 0x21 && c <= 0x5e) {
749 s1 = 0x96; s2 = c;
750 }
751 s = (s1 - 0x21)*94 + s2 - 0x21;
752 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
753 if (w > 0) {
754 if (snd > 0) {
755 CK((*filter->output_function)(snd, filter->data));
756 }
757 CK((*filter->output_function)(w, filter->data));
758 }
759 }
760 }
761
762 if (w <= 0) {
763 c1 = filter->cache;
764 filter->cache = 0;
765 filter->status = 0;
766 CK((*filter->output_function)(0x1b, filter->data));
767 CK((*filter->output_function)(0x24, filter->data));
768 CK((*filter->output_function)(c1 & 0xff, filter->data));
769 goto retry;
770 }
771 break;
772
773 default:
774 filter->status = 0;
775 break;
776 }
777
778 return c;
779 }
780
781 /*
782 * wchar => SJIS-win
783 */
784 int
mbfl_filt_conv_wchar_sjis_mobile(int c,mbfl_convert_filter * filter)785 mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
786 {
787 int c1, c2, s1, s2;
788
789 s1 = 0;
790 s2 = 0;
791 if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
792 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
793 } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
794 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
795 } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
796 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
797 } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
798 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
799 } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
800 s1 = c - 0xe000;
801 c1 = s1/94 + 0x7f;
802 c2 = s1%94 + 0x21;
803 s1 = (c1 << 8) | c2;
804 s2 = 1;
805 }
806 if (s1 <= 0) {
807 c1 = c & ~MBFL_WCSPLANE_MASK;
808 if (c1 == MBFL_WCSPLANE_WINCP932) {
809 s1 = c & MBFL_WCSPLANE_MASK;
810 s2 = 1;
811 } else if (c1 == MBFL_WCSPLANE_JIS0208) {
812 s1 = c & MBFL_WCSPLANE_MASK;
813 } else if (c1 == MBFL_WCSPLANE_JIS0212) {
814 s1 = c & MBFL_WCSPLANE_MASK;
815 s1 |= 0x8080;
816 } else if (c == 0xa5) { /* YEN SIGN */
817 s1 = 0x216f; /* FULLWIDTH YEN SIGN */
818 } else if (c == 0x203e) { /* OVER LINE */
819 s1 = 0x2131; /* FULLWIDTH MACRON */
820 } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
821 s1 = 0x2140;
822 } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
823 s1 = 0x2141;
824 } else if (c == 0x2225) { /* PARALLEL TO */
825 s1 = 0x2142;
826 } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
827 s1 = 0x215d;
828 } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
829 s1 = 0x2171;
830 } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
831 s1 = 0x2172;
832 } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
833 s1 = 0x224c;
834 }
835 }
836
837 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
838 s1 = -1;
839 c1 = 0;
840 c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
841 while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
842 if (c == cp932ext1_ucs_table[c1]) {
843 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
844 break;
845 }
846 c1++;
847 }
848 if (s1 <= 0) {
849 c1 = 0;
850 c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
851 while (c1 < c2) { /* CP932 vendor ext2 (115ku - 119ku) */
852 if (c == cp932ext2_ucs_table[c1]) {
853 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
854 break;
855 }
856 c1++;
857 }
858 }
859
860 if (s1 <= 0) {
861 c1 = 0;
862 c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
863 while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
864 if (c == cp932ext3_ucs_table[c1]) {
865 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
866 break;
867 }
868 c1++;
869 }
870 }
871 if (c == 0) {
872 s1 = 0;
873 } else if (s1 <= 0) {
874 s1 = -1;
875 }
876 }
877
878 if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
879 mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
880 (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
881 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
882 (filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
883 mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 )) {
884 CODE2JIS(c1,c2,s1,s2);
885 }
886
887 if (filter->status == 1 && filter->cache > 0) {
888 return c;
889 }
890
891 if (s1 >= 0) {
892 if (s1 < 0x100) { /* latin or kana */
893 CK((*filter->output_function)(s1, filter->data));
894 } else { /* kanji */
895 c1 = (s1 >> 8) & 0xff;
896 c2 = s1 & 0xff;
897 SJIS_ENCODE(c1, c2, s1, s2);
898 CK((*filter->output_function)(s1, filter->data));
899 CK((*filter->output_function)(s2, filter->data));
900 }
901 } else {
902 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
903 CK(mbfl_filt_conv_illegal_output(c, filter));
904 }
905 }
906
907 return c;
908 }
909
910 int
mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter * filter)911 mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
912 {
913 int c1 = filter->cache;
914 if (filter->status == 1 && (c1 == 0x0023 || (c1 >= 0x0030 && c1<=0x0039))) {
915 CK((*filter->output_function)(c1, filter->data));
916 }
917 filter->status = 0;
918 filter->cache = 0;
919
920 if (filter->flush_function != NULL) {
921 return (*filter->flush_function)(filter->data);
922 }
923
924 return 0;
925 }
926