1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * the source code included in this files was separated from mbfilter_sjis_open.c
26 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_sjis_mobile.h"
32
33 #include "unicode_table_cp932_ext.h"
34 #include "unicode_table_jis.h"
35
36 #include "emoji2uni.h"
37
38 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
39 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
40 extern const unsigned char mblen_table_sjis[];
41
/*
 * Alternative names under which each carrier-specific encoding can be
 * requested. Every list ends with a NULL sentinel.
 */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {
	"SJIS-DOCOMO",
	"shift_jis-imode",
	"x-sjis-emoji-docomo",
	NULL
};

static const char *mbfl_encoding_sjis_kddi_aliases[] = {
	"SJIS-KDDI",
	"shift_jis-kddi",
	"x-sjis-emoji-kddi",
	NULL
};

static const char *mbfl_encoding_sjis_sb_aliases[] = {
	"SJIS-SOFTBANK",
	"shift_jis-softbank",
	"x-sjis-emoji-softbank",
	NULL
};
45
/*
 * Encoding descriptors for the three carrier-specific flavours of mobile
 * Shift_JIS. They differ only in their identifier, display name, alias list
 * and conversion vtables; all three report "Shift_JIS" as their second name
 * (presumably the MIME name -- confirm against the mbfl_encoding definition)
 * and share the plain Shift_JIS byte-length table mblen_table_sjis.
 */

/* DoCoMo variant */
const mbfl_encoding mbfl_encoding_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	"SJIS-Mobile#DOCOMO",
	"Shift_JIS",
	(const char *(*)[])&mbfl_encoding_sjis_docomo_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_docomo_wchar,
	&vtbl_wchar_sjis_docomo
};

/* KDDI variant */
const mbfl_encoding mbfl_encoding_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	"SJIS-Mobile#KDDI",
	"Shift_JIS",
	(const char *(*)[])&mbfl_encoding_sjis_kddi_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_kddi_wchar,
	&vtbl_wchar_sjis_kddi
};

/* SoftBank variant */
const mbfl_encoding mbfl_encoding_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	"SJIS-Mobile#SOFTBANK",
	"Shift_JIS",
	(const char *(*)[])&mbfl_encoding_sjis_sb_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_sb_wchar,
	&vtbl_wchar_sjis_sb
};
78
/*
 * Identification vtables, one per carrier. Apart from the encoding
 * identifier they are the same: each uses the common constructor and the
 * plain Shift_JIS identify routine mbfl_filt_ident_sjis (declared extern
 * near the top of this file).
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_sjis
};

const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_sjis
};

const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_sjis
};
96
/*
 * Conversion vtables, two per carrier.
 *
 * vtbl_sjis_*_wchar (carrier bytes => wchar) all point at
 * mbfl_filt_conv_sjis_mobile_wchar and use the common flush.
 * vtbl_wchar_sjis_* (wchar => carrier bytes) all point at
 * mbfl_filt_conv_wchar_sjis_mobile and use mbfl_filt_conv_sjis_mobile_flush,
 * because that direction may still hold a cached character when the input
 * ends.
 *
 * The shared filter functions tell the carriers apart at run time through
 * filter->from->no_encoding / filter->to->no_encoding.
 */

/* DoCoMo => wchar */
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_common_flush,
	NULL,
};

/* wchar => DoCoMo */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* KDDI => wchar */
const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_common_flush,
	NULL,
};

/* wchar => KDDI */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* SoftBank => wchar */
const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
	mbfl_no_encoding_sjis_sb,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_common_flush,
	NULL,
};

/* wchar => SoftBank */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};
156
/*
 * National-flag emoji support. Row i of nflags_s holds a two-letter country
 * code (no NUL terminator); nflags_code_kddi[i] and nflags_code_sb[i] hold
 * the KDDI and SoftBank emoji codes for that same country.
 */
static const char nflags_s[10][2] = {
	{'C', 'N'},
	{'D', 'E'},
	{'E', 'S'},
	{'F', 'R'},
	{'G', 'B'},
	{'I', 'T'},
	{'J', 'P'},
	{'K', 'R'},
	{'R', 'U'},
	{'U', 'S'}
};

static const int nflags_code_kddi[10] = {
	0x2549,	/* CN */
	0x2546,	/* DE */
	0x24c0,	/* ES */
	0x2545,	/* FR */
	0x2548,	/* GB */
	0x2547,	/* IT */
	0x2750,	/* JP */
	0x254a,	/* KR */
	0x24c1,	/* RU */
	0x27f7	/* US */
};

static const int nflags_code_sb[10] = {
	0x2b0a,	/* CN */
	0x2b05,	/* DE */
	0x2b08,	/* ES */
	0x2b04,	/* FR */
	0x2b07,	/* GB */
	0x2b06,	/* IT */
	0x2b02,	/* JP */
	0x2b0b,	/* KR */
	0x2b09,	/* RU */
	0x2b03	/* US */
};
160
/*
 * Range tables relating carrier emoji codes to Private Use Area code points.
 * Every row is { first code, last code, value assigned to first code }, the
 * row layout that mbfilter_conv_map_tbl() and mbfilter_conv_r_map_tbl()
 * consume. (No caller in this file passes these tables; presumably they are
 * used from other translation units.)
 */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{ 0x28c2, 0x292f, 0xe63e },
	{ 0x2930, 0x2934, 0xe6ac },
	{ 0x2935, 0x2951, 0xe6b1 },
	{ 0x2952, 0x29db, 0xe6ce },
};

const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{ 0x26ec, 0x2838, 0xe468 },
	{ 0x284c, 0x2863, 0xe5b5 },
	{ 0x24b8, 0x24ca, 0xe5cd },
	{ 0x24cb, 0x2545, 0xea80 },
	{ 0x2839, 0x284b, 0xeafb },
	{ 0x2546, 0x25c0, 0xeb0e },
	{ 0x25c1, 0x25c6, 0xeb89 },
};

const unsigned short mbfl_sb2uni_pua[6][3] = {
	{ 0x27a9, 0x2802, 0xe101 },
	{ 0x2808, 0x2861, 0xe201 },
	{ 0x2921, 0x297a, 0xe001 },
	{ 0x2980, 0x29cc, 0xe301 },
	{ 0x2a99, 0x2ae4, 0xe401 },
	{ 0x2af8, 0x2b35, 0xe501 },
};

/* Second KDDI range table, targeting a different PUA block. */
const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{ 0x24b8, 0x24f6, 0xec40 },
	{ 0x24f7, 0x2573, 0xec80 },
	{ 0x2574, 0x25b2, 0xed40 },
	{ 0x25b3, 0x25c6, 0xed80 },
	{ 0x26ec, 0x272a, 0xef40 },
	{ 0x272b, 0x27a7, 0xef80 },
	{ 0x27a8, 0x27e6, 0xf040 },
	{ 0x27e7, 0x2863, 0xf080 },
};
197
/*
 * Regional Indicator Symbol for an ASCII capital letter:
 * NFLAGS('A') == 0x1F1A5 + 0x41 == U+1F1E6.
 */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Leave the enclosing function with -1 as soon as a call reports failure. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/*
 * JIS row/cell bytes (c1, c2) => Shift_JIS lead/trail bytes (s1, s2).
 * s1 and s2 must be lvalues; c1 and c2 are evaluated more than once.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
	do {						\
		s1 = (c1);				\
		s1--;					\
		s1 >>= 1;				\
		if ((c1) < 0x5f) {		\
			s1 += 0x71;			\
		} else {				\
			s1 += 0xb1;			\
		}						\
		s2 = (c2);				\
		if ((c1) & 1) {			\
			if ((c2) < 0x60) {	\
				s2--;			\
			}					\
			s2 += 0x20;			\
		} else {				\
			s2 += 0x7e;			\
		}						\
	} while (0)

/*
 * Shift_JIS lead/trail bytes (c1, c2) => JIS row/cell bytes (s1, s2).
 * s1 and s2 must be lvalues.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
	do {						\
		s1 = (c1);				\
		if (s1 < 0xa0) {		\
			s1 -= 0x81;			\
		} else {				\
			s1 -= 0xc1;			\
		}						\
		s1 <<= 1;				\
		s1 += 0x21;				\
		s2 = (c2);				\
		if (s2 < 0x9f) {		\
			if (s2 < 0x7f) {	\
				s2++;			\
			}					\
			s2 -= 0x20;			\
		} else {				\
			s1++;				\
			s2 -= 0x7e;			\
		}						\
	} while (0)

/*
 * Turn the linear index held in s1 (row*94 + cell, zero based) into a packed
 * JIS code (row byte << 8 | cell byte), using c1/c2 as scratch, and set s2
 * to 1. All four arguments must be lvalues.
 *
 * Wrapped in do { } while (0) so the four statements stay together when the
 * macro is the body of an unbraced if/else.
 */
#define CODE2JIS(c1,c2,s1,s2)	\
	do {						\
		c1 = (s1)/94+0x21;		\
		c2 = (s1)-94*((c1)-0x21)+0x21;	\
		s1 = ((c1) << 8) | (c2);	\
		s2 = 1;					\
	} while (0)
250
/*
 * Forward range lookup.
 *
 * map holds n rows of { first, last, base }. The first row with
 * first <= c <= last wins: *w receives base + (c - first) and 1 is
 * returned. When no row covers c, *w is left untouched and 0 is returned.
 */
int
mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int idx = 0;

	while (idx < n) {
		const unsigned short *row = map[idx++];

		if (c >= row[0] && c <= row[1]) {
			*w = c - row[0] + row[2];
			return 1;
		}
	}

	return 0;
}
265
/*
 * Reverse range lookup, the inverse of mbfilter_conv_map_tbl().
 *
 * map holds n rows of { first, last, base }. The first row whose image
 * base .. base + (last - first) contains c wins: *w receives
 * first + (c - base) and 1 is returned. When no row matches, *w is left
 * untouched and 0 is returned.
 */
int
mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int idx;

	for (idx = 0; idx < n; idx++) {
		const unsigned short *row = map[idx];
		int image_lo = row[2];
		int image_hi = row[2] - row[0] + row[1];

		if (c < image_lo || c > image_hi) {
			continue;
		}
		*w = c + row[0] - row[2];
		return 1;
	}

	return 0;
}
280
281 int
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)282 mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
283 {
284 int w = s;
285 if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
286 if (s >= mb_tbl_code2uni_docomo1_min + 0x00a2 &&
287 s <= mb_tbl_code2uni_docomo1_min + 0x00ad &&
288 s != mb_tbl_code2uni_docomo1_min + 0x00a3) {
289 w = 0x20E3;
290 *snd = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
291 if (*snd > 0xf000) {
292 *snd += 0x10000;
293 }
294 } else {
295 w = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
296 if (w > 0xf000) {
297 w += 0x10000;
298 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
299 w += 0xf0000;
300 }
301 *snd = 0;
302 if (!w) {
303 w = s;
304 }
305 }
306 }
307
308 return w;
309 }
310
311 int
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)312 mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
313 {
314 int w = s, si, c;
315 const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
316
317 *snd = 0;
318 if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
319 si = s - mb_tbl_code2uni_kddi1_min;
320 if (si == 0x0008) { /* ES */
321 *snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
322 } else if (si == 0x0009) { /* RU */
323 *snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
324 } else if (si >= 0x008d && si <= 0x0092) {
325 c = nflags_order_kddi[si-0x008d];
326 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
327 } else if (si == 0x0104) {
328 *snd = 0x0023; w = 0x20E3;
329 } else {
330 w = mb_tbl_code2uni_kddi1[si];
331 if (w > 0xf000) {
332 w += 0x10000;
333 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
334 w += 0xf0000;
335 }
336 }
337 } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
338 si = s - mb_tbl_code2uni_kddi2_min;
339 if (si == 100) { /* JP */
340 *snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
341 } else if (si >= 0x00ba && si <= 0x00c2) {
342 *snd = si-0x00ba+0x0031; w = 0x20E3;
343 } else if (si == 0x010b) { /* US */
344 *snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
345 } else if (si == 0x0144) {
346 *snd = 0x0030; w = 0x20E3;
347 } else {
348 w = mb_tbl_code2uni_kddi2[si];
349 if (w > 0xf000) {
350 w += 0x10000;
351 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
352 w += 0xf0000;
353 }
354 }
355 }
356 return w;
357 }
358
359 int
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)360 mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
361 {
362 int w = s, si, c;
363 const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
364
365 *snd = 0;
366 if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
367 si = s - mb_tbl_code2uni_sb1_min;
368 if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
369 *snd = mb_tbl_code2uni_sb1[si];
370 if (*snd > 0xf000) {
371 *snd += 0x10000;
372 }
373 w = 0x20E3;
374 } else {
375 w = mb_tbl_code2uni_sb1[si];
376 if (w > 0xf000) {
377 w += 0x10000;
378 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
379 w += 0xf0000;
380 }
381 }
382 } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
383 si = s - mb_tbl_code2uni_sb2_min;
384 w = mb_tbl_code2uni_sb2[si];
385 if (w > 0xf000) {
386 w += 0x10000;
387 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
388 w += 0xf0000;
389 }
390 } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
391 si = s - mb_tbl_code2uni_sb3_min;
392 if (si >= 0x0069 && si <= 0x0072) {
393 c = nflags_order_sb[si-0x0069];
394 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
395 } else {
396 w = mb_tbl_code2uni_sb3[si];
397 if (w > 0xf000) {
398 w += 0x10000;
399 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
400 w += 0xf0000;
401 }
402 }
403 }
404 return w;
405 }
406
407 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)408 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
409 {
410 int i, match = 0, c1s;
411
412 if (filter->status == 1) {
413 c1s = filter->cache;
414 filter->cache = 0;
415 filter->status = 0;
416 if (c == 0x20E3) {
417 if (c1s == 0x0023) {
418 *s1 = 0x2964;
419 match = 1;
420 } else if (c1s == 0x0030) {
421 *s1 = 0x296f;
422 match = 1;
423 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
424 *s1 = 0x2966 + (c1s - 0x0031);
425 match = 1;
426 }
427 } else {
428 CK((*filter->output_function)(c1s, filter->data));
429 }
430 } else {
431 if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
432 filter->status = 1;
433 filter->cache = c;
434 *s1 = -1;
435 return match;
436 }
437
438 if (c == 0x00A9) {
439 *s1 = 0x29b5; match = 1;
440 } else if (c == 0x00AE) {
441 *s1 = 0x29ba; match = 1;
442 } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
443 i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
444 if (i >= 0) {
445 *s1 = mb_tbl_uni_docomo2code2_value[i];
446 match = 1;
447 }
448 } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
449 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
450 if (i >= 0) {
451 *s1 = mb_tbl_uni_docomo2code3_value[i];
452 match = 1;
453 }
454 } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
455 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
456 if (i >= 0) {
457 *s1 = mb_tbl_uni_docomo2code5_val[i];
458 match = 1;
459 }
460 }
461 }
462
463 return match;
464 }
465
466 int
mbfilter_unicode2sjis_emoji_kddi(int c,int * s1,mbfl_convert_filter * filter)467 mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
468 {
469 int i, match = 0, c1s;
470
471 if (filter->status == 1) {
472 c1s = filter->cache;
473 filter->cache = 0;
474 filter->status = 0;
475 if (c == 0x20E3) {
476 if (c1s == 0x0023) {
477 *s1 = 0x25bc;
478 match = 1;
479 } else if (c1s == 0x0030) {
480 *s1 = 0x2830;
481 match = 1;
482 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
483 *s1 = 0x27a6 + (c1s - 0x0031);
484 match = 1;
485 }
486 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
487 for (i=0; i<10; i++) {
488 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
489 *s1 = nflags_code_kddi[i];
490 match = 1;
491 break;
492 }
493 }
494 } else {
495 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
496 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
497 CK((*filter->output_function)(c1s, filter->data));
498 }
499 }
500 } else {
501 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
502 (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
503 filter->status = 1;
504 filter->cache = c;
505 *s1 = -1;
506 return match;
507 }
508
509 if (c == 0x00A9) {
510 *s1 = 0x27dc; match = 1;
511 } else if (c == 0x00AE) {
512 *s1 = 0x27dd; match = 1;
513 } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
514 i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
515 if (i >= 0) {
516 *s1 = mb_tbl_uni_kddi2code2_value[i];
517 match = 1;
518 }
519 } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
520 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
521 if (i >= 0) {
522 *s1 = mb_tbl_uni_kddi2code3_value[i];
523 match = 1;
524 }
525 } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
526 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
527 if (i >= 0) {
528 *s1 = mb_tbl_uni_kddi2code5_val[i];
529 match = 1;
530 }
531 }
532 }
533
534 return match;
535 }
536
537 int
mbfilter_unicode2sjis_emoji_sb(int c,int * s1,mbfl_convert_filter * filter)538 mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
539 {
540 int i, match = 0, c1s;
541
542 if (filter->status == 1) {
543 filter->status = 0;
544 c1s = filter->cache;
545 filter->cache = 0;
546 if (c == 0x20E3) {
547 if (c1s == 0x0023) {
548 *s1 = 0x2817;
549 match = 1;
550 } else if (c1s == 0x0030) {
551 *s1 = 0x282c;
552 match = 1;
553 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
554 *s1 = 0x2823 + (c1s - 0x0031);
555 match = 1;
556 }
557 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
558 for (i=0; i<10; i++) {
559 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
560 *s1 = nflags_code_sb[i];
561 match = 1;
562 break;
563 }
564 }
565 } else {
566 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
567 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
568 CK((*filter->output_function)(c1s, filter->data));
569 }
570 }
571 } else {
572 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
573 filter->status = 1;
574 filter->cache = c;
575 *s1 = -1;
576 return match;
577 }
578
579 if (c == 0x00A9) {
580 *s1 = 0x2855; match = 1;
581 } else if (c == 0x00AE) {
582 *s1 = 0x2856; match = 1;
583 } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
584 i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
585 if (i >= 0) {
586 *s1 = mb_tbl_uni_sb2code2_value[i];
587 match = 1;
588 }
589 } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
590 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
591 if (i >= 0) {
592 *s1 = mb_tbl_uni_sb2code3_value[i];
593 match = 1;
594 }
595 } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
596 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
597 if (i >= 0) {
598 *s1 = mb_tbl_uni_sb2code5_val[i];
599 match = 1;
600 }
601 }
602 }
603 return match;
604 }
605
606 /*
607 * SJIS-win => wchar
608 */
609 int
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)610 mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
611 {
612 int c1, s, s1 = 0, s2 = 0, w;
613 int snd = 0;
614
615 retry:
616 switch (filter->status) {
617 case 0:
618 if (c >= 0 && c < 0x80) { /* latin */
619 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb && c == 0x1b) {
620 filter->cache = c;
621 filter->status = 2;
622 } else {
623 CK((*filter->output_function)(c, filter->data));
624 }
625 } else if (c > 0xa0 && c < 0xe0) { /* kana */
626 CK((*filter->output_function)(0xfec0 + c, filter->data));
627 } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
628 filter->status = 1;
629 filter->cache = c;
630 } else {
631 w = c & MBFL_WCSGROUP_MASK;
632 w |= MBFL_WCSGROUP_THROUGH;
633 CK((*filter->output_function)(w, filter->data));
634 }
635 break;
636
637 case 1: /* kanji second char */
638 filter->status = 0;
639 c1 = filter->cache;
640 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
641 w = 0;
642 SJIS_DECODE(c1, c, s1, s2);
643 s = (s1 - 0x21)*94 + s2 - 0x21;
644 if (s <= 137) {
645 if (s == 31) {
646 w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
647 } else if (s == 32) {
648 w = 0xff5e; /* FULLWIDTH TILDE */
649 } else if (s == 33) {
650 w = 0x2225; /* PARALLEL TO */
651 } else if (s == 60) {
652 w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
653 } else if (s == 80) {
654 w = 0xffe0; /* FULLWIDTH CENT SIGN */
655 } else if (s == 81) {
656 w = 0xffe1; /* FULLWIDTH POUND SIGN */
657 } else if (s == 137) {
658 w = 0xffe2; /* FULLWIDTH NOT SIGN */
659 }
660 }
661 if (w == 0) {
662 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
663 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
664 } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
665 w = jisx0208_ucs_table[s];
666 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
667 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
668 } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
669 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
670 } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
671 w = s - (94*94) + 0xe000;
672 }
673
674 if (s >= (94*94) && s < 119*94) {
675 if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
676 w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
677 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
678 w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
679 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
680 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
681 }
682
683 if (w > 0 && snd > 0) {
684 CK((*filter->output_function)(snd, filter->data));
685 }
686 }
687 }
688 if (w <= 0) {
689 w = (s1 << 8) | s2;
690 w &= MBFL_WCSPLANE_MASK;
691 w |= MBFL_WCSPLANE_WINCP932;
692 }
693 CK((*filter->output_function)(w, filter->data));
694 } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
695 CK((*filter->output_function)(c, filter->data));
696 } else {
697 w = (c1 << 8) | c;
698 w &= MBFL_WCSGROUP_MASK;
699 w |= MBFL_WCSGROUP_THROUGH;
700 CK((*filter->output_function)(w, filter->data));
701 }
702 break;
703 /* ESC : Softbank Emoji */
704 case 2:
705 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
706 c == 0x24) {
707 filter->cache = c;
708 filter->status++;
709 } else {
710 filter->cache = 0;
711 filter->status = 0;
712 CK((*filter->output_function)(0x1b, filter->data));
713 goto retry;
714 }
715 break;
716
717 /* ESC $ : Softbank Emoji */
718 case 3:
719 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
720 ((c >= 0x45 && c <= 0x47) || (c >= 0x4f && c <= 0x51))) {
721 filter->cache = c;
722 filter->status++;
723 } else {
724 filter->cache = 0;
725 filter->status = 0;
726 CK((*filter->output_function)(0x1b, filter->data));
727 CK((*filter->output_function)(0x24, filter->data));
728 goto retry;
729 }
730 break;
731
732 /* ESC [GEFOPQ] : Softbank Emoji */
733 case 4:
734 w = 0;
735 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
736 c1 = filter->cache;
737
738 if (c == 0x0f) {
739 w = c;
740 filter->cache = 0;
741 filter->status = 0;
742 } else {
743 if (c1 == 0x47 && c >= 0x21 && c <= 0x7a) {
744 s1 = 0x91; s2 = c;
745 } else if (c1 == 0x45 && c >= 0x21 && c <= 0x7a) {
746 s1 = 0x8d; s2 = c;
747 } else if (c1 == 0x46 && c >= 0x21 && c <= 0x7a) {
748 s1 = 0x8e; s2 = c;
749 } else if (c1 == 0x4f && c >= 0x21 && c <= 0x6d) {
750 s1 = 0x92; s2 = c;
751 } else if (c1 == 0x50 && c >= 0x21 && c <= 0x6c) {
752 s1 = 0x95; s2 = c;
753 } else if (c1 == 0x51 && c >= 0x21 && c <= 0x5e) {
754 s1 = 0x96; s2 = c;
755 }
756 s = (s1 - 0x21)*94 + s2 - 0x21;
757 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
758 if (w > 0) {
759 if (snd > 0) {
760 CK((*filter->output_function)(snd, filter->data));
761 }
762 CK((*filter->output_function)(w, filter->data));
763 }
764 }
765 }
766
767 if (w <= 0) {
768 c1 = filter->cache;
769 filter->cache = 0;
770 filter->status = 0;
771 CK((*filter->output_function)(0x1b, filter->data));
772 CK((*filter->output_function)(0x24, filter->data));
773 CK((*filter->output_function)(c1 & 0xff, filter->data));
774 goto retry;
775 }
776 break;
777
778 default:
779 filter->status = 0;
780 break;
781 }
782
783 return c;
784 }
785
786 /*
787 * wchar => SJIS-win
788 */
789 int
mbfl_filt_conv_wchar_sjis_mobile(int c,mbfl_convert_filter * filter)790 mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
791 {
792 int c1, c2, s1, s2;
793
794 s1 = 0;
795 s2 = 0;
796 if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
797 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
798 } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
799 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
800 } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
801 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
802 } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
803 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
804 } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
805 s1 = c - 0xe000;
806 c1 = s1/94 + 0x7f;
807 c2 = s1%94 + 0x21;
808 s1 = (c1 << 8) | c2;
809 s2 = 1;
810 }
811 if (s1 <= 0) {
812 c1 = c & ~MBFL_WCSPLANE_MASK;
813 if (c1 == MBFL_WCSPLANE_WINCP932) {
814 s1 = c & MBFL_WCSPLANE_MASK;
815 s2 = 1;
816 } else if (c1 == MBFL_WCSPLANE_JIS0208) {
817 s1 = c & MBFL_WCSPLANE_MASK;
818 } else if (c1 == MBFL_WCSPLANE_JIS0212) {
819 s1 = c & MBFL_WCSPLANE_MASK;
820 s1 |= 0x8080;
821 } else if (c == 0xa5) { /* YEN SIGN */
822 s1 = 0x216f; /* FULLWIDTH YEN SIGN */
823 } else if (c == 0x203e) { /* OVER LINE */
824 s1 = 0x2131; /* FULLWIDTH MACRON */
825 } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
826 s1 = 0x2140;
827 } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
828 s1 = 0x2141;
829 } else if (c == 0x2225) { /* PARALLEL TO */
830 s1 = 0x2142;
831 } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
832 s1 = 0x215d;
833 } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
834 s1 = 0x2171;
835 } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
836 s1 = 0x2172;
837 } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
838 s1 = 0x224c;
839 }
840 }
841
842 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
843 s1 = -1;
844 c1 = 0;
845 c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
846 while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
847 if (c == cp932ext1_ucs_table[c1]) {
848 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
849 break;
850 }
851 c1++;
852 }
853 if (s1 <= 0) {
854 c1 = 0;
855 c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
856 while (c1 < c2) { /* CP932 vendor ext2 (115ku - 119ku) */
857 if (c == cp932ext2_ucs_table[c1]) {
858 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
859 break;
860 }
861 c1++;
862 }
863 }
864
865 if (s1 <= 0) {
866 c1 = 0;
867 c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
868 while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
869 if (c == cp932ext3_ucs_table[c1]) {
870 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
871 break;
872 }
873 c1++;
874 }
875 }
876 if (c == 0) {
877 s1 = 0;
878 } else if (s1 <= 0) {
879 s1 = -1;
880 }
881 }
882
883 if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
884 mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
885 (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
886 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
887 (filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
888 mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 )) {
889 CODE2JIS(c1,c2,s1,s2);
890 }
891
892 if (filter->status == 1 && filter->cache > 0) {
893 return c;
894 }
895
896 if (s1 >= 0) {
897 if (s1 < 0x100) { /* latin or kana */
898 CK((*filter->output_function)(s1, filter->data));
899 } else { /* kanji */
900 c1 = (s1 >> 8) & 0xff;
901 c2 = s1 & 0xff;
902 SJIS_ENCODE(c1, c2, s1, s2);
903 CK((*filter->output_function)(s1, filter->data));
904 CK((*filter->output_function)(s2, filter->data));
905 }
906 } else {
907 CK(mbfl_filt_conv_illegal_output(c, filter));
908 }
909
910 return c;
911 }
912
913 int
mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter * filter)914 mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
915 {
916 int c1 = filter->cache;
917 if (filter->status == 1 && (c1 == 0x0023 || (c1 >= 0x0030 && c1<=0x0039))) {
918 CK((*filter->output_function)(c1, filter->data));
919 }
920 filter->status = 0;
921 filter->cache = 0;
922
923 if (filter->flush_function != NULL) {
924 return (*filter->flush_function)(filter->data);
925 }
926
927 return 0;
928 }
929