1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
/*
 * The source code in this file was separated from mbfilter_sjis_open.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
 *
 */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include "mbfilter.h"
35 #include "mbfilter_sjis_mobile.h"
36
37 #include "unicode_table_cp932_ext.h"
38 #include "unicode_table_jis.h"
39
40 #include "emoji2uni.h"
41
42 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
43 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
44 extern const unsigned char mblen_table_sjis[];
45
/* NULL-terminated alias lists, one per carrier-specific Shift_JIS variant. */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {
	"SJIS-DOCOMO",
	"shift_jis-imode",
	"x-sjis-emoji-docomo",
	NULL
};

static const char *mbfl_encoding_sjis_kddi_aliases[] = {
	"SJIS-KDDI",
	"shift_jis-kddi",
	"x-sjis-emoji-kddi",
	NULL
};

static const char *mbfl_encoding_sjis_sb_aliases[] = {
	"SJIS-SOFTBANK",
	"shift_jis-softbank",
	"x-sjis-emoji-softbank",
	NULL
};
49
50 const mbfl_encoding mbfl_encoding_sjis_docomo = {
51 mbfl_no_encoding_sjis_docomo,
52 "SJIS-Mobile#DOCOMO",
53 "Shift_JIS",
54 (const char *(*)[])&mbfl_encoding_sjis_docomo_aliases,
55 mblen_table_sjis,
56 MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
57 &vtbl_sjis_docomo_wchar,
58 &vtbl_wchar_sjis_docomo
59 };
60
61 const mbfl_encoding mbfl_encoding_sjis_kddi = {
62 mbfl_no_encoding_sjis_kddi,
63 "SJIS-Mobile#KDDI",
64 "Shift_JIS",
65 (const char *(*)[])&mbfl_encoding_sjis_kddi_aliases,
66 mblen_table_sjis,
67 MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
68 &vtbl_sjis_kddi_wchar,
69 &vtbl_wchar_sjis_kddi
70 };
71
72 const mbfl_encoding mbfl_encoding_sjis_sb = {
73 mbfl_no_encoding_sjis_sb,
74 "SJIS-Mobile#SOFTBANK",
75 "Shift_JIS",
76 (const char *(*)[])&mbfl_encoding_sjis_sb_aliases,
77 mblen_table_sjis,
78 MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
79 &vtbl_sjis_sb_wchar,
80 &vtbl_wchar_sjis_sb
81 };
82
83 const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
84 mbfl_no_encoding_sjis_docomo,
85 mbfl_filt_ident_common_ctor,
86 mbfl_filt_ident_common_dtor,
87 mbfl_filt_ident_sjis
88 };
89
90 const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
91 mbfl_no_encoding_sjis_kddi,
92 mbfl_filt_ident_common_ctor,
93 mbfl_filt_ident_common_dtor,
94 mbfl_filt_ident_sjis
95 };
96
97 const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
98 mbfl_no_encoding_sjis_sb,
99 mbfl_filt_ident_common_ctor,
100 mbfl_filt_ident_common_dtor,
101 mbfl_filt_ident_sjis
102 };
103
104 const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
105 mbfl_no_encoding_sjis_docomo,
106 mbfl_no_encoding_wchar,
107 mbfl_filt_conv_common_ctor,
108 mbfl_filt_conv_common_dtor,
109 mbfl_filt_conv_sjis_mobile_wchar,
110 mbfl_filt_conv_common_flush
111 };
112
113 const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
114 mbfl_no_encoding_wchar,
115 mbfl_no_encoding_sjis_docomo,
116 mbfl_filt_conv_common_ctor,
117 mbfl_filt_conv_common_dtor,
118 mbfl_filt_conv_wchar_sjis_mobile,
119 mbfl_filt_conv_sjis_mobile_flush
120 };
121
122 const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
123 mbfl_no_encoding_sjis_kddi,
124 mbfl_no_encoding_wchar,
125 mbfl_filt_conv_common_ctor,
126 mbfl_filt_conv_common_dtor,
127 mbfl_filt_conv_sjis_mobile_wchar,
128 mbfl_filt_conv_common_flush
129 };
130
131 const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
132 mbfl_no_encoding_wchar,
133 mbfl_no_encoding_sjis_kddi,
134 mbfl_filt_conv_common_ctor,
135 mbfl_filt_conv_common_dtor,
136 mbfl_filt_conv_wchar_sjis_mobile,
137 mbfl_filt_conv_sjis_mobile_flush
138 };
139
140 const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
141 mbfl_no_encoding_sjis_sb,
142 mbfl_no_encoding_wchar,
143 mbfl_filt_conv_common_ctor,
144 mbfl_filt_conv_common_dtor,
145 mbfl_filt_conv_sjis_mobile_wchar,
146 mbfl_filt_conv_common_flush
147 };
148
149 const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
150 mbfl_no_encoding_wchar,
151 mbfl_no_encoding_sjis_sb,
152 mbfl_filt_conv_common_ctor,
153 mbfl_filt_conv_common_dtor,
154 mbfl_filt_conv_wchar_sjis_mobile,
155 mbfl_filt_conv_sjis_mobile_flush
156 };
157
/*
 * National-flag support tables. Entry i of all three arrays describes the
 * same flag: nflags_s[i] holds the two letters of the country code (no NUL
 * terminator), and nflags_code_kddi[i] / nflags_code_sb[i] hold the code the
 * KDDI / SoftBank encoders store for that letter pair.
 */
static const char nflags_s[10][2] = {
	{'C', 'N'}, {'D', 'E'}, {'E', 'S'}, {'F', 'R'}, {'G', 'B'},
	{'I', 'T'}, {'J', 'P'}, {'K', 'R'}, {'R', 'U'}, {'U', 'S'}
};

static const int nflags_code_kddi[10] = {
	0x2549, 0x2546, 0x24c0, 0x2545, 0x2548,
	0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7
};

static const int nflags_code_sb[10] = {
	0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07,
	0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03
};
161
/*
 * Carrier code <=> Unicode Private Use Area range tables.
 *
 * Each row is {first code, last code, first PUA code point}, which is the
 * row layout mbfilter_conv_map_tbl() / mbfilter_conv_r_map_tbl() operate on.
 */

/* NTT DoCoMo */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{0x28c2, 0x292f, 0xe63e},
	{0x2930, 0x2934, 0xe6ac},
	{0x2935, 0x2951, 0xe6b1},
	{0x2952, 0x29db, 0xe6ce},
};

/* KDDI */
const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{0x26ec, 0x2838, 0xe468},
	{0x284c, 0x2863, 0xe5b5},
	{0x24b8, 0x24ca, 0xe5cd},
	{0x24cb, 0x2545, 0xea80},
	{0x2839, 0x284b, 0xeafb},
	{0x2546, 0x25c0, 0xeb0e},
	{0x25c1, 0x25c6, 0xeb89},
};

/* SoftBank */
const unsigned short mbfl_sb2uni_pua[6][3] = {
	{0x27a9, 0x2802, 0xe101},
	{0x2808, 0x2861, 0xe201},
	{0x2921, 0x297a, 0xe001},
	{0x2980, 0x29cc, 0xe301},
	{0x2a99, 0x2ae4, 0xe401},
	{0x2af8, 0x2b35, 0xe501},
};

/* KDDI, second PUA layout (0xec40 - 0xf0xx) */
const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{0x24b8, 0x24f6, 0xec40},
	{0x24f7, 0x2573, 0xec80},
	{0x2574, 0x25b2, 0xed40},
	{0x25b3, 0x25c6, 0xed80},
	{0x26ec, 0x272a, 0xef40},
	{0x272b, 0x27a7, 0xef80},
	{0x27a8, 0x27e6, 0xf040},
	{0x27e7, 0x2863, 0xf080},
};
198
/*
 * Map an ASCII capital letter to its Unicode regional indicator symbol:
 * 0x1F1A5 + 'A' (0x41) == U+1F1E6.
 */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Propagate a negative result from an output call: return -1 from the caller. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/*
 * Convert a JIS row/column byte pair (c1, c2) into a Shift_JIS lead/trail
 * byte pair (s1, s2). s1 and s2 must be assignable lvalues.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
		do {						\
			(s1) = (c1);				\
			(s1)--;					\
			(s1) >>= 1;				\
			if ((c1) < 0x5f) {			\
				(s1) += 0x71;			\
			} else {				\
				(s1) += 0xb1;			\
			}					\
			(s2) = (c2);				\
			if ((c1) & 1) {				\
				if ((c2) < 0x60) {		\
					(s2)--;			\
				}				\
				(s2) += 0x20;			\
			} else {				\
				(s2) += 0x7e;			\
			}					\
		} while (0)

/*
 * Convert a Shift_JIS lead/trail byte pair (c1, c2) into a JIS row/column
 * byte pair (s1, s2). s1 and s2 must be assignable lvalues.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
		do {						\
			(s1) = (c1);				\
			if ((s1) < 0xa0) {			\
				(s1) -= 0x81;			\
			} else {				\
				(s1) -= 0xc1;			\
			}					\
			(s1) <<= 1;				\
			(s1) += 0x21;				\
			(s2) = (c2);				\
			if ((s2) < 0x9f) {			\
				if ((s2) < 0x7f) {		\
					(s2)++;			\
				}				\
				(s2) -= 0x20;			\
			} else {				\
				(s1)++;				\
				(s2) -= 0x7e;			\
			}					\
		} while (0)

/*
 * Turn a linear 94x94 cell index held in s1 into a packed JIS code
 * ((row byte << 8) | column byte), again stored in s1, and set s2 to 1.
 * c1 and c2 are scratch lvalues that receive the row and column bytes.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * even under an unbraced if/else.
 */
#define CODE2JIS(c1,c2,s1,s2)	\
		do {						\
			(c1) = (s1)/94+0x21;			\
			(c2) = (s1)-94*((c1)-0x21)+0x21;	\
			(s1) = ((c1) << 8) | (c2);		\
			(s2) = 1;				\
		} while (0)
251
/*
 * Forward range-table lookup.
 *
 * Each of the n rows of map is {first, last, base}. If c lies in
 * [first, last] of some row, *w receives base + (c - first) for the first
 * such row and 1 is returned. Otherwise *w is left untouched and 0 is
 * returned.
 */
int
mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int idx;

	for (idx = 0; idx < n; idx++) {
		const unsigned short *range = map[idx];

		if (c < range[0] || c > range[1]) {
			continue;
		}
		*w = c - range[0] + range[2];
		return 1;
	}

	return 0;
}
266
/*
 * Reverse range-table lookup (inverse of mbfilter_conv_map_tbl).
 *
 * Each of the n rows of map is {first, last, base}. If c lies in
 * [base, base + (last - first)] of some row, *w receives
 * first + (c - base) for the first such row and 1 is returned. Otherwise
 * *w is left untouched and 0 is returned.
 */
int
mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int idx;

	for (idx = 0; idx < n; idx++) {
		const unsigned short *range = map[idx];
		const int mapped_last = range[2] - range[0] + range[1];

		if (range[2] <= c && c <= mapped_last) {
			*w = c + range[0] - range[2];
			return 1;
		}
	}

	return 0;
}
281
282 int
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)283 mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
284 {
285 int w = s;
286 if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
287 if (s >= mb_tbl_code2uni_docomo1_min + 0x00a2 &&
288 s <= mb_tbl_code2uni_docomo1_min + 0x00ad &&
289 s != mb_tbl_code2uni_docomo1_min + 0x00a3) {
290 w = 0x20E3;
291 *snd = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
292 if (*snd > 0xf000) {
293 *snd += 0x10000;
294 }
295 } else {
296 w = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
297 if (w > 0xf000) {
298 w += 0x10000;
299 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
300 w += 0xf0000;
301 }
302 *snd = 0;
303 if (!w) {
304 w = s;
305 }
306 }
307 }
308
309 return w;
310 }
311
312 int
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)313 mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
314 {
315 int w = s, si, c;
316 const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
317
318 *snd = 0;
319 if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
320 si = s - mb_tbl_code2uni_kddi1_min;
321 if (si == 0x0008) { /* ES */
322 *snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
323 } else if (si == 0x0009) { /* RU */
324 *snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
325 } else if (si >= 0x008d && si <= 0x0092) {
326 c = nflags_order_kddi[si-0x008d];
327 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
328 } else if (si == 0x0104) {
329 *snd = 0x0023; w = 0x20E3;
330 } else {
331 w = mb_tbl_code2uni_kddi1[si];
332 if (w > 0xf000) {
333 w += 0x10000;
334 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
335 w += 0xf0000;
336 }
337 }
338 } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
339 si = s - mb_tbl_code2uni_kddi2_min;
340 if (si == 100) { /* JP */
341 *snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
342 } else if (si >= 0x00ba && si <= 0x00c2) {
343 *snd = si-0x00ba+0x0031; w = 0x20E3;
344 } else if (si == 0x010b) { /* US */
345 *snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
346 } else if (si == 0x0144) {
347 *snd = 0x0030; w = 0x20E3;
348 } else {
349 w = mb_tbl_code2uni_kddi2[si];
350 if (w > 0xf000) {
351 w += 0x10000;
352 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
353 w += 0xf0000;
354 }
355 }
356 }
357 return w;
358 }
359
360 int
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)361 mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
362 {
363 int w = s, si, c;
364 const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
365
366 *snd = 0;
367 if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
368 si = s - mb_tbl_code2uni_sb1_min;
369 if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
370 *snd = mb_tbl_code2uni_sb1[si];
371 if (*snd > 0xf000) {
372 *snd += 0x10000;
373 }
374 w = 0x20E3;
375 } else {
376 w = mb_tbl_code2uni_sb1[si];
377 if (w > 0xf000) {
378 w += 0x10000;
379 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
380 w += 0xf0000;
381 }
382 }
383 } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
384 si = s - mb_tbl_code2uni_sb2_min;
385 w = mb_tbl_code2uni_sb2[si];
386 if (w > 0xf000) {
387 w += 0x10000;
388 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
389 w += 0xf0000;
390 }
391 } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
392 si = s - mb_tbl_code2uni_sb3_min;
393 if (si >= 0x0069 && si <= 0x0072) {
394 c = nflags_order_sb[si-0x0069];
395 *snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
396 } else {
397 w = mb_tbl_code2uni_sb3[si];
398 if (w > 0xf000) {
399 w += 0x10000;
400 } else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
401 w += 0xf0000;
402 }
403 }
404 }
405 return w;
406 }
407
408 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)409 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
410 {
411 int i, match = 0, c1s;
412
413 if (filter->status == 1) {
414 c1s = filter->cache;
415 filter->cache = 0;
416 filter->status = 0;
417 if (c == 0x20E3) {
418 if (c1s == 0x0023) {
419 *s1 = 0x2964;
420 match = 1;
421 } else if (c1s == 0x0030) {
422 *s1 = 0x296f;
423 match = 1;
424 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
425 *s1 = 0x2966 + (c1s - 0x0031);
426 match = 1;
427 }
428 } else {
429 CK((*filter->output_function)(c1s, filter->data));
430 }
431 } else {
432 if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
433 filter->status = 1;
434 filter->cache = c;
435 *s1 = -1;
436 return match;
437 }
438
439 if (c == 0x00A9) {
440 *s1 = 0x29b5; match = 1;
441 } else if (c == 0x00AE) {
442 *s1 = 0x29ba; match = 1;
443 } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
444 i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
445 if (i >= 0) {
446 *s1 = mb_tbl_uni_docomo2code2_value[i];
447 match = 1;
448 }
449 } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
450 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
451 if (i >= 0) {
452 *s1 = mb_tbl_uni_docomo2code3_value[i];
453 match = 1;
454 }
455 } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
456 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
457 if (i >= 0) {
458 *s1 = mb_tbl_uni_docomo2code5_val[i];
459 match = 1;
460 }
461 }
462 }
463
464 return match;
465 }
466
467 int
mbfilter_unicode2sjis_emoji_kddi(int c,int * s1,mbfl_convert_filter * filter)468 mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
469 {
470 int i, match = 0, c1s;
471
472 if (filter->status == 1) {
473 c1s = filter->cache;
474 filter->cache = 0;
475 filter->status = 0;
476 if (c == 0x20E3) {
477 if (c1s == 0x0023) {
478 *s1 = 0x25bc;
479 match = 1;
480 } else if (c1s == 0x0030) {
481 *s1 = 0x2830;
482 match = 1;
483 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
484 *s1 = 0x27a6 + (c1s - 0x0031);
485 match = 1;
486 }
487 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
488 for (i=0; i<10; i++) {
489 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
490 *s1 = nflags_code_kddi[i];
491 match = 1;
492 break;
493 }
494 }
495 } else {
496 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
497 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
498 CK((*filter->output_function)(c1s, filter->data));
499 }
500 }
501 } else {
502 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
503 (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
504 filter->status = 1;
505 filter->cache = c;
506 *s1 = -1;
507 return match;
508 }
509
510 if (c == 0x00A9) {
511 *s1 = 0x27dc; match = 1;
512 } else if (c == 0x00AE) {
513 *s1 = 0x27dd; match = 1;
514 } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
515 i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
516 if (i >= 0) {
517 *s1 = mb_tbl_uni_kddi2code2_value[i];
518 match = 1;
519 }
520 } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
521 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
522 if (i >= 0) {
523 *s1 = mb_tbl_uni_kddi2code3_value[i];
524 match = 1;
525 }
526 } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
527 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
528 if (i >= 0) {
529 *s1 = mb_tbl_uni_kddi2code5_val[i];
530 match = 1;
531 }
532 }
533 }
534
535 return match;
536 }
537
538 int
mbfilter_unicode2sjis_emoji_sb(int c,int * s1,mbfl_convert_filter * filter)539 mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
540 {
541 int i, match = 0, c1s;
542
543 if (filter->status == 1) {
544 filter->status = 0;
545 c1s = filter->cache;
546 filter->cache = 0;
547 if (c == 0x20E3) {
548 if (c1s == 0x0023) {
549 *s1 = 0x2817;
550 match = 1;
551 } else if (c1s == 0x0030) {
552 *s1 = 0x282c;
553 match = 1;
554 } else if (c1s >= 0x0031 && c1s <= 0x0039) {
555 *s1 = 0x2823 + (c1s - 0x0031);
556 match = 1;
557 }
558 } else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
559 for (i=0; i<10; i++) {
560 if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
561 *s1 = nflags_code_sb[i];
562 match = 1;
563 break;
564 }
565 }
566 } else {
567 if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
568 c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
569 CK((*filter->output_function)(c1s, filter->data));
570 }
571 }
572 } else {
573 if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
574 filter->status = 1;
575 filter->cache = c;
576 *s1 = -1;
577 return match;
578 }
579
580 if (c == 0x00A9) {
581 *s1 = 0x2855; match = 1;
582 } else if (c == 0x00AE) {
583 *s1 = 0x2856; match = 1;
584 } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
585 i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
586 if (i >= 0) {
587 *s1 = mb_tbl_uni_sb2code2_value[i];
588 match = 1;
589 }
590 } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
591 i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
592 if (i >= 0) {
593 *s1 = mb_tbl_uni_sb2code3_value[i];
594 match = 1;
595 }
596 } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
597 i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
598 if (i >= 0) {
599 *s1 = mb_tbl_uni_sb2code5_val[i];
600 match = 1;
601 }
602 }
603 }
604 return match;
605 }
606
/*
 * SJIS-Mobile (DoCoMo / KDDI / SoftBank) => wchar
 */
610 int
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)611 mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
612 {
613 int c1, s, s1 = 0, s2 = 0, w;
614 int snd = 0;
615
616 retry:
617 switch (filter->status) {
618 case 0:
619 if (c >= 0 && c < 0x80) { /* latin */
620 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb && c == 0x1b) {
621 filter->cache = c;
622 filter->status = 2;
623 } else {
624 CK((*filter->output_function)(c, filter->data));
625 }
626 } else if (c > 0xa0 && c < 0xe0) { /* kana */
627 CK((*filter->output_function)(0xfec0 + c, filter->data));
628 } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
629 filter->status = 1;
630 filter->cache = c;
631 } else {
632 w = c & MBFL_WCSGROUP_MASK;
633 w |= MBFL_WCSGROUP_THROUGH;
634 CK((*filter->output_function)(w, filter->data));
635 }
636 break;
637
638 case 1: /* kanji second char */
639 filter->status = 0;
640 c1 = filter->cache;
641 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
642 w = 0;
643 SJIS_DECODE(c1, c, s1, s2);
644 s = (s1 - 0x21)*94 + s2 - 0x21;
645 if (s <= 137) {
646 if (s == 31) {
647 w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
648 } else if (s == 32) {
649 w = 0xff5e; /* FULLWIDTH TILDE */
650 } else if (s == 33) {
651 w = 0x2225; /* PARALLEL TO */
652 } else if (s == 60) {
653 w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
654 } else if (s == 80) {
655 w = 0xffe0; /* FULLWIDTH CENT SIGN */
656 } else if (s == 81) {
657 w = 0xffe1; /* FULLWIDTH POUND SIGN */
658 } else if (s == 137) {
659 w = 0xffe2; /* FULLWIDTH NOT SIGN */
660 }
661 }
662 if (w == 0) {
663 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
664 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
665 } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
666 w = jisx0208_ucs_table[s];
667 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
668 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
669 } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
670 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
671 } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
672 w = s - (94*94) + 0xe000;
673 }
674
675 if (s >= (94*94) && s < 119*94) {
676 if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
677 w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
678 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
679 w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
680 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
681 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
682 }
683
684 if (w > 0 && snd > 0) {
685 CK((*filter->output_function)(snd, filter->data));
686 }
687 }
688 }
689 if (w <= 0) {
690 w = (s1 << 8) | s2;
691 w &= MBFL_WCSPLANE_MASK;
692 w |= MBFL_WCSPLANE_WINCP932;
693 }
694 CK((*filter->output_function)(w, filter->data));
695 } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
696 CK((*filter->output_function)(c, filter->data));
697 } else {
698 w = (c1 << 8) | c;
699 w &= MBFL_WCSGROUP_MASK;
700 w |= MBFL_WCSGROUP_THROUGH;
701 CK((*filter->output_function)(w, filter->data));
702 }
703 break;
704 /* ESC : Softbank Emoji */
705 case 2:
706 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
707 c == 0x24) {
708 filter->cache = c;
709 filter->status++;
710 } else {
711 filter->cache = 0;
712 filter->status = 0;
713 CK((*filter->output_function)(0x1b, filter->data));
714 goto retry;
715 }
716 break;
717
718 /* ESC $ : Softbank Emoji */
719 case 3:
720 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
721 ((c >= 0x45 && c <= 0x47) || (c >= 0x4f && c <= 0x51))) {
722 filter->cache = c;
723 filter->status++;
724 } else {
725 filter->cache = 0;
726 filter->status = 0;
727 CK((*filter->output_function)(0x1b, filter->data));
728 CK((*filter->output_function)(0x24, filter->data));
729 goto retry;
730 }
731 break;
732
733 /* ESC [GEFOPQ] : Softbank Emoji */
734 case 4:
735 w = 0;
736 if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
737 c1 = filter->cache;
738
739 if (c == 0x0f) {
740 w = c;
741 filter->cache = 0;
742 filter->status = 0;
743 } else {
744 if (c1 == 0x47 && c >= 0x21 && c <= 0x7a) {
745 s1 = 0x91; s2 = c;
746 } else if (c1 == 0x45 && c >= 0x21 && c <= 0x7a) {
747 s1 = 0x8d; s2 = c;
748 } else if (c1 == 0x46 && c >= 0x21 && c <= 0x7a) {
749 s1 = 0x8e; s2 = c;
750 } else if (c1 == 0x4f && c >= 0x21 && c <= 0x6d) {
751 s1 = 0x92; s2 = c;
752 } else if (c1 == 0x50 && c >= 0x21 && c <= 0x6c) {
753 s1 = 0x95; s2 = c;
754 } else if (c1 == 0x51 && c >= 0x21 && c <= 0x5e) {
755 s1 = 0x96; s2 = c;
756 }
757 s = (s1 - 0x21)*94 + s2 - 0x21;
758 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
759 if (w > 0) {
760 if (snd > 0) {
761 CK((*filter->output_function)(snd, filter->data));
762 }
763 CK((*filter->output_function)(w, filter->data));
764 }
765 }
766 }
767
768 if (w <= 0) {
769 c1 = filter->cache;
770 filter->cache = 0;
771 filter->status = 0;
772 CK((*filter->output_function)(0x1b, filter->data));
773 CK((*filter->output_function)(0x24, filter->data));
774 CK((*filter->output_function)(c1 & 0xff, filter->data));
775 goto retry;
776 }
777 break;
778
779 default:
780 filter->status = 0;
781 break;
782 }
783
784 return c;
785 }
786
/*
 * wchar => SJIS-Mobile (DoCoMo / KDDI / SoftBank)
 */
790 int
mbfl_filt_conv_wchar_sjis_mobile(int c,mbfl_convert_filter * filter)791 mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
792 {
793 int c1, c2, s1, s2;
794
795 s1 = 0;
796 s2 = 0;
797 if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
798 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
799 } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
800 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
801 } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
802 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
803 } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
804 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
805 } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
806 s1 = c - 0xe000;
807 c1 = s1/94 + 0x7f;
808 c2 = s1%94 + 0x21;
809 s1 = (c1 << 8) | c2;
810 s2 = 1;
811 }
812 if (s1 <= 0) {
813 c1 = c & ~MBFL_WCSPLANE_MASK;
814 if (c1 == MBFL_WCSPLANE_WINCP932) {
815 s1 = c & MBFL_WCSPLANE_MASK;
816 s2 = 1;
817 } else if (c1 == MBFL_WCSPLANE_JIS0208) {
818 s1 = c & MBFL_WCSPLANE_MASK;
819 } else if (c1 == MBFL_WCSPLANE_JIS0212) {
820 s1 = c & MBFL_WCSPLANE_MASK;
821 s1 |= 0x8080;
822 } else if (c == 0xa5) { /* YEN SIGN */
823 s1 = 0x216f; /* FULLWIDTH YEN SIGN */
824 } else if (c == 0x203e) { /* OVER LINE */
825 s1 = 0x2131; /* FULLWIDTH MACRON */
826 } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
827 s1 = 0x2140;
828 } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
829 s1 = 0x2141;
830 } else if (c == 0x2225) { /* PARALLEL TO */
831 s1 = 0x2142;
832 } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
833 s1 = 0x215d;
834 } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
835 s1 = 0x2171;
836 } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
837 s1 = 0x2172;
838 } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
839 s1 = 0x224c;
840 }
841 }
842
843 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
844 s1 = -1;
845 c1 = 0;
846 c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
847 while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
848 if (c == cp932ext1_ucs_table[c1]) {
849 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
850 break;
851 }
852 c1++;
853 }
854 if (s1 <= 0) {
855 c1 = 0;
856 c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
857 while (c1 < c2) { /* CP932 vendor ext2 (115ku - 119ku) */
858 if (c == cp932ext2_ucs_table[c1]) {
859 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
860 break;
861 }
862 c1++;
863 }
864 }
865
866 if (s1 <= 0) {
867 c1 = 0;
868 c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
869 while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
870 if (c == cp932ext3_ucs_table[c1]) {
871 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
872 break;
873 }
874 c1++;
875 }
876 }
877 if (c == 0) {
878 s1 = 0;
879 } else if (s1 <= 0) {
880 s1 = -1;
881 }
882 }
883
884 if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
885 mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
886 (filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
887 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
888 (filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
889 mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 )) {
890 CODE2JIS(c1,c2,s1,s2);
891 }
892
893 if (filter->status == 1 && filter->cache > 0) {
894 return c;
895 }
896
897 if (s1 >= 0) {
898 if (s1 < 0x100) { /* latin or kana */
899 CK((*filter->output_function)(s1, filter->data));
900 } else { /* kanji */
901 c1 = (s1 >> 8) & 0xff;
902 c2 = s1 & 0xff;
903 SJIS_ENCODE(c1, c2, s1, s2);
904 CK((*filter->output_function)(s1, filter->data));
905 CK((*filter->output_function)(s2, filter->data));
906 }
907 } else {
908 CK(mbfl_filt_conv_illegal_output(c, filter));
909 }
910
911 return c;
912 }
913
914 int
mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter * filter)915 mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
916 {
917 int c1 = filter->cache;
918 if (filter->status == 1 && (c1 == 0x0023 || (c1 >= 0x0030 && c1<=0x0039))) {
919 CK((*filter->output_function)(c1, filter->data));
920 }
921 filter->status = 0;
922 filter->cache = 0;
923
924 if (filter->flush_function != NULL) {
925 return (*filter->flush_function)(filter->data);
926 }
927
928 return 0;
929 }
930