1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * the source code included in this files was separated from mbfilter_sjis_open.c
26  * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_sjis_mobile.h"
32 
33 #include "unicode_table_cp932_ext.h"
34 #include "unicode_table_jis.h"
35 
36 #include "emoji2uni.h"
37 
38 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
39 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
40 extern const unsigned char mblen_table_sjis[];
41 
/*
 * NULL-terminated alias lists, one per carrier-specific Shift_JIS variant.
 * They are referenced from the encoding descriptors below.
 */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL};
static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL};
static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL};
45 
46 const mbfl_encoding mbfl_encoding_sjis_docomo = {
47  	mbfl_no_encoding_sjis_docomo,
48  	"SJIS-Mobile#DOCOMO",
49  	"Shift_JIS",
50  	(const char *(*)[])&mbfl_encoding_sjis_docomo_aliases,
51  	mblen_table_sjis,
52  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
53 	&vtbl_sjis_docomo_wchar,
54 	&vtbl_wchar_sjis_docomo
55 };
56 
57 const mbfl_encoding mbfl_encoding_sjis_kddi = {
58  	mbfl_no_encoding_sjis_kddi,
59  	"SJIS-Mobile#KDDI",
60  	"Shift_JIS",
61  	(const char *(*)[])&mbfl_encoding_sjis_kddi_aliases,
62  	mblen_table_sjis,
63  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
64 	&vtbl_sjis_kddi_wchar,
65 	&vtbl_wchar_sjis_kddi
66 };
67 
68 const mbfl_encoding mbfl_encoding_sjis_sb = {
69  	mbfl_no_encoding_sjis_sb,
70  	"SJIS-Mobile#SOFTBANK",
71  	"Shift_JIS",
72  	(const char *(*)[])&mbfl_encoding_sjis_sb_aliases,
73  	mblen_table_sjis,
74  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
75 	&vtbl_sjis_sb_wchar,
76 	&vtbl_wchar_sjis_sb
77 };
78 
79 const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
80 	mbfl_no_encoding_sjis_docomo,
81 	mbfl_filt_ident_common_ctor,
82 	mbfl_filt_ident_sjis
83 };
84 
85 const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
86 	mbfl_no_encoding_sjis_kddi,
87 	mbfl_filt_ident_common_ctor,
88 	mbfl_filt_ident_sjis
89 };
90 
91 const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
92 	mbfl_no_encoding_sjis_sb,
93 	mbfl_filt_ident_common_ctor,
94 	mbfl_filt_ident_sjis
95 };
96 
97 const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
98  	mbfl_no_encoding_sjis_docomo,
99  	mbfl_no_encoding_wchar,
100  	mbfl_filt_conv_common_ctor,
101 	NULL,
102  	mbfl_filt_conv_sjis_mobile_wchar,
103  	mbfl_filt_conv_common_flush,
104  	NULL,
105 };
106 
107 const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
108  	mbfl_no_encoding_wchar,
109  	mbfl_no_encoding_sjis_docomo,
110  	mbfl_filt_conv_common_ctor,
111 	NULL,
112  	mbfl_filt_conv_wchar_sjis_mobile,
113  	mbfl_filt_conv_sjis_mobile_flush,
114  	NULL,
115 };
116 
117 const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
118  	mbfl_no_encoding_sjis_kddi,
119  	mbfl_no_encoding_wchar,
120  	mbfl_filt_conv_common_ctor,
121 	NULL,
122  	mbfl_filt_conv_sjis_mobile_wchar,
123  	mbfl_filt_conv_common_flush,
124  	NULL,
125 };
126 
127 const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
128  	mbfl_no_encoding_wchar,
129  	mbfl_no_encoding_sjis_kddi,
130  	mbfl_filt_conv_common_ctor,
131 	NULL,
132  	mbfl_filt_conv_wchar_sjis_mobile,
133 	mbfl_filt_conv_sjis_mobile_flush,
134 	NULL,
135 };
136 
137 const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
138  	mbfl_no_encoding_sjis_sb,
139  	mbfl_no_encoding_wchar,
140  	mbfl_filt_conv_common_ctor,
141 	NULL,
142  	mbfl_filt_conv_sjis_mobile_wchar,
143  	mbfl_filt_conv_common_flush,
144  	NULL,
145 };
146 
147 const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
148  	mbfl_no_encoding_wchar,
149  	mbfl_no_encoding_sjis_sb,
150  	mbfl_filt_conv_common_ctor,
151 	NULL,
152  	mbfl_filt_conv_wchar_sjis_mobile,
153 	mbfl_filt_conv_sjis_mobile_flush,
154 	NULL,
155 };
156 
/*
 * National-flag support. The three tables are parallel: entry i of
 * nflags_code_kddi / nflags_code_sb is the carrier code of the flag whose
 * two-letter country code is nflags_s[i].
 * nflags_s rows are exactly two characters wide and are NOT NUL-terminated.
 */
static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"};
static const int nflags_code_kddi[10] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7};
static const int nflags_code_sb[10] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03};
160 
/*
 * Carrier code <=> private-use-area range tables.
 *
 * Every row is {first, last, base}: the layout consumed by
 * mbfilter_conv_map_tbl() (first..last => base..) and by
 * mbfilter_conv_r_map_tbl() (the reverse direction).
 * NOTE(review): the tables are exported but not referenced in this file;
 * presumably they are used by the UTF-8 mobile filters - confirm.
 */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{0x28c2, 0x292f, 0xe63e},
	{0x2930, 0x2934, 0xe6ac},
	{0x2935, 0x2951, 0xe6b1},
	{0x2952, 0x29db, 0xe6ce},
};

const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{0x26ec, 0x2838, 0xe468},
	{0x284c, 0x2863, 0xe5b5},
	{0x24b8, 0x24ca, 0xe5cd},
	{0x24cb, 0x2545, 0xea80},
	{0x2839, 0x284b, 0xeafb},
	{0x2546, 0x25c0, 0xeb0e},
	{0x25c1, 0x25c6, 0xeb89},
};

const unsigned short mbfl_sb2uni_pua[6][3] = {
	{0x27a9, 0x2802, 0xe101},
	{0x2808, 0x2861, 0xe201},
	{0x2921, 0x297a, 0xe001},
	{0x2980, 0x29cc, 0xe301},
	{0x2a99, 0x2ae4, 0xe401},
	{0x2af8, 0x2b35, 0xe501},
};

const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{0x24b8, 0x24f6, 0xec40},
	{0x24f7, 0x2573, 0xec80},
	{0x2574, 0x25b2, 0xed40},
	{0x25b3, 0x25c6, 0xed80},
	{0x26ec, 0x272a, 0xef40},
	{0x272b, 0x27a7, 0xef80},
	{0x27a8, 0x27e6, 0xf040},
	{0x27e7, 0x2863, 0xf080},
};
197 
/* Regional Indicator Symbol for an ASCII capital letter: NFLAGS('A') is U+1F1E6. */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Leave the enclosing function with -1 when STATEMENT yields a negative value. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/*
 * JIS X 0208 byte pair (c1,c2) => Shift_JIS byte pair, stored in s1,s2.
 * c1 and c2 are evaluated more than once; s1 and s2 must be int lvalues.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
		do {						\
			s1 = (c1);				\
			s1--;					\
			s1 >>= 1;				\
			if ((c1) < 0x5f) {		\
				s1 += 0x71;			\
			} else {				\
				s1 += 0xb1;			\
			}						\
			s2 = (c2);				\
			if ((c1) & 1) {			\
				if ((c2) < 0x60) {	\
					s2--;			\
				}					\
				s2 += 0x20;			\
			} else {				\
				s2 += 0x7e;			\
			}						\
		} while (0)

/*
 * Shift_JIS byte pair (c1,c2) => JIS X 0208 byte pair, stored in s1,s2.
 * s1 and s2 must be int lvalues.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
		do {						\
			s1 = (c1);				\
			if (s1 < 0xa0) {		\
				s1 -= 0x81;			\
			} else {				\
				s1 -= 0xc1;			\
			}						\
			s1 <<= 1;				\
			s1 += 0x21;				\
			s2 = (c2);				\
			if (s2 < 0x9f) {		\
				if (s2 < 0x7f) {	\
					s2++;			\
				}					\
				s2 -= 0x20;			\
			} else {				\
				s1++;				\
				s2 -= 0x7e;			\
			}						\
		} while (0)

/*
 * Linear 94x94 cell index held in s1 => packed JIS code (row byte << 8 | cell
 * byte), written back to s1; c1/c2 receive the two bytes and s2 is set to 1.
 * Wrapped in do/while so that it acts as ONE statement (safe under an
 * unbraced if/else).
 */
#define CODE2JIS(c1,c2,s1,s2)				\
	do {									\
		c1 = (s1)/94+0x21;					\
		c2 = (s1)-94*((c1)-0x21)+0x21;		\
		s1 = ((c1) << 8) | (c2);			\
		s2 = 1;								\
	} while (0)
250 
/*
 * Forward range lookup.
 *
 * map holds n rows of {first, last, base}. When c lies in first..last of a
 * row (rows are tried in order, first hit wins), *w receives
 * base + (c - first) and 1 is returned. Otherwise *w is left untouched and
 * 0 is returned.
 */
int
mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (map[i][0] <= c && c <= map[i][1]) {
			*w = c - map[i][0] + map[i][2];
			return 1;
		}
	}

	return 0;
}
265 
/*
 * Reverse range lookup (inverse of mbfilter_conv_map_tbl()).
 *
 * map holds n rows of {first, last, base}. When c lies in
 * base..base + (last - first) of a row (first hit wins), *w receives
 * first + (c - base) and 1 is returned. Otherwise *w is left untouched and
 * 0 is returned.
 */
int
mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (map[i][2] <= c && c <= map[i][2] - map[i][0] + map[i][1]) {
			*w = c + map[i][0] - map[i][2];
			return 1;
		}
	}

	return 0;
}
280 
281 int
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)282 mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
283 {
284 	int w = s;
285 	if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
286 		if (s >= mb_tbl_code2uni_docomo1_min + 0x00a2 &&
287 			s <= mb_tbl_code2uni_docomo1_min + 0x00ad &&
288 			s != mb_tbl_code2uni_docomo1_min + 0x00a3) {
289 			w =  0x20E3;
290 			*snd = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
291 			if (*snd > 0xf000) {
292 				*snd += 0x10000;
293 			}
294 		} else {
295 			w = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
296 			if (w > 0xf000) {
297 				w += 0x10000;
298 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
299 				w += 0xf0000;
300 			}
301 			*snd = 0;
302 			if (!w) {
303 				w = s;
304 			}
305 		}
306 	}
307 
308 	return w;
309 }
310 
311 int
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)312 mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
313 {
314 	int w = s, si, c;
315 	const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
316 
317 	*snd = 0;
318 	if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
319 		si = s - mb_tbl_code2uni_kddi1_min;
320 		if (si == 0x0008) { /* ES */
321 			*snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
322 		} else if (si == 0x0009) { /* RU */
323 			*snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
324 		} else if (si >= 0x008d && si <= 0x0092) {
325 			c = nflags_order_kddi[si-0x008d];
326 			*snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
327 		} else if (si == 0x0104) {
328 			*snd = 0x0023; w = 0x20E3;
329 		} else {
330 			w = mb_tbl_code2uni_kddi1[si];
331 			if (w > 0xf000) {
332 				w += 0x10000;
333 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
334 				w += 0xf0000;
335 			}
336 		}
337 	} else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
338 		si = s - mb_tbl_code2uni_kddi2_min;
339 		if (si == 100) { /* JP */
340 			*snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
341 		} else if (si >= 0x00ba && si <= 0x00c2) {
342 			*snd = si-0x00ba+0x0031; w = 0x20E3;
343 		} else if (si == 0x010b) { /* US */
344 			*snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
345 		} else if (si == 0x0144) {
346 			*snd = 0x0030; w = 0x20E3;
347 		} else {
348 			w = mb_tbl_code2uni_kddi2[si];
349 			if (w > 0xf000) {
350 				w += 0x10000;
351 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
352 				w += 0xf0000;
353 			}
354 		}
355 	}
356 	return w;
357 }
358 
359 int
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)360 mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
361 {
362 	int w = s, si, c;
363 	const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
364 
365 	*snd = 0;
366 	if (s >= mb_tbl_code2uni_sb1_min &&	s <= mb_tbl_code2uni_sb1_max) {
367 		si = s - mb_tbl_code2uni_sb1_min;
368 		if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
369 			*snd =  mb_tbl_code2uni_sb1[si];
370 			if (*snd > 0xf000) {
371 				*snd += 0x10000;
372 			}
373 			w = 0x20E3;
374 		} else {
375 			w = mb_tbl_code2uni_sb1[si];
376 			if (w > 0xf000) {
377 				w += 0x10000;
378 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
379 				w += 0xf0000;
380 			}
381 		}
382 	} else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
383 		si = s - mb_tbl_code2uni_sb2_min;
384 		w = mb_tbl_code2uni_sb2[si];
385 		if (w > 0xf000) {
386 			w += 0x10000;
387 		} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
388 			w += 0xf0000;
389 		}
390 	} else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
391 		si = s - mb_tbl_code2uni_sb3_min;
392 		if (si >= 0x0069 && si <= 0x0072) {
393 			c = nflags_order_sb[si-0x0069];
394 			*snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
395 		} else {
396 			w = mb_tbl_code2uni_sb3[si];
397 			if (w > 0xf000) {
398 				w += 0x10000;
399 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
400 				w += 0xf0000;
401 			}
402 		}
403 	}
404 	return w;
405 }
406 
407 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)408 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
409 {
410 	int i, match = 0, c1s;
411 
412 	if (filter->status == 1) {
413 		c1s = filter->cache;
414 		filter->cache = 0;
415 		filter->status = 0;
416 		if (c == 0x20E3) {
417 			if (c1s == 0x0023) {
418 				*s1 = 0x2964;
419 				match = 1;
420 			} else if (c1s == 0x0030) {
421 				*s1 = 0x296f;
422 				match = 1;
423 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
424 				*s1 = 0x2966 + (c1s - 0x0031);
425 				match = 1;
426 			}
427 		} else {
428 			CK((*filter->output_function)(c1s, filter->data));
429 		}
430 	} else {
431 		if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
432 			filter->status = 1;
433 			filter->cache = c;
434 			*s1 = -1;
435 			return match;
436 		}
437 
438 		if (c == 0x00A9) {
439 			*s1 = 0x29b5; match = 1;
440 		} else if (c == 0x00AE) {
441 			*s1 = 0x29ba; match = 1;
442 		} else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
443 			i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
444 			if (i >= 0) {
445 				*s1 = mb_tbl_uni_docomo2code2_value[i];
446 				match = 1;
447 			}
448 		} else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
449 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
450 			if (i >= 0) {
451 				*s1 = mb_tbl_uni_docomo2code3_value[i];
452 				match = 1;
453 			}
454 		} else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
455 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
456 			if (i >= 0) {
457 				*s1 = mb_tbl_uni_docomo2code5_val[i];
458 				match = 1;
459 			}
460 		}
461 	}
462 
463 	return match;
464 }
465 
466 int
mbfilter_unicode2sjis_emoji_kddi(int c,int * s1,mbfl_convert_filter * filter)467 mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
468 {
469 	int i, match = 0, c1s;
470 
471 	if (filter->status == 1) {
472 		c1s = filter->cache;
473 		filter->cache = 0;
474 		filter->status = 0;
475 		if (c == 0x20E3) {
476 			if (c1s == 0x0023) {
477 				*s1 = 0x25bc;
478 				match = 1;
479 			} else if (c1s == 0x0030) {
480 				*s1 = 0x2830;
481 				match = 1;
482 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
483 				*s1 = 0x27a6 + (c1s - 0x0031);
484 				match = 1;
485 			}
486 		} else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
487 			for (i=0; i<10; i++) {
488 				if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
489 					*s1 = nflags_code_kddi[i];
490 					match = 1;
491 					break;
492 				}
493 			}
494 		} else {
495 			if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
496 				c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
497 				CK((*filter->output_function)(c1s, filter->data));
498 			}
499 		}
500 	} else {
501 		if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
502 			(c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
503 			filter->status = 1;
504 			filter->cache = c;
505 			*s1 = -1;
506 			return match;
507 		}
508 
509 		if (c == 0x00A9) {
510 			*s1 = 0x27dc; match = 1;
511 		} else if (c == 0x00AE) {
512 			*s1 = 0x27dd; match = 1;
513 		} else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
514 			i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
515 			if (i >= 0) {
516 				*s1 = mb_tbl_uni_kddi2code2_value[i];
517 				match = 1;
518 			}
519 		} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
520 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
521 			if (i >= 0) {
522 				*s1 = mb_tbl_uni_kddi2code3_value[i];
523 				match = 1;
524 			}
525 		} else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
526 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
527 			if (i >= 0) {
528 				*s1 = mb_tbl_uni_kddi2code5_val[i];
529 				match = 1;
530 			}
531 		}
532 	}
533 
534 	return match;
535 }
536 
537 int
mbfilter_unicode2sjis_emoji_sb(int c,int * s1,mbfl_convert_filter * filter)538 mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
539 {
540 	int i, match = 0, c1s;
541 
542 	if (filter->status == 1) {
543 		filter->status = 0;
544 		c1s = filter->cache;
545 		filter->cache = 0;
546 		if (c == 0x20E3) {
547 			if (c1s == 0x0023) {
548 				*s1 = 0x2817;
549 				match = 1;
550 			} else if (c1s == 0x0030) {
551 				*s1 = 0x282c;
552 				match = 1;
553 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
554 				*s1 = 0x2823 + (c1s - 0x0031);
555 				match = 1;
556 			}
557 		} else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
558 			for (i=0; i<10; i++) {
559 				if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
560 					*s1 = nflags_code_sb[i];
561 					match = 1;
562 					break;
563 				}
564 			}
565 		} else {
566 			if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
567 				c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
568 				CK((*filter->output_function)(c1s, filter->data));
569 			}
570 		}
571 	} else {
572 		if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
573 			filter->status = 1;
574 			filter->cache = c;
575 			*s1 = -1;
576 			return match;
577 		}
578 
579 		if (c == 0x00A9) {
580 			*s1 = 0x2855; match = 1;
581 		} else if (c == 0x00AE) {
582 			*s1 = 0x2856; match = 1;
583 		} else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
584 			i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
585 			if (i >= 0) {
586 				*s1 = mb_tbl_uni_sb2code2_value[i];
587 				match = 1;
588 			}
589 		} else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
590 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
591 			if (i >= 0) {
592 				*s1 = mb_tbl_uni_sb2code3_value[i];
593 				match = 1;
594 			}
595 		} else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
596 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
597 			if (i >= 0) {
598 				*s1 = mb_tbl_uni_sb2code5_val[i];
599 				match = 1;
600 			}
601 		}
602 	}
603 	return match;
604 }
605 
606 /*
607  * SJIS-win => wchar
608  */
609 int
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)610 mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
611 {
612 	int c1, s, s1 = 0, s2 = 0, w;
613 	int snd = 0;
614 
615 retry:
616 	switch (filter->status) {
617 	case 0:
618 		if (c >= 0 && c < 0x80) {	/* latin */
619 			if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb && c == 0x1b) {
620 				filter->cache = c;
621 				filter->status = 2;
622 			} else {
623 				CK((*filter->output_function)(c, filter->data));
624 			}
625 		} else if (c > 0xa0 && c < 0xe0) {	/* kana */
626 			CK((*filter->output_function)(0xfec0 + c, filter->data));
627 		} else if (c > 0x80 && c < 0xfd && c != 0xa0) {	/* kanji first char */
628 			filter->status = 1;
629 			filter->cache = c;
630 		} else {
631 			w = c & MBFL_WCSGROUP_MASK;
632 			w |= MBFL_WCSGROUP_THROUGH;
633 			CK((*filter->output_function)(w, filter->data));
634 		}
635 		break;
636 
637 	case 1:		/* kanji second char */
638 		filter->status = 0;
639 		c1 = filter->cache;
640 		if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
641 			w = 0;
642 			SJIS_DECODE(c1, c, s1, s2);
643 			s = (s1 - 0x21)*94 + s2 - 0x21;
644 			if (s <= 137) {
645 				if (s == 31) {
646 					w = 0xff3c;			/* FULLWIDTH REVERSE SOLIDUS */
647 				} else if (s == 32) {
648 					w = 0xff5e;			/* FULLWIDTH TILDE */
649 				} else if (s == 33) {
650 					w = 0x2225;			/* PARALLEL TO */
651 				} else if (s == 60) {
652 					w = 0xff0d;			/* FULLWIDTH HYPHEN-MINUS */
653 				} else if (s == 80) {
654 					w = 0xffe0;			/* FULLWIDTH CENT SIGN */
655 				} else if (s == 81) {
656 					w = 0xffe1;			/* FULLWIDTH POUND SIGN */
657 				} else if (s == 137) {
658 					w = 0xffe2;			/* FULLWIDTH NOT SIGN */
659 				}
660 			}
661 			if (w == 0) {
662 				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {		/* vendor ext1 (13ku) */
663 					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
664 				} else if (s >= 0 && s < jisx0208_ucs_table_size) {		/* X 0208 */
665 					w = jisx0208_ucs_table[s];
666 				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {		/* vendor ext2 (89ku - 92ku) */
667 					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
668 				} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {		/* vendor ext3 (115ku - 119ku) */
669 					w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
670 				} else if (s >= (94*94) && s < (114*94)) {		/* user (95ku - 114ku) */
671 					w = s - (94*94) + 0xe000;
672 				}
673 
674  				if (s >= (94*94) && s < 119*94) {
675 					if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
676 						w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
677 					} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
678 						w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
679 					} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
680 						w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
681 					}
682 
683 					if (w > 0  && snd > 0) {
684 						CK((*filter->output_function)(snd, filter->data));
685 					}
686 				}
687 			}
688 			if (w <= 0) {
689 				w = (s1 << 8) | s2;
690 				w &= MBFL_WCSPLANE_MASK;
691 				w |= MBFL_WCSPLANE_WINCP932;
692 			}
693 			CK((*filter->output_function)(w, filter->data));
694 		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
695 			CK((*filter->output_function)(c, filter->data));
696 		} else {
697 			w = (c1 << 8) | c;
698 			w &= MBFL_WCSGROUP_MASK;
699 			w |= MBFL_WCSGROUP_THROUGH;
700 			CK((*filter->output_function)(w, filter->data));
701 		}
702 		break;
703 	/* ESC : Softbank Emoji */
704 	case 2:
705 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
706 			c == 0x24) {
707 				filter->cache = c;
708 				filter->status++;
709 		} else {
710 			filter->cache = 0;
711 			filter->status = 0;
712 			CK((*filter->output_function)(0x1b, filter->data));
713 			goto retry;
714 		}
715 		break;
716 
717 	/* ESC $ : Softbank Emoji */
718 	case 3:
719 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
720 			((c >= 0x45 && c <= 0x47) || (c >= 0x4f && c <= 0x51))) {
721 				filter->cache = c;
722 				filter->status++;
723 		} else {
724 			filter->cache = 0;
725 			filter->status = 0;
726 			CK((*filter->output_function)(0x1b, filter->data));
727 			CK((*filter->output_function)(0x24, filter->data));
728 			goto retry;
729 		}
730 		break;
731 
732 	/* ESC [GEFOPQ] : Softbank Emoji */
733 	case 4:
734 		w = 0;
735 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
736 			c1 = filter->cache;
737 
738 			if (c == 0x0f) {
739 				w = c;
740 				filter->cache = 0;
741 				filter->status = 0;
742 			} else {
743 				if (c1 == 0x47 && c >= 0x21 && c <= 0x7a) {
744 					s1 = 0x91; s2 = c;
745 				} else if (c1 == 0x45 && c >= 0x21 && c <= 0x7a) {
746 					s1 = 0x8d; s2 = c;
747 				} else if (c1 == 0x46 && c >= 0x21 && c <= 0x7a) {
748 					s1 = 0x8e; s2 = c;
749 				} else if (c1 == 0x4f && c >= 0x21 && c <= 0x6d) {
750 					s1 = 0x92; s2 = c;
751 				} else if (c1 == 0x50 && c >= 0x21 && c <= 0x6c) {
752 					s1 = 0x95; s2 = c;
753 				} else if (c1 == 0x51 && c >= 0x21 && c <= 0x5e) {
754 					s1 = 0x96; s2 = c;
755 				}
756 				s  = (s1 - 0x21)*94 + s2 - 0x21;
757 				w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
758 				if (w > 0) {
759 					if (snd > 0) {
760 						CK((*filter->output_function)(snd, filter->data));
761 					}
762 					CK((*filter->output_function)(w, filter->data));
763 				}
764 			}
765 		}
766 
767 		if (w <= 0) {
768 			c1 = filter->cache;
769 			filter->cache = 0;
770 			filter->status = 0;
771 			CK((*filter->output_function)(0x1b, filter->data));
772 			CK((*filter->output_function)(0x24, filter->data));
773 			CK((*filter->output_function)(c1 & 0xff, filter->data));
774 			goto retry;
775 		}
776 		break;
777 
778 	default:
779 		filter->status = 0;
780 		break;
781 	}
782 
783 	return c;
784 }
785 
786 /*
787  * wchar => SJIS-win
788  */
789 int
mbfl_filt_conv_wchar_sjis_mobile(int c,mbfl_convert_filter * filter)790 mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
791 {
792 	int c1, c2, s1, s2;
793 
794 	s1 = 0;
795 	s2 = 0;
796 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
797 		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
798 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
799 		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
800 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
801 		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
802 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
803 		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
804 	} else if (c >= 0xe000 && c < (0xe000 + 20*94)) {	/* user  (95ku - 114ku) */
805 		s1 = c - 0xe000;
806 		c1 = s1/94 + 0x7f;
807 		c2 = s1%94 + 0x21;
808 		s1 = (c1 << 8) | c2;
809 		s2 = 1;
810 	}
811 	if (s1 <= 0) {
812 		c1 = c & ~MBFL_WCSPLANE_MASK;
813 		if (c1 == MBFL_WCSPLANE_WINCP932) {
814 			s1 = c & MBFL_WCSPLANE_MASK;
815 			s2 = 1;
816 		} else if (c1 == MBFL_WCSPLANE_JIS0208) {
817 			s1 = c & MBFL_WCSPLANE_MASK;
818 		} else if (c1 == MBFL_WCSPLANE_JIS0212) {
819 			s1 = c & MBFL_WCSPLANE_MASK;
820 			s1 |= 0x8080;
821 		} else if (c == 0xa5) {		/* YEN SIGN */
822 			s1 = 0x216f;	/* FULLWIDTH YEN SIGN */
823 		} else if (c == 0x203e) {	/* OVER LINE */
824 			s1 = 0x2131;	/* FULLWIDTH MACRON */
825 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
826 			s1 = 0x2140;
827 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
828 			s1 = 0x2141;
829 		} else if (c == 0x2225) {	/* PARALLEL TO */
830 			s1 = 0x2142;
831 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
832 			s1 = 0x215d;
833 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
834 			s1 = 0x2171;
835 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
836 			s1 = 0x2172;
837 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
838 			s1 = 0x224c;
839 		}
840 	}
841 
842 	if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {	/* not found or X 0212 */
843 		s1 = -1;
844 		c1 = 0;
845 		c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
846 		while (c1 < c2) {		/* CP932 vendor ext1 (13ku) */
847 			if (c == cp932ext1_ucs_table[c1]) {
848 				s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
849 				break;
850 			}
851 			c1++;
852 		}
853 		if (s1 <= 0) {
854 			c1 = 0;
855 			c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
856 			while (c1 < c2) {		/* CP932 vendor ext2 (115ku - 119ku) */
857 				if (c == cp932ext2_ucs_table[c1]) {
858 					s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
859 					break;
860 				}
861 				c1++;
862 			}
863 		}
864 
865 		if (s1 <= 0) {
866 			c1 = 0;
867 			c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
868 			while (c1 < c2) {		/* CP932 vendor ext3 (115ku - 119ku) */
869 				if (c == cp932ext3_ucs_table[c1]) {
870 					s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
871 					break;
872 				}
873 				c1++;
874 			}
875 		}
876 		if (c == 0) {
877 			s1 = 0;
878 		} else if (s1 <= 0) {
879 			s1 = -1;
880 		}
881 	}
882 
883  	if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
884 		 mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
885 		(filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
886 		 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
887 		(filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
888 		 mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 )) {
889 		CODE2JIS(c1,c2,s1,s2);
890  	}
891 
892 	if (filter->status == 1 && filter->cache > 0) {
893 		return c;
894 	}
895 
896 	if (s1 >= 0) {
897 		if (s1 < 0x100) { /* latin or kana */
898 			CK((*filter->output_function)(s1, filter->data));
899 		} else { /* kanji */
900 			c1 = (s1 >> 8) & 0xff;
901 			c2 = s1 & 0xff;
902 			SJIS_ENCODE(c1, c2, s1, s2);
903 			CK((*filter->output_function)(s1, filter->data));
904 			CK((*filter->output_function)(s2, filter->data));
905 		}
906 	} else {
907 		CK(mbfl_filt_conv_illegal_output(c, filter));
908 	}
909 
910 	return c;
911 }
912 
913 int
mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter * filter)914 mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
915 {
916 	int c1 = filter->cache;
917 	if (filter->status == 1 && (c1 == 0x0023 || (c1 >= 0x0030 && c1<=0x0039))) {
918 		CK((*filter->output_function)(c1, filter->data));
919 	}
920 	filter->status = 0;
921 	filter->cache = 0;
922 
923 	if (filter->flush_function != NULL) {
924 		return (*filter->flush_function)(filter->data);
925 	}
926 
927 	return 0;
928 }
929