1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
/*
 * The source code in this file was separated from mbfilter_sjis_open.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
 *
 */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "mbfilter.h"
35 #include "mbfilter_sjis_mobile.h"
36 
37 #include "unicode_table_cp932_ext.h"
38 #include "unicode_table_jis.h"
39 
40 #include "emoji2uni.h"
41 
42 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
43 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
44 extern const unsigned char mblen_table_sjis[];
45 
/*
 * Alternative names for each carrier-specific Shift_JIS variant.
 * Every list is terminated by a NULL entry.
 */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {
	"SJIS-DOCOMO",
	"shift_jis-imode",
	"x-sjis-emoji-docomo",
	NULL
};

static const char *mbfl_encoding_sjis_kddi_aliases[] = {
	"SJIS-KDDI",
	"shift_jis-kddi",
	"x-sjis-emoji-kddi",
	NULL
};

static const char *mbfl_encoding_sjis_sb_aliases[] = {
	"SJIS-SOFTBANK",
	"shift_jis-softbank",
	"x-sjis-emoji-softbank",
	NULL
};
49 
50 const mbfl_encoding mbfl_encoding_sjis_docomo = {
51  	mbfl_no_encoding_sjis_docomo,
52  	"SJIS-Mobile#DOCOMO",
53  	"Shift_JIS",
54  	(const char *(*)[])&mbfl_encoding_sjis_docomo_aliases,
55  	mblen_table_sjis,
56  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
57 };
58 
59 const mbfl_encoding mbfl_encoding_sjis_kddi = {
60  	mbfl_no_encoding_sjis_kddi,
61  	"SJIS-Mobile#KDDI",
62  	"Shift_JIS",
63  	(const char *(*)[])&mbfl_encoding_sjis_kddi_aliases,
64  	mblen_table_sjis,
65  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
66 };
67 
68 const mbfl_encoding mbfl_encoding_sjis_sb = {
69  	mbfl_no_encoding_sjis_sb,
70  	"SJIS-Mobile#SOFTBANK",
71  	"Shift_JIS",
72  	(const char *(*)[])&mbfl_encoding_sjis_sb_aliases,
73  	mblen_table_sjis,
74  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
75 };
76 
77 const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
78 	mbfl_no_encoding_sjis_docomo,
79 	mbfl_filt_ident_common_ctor,
80 	mbfl_filt_ident_common_dtor,
81 	mbfl_filt_ident_sjis
82 };
83 
84 const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
85 	mbfl_no_encoding_sjis_kddi,
86 	mbfl_filt_ident_common_ctor,
87 	mbfl_filt_ident_common_dtor,
88 	mbfl_filt_ident_sjis
89 };
90 
91 const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
92 	mbfl_no_encoding_sjis_sb,
93 	mbfl_filt_ident_common_ctor,
94 	mbfl_filt_ident_common_dtor,
95 	mbfl_filt_ident_sjis
96 };
97 
98 const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
99  	mbfl_no_encoding_sjis_docomo,
100  	mbfl_no_encoding_wchar,
101  	mbfl_filt_conv_common_ctor,
102  	mbfl_filt_conv_common_dtor,
103  	mbfl_filt_conv_sjis_mobile_wchar,
104  	mbfl_filt_conv_common_flush
105 };
106 
107 const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
108  	mbfl_no_encoding_wchar,
109  	mbfl_no_encoding_sjis_docomo,
110  	mbfl_filt_conv_common_ctor,
111  	mbfl_filt_conv_common_dtor,
112  	mbfl_filt_conv_wchar_sjis_mobile,
113  	mbfl_filt_conv_sjis_mobile_flush
114 };
115 
116 const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
117  	mbfl_no_encoding_sjis_kddi,
118  	mbfl_no_encoding_wchar,
119  	mbfl_filt_conv_common_ctor,
120  	mbfl_filt_conv_common_dtor,
121  	mbfl_filt_conv_sjis_mobile_wchar,
122  	mbfl_filt_conv_common_flush
123 };
124 
125 const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
126  	mbfl_no_encoding_wchar,
127  	mbfl_no_encoding_sjis_kddi,
128  	mbfl_filt_conv_common_ctor,
129  	mbfl_filt_conv_common_dtor,
130  	mbfl_filt_conv_wchar_sjis_mobile,
131 	mbfl_filt_conv_sjis_mobile_flush
132 };
133 
134 const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
135  	mbfl_no_encoding_sjis_sb,
136  	mbfl_no_encoding_wchar,
137  	mbfl_filt_conv_common_ctor,
138  	mbfl_filt_conv_common_dtor,
139  	mbfl_filt_conv_sjis_mobile_wchar,
140  	mbfl_filt_conv_common_flush
141 };
142 
143 const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
144  	mbfl_no_encoding_wchar,
145  	mbfl_no_encoding_sjis_sb,
146  	mbfl_filt_conv_common_ctor,
147  	mbfl_filt_conv_common_dtor,
148  	mbfl_filt_conv_wchar_sjis_mobile,
149 	mbfl_filt_conv_sjis_mobile_flush
150 };
151 
/*
 * National-flag emoji support.
 *
 * nflags_s holds ten two-letter country codes (exactly two chars per row,
 * NOT NUL-terminated).  Entry i of nflags_code_kddi / nflags_code_sb is the
 * carrier emoji code for the flag whose letters are nflags_s[i].
 */
static const char nflags_s[10][2] = {
	"CN", "DE", "ES", "FR", "GB",
	"IT", "JP", "KR", "RU", "US"
};

static const int nflags_code_kddi[10] = {
	0x2549, 0x2546, 0x24c0, 0x2545, 0x2548,	/* CN DE ES FR GB */
	0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7	/* IT JP KR RU US */
};

static const int nflags_code_sb[10] = {
	0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07,	/* CN DE ES FR GB */
	0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03	/* IT JP KR RU US */
};
155 
/*
 * DoCoMo code ranges and the value each range starts at on the other side.
 * Row layout is { first code, last code, first mapped value }, i.e. the
 * shape consumed by mbfilter_conv_map_tbl() / mbfilter_conv_r_map_tbl().
 * (The table is not referenced in this file; the mapped values look like
 * Private Use Area code points -- confirm against the callers.)
 */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	/* first   last    mapped */
	{ 0x28c2, 0x292f, 0xe63e },
	{ 0x2930, 0x2934, 0xe6ac },
	{ 0x2935, 0x2951, 0xe6b1 },
	{ 0x2952, 0x29db, 0xe6ce }
};
162 
/*
 * KDDI code ranges, row layout { first code, last code, first mapped value }
 * (the shape consumed by mbfilter_conv_map_tbl() / mbfilter_conv_r_map_tbl()).
 * Rows are ordered by mapped value, not by source code.
 * (Not referenced in this file; mapped values look like Private Use Area
 * code points -- confirm against the callers.)
 */
const unsigned short mbfl_kddi2uni_pua[7][3] = {
	/* first   last    mapped */
	{ 0x26ec, 0x2838, 0xe468 },
	{ 0x284c, 0x2863, 0xe5b5 },
	{ 0x24b8, 0x24ca, 0xe5cd },
	{ 0x24cb, 0x2545, 0xea80 },
	{ 0x2839, 0x284b, 0xeafb },
	{ 0x2546, 0x25c0, 0xeb0e },
	{ 0x25c1, 0x25c6, 0xeb89 }
};
172 
/*
 * SoftBank code ranges, row layout { first code, last code, first mapped
 * value } (the shape consumed by mbfilter_conv_map_tbl() /
 * mbfilter_conv_r_map_tbl()).
 * (Not referenced in this file; mapped values look like Private Use Area
 * code points -- confirm against the callers.)
 */
const unsigned short mbfl_sb2uni_pua[6][3] = {
	/* first   last    mapped */
	{ 0x27a9, 0x2802, 0xe101 },
	{ 0x2808, 0x2861, 0xe201 },
	{ 0x2921, 0x297a, 0xe001 },
	{ 0x2980, 0x29cc, 0xe301 },
	{ 0x2a99, 0x2ae4, 0xe401 },
	{ 0x2af8, 0x2b35, 0xe501 }
};
181 
/*
 * Second KDDI range table ("_b"), same row layout as the other tables:
 * { first code, last code, first mapped value }.
 * (Not referenced in this file; which KDDI numbering scheme this variant
 * corresponds to cannot be told from here -- confirm against the callers.)
 */
const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	/* first   last    mapped */
	{ 0x24b8, 0x24f6, 0xec40 },
	{ 0x24f7, 0x2573, 0xec80 },
	{ 0x2574, 0x25b2, 0xed40 },
	{ 0x25b3, 0x25c6, 0xed80 },
	{ 0x26ec, 0x272a, 0xef40 },
	{ 0x272b, 0x27a7, 0xef80 },
	{ 0x27a8, 0x27e6, 0xf040 },
	{ 0x27e7, 0x2863, 0xf080 }
};
192 
/*
 * Regional-indicator code point for an ASCII capital letter:
 * 0x1F1A5 + 'A' (0x41) == U+1F1E6, ..., 0x1F1A5 + 'Z' (0x5A) == U+1F1FF.
 */
#define NFLAGS(c) (0x1F1A5 + (int)(c))

/*
 * Evaluate `statement` once; if its value is negative, make the ENCLOSING
 * function return -1.  Only usable inside functions returning int.
 */
#define CK(statement)				\
		do {						\
			if ((statement) < 0) {	\
				return (-1);		\
			}						\
		} while (0)
196 
/*
 * Convert a JIS row/cell pair (c1, c2) into the two Shift_JIS bytes
 * (s1, s2).  s1 and s2 must be assignable lvalues; c1 and c2 are evaluated
 * more than once, so they must not have side effects.
 *
 *   lead byte : ((c1 - 1) >> 1) + 0x71 for rows below 0x5f, + 0xb1 otherwise
 *   trail byte: odd rows  -> c2 + 0x1f (c2 < 0x60) or c2 + 0x20 (skips 0x7f)
 *               even rows -> c2 + 0x7e
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
		do {						\
			s1 = c1;				\
			s1 = ((s1 - 1) >> 1) + (((c1) < 0x5f) ? 0x71 : 0xb1);	\
			s2 = c2;				\
			if ((c1) & 1) {			\
				s2 += ((c2) < 0x60) ? 0x1f : 0x20;	\
			} else {				\
				s2 += 0x7e;			\
			}						\
		} while (0)
217 
/*
 * Convert two Shift_JIS bytes (c1 = lead, c2 = trail) into a JIS row/cell
 * pair (s1, s2).  s1 and s2 must be assignable lvalues.
 *
 *   row : lead bytes below 0xa0 are rebased from 0x81, the others from
 *         0xc1; each lead byte covers two rows, starting at 0x21.
 *   cell: trail bytes below 0x9f belong to the first (odd) row of the pair,
 *         with the gap at 0x7f compensated; trail bytes from 0x9f upwards
 *         belong to the following row.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
		do {						\
			s1 = c1;				\
			s1 = ((s1 - ((s1 < 0xa0) ? 0x81 : 0xc1)) << 1) + 0x21;	\
			s2 = c2;				\
			if (s2 >= 0x9f) {		\
				s1++;				\
				s2 -= 0x7e;			\
			} else {				\
				s2 -= (s2 < 0x7f) ? 0x1f : 0x20;	\
			}						\
		} while (0)
239 
/*
 * Turn a linear code index held in s1 (row * 94 + cell, both zero-based)
 * into a packed JIS code:
 *
 *   c1 <- row  + 0x21
 *   c2 <- cell + 0x21
 *   s1 <- (c1 << 8) | c2      (s1 is overwritten in place)
 *   s2 <- 1
 *
 * All four arguments must be assignable lvalues.  The body is wrapped in
 * do { ... } while (0) so the macro behaves as a single statement, e.g.
 * under an unbraced `if`.
 */
#define CODE2JIS(c1,c2,s1,s2)			\
	do {								\
		c1 = (s1)/94+0x21;				\
		c2 = (s1)-94*((c1)-0x21)+0x21;	\
		s1 = ((c1) << 8) | (c2);		\
		s2 = 1;							\
	} while (0)
245 
/*
 * Forward range lookup.
 *
 * Each row of `map` is { first, last, base }.  The first row (of `n`) whose
 * inclusive range [first, last] contains `c` wins, and *w receives
 * c - first + base.
 *
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 */
int
mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int idx = 0;

	while (idx < n) {
		const unsigned short *range = map[idx];

		if (c >= range[0] && c <= range[1]) {
			*w = c - range[0] + range[2];
			return 1;
		}
		idx++;
	}

	return 0;
}
260 
/*
 * Reverse range lookup (inverse of mbfilter_conv_map_tbl).
 *
 * Each row of `map` is { first, last, base }.  The first row (of `n`) for
 * which `c` lies in [base, base + (last - first)] wins, and *w receives
 * c - base + first.
 *
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 */
int
mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int idx;

	for (idx = 0; idx < n; ++idx) {
		const int src_first = map[idx][0];
		const int src_last  = map[idx][1];
		const int dst_first = map[idx][2];
		const int dst_last  = dst_first - src_first + src_last;

		if (c < dst_first || c > dst_last) {
			continue;
		}
		*w = c + src_first - dst_first;
		return 1;
	}

	return 0;
}
275 
276 int
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)277 mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
278 {
279 	int w = s;
280 	if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
281 		if (s >= mb_tbl_code2uni_docomo1_min + 0x00a2 &&
282 			s <= mb_tbl_code2uni_docomo1_min + 0x00ad &&
283 			s != mb_tbl_code2uni_docomo1_min + 0x00a3) {
284 			w =  0x20E3;
285 			*snd = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
286 			if (*snd > 0xf000) {
287 				*snd += 0x10000;
288 			}
289 		} else {
290 			w = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
291 			if (w > 0xf000) {
292 				w += 0x10000;
293 			} else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
294 				w += 0xf0000;
295 			}
296 			*snd = 0;
297 			if (!w) {
298 				w = s;
299 			}
300 		}
301 	}
302 
303 	return w;
304 }
305 
306 int
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)307 mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
308 {
309 	int w = s, si, c;
310 	const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
311 
312 	*snd = 0;
313 	if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
314 		si = s - mb_tbl_code2uni_kddi1_min;
315 		if (si == 0x0008) { /* ES */
316 			*snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
317 		} else if (si == 0x0009) { /* RU */
318 			*snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
319 		} else if (si >= 0x008d && si <= 0x0092) {
320 			c = nflags_order_kddi[si-0x008d];
321 			*snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
322 		} else if (si == 0x0104) {
323 			*snd = 0x0023; w = 0x20E3;
324 		} else {
325 			w = mb_tbl_code2uni_kddi1[si];
326 			if (w > 0xf000) {
327 				w += 0x10000;
328 			} else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
329 				w += 0xf0000;
330 			}
331 		}
332 	} else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
333 		si = s - mb_tbl_code2uni_kddi2_min;
334 		if (si == 100) { /* JP */
335 			*snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
336 		} else if (si >= 0x00ba && si <= 0x00c2) {
337 			*snd = si-0x00ba+0x0031; w = 0x20E3;
338 		} else if (si == 0x010b) { /* US */
339 			*snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
340 		} else if (si == 0x0144) {
341 			*snd = 0x0030; w = 0x20E3;
342 		} else {
343 			w = mb_tbl_code2uni_kddi2[si];
344 			if (w > 0xf000) {
345 				w += 0x10000;
346 			} else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
347 				w += 0xf0000;
348 			}
349 		}
350 	}
351 	return w;
352 }
353 
354 int
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)355 mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
356 {
357 	int w = s, si, c;
358 	const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
359 
360 	*snd = 0;
361 	if (s >= mb_tbl_code2uni_sb1_min &&	s <= mb_tbl_code2uni_sb1_max) {
362 		si = s - mb_tbl_code2uni_sb1_min;
363 		if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
364 			*snd =  mb_tbl_code2uni_sb1[si];
365 			if (*snd > 0xf000) {
366 				*snd += 0x10000;
367 			}
368 			w = 0x20E3;
369 		} else {
370 			w = mb_tbl_code2uni_sb1[si];
371 			if (w > 0xf000) {
372 				w += 0x10000;
373 			} else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
374 				w += 0xf0000;
375 			}
376 		}
377 	} else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
378 		si = s - mb_tbl_code2uni_sb2_min;
379 		w = mb_tbl_code2uni_sb2[si];
380 		if (w > 0xf000) {
381 			w += 0x10000;
382 		} else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
383 			w += 0xf0000;
384 		}
385 	} else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
386 		si = s - mb_tbl_code2uni_sb3_min;
387 		if (si >= 0x0069 && si <= 0x0072) {
388 			c = nflags_order_sb[si-0x0069];
389 			*snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
390 		} else {
391 			w = mb_tbl_code2uni_sb3[si];
392 			if (w > 0xf000) {
393 				w += 0x10000;
394 			} else if (w > 0xe000) { /* unsuported by Unicode 6.0 */
395 				w += 0xf0000;
396 			}
397 		}
398 	}
399 	return w;
400 }
401 
402 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)403 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
404 {
405 	int i, match = 0, c1s;
406 
407 	if (filter->status == 1) {
408 		c1s = filter->cache;
409 		filter->cache = 0;
410 		filter->status = 0;
411 		if (c == 0x20E3) {
412 			if (c1s == 0x0023) {
413 				*s1 = 0x2964;
414 				match = 1;
415 			} else if (c1s == 0x0030) {
416 				*s1 = 0x296f;
417 				match = 1;
418 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
419 				*s1 = 0x2966 + (c1s - 0x0031);
420 				match = 1;
421 			}
422 		} else {
423 			CK((*filter->output_function)(c1s, filter->data));
424 		}
425 	} else {
426 		if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
427 			filter->status = 1;
428 			filter->cache = c;
429 			*s1 = -1;
430 			return match;
431 		}
432 
433 		if (c == 0x00A9) {
434 			*s1 = 0x29b5; match = 1;
435 		} else if (c == 0x00AE) {
436 			*s1 = 0x29ba; match = 1;
437 		} else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
438 			i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
439 			if (i >= 0) {
440 				*s1 = mb_tbl_uni_docomo2code2_value[i];
441 				match = 1;
442 			}
443 		} else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
444 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
445 			if (i >= 0) {
446 				*s1 = mb_tbl_uni_docomo2code3_value[i];
447 				match = 1;
448 			}
449 		} else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
450 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
451 			if (i >= 0) {
452 				*s1 = mb_tbl_uni_docomo2code5_val[i];
453 				match = 1;
454 			}
455 		}
456 	}
457 
458 	return match;
459 }
460 
461 int
mbfilter_unicode2sjis_emoji_kddi(int c,int * s1,mbfl_convert_filter * filter)462 mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
463 {
464 	int i, match = 0, c1s;
465 
466 	if (filter->status == 1) {
467 		c1s = filter->cache;
468 		filter->cache = 0;
469 		filter->status = 0;
470 		if (c == 0x20E3) {
471 			if (c1s == 0x0023) {
472 				*s1 = 0x25bc;
473 				match = 1;
474 			} else if (c1s == 0x0030) {
475 				*s1 = 0x2830;
476 				match = 1;
477 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
478 				*s1 = 0x27a6 + (c1s - 0x0031);
479 				match = 1;
480 			}
481 		} else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
482 			for (i=0; i<10; i++) {
483 				if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
484 					*s1 = nflags_code_kddi[i];
485 					match = 1;
486 					break;
487 				}
488 			}
489 		} else {
490 			if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
491 				c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
492 				CK((*filter->output_function)(c1s, filter->data));
493 			}
494 		}
495 	} else {
496 		if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
497 			(c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
498 			filter->status = 1;
499 			filter->cache = c;
500 			*s1 = -1;
501 			return match;
502 		}
503 
504 		if (c == 0x00A9) {
505 			*s1 = 0x27dc; match = 1;
506 		} else if (c == 0x00AE) {
507 			*s1 = 0x27dd; match = 1;
508 		} else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
509 			i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
510 			if (i >= 0) {
511 				*s1 = mb_tbl_uni_kddi2code2_value[i];
512 				match = 1;
513 			}
514 		} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
515 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
516 			if (i >= 0) {
517 				*s1 = mb_tbl_uni_kddi2code3_value[i];
518 				match = 1;
519 			}
520 		} else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
521 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
522 			if (i >= 0) {
523 				*s1 = mb_tbl_uni_kddi2code5_val[i];
524 				match = 1;
525 			}
526 		}
527 	}
528 
529 	return match;
530 }
531 
532 int
mbfilter_unicode2sjis_emoji_sb(int c,int * s1,mbfl_convert_filter * filter)533 mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
534 {
535 	int i, match = 0, c1s;
536 
537 	if (filter->status == 1) {
538 		filter->status = 0;
539 		c1s = filter->cache;
540 		filter->cache = 0;
541 		if (c == 0x20E3) {
542 			if (c1s == 0x0023) {
543 				*s1 = 0x2817;
544 				match = 1;
545 			} else if (c1s == 0x0030) {
546 				*s1 = 0x282c;
547 				match = 1;
548 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
549 				*s1 = 0x2823 + (c1s - 0x0031);
550 				match = 1;
551 			}
552 		} else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
553 			for (i=0; i<10; i++) {
554 				if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
555 					*s1 = nflags_code_sb[i];
556 					match = 1;
557 					break;
558 				}
559 			}
560 		} else {
561 			if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
562 				c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
563 				CK((*filter->output_function)(c1s, filter->data));
564 			}
565 		}
566 	} else {
567 		if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
568 			filter->status = 1;
569 			filter->cache = c;
570 			*s1 = -1;
571 			return match;
572 		}
573 
574 		if (c == 0x00A9) {
575 			*s1 = 0x2855; match = 1;
576 		} else if (c == 0x00AE) {
577 			*s1 = 0x2856; match = 1;
578 		} else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
579 			i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
580 			if (i >= 0) {
581 				*s1 = mb_tbl_uni_sb2code2_value[i];
582 				match = 1;
583 			}
584 		} else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
585 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
586 			if (i >= 0) {
587 				*s1 = mb_tbl_uni_sb2code3_value[i];
588 				match = 1;
589 			}
590 		} else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
591 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
592 			if (i >= 0) {
593 				*s1 = mb_tbl_uni_sb2code5_val[i];
594 				match = 1;
595 			}
596 		}
597 	}
598 	return match;
599 }
600 
/*
 * SJIS-Mobile (DoCoMo / KDDI / SoftBank) => wchar
 */
604 int
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)605 mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
606 {
607 	int c1, s, s1 = 0, s2 = 0, w;
608 	int snd = 0;
609 
610 retry:
611 	switch (filter->status) {
612 	case 0:
613 		if (c >= 0 && c < 0x80) {	/* latin */
614 			if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb && c == 0x1b) {
615 				filter->cache = c;
616 				filter->status = 2;
617 			} else {
618 				CK((*filter->output_function)(c, filter->data));
619 			}
620 		} else if (c > 0xa0 && c < 0xe0) {	/* kana */
621 			CK((*filter->output_function)(0xfec0 + c, filter->data));
622 		} else if (c > 0x80 && c < 0xfd && c != 0xa0) {	/* kanji first char */
623 			filter->status = 1;
624 			filter->cache = c;
625 		} else {
626 			w = c & MBFL_WCSGROUP_MASK;
627 			w |= MBFL_WCSGROUP_THROUGH;
628 			CK((*filter->output_function)(w, filter->data));
629 		}
630 		break;
631 
632 	case 1:		/* kanji second char */
633 		filter->status = 0;
634 		c1 = filter->cache;
635 		if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
636 			w = 0;
637 			SJIS_DECODE(c1, c, s1, s2);
638 			s = (s1 - 0x21)*94 + s2 - 0x21;
639 			if (s <= 137) {
640 				if (s == 31) {
641 					w = 0xff3c;			/* FULLWIDTH REVERSE SOLIDUS */
642 				} else if (s == 32) {
643 					w = 0xff5e;			/* FULLWIDTH TILDE */
644 				} else if (s == 33) {
645 					w = 0x2225;			/* PARALLEL TO */
646 				} else if (s == 60) {
647 					w = 0xff0d;			/* FULLWIDTH HYPHEN-MINUS */
648 				} else if (s == 80) {
649 					w = 0xffe0;			/* FULLWIDTH CENT SIGN */
650 				} else if (s == 81) {
651 					w = 0xffe1;			/* FULLWIDTH POUND SIGN */
652 				} else if (s == 137) {
653 					w = 0xffe2;			/* FULLWIDTH NOT SIGN */
654 				}
655 			}
656 			if (w == 0) {
657 				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {		/* vendor ext1 (13ku) */
658 					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
659 				} else if (s >= 0 && s < jisx0208_ucs_table_size) {		/* X 0208 */
660 					w = jisx0208_ucs_table[s];
661 				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {		/* vendor ext2 (89ku - 92ku) */
662 					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
663 				} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {		/* vendor ext3 (115ku - 119ku) */
664 					w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
665 				} else if (s >= (94*94) && s < (114*94)) {		/* user (95ku - 114ku) */
666 					w = s - (94*94) + 0xe000;
667 				}
668 
669  				if (s >= (94*94) && s < 119*94) {
670 					if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
671 						w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
672 					} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
673 						w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
674 					} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
675 						w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
676 					}
677 
678 					if (w > 0  && snd > 0) {
679 						CK((*filter->output_function)(snd, filter->data));
680 					}
681 				}
682 			}
683 			if (w <= 0) {
684 				w = (s1 << 8) | s2;
685 				w &= MBFL_WCSPLANE_MASK;
686 				w |= MBFL_WCSPLANE_WINCP932;
687 			}
688 			CK((*filter->output_function)(w, filter->data));
689 		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
690 			CK((*filter->output_function)(c, filter->data));
691 		} else {
692 			w = (c1 << 8) | c;
693 			w &= MBFL_WCSGROUP_MASK;
694 			w |= MBFL_WCSGROUP_THROUGH;
695 			CK((*filter->output_function)(w, filter->data));
696 		}
697 		break;
698 	/* ESC : Softbank Emoji */
699 	case 2:
700 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
701 			c == 0x24) {
702 				filter->cache = c;
703 				filter->status++;
704 		} else {
705 			filter->cache = 0;
706 			filter->status = 0;
707 			CK((*filter->output_function)(0x1b, filter->data));
708 			goto retry;
709 		}
710 		break;
711 
712 	/* ESC $ : Softbank Emoji */
713 	case 3:
714 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
715 			((c >= 0x45 && c <= 0x47) || (c >= 0x4f && c <= 0x51))) {
716 				filter->cache = c;
717 				filter->status++;
718 		} else {
719 			filter->cache = 0;
720 			filter->status = 0;
721 			CK((*filter->output_function)(0x1b, filter->data));
722 			CK((*filter->output_function)(0x24, filter->data));
723 			goto retry;
724 		}
725 		break;
726 
727 	/* ESC [GEFOPQ] : Softbank Emoji */
728 	case 4:
729 		w = 0;
730 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
731 			c1 = filter->cache;
732 
733 			if (c == 0x0f) {
734 				w = c;
735 				filter->cache = 0;
736 				filter->status = 0;
737 			} else {
738 				if (c1 == 0x47 && c >= 0x21 && c <= 0x7a) {
739 					s1 = 0x91; s2 = c;
740 				} else if (c1 == 0x45 && c >= 0x21 && c <= 0x7a) {
741 					s1 = 0x8d; s2 = c;
742 				} else if (c1 == 0x46 && c >= 0x21 && c <= 0x7a) {
743 					s1 = 0x8e; s2 = c;
744 				} else if (c1 == 0x4f && c >= 0x21 && c <= 0x6d) {
745 					s1 = 0x92; s2 = c;
746 				} else if (c1 == 0x50 && c >= 0x21 && c <= 0x6c) {
747 					s1 = 0x95; s2 = c;
748 				} else if (c1 == 0x51 && c >= 0x21 && c <= 0x5e) {
749 					s1 = 0x96; s2 = c;
750 				}
751 				s  = (s1 - 0x21)*94 + s2 - 0x21;
752 				w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
753 				if (w > 0) {
754 					if (snd > 0) {
755 						CK((*filter->output_function)(snd, filter->data));
756 					}
757 					CK((*filter->output_function)(w, filter->data));
758 				}
759 			}
760 		}
761 
762 		if (w <= 0) {
763 			c1 = filter->cache;
764 			filter->cache = 0;
765 			filter->status = 0;
766 			CK((*filter->output_function)(0x1b, filter->data));
767 			CK((*filter->output_function)(0x24, filter->data));
768 			CK((*filter->output_function)(c1 & 0xff, filter->data));
769 			goto retry;
770 		}
771 		break;
772 
773 	default:
774 		filter->status = 0;
775 		break;
776 	}
777 
778 	return c;
779 }
780 
/*
 * wchar => SJIS-Mobile (DoCoMo / KDDI / SoftBank)
 */
784 int
mbfl_filt_conv_wchar_sjis_mobile(int c,mbfl_convert_filter * filter)785 mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
786 {
787 	int c1, c2, s1, s2;
788 
789 	s1 = 0;
790 	s2 = 0;
791 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
792 		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
793 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
794 		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
795 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
796 		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
797 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
798 		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
799 	} else if (c >= 0xe000 && c < (0xe000 + 20*94)) {	/* user  (95ku - 114ku) */
800 		s1 = c - 0xe000;
801 		c1 = s1/94 + 0x7f;
802 		c2 = s1%94 + 0x21;
803 		s1 = (c1 << 8) | c2;
804 		s2 = 1;
805 	}
806 	if (s1 <= 0) {
807 		c1 = c & ~MBFL_WCSPLANE_MASK;
808 		if (c1 == MBFL_WCSPLANE_WINCP932) {
809 			s1 = c & MBFL_WCSPLANE_MASK;
810 			s2 = 1;
811 		} else if (c1 == MBFL_WCSPLANE_JIS0208) {
812 			s1 = c & MBFL_WCSPLANE_MASK;
813 		} else if (c1 == MBFL_WCSPLANE_JIS0212) {
814 			s1 = c & MBFL_WCSPLANE_MASK;
815 			s1 |= 0x8080;
816 		} else if (c == 0xa5) {		/* YEN SIGN */
817 			s1 = 0x216f;	/* FULLWIDTH YEN SIGN */
818 		} else if (c == 0x203e) {	/* OVER LINE */
819 			s1 = 0x2131;	/* FULLWIDTH MACRON */
820 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
821 			s1 = 0x2140;
822 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
823 			s1 = 0x2141;
824 		} else if (c == 0x2225) {	/* PARALLEL TO */
825 			s1 = 0x2142;
826 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
827 			s1 = 0x215d;
828 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
829 			s1 = 0x2171;
830 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
831 			s1 = 0x2172;
832 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
833 			s1 = 0x224c;
834 		}
835 	}
836 
837 	if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {	/* not found or X 0212 */
838 		s1 = -1;
839 		c1 = 0;
840 		c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
841 		while (c1 < c2) {		/* CP932 vendor ext1 (13ku) */
842 			if (c == cp932ext1_ucs_table[c1]) {
843 				s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
844 				break;
845 			}
846 			c1++;
847 		}
848 		if (s1 <= 0) {
849 			c1 = 0;
850 			c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
851 			while (c1 < c2) {		/* CP932 vendor ext2 (115ku - 119ku) */
852 				if (c == cp932ext2_ucs_table[c1]) {
853 					s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
854 					break;
855 				}
856 				c1++;
857 			}
858 		}
859 
860 		if (s1 <= 0) {
861 			c1 = 0;
862 			c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
863 			while (c1 < c2) {		/* CP932 vendor ext3 (115ku - 119ku) */
864 				if (c == cp932ext3_ucs_table[c1]) {
865 					s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
866 					break;
867 				}
868 				c1++;
869 			}
870 		}
871 		if (c == 0) {
872 			s1 = 0;
873 		} else if (s1 <= 0) {
874 			s1 = -1;
875 		}
876 	}
877 
878  	if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
879 		 mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
880 		(filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
881 		 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
882 		(filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
883 		 mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 )) {
884 		CODE2JIS(c1,c2,s1,s2);
885  	}
886 
887 	if (filter->status == 1 && filter->cache > 0) {
888 		return c;
889 	}
890 
891 	if (s1 >= 0) {
892 		if (s1 < 0x100) { /* latin or kana */
893 			CK((*filter->output_function)(s1, filter->data));
894 		} else { /* kanji */
895 			c1 = (s1 >> 8) & 0xff;
896 			c2 = s1 & 0xff;
897 			SJIS_ENCODE(c1, c2, s1, s2);
898 			CK((*filter->output_function)(s1, filter->data));
899 			CK((*filter->output_function)(s2, filter->data));
900 		}
901 	} else {
902 		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
903 			CK(mbfl_filt_conv_illegal_output(c, filter));
904 		}
905 	}
906 
907 	return c;
908 }
909 
910 int
mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter * filter)911 mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
912 {
913 	int c1 = filter->cache;
914 	if (filter->status == 1 && (c1 == 0x0023 || (c1 >= 0x0030 && c1<=0x0039))) {
915 		CK((*filter->output_function)(c1, filter->data));
916 	}
917 	filter->status = 0;
918 	filter->cache = 0;
919 
920 	if (filter->flush_function != NULL) {
921 		return (*filter->flush_function)(filter->data);
922 	}
923 
924 	return 0;
925 }
926