1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
/*
 * The source code included in this file was separated from mbfilter_sjis_open.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
 *
 */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "mbfilter.h"
35 #include "mbfilter_sjis_mobile.h"
36 
37 #include "unicode_table_cp932_ext.h"
38 #include "unicode_table_jis.h"
39 
40 #include "emoji2uni.h"
41 
42 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
43 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
44 extern const unsigned char mblen_table_sjis[];
45 
/* NULL-terminated alias lists, one per carrier-specific encoding. */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {
	"SJIS-DOCOMO",
	"shift_jis-imode",
	"x-sjis-emoji-docomo",
	NULL
};
static const char *mbfl_encoding_sjis_kddi_aliases[] = {
	"SJIS-KDDI",
	"shift_jis-kddi",
	"x-sjis-emoji-kddi",
	NULL
};
static const char *mbfl_encoding_sjis_sb_aliases[] = {
	"SJIS-SOFTBANK",
	"shift_jis-softbank",
	"x-sjis-emoji-softbank",
	NULL
};
49 
50 const mbfl_encoding mbfl_encoding_sjis_docomo = {
51  	mbfl_no_encoding_sjis_docomo,
52  	"SJIS-Mobile#DOCOMO",
53  	"Shift_JIS",
54  	(const char *(*)[])&mbfl_encoding_sjis_docomo_aliases,
55  	mblen_table_sjis,
56  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
57 	&vtbl_sjis_docomo_wchar,
58 	&vtbl_wchar_sjis_docomo
59 };
60 
61 const mbfl_encoding mbfl_encoding_sjis_kddi = {
62  	mbfl_no_encoding_sjis_kddi,
63  	"SJIS-Mobile#KDDI",
64  	"Shift_JIS",
65  	(const char *(*)[])&mbfl_encoding_sjis_kddi_aliases,
66  	mblen_table_sjis,
67  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
68 	&vtbl_sjis_kddi_wchar,
69 	&vtbl_wchar_sjis_kddi
70 };
71 
72 const mbfl_encoding mbfl_encoding_sjis_sb = {
73  	mbfl_no_encoding_sjis_sb,
74  	"SJIS-Mobile#SOFTBANK",
75  	"Shift_JIS",
76  	(const char *(*)[])&mbfl_encoding_sjis_sb_aliases,
77  	mblen_table_sjis,
78  	MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
79 	&vtbl_sjis_sb_wchar,
80 	&vtbl_wchar_sjis_sb
81 };
82 
83 const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
84 	mbfl_no_encoding_sjis_docomo,
85 	mbfl_filt_ident_common_ctor,
86 	mbfl_filt_ident_common_dtor,
87 	mbfl_filt_ident_sjis
88 };
89 
90 const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
91 	mbfl_no_encoding_sjis_kddi,
92 	mbfl_filt_ident_common_ctor,
93 	mbfl_filt_ident_common_dtor,
94 	mbfl_filt_ident_sjis
95 };
96 
97 const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
98 	mbfl_no_encoding_sjis_sb,
99 	mbfl_filt_ident_common_ctor,
100 	mbfl_filt_ident_common_dtor,
101 	mbfl_filt_ident_sjis
102 };
103 
104 const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
105  	mbfl_no_encoding_sjis_docomo,
106  	mbfl_no_encoding_wchar,
107  	mbfl_filt_conv_common_ctor,
108  	mbfl_filt_conv_common_dtor,
109  	mbfl_filt_conv_sjis_mobile_wchar,
110  	mbfl_filt_conv_common_flush
111 };
112 
113 const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
114  	mbfl_no_encoding_wchar,
115  	mbfl_no_encoding_sjis_docomo,
116  	mbfl_filt_conv_common_ctor,
117  	mbfl_filt_conv_common_dtor,
118  	mbfl_filt_conv_wchar_sjis_mobile,
119  	mbfl_filt_conv_sjis_mobile_flush
120 };
121 
122 const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
123  	mbfl_no_encoding_sjis_kddi,
124  	mbfl_no_encoding_wchar,
125  	mbfl_filt_conv_common_ctor,
126  	mbfl_filt_conv_common_dtor,
127  	mbfl_filt_conv_sjis_mobile_wchar,
128  	mbfl_filt_conv_common_flush
129 };
130 
131 const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
132  	mbfl_no_encoding_wchar,
133  	mbfl_no_encoding_sjis_kddi,
134  	mbfl_filt_conv_common_ctor,
135  	mbfl_filt_conv_common_dtor,
136  	mbfl_filt_conv_wchar_sjis_mobile,
137 	mbfl_filt_conv_sjis_mobile_flush
138 };
139 
140 const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
141  	mbfl_no_encoding_sjis_sb,
142  	mbfl_no_encoding_wchar,
143  	mbfl_filt_conv_common_ctor,
144  	mbfl_filt_conv_common_dtor,
145  	mbfl_filt_conv_sjis_mobile_wchar,
146  	mbfl_filt_conv_common_flush
147 };
148 
149 const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
150  	mbfl_no_encoding_wchar,
151  	mbfl_no_encoding_sjis_sb,
152  	mbfl_filt_conv_common_ctor,
153  	mbfl_filt_conv_common_dtor,
154  	mbfl_filt_conv_wchar_sjis_mobile,
155 	mbfl_filt_conv_sjis_mobile_flush
156 };
157 
/*
 * National-flag tables; the three arrays are parallel (same row order).
 *   nflags_s          two-letter code of each flag (no NUL terminator)
 *   nflags_code_kddi  code used for that flag by the KDDI encoder
 *   nflags_code_sb    code used for that flag by the SoftBank encoder
 */
static const char nflags_s[10][2] = {
	{'C', 'N'}, {'D', 'E'}, {'E', 'S'}, {'F', 'R'}, {'G', 'B'},
	{'I', 'T'}, {'J', 'P'}, {'K', 'R'}, {'R', 'U'}, {'U', 'S'}
};
static const int nflags_code_kddi[10] = {
	0x2549, 0x2546, 0x24c0, 0x2545, 0x2548,
	0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7
};
static const int nflags_code_sb[10] = {
	0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07,
	0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03
};
161 
/*
 * DoCoMo code ranges and their target bases.
 * Rows have the {first, last, base} shape consumed by
 * mbfilter_conv_map_tbl() / mbfilter_conv_r_map_tbl(); presumably passed
 * to them by callers outside this file.
 */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	/* first,  last,   base */
	{ 0x28c2, 0x292f, 0xe63e },
	{ 0x2930, 0x2934, 0xe6ac },
	{ 0x2935, 0x2951, 0xe6b1 },
	{ 0x2952, 0x29db, 0xe6ce },
};
168 
/*
 * KDDI code ranges and their target bases.
 * Rows have the {first, last, base} shape consumed by
 * mbfilter_conv_map_tbl() / mbfilter_conv_r_map_tbl(); presumably passed
 * to them by callers outside this file.
 */
const unsigned short mbfl_kddi2uni_pua[7][3] = {
	/* first,  last,   base */
	{ 0x26ec, 0x2838, 0xe468 },
	{ 0x284c, 0x2863, 0xe5b5 },
	{ 0x24b8, 0x24ca, 0xe5cd },
	{ 0x24cb, 0x2545, 0xea80 },
	{ 0x2839, 0x284b, 0xeafb },
	{ 0x2546, 0x25c0, 0xeb0e },
	{ 0x25c1, 0x25c6, 0xeb89 },
};
178 
/*
 * SoftBank code ranges and their target bases.
 * Rows have the {first, last, base} shape consumed by
 * mbfilter_conv_map_tbl() / mbfilter_conv_r_map_tbl(); presumably passed
 * to them by callers outside this file.
 */
const unsigned short mbfl_sb2uni_pua[6][3] = {
	/* first,  last,   base */
	{ 0x27a9, 0x2802, 0xe101 },
	{ 0x2808, 0x2861, 0xe201 },
	{ 0x2921, 0x297a, 0xe001 },
	{ 0x2980, 0x29cc, 0xe301 },
	{ 0x2a99, 0x2ae4, 0xe401 },
	{ 0x2af8, 0x2b35, 0xe501 },
};
187 
/*
 * Second set of KDDI code ranges, with different target bases from
 * mbfl_kddi2uni_pua.  Same {first, last, base} row shape.
 */
const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	/* first,  last,   base */
	{ 0x24b8, 0x24f6, 0xec40 },
	{ 0x24f7, 0x2573, 0xec80 },
	{ 0x2574, 0x25b2, 0xed40 },
	{ 0x25b3, 0x25c6, 0xed80 },
	{ 0x26ec, 0x272a, 0xef40 },
	{ 0x272b, 0x27a7, 0xef80 },
	{ 0x27a8, 0x27e6, 0xf040 },
	{ 0x27e7, 0x2863, 0xf080 },
};
198 
/* Offsets a character code by 0x1F1A5, so 'A' (0x41) yields 0x1F1E6. */
#define NFLAGS(c) ((int)(c) + 0x1F1A5)
200 
/*
 * Evaluates statement once and makes the enclosing function return -1
 * when the result is negative.
 */
#define CK(statement)				\
	do {							\
		if ((statement) < 0) {		\
			return (-1);			\
		}							\
	} while (0)
202 
/*
 * Turns a JIS byte pair (c1, c2) into the Shift_JIS byte pair (s1, s2).
 * s1 and s2 must be lvalues distinct from c1 and c2.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
		do {						\
			s1 = (((c1) - 1) >> 1) + (((c1) < 0x5f) ? 0x71 : 0xb1);	\
			if ((c1) & 1) {			\
				s2 = (c2) + (((c2) < 0x60) ? 0x1f : 0x20);	\
			} else {				\
				s2 = (c2) + 0x7e;	\
			}						\
		} while (0)
223 
/*
 * Turns a Shift_JIS byte pair (c1, c2) into the JIS byte pair (s1, s2).
 * s1 and s2 must be lvalues distinct from c1 and c2.
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
		do {						\
			s1 = ((((c1) < 0xa0) ? ((c1) - 0x81) : ((c1) - 0xc1)) << 1) + 0x21;	\
			if ((c2) < 0x9f) {		\
				s2 = (c2) - (((c2) < 0x7f) ? 0x1f : 0x20);	\
			} else {				\
				s1++;				\
				s2 = (c2) - 0x7e;	\
			}						\
		} while (0)
245 
/*
 * Converts the linear code held in s1 (row * 94 + cell) into a packed
 * JIS value: c1 and c2 receive the two bytes, s1 is overwritten with
 * (c1 << 8) | c2, and s2 is set to 1.
 *
 * Wrapped in do { } while (0) so the four assignments act as a single
 * statement, e.g. under an unbraced if.
 */
#define CODE2JIS(c1,c2,s1,s2)				\
	do {									\
		c1 = (s1)/94+0x21;					\
		c2 = (s1)-94*((c1)-0x21)+0x21;		\
		s1 = ((c1) << 8) | (c2);			\
		s2 = 1;								\
	} while (0)
251 
/*
 * Forward lookup in a {first, last, base} range table.
 *
 * Scans the first n rows of map; for the first row with
 * first <= c <= last, stores base + (c - first) in *w and returns 1.
 * Returns 0 and leaves *w untouched when no row contains c.
 */
int
mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row = 0;

	while (row < n) {
		const unsigned short *range = map[row];

		if (c >= range[0] && c <= range[1]) {
			*w = c - range[0] + range[2];
			return 1;
		}
		row++;
	}
	return 0;
}
266 
/*
 * Reverse lookup in a {first, last, base} range table.
 *
 * Scans the first n rows of map; for the first row with
 * base <= c <= base + (last - first), stores first + (c - base) in *w
 * and returns 1.  Returns 0 and leaves *w untouched otherwise.
 */
int
mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row;

	for (row = 0; row < n; row++) {
		const unsigned short *range = map[row];
		const int lo = range[2];
		const int hi = range[2] - range[0] + range[1];

		if (c < lo || c > hi) {
			continue;
		}
		*w = range[0] + (c - lo);
		return 1;
	}
	return 0;
}
281 
282 int
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)283 mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
284 {
285 	int w = s;
286 	if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
287 		if (s >= mb_tbl_code2uni_docomo1_min + 0x00a2 &&
288 			s <= mb_tbl_code2uni_docomo1_min + 0x00ad &&
289 			s != mb_tbl_code2uni_docomo1_min + 0x00a3) {
290 			w =  0x20E3;
291 			*snd = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
292 			if (*snd > 0xf000) {
293 				*snd += 0x10000;
294 			}
295 		} else {
296 			w = mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min];
297 			if (w > 0xf000) {
298 				w += 0x10000;
299 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
300 				w += 0xf0000;
301 			}
302 			*snd = 0;
303 			if (!w) {
304 				w = s;
305 			}
306 		}
307 	}
308 
309 	return w;
310 }
311 
312 int
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)313 mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
314 {
315 	int w = s, si, c;
316 	const int nflags_order_kddi[] = {3, 1, 5, 4, 0, 7};
317 
318 	*snd = 0;
319 	if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
320 		si = s - mb_tbl_code2uni_kddi1_min;
321 		if (si == 0x0008) { /* ES */
322 			*snd = NFLAGS(nflags_s[2][0]); w = NFLAGS(nflags_s[2][1]);
323 		} else if (si == 0x0009) { /* RU */
324 			*snd = NFLAGS(nflags_s[8][0]); w = NFLAGS(nflags_s[8][1]);
325 		} else if (si >= 0x008d && si <= 0x0092) {
326 			c = nflags_order_kddi[si-0x008d];
327 			*snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
328 		} else if (si == 0x0104) {
329 			*snd = 0x0023; w = 0x20E3;
330 		} else {
331 			w = mb_tbl_code2uni_kddi1[si];
332 			if (w > 0xf000) {
333 				w += 0x10000;
334 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
335 				w += 0xf0000;
336 			}
337 		}
338 	} else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
339 		si = s - mb_tbl_code2uni_kddi2_min;
340 		if (si == 100) { /* JP */
341 			*snd = NFLAGS(nflags_s[6][0]); w = NFLAGS(nflags_s[6][1]);
342 		} else if (si >= 0x00ba && si <= 0x00c2) {
343 			*snd = si-0x00ba+0x0031; w = 0x20E3;
344 		} else if (si == 0x010b) { /* US */
345 			*snd = NFLAGS(nflags_s[9][0]); w = NFLAGS(nflags_s[9][1]);
346 		} else if (si == 0x0144) {
347 			*snd = 0x0030; w = 0x20E3;
348 		} else {
349 			w = mb_tbl_code2uni_kddi2[si];
350 			if (w > 0xf000) {
351 				w += 0x10000;
352 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
353 				w += 0xf0000;
354 			}
355 		}
356 	}
357 	return w;
358 }
359 
360 int
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)361 mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
362 {
363 	int w = s, si, c;
364 	const int nflags_order_sb[10] = {6, 9, 3, 1, 5, 4, 2, 8, 0, 7};
365 
366 	*snd = 0;
367 	if (s >= mb_tbl_code2uni_sb1_min &&	s <= mb_tbl_code2uni_sb1_max) {
368 		si = s - mb_tbl_code2uni_sb1_min;
369 		if (si == 0x006e || (si >= 0x007a && si <= 0x0083)) {
370 			*snd =  mb_tbl_code2uni_sb1[si];
371 			if (*snd > 0xf000) {
372 				*snd += 0x10000;
373 			}
374 			w = 0x20E3;
375 		} else {
376 			w = mb_tbl_code2uni_sb1[si];
377 			if (w > 0xf000) {
378 				w += 0x10000;
379 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
380 				w += 0xf0000;
381 			}
382 		}
383 	} else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
384 		si = s - mb_tbl_code2uni_sb2_min;
385 		w = mb_tbl_code2uni_sb2[si];
386 		if (w > 0xf000) {
387 			w += 0x10000;
388 		} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
389 			w += 0xf0000;
390 		}
391 	} else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
392 		si = s - mb_tbl_code2uni_sb3_min;
393 		if (si >= 0x0069 && si <= 0x0072) {
394 			c = nflags_order_sb[si-0x0069];
395 			*snd = NFLAGS(nflags_s[c][0]); w = NFLAGS(nflags_s[c][1]);
396 		} else {
397 			w = mb_tbl_code2uni_sb3[si];
398 			if (w > 0xf000) {
399 				w += 0x10000;
400 			} else if (w > 0xe000) { /* unsupported by Unicode 6.0 */
401 				w += 0xf0000;
402 			}
403 		}
404 	}
405 	return w;
406 }
407 
408 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)409 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
410 {
411 	int i, match = 0, c1s;
412 
413 	if (filter->status == 1) {
414 		c1s = filter->cache;
415 		filter->cache = 0;
416 		filter->status = 0;
417 		if (c == 0x20E3) {
418 			if (c1s == 0x0023) {
419 				*s1 = 0x2964;
420 				match = 1;
421 			} else if (c1s == 0x0030) {
422 				*s1 = 0x296f;
423 				match = 1;
424 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
425 				*s1 = 0x2966 + (c1s - 0x0031);
426 				match = 1;
427 			}
428 		} else {
429 			CK((*filter->output_function)(c1s, filter->data));
430 		}
431 	} else {
432 		if (c == 0x0023 || (c >= 0x0030 && c<=0x0039)) {
433 			filter->status = 1;
434 			filter->cache = c;
435 			*s1 = -1;
436 			return match;
437 		}
438 
439 		if (c == 0x00A9) {
440 			*s1 = 0x29b5; match = 1;
441 		} else if (c == 0x00AE) {
442 			*s1 = 0x29ba; match = 1;
443 		} else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
444 			i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
445 			if (i >= 0) {
446 				*s1 = mb_tbl_uni_docomo2code2_value[i];
447 				match = 1;
448 			}
449 		} else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
450 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
451 			if (i >= 0) {
452 				*s1 = mb_tbl_uni_docomo2code3_value[i];
453 				match = 1;
454 			}
455 		} else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
456 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
457 			if (i >= 0) {
458 				*s1 = mb_tbl_uni_docomo2code5_val[i];
459 				match = 1;
460 			}
461 		}
462 	}
463 
464 	return match;
465 }
466 
467 int
mbfilter_unicode2sjis_emoji_kddi(int c,int * s1,mbfl_convert_filter * filter)468 mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
469 {
470 	int i, match = 0, c1s;
471 
472 	if (filter->status == 1) {
473 		c1s = filter->cache;
474 		filter->cache = 0;
475 		filter->status = 0;
476 		if (c == 0x20E3) {
477 			if (c1s == 0x0023) {
478 				*s1 = 0x25bc;
479 				match = 1;
480 			} else if (c1s == 0x0030) {
481 				*s1 = 0x2830;
482 				match = 1;
483 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
484 				*s1 = 0x27a6 + (c1s - 0x0031);
485 				match = 1;
486 			}
487 		} else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
488 			for (i=0; i<10; i++) {
489 				if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
490 					*s1 = nflags_code_kddi[i];
491 					match = 1;
492 					break;
493 				}
494 			}
495 		} else {
496 			if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
497 				c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
498 				CK((*filter->output_function)(c1s, filter->data));
499 			}
500 		}
501 	} else {
502 		if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) ||
503 			(c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
504 			filter->status = 1;
505 			filter->cache = c;
506 			*s1 = -1;
507 			return match;
508 		}
509 
510 		if (c == 0x00A9) {
511 			*s1 = 0x27dc; match = 1;
512 		} else if (c == 0x00AE) {
513 			*s1 = 0x27dd; match = 1;
514 		} else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
515 			i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
516 			if (i >= 0) {
517 				*s1 = mb_tbl_uni_kddi2code2_value[i];
518 				match = 1;
519 			}
520 		} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
521 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
522 			if (i >= 0) {
523 				*s1 = mb_tbl_uni_kddi2code3_value[i];
524 				match = 1;
525 			}
526 		} else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
527 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
528 			if (i >= 0) {
529 				*s1 = mb_tbl_uni_kddi2code5_val[i];
530 				match = 1;
531 			}
532 		}
533 	}
534 
535 	return match;
536 }
537 
538 int
mbfilter_unicode2sjis_emoji_sb(int c,int * s1,mbfl_convert_filter * filter)539 mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
540 {
541 	int i, match = 0, c1s;
542 
543 	if (filter->status == 1) {
544 		filter->status = 0;
545 		c1s = filter->cache;
546 		filter->cache = 0;
547 		if (c == 0x20E3) {
548 			if (c1s == 0x0023) {
549 				*s1 = 0x2817;
550 				match = 1;
551 			} else if (c1s == 0x0030) {
552 				*s1 = 0x282c;
553 				match = 1;
554 			} else if (c1s >= 0x0031 && c1s <= 0x0039) {
555 				*s1 = 0x2823 + (c1s - 0x0031);
556 				match = 1;
557 			}
558 		} else if ((c >= NFLAGS(0x41) && c <= NFLAGS(0x5A)) && (c1s >= NFLAGS(0x41) && c1s <= NFLAGS(0x5A))) {
559 			for (i=0; i<10; i++) {
560 				if (c1s == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
561 					*s1 = nflags_code_sb[i];
562 					match = 1;
563 					break;
564 				}
565 			}
566 		} else {
567 			if (c1s >= ucs_a1_jis_table_min && c1s < ucs_a1_jis_table_max) {
568 				c1s = ucs_a1_jis_table[c1s - ucs_a1_jis_table_min];
569 				CK((*filter->output_function)(c1s, filter->data));
570 			}
571 		}
572 	} else {
573 		if (c == 0x0023 || ( c >= 0x0030 && c<=0x0039) || (c >= NFLAGS(0x41) && c<= NFLAGS(0x5A))) {
574 			filter->status = 1;
575 			filter->cache = c;
576 			*s1 = -1;
577 			return match;
578 		}
579 
580 		if (c == 0x00A9) {
581 			*s1 = 0x2855; match = 1;
582 		} else if (c == 0x00AE) {
583 			*s1 = 0x2856; match = 1;
584 		} else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
585 			i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
586 			if (i >= 0) {
587 				*s1 = mb_tbl_uni_sb2code2_value[i];
588 				match = 1;
589 			}
590 		} else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
591 			i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
592 			if (i >= 0) {
593 				*s1 = mb_tbl_uni_sb2code3_value[i];
594 				match = 1;
595 			}
596 		} else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
597 			i = mbfl_bisec_srch2(c - 0xf0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
598 			if (i >= 0) {
599 				*s1 = mb_tbl_uni_sb2code5_val[i];
600 				match = 1;
601 			}
602 		}
603 	}
604 	return match;
605 }
606 
607 /*
608  * SJIS-win => wchar
609  */
610 int
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)611 mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
612 {
613 	int c1, s, s1 = 0, s2 = 0, w;
614 	int snd = 0;
615 
616 retry:
617 	switch (filter->status) {
618 	case 0:
619 		if (c >= 0 && c < 0x80) {	/* latin */
620 			if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb && c == 0x1b) {
621 				filter->cache = c;
622 				filter->status = 2;
623 			} else {
624 				CK((*filter->output_function)(c, filter->data));
625 			}
626 		} else if (c > 0xa0 && c < 0xe0) {	/* kana */
627 			CK((*filter->output_function)(0xfec0 + c, filter->data));
628 		} else if (c > 0x80 && c < 0xfd && c != 0xa0) {	/* kanji first char */
629 			filter->status = 1;
630 			filter->cache = c;
631 		} else {
632 			w = c & MBFL_WCSGROUP_MASK;
633 			w |= MBFL_WCSGROUP_THROUGH;
634 			CK((*filter->output_function)(w, filter->data));
635 		}
636 		break;
637 
638 	case 1:		/* kanji second char */
639 		filter->status = 0;
640 		c1 = filter->cache;
641 		if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
642 			w = 0;
643 			SJIS_DECODE(c1, c, s1, s2);
644 			s = (s1 - 0x21)*94 + s2 - 0x21;
645 			if (s <= 137) {
646 				if (s == 31) {
647 					w = 0xff3c;			/* FULLWIDTH REVERSE SOLIDUS */
648 				} else if (s == 32) {
649 					w = 0xff5e;			/* FULLWIDTH TILDE */
650 				} else if (s == 33) {
651 					w = 0x2225;			/* PARALLEL TO */
652 				} else if (s == 60) {
653 					w = 0xff0d;			/* FULLWIDTH HYPHEN-MINUS */
654 				} else if (s == 80) {
655 					w = 0xffe0;			/* FULLWIDTH CENT SIGN */
656 				} else if (s == 81) {
657 					w = 0xffe1;			/* FULLWIDTH POUND SIGN */
658 				} else if (s == 137) {
659 					w = 0xffe2;			/* FULLWIDTH NOT SIGN */
660 				}
661 			}
662 			if (w == 0) {
663 				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {		/* vendor ext1 (13ku) */
664 					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
665 				} else if (s >= 0 && s < jisx0208_ucs_table_size) {		/* X 0208 */
666 					w = jisx0208_ucs_table[s];
667 				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {		/* vendor ext2 (89ku - 92ku) */
668 					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
669 				} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {		/* vendor ext3 (115ku - 119ku) */
670 					w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
671 				} else if (s >= (94*94) && s < (114*94)) {		/* user (95ku - 114ku) */
672 					w = s - (94*94) + 0xe000;
673 				}
674 
675  				if (s >= (94*94) && s < 119*94) {
676 					if (filter->from->no_encoding == mbfl_no_encoding_sjis_docomo) {
677 						w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
678 					} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_kddi) {
679 						w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
680 					} else if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
681 						w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
682 					}
683 
684 					if (w > 0  && snd > 0) {
685 						CK((*filter->output_function)(snd, filter->data));
686 					}
687 				}
688 			}
689 			if (w <= 0) {
690 				w = (s1 << 8) | s2;
691 				w &= MBFL_WCSPLANE_MASK;
692 				w |= MBFL_WCSPLANE_WINCP932;
693 			}
694 			CK((*filter->output_function)(w, filter->data));
695 		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
696 			CK((*filter->output_function)(c, filter->data));
697 		} else {
698 			w = (c1 << 8) | c;
699 			w &= MBFL_WCSGROUP_MASK;
700 			w |= MBFL_WCSGROUP_THROUGH;
701 			CK((*filter->output_function)(w, filter->data));
702 		}
703 		break;
704 	/* ESC : Softbank Emoji */
705 	case 2:
706 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
707 			c == 0x24) {
708 				filter->cache = c;
709 				filter->status++;
710 		} else {
711 			filter->cache = 0;
712 			filter->status = 0;
713 			CK((*filter->output_function)(0x1b, filter->data));
714 			goto retry;
715 		}
716 		break;
717 
718 	/* ESC $ : Softbank Emoji */
719 	case 3:
720 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb &&
721 			((c >= 0x45 && c <= 0x47) || (c >= 0x4f && c <= 0x51))) {
722 				filter->cache = c;
723 				filter->status++;
724 		} else {
725 			filter->cache = 0;
726 			filter->status = 0;
727 			CK((*filter->output_function)(0x1b, filter->data));
728 			CK((*filter->output_function)(0x24, filter->data));
729 			goto retry;
730 		}
731 		break;
732 
733 	/* ESC [GEFOPQ] : Softbank Emoji */
734 	case 4:
735 		w = 0;
736 		if (filter->from->no_encoding == mbfl_no_encoding_sjis_sb) {
737 			c1 = filter->cache;
738 
739 			if (c == 0x0f) {
740 				w = c;
741 				filter->cache = 0;
742 				filter->status = 0;
743 			} else {
744 				if (c1 == 0x47 && c >= 0x21 && c <= 0x7a) {
745 					s1 = 0x91; s2 = c;
746 				} else if (c1 == 0x45 && c >= 0x21 && c <= 0x7a) {
747 					s1 = 0x8d; s2 = c;
748 				} else if (c1 == 0x46 && c >= 0x21 && c <= 0x7a) {
749 					s1 = 0x8e; s2 = c;
750 				} else if (c1 == 0x4f && c >= 0x21 && c <= 0x6d) {
751 					s1 = 0x92; s2 = c;
752 				} else if (c1 == 0x50 && c >= 0x21 && c <= 0x6c) {
753 					s1 = 0x95; s2 = c;
754 				} else if (c1 == 0x51 && c >= 0x21 && c <= 0x5e) {
755 					s1 = 0x96; s2 = c;
756 				}
757 				s  = (s1 - 0x21)*94 + s2 - 0x21;
758 				w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
759 				if (w > 0) {
760 					if (snd > 0) {
761 						CK((*filter->output_function)(snd, filter->data));
762 					}
763 					CK((*filter->output_function)(w, filter->data));
764 				}
765 			}
766 		}
767 
768 		if (w <= 0) {
769 			c1 = filter->cache;
770 			filter->cache = 0;
771 			filter->status = 0;
772 			CK((*filter->output_function)(0x1b, filter->data));
773 			CK((*filter->output_function)(0x24, filter->data));
774 			CK((*filter->output_function)(c1 & 0xff, filter->data));
775 			goto retry;
776 		}
777 		break;
778 
779 	default:
780 		filter->status = 0;
781 		break;
782 	}
783 
784 	return c;
785 }
786 
787 /*
788  * wchar => SJIS-win
789  */
790 int
mbfl_filt_conv_wchar_sjis_mobile(int c,mbfl_convert_filter * filter)791 mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
792 {
793 	int c1, c2, s1, s2;
794 
795 	s1 = 0;
796 	s2 = 0;
797 	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
798 		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
799 	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
800 		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
801 	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
802 		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
803 	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
804 		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
805 	} else if (c >= 0xe000 && c < (0xe000 + 20*94)) {	/* user  (95ku - 114ku) */
806 		s1 = c - 0xe000;
807 		c1 = s1/94 + 0x7f;
808 		c2 = s1%94 + 0x21;
809 		s1 = (c1 << 8) | c2;
810 		s2 = 1;
811 	}
812 	if (s1 <= 0) {
813 		c1 = c & ~MBFL_WCSPLANE_MASK;
814 		if (c1 == MBFL_WCSPLANE_WINCP932) {
815 			s1 = c & MBFL_WCSPLANE_MASK;
816 			s2 = 1;
817 		} else if (c1 == MBFL_WCSPLANE_JIS0208) {
818 			s1 = c & MBFL_WCSPLANE_MASK;
819 		} else if (c1 == MBFL_WCSPLANE_JIS0212) {
820 			s1 = c & MBFL_WCSPLANE_MASK;
821 			s1 |= 0x8080;
822 		} else if (c == 0xa5) {		/* YEN SIGN */
823 			s1 = 0x216f;	/* FULLWIDTH YEN SIGN */
824 		} else if (c == 0x203e) {	/* OVER LINE */
825 			s1 = 0x2131;	/* FULLWIDTH MACRON */
826 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
827 			s1 = 0x2140;
828 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
829 			s1 = 0x2141;
830 		} else if (c == 0x2225) {	/* PARALLEL TO */
831 			s1 = 0x2142;
832 		} else if (c == 0xff0d) {	/* FULLWIDTH HYPHEN-MINUS */
833 			s1 = 0x215d;
834 		} else if (c == 0xffe0) {	/* FULLWIDTH CENT SIGN */
835 			s1 = 0x2171;
836 		} else if (c == 0xffe1) {	/* FULLWIDTH POUND SIGN */
837 			s1 = 0x2172;
838 		} else if (c == 0xffe2) {	/* FULLWIDTH NOT SIGN */
839 			s1 = 0x224c;
840 		}
841 	}
842 
843 	if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {	/* not found or X 0212 */
844 		s1 = -1;
845 		c1 = 0;
846 		c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
847 		while (c1 < c2) {		/* CP932 vendor ext1 (13ku) */
848 			if (c == cp932ext1_ucs_table[c1]) {
849 				s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
850 				break;
851 			}
852 			c1++;
853 		}
854 		if (s1 <= 0) {
855 			c1 = 0;
856 			c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
857 			while (c1 < c2) {		/* CP932 vendor ext2 (115ku - 119ku) */
858 				if (c == cp932ext2_ucs_table[c1]) {
859 					s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
860 					break;
861 				}
862 				c1++;
863 			}
864 		}
865 
866 		if (s1 <= 0) {
867 			c1 = 0;
868 			c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
869 			while (c1 < c2) {		/* CP932 vendor ext3 (115ku - 119ku) */
870 				if (c == cp932ext3_ucs_table[c1]) {
871 					s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
872 					break;
873 				}
874 				c1++;
875 			}
876 		}
877 		if (c == 0) {
878 			s1 = 0;
879 		} else if (s1 <= 0) {
880 			s1 = -1;
881 		}
882 	}
883 
884  	if ((filter->to->no_encoding == mbfl_no_encoding_sjis_docomo &&
885 		 mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
886 		(filter->to->no_encoding == mbfl_no_encoding_sjis_kddi &&
887 		 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
888 		(filter->to->no_encoding == mbfl_no_encoding_sjis_sb &&
889 		 mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 )) {
890 		CODE2JIS(c1,c2,s1,s2);
891  	}
892 
893 	if (filter->status == 1 && filter->cache > 0) {
894 		return c;
895 	}
896 
897 	if (s1 >= 0) {
898 		if (s1 < 0x100) { /* latin or kana */
899 			CK((*filter->output_function)(s1, filter->data));
900 		} else { /* kanji */
901 			c1 = (s1 >> 8) & 0xff;
902 			c2 = s1 & 0xff;
903 			SJIS_ENCODE(c1, c2, s1, s2);
904 			CK((*filter->output_function)(s1, filter->data));
905 			CK((*filter->output_function)(s2, filter->data));
906 		}
907 	} else {
908 		CK(mbfl_filt_conv_illegal_output(c, filter));
909 	}
910 
911 	return c;
912 }
913 
914 int
mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter * filter)915 mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
916 {
917 	int c1 = filter->cache;
918 	if (filter->status == 1 && (c1 == 0x0023 || (c1 >= 0x0030 && c1<=0x0039))) {
919 		CK((*filter->output_function)(c1, filter->data));
920 	}
921 	filter->status = 0;
922 	filter->cache = 0;
923 
924 	if (filter->flush_function != NULL) {
925 		return (*filter->flush_function)(filter->data);
926 	}
927 
928 	return 0;
929 }
930