1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
 * The source code included in this file was separated from mbfilter_sjis_open.c
26  * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_sjis_mobile.h"
32 
33 #include "unicode_table_cp932_ext.h"
34 #include "unicode_table_jis.h"
35 
36 #include "emoji2uni.h"
37 
/* Character byte-length lookup shared by the three SJIS-Mobile encodings,
 * indexed by the value of a character's first byte.
 * Bytes 0x81-0x9F and 0xE0-0xFC introduce a 2-byte character; every other
 * byte value is listed as length 1. */
const unsigned char mblen_table_sjis_mobile[] = {
	/* 0x00-0x0F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10-0x1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20-0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30-0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40-0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50-0x5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60-0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70-0x7F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80-0x8F */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90-0x9F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xA0-0xAF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xB0-0xBF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xC0-0xCF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xD0-0xDF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xE0-0xEF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0-0xFF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
};
56 
/* Search helper defined outside this file; the emoji encoders below call it with a
 * key table and its length and treat a negative return value as "not found". */
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);

/* Forward declarations for file-local functions that are referenced by the
 * encoding descriptors and vtables before their definitions appear. */
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
66 
/* NULL-terminated lists of alternative names for each of the three
 * mobile-carrier Shift-JIS variants (DoCoMo, KDDI, SoftBank). */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {
	"SJIS-DOCOMO",
	"shift_jis-imode",
	"x-sjis-emoji-docomo",
	NULL
};

static const char *mbfl_encoding_sjis_kddi_aliases[] = {
	"SJIS-KDDI",
	"shift_jis-kddi",
	"x-sjis-emoji-kddi",
	NULL
};

static const char *mbfl_encoding_sjis_sb_aliases[] = {
	"SJIS-SOFTBANK",
	"shift_jis-softbank",
	"x-sjis-emoji-softbank",
	NULL
};
70 
/* Encoding descriptor for the DoCoMo variant of mobile Shift-JIS.
 * The initializer is positional.
 * NOTE(review): the per-field remarks below are inferred from the values used;
 * confirm against the declaration of mbfl_encoding. */
const mbfl_encoding mbfl_encoding_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	"SJIS-Mobile#DOCOMO",
	"Shift_JIS",
	mbfl_encoding_sjis_docomo_aliases, /* NULL-terminated alias list */
	mblen_table_sjis_mobile, /* byte-length table shared by all three variants */
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_docomo_wchar, /* per-character filter: SJIS-DoCoMo -> wchar */
	&vtbl_wchar_sjis_docomo, /* per-character filter: wchar -> SJIS-DoCoMo */
	mb_sjis_docomo_to_wchar, /* buffer-at-a-time decoder */
	mb_wchar_to_sjis_docomo, /* buffer-at-a-time encoder */
	NULL
};
84 
/* Encoding descriptor for the KDDI variant of mobile Shift-JIS.
 * The initializer is positional.
 * NOTE(review): the per-field remarks below are inferred from the values used;
 * confirm against the declaration of mbfl_encoding. */
const mbfl_encoding mbfl_encoding_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	"SJIS-Mobile#KDDI",
	"Shift_JIS",
	mbfl_encoding_sjis_kddi_aliases, /* NULL-terminated alias list */
	mblen_table_sjis_mobile, /* byte-length table shared by all three variants */
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_kddi_wchar, /* per-character filter: SJIS-KDDI -> wchar */
	&vtbl_wchar_sjis_kddi, /* per-character filter: wchar -> SJIS-KDDI */
	mb_sjis_kddi_to_wchar, /* buffer-at-a-time decoder */
	mb_wchar_to_sjis_kddi, /* buffer-at-a-time encoder */
	NULL
};
98 
/* Encoding descriptor for the SoftBank variant of mobile Shift-JIS.
 * The initializer is positional.
 * NOTE(review): the per-field remarks below are inferred from the values used;
 * confirm against the declaration of mbfl_encoding. */
const mbfl_encoding mbfl_encoding_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	"SJIS-Mobile#SOFTBANK",
	"Shift_JIS",
	mbfl_encoding_sjis_sb_aliases, /* NULL-terminated alias list */
	mblen_table_sjis_mobile, /* byte-length table shared by all three variants */
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_sb_wchar, /* per-character filter: SJIS-SoftBank -> wchar */
	&vtbl_wchar_sjis_sb, /* per-character filter: wchar -> SJIS-SoftBank */
	mb_sjis_sb_to_wchar, /* buffer-at-a-time decoder */
	mb_wchar_to_sjis_sb, /* buffer-at-a-time encoder */
	NULL
};
112 
/* Filter vtable for SJIS-DoCoMo -> wchar. Input characters are handled by
 * mbfl_filt_conv_sjis_mobile_wchar and end of input by mbfl_filt_conv_sjis_wchar_flush.
 * NOTE(review): the two NULL slots are presumably optional hooks; confirm against
 * the declaration of struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};
122 
/* Filter vtable for wchar -> SJIS-DoCoMo. Input characters are handled by
 * mbfl_filt_conv_wchar_sjis_mobile and end of input by mbfl_filt_conv_sjis_mobile_flush.
 * NOTE(review): the two NULL slots are presumably optional hooks; confirm against
 * the declaration of struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};
132 
/* Filter vtable for SJIS-KDDI -> wchar. Input characters are handled by
 * mbfl_filt_conv_sjis_mobile_wchar and end of input by mbfl_filt_conv_sjis_wchar_flush.
 * NOTE(review): the two NULL slots are presumably optional hooks; confirm against
 * the declaration of struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};
142 
/* Filter vtable for wchar -> SJIS-KDDI. Input characters are handled by
 * mbfl_filt_conv_wchar_sjis_mobile and end of input by mbfl_filt_conv_sjis_mobile_flush.
 * NOTE(review): the two NULL slots are presumably optional hooks; confirm against
 * the declaration of struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};
152 
/* Filter vtable for SJIS-SoftBank -> wchar. Input characters are handled by
 * mbfl_filt_conv_sjis_mobile_wchar and end of input by mbfl_filt_conv_sjis_wchar_flush.
 * NOTE(review): the two NULL slots are presumably optional hooks; confirm against
 * the declaration of struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
	mbfl_no_encoding_sjis_sb,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};
162 
/* Filter vtable for wchar -> SJIS-SoftBank. Input characters are handled by
 * mbfl_filt_conv_wchar_sjis_mobile and end of input by mbfl_filt_conv_sjis_mobile_flush.
 * NOTE(review): the two NULL slots are presumably optional hooks; confirm against
 * the declaration of struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};
172 
/* National flag emoji supported when encoding to KDDI / SoftBank.
 * nflags_s[i] is a two-letter country code (no NUL terminator); the entry at
 * the same index in nflags_code_kddi / nflags_code_sb is the carrier's code
 * for that country's flag. */
static const char nflags_s[10][2] = {
	{'C', 'N'}, {'D', 'E'}, {'E', 'S'}, {'F', 'R'}, {'G', 'B'},
	{'I', 'T'}, {'J', 'P'}, {'K', 'R'}, {'R', 'U'}, {'U', 'S'}
};
static const int nflags_code_kddi[10] = {
	0x2549, /* CN */
	0x2546, /* DE */
	0x24c0, /* ES */
	0x2545, /* FR */
	0x2548, /* GB */
	0x2547, /* IT */
	0x2750, /* JP */
	0x254a, /* KR */
	0x24c1, /* RU */
	0x27f7  /* US */
};
static const int nflags_code_sb[10] = {
	0x2b0a, /* CN */
	0x2b05, /* DE */
	0x2b08, /* ES */
	0x2b04, /* FR */
	0x2b07, /* GB */
	0x2b06, /* IT */
	0x2b02, /* JP */
	0x2b0b, /* KR */
	0x2b09, /* RU */
	0x2b03  /* US */
};
176 
/* DoCoMo emoji code ranges and the codepoint each range starts at.
 * Each row is { first code, last code, value assigned to the first code },
 * the row layout that mbfilter_conv_map_tbl() in this file operates on. */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	/* first   last    base   */
	{ 0x28C2, 0x292F, 0xE63E },
	{ 0x2930, 0x2934, 0xE6AC },
	{ 0x2935, 0x2951, 0xE6B1 },
	{ 0x2952, 0x29DB, 0xE6CE },
};
183 
/* KDDI emoji code ranges and the codepoint each range starts at.
 * Each row is { first code, last code, value assigned to the first code },
 * the row layout that mbfilter_conv_map_tbl() in this file operates on. */
const unsigned short mbfl_kddi2uni_pua[7][3] = {
	/* first   last    base   */
	{ 0x26EC, 0x2838, 0xE468 },
	{ 0x284C, 0x2863, 0xE5B5 },
	{ 0x24B8, 0x24CA, 0xE5CD },
	{ 0x24CB, 0x2545, 0xEA80 },
	{ 0x2839, 0x284B, 0xEAFB },
	{ 0x2546, 0x25C0, 0xEB0E },
	{ 0x25C1, 0x25C6, 0xEB89 },
};
193 
/* SoftBank emoji code ranges and the codepoint each range starts at.
 * Each row is { first code, last code, value assigned to the first code },
 * the row layout that mbfilter_conv_map_tbl() in this file operates on. */
const unsigned short mbfl_sb2uni_pua[6][3] = {
	/* first   last    base   */
	{ 0x27A9, 0x2802, 0xE101 },
	{ 0x2808, 0x2861, 0xE201 },
	{ 0x2921, 0x297A, 0xE001 },
	{ 0x2980, 0x29CC, 0xE301 },
	{ 0x2A99, 0x2AE4, 0xE401 },
	{ 0x2AF8, 0x2B35, 0xE501 },
};
202 
/* Second KDDI range table (the "_b" variant), covering the same code ranges as
 * mbfl_kddi2uni_pua but assigning different base codepoints.
 * Each row is { first code, last code, value assigned to the first code }. */
const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	/* first   last    base   */
	{ 0x24B8, 0x24F6, 0xEC40 },
	{ 0x24F7, 0x2573, 0xEC80 },
	{ 0x2574, 0x25B2, 0xED40 },
	{ 0x25B3, 0x25C6, 0xED80 },
	{ 0x26EC, 0x272A, 0xEF40 },
	{ 0x272B, 0x27A7, 0xEF80 },
	{ 0x27A8, 0x27E6, 0xF040 },
	{ 0x27E7, 0x2863, 0xF080 },
};
213 
/* Map an uppercase ASCII letter to its Regional Indicator codepoint.
 * The Regional Indicators occupy 0x1F1E6-0x1F1FF and stand for the letters A-Z;
 * a country's flag emoji is written as the two indicators that spell its
 * two-letter code. 'A' is 0x41, so adding 0x1F1A5 lands on 0x1F1E6. */
#define NFLAGS(c) ((int)(c) + 0x1F1A5)
220 
/* Evaluate `statement` once; if it yields a negative value, return -1 from the
 * enclosing function. */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
222 
/* Convert a JIS row/cell byte pair (c1, c2) into the Shift-JIS lead/trail
 * byte pair (s1, s2).
 *   c1, c2: input bytes (expressions; each is evaluated more than once, so
 *           they must be free of side effects)
 *   s1, s2: lvalues that receive the lead and trail byte
 * Two JIS rows share one Shift-JIS lead byte: odd rows use the lower half of
 * the trail-byte range (skipping 0x7F), even rows the upper half.
 * Every macro parameter is parenthesized so that expression arguments such as
 * `v & 0xFF` are not re-associated by operator precedence. */
#define SJIS_ENCODE(c1,c2,s1,s2) \
		do { \
			(s1) = (((c1) - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
			(s2) = (c2); \
			if ((c1) & 1) { \
				if ((c2) < 0x60) { \
					(s2)--; \
				} \
				(s2) += 0x20; \
			} else { \
				(s2) += 0x7e; \
			} \
		} while (0)
236 
/* Convert a Shift-JIS lead/trail byte pair (c1, c2) into the JIS row/cell
 * byte pair (s1, s2); the inverse of SJIS_ENCODE.
 *   c1, c2: input bytes (expressions; each is evaluated more than once, so
 *           they must be free of side effects)
 *   s1, s2: lvalues that receive the row and cell byte
 * Each lead byte covers two JIS rows; a trail byte of 0x9F or above selects
 * the second (even) row.
 * Every macro parameter is parenthesized so that expression arguments such as
 * `b & 0xFF` are not re-associated by operator precedence. */
#define SJIS_DECODE(c1,c2,s1,s2) \
		do { \
			if ((c1) < 0xa0) { \
				(s1) = (((c1) - 0x81) << 1) + 0x21; \
			} else { \
				(s1) = (((c1) - 0xc1) << 1) + 0x21; \
			} \
			(s2) = (c2); \
			if ((c2) < 0x9f) { \
				if ((c2) < 0x7f) { \
					(s2)++; \
				} \
				(s2) -= 0x20; \
			} else { \
				(s1)++; \
				(s2) -= 0x7e; \
			} \
		} while (0)
255 
/* Translate `c` through a range table.
 * Each row of `map` is { first, last, base }. The first row (in table order)
 * whose [first, last] interval contains `c` wins, and *w receives
 * base + (c - first).
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 * `n` is the number of rows in `map`. */
int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row = 0;

	while (row < n) {
		const unsigned short *range = map[row++];

		if (c < range[0] || c > range[1]) {
			continue;
		}
		*w = c - range[0] + range[2];
		return 1;
	}
	return 0;
}
266 
/* Translate `c` through a range table in the reverse direction.
 * Each row of `map` is { first, last, base }. A row matches when `c` lies in
 * [base, base + (last - first)]; *w then receives first + (c - base).
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 * `n` is the number of rows in `map`. */
int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	for (int row = 0; row < n; row++) {
		const int src_first = map[row][0];
		const int src_last = map[row][1];
		const int dst_first = map[row][2];
		const int dst_last = dst_first - src_first + src_last;

		if (c >= dst_first && c <= dst_last) {
			*w = c + src_first - dst_first;
			return 1;
		}
	}
	return 0;
}
278 
/* (ku*94)+ten values of the DoCoMo telephone-keypad emoji.
 * DOCOMO_KEYPAD(n) gives the value for digit n: 1-9 are consecutive starting
 * at 0x2966, while 0 sits after them at 0x296F. DOCOMO_KEYPAD_HASH is '#'. */
#define DOCOMO_KEYPAD_HASH 0x2964
#define DOCOMO_KEYPAD(n) ((n) != 0 ? (0x2965 + (n)) : 0x296F)

/* Finish the enclosing function with a keypad emoji: store `c` in *snd and
 * return the combining character 0x20E3. Requires a pointer named `snd` in scope. */
#define EMIT_KEYPAD_EMOJI(c) \
	do { \
		*snd = (c); \
		return 0x20E3; \
	} while(0)
284 
/* Expand a 16-bit emoji table entry to the full codepoint it stands for.
 *
 * The tables only hold 16 bits per entry, so codepoints beyond that range are
 * stored truncated and recovered here:
 *   - entries above 0xF000 have 0x10000 added (emoji above 0x1F000);
 *   - entries above 0xE000, up to and including 0xF000, have 0xF0000 added
 *     (Private Use Area codepoints above 0xFE000, used for emoji which are
 *     not supported by Unicode);
 *   - anything else is returned unchanged. */
static inline int convert_emoji_cp(int cp)
{
	if (cp <= 0xE000) {
		return cp;
	}
	return cp + (cp > 0xF000 ? 0x10000 : 0xF0000);
}
299 
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)300 int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
301 {
302 	/* All three mobile vendors had emoji for numbers on a telephone keypad
303 	 * Unicode doesn't have those, but it has a combining character which puts
304 	 * a 'keypad button' around the following character, making it look like
305 	 * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. */
306 	if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
307 		if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) {
308 			EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]));
309 		} else {
310 			*snd = 0;
311 			return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]);
312 		}
313 	}
314 	return 0;
315 }
316 
/* Finish the enclosing function with a national flag emoji: the Regional
 * Indicator for the first letter of `country` goes to *snd and the one for the
 * second letter is returned. Requires a pointer named `snd` in scope. */
#define EMIT_FLAG_EMOJI(country) \
	do { \
		*snd = NFLAGS((country)[0]); \
		return NFLAGS((country)[1]); \
	} while(0)

/* Country codes for the KDDI flag emoji 0x2545-0x254A, in code order
 * (two letters each, no NUL terminator). */
static const char nflags_kddi[6][2] = {
	{'F', 'R'}, {'D', 'E'}, {'I', 'T'}, {'G', 'B'}, {'C', 'N'}, {'K', 'R'}
};
320 
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)321 int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
322 {
323 	if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
324 		if (s == 0x24C0) { /* Spain */
325 			EMIT_FLAG_EMOJI("ES");
326 		} else if (s == 0x24C1) { /* Russia */
327 			EMIT_FLAG_EMOJI("RU");
328 		} else if (s >= 0x2545 && s <= 0x254A) {
329 			EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]);
330 		} else if (s == 0x25BC) {
331 			EMIT_KEYPAD_EMOJI('#');
332 		} else {
333 			*snd = 0;
334 			return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]);
335 		}
336 	} else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
337 		if (s == 0x2750) { /* Japan */
338 			EMIT_FLAG_EMOJI("JP");
339 		} else if (s >= 0x27A6 && s <= 0x27AE) {
340 			EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1');
341 		} else if (s == 0x27F7) { /* United States */
342 			EMIT_FLAG_EMOJI("US");
343 		} else if (s == 0x2830) {
344 			EMIT_KEYPAD_EMOJI('0');
345 		} else {
346 			*snd = 0;
347 			return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]);
348 		}
349 	}
350 	return 0;
351 }
352 
/* Country codes for the SoftBank flag emoji 0x2B02-0x2B0B, in code order
 * (two letters each, no NUL terminator). */
static const char nflags_sb[10][2] = {
	{'J', 'P'}, {'U', 'S'}, {'F', 'R'}, {'D', 'E'}, {'I', 'T'},
	{'G', 'B'}, {'E', 'S'}, {'R', 'U'}, {'C', 'N'}, {'K', 'R'}
};
354 
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)355 int mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
356 {
357 	if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
358 		if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) {
359 			EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
360 		} else {
361 			*snd = 0;
362 			return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
363 		}
364 	} else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
365 		*snd = 0;
366 		return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]);
367 	} else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
368 		if (s >= 0x2B02 && s <= 0x2B0B) {
369 			EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]);
370 		} else {
371 			*snd = 0;
372 			return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]);
373 		}
374 	}
375 	return 0;
376 }
377 
378 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)379 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
380 {
381 	/* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji
382 	 * to a sequence of 2 codepoints, one of which is a combining character which
383 	 * adds the 'key' image around the other
384 	 *
385 	 * In the other direction, look for such sequences and convert them to a
386 	 * single emoji */
387 	if (filter->status == 1) {
388 		int c1 = filter->cache;
389 		filter->cache = filter->status = 0;
390 		if (c == 0x20E3) {
391 			if (c1 == '#') {
392 				*s1 = 0x2964;
393 			} else if (c1 == '0') {
394 				*s1 = 0x296F;
395 			} else { /* Previous character was '1'-'9' */
396 				*s1 = 0x2966 + (c1 - '1');
397 			}
398 			return 1;
399 		} else {
400 			/* This character wasn't combining character to make keypad symbol,
401 			 * so pass the previous character through... and proceed to process the
402 			 * current character as usual
403 			 * (Single-byte ASCII characters are valid in Shift-JIS...) */
404 			CK((*filter->output_function)(c1, filter->data));
405 		}
406 	}
407 
408 	if (c == '#' || (c >= '0' && c <= '9')) {
409 		filter->status = 1;
410 		filter->cache = c;
411 		return 0;
412 	}
413 
414 	if (c == 0xA9) { /* Copyright sign */
415 		*s1 = 0x29B5;
416 		return 1;
417 	} else if (c == 0x00AE) { /* Registered sign */
418 		*s1 = 0x29BA;
419 		return 1;
420 	} else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
421 		int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
422 		if (i >= 0) {
423 			*s1 = mb_tbl_uni_docomo2code2_value[i];
424 			return 1;
425 		}
426 	} else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
427 		int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
428 		if (i >= 0) {
429 			*s1 = mb_tbl_uni_docomo2code3_value[i];
430 			return 1;
431 		}
432 	} else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
433 		int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
434 		if (i >= 0) {
435 			*s1 = mb_tbl_uni_docomo2code5_val[i];
436 			return 1;
437 		}
438 	}
439 	return 0;
440 }
441 
/* Try to convert Unicode codepoint `c` to a KDDI emoji code.
 *
 * Returns 1 and stores the emoji's (ku*94)+ten value in *s1 on success,
 * 0 when `c` is not a KDDI emoji (or has merely been buffered), and -1 when
 * an output call reports an error.
 *
 * Two kinds of two-codepoint sequences are recognised, using filter->cache to
 * hold the first codepoint:
 *   status 1: a '#' or digit, possibly followed by the keypad combining
 *             character 0x20E3;
 *   status 2: a Regional Indicator, possibly followed by a second one which
 *             completes a supported national flag. */
int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
	const int pending = filter->cache;

	switch (filter->status) {
	case 1:
		filter->cache = filter->status = 0;
		if (c == 0x20E3) {
			if (pending == '#') {
				*s1 = 0x25BC;
			} else if (pending == '0') {
				*s1 = 0x2830;
			} else { /* '1'-'9' */
				*s1 = 0x27a6 + (pending - '1');
			}
			return 1;
		}
		/* Not a keypad emoji: release the held ASCII character unchanged */
		CK((*filter->output_function)(pending, filter->data));
		break;

	case 2:
		filter->cache = filter->status = 0;
		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int flag = 0; flag < 10; flag++) {
				if (NFLAGS(nflags_s[flag][0]) == pending && NFLAGS(nflags_s[flag][1]) == c) {
					*s1 = nflags_code_kddi[flag];
					return 1;
				}
			}
		}
		/* No KDDI flag matched, so the held Regional Indicator cannot be
		 * converted; `c` itself is still processed below */
		CK(mbfl_filt_conv_illegal_output(pending, filter));
		break;

	default:
		break;
	}

	if ((c >= '0' && c <= '9') || c == '#') {
		filter->status = 1;
		filter->cache = c;
		return 0;
	}
	if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		filter->status = 2;
		filter->cache = c;
		return 0;
	}

	switch (c) {
	case 0xA9: /* Copyright sign */
		*s1 = 0x27DC;
		return 1;
	case 0xAE: /* Registered sign */
		*s1 = 0x27DD;
		return 1;
	default:
		break;
	}

	/* Only the first table whose range contains `c` is searched */
	int idx = -1;
	if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
		idx = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_kddi2code2_value[idx];
		}
	} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
		idx = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_kddi2code3_value[idx];
		}
	} else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
		idx = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_kddi2code5_val[idx];
		}
	}

	return idx >= 0;
}
513 
/* Try to convert Unicode codepoint `c` to a SoftBank emoji code.
 *
 * Returns 1 and stores the emoji's (ku*94)+ten value in *s1 on success,
 * 0 when `c` is not a SoftBank emoji (or has merely been buffered), and -1
 * when an output call reports an error.
 *
 * Two kinds of two-codepoint sequences are recognised, using filter->cache to
 * hold the first codepoint:
 *   status 1: a '#' or digit, possibly followed by the keypad combining
 *             character 0x20E3;
 *   status 2: a Regional Indicator, possibly followed by a second one which
 *             completes a supported national flag. */
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
{
	const int pending = filter->cache;

	switch (filter->status) {
	case 1:
		filter->cache = filter->status = 0;
		if (c == 0x20E3) {
			if (pending == '#') {
				*s1 = 0x2817;
			} else if (pending == '0') {
				*s1 = 0x282c;
			} else { /* '1'-'9' */
				*s1 = 0x2823 + (pending - '1');
			}
			return 1;
		}
		/* Not a keypad emoji: release the held ASCII character unchanged */
		CK((*filter->output_function)(pending, filter->data));
		break;

	case 2:
		filter->cache = filter->status = 0;
		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int flag = 0; flag < 10; flag++) {
				if (NFLAGS(nflags_s[flag][0]) == pending && NFLAGS(nflags_s[flag][1]) == c) {
					*s1 = nflags_code_sb[flag];
					return 1;
				}
			}
		}
		/* No SoftBank flag matched, so the held Regional Indicator cannot be
		 * converted; `c` itself is still processed below */
		CK(mbfl_filt_conv_illegal_output(pending, filter));
		break;

	default:
		break;
	}

	if ((c >= '0' && c <= '9') || c == '#') {
		filter->status = 1;
		filter->cache = c;
		return 0;
	}
	if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		filter->status = 2;
		filter->cache = c;
		return 0;
	}

	switch (c) {
	case 0xA9: /* Copyright sign */
		*s1 = 0x2855;
		return 1;
	case 0xAE: /* Registered sign */
		*s1 = 0x2856;
		return 1;
	default:
		break;
	}

	/* Only the first table whose range contains `c` is searched */
	int idx = -1;
	if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
		idx = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_sb2code2_value[idx];
		}
	} else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
		idx = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_sb2code3_value[idx];
		}
	} else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
		idx = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_sb2code5_val[idx];
		}
	}

	return idx >= 0;
}
585 
/* Byte-at-a-time decoder shared by the three SJIS-Mobile variants; filter->from
 * selects which carrier's emoji tables apply.
 *
 * `c` is the next input byte. Decoded codepoints (or MBFL_BAD_INPUT for
 * invalid input) are passed to filter->output_function.
 * Returns 0 normally, -1 as soon as the output function returns a negative value.
 *
 * filter->status is the decoder state:
 *   0  between characters
 *   1  lead byte of a 2-byte character is held in filter->cache
 *   2  ESC seen (SoftBank only)
 *   3  ESC '$' seen
 *   4  ESC '$' + one of G/E/F/O/P/Q seen (held in filter->cache); each further
 *      byte is one emoji until 0x0F ends the sequence */
int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, s1, s2, w, snd = 0;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) { /* ASCII */
			if (filter->from == &mbfl_encoding_sjis_sb && c == 0x1B) {
				/* ESC; escape sequences were used on older SoftBank phones for emoji */
				filter->cache = c;
				filter->status = 2;
			} else {
				CK((*filter->output_function)(c, filter->data));
			}
		} else if (c > 0xA0 && c < 0xE0) { /* Kana */
			CK((*filter->output_function)(0xFEC0 + c, filter->data));
		} else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* Kanji, second byte */
		filter->status = 0;
		c1 = filter->cache;
		if (c >= 0x40 && c <= 0xFC && c != 0x7F) {
			w = 0;
			SJIS_DECODE(c1, c, s1, s2);
			/* Linear (ku*94)+ten index, zero-based */
			s = ((s1 - 0x21) * 94) + s2 - 0x21;
			/* A few positions are given fixed codepoints, taking precedence over the table lookups below */
			if (s <= 137) {
				if (s == 31) {
					w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
				} else if (s == 32) {
					w = 0xFF5E; /* FULLWIDTH TILDE */
				} else if (s == 33) {
					w = 0x2225; /* PARALLEL TO */
				} else if (s == 60) {
					w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
				} else if (s == 80) {
					w = 0xFFE0; /* FULLWIDTH CENT SIGN */
				} else if (s == 81) {
					w = 0xFFE1; /* FULLWIDTH POUND SIGN */
				} else if (s == 137) {
					w = 0xFFE2; /* FULLWIDTH NOT SIGN */
				}
			}
			if (w == 0) {
				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {		/* vendor ext1 (13ku) */
					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
				} else if (s >= 0 && s < jisx0208_ucs_table_size) {		/* X 0208 */
					w = jisx0208_ucs_table[s];
				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {		/* vendor ext2 (89ku - 92ku) */
					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
				}

				/* Emoji */
				/* Within a carrier's emoji range the emoji lookup replaces whatever `w`
				 * the tables above produced. When the emoji needs two codepoints, the
				 * first one (snd) is emitted here and `w` follows further down. */
				if (filter->from == &mbfl_encoding_sjis_docomo && s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
					w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
					if (snd > 0) {
						CK((*filter->output_function)(snd, filter->data));
					}
				} else if (filter->from == &mbfl_encoding_sjis_kddi && s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) {
					w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
					if (snd > 0) {
						CK((*filter->output_function)(snd, filter->data));
					}
				} else if (filter->from == &mbfl_encoding_sjis_sb && s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) {
					w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
					if (snd > 0) {
						CK((*filter->output_function)(snd, filter->data));
					}
				}

				if (w == 0) {
					if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
						w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
					} else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
						w = s - (94*94) + 0xe000;
					}
				}
			}
			/* Nothing mapped this position */
			if (w <= 0) {
				w = MBFL_BAD_INPUT;
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC: Softbank Emoji */
	case 2:
		if (c == '$') {
			filter->cache = c;
			filter->status++;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
		}
		break;

	/* ESC $: Softbank Emoji */
	case 3:
		if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) {
			filter->cache = c;
			filter->status++;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
		}
		break;

	/* ESC $ [GEFOPQ]: Softbank Emoji */
	case 4:
		c1 = filter->cache;
		if (c == 0xF) { /* Terminate sequence of emoji */
			filter->status = filter->cache = 0;
			return 0;
		} else {
			/* The selector letter picks the base index; here s1 is a (ku*94)+ten
			 * offset to which the emoji byte is added below */
			if (c1 == 'G' && c >= 0x21 && c <= 0x7a) {
				s1 = (0x91 - 0x21) * 94;
			} else if (c1 == 'E' && c >= 0x21 && c <= 0x7A) {
				s1 = (0x8D - 0x21) * 94;
			} else if (c1 == 'F' && c >= 0x21 && c <= 0x7A) {
				s1 = (0x8E - 0x21) * 94;
			} else if (c1 == 'O' && c >= 0x21 && c <= 0x6D) {
				s1 = (0x92 - 0x21) * 94;
			} else if (c1 == 'P' && c >= 0x21 && c <= 0x6C) {
				s1 = (0x95 - 0x21) * 94;
			} else if (c1 == 'Q' && c >= 0x21 && c <= 0x5E) {
				s1 = (0x96 - 0x21) * 94;
			} else {
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
				filter->status = filter->cache = 0;
				return 0;
			}

			w = mbfilter_sjis_emoji_sb2unicode(s1 + c - 0x21, &snd);
			if (w > 0) {
				/* On success the state stays at 4, so further emoji bytes may follow */
				if (snd > 0) {
					CK((*filter->output_function)(snd, filter->data));
				}
				CK((*filter->output_function)(w, filter->data));
			} else {
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
				filter->status = filter->cache = 0;
			}
		}
	}

	return 0;
}
740 
/* End-of-input handler for the SJIS-Mobile -> wchar filters.
 *
 * If input stopped in the middle of a character or escape sequence (any state
 * other than 0 or 4), MBFL_BAD_INPUT is emitted; the result of that output
 * call is not checked. The state is then reset and the flush function, if one
 * is set, is invoked. Always returns 0. */
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
{
	switch (filter->status) {
	case 0:
	case 4:
		/* Nothing is left half-decoded in these states */
		break;
	default:
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
		break;
	}
	filter->status = 0;

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);
	return 0;
}
754 
/* Codepoint-at-a-time encoder shared by the three SJIS-Mobile variants;
 * filter->to selects which carrier's emoji mapping applies.
 *
 * `c` is the next Unicode codepoint. The resulting bytes are passed to
 * filter->output_function; unconvertible codepoints go to
 * mbfl_filt_conv_illegal_output.
 * Returns 0 normally, -1 as soon as an output call returns a negative value.
 *
 * s1 holds the candidate code as (row byte << 8) | cell byte, or a single byte
 * when below 0x100; a negative value means "no mapping". s2 is set to 1 only for
 * the user-defined range, which exempts those codes from the X 0212 rejection. */
int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s1 = 0, s2 = 0;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xE000 && c < (0xE000 + 20*94)) {
		/* Private User Area (95ku - 114ku) */
		s1 = c - 0xE000;
		c1 = (s1 / 94) + 0x7F;
		c2 = (s1 % 94) + 0x21;
		s1 = (c1 << 8) | c2;
		s2 = 1;
	}

	/* Codepoints with fixed mappings, tried when the tables gave nothing */
	if (s1 <= 0) {
		if (c == 0xA5) { /* YEN SIGN */
			s1 = 0x216F; /* FULLWIDTH YEN SIGN */
		} else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */
			s1 = 0x2140;
		} else if (c == 0x2225) { /* PARALLEL TO */
			s1 = 0x2142;
		} else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
			s1 = 0x215D;
		} else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */
			s1 = 0x2171;
		} else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */
			s1 = 0x2172;
		} else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */
			s1 = 0x224C;
		}
	}

	if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {	/* not found or X 0212 */
		s1 = -1;

		/* CP932 vendor ext1 (13ku) */
		/* Linear reverse search: the ext tables are indexed by code, not by codepoint */
		for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) {
			if (c == cp932ext1_ucs_table[c1]) {
				s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
				break;
			}
		}

		if (s1 <= 0) {
			/* CP932 vendor ext2 (89ku - 92ku); row byte 0x79 corresponds to ku 89 */
			for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) {
				if (c == cp932ext2_ucs_table[c1]) {
					s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21;
					break;
				}
			}
		}

		/* U+0000 is emitted as a single zero byte */
		if (c == 0) {
			s1 = 0;
		}
	}

	/* Carrier-specific emoji take precedence over whatever was found above. The
	 * emoji functions report a linear (ku*94)+ten value, converted here to row/cell bytes. */
	if ((filter->to == &mbfl_encoding_sjis_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
		  (filter->to == &mbfl_encoding_sjis_kddi   && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
		  (filter->to == &mbfl_encoding_sjis_sb     && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0)) {
		s1 = (((s1 / 94) + 0x21) << 8) | ((s1 % 94) + 0x21);
 	}

	/* The emoji function buffered `c` as the possible first half of a
	 * two-codepoint emoji; output is decided once the next codepoint arrives */
	if (filter->status) {
		return 0;
	}

	if (s1 >= 0) {
		if (s1 < 0x100) { /* Latin/Kana */
			CK((*filter->output_function)(s1, filter->data));
		} else { /* Kanji */
			c1 = (s1 >> 8) & 0xff;
			c2 = s1 & 0xff;
			SJIS_ENCODE(c1, c2, s1, s2);
			CK((*filter->output_function)(s1, filter->data));
			CK((*filter->output_function)(s2, filter->data));
		}
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return 0;
}
846 
/* End-of-input handler for the wchar -> SJIS-Mobile filters.
 *
 * Deals with a codepoint that was held back as the possible first half of a
 * two-codepoint emoji, then invokes the flush function if one is set.
 * Returns 0 on success, -1 if an output call returns a negative value (in
 * which case the flush function is not invoked). */
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
{
	int pending = filter->cache;

	switch (filter->status) {
	case 1:
		/* A held '#' or digit was not followed by 0x20E3: emit it as-is */
		if (pending == '#' || (pending >= '0' && pending <= '9')) {
			filter->cache = filter->status = 0;
			CK((*filter->output_function)(pending, filter->data));
		}
		break;
	case 2:
		/* First of a pair of Regional Indicator codepoints came at the end of a string */
		filter->cache = filter->status = 0;
		CK(mbfl_filt_conv_illegal_output(pending, filter));
		break;
	default:
		break;
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
865 
mb_sjis_docomo_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)866 static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
867 {
868 	unsigned char *p = *in, *e = p + *in_len;
869 	/* Leave one extra space available in output buffer, since some iterations of
870 	 * main loop (below) may emit two wchars */
871 	uint32_t *out = buf, *limit = buf + bufsize - 1;
872 
873 	while (p < e && out < limit) {
874 		unsigned char c = *p++;
875 
876 		if (c <= 0x7F) {
877 			*out++ = c;
878 		} else if (c >= 0xA1 && c <= 0xDF) {
879 			/* Kana */
880 			*out++ = 0xFEC0 + c;
881 		} else if (c > 0x80 && c < 0xFD && c != 0xA0) {
882 			/* Kanji */
883 			if (p == e) {
884 				*out++ = MBFL_BAD_INPUT;
885 				break;
886 			}
887 			unsigned char c2 = *p++;
888 
889 			if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
890 				uint32_t w = 0;
891 				unsigned int s1, s2;
892 				SJIS_DECODE(c, c2, s1, s2);
893 				unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
894 
895 				if (s <= 137) {
896 					if (s == 31) {
897 						w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
898 					} else if (s == 32) {
899 						w = 0xFF5E; /* FULLWIDTH TILDE */
900 					} else if (s == 33) {
901 						w = 0x2225; /* PARALLEL TO */
902 					} else if (s == 60) {
903 						w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
904 					} else if (s == 80) {
905 						w = 0xFFE0; /* FULLWIDTH CENT SIGN */
906 					} else if (s == 81) {
907 						w = 0xFFE1; /* FULLWIDTH POUND SIGN */
908 					} else if (s == 137) {
909 						w = 0xFFE2; /* FULLWIDTH NOT SIGN */
910 					}
911 				}
912 
913 				if (!w) {
914 					if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
915 						int snd = 0;
916 						w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
917 						if (snd) {
918 							*out++ = snd;
919 						}
920 					} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
921 						w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
922 					} else if (s < jisx0208_ucs_table_size) {
923 						w = jisx0208_ucs_table[s];
924 					} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
925 						w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
926 					}
927 
928 					if (!w) {
929 						if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
930 							w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
931 						} else if (s >= (94*94) && s < (114*94)) {
932 							w = s - (94*94) + 0xE000;
933 						}
934 					}
935 				}
936 
937 				*out++ = w ? w : MBFL_BAD_INPUT;
938 			} else {
939 				*out++ = MBFL_BAD_INPUT;
940 			}
941 		} else {
942 			*out++ = MBFL_BAD_INPUT;
943 		}
944 	}
945 
946 	*in_len = e - p;
947 	*in = p;
948 	return out - buf;
949 }
950 
/* Encode `len` wchars from `in` as SJIS-Mobile#DOCOMO and append them to `buf`.
 *
 * `end` is true when no more wchars will follow this call.
 *
 * A '#' or ASCII digit may be the first half of a keypad emoji sequence (it
 * would be followed by U+20E3). If such a character is the last one in `in`
 * and more input is coming, it is stashed in `buf->state` and reprocessed at
 * the start of the next call.
 *
 * Codepoints which cannot be encoded are reported through MB_CONVERT_ERROR. */
static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));

	uint32_t w;
	unsigned int s = 0;

	if (buf->state) {
		/* Continue what we were doing on the previous call
		 *
		 * Always resume at `reprocess_wchar`, even if this call brought no new
		 * wchars. The `!len` checks below then either emit the pending character
		 * (if `end`) or stash it again. Jumping straight to `emit_output` would
		 * leave `s` as zero, and the pending '#' or digit would wrongly be
		 * treated as a conversion error. */
		w = buf->state;
		buf->state = 0;
		goto reprocess_wchar;
	}

	while (len--) {
		w = *in++;
reprocess_wchar:
		s = 0;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* Private User Area (95ku - 114ku) */
			s = w - 0xE000;
			s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21);
			goto process_emoji;
		}

		if (!s) {
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x216F; /* FULLWIDTH YEN SIGN */
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			}
		}

		if (w && (!s || s >= 0x8080)) {
			/* Nothing usable from the tables above; do a reverse search through
			 * the CP932 extension tables */
			s = 0;

			for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (w == cp932ext1_ucs_table[i]) {
					s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}

			for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
				if (w == cp932ext2_ucs_table[i]) {
					s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}
		}

process_emoji:
		/* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji
		 * to a sequence of 2 codepoints, one of which is a combining character which
		 * adds the 'key' image around the other
		 *
		 * In the other direction, look for such sequences and convert them to a
		 * single emoji */
		if (w == '#' || (w >= '0' && w <= '9')) {
			if (!len) {
				if (end) {
					/* Nothing can follow; emit `w` as an ordinary character */
					goto emit_output;
				} else {
					/* If we are at the end of the current buffer of codepoints, but another
					 * buffer is coming, then remember that we have to reprocess `w` */
					buf->state = w;
					break;
				}
			}
			uint32_t w2 = *in++; len--;
			if (w2 == 0x20E3) {
				if (w == '#') {
					s = 0x2964;
				} else if (w == '0') {
					s = 0x296F;
				} else { /* Previous character was '1'-'9' */
					s = 0x2966 + (w - '1');
				}
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			} else {
				/* Not a keypad sequence; put `w2` back so it is handled on its own */
				in--; len++;
			}
		} else if (w == 0xA9) { /* Copyright sign */
			s = (((0x29B5 / 94) + 0x21) << 8) | ((0x29B5 % 94) + 0x21);
		} else if (w == 0xAE) { /* Registered sign */
			s = (((0x29BA / 94) + 0x21) << 8) | ((0x29BA % 94) + 0x21);
		} else if (w >= mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) {
			int i = mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
			if (i >= 0) {
				s = mb_tbl_uni_docomo2code2_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) {
			int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
			if (i >= 0) {
				s = mb_tbl_uni_docomo2code3_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) {
			int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
			if (i >= 0) {
				s = mb_tbl_uni_docomo2code5_val[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		}

emit_output:
		if (!s && w) {
			/* No encoding was found for `w` */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_docomo);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s <= 0xFF) {
			out = mb_convert_buf_add(out, s);
		} else {
			unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2;
			SJIS_ENCODE(c1, c2, s1, s2);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
1098 
mb_sjis_kddi_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)1099 static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
1100 {
1101 	unsigned char *p = *in, *e = p + *in_len;
1102 	uint32_t *out = buf, *limit = buf + bufsize - 1;
1103 
1104 	while (p < e && out < limit) {
1105 		unsigned char c = *p++;
1106 
1107 		if (c <= 0x7F) {
1108 			*out++ = c;
1109 		} else if (c >= 0xA1 && c <= 0xDF) {
1110 			/* Kana */
1111 			*out++ = 0xFEC0 + c;
1112 		} else if (c > 0x80 && c < 0xFD && c != 0xA0) {
1113 			/* Kanji */
1114 			if (p == e) {
1115 				*out++ = MBFL_BAD_INPUT;
1116 				break;
1117 			}
1118 			unsigned char c2 = *p++;
1119 
1120 			if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
1121 				uint32_t w = 0;
1122 				unsigned int s1, s2;
1123 				SJIS_DECODE(c, c2, s1, s2);
1124 				unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
1125 
1126 				if (s <= 137) {
1127 					if (s == 31) {
1128 						w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
1129 					} else if (s == 32) {
1130 						w = 0xFF5E; /* FULLWIDTH TILDE */
1131 					} else if (s == 33) {
1132 						w = 0x2225; /* PARALLEL TO */
1133 					} else if (s == 60) {
1134 						w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
1135 					} else if (s == 80) {
1136 						w = 0xFFE0; /* FULLWIDTH CENT SIGN */
1137 					} else if (s == 81) {
1138 						w = 0xFFE1; /* FULLWIDTH POUND SIGN */
1139 					} else if (s == 137) {
1140 						w = 0xFFE2; /* FULLWIDTH NOT SIGN */
1141 					}
1142 				}
1143 
1144 				if (!w) {
1145 					if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) {
1146 						int snd = 0;
1147 						w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
1148 						if (snd) {
1149 							*out++ = snd;
1150 						}
1151 					} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
1152 						w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
1153 					} else if (s < jisx0208_ucs_table_size) {
1154 						w = jisx0208_ucs_table[s];
1155 					} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
1156 						w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
1157 					}
1158 
1159 					if (!w) {
1160 						if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
1161 							w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
1162 						} else if (s >= (94*94) && s < (114*94)) {
1163 							w = s - (94*94) + 0xE000;
1164 						}
1165 					}
1166 				}
1167 
1168 				*out++ = w ? w : MBFL_BAD_INPUT;
1169 			} else {
1170 				*out++ = MBFL_BAD_INPUT;
1171 			}
1172 		} else {
1173 			*out++ = MBFL_BAD_INPUT;
1174 		}
1175 	}
1176 
1177 	*in_len = e - p;
1178 	*in = p;
1179 	return out - buf;
1180 }
1181 
/* Encode `len` wchars from `in` as SJIS-Mobile#KDDI and append them to `buf`.
 *
 * `end` is true when no more wchars will follow this call.
 *
 * Two kinds of 2-codepoint sequences are folded into a single emoji:
 * a '#' or ASCII digit followed by U+20E3 (keypad symbols), and certain pairs
 * of Regional Indicator codepoints (national flags). If the first codepoint
 * of a possible pair is the last one in `in` and more input is coming, it is
 * stashed in `buf->state` and reprocessed at the start of the next call.
 *
 * Codepoints which cannot be encoded are reported through MB_CONVERT_ERROR. */
static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));

	uint32_t w;
	unsigned int s = 0;

	if (buf->state) {
		/* Continue with the codepoint which was held over from the previous call
		 *
		 * Always resume at `reprocess_wchar`, even if this call brought no new
		 * wchars. The `!len` checks below then emit the pending codepoint, flag
		 * it as an error, or stash it again, depending on what it is and on
		 * `end`. Jumping straight to `emit_output` would leave `s` as zero, so a
		 * pending '#' or digit would wrongly be treated as a conversion error. */
		w = buf->state;
		buf->state = 0;
		goto reprocess_wchar;
	}

	while (len--) {
		w = *in++;
reprocess_wchar:
		s = 0;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* Private User Area (95ku - 114ku) */
			s = w - 0xE000;
			s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21);
			goto process_emoji;
		}

		if (!s) {
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x216F; /* FULLWIDTH YEN SIGN */
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			}
		}

		if (w && (!s || s >= 0x8080)) {
			/* Nothing usable from the tables above; do a reverse search through
			 * the CP932 extension tables */
			s = 0;

			for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (w == cp932ext1_ucs_table[i]) {
					s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}

			for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
				if (w == cp932ext2_ucs_table[i]) {
					s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}
		}

process_emoji:
		if (w == '#' || (w >= '0' && w <= '9')) {
			/* Possibly the first half of a keypad emoji sequence */
			if (!len) {
				if (end) {
					/* Nothing can follow; emit `w` as an ordinary character */
					goto emit_output;
				} else {
					/* If we are at the end of the current buffer of codepoints, but another
					 * buffer is coming, then remember that we have to reprocess `w` */
					buf->state = w;
					break;
				}
			}
			uint32_t w2 = *in++; len--;
			if (w2 == 0x20E3) {
				if (w == '#') {
					s = 0x25BC;
				} else if (w == '0') {
					s = 0x2830;
				} else { /* Previous character was '1'-'9' */
					s = 0x27A6 + (w - '1');
				}
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			} else {
				/* Not a keypad sequence; put `w2` back so it is handled on its own */
				in--; len++;
			}
		} else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */
			if (!len) {
				if (end) {
					/* A flag needs two codepoints, but the input stops here */
					MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi);
				} else {
					/* Reprocess `w` when this function is called again with another buffer
					 * of wchars */
					buf->state = w;
				}
				break;
			}
			uint32_t w2 = *in++; len--;
			if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
				for (int i = 0; i < 10; i++) {
					if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) {
						s = nflags_code_kddi[i];
						s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
						goto emit_output;
					}
				}
			}
			/* No flag emoji matches this pair; put `w2` back and report `w` */
			in--; len++;
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		} else if (w == 0xA9) { /* Copyright sign */
			s = (((0x27DC / 94) + 0x21) << 8) | ((0x27DC % 94) + 0x21);
		} else if (w == 0xAE) { /* Registered sign */
			s = (((0x27DD / 94) + 0x21) << 8) | ((0x27DD % 94) + 0x21);
		} else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) {
			int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
			if (i >= 0) {
				s = mb_tbl_uni_kddi2code2_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) {
			int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
			if (i >= 0) {
				s = mb_tbl_uni_kddi2code3_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) {
			int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
			if (i >= 0) {
				s = mb_tbl_uni_kddi2code5_val[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		}

emit_output:
		if (!s && w) {
			/* No encoding was found for `w` */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s <= 0xFF) {
			out = mb_convert_buf_add(out, s);
		} else {
			unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2;
			SJIS_ENCODE(c1, c2, s1, s2);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
1347 
mb_sjis_sb_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)1348 static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
1349 {
1350 	unsigned char *p = *in, *e = p + *in_len;
1351 	uint32_t *out = buf, *limit = buf + bufsize - 1;
1352 
1353 	if (*state) {
1354 		goto softbank_emoji_escapes;
1355 	}
1356 
1357 	while (p < e && out < limit) {
1358 		unsigned char c = *p++;
1359 
1360 		if (c == 0x1B) {
1361 			/* Escape sequence */
1362 			if (p == e || *p++ != '$' || p == e) {
1363 				*out++ = MBFL_BAD_INPUT;
1364 				continue;
1365 			}
1366 			unsigned char c2 = *p++;
1367 			if ((c2 < 'E' || c2 > 'G') && (c2 < 'O' || c2 > 'Q')) {
1368 				*out++ = MBFL_BAD_INPUT;
1369 				continue;
1370 			}
1371 			/* Escape sequence was valid, next should be a series of specially
1372 			 * encoded Softbank emoji */
1373 			*state = c2;
1374 
1375 softbank_emoji_escapes:
1376 			while (p < e && out < limit) {
1377 				c = *p++;
1378 				if (c == 0xF) {
1379 					*state = 0;
1380 					break;
1381 				}
1382 				unsigned int s = 0;
1383 				if (*state == 'G' && c >= 0x21 && c <= 0x7A) {
1384 					s = (0x91 - 0x21) * 94;
1385 				} else if (*state == 'E' && c >= 0x21 && c <= 0x7A) {
1386 					s = (0x8D - 0x21) * 94;
1387 				} else if (*state == 'F' && c >= 0x21 && c <= 0x7A) {
1388 					s = (0x8E - 0x21) * 94;
1389 				} else if (*state == 'O' && c >= 0x21 && c <= 0x6D) {
1390 					s = (0x92 - 0x21) * 94;
1391 				} else if (*state == 'P' && c >= 0x21 && c <= 0x6C) {
1392 					s = (0x95 - 0x21) * 94;
1393 				} else if (*state == 'Q' && c >= 0x21 && c <= 0x5E) {
1394 					s = (0x96 - 0x21) * 94;
1395 				} else {
1396 					*out++ = MBFL_BAD_INPUT;
1397 					*state = 0;
1398 					break;
1399 				}
1400 
1401 				int snd = 0;
1402 				uint32_t w = mbfilter_sjis_emoji_sb2unicode(s + c - 0x21, &snd);
1403 				if (w) {
1404 					if (snd) {
1405 						*out++ = snd;
1406 					}
1407 					*out++ = w;
1408 				} else {
1409 					*out++ = MBFL_BAD_INPUT;
1410 					*state = 0;
1411 					break;
1412 				}
1413 			}
1414 		} else if (c <= 0x7F) {
1415 			*out++ = c;
1416 		} else if (c >= 0xA1 && c <= 0xDF) {
1417 			/* Kana */
1418 			*out++ = 0xFEC0 + c;
1419 		} else if (c > 0x80 && c < 0xFD && c != 0xA0) {
1420 			/* Kanji */
1421 			if (p == e) {
1422 				*out++ = MBFL_BAD_INPUT;
1423 				break;
1424 			}
1425 			unsigned char c2 = *p++;
1426 
1427 			if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
1428 				uint32_t w = 0;
1429 				unsigned int s1, s2;
1430 				SJIS_DECODE(c, c2, s1, s2);
1431 				unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
1432 
1433 				if (s <= 137) {
1434 					if (s == 31) {
1435 						w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
1436 					} else if (s == 32) {
1437 						w = 0xFF5E; /* FULLWIDTH TILDE */
1438 					} else if (s == 33) {
1439 						w = 0x2225; /* PARALLEL TO */
1440 					} else if (s == 60) {
1441 						w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
1442 					} else if (s == 80) {
1443 						w = 0xFFE0; /* FULLWIDTH CENT SIGN */
1444 					} else if (s == 81) {
1445 						w = 0xFFE1; /* FULLWIDTH POUND SIGN */
1446 					} else if (s == 137) {
1447 						w = 0xFFE2; /* FULLWIDTH NOT SIGN */
1448 					}
1449 				}
1450 
1451 				if (!w) {
1452 					if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) {
1453 						int snd = 0;
1454 						w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
1455 						if (snd) {
1456 							*out++ = snd;
1457 						}
1458 					} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
1459 						w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
1460 					} else if (s < jisx0208_ucs_table_size) {
1461 						w = jisx0208_ucs_table[s];
1462 					} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
1463 						w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
1464 					}
1465 
1466 					if (!w) {
1467 						if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
1468 							w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
1469 						} else if (s >= (94*94) && s < (114*94)) {
1470 							w = s - (94*94) + 0xE000;
1471 						}
1472 					}
1473 				}
1474 
1475 				*out++ = w ? w : MBFL_BAD_INPUT;
1476 			} else {
1477 				*out++ = MBFL_BAD_INPUT;
1478 			}
1479 		} else {
1480 			*out++ = MBFL_BAD_INPUT;
1481 		}
1482 	}
1483 
1484 	*in_len = e - p;
1485 	*in = p;
1486 	return out - buf;
1487 }
1488 
/* Encode `len` wchars from `in` as SJIS-Mobile#SOFTBANK and append them to `buf`.
 *
 * `end` is true when no more wchars will follow this call.
 *
 * Two kinds of 2-codepoint sequences are folded into a single emoji:
 * a '#' or ASCII digit followed by U+20E3 (keypad symbols), and certain pairs
 * of Regional Indicator codepoints (national flags). If the first codepoint
 * of a possible pair is the last one in `in` and more input is coming, it is
 * stashed in `buf->state` and reprocessed at the start of the next call.
 *
 * Codepoints which cannot be encoded are reported through MB_CONVERT_ERROR. */
static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));

	uint32_t w;
	unsigned int s = 0;

	if (buf->state) {
		/* Continue with the codepoint which was held over from the previous call
		 *
		 * Always resume at `reprocess_wchar`, even if this call brought no new
		 * wchars. The `!len` checks below then emit the pending codepoint, flag
		 * it as an error, or stash it again, depending on what it is and on
		 * `end`. Jumping straight to `emit_output` would leave `s` as zero, so a
		 * pending '#' or digit would wrongly be treated as a conversion error. */
		w = buf->state;
		buf->state = 0;
		goto reprocess_wchar;
	}

	while (len--) {
		w = *in++;
reprocess_wchar:
		s = 0;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* Private User Area (95ku - 114ku) */
			s = w - 0xE000;
			s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21);
			goto process_emoji;
		}

		if (!s) {
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x216F; /* FULLWIDTH YEN SIGN */
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			}
		}

		if (w && (!s || s >= 0x8080)) {
			/* Nothing usable from the tables above; do a reverse search through
			 * the CP932 extension tables */
			s = 0;

			for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (w == cp932ext1_ucs_table[i]) {
					s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}

			for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
				if (w == cp932ext2_ucs_table[i]) {
					s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}
		}

process_emoji:
		if (w == '#' || (w >= '0' && w <= '9')) {
			/* Possibly the first half of a keypad emoji sequence */
			if (!len) {
				if (end) {
					/* Nothing can follow; emit `w` as an ordinary character */
					goto emit_output;
				} else {
					/* If we are at the end of the current buffer of codepoints, but another
					 * buffer is coming, then remember that we have to reprocess `w` */
					buf->state = w;
					break;
				}
			}
			uint32_t w2 = *in++; len--;
			if (w2 == 0x20E3) {
				if (w == '#') {
					s = 0x2817;
				} else if (w == '0') {
					s = 0x282C;
				} else { /* Previous character was '1'-'9' */
					s = 0x2823 + (w - '1');
				}
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			} else {
				/* Not a keypad sequence; put `w2` back so it is handled on its own */
				in--; len++;
			}
		} else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */
			if (!len) {
				if (end) {
					/* A flag needs two codepoints, but the input stops here */
					MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb);
				} else {
					/* Reprocess `w` when this function is called again with
					 * another buffer of wchars */
					buf->state = w;
				}
				break;
			}
			uint32_t w2 = *in++; len--;
			if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
				for (int i = 0; i < 10; i++) {
					if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) {
						s = nflags_code_sb[i];
						s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
						goto emit_output;
					}
				}
			}
			/* No flag emoji matches this pair; put `w2` back and report `w` */
			in--; len++;
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		} else if (w == 0xA9) { /* Copyright sign */
			s = (((0x2855 / 94) + 0x21) << 8) | ((0x2855 % 94) + 0x21);
		} else if (w == 0xAE) { /* Registered sign */
			s = (((0x2856 / 94) + 0x21) << 8) | ((0x2856 % 94) + 0x21);
		} else if (w >= mb_tbl_uni_sb2code2_min && w <= mb_tbl_uni_sb2code2_max) {
			int i = mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
			if (i >= 0) {
				s = mb_tbl_uni_sb2code2_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) {
			int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
			if (i >= 0) {
				s = mb_tbl_uni_sb2code3_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) {
			int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
			if (i >= 0) {
				s = mb_tbl_uni_sb2code5_val[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		}

emit_output:
		if (!s && w) {
			/* No encoding was found for `w` */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s <= 0xFF) {
			out = mb_convert_buf_add(out, s);
		} else {
			unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2;
			SJIS_ENCODE(c1, c2, s1, s2);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
1654