1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
 * The source code included in this file was separated from mbfilter_sjis_open.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_sjis_mobile.h"
32
33 #include "unicode_table_cp932_ext.h"
34 #include "unicode_table_jis.h"
35
36 #include "emoji2uni.h"
37
/* Character byte-length table shared by the three SJIS-Mobile encodings
 * defined below (each of them references it).
 * It has 256 entries, presumably indexed by the value of a character's first
 * byte: entries 0x81-0x9F and 0xE0-0xFC hold 2, every other entry holds 1. */
const unsigned char mblen_table_sjis_mobile[] = { /* 0x81-0x9F,0xE0-0xFC */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
};
56
/* Lookup helper defined outside this file. It is called below as
 * mbfl_bisec_srch2(codepoint, key_table, table_length); a negative result is
 * treated as "not found", otherwise the result is used as an index into the
 * matching value table. */
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);

/* Forward declarations of file-local functions which are referenced by the
 * encoding and vtbl initializers further down, before their definitions. */
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
66
/* NULL-terminated alias name lists, one per mobile vendor encoding */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL};
static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL};
static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL};

/* Encoding descriptors for the three vendor variants.
 * The initializers are positional; the field layout comes from the
 * mbfl_encoding declaration, which is not in this file. In order, each one
 * supplies: the encoding id constant, two name strings, the alias list, the
 * shared byte-length table, a flags value, pointers to the vendor's
 * "to wchar" and "from wchar" filter vtbls, the vendor's buffer-based
 * to-wchar and from-wchar conversion functions, and a trailing NULL. */

/* NTT DoCoMo */
const mbfl_encoding mbfl_encoding_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	"SJIS-Mobile#DOCOMO",
	"Shift_JIS",
	mbfl_encoding_sjis_docomo_aliases,
	mblen_table_sjis_mobile,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_docomo_wchar,
	&vtbl_wchar_sjis_docomo,
	mb_sjis_docomo_to_wchar,
	mb_wchar_to_sjis_docomo,
	NULL
};

/* KDDI */
const mbfl_encoding mbfl_encoding_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	"SJIS-Mobile#KDDI",
	"Shift_JIS",
	mbfl_encoding_sjis_kddi_aliases,
	mblen_table_sjis_mobile,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_kddi_wchar,
	&vtbl_wchar_sjis_kddi,
	mb_sjis_kddi_to_wchar,
	mb_wchar_to_sjis_kddi,
	NULL
};

/* SoftBank */
const mbfl_encoding mbfl_encoding_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	"SJIS-Mobile#SOFTBANK",
	"Shift_JIS",
	mbfl_encoding_sjis_sb_aliases,
	mblen_table_sjis_mobile,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_sb_wchar,
	&vtbl_wchar_sjis_sb,
	mb_sjis_sb_to_wchar,
	mb_wchar_to_sjis_sb,
	NULL
};
112
/* Conversion filter vtbls, two per vendor (vendor -> wchar and wchar -> vendor).
 * The initializers are positional: source encoding id, destination encoding
 * id, constructor, a NULL slot, the per-character filter function, the flush
 * function, and another NULL slot.
 *
 * All three vendors share the same filter functions; those functions tell the
 * vendors apart by comparing filter->from / filter->to against the encoding
 * descriptors defined above. */

/* DoCoMo -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> DoCoMo */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* KDDI -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> KDDI */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* SoftBank -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
	mbfl_no_encoding_sjis_sb,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> SoftBank */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};
172
/* National flag emoji, used when converting Unicode -> SJIS-Mobile.
 * nflags_s holds ten two-letter country codes (exactly two chars each, so the
 * entries are NOT NUL-terminated strings). nflags_code_kddi and nflags_code_sb
 * are parallel arrays: entry i is the vendor emoji code for country
 * nflags_s[i]. The codes are linear (ku*94)+ten style values; the caller
 * splits them again with / 94 and % 94. */
static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"};
static const int nflags_code_kddi[10] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7};
static const int nflags_code_sb[10] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03};
176
/* Range mapping tables between vendor emoji codes and 16-bit codepoints in
 * the 0xE000-0xF0FF area. Each row is {first source code, last source code,
 * target value for the first source code}; the three-column layout matches
 * what mbfilter_conv_map_tbl() and mbfilter_conv_r_map_tbl() expect.
 * NOTE(review): these tables are not referenced in the part of the file
 * visible here; presumably they are consumed by other SJIS conversion code
 * through those two helpers - confirm against the callers. */

/* DoCoMo */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{0x28c2, 0x292f, 0xe63e},
	{0x2930, 0x2934, 0xe6ac},
	{0x2935, 0x2951, 0xe6b1},
	{0x2952, 0x29db, 0xe6ce},
};

/* KDDI */
const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{0x26ec, 0x2838, 0xe468},
	{0x284c, 0x2863, 0xe5b5},
	{0x24b8, 0x24ca, 0xe5cd},
	{0x24cb, 0x2545, 0xea80},
	{0x2839, 0x284b, 0xeafb},
	{0x2546, 0x25c0, 0xeb0e},
	{0x25c1, 0x25c6, 0xeb89},
};

/* SoftBank */
const unsigned short mbfl_sb2uni_pua[6][3] = {
	{0x27a9, 0x2802, 0xe101},
	{0x2808, 0x2861, 0xe201},
	{0x2921, 0x297a, 0xe001},
	{0x2980, 0x29cc, 0xe301},
	{0x2a99, 0x2ae4, 0xe401},
	{0x2af8, 0x2b35, 0xe501},
};

/* KDDI, second mapping (the "_b" variant covers the same source code ranges
 * as mbfl_kddi2uni_pua but targets 0xEC40-0xF0FC) */
const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{0x24b8, 0x24f6, 0xec40},
	{0x24f7, 0x2573, 0xec80},
	{0x2574, 0x25b2, 0xed40},
	{0x25b3, 0x25c6, 0xed80},
	{0x26ec, 0x272a, 0xef40},
	{0x272b, 0x27a7, 0xef80},
	{0x27a8, 0x27e6, 0xf040},
	{0x27e7, 0x2863, 0xf080},
};
213
/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF
 * These correspond to the letters A-Z
 * To display the flag emoji for a country, two unicode codepoints are combined,
 * which correspond to the two-letter code for that country
 * This macro converts uppercase ASCII values to Regional Indicator codepoints
 * (0x1F1A5 + 'A' == 0x1F1E6) */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions which return an int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
222
/* Convert a JIS row/cell byte pair (c1, c2) into the corresponding Shift-JIS
 * lead byte (s1) and trail byte (s2).
 *
 * Two JIS rows share one Shift-JIS lead byte: odd rows use the lower half of
 * the trail byte range (skipping 0x7F), even rows use the upper half.
 *
 * Every argument is parenthesized in the expansion, so expressions such as
 * `(jis >> 8) & 0xff` may be passed safely. c1 and c2 are still evaluated more
 * than once, so they must be free of side effects, and they must not be the
 * same objects as s1/s2 (s1 is written before c1 is read for the last time). */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		(s1) = (((c1) - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
		(s2) = (c2); \
		if ((c1) & 1) { \
			if ((c2) < 0x60) { \
				(s2)--; \
			} \
			(s2) += 0x20; \
		} else { \
			(s2) += 0x7e; \
		} \
	} while (0)
236
/* Convert a Shift-JIS lead byte (c1) and trail byte (c2) into the
 * corresponding JIS row byte (s1) and cell byte (s2); the inverse of
 * SJIS_ENCODE.
 *
 * Lead bytes below 0xA0 and lead bytes from the upper range are rebased
 * separately; a trail byte of 0x9F or more selects the second (even) row
 * which shares that lead byte.
 *
 * Every argument is parenthesized in the expansion, so expressions such as
 * `word >> 8` may be passed safely. c1 and c2 are still evaluated more than
 * once, so they must be free of side effects and must not be the same objects
 * as s1/s2. No range validation is done here; callers check the bytes first. */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		if ((c1) < 0xa0) { \
			(s1) = (((c1) - 0x81) << 1) + 0x21; \
		} else { \
			(s1) = (((c1) - 0xc1) << 1) + 0x21; \
		} \
		(s2) = (c2); \
		if ((c2) < 0x9f) { \
			if ((c2) < 0x7f) { \
				(s2)++; \
			} \
			(s2) -= 0x20; \
		} else { \
			(s1)++; \
			(s2) -= 0x7e; \
		} \
	} while (0)
255
/* Forward range mapping.
 *
 * `map` has `n` rows of {first source code, last source code, first target
 * value}. The rows are scanned in order; for the first row whose source range
 * contains `c`, the value at the same offset in the target range is stored in
 * *w and 1 is returned. If no row matches, *w is left untouched and 0 is
 * returned. */
int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row = 0;

	while (row < n) {
		const unsigned short *range = map[row];

		if (c >= range[0] && c <= range[1]) {
			*w = c - range[0] + range[2];
			return 1;
		}
		row++;
	}

	return 0;
}
266
/* Reverse range mapping; the inverse of mbfilter_conv_map_tbl().
 *
 * `map` has `n` rows of {first source code, last source code, first target
 * value}. Here `c` is looked up in the TARGET ranges; for the first row whose
 * target range contains it, the source code at the same offset is stored in
 * *w and 1 is returned. If no row matches, *w is left untouched and 0 is
 * returned. */
int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	for (int row = 0; row < n; row++) {
		const int src_first = map[row][0];
		const int src_last = map[row][1];
		const int dst_first = map[row][2];
		/* The target range is exactly as long as the source range */
		const int dst_last = dst_first - src_first + src_last;

		if (c < dst_first || c > dst_last) {
			continue;
		}

		*w = c + src_first - dst_first;
		return 1;
	}

	return 0;
}
278
/* number -> (ku*94)+ten value for telephone keypad character
 * Digits 1-9 map to 0x2966-0x296E and 0 maps to 0x296F.
 * `n` is evaluated twice, so it must be free of side effects. */
#define DOCOMO_KEYPAD(n) ((n) == 0 ? 0x296F : (0x2965 + (n)))
/* (ku*94)+ten value for the DoCoMo '#' keypad character */
#define DOCOMO_KEYPAD_HASH 0x2964

/* Emit a keypad emoji as a two-codepoint sequence: store the base character
 * `c` through `snd`, then RETURN 0x20E3 (the enclosing keycap combining
 * character) from the enclosing function. Requires an `int *snd` in scope. */
#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0)
284
/* The emoji tables only store 16-bit values, so the real codepoint has to be
 * reconstructed from the stored one:
 *
 * - stored values above 0xF000 stand for emoji codepoints above 0x1F000;
 *   adding 0x10000 recovers the true value
 * - stored values above 0xE000 (up to and including 0xF000) stand for emoji
 *   which Unicode does not support; those use Private Use Area codepoints
 *   above 0xFE000, and adding 0xF0000 recovers the true value
 * - anything else is already a complete codepoint and is returned unchanged */
static inline int convert_emoji_cp(int cp)
{
	if (cp <= 0xE000) {
		return cp;
	}
	return cp + (cp > 0xF000 ? 0x10000 : 0xF0000);
}
299
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)300 int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
301 {
302 /* All three mobile vendors had emoji for numbers on a telephone keypad
303 * Unicode doesn't have those, but it has a combining character which puts
304 * a 'keypad button' around the following character, making it look like
305 * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. */
306 if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
307 if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) {
308 EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]));
309 } else {
310 *snd = 0;
311 return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]);
312 }
313 }
314 return 0;
315 }
316
/* Emit a national flag emoji as a pair of Regional Indicator codepoints:
 * store the indicator for the first letter of `country` through `snd`, then
 * RETURN the indicator for the second letter from the enclosing function.
 * Requires an `int *snd` in scope; `country` must have at least two chars. */
#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0)

/* Country codes for the KDDI flag emoji with codes 0x2545-0x254A, in code
 * order (two chars each, not NUL-terminated) */
static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"};
320
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)321 int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
322 {
323 if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
324 if (s == 0x24C0) { /* Spain */
325 EMIT_FLAG_EMOJI("ES");
326 } else if (s == 0x24C1) { /* Russia */
327 EMIT_FLAG_EMOJI("RU");
328 } else if (s >= 0x2545 && s <= 0x254A) {
329 EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]);
330 } else if (s == 0x25BC) {
331 EMIT_KEYPAD_EMOJI('#');
332 } else {
333 *snd = 0;
334 return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]);
335 }
336 } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
337 if (s == 0x2750) { /* Japan */
338 EMIT_FLAG_EMOJI("JP");
339 } else if (s >= 0x27A6 && s <= 0x27AE) {
340 EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1');
341 } else if (s == 0x27F7) { /* United States */
342 EMIT_FLAG_EMOJI("US");
343 } else if (s == 0x2830) {
344 EMIT_KEYPAD_EMOJI('0');
345 } else {
346 *snd = 0;
347 return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]);
348 }
349 }
350 return 0;
351 }
352
353 static const char nflags_sb[10][2] = {"JP", "US", "FR", "DE", "IT", "GB", "ES", "RU", "CN", "KR"};
354
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)355 int mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
356 {
357 if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
358 if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) {
359 EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
360 } else {
361 *snd = 0;
362 return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
363 }
364 } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
365 *snd = 0;
366 return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]);
367 } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
368 if (s >= 0x2B02 && s <= 0x2B0B) {
369 EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]);
370 } else {
371 *snd = 0;
372 return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]);
373 }
374 }
375 return 0;
376 }
377
378 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)379 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
380 {
381 /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji
382 * to a sequence of 2 codepoints, one of which is a combining character which
383 * adds the 'key' image around the other
384 *
385 * In the other direction, look for such sequences and convert them to a
386 * single emoji */
387 if (filter->status == 1) {
388 int c1 = filter->cache;
389 filter->cache = filter->status = 0;
390 if (c == 0x20E3) {
391 if (c1 == '#') {
392 *s1 = 0x2964;
393 } else if (c1 == '0') {
394 *s1 = 0x296F;
395 } else { /* Previous character was '1'-'9' */
396 *s1 = 0x2966 + (c1 - '1');
397 }
398 return 1;
399 } else {
400 /* This character wasn't combining character to make keypad symbol,
401 * so pass the previous character through... and proceed to process the
402 * current character as usual
403 * (Single-byte ASCII characters are valid in Shift-JIS...) */
404 CK((*filter->output_function)(c1, filter->data));
405 }
406 }
407
408 if (c == '#' || (c >= '0' && c <= '9')) {
409 filter->status = 1;
410 filter->cache = c;
411 return 0;
412 }
413
414 if (c == 0xA9) { /* Copyright sign */
415 *s1 = 0x29B5;
416 return 1;
417 } else if (c == 0x00AE) { /* Registered sign */
418 *s1 = 0x29BA;
419 return 1;
420 } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
421 int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
422 if (i >= 0) {
423 *s1 = mb_tbl_uni_docomo2code2_value[i];
424 return 1;
425 }
426 } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
427 int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
428 if (i >= 0) {
429 *s1 = mb_tbl_uni_docomo2code3_value[i];
430 return 1;
431 }
432 } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
433 int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
434 if (i >= 0) {
435 *s1 = mb_tbl_uni_docomo2code5_val[i];
436 return 1;
437 }
438 }
439 return 0;
440 }
441
/* Try to convert Unicode codepoint `c` to a KDDI emoji code.
 *
 * Returns 1 and stores the emoji code (linear (ku*94)+ten value) in *s1 if an
 * emoji was matched, 0 if not, and -1 if emitting a held-back codepoint
 * failed.
 *
 * Two kinds of 2-codepoint sequences are folded back into single emoji, which
 * requires holding the first codepoint in filter->cache until the next one
 * is known:
 *   filter->status == 1: holding '#' or '0'-'9', which forms a keypad emoji
 *                        if 0x20E3 follows
 *   filter->status == 2: holding a Regional Indicator, which forms a national
 *                        flag emoji if a matching second indicator follows
 * The caller must check filter->status after a 0 return to see whether `c`
 * was held back. */
int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
	switch (filter->status) {
	case 1: {
		const int held = filter->cache;

		filter->status = 0;
		filter->cache = 0;

		if (c == 0x20E3) {
			switch (held) {
			case '#':
				*s1 = 0x25BC;
				break;
			case '0':
				*s1 = 0x2830;
				break;
			default: /* Held character was '1'-'9' */
				*s1 = 0x27a6 + (held - '1');
				break;
			}
			return 1;
		}

		/* Just an ordinary '#' or digit; pass it through and handle `c` as usual */
		CK((*filter->output_function)(held, filter->data));
		break;
	}

	case 2: {
		const int first = filter->cache;

		filter->status = 0;
		filter->cache = 0;

		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int k = 0; k < 10; k++) {
				if (first == NFLAGS(nflags_s[k][0]) && c == NFLAGS(nflags_s[k][1])) {
					*s1 = nflags_code_kddi[k];
					return 1;
				}
			}
		}

		/* None of the KDDI national flag emoji matched, so there is no way
		 * to convert the held codepoint; `c` is still handled as usual */
		CK(mbfl_filt_conv_illegal_output(first, filter));
		break;
	}

	default:
		break;
	}

	if (c == '#' || (c >= '0' && c <= '9')) {
		/* Might be the start of a keypad sequence */
		filter->cache = c;
		filter->status = 1;
		return 0;
	}
	if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		/* Might be the first half of a national flag */
		filter->cache = c;
		filter->status = 2;
		return 0;
	}

	if (c == 0xA9) { /* Copyright sign */
		*s1 = 0x27DC;
		return 1;
	}
	if (c == 0xAE) { /* Registered sign */
		*s1 = 0x27DD;
		return 1;
	}

	/* Only the first table whose range contains `c` is searched */
	if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
		const int hit = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
		if (hit < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code2_value[hit];
		return 1;
	}
	if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
		/* Keys are stored as 16-bit values, without the leading 0x10000 */
		const int hit = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
		if (hit < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code3_value[hit];
		return 1;
	}
	if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
		/* Keys are stored as 16-bit values, without the leading 0xF0000 */
		const int hit = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
		if (hit < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code5_val[hit];
		return 1;
	}

	return 0;
}
513
/* Try to convert Unicode codepoint `c` to a SoftBank emoji code.
 *
 * Returns 1 and stores the emoji code (linear (ku*94)+ten value) in *s1 if an
 * emoji was matched, 0 if not, and -1 if emitting a held-back codepoint
 * failed.
 *
 * Two kinds of 2-codepoint sequences are folded back into single emoji, which
 * requires holding the first codepoint in filter->cache until the next one
 * is known:
 *   filter->status == 1: holding '#' or '0'-'9', which forms a keypad emoji
 *                        if 0x20E3 follows
 *   filter->status == 2: holding a Regional Indicator, which forms a national
 *                        flag emoji if a matching second indicator follows
 * The caller must check filter->status after a 0 return to see whether `c`
 * was held back. */
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
{
	switch (filter->status) {
	case 1: {
		const int held = filter->cache;

		filter->status = 0;
		filter->cache = 0;

		if (c == 0x20E3) {
			switch (held) {
			case '#':
				*s1 = 0x2817;
				break;
			case '0':
				*s1 = 0x282c;
				break;
			default: /* Held character was '1'-'9' */
				*s1 = 0x2823 + (held - '1');
				break;
			}
			return 1;
		}

		/* Just an ordinary '#' or digit; pass it through and handle `c` as usual */
		CK((*filter->output_function)(held, filter->data));
		break;
	}

	case 2: {
		const int first = filter->cache;

		filter->status = 0;
		filter->cache = 0;

		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int k = 0; k < 10; k++) {
				if (first == NFLAGS(nflags_s[k][0]) && c == NFLAGS(nflags_s[k][1])) {
					*s1 = nflags_code_sb[k];
					return 1;
				}
			}
		}

		/* None of the SoftBank national flag emoji matched, so there is no
		 * way to convert the held codepoint; `c` is still handled as usual */
		CK(mbfl_filt_conv_illegal_output(first, filter));
		break;
	}

	default:
		break;
	}

	if (c == '#' || (c >= '0' && c <= '9')) {
		/* Might be the start of a keypad sequence */
		filter->cache = c;
		filter->status = 1;
		return 0;
	}
	if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		/* Might be the first half of a national flag */
		filter->cache = c;
		filter->status = 2;
		return 0;
	}

	if (c == 0xA9) { /* Copyright sign */
		*s1 = 0x2855;
		return 1;
	}
	if (c == 0xAE) { /* Registered sign */
		*s1 = 0x2856;
		return 1;
	}

	/* Only the first table whose range contains `c` is searched */
	if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
		const int hit = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
		if (hit < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_sb2code2_value[hit];
		return 1;
	}
	if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
		/* Keys are stored as 16-bit values, without the leading 0x10000 */
		const int hit = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
		if (hit < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_sb2code3_value[hit];
		return 1;
	}
	if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
		/* Keys are stored as 16-bit values, without the leading 0xF0000 */
		const int hit = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
		if (hit < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_sb2code5_val[hit];
		return 1;
	}

	return 0;
}
585
mbfl_filt_conv_sjis_mobile_wchar(int c,mbfl_convert_filter * filter)586 int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
587 {
588 int c1, s, s1, s2, w, snd = 0;
589
590 switch (filter->status) {
591 case 0:
592 if (c >= 0 && c < 0x80) { /* ASCII */
593 if (filter->from == &mbfl_encoding_sjis_sb && c == 0x1B) {
594 /* ESC; escape sequences were used on older SoftBank phones for emoji */
595 filter->cache = c;
596 filter->status = 2;
597 } else {
598 CK((*filter->output_function)(c, filter->data));
599 }
600 } else if (c > 0xA0 && c < 0xE0) { /* Kana */
601 CK((*filter->output_function)(0xFEC0 + c, filter->data));
602 } else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */
603 filter->status = 1;
604 filter->cache = c;
605 } else {
606 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
607 }
608 break;
609
610 case 1: /* Kanji, second byte */
611 filter->status = 0;
612 c1 = filter->cache;
613 if (c >= 0x40 && c <= 0xFC && c != 0x7F) {
614 w = 0;
615 SJIS_DECODE(c1, c, s1, s2);
616 s = ((s1 - 0x21) * 94) + s2 - 0x21;
617 if (s <= 137) {
618 if (s == 31) {
619 w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
620 } else if (s == 32) {
621 w = 0xFF5E; /* FULLWIDTH TILDE */
622 } else if (s == 33) {
623 w = 0x2225; /* PARALLEL TO */
624 } else if (s == 60) {
625 w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
626 } else if (s == 80) {
627 w = 0xFFE0; /* FULLWIDTH CENT SIGN */
628 } else if (s == 81) {
629 w = 0xFFE1; /* FULLWIDTH POUND SIGN */
630 } else if (s == 137) {
631 w = 0xFFE2; /* FULLWIDTH NOT SIGN */
632 }
633 }
634 if (w == 0) {
635 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
636 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
637 } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
638 w = jisx0208_ucs_table[s];
639 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
640 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
641 }
642
643 /* Emoji */
644 if (filter->from == &mbfl_encoding_sjis_docomo && s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
645 w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
646 if (snd > 0) {
647 CK((*filter->output_function)(snd, filter->data));
648 }
649 } else if (filter->from == &mbfl_encoding_sjis_kddi && s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) {
650 w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
651 if (snd > 0) {
652 CK((*filter->output_function)(snd, filter->data));
653 }
654 } else if (filter->from == &mbfl_encoding_sjis_sb && s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) {
655 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
656 if (snd > 0) {
657 CK((*filter->output_function)(snd, filter->data));
658 }
659 }
660
661 if (w == 0) {
662 if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
663 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
664 } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
665 w = s - (94*94) + 0xe000;
666 }
667 }
668 }
669 if (w <= 0) {
670 w = MBFL_BAD_INPUT;
671 }
672 CK((*filter->output_function)(w, filter->data));
673 } else {
674 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
675 }
676 break;
677
678 /* ESC: Softbank Emoji */
679 case 2:
680 if (c == '$') {
681 filter->cache = c;
682 filter->status++;
683 } else {
684 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
685 filter->status = filter->cache = 0;
686 }
687 break;
688
689 /* ESC $: Softbank Emoji */
690 case 3:
691 if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) {
692 filter->cache = c;
693 filter->status++;
694 } else {
695 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
696 filter->status = filter->cache = 0;
697 }
698 break;
699
700 /* ESC $ [GEFOPQ]: Softbank Emoji */
701 case 4:
702 c1 = filter->cache;
703 if (c == 0xF) { /* Terminate sequence of emoji */
704 filter->status = filter->cache = 0;
705 return 0;
706 } else {
707 if (c1 == 'G' && c >= 0x21 && c <= 0x7a) {
708 s1 = (0x91 - 0x21) * 94;
709 } else if (c1 == 'E' && c >= 0x21 && c <= 0x7A) {
710 s1 = (0x8D - 0x21) * 94;
711 } else if (c1 == 'F' && c >= 0x21 && c <= 0x7A) {
712 s1 = (0x8E - 0x21) * 94;
713 } else if (c1 == 'O' && c >= 0x21 && c <= 0x6D) {
714 s1 = (0x92 - 0x21) * 94;
715 } else if (c1 == 'P' && c >= 0x21 && c <= 0x6C) {
716 s1 = (0x95 - 0x21) * 94;
717 } else if (c1 == 'Q' && c >= 0x21 && c <= 0x5E) {
718 s1 = (0x96 - 0x21) * 94;
719 } else {
720 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
721 filter->status = filter->cache = 0;
722 return 0;
723 }
724
725 w = mbfilter_sjis_emoji_sb2unicode(s1 + c - 0x21, &snd);
726 if (w > 0) {
727 if (snd > 0) {
728 CK((*filter->output_function)(snd, filter->data));
729 }
730 CK((*filter->output_function)(w, filter->data));
731 } else {
732 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
733 filter->status = filter->cache = 0;
734 }
735 }
736 }
737
738 return 0;
739 }
740
/* Flush function for the SJIS-Mobile -> wchar filters.
 *
 * If the input ended in the middle of a character or escape sequence (any
 * status other than 0 or 4), emits MBFL_BAD_INPUT. Status 4 means that a
 * complete SoftBank emoji escape sequence was open but not terminated, which
 * is not treated as an error. Then the state is reset and the flush function
 * of the filter, if there is one, is invoked.
 *
 * Return values of the output and flush functions are ignored; this function
 * always returns 0. */
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
{
	switch (filter->status) {
	case 0:
	case 4:
		break;
	default:
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
		break;
	}
	filter->status = 0;

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);
	return 0;
}
754
/* wchar -> SJIS-Mobile (any vendor) filter function. Receives one Unicode
 * codepoint `c` per call and passes the encoded bytes to
 * filter->output_function. The vendor is identified by comparing filter->to
 * with the three encoding descriptors.
 *
 * s1 holds the converted character: a single byte if it is less than 0x100,
 * otherwise a JIS code with the row byte in bits 8-15 and the cell byte in
 * bits 0-7. A negative s1 means that `c` cannot be converted.
 *
 * filter->status and filter->cache are managed by the vendor emoji functions,
 * which may hold a codepoint back until the next one arrives.
 *
 * Returns 0, or -1 (via CK) if an output call returned a negative value. */
int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s1 = 0, s2 = 0;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xE000 && c < (0xE000 + 20*94)) {
		/* Private User Area (95ku - 114ku) */
		s1 = c - 0xE000;
		c1 = (s1 / 94) + 0x7F;
		c2 = (s1 % 94) + 0x21;
		s1 = (c1 << 8) | c2;
		/* s2 is used as a flag here: it keeps the "X 0212" check below from
		 * rejecting this value, which is always 0x8080 or greater */
		s2 = 1;
	}

	/* Fixed mappings for a few codepoints which the tables did not convert */
	if (s1 <= 0) {
		if (c == 0xA5) { /* YEN SIGN */
			s1 = 0x216F; /* FULLWIDTH YEN SIGN */
		} else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */
			s1 = 0x2140;
		} else if (c == 0x2225) { /* PARALLEL TO */
			s1 = 0x2142;
		} else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
			s1 = 0x215D;
		} else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */
			s1 = 0x2171;
		} else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */
			s1 = 0x2172;
		} else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */
			s1 = 0x224C;
		}
	}

	if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
		s1 = -1;

		/* CP932 vendor ext1 (13ku) */
		for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) {
			if (c == cp932ext1_ucs_table[c1]) {
				s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
				break;
			}
		}

		if (s1 <= 0) {
			/* CP932 vendor ext2 (89ku - 92ku) */
			for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) {
				if (c == cp932ext2_ucs_table[c1]) {
					s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21;
					break;
				}
			}
		}

		/* A null codepoint converts to a null byte */
		if (c == 0) {
			s1 = 0;
		}
	}

	/* Vendor emoji take priority over whatever was found above. The emoji
	 * functions return a linear (ku*94)+ten value, which is converted to a
	 * row byte / cell byte pair here. They may also emit a codepoint which
	 * was held back by a previous call, or hold `c` back. */
	if ((filter->to == &mbfl_encoding_sjis_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0) ||
		(filter->to == &mbfl_encoding_sjis_kddi && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) ||
		(filter->to == &mbfl_encoding_sjis_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0)) {
		s1 = (((s1 / 94) + 0x21) << 8) | ((s1 % 94) + 0x21);
	}

	/* `c` was held back as the possible start of a keypad or flag emoji
	 * sequence; nothing to output until the next codepoint (or flush) */
	if (filter->status) {
		return 0;
	}

	if (s1 >= 0) {
		if (s1 < 0x100) { /* Latin/Kana */
			CK((*filter->output_function)(s1, filter->data));
		} else { /* Kanji */
			c1 = (s1 >> 8) & 0xff;
			c2 = s1 & 0xff;
			SJIS_ENCODE(c1, c2, s1, s2);
			CK((*filter->output_function)(s1, filter->data));
			CK((*filter->output_function)(s2, filter->data));
		}
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return 0;
}
846
/* Flush function for the wchar -> SJIS-Mobile filters.
 *
 * Deals with a codepoint which was held back in filter->cache as the possible
 * start of a 2-codepoint emoji sequence when the input ended:
 *   status 1: a held '#' or digit is output as is
 *   status 2: a held Regional Indicator has no partner, so it is reported
 *             via mbfl_filt_conv_illegal_output
 * Then the flush function of the filter, if there is one, is invoked.
 *
 * Returns 0, or -1 (via CK) if emitting the held codepoint failed; in that
 * case the flush function is not invoked. */
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
{
	const int held = filter->cache;

	switch (filter->status) {
	case 1:
		if (held == '#' || (held >= '0' && held <= '9')) {
			filter->status = 0;
			filter->cache = 0;
			CK((*filter->output_function)(held, filter->data));
		}
		break;

	case 2:
		/* First of a pair of Regional Indicator codepoints came at the end of a string */
		filter->status = 0;
		filter->cache = 0;
		CK(mbfl_filt_conv_illegal_output(held, filter));
		break;

	default:
		break;
	}

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);
	return 0;
}
865
/* Buffer-based SJIS-Mobile (DoCoMo) -> wchar conversion.
 *
 * Decodes bytes from *in (length *in_len) into `buf`, stopping when the input
 * is exhausted or fewer than two free slots remain in `buf`. On return, *in
 * and *in_len describe the input which has not been consumed yet.
 * Invalid or truncated input is converted to MBFL_BAD_INPUT.
 *
 * `state` is not used by this function.
 *
 * Returns the number of wchars written to `buf`.
 *
 * NOTE(review): `limit` is computed as buf + bufsize - 1, so this assumes
 * bufsize >= 1, and nothing can be converted unless bufsize >= 2 - confirm
 * what the callers guarantee. */
static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *p = *in, *e = p + *in_len;
	/* Leave one extra space available in output buffer, since some iterations of
	 * main loop (below) may emit two wchars */
	uint32_t *out = buf, *limit = buf + bufsize - 1;

	while (p < e && out < limit) {
		unsigned char c = *p++;

		if (c <= 0x7F) {
			*out++ = c;
		} else if (c >= 0xA1 && c <= 0xDF) {
			/* Kana */
			*out++ = 0xFEC0 + c;
		} else if (c > 0x80 && c < 0xFD && c != 0xA0) {
			/* Kanji */
			if (p == e) {
				/* Input ends after the first byte of a 2-byte character */
				*out++ = MBFL_BAD_INPUT;
				break;
			}
			unsigned char c2 = *p++;

			if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
				uint32_t w = 0;
				unsigned int s1, s2;
				SJIS_DECODE(c, c2, s1, s2);
				/* s is the linear (ku*94)+ten index of the character, counting from 0 */
				unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;

				/* A few characters in the first two rows get fixed mappings
				 * which take priority over the tables */
				if (s <= 137) {
					if (s == 31) {
						w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
					} else if (s == 32) {
						w = 0xFF5E; /* FULLWIDTH TILDE */
					} else if (s == 33) {
						w = 0x2225; /* PARALLEL TO */
					} else if (s == 60) {
						w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
					} else if (s == 80) {
						w = 0xFFE0; /* FULLWIDTH CENT SIGN */
					} else if (s == 81) {
						w = 0xFFE1; /* FULLWIDTH POUND SIGN */
					} else if (s == 137) {
						w = 0xFFE2; /* FULLWIDTH NOT SIGN */
					}
				}

				if (!w) {
					/* The DoCoMo emoji range is checked before the other tables */
					if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
						int snd = 0;
						w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
						if (snd) {
							/* 2-codepoint emoji; snd comes first, and w is emitted
							 * below. This is why one extra slot is kept free. */
							*out++ = snd;
						}
					} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
						w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
					} else if (s < jisx0208_ucs_table_size) {
						w = jisx0208_ucs_table[s];
					} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
						w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
					}

					if (!w) {
						if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
							w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
						} else if (s >= (94*94) && s < (114*94)) {
							/* User-defined area; maps onto codepoints from 0xE000 up */
							w = s - (94*94) + 0xE000;
						}
					}
				}

				*out++ = w ? w : MBFL_BAD_INPUT;
			} else {
				/* Invalid second byte */
				*out++ = MBFL_BAD_INPUT;
			}
		} else {
			/* 0x80, 0xA0 and 0xFD-0xFF cannot start a character */
			*out++ = MBFL_BAD_INPUT;
		}
	}

	*in_len = e - p;
	*in = p;
	return out - buf;
}
950
/* Convert Unicode codepoints to SJIS-Mobile (DoCoMo variant) output.
 *
 * in, len: codepoints to convert
 * buf:     output conversion buffer; buf->state holds at most one codepoint
 *          carried over from the previous call
 * end:     true when no further input will follow this call
 *
 * '#' and '0'-'9' combine with a following U+20E3 into one keypad emoji. When
 * one of them is the last codepoint of a buffer and more input is expected, it
 * is parked in buf->state and reprocessed at the start of the next call. */
static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Ask for one byte per input codepoint, plus one for a codepoint carried
	 * over in buf->state; two-byte outputs request more right before writing */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));

	uint32_t w;
	unsigned int s = 0;

	if (buf->state) {
		/* Continue what we were doing on the previous call.
		 *
		 * This jump is taken even when `len` is zero: the carried-over codepoint
		 * must go through the same lookup as any other codepoint, and the `!len`
		 * check under `process_emoji` then decides, based on `end`, whether to
		 * emit it or park it again. Jumping directly to `emit_output` would leave
		 * `s` at zero and report a plain '#' or digit as unconvertible. No read
		 * from `in` happens on this path before `len` has been checked. */
		w = buf->state;
		buf->state = 0;
		goto reprocess_wchar;
	}

	while (len--) {
		w = *in++;
reprocess_wchar:
		s = 0;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* Private User Area (95ku - 114ku) */
			s = w - 0xE000;
			s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21);
			goto process_emoji;
		}

		if (!s) {
			/* Codepoints which the tables above left unmapped */
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x216F; /* FULLWIDTH YEN SIGN */
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			}
		}

		if (w && (!s || s >= 0x8080)) {
			/* Discard the table result and fall back to a linear search of the
			 * CP932 extension tables; the index found is turned into a two-byte
			 * code with 94 cells per row */
			s = 0;

			for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (w == cp932ext1_ucs_table[i]) {
					s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}

			for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
				if (w == cp932ext2_ucs_table[i]) {
					s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}
		}

process_emoji:
		/* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji
		 * to a sequence of 2 codepoints, one of which is a combining character which
		 * adds the 'key' image around the other
		 *
		 * In the other direction, look for such sequences and convert them to a
		 * single emoji */
		if (w == '#' || (w >= '0' && w <= '9')) {
			if (!len) {
				if (end) {
					goto emit_output;
				} else {
					/* If we are at the end of the current buffer of codepoints, but another
					 * buffer is coming, then remember that we have to reprocess `w` */
					buf->state = w;
					break;
				}
			}
			uint32_t w2 = *in++; len--;
			if (w2 == 0x20E3) {
				if (w == '#') {
					s = 0x2964;
				} else if (w == '0') {
					s = 0x296F;
				} else { /* Previous character was '1'-'9' */
					s = 0x2966 + (w - '1');
				}
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			} else {
				/* Not a keypad sequence; un-read `w2` so the next iteration sees it */
				in--; len++;
			}
		} else if (w == 0xA9) { /* Copyright sign */
			s = (((0x29B5 / 94) + 0x21) << 8) | ((0x29B5 % 94) + 0x21);
		} else if (w == 0xAE) { /* Registered sign */
			s = (((0x29BA / 94) + 0x21) << 8) | ((0x29BA % 94) + 0x21);
		} else if (w >= mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) {
			int i = mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
			if (i >= 0) {
				s = mb_tbl_uni_docomo2code2_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) {
			int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
			if (i >= 0) {
				s = mb_tbl_uni_docomo2code3_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) {
			int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
			if (i >= 0) {
				s = mb_tbl_uni_docomo2code5_val[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		}

emit_output:
		if (!s && w) {
			/* No mapping was found for a non-zero codepoint */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_docomo);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s <= 0xFF) {
			out = mb_convert_buf_add(out, s);
		} else {
			unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2;
			SJIS_ENCODE(c1, c2, s1, s2);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
1098
mb_sjis_kddi_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)1099 static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
1100 {
1101 unsigned char *p = *in, *e = p + *in_len;
1102 uint32_t *out = buf, *limit = buf + bufsize - 1;
1103
1104 while (p < e && out < limit) {
1105 unsigned char c = *p++;
1106
1107 if (c <= 0x7F) {
1108 *out++ = c;
1109 } else if (c >= 0xA1 && c <= 0xDF) {
1110 /* Kana */
1111 *out++ = 0xFEC0 + c;
1112 } else if (c > 0x80 && c < 0xFD && c != 0xA0) {
1113 /* Kanji */
1114 if (p == e) {
1115 *out++ = MBFL_BAD_INPUT;
1116 break;
1117 }
1118 unsigned char c2 = *p++;
1119
1120 if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
1121 uint32_t w = 0;
1122 unsigned int s1, s2;
1123 SJIS_DECODE(c, c2, s1, s2);
1124 unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
1125
1126 if (s <= 137) {
1127 if (s == 31) {
1128 w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
1129 } else if (s == 32) {
1130 w = 0xFF5E; /* FULLWIDTH TILDE */
1131 } else if (s == 33) {
1132 w = 0x2225; /* PARALLEL TO */
1133 } else if (s == 60) {
1134 w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
1135 } else if (s == 80) {
1136 w = 0xFFE0; /* FULLWIDTH CENT SIGN */
1137 } else if (s == 81) {
1138 w = 0xFFE1; /* FULLWIDTH POUND SIGN */
1139 } else if (s == 137) {
1140 w = 0xFFE2; /* FULLWIDTH NOT SIGN */
1141 }
1142 }
1143
1144 if (!w) {
1145 if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) {
1146 int snd = 0;
1147 w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
1148 if (snd) {
1149 *out++ = snd;
1150 }
1151 } else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
1152 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
1153 } else if (s < jisx0208_ucs_table_size) {
1154 w = jisx0208_ucs_table[s];
1155 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
1156 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
1157 }
1158
1159 if (!w) {
1160 if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
1161 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
1162 } else if (s >= (94*94) && s < (114*94)) {
1163 w = s - (94*94) + 0xE000;
1164 }
1165 }
1166 }
1167
1168 *out++ = w ? w : MBFL_BAD_INPUT;
1169 } else {
1170 *out++ = MBFL_BAD_INPUT;
1171 }
1172 } else {
1173 *out++ = MBFL_BAD_INPUT;
1174 }
1175 }
1176
1177 *in_len = e - p;
1178 *in = p;
1179 return out - buf;
1180 }
1181
/* Convert Unicode codepoints to SJIS-Mobile (KDDI variant) output.
 *
 * in, len: codepoints to convert
 * buf:     output conversion buffer; buf->state holds at most one codepoint
 *          carried over from the previous call
 * end:     true when no further input will follow this call
 *
 * Two kinds of codepoint may combine with the one after them: '#' and '0'-'9'
 * (followed by U+20E3, forming a keypad emoji) and codepoints in the NFLAGS()
 * range (a pair of which is looked up in `nflags_s`). When such a codepoint is
 * the last one of a buffer and more input is expected, it is parked in
 * buf->state and reprocessed at the start of the next call. */
static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Ask for one byte per input codepoint, plus one for a codepoint carried
	 * over in buf->state; two-byte outputs request more right before writing */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));

	uint32_t w;
	unsigned int s = 0;

	if (buf->state) {
		/* Continue what we were doing on the previous call.
		 *
		 * This jump is taken even when `len` is zero: the carried-over codepoint
		 * must go through the same lookup as any other codepoint, and the `!len`
		 * checks under `process_emoji` then decide, based on `end`, whether to
		 * emit it, report it, or park it again. Jumping directly to `emit_output`
		 * would leave `s` at zero, so a plain '#' or digit would be reported as
		 * unconvertible and a pending flag codepoint would be dropped early. No
		 * read from `in` happens on this path before `len` has been checked. */
		w = buf->state;
		buf->state = 0;
		goto reprocess_wchar;
	}

	while (len--) {
		w = *in++;
reprocess_wchar:
		s = 0;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* Private User Area (95ku - 114ku) */
			s = w - 0xE000;
			s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21);
			goto process_emoji;
		}

		if (!s) {
			/* Codepoints which the tables above left unmapped */
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x216F; /* FULLWIDTH YEN SIGN */
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			}
		}

		if (w && (!s || s >= 0x8080)) {
			/* Discard the table result and fall back to a linear search of the
			 * CP932 extension tables; the index found is turned into a two-byte
			 * code with 94 cells per row */
			s = 0;

			for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (w == cp932ext1_ucs_table[i]) {
					s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}

			for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
				if (w == cp932ext2_ucs_table[i]) {
					s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}
		}

process_emoji:
		if (w == '#' || (w >= '0' && w <= '9')) {
			/* Possible first half of a keypad emoji (base character + U+20E3) */
			if (!len) {
				if (end) {
					goto emit_output;
				} else {
					/* If we are at the end of the current buffer of codepoints, but another
					 * buffer is coming, then remember that we have to reprocess `w` */
					buf->state = w;
					break;
				}
			}
			uint32_t w2 = *in++; len--;
			if (w2 == 0x20E3) {
				if (w == '#') {
					s = 0x25BC;
				} else if (w == '0') {
					s = 0x2830;
				} else { /* Previous character was '1'-'9' */
					s = 0x27A6 + (w - '1');
				}
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			} else {
				/* Not a keypad sequence; un-read `w2` so the next iteration sees it */
				in--; len++;
			}
		} else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */
			/* Possible first half of a two-codepoint flag sequence */
			if (!len) {
				if (end) {
					MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi);
				} else {
					/* Reprocess `w` when this function is called again with another buffer
					 * of wchars */
					buf->state = w;
				}
				break;
			}
			uint32_t w2 = *in++; len--;
			if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
				for (int i = 0; i < 10; i++) {
					if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) {
						s = nflags_code_kddi[i];
						s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
						goto emit_output;
					}
				}
			}
			/* No flag matched: un-read `w2` and report `w` on its own */
			in--; len++;
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		} else if (w == 0xA9) { /* Copyright sign */
			s = (((0x27DC / 94) + 0x21) << 8) | ((0x27DC % 94) + 0x21);
		} else if (w == 0xAE) { /* Registered sign */
			s = (((0x27DD / 94) + 0x21) << 8) | ((0x27DD % 94) + 0x21);
		} else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) {
			int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
			if (i >= 0) {
				s = mb_tbl_uni_kddi2code2_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) {
			int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
			if (i >= 0) {
				s = mb_tbl_uni_kddi2code3_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) {
			int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
			if (i >= 0) {
				s = mb_tbl_uni_kddi2code5_val[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		}

emit_output:
		if (!s && w) {
			/* No mapping was found for a non-zero codepoint */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s <= 0xFF) {
			out = mb_convert_buf_add(out, s);
		} else {
			unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2;
			SJIS_ENCODE(c1, c2, s1, s2);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
1347
mb_sjis_sb_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)1348 static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
1349 {
1350 unsigned char *p = *in, *e = p + *in_len;
1351 uint32_t *out = buf, *limit = buf + bufsize - 1;
1352
1353 if (*state) {
1354 goto softbank_emoji_escapes;
1355 }
1356
1357 while (p < e && out < limit) {
1358 unsigned char c = *p++;
1359
1360 if (c == 0x1B) {
1361 /* Escape sequence */
1362 if (p == e || *p++ != '$' || p == e) {
1363 *out++ = MBFL_BAD_INPUT;
1364 continue;
1365 }
1366 unsigned char c2 = *p++;
1367 if ((c2 < 'E' || c2 > 'G') && (c2 < 'O' || c2 > 'Q')) {
1368 *out++ = MBFL_BAD_INPUT;
1369 continue;
1370 }
1371 /* Escape sequence was valid, next should be a series of specially
1372 * encoded Softbank emoji */
1373 *state = c2;
1374
1375 softbank_emoji_escapes:
1376 while (p < e && out < limit) {
1377 c = *p++;
1378 if (c == 0xF) {
1379 *state = 0;
1380 break;
1381 }
1382 unsigned int s = 0;
1383 if (*state == 'G' && c >= 0x21 && c <= 0x7A) {
1384 s = (0x91 - 0x21) * 94;
1385 } else if (*state == 'E' && c >= 0x21 && c <= 0x7A) {
1386 s = (0x8D - 0x21) * 94;
1387 } else if (*state == 'F' && c >= 0x21 && c <= 0x7A) {
1388 s = (0x8E - 0x21) * 94;
1389 } else if (*state == 'O' && c >= 0x21 && c <= 0x6D) {
1390 s = (0x92 - 0x21) * 94;
1391 } else if (*state == 'P' && c >= 0x21 && c <= 0x6C) {
1392 s = (0x95 - 0x21) * 94;
1393 } else if (*state == 'Q' && c >= 0x21 && c <= 0x5E) {
1394 s = (0x96 - 0x21) * 94;
1395 } else {
1396 *out++ = MBFL_BAD_INPUT;
1397 *state = 0;
1398 break;
1399 }
1400
1401 int snd = 0;
1402 uint32_t w = mbfilter_sjis_emoji_sb2unicode(s + c - 0x21, &snd);
1403 if (w) {
1404 if (snd) {
1405 *out++ = snd;
1406 }
1407 *out++ = w;
1408 } else {
1409 *out++ = MBFL_BAD_INPUT;
1410 *state = 0;
1411 break;
1412 }
1413 }
1414 } else if (c <= 0x7F) {
1415 *out++ = c;
1416 } else if (c >= 0xA1 && c <= 0xDF) {
1417 /* Kana */
1418 *out++ = 0xFEC0 + c;
1419 } else if (c > 0x80 && c < 0xFD && c != 0xA0) {
1420 /* Kanji */
1421 if (p == e) {
1422 *out++ = MBFL_BAD_INPUT;
1423 break;
1424 }
1425 unsigned char c2 = *p++;
1426
1427 if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
1428 uint32_t w = 0;
1429 unsigned int s1, s2;
1430 SJIS_DECODE(c, c2, s1, s2);
1431 unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
1432
1433 if (s <= 137) {
1434 if (s == 31) {
1435 w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
1436 } else if (s == 32) {
1437 w = 0xFF5E; /* FULLWIDTH TILDE */
1438 } else if (s == 33) {
1439 w = 0x2225; /* PARALLEL TO */
1440 } else if (s == 60) {
1441 w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
1442 } else if (s == 80) {
1443 w = 0xFFE0; /* FULLWIDTH CENT SIGN */
1444 } else if (s == 81) {
1445 w = 0xFFE1; /* FULLWIDTH POUND SIGN */
1446 } else if (s == 137) {
1447 w = 0xFFE2; /* FULLWIDTH NOT SIGN */
1448 }
1449 }
1450
1451 if (!w) {
1452 if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) {
1453 int snd = 0;
1454 w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
1455 if (snd) {
1456 *out++ = snd;
1457 }
1458 } else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
1459 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
1460 } else if (s < jisx0208_ucs_table_size) {
1461 w = jisx0208_ucs_table[s];
1462 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
1463 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
1464 }
1465
1466 if (!w) {
1467 if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
1468 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
1469 } else if (s >= (94*94) && s < (114*94)) {
1470 w = s - (94*94) + 0xE000;
1471 }
1472 }
1473 }
1474
1475 *out++ = w ? w : MBFL_BAD_INPUT;
1476 } else {
1477 *out++ = MBFL_BAD_INPUT;
1478 }
1479 } else {
1480 *out++ = MBFL_BAD_INPUT;
1481 }
1482 }
1483
1484 *in_len = e - p;
1485 *in = p;
1486 return out - buf;
1487 }
1488
/* Convert Unicode codepoints to SJIS-Mobile (SoftBank variant) output.
 *
 * in, len: codepoints to convert
 * buf:     output conversion buffer; buf->state holds at most one codepoint
 *          carried over from the previous call
 * end:     true when no further input will follow this call
 *
 * Two kinds of codepoint may combine with the one after them: '#' and '0'-'9'
 * (followed by U+20E3, forming a keypad emoji) and codepoints in the NFLAGS()
 * range (a pair of which is looked up in `nflags_s`). When such a codepoint is
 * the last one of a buffer and more input is expected, it is parked in
 * buf->state and reprocessed at the start of the next call. */
static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Ask for one byte per input codepoint, plus one for a codepoint carried
	 * over in buf->state; two-byte outputs request more right before writing */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));

	uint32_t w;
	unsigned int s = 0;

	if (buf->state) {
		/* Continue what we were doing on the previous call.
		 *
		 * This jump is taken even when `len` is zero: the carried-over codepoint
		 * must go through the same lookup as any other codepoint, and the `!len`
		 * checks under `process_emoji` then decide, based on `end`, whether to
		 * emit it, report it, or park it again. Jumping directly to `emit_output`
		 * would leave `s` at zero, so a plain '#' or digit would be reported as
		 * unconvertible and a pending flag codepoint would be dropped early. No
		 * read from `in` happens on this path before `len` has been checked. */
		w = buf->state;
		buf->state = 0;
		goto reprocess_wchar;
	}

	while (len--) {
		w = *in++;
reprocess_wchar:
		s = 0;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* Private User Area (95ku - 114ku) */
			s = w - 0xE000;
			s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21);
			goto process_emoji;
		}

		if (!s) {
			/* Codepoints which the tables above left unmapped */
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x216F; /* FULLWIDTH YEN SIGN */
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			}
		}

		if (w && (!s || s >= 0x8080)) {
			/* Discard the table result and fall back to a linear search of the
			 * CP932 extension tables; the index found is turned into a two-byte
			 * code with 94 cells per row */
			s = 0;

			for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (w == cp932ext1_ucs_table[i]) {
					s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}

			for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
				if (w == cp932ext2_ucs_table[i]) {
					s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
					goto process_emoji;
				}
			}
		}

process_emoji:
		if (w == '#' || (w >= '0' && w <= '9')) {
			/* Possible first half of a keypad emoji (base character + U+20E3) */
			if (!len) {
				if (end) {
					goto emit_output;
				} else {
					/* If we are at the end of the current buffer of codepoints, but another
					 * buffer is coming, then remember that we have to reprocess `w` */
					buf->state = w;
					break;
				}
			}
			uint32_t w2 = *in++; len--;
			if (w2 == 0x20E3) {
				if (w == '#') {
					s = 0x2817;
				} else if (w == '0') {
					s = 0x282C;
				} else { /* Previous character was '1'-'9' */
					s = 0x2823 + (w - '1');
				}
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			} else {
				/* Not a keypad sequence; un-read `w2` so the next iteration sees it */
				in--; len++;
			}
		} else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */
			/* Possible first half of a two-codepoint flag sequence */
			if (!len) {
				if (end) {
					MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb);
				} else {
					/* Reprocess `w` when this function is called again with
					 * another buffer of wchars */
					buf->state = w;
				}
				break;
			}
			uint32_t w2 = *in++; len--;
			if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
				for (int i = 0; i < 10; i++) {
					if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) {
						s = nflags_code_sb[i];
						s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
						goto emit_output;
					}
				}
			}
			/* No flag matched: un-read `w2` and report `w` on its own */
			in--; len++;
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		} else if (w == 0xA9) { /* Copyright sign */
			s = (((0x2855 / 94) + 0x21) << 8) | ((0x2855 % 94) + 0x21);
		} else if (w == 0xAE) { /* Registered sign */
			s = (((0x2856 / 94) + 0x21) << 8) | ((0x2856 % 94) + 0x21);
		} else if (w >= mb_tbl_uni_sb2code2_min && w <= mb_tbl_uni_sb2code2_max) {
			int i = mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
			if (i >= 0) {
				s = mb_tbl_uni_sb2code2_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) {
			int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
			if (i >= 0) {
				s = mb_tbl_uni_sb2code3_value[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		} else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) {
			int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
			if (i >= 0) {
				s = mb_tbl_uni_sb2code5_val[i];
				s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21);
			}
		}

emit_output:
		if (!s && w) {
			/* No mapping was found for a non-zero codepoint */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s <= 0xFF) {
			out = mb_convert_buf_add(out, s);
		} else {
			unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2;
			SJIS_ENCODE(c1, c2, s1, s2);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
1654