1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
/*
 * The source code included in this file was separated from mbfilter_ja.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
 *
 */
29 
30 /* CP932 is Microsoft's version of Shift-JIS.
31  *
32  * What we call "SJIS-win" is a variant of CP932 which maps U+00A5
33  * and U+203E the same way as eucJP-win; namely, instead of mapping
34  * U+00A5 (YEN SIGN) to 0x5C and U+203E (OVERLINE) to 0x7E,
35  * these codepoints are mapped to appropriate JIS X 0208 characters.
36  *
37  * When converting from Shift-JIS to Unicode, there is no difference
38  * between CP932 and "SJIS-win".
39  *
40  * Additional facts:
41  *
42  * • In the libmbfl library which formed the base for mbstring, "CP932" and
43  *   "SJIS-win" were originally aliases. The differing mappings were added in
44  *   December 2002. The libmbfl author later stated that this was done so that
45  *   "CP932" would comply with a certain specification, while "SJIS-win" would
46  *   maintain the existing mappings. He does not remember which specification
47  *   it was.
48  * • The WHATWG specification for "Shift_JIS" (followed by web browsers)
49  *   agrees with our mappings for "CP932".
50  * • Microsoft Windows' "best-fit" mappings for CP932 (via the
51  *   WideCharToMultiByte API) convert U+00A5 to 0x5C, which also agrees with
52  *   our mappings for "CP932".
53  * • glibc's iconv converts U+203E to CP932 0x7E, which again agrees with
54  *   our mappings for "CP932".
55  * • When converting Shift-JIS to CP932, the conversion goes through Unicode.
56  *   Shift-JIS 0x7E converts to U+203E, so mapping U+203E to 0x7E means that
57  *   0x7E will go to 0x7E when converting Shift-JIS to CP932.
58  */
59 
60 #include "mbfilter.h"
61 #include "mbfilter_cp932.h"
62 
63 #include "unicode_table_cp932_ext.h"
64 #include "unicode_table_jis.h"
65 
/* Forward declarations of file-local functions. They are referenced by the
 * encoding descriptors and conversion vtables below before their definitions
 * appear further down in this file. */
static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
70 
/* 256-entry table shared by the CP932 and SJIS-win encoding descriptors.
 * Entries 0x81-0x9F and 0xE0-0xFF hold 2; every other entry holds 1.
 * Presumably indexed by the first byte of a character to obtain the byte
 * length of that character - confirm against the users of mblen_table. */
static const unsigned char mblen_table_sjis[] = { /* 0x81-0x9f,0xE0-0xFF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
89 
/* NULL-terminated alias name lists, referenced by the CP932 and SJIS-win
 * encoding descriptors below. */
static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL};
static const char *mbfl_encoding_sjiswin_aliases[] = {"SJIS-ms", "SJIS-open", NULL};
92 
/* Descriptor for the "CP932" encoding. It ties together the alias list, the
 * shared Shift-JIS byte-length table, the character-at-a-time filter vtables
 * (vtbl_cp932_wchar / vtbl_wchar_cp932) and the buffer-based converters
 * (mb_cp932_to_wchar / mb_wchar_to_cp932) defined in this file.
 * NOTE(review): the initializer is positional; the meaning of each slot
 * (including the trailing NULL) is fixed by the mbfl_encoding declaration,
 * which is not visible here - confirm against mbfilter.h. */
const mbfl_encoding mbfl_encoding_cp932 = {
	mbfl_no_encoding_cp932,
	"CP932",
	"Shift_JIS",
	mbfl_encoding_cp932_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_cp932_wchar,
	&vtbl_wchar_cp932,
	mb_cp932_to_wchar,
	mb_wchar_to_cp932,
	NULL
};
106 
/* Filter vtable for the CP932 -> wchar direction: common constructor,
 * mbfl_filt_conv_cp932_wchar as the per-byte filter function and
 * mbfl_filt_conv_cp932_wchar_flush as the flush function.
 * NOTE(review): slot meanings (including the two NULL slots) come from the
 * mbfl_convert_vtbl declaration, which is not visible in this file. */
const struct mbfl_convert_vtbl vtbl_cp932_wchar = {
	mbfl_no_encoding_cp932,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_cp932_wchar,
	mbfl_filt_conv_cp932_wchar_flush,
	NULL,
};
116 
/* Filter vtable for the wchar -> CP932 direction: common constructor,
 * mbfl_filt_conv_wchar_cp932 as the per-character filter function and the
 * common flush function.
 * NOTE(review): slot meanings (including the two NULL slots) come from the
 * mbfl_convert_vtbl declaration, which is not visible in this file. */
const struct mbfl_convert_vtbl vtbl_wchar_cp932 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_cp932,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_cp932,
	mbfl_filt_conv_common_flush,
	NULL,
};
126 
/* Descriptor for the "SJIS-win" encoding. It shares the byte-length table and
 * the decoder (mb_cp932_to_wchar) with CP932, but uses its own alias list,
 * its own filter vtables and its own encoder (mb_wchar_to_sjiswin).
 * NOTE(review): the initializer is positional; the meaning of each slot
 * (including the trailing NULL) is fixed by the mbfl_encoding declaration,
 * which is not visible here - confirm against mbfilter.h. */
const mbfl_encoding mbfl_encoding_sjiswin = {
	mbfl_no_encoding_sjiswin,
	"SJIS-win",
	"Shift_JIS",
	mbfl_encoding_sjiswin_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjiswin_wchar,
	&vtbl_wchar_sjiswin,
	mb_cp932_to_wchar,
	mb_wchar_to_sjiswin,
	NULL
};
140 
/* Filter vtable for the SJIS-win -> wchar direction. It reuses the CP932
 * filter and flush functions (mbfl_filt_conv_cp932_wchar and
 * mbfl_filt_conv_cp932_wchar_flush); only the source encoding id differs
 * from vtbl_cp932_wchar.
 * NOTE(review): slot meanings (including the two NULL slots) come from the
 * mbfl_convert_vtbl declaration, which is not visible in this file. */
const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = {
	mbfl_no_encoding_sjiswin,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_cp932_wchar,
	mbfl_filt_conv_cp932_wchar_flush,
	NULL,
};
150 
/* Filter vtable for the wchar -> SJIS-win direction: common constructor,
 * mbfl_filt_conv_wchar_sjiswin as the per-character filter function and the
 * common flush function.
 * NOTE(review): slot meanings (including the two NULL slots) come from the
 * mbfl_convert_vtbl declaration, which is not visible in this file. */
const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjiswin,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjiswin,
	mbfl_filt_conv_common_flush,
	NULL,
};
160 
/* Evaluate `statement` once; if its value is negative, make the enclosing
 * function return -1. Only usable inside functions returning an integer. */
#define CK(statement)	do { if ((statement) < 0) { return -1; } } while (0)
162 
/* Turn a JIS row/cell byte pair (c1, c2) into a Shift-JIS lead/trail byte
 * pair, stored into the lvalues s1 and s2.
 *   - lead byte: ((c1 - 1) >> 1) plus 0x71 when c1 < 0x5f, else plus 0xb1
 *   - trail byte for odd c1: c2 + 0x1f when c2 < 0x60, else c2 + 0x20
 *   - trail byte for even c1: c2 + 0x7e
 * c1 and c2 are evaluated more than once; pass side-effect-free expressions. */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
		do {						\
			s1 = (c1);				\
			s1 = ((s1 - 1) >> 1) + (((c1) < 0x5f) ? 0x71 : 0xb1);	\
			s2 = (c2);				\
			if (((c1) & 1) == 0) {	\
				s2 += 0x7e;			\
			} else if ((c2) < 0x60) {	\
				s2 += 0x1f;			\
			} else {				\
				s2 += 0x20;			\
			}						\
		} while (0)
183 
/* Turn a Shift-JIS lead/trail byte pair (c1, c2) into a JIS row/cell byte
 * pair, stored into the lvalues s1 and s2.
 *   - row: ((c1 - base) << 1) + 0x21, base = 0x81 for c1 < 0xa0, else 0xc1
 *   - trail >= 0x9f selects the following (even) row; cell = c2 - 0x7e
 *   - otherwise cell = c2 - 0x1f when c2 < 0x7f, else c2 - 0x20
 * s1 and s2 are both read and written several times; they must be plain
 * variables. */
#define SJIS_DECODE(c1,c2,s1,s2)	\
		do {						\
			s1 = (c1);				\
			s1 = ((s1 - ((s1 < 0xa0) ? 0x81 : 0xc1)) << 1) + 0x21;	\
			s2 = (c2);				\
			if (s2 >= 0x9f) {		\
				s1 += 1;			\
				s2 -= 0x7e;			\
			} else if (s2 < 0x7f) {	\
				s2 -= 0x1f;			\
			} else {				\
				s2 -= 0x20;			\
			}						\
		} while (0)
205 
/*
 * Byte-at-a-time filter for the CP932 / SJIS-win -> wchar direction.
 *
 * `c` is the next input byte. filter->status is 0 when a new character is
 * expected and 1 when a lead byte has been stored in filter->cache and the
 * trail byte is expected. Each decoded codepoint (or MBFL_BAD_INPUT for
 * bytes that cannot be decoded) is passed to filter->output_function.
 *
 * Returns 0, or -1 as soon as the output function returns a negative value.
 */
int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, row, cell, idx, wc;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {
			/* 0x00-0x7F: passed through unchanged */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xe0) {
			/* 0xA1-0xDF: single-byte kana, offset into U+FF61..U+FF9F */
			CK((*filter->output_function)(0xfec0 + c, filter->data));
		} else if (c > 0x80 && c < 0xfd && c != 0xa0) {
			/* Lead byte of a two-byte character: remember it and wait */
			filter->cache = c;
			filter->status = 1;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1:
		/* Trail byte of a two-byte character */
		lead = filter->cache;
		filter->status = 0;

		if (c < 0x40 || c > 0xfc || c == 0x7f) {
			/* Not a valid trail byte */
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		SJIS_DECODE(lead, c, row, cell);
		/* Linear index into a 94-cells-per-row grid */
		idx = (row - 0x21)*94 + cell - 0x21;

		/* A handful of cells are mapped differently from the JIS X 0208 table */
		switch (idx) {
		case 31:
			wc = 0xff3c;	/* FULLWIDTH REVERSE SOLIDUS */
			break;
		case 32:
			wc = 0xff5e;	/* FULLWIDTH TILDE */
			break;
		case 33:
			wc = 0x2225;	/* PARALLEL TO */
			break;
		case 60:
			wc = 0xff0d;	/* FULLWIDTH HYPHEN-MINUS */
			break;
		case 80:
			wc = 0xffe0;	/* FULLWIDTH CENT SIGN */
			break;
		case 81:
			wc = 0xffe1;	/* FULLWIDTH POUND SIGN */
			break;
		case 137:
			wc = 0xffe2;	/* FULLWIDTH NOT SIGN */
			break;
		default:
			wc = 0;
			break;
		}

		if (wc == 0) {
			/* The first matching range wins; the vendor ext1 range is
			 * consulted before the JIS X 0208 table. */
			if (idx >= cp932ext1_ucs_table_min && idx < cp932ext1_ucs_table_max) {
				/* vendor ext1 (13ku) */
				wc = cp932ext1_ucs_table[idx - cp932ext1_ucs_table_min];
			} else if (idx >= 0 && idx < jisx0208_ucs_table_size) {
				/* X 0208 */
				wc = jisx0208_ucs_table[idx];
			} else if (idx >= cp932ext2_ucs_table_min && idx < cp932ext2_ucs_table_max) {
				/* vendor ext2 (89ku - 92ku) */
				wc = cp932ext2_ucs_table[idx - cp932ext2_ucs_table_min];
			} else if (idx >= cp932ext3_ucs_table_min && idx < cp932ext3_ucs_table_max) {
				/* vendor ext3 (115ku - 119ku) */
				wc = cp932ext3_ucs_table[idx - cp932ext3_ucs_table_min];
			} else if (idx >= (94*94) && idx < (114*94)) {
				/* user (95ku - 114ku): mapped linearly from U+E000 */
				wc = idx - (94*94) + 0xe000;
			}
		}

		/* No mapping found (or an empty table slot) */
		if (wc <= 0) {
			wc = MBFL_BAD_INPUT;
		}

		CK((*filter->output_function)(wc, filter->data));
		break;

		EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
277 
/*
 * Flush handler for the CP932 / SJIS-win -> wchar filter.
 *
 * A non-zero status means a lead byte was stored but its trail byte never
 * arrived; MBFL_BAD_INPUT is emitted for it and the state is reset. The
 * flush is then forwarded to filter->flush_function when one is set.
 * Return values of both callbacks are ignored; always returns 0.
 */
static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Dangling lead byte at end of input */
		filter->output_function(MBFL_BAD_INPUT, filter->data);
		filter->status = 0;
	}

	if (filter->flush_function != NULL) {
		filter->flush_function(filter->data);
	}

	return 0;
}
291 
/*
 * Character-at-a-time filter for the wchar -> CP932 direction.
 *
 * `c` is the codepoint to encode. It is looked up in the ucs_*_jis tables
 * (with U+203E special-cased to 0x7E and U+E000.. mapped to the user area),
 * then in a short list of explicit overrides, and finally by linear search
 * in the CP932 vendor extension tables. The resulting one or two bytes are
 * passed to filter->output_function; codepoints with no mapping are handed
 * to mbfl_filt_conv_illegal_output.
 *
 * Returns 0, or -1 as soon as a callback returns a negative value.
 */
int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
{
	int jis = 0;		/* single byte (< 0x100) or JIS row/cell pair */
	int user_area = 0;	/* set when `jis` came from the user-defined range */
	int hi, lo, b1, b2, i, n;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		jis = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c == 0x203E) {
		jis = 0x7E;
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		jis = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		jis = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		jis = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xe000 && c < (0xe000 + 20*94)) {
		/* user (95ku - 114ku) */
		int offset = c - 0xe000;

		hi = offset/94 + 0x7f;
		lo = offset%94 + 0x21;
		jis = (hi << 8) | lo;
		user_area = 1;
	}

	if (jis <= 0) {
		/* Explicit overrides, applied only when the tables gave nothing */
		switch (c) {
		case 0xa5:		/* YEN SIGN */
			jis = 0x5C;
			break;
		case 0xff3c:	/* FULLWIDTH REVERSE SOLIDUS */
			jis = 0x2140;
			break;
		case 0x2225:	/* PARALLEL TO */
			jis = 0x2142;
			break;
		case 0xff0d:	/* FULLWIDTH HYPHEN-MINUS */
			jis = 0x215d;
			break;
		case 0xffe0:	/* FULLWIDTH CENT SIGN */
			jis = 0x2171;
			break;
		case 0xffe1:	/* FULLWIDTH POUND SIGN */
			jis = 0x2172;
			break;
		case 0xffe2:	/* FULLWIDTH NOT SIGN */
			jis = 0x224c;
			break;
		default:
			break;
		}
	}

	if (jis <= 0 || (jis >= 0x8080 && user_area == 0)) {
		/* not found or X 0212: fall back to the vendor extension tables */
		jis = -1;

		/* CP932 vendor ext1 (13ku) */
		n = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
		for (i = 0; i < n; i++) {
			if (c == cp932ext1_ucs_table[i]) {
				jis = ((i/94 + 0x2d) << 8) + (i%94 + 0x21);
				break;
			}
		}

		if (jis <= 0) {
			/* CP932 vendor ext3 (115ku - 119ku) */
			n = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
			for (i = 0; i < n; i++) {
				if (c == cp932ext3_ucs_table[i]) {
					jis = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
					break;
				}
			}
		}

		if (c == 0) {
			/* U+0000 is always encodable as a single zero byte */
			jis = 0;
		} else if (jis <= 0) {
			jis = -1;
		}
	}

	if (jis < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	} else if (jis < 0x100) {
		/* latin or kana */
		CK((*filter->output_function)(jis, filter->data));
	} else {
		/* kanji */
		hi = (jis >> 8) & 0xff;
		lo = jis & 0xff;
		SJIS_ENCODE(hi, lo, b1, b2);
		CK((*filter->output_function)(b1, filter->data));
		CK((*filter->output_function)(b2, filter->data));
	}

	return 0;
}
376 
/*
 * Character-at-a-time filter for the wchar -> SJIS-win direction.
 *
 * U+00A5 is emitted as the byte pair 0x81 0x8F and U+203E as 0x81 0x50;
 * every other codepoint is delegated to mbfl_filt_conv_wchar_cp932.
 *
 * Returns 0, the delegate's return value, or -1 as soon as the output
 * function returns a negative value.
 */
int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter)
{
	int trail;

	switch (c) {
	case 0xA5:
		trail = 0x8F;
		break;
	case 0x203E:
		trail = 0x50;
		break;
	default:
		return mbfl_filt_conv_wchar_cp932(c, filter);
	}

	/* Both special cases share the lead byte 0x81 */
	CK((*filter->output_function)(0x81, filter->data));
	CK((*filter->output_function)(trail, filter->data));
	return 0;
}
390 
mb_cp932_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)391 static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
392 {
393 	unsigned char *p = *in, *e = p + *in_len;
394 	uint32_t *out = buf, *limit = buf + bufsize;
395 
396 	while (p < e && out < limit) {
397 		unsigned char c = *p++;
398 
399 		if (c < 0x80) {
400 			*out++ = c;
401 		} else if (c > 0xA0 && c < 0xE0) {
402 			/* Kana */
403 			*out++ = 0xFEC0 + c;
404 		} else if (c > 0x80 && c < 0xFD && c != 0xA0 && p < e) {
405 			unsigned char c2 = *p++;
406 
407 			if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
408 				unsigned int s1, s2, w = 0;
409 				SJIS_DECODE(c, c2, s1, s2);
410 				unsigned int s = (s1 - 0x21)*94 + s2 - 0x21;
411 
412 				if (s <= 137) {
413 					if (s == 31) {
414 						w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
415 					} else if (s == 32) {
416 						w = 0xFF5E; /* FULLWIDTH TILDE */
417 					} else if (s == 33) {
418 						w = 0x2225; /* PARALLEL TO */
419 					} else if (s == 60) {
420 						w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
421 					} else if (s == 80) {
422 						w = 0xFFE0; /* FULLWIDTH CENT SIGN */
423 					} else if (s == 81) {
424 						w = 0xFFE1; /* FULLWIDTH POUND SIGN */
425 					} else if (s == 137) {
426 						w = 0xFFE2; /* FULLWIDTH NOT SIGN */
427 					}
428 				}
429 
430 				if (w == 0) {
431 					if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
432 						w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
433 					} else if (s < jisx0208_ucs_table_size) {
434 						w = jisx0208_ucs_table[s];
435 					} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
436 						w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
437 					} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
438 						w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
439 					} else if (s >= (94*94) && s < (114*94)) {
440 						w = s - (94*94) + 0xE000;
441 					}
442 				}
443 
444 				if (!w)
445 					w = MBFL_BAD_INPUT;
446 				*out++ = w;
447 			} else {
448 				*out++ = MBFL_BAD_INPUT;
449 			}
450 		} else {
451 			*out++ = MBFL_BAD_INPUT;
452 		}
453 	}
454 
455 	*in_len = e - p;
456 	*in = p;
457 	return out - buf;
458 }
459 
/*
 * Buffer-based encoder for CP932.
 *
 * Encodes `len` codepoints from `in` and appends the resulting bytes to
 * `buf`. Each codepoint is looked up in the ucs_*_jis tables (U+203E is
 * special-cased to 0x7E, U+E000.. goes to the user area), a short list of
 * overrides is then applied unconditionally, and codepoints still without a
 * usable mapping are searched for in the CP932 vendor extension tables.
 * Codepoints that cannot be encoded are reported through MB_CONVERT_ERROR.
 * `end` is not used.
 */
static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* At most two output bytes per remaining codepoint */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		unsigned int jis = 0;		/* single byte (< 0x100) or JIS row/cell pair */
		unsigned int user_area = 0;	/* set when `jis` came from the user-defined range */

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			jis = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w == 0x203E) {
			jis = 0x7E;
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			jis = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			jis = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			jis = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			unsigned int offset = w - 0xE000;

			jis = ((offset/94 + 0x7F) << 8) | (offset%94 + 0x21);
			user_area = 1;
		}

		/* These overrides replace whatever the tables produced */
		switch (w) {
		case 0xA5: /* YEN SIGN */
			jis = 0x5C;
			break;
		case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
			jis = 0x2140;
			break;
		case 0x2225: /* PARALLEL TO */
			jis = 0x2142;
			break;
		case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
			jis = 0x215D;
			break;
		case 0xFFE0: /* FULLWIDTH CENT SIGN */
			jis = 0x2171;
			break;
		case 0xFFE1: /* FULLWIDTH POUND SIGN */
			jis = 0x2172;
			break;
		case 0xFFE2: /* FULLWIDTH NOT SIGN */
			jis = 0x224C;
			break;
		case 0:
			/* U+0000 is written as a single zero byte */
			out = mb_convert_buf_add(out, 0);
			continue;
		default:
			break;
		}

		if (jis == 0 || (jis >= 0x8080 && user_area == 0)) {
			/* not found or X 0212: search the vendor extension tables */
			unsigned int i;
			bool found = false;

			for (i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (cp932ext1_ucs_table[i] == w) {
					jis = ((i/94 + 0x2D) << 8) + (i%94 + 0x21);
					found = true;
					break;
				}
			}

			if (!found) {
				for (i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
					if (cp932ext3_ucs_table[i] == w) {
						jis = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
						found = true;
						break;
					}
				}
			}

			if (!found) {
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932);
				/* Re-establish room for the codepoints still to come */
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
				continue;
			}
		}

		if (jis < 0x100) {
			out = mb_convert_buf_add(out, jis);
		} else {
			unsigned int hi = (jis >> 8) & 0xFF;
			unsigned int lo = jis & 0xFF;
			unsigned int b1, b2;

			SJIS_ENCODE(hi, lo, b1, b2);
			out = mb_convert_buf_add2(out, b1, b2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
540 
/*
 * Buffer-based encoder for SJIS-win.
 *
 * Encodes `len` codepoints from `in` and appends the resulting bytes to
 * `buf`. The logic mirrors mb_wchar_to_cp932 with two differences:
 *   - U+00A5 (YEN SIGN) is encoded as JIS 0x216F (FULLWIDTH YEN SIGN)
 *     rather than the single byte 0x5C;
 *   - U+203E gets no special case and is resolved by the regular table
 *     lookups.
 * Codepoints that cannot be encoded are reported through MB_CONVERT_ERROR.
 * `end` is not used.
 */
static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* At most two output bytes per remaining codepoint */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		/* s1: single byte (< 0x100) or JIS row/cell pair;
		 * s2: non-zero when s1 came from the user-defined range */
		unsigned int s1 = 0, s2 = 0, c1, c2;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s1 = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s1 = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* User-defined range, mapped linearly from U+E000 */
			s1 = w - 0xE000;
			c1 = s1/94 + 0x7F;
			c2 = s1%94 + 0x21;
			s1 = (c1 << 8) | c2;
			s2 = 1;
		}

		/* These overrides replace whatever the tables produced */
		if (w == 0xA5) { /* YEN SIGN */
			s1 = 0x216F; /* FULLWIDTH YEN SIGN */
		} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
			s1 = 0x2140;
		} else if (w == 0x2225) { /* PARALLEL TO */
			s1 = 0x2142;
		} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
			s1 = 0x215D;
		} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
			s1 = 0x2171;
		} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
			s1 = 0x2172;
		} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
			s1 = 0x224C;
		} else if (w == 0) {
			/* U+0000 is written as a single zero byte */
			out = mb_convert_buf_add(out, 0);
			continue;
		}

		if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */
			/* CP932 vendor ext1 */
			for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (cp932ext1_ucs_table[i] == w) {
					s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}

			/* CP932 vendor ext3 */
			for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
				if (cp932ext3_ucs_table[i] == w) {
					s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}

			/* Pass this encoder itself as the conversion callback, the same
			 * way mb_wchar_to_cp932 passes itself, so that anything emitted
			 * through the callback uses SJIS-win mappings rather than the
			 * CP932 ones (they differ for U+00A5).
			 * NOTE(review): MB_CONVERT_ERROR is defined outside this file;
			 * confirm how it uses the callback. */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjiswin);
			/* Re-establish room for the codepoints still to come */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

emit_output:
		if (s1 < 0x100) {
			out = mb_convert_buf_add(out, s1);
		} else {
			c1 = (s1 >> 8) & 0xFF;
			c2 = s1 & 0xFF;
			SJIS_ENCODE(c1, c2, s1, s2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
619