1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
 * The source code included in this file was separated from mbfilter_ja.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 /* CP932 is Microsoft's version of Shift-JIS.
31 *
32 * What we call "SJIS-win" is a variant of CP932 which maps U+00A5
33 * and U+203E the same way as eucJP-win; namely, instead of mapping
34 * U+00A5 (YEN SIGN) to 0x5C and U+203E (OVERLINE) to 0x7E,
35 * these codepoints are mapped to appropriate JIS X 0208 characters.
36 *
37 * When converting from Shift-JIS to Unicode, there is no difference
38 * between CP932 and "SJIS-win".
39 *
40 * Additional facts:
41 *
42 * • In the libmbfl library which formed the base for mbstring, "CP932" and
43 * "SJIS-win" were originally aliases. The differing mappings were added in
44 * December 2002. The libmbfl author later stated that this was done so that
45 * "CP932" would comply with a certain specification, while "SJIS-win" would
46 * maintain the existing mappings. He does not remember which specification
47 * it was.
48 * • The WHATWG specification for "Shift_JIS" (followed by web browsers)
49 * agrees with our mappings for "CP932".
50 * • Microsoft Windows' "best-fit" mappings for CP932 (via the
51 * WideCharToMultiByte API) convert U+00A5 to 0x5C, which also agrees with
52 * our mappings for "CP932".
53 * • glibc's iconv converts U+203E to CP932 0x7E, which again agrees with
54 * our mappings for "CP932".
55 * • When converting Shift-JIS to CP932, the conversion goes through Unicode.
56 * Shift-JIS 0x7E converts to U+203E, so mapping U+203E to 0x7E means that
57 * 0x7E will go to 0x7E when converting Shift-JIS to CP932.
58 */
59
60 #include "mbfilter.h"
61 #include "mbfilter_cp932.h"
62
63 #include "unicode_table_cp932_ext.h"
64 #include "unicode_table_jis.h"
65
66 static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter);
67 static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
68 static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
69 static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
70
/* One entry per possible byte value (16 rows of 16 entries = 256).
 * Entries are 2 for bytes 0x81-0x9F and 0xE0-0xFF, and 1 for every other byte;
 * presumably the byte length of a character starting with that byte
 * (TODO confirm against the mbfl_encoding declaration).
 * NOTE(review): the decoders in this file reject 0xFD-0xFF as lead bytes,
 * although this table lists them as 2. */
static const unsigned char mblen_table_sjis[] = { /* 0x81-0x9f,0xE0-0xFF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x0F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x1F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50-0x5F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70-0x7F */
	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80-0x8F (0x80 itself is 1) */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0-0xAF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0-0xBF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0-0xCF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0-0xDF */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0-0xEF */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2  /* 0xF0-0xFF */
};
89
/* NULL-terminated lists of alternative names for "CP932" and "SJIS-win" */
static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL};
static const char *mbfl_encoding_sjiswin_aliases[] = {"SJIS-ms", "SJIS-open", NULL};
92
/* Encoding descriptor for "CP932".
 * NOTE(review): the initializer is positional; the per-field notes below are
 * inferred from the values only. Confirm against the mbfl_encoding declaration. */
const mbfl_encoding mbfl_encoding_cp932 = {
	mbfl_no_encoding_cp932,
	"CP932",
	"Shift_JIS",
	mbfl_encoding_cp932_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_cp932_wchar,  /* per-byte filter vtable: CP932 -> wchar */
	&vtbl_wchar_cp932,  /* per-codepoint filter vtable: wchar -> CP932 */
	mb_cp932_to_wchar,  /* buffer-based decoder */
	mb_wchar_to_cp932,  /* buffer-based encoder */
	NULL
};
106
/* Filter vtable for CP932 -> wchar: common constructor, the CP932 decoding
 * filter and its own flush function.
 * NOTE(review): the meaning of the two NULL slots is not visible here; confirm
 * against the struct mbfl_convert_vtbl declaration. */
const struct mbfl_convert_vtbl vtbl_cp932_wchar = {
	mbfl_no_encoding_cp932,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_cp932_wchar,
	mbfl_filt_conv_cp932_wchar_flush,
	NULL,
};
116
/* Filter vtable for wchar -> CP932: common constructor, the CP932 encoding
 * filter and the common flush function.
 * NOTE(review): the meaning of the two NULL slots is not visible here; confirm
 * against the struct mbfl_convert_vtbl declaration. */
const struct mbfl_convert_vtbl vtbl_wchar_cp932 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_cp932,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_cp932,
	mbfl_filt_conv_common_flush,
	NULL,
};
126
/* Encoding descriptor for "SJIS-win". It uses the same decoder
 * (mb_cp932_to_wchar) as "CP932" but its own encoder (mb_wchar_to_sjiswin).
 * NOTE(review): the initializer is positional; the per-field notes below are
 * inferred from the values only. Confirm against the mbfl_encoding declaration. */
const mbfl_encoding mbfl_encoding_sjiswin = {
	mbfl_no_encoding_sjiswin,
	"SJIS-win",
	"Shift_JIS",
	mbfl_encoding_sjiswin_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjiswin_wchar,  /* per-byte filter vtable: SJIS-win -> wchar */
	&vtbl_wchar_sjiswin,  /* per-codepoint filter vtable: wchar -> SJIS-win */
	mb_cp932_to_wchar,    /* buffer-based decoder (shared with CP932) */
	mb_wchar_to_sjiswin,  /* buffer-based encoder */
	NULL
};
140
/* Filter vtable for SJIS-win -> wchar. It reuses the CP932 decoding filter and
 * flush function; only the source encoding ID differs from vtbl_cp932_wchar.
 * NOTE(review): the meaning of the two NULL slots is not visible here; confirm
 * against the struct mbfl_convert_vtbl declaration. */
const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = {
	mbfl_no_encoding_sjiswin,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_cp932_wchar,
	mbfl_filt_conv_cp932_wchar_flush,
	NULL,
};
150
/* Filter vtable for wchar -> SJIS-win: common constructor, the SJIS-win
 * encoding filter and the common flush function.
 * NOTE(review): the meaning of the two NULL slots is not visible here; confirm
 * against the struct mbfl_convert_vtbl declaration. */
const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjiswin,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjiswin,
	mbfl_filt_conv_common_flush,
	NULL,
};
160
/* Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions returning an integer. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
162
/* Convert a two-byte JIS code (c1 = first byte, c2 = second byte) into the
 * two Shift-JIS bytes, stored in s1 and s2.
 *   - s1 is ((c1 - 1) >> 1) plus 0x71 when c1 < 0x5f, else plus 0xb1
 *   - for odd c1, s2 is c2 + 0x1f when c2 < 0x60, else c2 + 0x20
 *   - for even c1, s2 is c2 + 0x7e
 * s1 and s2 must be assignable; c1 and c2 are evaluated more than once. */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		s1 = c1; \
		s1 = ((s1 - 1) >> 1) + (((c1) < 0x5f) ? 0x71 : 0xb1); \
		s2 = c2; \
		if (((c1) & 1) == 0) { \
			s2 += 0x7e; \
		} else { \
			s2 += ((c2) < 0x60) ? 0x1f : 0x20; \
		} \
	} while (0)
183
/* Convert two Shift-JIS bytes (c1 = lead byte, c2 = trail byte) into a
 * two-byte JIS code, stored in s1 (first byte) and s2 (second byte).
 *   - the lead byte is rebased by 0x81 (when below 0xa0) or 0xc1, doubled,
 *     and offset by 0x21
 *   - a trail byte of 0x9f or above selects the following (s1 + 1) row
 * s1 and s2 must be assignable. No validation of c1/c2 is done here. */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		s1 = c1; \
		s1 -= (s1 < 0xa0) ? 0x81 : 0xc1; \
		s1 = (s1 << 1) + 0x21; \
		s2 = c2; \
		if (s2 >= 0x9f) { \
			s1++; \
			s2 -= 0x7e; \
		} else { \
			s2 -= (s2 < 0x7f) ? 0x1f : 0x20; \
		} \
	} while (0)
205
/*
 * Per-byte filter: CP932 / SJIS-win bytes -> Unicode codepoints.
 *
 * `c` is the next input byte. filter->status tracks the decoder state:
 *   0 - expecting a single byte or a lead byte
 *   1 - a lead byte is stored in filter->cache; `c` is its trail byte
 *
 * Decoded codepoints (or MBFL_BAD_INPUT for invalid input) are passed to
 * filter->output_function. Returns 0, or -1 if the output function returned
 * a negative value.
 */
int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, row, cell, pos, wc;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) { /* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xe0) { /* kana */
			CK((*filter->output_function)(0xfec0 + c, filter->data));
		} else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
			filter->status = 1;
			filter->cache = c;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* kanji second char */
		filter->status = 0;
		lead = filter->cache;

		if (c < 0x40 || c > 0xfc || c == 0x7f) {
			/* not a valid trail byte */
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		SJIS_DECODE(lead, c, row, cell);
		/* Linear index into the 94-column JIS grid */
		pos = (row - 0x21)*94 + cell - 0x21;

		/* Cells whose mapping is fixed here instead of coming from the tables */
		switch (pos) {
		case 31:
			wc = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
			break;
		case 32:
			wc = 0xff5e; /* FULLWIDTH TILDE */
			break;
		case 33:
			wc = 0x2225; /* PARALLEL TO */
			break;
		case 60:
			wc = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
			break;
		case 80:
			wc = 0xffe0; /* FULLWIDTH CENT SIGN */
			break;
		case 81:
			wc = 0xffe1; /* FULLWIDTH POUND SIGN */
			break;
		case 137:
			wc = 0xffe2; /* FULLWIDTH NOT SIGN */
			break;
		default:
			wc = 0;
			break;
		}

		if (wc == 0) {
			/* The ext1 range is tested before the X 0208 range, so it wins on overlap */
			if (pos >= cp932ext1_ucs_table_min && pos < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
				wc = cp932ext1_ucs_table[pos - cp932ext1_ucs_table_min];
			} else if (pos >= 0 && pos < jisx0208_ucs_table_size) { /* X 0208 */
				wc = jisx0208_ucs_table[pos];
			} else if (pos >= cp932ext2_ucs_table_min && pos < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
				wc = cp932ext2_ucs_table[pos - cp932ext2_ucs_table_min];
			} else if (pos >= cp932ext3_ucs_table_min && pos < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
				wc = cp932ext3_ucs_table[pos - cp932ext3_ucs_table_min];
			} else if (pos >= (94*94) && pos < (114*94)) { /* user (95ku - 114ku) */
				wc = pos - (94*94) + 0xe000;
			}
		}

		if (wc <= 0) {
			wc = MBFL_BAD_INPUT;
		}

		CK((*filter->output_function)(wc, filter->data));
		break;

		EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
277
/*
 * Flush function for the CP932 / SJIS-win -> wchar filter.
 *
 * If the filter is still in a non-zero state (mbfl_filt_conv_cp932_wchar sets
 * status to 1 after a lead byte), MBFL_BAD_INPUT is emitted for the incomplete
 * character and the state is reset. Then filter->flush_function is invoked,
 * if one is set. Return values of both callbacks are ignored; always
 * returns 0.
 */
static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		filter->output_function(MBFL_BAD_INPUT, filter->data);
		filter->status = 0;
	}

	if (filter->flush_function) {
		filter->flush_function(filter->data);
	}

	return 0;
}
291
/*
 * Per-codepoint filter: Unicode codepoint `c` -> CP932 bytes.
 *
 * Lookup order:
 *   1. the ucs_*_jis tables (with U+203E fixed to 0x7E) and the user-defined
 *      range U+E000 .. U+E000 + 20*94 - 1
 *   2. if nothing was found, a fixed list of special codepoints
 *   3. if still nothing was found, or the result is >= 0x8080 and did not
 *      come from the user-defined range, a linear search of the CP932
 *      vendor extension tables ext1 and ext3
 *
 * Results below 0x100 are emitted as one byte, others as two Shift-JIS bytes.
 * Unmappable codepoints go to mbfl_filt_conv_illegal_output().
 * Returns 0, or -1 if an output call returned a negative value.
 */
int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
{
	int jis = 0;        /* <= 0 means "no mapping found" */
	int user_area = 0;  /* set when the mapping came from the user-defined range */
	int hi, lo, byte1, byte2, i, count;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		jis = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c == 0x203E) {
		jis = 0x7E;
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		jis = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		jis = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		jis = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
		int offset = c - 0xe000;
		hi = offset/94 + 0x7f;
		lo = offset%94 + 0x21;
		jis = (hi << 8) | lo;
		user_area = 1;
	}

	if (jis <= 0) {
		switch (c) {
		case 0xa5: /* YEN SIGN */
			jis = 0x5C;
			break;
		case 0xff3c: /* FULLWIDTH REVERSE SOLIDUS */
			jis = 0x2140;
			break;
		case 0x2225: /* PARALLEL TO */
			jis = 0x2142;
			break;
		case 0xff0d: /* FULLWIDTH HYPHEN-MINUS */
			jis = 0x215d;
			break;
		case 0xffe0: /* FULLWIDTH CENT SIGN */
			jis = 0x2171;
			break;
		case 0xffe1: /* FULLWIDTH POUND SIGN */
			jis = 0x2172;
			break;
		case 0xffe2: /* FULLWIDTH NOT SIGN */
			jis = 0x224c;
			break;
		default:
			break;
		}
	}

	if (jis <= 0 || (jis >= 0x8080 && !user_area)) { /* not found or X 0212 */
		jis = -1;

		/* CP932 vendor ext1 (13ku) */
		count = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
		for (i = 0; i < count; i++) {
			if (c == cp932ext1_ucs_table[i]) {
				jis = ((i/94 + 0x2d) << 8) + (i%94 + 0x21);
				break;
			}
		}

		if (jis <= 0) {
			/* CP932 vendor ext3 (115ku - 119ku) */
			count = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
			for (i = 0; i < count; i++) {
				if (c == cp932ext3_ucs_table[i]) {
					jis = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
					break;
				}
			}
		}

		/* Codepoint 0 always encodes as byte 0, whatever the searches found */
		if (c == 0) {
			jis = 0;
		} else if (jis <= 0) {
			jis = -1;
		}
	}

	if (jis < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (jis < 0x100) { /* latin or kana */
		CK((*filter->output_function)(jis, filter->data));
		return 0;
	}

	/* kanji */
	hi = (jis >> 8) & 0xff;
	lo = jis & 0xff;
	SJIS_ENCODE(hi, lo, byte1, byte2);
	CK((*filter->output_function)(byte1, filter->data));
	CK((*filter->output_function)(byte2, filter->data));

	return 0;
}
376
/*
 * Per-codepoint filter: Unicode codepoint `c` -> SJIS-win bytes.
 *
 * U+00A5 is emitted as 0x81 0x8F and U+203E as 0x81 0x50; every other
 * codepoint is delegated to mbfl_filt_conv_wchar_cp932().
 * Returns 0, or -1 if an output call returned a negative value.
 */
int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter)
{
	switch (c) {
	case 0xA5:
		CK((*filter->output_function)(0x81, filter->data));
		CK((*filter->output_function)(0x8F, filter->data));
		return 0;

	case 0x203E:
		CK((*filter->output_function)(0x81, filter->data));
		CK((*filter->output_function)(0x50, filter->data));
		return 0;

	default:
		return mbfl_filt_conv_wchar_cp932(c, filter);
	}
}
390
mb_cp932_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)391 static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
392 {
393 unsigned char *p = *in, *e = p + *in_len;
394 uint32_t *out = buf, *limit = buf + bufsize;
395
396 while (p < e && out < limit) {
397 unsigned char c = *p++;
398
399 if (c < 0x80) {
400 *out++ = c;
401 } else if (c > 0xA0 && c < 0xE0) {
402 /* Kana */
403 *out++ = 0xFEC0 + c;
404 } else if (c > 0x80 && c < 0xFD && c != 0xA0 && p < e) {
405 unsigned char c2 = *p++;
406
407 if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
408 unsigned int s1, s2, w = 0;
409 SJIS_DECODE(c, c2, s1, s2);
410 unsigned int s = (s1 - 0x21)*94 + s2 - 0x21;
411
412 if (s <= 137) {
413 if (s == 31) {
414 w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
415 } else if (s == 32) {
416 w = 0xFF5E; /* FULLWIDTH TILDE */
417 } else if (s == 33) {
418 w = 0x2225; /* PARALLEL TO */
419 } else if (s == 60) {
420 w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
421 } else if (s == 80) {
422 w = 0xFFE0; /* FULLWIDTH CENT SIGN */
423 } else if (s == 81) {
424 w = 0xFFE1; /* FULLWIDTH POUND SIGN */
425 } else if (s == 137) {
426 w = 0xFFE2; /* FULLWIDTH NOT SIGN */
427 }
428 }
429
430 if (w == 0) {
431 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
432 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
433 } else if (s < jisx0208_ucs_table_size) {
434 w = jisx0208_ucs_table[s];
435 } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
436 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
437 } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
438 w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
439 } else if (s >= (94*94) && s < (114*94)) {
440 w = s - (94*94) + 0xE000;
441 }
442 }
443
444 if (!w)
445 w = MBFL_BAD_INPUT;
446 *out++ = w;
447 } else {
448 *out++ = MBFL_BAD_INPUT;
449 }
450 } else {
451 *out++ = MBFL_BAD_INPUT;
452 }
453 }
454
455 *in_len = e - p;
456 *in = p;
457 return out - buf;
458 }
459
/*
 * Buffer-based encoder: Unicode codepoints -> CP932 bytes.
 *
 * Encodes `len` codepoints from `in` into `buf`. Output space for two bytes
 * per remaining codepoint is requested up front and again after every
 * conversion error. `end` is not used by this encoder.
 *
 * Per codepoint:
 *   1. look it up in the ucs_*_jis tables (U+203E is fixed to 0x7E) or map
 *      it arithmetically if it is in the user-defined range starting at U+E000
 *   2. a fixed list of special codepoints overrides the table result;
 *      codepoint 0 is emitted directly as byte 0
 *   3. if nothing was found, or the result is >= 0x8080 and did not come
 *      from the user-defined range, search the vendor extension tables
 *      ext1 and ext3; if that fails too, report a conversion error
 */
static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		unsigned int jis = 0, user_area = 0, hi, lo, byte1, byte2;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			jis = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w == 0x203E) {
			jis = 0x7E;
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			jis = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			jis = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			jis = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* User-defined area */
			unsigned int offset = w - 0xE000;
			hi = offset/94 + 0x7F;
			lo = offset%94 + 0x21;
			jis = (hi << 8) | lo;
			user_area = 1;
		}

		/* These override whatever the tables returned */
		switch (w) {
		case 0xA5: /* YEN SIGN */
			jis = 0x5C;
			break;
		case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
			jis = 0x2140;
			break;
		case 0x2225: /* PARALLEL TO */
			jis = 0x2142;
			break;
		case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
			jis = 0x215D;
			break;
		case 0xFFE0: /* FULLWIDTH CENT SIGN */
			jis = 0x2171;
			break;
		case 0xFFE1: /* FULLWIDTH POUND SIGN */
			jis = 0x2172;
			break;
		case 0xFFE2: /* FULLWIDTH NOT SIGN */
			jis = 0x224C;
			break;
		case 0:
			out = mb_convert_buf_add(out, 0);
			continue;
		default:
			break;
		}

		if (!jis || (jis >= 0x8080 && !user_area)) { /* not found or X 0212 */
			/* CP932 vendor ext1 */
			for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (cp932ext1_ucs_table[i] == w) {
					jis = ((i/94 + 0x2D) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}

			/* CP932 vendor ext3 */
			for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
				if (cp932ext3_ucs_table[i] == w) {
					jis = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}

			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

emit_output:
		if (jis < 0x100) {
			out = mb_convert_buf_add(out, jis);
		} else {
			hi = (jis >> 8) & 0xFF;
			lo = jis & 0xFF;
			SJIS_ENCODE(hi, lo, byte1, byte2);
			out = mb_convert_buf_add2(out, byte1, byte2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
540
/*
 * Buffer-based encoder: Unicode codepoints -> SJIS-win bytes.
 *
 * Encodes `len` codepoints from `in` into `buf`. Output space for two bytes
 * per remaining codepoint is requested up front and again after every
 * conversion error. `end` is not used by this encoder.
 *
 * Differences from mb_wchar_to_cp932:
 *   - U+00A5 (YEN SIGN) becomes JIS 0x216F (FULLWIDTH YEN SIGN) instead of 0x5C
 *   - U+203E has no special case and takes the ordinary table lookup
 *
 * Per codepoint:
 *   1. look it up in the ucs_*_jis tables, or map it arithmetically if it is
 *      in the user-defined range starting at U+E000
 *   2. a fixed list of special codepoints overrides the table result;
 *      codepoint 0 is emitted directly as byte 0
 *   3. if nothing was found, or the result is >= 0x8080 and did not come
 *      from the user-defined range, search the vendor extension tables
 *      ext1 and ext3; if that fails too, report a conversion error
 */
static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		/* s2 doubles as a "came from the user-defined range" flag until the
		 * final SJIS_ENCODE overwrites it with the second output byte */
		unsigned int s1 = 0, s2 = 0, c1, c2;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s1 = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s1 = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* User-defined area */
			s1 = w - 0xE000;
			c1 = s1/94 + 0x7F;
			c2 = s1%94 + 0x21;
			s1 = (c1 << 8) | c2;
			s2 = 1;
		}

		/* These override whatever the tables returned */
		if (w == 0xA5) { /* YEN SIGN */
			s1 = 0x216F; /* FULLWIDTH YEN SIGN */
		} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
			s1 = 0x2140;
		} else if (w == 0x2225) { /* PARALLEL TO */
			s1 = 0x2142;
		} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
			s1 = 0x215D;
		} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
			s1 = 0x2171;
		} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
			s1 = 0x2172;
		} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
			s1 = 0x224C;
		} else if (w == 0) {
			out = mb_convert_buf_add(out, 0);
			continue;
		}

		if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */
			/* CP932 vendor ext1 */
			for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (cp932ext1_ucs_table[i] == w) {
					s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}

			/* CP932 vendor ext3 */
			for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
				if (cp932ext3_ucs_table[i] == w) {
					s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}

			/* Hand this encoder (not the CP932 one) to the error macro, so that
			 * any output it produces through the converter follows the SJIS-win
			 * mappings. NOTE(review): assumes MB_CONVERT_ERROR uses its last
			 * argument to encode error output -- confirm against its definition. */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjiswin);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

emit_output:
		if (s1 < 0x100) {
			out = mb_convert_buf_add(out, s1);
		} else {
			c1 = (s1 >> 8) & 0xFF;
			c2 = s1 & 0xFF;
			SJIS_ENCODE(c1, c2, s1, s2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
619