1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp>
22 *
23 */
24
25 #include "mbfl_allocators.h"
26 #include "mbfilter_tl_jisx0201_jisx0208.h"
27 #include "translit_kana_jisx0201_jisx0208.h"
28
29 void
mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter * filt)30 mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter *filt)
31 {
32 mbfl_filt_conv_common_ctor(filt);
33 }
34
35 void
mbfl_filt_tl_jisx0201_jisx0208_cleanup(mbfl_convert_filter * filt)36 mbfl_filt_tl_jisx0201_jisx0208_cleanup(mbfl_convert_filter *filt)
37 {
38 }
39
40 int
mbfl_filt_tl_jisx0201_jisx0208(int c,mbfl_convert_filter * filt)41 mbfl_filt_tl_jisx0201_jisx0208(int c, mbfl_convert_filter *filt)
42 {
43 int s, n;
44 int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
45
46 s = c;
47
48 if ((mode & MBFL_FILT_TL_HAN2ZEN_ALL)
49 && c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) {
50 /* all except <"> <'> <\> <~> */
51 s = c + 0xfee0;
52 } else if ((mode & MBFL_FILT_TL_HAN2ZEN_ALPHA) &&
53 ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) {
54 /* alpha */
55 s = c + 0xfee0;
56 } else if ((mode & MBFL_FILT_TL_HAN2ZEN_NUMERIC) &&
57 c >= 0x30 && c <= 0x39) {
58 /* num */
59 s = c + 0xfee0;
60 } else if ((mode & MBFL_FILT_TL_HAN2ZEN_SPACE) && c == 0x20) {
61 /* space */
62 s = 0x3000;
63 }
64
65 if (mode &
66 (MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_HIRAGANA)) {
67 /* hankaku kana to zenkaku kana */
68 if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
69 (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
70 /* hankaku kana to zenkaku katakana and glue voiced sound mark */
71 if (c >= 0xff61 && c <= 0xff9f) {
72 if (filt->status) {
73 n = (filt->cache - 0xff60) & 0x3f;
74 if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
75 filt->status = 0;
76 s = 0x3001 + hankana2zenkana_table[n];
77 } else if (c == 0xff9e && n == 19) {
78 filt->status = 0;
79 s = 0x30f4;
80 } else if (c == 0xff9f && (n >= 42 && n <= 46)) {
81 filt->status = 0;
82 s = 0x3002 + hankana2zenkana_table[n];
83 } else {
84 filt->status = 1;
85 filt->cache = c;
86 s = 0x3000 + hankana2zenkana_table[n];
87 }
88 } else {
89 filt->status = 1;
90 filt->cache = c;
91 return c;
92 }
93 } else {
94 if (filt->status) {
95 n = (filt->cache - 0xff60) & 0x3f;
96 filt->status = 0;
97 (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
98 }
99 }
100 } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) &&
101 (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
102 /* hankaku kana to zenkaku hirangana and glue voiced sound mark */
103 if (c >= 0xff61 && c <= 0xff9f) {
104 if (filt->status) {
105 n = (filt->cache - 0xff60) & 0x3f;
106 if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
107 filt->status = 0;
108 s = 0x3001 + hankana2zenhira_table[n];
109 } else if (c == 0xff9f && (n >= 42 && n <= 46)) {
110 filt->status = 0;
111 s = 0x3002 + hankana2zenhira_table[n];
112 } else {
113 filt->status = 1;
114 filt->cache = c;
115 s = 0x3000 + hankana2zenhira_table[n];
116 }
117 } else {
118 filt->status = 1;
119 filt->cache = c;
120 return c;
121 }
122 } else {
123 if (filt->status) {
124 n = (filt->cache - 0xff60) & 0x3f;
125 filt->status = 0;
126 (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
127 }
128 }
129 } else if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
130 c >= 0xff61 && c <= 0xff9f) {
131 /* hankaku kana to zenkaku katakana */
132 s = 0x3000 + hankana2zenkana_table[c - 0xff60];
133 } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA)
134 && c >= 0xff61 && c <= 0xff9f) {
135 /* hankaku kana to zenkaku hirangana */
136 s = 0x3000 + hankana2zenhira_table[c - 0xff60];
137 }
138 }
139
140 if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT1) {
141 /* special ascii to symbol */
142 if (c == 0x5c) {
143 s = 0xffe5; /* FULLWIDTH YEN SIGN */
144 } else if (c == 0xa5) { /* YEN SIGN */
145 s = 0xffe5; /* FULLWIDTH YEN SIGN */
146 } else if (c == 0x7e) {
147 s = 0xffe3; /* FULLWIDTH MACRON */
148 } else if (c == 0x203e) { /* OVERLINE */
149 s = 0xffe3; /* FULLWIDTH MACRON */
150 } else if (c == 0x27) {
151 s = 0x2019; /* RIGHT SINGLE QUOTATION MARK */
152 } else if (c == 0x22) {
153 s = 0x201d; /* RIGHT DOUBLE QUOTATION MARK */
154 }
155 } else if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT2) {
156 /* special ascii to symbol */
157 if (c == 0x5c) {
158 s = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
159 } else if (c == 0x7e) {
160 s = 0xff5e; /* FULLWIDTH TILDE */
161 } else if (c == 0x27) {
162 s = 0xff07; /* FULLWIDTH APOSTROPHE */
163 } else if (c == 0x22) {
164 s = 0xff02; /* FULLWIDTH QUOTATION MARK */
165 }
166 }
167
168 if (mode & 0xf0) { /* zenkaku to hankaku */
169 if ((mode & 0x10) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) { /* all except <"> <'> <\> <~> */
170 s = c - 0xfee0;
171 } else if ((mode & 0x20) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) { /* alpha */
172 s = c - 0xfee0;
173 } else if ((mode & 0x40) && (c >= 0xff10 && c <= 0xff19)) { /* num */
174 s = c - 0xfee0;
175 } else if ((mode & 0x80) && (c == 0x3000)) { /* spase */
176 s = 0x20;
177 } else if ((mode & 0x10) && (c == 0x2212)) { /* MINUS SIGN */
178 s = 0x2d;
179 }
180 }
181
182 if (mode &
183 (MBFL_FILT_TL_ZEN2HAN_KATAKANA | MBFL_FILT_TL_ZEN2HAN_HIRAGANA)) {
184 /* Zenkaku kana to hankaku kana */
185 if ((mode & MBFL_FILT_TL_ZEN2HAN_KATAKANA) &&
186 c >= 0x30a1 && c <= 0x30f4) {
187 /* Zenkaku katakana to hankaku kana */
188 n = c - 0x30a1;
189 if (zenkana2hankana_table[n][1] != 0) {
190 (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
191 s = 0xff00 + zenkana2hankana_table[n][1];
192 } else {
193 s = 0xff00 + zenkana2hankana_table[n][0];
194 }
195 } else if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRAGANA) &&
196 c >= 0x3041 && c <= 0x3093) {
197 /* Zenkaku hirangana to hankaku kana */
198 n = c - 0x3041;
199 if (zenkana2hankana_table[n][1] != 0) {
200 (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
201 s = 0xff00 + zenkana2hankana_table[n][1];
202 } else {
203 s = 0xff00 + zenkana2hankana_table[n][0];
204 }
205 } else if (c == 0x3001) {
206 s = 0xff64; /* HALFWIDTH IDEOGRAPHIC COMMA */
207 } else if (c == 0x3002) {
208 s = 0xff61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */
209 } else if (c == 0x300c) {
210 s = 0xff62; /* HALFWIDTH LEFT CORNER BRACKET */
211 } else if (c == 0x300d) {
212 s = 0xff63; /* HALFWIDTH RIGHT CORNER BRACKET */
213 } else if (c == 0x309b) {
214 s = 0xff9e; /* HALFWIDTH KATAKANA VOICED SOUND MARK */
215 } else if (c == 0x309c) {
216 s = 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
217 } else if (c == 0x30fc) {
218 s = 0xff70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
219 } else if (c == 0x30fb) {
220 s = 0xff65; /* HALFWIDTH KATAKANA MIDDLE DOT */
221 }
222 } else if (mode & (MBFL_FILT_TL_ZEN2HAN_HIRA2KANA
223 | MBFL_FILT_TL_ZEN2HAN_KANA2HIRA)) {
224 if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRA2KANA) &&
225 c >= 0x3041 && c <= 0x3093) {
226 /* Zenkaku hirangana to Zenkaku katakana */
227 s = c + 0x60;
228 } else if ((mode & MBFL_FILT_TL_ZEN2HAN_KANA2HIRA) &&
229 c >= 0x30a1 && c <= 0x30f3) {
230 /* Zenkaku katakana to Zenkaku hirangana */
231 s = c - 0x60;
232 }
233 }
234
235 if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT1) { /* special symbol to ascii */
236 if (c == 0xffe5) { /* FULLWIDTH YEN SIGN */
237 s = 0x5c;
238 } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
239 s = 0x5c;
240 } else if (c == 0xffe3) { /* FULLWIDTH MACRON */
241 s = 0x7e;
242 } else if (c == 0x203e) { /* OVERLINE */
243 s = 0x7e;
244 } else if (c == 0x2018) { /* LEFT SINGLE QUOTATION MARK*/
245 s = 0x27;
246 } else if (c == 0x2019) { /* RIGHT SINGLE QUOTATION MARK */
247 s = 0x27;
248 } else if (c == 0x201c) { /* LEFT DOUBLE QUOTATION MARK */
249 s = 0x22;
250 } else if (c == 0x201d) { /* RIGHT DOUBLE QUOTATION MARK */
251 s = 0x22;
252 }
253 }
254
255 if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT2) { /* special symbol to ascii */
256 if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
257 s = 0x5c;
258 } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
259 s = 0x7e;
260 } else if (c == 0xff07) { /* FULLWIDTH APOSTROPHE */
261 s = 0x27;
262 } else if (c == 0xff02) { /* FULLWIDTH QUOTATION MARK */
263 s = 0x22;
264 }
265 }
266
267 return (*filt->output_function)(s, filt->data);
268 }
269
270 int
mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter * filt)271 mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter *filt)
272 {
273 int ret, n;
274 int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
275
276 ret = 0;
277 if (filt->status) {
278 n = (filt->cache - 0xff60) & 0x3f;
279 if (mode & 0x100) { /* hankaku kana to zenkaku katakana */
280 ret = (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
281 } else if (mode & 0x200) { /* hankaku kana to zenkaku hirangana */
282 ret = (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
283 }
284 filt->status = 0;
285 }
286
287 if (filt->flush_function != NULL) {
288 return (*filt->flush_function)(filt->data);
289 }
290
291 return ret;
292 }
293
294 const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208 = {
295 mbfl_no_encoding_wchar,
296 mbfl_no_encoding_wchar,
297 mbfl_filt_tl_jisx0201_jisx0208_init,
298 mbfl_filt_tl_jisx0201_jisx0208_cleanup,
299 mbfl_filt_tl_jisx0201_jisx0208,
300 mbfl_filt_tl_jisx0201_jisx0208_flush
301 };
302
303