/*
 * "streamable kanji code filter and converter"
 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
 *
 * LICENSE NOTICES
 *
 * This file is part of "streamable kanji code filter and converter",
 * which is distributed under the terms of GNU Lesser General Public
 * License (version 2) as published by the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with "streamable kanji code filter and converter";
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 * Suite 330, Boston, MA  02111-1307  USA
 *
 * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp>
 *
 */
24 
25 #include "mbfilter_tl_jisx0201_jisx0208.h"
26 #include "translit_kana_jisx0201_jisx0208.h"
27 
28 void
mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter * filt)29 mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter *filt)
30 {
31 	mbfl_filt_conv_common_ctor(filt);
32 }
33 
34 int
mbfl_filt_tl_jisx0201_jisx0208(int c,mbfl_convert_filter * filt)35 mbfl_filt_tl_jisx0201_jisx0208(int c, mbfl_convert_filter *filt)
36 {
37 	int s, n;
38 	int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
39 
40 	s = c;
41 
42 	if ((mode & MBFL_FILT_TL_HAN2ZEN_ALL)
43 			&& c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) {
44 		/* all except <"> <'> <\> <~> */
45 		s = c + 0xfee0;
46 	} else if ((mode & MBFL_FILT_TL_HAN2ZEN_ALPHA) &&
47 			((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) {
48 		/* alpha */
49 		s = c + 0xfee0;
50 	} else if ((mode & MBFL_FILT_TL_HAN2ZEN_NUMERIC) &&
51 			c >= 0x30 && c <= 0x39) {
52 		/* num */
53 		s = c + 0xfee0;
54 	} else if ((mode & MBFL_FILT_TL_HAN2ZEN_SPACE) && c == 0x20) {
55 		/* space */
56 		s = 0x3000;
57 	}
58 
59 	if (mode &
60 			(MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_HIRAGANA)) {
61 		/* hankaku kana to zenkaku kana */
62 		if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
63 				(mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
64 			/* hankaku kana to zenkaku katakana and glue voiced sound mark */
65 			if (c >= 0xff61 && c <= 0xff9f) {
66 				if (filt->status) {
67 					n = (filt->cache - 0xff60) & 0x3f;
68 					if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
69 						filt->status = 0;
70 						s = 0x3001 + hankana2zenkana_table[n];
71 					} else if (c == 0xff9e && n == 19) {
72 						filt->status = 0;
73 						s = 0x30f4;
74 					} else if (c == 0xff9f && (n >= 42 && n <= 46)) {
75 						filt->status = 0;
76 						s = 0x3002 + hankana2zenkana_table[n];
77 					} else {
78 						filt->status = 1;
79 						filt->cache = c;
80 						s = 0x3000 + hankana2zenkana_table[n];
81 					}
82 				} else {
83 					filt->status = 1;
84 					filt->cache = c;
85 					return c;
86 				}
87 			} else {
88 				if (filt->status) {
89 					n = (filt->cache - 0xff60) & 0x3f;
90 					filt->status = 0;
91 					(*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
92 				}
93 			}
94 		} else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) &&
95 				(mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
96 			/* hankaku kana to zenkaku hiragana and glue voiced sound mark */
97 			if (c >= 0xff61 && c <= 0xff9f) {
98 				if (filt->status) {
99 					n = (filt->cache - 0xff60) & 0x3f;
100 					if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
101 						filt->status = 0;
102 						s = 0x3001 + hankana2zenhira_table[n];
103 					} else if (c == 0xff9f && (n >= 42 && n <= 46)) {
104 						filt->status = 0;
105 						s = 0x3002 + hankana2zenhira_table[n];
106 					} else {
107 						filt->status = 1;
108 						filt->cache = c;
109 						s = 0x3000 + hankana2zenhira_table[n];
110 					}
111 				} else {
112 					filt->status = 1;
113 					filt->cache = c;
114 					return c;
115 				}
116 			} else {
117 				if (filt->status) {
118 					n = (filt->cache - 0xff60) & 0x3f;
119 					filt->status = 0;
120 					(*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
121 				}
122 			}
123 		} else if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
124 				c >= 0xff61 && c <= 0xff9f) {
125 			/* hankaku kana to zenkaku katakana */
126 			s = 0x3000 + hankana2zenkana_table[c - 0xff60];
127 		} else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA)
128 				&& c >= 0xff61 && c <= 0xff9f) {
129 			/* hankaku kana to zenkaku hiragana */
130 			s = 0x3000 + hankana2zenhira_table[c - 0xff60];
131 		}
132 	}
133 
134 	if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT1) {
135 		/* special ascii to symbol */
136 		if (c == 0x5c) {
137 			s = 0xffe5;				/* FULLWIDTH YEN SIGN */
138 		} else if (c == 0xa5) {		/* YEN SIGN */
139 			s = 0xffe5;				/* FULLWIDTH YEN SIGN */
140 		} else if (c == 0x7e) {
141 			s = 0xffe3;				/* FULLWIDTH MACRON */
142 		} else if (c == 0x203e) {	/* OVERLINE */
143 			s = 0xffe3;				/* FULLWIDTH MACRON */
144 		} else if (c == 0x27) {
145 			s = 0x2019;				/* RIGHT SINGLE QUOTATION MARK */
146 		} else if (c == 0x22) {
147 			s = 0x201d;				/* RIGHT DOUBLE QUOTATION MARK */
148 		}
149 	} else if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT2) {
150 		/* special ascii to symbol */
151 		if (c == 0x5c) {
152 			s = 0xff3c;				/* FULLWIDTH REVERSE SOLIDUS */
153 		} else if (c == 0x7e) {
154 			s = 0xff5e;				/* FULLWIDTH TILDE */
155 		} else if (c == 0x27) {
156 			s = 0xff07;				/* FULLWIDTH APOSTROPHE */
157 		} else if (c == 0x22) {
158 			s = 0xff02;				/* FULLWIDTH QUOTATION MARK */
159 		}
160 	}
161 
162 	if (mode & (MBFL_FILT_TL_ZEN2HAN_ALL | MBFL_FILT_TL_ZEN2HAN_ALPHA | MBFL_FILT_TL_ZEN2HAN_NUMERIC | MBFL_FILT_TL_ZEN2HAN_SPACE)) {
163 		/* Zenkaku to Hankaku */
164 		if ((mode & MBFL_FILT_TL_ZEN2HAN_ALL) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) {
165 			/* all except <"> <'> <\> <~> */
166 			s = c - 0xfee0;
167 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_ALPHA) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) {
168 			s = c - 0xfee0;
169 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_NUMERIC) && (c >= 0xff10 && c <= 0xff19)) {
170 			s = c - 0xfee0;
171 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_SPACE) && (c == 0x3000)) {
172 			s = 0x20;
173 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_ALL) && (c == 0x2212)) {
174 			/* MINUS SIGN */
175 			s = 0x2d;
176 		}
177 	}
178 
179 	if (mode &
180 			(MBFL_FILT_TL_ZEN2HAN_KATAKANA | MBFL_FILT_TL_ZEN2HAN_HIRAGANA)) {
181 		/* Zenkaku kana to hankaku kana */
182 		if ((mode & MBFL_FILT_TL_ZEN2HAN_KATAKANA) &&
183 				c >= 0x30a1 && c <= 0x30f4) {
184 			/* Zenkaku katakana to hankaku kana */
185 			n = c - 0x30a1;
186 			if (zenkana2hankana_table[n][1] != 0) {
187 				(filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
188 				s = 0xff00 + zenkana2hankana_table[n][1];
189 			} else {
190 				s = 0xff00 + zenkana2hankana_table[n][0];
191 			}
192 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRAGANA) &&
193 				c >= 0x3041 && c <= 0x3093) {
194 			/* Zenkaku hiragana to hankaku kana */
195 			n = c - 0x3041;
196 			if (zenkana2hankana_table[n][1] != 0) {
197 				(filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
198 				s = 0xff00 + zenkana2hankana_table[n][1];
199 			} else {
200 				s = 0xff00 + zenkana2hankana_table[n][0];
201 			}
202 		} else if (c == 0x3001) {
203 			s = 0xff64;				/* HALFWIDTH IDEOGRAPHIC COMMA */
204 		} else if (c == 0x3002) {
205 			s = 0xff61;				/* HALFWIDTH IDEOGRAPHIC FULL STOP */
206 		} else if (c == 0x300c) {
207 			s = 0xff62;				/* HALFWIDTH LEFT CORNER BRACKET */
208 		} else if (c == 0x300d) {
209 			s = 0xff63;				/* HALFWIDTH RIGHT CORNER BRACKET */
210 		} else if (c == 0x309b) {
211 			s = 0xff9e;				/* HALFWIDTH KATAKANA VOICED SOUND MARK */
212 		} else if (c == 0x309c) {
213 			s = 0xff9f;				/* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
214 		} else if (c == 0x30fc) {
215 			s = 0xff70;				/* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
216 		} else if (c == 0x30fb) {
217 			s = 0xff65;				/* HALFWIDTH KATAKANA MIDDLE DOT */
218 		}
219 	} else if (mode & (MBFL_FILT_TL_ZEN2HAN_HIRA2KANA
220 			| MBFL_FILT_TL_ZEN2HAN_KANA2HIRA)) {
221 		if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRA2KANA) &&
222 				((c >= 0x3041 && c <= 0x3093) || c == 0x309d || c == 0x309e)) {
223 			/* Zenkaku hiragana to Zenkaku katakana */
224 			s = c + 0x60;
225 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_KANA2HIRA) &&
226 				((c >= 0x30a1 && c <= 0x30f3) || c == 0x30fd || c == 0x30fe)) {
227 			/* Zenkaku katakana to Zenkaku hiragana */
228 			s = c - 0x60;
229 		}
230 	}
231 
232 	if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT1) {	/* special symbol to ascii */
233 		if (c == 0xffe5) {			/* FULLWIDTH YEN SIGN */
234 			s = 0x5c;
235 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
236 			s = 0x5c;
237 		} else if (c == 0xffe3) {	/* FULLWIDTH MACRON */
238 			s = 0x7e;
239 		} else if (c == 0x203e) {	/* OVERLINE */
240 			s = 0x7e;
241 		} else if (c == 0x2018) {	/* LEFT SINGLE QUOTATION MARK*/
242 			s = 0x27;
243 		} else if (c == 0x2019) {	/* RIGHT SINGLE QUOTATION MARK */
244 			s = 0x27;
245 		} else if (c == 0x201c) {	/* LEFT DOUBLE QUOTATION MARK */
246 			s = 0x22;
247 		} else if (c == 0x201d) {	/* RIGHT DOUBLE QUOTATION MARK */
248 			s = 0x22;
249 		}
250 	}
251 
252 	if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT2) {	/* special symbol to ascii */
253 		if (c == 0xff3c) {			/* FULLWIDTH REVERSE SOLIDUS */
254 			s = 0x5c;
255 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
256 			s = 0x7e;
257 		} else if (c == 0xff07) {	/* FULLWIDTH APOSTROPHE */
258 			s = 0x27;
259 		} else if (c == 0xff02) {	/* FULLWIDTH QUOTATION MARK */
260 			s = 0x22;
261 		}
262 	}
263 
264 	return (*filt->output_function)(s, filt->data);
265 }
266 
267 int
mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter * filt)268 mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter *filt)
269 {
270 	int ret, n;
271 	int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
272 
273 	ret = 0;
274 	if (filt->status) {
275 		n = (filt->cache - 0xff60) & 0x3f;
276 		if (mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) {	/* hankaku kana to zenkaku katakana */
277 			ret = (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
278 		} else if (mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) {	/* hankaku kana to zenkaku hiragana */
279 			ret = (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
280 		}
281 		filt->status = 0;
282 	}
283 
284 	if (filt->flush_function != NULL) {
285 		return (*filt->flush_function)(filt->data);
286 	}
287 
288 	return ret;
289 }
290 
291 const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208 = {
292 	mbfl_no_encoding_wchar,
293 	mbfl_no_encoding_wchar,
294 	mbfl_filt_tl_jisx0201_jisx0208_init,
295 	NULL,
296 	mbfl_filt_tl_jisx0201_jisx0208,
297 	mbfl_filt_tl_jisx0201_jisx0208_flush,
298 	NULL,
299 };
300