/*
 * "streamable kanji code filter and converter"
 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
 *
 * LICENSE NOTICES
 *
 * This file is part of "streamable kanji code filter and converter",
 * which is distributed under the terms of GNU Lesser General Public
 * License (version 2) as published by the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with "streamable kanji code filter and converter";
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 * Suite 330, Boston, MA  02111-1307  USA
 *
 * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp>
 *
 */
24 
#include <stddef.h>

#include "mbfl_allocators.h"
#include "mbfilter_tl_jisx0201_jisx0208.h"
#include "translit_kana_jisx0201_jisx0208.h"
28 
29 void
mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter * filt)30 mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter *filt)
31 {
32 	mbfl_filt_conv_common_ctor(filt);
33 }
34 
35 void
mbfl_filt_tl_jisx0201_jisx0208_cleanup(mbfl_convert_filter * filt)36 mbfl_filt_tl_jisx0201_jisx0208_cleanup(mbfl_convert_filter *filt)
37 {
38 }
39 
40 int
mbfl_filt_tl_jisx0201_jisx0208(int c,mbfl_convert_filter * filt)41 mbfl_filt_tl_jisx0201_jisx0208(int c, mbfl_convert_filter *filt)
42 {
43 	int s, n;
44 	int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
45 
46 	s = c;
47 
48 	if ((mode & MBFL_FILT_TL_HAN2ZEN_ALL)
49 			&& c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) {
50 		/* all except <"> <'> <\> <~> */
51 		s = c + 0xfee0;
52 	} else if ((mode & MBFL_FILT_TL_HAN2ZEN_ALPHA) &&
53 			((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) {
54 		/* alpha */
55 		s = c + 0xfee0;
56 	} else if ((mode & MBFL_FILT_TL_HAN2ZEN_NUMERIC) &&
57 			c >= 0x30 && c <= 0x39) {
58 		/* num */
59 		s = c + 0xfee0;
60 	} else if ((mode & MBFL_FILT_TL_HAN2ZEN_SPACE) && c == 0x20) {
61 		/* space */
62 		s = 0x3000;
63 	}
64 
65 	if (mode &
66 			(MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_HIRAGANA)) {
67 		/* hankaku kana to zenkaku kana */
68 		if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
69 				(mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
70 			/* hankaku kana to zenkaku katakana and glue voiced sound mark */
71 			if (c >= 0xff61 && c <= 0xff9f) {
72 				if (filt->status) {
73 					n = (filt->cache - 0xff60) & 0x3f;
74 					if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
75 						filt->status = 0;
76 						s = 0x3001 + hankana2zenkana_table[n];
77 					} else if (c == 0xff9e && n == 19) {
78 						filt->status = 0;
79 						s = 0x30f4;
80 					} else if (c == 0xff9f && (n >= 42 && n <= 46)) {
81 						filt->status = 0;
82 						s = 0x3002 + hankana2zenkana_table[n];
83 					} else {
84 						filt->status = 1;
85 						filt->cache = c;
86 						s = 0x3000 + hankana2zenkana_table[n];
87 					}
88 				} else {
89 					filt->status = 1;
90 					filt->cache = c;
91 					return c;
92 				}
93 			} else {
94 				if (filt->status) {
95 					n = (filt->cache - 0xff60) & 0x3f;
96 					filt->status = 0;
97 					(*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
98 				}
99 			}
100 		} else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) &&
101 				(mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) {
102 			/* hankaku kana to zenkaku hirangana and glue voiced sound mark */
103 			if (c >= 0xff61 && c <= 0xff9f) {
104 				if (filt->status) {
105 					n = (filt->cache - 0xff60) & 0x3f;
106 					if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) {
107 						filt->status = 0;
108 						s = 0x3001 + hankana2zenhira_table[n];
109 					} else if (c == 0xff9f && (n >= 42 && n <= 46)) {
110 						filt->status = 0;
111 						s = 0x3002 + hankana2zenhira_table[n];
112 					} else {
113 						filt->status = 1;
114 						filt->cache = c;
115 						s = 0x3000 + hankana2zenhira_table[n];
116 					}
117 				} else {
118 					filt->status = 1;
119 					filt->cache = c;
120 					return c;
121 				}
122 			} else {
123 				if (filt->status) {
124 					n = (filt->cache - 0xff60) & 0x3f;
125 					filt->status = 0;
126 					(*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
127 				}
128 			}
129 		} else if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) &&
130 				c >= 0xff61 && c <= 0xff9f) {
131 			/* hankaku kana to zenkaku katakana */
132 			s = 0x3000 + hankana2zenkana_table[c - 0xff60];
133 		} else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA)
134 				&& c >= 0xff61 && c <= 0xff9f) {
135 			/* hankaku kana to zenkaku hirangana */
136 			s = 0x3000 + hankana2zenhira_table[c - 0xff60];
137 		}
138 	}
139 
140 	if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT1) {
141 		/* special ascii to symbol */
142 		if (c == 0x5c) {
143 			s = 0xffe5;				/* FULLWIDTH YEN SIGN */
144 		} else if (c == 0xa5) {		/* YEN SIGN */
145 			s = 0xffe5;				/* FULLWIDTH YEN SIGN */
146 		} else if (c == 0x7e) {
147 			s = 0xffe3;				/* FULLWIDTH MACRON */
148 		} else if (c == 0x203e) {	/* OVERLINE */
149 			s = 0xffe3;				/* FULLWIDTH MACRON */
150 		} else if (c == 0x27) {
151 			s = 0x2019;				/* RIGHT SINGLE QUOTATION MARK */
152 		} else if (c == 0x22) {
153 			s = 0x201d;				/* RIGHT DOUBLE QUOTATION MARK */
154 		}
155 	} else if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT2) {
156 		/* special ascii to symbol */
157 		if (c == 0x5c) {
158 			s = 0xff3c;				/* FULLWIDTH REVERSE SOLIDUS */
159 		} else if (c == 0x7e) {
160 			s = 0xff5e;				/* FULLWIDTH TILDE */
161 		} else if (c == 0x27) {
162 			s = 0xff07;				/* FULLWIDTH APOSTROPHE */
163 		} else if (c == 0x22) {
164 			s = 0xff02;				/* FULLWIDTH QUOTATION MARK */
165 		}
166 	}
167 
168 	if (mode & 0xf0) { /* zenkaku to hankaku */
169 		if ((mode & 0x10) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) {	/* all except <"> <'> <\> <~> */
170 			s = c - 0xfee0;
171 		} else if ((mode & 0x20) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) {	/* alpha */
172 			s = c - 0xfee0;
173 		} else if ((mode & 0x40) && (c >= 0xff10 && c <= 0xff19)) {	/* num */
174 			s = c - 0xfee0;
175 		} else if ((mode & 0x80) && (c == 0x3000)) {	/* spase */
176 			s = 0x20;
177 		} else if ((mode & 0x10) && (c == 0x2212)) {	/* MINUS SIGN */
178 			s = 0x2d;
179 		}
180 	}
181 
182 	if (mode &
183 			(MBFL_FILT_TL_ZEN2HAN_KATAKANA | MBFL_FILT_TL_ZEN2HAN_HIRAGANA)) {
184 		/* Zenkaku kana to hankaku kana */
185 		if ((mode & MBFL_FILT_TL_ZEN2HAN_KATAKANA) &&
186 				c >= 0x30a1 && c <= 0x30f4) {
187 			/* Zenkaku katakana to hankaku kana */
188 			n = c - 0x30a1;
189 			if (zenkana2hankana_table[n][1] != 0) {
190 				(filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
191 				s = 0xff00 + zenkana2hankana_table[n][1];
192 			} else {
193 				s = 0xff00 + zenkana2hankana_table[n][0];
194 			}
195 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRAGANA) &&
196 				c >= 0x3041 && c <= 0x3093) {
197 			/* Zenkaku hirangana to hankaku kana */
198 			n = c - 0x3041;
199 			if (zenkana2hankana_table[n][1] != 0) {
200 				(filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data);
201 				s = 0xff00 + zenkana2hankana_table[n][1];
202 			} else {
203 				s = 0xff00 + zenkana2hankana_table[n][0];
204 			}
205 		} else if (c == 0x3001) {
206 			s = 0xff64;				/* HALFWIDTH IDEOGRAPHIC COMMA */
207 		} else if (c == 0x3002) {
208 			s = 0xff61;				/* HALFWIDTH IDEOGRAPHIC FULL STOP */
209 		} else if (c == 0x300c) {
210 			s = 0xff62;				/* HALFWIDTH LEFT CORNER BRACKET */
211 		} else if (c == 0x300d) {
212 			s = 0xff63;				/* HALFWIDTH RIGHT CORNER BRACKET */
213 		} else if (c == 0x309b) {
214 			s = 0xff9e;				/* HALFWIDTH KATAKANA VOICED SOUND MARK */
215 		} else if (c == 0x309c) {
216 			s = 0xff9f;				/* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
217 		} else if (c == 0x30fc) {
218 			s = 0xff70;				/* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
219 		} else if (c == 0x30fb) {
220 			s = 0xff65;				/* HALFWIDTH KATAKANA MIDDLE DOT */
221 		}
222 	} else if (mode & (MBFL_FILT_TL_ZEN2HAN_HIRA2KANA
223 			| MBFL_FILT_TL_ZEN2HAN_KANA2HIRA)) {
224 		if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRA2KANA) &&
225 				c >= 0x3041 && c <= 0x3093) {
226 			/* Zenkaku hirangana to Zenkaku katakana */
227 			s = c + 0x60;
228 		} else if ((mode & MBFL_FILT_TL_ZEN2HAN_KANA2HIRA) &&
229 				c >= 0x30a1 && c <= 0x30f3) {
230 			/* Zenkaku katakana to Zenkaku hirangana */
231 			s = c - 0x60;
232 		}
233 	}
234 
235 	if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT1) {	/* special symbol to ascii */
236 		if (c == 0xffe5) {			/* FULLWIDTH YEN SIGN */
237 			s = 0x5c;
238 		} else if (c == 0xff3c) {	/* FULLWIDTH REVERSE SOLIDUS */
239 			s = 0x5c;
240 		} else if (c == 0xffe3) {	/* FULLWIDTH MACRON */
241 			s = 0x7e;
242 		} else if (c == 0x203e) {	/* OVERLINE */
243 			s = 0x7e;
244 		} else if (c == 0x2018) {	/* LEFT SINGLE QUOTATION MARK*/
245 			s = 0x27;
246 		} else if (c == 0x2019) {	/* RIGHT SINGLE QUOTATION MARK */
247 			s = 0x27;
248 		} else if (c == 0x201c) {	/* LEFT DOUBLE QUOTATION MARK */
249 			s = 0x22;
250 		} else if (c == 0x201d) {	/* RIGHT DOUBLE QUOTATION MARK */
251 			s = 0x22;
252 		}
253 	}
254 
255 	if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT2) {	/* special symbol to ascii */
256 		if (c == 0xff3c) {			/* FULLWIDTH REVERSE SOLIDUS */
257 			s = 0x5c;
258 		} else if (c == 0xff5e) {	/* FULLWIDTH TILDE */
259 			s = 0x7e;
260 		} else if (c == 0xff07) {	/* FULLWIDTH APOSTROPHE */
261 			s = 0x27;
262 		} else if (c == 0xff02) {	/* FULLWIDTH QUOTATION MARK */
263 			s = 0x22;
264 		}
265 	}
266 
267 	return (*filt->output_function)(s, filt->data);
268 }
269 
270 int
mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter * filt)271 mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter *filt)
272 {
273 	int ret, n;
274 	int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode;
275 
276 	ret = 0;
277 	if (filt->status) {
278 		n = (filt->cache - 0xff60) & 0x3f;
279 		if (mode & 0x100) {	/* hankaku kana to zenkaku katakana */
280 			ret = (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data);
281 		} else if (mode & 0x200) {	/* hankaku kana to zenkaku hirangana */
282 			ret = (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data);
283 		}
284 		filt->status = 0;
285 	}
286 
287 	if (filt->flush_function != NULL) {
288 		return (*filt->flush_function)(filt->data);
289 	}
290 
291 	return ret;
292 }
293 
294 const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208 = {
295 	mbfl_no_encoding_wchar,
296 	mbfl_no_encoding_wchar,
297 	mbfl_filt_tl_jisx0201_jisx0208_init,
298 	mbfl_filt_tl_jisx0201_jisx0208_cleanup,
299 	mbfl_filt_tl_jisx0201_jisx0208,
300 	mbfl_filt_tl_jisx0201_jisx0208_flush
301 };
302 
303