1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
/*
 * The source code included in this file was separated from mbfilter_kr.c
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 *
 */
29 
30 /* ISO-2022-KR is defined in RFC 1557
31  * The RFC says that _each_ line which uses KS X 1001 characters must start
32  * with an escape sequence of ESC $ ) C
33  * We don't enforce that for ISO-2022-KR input */
34 
35 #include "mbfilter.h"
36 #include "mbfilter_iso2022_kr.h"
37 #include "unicode_table_uhc.h"
38 
39 static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter);
40 static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter);
41 
42 const mbfl_encoding mbfl_encoding_2022kr = {
43 	mbfl_no_encoding_2022kr,
44 	"ISO-2022-KR",
45 	"ISO-2022-KR",
46 	NULL,
47 	NULL,
48 	MBFL_ENCTYPE_GL_UNSAFE,
49 	&vtbl_2022kr_wchar,
50 	&vtbl_wchar_2022kr,
51 	NULL
52 };
53 
54 const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
55 	mbfl_no_encoding_wchar,
56 	mbfl_no_encoding_2022kr,
57 	mbfl_filt_conv_common_ctor,
58 	NULL,
59 	mbfl_filt_conv_wchar_2022kr,
60 	mbfl_filt_conv_any_2022kr_flush,
61 	NULL,
62 };
63 
64 const struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
65 	mbfl_no_encoding_2022kr,
66 	mbfl_no_encoding_wchar,
67 	mbfl_filt_conv_common_ctor,
68 	NULL,
69 	mbfl_filt_conv_2022kr_wchar,
70 	mbfl_filt_conv_2022kr_wchar_flush,
71 	NULL,
72 };
73 
/*
 * CK(expr): evaluate `expr` exactly once; if the result is negative, return -1
 * from the *enclosing* function. Wrapped in do/while(0) so it behaves as a
 * single statement.
 */
#define CK(expr) \
	do { \
		if ((expr) < 0) { \
			return -1; \
		} \
	} while (0)
75 
/*
 * Decoder: consume one byte `c` of ISO-2022-KR input and hand the resulting
 * value to filter->output_function.
 *
 * State kept in filter->status:
 *   0x10        set by SO (0x0e), cleared by SI (0x0f); while set, bytes
 *               0x21..0x7e are read as KSC5601 double-byte characters
 *   low nibble  0 = idle
 *               1 = lead byte stored in filter->cache, trail byte expected
 *               2, 3, 4 = "ESC", "ESC $", "ESC $ )" seen so far
 *
 * Input that cannot be decoded is reported as MBFL_BAD_INPUT.
 *
 * Returns 0 on success, -1 as soon as the output function returns a negative
 * value (see CK).
 */
int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
{
	int w = 0;

	switch (filter->status & 0xf) {
	/* status 0x00: ASCII */
	/* status 0x10: KSC5601 */
	case 0:
		if (c == 0x1b) { /* ESC */
			/* "+=" on purpose: keeps the 0x10 shift bit while the escape
			 * sequence is being parsed */
			filter->status += 2;
		} else if (c == 0x0f) { /* shift in (ASCII) */
			filter->status = 0;
		} else if (c == 0x0e) { /* shift out (KSC5601) */
			filter->status = 0x10;
		} else if ((filter->status & 0x10) && c > 0x20 && c < 0x7f) {
			/* KSC5601 lead byte */
			filter->cache = c;
			filter->status = 0x11;
		} else if ((filter->status & 0x10) == 0 && c >= 0 && c < 0x80) {
			/* latin, CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: { /* dbcs second byte */
		int c1 = filter->cache;
		int flag = 0;

		/* Whatever happens with this byte, the pair is finished and we stay
		 * shifted out. */
		filter->status = 0x10;

		/* The lead byte selects which lookup table applies */
		if (c1 > 0x20 && c1 < 0x47) {
			flag = 1;
		} else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) {
			flag = 2;
		}

		if (flag > 0 && c > 0x20 && c < 0x7f) {
			if (flag == 1) {
				/* For lead byte 0x22 only trail bytes up to 0x65 are looked
				 * up; anything else leaves w == 0 and is reported as bad */
				if (c1 != 0x22 || c <= 0x65) {
					w = (c1 - 0x21)*190 + (c - 0x41) + 0x80;
					if (w >= 0 && w < uhc2_ucs_table_size) {
						w = uhc2_ucs_table[w];
					} else {
						w = 0;
					}
				}
			} else {
				w = (c1 - 0x47)*94 + (c - 0x21);
				if (w >= 0 && w < uhc3_ucs_table_size) {
					w = uhc3_ucs_table[w];
				} else {
					w = 0;
				}
			}

			if (w <= 0) {
				w = MBFL_BAD_INPUT;
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;
	}

	/*
	 * Escape sequence "ESC $ ) C". It designates a character set; it is not a
	 * shift. So when the sequence completes or is abandoned, only the
	 * low-nibble progress counter is cleared and the SO/SI bit (0x10) is left
	 * exactly as it was before the ESC.
	 */
	case 2: /* ESC */
		if (c == '$') {
			filter->status++;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 3: /* ESC $ */
		if (c == ')') {
			filter->status++;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 4: /* ESC $ ) */
		filter->status &= ~0xf;
		if (c != 'C') {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	default:
		/* No code path sets the low nibble above 4; reset defensively */
		filter->status = 0;
		break;
	}

	return 0;
}
173 
/*
 * End-of-input handler for the ISO-2022-KR decoder.
 *
 * A non-zero low nibble in filter->status means the input stopped in the
 * middle of something: either a double-byte character whose trail byte never
 * arrived, or an unfinished "ESC $ ) C" sequence. One MBFL_BAD_INPUT is
 * emitted for it. The state is then reset and the flush is passed on to the
 * next stage, if one is installed.
 *
 * Returns -1 if emitting the error marker fails (via CK); otherwise the
 * result of the downstream flush function, or 0 when there is none. This
 * matches mbfl_filt_conv_any_2022kr_flush, which also returns the downstream
 * result.
 */
static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status & 0xF) {
		/* truncated 2-byte character or truncated escape sequence */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}
	filter->status = 0;

	if (filter->flush_function) {
		return (*filter->flush_function)(filter->data);
	}

	return 0;
}
188 
/*
 * Encoder: convert one Unicode code point `c` to ISO-2022-KR and write the
 * resulting bytes through filter->output_function.
 *
 * State kept in filter->status:
 *   0x100  the designator "ESC $ ) C" has already been written
 *   0x10   the output is currently shifted out (SO written, no SI since)
 *
 * The designator is written lazily, right before the first double-byte
 * character. SO / SI are written only when the mode actually changes.
 *
 * Code points that have no KS X 1001 equivalent and are not ASCII are passed
 * to mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on success, -1 if any output call returns a negative value
 * (see CK).
 */
int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s = 0;

	/* Look the code point up in whichever UCS -> UHC table covers it;
	 * s stays 0 when no table does. */
	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	c1 = (s >> 8) & 0xff;
	c2 = s & 0xff;
	if (c1 >= 0xa1 && c2 >= 0xa1) {
		/* Both bytes are in 0xa1..: not the UHC extension area. Drop the
		 * high bit of each byte to get the 7-bit form used on the wire. */
		s -= 0x8080;
	} else if (c >= 0 && c < 0x80) {
		/* No double-byte mapping, but plain ASCII passes through as is */
		s = c;
	} else {
		/*
		 * Unmapped, or only representable in the UHC extension area.
		 * The raw code point must NOT be treated as a double-byte code here:
		 * doing so would emit e.g. U+4E02 as the byte pair 0x4E 0x02.
		 */
		s = -1;
	}

	if (s < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	} else if (s < 0x80) { /* ASCII */
		if (filter->status & 0x10) {
			CK((*filter->output_function)(0x0f, filter->data)); /* shift in */
			filter->status &= ~0x10;
		}
		CK((*filter->output_function)(s, filter->data));
	} else { /* double-byte character */
		if ((filter->status & 0x100) == 0) {
			CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
			CK((*filter->output_function)('$', filter->data));
			CK((*filter->output_function)(')', filter->data));
			CK((*filter->output_function)('C', filter->data));
			filter->status |= 0x100;
		}
		if ((filter->status & 0x10) == 0) {
			CK((*filter->output_function)(0x0e, filter->data)); /* shift out */
			filter->status |= 0x10;
		}
		CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(s & 0xff, filter->data));
	}

	return 0;
}
258 
/*
 * End-of-input handler for the ISO-2022-KR encoder.
 *
 * Writes a closing SI (0x0f) when any of bits 0xff00 of filter->status is
 * set, clears status and cache, then forwards the flush to the next stage.
 *
 * NOTE(review): the encoder in this file sets 0x100 once the designator has
 * been written and tracks the actual SO/SI mode in 0x10, so this test can
 * emit an SI even when the output is already shifted in.
 *
 * Returns -1 if writing SI fails (via CK); otherwise the downstream flush
 * result, or 0 when no flush function is installed.
 */
static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
{
	const int needs_shift_in = (filter->status & 0xff00) != 0;

	if (needs_shift_in) {
		/* back to ascii */
		CK((*filter->output_function)(0x0f, filter->data)); /* shift in */
	}

	filter->cache = 0;
	filter->status = 0;

	if (!filter->flush_function) {
		return 0;
	}
	return (*filter->flush_function)(filter->data);
}
274