1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
27  * mbfilter.c is included in this package.
28  *
29  */
30 
31 #include <stddef.h>
32 
33 #include "mbfl_encoding.h"
34 #include "mbfl_filter_output.h"
35 #include "mbfilter_pass.h"
36 #include "mbfilter_8bit.h"
37 #include "mbfilter_wchar.h"
38 
39 #include "filters/mbfilter_euc_cn.h"
40 #include "filters/mbfilter_hz.h"
41 #include "filters/mbfilter_euc_tw.h"
42 #include "filters/mbfilter_big5.h"
43 #include "filters/mbfilter_uhc.h"
44 #include "filters/mbfilter_euc_kr.h"
45 #include "filters/mbfilter_iso2022_kr.h"
46 #include "filters/mbfilter_sjis.h"
47 #include "filters/mbfilter_sjis_2004.h"
48 #include "filters/mbfilter_sjis_mobile.h"
49 #include "filters/mbfilter_sjis_mac.h"
50 #include "filters/mbfilter_cp51932.h"
51 #include "filters/mbfilter_jis.h"
52 #include "filters/mbfilter_iso2022_jp_ms.h"
53 #include "filters/mbfilter_iso2022jp_2004.h"
54 #include "filters/mbfilter_iso2022jp_mobile.h"
55 #include "filters/mbfilter_euc_jp.h"
56 #include "filters/mbfilter_euc_jp_2004.h"
57 #include "filters/mbfilter_euc_jp_win.h"
58 #include "filters/mbfilter_gb18030.h"
59 #include "filters/mbfilter_cp932.h"
60 #include "filters/mbfilter_cp936.h"
61 #include "filters/mbfilter_cp5022x.h"
62 #include "filters/mbfilter_base64.h"
63 #include "filters/mbfilter_qprint.h"
64 #include "filters/mbfilter_uuencode.h"
65 #include "filters/mbfilter_7bit.h"
66 #include "filters/mbfilter_utf7.h"
67 #include "filters/mbfilter_utf7imap.h"
68 #include "filters/mbfilter_utf8.h"
69 #include "filters/mbfilter_utf8_mobile.h"
70 #include "filters/mbfilter_utf16.h"
71 #include "filters/mbfilter_utf32.h"
72 #include "filters/mbfilter_ucs4.h"
73 #include "filters/mbfilter_ucs2.h"
74 #include "filters/mbfilter_htmlent.h"
75 #include "filters/mbfilter_singlebyte.h"
76 
/* Upper-case hexadecimal digits, indexed by nibble value (0..15).
 * Read-only lookup table, so it is declared const. */
static const char mbfl_hexchar_table[] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
81 
82 static const struct mbfl_convert_vtbl *mbfl_special_filter_list[] = {
83 	&vtbl_8bit_b64,
84 	&vtbl_b64_8bit,
85 	&vtbl_uuencode_8bit,
86 	&vtbl_8bit_qprint,
87 	&vtbl_qprint_8bit,
88 	&vtbl_8bit_7bit,
89 	&vtbl_7bit_8bit,
90 	&vtbl_pass,
91 	NULL
92 };
93 
/*
 * Fill in every field of an already allocated conversion filter and then run
 * the constructor supplied by `vtbl`.
 *
 * filter          - filter object to populate
 * from, to        - encodings recorded in the filter
 * vtbl            - supplies the dtor, filter, flush and copy handlers as
 *                   well as the constructor
 * output_function - output callback; when NULL, mbfl_filter_output_null is
 *                   stored instead
 * flush_function  - stored as-is (may be NULL)
 * data            - opaque pointer stored as-is
 */
static void mbfl_convert_filter_init(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to,
	const struct mbfl_convert_vtbl *vtbl, output_function_t output_function, flush_function_t flush_function, void* data)
{
	/* source and destination encodings */
	filter->from = from;
	filter->to = to;

	/* caller-provided callbacks and payload */
	filter->output_function = (output_function == NULL) ? mbfl_filter_output_null : output_function;
	filter->flush_function = flush_function;
	filter->data = data;

	/* defaults for handling of characters which cannot be converted */
	filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
	filter->illegal_substchar = '?';
	filter->num_illegalchar = 0;

	/* handlers taken from the conversion vtable */
	filter->filter_function = vtbl->filter_function;
	filter->filter_flush = (filter_flush_t)vtbl->filter_flush;
	filter->filter_copy = vtbl->filter_copy;
	filter->filter_dtor = vtbl->filter_dtor;

	/* the constructor runs last, once all fields above are in place */
	vtbl->filter_ctor(filter);
}
119 
mbfl_convert_filter_new(const mbfl_encoding * from,const mbfl_encoding * to,output_function_t output_function,flush_function_t flush_function,void * data)120 mbfl_convert_filter* mbfl_convert_filter_new(const mbfl_encoding *from, const mbfl_encoding *to, output_function_t output_function,
121 	flush_function_t flush_function, void* data)
122 {
123 	const struct mbfl_convert_vtbl *vtbl = mbfl_convert_filter_get_vtbl(from, to);
124 	if (vtbl == NULL) {
125 		return NULL;
126 	}
127 
128 	mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter));
129 	mbfl_convert_filter_init(filter, from, to, vtbl, output_function, flush_function, data);
130 	return filter;
131 }
132 
mbfl_convert_filter_new2(const struct mbfl_convert_vtbl * vtbl,output_function_t output_function,flush_function_t flush_function,void * data)133 mbfl_convert_filter* mbfl_convert_filter_new2(const struct mbfl_convert_vtbl *vtbl, output_function_t output_function,
134 	flush_function_t flush_function, void* data)
135 {
136 	const mbfl_encoding *from_encoding = mbfl_no2encoding(vtbl->from);
137 	const mbfl_encoding *to_encoding = mbfl_no2encoding(vtbl->to);
138 
139 	mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter));
140 	mbfl_convert_filter_init(filter, from_encoding, to_encoding, vtbl, output_function, flush_function, data);
141 	return filter;
142 }
143 
/*
 * Destroy a filter: invoke its destructor hook when one is set, then release
 * the filter object itself with efree().
 */
void mbfl_convert_filter_delete(mbfl_convert_filter *filter)
{
	if (filter->filter_dtor != NULL) {
		filter->filter_dtor(filter);
	}

	efree(filter);
}
151 
152 /* Feed a char, return 0 if ok - used by mailparse ext */
mbfl_convert_filter_feed(int c,mbfl_convert_filter * filter)153 int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter)
154 {
155 	return (*filter->filter_function)(c, filter);
156 }
157 
158 /* Feed string into `filter` byte by byte; return pointer to first byte not processed */
mbfl_convert_filter_feed_string(mbfl_convert_filter * filter,unsigned char * p,size_t len)159 unsigned char* mbfl_convert_filter_feed_string(mbfl_convert_filter *filter, unsigned char *p, size_t len)
160 {
161 	while (len--) {
162 		if ((*filter->filter_function)(*p++, filter) < 0) {
163 			break;
164 		}
165 	}
166 	return p;
167 }
168 
/*
 * Invoke the filter's flush handler.
 * Always returns 0; the handler's own result is not propagated.
 */
int mbfl_convert_filter_flush(mbfl_convert_filter *filter)
{
	filter->filter_flush(filter);

	return 0;
}
174 
/*
 * Re-initialize an existing filter for a new pair of encodings.
 *
 * The current destructor hook (if any) is run first. The filter is then
 * initialized again, keeping its output function, flush function and data
 * pointer. When no converter exists for the requested pair, the pass-through
 * converter (vtbl_pass) is used.
 */
void mbfl_convert_filter_reset(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to)
{
	const struct mbfl_convert_vtbl *conv;

	if (filter->filter_dtor != NULL) {
		filter->filter_dtor(filter);
	}

	conv = mbfl_convert_filter_get_vtbl(from, to);

	mbfl_convert_filter_init(filter, from, to, (conv != NULL) ? conv : &vtbl_pass,
		filter->output_function, filter->flush_function, filter->data);
}
189 
/*
 * Copy the state of `src` into `dest`.
 *
 * When `src` has a filter_copy handler, the copy is delegated to it;
 * otherwise a plain structure assignment is performed.
 */
void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest)
{
	if (src->filter_copy == NULL) {
		*dest = *src;
	} else {
		src->filter_copy(src, dest);
	}
}
199 
/*
 * Feed the first `src->pos` bytes of the memory device's buffer into `filter`.
 * The pointer returned by mbfl_convert_filter_feed_string() is discarded.
 */
void mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src)
{
	(void)mbfl_convert_filter_feed_string(filter, src->buffer, src->pos);
}
204 
/*
 * Feed a NUL-terminated byte string into `filter`; the terminator itself is
 * not fed.
 *
 * Returns 0 when every byte was accepted, or -1 as soon as the filter
 * function returns a negative value.
 */
int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p)
{
	for (; *p != 0; p++) {
		if (filter->filter_function(*p, filter) < 0) {
			return -1;
		}
	}

	return 0;
}
216 
/*
 * Feed the upper-case hexadecimal representation of the low 32 bits of `w`
 * into `filter`, most significant digit first and without leading zeroes.
 * A value whose digits are all zero is written as a single '0'.
 *
 * Returns the result of the last filter call; stops and returns immediately
 * when a digit is rejected with a negative result.
 */
static int mbfl_filt_conv_output_hex(unsigned int w, mbfl_convert_filter *filter)
{
	int shift = 28;
	int status;

	/* skip over leading zero nibbles */
	while (shift >= 0 && ((w >> shift) & 0xF) == 0) {
		shift -= 4;
	}

	if (shift < 0) {
		/* every nibble was zero */
		return filter->filter_function('0', filter);
	}

	/* `shift` now selects the most significant non-zero nibble */
	do {
		int digit = (w >> shift) & 0xF;

		status = filter->filter_function(mbfl_hexchar_table[digit], filter);
		if (status < 0) {
			return status;
		}
		shift -= 4;
	} while (shift >= 0);

	return status;
}
241 
242 /* illegal character output function for conv-filter */
mbfl_filt_conv_illegal_output(int c,mbfl_convert_filter * filter)243 int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
244 {
245 	unsigned int w = c;
246 	int ret = 0;
247 	int mode_backup = filter->illegal_mode;
248 	int substchar_backup = filter->illegal_substchar;
249 
250 	/* The used substitution character may not be supported by the target character encoding.
251 	 * If that happens, first try to use "?" instead and if that also fails, silently drop the
252 	 * character. */
253 	if (filter->illegal_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR
254 			&& filter->illegal_substchar != '?') {
255 		filter->illegal_substchar = '?';
256 	} else {
257 		filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
258 	}
259 
260 	switch (mode_backup) {
261 	case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR:
262 		ret = (*filter->filter_function)(substchar_backup, filter);
263 		break;
264 
265 	case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
266 		if (w != MBFL_BAD_INPUT) {
267 			ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
268 			if (ret < 0)
269 				break;
270 			ret = mbfl_filt_conv_output_hex(w, filter);
271 		} else {
272 			ret = (*filter->filter_function)(substchar_backup, filter);
273 		}
274 		break;
275 
276 	case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
277 		if (w != MBFL_BAD_INPUT) {
278 			ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
279 			if (ret < 0)
280 				break;
281 			ret = mbfl_filt_conv_output_hex(w, filter);
282 			if (ret < 0)
283 				break;
284 			ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
285 		} else {
286 			ret = (*filter->filter_function)(substchar_backup, filter);
287 		}
288 		break;
289 
290 	case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE:
291 	default:
292 		break;
293 	}
294 
295 	filter->illegal_mode = mode_backup;
296 	filter->illegal_substchar = substchar_backup;
297 	filter->num_illegalchar++;
298 
299 	return ret;
300 }
301 
mbfl_convert_filter_get_vtbl(const mbfl_encoding * from,const mbfl_encoding * to)302 const struct mbfl_convert_vtbl* mbfl_convert_filter_get_vtbl(const mbfl_encoding *from, const mbfl_encoding *to)
303 {
304 	if (to->no_encoding == mbfl_no_encoding_base64 ||
305 	    to->no_encoding == mbfl_no_encoding_qprint ||
306 	    to->no_encoding == mbfl_no_encoding_7bit) {
307 		from = &mbfl_encoding_8bit;
308 	} else if (from->no_encoding == mbfl_no_encoding_base64 ||
309 			   from->no_encoding == mbfl_no_encoding_qprint ||
310 			   from->no_encoding == mbfl_no_encoding_uuencode ||
311 			   from->no_encoding == mbfl_no_encoding_7bit) {
312 		to = &mbfl_encoding_8bit;
313 	}
314 
315 	if (to == from && (to == &mbfl_encoding_wchar || to == &mbfl_encoding_8bit)) {
316 		return &vtbl_pass;
317 	}
318 
319 	if (to->no_encoding == mbfl_no_encoding_wchar) {
320 		return from->input_filter;
321 	} else if (from->no_encoding == mbfl_no_encoding_wchar) {
322 		return to->output_filter;
323 	} else {
324 		int i = 0;
325 		const struct mbfl_convert_vtbl *vtbl;
326 		while ((vtbl = mbfl_special_filter_list[i++])) {
327 			if (vtbl->from == from->no_encoding && vtbl->to == to->no_encoding) {
328 				return vtbl;
329 			}
330 		}
331 		return NULL;
332 	}
333 }
334 
335 /*
336  * commonly used constructor
337  */
mbfl_filt_conv_common_ctor(mbfl_convert_filter * filter)338 void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
339 {
340 	filter->status = filter->cache = 0;
341 }
342 
/*
 * Commonly used flush handler: calls the filter's flush_function with the
 * filter's data pointer when a flush_function is set. Always returns 0.
 */
int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
{
	if (filter->flush_function != NULL) {
		filter->flush_function(filter->data);
	}

	return 0;
}
350