1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
/*
 * The source code included in this file was separated from mbfilter.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
 * mbfilter.c is included in this package.
 *
 */
30
31 #include <stddef.h>
32
33 #include "mbfl_encoding.h"
34 #include "mbfl_filter_output.h"
35 #include "mbfilter_pass.h"
36 #include "mbfilter_8bit.h"
37 #include "mbfilter_wchar.h"
38
39 #include "filters/mbfilter_euc_cn.h"
40 #include "filters/mbfilter_hz.h"
41 #include "filters/mbfilter_euc_tw.h"
42 #include "filters/mbfilter_big5.h"
43 #include "filters/mbfilter_uhc.h"
44 #include "filters/mbfilter_euc_kr.h"
45 #include "filters/mbfilter_iso2022_kr.h"
46 #include "filters/mbfilter_sjis.h"
47 #include "filters/mbfilter_sjis_2004.h"
48 #include "filters/mbfilter_sjis_mobile.h"
49 #include "filters/mbfilter_sjis_mac.h"
50 #include "filters/mbfilter_cp51932.h"
51 #include "filters/mbfilter_jis.h"
52 #include "filters/mbfilter_iso2022_jp_ms.h"
53 #include "filters/mbfilter_iso2022jp_2004.h"
54 #include "filters/mbfilter_iso2022jp_mobile.h"
55 #include "filters/mbfilter_euc_jp.h"
56 #include "filters/mbfilter_euc_jp_2004.h"
57 #include "filters/mbfilter_euc_jp_win.h"
58 #include "filters/mbfilter_gb18030.h"
59 #include "filters/mbfilter_cp932.h"
60 #include "filters/mbfilter_cp936.h"
61 #include "filters/mbfilter_cp5022x.h"
62 #include "filters/mbfilter_base64.h"
63 #include "filters/mbfilter_qprint.h"
64 #include "filters/mbfilter_uuencode.h"
65 #include "filters/mbfilter_7bit.h"
66 #include "filters/mbfilter_utf7.h"
67 #include "filters/mbfilter_utf7imap.h"
68 #include "filters/mbfilter_utf8.h"
69 #include "filters/mbfilter_utf8_mobile.h"
70 #include "filters/mbfilter_utf16.h"
71 #include "filters/mbfilter_utf32.h"
72 #include "filters/mbfilter_ucs4.h"
73 #include "filters/mbfilter_ucs2.h"
74 #include "filters/mbfilter_htmlent.h"
75 #include "filters/mbfilter_singlebyte.h"
76
/*
 * Hex character table "0123456789ABCDEF".
 *
 * Entry n holds the ASCII code of the hexadecimal digit with value n (0..15).
 * The codes are spelled out numerically so the emitted bytes are ASCII no
 * matter what the compiler's execution character set is. The table is only
 * ever read, hence it is const.
 */
static const char mbfl_hexchar_table[] = {
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46
};
81
82 static const struct mbfl_convert_vtbl *mbfl_special_filter_list[] = {
83 &vtbl_8bit_b64,
84 &vtbl_b64_8bit,
85 &vtbl_uuencode_8bit,
86 &vtbl_8bit_qprint,
87 &vtbl_qprint_8bit,
88 &vtbl_8bit_7bit,
89 &vtbl_7bit_8bit,
90 &vtbl_pass,
91 NULL
92 };
93
/*
 * Fill in every field of an already-allocated `filter`.
 *
 * filter           - filter object to (re)initialise
 * from, to         - source and destination encodings recorded on the filter
 * vtbl             - conversion vtable supplying the dtor, filter, flush and
 *                    copy callbacks, plus the constructor run at the end
 * output_function  - sink for converted output; when NULL,
 *                    mbfl_filter_output_null is installed instead
 * flush_function   - stored as-is (may be NULL)
 * data             - opaque pointer stored as-is
 *
 * The illegal-character settings are always reset to their defaults:
 * substitution-character mode with '?', and a zero illegal-character count.
 */
static void mbfl_convert_filter_init(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to,
	const struct mbfl_convert_vtbl *vtbl, output_function_t output_function, flush_function_t flush_function, void* data)
{
	/* Conversion callbacks are taken straight from the vtable */
	filter->filter_dtor = vtbl->filter_dtor;
	filter->filter_function = vtbl->filter_function;
	filter->filter_flush = (filter_flush_t)vtbl->filter_flush;
	filter->filter_copy = vtbl->filter_copy;

	/* Encoding pair this filter converts between */
	filter->from = from;
	filter->to = to;

	/* Output side; a missing output callback is replaced by the null sink */
	filter->output_function = (output_function != NULL) ? output_function : mbfl_filter_output_null;
	filter->flush_function = flush_function;
	filter->data = data;

	/* Default handling of characters which cannot be converted */
	filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
	filter->illegal_substchar = '?';
	filter->num_illegalchar = 0;

	/* Let the conversion set up its own state last, once all fields are valid */
	vtbl->filter_ctor(filter);
}
119
mbfl_convert_filter_new(const mbfl_encoding * from,const mbfl_encoding * to,output_function_t output_function,flush_function_t flush_function,void * data)120 mbfl_convert_filter* mbfl_convert_filter_new(const mbfl_encoding *from, const mbfl_encoding *to, output_function_t output_function,
121 flush_function_t flush_function, void* data)
122 {
123 const struct mbfl_convert_vtbl *vtbl = mbfl_convert_filter_get_vtbl(from, to);
124 if (vtbl == NULL) {
125 return NULL;
126 }
127
128 mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter));
129 mbfl_convert_filter_init(filter, from, to, vtbl, output_function, flush_function, data);
130 return filter;
131 }
132
mbfl_convert_filter_new2(const struct mbfl_convert_vtbl * vtbl,output_function_t output_function,flush_function_t flush_function,void * data)133 mbfl_convert_filter* mbfl_convert_filter_new2(const struct mbfl_convert_vtbl *vtbl, output_function_t output_function,
134 flush_function_t flush_function, void* data)
135 {
136 const mbfl_encoding *from_encoding = mbfl_no2encoding(vtbl->from);
137 const mbfl_encoding *to_encoding = mbfl_no2encoding(vtbl->to);
138
139 mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter));
140 mbfl_convert_filter_init(filter, from_encoding, to_encoding, vtbl, output_function, flush_function, data);
141 return filter;
142 }
143
/*
 * Destroy a conversion filter: run its destructor callback, if it has one,
 * and then release the filter object itself with efree().
 */
void mbfl_convert_filter_delete(mbfl_convert_filter *filter)
{
	if (filter->filter_dtor != NULL) {
		filter->filter_dtor(filter);
	}

	efree(filter);
}
151
152 /* Feed a char, return 0 if ok - used by mailparse ext */
mbfl_convert_filter_feed(int c,mbfl_convert_filter * filter)153 int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter)
154 {
155 return (*filter->filter_function)(c, filter);
156 }
157
158 /* Feed string into `filter` byte by byte; return pointer to first byte not processed */
mbfl_convert_filter_feed_string(mbfl_convert_filter * filter,unsigned char * p,size_t len)159 unsigned char* mbfl_convert_filter_feed_string(mbfl_convert_filter *filter, unsigned char *p, size_t len)
160 {
161 while (len--) {
162 if ((*filter->filter_function)(*p++, filter) < 0) {
163 break;
164 }
165 }
166 return p;
167 }
168
/*
 * Invoke the filter's flush callback.
 *
 * Whatever the callback returns is discarded; this function always
 * returns 0.
 */
int mbfl_convert_filter_flush(mbfl_convert_filter *filter)
{
	filter->filter_flush(filter);

	return 0;
}
174
/*
 * Re-target an existing filter to convert from `from` to `to`.
 *
 * The current conversion's destructor (if any) is run first. The filter is
 * then re-initialised in place, keeping its output function, flush function
 * and data pointer. If no conversion exists for the requested pair, the
 * pass-through vtable (vtbl_pass) is used instead.
 *
 * Because the filter goes through mbfl_convert_filter_init() again, its
 * illegal-character mode, substitution character and illegal-character count
 * return to their defaults.
 */
void mbfl_convert_filter_reset(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to)
{
	/* Tear down state belonging to the previous conversion */
	if (filter->filter_dtor != NULL) {
		filter->filter_dtor(filter);
	}

	const struct mbfl_convert_vtbl *found = mbfl_convert_filter_get_vtbl(from, to);
	const struct mbfl_convert_vtbl *vtbl = (found != NULL) ? found : &vtbl_pass;

	mbfl_convert_filter_init(filter, from, to, vtbl, filter->output_function, filter->flush_function, filter->data);
}
189
/*
 * Copy the state of filter `src` into `dest`.
 *
 * A filter that provides its own copy callback is copied through that
 * callback; every other filter is copied by plain structure assignment.
 */
void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest)
{
	if (src->filter_copy == NULL) {
		*dest = *src;
		return;
	}

	src->filter_copy(src, dest);
}
199
/*
 * Feed the first `src->pos` bytes of the memory device's buffer into
 * `filter`. The pointer returned by mbfl_convert_filter_feed_string() is not
 * used, so an early stop is not reported to the caller.
 */
void mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src)
{
	(void) mbfl_convert_filter_feed_string(filter, src->buffer, src->pos);
}
204
/*
 * Feed the NUL-terminated byte string `p` into `filter`; the terminator
 * itself is not fed.
 *
 * Returns 0 once the whole string has been fed, or -1 as soon as the
 * filter's conversion callback returns a negative value.
 */
int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p)
{
	for (; *p != 0; p++) {
		if (filter->filter_function(*p, filter) < 0) {
			return -1;
		}
	}

	return 0;
}
216
/*
 * Feed `w` into `filter` as upper-case hexadecimal digits without leading
 * zeros; a value of zero is written as the single digit '0'.
 *
 * Returns the result of the last call to the filter's conversion callback.
 * A negative result aborts the output immediately and is returned as-is.
 */
static int mbfl_filt_conv_output_hex(unsigned int w, mbfl_convert_filter *filter)
{
	bool digit_emitted = false;
	int status = 0;

	/* Walk the eight nibbles of `w`, most significant first */
	for (int shift = 28; shift >= 0; shift -= 4) {
		int nibble = (w >> shift) & 0xF;

		if (nibble == 0 && !digit_emitted) {
			/* Still inside the run of leading zeros */
			continue;
		}

		digit_emitted = true;
		status = filter->filter_function(mbfl_hexchar_table[nibble], filter);
		if (status < 0) {
			return status;
		}
	}

	if (!digit_emitted) {
		/* `w` was zero, so the loop above produced nothing */
		status = filter->filter_function('0', filter);
	}

	return status;
}
241
242 /* illegal character output function for conv-filter */
mbfl_filt_conv_illegal_output(int c,mbfl_convert_filter * filter)243 int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
244 {
245 unsigned int w = c;
246 int ret = 0;
247 int mode_backup = filter->illegal_mode;
248 int substchar_backup = filter->illegal_substchar;
249
250 /* The used substitution character may not be supported by the target character encoding.
251 * If that happens, first try to use "?" instead and if that also fails, silently drop the
252 * character. */
253 if (filter->illegal_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR
254 && filter->illegal_substchar != '?') {
255 filter->illegal_substchar = '?';
256 } else {
257 filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
258 }
259
260 switch (mode_backup) {
261 case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR:
262 ret = (*filter->filter_function)(substchar_backup, filter);
263 break;
264
265 case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
266 if (w != MBFL_BAD_INPUT) {
267 ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
268 if (ret < 0)
269 break;
270 ret = mbfl_filt_conv_output_hex(w, filter);
271 } else {
272 ret = (*filter->filter_function)(substchar_backup, filter);
273 }
274 break;
275
276 case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
277 if (w != MBFL_BAD_INPUT) {
278 ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
279 if (ret < 0)
280 break;
281 ret = mbfl_filt_conv_output_hex(w, filter);
282 if (ret < 0)
283 break;
284 ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
285 } else {
286 ret = (*filter->filter_function)(substchar_backup, filter);
287 }
288 break;
289
290 case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE:
291 default:
292 break;
293 }
294
295 filter->illegal_mode = mode_backup;
296 filter->illegal_substchar = substchar_backup;
297 filter->num_illegalchar++;
298
299 return ret;
300 }
301
mbfl_convert_filter_get_vtbl(const mbfl_encoding * from,const mbfl_encoding * to)302 const struct mbfl_convert_vtbl* mbfl_convert_filter_get_vtbl(const mbfl_encoding *from, const mbfl_encoding *to)
303 {
304 if (to->no_encoding == mbfl_no_encoding_base64 ||
305 to->no_encoding == mbfl_no_encoding_qprint ||
306 to->no_encoding == mbfl_no_encoding_7bit) {
307 from = &mbfl_encoding_8bit;
308 } else if (from->no_encoding == mbfl_no_encoding_base64 ||
309 from->no_encoding == mbfl_no_encoding_qprint ||
310 from->no_encoding == mbfl_no_encoding_uuencode ||
311 from->no_encoding == mbfl_no_encoding_7bit) {
312 to = &mbfl_encoding_8bit;
313 }
314
315 if (to == from && (to == &mbfl_encoding_wchar || to == &mbfl_encoding_8bit)) {
316 return &vtbl_pass;
317 }
318
319 if (to->no_encoding == mbfl_no_encoding_wchar) {
320 return from->input_filter;
321 } else if (from->no_encoding == mbfl_no_encoding_wchar) {
322 return to->output_filter;
323 } else {
324 int i = 0;
325 const struct mbfl_convert_vtbl *vtbl;
326 while ((vtbl = mbfl_special_filter_list[i++])) {
327 if (vtbl->from == from->no_encoding && vtbl->to == to->no_encoding) {
328 return vtbl;
329 }
330 }
331 return NULL;
332 }
333 }
334
335 /*
336 * commonly used constructor
337 */
mbfl_filt_conv_common_ctor(mbfl_convert_filter * filter)338 void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
339 {
340 filter->status = filter->cache = 0;
341 }
342
/*
 * Commonly used flush: if the filter has a flush function, call it with the
 * filter's data pointer. The callback's result is discarded; this function
 * always returns 0.
 */
int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
{
	if (filter->flush_function == NULL) {
		return 0;
	}

	filter->flush_function(filter->data);
	return 0;
}
350