1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_qprint.h"
32 
33 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
34 static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35 
/* NULL-terminated list of alias names for the Quoted-Printable encoding. */
static const char *mbfl_encoding_qprint_aliases[] = {
	"qprint",
	NULL
};
37 
38 const mbfl_encoding mbfl_encoding_qprint = {
39 	mbfl_no_encoding_qprint,
40 	"Quoted-Printable",
41 	"Quoted-Printable",
42 	mbfl_encoding_qprint_aliases,
43 	NULL,
44 	MBFL_ENCTYPE_GL_UNSAFE,
45 	NULL,
46 	NULL,
47 	mb_qprint_to_wchar,
48 	mb_wchar_to_qprint,
49 	NULL,
50 	NULL,
51 };
52 
53 const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
54 	mbfl_no_encoding_8bit,
55 	mbfl_no_encoding_qprint,
56 	mbfl_filt_conv_common_ctor,
57 	NULL,
58 	mbfl_filt_conv_qprintenc,
59 	mbfl_filt_conv_qprintenc_flush,
60 	NULL,
61 };
62 
63 const struct mbfl_convert_vtbl vtbl_qprint_8bit = {
64 	mbfl_no_encoding_qprint,
65 	mbfl_no_encoding_8bit,
66 	mbfl_filt_conv_common_ctor,
67 	NULL,
68 	mbfl_filt_conv_qprintdec,
69 	mbfl_filt_conv_qprintdec_flush,
70 	NULL,
71 };
72 
73 
/* Evaluate `statement` once; if its value is negative, make the enclosing
 * function return -1. Wrapped in do/while(0) so that it behaves as a single
 * statement after an unbraced if/else. */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
75 
76 /*
77  * any => Quoted-Printable
78  */
79 
/*
 * Quoted-Printable encoder, called once per input character.
 *
 * The encoder runs one character behind its input: the character passed in
 * is stored in filter->cache and the previously cached one is emitted, so
 * that a CR can be inspected together with the character that follows it.
 *
 * filter->status layout as used here:
 *   bits 0-7  : zero until the first character has been cached
 *   bits 8-15 : number of characters written on the current output line
 *
 * Returns 0 on success, or -1 as soon as the output function reports failure.
 */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	int prev, column;

	if ((filter->status & 0xff) == 0) {
		/* First character: nothing to emit yet, only remember it */
		filter->cache = c;
		filter->status++;
		return 0;
	}

	prev = filter->cache;
	filter->cache = c;
	column = (filter->status & 0xff00) >> 8;

	if (prev == 0) {
		/* NUL is passed through unchanged and restarts the line count */
		CK((*filter->output_function)(prev, filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	if (prev == '\r' && c == '\n') {
		/* First half of a CR LF pair: drop the CR, the LF is handled on
		 * the next call and produces the line break */
		return 0;
	}

	if (prev == '\n' || prev == '\r') {
		/* LF, or a CR not followed by LF: write a CR LF line break */
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	if (column >= 72) {
		/* Line is long enough: insert a soft line break ("=" CR LF) */
		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
	}

	if (prev > 0 && prev < 0x80 && prev != '=') {
		/* Plain ASCII goes out as-is and occupies one column */
		CK((*filter->output_function)(prev, filter->data));
		filter->status += 0x100;
	} else {
		/* Everything else becomes "=XX" (upper-case hex), three columns */
		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)(hex_digits[(prev >> 4) & 0xf], filter->data));
		CK((*filter->output_function)(hex_digits[prev & 0xf], filter->data));
		filter->status += 0x300;
	}

	return 0;
}
143 
/*
 * End-of-input handler for the Quoted-Printable encoder.
 *
 * Feeds one extra NUL through filter->filter_function so that the character
 * still held in filter->cache is written out, then clears the low 16 bits of
 * filter->status and the cache, and finally calls filter->flush_function if
 * one is set. The return values of both calls are ignored.
 *
 * Always returns 0.
 */
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
{
	/* Push the pending cached character through the filter */
	(void)(*filter->filter_function)('\0', filter);

	/* Reset encoder state */
	filter->cache = 0;
	filter->status &= ~0xffff;

	if (filter->flush_function) {
		(void)(*filter->flush_function)(filter->data);
	}

	return 0;
}
157 
/*
 * Lookup table mapping a byte value (0x00-0xFF) to the value of the
 * hexadecimal digit it represents, or -1 if the byte is not a hex digit.
 * Both upper-case ('A'-'F') and lower-case ('a'-'f') digits are accepted.
 *
 * The table is read-only and has exactly 256 entries so that any index in
 * the range of an unsigned char (or a value masked with 0xff) is valid.
 */
static const int hex2code_map[256] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
176 
177 /*
178  * Quoted-Printable => any
179  */
/*
 * Quoted-Printable decoder, called once per input character.
 *
 * filter->status tracks the position inside an escape sequence:
 *   0 (or any other value) - ordinary text
 *   1 - '=' has been seen
 *   2 - '=' and one hex digit (held in filter->cache) have been seen
 *   3 - '=' followed by CR has been seen (soft line break, LF expected)
 *
 * Malformed sequences are not dropped: the characters consumed so far are
 * written to the output literally.
 *
 * Returns 0 on success, or -1 as soon as the output function reports failure.
 */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
	int n, m;

	switch (filter->status) {
	case 1:
		if (hex2code_map[c & 0xff] >= 0) {
			/* First hex digit: keep it until the second one arrives */
			filter->cache = c;
			filter->status = 2;
		} else if (c == 0x0d) {	/* soft line feed */
			filter->status = 3;
		} else if (c == 0x0a) {	/* soft line feed */
			filter->status = 0;
		} else {
			/* Not an escape after all: emit '=' and the character as-is */
			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		}
		break;
	case 2:
		m = hex2code_map[c & 0xff];
		if (m < 0) {
			/* Second character is not a hex digit: emit '=', the cached
			 * digit and then the current character literally */
			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
			CK((*filter->output_function)(filter->cache, filter->data));
			n = c;
		} else {
			/* The cached digit was validated through a 0xff mask in
			 * state 1, so it must be looked up through the same mask;
			 * indexing with the raw value could read outside the
			 * 256-entry table. */
			n = hex2code_map[filter->cache & 0xff] << 4 | m;
		}
		CK((*filter->output_function)(n, filter->data));
		filter->status = 0;
		break;
	case 3:
		/* After "=\r": swallow the LF, pass anything else through */
		if (c != 0x0a) {		/* LF */
			CK((*filter->output_function)(c, filter->data));
		}
		filter->status = 0;
		break;
	default:
		if (c == 0x3d) {		/* '=' */
			filter->status = 1;
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
		break;
	}

	return 0;
}
228 
/*
 * End-of-input handler for the Quoted-Printable decoder.
 *
 * If the input stopped in the middle of an escape sequence, the characters
 * consumed so far are written out literally: "=" when only the '=' had been
 * seen (status 1), or "=" plus the cached hex digit (status 2). The decoder
 * state is reset before anything is written. Afterwards
 * filter->flush_function is called if one is set; its result is ignored.
 *
 * Returns 0 on success, or -1 if the output function reports failure (in
 * which case the flush function is not called).
 */
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
{
	const int pending_state = filter->status;
	const int pending_char = filter->cache;

	/* Reset first so the filter is clean even if output fails below */
	filter->status = 0;
	filter->cache = 0;

	/* Write out the fragment of an unfinished "=XX" sequence */
	if (pending_state == 1 || pending_state == 2) {
		CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
		if (pending_state == 2) {
			CK((*filter->output_function)(pending_char, filter->data));
		}
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
251 
mb_qprint_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)252 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
253 {
254 	unsigned char *p = *in, *e = p + *in_len;
255 	uint32_t *out = buf, *limit = buf + bufsize - 2;
256 
257 	while (p < e && out < limit) {
258 		unsigned char c = *p++;
259 
260 		if (c == '=' && p < e) {
261 			unsigned char c2 = *p++;
262 
263 			if (hex2code_map[c2] >= 0 && p < e) {
264 				unsigned char c3 = *p++;
265 
266 				if (hex2code_map[c3] >= 0) {
267 					*out++ = hex2code_map[c2] << 4 | hex2code_map[c3];
268 				} else {
269 					*out++ = '=';
270 					*out++ = c2;
271 					*out++ = c3;
272 				}
273 			} else if (c2 == '\r' && p < e) {
274 				unsigned char c3 = *p++;
275 
276 				if (c3 != '\n') {
277 					*out++ = c3;
278 				}
279 			} else if (c2 != '\n') {
280 				*out++ = '=';
281 				*out++ = c2;
282 			}
283 		} else {
284 			*out++ = c;
285 		}
286 	}
287 
288 	*in_len = e - p;
289 	*in = p;
290 	return out - buf;
291 }
292 
/* Convert a 4-bit value to its upper-case hexadecimal digit:
 * 0-9 map to '0'-'9', 10-15 map to 'A'-'F'. */
static unsigned char qprint_enc_nibble(unsigned char nibble)
{
	return (nibble < 10) ? (nibble + '0') : (nibble - 10 + 'A');
}
301 
/*
 * Buffer-based Quoted-Printable encoder.
 *
 * Encodes `len` values from `in` and appends the result to `buf`. The number
 * of characters already written on the current output line is loaded from
 * buf->state on entry and stored back on exit. `end` is not used.
 *
 *   - NUL is copied through and restarts the line count
 *   - LF becomes CR LF and restarts the line count
 *   - CR produces no output
 *   - values >= 0x80 and '=' become "=XX" (upper-case hex)
 *   - everything else is copied through unchanged
 */
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* One output byte per input value is reserved up front; the branches
	 * that write more than one byte ask for the extra room themselves */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	unsigned int line_len = buf->state;

	while (len--) {
		/* The input values are treated as raw bytes (0x00-0xFF), not as
		 * Unicode codepoints */
		uint32_t byte = *in++;

		switch (byte) {
		case 0:
			out = mb_convert_buf_add(out, '\0');
			line_len = 0;
			continue;
		case '\n':
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_len = 0;
			continue;
		case '\r':
			/* No output */
			continue;
		default:
			break;
		}

		/* Quoted-Printable allows lines of up to 76 characters, but a soft
		 * line break is inserted here once 72 have been written */
		if (line_len >= 72) {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add3(out, '=', '\r', '\n');
			line_len = 0;
		}

		if (byte < 0x80 && byte != '=') {
			/* Plain ASCII */
			out = mb_convert_buf_add(out, byte);
			line_len++;
		} else {
			/* Not ASCII, or the escape character itself */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
			out = mb_convert_buf_add3(out, '=', qprint_enc_nibble((byte >> 4) & 0xF), qprint_enc_nibble(byte & 0xF));
			line_len += 3;
		}
	}

	buf->state = line_len;
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
352