1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
 * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_qprint.h"
32 #include "unicode_prop.h"
33 
34 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35 static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36 
/* NULL-terminated list of alternative names for the Quoted-Printable encoding. */
static const char *mbfl_encoding_qprint_aliases[] = {
	"qprint",
	NULL
};
38 
39 const mbfl_encoding mbfl_encoding_qprint = {
40 	mbfl_no_encoding_qprint,
41 	"Quoted-Printable",
42 	"Quoted-Printable",
43 	mbfl_encoding_qprint_aliases,
44 	NULL,
45 	MBFL_ENCTYPE_GL_UNSAFE,
46 	NULL,
47 	NULL,
48 	mb_qprint_to_wchar,
49 	mb_wchar_to_qprint,
50 	NULL
51 };
52 
53 const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
54 	mbfl_no_encoding_8bit,
55 	mbfl_no_encoding_qprint,
56 	mbfl_filt_conv_common_ctor,
57 	NULL,
58 	mbfl_filt_conv_qprintenc,
59 	mbfl_filt_conv_qprintenc_flush,
60 	NULL,
61 };
62 
63 const struct mbfl_convert_vtbl vtbl_qprint_8bit = {
64 	mbfl_no_encoding_qprint,
65 	mbfl_no_encoding_8bit,
66 	mbfl_filt_conv_common_ctor,
67 	NULL,
68 	mbfl_filt_conv_qprintdec,
69 	mbfl_filt_conv_qprintdec_flush,
70 	NULL,
71 };
72 
73 
/*
 * Evaluate `statement` exactly once and, if the result is negative, return -1
 * from the enclosing function. Wrapped in do/while(0) so it behaves as a
 * single statement after if/else.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
75 
76 /*
77  * any => Quoted-Printable
78  */
79 
/*
 * Per-character filter: raw bytes => Quoted-Printable.
 *
 * The filter runs one character behind its input. Each call stores `c` in
 * filter->cache and emits the character cached by the previous call, because
 * a CR can only be handled once it is known whether an LF follows it.
 *
 * Use of filter->status in this function:
 *   bits 0-7   zero until the first character has been cached
 *   bits 8-15  count of characters written to the current output line
 *   MBFL_QPRINT_STS_MIME_HEADER
 *              when set, line-break normalisation, soft line breaks and
 *              line-length counting are all skipped, and the external table
 *              mime_char_needs_qencode[] selects additional characters that
 *              must be escaped
 *
 * Returns 0 on success, or -1 as soon as the output function returns a
 * negative value.
 */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
{
	int s, line_len, header_mode, hi, lo;

	if ((filter->status & 0xff) == 0) {
		/* Very first character: nothing to emit yet, just remember it. */
		filter->cache = c;
		filter->status++;
		return 0;
	}

	/* Swap: `s` is the character to emit now, `c` waits for the next call. */
	s = filter->cache;
	filter->cache = c;
	line_len = (filter->status & 0xff00) >> 8;

	if (s == 0) {
		/* NUL is passed through unescaped and restarts the line counter. */
		CK((*filter->output_function)(s, filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	/* None of the paths below changes this flag before it is last read. */
	header_mode = (filter->status & MBFL_QPRINT_STS_MIME_HEADER) != 0;

	if (!header_mode) {
		if (s == 0x0a || (s == 0x0d && c != 0x0a)) {
			/* A bare LF, or a CR not followed by LF, is written as CR LF. */
			CK((*filter->output_function)(0x0d, filter->data));
			CK((*filter->output_function)(0x0a, filter->data));
			filter->status &= ~0xff00;
			return 0;
		}
		if (s == 0x0d) {
			/* CR followed by LF: drop the CR; the LF emits the pair next call. */
			return 0;
		}
		if (line_len >= 72) {
			/* Soft line break: "=" CR LF, then start counting a new line. */
			CK((*filter->output_function)(0x3d, filter->data));
			CK((*filter->output_function)(0x0d, filter->data));
			CK((*filter->output_function)(0x0a, filter->data));
			filter->status &= ~0xff00;
		}
	}

	if (s <= 0 || s >= 0x80 || s == 0x3d
	   || (header_mode && mime_char_needs_qencode[s])) {
		/* Not plain ASCII, or '=' itself: write "=" plus two uppercase hex digits. */
		hi = (s >> 4) & 0xf;
		lo = s & 0xf;
		CK((*filter->output_function)(0x3d, filter->data));
		CK((*filter->output_function)(hi + (hi < 10 ? 48 : 55), filter->data));
		CK((*filter->output_function)(lo + (lo < 10 ? 48 : 55), filter->data));
		if (!header_mode) {
			filter->status += 0x300;	/* three characters added to the line */
		}
	} else {
		CK((*filter->output_function)(s, filter->data));
		if (!header_mode) {
			filter->status += 0x100;	/* one character added to the line */
		}
	}

	return 0;
}
150 
/*
 * Flush for the Quoted-Printable encoder filter.
 *
 * Feeds a NUL through filter->filter_function so that the character still
 * held in filter->cache gets processed, clears the low 16 bits of
 * filter->status and the cache, then calls the downstream flush function if
 * one is set. Always returns 0; the results of the two calls are not checked.
 */
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
{
	/* Push out whatever the one-character look-behind is still holding. */
	filter->filter_function(0, filter);

	filter->cache = 0;
	filter->status &= ~0xffff;

	if (filter->flush_function) {
		filter->flush_function(filter->data);
	}

	return 0;
}
164 
/*
 * Hex-digit lookup indexed by byte value: '0'-'9' map to 0-9, 'A'-'F' and
 * 'a'-'f' map to 10-15, and every other byte maps to -1.
 *
 * The table is only ever read, so it is const. The explicit bound of 256
 * documents that any index taken from an unsigned char (or masked with 0xff)
 * is in range.
 */
static const int hex2code_map[256] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,	/* '0'-'9' */
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 'A'-'F' */
	/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 'a'-'f' */
	/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
183 
184 /*
185  * Quoted-Printable => any
186  */
/*
 * Per-character filter: Quoted-Printable => raw bytes.
 *
 * filter->status is a small state machine:
 *   0 (or any other value)  plain text; '=' starts an escape
 *   1  '=' seen
 *   2  '=' and one hex digit seen; the digit is held in filter->cache
 *   3  "=" CR seen (soft line break, an LF may follow)
 *
 * Malformed escapes are not rejected: the characters consumed so far are
 * written to the output unchanged.
 *
 * Returns 0 on success, or -1 as soon as the output function returns a
 * negative value.
 */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
	int n, m;

	switch (filter->status) {
	case 1:
		if (hex2code_map[c & 0xff] >= 0) {
			/* First hex digit: hold it until the second one arrives. */
			filter->cache = c;
			filter->status = 2;
		} else if (c == 0x0d) {	/* soft line feed */
			filter->status = 3;
		} else if (c == 0x0a) {	/* soft line feed */
			filter->status = 0;
		} else {
			/* Not an escape after all: emit the '=' and this character as-is. */
			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		}
		break;
	case 2:
		m = hex2code_map[c & 0xff];
		if (m < 0) {
			/* Second character is not a hex digit: emit "=", the held digit
			 * and then the current character unchanged. */
			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
			CK((*filter->output_function)(filter->cache, filter->data));
			n = c;
		} else {
			/* The held digit was validated through `c & 0xff` in state 1, so
			 * it must be looked up with the same mask; indexing with the raw
			 * value would read outside the table for values outside 0..255. */
			n = hex2code_map[filter->cache & 0xff] << 4 | m;
		}
		CK((*filter->output_function)(n, filter->data));
		filter->status = 0;
		break;
	case 3:
		/* After "=" CR: swallow a following LF, pass anything else through. */
		if (c != 0x0a) {		/* LF */
			CK((*filter->output_function)(c, filter->data));
		}
		filter->status = 0;
		break;
	default:
		if (c == 0x3d) {		/* '=' */
			filter->status = 1;
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
		break;
	}

	return 0;
}
235 
/*
 * Flush for the Quoted-Printable decoder filter.
 *
 * Resets filter->status and filter->cache, then writes out an escape that was
 * cut off by the end of input: "=" if only the '=' had been seen (state 1),
 * or "=" plus the held character if one hex digit had been seen (state 2).
 * Any other pending state, including "=" CR (state 3), produces no output.
 * Finally calls the downstream flush function if one is set.
 *
 * Returns 0 on success, or -1 if the output function returns a negative
 * value (in which case the downstream flush is not called).
 */
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
{
	const int pending_state = filter->status;
	const int held_char = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	if (pending_state == 1 || pending_state == 2) {
		CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
		if (pending_state == 2) {
			CK((*filter->output_function)(held_char, filter->data));
		}
	}

	if (filter->flush_function) {
		filter->flush_function(filter->data);
	}

	return 0;
}
258 
mb_qprint_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)259 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
260 {
261 	unsigned char *p = *in, *e = p + *in_len;
262 	uint32_t *out = buf, *limit = buf + bufsize - 2;
263 
264 	while (p < e && out < limit) {
265 		unsigned char c = *p++;
266 
267 		if (c == '=' && p < e) {
268 			unsigned char c2 = *p++;
269 
270 			if (hex2code_map[c2] >= 0 && p < e) {
271 				unsigned char c3 = *p++;
272 
273 				if (hex2code_map[c3] >= 0) {
274 					*out++ = hex2code_map[c2] << 4 | hex2code_map[c3];
275 				} else {
276 					*out++ = '=';
277 					*out++ = c2;
278 					*out++ = c3;
279 				}
280 			} else if (c2 == '\r' && p < e) {
281 				unsigned char c3 = *p++;
282 
283 				if (c3 != '\n') {
284 					*out++ = c3;
285 				}
286 			} else if (c2 != '\n') {
287 				*out++ = '=';
288 				*out++ = c2;
289 			}
290 		} else {
291 			*out++ = c;
292 		}
293 	}
294 
295 	*in_len = e - p;
296 	*in = p;
297 	return out - buf;
298 }
299 
/*
 * Convert a 4-bit value to its uppercase hexadecimal digit:
 * 0-9 => '0'-'9', 10-15 => 'A'-'F'.
 */
static unsigned char qprint_enc_nibble(unsigned char nibble)
{
	return (unsigned char)(nibble + (nibble < 10 ? '0' : 'A' - 10));
}
308 
/*
 * Buffer-based encoder: byte values => Quoted-Printable.
 *
 * in, len  input values and their count
 * buf      output conversion buffer; buf->state carries the number of
 *          characters already written to the current output line between calls
 * end      not used by this function
 *
 * The initial MB_CONVERT_BUF_ENSURE requests one output byte per input value.
 * Whenever a value needs more than one byte, the request is renewed as
 * "remaining inputs + bytes about to be written"; `len` has already been
 * decremented for the current value at that point.
 */
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	unsigned int line_len = buf->state;

	while (len > 0) {
		/* The input 'codepoints' are assumed to be raw bytes 0x00-0xFF,
		 * not real Unicode codepoints */
		uint32_t w = *in++;
		len--;

		switch (w) {
		case '\0':
			/* NUL is copied through and restarts the line counter */
			out = mb_convert_buf_add(out, '\0');
			line_len = 0;
			continue;
		case '\n':
			/* Every LF becomes CR LF */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_len = 0;
			continue;
		case '\r':
			/* CR produces no output */
			continue;
		default:
			break;
		}

		/* Quoted-Printable allows lines of up to 76 characters, but a soft
		 * line break is inserted here once 72 have been written */
		if (line_len >= 72) {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add3(out, '=', '\r', '\n');
			line_len = 0;
		}

		if (w < 0x80 && w != '=') {
			/* Plain ASCII: copied as-is */
			out = mb_convert_buf_add(out, w);
			line_len++;
		} else {
			/* Non-ASCII or '=': "=" followed by two uppercase hex digits */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
			out = mb_convert_buf_add3(out, '=', qprint_enc_nibble((w >> 4) & 0xF), qprint_enc_nibble(w & 0xF));
			line_len += 3;
		}
	}

	buf->state = line_len;
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
359