1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_qprint.h"
32 
33 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
34 static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35 
/* NULL-terminated list of alternative names under which this encoding is known */
static const char *mbfl_encoding_qprint_aliases[] = {
	"qprint",
	NULL
};
37 
38 const mbfl_encoding mbfl_encoding_qprint = {
39 	mbfl_no_encoding_qprint,
40 	"Quoted-Printable",
41 	"Quoted-Printable",
42 	mbfl_encoding_qprint_aliases,
43 	NULL,
44 	MBFL_ENCTYPE_GL_UNSAFE,
45 	NULL,
46 	NULL,
47 	mb_qprint_to_wchar,
48 	mb_wchar_to_qprint,
49 	NULL
50 };
51 
52 const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
53 	mbfl_no_encoding_8bit,
54 	mbfl_no_encoding_qprint,
55 	mbfl_filt_conv_common_ctor,
56 	NULL,
57 	mbfl_filt_conv_qprintenc,
58 	mbfl_filt_conv_qprintenc_flush,
59 	NULL,
60 };
61 
62 const struct mbfl_convert_vtbl vtbl_qprint_8bit = {
63 	mbfl_no_encoding_qprint,
64 	mbfl_no_encoding_8bit,
65 	mbfl_filt_conv_common_ctor,
66 	NULL,
67 	mbfl_filt_conv_qprintdec,
68 	mbfl_filt_conv_qprintdec_flush,
69 	NULL,
70 };
71 
72 
/*
 * Evaluate `statement` once; if the result is negative, make the enclosing
 * function return -1 immediately. Only usable inside functions returning int.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
74 
75 /*
76  * any => Quoted-Printable
77  */
78 
/*
 * Encoder filter: consumes one input byte per call and emits Quoted-Printable.
 *
 * The filter runs one byte behind its input: each call stores `c` in
 * filter->cache and encodes the byte stored by the previous call, so that a
 * CR can be judged together with the byte that follows it.
 *
 * filter->status layout:
 *   bits 0-7   non-zero once a byte has been cached
 *   bits 8-15  number of characters emitted on the current output line
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	int prev, line_len;

	if ((filter->status & 0xff) == 0) {
		/* Very first byte: nothing to encode yet, just remember it */
		filter->cache = c;
		filter->status++;
		return 0;
	}

	prev = filter->cache;
	filter->cache = c;
	line_len = (filter->status & 0xff00) >> 8;

	if (prev == 0) {
		/* NUL is passed through as-is and restarts the line counter */
		CK((*filter->output_function)(prev, filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	if (prev == '\n' || (prev == '\r' && c != '\n')) {
		/* LF, or a CR that is not followed by LF: emit a CRLF line break */
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	if (prev == '\r') {
		/* CR of a CRLF pair: the LF now in the cache will emit the break */
		return 0;
	}

	if (line_len >= 72) {
		/* Line is getting long: insert a soft line break ("=" CRLF) */
		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
	}

	if (prev < 0 || prev >= 0x80 || prev == '=') {
		/* Outside 7-bit ASCII, or the escape character itself: "=XX" */
		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)(hex_digits[(prev >> 4) & 0xf], filter->data));
		CK((*filter->output_function)(hex_digits[prev & 0xf], filter->data));
		filter->status += 0x300;	/* three characters added to the line */
	} else {
		CK((*filter->output_function)(prev, filter->data));
		filter->status += 0x100;	/* one character added to the line */
	}

	return 0;
}
142 
/*
 * Flush the Quoted-Printable encoder at end of input.
 *
 * Feeds a NUL byte to the filter function so that the byte still held in
 * filter->cache gets encoded (presumably filter->filter_function is
 * mbfl_filt_conv_qprintenc, which always runs one byte behind), then clears
 * the low 16 bits of filter->status and the cache, and finally invokes the
 * next stage's flush function if one is set.
 *
 * Returns 0 on success, or -1 if encoding the cached byte failed. The state
 * reset and the downstream flush are performed in either case, so the only
 * difference on failure is the return value.
 */
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
{
	/* flush filter cache; remember whether the output stage reported an error */
	int result = (*filter->filter_function)('\0', filter);

	filter->status &= ~0xffff;
	filter->cache = 0;

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return (result < 0) ? -1 : 0;
}
156 
/*
 * Lookup table indexed by byte value (0x00-0xFF): gives the numeric value
 * 0-15 of an ASCII hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f') and -1 for
 * every other byte. The table is never written to, hence const.
 */
static const int hex2code_map[] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,	/* '0'-'9' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 'A'-'F' */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 'a'-'f' */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
175 
176 /*
177  * Quoted-Printable => any
178  */
/*
 * Decoder filter: consumes one Quoted-Printable byte per call and emits the
 * decoded bytes through filter->output_function.
 *
 * filter->status is the decoder state:
 *   0  plain text
 *   1  an '=' has been seen
 *   2  '=' plus one hex digit has been seen (the digit is in filter->cache)
 *   3  '=' followed by CR has been seen
 *
 * Malformed escapes are not dropped: the bytes consumed so far are emitted
 * literally. Returns 0 on success, or -1 if the output function reports a
 * negative result.
 */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
	const int state = filter->status;
	int low, decoded;

	if (state == 1) {
		if (hex2code_map[c & 0xff] >= 0) {
			/* first hex digit of "=XX": keep it until the second arrives */
			filter->cache = c;
			filter->status = 2;
		} else if (c == 0x0d) {
			/* "=" CR: soft line break, an LF may still follow */
			filter->status = 3;
		} else if (c == 0x0a) {
			/* "=" LF: soft line break, nothing is emitted */
			filter->status = 0;
		} else {
			/* not an escape after all: emit '=' and the byte unchanged */
			CK((*filter->output_function)(0x3d, filter->data));
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		}
	} else if (state == 2) {
		low = hex2code_map[c & 0xff];
		if (low >= 0) {
			decoded = hex2code_map[filter->cache] << 4 | low;
		} else {
			/* second digit is invalid: emit '=' and the cached digit, then `c` */
			CK((*filter->output_function)(0x3d, filter->data));
			CK((*filter->output_function)(filter->cache, filter->data));
			decoded = c;
		}
		CK((*filter->output_function)(decoded, filter->data));
		filter->status = 0;
	} else if (state == 3) {
		/* after "=" CR: swallow a following LF, pass anything else through */
		if (c != 0x0a) {
			CK((*filter->output_function)(c, filter->data));
		}
		filter->status = 0;
	} else {
		if (c == 0x3d) {
			/* '=' opens an escape sequence or a soft line break */
			filter->status = 1;
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
	}

	return 0;
}
227 
/*
 * Flush the Quoted-Printable decoder at end of input.
 *
 * The decoder state is reset first. If the input ended in the middle of an
 * escape sequence, the bytes consumed so far are emitted literally: '=' when
 * only the '=' had been seen (state 1), '=' plus the cached hex digit when one
 * digit had been seen (state 2). Afterwards the next stage's flush function is
 * called if one is set.
 *
 * Returns 0 on success, or -1 if the output function reports a negative
 * result (in which case the next stage is not flushed).
 */
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
{
	const int pending_state = filter->status;
	const int pending_digit = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	/* flush fragments of an unfinished escape sequence */
	if (pending_state == 1 || pending_state == 2) {
		CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
		if (pending_state == 2) {
			CK((*filter->output_function)(pending_digit, filter->data));
		}
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
250 
mb_qprint_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)251 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
252 {
253 	unsigned char *p = *in, *e = p + *in_len;
254 	uint32_t *out = buf, *limit = buf + bufsize - 2;
255 
256 	while (p < e && out < limit) {
257 		unsigned char c = *p++;
258 
259 		if (c == '=' && p < e) {
260 			unsigned char c2 = *p++;
261 
262 			if (hex2code_map[c2] >= 0 && p < e) {
263 				unsigned char c3 = *p++;
264 
265 				if (hex2code_map[c3] >= 0) {
266 					*out++ = hex2code_map[c2] << 4 | hex2code_map[c3];
267 				} else {
268 					*out++ = '=';
269 					*out++ = c2;
270 					*out++ = c3;
271 				}
272 			} else if (c2 == '\r' && p < e) {
273 				unsigned char c3 = *p++;
274 
275 				if (c3 != '\n') {
276 					*out++ = c3;
277 				}
278 			} else if (c2 != '\n') {
279 				*out++ = '=';
280 				*out++ = c2;
281 			}
282 		} else {
283 			*out++ = c;
284 		}
285 	}
286 
287 	*in_len = e - p;
288 	*in = p;
289 	return out - buf;
290 }
291 
/*
 * Map a value to its upper-case hexadecimal digit: values below 10 yield
 * '0'-'9', the value 10 yields 'A', 11 yields 'B', and so on.
 * Callers in this file pass values in the range 0-15.
 */
static unsigned char qprint_enc_nibble(unsigned char nibble)
{
	return (nibble < 10) ? (nibble + '0') : (nibble - 10 + 'A');
}
300 
/*
 * Encode `len` input values as Quoted-Printable and append them to `buf`.
 *
 * in    input values; each one is treated as a raw byte, not as a codepoint
 * len   number of input values
 * buf   output buffer; buf->state carries the number of characters already
 *       written on the current output line from one call to the next
 * end   not used by this function
 *
 * NUL is copied through unchanged, LF becomes CRLF, CR produces no output.
 * Bytes >= 0x80 and '=' are written as "=XX"; everything else is copied
 * as-is. A soft line break ("=" CRLF) is inserted before a byte once 72 or
 * more characters are on the current line.
 */
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	unsigned int line_len;

	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	line_len = buf->state;

	while (len != 0) {
		/* `len` counts the values still to be read AFTER the current one */
		len--;

		/* Input values are assumed to be raw bytes 0x00-0xFF rather than
		 * real Unicode codepoints */
		uint32_t w = *in++;

		switch (w) {
		case '\0':
			out = mb_convert_buf_add(out, '\0');
			line_len = 0;
			continue;
		case '\n':
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_len = 0;
			continue;
		case '\r':
			/* No output */
			continue;
		default:
			break;
		}

		/* Quoted-Printable allows lines of up to 76 characters, but mbstring
		 * breaks them a little earlier */
		if (line_len >= 72) {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add3(out, '=', '\r', '\n');
			line_len = 0;
		}

		if (w < 0x80 && w != '=') {
			/* Plain ASCII */
			out = mb_convert_buf_add(out, w);
			line_len++;
		} else {
			/* Not ASCII, or the escape character itself: "=XX" */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
			unsigned char hi = qprint_enc_nibble((w >> 4) & 0xF);
			unsigned char lo = qprint_enc_nibble(w & 0xF);
			out = mb_convert_buf_add3(out, '=', hi, lo);
			line_len += 3;
		}
	}

	buf->state = line_len;
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
351