1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
/*
 * The source code included in this file was separated from mbfilter.c
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 *
 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_qprint.h"
32 #include "unicode_prop.h"
33
34 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35 static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36
37 static const char *mbfl_encoding_qprint_aliases[] = {"qprint", NULL};
38
39 const mbfl_encoding mbfl_encoding_qprint = {
40 mbfl_no_encoding_qprint,
41 "Quoted-Printable",
42 "Quoted-Printable",
43 mbfl_encoding_qprint_aliases,
44 NULL,
45 MBFL_ENCTYPE_GL_UNSAFE,
46 NULL,
47 NULL,
48 mb_qprint_to_wchar,
49 mb_wchar_to_qprint,
50 NULL
51 };
52
53 const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
54 mbfl_no_encoding_8bit,
55 mbfl_no_encoding_qprint,
56 mbfl_filt_conv_common_ctor,
57 NULL,
58 mbfl_filt_conv_qprintenc,
59 mbfl_filt_conv_qprintenc_flush,
60 NULL,
61 };
62
63 const struct mbfl_convert_vtbl vtbl_qprint_8bit = {
64 mbfl_no_encoding_qprint,
65 mbfl_no_encoding_8bit,
66 mbfl_filt_conv_common_ctor,
67 NULL,
68 mbfl_filt_conv_qprintdec,
69 mbfl_filt_conv_qprintdec_flush,
70 NULL,
71 };
72
73
/*
 * CK(statement): evaluate `statement` exactly once and, if its value is
 * negative, return -1 from the *enclosing function*. Used below to propagate
 * a failing output callback. Because the macro hides a `return`, it can only
 * be used inside functions whose return type can represent -1.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
75
/*
 * any => Quoted-Printable
 */
79
/*
 * Streaming encoder step: consume one input byte `c` and emit the
 * Quoted-Printable form of the PREVIOUS byte.
 *
 * The encoder runs one byte behind its input so that a CR can be judged by
 * the byte that follows it. State kept in the filter:
 *   - filter->cache            the byte received by the previous call
 *   - filter->status & 0xff    0 until the first byte has been cached
 *   - filter->status & 0xff00  number of characters written on the current
 *                              output line (reset on every line break)
 *   - MBFL_QPRINT_STS_MIME_HEADER bit: when set, line-break handling, soft
 *     line breaks and column counting are all skipped, and bytes flagged in
 *     mime_char_needs_qencode[] are hex-encoded as well.
 *
 * Returns 0 on success, or -1 as soon as filter->output_function returns a
 * negative value (via CK).
 */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
{
	int s, n;

	if ((filter->status & 0xff) == 0) {
		/* Very first byte: nothing to emit yet, just remember it. */
		filter->cache = c;
		filter->status++;
		return 0;
	}

	s = filter->cache;                      /* byte to encode now */
	filter->cache = c;                      /* byte to encode on the next call */
	n = (filter->status & 0xff00) >> 8;     /* current output column */

	if (s == 0) {
		/* NUL is passed through verbatim and restarts the column count. */
		CK((*filter->output_function)(s, filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
		if (s == 0x0a || (s == 0x0d && c != 0x0a)) {
			/* A bare LF or a bare CR becomes a CRLF line break. */
			CK((*filter->output_function)(0x0d, filter->data));
			CK((*filter->output_function)(0x0a, filter->data));
			filter->status &= ~0xff00;
			return 0;
		}
		if (s == 0x0d) {
			/* CR followed by LF: drop the CR, the LF emits the CRLF next call. */
			return 0;
		}
	}

	if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0 && n >= 72) {
		/* Soft line break: "=" CR LF, then keep going on a fresh line. */
		CK((*filter->output_function)(0x3d, filter->data));
		CK((*filter->output_function)(0x0d, filter->data));
		CK((*filter->output_function)(0x0a, filter->data));
		filter->status &= ~0xff00;
	}

	if (s <= 0 || s >= 0x80 || s == 0x3d /* not ASCII, or '=' itself */
			|| ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) && mime_char_needs_qencode[s])) {
		/* Emit "=XY" with upper-case hex digits (0x30 = '0', 0x37 = 'A' - 10). */
		int hi = (s >> 4) & 0xf;
		int lo = s & 0xf;

		CK((*filter->output_function)(0x3d, filter->data));
		CK((*filter->output_function)(hi < 10 ? hi + 0x30 : hi + 0x37, filter->data));
		CK((*filter->output_function)(lo < 10 ? lo + 0x30 : lo + 0x37, filter->data));
		if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
			filter->status += 0x300;        /* three characters written */
		}
	} else {
		CK((*filter->output_function)(s, filter->data));
		if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
			filter->status += 0x100;        /* one character written */
		}
	}

	return 0;
}
150
/*
 * Flush routine for the Quoted-Printable encoder.
 *
 * Feeds a NUL through filter->filter_function so that the byte still held in
 * filter->cache gets written out, then clears the low 16 bits of
 * filter->status (state byte and column counter) and the cache. Finally the
 * downstream flush_function, if any, is invoked with filter->data.
 *
 * The return values of filter_function and flush_function are not checked;
 * this function always returns 0.
 */
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
{
	/* flush filter cache */
	(*filter->filter_function)('\0', filter);
	filter->status &= ~0xffff;
	filter->cache = 0;

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
164
/*
 * Lookup table indexed by byte value: yields 0..15 for the ASCII hex digits
 * '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every other byte.
 * The table is never written to, so it is declared const.
 */
static const int hex2code_map[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
183
/*
 * Quoted-Printable => any
 */
/*
 * Streaming decoder step: consume one Quoted-Printable input byte `c`.
 *
 * filter->status is the decoder state:
 *   0 (default)  plain text; '=' starts an escape, anything else is copied
 *   1            '=' seen; expecting a hex digit or a soft line break
 *   2            '=' and one hex digit seen (the digit is in filter->cache)
 *   3            '=' CR seen; an LF completes the soft line break
 *
 * Malformed escapes are not rejected: the bytes collected so far are written
 * out literally.
 *
 * Returns 0 on success, or -1 as soon as filter->output_function returns a
 * negative value (via CK).
 */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
	int n, m;

	switch (filter->status) {
	case 1:
		if (hex2code_map[c & 0xff] >= 0) {
			filter->cache = c;
			filter->status = 2;
		} else if (c == 0x0d) { /* soft line feed: "=" CR, LF may follow */
			filter->status = 3;
		} else if (c == 0x0a) { /* soft line feed: "=" LF */
			filter->status = 0;
		} else {
			/* Not an escape after all: emit '=' and the byte literally. */
			CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		}
		break;
	case 2:
		m = hex2code_map[c & 0xff];
		if (m < 0) {
			/* Second digit missing: emit '=', the first digit, then c. */
			CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
			CK((*filter->output_function)(filter->cache, filter->data));
			n = c;
		} else {
			/* Mask the cached digit exactly like the lookup that accepted it
			 * in state 1, so the table index can never leave 0..255. */
			n = hex2code_map[filter->cache & 0xff] << 4 | m;
		}
		CK((*filter->output_function)(n, filter->data));
		filter->status = 0;
		break;
	case 3:
		if (c != 0x0a) { /* not LF: the byte is ordinary output */
			CK((*filter->output_function)(c, filter->data));
		}
		filter->status = 0;
		break;
	default:
		if (c == 0x3d) { /* '=' */
			filter->status = 1;
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
		break;
	}

	return 0;
}
235
/*
 * Flush routine for the Quoted-Printable decoder.
 *
 * Resets filter->status and filter->cache to 0 and writes out literally any
 * incomplete escape that was pending at end of input:
 *   status 1 ("=")        -> '='
 *   status 2 ("=" digit)  -> '=' followed by the cached digit
 * Any other state produces no output. Afterwards the downstream
 * flush_function, if any, is invoked with filter->data (its return value is
 * not checked).
 *
 * Returns 0 on success, or -1 if filter->output_function returns a negative
 * value (via CK); in that case the downstream flush is skipped.
 */
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
{
	int status, cache;

	/* Snapshot and reset first, so the filter is clean even if CK returns. */
	status = filter->status;
	cache = filter->cache;
	filter->status = 0;
	filter->cache = 0;

	/* flush fragments */
	if (status == 1) {
		CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
	} else if (status == 2) {
		CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
		CK((*filter->output_function)(cache, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
258
mb_qprint_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)259 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
260 {
261 unsigned char *p = *in, *e = p + *in_len;
262 uint32_t *out = buf, *limit = buf + bufsize - 2;
263
264 while (p < e && out < limit) {
265 unsigned char c = *p++;
266
267 if (c == '=' && p < e) {
268 unsigned char c2 = *p++;
269
270 if (hex2code_map[c2] >= 0 && p < e) {
271 unsigned char c3 = *p++;
272
273 if (hex2code_map[c3] >= 0) {
274 *out++ = hex2code_map[c2] << 4 | hex2code_map[c3];
275 } else {
276 *out++ = '=';
277 *out++ = c2;
278 *out++ = c3;
279 }
280 } else if (c2 == '\r' && p < e) {
281 unsigned char c3 = *p++;
282
283 if (c3 != '\n') {
284 *out++ = c3;
285 }
286 } else if (c2 != '\n') {
287 *out++ = '=';
288 *out++ = c2;
289 }
290 } else {
291 *out++ = c;
292 }
293 }
294
295 *in_len = e - p;
296 *in = p;
297 return out - buf;
298 }
299
/*
 * Map a 4-bit value to its upper-case hexadecimal digit:
 * 0..9 -> '0'..'9', 10..15 -> 'A'..'F'.
 * Callers are expected to pass a value already masked to 0..15.
 */
static unsigned char qprint_enc_nibble(unsigned char nibble)
{
	if (nibble < 10) {
		return nibble + '0';
	} else {
		return nibble - 10 + 'A';
	}
}
308
/*
 * Encode `len` input values as Quoted-Printable text appended to `buf`.
 *
 * Each input value is treated as a raw byte (0x00-0xFF), not as a Unicode
 * codepoint. buf->state carries the number of characters already written on
 * the current output line from one call to the next. `end` is not used.
 *
 *   NUL   copied verbatim; restarts the line-length count
 *   LF    written as CR LF; restarts the line-length count
 *   CR    dropped
 *   '=' and values >= 0x80   written as "=XY" (upper-case hex)
 *   anything else            copied verbatim
 *
 * A soft line break ("=" CR LF) is inserted before a character whenever 72
 * or more characters are already on the current line.
 *
 * NOTE(review): the MB_CONVERT_BUF_* macros and mb_convert_buf_add* helpers
 * are defined elsewhere; the ENSURE calls presumably guarantee that at least
 * the given number of bytes can still be written -- confirm.
 */
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	unsigned int chars_output = buf->state;

	/* Inside the loop `len` is the number of values still to come AFTER the
	 * current one; the ENSURE sizes below rely on that. */
	while (len--) {
		/* We assume that all the input 'codepoints' are not really Unicode codepoints at all,
		 * but raw bytes from 0x00-0xFF */
		uint32_t w = *in++;

		if (!w) {
			out = mb_convert_buf_add(out, '\0');
			chars_output = 0;
			continue;
		} else if (w == '\n') {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, '\r', '\n');
			chars_output = 0;
			continue;
		} else if (w == '\r') {
			/* No output */
			continue;
		}

		/* QPrint actually mandates that line length should not be more than 76 characters,
		 * but mbstring stops slightly short of that */
		if (chars_output >= 72) {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add3(out, '=', '\r', '\n');
			chars_output = 0;
		}

		if (w >= 0x80 || w == '=') {
			/* Not ASCII, or '=' itself: hex-escape */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
			out = mb_convert_buf_add3(out, '=', qprint_enc_nibble((w >> 4) & 0xF), qprint_enc_nibble(w & 0xF));
			chars_output += 3;
		} else {
			/* Plain ASCII */
			out = mb_convert_buf_add(out, w);
			chars_output++;
		}
	}

	buf->state = chars_output;
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
359