1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this file was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_qprint.h"
32
/*
 * Forward declarations of the two buffer-oriented converters defined at the
 * bottom of this file; they are needed here because the mbfl_encoding_qprint
 * initializer refers to them before their definitions.
 */
static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35
/* NULL-terminated list of alternative names for this encoding (only "qprint"). */
static const char *mbfl_encoding_qprint_aliases[] = {"qprint", NULL};
37
/*
 * Encoding descriptor for Quoted-Printable.
 *
 * The initializer is positional. It supplies the encoding id, the string
 * "Quoted-Printable" twice, the alias list above, the MBFL_ENCTYPE_GL_UNSAFE
 * flag and the two buffer converters defined in this file; every other slot
 * is NULL.
 * NOTE(review): the meaning of each slot comes from the mbfl_encoding
 * declaration in the headers, which is not visible here -- confirm there
 * before reordering or adding entries.
 */
const mbfl_encoding mbfl_encoding_qprint = {
	mbfl_no_encoding_qprint,
	"Quoted-Printable",
	"Quoted-Printable",
	mbfl_encoding_qprint_aliases,
	NULL,
	MBFL_ENCTYPE_GL_UNSAFE,
	NULL,
	NULL,
	mb_qprint_to_wchar, /* Quoted-Printable text => decoded values */
	mb_wchar_to_qprint, /* values => Quoted-Printable text */
	NULL,
	NULL,
};
52
/*
 * Conversion table entry pairing mbfl_no_encoding_8bit with
 * mbfl_no_encoding_qprint: it wires in the per-character encoder
 * mbfl_filt_conv_qprintenc and its flush routine, using the common
 * constructor. The two NULL slots are left unset.
 * NOTE(review): slot meanings are defined by struct mbfl_convert_vtbl in the
 * headers (not visible here) -- presumably source id, destination id, ctor,
 * dtor, filter, flush, copy; confirm.
 */
const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
	mbfl_no_encoding_8bit,
	mbfl_no_encoding_qprint,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_qprintenc,
	mbfl_filt_conv_qprintenc_flush,
	NULL,
};
62
/*
 * Conversion table entry pairing mbfl_no_encoding_qprint with
 * mbfl_no_encoding_8bit: it wires in the per-character decoder
 * mbfl_filt_conv_qprintdec and its flush routine, using the common
 * constructor. The two NULL slots are left unset.
 * NOTE(review): slot meanings are defined by struct mbfl_convert_vtbl in the
 * headers (not visible here) -- presumably source id, destination id, ctor,
 * dtor, filter, flush, copy; confirm.
 */
const struct mbfl_convert_vtbl vtbl_qprint_8bit = {
	mbfl_no_encoding_qprint,
	mbfl_no_encoding_8bit,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_qprintdec,
	mbfl_filt_conv_qprintdec_flush,
	NULL,
};
72
73
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
75
76 /*
77 * any => Quoted-Printable
78 */
79
/*
 * Encode one input byte as Quoted-Printable.
 *
 * The encoder runs one byte behind its input: the byte passed in `c` is only
 * stored in filter->cache, and the previously cached byte is the one that is
 * actually emitted. This lookahead lets a CR be dropped when it is followed
 * by LF.
 *
 * filter->status layout:
 *   bits 0-7  : zero until the first byte has been cached
 *   bits 8-15 : number of characters written on the current output line
 *
 * Returns 0 on success, -1 as soon as the output function reports an error.
 */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
{
	/* Very first byte: nothing to emit yet, just prime the cache. */
	if ((filter->status & 0xff) == 0) {
		filter->cache = c;
		filter->status++;
		return 0;
	}

	int prev = filter->cache;
	filter->cache = c;
	int column = (filter->status & 0xff00) >> 8;

	/* NUL is passed through untouched and restarts the line counter. */
	if (prev == 0) {
		CK((*filter->output_function)(prev, filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	/* LF, or a CR that is not followed by LF, becomes a hard CRLF. */
	if (prev == '\n' || (prev == '\r' && c != '\n')) {
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	/* CR followed by LF: drop the CR, the LF will emit the CRLF next time. */
	if (prev == 0x0d) {
		return 0;
	}

	/* Line is getting long: insert a soft line break ("=" CR LF). */
	if (column >= 72) {
		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
	}

	if (prev > 0 && prev < 0x80 && prev != '=') {
		/* Plain ASCII: copied verbatim, one column wide. */
		CK((*filter->output_function)(prev, filter->data));
		filter->status += 0x100;
	} else {
		/* Everything else becomes "=XX" with upper-case hex digits. */
		int hi = (prev >> 4) & 0xf;
		int lo = prev & 0xf;

		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)(hi < 10 ? hi + 48 : hi + 55, filter->data));
		CK((*filter->output_function)(lo < 10 ? lo + 48 : lo + 55, filter->data));
		filter->status += 0x300;
	}

	return 0;
}
143
/*
 * Finish an encoding run.
 *
 * A NUL is fed through the filter function so that any byte still held in
 * the cache is processed, then the low 16 bits of the status word and the
 * cache are cleared. The result of that filter call is not checked. Finally
 * the downstream flush function is invoked when one is set.
 *
 * Always returns 0.
 */
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
{
	/* Push out whatever is still sitting in the cache. */
	filter->filter_function('\0', filter);

	/* Reset the cached-byte flag, the line counter and the cache itself. */
	filter->cache = 0;
	filter->status &= ~0xffff;

	if (filter->flush_function != NULL) {
		filter->flush_function(filter->data);
	}

	return 0;
}
157
/*
 * Lookup table indexed by byte value (0..255): the numeric value 0..15 when
 * the byte is an ASCII hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f'), and -1
 * for every other byte. The table is read-only and is sized explicitly so
 * that any byte value is a valid index.
 */
static const int hex2code_map[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
176
177 /*
178 * Quoted-Printable => any
179 */
/*
 * Decode one input character of Quoted-Printable text.
 *
 * filter->status tracks the position inside an escape sequence:
 *   0 (or anything else): ordinary text
 *   1: '=' has been seen
 *   2: '=' and one hex digit have been seen (digit stored in filter->cache)
 *   3: "=" CR has been seen (soft line break; an LF may follow)
 *
 * Malformed escapes are not dropped: the characters consumed so far are
 * written to the output unchanged.
 *
 * Returns 0 on success, -1 as soon as the output function reports an error.
 */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
	int n, m;

	switch (filter->status) {
	case 1:
		if (hex2code_map[c & 0xff] >= 0) {
			/* First hex digit: remember it and wait for the second one */
			filter->cache = c;
			filter->status = 2;
		} else if (c == 0x0d) { /* soft line feed */
			filter->status = 3;
		} else if (c == 0x0a) { /* soft line feed */
			filter->status = 0;
		} else {
			/* Not an escape after all: emit the '=' and this character */
			CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		}
		break;
	case 2:
		m = hex2code_map[c & 0xff];
		if (m < 0) {
			/* Second character is not a hex digit: emit "=", the cached
			 * character and (below) the current character verbatim */
			CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
			CK((*filter->output_function)(filter->cache, filter->data));
			n = c;
		} else {
			/* The cached digit was validated through `c & 0xff` in state 1,
			 * so it must be looked up through the same mask here; indexing
			 * with the raw cached value would read outside the 256-entry
			 * table whenever it lies outside 0..255. */
			n = hex2code_map[filter->cache & 0xff] << 4 | m;
		}
		CK((*filter->output_function)(n, filter->data));
		filter->status = 0;
		break;
	case 3:
		/* After "=" CR: swallow a following LF, pass anything else through */
		if (c != 0x0a) { /* LF */
			CK((*filter->output_function)(c, filter->data));
		}
		filter->status = 0;
		break;
	default:
		if (c == 0x3d) { /* '=' */
			filter->status = 1;
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
		break;
	}

	return 0;
}
228
/*
 * Finish a decoding run.
 *
 * The decoder state is reset first; then any escape sequence that was cut
 * off by the end of input is written out literally: a lone "=" when only
 * the '=' had been seen, or "=" plus the cached character when one hex
 * digit had been seen. Afterwards the downstream flush function is called
 * when one is set.
 *
 * Returns 0 on success, -1 if the output function reports an error (in
 * which case the downstream flush function is not called).
 */
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
{
	const int pending_state = filter->status;
	const int pending_char = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	/* Emit the fragments of an unfinished escape sequence, if any. */
	switch (pending_state) {
	case 1:
		CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
		break;
	case 2:
		CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
		CK((*filter->output_function)(pending_char, filter->data));
		break;
	default:
		break;
	}

	if (filter->flush_function != NULL) {
		filter->flush_function(filter->data);
	}

	return 0;
}
251
mb_qprint_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)252 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
253 {
254 unsigned char *p = *in, *e = p + *in_len;
255 uint32_t *out = buf, *limit = buf + bufsize - 2;
256
257 while (p < e && out < limit) {
258 unsigned char c = *p++;
259
260 if (c == '=' && p < e) {
261 unsigned char c2 = *p++;
262
263 if (hex2code_map[c2] >= 0 && p < e) {
264 unsigned char c3 = *p++;
265
266 if (hex2code_map[c3] >= 0) {
267 *out++ = hex2code_map[c2] << 4 | hex2code_map[c3];
268 } else {
269 *out++ = '=';
270 *out++ = c2;
271 *out++ = c3;
272 }
273 } else if (c2 == '\r' && p < e) {
274 unsigned char c3 = *p++;
275
276 if (c3 != '\n') {
277 *out++ = c3;
278 }
279 } else if (c2 != '\n') {
280 *out++ = '=';
281 *out++ = c2;
282 }
283 } else {
284 *out++ = c;
285 }
286 }
287
288 *in_len = e - p;
289 *in = p;
290 return out - buf;
291 }
292
/*
 * Convert a 4-bit value to its upper-case hexadecimal digit:
 * 0..9 map to '0'..'9', 10..15 map to 'A'..'F'.
 */
static unsigned char qprint_enc_nibble(unsigned char nibble)
{
	return (nibble < 10) ? (nibble + '0') : (nibble - 10 + 'A');
}
301
/*
 * Encode `len` values from `in` as Quoted-Printable and append them to buf.
 *
 * The input values are treated as raw bytes (0x00-0xFF), not as Unicode
 * codepoints. The number of characters already written on the current
 * output line is read from buf->state on entry and written back on exit.
 * The `end` argument is not used.
 *
 * Space for one output byte per input value is reserved up front; every
 * branch that writes more than one byte reserves the extra room first.
 */
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	unsigned int line_len = buf->state;

	while (len--) {
		uint32_t w = *in++;

		/* Carriage returns produce no output at all. */
		if (w == '\r') {
			continue;
		}

		/* NUL and LF both start a fresh line; LF is written as CRLF. */
		if (w == '\0' || w == '\n') {
			if (w == '\n') {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
				out = mb_convert_buf_add2(out, '\r', '\n');
			} else {
				out = mb_convert_buf_add(out, '\0');
			}
			line_len = 0;
			continue;
		}

		/* Quoted-Printable allows lines of up to 76 characters; mbstring
		 * breaks a little earlier, using a soft line break ("=" CR LF). */
		if (line_len >= 72) {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add3(out, '=', '\r', '\n');
			line_len = 0;
		}

		if (w < 0x80 && w != '=') {
			/* Plain ASCII is copied as is. */
			out = mb_convert_buf_add(out, w);
			line_len++;
		} else {
			/* '=' and non-ASCII values are written as "=XX". */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
			unsigned char hi = qprint_enc_nibble((w >> 4) & 0xF);
			unsigned char lo = qprint_enc_nibble(w & 0xF);
			out = mb_convert_buf_add3(out, '=', hi, lo);
			line_len += 3;
		}
	}

	buf->state = line_len;
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
352