1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this file was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_qprint.h"
32
/* Forward declarations of the buffer-based converters defined further down in
 * this file; they are needed here because the mbfl_encoding_qprint initializer
 * refers to them before their definitions. */
static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35
/* NULL-terminated list of alias names, referenced by mbfl_encoding_qprint */
static const char *mbfl_encoding_qprint_aliases[] = {"qprint", NULL};
37
/*
 * Encoding descriptor for Quoted-Printable.
 *
 * The initializer is positional, so the order of the entries must match the
 * declaration of mbfl_encoding in the mbfilter headers.
 * NOTE(review): the per-field remarks below are inferred from the values
 * themselves -- confirm them against that declaration.
 */
const mbfl_encoding mbfl_encoding_qprint = {
	mbfl_no_encoding_qprint,
	"Quoted-Printable", /* presumably the encoding name */
	"Quoted-Printable", /* presumably the MIME name */
	mbfl_encoding_qprint_aliases,
	NULL,
	MBFL_ENCTYPE_GL_UNSAFE,
	NULL,
	NULL,
	mb_qprint_to_wchar, /* buffer-based decoder defined in this file */
	mb_wchar_to_qprint, /* buffer-based encoder defined in this file */
	NULL
};
51
/*
 * Filter table for the 8bit => Quoted-Printable direction: it pairs the
 * per-byte encoder mbfl_filt_conv_qprintenc with its flush routine.
 * The initializer is positional and must follow the member order of
 * struct mbfl_convert_vtbl (declared outside this file).
 */
const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
	mbfl_no_encoding_8bit,
	mbfl_no_encoding_qprint,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_qprintenc,
	mbfl_filt_conv_qprintenc_flush,
	NULL,
};
61
/*
 * Filter table for the Quoted-Printable => 8bit direction: it pairs the
 * per-byte decoder mbfl_filt_conv_qprintdec with its flush routine.
 * The initializer is positional and must follow the member order of
 * struct mbfl_convert_vtbl (declared outside this file).
 */
const struct mbfl_convert_vtbl vtbl_qprint_8bit = {
	mbfl_no_encoding_qprint,
	mbfl_no_encoding_8bit,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_qprintdec,
	mbfl_filt_conv_qprintdec_flush,
	NULL,
};
71
72
/* Evaluate `statement` exactly once; if its value is negative, make the
 * enclosing function return -1. Only usable inside functions returning int. */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
74
75 /*
76 * any => Quoted-Printable
77 */
78
/*
 * Encode a byte stream as Quoted-Printable, one byte per call.
 *
 * The filter runs one byte behind its input: each call emits the byte held in
 * filter->cache and stores `c` in its place, so that a CR can be judged
 * together with the byte that follows it.
 *
 * filter->status layout:
 *   bits 0-7   zero until the first byte has been cached, non-zero afterwards
 *   bits 8-15  number of characters written on the current output line
 *
 * Returns 0 on success, or -1 as soon as the output function reports an error.
 */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	int prev, line_len;

	if ((filter->status & 0xff) == 0) {
		/* Very first byte: nothing is cached yet, so nothing can be emitted */
		filter->cache = c;
		filter->status++;
		return 0;
	}

	prev = filter->cache;
	filter->cache = c;
	line_len = (filter->status & 0xff00) >> 8;

	if (prev == 0) {
		/* NUL is passed through as-is and restarts the line length count */
		CK((*filter->output_function)(prev, filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	if (prev == '\r' && c == '\n') {
		/* First half of a CR LF pair: the line break is written on the next
		 * call, when the LF has become the cached byte */
		return 0;
	}

	if (prev == '\n' || prev == '\r') {
		/* LF, or a CR not followed by LF: written as CR LF */
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
		return 0;
	}

	if (line_len >= 72) {
		/* Soft line break: "=" CR LF, then start counting a new line */
		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)('\r', filter->data));
		CK((*filter->output_function)('\n', filter->data));
		filter->status &= ~0xff00;
	}

	if (prev <= 0 || prev >= 0x80 || prev == '=') {
		/* Not ASCII, or '=' itself: written as '=' plus two uppercase hex digits */
		CK((*filter->output_function)('=', filter->data));
		CK((*filter->output_function)(hex_digits[(prev >> 4) & 0xf], filter->data));
		CK((*filter->output_function)(hex_digits[prev & 0xf], filter->data));
		filter->status += 0x300; /* three characters added to the line */
	} else {
		CK((*filter->output_function)(prev, filter->data));
		filter->status += 0x100; /* one character added to the line */
	}

	return 0;
}
142
/*
 * Finish an encoding run.
 *
 * A NUL is fed through filter->filter_function; with the encoder in this file
 * that pushes out the byte still waiting in filter->cache. The NUL then sits
 * in the cache itself and is discarded when the state is reset below.
 * The return value of that call is not examined. Always returns 0.
 */
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
{
	filter->filter_function('\0', filter);

	/* Back to the initial state: nothing cached, line length zero */
	filter->cache = 0;
	filter->status &= ~0xffff;

	/* Pass the flush on when a flush function has been set */
	if (filter->flush_function) {
		filter->flush_function(filter->data);
	}

	return 0;
}
156
/*
 * Lookup table indexed by byte value (0x00-0xFF): the numeric value 0-15 of an
 * ASCII hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f'), or -1 for every other
 * byte. The table is never written to, so it is declared const, and the
 * explicit size makes the compiler reject an initializer with too many entries.
 */
static const int hex2code_map[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
175
176 /*
177 * Quoted-Printable => any
178 */
/*
 * Decode Quoted-Printable input, one byte per call.
 *
 * filter->status is the decoder state:
 *   0 (or any other value)  ordinary text
 *   1  '=' has been seen
 *   2  '=' and one hex digit have been seen; the digit is in filter->cache
 *   3  '=' CR has been seen
 *
 * Sequences that start with '=' but are not a valid escape or soft line break
 * are passed through literally.
 *
 * Returns 0 on success, or -1 as soon as the output function reports an error.
 */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
	int n, m;

	switch (filter->status) {
	case 1:
		if (hex2code_map[c & 0xff] >= 0) {
			/* First hex digit: keep it until the second one arrives */
			filter->cache = c;
			filter->status = 2;
		} else if (c == 0x0d) { /* soft line feed */
			filter->status = 3;
		} else if (c == 0x0a) { /* soft line feed */
			filter->status = 0;
		} else {
			/* Not an escape: emit the '=' and this byte unchanged */
			CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		}
		break;
	case 2:
		m = hex2code_map[c & 0xff];
		if (m < 0) {
			/* Second character is not a hex digit: emit '=', the cached
			 * digit and then this byte, all unchanged */
			CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
			CK((*filter->output_function)(filter->cache, filter->data));
			n = c;
		} else {
			/* The cache was stored unmasked in state 1, so mask it here like
			 * every other table lookup; otherwise a value outside 0x00-0xFF
			 * would index past the 256-entry table */
			n = hex2code_map[filter->cache & 0xff] << 4 | m;
		}
		CK((*filter->output_function)(n, filter->data));
		filter->status = 0;
		break;
	case 3:
		/* After '=' CR: an LF completes the soft line break and is dropped;
		 * any other byte is emitted and the '=' CR is discarded */
		if (c != 0x0a) { /* LF */
			CK((*filter->output_function)(c, filter->data));
		}
		filter->status = 0;
		break;
	default:
		if (c == 0x3d) { /* '=' */
			filter->status = 1;
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
		break;
	}

	return 0;
}
227
/*
 * Finish a decoding run.
 *
 * The decoder state is reset first. If the input stopped in the middle of an
 * escape, what was consumed so far is written out literally: a lone '=' for
 * state 1, or '=' plus the cached byte for state 2. Nothing is written for
 * any other state.
 *
 * Returns 0 on success, or -1 if the output function reports an error (in
 * which case the flush function is not called).
 */
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
{
	const int pending_state = filter->status;
	const int held_byte = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	if (pending_state == 1 || pending_state == 2) {
		CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
		if (pending_state == 2) {
			CK((*filter->output_function)(held_byte, filter->data));
		}
	}

	/* Pass the flush on when a flush function has been set */
	if (filter->flush_function) {
		filter->flush_function(filter->data);
	}

	return 0;
}
250
mb_qprint_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)251 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
252 {
253 unsigned char *p = *in, *e = p + *in_len;
254 uint32_t *out = buf, *limit = buf + bufsize - 2;
255
256 while (p < e && out < limit) {
257 unsigned char c = *p++;
258
259 if (c == '=' && p < e) {
260 unsigned char c2 = *p++;
261
262 if (hex2code_map[c2] >= 0 && p < e) {
263 unsigned char c3 = *p++;
264
265 if (hex2code_map[c3] >= 0) {
266 *out++ = hex2code_map[c2] << 4 | hex2code_map[c3];
267 } else {
268 *out++ = '=';
269 *out++ = c2;
270 *out++ = c3;
271 }
272 } else if (c2 == '\r' && p < e) {
273 unsigned char c3 = *p++;
274
275 if (c3 != '\n') {
276 *out++ = c3;
277 }
278 } else if (c2 != '\n') {
279 *out++ = '=';
280 *out++ = c2;
281 }
282 } else {
283 *out++ = c;
284 }
285 }
286
287 *in_len = e - p;
288 *in = p;
289 return out - buf;
290 }
291
/* Map a value 0-15 to its uppercase hexadecimal digit character:
 * 0-9 give '0'-'9', 10-15 give 'A'-'F'. */
static unsigned char qprint_enc_nibble(unsigned char nibble)
{
	return (nibble < 10) ? (nibble + '0') : (nibble - 10 + 'A');
}
300
/*
 * Encode `len` input values from `in` as Quoted-Printable into `buf`.
 *
 * Each input value is treated as a raw byte (0x00-0xFF), not as a Unicode
 * codepoint. buf->state holds the number of characters written on the current
 * output line, so the line length count carries over between calls.
 * `end` is not used by this function.
 *
 * Buffer space: one byte per input value is reserved up front; every path that
 * writes more than one byte reserves the extra space right before writing.
 */
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	unsigned int line_len = buf->state;

	while (len--) {
		uint32_t byte = *in++;

		if (byte == '\r') {
			/* CR produces no output; only LF produces a line break */
			continue;
		}

		if (byte == 0) {
			/* NUL is copied through and restarts the line length count */
			out = mb_convert_buf_add(out, '\0');
			line_len = 0;
			continue;
		}

		if (byte == '\n') {
			/* LF is written as CR LF */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_len = 0;
			continue;
		}

		/* QPrint actually mandates that line length should not be more than
		 * 76 characters, but mbstring stops slightly short of that */
		if (line_len >= 72) {
			/* Soft line break: "=" CR LF */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add3(out, '=', '\r', '\n');
			line_len = 0;
		}

		if (byte < 0x80 && byte != '=') {
			/* Plain ASCII other than '=': copied as-is */
			out = mb_convert_buf_add(out, byte);
			line_len++;
		} else {
			/* Not ASCII, or '=' itself: '=' plus two uppercase hex digits */
			unsigned char hi = qprint_enc_nibble((byte >> 4) & 0xF);
			unsigned char lo = qprint_enc_nibble(byte & 0xF);

			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
			out = mb_convert_buf_add3(out, '=', hi, lo);
			line_len += 3;
		}
	}

	buf->state = line_len;
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
351