/*
 * "streamable kanji code filter and converter"
 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
 *
 * LICENSE NOTICES
 *
 * This file is part of "streamable kanji code filter and converter",
 * which is distributed under the terms of GNU Lesser General Public
 * License (version 2) as published by the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with "streamable kanji code filter and converter";
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 * Suite 330, Boston, MA 02111-1307 USA
 *
 * The author of this file:
 *
 */
/*
 * The source code included in this file was separated from mbfilter.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
 * mbfilter.c is included in this package.
 *
 */

#include "mbfilter.h"
#include "mbfilter_base64.h"

34 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35 static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36
37 const mbfl_encoding mbfl_encoding_base64 = {
38 mbfl_no_encoding_base64,
39 "BASE64",
40 "BASE64",
41 NULL,
42 NULL,
43 MBFL_ENCTYPE_GL_UNSAFE,
44 NULL,
45 NULL,
46 mb_base64_to_wchar,
47 mb_wchar_to_base64,
48 NULL
49 };
50
51 const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
52 mbfl_no_encoding_8bit,
53 mbfl_no_encoding_base64,
54 mbfl_filt_conv_common_ctor,
55 NULL,
56 mbfl_filt_conv_base64enc,
57 mbfl_filt_conv_base64enc_flush,
58 NULL,
59 };
60
61 const struct mbfl_convert_vtbl vtbl_b64_8bit = {
62 mbfl_no_encoding_base64,
63 mbfl_no_encoding_8bit,
64 mbfl_filt_conv_common_ctor,
65 NULL,
66 mbfl_filt_conv_base64dec,
67 mbfl_filt_conv_base64dec_flush,
68 NULL,
69 };
70
71
/* Evaluate `statement`; if its value is negative, make the enclosing function
 * return -1 immediately. Non-negative results fall through. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/*
 * any => BASE64
 */

/* Base64 alphabet: maps a 6-bit value (0..63) to its ASCII character code.
 * The 65th entry is a 0x00 terminator; the code in this file only ever uses
 * indices 0..63. */
static const unsigned char mbfl_base64_table[] = {
 /* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */
   0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,
 /* 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', */
   0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,
 /* 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', */
   0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,
 /* 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', */
   0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,
 /* '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0' */
   0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2b,0x2f,0x00
};

/*
 * Streaming Base64 encoder: consume one input byte `c`.
 *
 * filter->status as used here:
 *   bits 0-7  : number of input bytes currently buffered in filter->cache
 *               (0, 1 or 2)
 *   bits 8-15 : number of Base64 characters already written on the current
 *               output line
 *   MBFL_BASE64_STS_MIME_HEADER : when set, no CR LF is inserted and the
 *               line counter is not maintained
 *
 * Every third byte completes a 24-bit group, which is written out as four
 * Base64 characters. Returns 0 on success, or -1 as soon as the output
 * function reports a negative result.
 */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
{
	int n;

	n = (filter->status & 0xff);
	if (n == 0) {
		/* First byte of a group: occupies bits 16-23 of the cache */
		filter->status++;
		filter->cache = (c & 0xff) << 16;
	} else if (n == 1) {
		/* Second byte of a group: occupies bits 8-15 of the cache */
		filter->status++;
		filter->cache |= (c & 0xff) << 8;
	} else {
		/* Third byte: the group is complete, so reset the byte counter */
		filter->status &= ~0xff;
		if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
			n = (filter->status & 0xff00) >> 8;
			if (n > 72) {
				/* Line already holds 76 characters; start a new one */
				CK((*filter->output_function)(0x0d, filter->data));		/* CR */
				CK((*filter->output_function)(0x0a, filter->data));		/* LF */
				filter->status &= ~0xff00;
			}
			/* Adds 4 to the line-length counter held in bits 8-15 */
			filter->status += 0x400;
		}
		n = filter->cache | (c & 0xff);
		CK((*filter->output_function)(mbfl_base64_table[(n >> 18) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(n >> 12) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(n >> 6) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[n & 0x3f], filter->data));
	}

	return 0;
}

/*
 * Finish a streaming Base64 encode.
 *
 * If one or two input bytes are still buffered, they are written as a final
 * four-character group padded with '='. A CR LF is written first when the
 * current line already holds more than 72 characters and the
 * MBFL_BASE64_STS_MIME_HEADER flag is not set. The byte counter, line counter
 * and cache are cleared before anything is written. Finally the downstream
 * flush function, if any, is invoked (its result is not examined).
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
{
	int status, cache, len;

	status = filter->status & 0xff;		/* buffered input bytes: 0, 1 or 2 */
	cache = filter->cache;
	len = (filter->status & 0xff00) >> 8;	/* characters on the current line */
	/* Only the low 16 bits are cleared; higher bits of status are kept and
	 * are what the MIME-header test below looks at. */
	filter->status &= ~0xffff;
	filter->cache = 0;
	/* flush fragments */
	if (status >= 1) {
		if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
			if (len > 72){
				CK((*filter->output_function)(0x0d, filter->data));		/* CR */
				CK((*filter->output_function)(0x0a, filter->data));		/* LF */
			}
		}
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 18) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(cache >> 12) & 0x3f], filter->data));
		if (status == 1) {
			/* One leftover byte: two data characters, two pad characters */
			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
		} else {
			/* Two leftover bytes: three data characters, one pad character */
			CK((*filter->output_function)(mbfl_base64_table[(cache >> 6) & 0x3f], filter->data));
			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
		}
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}

157 /*
158 * BASE64 => any
159 */
mbfl_filt_conv_base64dec(int c,mbfl_convert_filter * filter)160 int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter)
161 {
162 int n;
163
164 if (c == 0x0d || c == 0x0a || c == 0x20 || c == 0x09 || c == 0x3d) { /* CR or LF or SPACE or HTAB or '=' */
165 return 0;
166 }
167
168 n = 0;
169 if (c >= 0x41 && c <= 0x5a) { /* A - Z */
170 n = c - 65;
171 } else if (c >= 0x61 && c <= 0x7a) { /* a - z */
172 n = c - 71;
173 } else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 */
174 n = c + 4;
175 } else if (c == 0x2b) { /* '+' */
176 n = 62;
177 } else if (c == 0x2f) { /* '/' */
178 n = 63;
179 } else {
180 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
181 return 0;
182 }
183 n &= 0x3f;
184
185 switch (filter->status) {
186 case 0:
187 filter->status = 1;
188 filter->cache = n << 18;
189 break;
190 case 1:
191 filter->status = 2;
192 filter->cache |= n << 12;
193 break;
194 case 2:
195 filter->status = 3;
196 filter->cache |= n << 6;
197 break;
198 default:
199 filter->status = 0;
200 n |= filter->cache;
201 CK((*filter->output_function)((n >> 16) & 0xff, filter->data));
202 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
203 CK((*filter->output_function)(n & 0xff, filter->data));
204 break;
205 }
206
207 return 0;
208 }
209
/*
 * Finish a streaming Base64 decode.
 *
 * Writes the bytes that are fully determined by an incomplete final group:
 * one byte when two 6-bit values were collected, two bytes when three were
 * collected. A single leftover 6-bit value produces no output. The decoder
 * state is reset before anything is written, and the downstream flush
 * function, if any, is invoked afterwards (its result is not examined).
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
{
	int status, cache;

	status = filter->status;
	cache = filter->cache;
	filter->status = 0;
	filter->cache = 0;
	/* flush fragments */
	if (status >= 2) {
		CK((*filter->output_function)((cache >> 16) & 0xff, filter->data));
		if (status >= 3) {
			CK((*filter->output_function)((cache >> 8) & 0xff, filter->data));
		}
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}

/*
 * Map a single Base64 alphabet character to its 6-bit value.
 *
 * Returns 0..25 for 'A'..'Z', 26..51 for 'a'..'z', 52..61 for '0'..'9',
 * 62 for '+', 63 for '/', and -1 for every other character (including the
 * padding character '=' and whitespace, which callers filter out first).
 */
static int decode_base64(char c)
{
	if (c >= 'A' && c <= 'Z') {
		return c - 'A';
	} else if (c >= 'a' && c <= 'z') {
		return c - 'a' + 26;
	} else if (c >= '0' && c <= '9') {
		return c - '0' + 52;
	} else if (c == '+') {
		return 62;
	} else if (c == '/') {
		return 63;
	}
	return -1;
}

mb_base64_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)249 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
250 {
251 ZEND_ASSERT(bufsize >= 3);
252
253 unsigned char *p = *in, *e = p + *in_len;
254 uint32_t *out = buf, *limit = buf + bufsize;
255
256 unsigned int bits = *state & 0xFF, cache = *state >> 8;
257
258 while (p < e && (limit - out) >= 3) {
259 unsigned char c = *p++;
260
261 if (c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == '=') {
262 continue;
263 }
264
265 int value = decode_base64(c);
266
267 if (value == -1) {
268 *out++ = MBFL_BAD_INPUT;
269 } else {
270 bits += 6;
271 cache = (cache << 6) | (value & 0x3F);
272 if (bits == 24) {
273 *out++ = (cache >> 16) & 0xFF;
274 *out++ = (cache >> 8) & 0xFF;
275 *out++ = cache & 0xFF;
276 bits = cache = 0;
277 }
278 }
279 }
280
281 if (p == e) {
282 if (bits) {
283 /* If we reach here, there will be at least 3 spaces remaining in output buffer */
284 if (bits == 18) {
285 *out++ = (cache >> 10) & 0xFF;
286 *out++ = (cache >> 2) & 0xFF;
287 } else if (bits == 12) {
288 *out++ = (cache >> 4) & 0xFF;
289 }
290 }
291 } else {
292 *state = (cache << 8) | (bits & 0xFF);
293 }
294
295 *in_len = e - p;
296 *in = p;
297 return out - buf;
298 }
299
/*
 * Encode `len` input elements as Base64 text appended to `buf`. Only the low
 * 8 bits of each input element are used.
 *
 * buf->state carries unfinished work between calls:
 *   bits 0-1 : number of input bytes pending in the cache (0..2)
 *   bits 2-7 : number of characters on the current output line, divided by 4
 *   bits 8+  : the pending input bytes themselves
 *
 * A CR LF pair is written before a four-character group whenever the current
 * line already holds more than 72 characters, i.e. lines are 76 characters
 * long. When `end` is true and input bytes are still pending, a final group
 * padded with '=' is written; otherwise the state is saved back to buf->state.
 */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned int bits = (buf->state & 0x3) * 8;
	unsigned int chars_output = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int cache = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Every 3 bytes of input converts to 4 bytes of output... but if the number of input
	 * bytes is not a multiple of 3, we still pad the output out to a multiple of 4
	 * That's `(len + 2) * 4 / 3`, to calculate the amount of space needed in the output buffer
	 *
	 * But also, we add a CR+LF line ending (2 bytes) for every 76 bytes of output
	 * That means we must multiply the above number by 78/76
	 * (4/3 * 78/76 reduces to 26/19, hence the `* 26 + 52` and `/ 19` below)
	 * Use `zend_safe_address_guarded` to check that the multiplication doesn't overflow
	 *
	 * And since we may enter this function multiple times when converting a large string, and
	 * we might already be close to where a CR+LF needs to be emitted, make space for an extra
	 * CR+LF pair in the output buffer */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (bits / 8), 26, 52) / 19) + 2);

	while (len--) {
		uint32_t w = *in++;
		cache = (cache << 8) | (w & 0xFF);
		bits += 8;
		if (bits == 24) {
			if (chars_output > 72) {
				out = mb_convert_buf_add2(out, '\r', '\n');
				chars_output = 0;
			}
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(cache >> 18) & 0x3F],
				mbfl_base64_table[(cache >> 12) & 0x3F],
				mbfl_base64_table[(cache >> 6) & 0x3F],
				mbfl_base64_table[cache & 0x3F]);
			chars_output += 4;
			bits = cache = 0;
		}
	}

	if (end && bits) {
		if (chars_output > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			chars_output = 0;
		}
		if (bits == 8) {
			/* One pending byte: two data characters, then "==" */
			out = mb_convert_buf_add4(out, mbfl_base64_table[(cache >> 2) & 0x3F], mbfl_base64_table[(cache & 0x3) << 4], '=', '=');
		} else {
			/* Two pending bytes: three data characters, then "=" */
			out = mb_convert_buf_add4(out, mbfl_base64_table[(cache >> 10) & 0x3F], mbfl_base64_table[(cache >> 4) & 0x3F], mbfl_base64_table[(cache & 0xF) << 2], '=');
		}
	} else {
		buf->state = (cache << 8) | (((chars_output / 4) & 0x3F) << 2) | ((bits / 8) & 0x3);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
