1  /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
 * The source code included in this file was separated from mbfilter.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
 * mbfilter.c is included in this package.
28  *
29  */
30 
31 #include "mbfilter.h"
32 #include "mbfilter_base64.h"
33 
/* Forward declarations of the buffer-based BASE64 decode/encode routines.
 * Both are defined near the end of this file and are referenced by the
 * mbfl_encoding_base64 initializer that follows. */
static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36 
/*
 * Encoding descriptor for BASE64.
 *
 * The members are initialized positionally; what each slot means is fixed by
 * the mbfl_encoding type, which is declared outside this file (presumably in
 * mbfilter.h -- confirm there before reordering or adding entries).
 */
const mbfl_encoding mbfl_encoding_base64 = {
	mbfl_no_encoding_base64,
	"BASE64",
	"BASE64",
	NULL,
	NULL,
	MBFL_ENCTYPE_GL_UNSAFE,
	NULL,
	NULL,
	mb_base64_to_wchar,	/* BASE64 text -> decoded byte values (defined below) */
	mb_wchar_to_base64,	/* byte values -> BASE64 text (defined below) */
	NULL
};
50 
/*
 * Conversion filter table for the 8bit -> BASE64 direction (encoding).
 *
 * Members are positional; the slot layout is defined by struct
 * mbfl_convert_vtbl, which is declared outside this file.
 */
const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
	mbfl_no_encoding_8bit,
	mbfl_no_encoding_base64,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_base64enc,	/* consumes one input byte per call (defined below) */
	mbfl_filt_conv_base64enc_flush,	/* writes the final, '='-padded group (defined below) */
	NULL,
};
60 
/*
 * Conversion filter table for the BASE64 -> 8bit direction (decoding).
 *
 * Members are positional; the slot layout is defined by struct
 * mbfl_convert_vtbl, which is declared outside this file.
 */
const struct mbfl_convert_vtbl vtbl_b64_8bit = {
	mbfl_no_encoding_base64,
	mbfl_no_encoding_8bit,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_base64dec,	/* consumes one BASE64 character per call (defined below) */
	mbfl_filt_conv_base64dec_flush,	/* writes bytes left over from an incomplete group (defined below) */
	NULL,
};
70 
71 
/* Evaluate `expr` exactly once; if the result is negative, make the
 * enclosing function return -1 immediately. */
#define CK(expr) \
	do { \
		if ((expr) < 0) { \
			return -1; \
		} \
	} while (0)
73 
74 /*
75  * any => BASE64
76  */
/* BASE64 alphabet indexed by 6-bit value (0-63). The string literal's
 * terminating NUL supplies the 65th element, 0x00.
 * Written with character literals, so this relies on an ASCII execution
 * character set, as the rest of this file already does. */
static const unsigned char mbfl_base64_table[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
89 
/*
 * Feed one byte into the streaming BASE64 encoder.
 *
 * Layout of filter->status as used here:
 *   bits 0-7   number of bytes (0-2) of the current 3-byte group already
 *              stored in filter->cache
 *   bits 8-15  characters written on the current output line; only maintained
 *              while MBFL_BASE64_STS_MIME_HEADER is not set
 *
 * Only the low 8 bits of `c` are used. The third byte of a group triggers
 * output of four BASE64 characters, preceded by CR LF when the line counter
 * exceeds 72.
 *
 * Returns 0 on success, or -1 as soon as the output function returns a
 * negative value.
 */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
{
	const int octet = c & 0xff;
	int group, shift;

	switch (filter->status & 0xff) {
	case 0:
		/* first byte of a group */
		filter->cache = octet << 16;
		filter->status++;
		return 0;
	case 1:
		/* second byte of a group */
		filter->cache |= octet << 8;
		filter->status++;
		return 0;
	default:
		break;
	}

	/* third byte: the group is complete, so reset the byte counter */
	filter->status &= ~0xff;

	if (!(filter->status & MBFL_BASE64_STS_MIME_HEADER)) {
		const int line_len = (filter->status & 0xff00) >> 8;

		if (line_len > 72) {
			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
			filter->status &= ~0xff00;
		}
		/* account for the 4 characters about to be written (4 << 8) */
		filter->status += 0x400;
	}

	group = filter->cache | octet;
	for (shift = 18; shift >= 0; shift -= 6) {
		CK((*filter->output_function)(mbfl_base64_table[(group >> shift) & 0x3f], filter->data));
	}

	return 0;
}
121 
/*
 * Finish a BASE64 encoding run.
 *
 * Clears the low 16 bits of filter->status and filter->cache. If one or two
 * bytes were still buffered, they are written as a final 4-character group
 * padded with '=' ("xx==" for one byte, "xxx=" for two). That group is
 * preceded by CR LF when the line counter exceeds 72 and
 * MBFL_BASE64_STS_MIME_HEADER is not set.
 *
 * Afterwards filter->flush_function is invoked if it is non-NULL; its return
 * value is not examined. It is not reached when an output call fails.
 *
 * Returns 0 on success, or -1 if the output function returns a negative value.
 */
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
{
	const int pending = filter->status & 0xff;
	const int line_len = (filter->status & 0xff00) >> 8;
	const int group = filter->cache;

	filter->cache = 0;
	filter->status &= ~0xffff;

	/* flush fragments */
	if (pending >= 1) {
		if (line_len > 72 && (filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
		}

		CK((*filter->output_function)(mbfl_base64_table[(group >> 18) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(group >> 12) & 0x3f], filter->data));
		/* third position: real data only if two bytes were buffered, otherwise '=' */
		CK((*filter->output_function)(pending == 1 ? 0x3d : mbfl_base64_table[(group >> 6) & 0x3f], filter->data));
		CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
156 
157 /*
158  * BASE64 => any
159  */
/*
 * Feed one character into the streaming BASE64 decoder.
 *
 * CR, LF, SPACE, HTAB and '=' are silently ignored. Any other character
 * outside the BASE64 alphabet causes MBFL_BAD_INPUT to be passed to the
 * output function and leaves the decoder state untouched.
 *
 * filter->status counts the 6-bit values (0-3) collected in filter->cache;
 * the fourth value completes a 24-bit group, which is written as three bytes.
 *
 * Returns 0 on success, or -1 as soon as the output function returns a
 * negative value.
 */
int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter)
{
	int sextet;

	switch (c) {
	case 0x0d:	/* CR */
	case 0x0a:	/* LF */
	case 0x20:	/* SPACE */
	case 0x09:	/* HTAB */
	case 0x3d:	/* '=' */
		return 0;
	case 0x2b:	/* '+' */
		sextet = 62;
		break;
	case 0x2f:	/* '/' */
		sextet = 63;
		break;
	default:
		if (c >= 0x41 && c <= 0x5a) {		/* A - Z => 0..25 */
			sextet = c - 0x41;
		} else if (c >= 0x61 && c <= 0x7a) {	/* a - z => 26..51 */
			sextet = c - 0x61 + 26;
		} else if (c >= 0x30 && c <= 0x39) {	/* 0 - 9 => 52..61 */
			sextet = c - 0x30 + 52;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			return 0;
		}
		break;
	}

	if (filter->status == 0) {
		/* first value of a group: start a fresh cache */
		filter->cache = sextet << 18;
		filter->status = 1;
	} else if (filter->status == 1) {
		filter->cache |= sextet << 12;
		filter->status = 2;
	} else if (filter->status == 2) {
		filter->cache |= sextet << 6;
		filter->status = 3;
	} else {
		/* fourth value: emit the three decoded bytes, high byte first */
		const int triple = filter->cache | sextet;

		filter->status = 0;
		CK((*filter->output_function)((triple >> 16) & 0xff, filter->data));
		CK((*filter->output_function)((triple >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(triple & 0xff, filter->data));
	}

	return 0;
}
209 
/*
 * Finish a BASE64 decoding run.
 *
 * Resets filter->status and filter->cache, then writes whatever whole bytes
 * the incomplete group contained: one byte if two 6-bit values were buffered,
 * two bytes if three were. A single buffered value produces no output.
 *
 * Afterwards filter->flush_function is invoked if it is non-NULL; its return
 * value is not examined. It is not reached when an output call fails.
 *
 * Returns 0 on success, or -1 if the output function returns a negative value.
 */
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
{
	const int pending = filter->status;
	const int group = filter->cache;

	filter->cache = 0;
	filter->status = 0;

	/* flush fragments */
	if (pending >= 2) {
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
	}
	if (pending >= 3) {
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
232 
/*
 * Map a BASE64 alphabet character to its 6-bit value (0-63).
 * Returns -1 for any character that is not part of the alphabet
 * (including '=' and whitespace).
 */
static int decode_base64(char c)
{
	switch (c) {
	case '+':
		return 62;
	case '/':
		return 63;
	default:
		break;
	}

	if ('0' <= c && c <= '9') {
		return 52 + (c - '0');
	}
	if ('a' <= c && c <= 'z') {
		return 26 + (c - 'a');
	}
	if ('A' <= c && c <= 'Z') {
		return c - 'A';
	}
	return -1;
}
248 
mb_base64_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)249 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
250 {
251 	ZEND_ASSERT(bufsize >= 3);
252 
253 	unsigned char *p = *in, *e = p + *in_len;
254 	uint32_t *out = buf, *limit = buf + bufsize;
255 
256 	unsigned int bits = *state & 0xFF, cache = *state >> 8;
257 
258 	while (p < e && (limit - out) >= 3) {
259 		unsigned char c = *p++;
260 
261 		if (c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == '=') {
262 			continue;
263 		}
264 
265 		int value = decode_base64(c);
266 
267 		if (value == -1) {
268 			*out++ = MBFL_BAD_INPUT;
269 		} else {
270 			bits += 6;
271 			cache = (cache << 6) | (value & 0x3F);
272 			if (bits == 24) {
273 				*out++ = (cache >> 16) & 0xFF;
274 				*out++ = (cache >> 8) & 0xFF;
275 				*out++ = cache & 0xFF;
276 				bits = cache = 0;
277 			}
278 		}
279 	}
280 
281 	if (p == e) {
282 		if (bits) {
283 			/* If we reach here, there will be at least 3 spaces remaining in output buffer */
284 			if (bits == 18) {
285 				*out++ = (cache >> 10) & 0xFF;
286 				*out++ = (cache >> 2) & 0xFF;
287 			} else if (bits == 12) {
288 				*out++ = (cache >> 4) & 0xFF;
289 			}
290 		}
291 	} else {
292 		*state = (cache << 8) | (bits & 0xFF);
293 	}
294 
295 	*in_len = e - p;
296 	*in = p;
297 	return out - buf;
298 }
299 
/*
 * Encode `len` input values (only the low 8 bits of each are used) as BASE64
 * and append the result to `buf`.
 *
 * buf->state carries progress between calls:
 *   bits 0-1   number of input bytes (0-2) held over from the previous call
 *   bits 2-7   characters already written on the current line, divided by 4
 *   bits 8+    the held-over input bytes
 *
 * CR LF is inserted before a 4-character group whenever the current line
 * already holds more than 72 characters. When `end` is true and bytes are
 * left over, a final '='-padded group is written; otherwise the state is
 * saved back into buf->state.
 */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned int pending_bits = (buf->state & 0x3) * 8;
	unsigned int line_chars = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int acc = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Space reservation:
	 *  - 3 input bytes become 4 output bytes, and a trailing partial group is
	 *    padded to 4 as well: `(len + 2) * 4 / 3`
	 *  - every 76 output bytes are followed by CR+LF, i.e. a factor of 78/76
	 *  Combined that is `(len + 2) * 26 / 19`; `zend_safe_address_guarded`
	 *  guards the multiplication against overflow.
	 *  - this function may be re-entered in the middle of a long string while
	 *    a line break is already imminent, so reserve 2 more bytes for one
	 *    extra CR+LF */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (pending_bits / 8), 26, 52) / 19) + 2);

	for (const uint32_t *stop = in + len; in != stop; in++) {
		acc = (acc << 8) | (*in & 0xFF);
		pending_bits += 8;
		if (pending_bits != 24) {
			continue;
		}

		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_chars = 0;
		}
		out = mb_convert_buf_add4(out,
			mbfl_base64_table[(acc >> 18) & 0x3F],
			mbfl_base64_table[(acc >> 12) & 0x3F],
			mbfl_base64_table[(acc >> 6) & 0x3F],
			mbfl_base64_table[acc & 0x3F]);
		line_chars += 4;
		acc = 0;
		pending_bits = 0;
	}

	if (!end || !pending_bits) {
		/* Nothing to pad right now: save progress for a later call */
		buf->state = (acc << 8) | (((line_chars / 4) & 0x3F) << 2) | ((pending_bits / 8) & 0x3);
	} else {
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
		}
		if (pending_bits == 8) {
			/* one leftover byte => 2 data characters + "==" */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 2) & 0x3F],
				mbfl_base64_table[(acc & 0x3) << 4],
				'=',
				'=');
		} else {
			/* two leftover bytes => 3 data characters + "=" */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 10) & 0x3F],
				mbfl_base64_table[(acc >> 4) & 0x3F],
				mbfl_base64_table[(acc & 0xF) << 2],
				'=');
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
356