1  /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
 * mbfilter.c is included in this package.
28  *
29  */
30 
31 #include "mbfilter.h"
32 #include "mbfilter_base64.h"
33 
/*
 * Forward declarations of the two buffer-based converters. They are referenced
 * by the mbfl_encoding_base64 initializer below and defined at the end of this
 * file.
 */
static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36 
/*
 * Encoding descriptor for BASE64.
 *
 * NOTE(review): the initializer is positional and the layout of mbfl_encoding
 * is declared outside this file; the remarks below only describe the values
 * supplied here -- confirm the slot meanings against the struct definition.
 */
const mbfl_encoding mbfl_encoding_base64 = {
	mbfl_no_encoding_base64,
	"BASE64",
	"BASE64",
	NULL,
	NULL,
	MBFL_ENCTYPE_GL_UNSAFE,
	NULL,
	NULL,
	mb_base64_to_wchar,	/* BASE64 text -> decoded byte values; defined later in this file */
	mb_wchar_to_base64,	/* byte values -> BASE64 text; defined later in this file */
	NULL,
	NULL,
};
51 
/*
 * Streaming conversion table for the 8bit -> BASE64 direction (encoding).
 * It pairs mbfl_filt_conv_base64enc, which consumes one input value per call,
 * with mbfl_filt_conv_base64enc_flush, which emits the padded final group.
 *
 * NOTE(review): slot meanings are fixed by struct mbfl_convert_vtbl, which is
 * declared outside this file -- confirm there what the NULL entries stand for.
 */
const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
	mbfl_no_encoding_8bit,
	mbfl_no_encoding_base64,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_base64enc,
	mbfl_filt_conv_base64enc_flush,
	NULL,
};
61 
/*
 * Streaming conversion table for the BASE64 -> 8bit direction (decoding).
 * It pairs mbfl_filt_conv_base64dec, which consumes one BASE64 character per
 * call, with mbfl_filt_conv_base64dec_flush, which emits the bytes of a
 * trailing partial group.
 *
 * NOTE(review): slot meanings are fixed by struct mbfl_convert_vtbl, which is
 * declared outside this file -- confirm there what the NULL entries stand for.
 */
const struct mbfl_convert_vtbl vtbl_b64_8bit = {
	mbfl_no_encoding_base64,
	mbfl_no_encoding_8bit,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_base64dec,
	mbfl_filt_conv_base64dec_flush,
	NULL,
};
71 
72 
/*
 * Evaluate `statement` once; if its value is negative, make the enclosing
 * function return -1. Used to propagate failures from output callbacks.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return -1; \
		} \
	} while (0)
74 
75 /*
76  * any => BASE64
77  */
/*
 * BASE64 alphabet indexed by 6-bit value (0..63). The string literal's
 * terminating NUL supplies a 65th entry of 0x00, so the array has 65 elements.
 * The character constants assume an ASCII execution character set, as the
 * character comparisons in decode_base64() in this file already do.
 */
static const unsigned char mbfl_base64_table[] =
	"ABCDEFGHIJKLM"
	"NOPQRSTUVWXYZ"
	"abcdefghijklm"
	"nopqrstuvwxyz"
	"0123456789+/";
90 
/*
 * Streaming BASE64 encoder: accepts one input byte per call.
 *
 * filter->status packs two counters: the low byte is the number of input
 * bytes buffered in filter->cache (0..2), and bits 8-15 hold the number of
 * characters already written on the current output line. The first two bytes
 * of every triple are only buffered; the third one triggers output of four
 * BASE64 characters, preceded by CR LF when the current line already holds
 * more than 72 characters.
 *
 * Only the low 8 bits of `c` are used.
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
{
	const int byte = c & 0xff;
	const int buffered = filter->status & 0xff;

	if (buffered == 0) {
		/* first byte of a triple: occupies bits 16..23 of the cache */
		filter->cache = byte << 16;
		filter->status++;
		return 0;
	}
	if (buffered == 1) {
		/* second byte of a triple: occupies bits 8..15 of the cache */
		filter->cache |= byte << 8;
		filter->status++;
		return 0;
	}

	/* third byte: the triple is complete, reset the buffered-byte counter */
	filter->status &= ~0xff;

	int line_len = (filter->status & 0xff00) >> 8;
	if (line_len > 72) {
		CK((*filter->output_function)(0x0d, filter->data));		/* CR */
		CK((*filter->output_function)(0x0a, filter->data));		/* LF */
		filter->status &= ~0xff00;
	}
	/* account for the four characters about to be written (4 << 8) */
	filter->status += 0x400;

	int group = filter->cache | byte;
	for (int shift = 18; shift >= 0; shift -= 6) {
		CK((*filter->output_function)(mbfl_base64_table[(group >> shift) & 0x3f], filter->data));
	}

	return 0;
}
120 
/*
 * Finish a streaming BASE64 encode.
 *
 * Clears the encoder counters (low 16 bits of filter->status) and the cache,
 * then, if one or two input bytes were still buffered, writes the final
 * four-character group padded with '='. A CR LF pair precedes that group when
 * the current line already holds more than 72 characters. Finally the
 * downstream flush function is invoked if one is set; its result is not
 * inspected.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
{
	const int buffered = filter->status & 0xff;
	const int line_len = (filter->status & 0xff00) >> 8;
	const int group = filter->cache;

	filter->status &= ~0xffff;
	filter->cache = 0;

	if (buffered > 0) {
		if (line_len > 72) {
			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
		}

		CK((*filter->output_function)(mbfl_base64_table[(group >> 18) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(group >> 12) & 0x3f], filter->data));

		/* one buffered byte -> "xx==", two buffered bytes -> "xxx=" */
		int third = (buffered == 1) ? 0x3d : mbfl_base64_table[(group >> 6) & 0x3f];
		CK((*filter->output_function)(third, filter->data));
		CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
153 
154 /*
155  * BASE64 => any
156  */
/*
 * Streaming BASE64 decoder: accepts one input character per call.
 *
 * CR, LF, space, horizontal tab and '=' are silently skipped. Any other
 * character outside the BASE64 alphabet causes MBFL_BAD_INPUT to be sent to
 * the output function and leaves the decoder state untouched.
 *
 * filter->status counts the 6-bit values collected so far (0..3) and
 * filter->cache accumulates them; the fourth value completes a 24-bit group,
 * which is written out as three bytes.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter)
{
	int sextet;

	switch (c) {
	case 0x0d:	/* CR */
	case 0x0a:	/* LF */
	case 0x20:	/* SPACE */
	case 0x09:	/* HTAB */
	case 0x3d:	/* '=' */
		return 0;
	default:
		break;
	}

	if (c >= 0x41 && c <= 0x5a) {		/* A - Z => 0..25 */
		sextet = c - 0x41;
	} else if (c >= 0x61 && c <= 0x7a) {	/* a - z => 26..51 */
		sextet = c - 0x61 + 26;
	} else if (c >= 0x30 && c <= 0x39) {	/* 0 - 9 => 52..61 */
		sextet = c - 0x30 + 52;
	} else if (c == 0x2b) {			/* '+' */
		sextet = 62;
	} else if (c == 0x2f) {			/* '/' */
		sextet = 63;
	} else {
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		return 0;
	}

	if (filter->status == 0) {
		filter->cache = sextet << 18;
		filter->status = 1;
	} else if (filter->status == 1) {
		filter->cache |= sextet << 12;
		filter->status = 2;
	} else if (filter->status == 2) {
		filter->cache |= sextet << 6;
		filter->status = 3;
	} else {
		/* fourth value: the 24-bit group is complete, emit its three bytes */
		int group = filter->cache | sextet;

		filter->status = 0;
		for (int shift = 16; shift >= 0; shift -= 8) {
			CK((*filter->output_function)((group >> shift) & 0xff, filter->data));
		}
	}

	return 0;
}
206 
/*
 * Finish a streaming BASE64 decode.
 *
 * Resets filter->status and filter->cache, then writes whatever whole bytes
 * the trailing partial group contained: one byte if two 6-bit values had been
 * collected, two bytes if three had been collected. A single leftover 6-bit
 * value yields no output. Finally the downstream flush function is invoked if
 * one is set; its result is not inspected.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
{
	const int collected = filter->status;
	const int group = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	if (collected >= 2) {
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
	}
	if (collected >= 3) {
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
229 
/*
 * Map one BASE64 alphabet character to its 6-bit value.
 *
 * Returns 0..25 for 'A'..'Z', 26..51 for 'a'..'z', 52..61 for '0'..'9',
 * 62 for '+', 63 for '/', and -1 for every other character.
 */
static int decode_base64(char c)
{
	switch (c) {
	case '+':
		return 62;
	case '/':
		return 63;
	default:
		break;
	}

	if ('A' <= c && c <= 'Z') {
		return c - 'A';
	}
	if ('a' <= c && c <= 'z') {
		return 26 + (c - 'a');
	}
	if ('0' <= c && c <= '9') {
		return 52 + (c - '0');
	}
	return -1;
}
245 
mb_base64_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)246 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
247 {
248 	ZEND_ASSERT(bufsize >= 3);
249 
250 	unsigned char *p = *in, *e = p + *in_len;
251 	uint32_t *out = buf, *limit = buf + bufsize;
252 
253 	unsigned int bits = *state & 0xFF, cache = *state >> 8;
254 
255 	while (p < e && (limit - out) >= 3) {
256 		unsigned char c = *p++;
257 
258 		if (c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == '=') {
259 			continue;
260 		}
261 
262 		int value = decode_base64(c);
263 
264 		if (value == -1) {
265 			*out++ = MBFL_BAD_INPUT;
266 		} else {
267 			bits += 6;
268 			cache = (cache << 6) | (value & 0x3F);
269 			if (bits == 24) {
270 				*out++ = (cache >> 16) & 0xFF;
271 				*out++ = (cache >> 8) & 0xFF;
272 				*out++ = cache & 0xFF;
273 				bits = cache = 0;
274 			}
275 		}
276 	}
277 
278 	if (p == e) {
279 		if (bits) {
280 			/* If we reach here, there will be at least 3 spaces remaining in output buffer */
281 			if (bits == 18) {
282 				*out++ = (cache >> 10) & 0xFF;
283 				*out++ = (cache >> 2) & 0xFF;
284 			} else if (bits == 12) {
285 				*out++ = (cache >> 4) & 0xFF;
286 			}
287 		}
288 	} else {
289 		*state = (cache << 8) | (bits & 0xFF);
290 	}
291 
292 	*in_len = e - p;
293 	*in = p;
294 	return out - buf;
295 }
296 
/*
 * Buffer-based BASE64 encoder.
 *
 * Takes `len` byte values from `in` (only the low 8 bits of each entry are
 * used) and appends BASE64 text to `buf`, inserting CR LF before a group when
 * the current line already holds more than 72 characters.
 *
 * buf->state carries progress between calls:
 *   bits 0-1  number of input bytes buffered (0..2)
 *   bits 2-7  characters on the current output line, divided by 4
 *   bits 8+   the buffered input bytes themselves
 *
 * When `end` is true and bytes are still buffered, the final '='-padded group
 * is written; otherwise the state is stored back for the next call.
 */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned int pending_bits = (buf->state & 0x3) * 8;
	unsigned int line_chars = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int cache = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Space estimate: every 3 input bytes become 4 output bytes, and a partial
	 * trailing group is still padded to 4, i.e. `(len + 2) * 4 / 3`.
	 *
	 * Each 76 bytes of output are additionally followed by CR+LF (2 bytes), so
	 * that figure is scaled by 78/76; combined, the factor is 26/19.
	 * `zend_safe_address_guarded` is used so that the multiplication is checked
	 * for overflow.
	 *
	 * This function may be entered several times for one long string and may
	 * already be close to a line break on entry, hence the extra 2 bytes for
	 * one more CR+LF pair. */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (pending_bits / 8), 26, 52) / 19) + 2);

	for (; len > 0; len--, in++) {
		cache = (cache << 8) | (*in & 0xFF);
		pending_bits += 8;
		if (pending_bits != 24) {
			continue;
		}

		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_chars = 0;
		}
		out = mb_convert_buf_add4(out,
			mbfl_base64_table[(cache >> 18) & 0x3F],
			mbfl_base64_table[(cache >> 12) & 0x3F],
			mbfl_base64_table[(cache >> 6) & 0x3F],
			mbfl_base64_table[cache & 0x3F]);
		line_chars += 4;
		pending_bits = 0;
		cache = 0;
	}

	if (!end || pending_bits == 0) {
		buf->state = (cache << 8) | (((line_chars / 4) & 0x3F) << 2) | ((pending_bits / 8) & 0x3);
	} else {
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
		}
		if (pending_bits == 8) {
			/* one leftover byte: two characters plus "==" */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(cache >> 2) & 0x3F],
				mbfl_base64_table[(cache & 0x3) << 4],
				'=',
				'=');
		} else {
			/* two leftover bytes: three characters plus "=" */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(cache >> 10) & 0x3F],
				mbfl_base64_table[(cache >> 4) & 0x3F],
				mbfl_base64_table[(cache & 0xF) << 2],
				'=');
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
353