1  /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
/*
 * The source code included in this file was separated from mbfilter.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
 * mbfilter.c is included in this package.
 *
 */
30 
31 #include "mbfilter.h"
32 #include "mbfilter_base64.h"
33 
34 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35 static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36 
37 const mbfl_encoding mbfl_encoding_base64 = {
38 	mbfl_no_encoding_base64,
39 	"BASE64",
40 	"BASE64",
41 	NULL,
42 	NULL,
43 	MBFL_ENCTYPE_GL_UNSAFE,
44 	NULL,
45 	NULL,
46 	mb_base64_to_wchar,
47 	mb_wchar_to_base64,
48 	NULL
49 };
50 
51 const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
52 	mbfl_no_encoding_8bit,
53 	mbfl_no_encoding_base64,
54 	mbfl_filt_conv_common_ctor,
55 	NULL,
56 	mbfl_filt_conv_base64enc,
57 	mbfl_filt_conv_base64enc_flush,
58 	NULL,
59 };
60 
61 const struct mbfl_convert_vtbl vtbl_b64_8bit = {
62 	mbfl_no_encoding_base64,
63 	mbfl_no_encoding_8bit,
64 	mbfl_filt_conv_common_ctor,
65 	NULL,
66 	mbfl_filt_conv_base64dec,
67 	mbfl_filt_conv_base64dec_flush,
68 	NULL,
69 };
70 
71 
/* Evaluate `statement` exactly once; when it yields a negative value, make the
 * enclosing function return -1 right away. Only usable inside functions that
 * return an integer type. */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
73 
74 /*
75  * any => BASE64
76  */
/* BASE64 alphabet: index is a 6-bit value (0..63), element is the character
 * emitted for it. A 65th element holding NUL follows the alphabet. */
static const unsigned char mbfl_base64_table[] = {
	/*  0..25 */
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
	'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
	/* 26..51 */
	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
	'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	/* 52..61 */
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	/* 62, 63, terminator */
	'+', '/', '\0'
};
89 
/*
 * Streaming BASE64 encoder step: consume one input byte (low 8 bits of `c`).
 *
 * filter->status as used here:
 *   bits 0-7   number of bytes currently buffered in filter->cache (0..2)
 *   bits 8-15  number of BASE64 characters emitted on the current output line
 *
 * The first two bytes of each 3-byte group are only buffered. The third byte
 * completes the group and four BASE64 characters are written through
 * filter->output_function, preceded by CR LF when the current line already
 * holds more than 72 characters.
 *
 * Returns 0 on success, -1 as soon as an output call returns a negative value.
 */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
{
	int buffered = filter->status & 0xff;
	int octet = c & 0xff;

	if (buffered == 0) {
		/* First byte of a group: becomes the top 8 of the 24 bits */
		filter->cache = octet << 16;
		filter->status++;
		return 0;
	}

	if (buffered == 1) {
		/* Second byte of a group: middle 8 bits */
		filter->cache |= octet << 8;
		filter->status++;
		return 0;
	}

	/* Third byte: the group is complete, so the buffered-byte count goes back to zero */
	filter->status &= ~0xff;

	int line_chars = (filter->status & 0xff00) >> 8;
	if (line_chars > 72) {
		CK((*filter->output_function)(0x0d, filter->data));		/* CR */
		CK((*filter->output_function)(0x0a, filter->data));		/* LF */
		filter->status &= ~0xff00;	/* new line: restart the per-line counter */
	}

	/* Account for the four characters about to be written (4 << 8) */
	filter->status += 0x400;

	int group = filter->cache | octet;
	CK((*filter->output_function)(mbfl_base64_table[(group >> 18) & 0x3f], filter->data));
	CK((*filter->output_function)(mbfl_base64_table[(group >> 12) & 0x3f], filter->data));
	CK((*filter->output_function)(mbfl_base64_table[(group >> 6) & 0x3f], filter->data));
	CK((*filter->output_function)(mbfl_base64_table[group & 0x3f], filter->data));

	return 0;
}
119 
/*
 * Finish a streaming BASE64 encode: write out any partially filled 3-byte
 * group, padded with '=' to four characters, then forward the flush to
 * filter->flush_function when one is set.
 *
 * The low 16 bits of filter->status (buffered-byte count and per-line
 * character count) and filter->cache are cleared before anything is written.
 *
 * Returns 0 on success, -1 as soon as an output call returns a negative value
 * (in which case the downstream flush function is not called).
 */
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
{
	int buffered = filter->status & 0xff;
	int line_chars = (filter->status & 0xff00) >> 8;
	int leftover = filter->cache;

	filter->status &= ~0xffff;
	filter->cache = 0;

	if (buffered >= 1) {
		if (line_chars > 72) {
			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
		}

		/* The first two characters always carry data bits */
		CK((*filter->output_function)(mbfl_base64_table[(leftover >> 18) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(leftover >> 12) & 0x3f], filter->data));

		/* Third character: data when two bytes were buffered, padding when only one was */
		int third = (buffered == 1) ? 0x3d : mbfl_base64_table[(leftover >> 6) & 0x3f];
		CK((*filter->output_function)(third, filter->data));

		/* Fourth character is always padding for a partial group */
		CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
152 
153 /*
154  * BASE64 => any
155  */
/*
 * Streaming BASE64 decoder step: consume one input character `c`.
 *
 * CR, LF, SPACE, HTAB and '=' are skipped silently. Any other character
 * outside the BASE64 alphabet makes the filter emit MBFL_BAD_INPUT and leaves
 * the decoder state untouched.
 *
 * filter->status counts the 6-bit values collected so far for the current
 * group (0..3) and filter->cache accumulates them. The fourth value completes
 * the group and three decoded bytes are written through
 * filter->output_function.
 *
 * Returns 0 on success, -1 as soon as an output call returns a negative value.
 */
int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter)
{
	int sextet;

	switch (c) {
	case 0x0d:	/* CR */
	case 0x0a:	/* LF */
	case 0x20:	/* SPACE */
	case 0x09:	/* HTAB */
	case 0x3d:	/* '=' */
		return 0;
	default:
		break;
	}

	/* Map the character to its 6-bit value; every branch yields 0..63 */
	if (c >= 0x41 && c <= 0x5a) {			/* A - Z => 0..25 */
		sextet = c - 0x41;
	} else if (c >= 0x61 && c <= 0x7a) {	/* a - z => 26..51 */
		sextet = c - 0x61 + 26;
	} else if (c >= 0x30 && c <= 0x39) {	/* 0 - 9 => 52..61 */
		sextet = c - 0x30 + 52;
	} else if (c == 0x2b) {					/* '+' */
		sextet = 62;
	} else if (c == 0x2f) {					/* '/' */
		sextet = 63;
	} else {
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		return 0;
	}

	if (filter->status == 0) {
		filter->cache = sextet << 18;
		filter->status = 1;
	} else if (filter->status == 1) {
		filter->cache |= sextet << 12;
		filter->status = 2;
	} else if (filter->status == 2) {
		filter->cache |= sextet << 6;
		filter->status = 3;
	} else {
		/* Fourth value: 24 bits are complete, emit them as three bytes */
		int group = sextet | filter->cache;
		filter->status = 0;
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(group & 0xff, filter->data));
	}

	return 0;
}
205 
/*
 * Finish a streaming BASE64 decode: emit the whole bytes contained in an
 * incomplete group, then forward the flush to filter->flush_function when one
 * is set.
 *
 * With 2 collected 6-bit values one byte is written, with 3 values two bytes
 * are written; a single collected value produces no output. filter->status
 * and filter->cache are reset to 0 before anything is written.
 *
 * Returns 0 on success, -1 as soon as an output call returns a negative value
 * (in which case the downstream flush function is not called).
 */
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
{
	int collected = filter->status;
	int leftover = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	if (collected >= 2) {
		CK((*filter->output_function)((leftover >> 16) & 0xff, filter->data));
	}
	if (collected >= 3) {
		CK((*filter->output_function)((leftover >> 8) & 0xff, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
228 
/*
 * Map one BASE64 alphabet character to its 6-bit value.
 *
 * Returns 0..63 for 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/', and -1 for every
 * other character (including the padding character '=').
 */
static int decode_base64(char c)
{
	if (c == '+') {
		return 62;
	}
	if (c == '/') {
		return 63;
	}
	if ('0' <= c && c <= '9') {
		return 52 + (c - '0');
	}
	if ('A' <= c && c <= 'Z') {
		return c - 'A';
	}
	if ('a' <= c && c <= 'z') {
		return 26 + (c - 'a');
	}
	return -1;
}
244 
mb_base64_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)245 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
246 {
247 	ZEND_ASSERT(bufsize >= 3);
248 
249 	unsigned char *p = *in, *e = p + *in_len;
250 	uint32_t *out = buf, *limit = buf + bufsize;
251 
252 	unsigned int bits = *state & 0xFF, cache = *state >> 8;
253 
254 	while (p < e && (limit - out) >= 3) {
255 		unsigned char c = *p++;
256 
257 		if (c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == '=') {
258 			continue;
259 		}
260 
261 		int value = decode_base64(c);
262 
263 		if (value == -1) {
264 			*out++ = MBFL_BAD_INPUT;
265 		} else {
266 			bits += 6;
267 			cache = (cache << 6) | (value & 0x3F);
268 			if (bits == 24) {
269 				*out++ = (cache >> 16) & 0xFF;
270 				*out++ = (cache >> 8) & 0xFF;
271 				*out++ = cache & 0xFF;
272 				bits = cache = 0;
273 			}
274 		}
275 	}
276 
277 	if (p == e) {
278 		if (bits) {
279 			/* If we reach here, there will be at least 3 spaces remaining in output buffer */
280 			if (bits == 18) {
281 				*out++ = (cache >> 10) & 0xFF;
282 				*out++ = (cache >> 2) & 0xFF;
283 			} else if (bits == 12) {
284 				*out++ = (cache >> 4) & 0xFF;
285 			}
286 		}
287 	} else {
288 		*state = (cache << 8) | (bits & 0xFF);
289 	}
290 
291 	*in_len = e - p;
292 	*in = p;
293 	return out - buf;
294 }
295 
/*
 * Buffer-based BASE64 encoder.
 *
 * Takes `len` input elements (only the low 8 bits of each are used) and
 * appends their BASE64 form to the conversion buffer `buf`. A CR LF pair is
 * inserted before a 4-character group whenever the current line already holds
 * more than 72 characters. When `end` is true and a partial 3-byte group is
 * pending, it is written out padded with '='.
 *
 * buf->state carries progress between calls:
 *   bits 0-1   number of pending input bytes (0..2)
 *   bits 2-7   characters on the current output line, divided by 4
 *   bits 8+    the pending input bytes themselves
 */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned int pending_bits = (buf->state & 0x3) * 8;
	unsigned int line_chars = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int acc = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);

	/* Output space needed:
	 * - n input bytes become 4 characters per 3 bytes, with a partial final
	 *   group still padded to 4 characters: (n + 2) * 4 / 3
	 * - each 76 characters of output are followed by CR+LF (2 more bytes):
	 *   multiply by 78/76
	 * Together that is (n + 2) * 26 / 19, with n including bytes carried over
	 * from a previous call. `zend_safe_address_guarded` is used so that the
	 * multiplication is checked for overflow.
	 * Since an earlier call may have left the line just short of a break, two
	 * more bytes are reserved for one extra CR+LF. */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (pending_bits / 8), 26, 52) / 19) + 2);

	for (size_t i = 0; i < len; i++) {
		acc = (acc << 8) | (in[i] & 0xFF);
		pending_bits += 8;
		if (pending_bits != 24) {
			continue;
		}

		/* A full 3-byte group is available */
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_chars = 0;
		}
		out = mb_convert_buf_add4(out,
			mbfl_base64_table[(acc >> 18) & 0x3F],
			mbfl_base64_table[(acc >> 12) & 0x3F],
			mbfl_base64_table[(acc >> 6) & 0x3F],
			mbfl_base64_table[acc & 0x3F]);
		line_chars += 4;
		pending_bits = 0;
		acc = 0;
	}

	if (!end || !pending_bits) {
		/* Nothing to pad (or more input may follow): save progress for the next call */
		buf->state = (acc << 8) | (((line_chars / 4) & 0x3F) << 2) | ((pending_bits / 8) & 0x3);
	} else {
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_chars = 0;
		}
		if (pending_bits == 8) {
			/* One leftover byte: two data characters, two padding characters */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 2) & 0x3F],
				mbfl_base64_table[(acc & 0x3) << 4],
				'=', '=');
		} else {
			/* Two leftover bytes: three data characters, one padding character */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 10) & 0x3F],
				mbfl_base64_table[(acc >> 4) & 0x3F],
				mbfl_base64_table[(acc & 0xF) << 2],
				'=');
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
352