1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this file was separated from mbfilter.c
26 * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
27 * mbfilter.c is included in this package .
28 *
29 */
30
31 #include "mbfilter.h"
32 #include "mbfilter_base64.h"
33
/*
 * Forward declarations of the two file-local converters defined further down in this
 * file. They have to be visible here because the mbfl_encoding_base64 initializer
 * below stores pointers to both of them.
 */
34 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35 static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36
/*
 * Descriptor object for the BASE64 "encoding".
 *
 * The initializer is positional, so the meaning of every slot is fixed by the
 * mbfl_encoding declaration, which is not part of this file (presumably it comes
 * from mbfilter.h -- confirm there before reordering or inserting entries).
 *
 * What this file itself establishes: the identifier is mbfl_no_encoding_base64, both
 * string entries are "BASE64", and the only non-NULL function slots point at
 * mb_base64_to_wchar and mb_wchar_to_base64, which are defined below.
 */
37 const mbfl_encoding mbfl_encoding_base64 = {
38 mbfl_no_encoding_base64,
39 "BASE64",
40 "BASE64",
41 NULL,
42 NULL,
43 MBFL_ENCTYPE_GL_UNSAFE,
44 NULL,
45 NULL,
46 mb_base64_to_wchar, /* BASE64 text => decoded byte values */
47 mb_wchar_to_base64, /* byte values => BASE64 text */
48 NULL,
49 NULL,
50 };
51
/*
 * Conversion-filter table that wires the Base64 *encoder* callbacks
 * (mbfl_filt_conv_base64enc and mbfl_filt_conv_base64enc_flush).
 *
 * The initializer is positional; slot meaning is fixed by struct mbfl_convert_vtbl,
 * declared outside this file. NOTE(review): the two leading ids look like the
 * "from" (8bit) and "to" (base64) encodings -- confirm against that declaration.
 */
52 const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
53 mbfl_no_encoding_8bit,
54 mbfl_no_encoding_base64,
55 mbfl_filt_conv_common_ctor,
56 NULL,
57 mbfl_filt_conv_base64enc,
58 mbfl_filt_conv_base64enc_flush,
59 NULL,
60 };
61
/*
 * Conversion-filter table that wires the Base64 *decoder* callbacks
 * (mbfl_filt_conv_base64dec and mbfl_filt_conv_base64dec_flush).
 *
 * The initializer is positional; slot meaning is fixed by struct mbfl_convert_vtbl,
 * declared outside this file. NOTE(review): the two leading ids look like the
 * "from" (base64) and "to" (8bit) encodings -- confirm against that declaration.
 */
62 const struct mbfl_convert_vtbl vtbl_b64_8bit = {
63 mbfl_no_encoding_base64,
64 mbfl_no_encoding_8bit,
65 mbfl_filt_conv_common_ctor,
66 NULL,
67 mbfl_filt_conv_base64dec,
68 mbfl_filt_conv_base64dec_flush,
69 NULL,
70 };
71
72
/*
 * CK: evaluate `statement` once; if its value is negative, make the *enclosing*
 * function return -1 immediately. Otherwise execution falls through.
 * Only usable inside functions whose return type can hold -1.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return -1; \
		} \
	} while (0)
74
75 /*
76 * any => BASE64
77 */
/*
 * Base64 alphabet, indexed by 6-bit value (0..63): 'A'-'Z', 'a'-'z', '0'-'9', '+', '/'.
 * Written as a string literal, so the array has 65 elements: the 64 alphabet
 * characters followed by the terminating NUL byte at index 64.
 */
static const unsigned char mbfl_base64_table[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
90
/*
 * Per-byte filter: feed one input byte (the low 8 bits of `c`) into the Base64 encoder.
 *
 * State kept in `filter`:
 *   - status bits 0-7:  number of bytes currently buffered in filter->cache (0, 1 or 2)
 *   - status bits 8-15: number of characters written on the current output line
 *   - cache:            the buffered bytes, first byte in bits 16-23, second in bits 8-15
 *
 * When the third byte of a group arrives, four Base64 characters are written through
 * filter->output_function. If the line counter is already above 72, a CR LF pair is
 * written first and the counter is cleared.
 *
 * Returns 0 on success, or -1 (via CK) as soon as the output callback returns a
 * negative value.
 */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
{
	const int octet = c & 0xff;
	int line_len, group, shift;

	switch (filter->status & 0xff) {
	case 0:
		/* First byte of a group */
		filter->status++;
		filter->cache = octet << 16;
		return 0;
	case 1:
		/* Second byte of a group */
		filter->status++;
		filter->cache |= octet << 8;
		return 0;
	default:
		break;
	}

	/* Third byte: the group is complete, so clear the buffered-byte counter */
	filter->status &= ~0xff;

	line_len = (filter->status & 0xff00) >> 8;
	if (line_len > 72) {
		CK((*filter->output_function)(0x0d, filter->data)); /* CR */
		CK((*filter->output_function)(0x0a, filter->data)); /* LF */
		filter->status &= ~0xff00;
	}

	/* Account for the 4 characters about to be written (4 << 8) */
	filter->status += 0x400;

	/* Emit the 24-bit group as four 6-bit indexes, most significant first */
	group = filter->cache | octet;
	for (shift = 18; shift >= 0; shift -= 6) {
		CK((*filter->output_function)(mbfl_base64_table[(group >> shift) & 0x3f], filter->data));
	}

	return 0;
}
120
/*
 * Finish Base64 encoding: write out any bytes still buffered in the filter, padded
 * with '=' to a full 4-character group, then invoke the downstream flush callback
 * if one is set.
 *
 * The low 16 bits of filter->status and filter->cache are cleared before anything is
 * written. With one buffered byte the output is two alphabet characters plus "==";
 * with two buffered bytes it is three alphabet characters plus "=". A CR LF pair
 * precedes the group when the line counter is above 72.
 *
 * Returns 0 on success, or -1 (via CK) if the output callback returns a negative value;
 * in that case the downstream flush callback is not invoked.
 */
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
{
	const int pending = filter->status & 0xff;
	const int line_len = (filter->status & 0xff00) >> 8;
	const int group = filter->cache;

	/* Reset the encoder state up front */
	filter->status &= ~0xffff;
	filter->cache = 0;

	if (pending >= 1) {
		if (line_len > 72) {
			CK((*filter->output_function)(0x0d, filter->data)); /* CR */
			CK((*filter->output_function)(0x0a, filter->data)); /* LF */
		}

		CK((*filter->output_function)(mbfl_base64_table[(group >> 18) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(group >> 12) & 0x3f], filter->data));

		/* Third position: '=' when only one byte was buffered, otherwise a real character */
		CK((*filter->output_function)(
			pending == 1 ? 0x3d : mbfl_base64_table[(group >> 6) & 0x3f],
			filter->data));

		/* Fourth position is always '=' for a partial group */
		CK((*filter->output_function)(0x3d, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
153
154 /*
155 * BASE64 => any
156 */
/*
 * Per-character filter: feed one Base64 character into the decoder.
 *
 * CR, LF, SPACE, HTAB and '=' are silently skipped. Any other character outside the
 * Base64 alphabet causes MBFL_BAD_INPUT to be written to the output and leaves the
 * decoder state untouched. Valid characters are accumulated in filter->cache
 * (filter->status counts how many, 0..3); the fourth one completes a 24-bit group,
 * which is written out as three bytes.
 *
 * Returns 0 on success, or -1 (via CK) if the output callback returns a negative value.
 */
int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter)
{
	int sextet, group;

	switch (c) {
	case 0x0d: /* CR */
	case 0x0a: /* LF */
	case 0x20: /* SPACE */
	case 0x09: /* HTAB */
	case 0x3d: /* '=' */
		return 0;
	default:
		break;
	}

	if (c >= 0x41 && c <= 0x5a) {        /* A - Z => 0..25 */
		sextet = c - 0x41;
	} else if (c >= 0x61 && c <= 0x7a) { /* a - z => 26..51 */
		sextet = c - 0x61 + 26;
	} else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 => 52..61 */
		sextet = c - 0x30 + 52;
	} else if (c == 0x2b) {              /* '+' */
		sextet = 62;
	} else if (c == 0x2f) {              /* '/' */
		sextet = 63;
	} else {
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		return 0;
	}
	sextet &= 0x3f;

	if (filter->status == 0) {
		/* First character starts a fresh group (overwrites the cache) */
		filter->status = 1;
		filter->cache = sextet << 18;
	} else if (filter->status == 1) {
		filter->status = 2;
		filter->cache |= sextet << 12;
	} else if (filter->status == 2) {
		filter->status = 3;
		filter->cache |= sextet << 6;
	} else {
		/* Fourth character: group complete, emit three bytes */
		filter->status = 0;
		group = sextet | filter->cache;
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(group & 0xff, filter->data));
	}

	return 0;
}
206
/*
 * Finish Base64 decoding: write out the bytes that can be recovered from an
 * incomplete final group, then invoke the downstream flush callback if one is set.
 *
 * filter->status and filter->cache are reset to 0 before anything is written.
 * With two buffered characters one byte is emitted; with three, two bytes are
 * emitted. With zero or one buffered character nothing is emitted.
 *
 * Returns 0 on success, or -1 (via CK) if the output callback returns a negative value;
 * in that case the downstream flush callback is not invoked.
 */
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
{
	const int pending = filter->status;
	const int group = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	if (pending >= 2) {
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
	}
	if (pending >= 3) {
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
229
/*
 * Map one Base64 alphabet character to its 6-bit value.
 *
 *   'A'-'Z' => 0..25, 'a'-'z' => 26..51, '0'-'9' => 52..61, '+' => 62, '/' => 63
 *
 * Returns -1 for every other character (including '=' and whitespace).
 */
static int decode_base64(char c)
{
	if (c >= 'A' && c <= 'Z') {
		return c - 'A';
	}
	if (c >= 'a' && c <= 'z') {
		return 26 + (c - 'a');
	}
	if (c >= '0' && c <= '9') {
		return 52 + (c - '0');
	}

	switch (c) {
	case '+':
		return 62;
	case '/':
		return 63;
	default:
		return -1;
	}
}
245
mb_base64_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)246 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
247 {
248 ZEND_ASSERT(bufsize >= 3);
249
250 unsigned char *p = *in, *e = p + *in_len;
251 uint32_t *out = buf, *limit = buf + bufsize;
252
253 unsigned int bits = *state & 0xFF, cache = *state >> 8;
254
255 while (p < e && (limit - out) >= 3) {
256 unsigned char c = *p++;
257
258 if (c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == '=') {
259 continue;
260 }
261
262 int value = decode_base64(c);
263
264 if (value == -1) {
265 *out++ = MBFL_BAD_INPUT;
266 } else {
267 bits += 6;
268 cache = (cache << 6) | (value & 0x3F);
269 if (bits == 24) {
270 *out++ = (cache >> 16) & 0xFF;
271 *out++ = (cache >> 8) & 0xFF;
272 *out++ = cache & 0xFF;
273 bits = cache = 0;
274 }
275 }
276 }
277
278 if (p == e) {
279 if (bits) {
280 /* If we reach here, there will be at least 3 spaces remaining in output buffer */
281 if (bits == 18) {
282 *out++ = (cache >> 10) & 0xFF;
283 *out++ = (cache >> 2) & 0xFF;
284 } else if (bits == 12) {
285 *out++ = (cache >> 4) & 0xFF;
286 }
287 }
288 } else {
289 *state = (cache << 8) | (bits & 0xFF);
290 }
291
292 *in_len = e - p;
293 *in = p;
294 return out - buf;
295 }
296
/*
 * Encode `len` byte values (the low 8 bits of each element of `in`) as Base64 text
 * appended to `buf`, inserting CR LF once more than 72 characters have been written
 * on the current line.
 *
 * buf->state carries an unfinished group between calls:
 *   bits 0-1: number of bytes accumulated (0..2)
 *   bits 2-7: characters written on the current line, divided by 4
 *   bits 8+:  the accumulated byte values
 *
 * When `end` is true and a partial group is pending, it is written out padded with '='
 * ("xx==" for one byte, "xxx=" for two). Otherwise the state is saved for the next call.
 */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned int pending_bits = (buf->state & 0x3) * 8;
	unsigned int line_chars = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int acc = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);

	/* Output space needed:
	 *   - 3 input bytes become 4 output bytes, and a trailing partial group is padded
	 *     to 4 as well: (n + 2) * 4 / 3
	 *   - each 76 bytes of output gain a 2-byte CR LF: multiply by 78/76
	 *   => (n + 2) * 26 / 19, i.e. (n * 26 + 52) / 19, with the multiplication done
	 *      through `zend_safe_address_guarded` so that overflow is checked
	 *   - n includes the bytes carried over from the previous call
	 *   - plus 2 for one extra CR LF, because an earlier call may have left the line
	 *     just short of the wrap point */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (pending_bits / 8), 26, 52) / 19) + 2);

	for (uint32_t *stop = in + len; in != stop; in++) {
		acc = (acc << 8) | (*in & 0xFF);
		pending_bits += 8;
		if (pending_bits != 24) {
			continue;
		}

		/* A full 3-byte group is ready */
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_chars = 0;
		}
		out = mb_convert_buf_add4(out,
			mbfl_base64_table[(acc >> 18) & 0x3F],
			mbfl_base64_table[(acc >> 12) & 0x3F],
			mbfl_base64_table[(acc >> 6) & 0x3F],
			mbfl_base64_table[acc & 0x3F]);
		line_chars += 4;
		acc = 0;
		pending_bits = 0;
	}

	if (!end || !pending_bits) {
		/* Nothing to pad right now: persist the partial group and line position */
		buf->state = (acc << 8) | (((line_chars / 4) & 0x3F) << 2) | ((pending_bits / 8) & 0x3);
	} else {
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
		}
		if (pending_bits == 8) {
			/* One leftover byte: 6 bits + 2 bits (shifted up to a 6-bit index), then "==" */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 2) & 0x3F],
				mbfl_base64_table[(acc & 0x3) << 4],
				'=',
				'=');
		} else {
			/* Two leftover bytes: 6 + 6 + 4 bits (last shifted up to a 6-bit index), then "=" */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 10) & 0x3F],
				mbfl_base64_table[(acc >> 4) & 0x3F],
				mbfl_base64_table[(acc & 0xF) << 2],
				'=');
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
353