1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this file was separated from mbfilter.c
26 * by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
27 * mbfilter.c is included in this package .
28 *
29 */
30
31 #include "mbfilter.h"
32 #include "mbfilter_base64.h"
33
34 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35 static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36
37 const mbfl_encoding mbfl_encoding_base64 = {
38 mbfl_no_encoding_base64,
39 "BASE64",
40 "BASE64",
41 NULL,
42 NULL,
43 MBFL_ENCTYPE_GL_UNSAFE,
44 NULL,
45 NULL,
46 mb_base64_to_wchar,
47 mb_wchar_to_base64,
48 NULL
49 };
50
51 const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
52 mbfl_no_encoding_8bit,
53 mbfl_no_encoding_base64,
54 mbfl_filt_conv_common_ctor,
55 NULL,
56 mbfl_filt_conv_base64enc,
57 mbfl_filt_conv_base64enc_flush,
58 NULL,
59 };
60
61 const struct mbfl_convert_vtbl vtbl_b64_8bit = {
62 mbfl_no_encoding_base64,
63 mbfl_no_encoding_8bit,
64 mbfl_filt_conv_common_ctor,
65 NULL,
66 mbfl_filt_conv_base64dec,
67 mbfl_filt_conv_base64dec_flush,
68 NULL,
69 };
70
71
/* Evaluate `statement` once; if it yields a negative value, make the enclosing
 * function return -1. Only usable inside functions returning an integer type. */
#define CK(statement) \
	do { \
		if ((statement) < 0) \
			return (-1); \
	} while (0)
73
74 /*
75 * any => BASE64
76 */
/* Base64 alphabet: entry i is the ASCII character that encodes the 6-bit value i.
 * The string literal's implicit terminating NUL supplies the trailing 65th entry (0x00);
 * every lookup in this file uses an index below 64. */
static const unsigned char mbfl_base64_table[] =
	"ABCDEFGHIJKLM"
	"NOPQRSTUVWXYZ"
	"abcdefghijklm"
	"nopqrstuvwxyz"
	"0123456789+/";
89
/*
 * Encoder filter: accept one input byte (low 8 bits of `c`) and emit Base64 text
 * through filter->output_function once three bytes have been collected.
 *
 * filter->status layout used here:
 *   bits 0-7  : number of input bytes currently buffered in filter->cache (0..2)
 *   bits 8-15 : number of Base64 characters already written on the current line
 * filter->cache holds the buffered bytes, first byte in bits 16-23, second in bits 8-15.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
{
	const int octet = c & 0xff;

	switch (filter->status & 0xff) {
	case 0:
		/* first byte of a group: start a fresh cache */
		filter->status++;
		filter->cache = octet << 16;
		return 0;
	case 1:
		/* second byte of a group */
		filter->status++;
		filter->cache |= octet << 8;
		return 0;
	default:
		break;
	}

	/* Third byte: the group is complete, so reset the buffered-byte counter */
	filter->status &= ~0xff;

	/* Break the line with CR+LF once more than 72 characters are on it */
	if (((filter->status & 0xff00) >> 8) > 72) {
		CK((*filter->output_function)(0x0d, filter->data)); /* CR */
		CK((*filter->output_function)(0x0a, filter->data)); /* LF */
		filter->status &= ~0xff00;
	}

	/* Account for the four characters about to be written (4 << 8) */
	filter->status += 0x400;

	const int group = filter->cache | octet;
	for (int shift = 18; shift >= 0; shift -= 6) {
		CK((*filter->output_function)(mbfl_base64_table[(group >> shift) & 0x3f], filter->data));
	}

	return 0;
}
119
/*
 * Encoder flush: write out a final, '='-padded group for any one or two input bytes
 * still buffered, reset the low 16 bits of filter->status and the cache, then invoke
 * filter->flush_function if one is set.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a negative value
 * (in which case the flush function is not invoked).
 */
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
{
	const int pending = filter->status & 0xff;            /* buffered input bytes */
	const int line_len = (filter->status & 0xff00) >> 8;  /* characters on the current line */
	const int group = filter->cache;

	/* State is cleared before anything is written */
	filter->status &= ~0xffff;
	filter->cache = 0;

	if (pending >= 1) {
		if (line_len > 72) {
			CK((*filter->output_function)(0x0d, filter->data)); /* CR */
			CK((*filter->output_function)(0x0a, filter->data)); /* LF */
		}

		CK((*filter->output_function)(mbfl_base64_table[(group >> 18) & 0x3f], filter->data));
		CK((*filter->output_function)(mbfl_base64_table[(group >> 12) & 0x3f], filter->data));

		/* With a single buffered byte the third character is padding as well */
		CK((*filter->output_function)(
			(pending == 1) ? 0x3d : mbfl_base64_table[(group >> 6) & 0x3f],
			filter->data));
		CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
152
153 /*
154 * BASE64 => any
155 */
/*
 * Decoder filter: accept one Base64 character and emit three decoded bytes through
 * filter->output_function each time four alphabet characters have been collected.
 *
 * CR, LF, space, horizontal tab and '=' are skipped silently. Any other character
 * outside the Base64 alphabet causes MBFL_BAD_INPUT to be emitted and does not change
 * the decoder state.
 *
 * filter->status counts the characters collected so far in the current group (0..3);
 * filter->cache accumulates their 6-bit values, first character in bits 18-23.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter)
{
	int sextet;

	switch (c) {
	case 0x0d: /* CR */
	case 0x0a: /* LF */
	case 0x20: /* SPACE */
	case 0x09: /* HTAB */
	case 0x3d: /* '=' */
		return 0;
	default:
		break;
	}

	if (c >= 0x41 && c <= 0x5a) {        /* A - Z => 0..25 */
		sextet = c - 0x41;
	} else if (c >= 0x61 && c <= 0x7a) { /* a - z => 26..51 */
		sextet = c - 0x61 + 26;
	} else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 => 52..61 */
		sextet = c - 0x30 + 52;
	} else if (c == 0x2b) {              /* '+' */
		sextet = 62;
	} else if (c == 0x2f) {              /* '/' */
		sextet = 63;
	} else {
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		return 0;
	}

	if (filter->status == 0) {
		/* Plain assignment: the cache is not cleared after a completed group */
		filter->cache = sextet << 18;
		filter->status = 1;
	} else if (filter->status == 1) {
		filter->cache |= sextet << 12;
		filter->status = 2;
	} else if (filter->status == 2) {
		filter->cache |= sextet << 6;
		filter->status = 3;
	} else {
		/* Fourth character: 24 bits are available, emit them as three bytes */
		const int group = filter->cache | sextet;

		filter->status = 0;
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(group & 0xff, filter->data));
	}

	return 0;
}
205
/*
 * Decoder flush: emit the whole bytes contained in an incomplete trailing group
 * (two collected characters yield one byte, three yield two), reset the decoder
 * state, then invoke filter->flush_function if one is set.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a negative value
 * (in which case the flush function is not invoked).
 */
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
{
	const int collected = filter->status; /* characters gathered for the current group */
	const int group = filter->cache;

	/* State is cleared before anything is written */
	filter->status = 0;
	filter->cache = 0;

	if (collected >= 2) {
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
	}
	if (collected >= 3) {
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
228
/*
 * Map one Base64 alphabet character to its 6-bit value.
 *
 * Returns 0..63 for 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/', and -1 for any other
 * character (including '=' and whitespace).
 */
static int decode_base64(char c)
{
	if (c >= 'A' && c <= 'Z') {
		return c - 'A';
	}
	if (c >= 'a' && c <= 'z') {
		return 26 + (c - 'a');
	}
	if (c >= '0' && c <= '9') {
		return 52 + (c - '0');
	}

	switch (c) {
	case '+':
		return 62;
	case '/':
		return 63;
	default:
		return -1;
	}
}
244
mb_base64_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)245 static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
246 {
247 ZEND_ASSERT(bufsize >= 3);
248
249 unsigned char *p = *in, *e = p + *in_len;
250 uint32_t *out = buf, *limit = buf + bufsize;
251
252 unsigned int bits = *state & 0xFF, cache = *state >> 8;
253
254 while (p < e && (limit - out) >= 3) {
255 unsigned char c = *p++;
256
257 if (c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == '=') {
258 continue;
259 }
260
261 int value = decode_base64(c);
262
263 if (value == -1) {
264 *out++ = MBFL_BAD_INPUT;
265 } else {
266 bits += 6;
267 cache = (cache << 6) | (value & 0x3F);
268 if (bits == 24) {
269 *out++ = (cache >> 16) & 0xFF;
270 *out++ = (cache >> 8) & 0xFF;
271 *out++ = cache & 0xFF;
272 bits = cache = 0;
273 }
274 }
275 }
276
277 if (p == e) {
278 if (bits) {
279 /* If we reach here, there will be at least 3 spaces remaining in output buffer */
280 if (bits == 18) {
281 *out++ = (cache >> 10) & 0xFF;
282 *out++ = (cache >> 2) & 0xFF;
283 } else if (bits == 12) {
284 *out++ = (cache >> 4) & 0xFF;
285 }
286 }
287 } else {
288 *state = (cache << 8) | (bits & 0xFF);
289 }
290
291 *in_len = e - p;
292 *in = p;
293 return out - buf;
294 }
295
/*
 * Buffer-oriented Base64 encoder.
 *
 * Takes `len` input values (only the low 8 bits of each are used) and appends Base64
 * text to `buf`, inserting CR+LF before a group whenever more than 72 characters are
 * already on the current line. When `end` is true and input bytes are left over, a
 * final '='-padded group is written.
 *
 * buf->state carries progress between calls:
 *   bits 0-1 : number of input bytes buffered but not yet encoded (0..2)
 *   bits 2-7 : number of characters on the current output line, divided by 4
 *   bits 8+  : the buffered input bytes themselves
 */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned int pending_bits = (buf->state & 0x3) * 8;
	unsigned int line_chars = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int acc = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);

	/* Output space needed:
	 *  - every 3 input bytes become 4 output bytes, and a trailing partial group is
	 *    still padded to 4 bytes: `(len + 2) * 4 / 3`
	 *  - a CR+LF (2 bytes) is added per 76 bytes of output: multiply by 78/76
	 *    (combined factor 26/19, hence `(n * 26 + 52) / 19`)
	 *  - this function may be entered several times for one long string and the line
	 *    may already be nearly full on entry, so reserve one extra CR+LF pair (+ 2)
	 * `zend_safe_address_guarded` guards the multiplication against overflow. */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (pending_bits / 8), 26, 52) / 19) + 2);

	for (; len > 0; len--) {
		uint32_t w = *in++;

		acc = (acc << 8) | (w & 0xFF);
		pending_bits += 8;
		if (pending_bits != 24) {
			continue;
		}

		/* A full 3-byte group is available */
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			line_chars = 0;
		}
		out = mb_convert_buf_add4(out,
			mbfl_base64_table[(acc >> 18) & 0x3F],
			mbfl_base64_table[(acc >> 12) & 0x3F],
			mbfl_base64_table[(acc >> 6) & 0x3F],
			mbfl_base64_table[acc & 0x3F]);
		line_chars += 4;
		pending_bits = 0;
		acc = 0;
	}

	if (!end || !pending_bits) {
		/* Nothing to finalize: save progress for the next call */
		buf->state = (acc << 8) | (((line_chars / 4) & 0x3F) << 2) | ((pending_bits / 8) & 0x3);
	} else {
		if (line_chars > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
		}
		if (pending_bits == 8) {
			/* One leftover byte: two data characters and two pads */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 2) & 0x3F],
				mbfl_base64_table[(acc & 0x3) << 4],
				'=',
				'=');
		} else {
			/* Two leftover bytes: three data characters and one pad */
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(acc >> 10) & 0x3F],
				mbfl_base64_table[(acc >> 4) & 0x3F],
				mbfl_base64_table[(acc & 0xF) << 2],
				'=');
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
352