1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
/*
 * The source code included in this file was separated from mbfilter.c
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 *
 */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_uuencode.h"
32 
33 static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
34 static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35 
36 const mbfl_encoding mbfl_encoding_uuencode = {
37 	mbfl_no_encoding_uuencode,
38 	"UUENCODE",
39 	"x-uuencode",
40 	NULL,
41 	NULL,
42 	MBFL_ENCTYPE_SBCS,
43 	NULL,
44 	NULL,
45 	mb_uuencode_to_wchar,
46 	mb_wchar_to_uuencode,
47 	NULL
48 };
49 
50 const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
51 	mbfl_no_encoding_uuencode,
52 	mbfl_no_encoding_8bit,
53 	mbfl_filt_conv_common_ctor,
54 	NULL,
55 	mbfl_filt_conv_uudec,
56 	mbfl_filt_conv_common_flush,
57 	NULL,
58 };
59 
/* Evaluate `statement`; if it yields a negative value, return -1 from the
 * enclosing function. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/* Map one uuencoded character to its 6-bit value: subtract ' ' and keep the
 * low six bits, so both ' ' and '`' decode to 0. The whole expansion is
 * parenthesized so it is safe inside any larger expression. */
#define UUDEC(c)	((char)(((c) - ' ') & 077))

/* Start of the header line that introduces uuencoded data. */
static const char uuenc_begin_text[] = "begin ";

/* Decoder states shared by both decoders in this file. */
enum {
	uudec_state_ground=0,      /* searching for the "begin" header line */
	uudec_state_inbegin,       /* matching the remaining characters of "begin" */
	uudec_state_until_newline, /* skipping the rest of the header line */
	uudec_state_size,          /* expecting the length character of a data line */
	uudec_state_a,             /* expecting 1st character of a 4-character group */
	uudec_state_b,             /* expecting 2nd character */
	uudec_state_c,             /* expecting 3rd character */
	uudec_state_d,             /* expecting 4th character */
	uudec_state_skip_newline   /* skipping the byte that ends a data line */
};
75 
/*
 * Legacy streaming UUENCODE decoder: consumes one input byte per call.
 *
 * filter->status holds the current uudec_state_* value. filter->cache is
 * used in two ways:
 *   - while searching for the header, it is the position within the current
 *     line (ground state) or the index into uuenc_begin_text (inbegin state);
 *   - while decoding a data line, bits 24-31 hold the number of bytes still to
 *     be produced for the line, and bits 16-23 / 8-15 / 0-7 hold the 6-bit
 *     values A, B and C of the group being assembled.
 *
 * Decoded bytes (always in the range 0..0xFF) are passed to
 * filter->output_function together with filter->data.
 *
 * Returns 0 on success, or -1 if the output function returns a negative value.
 */
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
{
	int n;

	switch(filter->status)	{
		case uudec_state_ground:
			/* looking for "begin 0666 filename\n" line */
			if (filter->cache == 0 && c == 'b')
			{
				filter->status = uudec_state_inbegin;
				filter->cache = 1; /* move to 'e' */
			}
			else if (c == '\n')
				filter->cache = 0;
			else
				filter->cache++;
			break;
		case uudec_state_inbegin:
			if (uuenc_begin_text[filter->cache++] != c)	{
				/* doesn't match pattern */
				filter->status = uudec_state_ground;
				/* If the mismatching byte ended the line, the next byte is at
				 * the start of a line again; without this reset a "begin" on
				 * the following line would not be recognized */
				if (c == '\n')
					filter->cache = 0;
				break;
			}
			if (filter->cache == 5)
			{
				/* that's good enough - wait for a newline */
				filter->status = uudec_state_until_newline;
				filter->cache = 0;
			}
			break;
		case uudec_state_until_newline:
			if (c == '\n')
				filter->status = uudec_state_size;
			break;
		case uudec_state_size:
			/* get "size" byte */
			n = UUDEC(c);
			filter->cache = n << 24;
			filter->status = uudec_state_a;
			break;
		case uudec_state_a:
			/* get "a" byte */
			n = UUDEC(c);
			filter->cache |= (n << 16);
			filter->status = uudec_state_b;
			break;
		case uudec_state_b:
			/* get "b" byte */
			n = UUDEC(c);
			filter->cache |= (n << 8);
			filter->status = uudec_state_c;
			break;
		case uudec_state_c:
			/* get "c" byte */
			n = UUDEC(c);
			filter->cache |= n;
			filter->status = uudec_state_d;
			break;
		case uudec_state_d:
			/* get "d" byte */
			{
				int A, B, C, D = UUDEC(c);
				A = (filter->cache >> 16) & 0xff;
				B = (filter->cache >> 8) & 0xff;
				C = (filter->cache) & 0xff;
				n = (filter->cache >> 24) & 0xff;
				/* Each output is masked to 8 bits: B << 4 and C << 6 carry bits
				 * above bit 7 which belong to the previous output byte */
				if (n-- > 0)
					CK((*filter->output_function)( ((A << 2) | (B >> 4)) & 0xff, filter->data));
				if (n-- > 0)
					CK((*filter->output_function)( ((B << 4) | (C >> 2)) & 0xff, filter->data));
				if (n-- > 0)
					CK((*filter->output_function)( ((C << 6) | D) & 0xff, filter->data));
				/* n is decremented three times regardless of how many bytes
				 * were emitted, so it may have gone negative */
				if (n < 0)
					n = 0;
				filter->cache = n << 24;

				if (n == 0)
					filter->status = uudec_state_skip_newline;	/* skip next byte (newline) */
				else
					filter->status = uudec_state_a; /* go back to fetch "A" byte */
			}
			break;
		case uudec_state_skip_newline:
			/* skip newline */
			filter->status = uudec_state_size;
			break;
		default:
			/* unknown state: ignore the byte */
			break;
	}
	return 0;
}
164 
/* Using mbstring to decode UUEncoded text is already deprecated.
 * However, to facilitate the move to the new, faster internal conversion interface,
 * we temporarily implement that interface for UUEncode as well. */
168 
mb_uuencode_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)169 static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
170 {
171 	ZEND_ASSERT(bufsize >= 3);
172 
173 	unsigned char *p = *in, *e = p + *in_len;
174 	uint32_t *out = buf, *limit = buf + bufsize;
175 
176 	unsigned int _state = *state & 0xFF;
177 	unsigned int size = *state >> 8;
178 
179 	while (p < e && (limit - out) >= 3) {
180 		unsigned char c = *p++;
181 
182 		switch (_state) {
183 		case uudec_state_ground:
184 			if (c == 'b') {
185 				if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) {
186 					p += 5;
187 					while (p < e && *p++ != '\n'); /* Consume everything up to newline */
188 					_state = uudec_state_size;
189 				}
190 				/* We didn't find "begin " */
191 			}
192 			break;
193 
194 		case uudec_state_size:
195 			size = UUDEC(c);
196 			_state = uudec_state_a;
197 			break;
198 
199 		case uudec_state_a:
200 			if ((e - p) < 4) {
201 				p = e;
202 				break;
203 			}
204 
205 			unsigned int a = UUDEC(c);
206 			unsigned int b = UUDEC(*p++);
207 			unsigned int c = UUDEC(*p++);
208 			unsigned int d = UUDEC(*p++);
209 
210 			if (size > 0) {
211 				*out++ = ((a << 2) | (b >> 4)) & 0xFF;
212 				size--;
213 			}
214 			if (size > 0) {
215 				*out++ = ((b << 4) | (c >> 2)) & 0xFF;
216 				size--;
217 			}
218 			if (size > 0) {
219 				*out++ = ((c << 6) | d) & 0xFF;
220 				size--;
221 			}
222 
223 			_state = size ? uudec_state_a : uudec_state_skip_newline;
224 			break;
225 
226 		case uudec_state_skip_newline:
227 			_state = uudec_state_size;
228 			break;
229 		}
230 	}
231 
232 	*state = (size << 8) | _state;
233 	*in_len = e - p;
234 	*in = p;
235 	return out - buf;
236 }
237 
/*
 * Map a 6-bit value to its UUENCODE character.
 *
 * Zero is written as '`'; every other value v is written as the character
 * with code v + 32. Only the low six bits of `bits` are used, so the result
 * is always '`' or a character in the range '!'..'_'.
 */
static unsigned char uuencode_six_bits(unsigned int bits)
{
	bits &= 0x3F;
	return bits ? (unsigned char)(bits + 32) : '`';
}
246 
/*
 * Encode `len` byte values from `in` as UUENCODE text appended to `buf`.
 *
 * The function may be called repeatedly with successive pieces of input;
 * `end` is true on the final call. Progress between calls is kept in
 * buf->state:
 *   bit 0       set once the "begin" header line has been written
 *   bits 1-7    number of input bytes already encoded on the current line
 *   bits 8-15   number of leftover input bits (0, 2 or 4) which did not fill
 *               a complete output character
 *   bits 16+    the leftover bits themselves
 *
 * Output lines hold at most 45 input bytes and start with a length character.
 */
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Every 3 bytes of input gets encoded as 4 bytes of output
	 * Additionally, we have a 'length' byte and a newline for each line of output
	 * (Maximum 45 input bytes can be encoded on a single output line)
	 * Make space for two more bytes in case we start close to where a line must end,
	 * and another two if there are cached bits remaining from the previous call */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 4);

	unsigned int bytes_encoded = (buf->state >> 1) & 0x7F;
	/* UUEncode naturally wants to process input bytes in groups of 3, but
	 * our buffer size may not be a multiple of 3
	 * So there may be data from the previous call which we need to flush out */
	unsigned int n_cached_bits = (buf->state >> 8) & 0xFF;
	unsigned int cached_bits = buf->state >> 16;

	if (!buf->state) {
		/* First call: write the header line and the length byte of the first line */
		for (const char *s = "begin 0644 filename\n"; *s; s++) {
			out = mb_convert_buf_add(out, *s);
		}
		out = mb_convert_buf_add(out, MIN(len, 45) + 32);
		buf->state |= 1;
	} else if (!len && end && !bytes_encoded && !n_cached_bits) {
		/* Corner case: under EXTREMELY rare circumstances, it's possible that the
		 * final call to this conversion function will happen with an empty input
		 * buffer, leaving an unwanted trailing len byte in the output buffer.
		 *
		 * Drop that byte through the local cursor and store it back, exactly as
		 * the normal exit path does, rather than adjusting buf->out directly.
		 * NOTE(review): MB_CONVERT_BUF_ENSURE above presumably may relocate the
		 * buffer, in which case buf->out would be stale here - confirm against
		 * the macro definition. */
		out--;
		MB_CONVERT_BUF_STORE(buf, out, limit);
		return;
	} else {
		/* UUEncode starts each line with a byte which indicates how many bytes
		 * are encoded on the line
		 * This can create a problem, since we receive the incoming data one buffer
		 * at a time, and there is no requirement that the buffers should be aligned
		 * with line boundaries
		 * So if a previous line was cut off, we need to go back and fix up
		 * the preceding len byte */
		unsigned char *len_byte = out - (bytes_encoded * 4 / 3) - 1;
		if (n_cached_bits) {
			/* A partial group already wrote 1 character (2 bits cached) or
			 * 2 characters (4 bits cached) beyond the complete groups */
			len_byte -= (n_cached_bits == 2) ? 1 : 2;
		}
		*len_byte = MIN(bytes_encoded + len + (n_cached_bits ? (n_cached_bits == 2 ? 1 : 2) : 0), 45) + 32;

		if (n_cached_bits) {
			/* Flush out bits which remained from previous call */
			if (n_cached_bits == 2) {
				uint32_t w = cached_bits;
				uint32_t w2 = 0, w3 = 0;
				if (len) {
					w2 = *in++;
					len--;
				}
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add3(out, uuencode_six_bits((w << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			} else {
				uint32_t w2 = cached_bits;
				uint32_t w3 = 0;
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add2(out, uuencode_six_bits((w2 << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			}
			n_cached_bits = cached_bits = 0;
			/* The group is now complete; join the main loop at the point where
			 * a completed group is accounted for */
			goto possible_line_break;
		}
	}

	while (len--) {
		uint32_t w = *in++;
		uint32_t w2 = 0, w3 = 0;

		if (!len) {
			if (!end) {
				out = mb_convert_buf_add(out, uuencode_six_bits((w >> 2) & 0x3F));
				/* Cache 2 remaining bits from 'w' */
				cached_bits = w & 0x3;
				n_cached_bits = 2;
				break;
			}
		} else {
			w2 = *in++;
			len--;
		}

		if (!len) {
			if (!end) {
				out = mb_convert_buf_add2(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)));
				/* Cache 4 remaining bits from 'w2' */
				cached_bits = w2 & 0xF;
				n_cached_bits = 4;
				break;
			}
		} else {
			w3 = *in++;
			len--;
		}

		out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));

possible_line_break:
		bytes_encoded += 3;

		if (bytes_encoded >= 45) {
			out = mb_convert_buf_add(out, '\n');
			if (len || !end) {
				/* Provisional length byte for the next line; a later call
				 * corrects it if the line continues in another buffer */
				out = mb_convert_buf_add(out, MIN(len, 45) + 32);
			}
			bytes_encoded = 0;
		}
	}

	if (bytes_encoded && end) {
		/* Terminate the final, partially filled line */
		out = mb_convert_buf_add(out, '\n');
	}

	buf->state = ((cached_bits & 0xFF) << 16) | ((n_cached_bits & 0xFF) << 8) | ((bytes_encoded & 0x7F) << 1) | (buf->state & 1);
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
370