1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
 * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_uuencode.h"
32 
/* Forward declarations of the two buffer-based conversion routines implemented
 * further down in this file; they are needed here because the
 * mbfl_encoding_uuencode table below stores pointers to them. */
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35 
/* Encoding descriptor for UUEncode.
 * NOTE(review): the initializer is positional; the meaning of each slot is fixed
 * by the mbfl_encoding declaration, which is not visible in this file. The labels
 * below are a best-effort reading and should be confirmed against it. */
const mbfl_encoding mbfl_encoding_uuencode = {
	mbfl_no_encoding_uuencode,
	"UUENCODE", /* presumably the canonical encoding name */
	"x-uuencode", /* presumably the MIME name */
	NULL,
	NULL,
	MBFL_ENCTYPE_SBCS,
	NULL,
	NULL,
	mb_uuencode_to_wchar, /* decoder: UUEncoded bytes -> decoded byte values */
	mb_wchar_to_uuencode, /* encoder: byte values -> UUEncoded text */
	NULL,
	NULL,
};
50 
/* Conversion-filter table for the legacy per-byte interface: pairs the UUEncode
 * identifier with the 8bit identifier and names mbfl_filt_conv_uudec as the
 * filter routine.
 * NOTE(review): slot meanings follow struct mbfl_convert_vtbl, declared elsewhere;
 * the two NULL slots are presumably optional hooks - confirm against that declaration. */
const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
	mbfl_no_encoding_uuencode,
	mbfl_no_encoding_8bit,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_uudec,
	mbfl_filt_conv_common_flush,
	NULL,
};
60 
/* Evaluate `statement`; if the result is negative, return -1 from the enclosing function */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/* Map one UUEncoded character to its 6-bit value: subtract ' ' and keep the low
 * six bits, so that both ' ' and '`' decode to 0 */
#define UUDEC(c)	(char)(((c)-' ') & 077)
/* Prefix that introduces the header line of UUEncoded data */
static const char *uuenc_begin_text = "begin ";
/* Decoder states shared by mbfl_filt_conv_uudec and mb_uuencode_to_wchar */
enum {
	uudec_state_ground=0,      /* searching for the "begin" header */
	uudec_state_inbegin,       /* matching the remaining characters of "begin" (legacy filter only) */
	uudec_state_until_newline, /* header recognised; discarding the rest of that line (legacy filter only) */
	uudec_state_size,          /* next character is the per-line length character */
	uudec_state_a,             /* next character is the 1st of a 4-character group */
	uudec_state_b,             /* next character is the 2nd of the group (legacy filter only) */
	uudec_state_c,             /* next character is the 3rd of the group (legacy filter only) */
	uudec_state_d,             /* next character is the 4th of the group (legacy filter only) */
	uudec_state_skip_newline   /* line fully decoded; discard one character, then expect a length character */
};
76 
/*
 * Legacy per-byte UUEncode decoder.
 *
 * Consumes one input byte `c` per call. All decoding state lives in
 * `filter->status` (one of the uudec_state_* values) and `filter->cache`:
 *  - while looking for the header, `cache` counts the characters seen on the
 *    current line (so 'b' only starts a match at the beginning of a line) and is
 *    then used as the index into uuenc_begin_text;
 *  - while decoding a line, bits 24-31 of `cache` hold the number of bytes still
 *    to be produced for that line, and bits 16-23 / 8-15 / 0-7 hold the first
 *    three 6-bit values of the current 4-character group.
 *
 * Every decoded byte is handed to filter->output_function together with
 * filter->data. Returns 0 on success, or -1 as soon as the output function
 * returns a negative value.
 */
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
{
	int n;

	switch (filter->status) {
	case uudec_state_ground:
		/* looking for "begin 0666 filename\n" line */
		if (filter->cache == 0 && c == 'b') {
			filter->status = uudec_state_inbegin;
			filter->cache = 1; /* move to 'e' */
		} else if (c == '\n') {
			filter->cache = 0;
		} else {
			filter->cache++;
		}
		break;

	case uudec_state_inbegin:
		if (uuenc_begin_text[filter->cache++] != c) {
			/* doesn't match pattern; cache stays non-zero, so no new match
			 * is attempted before the next newline */
			filter->status = uudec_state_ground;
			break;
		}
		if (filter->cache == 5) {
			/* "begin" matched; that's good enough - wait for a newline */
			filter->status = uudec_state_until_newline;
			filter->cache = 0;
		}
		break;

	case uudec_state_until_newline:
		if (c == '\n') {
			filter->status = uudec_state_size;
		}
		break;

	case uudec_state_size:
		/* get "size" byte: number of decoded bytes on this line */
		n = UUDEC(c);
		filter->cache = n << 24;
		filter->status = uudec_state_a;
		break;

	case uudec_state_a:
		/* get "a" byte */
		n = UUDEC(c);
		filter->cache |= (n << 16);
		filter->status = uudec_state_b;
		break;

	case uudec_state_b:
		/* get "b" byte */
		n = UUDEC(c);
		filter->cache |= (n << 8);
		filter->status = uudec_state_c;
		break;

	case uudec_state_c:
		/* get "c" byte */
		n = UUDEC(c);
		filter->cache |= n;
		filter->status = uudec_state_d;
		break;

	case uudec_state_d:
		/* get "d" byte; the group is now complete and yields up to 3 bytes */
		{
			int A, B, C, D = UUDEC(c);
			A = (filter->cache >> 16) & 0xff;
			B = (filter->cache >> 8) & 0xff;
			C = (filter->cache) & 0xff;
			n = (filter->cache >> 24) & 0xff;
			/* Mask every result to 8 bits: B << 4 and C << 6 still carry bits
			 * above bit 7 which belong to the previously emitted byte */
			if (n-- > 0) {
				CK((*filter->output_function)(((A << 2) | (B >> 4)) & 0xff, filter->data));
			}
			if (n-- > 0) {
				CK((*filter->output_function)(((B << 4) | (C >> 2)) & 0xff, filter->data));
			}
			if (n-- > 0) {
				CK((*filter->output_function)(((C << 6) | D) & 0xff, filter->data));
			}
			/* n was decremented three times regardless of how many bytes were emitted */
			if (n < 0) {
				n = 0;
			}
			filter->cache = n << 24;

			if (n == 0) {
				filter->status = uudec_state_skip_newline;	/* skip next byte (newline) */
			} else {
				filter->status = uudec_state_a; /* go back to fetch "A" byte */
			}
		}
		break;

	case uudec_state_skip_newline:
		/* skip newline */
		filter->status = uudec_state_size;
		break;

	default:
		/* unknown status value: ignore the byte */
		break;
	}
	return 0;
}
165 
/* Using mbstring to decode UUEncoded text is already deprecated.
 * However, to facilitate the move to the new, faster internal conversion interface,
 * we temporarily implement that interface for UUEncode as well. */
169 
mb_uuencode_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)170 static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
171 {
172 	ZEND_ASSERT(bufsize >= 3);
173 
174 	unsigned char *p = *in, *e = p + *in_len;
175 	uint32_t *out = buf, *limit = buf + bufsize;
176 
177 	unsigned int _state = *state & 0xFF;
178 	unsigned int size = *state >> 8;
179 
180 	while (p < e && (limit - out) >= 3) {
181 		unsigned char c = *p++;
182 
183 		switch (_state) {
184 		case uudec_state_ground:
185 			if (c == 'b') {
186 				if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) {
187 					p += 5;
188 					while (p < e && *p++ != '\n'); /* Consume everything up to newline */
189 					_state = uudec_state_size;
190 				}
191 				/* We didn't find "begin " */
192 			}
193 			break;
194 
195 		case uudec_state_size:
196 			size = UUDEC(c);
197 			_state = uudec_state_a;
198 			break;
199 
200 		case uudec_state_a:
201 			if ((e - p) < 4) {
202 				p = e;
203 				break;
204 			}
205 
206 			unsigned int a = UUDEC(c);
207 			unsigned int b = UUDEC(*p++);
208 			unsigned int c = UUDEC(*p++);
209 			unsigned int d = UUDEC(*p++);
210 
211 			if (size > 0) {
212 				*out++ = ((a << 2) | (b >> 4)) & 0xFF;
213 				size--;
214 			}
215 			if (size > 0) {
216 				*out++ = ((b << 4) | (c >> 2)) & 0xFF;
217 				size--;
218 			}
219 			if (size > 0) {
220 				*out++ = ((c << 6) | d) & 0xFF;
221 				size--;
222 			}
223 
224 			_state = size ? uudec_state_a : uudec_state_skip_newline;
225 			break;
226 
227 		case uudec_state_skip_newline:
228 			_state = uudec_state_size;
229 			break;
230 		}
231 	}
232 
233 	*state = (size << 8) | _state;
234 	*in_len = e - p;
235 	*in = p;
236 	return out - buf;
237 }
238 
/* Turn a 6-bit value into its UUEncode character: zero is written as '`',
 * every other value as the character with code (value + 32). */
static unsigned char uuencode_six_bits(unsigned int bits)
{
	return (bits != 0) ? (unsigned char)(bits + 32) : '`';
}
247 
/*
 * UUEncode `len` byte values from `in` and append the text to `buf`.
 *
 * in   input values; only the low 8 bits of each are meaningful to the encoding
 * len  number of input values
 * buf  output conversion buffer; buf->state carries encoder state between calls
 * end  true when this is the final call for the stream
 *
 * Layout of buf->state (see the load/store code below):
 *   bit 0       set once the "begin" header line has been written
 *   bits 1-7    number of input bytes already encoded on the current output line
 *   bits 8-15   number of leftover bits cached from the previous call (0, 2 or 4)
 *   bits 16+    the cached bits themselves
 *
 * On the first call the header "begin 0644 filename\n" is emitted. Each output
 * line starts with a length character and holds at most 45 input bytes; because
 * input arrives in arbitrary chunks, the length character of an unfinished line
 * is patched retroactively on the next call.
 */
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Every 3 bytes of input gets encoded as 4 bytes of output
	 * Additionally, we have a 'length' byte and a newline for each line of output
	 * (Maximum 45 input bytes can be encoded on a single output line)
	 * Make space for two more bytes in case we start close to where a line must end,
	 * and another two if there are cached bits remaining from the previous call */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 4);

	unsigned int bytes_encoded = (buf->state >> 1) & 0x7F;
	/* UUEncode naturally wants to process input bytes in groups of 3, but
	 * our buffer size may not be a multiple of 3
	 * So there may be data from the previous call which we need to flush out */
	unsigned int n_cached_bits = (buf->state >> 8) & 0xFF;
	unsigned int cached_bits = buf->state >> 16;

	if (!buf->state) {
		/* First call: write the header line and a provisional length character */
		for (const char *s = "begin 0644 filename\n"; *s; s++) {
			out = mb_convert_buf_add(out, *s);
		}
		out = mb_convert_buf_add(out, MIN(len, 45) + 32);
		buf->state |= 1;
	} else if (!len && end && !bytes_encoded && !n_cached_bits) {
		/* Corner case: under EXTREMELY rare circumstances, it's possible that the
		 * final call to this conversion function will happen with an empty input
		 * buffer, leaving an unwanted trailing len byte in the output buffer.
		 *
		 * Drop that byte via the local cursor and store it back, rather than
		 * decrementing buf->out directly: MB_CONVERT_BUF_ENSURE above is handed
		 * `out` and `limit` to update and may have moved the output buffer, in
		 * which case buf->out would no longer be the current position. */
		out--;
		MB_CONVERT_BUF_STORE(buf, out, limit);
		return;
	} else {
		/* UUEncode starts each line with a byte which indicates how many bytes
		 * are encoded on the line
		 * This can create a problem, since we receive the incoming data one buffer
		 * at a time, and there is no requirement that the buffers should be aligned
		 * with line boundaries
		 * So if a previous line was cut off, we need to go back and fix up
		 * the preceding len byte */
		unsigned char *len_byte = out - (bytes_encoded * 4 / 3) - 1;
		if (n_cached_bits) {
			/* A partial group already produced 1 (2 bits cached) or 2 (4 bits cached) characters */
			len_byte -= (n_cached_bits == 2) ? 1 : 2;
		}
		*len_byte = MIN(bytes_encoded + len + (n_cached_bits ? (n_cached_bits == 2 ? 1 : 2) : 0), 45) + 32;

		if (n_cached_bits) {
			/* Flush out bits which remained from previous call */
			if (n_cached_bits == 2) {
				/* One input byte was pending: complete the group with up to two new bytes */
				uint32_t w = cached_bits;
				uint32_t w2 = 0, w3 = 0;
				if (len) {
					w2 = *in++;
					len--;
				}
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add3(out, uuencode_six_bits((w << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			} else {
				/* Two input bytes were pending: complete the group with up to one new byte */
				uint32_t w2 = cached_bits;
				uint32_t w3 = 0;
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add2(out, uuencode_six_bits((w2 << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			}
			n_cached_bits = cached_bits = 0;
			/* Jump into the loop body so the completed group is counted and a
			 * line break is emitted if the line is now full */
			goto possible_line_break;
		}
	}

	while (len--) {
		uint32_t w = *in++;
		uint32_t w2 = 0, w3 = 0;

		if (!len) {
			if (!end) {
				/* Only one byte left and more input will follow: emit the one
				 * complete character and keep the rest for the next call */
				out = mb_convert_buf_add(out, uuencode_six_bits((w >> 2) & 0x3F));
				/* Cache 2 remaining bits from 'w' */
				cached_bits = w & 0x3;
				n_cached_bits = 2;
				break;
			}
		} else {
			w2 = *in++;
			len--;
		}

		if (!len) {
			if (!end) {
				/* Only two bytes left and more input will follow */
				out = mb_convert_buf_add2(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)));
				/* Cache 4 remaining bits from 'w2' */
				cached_bits = w2 & 0xF;
				n_cached_bits = 4;
				break;
			}
		} else {
			w3 = *in++;
			len--;
		}

		/* Full group (zero-padded at the very end of the stream): 3 bytes -> 4 characters */
		out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));

possible_line_break:
		bytes_encoded += 3;

		if (bytes_encoded >= 45) {
			out = mb_convert_buf_add(out, '\n');
			if (len || !end) {
				/* Start the next line with a provisional length character */
				out = mb_convert_buf_add(out, MIN(len, 45) + 32);
			}
			bytes_encoded = 0;
		}
	}

	if (bytes_encoded && end) {
		/* Terminate the final, partially filled line */
		out = mb_convert_buf_add(out, '\n');
	}

	buf->state = ((cached_bits & 0xFF) << 16) | ((n_cached_bits & 0xFF) << 8) | ((bytes_encoded & 0x7F) << 1) | (buf->state & 1);
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
371