1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this files was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_uuencode.h"
32
/* Forward declarations of the two conversion routines defined later in this
 * file, so that the mbfl_encoding_uuencode initializer can refer to them. */
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35
/* Encoding descriptor for UUEncode.
 * NOTE(review): the initializer is positional and the mbfl_encoding
 * declaration is not visible here; the per-field remarks below are inferred
 * from the values themselves and should be confirmed against the header. */
const mbfl_encoding mbfl_encoding_uuencode = {
	mbfl_no_encoding_uuencode, /* encoding identifier */
	"UUENCODE", /* presumably the canonical encoding name */
	"x-uuencode", /* presumably the MIME name */
	NULL,
	NULL,
	MBFL_ENCTYPE_SBCS, /* encoding type flag */
	NULL,
	NULL,
	mb_uuencode_to_wchar, /* UUEncode decoder defined in this file */
	mb_wchar_to_uuencode, /* UUEncode encoder defined in this file */
	NULL,
	NULL,
};
50
/* Conversion filter table wiring the byte-at-a-time UUEncode decoder
 * (mbfl_filt_conv_uudec) together with the common constructor and flush
 * routines.
 * NOTE(review): the initializer is positional; the per-field remarks are
 * inferred from the values and should be confirmed against the declaration
 * of struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
	mbfl_no_encoding_uuencode, /* presumably the source encoding */
	mbfl_no_encoding_8bit, /* presumably the destination encoding */
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_uudec, /* per-byte decoding function defined in this file */
	mbfl_filt_conv_common_flush,
	NULL,
};
60
/* Evaluate `statement`; if it yields a negative value, make the enclosing
 * function return -1 immediately. */
#define CK(statement) \
	do { \
		if ((statement) < 0) \
			return (-1); \
	} while (0)

/* Map one uuencoded character to its 6-bit value: subtract the space
 * character and keep the low six bits (so both ' ' and '`' give zero). */
#define UUDEC(c) ((char)(((c) - ' ') & 0x3F))

/* Text which introduces a uuencoded section */
static const char *uuenc_begin_text = "begin ";

/* States shared by the UUEncode decoders in this file */
enum {
	uudec_state_ground = 0,        /* searching for the "begin" line */
	uudec_state_inbegin = 1,       /* matching the characters of "begin" */
	uudec_state_until_newline = 2, /* skipping the remainder of the "begin" line */
	uudec_state_size = 3,          /* expecting the length character of a line */
	uudec_state_a = 4,             /* expecting 1st character of a 4-character group */
	uudec_state_b = 5,             /* expecting 2nd character of a group */
	uudec_state_c = 6,             /* expecting 3rd character of a group */
	uudec_state_d = 7,             /* expecting 4th character of a group */
	uudec_state_skip_newline = 8   /* discarding the byte which follows a finished line */
};
76
/*
 * Streaming UUEncode decoder: consumes one input byte `c` per call and
 * forwards decoded bytes to filter->output_function.
 *
 * filter->status holds the current uudec_state_* value. filter->cache is
 * reused per state:
 *   - ground:  number of bytes seen since the last newline (0 = line start)
 *   - inbegin: index of the next character of "begin" to be matched
 *   - size..d: bits 24-31 = bytes still due on this line,
 *              bits 16-23 / 8-15 / 0-7 = 6-bit values A / B / C of the group
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
{
	int n;

	switch (filter->status) {
	case uudec_state_ground:
		/* looking for "begin 0666 filename\n" line */
		if (filter->cache == 0 && c == 'b') {
			filter->status = uudec_state_inbegin;
			filter->cache = 1; /* move to 'e' */
		} else if (c == '\n') {
			filter->cache = 0;
		} else {
			filter->cache++;
		}
		break;

	case uudec_state_inbegin:
		if (uuenc_begin_text[filter->cache++] != c) {
			/* doesn't match pattern */
			filter->status = uudec_state_ground;
			if (c == '\n') {
				/* The mismatching byte ended the line, so the next byte is at
				 * column 0 again; without this reset a "begin" at the start of
				 * the following line would not be recognised */
				filter->cache = 0;
			}
			break;
		}
		if (filter->cache == 5) {
			/* that's good enough - wait for a newline */
			filter->status = uudec_state_until_newline;
			filter->cache = 0;
		}
		break;

	case uudec_state_until_newline:
		if (c == '\n') {
			filter->status = uudec_state_size;
		}
		break;

	case uudec_state_size:
		/* get "size" byte */
		n = UUDEC(c);
		filter->cache = n << 24;
		filter->status = uudec_state_a;
		break;

	case uudec_state_a:
		/* get "a" byte */
		n = UUDEC(c);
		filter->cache |= (n << 16);
		filter->status = uudec_state_b;
		break;

	case uudec_state_b:
		/* get "b" byte */
		n = UUDEC(c);
		filter->cache |= (n << 8);
		filter->status = uudec_state_c;
		break;

	case uudec_state_c:
		/* get "c" byte */
		n = UUDEC(c);
		filter->cache |= n;
		filter->status = uudec_state_d;
		break;

	case uudec_state_d:
		/* get "d" byte */
		{
			int A, B, C, D = UUDEC(c);
			A = (filter->cache >> 16) & 0xff;
			B = (filter->cache >> 8) & 0xff;
			C = (filter->cache) & 0xff;
			n = (filter->cache >> 24) & 0xff;
			/* Four 6-bit values make up to three bytes; emit only as many as the
			 * line still owes. Each result is masked to 8 bits, since the shifted
			 * 6-bit values would otherwise carry bits above 0xff (this matches
			 * what mb_uuencode_to_wchar does). */
			if (n-- > 0)
				CK((*filter->output_function)(((A << 2) | (B >> 4)) & 0xff, filter->data));
			if (n-- > 0)
				CK((*filter->output_function)(((B << 4) | (C >> 2)) & 0xff, filter->data));
			if (n-- > 0)
				CK((*filter->output_function)(((C << 6) | D) & 0xff, filter->data));
			if (n < 0)
				n = 0;
			filter->cache = n << 24;

			if (n == 0)
				filter->status = uudec_state_skip_newline; /* skip next byte (newline) */
			else
				filter->status = uudec_state_a; /* go back to fetch "A" byte */
		}
		break;

	case uudec_state_skip_newline:
		/* skip newline */
		filter->status = uudec_state_size;
		break;

	default:
		break;
	}
	return 0;
}
165
166 /* Using mbstring to decode UUEncoded text is already deprecated
167 * However, to facilitate the move to the new, faster internal conversion interface,
168 * We will temporarily implement it for UUEncode */
169
mb_uuencode_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)170 static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
171 {
172 ZEND_ASSERT(bufsize >= 3);
173
174 unsigned char *p = *in, *e = p + *in_len;
175 uint32_t *out = buf, *limit = buf + bufsize;
176
177 unsigned int _state = *state & 0xFF;
178 unsigned int size = *state >> 8;
179
180 while (p < e && (limit - out) >= 3) {
181 unsigned char c = *p++;
182
183 switch (_state) {
184 case uudec_state_ground:
185 if (c == 'b') {
186 if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) {
187 p += 5;
188 while (p < e && *p++ != '\n'); /* Consume everything up to newline */
189 _state = uudec_state_size;
190 }
191 /* We didn't find "begin " */
192 }
193 break;
194
195 case uudec_state_size:
196 size = UUDEC(c);
197 _state = uudec_state_a;
198 break;
199
200 case uudec_state_a:
201 if ((e - p) < 4) {
202 p = e;
203 break;
204 }
205
206 unsigned int a = UUDEC(c);
207 unsigned int b = UUDEC(*p++);
208 unsigned int c = UUDEC(*p++);
209 unsigned int d = UUDEC(*p++);
210
211 if (size > 0) {
212 *out++ = ((a << 2) | (b >> 4)) & 0xFF;
213 size--;
214 }
215 if (size > 0) {
216 *out++ = ((b << 4) | (c >> 2)) & 0xFF;
217 size--;
218 }
219 if (size > 0) {
220 *out++ = ((c << 6) | d) & 0xFF;
221 size--;
222 }
223
224 _state = size ? uudec_state_a : uudec_state_skip_newline;
225 break;
226
227 case uudec_state_skip_newline:
228 _state = uudec_state_size;
229 break;
230 }
231 }
232
233 *state = (size << 8) | _state;
234 *in_len = e - p;
235 *in = p;
236 return out - buf;
237 }
238
/* Convert a 6-bit value into its UUEncode character: zero is written as a
 * backquote, every other value as the character 32 positions above it. */
static unsigned char uuencode_six_bits(unsigned int bits)
{
	return bits ? (unsigned char)(bits + 32) : '`';
}
247
/*
 * UUEncode `len` input values (each used as one byte) from `in` into `buf`.
 * May be called repeatedly; `end` marks the final call.
 *
 * buf->state carries progress between calls:
 *   bit 0      - set once the "begin" header line has been written
 *   bits 1-7   - number of input bytes already encoded on the current line
 *   bits 8-15  - how many bits (0, 2 or 4) are left over from the last call
 *   bits 16+   - the left-over bits themselves
 */
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Every 3 bytes of input gets encoded as 4 bytes of output
	 * Additionally, we have a 'length' byte and a newline for each line of output
	 * (Maximum 45 input bytes can be encoded on a single output line)
	 * Make space for two more bytes in case we start close to where a line must end,
	 * and another two if there are cached bits remaining from the previous call */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 4);

	unsigned int bytes_encoded = (buf->state >> 1) & 0x7F;
	/* UUEncode naturally wants to process input bytes in groups of 3, but
	 * our buffer size may not be a multiple of 3
	 * So there may be data from the previous call which we need to flush out */
	unsigned int n_cached_bits = (buf->state >> 8) & 0xFF;
	unsigned int cached_bits = buf->state >> 16;

	if (!buf->state) {
		/* First call: write the header line and the length byte of the first line */
		for (const char *s = "begin 0644 filename\n"; *s; s++) {
			out = mb_convert_buf_add(out, *s);
		}
		out = mb_convert_buf_add(out, MIN(len, 45) + 32);
		buf->state |= 1;
	} else if (!len && end && !bytes_encoded && !n_cached_bits) {
		/* Corner case: under EXTREMELY rare circumstances, it's possible that the
		 * final call to this conversion function will happen with an empty input
		 * buffer, leaving an unwanted trailing len byte in the output buffer.
		 *
		 * Drop it through the local cursor and store that back, rather than
		 * touching buf->out directly: MB_CONVERT_BUF_ENSURE above operates on
		 * the local out/limit pair, so buf->out may no longer be current if
		 * the buffer had to be enlarged. */
		out--;
		MB_CONVERT_BUF_STORE(buf, out, limit);
		return;
	} else {
		/* UUEncode starts each line with a byte which indicates how many bytes
		 * are encoded on the line
		 * This can create a problem, since we receive the incoming data one buffer
		 * at a time, and there is no requirement that the buffers should be aligned
		 * with line boundaries
		 * So if a previous line was cut off, we need to go back and fix up
		 * the preceding len byte */
		unsigned char *len_byte = out - (bytes_encoded * 4 / 3) - 1;
		if (n_cached_bits) {
			/* A partial group already produced 1 output byte (2 bits cached)
			 * or 2 output bytes (4 bits cached) */
			len_byte -= (n_cached_bits == 2) ? 1 : 2;
		}
		*len_byte = MIN(bytes_encoded + len + (n_cached_bits ? (n_cached_bits == 2 ? 1 : 2) : 0), 45) + 32;

		if (n_cached_bits) {
			/* Flush out bits which remained from previous call */
			if (n_cached_bits == 2) {
				uint32_t w = cached_bits;
				uint32_t w2 = 0, w3 = 0;
				if (len) {
					w2 = *in++;
					len--;
				}
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add3(out, uuencode_six_bits((w << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			} else {
				uint32_t w2 = cached_bits;
				uint32_t w3 = 0;
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add2(out, uuencode_six_bits((w2 << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			}
			n_cached_bits = cached_bits = 0;
			/* The group is now complete; join the main loop at its bookkeeping step */
			goto possible_line_break;
		}
	}

	while (len--) {
		uint32_t w = *in++;
		uint32_t w2 = 0, w3 = 0;

		if (!len) {
			if (!end) {
				/* Only one byte is available and more input will follow */
				out = mb_convert_buf_add(out, uuencode_six_bits((w >> 2) & 0x3F));
				/* Cache 2 remaining bits from 'w' */
				cached_bits = w & 0x3;
				n_cached_bits = 2;
				break;
			}
		} else {
			w2 = *in++;
			len--;
		}

		if (!len) {
			if (!end) {
				/* Only two bytes are available and more input will follow */
				out = mb_convert_buf_add2(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)));
				/* Cache 4 remaining bits from 'w2' */
				cached_bits = w2 & 0xF;
				n_cached_bits = 4;
				break;
			}
		} else {
			w3 = *in++;
			len--;
		}

		/* Full group (on the final call, missing bytes are taken as zero) */
		out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));

possible_line_break:
		bytes_encoded += 3;

		if (bytes_encoded >= 45) {
			out = mb_convert_buf_add(out, '\n');
			if (len || !end) {
				/* Length byte for the next line; it is corrected on a later
				 * call if more input arrives than is known now */
				out = mb_convert_buf_add(out, MIN(len, 45) + 32);
			}
			bytes_encoded = 0;
		}
	}

	if (bytes_encoded && end) {
		/* Terminate the last, partially filled line */
		out = mb_convert_buf_add(out, '\n');
	}

	buf->state = ((cached_bits & 0xFF) << 16) | ((n_cached_bits & 0xFF) << 8) | ((bytes_encoded & 0x7F) << 1) | (buf->state & 1);
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
371