1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
 * The source code included in this file was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_uuencode.h"
32
33 static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
34 static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
35
/* Encoding descriptor for UUENCODE.
 * The positional initializer supplies the encoding id, the names "UUENCODE"
 * and "x-uuencode", the MBFL_ENCTYPE_SBCS flag, and the two conversion
 * functions defined in this file.
 * NOTE(review): the meaning of each NULL slot depends on the field order of
 * the mbfl_encoding struct, which is declared elsewhere -- confirm against
 * that declaration before adding or reordering entries. */
const mbfl_encoding mbfl_encoding_uuencode = {
	mbfl_no_encoding_uuencode,
	"UUENCODE",
	"x-uuencode",
	NULL,
	NULL,
	MBFL_ENCTYPE_SBCS,
	NULL,
	NULL,
	mb_uuencode_to_wchar, /* UUEncoded bytes -> wchars (defined below) */
	mb_wchar_to_uuencode, /* wchars -> UUEncoded bytes (defined below) */
	NULL
};
49
/* Conversion-filter table that pairs the uuencode and 8bit encoding ids with
 * the shared constructor, the byte-at-a-time decoder mbfl_filt_conv_uudec
 * (defined in this file) and the shared flush routine.
 * NOTE(review): slot meanings are presumed from the entry names; the layout
 * of struct mbfl_convert_vtbl is declared elsewhere -- verify there. */
const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
	mbfl_no_encoding_uuencode,
	mbfl_no_encoding_8bit,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_uudec,
	mbfl_filt_conv_common_flush,
	NULL,
};
59
/* Evaluate `statement`; if it yields a negative value, return -1 from the
 * enclosing function. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/* Map one UUEncoded character to its 6-bit value: subtract ' ' and keep the
 * low six bits, so both ' ' and '`' decode to 0.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. under sizeof or a postfix
 * operator). */
#define UUDEC(c)	((char)(((c)-' ') & 077))

/* Prefix that introduces the header line of a UUEncoded block. */
static const char *uuenc_begin_text = "begin ";

/* Decoder states. */
enum {
	uudec_state_ground=0,		/* searching for the "begin " header */
	uudec_state_inbegin,		/* matching the rest of "begin" */
	uudec_state_until_newline,	/* skipping the remainder of the header line */
	uudec_state_size,		/* expecting the per-line length character */
	uudec_state_a,			/* expecting 1st character of a 4-character group */
	uudec_state_b,			/* expecting 2nd character */
	uudec_state_c,			/* expecting 3rd character */
	uudec_state_d,			/* expecting 4th character; group is emitted here */
	uudec_state_skip_newline	/* discarding the byte that ends a data line */
};
75
/*
 * Byte-at-a-time UUEncode decoder.
 *
 * Consumes one input byte `c` per call and drives a state machine stored in
 * filter->status. filter->cache is reused per state:
 *   - ground:   position within the current line (0 = start of line)
 *   - inbegin:  index of the next character of "begin " to match
 *   - size..d:  (bytes left on line << 24) | (A << 16) | (B << 8) | C
 *
 * Decoded bytes are passed to filter->output_function, masked to 0..255 so
 * that this decoder agrees with mb_uuencode_to_wchar.
 *
 * Returns 0 on success, or -1 as soon as the output function reports a
 * negative result.
 */
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
{
	int n;

	switch (filter->status) {
	case uudec_state_ground:
		/* looking for "begin 0666 filename\n" line */
		if (filter->cache == 0 && c == 'b') {
			filter->status = uudec_state_inbegin;
			filter->cache = 1; /* move to 'e' */
		} else if (c == '\n') {
			filter->cache = 0;
		} else {
			filter->cache++;
		}
		break;
	case uudec_state_inbegin:
		if (uuenc_begin_text[filter->cache++] != c) {
			/* doesn't match pattern; cache stays non-zero, so the search
			 * only resumes after the next newline */
			filter->status = uudec_state_ground;
			break;
		}
		if (filter->cache == 5) {
			/* that's good enough - wait for a newline */
			filter->status = uudec_state_until_newline;
			filter->cache = 0;
		}
		break;
	case uudec_state_until_newline:
		if (c == '\n') {
			filter->status = uudec_state_size;
		}
		break;
	case uudec_state_size:
		/* get "size" byte: number of decoded bytes on this line */
		n = UUDEC(c);
		filter->cache = n << 24;
		filter->status = uudec_state_a;
		break;
	case uudec_state_a:
		/* get "a" byte */
		n = UUDEC(c);
		filter->cache |= (n << 16);
		filter->status = uudec_state_b;
		break;
	case uudec_state_b:
		/* get "b" byte */
		n = UUDEC(c);
		filter->cache |= (n << 8);
		filter->status = uudec_state_c;
		break;
	case uudec_state_c:
		/* get "c" byte */
		n = UUDEC(c);
		filter->cache |= n;
		filter->status = uudec_state_d;
		break;
	case uudec_state_d:
		/* get "d" byte, then emit up to three decoded bytes */
		{
			int A, B, C, D = UUDEC(c);
			A = (filter->cache >> 16) & 0xff;
			B = (filter->cache >> 8) & 0xff;
			C = (filter->cache) & 0xff;
			n = (filter->cache >> 24) & 0xff;
			/* B << 4 and C << 6 carry bits above bit 7; mask so only the
			 * decoded byte is passed on */
			if (n-- > 0) {
				CK((*filter->output_function)(((A << 2) | (B >> 4)) & 0xFF, filter->data));
			}
			if (n-- > 0) {
				CK((*filter->output_function)(((B << 4) | (C >> 2)) & 0xFF, filter->data));
			}
			if (n-- > 0) {
				CK((*filter->output_function)(((C << 6) | D) & 0xFF, filter->data));
			}
			if (n < 0) {
				n = 0;
			}
			/* keep only the remaining byte count for the next group */
			filter->cache = n << 24;

			if (n == 0) {
				filter->status = uudec_state_skip_newline; /* skip next byte (newline) */
			} else {
				filter->status = uudec_state_a; /* go back to fetch "A" byte */
			}
		}
		break;
	case uudec_state_skip_newline:
		/* skip newline */
		filter->status = uudec_state_size;
		break;
	default:
		break;
	}
	return 0;
}
164
/* Using mbstring to decode UUEncoded text is already deprecated.
 * However, to facilitate the move to the new, faster internal conversion
 * interface, we temporarily implement that interface for UUEncode as well. */
168
mb_uuencode_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)169 static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
170 {
171 ZEND_ASSERT(bufsize >= 3);
172
173 unsigned char *p = *in, *e = p + *in_len;
174 uint32_t *out = buf, *limit = buf + bufsize;
175
176 unsigned int _state = *state & 0xFF;
177 unsigned int size = *state >> 8;
178
179 while (p < e && (limit - out) >= 3) {
180 unsigned char c = *p++;
181
182 switch (_state) {
183 case uudec_state_ground:
184 if (c == 'b') {
185 if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) {
186 p += 5;
187 while (p < e && *p++ != '\n'); /* Consume everything up to newline */
188 _state = uudec_state_size;
189 }
190 /* We didn't find "begin " */
191 }
192 break;
193
194 case uudec_state_size:
195 size = UUDEC(c);
196 _state = uudec_state_a;
197 break;
198
199 case uudec_state_a:
200 if ((e - p) < 4) {
201 p = e;
202 break;
203 }
204
205 unsigned int a = UUDEC(c);
206 unsigned int b = UUDEC(*p++);
207 unsigned int c = UUDEC(*p++);
208 unsigned int d = UUDEC(*p++);
209
210 if (size > 0) {
211 *out++ = ((a << 2) | (b >> 4)) & 0xFF;
212 size--;
213 }
214 if (size > 0) {
215 *out++ = ((b << 4) | (c >> 2)) & 0xFF;
216 size--;
217 }
218 if (size > 0) {
219 *out++ = ((c << 6) | d) & 0xFF;
220 size--;
221 }
222
223 _state = size ? uudec_state_a : uudec_state_skip_newline;
224 break;
225
226 case uudec_state_skip_newline:
227 _state = uudec_state_size;
228 break;
229 }
230 }
231
232 *state = (size << 8) | _state;
233 *in_len = e - p;
234 *in = p;
235 return out - buf;
236 }
237
/* Convert a 6-bit value to its UUEncode character: zero is written as '`',
 * every other value as the value plus 32. */
static unsigned char uuencode_six_bits(unsigned int bits)
{
	return bits ? (unsigned char)(bits + 32) : '`';
}
246
/*
 * Encode a buffer of wchars (each treated as one input byte) as UUEncode text
 * and append it to `buf`.
 *
 * in, len  input values and their count.
 * buf      output conversion buffer; buf->state carries data between calls:
 *            bit 0       set once the "begin" header has been written
 *            bits 1-7    input bytes already encoded on the current line
 *            bits 8-15   number of leftover input bits cached (0, 2 or 4)
 *            bits 16+    the cached bits themselves
 * end      true on the final call; the last partial line is then terminated.
 */
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Every 3 bytes of input gets encoded as 4 bytes of output
	 * Additionally, we have a 'length' byte and a newline for each line of output
	 * (Maximum 45 input bytes can be encoded on a single output line)
	 * Make space for two more bytes in case we start close to where a line must end,
	 * and another two if there are cached bits remaining from the previous call */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 4);

	unsigned int bytes_encoded = (buf->state >> 1) & 0x7F;
	/* UUEncode naturally wants to process input bytes in groups of 3, but
	 * our buffer size may not be a multiple of 3
	 * So there may be data from the previous call which we need to flush out */
	unsigned int n_cached_bits = (buf->state >> 8) & 0xFF;
	unsigned int cached_bits = buf->state >> 16;

	if (!buf->state) {
		/* First call: emit the header line and a provisional length byte */
		for (const char *s = "begin 0644 filename\n"; *s; s++) {
			out = mb_convert_buf_add(out, *s);
		}
		out = mb_convert_buf_add(out, MIN(len, 45) + 32);
		buf->state |= 1;
	} else if (!len && end && !bytes_encoded && !n_cached_bits) {
		/* Corner case: under EXTREMELY rare circumstances, it's possible that the
		 * final call to this conversion function will happen with an empty input
		 * buffer, leaving an unwanted trailing len byte in the output buffer.
		 * Drop it through the local cursor and store the locals back, rather
		 * than touching buf->out directly: MB_CONVERT_BUF_ENSURE above is
		 * handed `out` and `limit` and may have changed them (presumably by
		 * growing the buffer), in which case buf->out would be out of date. */
		out--;
		MB_CONVERT_BUF_STORE(buf, out, limit);
		return;
	} else {
		/* UUEncode starts each line with a byte which indicates how many bytes
		 * are encoded on the line
		 * This can create a problem, since we receive the incoming data one buffer
		 * at a time, and there is no requirement that the buffers should be aligned
		 * with line boundaries
		 * So if a previous line was cut off, we need to go back and fix up
		 * the preceding len byte */
		unsigned char *len_byte = out - (bytes_encoded * 4 / 3) - 1;
		if (n_cached_bits) {
			/* One output character was already written if 2 bits are cached,
			 * two characters if 4 bits are cached */
			len_byte -= (n_cached_bits == 2) ? 1 : 2;
		}
		*len_byte = MIN(bytes_encoded + len + (n_cached_bits ? (n_cached_bits == 2 ? 1 : 2) : 0), 45) + 32;

		if (n_cached_bits) {
			/* Flush out bits which remained from previous call */
			if (n_cached_bits == 2) {
				uint32_t w = cached_bits;
				uint32_t w2 = 0, w3 = 0;
				if (len) {
					w2 = *in++;
					len--;
				}
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add3(out, uuencode_six_bits((w << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			} else {
				uint32_t w2 = cached_bits;
				uint32_t w3 = 0;
				if (len) {
					w3 = *in++;
					len--;
				}
				out = mb_convert_buf_add2(out, uuencode_six_bits((w2 << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
			}
			n_cached_bits = cached_bits = 0;
			/* The group is now complete; join the main loop at its line-length check */
			goto possible_line_break;
		}
	}

	while (len--) {
		uint32_t w = *in++;
		uint32_t w2 = 0, w3 = 0;

		if (!len) {
			if (!end) {
				out = mb_convert_buf_add(out, uuencode_six_bits((w >> 2) & 0x3F));
				/* Cache 2 remaining bits from 'w' */
				cached_bits = w & 0x3;
				n_cached_bits = 2;
				break;
			}
		} else {
			w2 = *in++;
			len--;
		}

		if (!len) {
			if (!end) {
				out = mb_convert_buf_add2(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)));
				/* Cache 4 remaining bits from 'w2' */
				cached_bits = w2 & 0xF;
				n_cached_bits = 4;
				break;
			}
		} else {
			w3 = *in++;
			len--;
		}

		out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));

possible_line_break:
		bytes_encoded += 3;

		if (bytes_encoded >= 45) {
			out = mb_convert_buf_add(out, '\n');
			if (len || !end) {
				/* Provisional length byte for the next line; it is fixed up
				 * on the next call if that line continues there */
				out = mb_convert_buf_add(out, MIN(len, 45) + 32);
			}
			bytes_encoded = 0;
		}
	}

	if (bytes_encoded && end) {
		out = mb_convert_buf_add(out, '\n');
	}

	buf->state = ((cached_bits & 0xFF) << 16) | ((n_cached_bits & 0xFF) << 8) | ((bytes_encoded & 0x7F) << 1) | (buf->state & 1);
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
370