1 /*
2 * Copyright 2024 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9 #include <stdio.h>
10 #include <string.h>
11 #include <openssl/bio.h>
12 #include <openssl/evp.h>
13 #include <openssl/rand.h>
14
15 #include "testutil.h"
16
/*
 * 2047 bytes of "#ooooooooo..." + NUL terminator.  The contents are filled in
 * by setup_tests(); the buffer is referenced from the prefixes[] table as the
 * long non-base64 prefix.
 */
static char gunk[2048];
19
/* Description of a single base64 decoding test input and its options */
typedef struct {
    /* Text written before the data; a newline is added when non-empty */
    char *prefix;
    /* Verbatim pre-encoded data, or NULL to encode `bytes` random octets */
    char *encoded;
    /* Number of raw octets, i.e. the expected decoded length */
    unsigned bytes;
    /* Number of stray 'A' codepoints appended after the encoded data */
    int trunc;
    /* Text written after the data; a newline is added when non-empty */
    char *suffix;
    /* Non-zero: source BIO reports EOF_RETURN at EOF, input fed in two parts */
    int retry;
    /* Non-zero: set BIO_FLAGS_BASE64_NO_NL on the base64 filter */
    int no_nl;
} test_case;
29
#define BUFMAX 0xa0000 /* Encode at most 640kB. */
#define sEOF "-EOF" /* '-' as in PEM and MIME boundaries */
#define junk "#foo" /* Skipped initial content */

#define EOF_RETURN (-1729) /* Distinct from -1, etc., internal results */
/* Number of raw input lengths, see lengths[] */
#define NLEN 6
/* Number of prefix/suffix variants, see prefixes[] and suffixes[] */
#define NVAR 5
/*
 * Junk suffixed variants don't make sense with padding or truncated groups
 * because we will typically stop with an error before seeing the suffix, and
 * with retriable BIOs we may never look at the suffix after detecting padding.
 *
 * NPAD is the number of padding/truncation cases per variant: index 0 adds
 * nothing, 1 and 2 add that many raw octets, and 3 to 5 append 1 to 3 stray
 * codepoints (see test_bio_base64_generated()).  NVARPAD counts the
 * (variant, padding case) combinations: all NPAD cases for the first
 * NVAR - 1 variants, plus only case 0 for the last (junk suffix) variant.
 */
#define NPAD 6
#define NVARPAD (NVAR * NPAD - NPAD + 1)
44
/*
 * Prefix/suffix pairs, indexed by variant: both empty, short junk prefix,
 * long gunk prefix, soft EOF suffix, junk suffix.
 */
static char *prefixes[NVAR] = { "", junk, gunk, "", "" };
static char *suffixes[NVAR] = { "", "", "", sEOF, junk };
/* Raw input lengths in octets, indexed by length case */
static unsigned lengths[6] = { 0, 3, 48, 192, 768, 1536 };
/* Encoded line lengths to try; the trailing 0 terminates the list */
static unsigned linelengths[] = {
    4, 8, 16, 28, 40, 64, 80, 128, 256, 512, 1023, 0
};
/*
 * Whitespace counts to try.  generic_case() stops at the first entry that is
 * at least half the line length, so the large final value ends the scan for
 * every line length in linelengths[].
 */
static unsigned wscnts[] = { 0, 1, 2, 4, 8, 16, 0xFFFF };
52
53 /* Generate `len` random octets */
genbytes(unsigned len)54 static unsigned char *genbytes(unsigned len)
55 {
56 unsigned char *buf = NULL;
57
58 if (len > 0 && len <= BUFMAX && (buf = OPENSSL_malloc(len)) != NULL)
59 RAND_bytes(buf, len);
60
61 return buf;
62 }
63
64 /* Append one base64 codepoint, adding newlines after every `llen` bytes */
memout(BIO * mem,char c,int llen,int * pos)65 static int memout(BIO *mem, char c, int llen, int *pos)
66 {
67 if (BIO_write(mem, &c, 1) != 1)
68 return 0;
69 if (++*pos == llen) {
70 *pos = 0;
71 c = '\n';
72 if (BIO_write(mem, &c, 1) != 1)
73 return 0;
74 }
75 return 1;
76 }
77
78 /* Encode and append one 6-bit slice, randomly prepending some whitespace */
memoutws(BIO * mem,char c,unsigned wscnt,unsigned llen,int * pos)79 static int memoutws(BIO *mem, char c, unsigned wscnt, unsigned llen, int *pos)
80 {
81 if (wscnt > 0
82 && (test_random() % llen) < wscnt
83 && memout(mem, ' ', llen, pos) == 0)
84 return 0;
85 return memout(mem, c, llen, pos);
86 }
87
88 /*
89 * Encode an octet string in base64, approximately `llen` bytes per line,
90 * with up to roughly `wscnt` additional space characters inserted at random
91 * before some of the base64 code points.
92 */
encode(unsigned const char * buf,unsigned buflen,char * encoded,int trunc,unsigned llen,unsigned wscnt,BIO * mem)93 static int encode(unsigned const char *buf, unsigned buflen, char *encoded,
94 int trunc, unsigned llen, unsigned wscnt, BIO *mem)
95 {
96 static const unsigned char b64[65] =
97 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
98 int pos = 0;
99 char nl = '\n';
100
101 /* Use a verbatim encoding when provided */
102 if (encoded != NULL) {
103 int elen = strlen(encoded);
104
105 return BIO_write(mem, encoded, elen) == elen;
106 }
107
108 /* Encode full 3-octet groups */
109 while (buflen > 2) {
110 unsigned long v = buf[0] << 16 | buf[1] << 8 | buf[2];
111
112 if (memoutws(mem, b64[v >> 18], wscnt, llen, &pos) == 0
113 || memoutws(mem, b64[(v >> 12) & 0x3f], wscnt, llen, &pos) == 0
114 || memoutws(mem, b64[(v >> 6) & 0x3f], wscnt, llen, &pos) == 0
115 || memoutws(mem, b64[v & 0x3f], wscnt, llen, &pos) == 0)
116 return 0;
117 buf += 3;
118 buflen -= 3;
119 }
120
121 /* Encode and pad final 1 or 2 octet group */
122 if (buflen == 2) {
123 unsigned long v = buf[0] << 8 | buf[1];
124
125 if (memoutws(mem, b64[(v >> 10) & 0x3f], wscnt, llen, &pos) == 0
126 || memoutws(mem, b64[(v >> 4) & 0x3f], wscnt, llen, &pos) == 0
127 || memoutws(mem, b64[(v & 0xf) << 2], wscnt, llen, &pos) == 0
128 || memoutws(mem, '=', wscnt, llen, &pos) == 0)
129 return 0;
130 } else if (buflen == 1) {
131 unsigned long v = buf[0];
132
133 if (memoutws(mem, b64[v >> 2], wscnt, llen, &pos) == 0
134 || memoutws(mem, b64[(v & 0x3) << 4], wscnt, llen, &pos) == 0
135 || memoutws(mem, '=', wscnt, llen, &pos) == 0
136 || memoutws(mem, '=', wscnt, llen, &pos) == 0)
137 return 0;
138 }
139
140 while (trunc-- > 0)
141 if (memoutws(mem, 'A', wscnt, llen, &pos) == 0)
142 return 0;
143
144 /* Terminate last line */
145 if (pos > 0 && BIO_write(mem, &nl, 1) != 1)
146 return 0;
147
148 return 1;
149 }
150
genb64(char * prefix,char * suffix,unsigned const char * buf,unsigned buflen,int trunc,char * encoded,unsigned llen,unsigned wscnt,char ** out)151 static int genb64(char *prefix, char *suffix, unsigned const char *buf,
152 unsigned buflen, int trunc, char *encoded, unsigned llen,
153 unsigned wscnt, char **out)
154 {
155 int preflen = strlen(prefix);
156 int sufflen = strlen(suffix);
157 int outlen;
158 char newline = '\n';
159 BUF_MEM *bptr;
160 BIO *mem = BIO_new(BIO_s_mem());
161
162 if (mem == NULL)
163 return -1;
164
165 if ((*prefix && (BIO_write(mem, prefix, preflen) != preflen
166 || BIO_write(mem, &newline, 1) != 1))
167 || encode(buf, buflen, encoded, trunc, llen, wscnt, mem) <= 0
168 || (*suffix && (BIO_write(mem, suffix, sufflen) != sufflen
169 || BIO_write(mem, &newline, 1) != 1))) {
170 BIO_free(mem);
171 return -1;
172 }
173
174 /* Orphan the memory BIO's data buffer */
175 BIO_get_mem_ptr(mem, &bptr);
176 *out = bptr->data;
177 outlen = bptr->length;
178 bptr->data = NULL;
179 (void) BIO_set_close(mem, BIO_NOCLOSE);
180 BIO_free(mem);
181 BUF_MEM_free(bptr);
182
183 return outlen;
184 }
185
/*
 * Run one decoding test.
 *
 * The input described by `t` is generated with genb64(), using `llen`
 * characters per encoded line and a whitespace count of `wscnt`, and is then
 * read back through a BIO_f_base64() filter stacked on a memory BIO.  For
 * retriable tests the input is supplied in two parts with a read after each.
 *
 * Returns 0 when the outcome matches what is expected for `t` (either the
 * decoded data, or a decoding error), and -1 otherwise, including on
 * allocation or write failures.
 */
static int test_bio_base64_run(test_case *t, int llen, int wscnt)
{
    unsigned char *raw;
    unsigned char *out = NULL;
    unsigned out_len;
    char *encoded = NULL;
    int elen;
    BIO *bio = NULL, *b64 = NULL;
    int n, n1, off;
    int expect_error;
    int ret = -1;

    /*
     * Pre-encoded data always encodes NUL octets.  If all we care about is
     * the length, and not the payload, use random bytes.
     */
    if (t->encoded != NULL)
        raw = OPENSSL_zalloc(t->bytes);
    else
        raw = genbytes(t->bytes);

    if (raw == NULL && t->bytes > 0) {
        TEST_error("out of memory");
        goto end;
    }

    out_len = t->bytes + 1024;
    out = OPENSSL_malloc(out_len);
    if (out == NULL) {
        TEST_error("out of memory");
        goto end;
    }

    elen = genb64(t->prefix, t->suffix, raw, t->bytes, t->trunc, t->encoded,
                  llen, wscnt, &encoded);
    if (elen < 0 || (bio = BIO_new(BIO_s_mem())) == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    if (t->retry)
        BIO_set_mem_eof_return(bio, EOF_RETURN);
    else
        BIO_set_mem_eof_return(bio, 0);

    /*
     * When the source BIO is retriable, exercise retries by writing the
     * input to the underlying BIO in two steps (the first half, then the
     * rest) and trying to decode some data after each write.
     */
    n1 = elen;
    if (t->retry)
        n1 = elen / 2;
    if (n1 > 0 && BIO_write(bio, encoded, n1) != n1) {
        TEST_error("Failed to write encoded input");
        goto end;
    }

    b64 = BIO_new(BIO_f_base64());
    if (b64 == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    if (t->no_nl)
        BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);
    BIO_push(b64, bio);

    n = BIO_read(b64, out, out_len);

    if (n1 < elen) {
        /* Append the rest of the input, and read again */
        if (BIO_write(bio, encoded + n1, elen - n1) != elen - n1) {
            TEST_error("Failed to write encoded input");
            ret = -1;
            goto end;
        }
        if (n > 0) {
            int n2 = BIO_read(b64, out + n, out_len - n);

            if (n2 > 0)
                n += n2;
        } else if (n == EOF_RETURN) {
            n = BIO_read(b64, out, out_len);
        }
    }

    /* Turn retry-related negative results to normal (0) EOF */
    if (n == EOF_RETURN)
        n = 0;

    /* Turn off retries */
    if (t->retry)
        BIO_set_mem_eof_return(bio, 0);

    /*
     * A negative `n` (an error already reported by an earlier read) must not
     * be used as a buffer offset: the final read then starts at the beginning
     * of the output buffer.
     */
    off = n > 0 ? n : 0;
    if (off >= (int)out_len) {
        /* Should not happen, given extra space in out_len */
        TEST_error("Unexpectedly long decode output");
        ret = -1;
        goto end;
    }

    /* Perform the last read, checking its result */
    ret = BIO_read(b64, out + off, out_len - off);

    /*
     * Expect an error to be detected with:
     *
     * - truncated groups,
     * - non-base64 suffixes (other than soft EOF) for non-empty or oneline
     *   input
     * - non-base64 prefixes in NO_NL mode
     *
     * Otherwise, check the decoded content
     */
    expect_error = t->trunc > 0
        || ((t->bytes > 0 || t->no_nl) && *t->suffix && *t->suffix != '-')
        || (t->no_nl && *t->prefix);

    if (expect_error) {
        if (ret < 0) {
            ret = 0;
        } else {
            TEST_error("Final read result was non-negative");
            ret = -1;
        }
    } else if (ret != 0
               || n != (int)t->bytes
               || (n > 0 && memcmp(raw, out, n) != 0)) {
        TEST_error("Failed to decode expected data");
        ret = -1;
    }

 end:
    /* Once pushed, the source BIO is released together with the filter */
    BIO_free_all(b64 != NULL ? b64 : bio);
    OPENSSL_free(out);
    OPENSSL_free(raw);
    OPENSSL_free(encoded);

    return ret;
}
309
/*
 * Run test case `t` over the combinations of encoded line length and
 * whitespace count taken from linelengths[] and wscnts[].  With `verbose`
 * set, a one-line summary of each run is printed to stderr.
 *
 * Returns 1 if every run succeeded, 0 if any of them failed.
 */
static int generic_case(test_case *t, int verbose)
{
    int ok = 1;
    size_t li;
    size_t wi;

    for (li = 0; linelengths[li] > 0; ++li) {
        unsigned width = linelengths[li];

        /* Only whitespace counts below half the line length are tried */
        for (wi = 0; wscnts[wi] * 2 < width; ++wi) {
            unsigned spaces = wscnts[wi];
            /*
             * Use a longer line for NO_NL tests, in particular, eventually
             * exceeding 1k bytes.
             */
            unsigned padded = width + (t->no_nl ? 64 : 0);

            if (test_bio_base64_run(t, padded, spaces) != 0)
                ok = 0;

            if (verbose) {
                fprintf(stderr, "bio_base64_test: ok=%d", ok);
                if (*t->prefix)
                    fprintf(stderr, ", prefix='%s'", t->prefix);
                if (t->encoded)
                    fprintf(stderr, ", data='%s'", t->encoded);
                else
                    fprintf(stderr, ", datalen=%u", t->bytes);
                if (t->trunc)
                    fprintf(stderr, ", trunc=%d", t->trunc);
                if (*t->suffix)
                    fprintf(stderr, ", suffix='%s'", t->suffix);
                fprintf(stderr, ", linelen=%u", width);
                fprintf(stderr, ", wscount=%u", spaces);
                if (t->retry)
                    fprintf(stderr, ", retriable");
                if (t->no_nl)
                    fprintf(stderr, ", oneline");
                fputc('\n', stderr);
            }

            /* Verbatim input is unaffected by line length or whitespace */
            if (t->encoded)
                return ok;
        }

        /*
         * Once the line is long enough that the data cannot span multiple
         * lines, longer lines make no further difference.
         */
        if (width > t->bytes + (t->bytes >> 1))
            break;
    }

    return ok;
}
361
/*
 * Divide `i` by `m` using unsigned arithmetic: the quotient is stored in
 * `*q` and the remainder is returned.  Intended for non-negative `i`.
 */
static int quotrem(int i, unsigned int m, int *q)
{
    unsigned int dividend = (unsigned int)i;
    unsigned int quotient = dividend / m;

    *q = (int)quotient;
    return (int)(dividend % m);
}
367
test_bio_base64_generated(int idx)368 static int test_bio_base64_generated(int idx)
369 {
370 test_case t;
371 int variant;
372 int lencase;
373 int padcase;
374 int q = idx;
375
376 lencase = quotrem(q, NLEN, &q);
377 variant = quotrem(q, NVARPAD, &q);
378 padcase = quotrem(variant, NPAD, &variant);
379 t.retry = quotrem(q, 2, &q);
380 t.no_nl = quotrem(q, 2, &q);
381
382 if (q != 0) {
383 fprintf(stderr, "Test index out of range: %d", idx);
384 return 0;
385 }
386
387 t.prefix = prefixes[variant];
388 t.encoded = NULL;
389 t.bytes = lengths[lencase];
390 t.trunc = 0;
391 if (padcase && padcase < 3)
392 t.bytes += padcase;
393 else if (padcase >= 3)
394 t.trunc = padcase - 2;
395 t.suffix = suffixes[variant];
396
397 if (padcase != 0 && (*t.suffix && *t.suffix != '-')) {
398 TEST_error("Unexpected suffix test after padding");
399 return 0;
400 }
401
402 return generic_case(&t, 0);
403 }
404
test_bio_base64_corner_case_bug(int idx)405 static int test_bio_base64_corner_case_bug(int idx)
406 {
407 test_case t;
408 int q = idx;
409
410 t.retry = quotrem(q, 2, &q);
411 t.no_nl = quotrem(q, 2, &q);
412
413 if (q != 0) {
414 fprintf(stderr, "Test index out of range: %d", idx);
415 return 0;
416 }
417
418 /* 9 bytes of skipped non-base64 input + newline */
419 t.prefix = "#foo\n#bar";
420
421 /* 9 bytes on 2nd and subsequent lines */
422 t.encoded = "A\nAAA\nAAAA\n";
423 t.suffix = "";
424
425 /* Expected decode length */
426 t.bytes = 6;
427 t.trunc = 0; /* ignored */
428
429 return generic_case(&t, 0);
430 }
431
setup_tests(void)432 int setup_tests(void)
433 {
434 int numidx;
435
436 memset(gunk, 'o', sizeof(gunk));
437 gunk[0] = '#';
438 gunk[sizeof(gunk) - 1] = '\0';
439
440 /*
441 * Test 5 variants of prefix or suffix
442 *
443 * - both empty
444 * - short junk prefix
445 * - long gunk prefix (> internal BIO 1k buffer size),
446 * - soft EOF suffix
447 * - junk suffix (expect to detect an error)
448 *
449 * For 6 input lengths of randomly generated raw input:
450 *
451 * 0, 3, 48, 192, 768 and 1536
452 *
453 * corresponding to encoded lengths (plus linebreaks and ignored
454 * whitespace) of:
455 *
456 * 0, 4, 64, 256, 1024 and 2048
457 *
458 * Followed by zero, one or two additional bytes that may involve padding,
459 * or else (truncation) 1, 2 or 3 bytes with missing padding.
460 * Only the the first four variants make sense with padding or truncated
461 * groups.
462 *
463 * With two types of underlying BIO
464 *
465 * - Non-retriable underlying BIO
466 * - Retriable underlying BIO
467 *
468 * And with/without the BIO_FLAGS_BASE64_NO_NL flag, where now an error is
469 * expected with the junk and gunk prefixes, however, but the "soft EOF"
470 * suffix is still accepted.
471 *
472 * Internally, each test may loop over a range of encoded line lengths and
473 * whitespace average "densities".
474 */
475 numidx = NLEN * (NVAR * NPAD - NPAD + 1) * 2 * 2;
476 ADD_ALL_TESTS(test_bio_base64_generated, numidx);
477
478 /*
479 * Corner case in original code that skips ignored input, when the ignored
480 * length is one byte longer than the total of the second and later lines
481 * of valid input in the first 1k bytes of input. No content variants,
482 * just BIO retry status and oneline flags vary.
483 */
484 numidx = 2 * 2;
485 ADD_ALL_TESTS(test_bio_base64_corner_case_bug, numidx);
486
487 return 1;
488 }
489