xref: /openssl/test/bio_base64_test.c (revision 84393370)
1 /*
2  * Copyright 2024 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 #include <stdio.h>
10 #include <string.h>
11 #include <openssl/bio.h>
12 #include <openssl/evp.h>
13 #include <openssl/rand.h>
14 
15 #include "testutil.h"
16 
/*
 * 2047 bytes of "#ooooooooo..." + NUL terminator.
 *
 * The content is filled in by setup_tests(); it serves as the "long"
 * non-base64 prefix (longer than the 1k internal BIO buffer mentioned there).
 */
static char gunk[2048];
19 
/* Parameters of a single decode test. */
typedef struct {
    char *prefix;       /* Written, plus a newline, ahead of the data if non-empty */
    char *encoded;      /* Verbatim encoded input, or NULL to encode `bytes` octets */
    unsigned bytes;     /* Raw payload length, i.e. the expected decode length */
    int trunc;          /* Count of extra 'A' code points appended after the data */
    char *suffix;       /* Written, plus a newline, after the data if non-empty */
    int retry;          /* Non-zero: source BIO signals EOF with EOF_RETURN */
    int no_nl;          /* Non-zero: set BIO_FLAGS_BASE64_NO_NL on the filter */
} test_case;
29 
#define BUFMAX 0xa0000          /* Encode at most 640kB. */
#define sEOF "-EOF"             /* '-' as in PEM and MIME boundaries */
#define junk "#foo"             /* Skipped initial content */

#define EOF_RETURN (-1729)      /* Distinct from -1, etc., internal results */
#define NLEN 6                  /* Number of base payload lengths tested */
#define NVAR 5                  /* Number of prefix/suffix variants tested */
/*
 * Junk suffixed variants don't make sense with padding or truncated groups
 * because we will typically stop with an error before seeing the suffix, but
 * with retriable BIOs may never look at the suffix after detecting padding.
 *
 * NPAD counts the tail cases: no extra data, 1 or 2 extra octets (padded
 * final group), or 1, 2 or 3 extra code points (truncated final group).
 * NVARPAD is the number of (variant, tail case) combinations: every variant
 * except the last gets all NPAD tail cases, the last one only the first.
 */
#define NPAD 6
#define NVARPAD (NVAR * NPAD - NPAD + 1)
44 
45 static char *prefixes[NVAR] = { "", junk, gunk, "", "" };
46 static char *suffixes[NVAR] = { "", "", "", sEOF, junk };
47 static unsigned lengths[6] = { 0, 3, 48, 192, 768, 1536 };
48 static unsigned linelengths[] = {
49     4, 8, 16, 28, 40, 64, 80, 128, 256, 512, 1023, 0
50 };
51 static unsigned wscnts[] = { 0, 1, 2, 4, 8, 16, 0xFFFF };
52 
53 /* Generate `len` random octets */
genbytes(unsigned len)54 static unsigned char *genbytes(unsigned len)
55 {
56     unsigned char *buf = NULL;
57 
58     if (len > 0 && len <= BUFMAX && (buf = OPENSSL_malloc(len)) != NULL)
59         RAND_bytes(buf, len);
60 
61     return buf;
62 }
63 
64 /* Append one base64 codepoint, adding newlines after every `llen` bytes */
memout(BIO * mem,char c,int llen,int * pos)65 static int memout(BIO *mem, char c, int llen, int *pos)
66 {
67     if (BIO_write(mem, &c, 1) != 1)
68         return 0;
69     if (++*pos == llen) {
70         *pos = 0;
71         c = '\n';
72         if (BIO_write(mem, &c, 1) != 1)
73             return 0;
74     }
75     return 1;
76 }
77 
78 /* Encode and append one 6-bit slice, randomly prepending some whitespace */
memoutws(BIO * mem,char c,unsigned wscnt,unsigned llen,int * pos)79 static int memoutws(BIO *mem, char c, unsigned wscnt, unsigned llen, int *pos)
80 {
81     if (wscnt > 0
82         && (test_random() % llen) < wscnt
83         && memout(mem, ' ', llen, pos) == 0)
84         return 0;
85     return memout(mem, c, llen, pos);
86 }
87 
88 /*
89  * Encode an octet string in base64, approximately `llen` bytes per line,
90  * with up to roughly `wscnt` additional space characters inserted at random
91  * before some of the base64 code points.
92  */
encode(unsigned const char * buf,unsigned buflen,char * encoded,int trunc,unsigned llen,unsigned wscnt,BIO * mem)93 static int encode(unsigned const char *buf, unsigned buflen, char *encoded,
94                   int trunc, unsigned llen, unsigned wscnt, BIO *mem)
95 {
96     static const unsigned char b64[65] =
97         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
98     int pos = 0;
99     char nl = '\n';
100 
101     /* Use a verbatim encoding when provided */
102     if (encoded != NULL) {
103         int elen = strlen(encoded);
104 
105         return BIO_write(mem, encoded, elen) == elen;
106     }
107 
108     /* Encode full 3-octet groups */
109     while (buflen > 2) {
110         unsigned long v = buf[0] << 16 | buf[1] << 8 | buf[2];
111 
112         if (memoutws(mem, b64[v >> 18], wscnt, llen, &pos) == 0
113             || memoutws(mem, b64[(v >> 12) & 0x3f], wscnt, llen, &pos) == 0
114             || memoutws(mem, b64[(v >> 6) & 0x3f], wscnt, llen, &pos) == 0
115             || memoutws(mem, b64[v & 0x3f], wscnt, llen, &pos) == 0)
116             return 0;
117         buf += 3;
118         buflen -= 3;
119     }
120 
121     /* Encode and pad final 1 or 2 octet group */
122     if (buflen == 2) {
123         unsigned long v = buf[0] << 8 | buf[1];
124 
125         if (memoutws(mem, b64[(v >> 10) & 0x3f], wscnt, llen, &pos) == 0
126             || memoutws(mem, b64[(v >> 4) & 0x3f], wscnt, llen, &pos) == 0
127             || memoutws(mem, b64[(v & 0xf) << 2], wscnt, llen, &pos) == 0
128             || memoutws(mem, '=', wscnt, llen, &pos) == 0)
129             return 0;
130     } else if (buflen == 1) {
131         unsigned long v = buf[0];
132 
133         if (memoutws(mem, b64[v >> 2], wscnt, llen, &pos) == 0
134             || memoutws(mem, b64[(v & 0x3) << 4], wscnt, llen, &pos) == 0
135             || memoutws(mem, '=', wscnt, llen, &pos) == 0
136             || memoutws(mem, '=', wscnt, llen, &pos) == 0)
137             return 0;
138     }
139 
140     while (trunc-- > 0)
141         if (memoutws(mem, 'A', wscnt, llen, &pos) == 0)
142             return 0;
143 
144     /* Terminate last line */
145     if (pos > 0 && BIO_write(mem, &nl, 1) != 1)
146         return 0;
147 
148     return 1;
149 }
150 
genb64(char * prefix,char * suffix,unsigned const char * buf,unsigned buflen,int trunc,char * encoded,unsigned llen,unsigned wscnt,char ** out)151 static int genb64(char *prefix, char *suffix, unsigned const char *buf,
152                   unsigned buflen, int trunc, char *encoded, unsigned llen,
153                   unsigned wscnt, char **out)
154 {
155     int preflen = strlen(prefix);
156     int sufflen = strlen(suffix);
157     int outlen;
158     char newline = '\n';
159     BUF_MEM *bptr;
160     BIO *mem = BIO_new(BIO_s_mem());
161 
162     if (mem == NULL)
163         return -1;
164 
165     if ((*prefix && (BIO_write(mem, prefix, preflen) != preflen
166                      || BIO_write(mem, &newline, 1) != 1))
167         || encode(buf, buflen, encoded, trunc, llen, wscnt, mem) <= 0
168         || (*suffix && (BIO_write(mem, suffix, sufflen) != sufflen
169                         || BIO_write(mem, &newline, 1) != 1))) {
170         BIO_free(mem);
171         return -1;
172     }
173 
174     /* Orphan the memory BIO's data buffer */
175     BIO_get_mem_ptr(mem, &bptr);
176     *out = bptr->data;
177     outlen = bptr->length;
178     bptr->data = NULL;
179     (void) BIO_set_close(mem, BIO_NOCLOSE);
180     BIO_free(mem);
181     BUF_MEM_free(bptr);
182 
183     return outlen;
184 }
185 
/*
 * Run one decode test: generate the input described by `t` with encoded line
 * length `llen` and whitespace count `wscnt`, feed it through a base64 filter
 * BIO stacked on a memory BIO, and check the outcome.
 *
 * Returns 0 when the outcome matches expectations, -1 otherwise (including
 * allocation or setup failures).
 */
static int test_bio_base64_run(test_case *t, int llen, int wscnt)
{
    unsigned char *raw = NULL;
    unsigned char *out = NULL;
    unsigned out_len;
    char *encoded = NULL;
    int elen;
    BIO *bio = NULL, *b64 = NULL;
    int n, n1, n2;
    int off;
    int ret = -1;

    /*
     * Pre-encoded data always encodes NUL octets.  If all we care about is the
     * length, and not the payload, use random bytes.
     */
    if (t->encoded != NULL)
        raw = OPENSSL_zalloc(t->bytes);
    else
        raw = genbytes(t->bytes);

    if (raw == NULL && t->bytes > 0) {
        TEST_error("out of memory");
        goto end;
    }

    out_len = t->bytes + 1024;
    out = OPENSSL_malloc(out_len);
    if (out == NULL) {
        TEST_error("out of memory");
        goto end;
    }

    elen = genb64(t->prefix, t->suffix, raw, t->bytes, t->trunc, t->encoded,
                  llen, wscnt, &encoded);
    if (elen < 0 || (bio = BIO_new(BIO_s_mem())) == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    if (t->retry)
        BIO_set_mem_eof_return(bio, EOF_RETURN);
    else
        BIO_set_mem_eof_return(bio, 0);

    /*
     * When the source bio is retriable, exercise retries by writing the input
     * to the underlying BIO in two steps (the first half, then the rest) and
     * trying to decode some data after each write.
     */
    n1 = elen;
    if (t->retry)
        n1 = elen / 2;
    if (n1 > 0 && BIO_write(bio, encoded, n1) != n1) {
        TEST_error("Failed to write encoded input");
        goto end;
    }

    b64 = BIO_new(BIO_f_base64());
    if (b64 == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    if (t->no_nl)
        BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);
    BIO_push(b64, bio);

    n = BIO_read(b64, out, out_len);

    if (n1 < elen) {
        /* Append the rest of the input, and read again */
        if (BIO_write(bio, encoded + n1, elen - n1) != elen - n1) {
            TEST_error("Failed to write encoded input");
            goto end;
        }
        if (n > 0) {
            n2 = BIO_read(b64, out + n, out_len - n);
            if (n2 > 0)
                n += n2;
        } else if (n == EOF_RETURN) {
            n = BIO_read(b64, out, out_len);
        }
    }

    /* Turn retry-related negative results to normal (0) EOF */
    if (n == EOF_RETURN)
        n = 0;

    /* Turn off retries */
    if (t->retry)
        BIO_set_mem_eof_return(bio, 0);

    /*
     * An earlier read may have failed with some other negative result; never
     * use that as a buffer offset.  `n` itself is kept for the checks below.
     */
    off = n > 0 ? n : 0;

    if (off < (int) out_len) {
        /* Perform the last read, checking its result */
        ret = BIO_read(b64, out + off, out_len - off);
    } else {
        /* Should not happen, given extra space in out_len */
        TEST_error("Unexpectedly long decode output");
        ret = -1;
    }

    /*
     * Expect an error to be detected with:
     *
     * - truncated groups,
     * - non-base64 suffixes (other than soft EOF) for non-empty or oneline
     *   input
     * - non-base64 prefixes in NO_NL mode
     *
     * Otherwise, check the decoded content
     */
    if (t->trunc > 0
        || ((t->bytes > 0 || t->no_nl) && *t->suffix && *t->suffix != '-')
        || (t->no_nl && *t->prefix)) {
        if ((ret = ret < 0 ? 0 : -1) != 0)
            TEST_error("Final read result was non-negative");
    } else if (ret != 0
             || n != (int) t->bytes
             || (n > 0 && memcmp(raw, out, n) != 0)) {
        TEST_error("Failed to decode expected data");
        ret = -1;
    }

 end:
    /* Once pushed, the source BIO is released along with the filter */
    if (b64 != NULL)
        BIO_free_all(b64);
    else
        BIO_free(bio);
    OPENSSL_free(out);
    OPENSSL_free(raw);
    OPENSSL_free(encoded);

    return ret;
}
309 
/* Print a one-line summary of a finished run to stderr */
static void generic_case_report(test_case *t, int ok, unsigned llen,
                                unsigned wscnt)
{
    fprintf(stderr, "bio_base64_test: ok=%d", ok);
    if (*t->prefix)
        fprintf(stderr, ", prefix='%s'", t->prefix);
    if (t->encoded)
        fprintf(stderr, ", data='%s'", t->encoded);
    else
        fprintf(stderr, ", datalen=%u", t->bytes);
    if (t->trunc)
        fprintf(stderr, ", trunc=%d", t->trunc);
    if (*t->suffix)
        fprintf(stderr, ", suffix='%s'", t->suffix);
    fprintf(stderr, ", linelen=%u", llen);
    fprintf(stderr, ", wscount=%u", wscnt);
    if (t->retry)
        fprintf(stderr, ", retriable");
    if (t->no_nl)
        fprintf(stderr, ", oneline");
    fputc('\n', stderr);
}

/*
 * Run test case `t` across the table of line lengths and, for each, across
 * all whitespace counts smaller than half the line length.  With `verbose`
 * set, a summary line is printed after every run.
 *
 * Returns 1 if every run passed, 0 otherwise.
 */
static int generic_case(test_case *t, int verbose)
{
    /*
     * Use a longer line for NO_NL tests, in particular, eventually
     * exceeding 1k bytes.
     */
    int extra = t->no_nl ? 64 : 0;
    int ok = 1;
    int li, wi;

    for (li = 0; linelengths[li] > 0; ++li) {
        unsigned line = linelengths[li];

        for (wi = 0; wscnts[wi] * 2 < line; ++wi) {
            unsigned spaces = wscnts[wi];

            if (test_bio_base64_run(t, line + extra, spaces) != 0)
                ok = 0;

            if (verbose)
                generic_case_report(t, ok, line, spaces);

            /* For verbatim input no effect from varying llen or wscnt */
            if (t->encoded)
                return ok;
        }

        /*
         * Longer 'llen' has no effect once we're sure to not have multiple
         * lines of data
         */
        if (line > t->bytes + (t->bytes >> 1))
            break;
    }

    return ok;
}
361 
/*
 * Divide `i` by `m` using unsigned arithmetic: the quotient is stored in
 * `*q` and the remainder is returned.
 */
static int quotrem(int i, unsigned int m, int *q)
{
    unsigned int dividend = i;
    unsigned int quotient = dividend / m;

    *q = quotient;
    return dividend - quotient * m;
}
367 
test_bio_base64_generated(int idx)368 static int test_bio_base64_generated(int idx)
369 {
370     test_case t;
371     int variant;
372     int lencase;
373     int padcase;
374     int q = idx;
375 
376     lencase = quotrem(q, NLEN, &q);
377     variant = quotrem(q, NVARPAD, &q);
378     padcase = quotrem(variant, NPAD, &variant);
379     t.retry = quotrem(q, 2, &q);
380     t.no_nl = quotrem(q, 2, &q);
381 
382     if (q != 0) {
383         fprintf(stderr, "Test index out of range: %d", idx);
384         return 0;
385     }
386 
387     t.prefix = prefixes[variant];
388     t.encoded = NULL;
389     t.bytes  = lengths[lencase];
390     t.trunc = 0;
391     if (padcase && padcase < 3)
392         t.bytes  += padcase;
393     else if (padcase >= 3)
394         t.trunc = padcase - 2;
395     t.suffix = suffixes[variant];
396 
397     if (padcase != 0 && (*t.suffix && *t.suffix != '-')) {
398         TEST_error("Unexpected suffix test after padding");
399         return 0;
400     }
401 
402     return generic_case(&t, 0);
403 }
404 
test_bio_base64_corner_case_bug(int idx)405 static int test_bio_base64_corner_case_bug(int idx)
406 {
407     test_case t;
408     int q = idx;
409 
410     t.retry = quotrem(q, 2, &q);
411     t.no_nl = quotrem(q, 2, &q);
412 
413     if (q != 0) {
414         fprintf(stderr, "Test index out of range: %d", idx);
415         return 0;
416     }
417 
418     /* 9 bytes of skipped non-base64 input + newline */
419     t.prefix = "#foo\n#bar";
420 
421     /* 9 bytes on 2nd and subsequent lines */
422     t.encoded = "A\nAAA\nAAAA\n";
423     t.suffix = "";
424 
425     /* Expected decode length */
426     t.bytes = 6;
427     t.trunc = 0;    /* ignored */
428 
429     return generic_case(&t, 0);
430 }
431 
setup_tests(void)432 int setup_tests(void)
433 {
434     int numidx;
435 
436     memset(gunk, 'o', sizeof(gunk));
437     gunk[0] = '#';
438     gunk[sizeof(gunk) - 1] = '\0';
439 
440     /*
441      * Test 5 variants of prefix or suffix
442      *
443      *  - both empty
444      *  - short junk prefix
445      *  - long gunk prefix (> internal BIO 1k buffer size),
446      *  - soft EOF suffix
447      *  - junk suffix (expect to detect an error)
448      *
449      * For 6 input lengths of randomly generated raw input:
450      *
451      *  0, 3, 48, 192, 768 and 1536
452      *
453      * corresponding to encoded lengths (plus linebreaks and ignored
454      * whitespace) of:
455      *
456      *  0, 4, 64, 256, 1024 and 2048
457      *
458      * Followed by zero, one or two additional bytes that may involve padding,
459      * or else (truncation) 1, 2 or 3 bytes with missing padding.
460      * Only the the first four variants make sense with padding or truncated
461      * groups.
462      *
463      * With two types of underlying BIO
464      *
465      *  - Non-retriable underlying BIO
466      *  - Retriable underlying BIO
467      *
468      * And with/without the BIO_FLAGS_BASE64_NO_NL flag, where now an error is
469      * expected with the junk and gunk prefixes, however, but the "soft EOF"
470      * suffix is still accepted.
471      *
472      * Internally, each test may loop over a range of encoded line lengths and
473      * whitespace average "densities".
474      */
475     numidx = NLEN * (NVAR * NPAD - NPAD + 1) * 2 * 2;
476     ADD_ALL_TESTS(test_bio_base64_generated, numidx);
477 
478     /*
479      * Corner case in original code that skips ignored input, when the ignored
480      * length is one byte longer than the total of the second and later lines
481      * of valid input in the first 1k bytes of input.  No content variants,
482      * just BIO retry status and oneline flags vary.
483      */
484     numidx = 2 * 2;
485     ADD_ALL_TESTS(test_bio_base64_corner_case_bug, numidx);
486 
487     return 1;
488 }
489