xref: /curl/lib/content_encoding.c (revision fb711b50)
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #include "urldata.h"
28 #include <curl/curl.h>
29 #include <stddef.h>
30 
31 #ifdef HAVE_LIBZ
32 #include <zlib.h>
33 #endif
34 
35 #ifdef HAVE_BROTLI
36 #if defined(__GNUC__) || defined(__clang__)
37 /* Ignore -Wvla warnings in brotli headers */
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wvla"
40 #endif
41 #include <brotli/decode.h>
42 #if defined(__GNUC__) || defined(__clang__)
43 #pragma GCC diagnostic pop
44 #endif
45 #endif
46 
47 #ifdef HAVE_ZSTD
48 #include <zstd.h>
49 #endif
50 
51 #include "sendf.h"
52 #include "http.h"
53 #include "content_encoding.h"
54 #include "strdup.h"
55 #include "strcase.h"
56 
57 /* The last 3 #include files should be in this order */
58 #include "curl_printf.h"
59 #include "curl_memory.h"
60 #include "memdebug.h"
61 
62 #define CONTENT_ENCODING_DEFAULT  "identity"
63 
64 #ifndef CURL_DISABLE_HTTP
65 
66 /* allow no more than 5 "chained" compression steps */
67 #define MAX_ENCODE_STACK 5
68 
69 #define DSIZ CURL_MAX_WRITE_SIZE /* buffer size for decompressed data */
70 
71 
72 #ifdef HAVE_LIBZ
73 
74 /* Comment this out if zlib is always going to be at least ver. 1.2.0.4
75    (doing so will reduce code size slightly). */
76 #define OLD_ZLIB_SUPPORT 1
77 
78 #define GZIP_MAGIC_0 0x1f
79 #define GZIP_MAGIC_1 0x8b
80 
81 /* gzip flag byte */
82 #define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
83 #define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
84 #define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
85 #define ORIG_NAME    0x08 /* bit 3 set: original filename present */
86 #define COMMENT      0x10 /* bit 4 set: file comment present */
87 #define RESERVED     0xE0 /* bits 5..7: reserved */
88 
/* States a zlib_writer moves through. The enumerators keep their original
   order, which determines their numeric values. */
typedef enum {
  ZLIB_UNINIT,               /* no zlib stream is set up */
  ZLIB_INIT,                 /* stream is set up, no output produced yet */
  ZLIB_INFLATING,            /* inflating has started */
  ZLIB_EXTERNAL_TRAILER,     /* consuming trailer bytes outside of zlib */
  ZLIB_GZIP_HEADER,          /* still collecting gzip header bytes */
  ZLIB_GZIP_INFLATING,       /* inflating a gzip stream */
  ZLIB_INIT_GZIP             /* set up in transparent gzip mode */
} zlibInitState;
98 
99 /* Deflate and gzip writer. */
100 struct zlib_writer {
101   struct Curl_cwriter super;
102   zlibInitState zlib_init;   /* zlib init state */
103   uInt trailerlen;           /* Remaining trailer byte count. */
104   z_stream z;                /* State structure for zlib. */
105 };
106 
107 
108 static voidpf
zalloc_cb(voidpf opaque,unsigned int items,unsigned int size)109 zalloc_cb(voidpf opaque, unsigned int items, unsigned int size)
110 {
111   (void) opaque;
112   /* not a typo, keep it calloc() */
113   return (voidpf) calloc(items, size);
114 }
115 
116 static void
zfree_cb(voidpf opaque,voidpf ptr)117 zfree_cb(voidpf opaque, voidpf ptr)
118 {
119   (void) opaque;
120   free(ptr);
121 }
122 
123 static CURLcode
process_zlib_error(struct Curl_easy * data,z_stream * z)124 process_zlib_error(struct Curl_easy *data, z_stream *z)
125 {
126   if(z->msg)
127     failf(data, "Error while processing content unencoding: %s",
128           z->msg);
129   else
130     failf(data, "Error while processing content unencoding: "
131           "Unknown failure within decompression software.");
132 
133   return CURLE_BAD_CONTENT_ENCODING;
134 }
135 
136 static CURLcode
exit_zlib(struct Curl_easy * data,z_stream * z,zlibInitState * zlib_init,CURLcode result)137 exit_zlib(struct Curl_easy *data,
138           z_stream *z, zlibInitState *zlib_init, CURLcode result)
139 {
140   if(*zlib_init == ZLIB_GZIP_HEADER)
141     Curl_safefree(z->next_in);
142 
143   if(*zlib_init != ZLIB_UNINIT) {
144     if(inflateEnd(z) != Z_OK && result == CURLE_OK)
145       result = process_zlib_error(data, z);
146     *zlib_init = ZLIB_UNINIT;
147   }
148 
149   return result;
150 }
151 
process_trailer(struct Curl_easy * data,struct zlib_writer * zp)152 static CURLcode process_trailer(struct Curl_easy *data,
153                                 struct zlib_writer *zp)
154 {
155   z_stream *z = &zp->z;
156   CURLcode result = CURLE_OK;
157   uInt len = z->avail_in < zp->trailerlen ? z->avail_in : zp->trailerlen;
158 
159   /* Consume expected trailer bytes. Terminate stream if exhausted.
160      Issue an error if unexpected bytes follow. */
161 
162   zp->trailerlen -= len;
163   z->avail_in -= len;
164   z->next_in += len;
165   if(z->avail_in)
166     result = CURLE_WRITE_ERROR;
167   if(result || !zp->trailerlen)
168     result = exit_zlib(data, z, &zp->zlib_init, result);
169   else {
170     /* Only occurs for gzip with zlib < 1.2.0.4 or raw deflate. */
171     zp->zlib_init = ZLIB_EXTERNAL_TRAILER;
172   }
173   return result;
174 }
175 
inflate_stream(struct Curl_easy * data,struct Curl_cwriter * writer,int type,zlibInitState started)176 static CURLcode inflate_stream(struct Curl_easy *data,
177                                struct Curl_cwriter *writer, int type,
178                                zlibInitState started)
179 {
180   struct zlib_writer *zp = (struct zlib_writer *) writer;
181   z_stream *z = &zp->z;         /* zlib state structure */
182   uInt nread = z->avail_in;
183   Bytef *orig_in = z->next_in;
184   bool done = FALSE;
185   CURLcode result = CURLE_OK;   /* Curl_client_write status */
186   char *decomp;                 /* Put the decompressed data here. */
187 
188   /* Check state. */
189   if(zp->zlib_init != ZLIB_INIT &&
190      zp->zlib_init != ZLIB_INFLATING &&
191      zp->zlib_init != ZLIB_INIT_GZIP &&
192      zp->zlib_init != ZLIB_GZIP_INFLATING)
193     return exit_zlib(data, z, &zp->zlib_init, CURLE_WRITE_ERROR);
194 
195   /* Dynamically allocate a buffer for decompression because it is uncommonly
196      large to hold on the stack */
197   decomp = malloc(DSIZ);
198   if(!decomp)
199     return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY);
200 
201   /* because the buffer size is fixed, iteratively decompress and transfer to
202      the client via next_write function. */
203   while(!done) {
204     int status;                   /* zlib status */
205     done = TRUE;
206 
207     /* (re)set buffer for decompressed output for every iteration */
208     z->next_out = (Bytef *) decomp;
209     z->avail_out = DSIZ;
210 
211 #ifdef Z_BLOCK
212     /* Z_BLOCK is only available in zlib ver. >= 1.2.0.5 */
213     status = inflate(z, Z_BLOCK);
214 #else
215     /* fallback for zlib ver. < 1.2.0.5 */
216     status = inflate(z, Z_SYNC_FLUSH);
217 #endif
218 
219     /* Flush output data if some. */
220     if(z->avail_out != DSIZ) {
221       if(status == Z_OK || status == Z_STREAM_END) {
222         zp->zlib_init = started;      /* Data started. */
223         result = Curl_cwriter_write(data, writer->next, type, decomp,
224                                      DSIZ - z->avail_out);
225         if(result) {
226           exit_zlib(data, z, &zp->zlib_init, result);
227           break;
228         }
229       }
230     }
231 
232     /* Dispatch by inflate() status. */
233     switch(status) {
234     case Z_OK:
235       /* Always loop: there may be unflushed latched data in zlib state. */
236       done = FALSE;
237       break;
238     case Z_BUF_ERROR:
239       /* No more data to flush: just exit loop. */
240       break;
241     case Z_STREAM_END:
242       result = process_trailer(data, zp);
243       break;
244     case Z_DATA_ERROR:
245       /* some servers seem to not generate zlib headers, so this is an attempt
246          to fix and continue anyway */
247       if(zp->zlib_init == ZLIB_INIT) {
248         /* Do not use inflateReset2(): only available since zlib 1.2.3.4. */
249         (void) inflateEnd(z);     /* do not care about the return code */
250         if(inflateInit2(z, -MAX_WBITS) == Z_OK) {
251           z->next_in = orig_in;
252           z->avail_in = nread;
253           zp->zlib_init = ZLIB_INFLATING;
254           zp->trailerlen = 4; /* Tolerate up to 4 unknown trailer bytes. */
255           done = FALSE;
256           break;
257         }
258         zp->zlib_init = ZLIB_UNINIT;    /* inflateEnd() already called. */
259       }
260       result = exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z));
261       break;
262     default:
263       result = exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z));
264       break;
265     }
266   }
267   free(decomp);
268 
269   /* We are about to leave this call so the `nread' data bytes will not be seen
270      again. If we are in a state that would wrongly allow restart in raw mode
271      at the next call, assume output has already started. */
272   if(nread && zp->zlib_init == ZLIB_INIT)
273     zp->zlib_init = started;      /* Cannot restart anymore. */
274 
275   return result;
276 }
277 
278 
279 /* Deflate handler. */
deflate_do_init(struct Curl_easy * data,struct Curl_cwriter * writer)280 static CURLcode deflate_do_init(struct Curl_easy *data,
281                                     struct Curl_cwriter *writer)
282 {
283   struct zlib_writer *zp = (struct zlib_writer *) writer;
284   z_stream *z = &zp->z;     /* zlib state structure */
285 
286   /* Initialize zlib */
287   z->zalloc = (alloc_func) zalloc_cb;
288   z->zfree = (free_func) zfree_cb;
289 
290   if(inflateInit(z) != Z_OK)
291     return process_zlib_error(data, z);
292   zp->zlib_init = ZLIB_INIT;
293   return CURLE_OK;
294 }
295 
deflate_do_write(struct Curl_easy * data,struct Curl_cwriter * writer,int type,const char * buf,size_t nbytes)296 static CURLcode deflate_do_write(struct Curl_easy *data,
297                                        struct Curl_cwriter *writer, int type,
298                                        const char *buf, size_t nbytes)
299 {
300   struct zlib_writer *zp = (struct zlib_writer *) writer;
301   z_stream *z = &zp->z;     /* zlib state structure */
302 
303   if(!(type & CLIENTWRITE_BODY) || !nbytes)
304     return Curl_cwriter_write(data, writer->next, type, buf, nbytes);
305 
306   /* Set the compressed input when this function is called */
307   z->next_in = (Bytef *) buf;
308   z->avail_in = (uInt) nbytes;
309 
310   if(zp->zlib_init == ZLIB_EXTERNAL_TRAILER)
311     return process_trailer(data, zp);
312 
313   /* Now uncompress the data */
314   return inflate_stream(data, writer, type, ZLIB_INFLATING);
315 }
316 
deflate_do_close(struct Curl_easy * data,struct Curl_cwriter * writer)317 static void deflate_do_close(struct Curl_easy *data,
318                                  struct Curl_cwriter *writer)
319 {
320   struct zlib_writer *zp = (struct zlib_writer *) writer;
321   z_stream *z = &zp->z;     /* zlib state structure */
322 
323   exit_zlib(data, z, &zp->zlib_init, CURLE_OK);
324 }
325 
326 static const struct Curl_cwtype deflate_encoding = {
327   "deflate",
328   NULL,
329   deflate_do_init,
330   deflate_do_write,
331   deflate_do_close,
332   sizeof(struct zlib_writer)
333 };
334 
335 
336 /* Gzip handler. */
gzip_do_init(struct Curl_easy * data,struct Curl_cwriter * writer)337 static CURLcode gzip_do_init(struct Curl_easy *data,
338                                  struct Curl_cwriter *writer)
339 {
340   struct zlib_writer *zp = (struct zlib_writer *) writer;
341   z_stream *z = &zp->z;     /* zlib state structure */
342 
343   /* Initialize zlib */
344   z->zalloc = (alloc_func) zalloc_cb;
345   z->zfree = (free_func) zfree_cb;
346 
347   if(strcmp(zlibVersion(), "1.2.0.4") >= 0) {
348     /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */
349     if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) {
350       return process_zlib_error(data, z);
351     }
352     zp->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */
353   }
354   else {
355     /* we must parse the gzip header and trailer ourselves */
356     if(inflateInit2(z, -MAX_WBITS) != Z_OK) {
357       return process_zlib_error(data, z);
358     }
359     zp->trailerlen = 8; /* A CRC-32 and a 32-bit input size (RFC 1952, 2.2) */
360     zp->zlib_init = ZLIB_INIT; /* Initial call state */
361   }
362 
363   return CURLE_OK;
364 }
365 
366 #ifdef OLD_ZLIB_SUPPORT
367 /* Skip over the gzip header */
/* Outcome of examining a gzip header. */
typedef enum {
  GZIP_OK,        /* a complete, acceptable header was found */
  GZIP_BAD,       /* the data is not an acceptable gzip header */
  GZIP_UNDERFLOW  /* more data is needed to reach the end of the header */
} gzip_status;
373 
check_gzip_header(unsigned char const * data,ssize_t len,ssize_t * headerlen)374 static gzip_status check_gzip_header(unsigned char const *data, ssize_t len,
375                                      ssize_t *headerlen)
376 {
377   int method, flags;
378   const ssize_t totallen = len;
379 
380   /* The shortest header is 10 bytes */
381   if(len < 10)
382     return GZIP_UNDERFLOW;
383 
384   if((data[0] != GZIP_MAGIC_0) || (data[1] != GZIP_MAGIC_1))
385     return GZIP_BAD;
386 
387   method = data[2];
388   flags = data[3];
389 
390   if(method != Z_DEFLATED || (flags & RESERVED) != 0) {
391     /* cannot handle this compression method or unknown flag */
392     return GZIP_BAD;
393   }
394 
395   /* Skip over time, xflags, OS code and all previous bytes */
396   len -= 10;
397   data += 10;
398 
399   if(flags & EXTRA_FIELD) {
400     ssize_t extra_len;
401 
402     if(len < 2)
403       return GZIP_UNDERFLOW;
404 
405     extra_len = (data[1] << 8) | data[0];
406 
407     if(len < (extra_len + 2))
408       return GZIP_UNDERFLOW;
409 
410     len -= (extra_len + 2);
411     data += (extra_len + 2);
412   }
413 
414   if(flags & ORIG_NAME) {
415     /* Skip over NUL-terminated filename */
416     while(len && *data) {
417       --len;
418       ++data;
419     }
420     if(!len || *data)
421       return GZIP_UNDERFLOW;
422 
423     /* Skip over the NUL */
424     --len;
425     ++data;
426   }
427 
428   if(flags & COMMENT) {
429     /* Skip over NUL-terminated comment */
430     while(len && *data) {
431       --len;
432       ++data;
433     }
434     if(!len || *data)
435       return GZIP_UNDERFLOW;
436 
437     /* Skip over the NUL */
438     --len;
439   }
440 
441   if(flags & HEAD_CRC) {
442     if(len < 2)
443       return GZIP_UNDERFLOW;
444 
445     len -= 2;
446   }
447 
448   *headerlen = totallen - len;
449   return GZIP_OK;
450 }
451 #endif
452 
gzip_do_write(struct Curl_easy * data,struct Curl_cwriter * writer,int type,const char * buf,size_t nbytes)453 static CURLcode gzip_do_write(struct Curl_easy *data,
454                                     struct Curl_cwriter *writer, int type,
455                                     const char *buf, size_t nbytes)
456 {
457   struct zlib_writer *zp = (struct zlib_writer *) writer;
458   z_stream *z = &zp->z;     /* zlib state structure */
459 
460   if(!(type & CLIENTWRITE_BODY) || !nbytes)
461     return Curl_cwriter_write(data, writer->next, type, buf, nbytes);
462 
463   if(zp->zlib_init == ZLIB_INIT_GZIP) {
464     /* Let zlib handle the gzip decompression entirely */
465     z->next_in = (Bytef *) buf;
466     z->avail_in = (uInt) nbytes;
467     /* Now uncompress the data */
468     return inflate_stream(data, writer, type, ZLIB_INIT_GZIP);
469   }
470 
471 #ifndef OLD_ZLIB_SUPPORT
472   /* Support for old zlib versions is compiled away and we are running with
473      an old version, so return an error. */
474   return exit_zlib(data, z, &zp->zlib_init, CURLE_WRITE_ERROR);
475 
476 #else
477   /* This next mess is to get around the potential case where there is not
478    * enough data passed in to skip over the gzip header. If that happens, we
479    * malloc a block and copy what we have then wait for the next call. If
480    * there still is not enough (this is definitely a worst-case scenario), we
481    * make the block bigger, copy the next part in and keep waiting.
482    *
483    * This is only required with zlib versions < 1.2.0.4 as newer versions
484    * can handle the gzip header themselves.
485    */
486 
487   switch(zp->zlib_init) {
488   /* Skip over gzip header? */
489   case ZLIB_INIT:
490   {
491     /* Initial call state */
492     ssize_t hlen;
493 
494     switch(check_gzip_header((unsigned char *) buf, nbytes, &hlen)) {
495     case GZIP_OK:
496       z->next_in = (Bytef *) buf + hlen;
497       z->avail_in = (uInt) (nbytes - hlen);
498       zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */
499       break;
500 
501     case GZIP_UNDERFLOW:
502       /* We need more data so we can find the end of the gzip header. it is
503        * possible that the memory block we malloc here will never be freed if
504        * the transfer abruptly aborts after this point. Since it is unlikely
505        * that circumstances will be right for this code path to be followed in
506        * the first place, and it is even more unlikely for a transfer to fail
507        * immediately afterwards, it should seldom be a problem.
508        */
509       z->avail_in = (uInt) nbytes;
510       z->next_in = malloc(z->avail_in);
511       if(!z->next_in) {
512         return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY);
513       }
514       memcpy(z->next_in, buf, z->avail_in);
515       zp->zlib_init = ZLIB_GZIP_HEADER;  /* Need more gzip header data state */
516       /* We do not have any data to inflate yet */
517       return CURLE_OK;
518 
519     case GZIP_BAD:
520     default:
521       return exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z));
522     }
523 
524   }
525   break;
526 
527   case ZLIB_GZIP_HEADER:
528   {
529     /* Need more gzip header data state */
530     ssize_t hlen;
531     z->avail_in += (uInt) nbytes;
532     z->next_in = Curl_saferealloc(z->next_in, z->avail_in);
533     if(!z->next_in) {
534       return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY);
535     }
536     /* Append the new block of data to the previous one */
537     memcpy(z->next_in + z->avail_in - nbytes, buf, nbytes);
538 
539     switch(check_gzip_header(z->next_in, (ssize_t)z->avail_in, &hlen)) {
540     case GZIP_OK:
541       /* This is the zlib stream data */
542       free(z->next_in);
543       /* Do not point into the malloced block since we just freed it */
544       z->next_in = (Bytef *) buf + hlen + nbytes - z->avail_in;
545       z->avail_in = z->avail_in - (uInt)hlen;
546       zp->zlib_init = ZLIB_GZIP_INFLATING;   /* Inflating stream state */
547       break;
548 
549     case GZIP_UNDERFLOW:
550       /* We still do not have any data to inflate! */
551       return CURLE_OK;
552 
553     case GZIP_BAD:
554     default:
555       return exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z));
556     }
557 
558   }
559   break;
560 
561   case ZLIB_EXTERNAL_TRAILER:
562     z->next_in = (Bytef *) buf;
563     z->avail_in = (uInt) nbytes;
564     return process_trailer(data, zp);
565 
566   case ZLIB_GZIP_INFLATING:
567   default:
568     /* Inflating stream state */
569     z->next_in = (Bytef *) buf;
570     z->avail_in = (uInt) nbytes;
571     break;
572   }
573 
574   if(z->avail_in == 0) {
575     /* We do not have any data to inflate; wait until next time */
576     return CURLE_OK;
577   }
578 
579   /* We have parsed the header, now uncompress the data */
580   return inflate_stream(data, writer, type, ZLIB_GZIP_INFLATING);
581 #endif
582 }
583 
gzip_do_close(struct Curl_easy * data,struct Curl_cwriter * writer)584 static void gzip_do_close(struct Curl_easy *data,
585                               struct Curl_cwriter *writer)
586 {
587   struct zlib_writer *zp = (struct zlib_writer *) writer;
588   z_stream *z = &zp->z;     /* zlib state structure */
589 
590   exit_zlib(data, z, &zp->zlib_init, CURLE_OK);
591 }
592 
593 static const struct Curl_cwtype gzip_encoding = {
594   "gzip",
595   "x-gzip",
596   gzip_do_init,
597   gzip_do_write,
598   gzip_do_close,
599   sizeof(struct zlib_writer)
600 };
601 
602 #endif /* HAVE_LIBZ */
603 
604 
605 #ifdef HAVE_BROTLI
606 /* Brotli writer. */
607 struct brotli_writer {
608   struct Curl_cwriter super;
609   BrotliDecoderState *br;    /* State structure for brotli. */
610 };
611 
brotli_map_error(BrotliDecoderErrorCode be)612 static CURLcode brotli_map_error(BrotliDecoderErrorCode be)
613 {
614   switch(be) {
615   case BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE:
616   case BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE:
617   case BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET:
618   case BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME:
619   case BROTLI_DECODER_ERROR_FORMAT_CL_SPACE:
620   case BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE:
621   case BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT:
622   case BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1:
623   case BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2:
624   case BROTLI_DECODER_ERROR_FORMAT_TRANSFORM:
625   case BROTLI_DECODER_ERROR_FORMAT_DICTIONARY:
626   case BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS:
627   case BROTLI_DECODER_ERROR_FORMAT_PADDING_1:
628   case BROTLI_DECODER_ERROR_FORMAT_PADDING_2:
629 #ifdef BROTLI_DECODER_ERROR_COMPOUND_DICTIONARY
630   case BROTLI_DECODER_ERROR_COMPOUND_DICTIONARY:
631 #endif
632 #ifdef BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET
633   case BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET:
634 #endif
635   case BROTLI_DECODER_ERROR_INVALID_ARGUMENTS:
636     return CURLE_BAD_CONTENT_ENCODING;
637   case BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES:
638   case BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS:
639   case BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP:
640   case BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1:
641   case BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2:
642   case BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES:
643     return CURLE_OUT_OF_MEMORY;
644   default:
645     break;
646   }
647   return CURLE_WRITE_ERROR;
648 }
649 
brotli_do_init(struct Curl_easy * data,struct Curl_cwriter * writer)650 static CURLcode brotli_do_init(struct Curl_easy *data,
651                                    struct Curl_cwriter *writer)
652 {
653   struct brotli_writer *bp = (struct brotli_writer *) writer;
654   (void) data;
655 
656   bp->br = BrotliDecoderCreateInstance(NULL, NULL, NULL);
657   return bp->br ? CURLE_OK : CURLE_OUT_OF_MEMORY;
658 }
659 
brotli_do_write(struct Curl_easy * data,struct Curl_cwriter * writer,int type,const char * buf,size_t nbytes)660 static CURLcode brotli_do_write(struct Curl_easy *data,
661                                       struct Curl_cwriter *writer, int type,
662                                       const char *buf, size_t nbytes)
663 {
664   struct brotli_writer *bp = (struct brotli_writer *) writer;
665   const uint8_t *src = (const uint8_t *) buf;
666   char *decomp;
667   uint8_t *dst;
668   size_t dstleft;
669   CURLcode result = CURLE_OK;
670   BrotliDecoderResult r = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
671 
672   if(!(type & CLIENTWRITE_BODY) || !nbytes)
673     return Curl_cwriter_write(data, writer->next, type, buf, nbytes);
674 
675   if(!bp->br)
676     return CURLE_WRITE_ERROR;  /* Stream already ended. */
677 
678   decomp = malloc(DSIZ);
679   if(!decomp)
680     return CURLE_OUT_OF_MEMORY;
681 
682   while((nbytes || r == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) &&
683         result == CURLE_OK) {
684     dst = (uint8_t *) decomp;
685     dstleft = DSIZ;
686     r = BrotliDecoderDecompressStream(bp->br,
687                                       &nbytes, &src, &dstleft, &dst, NULL);
688     result = Curl_cwriter_write(data, writer->next, type,
689                                  decomp, DSIZ - dstleft);
690     if(result)
691       break;
692     switch(r) {
693     case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
694     case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
695       break;
696     case BROTLI_DECODER_RESULT_SUCCESS:
697       BrotliDecoderDestroyInstance(bp->br);
698       bp->br = NULL;
699       if(nbytes)
700         result = CURLE_WRITE_ERROR;
701       break;
702     default:
703       result = brotli_map_error(BrotliDecoderGetErrorCode(bp->br));
704       break;
705     }
706   }
707   free(decomp);
708   return result;
709 }
710 
brotli_do_close(struct Curl_easy * data,struct Curl_cwriter * writer)711 static void brotli_do_close(struct Curl_easy *data,
712                                 struct Curl_cwriter *writer)
713 {
714   struct brotli_writer *bp = (struct brotli_writer *) writer;
715 
716   (void) data;
717 
718   if(bp->br) {
719     BrotliDecoderDestroyInstance(bp->br);
720     bp->br = NULL;
721   }
722 }
723 
724 static const struct Curl_cwtype brotli_encoding = {
725   "br",
726   NULL,
727   brotli_do_init,
728   brotli_do_write,
729   brotli_do_close,
730   sizeof(struct brotli_writer)
731 };
732 #endif
733 
734 
735 #ifdef HAVE_ZSTD
736 /* Zstd writer. */
737 struct zstd_writer {
738   struct Curl_cwriter super;
739   ZSTD_DStream *zds;    /* State structure for zstd. */
740   void *decomp;
741 };
742 
zstd_do_init(struct Curl_easy * data,struct Curl_cwriter * writer)743 static CURLcode zstd_do_init(struct Curl_easy *data,
744                                  struct Curl_cwriter *writer)
745 {
746   struct zstd_writer *zp = (struct zstd_writer *) writer;
747 
748   (void)data;
749 
750   zp->zds = ZSTD_createDStream();
751   zp->decomp = NULL;
752   return zp->zds ? CURLE_OK : CURLE_OUT_OF_MEMORY;
753 }
754 
zstd_do_write(struct Curl_easy * data,struct Curl_cwriter * writer,int type,const char * buf,size_t nbytes)755 static CURLcode zstd_do_write(struct Curl_easy *data,
756                                     struct Curl_cwriter *writer, int type,
757                                     const char *buf, size_t nbytes)
758 {
759   CURLcode result = CURLE_OK;
760   struct zstd_writer *zp = (struct zstd_writer *) writer;
761   ZSTD_inBuffer in;
762   ZSTD_outBuffer out;
763   size_t errorCode;
764 
765   if(!(type & CLIENTWRITE_BODY) || !nbytes)
766     return Curl_cwriter_write(data, writer->next, type, buf, nbytes);
767 
768   if(!zp->decomp) {
769     zp->decomp = malloc(DSIZ);
770     if(!zp->decomp)
771       return CURLE_OUT_OF_MEMORY;
772   }
773   in.pos = 0;
774   in.src = buf;
775   in.size = nbytes;
776 
777   for(;;) {
778     out.pos = 0;
779     out.dst = zp->decomp;
780     out.size = DSIZ;
781 
782     errorCode = ZSTD_decompressStream(zp->zds, &out, &in);
783     if(ZSTD_isError(errorCode)) {
784       return CURLE_BAD_CONTENT_ENCODING;
785     }
786     if(out.pos > 0) {
787       result = Curl_cwriter_write(data, writer->next, type,
788                                    zp->decomp, out.pos);
789       if(result)
790         break;
791     }
792     if((in.pos == nbytes) && (out.pos < out.size))
793       break;
794   }
795 
796   return result;
797 }
798 
zstd_do_close(struct Curl_easy * data,struct Curl_cwriter * writer)799 static void zstd_do_close(struct Curl_easy *data,
800                               struct Curl_cwriter *writer)
801 {
802   struct zstd_writer *zp = (struct zstd_writer *) writer;
803 
804   (void)data;
805 
806   if(zp->decomp) {
807     free(zp->decomp);
808     zp->decomp = NULL;
809   }
810   if(zp->zds) {
811     ZSTD_freeDStream(zp->zds);
812     zp->zds = NULL;
813   }
814 }
815 
816 static const struct Curl_cwtype zstd_encoding = {
817   "zstd",
818   NULL,
819   zstd_do_init,
820   zstd_do_write,
821   zstd_do_close,
822   sizeof(struct zstd_writer)
823 };
824 #endif
825 
826 
827 /* Identity handler. */
828 static const struct Curl_cwtype identity_encoding = {
829   "identity",
830   "none",
831   Curl_cwriter_def_init,
832   Curl_cwriter_def_write,
833   Curl_cwriter_def_close,
834   sizeof(struct Curl_cwriter)
835 };
836 
837 
838 /* supported general content decoders. */
839 static const struct Curl_cwtype * const general_unencoders[] = {
840   &identity_encoding,
841 #ifdef HAVE_LIBZ
842   &deflate_encoding,
843   &gzip_encoding,
844 #endif
845 #ifdef HAVE_BROTLI
846   &brotli_encoding,
847 #endif
848 #ifdef HAVE_ZSTD
849   &zstd_encoding,
850 #endif
851   NULL
852 };
853 
854 /* supported content decoders only for transfer encodings */
855 static const struct Curl_cwtype * const transfer_unencoders[] = {
856 #ifndef CURL_DISABLE_HTTP
857   &Curl_httpchunk_unencoder,
858 #endif
859   NULL
860 };
861 
862 /* Provide a list of comma-separated names of supported encodings.
863 */
Curl_all_content_encodings(char * buf,size_t blen)864 void Curl_all_content_encodings(char *buf, size_t blen)
865 {
866   size_t len = 0;
867   const struct Curl_cwtype * const *cep;
868   const struct Curl_cwtype *ce;
869 
870   DEBUGASSERT(buf);
871   DEBUGASSERT(blen);
872   buf[0] = 0;
873 
874   for(cep = general_unencoders; *cep; cep++) {
875     ce = *cep;
876     if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT))
877       len += strlen(ce->name) + 2;
878   }
879 
880   if(!len) {
881     if(blen >= sizeof(CONTENT_ENCODING_DEFAULT))
882       strcpy(buf, CONTENT_ENCODING_DEFAULT);
883   }
884   else if(blen > len) {
885     char *p = buf;
886     for(cep = general_unencoders; *cep; cep++) {
887       ce = *cep;
888       if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) {
889         strcpy(p, ce->name);
890         p += strlen(p);
891         *p++ = ',';
892         *p++ = ' ';
893       }
894     }
895     p[-2] = '\0';
896   }
897 }
898 
899 /* Deferred error dummy writer. */
error_do_init(struct Curl_easy * data,struct Curl_cwriter * writer)900 static CURLcode error_do_init(struct Curl_easy *data,
901                                   struct Curl_cwriter *writer)
902 {
903   (void)data;
904   (void)writer;
905   return CURLE_OK;
906 }
907 
error_do_write(struct Curl_easy * data,struct Curl_cwriter * writer,int type,const char * buf,size_t nbytes)908 static CURLcode error_do_write(struct Curl_easy *data,
909                                      struct Curl_cwriter *writer, int type,
910                                      const char *buf, size_t nbytes)
911 {
912   (void) writer;
913   (void) buf;
914   (void) nbytes;
915 
916   if(!(type & CLIENTWRITE_BODY) || !nbytes)
917     return Curl_cwriter_write(data, writer->next, type, buf, nbytes);
918   else {
919     char all[256];
920     (void)Curl_all_content_encodings(all, sizeof(all));
921     failf(data, "Unrecognized content encoding type. "
922           "libcurl understands %s content encodings.", all);
923   }
924   return CURLE_BAD_CONTENT_ENCODING;
925 }
926 
/* Close callback of the deferred error writer: nothing to release. */
static void error_do_close(struct Curl_easy *data,
                           struct Curl_cwriter *writer)
{
  (void)writer;
  (void)data;
}
933 
934 static const struct Curl_cwtype error_writer = {
935   "ce-error",
936   NULL,
937   error_do_init,
938   error_do_write,
939   error_do_close,
940   sizeof(struct Curl_cwriter)
941 };
942 
943 /* Find the content encoding by name. */
find_unencode_writer(const char * name,size_t len,Curl_cwriter_phase phase)944 static const struct Curl_cwtype *find_unencode_writer(const char *name,
945                                                       size_t len,
946                                                       Curl_cwriter_phase phase)
947 {
948   const struct Curl_cwtype * const *cep;
949 
950   if(phase == CURL_CW_TRANSFER_DECODE) {
951     for(cep = transfer_unencoders; *cep; cep++) {
952       const struct Curl_cwtype *ce = *cep;
953       if((strncasecompare(name, ce->name, len) && !ce->name[len]) ||
954          (ce->alias && strncasecompare(name, ce->alias, len)
955                     && !ce->alias[len]))
956         return ce;
957     }
958   }
959   /* look among the general decoders */
960   for(cep = general_unencoders; *cep; cep++) {
961     const struct Curl_cwtype *ce = *cep;
962     if((strncasecompare(name, ce->name, len) && !ce->name[len]) ||
963        (ce->alias && strncasecompare(name, ce->alias, len) && !ce->alias[len]))
964       return ce;
965   }
966   return NULL;
967 }
968 
969 /* Setup the unencoding stack from the Content-Encoding header value.
970  * See RFC 7231 section 3.1.2.2. */
Curl_build_unencoding_stack(struct Curl_easy * data,const char * enclist,int is_transfer)971 CURLcode Curl_build_unencoding_stack(struct Curl_easy *data,
972                                      const char *enclist, int is_transfer)
973 {
974   Curl_cwriter_phase phase = is_transfer ?
975     CURL_CW_TRANSFER_DECODE : CURL_CW_CONTENT_DECODE;
976   CURLcode result;
977 
978   do {
979     const char *name;
980     size_t namelen;
981     bool is_chunked = FALSE;
982 
983     /* Parse a single encoding name. */
984     while(ISBLANK(*enclist) || *enclist == ',')
985       enclist++;
986 
987     name = enclist;
988 
989     for(namelen = 0; *enclist && *enclist != ','; enclist++)
990       if(!ISSPACE(*enclist))
991         namelen = enclist - name + 1;
992 
993     if(namelen) {
994       const struct Curl_cwtype *cwt;
995       struct Curl_cwriter *writer;
996 
997       CURL_TRC_WRITE(data, "looking for %s decoder: %.*s",
998                      is_transfer ? "transfer" : "content", (int)namelen, name);
999       is_chunked = (is_transfer && (namelen == 7) &&
1000                     strncasecompare(name, "chunked", 7));
1001       /* if we skip the decoding in this phase, do not look further.
1002        * Exception is "chunked" transfer-encoding which always must happen */
1003       if((is_transfer && !data->set.http_transfer_encoding && !is_chunked) ||
1004          (!is_transfer && data->set.http_ce_skip)) {
1005         /* not requested, ignore */
1006         CURL_TRC_WRITE(data, "decoder not requested, ignored: %.*s",
1007                        (int)namelen, name);
1008         return CURLE_OK;
1009       }
1010 
1011       if(Curl_cwriter_count(data, phase) + 1 >= MAX_ENCODE_STACK) {
1012         failf(data, "Reject response due to more than %u content encodings",
1013               MAX_ENCODE_STACK);
1014         return CURLE_BAD_CONTENT_ENCODING;
1015       }
1016 
1017       cwt = find_unencode_writer(name, namelen, phase);
1018       if(cwt && is_chunked && Curl_cwriter_get_by_type(data, cwt)) {
1019         /* A 'chunked' transfer encoding has already been added.
1020          * Ignore duplicates. See #13451.
1021          * Also RFC 9112, ch. 6.1:
1022          * "A sender MUST NOT apply the chunked transfer coding more than
1023          *  once to a message body."
1024          */
1025         CURL_TRC_WRITE(data, "ignoring duplicate 'chunked' decoder");
1026         return CURLE_OK;
1027       }
1028 
1029       if(is_transfer && !is_chunked &&
1030          Curl_cwriter_get_by_name(data, "chunked")) {
1031         /* RFC 9112, ch. 6.1:
1032          * "If any transfer coding other than chunked is applied to a
1033          *  response's content, the sender MUST either apply chunked as the
1034          *  final transfer coding or terminate the message by closing the
1035          *  connection."
1036          * "chunked" must be the last added to be the first in its phase,
1037          *  reject this.
1038          */
1039         failf(data, "Reject response due to 'chunked' not being the last "
1040               "Transfer-Encoding");
1041         return CURLE_BAD_CONTENT_ENCODING;
1042       }
1043 
1044       if(!cwt)
1045         cwt = &error_writer;  /* Defer error at use. */
1046 
1047       result = Curl_cwriter_create(&writer, data, cwt, phase);
1048       CURL_TRC_WRITE(data, "added %s decoder %s -> %d",
1049                      is_transfer ? "transfer" : "content", cwt->name, result);
1050       if(result)
1051         return result;
1052 
1053       result = Curl_cwriter_add(data, writer);
1054       if(result) {
1055         Curl_cwriter_free(data, writer);
1056         return result;
1057       }
1058     }
1059   } while(*enclist);
1060 
1061   return CURLE_OK;
1062 }
1063 
1064 #else
1065 /* Stubs for builds without HTTP. */
Curl_build_unencoding_stack(struct Curl_easy * data,const char * enclist,int is_transfer)1066 CURLcode Curl_build_unencoding_stack(struct Curl_easy *data,
1067                                      const char *enclist, int is_transfer)
1068 {
1069   (void) data;
1070   (void) enclist;
1071   (void) is_transfer;
1072   return CURLE_NOT_BUILT_IN;
1073 }
1074 
Curl_all_content_encodings(char * buf,size_t blen)1075 void Curl_all_content_encodings(char *buf, size_t blen)
1076 {
1077   DEBUGASSERT(buf);
1078   DEBUGASSERT(blen);
1079   if(blen < sizeof(CONTENT_ENCODING_DEFAULT))
1080     buf[0] = 0;
1081   else
1082     strcpy(buf, CONTENT_ENCODING_DEFAULT);
1083 }
1084 
1085 
1086 #endif /* CURL_DISABLE_HTTP */
1087