1 /*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if the buffer is not recognized as
 *		compressed data; otherwise uncompresses it and prints
 *		information about the contents
 *	uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
 *		uncompress old into *newch using method, store the
 *		resulting size in *n
 */
35 #include "file.h"
36
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
39 #endif
40
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <string.h>
50 #include <errno.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <signal.h>
54 #ifndef HAVE_SIG_T
55 typedef void (*sig_t)(int);
56 #endif /* HAVE_SIG_T */
57 #ifdef HAVE_SYS_IOCTL_H
58 #include <sys/ioctl.h>
59 #endif
60 #ifdef HAVE_SYS_WAIT_H
61 #include <sys/wait.h>
62 #endif
63 #if defined(HAVE_SYS_TIME_H)
64 #include <sys/time.h>
65 #endif
66 #if defined(HAVE_ZLIB_H) && defined(PHP_FILEINFO_UNCOMPRESS)
67 #define BUILTIN_DECOMPRESS
68 #include <zlib.h>
69 #endif
70
71 #undef FIONREAD
72
73 #if defined(PHP_FILEINFO_UNCOMPRESS)
74 #define BUILTIN_BZLIB
75 #include <bzlib.h>
76 #endif
77
78 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
79 #define BUILTIN_XZLIB
80 #include <lzma.h>
81 #endif
82
83 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
84 #define BUILTIN_ZSTDLIB
85 #include <zstd.h>
86 #include <zstd_errors.h>
87 #endif
88
89 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
90 #define BUILTIN_LZLIB
91 #include <lzlib.h>
92 #endif
93
/*
 * Debug tracing.  Without DEBUG, DPRINTF() expands to nothing (its
 * arguments are not evaluated).  With DEBUG, it writes a printf-style
 * message to /dev/tty, which is opened on first use; failure to open
 * the terminal aborts the program.
 */
#ifndef DEBUG
#define DPRINTF(...)
#else
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1 && (tty = open("/dev/tty", O_RDWR)) == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#endif
106
107 #ifdef ZLIBSUPPORT
/*
 * External fallback for raw zlib streams: a python one-liner that
 * inflates stdin to stdout.  It is not really used, because ZLIBSUPPORT
 * is only defined if we have a built-in zlib, and the built-in zlib
 * handles that.  That is not true for android, where we have zlib.h
 * and not -lz.
 */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

static const char *zlib_args[] = {
	"python",
	"-c",
	zlibcode,
	NULL
};
117
118 static int
zlibcmp(const unsigned char * buf)119 zlibcmp(const unsigned char *buf)
120 {
121 unsigned short x = 1;
122 unsigned char *s = CAST(unsigned char *, CAST(void *, &x));
123
124 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
125 return 0;
126 if (s[0] != 1) /* endianness test */
127 x = buf[0] | (buf[1] << 8);
128 else
129 x = buf[1] | (buf[0] << 8);
130 if (x % 31)
131 return 0;
132 return 1;
133 }
134 #endif
135
136 #ifdef PHP_FILEINFO_UNCOMPRESS
137
/*
 * Heuristic match for an LZMA header.  Accepts the buffer when byte 0
 * is 0x5d, bytes 1 and 2 are zero, and byte 12 is either 0x00 or 0xff.
 * Reads up to buf[12], so at least 13 bytes must be available.
 * Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	return (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0) &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
147
/*
 * NULL-terminated argument vectors for the external decompressors.
 * Each one is referenced from the compr[] table.
 */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", "-qdf", "-", NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
175
176 #define do_zlib NULL
177 #define do_bzlib NULL
178
179 file_private const struct {
180 union {
181 const char *magic;
182 int (*func)(const unsigned char *);
183 } u;
184 int maglen;
185 const char **argv;
186 void *unused;
187 } compr[] = {
188 #define METH_FROZEN 2
189 #define METH_BZIP 7
190 #define METH_XZ 9
191 #define METH_LZIP 8
192 #define METH_ZSTD 12
193 #define METH_LZMA 13
194 #define METH_ZLIB 14
195 { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
196 /* Uncompress can get stuck; so use gzip first if we have it
197 * Idea from Damien Clark, thanks! */
198 { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
199 { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
200 { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
201 { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
202 /* the standard pack utilities do not accept standard input */
203 { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
204 { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
205 /* ...only first file examined */
206 { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
207 { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
208 { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
209 { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
210 { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
211 { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
212 { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
213 #ifdef ZLIBSUPPORT
214 { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
215 #endif
216 };
217
218 #define OKDATA 0
219 #define NODATA 1
220 #define ERRDATA 2
221
222 file_private ssize_t swrite(int, const void *, size_t);
223 #if HAVE_FORK
224 file_private size_t ncompr = __arraycount(compr);
225 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
226 unsigned char **, size_t *);
227 #ifdef BUILTIN_DECOMPRESS
228 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
229 size_t *, int);
230 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
231 size_t *, int);
232 #endif
233 #ifdef BUILTIN_BZLIB
234 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
235 size_t *, int);
236 #endif
237 #ifdef BUILTIN_XZLIB
238 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
239 size_t *, int);
240 #endif
241 #ifdef BUILTIN_ZSTDLIB
242 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
243 size_t *, int);
244 #endif
245 #ifdef BUILTIN_LZLIB
246 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
247 size_t *, int);
248 #endif
249
250 static int makeerror(unsigned char **, size_t *, const char *, ...)
251 __attribute__((__format__(__printf__, 3, 4)));
252 file_private const char *methodname(size_t);
253
254 file_private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)255 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
256 {
257 unsigned char *p;
258 int mime = ms->flags & MAGIC_MIME;
259
260 if (!mime)
261 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
262
263 for (p = buf; *p; p++)
264 if (!isalnum(*p))
265 *p = '-';
266
267 return file_printf(ms, "application/x-decompression-error-%s-%s",
268 methodname(i), buf);
269 }
270
271 file_protected int
file_zmagic(struct magic_set * ms,const struct buffer * b,const char * name)272 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
273 {
274 unsigned char *newbuf = NULL;
275 size_t i, nsz;
276 char *rbuf;
277 file_pushbuf_t *pb;
278 int urv, prv, rv = 0;
279 int mime = ms->flags & MAGIC_MIME;
280 int fd = b->fd;
281 const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
282 size_t nbytes = b->flen;
283 int sa_saved = 0;
284 struct sigaction sig_act;
285
286 if ((ms->flags & MAGIC_COMPRESS) == 0)
287 return 0;
288
289 for (i = 0; i < ncompr; i++) {
290 int zm;
291 if (nbytes < CAST(size_t, abs(compr[i].maglen)))
292 continue;
293 if (compr[i].maglen < 0) {
294 zm = (*compr[i].u.func)(buf);
295 } else {
296 zm = memcmp(buf, compr[i].u.magic,
297 CAST(size_t, compr[i].maglen)) == 0;
298 }
299
300 if (!zm)
301 continue;
302
303 /* Prevent SIGPIPE death if child dies unexpectedly */
304 if (!sa_saved) {
305 //We can use sig_act for both new and old, but
306 struct sigaction new_act;
307 memset(&new_act, 0, sizeof(new_act));
308 new_act.sa_handler = SIG_IGN;
309 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
310 }
311
312 nsz = nbytes;
313 efree(newbuf);
314 urv = uncompressbuf(fd, ms->bytes_max, i,
315 (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
316 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
317 (char *)newbuf, nsz);
318 switch (urv) {
319 case OKDATA:
320 case ERRDATA:
321 ms->flags &= ~MAGIC_COMPRESS;
322 if (urv == ERRDATA)
323 prv = format_decompression_error(ms, i, newbuf);
324 else
325 prv = file_buffer(ms, NULL, NULL, name, newbuf,
326 nsz);
327 if (prv == -1)
328 goto error;
329 rv = 1;
330 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
331 goto out;
332 if (mime != MAGIC_MIME && mime != 0)
333 goto out;
334 if ((file_printf(ms,
335 mime ? " compressed-encoding=" : " (")) == -1)
336 goto error;
337 if ((pb = file_push_buffer(ms)) == NULL)
338 goto error;
339 /*
340 * XXX: If file_buffer fails here, we overwrite
341 * the compressed text. FIXME.
342 */
343 if (file_buffer(ms, NULL, NULL, NULL, buf, nbytes) == -1)
344 {
345 if (file_pop_buffer(ms, pb) != NULL)
346 abort();
347 goto error;
348 }
349 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
350 if (file_printf(ms, "%s", rbuf) == -1) {
351 efree(rbuf);
352 goto error;
353 }
354 efree(rbuf);
355 }
356 if (!mime && file_printf(ms, ")") == -1)
357 goto error;
358 /*FALLTHROUGH*/
359 case NODATA:
360 break;
361 default:
362 abort();
363 /*NOTREACHED*/
364 error:
365 rv = -1;
366 break;
367 }
368 }
369 out:
370 DPRINTF("rv = %d\n", rv);
371
372 if (sa_saved && sig_act.sa_handler != SIG_IGN)
373 (void)sigaction(SIGPIPE, &sig_act, NULL);
374
375 if (newbuf)
376 efree(newbuf);
377 ms->flags |= MAGIC_COMPRESS;
378 DPRINTF("Zmagic returns %d\n", rv);
379 return rv;
380 }
381 #endif
382 /*
383 * `safe' write for sockets and pipes.
384 */
385 file_private ssize_t
swrite(int fd,const void * buf,size_t n)386 swrite(int fd, const void *buf, size_t n)
387 {
388 ssize_t rv;
389 size_t rn = n;
390
391 do
392 switch (rv = write(fd, buf, n)) {
393 case -1:
394 if (errno == EINTR)
395 continue;
396 return -1;
397 default:
398 n -= rv;
399 buf = CAST(const char *, buf) + rv;
400 break;
401 }
402 while (n > 0);
403 return rn;
404 }
405
406
407 /*
408 * `safe' read for sockets and pipes.
409 */
410 file_protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)411 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
412 {
413 ssize_t rv;
414 #if defined(FIONREAD) && !defined(__MINGW32__)
415 int t = 0;
416 #endif
417 size_t rn = n;
418
419 if (fd == STDIN_FILENO)
420 goto nocheck;
421
422 #if defined(FIONREAD) && !defined(__MINGW32__)
423 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
424 #ifdef FD_ZERO
425 ssize_t cnt;
426 for (cnt = 0;; cnt++) {
427 fd_set check;
428 struct timeval tout = {0, 100 * 1000};
429 int selrv;
430
431 FD_ZERO(&check);
432 FD_SET(fd, &check);
433
434 /*
435 * Avoid soft deadlock: do not read if there
436 * is nothing to read from sockets and pipes.
437 */
438 selrv = select(fd + 1, &check, NULL, NULL, &tout);
439 if (selrv == -1) {
440 if (errno == EINTR || errno == EAGAIN)
441 continue;
442 } else if (selrv == 0 && cnt >= 5) {
443 return 0;
444 } else
445 break;
446 }
447 #endif
448 (void)ioctl(fd, FIONREAD, &t);
449 }
450
451 if (t > 0 && CAST(size_t, t) < n) {
452 n = t;
453 rn = n;
454 }
455 #endif
456
457 nocheck:
458 do
459 switch ((rv = FINFO_READ_FUNC(fd, buf, n))) {
460 case -1:
461 if (errno == EINTR)
462 continue;
463 return -1;
464 case 0:
465 return rn - n;
466 default:
467 n -= rv;
468 buf = CAST(char *, CCAST(void *, buf)) + rv;
469 break;
470 }
471 while (n > 0);
472 return rn;
473 }
474
475 file_protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)476 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
477 size_t nbytes)
478 {
479 char buf[4096];
480 ssize_t r;
481 int tfd;
482
483 #ifdef WIN32
484 const char *t;
485 buf[0] = '\0';
486 if ((t = getenv("TEMP")) != NULL)
487 (void)strlcpy(buf, t, sizeof(buf));
488 else if ((t = getenv("TMP")) != NULL)
489 (void)strlcpy(buf, t, sizeof(buf));
490 else if ((t = getenv("TMPDIR")) != NULL)
491 (void)strlcpy(buf, t, sizeof(buf));
492 if (buf[0] != '\0')
493 (void)strlcat(buf, "/", sizeof(buf));
494 (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
495 #else
496 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
497 #endif
498 #ifndef HAVE_MKSTEMP
499 {
500 char *ptr = mktemp(buf);
501 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
502 r = errno;
503 (void)unlink(ptr);
504 errno = r;
505 }
506 #else
507 {
508 int te;
509 mode_t ou = umask(0);
510 tfd = mkstemp(buf);
511 (void)umask(ou);
512 te = errno;
513 (void)unlink(buf);
514 errno = te;
515 }
516 #endif
517 if (tfd == -1) {
518 file_error(ms, errno,
519 "cannot create temporary file for pipe copy");
520 return -1;
521 }
522
523 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
524 r = 1;
525 else {
526 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
527 if (swrite(tfd, buf, CAST(size_t, r)) != r)
528 break;
529 }
530
531 switch (r) {
532 case -1:
533 file_error(ms, errno, "error copying from pipe to temp file");
534 return -1;
535 case 0:
536 break;
537 default:
538 file_error(ms, errno, "error while writing to temp file");
539 return -1;
540 }
541
542 /*
543 * We duplicate the file descriptor, because fclose on a
544 * tmpfile will delete the file, but any open descriptors
545 * can still access the phantom inode.
546 */
547 if ((fd = dup2(tfd, fd)) == -1) {
548 file_error(ms, errno, "could not dup descriptor for temp file");
549 return -1;
550 }
551 (void)close(tfd);
552 if (FINFO_LSEEK_FUNC(fd, (zend_off_t)0, SEEK_SET) == (zend_off_t)-1) {
553 file_badseek(ms);
554 return -1;
555 }
556 return fd;
557 }
558 #ifdef PHP_FILEINFO_UNCOMPRESS
559 #ifdef BUILTIN_DECOMPRESS
560
561 #define FHCRC (1 << 1)
562 #define FEXTRA (1 << 2)
563 #define FNAME (1 << 3)
564 #define FCOMMENT (1 << 4)
565
566
567 file_private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)568 uncompressgzipped(const unsigned char *old, unsigned char **newch,
569 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
570 {
571 unsigned char flg;
572 size_t data_start = 10;
573
574 if (*n < 4) {
575 goto err;
576 }
577
578 flg = old[3];
579
580 if (flg & FEXTRA) {
581 if (data_start + 1 >= *n)
582 goto err;
583 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
584 }
585 if (flg & FNAME) {
586 while(data_start < *n && old[data_start])
587 data_start++;
588 data_start++;
589 }
590 if (flg & FCOMMENT) {
591 while(data_start < *n && old[data_start])
592 data_start++;
593 data_start++;
594 }
595 if (flg & FHCRC)
596 data_start += 2;
597
598 if (data_start >= *n)
599 goto err;
600
601 *n -= data_start;
602 old += data_start;
603 return uncompresszlib(old, newch, bytes_max, n, 0);
604 err:
605 return makeerror(newch, n, "File too short");
606 }
607
608 file_private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)609 uncompresszlib(const unsigned char *old, unsigned char **newch,
610 size_t bytes_max, size_t *n, int zlib)
611 {
612 int rc;
613 z_stream z;
614
615 DPRINTF("builtin zlib decompression\n");
616 z.next_in = CCAST(Bytef *, old);
617 z.avail_in = CAST(uint32_t, *n);
618 z.next_out = *newch;
619 z.avail_out = CAST(unsigned int, bytes_max);
620 z.zalloc = Z_NULL;
621 z.zfree = Z_NULL;
622 z.opaque = Z_NULL;
623
624 /* LINTED bug in header macro */
625 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
626 if (rc != Z_OK)
627 goto err;
628
629 rc = inflate(&z, Z_SYNC_FLUSH);
630 if (rc != Z_OK && rc != Z_STREAM_END) {
631 inflateEnd(&z);
632 goto err;
633 }
634
635 *n = CAST(size_t, z.total_out);
636 rc = inflateEnd(&z);
637 if (rc != Z_OK)
638 goto err;
639
640 /* let's keep the nul-terminate tradition */
641 (*newch)[*n] = '\0';
642
643 return OKDATA;
644 err:
645 return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
646 }
647 #endif
648
649 #ifdef BUILTIN_BZLIB
650 file_private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)651 uncompressbzlib(const unsigned char *old, unsigned char **newch,
652 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
653 {
654 int rc;
655 bz_stream bz;
656
657 DPRINTF("builtin bzlib decompression\n");
658 memset(&bz, 0, sizeof(bz));
659 rc = BZ2_bzDecompressInit(&bz, 0, 0);
660 if (rc != BZ_OK)
661 goto err;
662
663 bz.next_in = CCAST(char *, RCAST(const char *, old));
664 bz.avail_in = CAST(uint32_t, *n);
665 bz.next_out = RCAST(char *, *newch);
666 bz.avail_out = CAST(unsigned int, bytes_max);
667
668 rc = BZ2_bzDecompress(&bz);
669 if (rc != BZ_OK && rc != BZ_STREAM_END) {
670 BZ2_bzDecompressEnd(&bz);
671 goto err;
672 }
673
674 /* Assume byte_max is within 32bit */
675 /* assert(bz.total_out_hi32 == 0); */
676 *n = CAST(size_t, bz.total_out_lo32);
677 rc = BZ2_bzDecompressEnd(&bz);
678 if (rc != BZ_OK)
679 goto err;
680
681 /* let's keep the nul-terminate tradition */
682 (*newch)[*n] = '\0';
683
684 return OKDATA;
685 err:
686 return makeerror(newch, n, "bunzip error %d", rc);
687 }
688 #endif
689
690 #ifdef BUILTIN_XZLIB
691 file_private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)692 uncompressxzlib(const unsigned char *old, unsigned char **newch,
693 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
694 {
695 int rc;
696 lzma_stream xz;
697
698 DPRINTF("builtin xzlib decompression\n");
699 memset(&xz, 0, sizeof(xz));
700 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
701 if (rc != LZMA_OK)
702 goto err;
703
704 xz.next_in = CCAST(const uint8_t *, old);
705 xz.avail_in = CAST(uint32_t, *n);
706 xz.next_out = RCAST(uint8_t *, *newch);
707 xz.avail_out = CAST(unsigned int, bytes_max);
708
709 rc = lzma_code(&xz, LZMA_RUN);
710 if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
711 lzma_end(&xz);
712 goto err;
713 }
714
715 *n = CAST(size_t, xz.total_out);
716
717 lzma_end(&xz);
718
719 /* let's keep the nul-terminate tradition */
720 (*newch)[*n] = '\0';
721
722 return OKDATA;
723 err:
724 return makeerror(newch, n, "unxz error %d", rc);
725 }
726 #endif
727
728 #ifdef BUILTIN_ZSTDLIB
729 file_private int
uncompresszstd(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)730 uncompresszstd(const unsigned char *old, unsigned char **newch,
731 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
732 {
733 size_t rc;
734 ZSTD_DStream *zstd;
735 ZSTD_inBuffer in;
736 ZSTD_outBuffer out;
737
738 DPRINTF("builtin zstd decompression\n");
739 if ((zstd = ZSTD_createDStream()) == NULL) {
740 return makeerror(newch, n, "No ZSTD decompression stream, %s",
741 strerror(errno));
742 }
743
744 rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
745 if (ZSTD_isError(rc))
746 goto err;
747
748 in.src = CCAST(const void *, old);
749 in.size = *n;
750 in.pos = 0;
751 out.dst = RCAST(void *, *newch);
752 out.size = bytes_max;
753 out.pos = 0;
754
755 rc = ZSTD_decompressStream(zstd, &out, &in);
756 if (ZSTD_isError(rc))
757 goto err;
758
759 *n = out.pos;
760
761 ZSTD_freeDStream(zstd);
762
763 /* let's keep the nul-terminate tradition */
764 (*newch)[*n] = '\0';
765
766 return OKDATA;
767 err:
768 ZSTD_freeDStream(zstd);
769 return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
770 }
771 #endif
772
773 #ifdef BUILTIN_LZLIB
774 file_private int
uncompresslzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)775 uncompresslzlib(const unsigned char *old, unsigned char **newch,
776 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
777 {
778 enum LZ_Errno err;
779 size_t old_remaining = *n;
780 size_t new_remaining = bytes_max;
781 size_t total_read = 0;
782 unsigned char *bufp;
783 struct LZ_Decoder *dec;
784
785 bufp = *newch;
786
787 DPRINTF("builtin lzlib decompression\n");
788 dec = LZ_decompress_open();
789 if (!dec) {
790 return makeerror(newch, n, "unable to allocate LZ_Decoder");
791 }
792 if (LZ_decompress_errno(dec) != LZ_ok)
793 goto err;
794
795 for (;;) {
796 // LZ_decompress_read() stops at member boundaries, so we may
797 // have more than one successful read after writing all data
798 // we have.
799 if (old_remaining > 0) {
800 int wr = LZ_decompress_write(dec, old, old_remaining);
801 if (wr < 0)
802 goto err;
803 old_remaining -= wr;
804 old += wr;
805 }
806
807 int rd = LZ_decompress_read(dec, bufp, new_remaining);
808 if (rd > 0) {
809 new_remaining -= rd;
810 bufp += rd;
811 total_read += rd;
812 }
813
814 if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
815 goto err;
816 if (new_remaining == 0)
817 break;
818 if (old_remaining == 0 && rd == 0)
819 break;
820 }
821
822 LZ_decompress_close(dec);
823 *n = total_read;
824
825 /* let's keep the nul-terminate tradition */
826 *bufp = '\0';
827
828 return OKDATA;
829 err:
830 err = LZ_decompress_errno(dec);
831 LZ_decompress_close(dec);
832 return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
833 }
834 #endif
835
836
837 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)838 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
839 {
840 char *msg;
841 va_list ap;
842 int rv;
843
844 DPRINTF("Makeerror %s\n", fmt);
845 free(*buf);
846 va_start(ap, fmt);
847 rv = vasprintf(&msg, fmt, ap);
848 va_end(ap);
849 if (rv < 0) {
850 DPRINTF("Makeerror failed");
851 *buf = NULL;
852 *len = 0;
853 return NODATA;
854 }
855 *buf = RCAST(unsigned char *, msg);
856 *len = strlen(msg);
857 return ERRDATA;
858 }
859
/*
 * Close descriptor fd[i] unless it is already marked closed (-1), and
 * mark it closed afterwards.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
868
/*
 * Close both ends of the pipe pair fd[0]/fd[1]; ends already marked
 * closed (-1) are skipped, and each closed end is set to -1.
 */
static void
closep(int *fd)
{
	int *end;

	for (end = fd; end < fd + 2; end++) {
		if (*end == -1)
			continue;
		(void) close(*end);
		*end = -1;
	}
}
876
/*
 * Make descriptor fd available as descriptor i and close the original.
 * Nothing is done when fd already is i.
 *
 * With posix_spawn, v points to the file-actions object and the dup2
 * and close are only recorded in it.  Otherwise the operations are
 * performed directly; that path is used by the child created with
 * vfork(), so a failure must terminate with _exit() rather than
 * exit(), which would run exit handlers and flush stdio buffers that
 * still belong to the parent.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		_exit(EXIT_FAILURE);
	}
	close(v ? fd : fd);
#endif
}
894
/*
 * Close descriptor fd for the child: with posix_spawn the close is
 * recorded in the file-actions object v points to, otherwise fd is
 * closed immediately and v is ignored.
 */
static void
closedesc(void *v, int fd)
{
#ifndef HAVE_POSIX_SPAWNP
	(void)v;
	close(fd);
#else
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_addclose(fa, fd);
#endif
}
905
906 static void
handledesc(void * v,int fd,int fdp[3][2])907 handledesc(void *v, int fd, int fdp[3][2])
908 {
909 if (fd != -1) {
910 (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
911 movedesc(v, STDIN_FILENO, fd);
912 } else {
913 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
914 if (fdp[STDIN_FILENO][1] > 2)
915 closedesc(v, fdp[STDIN_FILENO][1]);
916 }
917
918 file_clear_closexec(STDIN_FILENO);
919
920 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
921 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
922 if (fdp[STDOUT_FILENO][0] > 2)
923 closedesc(v, fdp[STDOUT_FILENO][0]);
924
925 file_clear_closexec(STDOUT_FILENO);
926
927 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
928 if (fdp[STDERR_FILENO][0] > 2)
929 closedesc(v, fdp[STDERR_FILENO][0]);
930
931 file_clear_closexec(STDERR_FILENO);
932 }
933
934 static pid_t
writechild(int fd,const void * old,size_t n)935 writechild(int fd, const void *old, size_t n)
936 {
937 pid_t pid;
938
939 /*
940 * fork again, to avoid blocking because both
941 * pipes filled
942 */
943 pid = fork();
944 if (pid == -1) {
945 DPRINTF("Fork failed (%s)\n", strerror(errno));
946 return -1;
947 }
948 if (pid == 0) {
949 /* child */
950 if (swrite(fd, old, n) != CAST(ssize_t, n)) {
951 DPRINTF("Write failed (%s)\n", strerror(errno));
952 exit(EXIT_FAILURE);
953 }
954 exit(EXIT_SUCCESS);
955 }
956 /* parent */
957 return pid;
958 }
959
960 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)961 filter_error(unsigned char *ubuf, ssize_t n)
962 {
963 char *p;
964 char *buf;
965
966 ubuf[n] = '\0';
967 buf = RCAST(char *, ubuf);
968 while (isspace(CAST(unsigned char, *buf)))
969 buf++;
970 DPRINTF("Filter error[[[%s]]]\n", buf);
971 if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
972 *p = '\0';
973 if ((p = strchr(CAST(char *, buf), ';')) != NULL)
974 *p = '\0';
975 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
976 ++p;
977 while (isspace(CAST(unsigned char, *p)))
978 p++;
979 n = strlen(p);
980 memmove(ubuf, p, CAST(size_t, n + 1));
981 }
982 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
983 if (islower(*ubuf))
984 *ubuf = toupper(*ubuf);
985 return n;
986 }
987
988 file_private const char *
methodname(size_t method)989 methodname(size_t method)
990 {
991 switch (method) {
992 #ifdef BUILTIN_DECOMPRESS
993 case METH_FROZEN:
994 case METH_ZLIB:
995 return "zlib";
996 #endif
997 #ifdef BUILTIN_BZLIB
998 case METH_BZIP:
999 return "bzlib";
1000 #endif
1001 #ifdef BUILTIN_XZLIB
1002 case METH_XZ:
1003 case METH_LZMA:
1004 return "xzlib";
1005 #endif
1006 #ifdef BUILTIN_ZSTDLIB
1007 case METH_ZSTD:
1008 return "zstd";
1009 #endif
1010 #ifdef BUILTIN_LZLIB
1011 case METH_LZIP:
1012 return "lzlib";
1013 #endif
1014 default:
1015 return compr[method].argv[0];
1016 }
1017 }
1018
1019 file_private int (*
getdecompressor(size_t method)1020 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1021 size_t *, int)
1022 {
1023 switch (method) {
1024 #ifdef BUILTIN_DECOMPRESS
1025 case METH_FROZEN:
1026 return uncompressgzipped;
1027 case METH_ZLIB:
1028 return uncompresszlib;
1029 #endif
1030 #ifdef BUILTIN_BZLIB
1031 case METH_BZIP:
1032 return uncompressbzlib;
1033 #endif
1034 #ifdef BUILTIN_XZLIB
1035 case METH_XZ:
1036 case METH_LZMA:
1037 return uncompressxzlib;
1038 #endif
1039 #ifdef BUILTIN_ZSTDLIB
1040 case METH_ZSTD:
1041 return uncompresszstd;
1042 #endif
1043 #ifdef BUILTIN_LZLIB
1044 case METH_LZIP:
1045 return uncompresslzlib;
1046 #endif
1047 default:
1048 return NULL;
1049 }
1050 }
1051
1052 file_private int
uncompressbuf(int fd,size_t bytes_max,size_t method,int nofork,const unsigned char * old,unsigned char ** newch,size_t * n)1053 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1054 const unsigned char *old, unsigned char **newch, size_t* n)
1055 {
1056 int fdp[3][2];
1057 int status, rv, w;
1058 pid_t pid;
1059 pid_t writepid = -1;
1060 size_t i;
1061 ssize_t r, re;
1062 char *const *args;
1063 #ifdef HAVE_POSIX_SPAWNP
1064 posix_spawn_file_actions_t fa;
1065 #endif
1066 int (*decompress)(const unsigned char *, unsigned char **,
1067 size_t, size_t *, int) = getdecompressor(method);
1068
1069 *newch = CAST(unsigned char *, emalloc(bytes_max + 1));
1070 if (*newch == NULL)
1071 return makeerror(newch, n, "No buffer, %s", strerror(errno));
1072
1073 if (decompress) {
1074 if (nofork) {
1075 return makeerror(newch, n,
1076 "Fork is required to uncompress, but disabled");
1077 }
1078 return (*decompress)(old, newch, bytes_max, n, 1);
1079 }
1080
1081 (void)fflush(stdout);
1082 (void)fflush(stderr);
1083
1084 for (i = 0; i < __arraycount(fdp); i++)
1085 fdp[i][0] = fdp[i][1] = -1;
1086
1087 /*
1088 * There are multithreaded users who run magic_file()
1089 * from dozens of threads. If two parallel magic_file() calls
1090 * analyze two large compressed files, both will spawn
1091 * an uncompressing child here, which writes out uncompressed data.
1092 * We read some portion, then close the pipe, then waitpid() the child.
1093 * If uncompressed data is larger, child should get EPIPE and exit.
1094 * However, with *parallel* calls OTHER child may unintentionally
1095 * inherit pipe fds, thus keeping pipe open and making writes in
1096 * our child block instead of failing with EPIPE!
1097 * (For the bug to occur, two threads must mutually inherit their pipes,
1098 * and both must have large outputs. Thus it happens not that often).
1099 * To avoid this, be sure to create pipes with O_CLOEXEC.
1100 */
1101 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1102 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1103 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1104 closep(fdp[STDIN_FILENO]);
1105 closep(fdp[STDOUT_FILENO]);
1106 return makeerror(newch, n, "Cannot create pipe, %s",
1107 strerror(errno));
1108 }
1109
1110 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1111 #ifdef HAVE_POSIX_SPAWNP
1112 posix_spawn_file_actions_init(&fa);
1113
1114 handledesc(&fa, fd, fdp);
1115
1116 DPRINTF("Executing %s\n", compr[method].argv[0]);
1117 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1118 args, NULL);
1119
1120 posix_spawn_file_actions_destroy(&fa);
1121
1122 if (status == -1) {
1123 return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1124 compr[method].argv[0], strerror(errno));
1125 }
1126 #else
1127 /* For processes with large mapped virtual sizes, vfork
1128 * may be _much_ faster (10-100 times) than fork.
1129 */
1130 pid = vfork();
1131 if (pid == -1) {
1132 return makeerror(newch, n, "Cannot vfork, %s",
1133 strerror(errno));
1134 }
1135 if (pid == 0) {
1136 /* child */
1137 /* Note: we are after vfork, do not modify memory
1138 * in a way which confuses parent. In particular,
1139 * do not modify fdp[i][j].
1140 */
1141 handledesc(NULL, fd, fdp);
1142 DPRINTF("Executing %s\n", compr[method].argv[0]);
1143
1144 (void)execvp(compr[method].argv[0], args);
1145 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1146 compr[method].argv[0], strerror(errno));
1147 _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1148 }
1149 #endif
1150 /* parent */
1151 /* Close write sides of child stdout/err pipes */
1152 for (i = 1; i < __arraycount(fdp); i++)
1153 closefd(fdp[i], 1);
1154 /* Write the buffer data to child stdin, if we don't have fd */
1155 if (fd == -1) {
1156 closefd(fdp[STDIN_FILENO], 0);
1157 writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1158 if (writepid == (pid_t)-1) {
1159 rv = makeerror(newch, n, "Write to child failed, %s",
1160 strerror(errno));
1161 DPRINTF("Write to child failed\n");
1162 goto err;
1163 }
1164 closefd(fdp[STDIN_FILENO], 1);
1165 }
1166
1167 rv = OKDATA;
1168 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1169 DPRINTF("read got %zd\n", r);
1170 if (r < 0) {
1171 rv = ERRDATA;
1172 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1173 strerror(errno));
1174 goto err;
1175 }
1176 if (CAST(size_t, r) == bytes_max) {
1177 /*
1178 * close fd so that the child exits with sigpipe and ignore
1179 * errors, otherwise we risk the child blocking and never
1180 * exiting.
1181 */
1182 DPRINTF("Closing stdout for bytes_max\n");
1183 closefd(fdp[STDOUT_FILENO], 0);
1184 goto ok;
1185 }
1186 if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1187 DPRINTF("Got stuff from stderr %s\n", *newch);
1188 rv = ERRDATA;
1189 r = filter_error(*newch, r);
1190 goto ok;
1191 }
1192 if (re == 0)
1193 goto ok;
1194 rv = makeerror(newch, n, "Read stderr failed, %s",
1195 strerror(errno));
1196 goto err;
1197 ok:
1198 *n = r;
1199 /* NUL terminate, as every buffer is handled here. */
1200 (*newch)[*n] = '\0';
1201 err:
1202 closefd(fdp[STDIN_FILENO], 1);
1203 closefd(fdp[STDOUT_FILENO], 0);
1204 closefd(fdp[STDERR_FILENO], 0);
1205
1206 w = waitpid(pid, &status, 0);
1207 wait_err:
1208 if (w == -1) {
1209 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1210 DPRINTF("Child wait return %#x\n", status);
1211 } else if (!WIFEXITED(status)) {
1212 DPRINTF("Child not exited (%#x)\n", status);
1213 } else if (WEXITSTATUS(status) != 0) {
1214 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1215 }
1216 if (writepid > 0) {
1217 /* _After_ we know decompressor has exited, our input writer
1218 * definitely will exit now (at worst, writing fails in it,
1219 * since output fd is closed now on the reading size).
1220 */
1221 w = waitpid(writepid, &status, 0);
1222 writepid = -1;
1223 goto wait_err;
1224 }
1225
1226 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1227 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1228
1229 return rv;
1230 }
1231 #endif
1232 #endif
1233