xref: /php-src/ext/fileinfo/libmagic/compress.c (revision b7c5813c)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * compress routines:
30  *	zmagic() - returns 0 if not recognized, uncompresses and prints
31  *		   information if recognized
32  *	uncompress(method, old, n, newch) - uncompress old into new,
33  *					    using method, return sizeof new
34  */
35 #include "file.h"
36 
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
39 #endif
40 
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <string.h>
50 #include <errno.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <signal.h>
54 #ifndef HAVE_SIG_T
55 typedef void (*sig_t)(int);
56 #endif /* HAVE_SIG_T */
57 #ifdef HAVE_SYS_IOCTL_H
58 #include <sys/ioctl.h>
59 #endif
60 #ifdef HAVE_SYS_WAIT_H
61 #include <sys/wait.h>
62 #endif
63 #if defined(HAVE_SYS_TIME_H)
64 #include <sys/time.h>
65 #endif
66 #if defined(HAVE_ZLIB_H) && defined(PHP_FILEINFO_UNCOMPRESS)
67 #define BUILTIN_DECOMPRESS
68 #include <zlib.h>
69 #endif
70 
71 #undef FIONREAD
72 
73 #if defined(PHP_FILEINFO_UNCOMPRESS)
74 #define BUILTIN_BZLIB
75 #include <bzlib.h>
76 #endif
77 
78 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
79 #define BUILTIN_XZLIB
80 #include <lzma.h>
81 #endif
82 
83 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
84 #define BUILTIN_ZSTDLIB
85 #include <zstd.h>
86 #include <zstd_errors.h>
87 #endif
88 
89 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
90 #define BUILTIN_LZLIB
91 #include <lzlib.h>
92 #endif
93 
/*
 * Debug tracing.  With DEBUG defined, DPRINTF() opens /dev/tty on first use
 * (the descriptor is cached in `tty`), aborts if that open fails, and writes
 * the printf-style message there with dprintf().  Without DEBUG the macro
 * expands to nothing, so its arguments are never evaluated.
 */
94 #ifdef DEBUG
95 int tty = -1;	/* -1 until /dev/tty has been opened by DPRINTF() */
96 #define DPRINTF(...)	do { \
97 	if (tty == -1) \
98 		tty = open("/dev/tty", O_RDWR); \
99 	if (tty == -1) \
100 		abort(); \
101 	dprintf(tty, __VA_ARGS__); \
102 } while (/*CONSTCOND*/0)
103 #else
104 #define DPRINTF(...)
105 #endif
106 
107 #ifdef ZLIBSUPPORT
108 /*
109  * The following python code is not really used because ZLIBSUPPORT is only
110  * defined if we have a built-in zlib, and the built-in zlib handles that.
111  * That is not true for android where we have zlib.h and not -lz.
112  */
/* Python program text handed to `python -c` by zlib_args below. */
113 static const char zlibcode[] =
114     "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
115 
/* NULL-terminated argv for the external zlib helper used by the compr[] table. */
116 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
117 
/*
 * Heuristic check for a zlib (RFC 1950) stream header.
 *
 * buf must have at least 2 readable bytes; the compr[] entry that uses this
 * function declares maglen -2 and file_zmagic() skips shorter inputs.
 *
 * Returns 1 if the two header bytes look like a zlib header, 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	/* CMF byte: low nibble must be 8 and the top bit must be clear. */
	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/*
	 * CMF * 256 + FLG has to be a multiple of 31.  This is arithmetic on
	 * byte values, not a memory reinterpretation, so it must not depend on
	 * host byte order: the previous "endianness test" swapped the bytes on
	 * big-endian hosts and therefore validated the wrong number there.
	 */
	check = ((unsigned int)buf[0] << 8) | buf[1];
	return check % 31 == 0;
}
134 #endif
135 
136 #ifdef PHP_FILEINFO_UNCOMPRESS
137 
/*
 * Heuristic match for a raw LZMA header.  Reads buf[0..2] and, only when
 * those match, buf[12]; the compr[] entry declares maglen -13 so callers
 * supply at least 13 bytes.
 *
 * Returns 1 when byte 0 is 0x5d, bytes 1 and 2 are zero, and byte 12 is
 * either 0x00 or 0xff; returns 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	const int head_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

	return head_ok && (buf[12] == 0 || buf[12] == 0xff);
}
147 
/*
 * NULL-terminated argv vectors for the external decompressor programs
 * referenced from the compr[] table below.  argv[0] doubles as the program
 * name handed to posix_spawnp()/execvp() in uncompressbuf() and as the
 * fallback name returned by methodname(); the remaining strings are passed
 * to the program unchanged.
 */
148 #define gzip_flags "-cd"
149 #define lzip_flags gzip_flags	/* lzip is invoked with the same flags as gzip */
150 
151 static const char *gzip_args[] = {
152 	"gzip", gzip_flags, NULL
153 };
154 static const char *uncompress_args[] = {
155 	"uncompress", "-c", NULL
156 };
157 static const char *bzip2_args[] = {
158 	"bzip2", "-cd", NULL
159 };
160 static const char *lzip_args[] = {
161 	"lzip", lzip_flags, NULL
162 };
163 static const char *xz_args[] = {
164 	"xz", "-cd", NULL
165 };
166 static const char *lrzip_args[] = {
167 	"lrzip", "-qdf", "-", NULL
168 };
169 static const char *lz4_args[] = {
170 	"lz4", "-cd", NULL
171 };
172 static const char *zstd_args[] = {
173 	"zstd", "-cd", NULL
174 };
175 
/* Placeholders stored in the `unused` column of compr[]; both are NULL here. */
176 #define	do_zlib		NULL
177 #define	do_bzlib	NULL
178 
/*
 * Table of recognised compression formats, scanned in order by
 * file_zmagic().  The METH_* constants are indices into this table and are
 * used by methodname() and getdecompressor(), so entries must not be
 * reordered without updating them.
 *
 * NOTE(review): METH_FROZEN is 2, which is the gzip entry ("\037\213"), not
 * the "frozen" entry at index 3; getdecompressor() maps it to
 * uncompressgzipped().  The name looks like a historical misnomer.
 */
179 file_private const struct {
180 	union {
181 		const char *magic;	/* used when maglen > 0: bytes to memcmp() */
182 		int (*func)(const unsigned char *);	/* used when maglen < 0 */
183 	} u;
184 	int maglen;	/* > 0: magic length; < 0: use u.func, |maglen| = minimum input size */
185 	const char **argv;	/* external command, see the *_args tables */
186 	void *unused;
187 } compr[] = {
188 #define METH_FROZEN	2
189 #define METH_BZIP	7
190 #define METH_XZ		9
191 #define METH_LZIP	8
192 #define METH_ZSTD	12
193 #define METH_LZMA	13
194 #define METH_ZLIB	14
195     { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
196     /* Uncompress can get stuck; so use gzip first if we have it
197      * Idea from Damien Clark, thanks! */
198     { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
199     { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
200     { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
201     { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
202     /* the standard pack utilities do not accept standard input */
203     { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
204     { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
205     /* ...only first file examined */
206     { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
207     { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
208     { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
209     { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
210     { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
211     { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
212     { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
/* The zlib entry (index METH_ZLIB) only exists when ZLIBSUPPORT is defined. */
213 #ifdef ZLIBSUPPORT
214     { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
215 #endif
216 };
217 
/*
 * Result codes shared by uncompressbuf(), the uncompress*() helpers and
 * makeerror(), and switched on in file_zmagic():
 *   OKDATA  - the output buffer holds decompressed data;
 *   NODATA  - nothing usable was produced (file_zmagic() just moves on);
 *   ERRDATA - the output buffer holds an error message built by makeerror().
 */
218 #define OKDATA 	0
219 #define NODATA	1
220 #define ERRDATA	2
221 
222 file_private ssize_t swrite(int, const void *, size_t);
223 #if HAVE_FORK
/* Number of entries in compr[]; the loop bound in file_zmagic(). */
224 file_private size_t ncompr = __arraycount(compr);
225 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
226     unsigned char **, size_t *);
/*
 * The built-in decompressors all share one signature so getdecompressor()
 * can return any of them: (input, &output, bytes_max, &length, extra).
 */
227 #ifdef BUILTIN_DECOMPRESS
228 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
229     size_t *, int);
230 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
231     size_t *, int);
232 #endif
233 #ifdef BUILTIN_BZLIB
234 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
235     size_t *, int);
236 #endif
237 #ifdef BUILTIN_XZLIB
238 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
239     size_t *, int);
240 #endif
241 #ifdef BUILTIN_ZSTDLIB
242 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
243     size_t *, int);
244 #endif
245 #ifdef BUILTIN_LZLIB
246 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
247     size_t *, int);
248 #endif
249 
/* printf-style: argument 3 is the format, variadic arguments start at 4. */
250 static int makeerror(unsigned char **, size_t *, const char *, ...)
251     __attribute__((__format__(__printf__, 3, 4)));
252 file_private const char *methodname(size_t);
253 
254 file_private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)255 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
256 {
257 	unsigned char *p;
258 	int mime = ms->flags & MAGIC_MIME;
259 
260 	if (!mime)
261 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
262 
263 	for (p = buf; *p; p++)
264 		if (!isalnum(*p))
265 			*p = '-';
266 
267 	return file_printf(ms, "application/x-decompression-error-%s-%s",
268 	    methodname(i), buf);
269 }
270 
/*
 * Try every compr[] entry against the data in `b`.  For each entry whose
 * magic matches, the data is decompressed with uncompressbuf() and the result
 * is described with file_buffer() (or, on ERRDATA, with
 * format_decompression_error()); unless suppressed by the flags checked
 * below, a description of the compressed container itself is then appended.
 *
 * Returns 0 immediately if MAGIC_COMPRESS is not set.  Otherwise rv starts
 * at 0, becomes 1 once output was produced for a match, and is set to -1 when
 * printing or buffer handling fails (the loop then continues with the next
 * entry).  MAGIC_COMPRESS is cleared before recursing into file_buffer() and
 * is always set again before returning.
 */
271 file_protected int
file_zmagic(struct magic_set * ms,const struct buffer * b,const char * name)272 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
273 {
274 	unsigned char *newbuf = NULL;
275 	size_t i, nsz;
276 	char *rbuf;
277 	file_pushbuf_t *pb;
278 	int urv, prv, rv = 0;
279 	int mime = ms->flags & MAGIC_MIME;
280 	int fd = b->fd;
281 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
282 	size_t nbytes = b->flen;
283 	int sa_saved = 0;	/* non-zero once sig_act holds the old SIGPIPE action */
284 	struct sigaction sig_act;
285 
286 	if ((ms->flags & MAGIC_COMPRESS) == 0)
287 		return 0;
288 
289 	for (i = 0; i < ncompr; i++) {
290 		int zm;
		/* |maglen| is the minimum number of bytes the check needs */
291 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
292 			continue;
		/* negative maglen selects the callback, positive a memcmp() */
293 		if (compr[i].maglen < 0) {
294 			zm = (*compr[i].u.func)(buf);
295 		} else {
296 			zm = memcmp(buf, compr[i].u.magic,
297 			    CAST(size_t, compr[i].maglen)) == 0;
298 		}
299 
300 		if (!zm)
301 			continue;
302 
303 		/* Prevent SIGPIPE death if child dies unexpectedly */
304 		if (!sa_saved) {
305 			// A separate new_act is used so that sig_act only ever holds the previous disposition, which is restored at `out'.
306 			struct sigaction new_act;
307 			memset(&new_act, 0, sizeof(new_act));
308 			new_act.sa_handler = SIG_IGN;
309 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
310 		}
311 
312 		nsz = nbytes;
		/* drop the buffer of a previous match; uncompressbuf() assigns a new one */
313 		efree(newbuf);
314 		urv = uncompressbuf(fd, ms->bytes_max, i,
315 		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
316 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
317 		    (char *)newbuf, nsz);
318 		switch (urv) {
319 		case OKDATA:
320 		case ERRDATA:
			/* cleared while file_buffer() runs on the result; set again at `out' */
321 			ms->flags &= ~MAGIC_COMPRESS;
322 			if (urv == ERRDATA)
323 				prv = format_decompression_error(ms, i, newbuf);
324 			else
325 				prv = file_buffer(ms, NULL, NULL, name, newbuf,
326 				    nsz);
327 			if (prv == -1)
328 				goto error;
329 			rv = 1;
330 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
331 				goto out;
			/* only some, not all, of the MAGIC_MIME bits set: stop here */
332 			if (mime != MAGIC_MIME && mime != 0)
333 				goto out;
334 			if ((file_printf(ms,
335 			    mime ? " compressed-encoding=" : " (")) == -1)
336 				goto error;
337 			if ((pb = file_push_buffer(ms)) == NULL)
338 				goto error;
339 			/*
340 			 * XXX: If file_buffer fails here, we overwrite
341 			 * the compressed text. FIXME.
342 			 */
343 			if (file_buffer(ms, NULL, NULL, NULL, buf, nbytes) == -1)
344 			{
345 				if (file_pop_buffer(ms, pb) != NULL)
346 					abort();
347 				goto error;
348 			}
349 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
350 				if (file_printf(ms, "%s", rbuf) == -1) {
351 					efree(rbuf);
352 					goto error;
353 				}
354 				efree(rbuf);
355 			}
356 			if (!mime && file_printf(ms, ")") == -1)
357 				goto error;
358 			/*FALLTHROUGH*/
359 		case NODATA:
360 			break;
361 		default:
362 			abort();
363 			/*NOTREACHED*/
		/* reached only via goto; the break leaves the switch, not the for loop */
364 		error:
365 			rv = -1;
366 			break;
367 		}
368 	}
369 out:
370 	DPRINTF("rv = %d\n", rv);
371 
	/* restore the caller's SIGPIPE action unless it was already SIG_IGN */
372 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
373 		(void)sigaction(SIGPIPE, &sig_act, NULL);
374 
375 	if (newbuf)
376 		efree(newbuf);
377 	ms->flags |= MAGIC_COMPRESS;
378 	DPRINTF("Zmagic returns %d\n", rv);
379 	return rv;
380 }
381 #endif
382 /*
383  * `safe' write for sockets and pipes.
384  */
385 file_private ssize_t
swrite(int fd,const void * buf,size_t n)386 swrite(int fd, const void *buf, size_t n)
387 {
388 	ssize_t rv;
389 	size_t rn = n;
390 
391 	do
392 		switch (rv = write(fd, buf, n)) {
393 		case -1:
394 			if (errno == EINTR)
395 				continue;
396 			return -1;
397 		default:
398 			n -= rv;
399 			buf = CAST(const char *, buf) + rv;
400 			break;
401 		}
402 	while (n > 0);
403 	return rn;
404 }
405 
406 
407 /*
408  * `safe' read for sockets and pipes.
409  */
/*
 * Read up to n bytes from fd into buf with FINFO_READ_FUNC(), continuing
 * after short reads and retrying on EINTR.
 *
 * Returns the number of bytes read (less than n if end of input was hit),
 * or -1 on a read error other than EINTR.
 *
 * The FIONREAD/select() pre-check is only compiled when FIONREAD is defined;
 * this file #undefs FIONREAD after its includes, so in this build that code
 * is inactive and `canbepipe' is unused (hence the attribute).
 */
410 file_protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)411 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
412 {
413 	ssize_t rv;
414 #if defined(FIONREAD) && !defined(__MINGW32__)
415 	int t = 0;
416 #endif
417 	size_t rn = n;	/* number of bytes we are aiming for; returned on success */
418 
419 	if (fd == STDIN_FILENO)
420 		goto nocheck;
421 
422 #if defined(FIONREAD) && !defined(__MINGW32__)
423 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
424 #ifdef FD_ZERO
425 		ssize_t cnt;
		/* poll in 100ms steps; give up (return 0) once cnt has reached 5 and select() still times out */
426 		for (cnt = 0;; cnt++) {
427 			fd_set check;
428 			struct timeval tout = {0, 100 * 1000};
429 			int selrv;
430 
431 			FD_ZERO(&check);
432 			FD_SET(fd, &check);
433 
434 			/*
435 			 * Avoid soft deadlock: do not read if there
436 			 * is nothing to read from sockets and pipes.
437 			 */
438 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
439 			if (selrv == -1) {
440 				if (errno == EINTR || errno == EAGAIN)
441 					continue;
442 			} else if (selrv == 0 && cnt >= 5) {
443 				return 0;
444 			} else
445 				break;
446 		}
447 #endif
448 		(void)ioctl(fd, FIONREAD, &t);
449 	}
450 
	/* never ask for more than is reported as available */
451 	if (t > 0 && CAST(size_t, t) < n) {
452 		n = t;
453 		rn = n;
454 	}
455 #endif
456 
457 nocheck:
458 	do
459 		switch ((rv = FINFO_READ_FUNC(fd, buf, n))) {
460 		case -1:
			/* `continue' re-tests n > 0 and retries the read */
461 			if (errno == EINTR)
462 				continue;
463 			return -1;
464 		case 0:
			/* end of input: report what was read so far */
465 			return rn - n;
466 		default:
467 			n -= rv;
468 			buf = CAST(char *, CCAST(void *, buf)) + rv;
469 			break;
470 		}
471 	while (n > 0);
472 	return rn;
473 }
474 
475 file_protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)476 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
477     size_t nbytes)
478 {
479 	char buf[4096];
480 	ssize_t r;
481 	int tfd;
482 
483 #ifdef WIN32
484 	const char *t;
485 	buf[0] = '\0';
486 	if ((t = getenv("TEMP")) != NULL)
487 		(void)strlcpy(buf, t, sizeof(buf));
488 	else if ((t = getenv("TMP")) != NULL)
489 		(void)strlcpy(buf, t, sizeof(buf));
490 	else if ((t = getenv("TMPDIR")) != NULL)
491 		(void)strlcpy(buf, t, sizeof(buf));
492 	if (buf[0] != '\0')
493 		(void)strlcat(buf, "/", sizeof(buf));
494 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
495 #else
496 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
497 #endif
498 #ifndef HAVE_MKSTEMP
499 	{
500 		char *ptr = mktemp(buf);
501 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
502 		r = errno;
503 		(void)unlink(ptr);
504 		errno = r;
505 	}
506 #else
507 	{
508 		int te;
509 		mode_t ou = umask(0);
510 		tfd = mkstemp(buf);
511 		(void)umask(ou);
512 		te = errno;
513 		(void)unlink(buf);
514 		errno = te;
515 	}
516 #endif
517 	if (tfd == -1) {
518 		file_error(ms, errno,
519 		    "cannot create temporary file for pipe copy");
520 		return -1;
521 	}
522 
523 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
524 		r = 1;
525 	else {
526 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
527 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
528 				break;
529 	}
530 
531 	switch (r) {
532 	case -1:
533 		file_error(ms, errno, "error copying from pipe to temp file");
534 		return -1;
535 	case 0:
536 		break;
537 	default:
538 		file_error(ms, errno, "error while writing to temp file");
539 		return -1;
540 	}
541 
542 	/*
543 	 * We duplicate the file descriptor, because fclose on a
544 	 * tmpfile will delete the file, but any open descriptors
545 	 * can still access the phantom inode.
546 	 */
547 	if ((fd = dup2(tfd, fd)) == -1) {
548 		file_error(ms, errno, "could not dup descriptor for temp file");
549 		return -1;
550 	}
551 	(void)close(tfd);
552 	if (FINFO_LSEEK_FUNC(fd, (zend_off_t)0, SEEK_SET) == (zend_off_t)-1) {
553 		file_badseek(ms);
554 		return -1;
555 	}
556 	return fd;
557 }
558 #ifdef PHP_FILEINFO_UNCOMPRESS
559 #ifdef BUILTIN_DECOMPRESS
560 
561 #define FHCRC		(1 << 1)
562 #define FEXTRA		(1 << 2)
563 #define FNAME		(1 << 3)
564 #define FCOMMENT	(1 << 4)
565 
566 
567 file_private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)568 uncompressgzipped(const unsigned char *old, unsigned char **newch,
569     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
570 {
571 	unsigned char flg;
572 	size_t data_start = 10;
573 
574 	if (*n < 4) {
575 		goto err;
576 	}
577 
578 	flg = old[3];
579 
580 	if (flg & FEXTRA) {
581 		if (data_start + 1 >= *n)
582 			goto err;
583 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
584 	}
585 	if (flg & FNAME) {
586 		while(data_start < *n && old[data_start])
587 			data_start++;
588 		data_start++;
589 	}
590 	if (flg & FCOMMENT) {
591 		while(data_start < *n && old[data_start])
592 			data_start++;
593 		data_start++;
594 	}
595 	if (flg & FHCRC)
596 		data_start += 2;
597 
598 	if (data_start >= *n)
599 		goto err;
600 
601 	*n -= data_start;
602 	old += data_start;
603 	return uncompresszlib(old, newch, bytes_max, n, 0);
604 err:
605 	return makeerror(newch, n, "File too short");
606 }
607 
608 file_private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)609 uncompresszlib(const unsigned char *old, unsigned char **newch,
610     size_t bytes_max, size_t *n, int zlib)
611 {
612 	int rc;
613 	z_stream z;
614 
615 	DPRINTF("builtin zlib decompression\n");
616 	z.next_in = CCAST(Bytef *, old);
617 	z.avail_in = CAST(uint32_t, *n);
618 	z.next_out = *newch;
619 	z.avail_out = CAST(unsigned int, bytes_max);
620 	z.zalloc = Z_NULL;
621 	z.zfree = Z_NULL;
622 	z.opaque = Z_NULL;
623 
624 	/* LINTED bug in header macro */
625 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
626 	if (rc != Z_OK)
627 		goto err;
628 
629 	rc = inflate(&z, Z_SYNC_FLUSH);
630 	if (rc != Z_OK && rc != Z_STREAM_END) {
631 		inflateEnd(&z);
632 		goto err;
633 	}
634 
635 	*n = CAST(size_t, z.total_out);
636 	rc = inflateEnd(&z);
637 	if (rc != Z_OK)
638 		goto err;
639 
640 	/* let's keep the nul-terminate tradition */
641 	(*newch)[*n] = '\0';
642 
643 	return OKDATA;
644 err:
645 	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
646 }
647 #endif
648 
649 #ifdef BUILTIN_BZLIB
650 file_private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)651 uncompressbzlib(const unsigned char *old, unsigned char **newch,
652     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
653 {
654 	int rc;
655 	bz_stream bz;
656 
657 	DPRINTF("builtin bzlib decompression\n");
658 	memset(&bz, 0, sizeof(bz));
659 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
660 	if (rc != BZ_OK)
661 		goto err;
662 
663 	bz.next_in = CCAST(char *, RCAST(const char *, old));
664 	bz.avail_in = CAST(uint32_t, *n);
665 	bz.next_out = RCAST(char *, *newch);
666 	bz.avail_out = CAST(unsigned int, bytes_max);
667 
668 	rc = BZ2_bzDecompress(&bz);
669 	if (rc != BZ_OK && rc != BZ_STREAM_END) {
670 		BZ2_bzDecompressEnd(&bz);
671 		goto err;
672 	}
673 
674 	/* Assume byte_max is within 32bit */
675 	/* assert(bz.total_out_hi32 == 0); */
676 	*n = CAST(size_t, bz.total_out_lo32);
677 	rc = BZ2_bzDecompressEnd(&bz);
678 	if (rc != BZ_OK)
679 		goto err;
680 
681 	/* let's keep the nul-terminate tradition */
682 	(*newch)[*n] = '\0';
683 
684 	return OKDATA;
685 err:
686 	return makeerror(newch, n, "bunzip error %d", rc);
687 }
688 #endif
689 
690 #ifdef BUILTIN_XZLIB
691 file_private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)692 uncompressxzlib(const unsigned char *old, unsigned char **newch,
693     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
694 {
695 	int rc;
696 	lzma_stream xz;
697 
698 	DPRINTF("builtin xzlib decompression\n");
699 	memset(&xz, 0, sizeof(xz));
700 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
701 	if (rc != LZMA_OK)
702 		goto err;
703 
704 	xz.next_in = CCAST(const uint8_t *, old);
705 	xz.avail_in = CAST(uint32_t, *n);
706 	xz.next_out = RCAST(uint8_t *, *newch);
707 	xz.avail_out = CAST(unsigned int, bytes_max);
708 
709 	rc = lzma_code(&xz, LZMA_RUN);
710 	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
711 		lzma_end(&xz);
712 		goto err;
713 	}
714 
715 	*n = CAST(size_t, xz.total_out);
716 
717 	lzma_end(&xz);
718 
719 	/* let's keep the nul-terminate tradition */
720 	(*newch)[*n] = '\0';
721 
722 	return OKDATA;
723 err:
724 	return makeerror(newch, n, "unxz error %d", rc);
725 }
726 #endif
727 
728 #ifdef BUILTIN_ZSTDLIB
729 file_private int
uncompresszstd(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)730 uncompresszstd(const unsigned char *old, unsigned char **newch,
731     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
732 {
733 	size_t rc;
734 	ZSTD_DStream *zstd;
735 	ZSTD_inBuffer in;
736 	ZSTD_outBuffer out;
737 
738 	DPRINTF("builtin zstd decompression\n");
739 	if ((zstd = ZSTD_createDStream()) == NULL) {
740 		return makeerror(newch, n, "No ZSTD decompression stream, %s",
741 		    strerror(errno));
742 	}
743 
744 	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
745 	if (ZSTD_isError(rc))
746 		goto err;
747 
748 	in.src = CCAST(const void *, old);
749 	in.size = *n;
750 	in.pos = 0;
751 	out.dst = RCAST(void *, *newch);
752 	out.size = bytes_max;
753 	out.pos = 0;
754 
755 	rc = ZSTD_decompressStream(zstd, &out, &in);
756 	if (ZSTD_isError(rc))
757 		goto err;
758 
759 	*n = out.pos;
760 
761 	ZSTD_freeDStream(zstd);
762 
763 	/* let's keep the nul-terminate tradition */
764 	(*newch)[*n] = '\0';
765 
766 	return OKDATA;
767 err:
768 	ZSTD_freeDStream(zstd);
769 	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
770 }
771 #endif
772 
773 #ifdef BUILTIN_LZLIB
774 file_private int
uncompresslzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)775 uncompresslzlib(const unsigned char *old, unsigned char **newch,
776     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
777 {
778 	enum LZ_Errno err;
779 	size_t old_remaining = *n;
780 	size_t new_remaining = bytes_max;
781 	size_t total_read = 0;
782 	unsigned char *bufp;
783 	struct LZ_Decoder *dec;
784 
785 	bufp = *newch;
786 
787 	DPRINTF("builtin lzlib decompression\n");
788 	dec = LZ_decompress_open();
789 	if (!dec) {
790 		return makeerror(newch, n, "unable to allocate LZ_Decoder");
791 	}
792 	if (LZ_decompress_errno(dec) != LZ_ok)
793 		goto err;
794 
795 	for (;;) {
796 		// LZ_decompress_read() stops at member boundaries, so we may
797 		// have more than one successful read after writing all data
798 		// we have.
799 		if (old_remaining > 0) {
800 			int wr = LZ_decompress_write(dec, old, old_remaining);
801 			if (wr < 0)
802 				goto err;
803 			old_remaining -= wr;
804 			old += wr;
805 		}
806 
807 		int rd = LZ_decompress_read(dec, bufp, new_remaining);
808 		if (rd > 0) {
809 			new_remaining -= rd;
810 			bufp += rd;
811 			total_read += rd;
812 		}
813 
814 		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
815 			goto err;
816 		if (new_remaining == 0)
817 			break;
818 		if (old_remaining == 0 && rd == 0)
819 			break;
820 	}
821 
822 	LZ_decompress_close(dec);
823 	*n = total_read;
824 
825 	/* let's keep the nul-terminate tradition */
826 	*bufp = '\0';
827 
828 	return OKDATA;
829 err:
830 	err = LZ_decompress_errno(dec);
831 	LZ_decompress_close(dec);
832 	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
833 }
834 #endif
835 
836 
837 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)838 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
839 {
840 	char *msg;
841 	va_list ap;
842 	int rv;
843 
844 	DPRINTF("Makeerror %s\n", fmt);
845 	free(*buf);
846 	va_start(ap, fmt);
847 	rv = vasprintf(&msg, fmt, ap);
848 	va_end(ap);
849 	if (rv < 0) {
850 		DPRINTF("Makeerror failed");
851 		*buf = NULL;
852 		*len = 0;
853 		return NODATA;
854 	}
855 	*buf = RCAST(unsigned char *, msg);
856 	*len = strlen(msg);
857 	return ERRDATA;
858 }
859 
/*
 * Close descriptor fd[i] if it is open (not -1) and mark the slot as
 * closed by storing -1, so a second call on the same slot does nothing.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
868 
/* Close both ends of the pipe pair fd[0]/fd[1]; see closefd(). */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
876 
/*
 * Arrange for descriptor `fd' to become descriptor `i' and for `fd' itself
 * to be closed.  Nothing happens when the two are already equal.
 *
 * v  - with HAVE_POSIX_SPAWNP: the posix_spawn_file_actions_t the dup2 and
 *      close actions are queued on.  Otherwise it is not used beyond being
 *      mentioned to keep the compiler quiet, and the dup2()/close() are
 *      performed immediately in the calling process.
 * i  - target descriptor number.
 * fd - source descriptor.
 *
 * In the non-spawn build the visible caller (handledesc(), invoked from
 * uncompressbuf()) runs in a vfork()ed child, so a dup2() failure must
 * terminate with _exit(): exit() would run atexit handlers and flush stdio
 * state that belongs to the parent.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
	}
	close(v ? fd : fd);
#endif
}
894 
/*
 * Close descriptor `fd' for the child: with HAVE_POSIX_SPAWNP the close is
 * queued on the file-actions object passed in `v'; otherwise `fd' is closed
 * right away and `v' is ignored.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;	/* only meaningful in the posix_spawn variant */
	close(fd);
#endif
}
905 
906 static void
handledesc(void * v,int fd,int fdp[3][2])907 handledesc(void *v, int fd, int fdp[3][2])
908 {
909 	if (fd != -1) {
910 		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
911 		movedesc(v, STDIN_FILENO, fd);
912 	} else {
913 		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
914 		if (fdp[STDIN_FILENO][1] > 2)
915 		    closedesc(v, fdp[STDIN_FILENO][1]);
916 	}
917 
918 	file_clear_closexec(STDIN_FILENO);
919 
920 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
921 	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
922 	if (fdp[STDOUT_FILENO][0] > 2)
923 		closedesc(v, fdp[STDOUT_FILENO][0]);
924 
925 	file_clear_closexec(STDOUT_FILENO);
926 
927 	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
928 	if (fdp[STDERR_FILENO][0] > 2)
929 		closedesc(v, fdp[STDERR_FILENO][0]);
930 
931 	file_clear_closexec(STDERR_FILENO);
932 }
933 
934 static pid_t
writechild(int fd,const void * old,size_t n)935 writechild(int fd, const void *old, size_t n)
936 {
937 	pid_t pid;
938 
939 	/*
940 	 * fork again, to avoid blocking because both
941 	 * pipes filled
942 	 */
943 	pid = fork();
944 	if (pid == -1) {
945 		DPRINTF("Fork failed (%s)\n", strerror(errno));
946 		return -1;
947 	}
948 	if (pid == 0) {
949 		/* child */
950 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
951 			DPRINTF("Write failed (%s)\n", strerror(errno));
952 			exit(EXIT_FAILURE);
953 		}
954 		exit(EXIT_SUCCESS);
955 	}
956 	/* parent */
957 	return pid;
958 }
959 
960 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)961 filter_error(unsigned char *ubuf, ssize_t n)
962 {
963 	char *p;
964 	char *buf;
965 
966 	ubuf[n] = '\0';
967 	buf = RCAST(char *, ubuf);
968 	while (isspace(CAST(unsigned char, *buf)))
969 		buf++;
970 	DPRINTF("Filter error[[[%s]]]\n", buf);
971 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
972 		*p = '\0';
973 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
974 		*p = '\0';
975 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
976 		++p;
977 		while (isspace(CAST(unsigned char, *p)))
978 			p++;
979 		n = strlen(p);
980 		memmove(ubuf, p, CAST(size_t, n + 1));
981 	}
982 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
983 	if (islower(*ubuf))
984 		*ubuf = toupper(*ubuf);
985 	return n;
986 }
987 
988 file_private const char *
methodname(size_t method)989 methodname(size_t method)
990 {
991 	switch (method) {
992 #ifdef BUILTIN_DECOMPRESS
993 	case METH_FROZEN:
994 	case METH_ZLIB:
995 		return "zlib";
996 #endif
997 #ifdef BUILTIN_BZLIB
998 	case METH_BZIP:
999 		return "bzlib";
1000 #endif
1001 #ifdef BUILTIN_XZLIB
1002 	case METH_XZ:
1003 	case METH_LZMA:
1004 		return "xzlib";
1005 #endif
1006 #ifdef BUILTIN_ZSTDLIB
1007 	case METH_ZSTD:
1008 		return "zstd";
1009 #endif
1010 #ifdef BUILTIN_LZLIB
1011 	case METH_LZIP:
1012 		return "lzlib";
1013 #endif
1014 	default:
1015 		return compr[method].argv[0];
1016 	}
1017 }
1018 
1019 file_private int (*
getdecompressor(size_t method)1020 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1021     size_t *, int)
1022 {
1023 	switch (method) {
1024 #ifdef BUILTIN_DECOMPRESS
1025 	case METH_FROZEN:
1026 		return uncompressgzipped;
1027 	case METH_ZLIB:
1028 		return uncompresszlib;
1029 #endif
1030 #ifdef BUILTIN_BZLIB
1031 	case METH_BZIP:
1032 		return uncompressbzlib;
1033 #endif
1034 #ifdef BUILTIN_XZLIB
1035 	case METH_XZ:
1036 	case METH_LZMA:
1037 		return uncompressxzlib;
1038 #endif
1039 #ifdef BUILTIN_ZSTDLIB
1040 	case METH_ZSTD:
1041 		return uncompresszstd;
1042 #endif
1043 #ifdef BUILTIN_LZLIB
1044 	case METH_LZIP:
1045 		return uncompresslzlib;
1046 #endif
1047 	default:
1048 		return NULL;
1049 	}
1050 }
1051 
1052 file_private int
uncompressbuf(int fd,size_t bytes_max,size_t method,int nofork,const unsigned char * old,unsigned char ** newch,size_t * n)1053 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1054     const unsigned char *old, unsigned char **newch, size_t* n)
1055 {
1056 	int fdp[3][2];
1057 	int status, rv, w;
1058 	pid_t pid;
1059 	pid_t writepid = -1;
1060 	size_t i;
1061 	ssize_t r, re;
1062 	char *const *args;
1063 #ifdef HAVE_POSIX_SPAWNP
1064 	posix_spawn_file_actions_t fa;
1065 #endif
1066 	int (*decompress)(const unsigned char *, unsigned char **,
1067 	    size_t, size_t *, int) = getdecompressor(method);
1068 
1069 	*newch = CAST(unsigned char *, emalloc(bytes_max + 1));
1070 	if (*newch == NULL)
1071 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
1072 
1073 	if (decompress) {
1074 		if (nofork) {
1075 			return makeerror(newch, n,
1076 			    "Fork is required to uncompress, but disabled");
1077 		}
1078 		return (*decompress)(old, newch, bytes_max, n, 1);
1079 	}
1080 
1081 	(void)fflush(stdout);
1082 	(void)fflush(stderr);
1083 
1084 	for (i = 0; i < __arraycount(fdp); i++)
1085 		fdp[i][0] = fdp[i][1] = -1;
1086 
1087 	/*
1088 	 * There are multithreaded users who run magic_file()
1089 	 * from dozens of threads. If two parallel magic_file() calls
1090 	 * analyze two large compressed files, both will spawn
1091 	 * an uncompressing child here, which writes out uncompressed data.
1092 	 * We read some portion, then close the pipe, then waitpid() the child.
1093 	 * If uncompressed data is larger, child should get EPIPE and exit.
1094 	 * However, with *parallel* calls OTHER child may unintentionally
1095 	 * inherit pipe fds, thus keeping pipe open and making writes in
1096 	 * our child block instead of failing with EPIPE!
1097 	 * (For the bug to occur, two threads must mutually inherit their pipes,
1098 	 * and both must have large outputs. Thus it happens not that often).
1099 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
1100 	 */
1101 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1102 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1103 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1104 		closep(fdp[STDIN_FILENO]);
1105 		closep(fdp[STDOUT_FILENO]);
1106 		return makeerror(newch, n, "Cannot create pipe, %s",
1107 		    strerror(errno));
1108 	}
1109 
1110 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1111 #ifdef HAVE_POSIX_SPAWNP
1112 	posix_spawn_file_actions_init(&fa);
1113 
1114 	handledesc(&fa, fd, fdp);
1115 
1116 	DPRINTF("Executing %s\n", compr[method].argv[0]);
1117 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1118 	    args, NULL);
1119 
1120 	posix_spawn_file_actions_destroy(&fa);
1121 
1122 	if (status == -1) {
1123 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1124 		    compr[method].argv[0], strerror(errno));
1125 	}
1126 #else
1127 	/* For processes with large mapped virtual sizes, vfork
1128 	 * may be _much_ faster (10-100 times) than fork.
1129 	 */
1130 	pid = vfork();
1131 	if (pid == -1) {
1132 		return makeerror(newch, n, "Cannot vfork, %s",
1133 		    strerror(errno));
1134 	}
1135 	if (pid == 0) {
1136 		/* child */
1137 		/* Note: we are after vfork, do not modify memory
1138 		 * in a way which confuses parent. In particular,
1139 		 * do not modify fdp[i][j].
1140 		 */
1141 		handledesc(NULL, fd, fdp);
1142 		DPRINTF("Executing %s\n", compr[method].argv[0]);
1143 
1144 		(void)execvp(compr[method].argv[0], args);
1145 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1146 		    compr[method].argv[0], strerror(errno));
1147 		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1148 	}
1149 #endif
1150 	/* parent */
1151 	/* Close write sides of child stdout/err pipes */
1152 	for (i = 1; i < __arraycount(fdp); i++)
1153 		closefd(fdp[i], 1);
1154 	/* Write the buffer data to child stdin, if we don't have fd */
1155 	if (fd == -1) {
1156 		closefd(fdp[STDIN_FILENO], 0);
1157 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1158 		if (writepid == (pid_t)-1) {
1159 			rv = makeerror(newch, n, "Write to child failed, %s",
1160 			    strerror(errno));
1161 			DPRINTF("Write to child failed\n");
1162 			goto err;
1163 		}
1164 		closefd(fdp[STDIN_FILENO], 1);
1165 	}
1166 
1167 	rv = OKDATA;
1168 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1169 	DPRINTF("read got %zd\n", r);
1170 	if (r < 0) {
1171 		rv = ERRDATA;
1172 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1173 		        strerror(errno));
1174 		goto err;
1175 	}
1176 	if (CAST(size_t, r) == bytes_max) {
1177 		/*
1178 		 * close fd so that the child exits with sigpipe and ignore
1179 		 * errors, otherwise we risk the child blocking and never
1180 		 * exiting.
1181 		 */
1182 		DPRINTF("Closing stdout for bytes_max\n");
1183 		closefd(fdp[STDOUT_FILENO], 0);
1184 		goto ok;
1185 	}
1186 	if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1187 		DPRINTF("Got stuff from stderr %s\n", *newch);
1188 		rv = ERRDATA;
1189 		r = filter_error(*newch, r);
1190 		goto ok;
1191 	}
1192 	if  (re == 0)
1193 		goto ok;
1194 	rv = makeerror(newch, n, "Read stderr failed, %s",
1195 	    strerror(errno));
1196 	goto err;
1197 ok:
1198 	*n = r;
1199 	/* NUL terminate, as every buffer is handled here. */
1200 	(*newch)[*n] = '\0';
1201 err:
1202 	closefd(fdp[STDIN_FILENO], 1);
1203 	closefd(fdp[STDOUT_FILENO], 0);
1204 	closefd(fdp[STDERR_FILENO], 0);
1205 
1206 	w = waitpid(pid, &status, 0);
1207 wait_err:
1208 	if (w == -1) {
1209 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1210 		DPRINTF("Child wait return %#x\n", status);
1211 	} else if (!WIFEXITED(status)) {
1212 		DPRINTF("Child not exited (%#x)\n", status);
1213 	} else if (WEXITSTATUS(status) != 0) {
1214 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1215 	}
1216 	if (writepid > 0) {
1217 		/* _After_ we know decompressor has exited, our input writer
1218 		 * definitely will exit now (at worst, writing fails in it,
1219 		 * since output fd is closed now on the reading side).
1220 		 */
1221 		w = waitpid(writepid, &status, 0);
1222 		writepid = -1;
1223 		goto wait_err;
1224 	}
1225 
1226 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1227 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1228 
1229 	return rv;
1230 }
1231 #endif
1232 #endif
1233