xref: /PHP-8.3/ext/fileinfo/libmagic/compress.c (revision a24727a5)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * compress routines:
30  *	zmagic() - returns 0 if not recognized, uncompresses and prints
31  *		   information if recognized
32  *	uncompress(method, old, n, newch) - uncompress old into new,
33  *					    using method, return sizeof new
34  */
35 #include "file.h"
36 
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.136 2022/09/13 16:08:34 christos Exp $")
39 #endif
40 
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <string.h>
50 #include <errno.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <signal.h>
54 #ifndef HAVE_SIG_T
55 typedef void (*sig_t)(int);
56 #endif /* HAVE_SIG_T */
57 #ifdef HAVE_SYS_IOCTL_H
58 #include <sys/ioctl.h>
59 #endif
60 #ifdef HAVE_SYS_WAIT_H
61 #include <sys/wait.h>
62 #endif
63 #if defined(HAVE_SYS_TIME_H)
64 #include <sys/time.h>
65 #endif
66 #if defined(HAVE_ZLIB_H) && defined(PHP_FILEINFO_UNCOMPRESS)
67 #define BUILTIN_DECOMPRESS
68 #include <zlib.h>
69 #endif
70 
71 #undef FIONREAD
72 
73 #if defined(PHP_FILEINFO_UNCOMPRESS)
74 #define BUILTIN_BZLIB
75 #include <bzlib.h>
76 #endif
77 
78 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
79 #define BUILTIN_XZLIB
80 #include <lzma.h>
81 #endif
82 
83 #ifdef DEBUG
84 int tty = -1;
85 #define DPRINTF(...)	do { \
86 	if (tty == -1) \
87 		tty = open("/dev/tty", O_RDWR); \
88 	if (tty == -1) \
89 		abort(); \
90 	dprintf(tty, __VA_ARGS__); \
91 } while (/*CONSTCOND*/0)
92 #else
93 #define DPRINTF(...)
94 #endif
95 
96 #ifdef ZLIBSUPPORT
97 /*
98  * The following python code is not really used because ZLIBSUPPORT is only
99  * defined if we have a built-in zlib, and the built-in zlib handles that.
100  * That is not true for android where we have zlib.h and not -lz.
101  */
102 static const char zlibcode[] =
103     "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
104 
105 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
106 
/*
 * Decide whether buf starts with a plausible zlib stream header.
 *
 * buf must provide at least two readable bytes (the compr[] entry for
 * this check uses a magic length of -2).
 *
 * Returns 1 when the header looks like zlib, 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	/* Low nibble of the first byte must be 8 and its top bit clear. */
	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/*
	 * The two header bytes, read as one big-endian 16-bit number
	 * (first byte * 256 + second byte), must be a multiple of 31.
	 * This is pure arithmetic on byte values, so the result must not
	 * depend on the byte order of the host; the previous host
	 * endianness probe swapped the bytes on big-endian machines and
	 * rejected valid headers there.
	 */
	check = ((unsigned int)buf[0] << 8) | buf[1];
	return check % 31 == 0;
}
123 #endif
124 
125 #ifdef PHP_FILEINFO_UNCOMPRESS
126 
/*
 * Heuristic used for the "lzma" entry of compr[]: the first byte must be
 * 0x5d, the next two bytes zero, and byte 12 either 0x00 or 0xff.
 *
 * buf must provide at least 13 readable bytes (the table entry uses a
 * magic length of -13).  Byte 12 is only inspected when the first three
 * bytes already match.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	if (!(buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0))
		return 0;
	return buf[12] == 0 || buf[12] == 0xff;
}
136 
137 #define gzip_flags "-cd"
138 #define lrzip_flags "-do"
139 #define lzip_flags gzip_flags
140 
141 static const char *gzip_args[] = {
142 	"gzip", gzip_flags, NULL
143 };
144 static const char *uncompress_args[] = {
145 	"uncompress", "-c", NULL
146 };
147 static const char *bzip2_args[] = {
148 	"bzip2", "-cd", NULL
149 };
150 static const char *lzip_args[] = {
151 	"lzip", lzip_flags, NULL
152 };
153 static const char *xz_args[] = {
154 	"xz", "-cd", NULL
155 };
156 static const char *lrzip_args[] = {
157 	"lrzip", lrzip_flags, NULL
158 };
159 static const char *lz4_args[] = {
160 	"lz4", "-cd", NULL
161 };
162 static const char *zstd_args[] = {
163 	"zstd", "-cd", NULL
164 };
165 
166 #define	do_zlib		NULL
167 #define	do_bzlib	NULL
168 
169 private const struct {
170 	union {
171 		const char *magic;
172 		int (*func)(const unsigned char *);
173 	} u;
174 	int maglen;
175 	const char **argv;
176 	void *unused;
177 } compr[] = {
178 #define METH_FROZEN	2
179 #define METH_BZIP	7
180 #define METH_XZ		9
181 #define METH_LZMA	13
182 #define METH_ZLIB	14
183     { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
184     /* Uncompress can get stuck; so use gzip first if we have it
185      * Idea from Damien Clark, thanks! */
186     { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
187     { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
188     { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
189     { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
190     /* the standard pack utilities do not accept standard input */
191     { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
192     { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
193     /* ...only first file examined */
194     { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
195     { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
196     { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
197     { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
198     { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
199     { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
200     { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
201 #ifdef ZLIBSUPPORT
202     { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
203 #endif
204 };
205 
206 #define OKDATA 	0
207 #define NODATA	1
208 #define ERRDATA	2
209 
210 private ssize_t swrite(int, const void *, size_t);
211 #if HAVE_FORK
212 private size_t ncompr = __arraycount(compr);
213 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
214     unsigned char **, size_t *);
215 #ifdef BUILTIN_DECOMPRESS
216 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
217     size_t *, int);
218 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
219     size_t *);
220 #endif
221 #ifdef BUILTIN_BZLIB
222 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
223     size_t *);
224 #endif
225 #ifdef BUILTIN_XZLIB
226 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
227     size_t *);
228 #endif
229 
230 static int makeerror(unsigned char **, size_t *, const char *, ...)
231     __attribute__((__format__(__printf__, 3, 4)));
232 private const char *methodname(size_t);
233 
234 private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)235 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
236 {
237 	unsigned char *p;
238 	int mime = ms->flags & MAGIC_MIME;
239 
240 	if (!mime)
241 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
242 
243 	for (p = buf; *p; p++)
244 		if (!isalnum(*p))
245 			*p = '-';
246 
247 	return file_printf(ms, "application/x-decompression-error-%s-%s",
248 	    methodname(i), buf);
249 }
250 
251 protected int
file_zmagic(struct magic_set * ms,const struct buffer * b,const char * name)252 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
253 {
254 	unsigned char *newbuf = NULL;
255 	size_t i, nsz;
256 	char *rbuf;
257 	file_pushbuf_t *pb;
258 	int urv, prv, rv = 0;
259 	int mime = ms->flags & MAGIC_MIME;
260 	int fd = b->fd;
261 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
262 	size_t nbytes = b->flen;
263 	int sa_saved = 0;
264 	struct sigaction sig_act;
265 
266 	if ((ms->flags & MAGIC_COMPRESS) == 0)
267 		return 0;
268 
269 	for (i = 0; i < ncompr; i++) {
270 		int zm;
271 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
272 			continue;
273 		if (compr[i].maglen < 0) {
274 			zm = (*compr[i].u.func)(buf);
275 		} else {
276 			zm = memcmp(buf, compr[i].u.magic,
277 			    CAST(size_t, compr[i].maglen)) == 0;
278 		}
279 
280 		if (!zm)
281 			continue;
282 
283 		/* Prevent SIGPIPE death if child dies unexpectedly */
284 		if (!sa_saved) {
285 			//We can use sig_act for both new and old, but
286 			struct sigaction new_act;
287 			memset(&new_act, 0, sizeof(new_act));
288 			new_act.sa_handler = SIG_IGN;
289 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
290 		}
291 
292 		nsz = nbytes;
293 		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
294 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
295 		    (char *)newbuf, nsz);
296 		switch (urv) {
297 		case OKDATA:
298 		case ERRDATA:
299 			ms->flags &= ~MAGIC_COMPRESS;
300 			if (urv == ERRDATA)
301 				prv = format_decompression_error(ms, i, newbuf);
302 			else
303 				prv = file_buffer(ms, NULL, NULL, name, newbuf, nsz);
304 			if (prv == -1)
305 				goto error;
306 			rv = 1;
307 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
308 				goto out;
309 			if (mime != MAGIC_MIME && mime != 0)
310 				goto out;
311 			if ((file_printf(ms,
312 			    mime ? " compressed-encoding=" : " (")) == -1)
313 				goto error;
314 			if ((pb = file_push_buffer(ms)) == NULL)
315 				goto error;
316 			/*
317 			 * XXX: If file_buffer fails here, we overwrite
318 			 * the compressed text. FIXME.
319 			 */
320 			if (file_buffer(ms, NULL, NULL, NULL, buf, nbytes) == -1) {
321 				if (file_pop_buffer(ms, pb) != NULL)
322 					abort();
323 				goto error;
324 			}
325 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
326 				if (file_printf(ms, "%s", rbuf) == -1) {
327 					efree(rbuf);
328 					goto error;
329 				}
330 				efree(rbuf);
331 			}
332 			if (!mime && file_printf(ms, ")") == -1)
333 				goto error;
334 			/*FALLTHROUGH*/
335 		case NODATA:
336 			break;
337 		default:
338 			abort();
339 			/*NOTREACHED*/
340 		error:
341 			rv = -1;
342 			break;
343 		}
344 	}
345 out:
346 	DPRINTF("rv = %d\n", rv);
347 
348 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
349 		(void)sigaction(SIGPIPE, &sig_act, NULL);
350 
351 	if (newbuf)
352 		efree(newbuf);
353 	ms->flags |= MAGIC_COMPRESS;
354 	DPRINTF("Zmagic returns %d\n", rv);
355 	return rv;
356 }
357 #endif
358 /*
359  * `safe' write for sockets and pipes.
360  */
361 private ssize_t
swrite(int fd,const void * buf,size_t n)362 swrite(int fd, const void *buf, size_t n)
363 {
364 	ssize_t rv;
365 	size_t rn = n;
366 
367 	do
368 		switch (rv = write(fd, buf, n)) {
369 		case -1:
370 			if (errno == EINTR)
371 				continue;
372 			return -1;
373 		default:
374 			n -= rv;
375 			buf = CAST(const char *, buf) + rv;
376 			break;
377 		}
378 	while (n > 0);
379 	return rn;
380 }
381 
382 
383 /*
384  * `safe' read for sockets and pipes.
385  */
386 protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)387 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
388 {
389 	ssize_t rv;
390 #ifdef FIONREAD
391 	int t = 0;
392 #endif
393 	size_t rn = n;
394 
395 	if (fd == STDIN_FILENO)
396 		goto nocheck;
397 
398 #ifdef FIONREAD
399 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
400 #ifdef FD_ZERO
401 		ssize_t cnt;
402 		for (cnt = 0;; cnt++) {
403 			fd_set check;
404 			struct timeval tout = {0, 100 * 1000};
405 			int selrv;
406 
407 			FD_ZERO(&check);
408 			FD_SET(fd, &check);
409 
410 			/*
411 			 * Avoid soft deadlock: do not read if there
412 			 * is nothing to read from sockets and pipes.
413 			 */
414 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
415 			if (selrv == -1) {
416 				if (errno == EINTR || errno == EAGAIN)
417 					continue;
418 			} else if (selrv == 0 && cnt >= 5) {
419 				return 0;
420 			} else
421 				break;
422 		}
423 #endif
424 		(void)ioctl(fd, FIONREAD, &t);
425 	}
426 
427 	if (t > 0 && CAST(size_t, t) < n) {
428 		n = t;
429 		rn = n;
430 	}
431 #endif
432 
433 nocheck:
434 	do
435 		switch ((rv = FINFO_READ_FUNC(fd, buf, n))) {
436 		case -1:
437 			if (errno == EINTR)
438 				continue;
439 			return -1;
440 		case 0:
441 			return rn - n;
442 		default:
443 			n -= rv;
444 			buf = CAST(char *, CCAST(void *, buf)) + rv;
445 			break;
446 		}
447 	while (n > 0);
448 	return rn;
449 }
450 
451 protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)452 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
453     size_t nbytes)
454 {
455 	char buf[4096];
456 	ssize_t r;
457 	int tfd;
458 
459 #ifdef WIN32
460 	const char *t;
461 	buf[0] = '\0';
462 	if ((t = getenv("TEMP")) != NULL)
463 		(void)strlcpy(buf, t, sizeof(buf));
464 	else if ((t = getenv("TMP")) != NULL)
465 		(void)strlcpy(buf, t, sizeof(buf));
466 	else if ((t = getenv("TMPDIR")) != NULL)
467 		(void)strlcpy(buf, t, sizeof(buf));
468 	if (buf[0] != '\0')
469 		(void)strlcat(buf, "/", sizeof(buf));
470 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
471 #else
472 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
473 #endif
474 #ifndef HAVE_MKSTEMP
475 	{
476 		char *ptr = mktemp(buf);
477 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
478 		r = errno;
479 		(void)unlink(ptr);
480 		errno = r;
481 	}
482 #else
483 	{
484 		int te;
485 		mode_t ou = umask(0);
486 		tfd = mkstemp(buf);
487 		(void)umask(ou);
488 		te = errno;
489 		(void)unlink(buf);
490 		errno = te;
491 	}
492 #endif
493 	if (tfd == -1) {
494 		file_error(ms, errno,
495 		    "cannot create temporary file for pipe copy");
496 		return -1;
497 	}
498 
499 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
500 		r = 1;
501 	else {
502 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
503 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
504 				break;
505 	}
506 
507 	switch (r) {
508 	case -1:
509 		file_error(ms, errno, "error copying from pipe to temp file");
510 		return -1;
511 	case 0:
512 		break;
513 	default:
514 		file_error(ms, errno, "error while writing to temp file");
515 		return -1;
516 	}
517 
518 	/*
519 	 * We duplicate the file descriptor, because fclose on a
520 	 * tmpfile will delete the file, but any open descriptors
521 	 * can still access the phantom inode.
522 	 */
523 	if ((fd = dup2(tfd, fd)) == -1) {
524 		file_error(ms, errno, "could not dup descriptor for temp file");
525 		return -1;
526 	}
527 	(void)close(tfd);
528 	if (FINFO_LSEEK_FUNC(fd, (zend_off_t)0, SEEK_SET) == (zend_off_t)-1) {
529 		file_badseek(ms);
530 		return -1;
531 	}
532 	return fd;
533 }
534 #ifdef PHP_FILEINFO_UNCOMPRESS
535 #ifdef BUILTIN_DECOMPRESS
536 
537 #define FHCRC		(1 << 1)
538 #define FEXTRA		(1 << 2)
539 #define FNAME		(1 << 3)
540 #define FCOMMENT	(1 << 4)
541 
542 
543 private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n)544 uncompressgzipped(const unsigned char *old, unsigned char **newch,
545     size_t bytes_max, size_t *n)
546 {
547 	unsigned char flg = old[3];
548 	size_t data_start = 10;
549 
550 	if (flg & FEXTRA) {
551 		if (data_start + 1 >= *n)
552 			goto err;
553 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
554 	}
555 	if (flg & FNAME) {
556 		while(data_start < *n && old[data_start])
557 			data_start++;
558 		data_start++;
559 	}
560 	if (flg & FCOMMENT) {
561 		while(data_start < *n && old[data_start])
562 			data_start++;
563 		data_start++;
564 	}
565 	if (flg & FHCRC)
566 		data_start += 2;
567 
568 	if (data_start >= *n)
569 		goto err;
570 
571 	*n -= data_start;
572 	old += data_start;
573 	return uncompresszlib(old, newch, bytes_max, n, 0);
574 err:
575 	return makeerror(newch, n, "File too short");
576 }
577 
578 private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)579 uncompresszlib(const unsigned char *old, unsigned char **newch,
580     size_t bytes_max, size_t *n, int zlib)
581 {
582 	int rc;
583 	z_stream z;
584 
585 	if ((*newch = CAST(unsigned char *, emalloc(bytes_max + 1))) == NULL)
586 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
587 
588 	z.next_in = CCAST(Bytef *, old);
589 	z.avail_in = CAST(uint32_t, *n);
590 	z.next_out = *newch;
591 	z.avail_out = CAST(unsigned int, bytes_max);
592 	z.zalloc = Z_NULL;
593 	z.zfree = Z_NULL;
594 	z.opaque = Z_NULL;
595 
596 	/* LINTED bug in header macro */
597 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
598 	if (rc != Z_OK)
599 		goto err;
600 
601 	rc = inflate(&z, Z_SYNC_FLUSH);
602 	if (rc != Z_OK && rc != Z_STREAM_END)
603 		goto err;
604 
605 	*n = CAST(size_t, z.total_out);
606 	rc = inflateEnd(&z);
607 	if (rc != Z_OK)
608 		goto err;
609 
610 	/* let's keep the nul-terminate tradition */
611 	(*newch)[*n] = '\0';
612 
613 	return OKDATA;
614 err:
615 	strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
616 	*n = strlen(RCAST(char *, *newch));
617 	return ERRDATA;
618 }
619 #endif
620 
621 #ifdef BUILTIN_BZLIB
622 private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n)623 uncompressbzlib(const unsigned char *old, unsigned char **newch,
624     size_t bytes_max, size_t *n)
625 {
626 	int rc;
627 	bz_stream bz;
628 
629 	memset(&bz, 0, sizeof(bz));
630 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
631 	if (rc != BZ_OK)
632 		goto err;
633 
634 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
635 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
636 
637 	bz.next_in = CCAST(char *, RCAST(const char *, old));
638 	bz.avail_in = CAST(uint32_t, *n);
639 	bz.next_out = RCAST(char *, *newch);
640 	bz.avail_out = CAST(unsigned int, bytes_max);
641 
642 	rc = BZ2_bzDecompress(&bz);
643 	if (rc != BZ_OK && rc != BZ_STREAM_END)
644 		goto err;
645 
646 	/* Assume byte_max is within 32bit */
647 	/* assert(bz.total_out_hi32 == 0); */
648 	*n = CAST(size_t, bz.total_out_lo32);
649 	rc = BZ2_bzDecompressEnd(&bz);
650 	if (rc != BZ_OK)
651 		goto err;
652 
653 	/* let's keep the nul-terminate tradition */
654 	(*newch)[*n] = '\0';
655 
656 	return OKDATA;
657 err:
658 	snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
659 	*n = strlen(RCAST(char *, *newch));
660 	return ERRDATA;
661 }
662 #endif
663 
664 #ifdef BUILTIN_XZLIB
665 private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n)666 uncompressxzlib(const unsigned char *old, unsigned char **newch,
667     size_t bytes_max, size_t *n)
668 {
669 	int rc;
670 	lzma_stream xz;
671 
672 	memset(&xz, 0, sizeof(xz));
673 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
674 	if (rc != LZMA_OK)
675 		goto err;
676 
677 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
678 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
679 
680 	xz.next_in = CCAST(const uint8_t *, old);
681 	xz.avail_in = CAST(uint32_t, *n);
682 	xz.next_out = RCAST(uint8_t *, *newch);
683 	xz.avail_out = CAST(unsigned int, bytes_max);
684 
685 	rc = lzma_code(&xz, LZMA_RUN);
686 	if (rc != LZMA_OK && rc != LZMA_STREAM_END)
687 		goto err;
688 
689 	*n = CAST(size_t, xz.total_out);
690 
691 	lzma_end(&xz);
692 
693 	/* let's keep the nul-terminate tradition */
694 	(*newch)[*n] = '\0';
695 
696 	return OKDATA;
697 err:
698 	snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
699 	*n = strlen(RCAST(char *, *newch));
700 	return ERRDATA;
701 }
702 #endif
703 
704 
705 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)706 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
707 {
708 	char *msg;
709 	va_list ap;
710 	int rv;
711 
712 	va_start(ap, fmt);
713 	rv = vasprintf(&msg, fmt, ap);
714 	va_end(ap);
715 	if (rv < 0) {
716 		*buf = NULL;
717 		*len = 0;
718 		return NODATA;
719 	}
720 	*buf = RCAST(unsigned char *, msg);
721 	*len = strlen(msg);
722 	return ERRDATA;
723 }
724 
/*
 * Close descriptor fd[i] if it is open and mark the slot as closed (-1).
 * A slot that already holds -1 is left alone.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
733 
/*
 * Close both ends of the descriptor pair fd[0], fd[1]; ends already
 * marked -1 are skipped by closefd().
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
741 
/*
 * Arrange for descriptor fd to end up as descriptor i, then drop fd.
 *
 * With HAVE_POSIX_SPAWNP, v points to a posix_spawn_file_actions_t and
 * the dup2/close pair is only recorded there.  Otherwise v is unused and
 * the operations are carried out immediately in the calling process.
 * Nothing happens when fd already equals i.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	(void)v;
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/*
		 * This branch runs in the child created by vfork() in
		 * uncompressbuf(): use _exit(), not exit(), so the
		 * parent's atexit handlers and stdio buffers are left
		 * alone.
		 */
		_exit(1);
	}
	close(fd);
#endif
}
759 
/*
 * Close descriptor fd: recorded as a file action in *v when
 * HAVE_POSIX_SPAWNP is defined, performed immediately otherwise (v is
 * then unused).
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *actions =
	    RCAST(posix_spawn_file_actions_t *, v);

	posix_spawn_file_actions_addclose(actions, fd);
#else
	(void)v;
	close(fd);
#endif
}
770 
771 static void
handledesc(void * v,int fd,int fdp[3][2])772 handledesc(void *v, int fd, int fdp[3][2])
773 {
774 	if (fd != -1) {
775 		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
776 		movedesc(v, STDIN_FILENO, fd);
777 	} else {
778 		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
779 		if (fdp[STDIN_FILENO][1] > 2)
780 		    closedesc(v, fdp[STDIN_FILENO][1]);
781 	}
782 
783 	file_clear_closexec(STDIN_FILENO);
784 
785 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
786 	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
787 	if (fdp[STDOUT_FILENO][0] > 2)
788 		closedesc(v, fdp[STDOUT_FILENO][0]);
789 
790 	file_clear_closexec(STDOUT_FILENO);
791 
792 	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
793 	if (fdp[STDERR_FILENO][0] > 2)
794 		closedesc(v, fdp[STDERR_FILENO][0]);
795 
796 	file_clear_closexec(STDERR_FILENO);
797 }
798 
799 static pid_t
writechild(int fd,const void * old,size_t n)800 writechild(int fd, const void *old, size_t n)
801 {
802 	pid_t pid;
803 
804 	/*
805 	 * fork again, to avoid blocking because both
806 	 * pipes filled
807 	 */
808 	pid = fork();
809 	if (pid == -1) {
810 		DPRINTF("Fork failed (%s)\n", strerror(errno));
811 		exit(1);
812 	}
813 	if (pid == 0) {
814 		/* child */
815 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
816 			DPRINTF("Write failed (%s)\n", strerror(errno));
817 			exit(1);
818 		}
819 		exit(0);
820 	}
821 	/* parent */
822 	return pid;
823 }
824 
825 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)826 filter_error(unsigned char *ubuf, ssize_t n)
827 {
828 	char *p;
829 	char *buf;
830 
831 	ubuf[n] = '\0';
832 	buf = RCAST(char *, ubuf);
833 	while (isspace(CAST(unsigned char, *buf)))
834 		buf++;
835 	DPRINTF("Filter error[[[%s]]]\n", buf);
836 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
837 		*p = '\0';
838 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
839 		*p = '\0';
840 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
841 		++p;
842 		while (isspace(CAST(unsigned char, *p)))
843 			p++;
844 		n = strlen(p);
845 		memmove(ubuf, p, CAST(size_t, n + 1));
846 	}
847 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
848 	if (islower(*ubuf))
849 		*ubuf = toupper(*ubuf);
850 	return n;
851 }
852 
853 private const char *
methodname(size_t method)854 methodname(size_t method)
855 {
856 	switch (method) {
857 #ifdef BUILTIN_DECOMPRESS
858 	case METH_FROZEN:
859 	case METH_ZLIB:
860 		return "zlib";
861 #endif
862 #ifdef BUILTIN_BZLIB
863 	case METH_BZIP:
864 		return "bzlib";
865 #endif
866 #ifdef BUILTIN_XZLIB
867 	case METH_XZ:
868 	case METH_LZMA:
869 		return "xzlib";
870 #endif
871 	default:
872 		return compr[method].argv[0];
873 	}
874 }
875 
876 private int
uncompressbuf(int fd,size_t bytes_max,size_t method,const unsigned char * old,unsigned char ** newch,size_t * n)877 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
878     unsigned char **newch, size_t* n)
879 {
880 	int fdp[3][2];
881 	int status, rv, w;
882 	pid_t pid;
883 	pid_t writepid = -1;
884 	size_t i;
885 	ssize_t r;
886 	char *const *args;
887 #ifdef HAVE_POSIX_SPAWNP
888 	posix_spawn_file_actions_t fa;
889 #endif
890 
891 	switch (method) {
892 #ifdef BUILTIN_DECOMPRESS
893 	case METH_FROZEN:
894 		return uncompressgzipped(old, newch, bytes_max, n);
895 	case METH_ZLIB:
896 		return uncompresszlib(old, newch, bytes_max, n, 1);
897 #endif
898 #ifdef BUILTIN_BZLIB
899 	case METH_BZIP:
900 		return uncompressbzlib(old, newch, bytes_max, n);
901 #endif
902 #ifdef BUILTIN_XZLIB
903 	case METH_XZ:
904 	case METH_LZMA:
905 		return uncompressxzlib(old, newch, bytes_max, n);
906 #endif
907 	default:
908 		break;
909 	}
910 
911 	(void)fflush(stdout);
912 	(void)fflush(stderr);
913 
914 	for (i = 0; i < __arraycount(fdp); i++)
915 		fdp[i][0] = fdp[i][1] = -1;
916 
917 	/*
918 	 * There are multithreaded users who run magic_file()
919 	 * from dozens of threads. If two parallel magic_file() calls
920 	 * analyze two large compressed files, both will spawn
921 	 * an uncompressing child here, which writes out uncompressed data.
922 	 * We read some portion, then close the pipe, then waitpid() the child.
923 	 * If uncompressed data is larger, child shound get EPIPE and exit.
924 	 * However, with *parallel* calls OTHER child may unintentionally
925 	 * inherit pipe fds, thus keeping pipe open and making writes in
926 	 * our child block instead of failing with EPIPE!
927 	 * (For the bug to occur, two threads must mutually inherit their pipes,
928 	 * and both must have large outputs. Thus it happens not that often).
929 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
930 	 */
931 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
932 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
933 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
934 		closep(fdp[STDIN_FILENO]);
935 		closep(fdp[STDOUT_FILENO]);
936 		return makeerror(newch, n, "Cannot create pipe, %s",
937 		    strerror(errno));
938 	}
939 
940 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
941 #ifdef HAVE_POSIX_SPAWNP
942 	posix_spawn_file_actions_init(&fa);
943 
944 	handledesc(&fa, fd, fdp);
945 
946 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
947 	    args, NULL);
948 
949 	posix_spawn_file_actions_destroy(&fa);
950 
951 	if (status == -1) {
952 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
953 		    compr[method].argv[0], strerror(errno));
954 	}
955 #else
956 	/* For processes with large mapped virtual sizes, vfork
957 	 * may be _much_ faster (10-100 times) than fork.
958 	 */
959 	pid = vfork();
960 	if (pid == -1) {
961 		return makeerror(newch, n, "Cannot vfork, %s",
962 		    strerror(errno));
963 	}
964 	if (pid == 0) {
965 		/* child */
966 		/* Note: we are after vfork, do not modify memory
967 		 * in a way which confuses parent. In particular,
968 		 * do not modify fdp[i][j].
969 		 */
970 		handledesc(NULL, fd, fdp);
971 
972 		(void)execvp(compr[method].argv[0], args);
973 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
974 		    compr[method].argv[0], strerror(errno));
975 		_exit(1); /* _exit(), not exit(), because of vfork */
976 	}
977 #endif
978 	/* parent */
979 	/* Close write sides of child stdout/err pipes */
980 	for (i = 1; i < __arraycount(fdp); i++)
981 		closefd(fdp[i], 1);
982 	/* Write the buffer data to child stdin, if we don't have fd */
983 	if (fd == -1) {
984 		closefd(fdp[STDIN_FILENO], 0);
985 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
986 		closefd(fdp[STDIN_FILENO], 1);
987 	}
988 
989 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
990 	if (*newch == NULL) {
991 		rv = makeerror(newch, n, "No buffer, %s",
992 		    strerror(errno));
993 		goto err;
994 	}
995 	rv = OKDATA;
996 	errno = 0;
997 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
998 	if (r == 0 && errno == 0)
999 		goto ok;
1000 	if (r <= 0) {
1001 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1002 		    r != -1 ? strerror(errno) : "no data");
1003 
1004 		rv = ERRDATA;
1005 		if (r == 0 &&
1006 		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
1007 		{
1008 			r = filter_error(*newch, r);
1009 			goto ok;
1010 		}
1011 		free(*newch);
1012 		if  (r == 0)
1013 			rv = makeerror(newch, n, "Read failed, %s",
1014 			    strerror(errno));
1015 		else
1016 			rv = makeerror(newch, n, "No data");
1017 		goto err;
1018 	}
1019 ok:
1020 	*n = r;
1021 	/* NUL terminate, as every buffer is handled here. */
1022 	(*newch)[*n] = '\0';
1023 err:
1024 	closefd(fdp[STDIN_FILENO], 1);
1025 	closefd(fdp[STDOUT_FILENO], 0);
1026 	closefd(fdp[STDERR_FILENO], 0);
1027 
1028 	w = waitpid(pid, &status, 0);
1029 wait_err:
1030 	if (w == -1) {
1031 		free(*newch);
1032 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1033 		DPRINTF("Child wait return %#x\n", status);
1034 	} else if (!WIFEXITED(status)) {
1035 		DPRINTF("Child not exited (%#x)\n", status);
1036 	} else if (WEXITSTATUS(status) != 0) {
1037 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1038 	}
1039 	if (writepid > 0) {
1040 		/* _After_ we know decompressor has exited, our input writer
1041 		 * definitely will exit now (at worst, writing fails in it,
1042 		 * since output fd is closed now on the reading size).
1043 		 */
1044 		w = waitpid(writepid, &status, 0);
1045 		writepid = -1;
1046 		goto wait_err;
1047 	}
1048 
1049 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1050 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1051 
1052 	return rv;
1053 }
1054 #endif
1055 #endif
1056