xref: /PHP-8.1/ext/fileinfo/libmagic/compress.c (revision 3b9173dc)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized
 *	uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress old
 *		   into *newch using method; stores the resulting size in *n
 *		   and returns OKDATA, NODATA or ERRDATA
 */
35 #include "file.h"
36 
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
39 #endif
40 
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #include <string.h>
47 #include <errno.h>
48 #include <ctype.h>
49 #include <stdarg.h>
50 #include <signal.h>
51 #ifndef HAVE_SIG_T
52 typedef void (*sig_t)(int);
53 #endif /* HAVE_SIG_T */
54 #ifndef PHP_WIN32
55 #include <sys/ioctl.h>
56 #endif
57 #ifdef HAVE_SYS_WAIT_H
58 #include <sys/wait.h>
59 #endif
60 #if defined(HAVE_SYS_TIME_H)
61 #include <sys/time.h>
62 #endif
63 #if defined(HAVE_ZLIB_H) && defined(PHP_FILEINFO_UNCOMPRESS)
64 #define BUILTIN_DECOMPRESS
65 #include <zlib.h>
66 #endif
67 
68 #undef FIONREAD
69 
70 #if defined(PHP_FILEINFO_UNCOMPRESS)
71 #define BUILTIN_BZLIB
72 #include <bzlib.h>
73 #endif
74 
75 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
76 #define BUILTIN_XZLIB
77 #include <lzma.h>
78 #endif
79 
80 #ifdef DEBUG
81 int tty = -1;
82 #define DPRINTF(...)	do { \
83 	if (tty == -1) \
84 		tty = open("/dev/tty", O_RDWR); \
85 	if (tty == -1) \
86 		abort(); \
87 	dprintf(tty, __VA_ARGS__); \
88 } while (/*CONSTCOND*/0)
89 #else
90 #define DPRINTF(...)
91 #endif
92 
93 #ifdef ZLIBSUPPORT
94 /*
95  * The following python code is not really used because ZLIBSUPPORT is only
96  * defined if we have a built-in zlib, and the built-in zlib handles that.
97  * That is not true for android where we have zlib.h and not -lz.
98  */
99 static const char zlibcode[] =
100     "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
101 
102 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
103 
/*
 * Check whether buf starts with a plausible zlib (RFC 1950) stream header.
 *
 * buf must hold at least two bytes.  Returns 1 when the low nibble of the
 * first byte is 8, its top bit is clear, and the 16-bit value
 * (buf[0] << 8) | buf[1] is a multiple of 31; returns 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The check value is defined arithmetically with buf[0] as the most
	 * significant byte, so it must not depend on the host byte order.
	 */
	check = buf[0];
	check = (check << 8) | buf[1];
	return check % 31 == 0;
}
120 #endif
121 
122 #ifdef PHP_FILEINFO_UNCOMPRESS
123 
/*
 * Heuristic match for the "lzma" table entry.
 *
 * Reads buf[0..2] and, only when those match, buf[12]; the caller must
 * therefore supply at least 13 bytes.  Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	const int lead_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

	if (!lead_ok)
		return 0;
	return buf[12] == 0 || buf[12] == 0xff;
}
133 
134 #define gzip_flags "-cd"
135 #define lrzip_flags "-do"
136 #define lzip_flags gzip_flags
137 
138 static const char *gzip_args[] = {
139 	"gzip", gzip_flags, NULL
140 };
141 static const char *uncompress_args[] = {
142 	"uncompress", "-c", NULL
143 };
144 static const char *bzip2_args[] = {
145 	"bzip2", "-cd", NULL
146 };
147 static const char *lzip_args[] = {
148 	"lzip", lzip_flags, NULL
149 };
150 static const char *xz_args[] = {
151 	"xz", "-cd", NULL
152 };
153 static const char *lrzip_args[] = {
154 	"lrzip", lrzip_flags, NULL
155 };
156 static const char *lz4_args[] = {
157 	"lz4", "-cd", NULL
158 };
159 static const char *zstd_args[] = {
160 	"zstd", "-cd", NULL
161 };
162 
163 #define	do_zlib		NULL
164 #define	do_bzlib	NULL
165 
166 private const struct {
167 	union {
168 		const char *magic;
169 		int (*func)(const unsigned char *);
170 	} u;
171 	int maglen;
172 	const char **argv;
173 	void *unused;
174 } compr[] = {
175 #define METH_FROZEN	2
176 #define METH_BZIP	7
177 #define METH_XZ		9
178 #define METH_LZMA	13
179 #define METH_ZLIB	14
180     { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
181     /* Uncompress can get stuck; so use gzip first if we have it
182      * Idea from Damien Clark, thanks! */
183     { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
184     { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
185     { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
186     { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
187     /* the standard pack utilities do not accept standard input */
188     { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
189     { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
190     /* ...only first file examined */
191     { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
192     { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
193     { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
194     { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
195     { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
196     { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
197     { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
198 #ifdef ZLIBSUPPORT
199     { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
200 #endif
201 };
202 
203 #define OKDATA 	0
204 #define NODATA	1
205 #define ERRDATA	2
206 
207 private ssize_t swrite(int, const void *, size_t);
208 #if HAVE_FORK
209 private size_t ncompr = __arraycount(compr);
210 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
211     unsigned char **, size_t *);
212 #ifdef BUILTIN_DECOMPRESS
213 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
214     size_t *, int);
215 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
216     size_t *);
217 #endif
218 #ifdef BUILTIN_BZLIB
219 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
220     size_t *);
221 #endif
222 #ifdef BUILTIN_XZLIB
223 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
224     size_t *);
225 #endif
226 
227 static int makeerror(unsigned char **, size_t *, const char *, ...);
228 private const char *methodname(size_t);
229 
230 private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)231 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
232 {
233 	unsigned char *p;
234 	int mime = ms->flags & MAGIC_MIME;
235 
236 	if (!mime)
237 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
238 
239 	for (p = buf; *p; p++)
240 		if (!isalnum(*p))
241 			*p = '-';
242 
243 	return file_printf(ms, "application/x-decompression-error-%s-%s",
244 	    methodname(i), buf);
245 }
246 
247 protected int
file_zmagic(struct magic_set * ms,const struct buffer * b,const char * name)248 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
249 {
250 	unsigned char *newbuf = NULL;
251 	size_t i, nsz;
252 	char *rbuf;
253 	file_pushbuf_t *pb;
254 	int urv, prv, rv = 0;
255 	int mime = ms->flags & MAGIC_MIME;
256 	int fd = b->fd;
257 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
258 	size_t nbytes = b->flen;
259 	int sa_saved = 0;
260 	struct sigaction sig_act;
261 
262 	if ((ms->flags & MAGIC_COMPRESS) == 0)
263 		return 0;
264 
265 	for (i = 0; i < ncompr; i++) {
266 		int zm;
267 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
268 			continue;
269 		if (compr[i].maglen < 0) {
270 			zm = (*compr[i].u.func)(buf);
271 		} else {
272 			zm = memcmp(buf, compr[i].u.magic,
273 			    CAST(size_t, compr[i].maglen)) == 0;
274 		}
275 
276 		if (!zm)
277 			continue;
278 
279 		/* Prevent SIGPIPE death if child dies unexpectedly */
280 		if (!sa_saved) {
281 			//We can use sig_act for both new and old, but
282 			struct sigaction new_act;
283 			memset(&new_act, 0, sizeof(new_act));
284 			new_act.sa_handler = SIG_IGN;
285 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
286 		}
287 
288 		nsz = nbytes;
289 		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
290 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
291 		    (char *)newbuf, nsz);
292 		switch (urv) {
293 		case OKDATA:
294 		case ERRDATA:
295 			ms->flags &= ~MAGIC_COMPRESS;
296 			if (urv == ERRDATA)
297 				prv = format_decompression_error(ms, i, newbuf);
298 			else
299 				prv = file_buffer(ms, NULL, NULL, name, newbuf, nsz);
300 			if (prv == -1)
301 				goto error;
302 			rv = 1;
303 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
304 				goto out;
305 			if (mime != MAGIC_MIME && mime != 0)
306 				goto out;
307 			if ((file_printf(ms,
308 			    mime ? " compressed-encoding=" : " (")) == -1)
309 				goto error;
310 			if ((pb = file_push_buffer(ms)) == NULL)
311 				goto error;
312 			/*
313 			 * XXX: If file_buffer fails here, we overwrite
314 			 * the compressed text. FIXME.
315 			 */
316 			if (file_buffer(ms, NULL, NULL, NULL, buf, nbytes) == -1) {
317 				if (file_pop_buffer(ms, pb) != NULL)
318 					abort();
319 				goto error;
320 			}
321 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
322 				if (file_printf(ms, "%s", rbuf) == -1) {
323 					efree(rbuf);
324 					goto error;
325 				}
326 				efree(rbuf);
327 			}
328 			if (!mime && file_printf(ms, ")") == -1)
329 				goto error;
330 			/*FALLTHROUGH*/
331 		case NODATA:
332 			break;
333 		default:
334 			abort();
335 			/*NOTREACHED*/
336 		error:
337 			rv = -1;
338 			break;
339 		}
340 	}
341 out:
342 	DPRINTF("rv = %d\n", rv);
343 
344 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
345 		(void)sigaction(SIGPIPE, &sig_act, NULL);
346 
347 	if (newbuf)
348 		efree(newbuf);
349 	ms->flags |= MAGIC_COMPRESS;
350 	DPRINTF("Zmagic returns %d\n", rv);
351 	return rv;
352 }
353 #endif
354 /*
355  * `safe' write for sockets and pipes.
356  */
357 private ssize_t
swrite(int fd,const void * buf,size_t n)358 swrite(int fd, const void *buf, size_t n)
359 {
360 	ssize_t rv;
361 	size_t rn = n;
362 
363 	do
364 		switch (rv = write(fd, buf, n)) {
365 		case -1:
366 			if (errno == EINTR)
367 				continue;
368 			return -1;
369 		default:
370 			n -= rv;
371 			buf = CAST(const char *, buf) + rv;
372 			break;
373 		}
374 	while (n > 0);
375 	return rn;
376 }
377 
378 
379 /*
380  * `safe' read for sockets and pipes.
381  */
382 protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)383 sread(int fd, void *buf, size_t n, int canbepipe)
384 {
385 	ssize_t rv;
386 #ifdef FIONREAD
387 	int t = 0;
388 #endif
389 	size_t rn = n;
390 
391 	if (fd == STDIN_FILENO)
392 		goto nocheck;
393 
394 #ifdef FIONREAD
395 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
396 #ifdef FD_ZERO
397 		ssize_t cnt;
398 		for (cnt = 0;; cnt++) {
399 			fd_set check;
400 			struct timeval tout = {0, 100 * 1000};
401 			int selrv;
402 
403 			FD_ZERO(&check);
404 			FD_SET(fd, &check);
405 
406 			/*
407 			 * Avoid soft deadlock: do not read if there
408 			 * is nothing to read from sockets and pipes.
409 			 */
410 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
411 			if (selrv == -1) {
412 				if (errno == EINTR || errno == EAGAIN)
413 					continue;
414 			} else if (selrv == 0 && cnt >= 5) {
415 				return 0;
416 			} else
417 				break;
418 		}
419 #endif
420 		(void)ioctl(fd, FIONREAD, &t);
421 	}
422 
423 	if (t > 0 && CAST(size_t, t) < n) {
424 		n = t;
425 		rn = n;
426 	}
427 #endif
428 
429 nocheck:
430 	do
431 		switch ((rv = FINFO_READ_FUNC(fd, buf, n))) {
432 		case -1:
433 			if (errno == EINTR)
434 				continue;
435 			return -1;
436 		case 0:
437 			return rn - n;
438 		default:
439 			n -= rv;
440 			buf = CAST(char *, CCAST(void *, buf)) + rv;
441 			break;
442 		}
443 	while (n > 0);
444 	return rn;
445 }
446 
447 protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)448 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
449     size_t nbytes)
450 {
451 	char buf[4096];
452 	ssize_t r;
453 	int tfd;
454 
455 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
456 #ifndef HAVE_MKSTEMP
457 	{
458 		char *ptr = mktemp(buf);
459 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
460 		r = errno;
461 		(void)unlink(ptr);
462 		errno = r;
463 	}
464 #else
465 	{
466 		int te;
467 		mode_t ou = umask(0);
468 		tfd = mkstemp(buf);
469 		(void)umask(ou);
470 		te = errno;
471 		(void)unlink(buf);
472 		errno = te;
473 	}
474 #endif
475 	if (tfd == -1) {
476 		file_error(ms, errno,
477 		    "cannot create temporary file for pipe copy");
478 		return -1;
479 	}
480 
481 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
482 		r = 1;
483 	else {
484 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
485 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
486 				break;
487 	}
488 
489 	switch (r) {
490 	case -1:
491 		file_error(ms, errno, "error copying from pipe to temp file");
492 		return -1;
493 	case 0:
494 		break;
495 	default:
496 		file_error(ms, errno, "error while writing to temp file");
497 		return -1;
498 	}
499 
500 	/*
501 	 * We duplicate the file descriptor, because fclose on a
502 	 * tmpfile will delete the file, but any open descriptors
503 	 * can still access the phantom inode.
504 	 */
505 	if ((fd = dup2(tfd, fd)) == -1) {
506 		file_error(ms, errno, "could not dup descriptor for temp file");
507 		return -1;
508 	}
509 	(void)close(tfd);
510 	if (FINFO_LSEEK_FUNC(fd, (zend_off_t)0, SEEK_SET) == (zend_off_t)-1) {
511 		file_badseek(ms);
512 		return -1;
513 	}
514 	return fd;
515 }
516 #ifdef PHP_FILEINFO_UNCOMPRESS
517 #ifdef BUILTIN_DECOMPRESS
518 
519 #define FHCRC		(1 << 1)
520 #define FEXTRA		(1 << 2)
521 #define FNAME		(1 << 3)
522 #define FCOMMENT	(1 << 4)
523 
524 
525 private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n)526 uncompressgzipped(const unsigned char *old, unsigned char **newch,
527     size_t bytes_max, size_t *n)
528 {
529 	unsigned char flg = old[3];
530 	size_t data_start = 10;
531 
532 	if (flg & FEXTRA) {
533 		if (data_start + 1 >= *n)
534 			goto err;
535 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
536 	}
537 	if (flg & FNAME) {
538 		while(data_start < *n && old[data_start])
539 			data_start++;
540 		data_start++;
541 	}
542 	if (flg & FCOMMENT) {
543 		while(data_start < *n && old[data_start])
544 			data_start++;
545 		data_start++;
546 	}
547 	if (flg & FHCRC)
548 		data_start += 2;
549 
550 	if (data_start >= *n)
551 		goto err;
552 
553 	*n -= data_start;
554 	old += data_start;
555 	return uncompresszlib(old, newch, bytes_max, n, 0);
556 err:
557 	return makeerror(newch, n, "File too short");
558 }
559 
560 private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)561 uncompresszlib(const unsigned char *old, unsigned char **newch,
562     size_t bytes_max, size_t *n, int zlib)
563 {
564 	int rc;
565 	z_stream z;
566 
567 	if ((*newch = CAST(unsigned char *, emalloc(bytes_max + 1))) == NULL)
568 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
569 
570 	z.next_in = CCAST(Bytef *, old);
571 	z.avail_in = CAST(uint32_t, *n);
572 	z.next_out = *newch;
573 	z.avail_out = CAST(unsigned int, bytes_max);
574 	z.zalloc = Z_NULL;
575 	z.zfree = Z_NULL;
576 	z.opaque = Z_NULL;
577 
578 	/* LINTED bug in header macro */
579 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
580 	if (rc != Z_OK)
581 		goto err;
582 
583 	rc = inflate(&z, Z_SYNC_FLUSH);
584 	if (rc != Z_OK && rc != Z_STREAM_END)
585 		goto err;
586 
587 	*n = CAST(size_t, z.total_out);
588 	rc = inflateEnd(&z);
589 	if (rc != Z_OK)
590 		goto err;
591 
592 	/* let's keep the nul-terminate tradition */
593 	(*newch)[*n] = '\0';
594 
595 	return OKDATA;
596 err:
597 	strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
598 	*n = strlen(RCAST(char *, *newch));
599 	return ERRDATA;
600 }
601 #endif
602 
603 #ifdef BUILTIN_BZLIB
604 private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n)605 uncompressbzlib(const unsigned char *old, unsigned char **newch,
606     size_t bytes_max, size_t *n)
607 {
608 	int rc;
609 	bz_stream bz;
610 
611 	memset(&bz, 0, sizeof(bz));
612 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
613 	if (rc != BZ_OK)
614 		goto err;
615 
616 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
617 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
618 
619 	bz.next_in = CCAST(char *, RCAST(const char *, old));
620 	bz.avail_in = CAST(uint32_t, *n);
621 	bz.next_out = RCAST(char *, *newch);
622 	bz.avail_out = CAST(unsigned int, bytes_max);
623 
624 	rc = BZ2_bzDecompress(&bz);
625 	if (rc != BZ_OK && rc != BZ_STREAM_END)
626 		goto err;
627 
628 	/* Assume byte_max is within 32bit */
629 	/* assert(bz.total_out_hi32 == 0); */
630 	*n = CAST(size_t, bz.total_out_lo32);
631 	rc = BZ2_bzDecompressEnd(&bz);
632 	if (rc != BZ_OK)
633 		goto err;
634 
635 	/* let's keep the nul-terminate tradition */
636 	(*newch)[*n] = '\0';
637 
638 	return OKDATA;
639 err:
640 	snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
641 	*n = strlen(RCAST(char *, *newch));
642 	return ERRDATA;
643 }
644 #endif
645 
646 #ifdef BUILTIN_XZLIB
647 private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n)648 uncompressxzlib(const unsigned char *old, unsigned char **newch,
649     size_t bytes_max, size_t *n)
650 {
651 	int rc;
652 	lzma_stream xz;
653 
654 	memset(&xz, 0, sizeof(xz));
655 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
656 	if (rc != LZMA_OK)
657 		goto err;
658 
659 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
660 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
661 
662 	xz.next_in = CCAST(const uint8_t *, old);
663 	xz.avail_in = CAST(uint32_t, *n);
664 	xz.next_out = RCAST(uint8_t *, *newch);
665 	xz.avail_out = CAST(unsigned int, bytes_max);
666 
667 	rc = lzma_code(&xz, LZMA_RUN);
668 	if (rc != LZMA_OK && rc != LZMA_STREAM_END)
669 		goto err;
670 
671 	*n = CAST(size_t, xz.total_out);
672 
673 	lzma_end(&xz);
674 
675 	/* let's keep the nul-terminate tradition */
676 	(*newch)[*n] = '\0';
677 
678 	return OKDATA;
679 err:
680 	snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
681 	*n = strlen(RCAST(char *, *newch));
682 	return ERRDATA;
683 }
684 #endif
685 
686 
687 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)688 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
689 {
690 	char *msg;
691 	va_list ap;
692 	int rv;
693 
694 	va_start(ap, fmt);
695 	rv = vasprintf(&msg, fmt, ap);
696 	va_end(ap);
697 	if (rv < 0) {
698 		*buf = NULL;
699 		*len = 0;
700 		return NODATA;
701 	}
702 	*buf = RCAST(unsigned char *, msg);
703 	*len = strlen(msg);
704 	return ERRDATA;
705 }
706 
/*
 * Close descriptor fd[i] unless it is already marked closed (-1), and mark
 * it closed afterwards so that repeated calls are harmless.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
715 
/*
 * Close both ends of the pipe stored in fd[0] and fd[1].
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
723 
/*
 * Make descriptor i refer to the same file as fd.
 *
 * Returns 0 if fd already is i (nothing to do) and 1 if a dup2() was
 * performed, in which case the caller may close fd.  Does not return if
 * dup2() fails.
 *
 * This runs in the child created by vfork(), so failure must terminate
 * with _exit(): exit() would run the parent's atexit handlers and flush
 * its stdio buffers from the child.
 */
static int
copydesc(int i, int fd)
{
	if (fd == i)
		return 0; /* "no dup was necessary" */
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		_exit(1);
	}
	return 1;
}
735 
736 static pid_t
writechild(int fd,const void * old,size_t n)737 writechild(int fd, const void *old, size_t n)
738 {
739 	pid_t pid;
740 
741 	/*
742 	 * fork again, to avoid blocking because both
743 	 * pipes filled
744 	 */
745 	pid = fork();
746 	if (pid == -1) {
747 		DPRINTF("Fork failed (%s)\n", strerror(errno));
748 		exit(1);
749 	}
750 	if (pid == 0) {
751 		/* child */
752 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
753 			DPRINTF("Write failed (%s)\n", strerror(errno));
754 			exit(1);
755 		}
756 		exit(0);
757 	}
758 	/* parent */
759 	return pid;
760 }
761 
762 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)763 filter_error(unsigned char *ubuf, ssize_t n)
764 {
765 	char *p;
766 	char *buf;
767 
768 	ubuf[n] = '\0';
769 	buf = RCAST(char *, ubuf);
770 	while (isspace(CAST(unsigned char, *buf)))
771 		buf++;
772 	DPRINTF("Filter error[[[%s]]]\n", buf);
773 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
774 		*p = '\0';
775 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
776 		*p = '\0';
777 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
778 		++p;
779 		while (isspace(CAST(unsigned char, *p)))
780 			p++;
781 		n = strlen(p);
782 		memmove(ubuf, p, CAST(size_t, n + 1));
783 	}
784 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
785 	if (islower(*ubuf))
786 		*ubuf = toupper(*ubuf);
787 	return n;
788 }
789 
790 private const char *
methodname(size_t method)791 methodname(size_t method)
792 {
793 	switch (method) {
794 #ifdef BUILTIN_DECOMPRESS
795 	case METH_FROZEN:
796 	case METH_ZLIB:
797 		return "zlib";
798 #endif
799 #ifdef BUILTIN_BZLIB
800 	case METH_BZIP:
801 		return "bzlib";
802 #endif
803 #ifdef BUILTIN_XZLIB
804 	case METH_XZ:
805 	case METH_LZMA:
806 		return "xzlib";
807 #endif
808 	default:
809 		return compr[method].argv[0];
810 	}
811 }
812 
813 private int
uncompressbuf(int fd,size_t bytes_max,size_t method,const unsigned char * old,unsigned char ** newch,size_t * n)814 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
815     unsigned char **newch, size_t* n)
816 {
817 	int fdp[3][2];
818 	int status, rv, w;
819 	pid_t pid;
820 	pid_t writepid = -1;
821 	size_t i;
822 	ssize_t r;
823 
824 	switch (method) {
825 #ifdef BUILTIN_DECOMPRESS
826 	case METH_FROZEN:
827 		return uncompressgzipped(old, newch, bytes_max, n);
828 	case METH_ZLIB:
829 		return uncompresszlib(old, newch, bytes_max, n, 1);
830 #endif
831 #ifdef BUILTIN_BZLIB
832 	case METH_BZIP:
833 		return uncompressbzlib(old, newch, bytes_max, n);
834 #endif
835 #ifdef BUILTIN_XZLIB
836 	case METH_XZ:
837 	case METH_LZMA:
838 		return uncompressxzlib(old, newch, bytes_max, n);
839 #endif
840 	default:
841 		break;
842 	}
843 
844 	(void)fflush(stdout);
845 	(void)fflush(stderr);
846 
847 	for (i = 0; i < __arraycount(fdp); i++)
848 		fdp[i][0] = fdp[i][1] = -1;
849 
850 	/*
851 	 * There are multithreaded users who run magic_file()
852 	 * from dozens of threads. If two parallel magic_file() calls
853 	 * analyze two large compressed files, both will spawn
854 	 * an uncompressing child here, which writes out uncompressed data.
855 	 * We read some portion, then close the pipe, then waitpid() the child.
856 	 * If uncompressed data is larger, child shound get EPIPE and exit.
857 	 * However, with *parallel* calls OTHER child may unintentionally
858 	 * inherit pipe fds, thus keeping pipe open and making writes in
859 	 * our child block instead of failing with EPIPE!
860 	 * (For the bug to occur, two threads must mutually inherit their pipes,
861 	 * and both must have large outputs. Thus it happens not that often).
862 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
863 	 */
864 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
865 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
866 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
867 		closep(fdp[STDIN_FILENO]);
868 		closep(fdp[STDOUT_FILENO]);
869 		return makeerror(newch, n, "Cannot create pipe, %s",
870 		    strerror(errno));
871 	}
872 
873 	/* For processes with large mapped virtual sizes, vfork
874 	 * may be _much_ faster (10-100 times) than fork.
875 	 */
876 	pid = vfork();
877 	if (pid == -1) {
878 		return makeerror(newch, n, "Cannot vfork, %s",
879 		    strerror(errno));
880 	}
881 	if (pid == 0) {
882 		/* child */
883 		/* Note: we are after vfork, do not modify memory
884 		 * in a way which confuses parent. In particular,
885 		 * do not modify fdp[i][j].
886 		 */
887 		if (fd != -1) {
888 			(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
889 			if (copydesc(STDIN_FILENO, fd))
890 				(void) close(fd);
891 		} else {
892 			if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
893 				(void) close(fdp[STDIN_FILENO][0]);
894 			if (fdp[STDIN_FILENO][1] > 2)
895 				(void) close(fdp[STDIN_FILENO][1]);
896 		}
897 		file_clear_closexec(STDIN_FILENO);
898 
899 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
900 		if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
901 			(void) close(fdp[STDOUT_FILENO][1]);
902 		if (fdp[STDOUT_FILENO][0] > 2)
903 			(void) close(fdp[STDOUT_FILENO][0]);
904 		file_clear_closexec(STDOUT_FILENO);
905 
906 		if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
907 			(void) close(fdp[STDERR_FILENO][1]);
908 		if (fdp[STDERR_FILENO][0] > 2)
909 			(void) close(fdp[STDERR_FILENO][0]);
910 		file_clear_closexec(STDERR_FILENO);
911 
912 		(void)execvp(compr[method].argv[0],
913 		    RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
914 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
915 		    compr[method].argv[0], strerror(errno));
916 		_exit(1); /* _exit(), not exit(), because of vfork */
917 	}
918 	/* parent */
919 	/* Close write sides of child stdout/err pipes */
920 	for (i = 1; i < __arraycount(fdp); i++)
921 		closefd(fdp[i], 1);
922 	/* Write the buffer data to child stdin, if we don't have fd */
923 	if (fd == -1) {
924 		closefd(fdp[STDIN_FILENO], 0);
925 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
926 		closefd(fdp[STDIN_FILENO], 1);
927 	}
928 
929 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
930 	if (*newch == NULL) {
931 		rv = makeerror(newch, n, "No buffer, %s",
932 		    strerror(errno));
933 		goto err;
934 	}
935 	rv = OKDATA;
936 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
937 	if (r <= 0) {
938 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
939 		    r != -1 ? strerror(errno) : "no data");
940 
941 		rv = ERRDATA;
942 		if (r == 0 &&
943 		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
944 		{
945 			r = filter_error(*newch, r);
946 			goto ok;
947 		}
948 		free(*newch);
949 		if  (r == 0)
950 			rv = makeerror(newch, n, "Read failed, %s",
951 			    strerror(errno));
952 		else
953 			rv = makeerror(newch, n, "No data");
954 		goto err;
955 	}
956 ok:
957 	*n = r;
958 	/* NUL terminate, as every buffer is handled here. */
959 	(*newch)[*n] = '\0';
960 err:
961 	closefd(fdp[STDIN_FILENO], 1);
962 	closefd(fdp[STDOUT_FILENO], 0);
963 	closefd(fdp[STDERR_FILENO], 0);
964 
965 	w = waitpid(pid, &status, 0);
966 wait_err:
967 	if (w == -1) {
968 		free(*newch);
969 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
970 		DPRINTF("Child wait return %#x\n", status);
971 	} else if (!WIFEXITED(status)) {
972 		DPRINTF("Child not exited (%#x)\n", status);
973 	} else if (WEXITSTATUS(status) != 0) {
974 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
975 	}
976 	if (writepid > 0) {
977 		/* _After_ we know decompressor has exited, our input writer
978 		 * definitely will exit now (at worst, writing fails in it,
979 		 * since output fd is closed now on the reading size).
980 		 */
981 		w = waitpid(writepid, &status, 0);
982 		writepid = -1;
983 		goto wait_err;
984 	}
985 
986 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
987 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
988 
989 	return rv;
990 }
991 #endif
992 #endif
993