1 /*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized (returns 1), -1 on error
 *	uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress old
 *		   into *newch using method; stores the length of the result
 *		   in *n and returns OKDATA, NODATA or ERRDATA
 */
35 #include "file.h"
36
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.104 2017/03/29 15:57:48 christos Exp $")
39 #endif
40
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #include <string.h>
47 #include <errno.h>
48 #ifdef HAVE_SIGNAL_H
49 #include <signal.h>
50 # ifndef HAVE_SIG_T
51 typedef void (*sig_t)(int);
52 # endif /* HAVE_SIG_T */
53 #endif
54 #ifndef PHP_WIN32
55 #include <sys/ioctl.h>
56 #endif
57 #ifdef HAVE_SYS_WAIT_H
58 #include <sys/wait.h>
59 #endif
60 #if defined(HAVE_SYS_TIME_H)
61 #include <sys/time.h>
62 #endif
63 #if defined(HAVE_ZLIB_H) && defined(PHP_FILEINFO_UNCOMPRESS)
64 #define BUILTIN_DECOMPRESS
65 #include <zlib.h>
66 #endif
67
/*
 * Drop any FIONREAD definition so that the FIONREAD-guarded probing code
 * in sread() is compiled out.
 */
#undef FIONREAD

/* Command-line flags handed to the external decompressors. */
#define gzip_flags "-cd"
#define lrzip_flags "-do"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors, one per external decompressor.
 * Element 0 is the program name passed to execvp().
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", lrzip_flags, NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
98
99 private const struct {
100 const void *magic;
101 size_t maglen;
102 const char **argv;
103 } compr[] = {
104 { "\037\235", 2, gzip_args }, /* compressed */
105 /* Uncompress can get stuck; so use gzip first if we have it
106 * Idea from Damien Clark, thanks! */
107 { "\037\235", 2, uncompress_args }, /* compressed */
108 { "\037\213", 2, gzip_args }, /* gzipped */
109 { "\037\236", 2, gzip_args }, /* frozen */
110 { "\037\240", 2, gzip_args }, /* SCO LZH */
111 /* the standard pack utilities do not accept standard input */
112 { "\037\036", 2, gzip_args }, /* packed */
113 { "PK\3\4", 4, gzip_args }, /* pkzipped, */
114 /* ...only first file examined */
115 { "BZh", 3, bzip2_args }, /* bzip2-ed */
116 { "LZIP", 4, lzip_args }, /* lzip-ed */
117 { "\3757zXZ\0", 6, xz_args }, /* XZ Utils */
118 { "LRZI", 4, lrzip_args }, /* LRZIP */
119 { "\004\"M\030",4, lz4_args }, /* LZ4 */
120 { "\x28\xB5\x2F\xFD", 4, zstd_args }, /* zstd */
121 #ifdef ZLIBSUPPORT
122 { RCAST(const void *, zlibcmp), 0, zlib_args }, /* zlib */
123 #endif
124 };
125
/*
 * Result codes shared by the decompression helpers in this file and
 * switched on by file_zmagic().
 */
#define OKDATA	0	/* output buffer holds decompressed data */
#define NODATA	1	/* nothing was produced, not even an error text */
#define ERRDATA	2	/* output buffer holds an error message */
129
130 private ssize_t swrite(int, const void *, size_t);
131 #ifdef PHP_FILEINFO_UNCOMPRESS
132 private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
133 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
134 unsigned char **, size_t *);
135 #ifdef BUILTIN_DECOMPRESS
136 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
137 size_t *, int);
138 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
139 size_t *);
140 #endif
141 static int makeerror(unsigned char **, size_t *, const char *, ...);
142 private const char *methodname(size_t);
143
144 protected int
file_zmagic(struct magic_set * ms,int fd,const char * name,const unsigned char * buf,size_t nbytes)145 file_zmagic(struct magic_set *ms, int fd, const char *name,
146 const unsigned char *buf, size_t nbytes)
147 {
148 unsigned char *newbuf = NULL;
149 size_t i, nsz;
150 char *rbuf;
151 file_pushbuf_t *pb;
152 int urv, prv, rv = 0;
153 int mime = ms->flags & MAGIC_MIME;
154 #ifdef HAVE_SIGNAL_H
155 sig_t osigpipe;
156 #endif
157
158 if ((ms->flags & MAGIC_COMPRESS) == 0)
159 return 0;
160
161 #ifdef HAVE_SIGNAL_H
162 osigpipe = signal(SIGPIPE, SIG_IGN);
163 #endif
164 for (i = 0; i < ncompr; i++) {
165 int zm;
166 if (nbytes < compr[i].maglen)
167 continue;
168 #ifdef ZLIBSUPPORT
169 if (compr[i].maglen == 0)
170 zm = (RCAST(int (*)(const unsigned char *),
171 CCAST(void *, compr[i].magic)))(buf);
172 else
173 #endif
174 zm = memcmp(buf, compr[i].magic, compr[i].maglen) == 0;
175
176 if (!zm)
177 continue;
178 nsz = nbytes;
179 urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
180 DPRINTF("uncompressbuf = %d, %s, %zu\n", urv, (char *)newbuf,
181 nsz);
182 switch (urv) {
183 case OKDATA:
184 case ERRDATA:
185
186 ms->flags &= ~MAGIC_COMPRESS;
187 if (urv == ERRDATA)
188 prv = file_printf(ms, "%s ERROR: %s",
189 methodname(i), newbuf);
190 else
191 prv = file_buffer(ms, -1, name, newbuf, nsz);
192 if (prv == -1)
193 goto error;
194 rv = 1;
195 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
196 goto out;
197 if (mime != MAGIC_MIME && mime != 0)
198 goto out;
199 if ((file_printf(ms,
200 mime ? " compressed-encoding=" : " (")) == -1)
201 goto error;
202 if ((pb = file_push_buffer(ms)) == NULL)
203 goto error;
204 /*
205 * XXX: If file_buffer fails here, we overwrite
206 * the compressed text. FIXME.
207 */
208 if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
209 goto error;
210 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
211 if (file_printf(ms, "%s", rbuf) == -1) {
212 free(rbuf);
213 goto error;
214 }
215 free(rbuf);
216 }
217 if (!mime && file_printf(ms, ")") == -1)
218 goto error;
219 /*FALLTHROUGH*/
220 case NODATA:
221 break;
222 default:
223 abort();
224 /*NOTREACHED*/
225 error:
226 rv = -1;
227 break;
228 }
229 }
230 out:
231 DPRINTF("rv = %d\n", rv);
232
233 #ifdef HAVE_SIGNAL_H
234 (void)signal(SIGPIPE, osigpipe);
235 #endif
236 if (newbuf)
237 efree(newbuf);
238 ms->flags |= MAGIC_COMPRESS;
239 DPRINTF("Zmagic returns %d\n", rv);
240 return rv;
241 }
242 #endif
243 /*
244 * `safe' write for sockets and pipes.
245 */
246 private ssize_t
swrite(int fd,const void * buf,size_t n)247 swrite(int fd, const void *buf, size_t n)
248 {
249 ssize_t rv;
250 size_t rn = n;
251
252 do
253 switch (rv = write(fd, buf, n)) {
254 case -1:
255 if (errno == EINTR)
256 continue;
257 return -1;
258 default:
259 n -= rv;
260 buf = CAST(const char *, buf) + rv;
261 break;
262 }
263 while (n > 0);
264 return rn;
265 }
266
267
268 /*
269 * `safe' read for sockets and pipes.
270 */
271 protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)272 sread(int fd, void *buf, size_t n, int canbepipe)
273 {
274 ssize_t rv;
275 #ifdef FIONREAD
276 int t = 0;
277 #endif
278 size_t rn = n;
279
280 if (fd == STDIN_FILENO)
281 goto nocheck;
282
283 #ifdef FIONREAD
284 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
285 #ifdef FD_ZERO
286 ssize_t cnt;
287 for (cnt = 0;; cnt++) {
288 fd_set check;
289 struct timeval tout = {0, 100 * 1000};
290 int selrv;
291
292 FD_ZERO(&check);
293 FD_SET(fd, &check);
294
295 /*
296 * Avoid soft deadlock: do not read if there
297 * is nothing to read from sockets and pipes.
298 */
299 selrv = select(fd + 1, &check, NULL, NULL, &tout);
300 if (selrv == -1) {
301 if (errno == EINTR || errno == EAGAIN)
302 continue;
303 } else if (selrv == 0 && cnt >= 5) {
304 return 0;
305 } else
306 break;
307 }
308 #endif
309 (void)ioctl(fd, FIONREAD, &t);
310 }
311
312 if (t > 0 && (size_t)t < n) {
313 n = t;
314 rn = n;
315 }
316 #endif
317
318 nocheck:
319 do
320 switch ((rv = FINFO_READ_FUNC(fd, buf, n))) {
321 case -1:
322 if (errno == EINTR)
323 continue;
324 return -1;
325 case 0:
326 return rn - n;
327 default:
328 n -= rv;
329 buf = CAST(char *, CCAST(void *, buf)) + rv;
330 break;
331 }
332 while (n > 0);
333 return rn;
334 }
335
336 protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)337 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
338 size_t nbytes)
339 {
340 char buf[4096];
341 ssize_t r;
342 int tfd;
343
344 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
345 #ifndef HAVE_MKSTEMP
346 {
347 char *ptr = mktemp(buf);
348 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
349 r = errno;
350 (void)unlink(ptr);
351 errno = r;
352 }
353 #else
354 {
355 int te;
356 tfd = mkstemp(buf);
357 te = errno;
358 (void)unlink(buf);
359 errno = te;
360 }
361 #endif
362 if (tfd == -1) {
363 file_error(ms, errno,
364 "cannot create temporary file for pipe copy");
365 return -1;
366 }
367
368 if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
369 r = 1;
370 else {
371 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
372 if (swrite(tfd, buf, (size_t)r) != r)
373 break;
374 }
375
376 switch (r) {
377 case -1:
378 file_error(ms, errno, "error copying from pipe to temp file");
379 return -1;
380 case 0:
381 break;
382 default:
383 file_error(ms, errno, "error while writing to temp file");
384 return -1;
385 }
386
387 /*
388 * We duplicate the file descriptor, because fclose on a
389 * tmpfile will delete the file, but any open descriptors
390 * can still access the phantom inode.
391 */
392 if ((fd = dup2(tfd, fd)) == -1) {
393 file_error(ms, errno, "could not dup descriptor for temp file");
394 return -1;
395 }
396 (void)close(tfd);
397 if (FINFO_LSEEK_FUNC(fd, (zend_off_t)0, SEEK_SET) == (zend_off_t)-1) {
398 file_badseek(ms);
399 return -1;
400 }
401 return fd;
402 }
403
404 #ifdef PHP_FILEINFO_UNCOMPRESS
405 #ifdef BUILTIN_DECOMPRESS
406
407 #define FHCRC (1 << 1)
408 #define FEXTRA (1 << 2)
409 #define FNAME (1 << 3)
410 #define FCOMMENT (1 << 4)
411
412
413 private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n)414 uncompressgzipped(const unsigned char *old, unsigned char **newch,
415 size_t bytes_max, size_t *n)
416 {
417 unsigned char flg = old[3];
418 size_t data_start = 10;
419
420 if (flg & FEXTRA) {
421 if (data_start + 1 >= *n)
422 goto err;
423 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
424 }
425 if (flg & FNAME) {
426 while(data_start < *n && old[data_start])
427 data_start++;
428 data_start++;
429 }
430 if (flg & FCOMMENT) {
431 while(data_start < *n && old[data_start])
432 data_start++;
433 data_start++;
434 }
435 if (flg & FHCRC)
436 data_start += 2;
437
438 if (data_start >= *n)
439 goto err;
440
441 *n -= data_start;
442 old += data_start;
443 return uncompresszlib(old, newch, bytes_max, n, 0);
444 err:
445 return makeerror(newch, n, "File too short");
446 }
447
448 private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)449 uncompresszlib(const unsigned char *old, unsigned char **newch,
450 size_t bytes_max, size_t *n, int zlib)
451 {
452 int rc;
453 z_stream z;
454
455 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
456 return makeerror(newch, n, "No buffer, %s", strerror(errno));
457
458 z.next_in = CCAST(Bytef *, old);
459 z.avail_in = CAST(uint32_t, *n);
460 z.next_out = *newch;
461 z.avail_out = CAST(unsigned int, bytes_max);
462 z.zalloc = Z_NULL;
463 z.zfree = Z_NULL;
464 z.opaque = Z_NULL;
465
466 /* LINTED bug in header macro */
467 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
468 if (rc != Z_OK)
469 goto err;
470
471 rc = inflate(&z, Z_SYNC_FLUSH);
472 if (rc != Z_OK && rc != Z_STREAM_END)
473 goto err;
474
475 *n = (size_t)z.total_out;
476 rc = inflateEnd(&z);
477 if (rc != Z_OK)
478 goto err;
479
480 /* let's keep the nul-terminate tradition */
481 (*newch)[*n] = '\0';
482
483 return OKDATA;
484 err:
485 strlcpy((char *)*newch, z.msg ? z.msg : zError(rc), bytes_max);
486 *n = strlen((char *)*newch);
487 return ERRDATA;
488 }
489 #endif
490
491 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)492 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
493 {
494 char *msg;
495 va_list ap;
496 int rv;
497
498 va_start(ap, fmt);
499 rv = vasprintf(&msg, fmt, ap);
500 va_end(ap);
501 if (rv < 0) {
502 *buf = NULL;
503 *len = 0;
504 return NODATA;
505 }
506 *buf = (unsigned char *)msg;
507 *len = strlen(msg);
508 return ERRDATA;
509 }
510
/*
 * closefd: close descriptor fd[i] if it is open (not -1) and mark the
 * slot as closed by storing -1 in it.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
519
/*
 * closep: close both ends of the pipe pair stored in fd[0] and fd[1].
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
527
/*
 * copydesc: make standard descriptor i refer to the matching end of the
 * pipe pair fd: the read end (fd[0]) for standard input, the write end
 * (fd[1]) otherwise.  Both pipe ends are closed afterwards.  Nothing is
 * done when the chosen end already is descriptor i.  Exits the process
 * if dup2() fails.
 */
static void
copydesc(int i, int *fd)
{
	int src;

	if (i == STDIN_FILENO)
		src = fd[0];
	else
		src = fd[1];

	if (src != i) {
		if (dup2(src, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", src, i,
			    strerror(errno));
			exit(1);
		}
		closep(fd);
	}
}
540
/*
 * writechild: feed n bytes of old into the write end of the stdin pipe
 * (fdp[STDIN_FILENO][1]).
 *
 * The write is done by a forked helper process, which exits with status 0
 * on success and 1 if the write fails.  The caller waits for a child to
 * terminate and then closes its copy of the write end.  A failing fork()
 * or wait() terminates the calling process with exit(1).
 *
 * NOTE(review): wait() collects whichever child terminates first, which
 * is not necessarily the helper forked here - confirm this is intended.
 */
static void
writechild(int fdp[3][2], const void *old, size_t n)
{
	int status;
	pid_t pid;

	closefd(fdp[STDIN_FILENO], 0);
	/*
	 * fork again, to avoid blocking because both
	 * pipes filled
	 */
	pid = fork();
	if (pid == 0) {
		/* child: push the data and leave */
		closefd(fdp[STDOUT_FILENO], 0);
		if (swrite(fdp[STDIN_FILENO][1], old, n) != (ssize_t)n) {
			DPRINTF("Write failed (%s)\n", strerror(errno));
			exit(1);
		}
		exit(0);
		/*NOTREACHED*/
	}
	if (pid == -1) {
		DPRINTF("Fork failed (%s)\n", strerror(errno));
		exit(1);
		/*NOTREACHED*/
	}

	/* parent */
	if (wait(&status) == -1) {
		DPRINTF("Wait failed (%s)\n", strerror(errno));
		exit(1);
	}
	DPRINTF("Grandchild wait return %#x\n", status);

	closefd(fdp[STDIN_FILENO], 1);
}
575
576 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)577 filter_error(unsigned char *ubuf, ssize_t n)
578 {
579 char *p;
580 char *buf;
581
582 ubuf[n] = '\0';
583 buf = (char *)ubuf;
584 while (isspace((unsigned char)*buf))
585 buf++;
586 DPRINTF("Filter error[[[%s]]]\n", buf);
587 if ((p = strchr((char *)buf, '\n')) != NULL)
588 *p = '\0';
589 if ((p = strchr((char *)buf, ';')) != NULL)
590 *p = '\0';
591 if ((p = strrchr((char *)buf, ':')) != NULL) {
592 ++p;
593 while (isspace((unsigned char)*p))
594 p++;
595 n = strlen(p);
596 memmove(ubuf, p, CAST(size_t, n + 1));
597 }
598 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
599 if (islower(*ubuf))
600 *ubuf = toupper(*ubuf);
601 return n;
602 }
603
604 private const char *
methodname(size_t method)605 methodname(size_t method)
606 {
607 #ifdef BUILTIN_DECOMPRESS
608 /* FIXME: This doesn't cope with bzip2 */
609 if (method == 2 || compr[method].maglen == 0)
610 return "zlib";
611 #endif
612 return compr[method].argv[0];
613 }
614
615 private int
uncompressbuf(int fd,size_t bytes_max,size_t method,const unsigned char * old,unsigned char ** newch,size_t * n)616 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
617 unsigned char **newch, size_t* n)
618 {
619 int fdp[3][2];
620 int status, rv;
621 size_t i;
622 ssize_t r;
623
624 #ifdef BUILTIN_DECOMPRESS
625 /* FIXME: This doesn't cope with bzip2 */
626 if (method == 2)
627 return uncompressgzipped(old, newch, bytes_max, n);
628 if (compr[method].maglen == 0)
629 return uncompresszlib(old, newch, bytes_max, n, 1);
630 #endif
631 (void)fflush(stdout);
632 (void)fflush(stderr);
633
634 for (i = 0; i < __arraycount(fdp); i++)
635 fdp[i][0] = fdp[i][1] = -1;
636
637 if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
638 pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
639 closep(fdp[STDIN_FILENO]);
640 closep(fdp[STDOUT_FILENO]);
641 return makeerror(newch, n, "Cannot create pipe, %s",
642 strerror(errno));
643 }
644 switch (fork()) {
645 case 0: /* child */
646 if (fd != -1) {
647 fdp[STDIN_FILENO][0] = fd;
648 (void) lseek(fd, (off_t)0, SEEK_SET);
649 }
650
651 for (i = 0; i < __arraycount(fdp); i++)
652 copydesc(CAST(int, i), fdp[i]);
653
654 (void)execvp(compr[method].argv[0],
655 (char *const *)(intptr_t)compr[method].argv);
656 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
657 compr[method].argv[0], strerror(errno));
658 exit(1);
659 /*NOTREACHED*/
660 case -1:
661 return makeerror(newch, n, "Cannot fork, %s",
662 strerror(errno));
663
664 default: /* parent */
665 for (i = 1; i < __arraycount(fdp); i++)
666 closefd(fdp[i], 1);
667
668 /* Write the buffer data to the child, if we don't have fd */
669 if (fd == -1)
670 writechild(fdp, old, *n);
671
672 *newch = CAST(unsigned char *, malloc(bytes_max + 1));
673 if (*newch == NULL) {
674 rv = makeerror(newch, n, "No buffer, %s",
675 strerror(errno));
676 goto err;
677 }
678 rv = OKDATA;
679 if ((r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0)) > 0)
680 break;
681 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
682 r != -1 ? strerror(errno) : "no data");
683
684 rv = ERRDATA;
685 if (r == 0 &&
686 (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
687 {
688 r = filter_error(*newch, r);
689 break;
690 }
691 free(*newch);
692 if (r == 0)
693 rv = makeerror(newch, n, "Read failed, %s",
694 strerror(errno));
695 else
696 rv = makeerror(newch, n, "No data");
697 goto err;
698 }
699 }
700 #endif /* if PHP_FILEINFO_UNCOMPRESS */
701