xref: /PHP-7.3/ext/fileinfo/libmagic/apprentice.c (revision c621182c)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * apprentice - make one pass through /etc/magic, learning its secrets.
30  */
31 
32 #include "php.h"
33 
34 #include "file.h"
35 
36 #ifndef	lint
37 FILE_RCSID("@(#)$File: apprentice.c,v 1.270 2018/02/21 21:26:48 christos Exp $")
38 #endif	/* lint */
39 
40 #include "magic.h"
41 #include "patchlevel.h"
42 #include <stdlib.h>
43 
44 #if defined(__hpux) && !defined(HAVE_STRTOULL)
45 #if SIZEOF_LONG == 8
46 # define strtoull strtoul
47 #else
48 # define strtoull __strtoull
49 #endif
50 #endif
51 
52 #ifdef PHP_WIN32
53 #include "win32/unistd.h"
54 #define strtoull _strtoui64
55 #else
56 #include <unistd.h>
57 #endif
58 #include <string.h>
59 #include <assert.h>
60 #include <ctype.h>
61 #include <fcntl.h>
62 
63 #ifndef SSIZE_MAX
64 #define MAXMAGIC_SIZE        ((ssize_t)0x7fffffff)
65 #else
66 #define MAXMAGIC_SIZE        SSIZE_MAX
67 #endif
68 
69 #define	EATAB {while (isascii((unsigned char) *l) && \
70 		      isspace((unsigned char) *l))  ++l;}
71 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
72 			tolower((unsigned char) (l)) : (l))
73 /*
74  * Work around a bug in headers on Digital Unix.
75  * At least confirmed for: OSF1 V4.0 878
76  */
77 #if defined(__osf__) && defined(__DECC)
78 #ifdef MAP_FAILED
79 #undef MAP_FAILED
80 #endif
81 #endif
82 
83 #ifndef offsetof
84 #define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD))
85 #endif
86 
87 #ifndef MAP_FAILED
88 #define MAP_FAILED (void *) -1
89 #endif
90 
91 #ifndef MAP_FILE
92 #define MAP_FILE 0
93 #endif
94 
95 #define ALLOC_CHUNK	(size_t)10
96 #define ALLOC_INCR	(size_t)200
97 
98 #define MAP_TYPE_USER	0
99 #define MAP_TYPE_MALLOC	1
100 #define MAP_TYPE_MMAP	2
101 
/*
 * One magic entry while it is being assembled from a magic file.
 * `mp' points at the entry's struct magic records; the record it points
 * at is the one the sort callback ranks by strength.
 */
struct magic_entry {
	struct magic *mp;
	/* NOTE(review): presumably the number of records in use in mp;
	 * maintained by parse(), which is not visible here -- confirm */
	uint32_t cont_count;
	/* NOTE(review): presumably the number of records allocated in mp
	 * -- confirm against parse() */
	uint32_t max_count;
};
107 
/*
 * Growable array of magic entries. addentry() appends to `me' and grows
 * it by ALLOC_INCR slots whenever `count' has caught up with `max'.
 */
struct magic_entry_set {
	struct magic_entry *me;	/* entry storage */
	uint32_t count;		/* slots currently in use */
	uint32_t max;		/* slots currently allocated */
};
113 
/*
 * A loaded magic database, split into MAGIC_SETS sets.
 */
struct magic_map {
	/*
	 * Backing buffer. apprentice_unmap() leaves it alone when it is
	 * php_magic_database, frees each magic[] array individually when it
	 * is NULL, and frees the buffer itself otherwise.
	 */
	void *p;
	size_t len;	/* NOTE(review): presumably the size of p in bytes -- confirm */
	int type;	/* NOTE(review): presumably one of MAP_TYPE_* -- confirm */
	struct magic *magic[MAGIC_SETS];	/* per-set array of magic records */
	uint32_t nmagic[MAGIC_SETS];		/* per-set number of records */
};
121 
/*
 * Lookup tables indexed by magic type code. init_file_tables() fills them
 * from type_tbl: file_names[] gets the keyword for each type and
 * file_formats[] gets its format class. The two size constants publish
 * the array lengths.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;
126 
127 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
128 private int hextoint(int);
129 private const char *getstr(struct magic_set *, struct magic *, const char *,
130     int);
131 private int parse(struct magic_set *, struct magic_entry *, const char *,
132     size_t, int);
133 private void eatsize(const char **);
134 private int apprentice_1(struct magic_set *, const char *, int);
135 private size_t apprentice_magic_strength(const struct magic *);
136 private int apprentice_sort(const void *, const void *);
137 private void apprentice_list(struct mlist *, int );
138 private struct magic_map *apprentice_load(struct magic_set *,
139     const char *, int);
140 private struct mlist *mlist_alloc(void);
141 private void mlist_free(struct mlist *);
142 private void byteswap(struct magic *, uint32_t);
143 private void bs1(struct magic *);
144 private uint16_t swap2(uint16_t);
145 private uint32_t swap4(uint32_t);
146 private uint64_t swap8(uint64_t);
147 private char *mkdbname(struct magic_set *, const char *, int);
148 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
149     size_t);
150 private struct magic_map *apprentice_map(struct magic_set *, const char *);
151 private int check_buffer(struct magic_set *, struct magic_map *, const char *);
152 private void apprentice_unmap(struct magic_map *);
153 private int apprentice_compile(struct magic_set *, struct magic_map *,
154     const char *);
155 private int check_format_type(const char *, int, const char **);
156 private int check_format(struct magic_set *, struct magic *);
157 private int get_op(char);
158 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
159 private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
160 private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
161 private int parse_ext(struct magic_set *, struct magic_entry *, const char *);
162 
163 
164 private size_t magicsize = sizeof(struct magic);
165 
166 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
167 
/*
 * Handlers for "!:" annotation lines in a magic file. load_1() compares
 * the text following "!:" with the first `len' bytes of `name' and, on a
 * match, calls `fun' with the current entry and the rest of the line.
 * The table is terminated by the entry whose name is NULL.
 */
private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
	/* Expands to { "x", strlen("x"), parse_x } for keyword x. */
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(ext),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};
181 
182 #include "../data_file.c"
183 
/*
 * One row of a type-keyword table (see type_tbl and special_tbl).
 */
struct type_tbl_s {
	const char name[16];	/* keyword text */
	const size_t len;	/* length of name; 0 terminates a table */
	const int type;		/* FILE_* type code for the keyword */
	const int format;	/* FILE_FMT_* format class for the keyword */
};
190 
/*
 * Table of type keywords, their FILE_* type codes and FILE_FMT_* format
 * classes. get_type() scans it in order and init_file_tables() uses it to
 * populate file_names[] and file_formats[]; the final entry (len 0) ends
 * the table.
 *
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 */
static const struct type_tbl_s type_tbl[] = {
	/* XX(s) supplies both the name and its length; XX_NULL is the
	 * terminator. Both stay defined until after special_tbl. */
# define XX(s)		s, (sizeof(s) - 1)
# define XX_NULL	"", 0
	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
252 
/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.
 *
 * Same row layout as type_tbl, ended by the len-0 entry. The XX and
 * XX_NULL helper macros are released once this table has been built.
 */
static const struct type_tbl_s special_tbl[] = {
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
265 
266 private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)267 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
268 {
269 	const struct type_tbl_s *p;
270 
271 	for (p = tbl; p->len; p++) {
272 		if (strncmp(l, p->name, p->len) == 0) {
273 			if (t)
274 				*t = l + p->len;
275 			break;
276 		}
277 	}
278 	return p->type;
279 }
280 
281 private int
get_standard_integer_type(const char * l,const char ** t)282 get_standard_integer_type(const char *l, const char **t)
283 {
284 	int type;
285 
286 	if (isalpha((unsigned char)l[1])) {
287 		switch (l[1]) {
288 		case 'C':
289 			/* "dC" and "uC" */
290 			type = FILE_BYTE;
291 			break;
292 		case 'S':
293 			/* "dS" and "uS" */
294 			type = FILE_SHORT;
295 			break;
296 		case 'I':
297 		case 'L':
298 			/*
299 			 * "dI", "dL", "uI", and "uL".
300 			 *
301 			 * XXX - the actual Single UNIX Specification says
302 			 * that "L" means "long", as in the C data type,
303 			 * but we treat it as meaning "4-byte integer".
304 			 * Given that the OS X version of file 5.04 did
305 			 * the same, I guess that passes the actual SUS
306 			 * validation suite; having "dL" be dependent on
307 			 * how big a "long" is on the machine running
308 			 * "file" is silly.
309 			 */
310 			type = FILE_LONG;
311 			break;
312 		case 'Q':
313 			/* "dQ" and "uQ" */
314 			type = FILE_QUAD;
315 			break;
316 		default:
317 			/* "d{anything else}", "u{anything else}" */
318 			return FILE_INVALID;
319 		}
320 		l += 2;
321 	} else if (isdigit((unsigned char)l[1])) {
322 		/*
323 		 * "d{num}" and "u{num}"; we only support {num} values
324 		 * of 1, 2, 4, and 8 - the Single UNIX Specification
325 		 * doesn't say anything about whether arbitrary
326 		 * values should be supported, but both the Solaris 10
327 		 * and OS X Mountain Lion versions of file passed the
328 		 * Single UNIX Specification validation suite, and
329 		 * neither of them support values bigger than 8 or
330 		 * non-power-of-2 values.
331 		 */
332 		if (isdigit((unsigned char)l[2])) {
333 			/* Multi-digit, so > 9 */
334 			return FILE_INVALID;
335 		}
336 		switch (l[1]) {
337 		case '1':
338 			type = FILE_BYTE;
339 			break;
340 		case '2':
341 			type = FILE_SHORT;
342 			break;
343 		case '4':
344 			type = FILE_LONG;
345 			break;
346 		case '8':
347 			type = FILE_QUAD;
348 			break;
349 		default:
350 			/* XXX - what about 3, 5, 6, or 7? */
351 			return FILE_INVALID;
352 		}
353 		l += 2;
354 	} else {
355 		/*
356 		 * "d" or "u" by itself.
357 		 */
358 		type = FILE_LONG;
359 		++l;
360 	}
361 	if (t)
362 		*t = l;
363 	return type;
364 }
365 
366 private void
init_file_tables(void)367 init_file_tables(void)
368 {
369 	static int done = 0;
370 	const struct type_tbl_s *p;
371 
372 	if (done)
373 		return;
374 	done++;
375 
376 	for (p = type_tbl; p->len; p++) {
377 		assert(p->type < FILE_NAMES_SIZE);
378 		file_names[p->type] = p->name;
379 		file_formats[p->type] = p->format;
380 	}
381 	assert(p - type_tbl == FILE_NAMES_SIZE);
382 }
383 
384 private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)385 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
386 {
387 	struct mlist *ml;
388 
389 	mlp->map = NULL;
390 	if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
391 		return -1;
392 
393 	ml->map = idx == 0 ? map : NULL;
394 	ml->magic = map->magic[idx];
395 	ml->nmagic = map->nmagic[idx];
396 
397 	mlp->prev->next = ml;
398 	ml->prev = mlp->prev;
399 	ml->next = mlp;
400 	mlp->prev = ml;
401 	return 0;
402 }
403 
404 /*
405  * Handle one file or directory.
406  */
407 private int
apprentice_1(struct magic_set * ms,const char * fn,int action)408 apprentice_1(struct magic_set *ms, const char *fn, int action)
409 {
410 	struct magic_map *map;
411 	struct mlist *ml;
412 	size_t i;
413 
414 	if (magicsize != FILE_MAGICSIZE) {
415 		file_error(ms, 0, "magic element size %lu != %lu",
416 		    (unsigned long)sizeof(*map->magic[0]),
417 		    (unsigned long)FILE_MAGICSIZE);
418 		return -1;
419 	}
420 
421 	if (action == FILE_COMPILE) {
422 		map = apprentice_load(ms, fn, action);
423 		if (map == NULL)
424 			return -1;
425 		return apprentice_compile(ms, map, fn);
426 	}
427 
428 	map = apprentice_map(ms, fn);
429 	if (map == (struct magic_map *)-1)
430 		return -1;
431 	if (map == NULL) {
432 		if (fn) {
433 			if (ms->flags & MAGIC_CHECK)
434 				file_magwarn(ms, "using regular magic file `%s'", fn);
435 			map = apprentice_load(ms, fn, action);
436 		}
437 		if (map == NULL)
438 			return -1;
439 	}
440 
441 	for (i = 0; i < MAGIC_SETS; i++) {
442 		if (add_mlist(ms->mlist[i], map, i) == -1) {
443 			file_oomem(ms, sizeof(*ml));
444 			return -1;
445 		}
446 	}
447 
448 	if (action == FILE_LIST) {
449 		for (i = 0; i < MAGIC_SETS; i++) {
450 			printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
451 			    i);
452 			apprentice_list(ms->mlist[i], BINTEST);
453 			printf("Text patterns:\n");
454 			apprentice_list(ms->mlist[i], TEXTTEST);
455 		}
456 	}
457 	return 0;
458 }
459 
460 protected void
file_ms_free(struct magic_set * ms)461 file_ms_free(struct magic_set *ms)
462 {
463 	size_t i;
464 	if (ms == NULL)
465 		return;
466 	for (i = 0; i < MAGIC_SETS; i++)
467 		mlist_free(ms->mlist[i]);
468 	if (ms->o.pbuf) {
469 		efree(ms->o.pbuf);
470 	}
471 	if (ms->o.buf) {
472 		efree(ms->o.buf);
473 	}
474 	if (ms->c.li) {
475 		efree(ms->c.li);
476 	}
477 	efree(ms);
478 }
479 
480 protected struct magic_set *
file_ms_alloc(int flags)481 file_ms_alloc(int flags)
482 {
483 	struct magic_set *ms;
484 	size_t i, len;
485 
486 	if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
487 	    sizeof(struct magic_set)))) == NULL)
488 		return NULL;
489 
490 	if (magic_setflags(ms, flags) == -1) {
491 		errno = EINVAL;
492 		goto free;
493 	}
494 
495 	ms->o.buf = ms->o.pbuf = NULL;
496 	len = (ms->c.len = 10) * sizeof(*ms->c.li);
497 
498 	if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
499 		goto free;
500 
501 	ms->event_flags = 0;
502 	ms->error = -1;
503 	for (i = 0; i < MAGIC_SETS; i++)
504 		ms->mlist[i] = NULL;
505 	ms->file = "unknown";
506 	ms->line = 0;
507 	ms->indir_max = FILE_INDIR_MAX;
508 	ms->name_max = FILE_NAME_MAX;
509 	ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
510 	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
511 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
512 	ms->regex_max = FILE_REGEX_MAX;
513 	ms->bytes_max = FILE_BYTES_MAX;
514 	return ms;
515 free:
516 	efree(ms);
517 	return NULL;
518 }
519 
520 private void
apprentice_unmap(struct magic_map * map)521 apprentice_unmap(struct magic_map *map)
522 {
523 	if (map == NULL)
524 		return;
525 	if (map->p != php_magic_database) {
526 		if (map->p == NULL) {
527 			int j;
528 			for (j = 0; j < MAGIC_SETS; j++) {
529 				if (map->magic[j]) {
530 					efree(map->magic[j]);
531 				}
532 			}
533 		} else {
534 			efree(map->p);
535 		}
536 	}
537 	efree(map);
538 }
539 
540 private struct mlist *
mlist_alloc(void)541 mlist_alloc(void)
542 {
543 	struct mlist *mlist;
544 	if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
545 		return NULL;
546 	}
547 	mlist->next = mlist->prev = mlist;
548 	return mlist;
549 }
550 
551 private void
mlist_free(struct mlist * mlist)552 mlist_free(struct mlist *mlist)
553 {
554 	struct mlist *ml, *next;
555 
556 	if (mlist == NULL)
557 		return;
558 
559 	ml = mlist->next;
560 	for (ml = mlist->next; (next = ml->next) != NULL; ml = next) {
561 		if (ml->map)
562 			apprentice_unmap(CAST(struct magic_map *, ml->map));
563 		efree(ml);
564 		if (ml == mlist)
565 			break;
566 	}
567 }
568 
569 /* const char *fn: list of magic files and directories */
570 protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)571 file_apprentice(struct magic_set *ms, const char *fn, int action)
572 {
573 	char *p, *mfn;
574 	int fileerr, errs = -1;
575 	size_t i;
576 
577 	if (ms->mlist[0] != NULL)
578 		(void)file_reset(ms, 0);
579 
580 /* XXX disabling default magic loading so the compiled in data is used */
581 #if 0
582 	if ((fn = magic_getpath(fn, action)) == NULL)
583 		return -1;
584 #endif
585 
586 	init_file_tables();
587 
588 	if (fn == NULL)
589 		fn = getenv("MAGIC");
590 	if (fn == NULL) {
591 		for (i = 0; i < MAGIC_SETS; i++) {
592 			mlist_free(ms->mlist[i]);
593 			if ((ms->mlist[i] = mlist_alloc()) == NULL) {
594 				file_oomem(ms, sizeof(*ms->mlist[i]));
595 				return -1;
596 			}
597 		}
598 		return apprentice_1(ms, fn, action);
599 	}
600 
601 	if ((mfn = estrdup(fn)) == NULL) {
602 		file_oomem(ms, strlen(fn));
603 		return -1;
604 	}
605 
606 	for (i = 0; i < MAGIC_SETS; i++) {
607 		mlist_free(ms->mlist[i]);
608 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
609 			file_oomem(ms, sizeof(*ms->mlist[i]));
610 			while (i-- > 0) {
611 				mlist_free(ms->mlist[i]);
612 				ms->mlist[i] = NULL;
613 			}
614 			efree(mfn);
615 			return -1;
616 		}
617 	}
618 	fn = mfn;
619 
620 	while (fn) {
621 		p = strchr(fn, PATHSEP);
622 		if (p)
623 			*p++ = '\0';
624 		if (*fn == '\0')
625 			break;
626 		fileerr = apprentice_1(ms, fn, action);
627 		errs = MAX(errs, fileerr);
628 		fn = p;
629 	}
630 
631 	efree(mfn);
632 
633 	if (errs == -1) {
634 		for (i = 0; i < MAGIC_SETS; i++) {
635 			mlist_free(ms->mlist[i]);
636 			ms->mlist[i] = NULL;
637 		}
638 		file_error(ms, 0, "could not find any valid magic files!");
639 		return -1;
640 	}
641 
642 #if 0
643 	/*
644 	 * Always leave the database loaded
645 	 */
646 	if (action == FILE_LOAD)
647 		return 0;
648 
649 	for (i = 0; i < MAGIC_SETS; i++) {
650 		mlist_free(ms->mlist[i]);
651 		ms->mlist[i] = NULL;
652 	}
653 #endif
654 
655 	switch (action) {
656 	case FILE_LOAD:
657 	case FILE_COMPILE:
658 	case FILE_CHECK:
659 	case FILE_LIST:
660 		return 0;
661 	default:
662 		file_error(ms, 0, "Invalid action %d", action);
663 		return -1;
664 	}
665 }
666 
/*
 * Compute the real length of a magic expression, for the purposes
 * of determining how "strong" a magic expression is (approximating
 * how specific its matches are):
 *	- magic characters (? * . + ^ $) count 0 unless escaped
 *	- a [] expression counts 1 (its closing ']' is what gets counted)
 *	- a {} expression counts 0
 *	- regular characters or escaped magic characters count 1
 *	- an expression that would total 0 counts as 1
 */
private size_t
nonmagic(const char *str)
{
	const char *cur = str;
	size_t weight = 0;

	while (*cur != '\0') {
		switch (*cur) {
		case '\\':
			/*
			 * The backslash and the character it escapes count
			 * as one; a backslash at the very end still counts.
			 */
			if (cur[1] != '\0')
				cur++;
			weight++;
			cur++;
			break;

		case '?':
		case '*':
		case '.':
		case '+':
		case '^':
		case '$':
			cur++;
			break;

		case '[':
			/*
			 * Advance to the closing ']' (or the end of the
			 * string) and stay there: the next iteration counts
			 * the ']' as an ordinary character.
			 */
			while (*cur != '\0' && *cur != ']')
				cur++;
			break;

		case '{':
			/* Skip the whole braced part, '}' included. */
			while (*cur != '\0' && *cur != '}')
				cur++;
			if (*cur == '}')
				cur++;
			break;

		default:
			weight++;
			cur++;
			break;
		}
	}

	return weight != 0 ? weight : 1;
}
715 
716 
717 private size_t
typesize(int type)718 typesize(int type)
719 {
720 	switch (type) {
721 	case FILE_BYTE:
722 		return 1;
723 
724 	case FILE_SHORT:
725 	case FILE_LESHORT:
726 	case FILE_BESHORT:
727 		return 2;
728 
729 	case FILE_LONG:
730 	case FILE_LELONG:
731 	case FILE_BELONG:
732 	case FILE_MELONG:
733 		return 4;
734 
735 	case FILE_DATE:
736 	case FILE_LEDATE:
737 	case FILE_BEDATE:
738 	case FILE_MEDATE:
739 	case FILE_LDATE:
740 	case FILE_LELDATE:
741 	case FILE_BELDATE:
742 	case FILE_MELDATE:
743 	case FILE_FLOAT:
744 	case FILE_BEFLOAT:
745 	case FILE_LEFLOAT:
746 		return 4;
747 
748 	case FILE_QUAD:
749 	case FILE_BEQUAD:
750 	case FILE_LEQUAD:
751 	case FILE_QDATE:
752 	case FILE_LEQDATE:
753 	case FILE_BEQDATE:
754 	case FILE_QLDATE:
755 	case FILE_LEQLDATE:
756 	case FILE_BEQLDATE:
757 	case FILE_QWDATE:
758 	case FILE_LEQWDATE:
759 	case FILE_BEQWDATE:
760 	case FILE_DOUBLE:
761 	case FILE_BEDOUBLE:
762 	case FILE_LEDOUBLE:
763 		return 8;
764 	default:
765 		return (size_t)~0;
766 	}
767 }
768 
769 /*
770  * Get weight of this magic entry, for sorting purposes.
771  */
772 private size_t
apprentice_magic_strength(const struct magic * m)773 apprentice_magic_strength(const struct magic *m)
774 {
775 #define MULT 10
776 	size_t ts, v, val = 2 * MULT;	/* baseline strength */
777 
778 	switch (m->type) {
779 	case FILE_DEFAULT:	/* make sure this sorts last */
780 		if (m->factor_op != FILE_FACTOR_OP_NONE)
781 			abort();
782 		return 0;
783 
784 	case FILE_BYTE:
785 	case FILE_SHORT:
786 	case FILE_LESHORT:
787 	case FILE_BESHORT:
788 	case FILE_LONG:
789 	case FILE_LELONG:
790 	case FILE_BELONG:
791 	case FILE_MELONG:
792 	case FILE_DATE:
793 	case FILE_LEDATE:
794 	case FILE_BEDATE:
795 	case FILE_MEDATE:
796 	case FILE_LDATE:
797 	case FILE_LELDATE:
798 	case FILE_BELDATE:
799 	case FILE_MELDATE:
800 	case FILE_FLOAT:
801 	case FILE_BEFLOAT:
802 	case FILE_LEFLOAT:
803 	case FILE_QUAD:
804 	case FILE_BEQUAD:
805 	case FILE_LEQUAD:
806 	case FILE_QDATE:
807 	case FILE_LEQDATE:
808 	case FILE_BEQDATE:
809 	case FILE_QLDATE:
810 	case FILE_LEQLDATE:
811 	case FILE_BEQLDATE:
812 	case FILE_QWDATE:
813 	case FILE_LEQWDATE:
814 	case FILE_BEQWDATE:
815 	case FILE_DOUBLE:
816 	case FILE_BEDOUBLE:
817 	case FILE_LEDOUBLE:
818 		ts = typesize(m->type);
819 		if (ts == (size_t)~0)
820 			abort();
821 		val += ts * MULT;
822 		break;
823 
824 	case FILE_PSTRING:
825 	case FILE_STRING:
826 		val += m->vallen * MULT;
827 		break;
828 
829 	case FILE_BESTRING16:
830 	case FILE_LESTRING16:
831 		val += m->vallen * MULT / 2;
832 		break;
833 
834 	case FILE_SEARCH:
835 		val += m->vallen * MAX(MULT / m->vallen, 1);
836 		break;
837 
838 	case FILE_REGEX:
839 		v = nonmagic(m->value.s);
840 		val += v * MAX(MULT / v, 1);
841 		break;
842 
843 	case FILE_INDIRECT:
844 	case FILE_NAME:
845 	case FILE_USE:
846 		break;
847 
848 	case FILE_DER:
849 		val += MULT;
850 		break;
851 
852 	default:
853 		(void)fprintf(stderr, "Bad type %d\n", m->type);
854 		abort();
855 	}
856 
857 	switch (m->reln) {
858 	case 'x':	/* matches anything penalize */
859 	case '!':       /* matches almost anything penalize */
860 		val = 0;
861 		break;
862 
863 	case '=':	/* Exact match, prefer */
864 		val += MULT;
865 		break;
866 
867 	case '>':
868 	case '<':	/* comparison match reduce strength */
869 		val -= 2 * MULT;
870 		break;
871 
872 	case '^':
873 	case '&':	/* masking bits, we could count them too */
874 		val -= MULT;
875 		break;
876 
877 	default:
878 		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
879 		abort();
880 	}
881 
882 	if (val == 0)	/* ensure we only return 0 for FILE_DEFAULT */
883 		val = 1;
884 
885 	switch (m->factor_op) {
886 	case FILE_FACTOR_OP_NONE:
887 		break;
888 	case FILE_FACTOR_OP_PLUS:
889 		val += m->factor;
890 		break;
891 	case FILE_FACTOR_OP_MINUS:
892 		val -= m->factor;
893 		break;
894 	case FILE_FACTOR_OP_TIMES:
895 		val *= m->factor;
896 		break;
897 	case FILE_FACTOR_OP_DIV:
898 		val /= m->factor;
899 		break;
900 	default:
901 		abort();
902 	}
903 
904 	/*
905 	 * Magic entries with no description get a bonus because they depend
906 	 * on subsequent magic entries to print something.
907 	 */
908 	if (m->desc[0] == '\0')
909 		val++;
910 	return val;
911 }
912 
913 /*
914  * Sort callback for sorting entries by "strength" (basically length)
915  */
916 private int
apprentice_sort(const void * a,const void * b)917 apprentice_sort(const void *a, const void *b)
918 {
919 	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
920 	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
921 	size_t sa = apprentice_magic_strength(ma->mp);
922 	size_t sb = apprentice_magic_strength(mb->mp);
923 	if (sa == sb)
924 		return 0;
925 	else if (sa > sb)
926 		return -1;
927 	else
928 		return 1;
929 }
930 
931 /*
932  * Shows sorted patterns list in the order which is used for the matching
933  */
934 private void
apprentice_list(struct mlist * mlist,int mode)935 apprentice_list(struct mlist *mlist, int mode)
936 {
937 	uint32_t magindex = 0;
938 	struct mlist *ml;
939 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
940 		for (magindex = 0; magindex < ml->nmagic; magindex++) {
941 			struct magic *m = &ml->magic[magindex];
942 			if ((m->flag & mode) != mode) {
943 				/* Skip sub-tests */
944 				while (magindex + 1 < ml->nmagic &&
945 				       ml->magic[magindex + 1].cont_level != 0)
946 					++magindex;
947 				continue; /* Skip to next top-level test*/
948 			}
949 
950 			/*
951 			 * Try to iterate over the tree until we find item with
952 			 * description/mimetype.
953 			 */
954 			while (magindex + 1 < ml->nmagic &&
955 			       ml->magic[magindex + 1].cont_level != 0 &&
956 			       *ml->magic[magindex].desc == '\0' &&
957 			       *ml->magic[magindex].mimetype == '\0')
958 				magindex++;
959 
960 			printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
961 			    apprentice_magic_strength(m),
962 			    ml->magic[magindex].lineno,
963 			    ml->magic[magindex].desc,
964 			    ml->magic[magindex].mimetype);
965 		}
966 	}
967 }
968 
969 private void
set_test_type(struct magic * mstart,struct magic * m)970 set_test_type(struct magic *mstart, struct magic *m)
971 {
972 	switch (m->type) {
973 	case FILE_BYTE:
974 	case FILE_SHORT:
975 	case FILE_LONG:
976 	case FILE_DATE:
977 	case FILE_BESHORT:
978 	case FILE_BELONG:
979 	case FILE_BEDATE:
980 	case FILE_LESHORT:
981 	case FILE_LELONG:
982 	case FILE_LEDATE:
983 	case FILE_LDATE:
984 	case FILE_BELDATE:
985 	case FILE_LELDATE:
986 	case FILE_MEDATE:
987 	case FILE_MELDATE:
988 	case FILE_MELONG:
989 	case FILE_QUAD:
990 	case FILE_LEQUAD:
991 	case FILE_BEQUAD:
992 	case FILE_QDATE:
993 	case FILE_LEQDATE:
994 	case FILE_BEQDATE:
995 	case FILE_QLDATE:
996 	case FILE_LEQLDATE:
997 	case FILE_BEQLDATE:
998 	case FILE_QWDATE:
999 	case FILE_LEQWDATE:
1000 	case FILE_BEQWDATE:
1001 	case FILE_FLOAT:
1002 	case FILE_BEFLOAT:
1003 	case FILE_LEFLOAT:
1004 	case FILE_DOUBLE:
1005 	case FILE_BEDOUBLE:
1006 	case FILE_LEDOUBLE:
1007 	case FILE_DER:
1008 		mstart->flag |= BINTEST;
1009 		break;
1010 	case FILE_STRING:
1011 	case FILE_PSTRING:
1012 	case FILE_BESTRING16:
1013 	case FILE_LESTRING16:
1014 		/* Allow text overrides */
1015 		if (mstart->str_flags & STRING_TEXTTEST)
1016 			mstart->flag |= TEXTTEST;
1017 		else
1018 			mstart->flag |= BINTEST;
1019 		break;
1020 	case FILE_REGEX:
1021 	case FILE_SEARCH:
1022 		/* Check for override */
1023 		if (mstart->str_flags & STRING_BINTEST)
1024 			mstart->flag |= BINTEST;
1025 		if (mstart->str_flags & STRING_TEXTTEST)
1026 			mstart->flag |= TEXTTEST;
1027 
1028 		if (mstart->flag & (TEXTTEST|BINTEST))
1029 			break;
1030 
1031 		/* binary test if pattern is not text */
1032 		if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
1033 		    NULL) <= 0)
1034 			mstart->flag |= BINTEST;
1035 		else
1036 			mstart->flag |= TEXTTEST;
1037 		break;
1038 	case FILE_DEFAULT:
1039 		/* can't deduce anything; we shouldn't see this at the
1040 		   top level anyway */
1041 		break;
1042 	case FILE_INVALID:
1043 	default:
1044 		/* invalid search type, but no need to complain here */
1045 		break;
1046 	}
1047 }
1048 
1049 private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry_set * mset)1050 addentry(struct magic_set *ms, struct magic_entry *me,
1051    struct magic_entry_set *mset)
1052 {
1053 	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1054 	if (mset[i].count == mset[i].max) {
1055 		struct magic_entry *mp;
1056 
1057 		mset[i].max += ALLOC_INCR;
1058 		if ((mp = CAST(struct magic_entry *,
1059 		    erealloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
1060 		    NULL) {
1061 			file_oomem(ms, sizeof(*mp) * mset[i].max);
1062 			return -1;
1063 		}
1064 		(void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1065 		    ALLOC_INCR);
1066 		mset[i].me = mp;
1067 	}
1068 	mset[i].me[mset[i].count++] = *me;
1069 	memset(me, 0, sizeof(*me));
1070 	return 0;
1071 }
1072 
1073 /*
1074  * Load and parse one file.
1075  */
1076 private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry_set * mset)1077 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1078    struct magic_entry_set *mset)
1079 {
1080 	char buffer[BUFSIZ + 1];
1081 	char *line = NULL;
1082 	size_t len;
1083 	size_t lineno = 0;
1084 	struct magic_entry me;
1085 
1086 	php_stream *stream;
1087 
1088 
1089 	ms->file = fn;
1090 	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
1091 
1092 	if (stream == NULL) {
1093 		if (errno != ENOENT)
1094 			file_error(ms, errno, "cannot read magic file `%s'",
1095 				   fn);
1096 		(*errs)++;
1097 		return;
1098 	}
1099 
1100 	memset(&me, 0, sizeof(me));
1101 	/* read and parse this file */
1102 	for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
1103 		if (len == 0) /* null line, garbage, etc */
1104 			continue;
1105 		if (line[len - 1] == '\n') {
1106 			lineno++;
1107 			line[len - 1] = '\0'; /* delete newline */
1108 		}
1109 		switch (line[0]) {
1110 		case '\0':	/* empty, do not parse */
1111 		case '#':	/* comment, do not parse */
1112 			continue;
1113 		case '!':
1114 			if (line[1] == ':') {
1115 				size_t i;
1116 
1117 				for (i = 0; bang[i].name != NULL; i++) {
1118 					if ((size_t)(len - 2) > bang[i].len &&
1119 					    memcmp(bang[i].name, line + 2,
1120 					    bang[i].len) == 0)
1121 						break;
1122 				}
1123 				if (bang[i].name == NULL) {
1124 					file_error(ms, 0,
1125 					    "Unknown !: entry `%s'", line);
1126 					(*errs)++;
1127 					continue;
1128 				}
1129 				if (me.mp == NULL) {
1130 					file_error(ms, 0,
1131 					    "No current entry for :!%s type",
1132 						bang[i].name);
1133 					(*errs)++;
1134 					continue;
1135 				}
1136 				if ((*bang[i].fun)(ms, &me,
1137 				    line + bang[i].len + 2) != 0) {
1138 					(*errs)++;
1139 					continue;
1140 				}
1141 				continue;
1142 			}
1143 			/*FALLTHROUGH*/
1144 		default:
1145 		again:
1146 			switch (parse(ms, &me, line, lineno, action)) {
1147 			case 0:
1148 				continue;
1149 			case 1:
1150 				(void)addentry(ms, &me, mset);
1151 				goto again;
1152 			default:
1153 				(*errs)++;
1154 				break;
1155 			}
1156 		}
1157 	}
1158 	if (me.mp)
1159 		(void)addentry(ms, &me, mset);
1160     efree(line);
1161 	php_stream_close(stream);
1162 }
1163 
/*
 * apprentice_load(), defined further below, parses a file or directory
 * of files.
 * const char *fn: name of magic file or directory
 */
/*
 * qsort(3)-style comparison callback for an array of C strings: each
 * argument points at a `char *' element and the strings they refer to are
 * ordered with strcmp(3).
 */
private int
cmpstrp(const void *p1, const void *p2)
{
	char *const *lhs = (char *const *)p1;
	char *const *rhs = (char *const *)p2;

	return strcmp(*lhs, *rhs);
}
1173 
1174 
1175 private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1176 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1177     uint32_t starttest)
1178 {
1179 	static const char text[] = "text";
1180 	static const char binary[] = "binary";
1181 	static const size_t len = sizeof(text);
1182 
1183 	uint32_t i = starttest;
1184 
1185 	do {
1186 		set_test_type(me[starttest].mp, me[i].mp);
1187 		if ((ms->flags & MAGIC_DEBUG) == 0)
1188 			continue;
1189 		(void)fprintf(stderr, "%s%s%s: %s\n",
1190 		    me[i].mp->mimetype,
1191 		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1192 		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1193 		    me[i].mp->flag & BINTEST ? binary : text);
1194 		if (me[i].mp->flag & BINTEST) {
1195 			char *p = strstr(me[i].mp->desc, text);
1196 			if (p && (p == me[i].mp->desc ||
1197 			    isspace((unsigned char)p[-1])) &&
1198 			    (p + len - me[i].mp->desc == MAXstring
1199 			    || (p[len] == '\0' ||
1200 			    isspace((unsigned char)p[len]))))
1201 				(void)fprintf(stderr, "*** Possible "
1202 				    "binary test for text type\n");
1203 		}
1204 	} while (++i < nme && me[i].mp->cont_level != 0);
1205 	return i;
1206 }
1207 
1208 private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1209 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1210 {
1211 	uint32_t i;
1212 	for (i = 0; i < nme; i++) {
1213 		if (me[i].mp->cont_level == 0 &&
1214 		    me[i].mp->type == FILE_DEFAULT) {
1215 			while (++i < nme)
1216 				if (me[i].mp->cont_level == 0)
1217 					break;
1218 			if (i != nme) {
1219 				/* XXX - Ugh! */
1220 				ms->line = me[i].mp->lineno;
1221 				file_magwarn(ms,
1222 				    "level 0 \"default\" did not sort last");
1223 			}
1224 			return;
1225 		}
1226 	}
1227 }
1228 
1229 private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1230 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1231     struct magic **ma, uint32_t *nma)
1232 {
1233 	uint32_t i, mentrycount = 0;
1234 	size_t slen;
1235 
1236 	for (i = 0; i < nme; i++)
1237 		mentrycount += me[i].cont_count;
1238 
1239 	slen = sizeof(**ma) * mentrycount;
1240 	if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1241 		file_oomem(ms, slen);
1242 		return -1;
1243 	}
1244 
1245 	mentrycount = 0;
1246 	for (i = 0; i < nme; i++) {
1247 		(void)memcpy(*ma + mentrycount, me[i].mp,
1248 		    me[i].cont_count * sizeof(**ma));
1249 		mentrycount += me[i].cont_count;
1250 	}
1251 	*nma = mentrycount;
1252 	return 0;
1253 }
1254 
1255 private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1256 magic_entry_free(struct magic_entry *me, uint32_t nme)
1257 {
1258 	uint32_t i;
1259 	if (me == NULL)
1260 		return;
1261 	for (i = 0; i < nme; i++)
1262 		efree(me[i].mp);
1263 	efree(me);
1264 }
1265 
1266 private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1267 apprentice_load(struct magic_set *ms, const char *fn, int action)
1268 {
1269 	int errs = 0;
1270 	uint32_t i, j;
1271 	size_t files = 0, maxfiles = 0;
1272 	char **filearr = NULL;
1273 	zend_stat_t st;
1274 	struct magic_map *map;
1275 	struct magic_entry_set mset[MAGIC_SETS];
1276 	php_stream *dir;
1277 	php_stream_dirent d;
1278 
1279 
1280 	memset(mset, 0, sizeof(mset));
1281 	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1282 
1283 
1284 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1285 	{
1286 		file_oomem(ms, sizeof(*map));
1287 		return NULL;
1288 	}
1289 	map->type = MAP_TYPE_MALLOC;
1290 
1291 	/* print silly verbose header for USG compat. */
1292 	if (action == FILE_CHECK)
1293 		(void)fprintf(stderr, "%s\n", usg_hdr);
1294 
1295 	/* load directory or file */
1296 	/* FIXME: Read file names and sort them to prevent
1297 	   non-determinism. See Debian bug #488562. */
1298 	if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1299 		int mflen;
1300 		char mfn[MAXPATHLEN];
1301 
1302 		dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1303 		if (!dir) {
1304 			errs++;
1305 			goto out;
1306 		}
1307 		while (php_stream_readdir(dir, &d)) {
1308 			if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1309 				file_oomem(ms,
1310 				strlen(fn) + strlen(d.d_name) + 2);
1311 				errs++;
1312 				php_stream_closedir(dir);
1313 				goto out;
1314 			}
1315 			if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1316 				continue;
1317 			}
1318 			if (files >= maxfiles) {
1319 				size_t mlen;
1320 				maxfiles = (maxfiles + 1) * 2;
1321 				mlen = maxfiles * sizeof(*filearr);
1322 				if ((filearr = CAST(char **,
1323 				    erealloc(filearr, mlen))) == NULL) {
1324 					file_oomem(ms, mlen);
1325 					php_stream_closedir(dir);
1326 					errs++;
1327 					goto out;
1328 				}
1329 			}
1330 			filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1331 		}
1332 		php_stream_closedir(dir);
1333 		qsort(filearr, files, sizeof(*filearr), cmpstrp);
1334 		for (i = 0; i < files; i++) {
1335 			load_1(ms, action, filearr[i], &errs, mset);
1336 			efree(filearr[i]);
1337 		}
1338 		efree(filearr);
1339 	} else
1340 		load_1(ms, action, fn, &errs, mset);
1341 	if (errs)
1342 		goto out;
1343 
1344 	for (j = 0; j < MAGIC_SETS; j++) {
1345 		/* Set types of tests */
1346 		for (i = 0; i < mset[j].count; ) {
1347 			if (mset[j].me[i].mp->cont_level != 0) {
1348 				i++;
1349 				continue;
1350 			}
1351 			i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1352 		}
1353 		if (mset[j].me)
1354 			qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1355 			    apprentice_sort);
1356 
1357 		/*
1358 		 * Make sure that any level 0 "default" line is last
1359 		 * (if one exists).
1360 		 */
1361 		set_last_default(ms, mset[j].me, mset[j].count);
1362 
1363 		/* coalesce per file arrays into a single one */
1364 		if (coalesce_entries(ms, mset[j].me, mset[j].count,
1365 		    &map->magic[j], &map->nmagic[j]) == -1) {
1366 			errs++;
1367 			goto out;
1368 		}
1369 	}
1370 
1371 out:
1372 	for (j = 0; j < MAGIC_SETS; j++)
1373 		magic_entry_free(mset[j].me, mset[j].count);
1374 
1375 	if (errs) {
1376 		apprentice_unmap(map);
1377 		return NULL;
1378 	}
1379 	return map;
1380 }
1381 
1382 /*
1383  * extend the sign bit if the comparison is to be signed
1384  */
1385 protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1386 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1387 {
1388 	if (!(m->flag & UNSIGNED)) {
1389 		switch(m->type) {
1390 		/*
1391 		 * Do not remove the casts below.  They are
1392 		 * vital.  When later compared with the data,
1393 		 * the sign extension must have happened.
1394 		 */
1395 		case FILE_BYTE:
1396 			v = (signed char) v;
1397 			break;
1398 		case FILE_SHORT:
1399 		case FILE_BESHORT:
1400 		case FILE_LESHORT:
1401 			v = (short) v;
1402 			break;
1403 		case FILE_DATE:
1404 		case FILE_BEDATE:
1405 		case FILE_LEDATE:
1406 		case FILE_MEDATE:
1407 		case FILE_LDATE:
1408 		case FILE_BELDATE:
1409 		case FILE_LELDATE:
1410 		case FILE_MELDATE:
1411 		case FILE_LONG:
1412 		case FILE_BELONG:
1413 		case FILE_LELONG:
1414 		case FILE_MELONG:
1415 		case FILE_FLOAT:
1416 		case FILE_BEFLOAT:
1417 		case FILE_LEFLOAT:
1418 			v = (int32_t) v;
1419 			break;
1420 		case FILE_QUAD:
1421 		case FILE_BEQUAD:
1422 		case FILE_LEQUAD:
1423 		case FILE_QDATE:
1424 		case FILE_QLDATE:
1425 		case FILE_QWDATE:
1426 		case FILE_BEQDATE:
1427 		case FILE_BEQLDATE:
1428 		case FILE_BEQWDATE:
1429 		case FILE_LEQDATE:
1430 		case FILE_LEQLDATE:
1431 		case FILE_LEQWDATE:
1432 		case FILE_DOUBLE:
1433 		case FILE_BEDOUBLE:
1434 		case FILE_LEDOUBLE:
1435 			v = (int64_t) v;
1436 			break;
1437 		case FILE_STRING:
1438 		case FILE_PSTRING:
1439 		case FILE_BESTRING16:
1440 		case FILE_LESTRING16:
1441 		case FILE_REGEX:
1442 		case FILE_SEARCH:
1443 		case FILE_DEFAULT:
1444 		case FILE_INDIRECT:
1445 		case FILE_NAME:
1446 		case FILE_USE:
1447 		case FILE_CLEAR:
1448 		case FILE_DER:
1449 			break;
1450 		default:
1451 			if (ms->flags & MAGIC_CHECK)
1452 			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1453 				    m->type);
1454 			return ~0U;
1455 		}
1456 	}
1457 	return v;
1458 }
1459 
1460 private int
string_modifier_check(struct magic_set * ms,struct magic * m)1461 string_modifier_check(struct magic_set *ms, struct magic *m)
1462 {
1463 	if ((ms->flags & MAGIC_CHECK) == 0)
1464 		return 0;
1465 
1466 	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1467 	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1468 		file_magwarn(ms,
1469 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1470 		return -1;
1471 	}
1472 	switch (m->type) {
1473 	case FILE_BESTRING16:
1474 	case FILE_LESTRING16:
1475 		if (m->str_flags != 0) {
1476 			file_magwarn(ms,
1477 			    "no modifiers allowed for 16-bit strings\n");
1478 			return -1;
1479 		}
1480 		break;
1481 	case FILE_STRING:
1482 	case FILE_PSTRING:
1483 		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1484 			file_magwarn(ms,
1485 			    "'/%c' only allowed on regex and search\n",
1486 			    CHAR_REGEX_OFFSET_START);
1487 			return -1;
1488 		}
1489 		break;
1490 	case FILE_SEARCH:
1491 		if (m->str_range == 0) {
1492 			file_magwarn(ms,
1493 			    "missing range; defaulting to %d\n",
1494                             STRING_DEFAULT_RANGE);
1495 			m->str_range = STRING_DEFAULT_RANGE;
1496 			return -1;
1497 		}
1498 		break;
1499 	case FILE_REGEX:
1500 		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1501 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1502 			    CHAR_COMPACT_WHITESPACE);
1503 			return -1;
1504 		}
1505 		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1506 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1507 			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1508 			return -1;
1509 		}
1510 		break;
1511 	default:
1512 		file_magwarn(ms, "coding error: m->type=%d\n",
1513 		    m->type);
1514 		return -1;
1515 	}
1516 	return 0;
1517 }
1518 
1519 private int
get_op(char c)1520 get_op(char c)
1521 {
1522 	switch (c) {
1523 	case '&':
1524 		return FILE_OPAND;
1525 	case '|':
1526 		return FILE_OPOR;
1527 	case '^':
1528 		return FILE_OPXOR;
1529 	case '+':
1530 		return FILE_OPADD;
1531 	case '-':
1532 		return FILE_OPMINUS;
1533 	case '*':
1534 		return FILE_OPMULTIPLY;
1535 	case '/':
1536 		return FILE_OPDIVIDE;
1537 	case '%':
1538 		return FILE_OPMODULO;
1539 	default:
1540 		return -1;
1541 	}
1542 }
1543 
1544 #ifdef ENABLE_CONDITIONALS
1545 private int
get_cond(const char * l,const char ** t)1546 get_cond(const char *l, const char **t)
1547 {
1548 	static const struct cond_tbl_s {
1549 		char name[8];
1550 		size_t len;
1551 		int cond;
1552 	} cond_tbl[] = {
1553 		{ "if",		2,	COND_IF },
1554 		{ "elif",	4,	COND_ELIF },
1555 		{ "else",	4,	COND_ELSE },
1556 		{ "",		0,	COND_NONE },
1557 	};
1558 	const struct cond_tbl_s *p;
1559 
1560 	for (p = cond_tbl; p->len; p++) {
1561 		if (strncmp(l, p->name, p->len) == 0 &&
1562 		    isspace((unsigned char)l[p->len])) {
1563 			if (t)
1564 				*t = l + p->len;
1565 			break;
1566 		}
1567 	}
1568 	return p->cond;
1569 }
1570 
1571 private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1572 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1573 {
1574 	int last_cond;
1575 	last_cond = ms->c.li[cont_level].last_cond;
1576 
1577 	switch (cond) {
1578 	case COND_IF:
1579 		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1580 			if (ms->flags & MAGIC_CHECK)
1581 				file_magwarn(ms, "syntax error: `if'");
1582 			return -1;
1583 		}
1584 		last_cond = COND_IF;
1585 		break;
1586 
1587 	case COND_ELIF:
1588 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1589 			if (ms->flags & MAGIC_CHECK)
1590 				file_magwarn(ms, "syntax error: `elif'");
1591 			return -1;
1592 		}
1593 		last_cond = COND_ELIF;
1594 		break;
1595 
1596 	case COND_ELSE:
1597 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1598 			if (ms->flags & MAGIC_CHECK)
1599 				file_magwarn(ms, "syntax error: `else'");
1600 			return -1;
1601 		}
1602 		last_cond = COND_NONE;
1603 		break;
1604 
1605 	case COND_NONE:
1606 		last_cond = COND_NONE;
1607 		break;
1608 	}
1609 
1610 	ms->c.li[cont_level].last_cond = last_cond;
1611 	return 0;
1612 }
1613 #endif /* ENABLE_CONDITIONALS */
1614 
1615 private int
parse_indirect_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1616 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1617 {
1618 	const char *l = *lp;
1619 
1620 	while (!isspace((unsigned char)*++l))
1621 		switch (*l) {
1622 		case CHAR_INDIRECT_RELATIVE:
1623 			m->str_flags |= INDIRECT_RELATIVE;
1624 			break;
1625 		default:
1626 			if (ms->flags & MAGIC_CHECK)
1627 				file_magwarn(ms, "indirect modifier `%c' "
1628 					"invalid", *l);
1629 			*lp = l;
1630 			return -1;
1631 		}
1632 	*lp = l;
1633 	return 0;
1634 }
1635 
1636 private void
parse_op_modifier(struct magic_set * ms,struct magic * m,const char ** lp,int op)1637 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1638     int op)
1639 {
1640 	const char *l = *lp;
1641 	char *t;
1642 	uint64_t val;
1643 
1644 	++l;
1645 	m->mask_op |= op;
1646 	val = (uint64_t)strtoull(l, &t, 0);
1647 	l = t;
1648 	m->num_mask = file_signextend(ms, m, val);
1649 	eatsize(&l);
1650 	*lp = l;
1651 }
1652 
1653 private int
parse_string_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1654 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1655 {
1656 	const char *l = *lp;
1657 	char *t;
1658 	int have_range = 0;
1659 
1660 	while (!isspace((unsigned char)*++l)) {
1661 		switch (*l) {
1662 		case '0':  case '1':  case '2':
1663 		case '3':  case '4':  case '5':
1664 		case '6':  case '7':  case '8':
1665 		case '9':
1666 			if (have_range && (ms->flags & MAGIC_CHECK))
1667 				file_magwarn(ms, "multiple ranges");
1668 			have_range = 1;
1669 			m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1670 			if (m->str_range == 0)
1671 				file_magwarn(ms, "zero range");
1672 			l = t - 1;
1673 			break;
1674 		case CHAR_COMPACT_WHITESPACE:
1675 			m->str_flags |= STRING_COMPACT_WHITESPACE;
1676 			break;
1677 		case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1678 			m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1679 			break;
1680 		case CHAR_IGNORE_LOWERCASE:
1681 			m->str_flags |= STRING_IGNORE_LOWERCASE;
1682 			break;
1683 		case CHAR_IGNORE_UPPERCASE:
1684 			m->str_flags |= STRING_IGNORE_UPPERCASE;
1685 			break;
1686 		case CHAR_REGEX_OFFSET_START:
1687 			m->str_flags |= REGEX_OFFSET_START;
1688 			break;
1689 		case CHAR_BINTEST:
1690 			m->str_flags |= STRING_BINTEST;
1691 			break;
1692 		case CHAR_TEXTTEST:
1693 			m->str_flags |= STRING_TEXTTEST;
1694 			break;
1695 		case CHAR_TRIM:
1696 			m->str_flags |= STRING_TRIM;
1697 			break;
1698 		case CHAR_PSTRING_1_LE:
1699 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1700 			if (m->type != FILE_PSTRING)
1701 				goto bad;
1702 			SET_LENGTH(PSTRING_1_LE);
1703 			break;
1704 		case CHAR_PSTRING_2_BE:
1705 			if (m->type != FILE_PSTRING)
1706 				goto bad;
1707 			SET_LENGTH(PSTRING_2_BE);
1708 			break;
1709 		case CHAR_PSTRING_2_LE:
1710 			if (m->type != FILE_PSTRING)
1711 				goto bad;
1712 			SET_LENGTH(PSTRING_2_LE);
1713 			break;
1714 		case CHAR_PSTRING_4_BE:
1715 			if (m->type != FILE_PSTRING)
1716 				goto bad;
1717 			SET_LENGTH(PSTRING_4_BE);
1718 			break;
1719 		case CHAR_PSTRING_4_LE:
1720 			switch (m->type) {
1721 			case FILE_PSTRING:
1722 			case FILE_REGEX:
1723 				break;
1724 			default:
1725 				goto bad;
1726 			}
1727 			SET_LENGTH(PSTRING_4_LE);
1728 			break;
1729 		case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1730 			if (m->type != FILE_PSTRING)
1731 				goto bad;
1732 			m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1733 			break;
1734 		default:
1735 		bad:
1736 			if (ms->flags & MAGIC_CHECK)
1737 				file_magwarn(ms, "string modifier `%c' "
1738 					"invalid", *l);
1739 			goto out;
1740 		}
1741 		/* allow multiple '/' for readability */
1742 		if (l[1] == '/' && !isspace((unsigned char)l[2]))
1743 			l++;
1744 	}
1745 	if (string_modifier_check(ms, m) == -1)
1746 		goto out;
1747 	*lp = l;
1748 	return 0;
1749 out:
1750 	*lp = l;
1751 	return -1;
1752 }
1753 
1754 /*
1755  * parse one line from magic file, put into magic[index++] if valid
1756  */
1757 private int
parse(struct magic_set * ms,struct magic_entry * me,const char * line,size_t lineno,int action)1758 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1759     size_t lineno, int action)
1760 {
1761 #ifdef ENABLE_CONDITIONALS
1762 	static uint32_t last_cont_level = 0;
1763 #endif
1764 	size_t i;
1765 	struct magic *m;
1766 	const char *l = line;
1767 	char *t;
1768 	int op;
1769 	uint32_t cont_level;
1770 	int32_t diff;
1771 
1772 	cont_level = 0;
1773 
1774 	/*
1775 	 * Parse the offset.
1776 	 */
1777 	while (*l == '>') {
1778 		++l;		/* step over */
1779 		cont_level++;
1780 	}
1781 #ifdef ENABLE_CONDITIONALS
1782 	if (cont_level == 0 || cont_level > last_cont_level)
1783 		if (file_check_mem(ms, cont_level) == -1)
1784 			return -1;
1785 	last_cont_level = cont_level;
1786 #endif
1787 	if (cont_level != 0) {
1788 		if (me->mp == NULL) {
1789 			file_magerror(ms, "No current entry for continuation");
1790 			return -1;
1791 		}
1792 		if (me->cont_count == 0) {
1793 			file_magerror(ms, "Continuations present with 0 count");
1794 			return -1;
1795 		}
1796 		m = &me->mp[me->cont_count - 1];
1797 		diff = (int32_t)cont_level - (int32_t)m->cont_level;
1798 		if (diff > 1)
1799 			file_magwarn(ms, "New continuation level %u is more "
1800 			    "than one larger than current level %u", cont_level,
1801 			    m->cont_level);
1802 		if (me->cont_count == me->max_count) {
1803 			struct magic *nm;
1804 			size_t cnt = me->max_count + ALLOC_CHUNK;
1805 			if ((nm = CAST(struct magic *, erealloc(me->mp,
1806 			    sizeof(*nm) * cnt))) == NULL) {
1807 				file_oomem(ms, sizeof(*nm) * cnt);
1808 				return -1;
1809 			}
1810 			me->mp = m = nm;
1811 			me->max_count = CAST(uint32_t, cnt);
1812 		}
1813 		m = &me->mp[me->cont_count++];
1814 		(void)memset(m, 0, sizeof(*m));
1815 		m->cont_level = cont_level;
1816 	} else {
1817 		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1818 		if (me->mp != NULL)
1819 			return 1;
1820 		if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1821 			file_oomem(ms, len);
1822 			return -1;
1823 		}
1824 		me->mp = m;
1825 		me->max_count = ALLOC_CHUNK;
1826 		(void)memset(m, 0, sizeof(*m));
1827 		m->factor_op = FILE_FACTOR_OP_NONE;
1828 		m->cont_level = 0;
1829 		me->cont_count = 1;
1830 	}
1831 	m->lineno = CAST(uint32_t, lineno);
1832 
1833 	if (*l == '&') {  /* m->cont_level == 0 checked below. */
1834                 ++l;            /* step over */
1835                 m->flag |= OFFADD;
1836         }
1837 	if (*l == '(') {
1838 		++l;		/* step over */
1839 		m->flag |= INDIR;
1840 		if (m->flag & OFFADD)
1841 			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1842 
1843 		if (*l == '&') {  /* m->cont_level == 0 checked below */
1844 			++l;            /* step over */
1845 			m->flag |= OFFADD;
1846 		}
1847 	}
1848 	/* Indirect offsets are not valid at level 0. */
1849 	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
1850 		if (ms->flags & MAGIC_CHECK)
1851 			file_magwarn(ms, "relative offset at level 0");
1852 		return -1;
1853 	}
1854 
1855 	/* get offset, then skip over it */
1856 	m->offset = (int32_t)strtol(l, &t, 0);
1857         if (l == t) {
1858 		if (ms->flags & MAGIC_CHECK)
1859 			file_magwarn(ms, "offset `%s' invalid", l);
1860 		return -1;
1861 	}
1862         l = t;
1863 
1864 	if (m->flag & INDIR) {
1865 		m->in_type = FILE_LONG;
1866 		m->in_offset = 0;
1867 		m->in_op = 0;
1868 		/*
1869 		 * read [.,lbs][+-]nnnnn)
1870 		 */
1871 		if (*l == '.' || *l == ',') {
1872 			if (*l == ',')
1873 				m->in_op |= FILE_OPSIGNED;
1874 			l++;
1875 			switch (*l) {
1876 			case 'l':
1877 				m->in_type = FILE_LELONG;
1878 				break;
1879 			case 'L':
1880 				m->in_type = FILE_BELONG;
1881 				break;
1882 			case 'm':
1883 				m->in_type = FILE_MELONG;
1884 				break;
1885 			case 'h':
1886 			case 's':
1887 				m->in_type = FILE_LESHORT;
1888 				break;
1889 			case 'H':
1890 			case 'S':
1891 				m->in_type = FILE_BESHORT;
1892 				break;
1893 			case 'c':
1894 			case 'b':
1895 			case 'C':
1896 			case 'B':
1897 				m->in_type = FILE_BYTE;
1898 				break;
1899 			case 'e':
1900 			case 'f':
1901 			case 'g':
1902 				m->in_type = FILE_LEDOUBLE;
1903 				break;
1904 			case 'E':
1905 			case 'F':
1906 			case 'G':
1907 				m->in_type = FILE_BEDOUBLE;
1908 				break;
1909 			case 'i':
1910 				m->in_type = FILE_LEID3;
1911 				break;
1912 			case 'I':
1913 				m->in_type = FILE_BEID3;
1914 				break;
1915 			default:
1916 				if (ms->flags & MAGIC_CHECK)
1917 					file_magwarn(ms,
1918 					    "indirect offset type `%c' invalid",
1919 					    *l);
1920 				return -1;
1921 			}
1922 			l++;
1923 		}
1924 
1925 		if (*l == '~') {
1926 			m->in_op |= FILE_OPINVERSE;
1927 			l++;
1928 		}
1929 		if ((op = get_op(*l)) != -1) {
1930 			m->in_op |= op;
1931 			l++;
1932 		}
1933 		if (*l == '(') {
1934 			m->in_op |= FILE_OPINDIRECT;
1935 			l++;
1936 		}
1937 		if (isdigit((unsigned char)*l) || *l == '-') {
1938 			m->in_offset = (int32_t)strtol(l, &t, 0);
1939 			if (l == t) {
1940 				if (ms->flags & MAGIC_CHECK)
1941 					file_magwarn(ms,
1942 					    "in_offset `%s' invalid", l);
1943 				return -1;
1944 			}
1945 			l = t;
1946 		}
1947 		if (*l++ != ')' ||
1948 		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
1949 			if (ms->flags & MAGIC_CHECK)
1950 				file_magwarn(ms,
1951 				    "missing ')' in indirect offset");
1952 			return -1;
1953 		}
1954 	}
1955 	EATAB;
1956 
1957 #ifdef ENABLE_CONDITIONALS
1958 	m->cond = get_cond(l, &l);
1959 	if (check_cond(ms, m->cond, cont_level) == -1)
1960 		return -1;
1961 
1962 	EATAB;
1963 #endif
1964 
1965 	/*
1966 	 * Parse the type.
1967 	 */
1968 	if (*l == 'u') {
1969 		/*
1970 		 * Try it as a keyword type prefixed by "u"; match what
1971 		 * follows the "u".  If that fails, try it as an SUS
1972 		 * integer type.
1973 		 */
1974 		m->type = get_type(type_tbl, l + 1, &l);
1975 		if (m->type == FILE_INVALID) {
1976 			/*
1977 			 * Not a keyword type; parse it as an SUS type,
1978 			 * 'u' possibly followed by a number or C/S/L.
1979 			 */
1980 			m->type = get_standard_integer_type(l, &l);
1981 		}
1982 		/* It's unsigned. */
1983 		if (m->type != FILE_INVALID)
1984 			m->flag |= UNSIGNED;
1985 	} else {
1986 		/*
1987 		 * Try it as a keyword type.  If that fails, try it as
1988 		 * an SUS integer type if it begins with "d" or as an
1989 		 * SUS string type if it begins with "s".  In any case,
1990 		 * it's not unsigned.
1991 		 */
1992 		m->type = get_type(type_tbl, l, &l);
1993 		if (m->type == FILE_INVALID) {
1994 			/*
1995 			 * Not a keyword type; parse it as an SUS type,
1996 			 * either 'd' possibly followed by a number or
1997 			 * C/S/L, or just 's'.
1998 			 */
1999 			if (*l == 'd')
2000 				m->type = get_standard_integer_type(l, &l);
2001 			else if (*l == 's' && !isalpha((unsigned char)l[1])) {
2002 				m->type = FILE_STRING;
2003 				++l;
2004 			}
2005 		}
2006 	}
2007 
2008 	if (m->type == FILE_INVALID) {
2009 		/* Not found - try it as a special keyword. */
2010 		m->type = get_type(special_tbl, l, &l);
2011 	}
2012 
2013 	if (m->type == FILE_INVALID) {
2014 		if (ms->flags & MAGIC_CHECK)
2015 			file_magwarn(ms, "type `%s' invalid", l);
2016 		return -1;
2017 	}
2018 
2019 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2020 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2021 
2022 	m->mask_op = 0;
2023 	if (*l == '~') {
2024 		if (!IS_LIBMAGIC_STRING(m->type))
2025 			m->mask_op |= FILE_OPINVERSE;
2026 		else if (ms->flags & MAGIC_CHECK)
2027 			file_magwarn(ms, "'~' invalid for string types");
2028 		++l;
2029 	}
2030 	m->str_range = 0;
2031 	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2032 	if ((op = get_op(*l)) != -1) {
2033 		if (IS_LIBMAGIC_STRING(m->type)) {
2034 			int r;
2035 
2036 			if (op != FILE_OPDIVIDE) {
2037 				if (ms->flags & MAGIC_CHECK)
2038 					file_magwarn(ms,
2039 					    "invalid string/indirect op: "
2040 					    "`%c'", *t);
2041 				return -1;
2042 			}
2043 
2044 			if (m->type == FILE_INDIRECT)
2045 				r = parse_indirect_modifier(ms, m, &l);
2046 			else
2047 				r = parse_string_modifier(ms, m, &l);
2048 			if (r == -1)
2049 				return -1;
2050 		} else
2051 			parse_op_modifier(ms, m, &l, op);
2052 	}
2053 
2054 	/*
2055 	 * We used to set mask to all 1's here, instead let's just not do
2056 	 * anything if mask = 0 (unless you have a better idea)
2057 	 */
2058 	EATAB;
2059 
2060 	switch (*l) {
2061 	case '>':
2062 	case '<':
2063   		m->reln = *l;
2064   		++l;
2065 		if (*l == '=') {
2066 			if (ms->flags & MAGIC_CHECK) {
2067 				file_magwarn(ms, "%c= not supported",
2068 				    m->reln);
2069 				return -1;
2070 			}
2071 		   ++l;
2072 		}
2073 		break;
2074 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
2075 	case '&':
2076 	case '^':
2077 	case '=':
2078   		m->reln = *l;
2079   		++l;
2080 		if (*l == '=') {
2081 		   /* HP compat: ignore &= etc. */
2082 		   ++l;
2083 		}
2084 		break;
2085 	case '!':
2086 		m->reln = *l;
2087 		++l;
2088 		break;
2089 	default:
2090   		m->reln = '=';	/* the default relation */
2091 		if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
2092 		    isspace((unsigned char)l[1])) || !l[1])) {
2093 			m->reln = *l;
2094 			++l;
2095 		}
2096 		break;
2097 	}
2098 	/*
2099 	 * Grab the value part, except for an 'x' reln.
2100 	 */
2101 	if (m->reln != 'x' && getvalue(ms, m, &l, action))
2102 		return -1;
2103 
2104 	/*
2105 	 * TODO finish this macro and start using it!
2106 	 * #define offsetcheck {if (offset > ms->bytes_max -1)
2107 	 *	magwarn("offset too big"); }
2108 	 */
2109 
2110 	/*
2111 	 * Now get last part - the description
2112 	 */
2113 	EATAB;
2114 	if (l[0] == '\b') {
2115 		++l;
2116 		m->flag |= NOSPACE;
2117 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
2118 		++l;
2119 		++l;
2120 		m->flag |= NOSPACE;
2121 	}
2122 	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2123 		continue;
2124 	if (i == sizeof(m->desc)) {
2125 		m->desc[sizeof(m->desc) - 1] = '\0';
2126 		if (ms->flags & MAGIC_CHECK)
2127 			file_magwarn(ms, "description `%s' truncated", m->desc);
2128 	}
2129 
2130         /*
2131 	 * We only do this check while compiling, or if any of the magic
2132 	 * files were not compiled.
2133          */
2134         if (ms->flags & MAGIC_CHECK) {
2135 		if (check_format(ms, m) == -1)
2136 			return -1;
2137 	}
2138 	m->mimetype[0] = '\0';		/* initialise MIME type to none */
2139 	return 0;
2140 }
2141 
2142 /*
2143  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2144  * if valid
2145  */
2146 private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line)2147 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
2148 {
2149 	const char *l = line;
2150 	char *el;
2151 	unsigned long factor;
2152 	struct magic *m = &me->mp[0];
2153 
2154 	if (m->factor_op != FILE_FACTOR_OP_NONE) {
2155 		file_magwarn(ms,
2156 		    "Current entry already has a strength type: %c %d",
2157 		    m->factor_op, m->factor);
2158 		return -1;
2159 	}
2160 	if (m->type == FILE_NAME) {
2161 		file_magwarn(ms, "%s: Strength setting is not supported in "
2162 		    "\"name\" magic entries", m->value.s);
2163 		return -1;
2164 	}
2165 	EATAB;
2166 	switch (*l) {
2167 	case FILE_FACTOR_OP_NONE:
2168 	case FILE_FACTOR_OP_PLUS:
2169 	case FILE_FACTOR_OP_MINUS:
2170 	case FILE_FACTOR_OP_TIMES:
2171 	case FILE_FACTOR_OP_DIV:
2172 		m->factor_op = *l++;
2173 		break;
2174 	default:
2175 		file_magwarn(ms, "Unknown factor op `%c'", *l);
2176 		return -1;
2177 	}
2178 	EATAB;
2179 	factor = strtoul(l, &el, 0);
2180 	if (factor > 255) {
2181 		file_magwarn(ms, "Too large factor `%lu'", factor);
2182 		goto out;
2183 	}
2184 	if (*el && !isspace((unsigned char)*el)) {
2185 		file_magwarn(ms, "Bad factor `%s'", l);
2186 		goto out;
2187 	}
2188 	m->factor = (uint8_t)factor;
2189 	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2190 		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2191 		    m->factor_op, m->factor);
2192 		goto out;
2193 	}
2194 	return 0;
2195 out:
2196 	m->factor_op = FILE_FACTOR_OP_NONE;
2197 	m->factor = 0;
2198 	return -1;
2199 }
2200 
/*
 * Return 1 if x is an ASCII letter or digit or occurs in extra, else 0.
 * Because the lookup in extra uses strchr(3), x == '\0' also counts as
 * good: it matches the terminator of extra.
 */
private int
goodchar(unsigned char x, const char *extra)
{
	if (x < 0x80 && isalnum(x))
		return 1;
	return strchr(extra, x) != NULL;
}
2206 
2207 private int
parse_extra(struct magic_set * ms,struct magic_entry * me,const char * line,zend_off_t off,size_t len,const char * name,const char * extra,int nt)2208 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2209     zend_off_t off, size_t len, const char *name, const char *extra, int nt)
2210 {
2211 	size_t i;
2212 	const char *l = line;
2213 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2214 	char *buf = CAST(char *, CAST(void *, m)) + off;
2215 
2216 	if (buf[0] != '\0') {
2217 		len = nt ? strlen(buf) : len;
2218 		file_magwarn(ms, "Current entry already has a %s type "
2219 		    "`%.*s', new type `%s'", name, (int)len, buf, l);
2220 		return -1;
2221 	}
2222 
2223 	if (*m->desc == '\0') {
2224 		file_magwarn(ms, "Current entry does not yet have a "
2225 		    "description for adding a %s type", name);
2226 		return -1;
2227 	}
2228 
2229 	EATAB;
2230 	for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++)
2231 		continue;
2232 
2233 	if (i == len && *l) {
2234 		if (nt)
2235 			buf[len - 1] = '\0';
2236 		if (ms->flags & MAGIC_CHECK)
2237 			file_magwarn(ms, "%s type `%s' truncated %"
2238 			    SIZE_T_FORMAT "u", name, line, i);
2239 	} else {
2240 		if (!isspace((unsigned char)*l) && !goodchar(*l, extra))
2241 			file_magwarn(ms, "%s type `%s' has bad char '%c'",
2242 			    name, line, *l);
2243 		if (nt)
2244 			buf[i] = '\0';
2245 	}
2246 
2247 	if (i > 0)
2248 		return 0;
2249 
2250 	file_magerror(ms, "Bad magic entry '%s'", line);
2251 	return -1;
2252 }
2253 
2254 /*
2255  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2256  * magic[index - 1]
2257  */
2258 private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line)2259 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2260 {
2261 	struct magic *m = &me->mp[0];
2262 
2263 	return parse_extra(ms, me, line,
2264 	    CAST(off_t, offsetof(struct magic, apple)),
2265 	    sizeof(m->apple), "APPLE", "!+-./?", 0);
2266 }
2267 
2268 /*
2269  * Parse a comma-separated list of extensions
2270  */
2271 private int
parse_ext(struct magic_set * ms,struct magic_entry * me,const char * line)2272 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line)
2273 {
2274 	struct magic *m = &me->mp[0];
2275 
2276 	return parse_extra(ms, me, line,
2277 	    CAST(off_t, offsetof(struct magic, ext)),
2278 	    sizeof(m->ext), "EXTENSION", ",!+-/@?_$", 0);
2279 }
2280 
2281 /*
2282  * parse a MIME annotation line from magic file, put into magic[index - 1]
2283  * if valid
2284  */
2285 private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line)2286 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2287 {
2288 	struct magic *m = &me->mp[0];
2289 
2290 	return parse_extra(ms, me, line,
2291 	    CAST(zend_off_t, offsetof(struct magic, mimetype)),
2292 	    sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1);
2293 }
2294 
2295 private int
check_format_type(const char * ptr,int type,const char ** estr)2296 check_format_type(const char *ptr, int type, const char **estr)
2297 {
2298 	int quad = 0, h;
2299 	size_t len, cnt;
2300 	if (*ptr == '\0') {
2301 		/* Missing format string; bad */
2302 		*estr = "missing format spec";
2303 		return -1;
2304 	}
2305 
2306 	switch (file_formats[type]) {
2307 	case FILE_FMT_QUAD:
2308 		quad = 1;
2309 		/*FALLTHROUGH*/
2310 	case FILE_FMT_NUM:
2311 		if (quad == 0) {
2312 			switch (type) {
2313 			case FILE_BYTE:
2314 				h = 2;
2315 				break;
2316 			case FILE_SHORT:
2317 			case FILE_BESHORT:
2318 			case FILE_LESHORT:
2319 				h = 1;
2320 				break;
2321 			case FILE_LONG:
2322 			case FILE_BELONG:
2323 			case FILE_LELONG:
2324 			case FILE_MELONG:
2325 			case FILE_LEID3:
2326 			case FILE_BEID3:
2327 			case FILE_INDIRECT:
2328 				h = 0;
2329 				break;
2330 			default:
2331 				abort();
2332 			}
2333 		} else
2334 			h = 0;
2335 		if (*ptr == '-')
2336 			ptr++;
2337 		if (*ptr == '.')
2338 			ptr++;
2339 		if (*ptr == '#')
2340 			ptr++;
2341 #define CHECKLEN() do { \
2342 	for (len = cnt = 0; isdigit((unsigned char)*ptr); ptr++, cnt++) \
2343 		len = len * 10 + (*ptr - '0'); \
2344 	if (cnt > 5 || len > 1024) \
2345 		goto toolong; \
2346 } while (/*CONSTCOND*/0)
2347 
2348 		CHECKLEN();
2349 		if (*ptr == '.')
2350 			ptr++;
2351 		CHECKLEN();
2352 		if (quad) {
2353 			if (*ptr++ != 'l')
2354 				goto invalid;
2355 			if (*ptr++ != 'l')
2356 				goto invalid;
2357 		}
2358 
2359 		switch (*ptr++) {
2360 #ifdef STRICT_FORMAT 	/* "long" formats are int formats for us */
2361 		/* so don't accept the 'l' modifier */
2362 		case 'l':
2363 			switch (*ptr++) {
2364 			case 'i':
2365 			case 'd':
2366 			case 'u':
2367 			case 'o':
2368 			case 'x':
2369 			case 'X':
2370 				if (h == 0)
2371 					return 0;
2372 				/*FALLTHROUGH*/
2373 			default:
2374 				goto invalid;
2375 			}
2376 
2377 		/*
2378 		 * Don't accept h and hh modifiers. They make writing
2379 		 * magic entries more complicated, for very little benefit
2380 		 */
2381 		case 'h':
2382 			if (h-- <= 0)
2383 				goto invalid;
2384 			switch (*ptr++) {
2385 			case 'h':
2386 				if (h-- <= 0)
2387 					goto invalid;
2388 				switch (*ptr++) {
2389 				case 'i':
2390 				case 'd':
2391 				case 'u':
2392 				case 'o':
2393 				case 'x':
2394 				case 'X':
2395 					return 0;
2396 				default:
2397 					goto invalid;
2398 				}
2399 			case 'i':
2400 			case 'd':
2401 			case 'u':
2402 			case 'o':
2403 			case 'x':
2404 			case 'X':
2405 				if (h == 0)
2406 					return 0;
2407 				/*FALLTHROUGH*/
2408 			default:
2409 				goto invalid;
2410 			}
2411 #endif
2412 		case 'c':
2413 			if (h == 2)
2414 				return 0;
2415 			goto invalid;
2416 		case 'i':
2417 		case 'd':
2418 		case 'u':
2419 		case 'o':
2420 		case 'x':
2421 		case 'X':
2422 #ifdef STRICT_FORMAT
2423 			if (h == 0)
2424 				return 0;
2425 			/*FALLTHROUGH*/
2426 #else
2427 			return 0;
2428 #endif
2429 		default:
2430 			goto invalid;
2431 		}
2432 
2433 	case FILE_FMT_FLOAT:
2434 	case FILE_FMT_DOUBLE:
2435 		if (*ptr == '-')
2436 			ptr++;
2437 		if (*ptr == '.')
2438 			ptr++;
2439 		CHECKLEN();
2440 		if (*ptr == '.')
2441 			ptr++;
2442 		CHECKLEN();
2443 		switch (*ptr++) {
2444 		case 'e':
2445 		case 'E':
2446 		case 'f':
2447 		case 'F':
2448 		case 'g':
2449 		case 'G':
2450 			return 0;
2451 
2452 		default:
2453 			goto invalid;
2454 		}
2455 
2456 
2457 	case FILE_FMT_STR:
2458 		if (*ptr == '-')
2459 			ptr++;
2460 		while (isdigit((unsigned char )*ptr))
2461 			ptr++;
2462 		if (*ptr == '.') {
2463 			ptr++;
2464 			while (isdigit((unsigned char )*ptr))
2465 				ptr++;
2466 		}
2467 
2468 		switch (*ptr++) {
2469 		case 's':
2470 			return 0;
2471 		default:
2472 			goto invalid;
2473 		}
2474 
2475 	default:
2476 		/* internal error */
2477 		abort();
2478 	}
2479 invalid:
2480 	*estr = "not valid";
2481 toolong:
2482 	*estr = "too long";
2483 	return -1;
2484 }
2485 
2486 /*
2487  * Check that the optional printf format in description matches
2488  * the type of the magic.
2489  */
2490 private int
check_format(struct magic_set * ms,struct magic * m)2491 check_format(struct magic_set *ms, struct magic *m)
2492 {
2493 	char *ptr;
2494 	const char *estr;
2495 
2496 	for (ptr = m->desc; *ptr; ptr++)
2497 		if (*ptr == '%')
2498 			break;
2499 	if (*ptr == '\0') {
2500 		/* No format string; ok */
2501 		return 1;
2502 	}
2503 
2504 	assert(file_nformats == file_nnames);
2505 
2506 	if (m->type >= file_nformats) {
2507 		file_magwarn(ms, "Internal error inconsistency between "
2508 		    "m->type and format strings");
2509 		return -1;
2510 	}
2511 	if (file_formats[m->type] == FILE_FMT_NONE) {
2512 		file_magwarn(ms, "No format string for `%s' with description "
2513 		    "`%s'", m->desc, file_names[m->type]);
2514 		return -1;
2515 	}
2516 
2517 	ptr++;
2518 	if (check_format_type(ptr, m->type, &estr) == -1) {
2519 		/*
2520 		 * TODO: this error message is unhelpful if the format
2521 		 * string is not one character long
2522 		 */
2523 		file_magwarn(ms, "Printf format is %s for type "
2524 		    "`%s' in description `%s'", estr,
2525 		    file_names[m->type], m->desc);
2526 		return -1;
2527 	}
2528 
2529 	for (; *ptr; ptr++) {
2530 		if (*ptr == '%') {
2531 			file_magwarn(ms,
2532 			    "Too many format strings (should have at most one) "
2533 			    "for `%s' with description `%s'",
2534 			    file_names[m->type], m->desc);
2535 			return -1;
2536 		}
2537 	}
2538 	return 0;
2539 }
2540 
2541 /*
2542  * Read a numeric value from a pointer, into the value union of a magic
2543  * pointer, according to the magic type.  Update the string pointer to point
2544  * just after the number read.  Return 0 for success, non-zero for failure.
2545  */
2546 private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2547 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2548 {
2549 	char *ep;
2550 	uint64_t ull;
2551 
2552 	switch (m->type) {
2553 	case FILE_BESTRING16:
2554 	case FILE_LESTRING16:
2555 	case FILE_STRING:
2556 	case FILE_PSTRING:
2557 	case FILE_REGEX:
2558 	case FILE_SEARCH:
2559 	case FILE_NAME:
2560 	case FILE_USE:
2561 	case FILE_DER:
2562 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2563 		if (*p == NULL) {
2564 			if (ms->flags & MAGIC_CHECK)
2565 				file_magwarn(ms, "cannot get string from `%s'",
2566 				    m->value.s);
2567 			return -1;
2568 		}
2569 		if (m->type == FILE_REGEX) {
2570 			zval pattern;
2571 			int options = 0;
2572 			pcre_cache_entry *pce;
2573 
2574 			convert_libmagic_pattern(&pattern, m->value.s, strlen(m->value.s), options);
2575 
2576 			if ((pce = pcre_get_compiled_regex_cache(Z_STR(pattern))) == NULL) {
2577 				zval_dtor(&pattern);
2578 				return -1;
2579 			}
2580 			zval_dtor(&pattern);
2581 
2582 			return 0;
2583 		}
2584 		return 0;
2585 	default:
2586 		if (m->reln == 'x')
2587 			return 0;
2588 		break;
2589 	}
2590 
2591 	switch (m->type) {
2592 	case FILE_FLOAT:
2593 	case FILE_BEFLOAT:
2594 	case FILE_LEFLOAT:
2595 		errno = 0;
2596 #ifdef HAVE_STRTOF
2597 		m->value.f = strtof(*p, &ep);
2598 #else
2599 		m->value.f = (float)strtod(*p, &ep);
2600 #endif
2601 		if (errno == 0)
2602 			*p = ep;
2603 		return 0;
2604 	case FILE_DOUBLE:
2605 	case FILE_BEDOUBLE:
2606 	case FILE_LEDOUBLE:
2607 		errno = 0;
2608 		m->value.d = strtod(*p, &ep);
2609 		if (errno == 0)
2610 			*p = ep;
2611 		return 0;
2612 	default:
2613 		errno = 0;
2614 		ull = (uint64_t)strtoull(*p, &ep, 0);
2615 		m->value.q = file_signextend(ms, m, ull);
2616 		if (*p == ep) {
2617 			file_magwarn(ms, "Unparseable number `%s'", *p);
2618 		} else {
2619 			size_t ts = typesize(m->type);
2620 			uint64_t x;
2621 			const char *q;
2622 
2623 			if (ts == (size_t)~0) {
2624 				file_magwarn(ms,
2625 				    "Expected numeric type got `%s'",
2626 				    type_tbl[m->type].name);
2627 			}
2628 			for (q = *p; isspace((unsigned char)*q); q++)
2629 				continue;
2630 			if (*q == '-')
2631 				ull = -(int64_t)ull;
2632 			switch (ts) {
2633 			case 1:
2634 				x = (uint64_t)(ull & ~0xffULL);
2635 				break;
2636 			case 2:
2637 				x = (uint64_t)(ull & ~0xffffULL);
2638 				break;
2639 			case 4:
2640 				x = (uint64_t)(ull & ~0xffffffffULL);
2641 				break;
2642 			case 8:
2643 				x = 0;
2644 				break;
2645 			default:
2646 				abort();
2647 			}
2648 			if (x) {
2649 				file_magwarn(ms, "Overflow for numeric"
2650 				    " type `%s' value %#" PRIx64,
2651 				    type_tbl[m->type].name, ull);
2652 			}
2653 		}
2654 		if (errno == 0) {
2655 			*p = ep;
2656 			eatsize(p);
2657 		}
2658 		return 0;
2659 	}
2660 }
2661 
2662 /*
2663  * Convert a string containing C character escapes.  Stop at an unescaped
2664  * space or tab.
2665  * Copy the converted version to "m->value.s", and the length in m->vallen.
2666  * Return updated scan pointer as function result. Warn if set.
2667  */
2668 private const char *
getstr(struct magic_set * ms,struct magic * m,const char * s,int warn)2669 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2670 {
2671 	const char *origs = s;
2672 	char	*p = m->value.s;
2673 	size_t  plen = sizeof(m->value.s);
2674 	char 	*origp = p;
2675 	char	*pmax = p + plen - 1;
2676 	int	c;
2677 	int	val;
2678 
2679 	while ((c = *s++) != '\0') {
2680 		if (isspace((unsigned char) c))
2681 			break;
2682 		if (p >= pmax) {
2683 			file_error(ms, 0, "string too long: `%s'", origs);
2684 			return NULL;
2685 		}
2686 		if (c == '\\') {
2687 			switch(c = *s++) {
2688 
2689 			case '\0':
2690 				if (warn)
2691 					file_magwarn(ms, "incomplete escape");
2692 				s--;
2693 				goto out;
2694 
2695 			case '\t':
2696 				if (warn) {
2697 					file_magwarn(ms,
2698 					    "escaped tab found, use \\t instead");
2699 					warn = 0;	/* already did */
2700 				}
2701 				/*FALLTHROUGH*/
2702 			default:
2703 				if (warn) {
2704 					if (isprint((unsigned char)c)) {
2705 						/* Allow escaping of
2706 						 * ``relations'' */
2707 						if (strchr("<>&^=!", c) == NULL
2708 						    && (m->type != FILE_REGEX ||
2709 						    strchr("[]().*?^$|{}", c)
2710 						    == NULL)) {
2711 							file_magwarn(ms, "no "
2712 							    "need to escape "
2713 							    "`%c'", c);
2714 						}
2715 					} else {
2716 						file_magwarn(ms,
2717 						    "unknown escape sequence: "
2718 						    "\\%03o", c);
2719 					}
2720 				}
2721 				/*FALLTHROUGH*/
2722 			/* space, perhaps force people to use \040? */
2723 			case ' ':
2724 #if 0
2725 			/*
2726 			 * Other things people escape, but shouldn't need to,
2727 			 * so we disallow them
2728 			 */
2729 			case '\'':
2730 			case '"':
2731 			case '?':
2732 #endif
2733 			/* Relations */
2734 			case '>':
2735 			case '<':
2736 			case '&':
2737 			case '^':
2738 			case '=':
2739 			case '!':
2740 			/* and baskslash itself */
2741 			case '\\':
2742 				*p++ = (char) c;
2743 				break;
2744 
2745 			case 'a':
2746 				*p++ = '\a';
2747 				break;
2748 
2749 			case 'b':
2750 				*p++ = '\b';
2751 				break;
2752 
2753 			case 'f':
2754 				*p++ = '\f';
2755 				break;
2756 
2757 			case 'n':
2758 				*p++ = '\n';
2759 				break;
2760 
2761 			case 'r':
2762 				*p++ = '\r';
2763 				break;
2764 
2765 			case 't':
2766 				*p++ = '\t';
2767 				break;
2768 
2769 			case 'v':
2770 				*p++ = '\v';
2771 				break;
2772 
2773 			/* \ and up to 3 octal digits */
2774 			case '0':
2775 			case '1':
2776 			case '2':
2777 			case '3':
2778 			case '4':
2779 			case '5':
2780 			case '6':
2781 			case '7':
2782 				val = c - '0';
2783 				c = *s++;  /* try for 2 */
2784 				if (c >= '0' && c <= '7') {
2785 					val = (val << 3) | (c - '0');
2786 					c = *s++;  /* try for 3 */
2787 					if (c >= '0' && c <= '7')
2788 						val = (val << 3) | (c-'0');
2789 					else
2790 						--s;
2791 				}
2792 				else
2793 					--s;
2794 				*p++ = (char)val;
2795 				break;
2796 
2797 			/* \x and up to 2 hex digits */
2798 			case 'x':
2799 				val = 'x';	/* Default if no digits */
2800 				c = hextoint(*s++);	/* Get next char */
2801 				if (c >= 0) {
2802 					val = c;
2803 					c = hextoint(*s++);
2804 					if (c >= 0)
2805 						val = (val << 4) + c;
2806 					else
2807 						--s;
2808 				} else
2809 					--s;
2810 				*p++ = (char)val;
2811 				break;
2812 			}
2813 		} else
2814 			*p++ = (char)c;
2815 	}
2816 	--s;
2817 out:
2818 	*p = '\0';
2819 	m->vallen = CAST(unsigned char, (p - origp));
2820 	if (m->type == FILE_PSTRING)
2821 		m->vallen += (unsigned char)file_pstring_length_size(m);
2822 	return s;
2823 }
2824 
2825 
/*
 * Convert one hexadecimal digit character to its value (0..15).
 * Returns -1 when c is not a hexadecimal digit.
 */
private int
hextoint(int c)
{
	const unsigned char uc = (unsigned char) c;

	if (!isascii(uc))
		return -1;
	if (isdigit(uc))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
2840 
2841 
/*
 * Write a string to fp, rendering non-printable bytes as C escapes.
 *
 * When len is ~0U the string is taken to be NUL terminated; otherwise
 * exactly len bytes are written.  Bytes in the range 040..0176 are
 * emitted as is, the named control characters as \a \b \f \n \r \t \v,
 * and everything else as a three-digit octal escape.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	for (;;) {
		char	ch;
		int	letter;

		if (len == ~0U) {
			ch = *s++;
			if (ch == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
			ch = *s++;
		}

		if (ch >= 040 && ch <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(ch, fp);
			continue;
		}

		/* pick the escape letter, 0 meaning "use octal" */
		switch (ch) {
		case '\a':
			letter = 'a';
			break;
		case '\b':
			letter = 'b';
			break;
		case '\f':
			letter = 'f';
			break;
		case '\n':
			letter = 'n';
			break;
		case '\r':
			letter = 'r';
			break;
		case '\t':
			letter = 't';
			break;
		case '\v':
			letter = 'v';
			break;
		default:
			letter = 0;
			break;
		}

		(void) fputc('\\', fp);
		if (letter != 0)
			(void) fputc(letter, fp);
		else
			(void) fprintf(fp, "%.3o", ch & 0377);
	}
}
2901 
2902 /*
2903  * eatsize(): Eat the size spec from a number [eg. 10UL]
2904  */
2905 private void
eatsize(const char ** p)2906 eatsize(const char **p)
2907 {
2908 	const char *l = *p;
2909 
2910 	if (LOWCASE(*l) == 'u')
2911 		l++;
2912 
2913 	switch (LOWCASE(*l)) {
2914 	case 'l':    /* long */
2915 	case 's':    /* short */
2916 	case 'h':    /* short */
2917 	case 'b':    /* char/byte */
2918 	case 'c':    /* char/byte */
2919 		l++;
2920 		/*FALLTHROUGH*/
2921 	default:
2922 		break;
2923 	}
2924 
2925 	*p = l;
2926 }
2927 
2928 /*
2929  * handle a buffer containing a compiled file.
2930  */
2931 private struct magic_map *
apprentice_buf(struct magic_set * ms,struct magic * buf,size_t len)2932 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
2933 {
2934 	struct magic_map *map;
2935 
2936 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2937 		file_oomem(ms, sizeof(*map));
2938 		return NULL;
2939 	}
2940 	map->len = len;
2941 	map->p = buf;
2942 	map->type = MAP_TYPE_USER;
2943 	if (check_buffer(ms, map, "buffer") != 0) {
2944 		apprentice_unmap(map);
2945 		return NULL;
2946 	}
2947 	return map;
2948 }
2949 
2950 /*
2951  * handle a compiled file.
2952  */
2953 
2954 private struct magic_map *
apprentice_map(struct magic_set * ms,const char * fn)2955 apprentice_map(struct magic_set *ms, const char *fn)
2956 {
2957 	uint32_t *ptr;
2958 	uint32_t version, entries, nentries;
2959 	int needsbyteswap;
2960 	char *dbname = NULL;
2961 	struct magic_map *map;
2962 	size_t i;
2963 	php_stream *stream = NULL;
2964 	php_stream_statbuf st;
2965 
2966 
2967 
2968 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2969 		file_oomem(ms, sizeof(*map));
2970 		return NULL;
2971 	}
2972 
2973 	if (fn == NULL) {
2974 		map->p = (void *)&php_magic_database;
2975 		goto internal_loaded;
2976 	}
2977 
2978 #ifdef PHP_WIN32
2979 	/* Don't bother on windows with php_stream_open_wrapper,
2980 	return to give apprentice_load() a chance. */
2981 	if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2982                if (st.sb.st_mode & S_IFDIR) {
2983                        return NULL;
2984                }
2985        }
2986 #endif
2987 
2988 	dbname = mkdbname(ms, fn, 0);
2989 	if (dbname == NULL)
2990 		goto error;
2991 
2992 	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2993 
2994 	if (!stream) {
2995 		goto error;
2996 	}
2997 
2998 	if (php_stream_stat(stream, &st) < 0) {
2999 		file_error(ms, errno, "cannot stat `%s'", dbname);
3000 		goto error;
3001 	}
3002 	if (st.sb.st_size < 8 || st.sb.st_size > MAXMAGIC_SIZE) {
3003 		file_error(ms, 0, "file `%s' is too %s", dbname,
3004 		    st.sb.st_size < 8 ? "small" : "large");
3005 		goto error;
3006 	}
3007 
3008 	map->len = (size_t)st.sb.st_size;
3009 	if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
3010 		file_oomem(ms, map->len);
3011 		goto error;
3012 	}
3013 	if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
3014 		file_badread(ms);
3015 		goto error;
3016 	}
3017 	map->len = 0;
3018 #define RET	1
3019 
3020 	php_stream_close(stream);
3021 	stream = NULL;
3022 
3023 internal_loaded:
3024 	ptr = (uint32_t *)(void *)map->p;
3025 	if (*ptr != MAGICNO) {
3026 		if (swap4(*ptr) != MAGICNO) {
3027 			file_error(ms, 0, "bad magic in `%s'", dbname);
3028 			goto error;
3029 		}
3030 		needsbyteswap = 1;
3031 	} else
3032 		needsbyteswap = 0;
3033 	if (needsbyteswap)
3034 		version = swap4(ptr[1]);
3035 	else
3036 		version = ptr[1];
3037 	if (version != VERSIONNO) {
3038 		file_error(ms, 0, "File %d.%d supports only version %d magic "
3039 		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
3040 		    VERSIONNO, dbname, version);
3041 		goto error;
3042 	}
3043 
3044 	/* php_magic_database is a const, performing writes will segfault. This is for big-endian
3045 	machines only, PPC and Sparc specifically. Consider static variable or MINIT in
3046 	future. */
3047 	if (needsbyteswap && fn == NULL) {
3048 		map->p = emalloc(sizeof(php_magic_database));
3049 		map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
3050 	}
3051 
3052 	if (NULL != fn) {
3053 		nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
3054 		entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
3055 		if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
3056 			file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
3057 				dbname, (unsigned long long)st.sb.st_size,
3058 				sizeof(struct magic));
3059 			goto error;
3060 		}
3061 	}
3062 	map->magic[0] = CAST(struct magic *, map->p) + 1;
3063 	nentries = 0;
3064 	for (i = 0; i < MAGIC_SETS; i++) {
3065 		if (needsbyteswap)
3066 			map->nmagic[i] = swap4(ptr[i + 2]);
3067 		else
3068 			map->nmagic[i] = ptr[i + 2];
3069 		if (i != MAGIC_SETS - 1)
3070 			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3071 		nentries += map->nmagic[i];
3072 	}
3073 	if (NULL != fn && entries != nentries + 1) {
3074 		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3075 		    dbname, entries, nentries + 1);
3076 		goto error;
3077 	}
3078 	if (needsbyteswap)
3079 		for (i = 0; i < MAGIC_SETS; i++)
3080 			byteswap(map->magic[i], map->nmagic[i]);
3081 
3082 	if (dbname) {
3083 		efree(dbname);
3084 	}
3085 	return map;
3086 
3087 error:
3088 	if (stream) {
3089 		php_stream_close(stream);
3090 	}
3091 	apprentice_unmap(map);
3092 	if (dbname) {
3093 		efree(dbname);
3094 	}
3095 	return NULL;
3096 }
3097 
3098 private int
check_buffer(struct magic_set * ms,struct magic_map * map,const char * dbname)3099 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
3100 {
3101 	uint32_t *ptr;
3102 	uint32_t entries, nentries;
3103 	uint32_t version;
3104 	int i, needsbyteswap;
3105 
3106 	ptr = CAST(uint32_t *, map->p);
3107 	if (*ptr != MAGICNO) {
3108 		if (swap4(*ptr) != MAGICNO) {
3109 			file_error(ms, 0, "bad magic in `%s'", dbname);
3110 			return -1;
3111 		}
3112 		needsbyteswap = 1;
3113 	} else
3114 		needsbyteswap = 0;
3115 	if (needsbyteswap)
3116 		version = swap4(ptr[1]);
3117 	else
3118 		version = ptr[1];
3119 	if (version != VERSIONNO) {
3120 		file_error(ms, 0, "File %s supports only version %d magic "
3121 		    "files. `%s' is version %d", FILE_VERSION_MAJOR,
3122 		    VERSIONNO, dbname, version);
3123 		return -1;
3124 	}
3125 	entries = (uint32_t)(map->len / sizeof(struct magic));
3126 	if ((entries * sizeof(struct magic)) != map->len) {
3127 		file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
3128 		    "a multiple of %" SIZE_T_FORMAT "u",
3129 		    dbname, map->len, sizeof(struct magic));
3130 		return -1;
3131 	}
3132 	map->magic[0] = CAST(struct magic *, map->p) + 1;
3133 	nentries = 0;
3134 	for (i = 0; i < MAGIC_SETS; i++) {
3135 		if (needsbyteswap)
3136 			map->nmagic[i] = swap4(ptr[i + 2]);
3137 		else
3138 			map->nmagic[i] = ptr[i + 2];
3139 		if (i != MAGIC_SETS - 1)
3140 			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3141 		nentries += map->nmagic[i];
3142 	}
3143 	if (entries != nentries + 1) {
3144 		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3145 		    dbname, entries, nentries + 1);
3146 		return -1;
3147 	}
3148 	if (needsbyteswap)
3149 		for (i = 0; i < MAGIC_SETS; i++)
3150 			byteswap(map->magic[i], map->nmagic[i]);
3151 	return 0;
3152 }
3153 
3154 /*
3155  * handle an mmaped file.
3156  */
3157 private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)3158 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3159 {
3160 	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3161 	static const size_t m = sizeof(**map->magic);
3162 	size_t len;
3163 	char *dbname;
3164 	int rv = -1;
3165 	uint32_t i;
3166 	union {
3167 		struct magic m;
3168 		uint32_t h[2 + MAGIC_SETS];
3169 	} hdr;
3170 	php_stream *stream;
3171 
3172 	dbname = mkdbname(ms, fn, 1);
3173 
3174 	if (dbname == NULL)
3175 		goto out;
3176 
3177 	/* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
3178 	stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
3179 
3180 	if (!stream) {
3181 		file_error(ms, errno, "cannot open `%s'", dbname);
3182 		goto out;
3183 	}
3184 	memset(&hdr, 0, sizeof(hdr));
3185 	hdr.h[0] = MAGICNO;
3186 	hdr.h[1] = VERSIONNO;
3187 	memcpy(hdr.h + 2, map->nmagic, nm);
3188 
3189 	if (php_stream_write(stream,(const char *)&hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
3190 		file_error(ms, errno, "error writing `%s'", dbname);
3191 		goto out;
3192 	}
3193 
3194 	for (i = 0; i < MAGIC_SETS; i++) {
3195 		len = m * map->nmagic[i];
3196 		if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
3197 			file_error(ms, errno, "error writing `%s'", dbname);
3198 			goto out;
3199 		}
3200 	}
3201 
3202 	if (stream) {
3203 		php_stream_close(stream);
3204 	}
3205 	rv = 0;
3206 out:
3207 	efree(dbname);
3208 	return rv;
3209 }
3210 
3211 private const char ext[] = ".mgc";
3212 /*
3213  * make a dbname
3214  */
3215 private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)3216 mkdbname(struct magic_set *ms, const char *fn, int strip)
3217 {
3218 	const char *p, *q;
3219 	char *buf;
3220 
3221 	if (strip) {
3222 		if ((p = strrchr(fn, '/')) != NULL)
3223 			fn = ++p;
3224 	}
3225 
3226 	for (q = fn; *q; q++)
3227 		continue;
3228 	/* Look for .mgc */
3229 	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3230 		if (*p != *q)
3231 			break;
3232 
3233 	/* Did not find .mgc, restore q */
3234 	if (p >= ext)
3235 		while (*q)
3236 			q++;
3237 
3238 	q++;
3239 	/* Compatibility with old code that looked in .mime */
3240 	if (ms->flags & MAGIC_MIME) {
3241 		spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
3242 #ifdef PHP_WIN32
3243 		if (VCWD_ACCESS(buf, R_OK) == 0) {
3244 #else
3245 		if (VCWD_ACCESS(buf, R_OK) != -1) {
3246 #endif
3247 			ms->flags &= MAGIC_MIME_TYPE;
3248 			return buf;
3249 		}
3250 		efree(buf);
3251 	}
3252 	spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
3253 
3254 	/* Compatibility with old code that looked in .mime */
3255 	if (strstr(fn, ".mime") != NULL)
3256 		ms->flags &= MAGIC_MIME_TYPE;
3257 	return buf;
3258 }
3259 
3260 /*
3261  * Byteswap an mmap'ed file if needed
3262  */
3263 private void
3264 byteswap(struct magic *magic, uint32_t nmagic)
3265 {
3266 	uint32_t i;
3267 	for (i = 0; i < nmagic; i++)
3268 		bs1(&magic[i]);
3269 }
3270 
/*
 * Return sv with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	const unsigned int lo = sv & 0xffU;
	const unsigned int hi = (sv >> 8) & 0xffU;

	return (uint16_t)((lo << 8) | hi);
}
3284 
/*
 * Return sv with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & UINT32_C(0x000000ff)) << 24)
	    | ((sv & UINT32_C(0x0000ff00)) << 8)
	    | ((sv & UINT32_C(0x00ff0000)) >> 8)
	    | ((sv & UINT32_C(0xff000000)) >> 24);
}
3300 
/*
 * Return sv with the order of its eight bytes fully reversed.
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	int n;

	/* peel bytes off the low end of sv and push them onto rv */
	for (n = 0; n < 8; n++) {
		rv = (rv << 8) | (sv & 0xff);
		sv >>= 8;
	}
	return rv;
}
3331 
3332 /*
3333  * byteswap a single magic entry
3334  */
3335 private void
3336 bs1(struct magic *m)
3337 {
3338 	m->cont_level = swap2(m->cont_level);
3339 	m->offset = swap4((int32_t)m->offset);
3340 	m->in_offset = swap4((uint32_t)m->in_offset);
3341 	m->lineno = swap4((uint32_t)m->lineno);
3342 	if (IS_LIBMAGIC_STRING(m->type)) {
3343 		m->str_range = swap4(m->str_range);
3344 		m->str_flags = swap4(m->str_flags);
3345 	}
3346 	else {
3347 		m->value.q = swap8(m->value.q);
3348 		m->num_mask = swap8(m->num_mask);
3349 	}
3350 }
3351 
3352 protected size_t
3353 file_pstring_length_size(const struct magic *m)
3354 {
3355 	switch (m->str_flags & PSTRING_LEN) {
3356 	case PSTRING_1_LE:
3357 		return 1;
3358 	case PSTRING_2_LE:
3359 	case PSTRING_2_BE:
3360 		return 2;
3361 	case PSTRING_4_LE:
3362 	case PSTRING_4_BE:
3363 		return 4;
3364 	default:
3365 		abort();	/* Impossible */
3366 		return 1;
3367 	}
3368 }
3369 protected size_t
3370 file_pstring_get_length(const struct magic *m, const char *ss)
3371 {
3372 	size_t len = 0;
3373 	const unsigned char *s = (const unsigned char *)ss;
3374 	unsigned int s3, s2, s1, s0;
3375 
3376 	switch (m->str_flags & PSTRING_LEN) {
3377 	case PSTRING_1_LE:
3378 		len = *s;
3379 		break;
3380 	case PSTRING_2_LE:
3381 		s0 = s[0];
3382 		s1 = s[1];
3383 		len = (s1 << 8) | s0;
3384 		break;
3385 	case PSTRING_2_BE:
3386 		s0 = s[0];
3387 		s1 = s[1];
3388 		len = (s0 << 8) | s1;
3389 		break;
3390 	case PSTRING_4_LE:
3391 		s0 = s[0];
3392 		s1 = s[1];
3393 		s2 = s[2];
3394 		s3 = s[3];
3395 		len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3396 		break;
3397 	case PSTRING_4_BE:
3398 		s0 = s[0];
3399 		s1 = s[1];
3400 		s2 = s[2];
3401 		s3 = s[3];
3402 		len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3403 		break;
3404 	default:
3405 		abort();	/* Impossible */
3406 	}
3407 
3408 	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
3409 		len -= file_pstring_length_size(m);
3410 
3411 	return len;
3412 }
3413 
3414 protected int
3415 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3416 {
3417 	uint32_t i, j;
3418 	struct mlist *mlist, *ml;
3419 
3420 	mlist = ms->mlist[1];
3421 
3422 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3423 		struct magic *ma = ml->magic;
3424 		uint32_t nma = ml->nmagic;
3425 		for (i = 0; i < nma; i++) {
3426 			if (ma[i].type != FILE_NAME)
3427 				continue;
3428 			if (strcmp(ma[i].value.s, name) == 0) {
3429 				v->magic = &ma[i];
3430 				for (j = i + 1; j < nma; j++)
3431 				    if (ma[j].cont_level == 0)
3432 					    break;
3433 				v->nmagic = j - i;
3434 				return 0;
3435 			}
3436 		}
3437 	}
3438 	return -1;
3439 }
3440