xref: /PHP-8.2/ext/fileinfo/libmagic/apprentice.c (revision 1b39d4c6)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * apprentice - make one pass through /etc/magic, learning its secrets.
30  */
31 
32 #include "file.h"
33 
34 #ifndef	lint
35 FILE_RCSID("@(#)$File: apprentice.c,v 1.326 2022/09/13 18:46:07 christos Exp $")
36 #endif	/* lint */
37 
38 #include "magic.h"
39 #include <stdlib.h>
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #include <stddef.h>
44 #include <string.h>
45 #include <assert.h>
46 #include <ctype.h>
47 #include <fcntl.h>
48 #ifdef QUICK
49 #include <sys/mman.h>
50 #endif
51 #ifdef HAVE_DIRENT_H
52 #include <dirent.h>
53 #endif
54 #include <limits.h>
55 #ifdef HAVE_BYTESWAP_H
56 #include <byteswap.h>
57 #endif
58 #ifdef HAVE_SYS_BSWAP_H
59 #include <sys/bswap.h>
60 #endif
61 
62 
/*
 * EATAB: advance the cursor named `l' (which must be in scope where the
 * macro is used) past any run of ASCII whitespace.
 */
#define	EATAB \
	{ \
		while (isascii(CAST(unsigned char, *l)) && \
		    isspace(CAST(unsigned char, *l))) \
			++l; \
	}

/*
 * LOWCASE: the lower-case form of `l' when it is an upper-case letter,
 * `l' itself otherwise.  The argument is expanded more than once, so it
 * must be free of side effects.
 */
#define LOWCASE(l) \
	(isupper(CAST(unsigned char, l)) ? tolower(CAST(unsigned char, l)) : (l))
/*
 * Work around a bug in the Digital Unix headers (confirmed at least for
 * OSF1 V4.0 878): drop the system MAP_FAILED there so that the fallback
 * definition below takes effect.
 */
#if defined(__osf__) && defined(__DECC) && defined(MAP_FAILED)
#undef MAP_FAILED
#endif

/* Fallback for systems whose headers do not provide MAP_FAILED. */
#ifndef MAP_FAILED
#define MAP_FAILED (void *) -1
#endif

/* Fallback for systems whose headers do not provide MAP_FILE. */
#ifndef MAP_FILE
#define MAP_FILE 0
#endif
84 
/* Allocation granularities (presumably used by the parser -- confirm). */
#define ALLOC_CHUNK	CAST(size_t, 10)
#define ALLOC_INCR	CAST(size_t, 200)

/*
 * Tags describing where a map's storage came from; presumably the
 * values stored in struct magic_map.type -- confirm against the loader.
 */
#define MAP_TYPE_USER	0
#define MAP_TYPE_MALLOC	1
#define MAP_TYPE_MMAP	2
91 
/*
 * A magic entry as handled while loading: `mp' is what gets passed to
 * apprentice_magic_strength() together with `cont_count' when entries
 * are sorted.
 */
struct magic_entry {
	/* first magic record of the entry */
	struct magic *mp;
	/* presumably the number of records in use at mp -- confirm in parse() */
	uint32_t cont_count;
	/* presumably the allocated capacity of mp -- confirm in parse() */
	uint32_t max_count;
};
97 
/*
 * A growable collection of magic entries.
 * NOTE(review): count/max look like used/allocated element counts of
 * `me'; confirm against the code that fills the set.
 */
struct magic_entry_set {
	/* array of entries */
	struct magic_entry *me;
	/* entries in use (assumed) */
	uint32_t count;
	/* entries allocated (assumed) */
	uint32_t max;
};
103 
104 struct magic_map {
105 	void *p;
106 	size_t len;
107 	int type;
108 	struct magic *magic[MAGIC_SETS];
109 	uint32_t nmagic[MAGIC_SETS];
110 };
111 
112 int file_formats[FILE_NAMES_SIZE];
113 const size_t file_nformats = FILE_NAMES_SIZE;
114 const char *file_names[FILE_NAMES_SIZE];
115 const size_t file_nnames = FILE_NAMES_SIZE;
116 
117 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
118 private int hextoint(int);
119 private const char *getstr(struct magic_set *, struct magic *, const char *,
120     int);
121 private int parse(struct magic_set *, struct magic_entry *, const char *,
122     size_t, int);
123 private void eatsize(const char **);
124 private int apprentice_1(struct magic_set *, const char *, int);
125 private ssize_t apprentice_magic_strength_1(const struct magic *);
126 private size_t apprentice_magic_strength(const struct magic *, size_t);
127 private int apprentice_sort(const void *, const void *);
128 private void apprentice_list(struct mlist *, int );
129 private struct magic_map *apprentice_load(struct magic_set *,
130     const char *, int);
131 private struct mlist *mlist_alloc(void);
132 private void mlist_free_all(struct magic_set *);
133 private void mlist_free(struct mlist *);
134 private void byteswap(struct magic *, uint32_t);
135 private void bs1(struct magic *);
136 
137 #if defined(HAVE_BYTESWAP_H)
138 #define swap2(x)	bswap_16(x)
139 #define swap4(x)	bswap_32(x)
140 #define swap8(x)	bswap_64(x)
141 #elif defined(HAVE_SYS_BSWAP_H)
142 #define swap2(x)	bswap16(x)
143 #define swap4(x)	bswap32(x)
144 #define swap8(x)	bswap64(x)
145 #else
146 private uint16_t swap2(uint16_t);
147 private uint32_t swap4(uint32_t);
148 private uint64_t swap8(uint64_t);
149 #endif
150 
151 private char *mkdbname(struct magic_set *, const char *, int);
152 private struct magic_map *apprentice_map(struct magic_set *, const char *);
153 private void apprentice_unmap(struct magic_map *);
154 private int apprentice_compile(struct magic_set *, struct magic_map *,
155     const char *);
156 private int check_format_type(const char *, int, const char **);
157 private int check_format(struct magic_set *, struct magic *);
158 private int get_op(char);
159 private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
160     size_t);
161 private int parse_strength(struct magic_set *, struct magic_entry *,
162     const char *, size_t);
163 private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
164     size_t);
165 private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
166     size_t);
167 
168 
169 private size_t magicsize = sizeof(struct magic);
170 
171 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
172 
173 private struct {
174 	const char *name;
175 	size_t len;
176 	int (*fun)(struct magic_set *, struct magic_entry *, const char *,
177 	    size_t);
178 } bang[] = {
179 #define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
180 	DECLARE_FIELD(mime),
181 	DECLARE_FIELD(apple),
182 	DECLARE_FIELD(ext),
183 	DECLARE_FIELD(strength),
184 #undef	DECLARE_FIELD
185 	{ NULL, 0, NULL }
186 };
187 
188 #include "../data_file.c"
189 
190 #ifdef COMPILE_ONLY
191 
192 int main(int, char *[]);
193 
194 int
main(int argc,char * argv[])195 main(int argc, char *argv[])
196 {
197 	int ret;
198 	struct magic_set *ms;
199 	char *progname;
200 
201 	if ((progname = strrchr(argv[0], '/')) != NULL)
202 		progname++;
203 	else
204 		progname = argv[0];
205 
206 	if (argc != 2) {
207 		(void)fprintf(stderr, "Usage: %s file\n", progname);
208 		return 1;
209 	}
210 
211 	if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
212 		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
213 		return 1;
214 	}
215 	ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
216 	if (ret == 1)
217 		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
218 	magic_close(ms);
219 	return ret;
220 }
221 #endif /* COMPILE_ONLY */
222 
/*
 * One row of a type-name table (type_tbl, special_tbl).  A row whose
 * len is 0 terminates a table.
 */
struct type_tbl_s {
	/* type keyword as written in magic files */
	const char name[16];
	/* length of name, without the terminating NUL */
	const size_t len;
	/* FILE_* type code */
	const int type;
	/* FILE_FMT_* format class */
	const int format;
};
229 
230 /*
231  * XXX - the actual Single UNIX Specification says that "long" means "long",
232  * as in the C data type, but we treat it as meaning "4-byte integer".
233  * Given that the OS X version of file 5.04 did the same, I guess that passes
234  * the actual test; having "long" be dependent on how big a "long" is on
235  * the machine running "file" is silly.
236  */
237 static const struct type_tbl_s type_tbl[] = {
238 # define XX(s)		s, (sizeof(s) - 1)
239 # define XX_NULL	"", 0
240 	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
241 	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
242 	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
243 	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
244 	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
245 	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
246 	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
247 	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
248 	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
249 	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
250 	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
251 	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
252 	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
253 	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
254 	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
255 	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
256 	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
257 	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
258 	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
259 	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
260 	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
261 	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
262 	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
263 	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
264 	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
265 	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
266 	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
267 	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
268 	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
269 	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
270 	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
271 	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
272 	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
273 	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
274 	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
275 	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
276 	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
277 	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
278 	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
279 	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
280 	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
281 	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
282 	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
283 	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
284 	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
285 	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
286 	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
287 	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
288 	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
289 	{ XX("guid"),		FILE_GUID,		FILE_FMT_STR },
290 	{ XX("offset"),		FILE_OFFSET,		FILE_FMT_QUAD },
291 	{ XX("bevarint"),	FILE_BEVARINT,		FILE_FMT_STR },
292 	{ XX("levarint"),	FILE_LEVARINT,		FILE_FMT_STR },
293 	{ XX("msdosdate"),	FILE_MSDOSDATE,		FILE_FMT_STR },
294 	{ XX("lemsdosdate"),	FILE_LEMSDOSDATE,	FILE_FMT_STR },
295 	{ XX("bemsdosdate"),	FILE_BEMSDOSDATE,	FILE_FMT_STR },
296 	{ XX("msdostime"),	FILE_MSDOSTIME,		FILE_FMT_STR },
297 	{ XX("lemsdostime"),	FILE_LEMSDOSTIME,	FILE_FMT_STR },
298 	{ XX("bemsdostime"),	FILE_BEMSDOSTIME,	FILE_FMT_STR },
299 	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
300 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
301 };
302 
303 /*
304  * These are not types, and cannot be preceded by "u" to make them
305  * unsigned.
306  */
307 static const struct type_tbl_s special_tbl[] = {
308 	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
309 	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
310 	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
311 	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
312 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
313 };
314 # undef XX
315 # undef XX_NULL
316 
317 private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)318 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
319 {
320 	const struct type_tbl_s *p;
321 
322 	for (p = tbl; p->len; p++) {
323 		if (strncmp(l, p->name, p->len) == 0) {
324 			if (t)
325 				*t = l + p->len;
326 			break;
327 		}
328 	}
329 	return p->type;
330 }
331 
/*
 * Largest value an off_t is taken to hold: INT_MAX or LONG_MAX when
 * off_t has the size of int or long respectively, 0x7fffffff otherwise.
 */
private off_t
maxoff_t(void)
{
	off_t limit = 0x7fffffff;	/* fallback for any other size */

	if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
		limit = CAST(off_t, INT_MAX);
	else if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
		limit = CAST(off_t, LONG_MAX);
	return limit;
}
340 
341 private int
get_standard_integer_type(const char * l,const char ** t)342 get_standard_integer_type(const char *l, const char **t)
343 {
344 	int type;
345 
346 	if (isalpha(CAST(unsigned char, l[1]))) {
347 		switch (l[1]) {
348 		case 'C':
349 			/* "dC" and "uC" */
350 			type = FILE_BYTE;
351 			break;
352 		case 'S':
353 			/* "dS" and "uS" */
354 			type = FILE_SHORT;
355 			break;
356 		case 'I':
357 		case 'L':
358 			/*
359 			 * "dI", "dL", "uI", and "uL".
360 			 *
361 			 * XXX - the actual Single UNIX Specification says
362 			 * that "L" means "long", as in the C data type,
363 			 * but we treat it as meaning "4-byte integer".
364 			 * Given that the OS X version of file 5.04 did
365 			 * the same, I guess that passes the actual SUS
366 			 * validation suite; having "dL" be dependent on
367 			 * how big a "long" is on the machine running
368 			 * "file" is silly.
369 			 */
370 			type = FILE_LONG;
371 			break;
372 		case 'Q':
373 			/* "dQ" and "uQ" */
374 			type = FILE_QUAD;
375 			break;
376 		default:
377 			/* "d{anything else}", "u{anything else}" */
378 			return FILE_INVALID;
379 		}
380 		l += 2;
381 	} else if (isdigit(CAST(unsigned char, l[1]))) {
382 		/*
383 		 * "d{num}" and "u{num}"; we only support {num} values
384 		 * of 1, 2, 4, and 8 - the Single UNIX Specification
385 		 * doesn't say anything about whether arbitrary
386 		 * values should be supported, but both the Solaris 10
387 		 * and OS X Mountain Lion versions of file passed the
388 		 * Single UNIX Specification validation suite, and
389 		 * neither of them support values bigger than 8 or
390 		 * non-power-of-2 values.
391 		 */
392 		if (isdigit(CAST(unsigned char, l[2]))) {
393 			/* Multi-digit, so > 9 */
394 			return FILE_INVALID;
395 		}
396 		switch (l[1]) {
397 		case '1':
398 			type = FILE_BYTE;
399 			break;
400 		case '2':
401 			type = FILE_SHORT;
402 			break;
403 		case '4':
404 			type = FILE_LONG;
405 			break;
406 		case '8':
407 			type = FILE_QUAD;
408 			break;
409 		default:
410 			/* XXX - what about 3, 5, 6, or 7? */
411 			return FILE_INVALID;
412 		}
413 		l += 2;
414 	} else {
415 		/*
416 		 * "d" or "u" by itself.
417 		 */
418 		type = FILE_LONG;
419 		++l;
420 	}
421 	if (t)
422 		*t = l;
423 	return type;
424 }
425 
426 private void
init_file_tables(void)427 init_file_tables(void)
428 {
429 	static int done = 0;
430 	const struct type_tbl_s *p;
431 
432 	if (done)
433 		return;
434 	done++;
435 
436 	for (p = type_tbl; p->len; p++) {
437 		assert(p->type < FILE_NAMES_SIZE);
438 		file_names[p->type] = p->name;
439 		file_formats[p->type] = p->format;
440 	}
441 	assert(p - type_tbl == FILE_NAMES_SIZE);
442 }
443 
444 private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)445 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
446 {
447 	struct mlist *ml;
448 
449 	mlp->map = NULL;
450 	if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
451 		return -1;
452 
453 	ml->map = idx == 0 ? map : NULL;
454 	ml->magic = map->magic[idx];
455 	ml->nmagic = map->nmagic[idx];
456 
457 	mlp->prev->next = ml;
458 	ml->prev = mlp->prev;
459 	ml->next = mlp;
460 	mlp->prev = ml;
461 	return 0;
462 }
463 
464 /*
465  * Handle one file or directory.
466  */
467 private int
apprentice_1(struct magic_set * ms,const char * fn,int action)468 apprentice_1(struct magic_set *ms, const char *fn, int action)
469 {
470 	struct magic_map *map;
471 #ifndef COMPILE_ONLY
472 	struct mlist *ml;
473 	size_t i;
474 #endif
475 
476 	if (magicsize != FILE_MAGICSIZE) {
477 		file_error(ms, 0, "magic element size %lu != %lu",
478 		    CAST(unsigned long, sizeof(*map->magic[0])),
479 		    CAST(unsigned long, FILE_MAGICSIZE));
480 		return -1;
481 	}
482 
483 	if (action == FILE_COMPILE) {
484 		map = apprentice_load(ms, fn, action);
485 		if (map == NULL)
486 			return -1;
487 		return apprentice_compile(ms, map, fn);
488 	}
489 
490 #ifndef COMPILE_ONLY
491 	map = apprentice_map(ms, fn);
492 	if (map == NULL) {
493 		if (ms->flags & MAGIC_CHECK)
494 			file_magwarn(ms, "using regular magic file `%s'", fn);
495 		map = apprentice_load(ms, fn, action);
496 		if (map == NULL)
497 			return -1;
498 	}
499 
500 	for (i = 0; i < MAGIC_SETS; i++) {
501 		if (add_mlist(ms->mlist[i], map, i) == -1) {
502 			/* failed to add to any list, free explicitly */
503 			if (i == 0)
504 				apprentice_unmap(map);
505 			else
506 				mlist_free_all(ms);
507 			file_oomem(ms, sizeof(*ml));
508 			return -1;
509 		}
510 	}
511 
512 	if (action == FILE_LIST) {
513 		for (i = 0; i < MAGIC_SETS; i++) {
514 			printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
515 			    i);
516 			apprentice_list(ms->mlist[i], BINTEST);
517 			printf("Text patterns:\n");
518 			apprentice_list(ms->mlist[i], TEXTTEST);
519 		}
520 	}
521 	return 0;
522 #else
523 	return 0;
524 #endif /* COMPILE_ONLY */
525 }
526 
527 protected void
file_ms_free(struct magic_set * ms)528 file_ms_free(struct magic_set *ms)
529 {
530 	size_t i;
531 	if (ms == NULL)
532 		return;
533 	for (i = 0; i < MAGIC_SETS; i++)
534 		mlist_free(ms->mlist[i]);
535 	if (ms->o.pbuf) {
536 		efree(ms->o.pbuf);
537 	}
538 	if (ms->o.buf) {
539 		efree(ms->o.buf);
540 	}
541 	if (ms->c.li) {
542 		efree(ms->c.li);
543 	}
544 #ifdef USE_C_LOCALE
545 	freelocale(ms->c_lc_ctype);
546 #endif
547 	efree(ms);
548 }
549 
550 protected struct magic_set *
file_ms_alloc(int flags)551 file_ms_alloc(int flags)
552 {
553 	struct magic_set *ms;
554 	size_t i, len;
555 
556 	if ((ms = CAST(struct magic_set *, ecalloc(CAST(size_t, 1u),
557 	    sizeof(struct magic_set)))) == NULL)
558 		return NULL;
559 
560 	if (magic_setflags(ms, flags) == -1) {
561 		errno = EINVAL;
562 		goto free;
563 	}
564 
565 	ms->o.buf = ms->o.pbuf = NULL;
566 	ms->o.blen = 0;
567 	len = (ms->c.len = 10) * sizeof(*ms->c.li);
568 
569 	if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
570 		goto free;
571 
572 	ms->event_flags = 0;
573 	ms->error = -1;
574 	for (i = 0; i < MAGIC_SETS; i++)
575 		ms->mlist[i] = NULL;
576 	ms->file = "unknown";
577 	ms->line = 0;
578 	ms->indir_max = FILE_INDIR_MAX;
579 	ms->name_max = FILE_NAME_MAX;
580 	ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
581 	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
582 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
583 	ms->regex_max = FILE_REGEX_MAX;
584 	ms->bytes_max = FILE_BYTES_MAX;
585 	ms->encoding_max = FILE_ENCODING_MAX;
586 #ifdef USE_C_LOCALE
587 	ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
588 	assert(ms->c_lc_ctype != NULL);
589 #endif
590 	return ms;
591 free:
592 	free(ms);
593 	return NULL;
594 }
595 
596 private void
apprentice_unmap(struct magic_map * map)597 apprentice_unmap(struct magic_map *map)
598 {
599 	if (map == NULL)
600 		return;
601 	if (map->p != php_magic_database) {
602 		if (map->p == NULL) {
603 			int j;
604 			for (j = 0; j < MAGIC_SETS; j++) {
605 				if (map->magic[j]) {
606 					efree(map->magic[j]);
607 				}
608 			}
609 		} else {
610 			efree(map->p);
611 		}
612 	}
613 	efree(map);
614 }
615 
616 private struct mlist *
mlist_alloc(void)617 mlist_alloc(void)
618 {
619 	struct mlist *mlist;
620 	if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
621 		return NULL;
622 	}
623 	mlist->next = mlist->prev = mlist;
624 	return mlist;
625 }
626 
627 private void
mlist_free_all(struct magic_set * ms)628 mlist_free_all(struct magic_set *ms)
629 {
630 	size_t i;
631 
632 	for (i = 0; i < MAGIC_SETS; i++) {
633 		mlist_free(ms->mlist[i]);
634 		ms->mlist[i] = NULL;
635 	}
636 }
637 
638 private void
mlist_free_one(struct mlist * ml)639 mlist_free_one(struct mlist *ml)
640 {
641 	if (ml->map)
642 		apprentice_unmap(CAST(struct magic_map *, ml->map));
643 	efree(ml);
644 }
645 
646 private void
mlist_free(struct mlist * mlist)647 mlist_free(struct mlist *mlist)
648 {
649 	struct mlist *ml, *next;
650 
651 	if (mlist == NULL)
652 		return;
653 
654 	for (ml = mlist->next; ml != mlist;) {
655 		next = ml->next;
656 		mlist_free_one(ml);
657 		ml = next;
658 	}
659 	mlist_free_one(mlist);
660 }
661 
662 /* const char *fn: list of magic files and directories */
663 protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)664 file_apprentice(struct magic_set *ms, const char *fn, int action)
665 {
666 	char *p, *mfn;
667 	int fileerr, errs = -1;
668 	size_t i, j;
669 
670 	(void)file_reset(ms, 0);
671 
672 /* XXX disabling default magic loading so the compiled in data is used */
673 #if 0
674 	if ((fn = magic_getpath(fn, action)) == NULL)
675 		return -1;
676 #endif
677 
678 	init_file_tables();
679 
680 	if (fn == NULL)
681 		fn = getenv("MAGIC");
682 	if (fn == NULL) {
683 		for (i = 0; i < MAGIC_SETS; i++) {
684 			mlist_free(ms->mlist[i]);
685 			if ((ms->mlist[i] = mlist_alloc()) == NULL) {
686 				file_oomem(ms, sizeof(*ms->mlist[i]));
687 				return -1;
688 			}
689 		}
690 		return apprentice_1(ms, fn, action);
691 	}
692 
693 	if ((mfn = estrdup(fn)) == NULL) {
694 		file_oomem(ms, strlen(fn));
695 		return -1;
696 	}
697 
698 	for (i = 0; i < MAGIC_SETS; i++) {
699 		mlist_free(ms->mlist[i]);
700 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
701 			file_oomem(ms, sizeof(*ms->mlist[i]));
702 			for (j = 0; j < i; j++) {
703 				mlist_free(ms->mlist[j]);
704 				ms->mlist[j] = NULL;
705 			}
706 			efree(mfn);
707 			return -1;
708 		}
709 	}
710 	fn = mfn;
711 
712 	while (fn) {
713 		p = CCAST(char *, strchr(fn, PATHSEP));
714 		if (p)
715 			*p++ = '\0';
716 		if (*fn == '\0')
717 			break;
718 		fileerr = apprentice_1(ms, fn, action);
719 		errs = MAX(errs, fileerr);
720 		fn = p;
721 	}
722 
723 	efree(mfn);
724 
725 	if (errs == -1) {
726 		for (i = 0; i < MAGIC_SETS; i++) {
727 			mlist_free(ms->mlist[i]);
728 			ms->mlist[i] = NULL;
729 		}
730 		file_error(ms, 0, "could not find any valid magic files!");
731 		return -1;
732 	}
733 
734 #if 0
735 	/*
736 	 * Always leave the database loaded
737 	 */
738 	if (action == FILE_LOAD)
739 		return 0;
740 
741 	for (i = 0; i < MAGIC_SETS; i++) {
742 		mlist_free(ms->mlist[i]);
743 		ms->mlist[i] = NULL;
744 	}
745 #endif
746 
747 	switch (action) {
748 	case FILE_LOAD:
749 	case FILE_COMPILE:
750 	case FILE_CHECK:
751 	case FILE_LIST:
752 		return 0;
753 	default:
754 		file_error(ms, 0, "Invalid action %d", action);
755 		return -1;
756 	}
757 }
758 
/*
 * Compute the "real" length of a magic expression, used to estimate how
 * strong (i.e. how specific) the expression is:
 *	- an escaped character counts 1 (so does a trailing backslash)
 *	- the magic characters ? * . + ^ $ count 0
 *	- a [] expression counts 1 (through its closing ']')
 *	- a {} expression counts 0
 *	- any other character counts 1
 *	- the result is never less than 1
 */
private size_t
nonmagic(const char *str)
{
	const char *cur = str;
	size_t weight = 0;

	while (*cur != '\0') {
		const char ch = *cur;

		if (ch == '\\') {
			/* step onto the escaped character, if there is one */
			if (cur[1] != '\0')
				cur++;
			weight++;
		} else if (ch == '[') {
			/*
			 * Stop one short of the ']' so that the next
			 * iteration counts it as an ordinary character.
			 */
			while (*cur != '\0' && *cur != ']')
				cur++;
			cur--;
		} else if (ch == '{') {
			while (*cur != '\0' && *cur != '}')
				cur++;
			/* unterminated: back up so the loop ends on the NUL */
			if (*cur == '\0')
				cur--;
		} else if (strchr("?*.+^$", ch) == NULL) {
			weight++;
		}
		cur++;
	}

	return weight != 0 ? weight : 1;	/* Return at least 1 */
}
807 
808 
809 private size_t
typesize(int type)810 typesize(int type)
811 {
812 	switch (type) {
813 	case FILE_BYTE:
814 		return 1;
815 
816 	case FILE_SHORT:
817 	case FILE_LESHORT:
818 	case FILE_BESHORT:
819 	case FILE_MSDOSDATE:
820 	case FILE_BEMSDOSDATE:
821 	case FILE_LEMSDOSDATE:
822 	case FILE_MSDOSTIME:
823 	case FILE_BEMSDOSTIME:
824 	case FILE_LEMSDOSTIME:
825 		return 2;
826 
827 	case FILE_LONG:
828 	case FILE_LELONG:
829 	case FILE_BELONG:
830 	case FILE_MELONG:
831 		return 4;
832 
833 	case FILE_DATE:
834 	case FILE_LEDATE:
835 	case FILE_BEDATE:
836 	case FILE_MEDATE:
837 	case FILE_LDATE:
838 	case FILE_LELDATE:
839 	case FILE_BELDATE:
840 	case FILE_MELDATE:
841 	case FILE_FLOAT:
842 	case FILE_BEFLOAT:
843 	case FILE_LEFLOAT:
844 	case FILE_BEID3:
845 	case FILE_LEID3:
846 		return 4;
847 
848 	case FILE_QUAD:
849 	case FILE_BEQUAD:
850 	case FILE_LEQUAD:
851 	case FILE_QDATE:
852 	case FILE_LEQDATE:
853 	case FILE_BEQDATE:
854 	case FILE_QLDATE:
855 	case FILE_LEQLDATE:
856 	case FILE_BEQLDATE:
857 	case FILE_QWDATE:
858 	case FILE_LEQWDATE:
859 	case FILE_BEQWDATE:
860 	case FILE_DOUBLE:
861 	case FILE_BEDOUBLE:
862 	case FILE_LEDOUBLE:
863 	case FILE_OFFSET:
864 	case FILE_BEVARINT:
865 	case FILE_LEVARINT:
866 		return 8;
867 
868 	case FILE_GUID:
869 		return 16;
870 
871 	default:
872 		return FILE_BADSIZE;
873 	}
874 }
875 
876 /*
877  * Get weight of this magic entry, for sorting purposes.
878  */
879 private ssize_t
apprentice_magic_strength_1(const struct magic * m)880 apprentice_magic_strength_1(const struct magic *m)
881 {
882 #define MULT 10U
883 	size_t ts, v;
884 	ssize_t val = 2 * MULT;	/* baseline strength */
885 
886 	switch (m->type) {
887 	case FILE_DEFAULT:	/* make sure this sorts last */
888 		if (m->factor_op != FILE_FACTOR_OP_NONE) {
889 			fprintf(stderr, "Bad factor_op %d", m->factor_op);
890 			abort();
891 		}
892 		return 0;
893 
894 	case FILE_BYTE:
895 	case FILE_SHORT:
896 	case FILE_LESHORT:
897 	case FILE_BESHORT:
898 	case FILE_LONG:
899 	case FILE_LELONG:
900 	case FILE_BELONG:
901 	case FILE_MELONG:
902 	case FILE_DATE:
903 	case FILE_LEDATE:
904 	case FILE_BEDATE:
905 	case FILE_MEDATE:
906 	case FILE_LDATE:
907 	case FILE_LELDATE:
908 	case FILE_BELDATE:
909 	case FILE_MELDATE:
910 	case FILE_FLOAT:
911 	case FILE_BEFLOAT:
912 	case FILE_LEFLOAT:
913 	case FILE_QUAD:
914 	case FILE_BEQUAD:
915 	case FILE_LEQUAD:
916 	case FILE_QDATE:
917 	case FILE_LEQDATE:
918 	case FILE_BEQDATE:
919 	case FILE_QLDATE:
920 	case FILE_LEQLDATE:
921 	case FILE_BEQLDATE:
922 	case FILE_QWDATE:
923 	case FILE_LEQWDATE:
924 	case FILE_BEQWDATE:
925 	case FILE_DOUBLE:
926 	case FILE_BEDOUBLE:
927 	case FILE_LEDOUBLE:
928 	case FILE_BEVARINT:
929 	case FILE_LEVARINT:
930 	case FILE_GUID:
931 	case FILE_BEID3:
932 	case FILE_LEID3:
933 	case FILE_OFFSET:
934 	case FILE_MSDOSDATE:
935 	case FILE_BEMSDOSDATE:
936 	case FILE_LEMSDOSDATE:
937 	case FILE_MSDOSTIME:
938 	case FILE_BEMSDOSTIME:
939 	case FILE_LEMSDOSTIME:
940 		ts = typesize(m->type);
941 		if (ts == FILE_BADSIZE) {
942 			(void)fprintf(stderr, "Bad size for type %d\n",
943 			    m->type);
944 			abort();
945 		}
946 		val += ts * MULT;
947 		break;
948 
949 	case FILE_PSTRING:
950 	case FILE_STRING:
951 	case FILE_OCTAL:
952 		val += m->vallen * MULT;
953 		break;
954 
955 	case FILE_BESTRING16:
956 	case FILE_LESTRING16:
957 		val += m->vallen * MULT / 2;
958 		break;
959 
960 	case FILE_SEARCH:
961 		if (m->vallen == 0)
962 			break;
963 		val += m->vallen * MAX(MULT / m->vallen, 1);
964 		break;
965 
966 	case FILE_REGEX:
967 		v = nonmagic(m->value.s);
968 		val += v * MAX(MULT / v, 1);
969 		break;
970 
971 	case FILE_INDIRECT:
972 	case FILE_NAME:
973 	case FILE_USE:
974 	case FILE_CLEAR:
975 		break;
976 
977 	case FILE_DER:
978 		val += MULT;
979 		break;
980 
981 	default:
982 		(void)fprintf(stderr, "Bad type %d\n", m->type);
983 		abort();
984 	}
985 
986 	switch (m->reln) {
987 	case 'x':	/* matches anything penalize */
988 	case '!':       /* matches almost anything penalize */
989 		val = 0;
990 		break;
991 
992 	case '=':	/* Exact match, prefer */
993 		val += MULT;
994 		break;
995 
996 	case '>':
997 	case '<':	/* comparison match reduce strength */
998 		val -= 2 * MULT;
999 		break;
1000 
1001 	case '^':
1002 	case '&':	/* masking bits, we could count them too */
1003 		val -= MULT;
1004 		break;
1005 
1006 	default:
1007 		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
1008 		abort();
1009 	}
1010 
1011 	return val;
1012 }
1013 
1014 
1015 /*ARGSUSED*/
1016 private size_t
apprentice_magic_strength(const struct magic * m,size_t nmagic)1017 apprentice_magic_strength(const struct magic *m,
1018     size_t nmagic __attribute__((__unused__)))
1019 {
1020 	ssize_t val = apprentice_magic_strength_1(m);
1021 
1022 #ifdef notyet
1023 	if (m->desc[0] == '\0') {
1024 		size_t i;
1025 		/*
1026 		 * Magic entries with no description get their continuations
1027 		 * added
1028 		 */
1029 		for (i = 1; m[i].cont_level != 0 && i < MIN(nmagic, 3); i++) {
1030 			ssize_t v = apprentice_magic_strength_1(&m[i]) >>
1031 			    (i + 1);
1032 			val += v;
1033 			if (m[i].desc[0] != '\0')
1034 				break;
1035 		}
1036 	}
1037 #endif
1038 
1039 	switch (m->factor_op) {
1040 	case FILE_FACTOR_OP_NONE:
1041 		break;
1042 	case FILE_FACTOR_OP_PLUS:
1043 		val += m->factor;
1044 		break;
1045 	case FILE_FACTOR_OP_MINUS:
1046 		val -= m->factor;
1047 		break;
1048 	case FILE_FACTOR_OP_TIMES:
1049 		val *= m->factor;
1050 		break;
1051 	case FILE_FACTOR_OP_DIV:
1052 		val /= m->factor;
1053 		break;
1054 	default:
1055 		(void)fprintf(stderr, "Bad factor_op %u\n", m->factor_op);
1056 		abort();
1057 	}
1058 
1059 	if (val <= 0)	/* ensure we only return 0 for FILE_DEFAULT */
1060 		val = 1;
1061 
1062 #ifndef notyet
1063 	/*
1064 	 * Magic entries with no description get a bonus because they depend
1065 	 * on subsequent magic entries to print something.
1066 	 */
1067 	if (m->desc[0] == '\0')
1068 		val++;
1069 #endif
1070 
1071 	return val;
1072 }
1073 
1074 /*
1075  * Sort callback for sorting entries by "strength" (basically length)
1076  */
1077 private int
apprentice_sort(const void * a,const void * b)1078 apprentice_sort(const void *a, const void *b)
1079 {
1080 	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1081 	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1082 	size_t sa = apprentice_magic_strength(ma->mp, ma->cont_count);
1083 	size_t sb = apprentice_magic_strength(mb->mp, mb->cont_count);
1084 	if (sa == sb)
1085 		return 0;
1086 	else if (sa > sb)
1087 		return -1;
1088 	else
1089 		return 1;
1090 }
1091 
1092 /*
1093  * Shows sorted patterns list in the order which is used for the matching
1094  */
1095 private void
apprentice_list(struct mlist * mlist,int mode)1096 apprentice_list(struct mlist *mlist, int mode)
1097 {
1098 	uint32_t magindex, descindex, mimeindex, lineindex;
1099 	struct mlist *ml;
1100 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
1101 		for (magindex = 0; magindex < ml->nmagic; magindex++) {
1102 			struct magic *m = &ml->magic[magindex];
1103 			if ((m->flag & mode) != mode) {
1104 				/* Skip sub-tests */
1105 				while (magindex + 1 < ml->nmagic &&
1106 				       ml->magic[magindex + 1].cont_level != 0)
1107 					++magindex;
1108 				continue; /* Skip to next top-level test*/
1109 			}
1110 
1111 			/*
1112 			 * Try to iterate over the tree until we find item with
1113 			 * description/mimetype.
1114 			 */
1115 			lineindex = descindex = mimeindex = magindex;
1116 			for (magindex++; magindex < ml->nmagic &&
1117 			   ml->magic[magindex].cont_level != 0; magindex++) {
1118 				if (*ml->magic[descindex].desc == '\0'
1119 				    && *ml->magic[magindex].desc)
1120 					descindex = magindex;
1121 				if (*ml->magic[mimeindex].mimetype == '\0'
1122 				    && *ml->magic[magindex].mimetype)
1123 					mimeindex = magindex;
1124 			}
1125 
1126 			printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1127 			    apprentice_magic_strength(m, ml->nmagic - magindex),
1128 			    ml->magic[lineindex].lineno,
1129 			    ml->magic[descindex].desc,
1130 			    ml->magic[mimeindex].mimetype);
1131 		}
1132 	}
1133 }
1134 
1135 private void
set_test_type(struct magic * mstart,struct magic * m)1136 set_test_type(struct magic *mstart, struct magic *m)
1137 {
1138 	switch (m->type) {
1139 	case FILE_BYTE:
1140 	case FILE_SHORT:
1141 	case FILE_LONG:
1142 	case FILE_DATE:
1143 	case FILE_BESHORT:
1144 	case FILE_BELONG:
1145 	case FILE_BEDATE:
1146 	case FILE_LESHORT:
1147 	case FILE_LELONG:
1148 	case FILE_LEDATE:
1149 	case FILE_LDATE:
1150 	case FILE_BELDATE:
1151 	case FILE_LELDATE:
1152 	case FILE_MEDATE:
1153 	case FILE_MELDATE:
1154 	case FILE_MELONG:
1155 	case FILE_QUAD:
1156 	case FILE_LEQUAD:
1157 	case FILE_BEQUAD:
1158 	case FILE_QDATE:
1159 	case FILE_LEQDATE:
1160 	case FILE_BEQDATE:
1161 	case FILE_QLDATE:
1162 	case FILE_LEQLDATE:
1163 	case FILE_BEQLDATE:
1164 	case FILE_QWDATE:
1165 	case FILE_LEQWDATE:
1166 	case FILE_BEQWDATE:
1167 	case FILE_FLOAT:
1168 	case FILE_BEFLOAT:
1169 	case FILE_LEFLOAT:
1170 	case FILE_DOUBLE:
1171 	case FILE_BEDOUBLE:
1172 	case FILE_LEDOUBLE:
1173 	case FILE_BEVARINT:
1174 	case FILE_LEVARINT:
1175 	case FILE_DER:
1176 	case FILE_GUID:
1177 	case FILE_OFFSET:
1178 	case FILE_MSDOSDATE:
1179 	case FILE_BEMSDOSDATE:
1180 	case FILE_LEMSDOSDATE:
1181 	case FILE_MSDOSTIME:
1182 	case FILE_BEMSDOSTIME:
1183 	case FILE_LEMSDOSTIME:
1184 	case FILE_OCTAL:
1185 		mstart->flag |= BINTEST;
1186 		break;
1187 	case FILE_STRING:
1188 	case FILE_PSTRING:
1189 	case FILE_BESTRING16:
1190 	case FILE_LESTRING16:
1191 		/* Allow text overrides */
1192 		if (mstart->str_flags & STRING_TEXTTEST)
1193 			mstart->flag |= TEXTTEST;
1194 		else
1195 			mstart->flag |= BINTEST;
1196 		break;
1197 	case FILE_REGEX:
1198 	case FILE_SEARCH:
1199 		/* Check for override */
1200 		if (mstart->str_flags & STRING_BINTEST)
1201 			mstart->flag |= BINTEST;
1202 		if (mstart->str_flags & STRING_TEXTTEST)
1203 			mstart->flag |= TEXTTEST;
1204 
1205 		if (mstart->flag & (TEXTTEST|BINTEST))
1206 			break;
1207 
1208 		/* binary test if pattern is not text */
1209 		if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1210 		    NULL) <= 0)
1211 			mstart->flag |= BINTEST;
1212 		else
1213 			mstart->flag |= TEXTTEST;
1214 		break;
1215 	case FILE_DEFAULT:
1216 		/* can't deduce anything; we shouldn't see this at the
1217 		   top level anyway */
1218 		break;
1219 	case FILE_INVALID:
1220 	default:
1221 		/* invalid search type, but no need to complain here */
1222 		break;
1223 	}
1224 }
1225 
1226 private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry_set * mset)1227 addentry(struct magic_set *ms, struct magic_entry *me,
1228    struct magic_entry_set *mset)
1229 {
1230 	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1231 	if (mset[i].me == NULL || mset[i].count == mset[i].max) {
1232 		struct magic_entry *mp;
1233 
1234 		size_t incr = mset[i].max + ALLOC_INCR;
1235 		if ((mp = CAST(struct magic_entry *,
1236 		    erealloc(mset[i].me, sizeof(*mp) * incr))) ==
1237 		    NULL) {
1238 			file_oomem(ms, sizeof(*mp) * incr);
1239 			return -1;
1240 		}
1241 		(void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1242 		    ALLOC_INCR);
1243 		mset[i].me = mp;
1244 		mset[i].max = CAST(uint32_t, incr);
1245 		assert(mset[i].max == incr);
1246 	}
1247 	mset[i].me[mset[i].count++] = *me;
1248 	memset(me, 0, sizeof(*me));
1249 	return 0;
1250 }
1251 
1252 /*
1253  * Load and parse one file.
1254  */
1255 private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry_set * mset)1256 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1257    struct magic_entry_set *mset)
1258 {
1259 	char buffer[BUFSIZ + 1];
1260 	char *line = NULL;
1261 	size_t len;
1262 	size_t lineno = 0;
1263 	struct magic_entry me;
1264 
1265 	php_stream *stream;
1266 
1267 
1268 	ms->file = fn;
1269 	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
1270 
1271 	if (stream == NULL) {
1272 		if (errno != ENOENT)
1273 			file_error(ms, errno, "cannot read magic file `%s'",
1274 				   fn);
1275 		(*errs)++;
1276 		return;
1277 	}
1278 
1279 	memset(&me, 0, sizeof(me));
1280 	/* read and parse this file */
1281 	for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
1282 		if (len == 0) /* null line, garbage, etc */
1283 			continue;
1284 		if (line[len - 1] == '\n') {
1285 			lineno++;
1286 			line[len - 1] = '\0'; /* delete newline */
1287 		}
1288 		switch (line[0]) {
1289 		case '\0':	/* empty, do not parse */
1290 		case '#':	/* comment, do not parse */
1291 			continue;
1292 		case '!':
1293 			if (line[1] == ':') {
1294 				size_t i;
1295 
1296 				for (i = 0; bang[i].name != NULL; i++) {
1297 					if (CAST(size_t, len - 2) > bang[i].len &&
1298 					    memcmp(bang[i].name, line + 2,
1299 					    bang[i].len) == 0)
1300 						break;
1301 				}
1302 				if (bang[i].name == NULL) {
1303 					file_error(ms, 0,
1304 					    "Unknown !: entry `%s'", line);
1305 					(*errs)++;
1306 					continue;
1307 				}
1308 				if (me.mp == NULL) {
1309 					file_error(ms, 0,
1310 					    "No current entry for :!%s type",
1311 						bang[i].name);
1312 					(*errs)++;
1313 					continue;
1314 				}
1315 				if ((*bang[i].fun)(ms, &me,
1316 				    line + bang[i].len + 2,
1317 				    len - bang[i].len - 2) != 0) {
1318 					(*errs)++;
1319 					continue;
1320 				}
1321 				continue;
1322 			}
1323 			/*FALLTHROUGH*/
1324 		default:
1325 		again:
1326 			switch (parse(ms, &me, line, lineno, action)) {
1327 			case 0:
1328 				continue;
1329 			case 1:
1330 				(void)addentry(ms, &me, mset);
1331 				goto again;
1332 			default:
1333 				(*errs)++;
1334 				break;
1335 			}
1336 		}
1337 	}
1338 	if (me.mp)
1339 		(void)addentry(ms, &me, mset);
1340 	efree(line);
1341 	php_stream_close(stream);
1342 }
1343 
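/*
 * qsort(3) comparison callback for an array of C strings.  Used by
 * apprentice_load(), which parses a file or a directory of files
 * (const char *fn: name of magic file or directory), to sort the file
 * names found in a directory with strcmp().
 */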
1348 private int
cmpstrp(const void * p1,const void * p2)1349 cmpstrp(const void *p1, const void *p2)
1350 {
1351         return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2));
1352 }
1353 
1354 
1355 private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1356 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1357     uint32_t starttest)
1358 {
1359 	static const char text[] = "text";
1360 	static const char binary[] = "binary";
1361 	static const size_t len = sizeof(text);
1362 
1363 	uint32_t i = starttest;
1364 
1365 	do {
1366 		set_test_type(me[starttest].mp, me[i].mp);
1367 		if ((ms->flags & MAGIC_DEBUG) == 0)
1368 			continue;
1369 		(void)fprintf(stderr, "%s%s%s: %s\n",
1370 		    me[i].mp->mimetype,
1371 		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1372 		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1373 		    me[i].mp->flag & BINTEST ? binary : text);
1374 		if (me[i].mp->flag & BINTEST) {
1375 			char *p = strstr(me[i].mp->desc, text);
1376 			if (p && (p == me[i].mp->desc ||
1377 			    isspace(CAST(unsigned char, p[-1]))) &&
1378 			    (p + len - me[i].mp->desc == MAXstring
1379 			    || (p[len] == '\0' ||
1380 			    isspace(CAST(unsigned char, p[len])))))
1381 				(void)fprintf(stderr, "*** Possible "
1382 				    "binary test for text type\n");
1383 		}
1384 	} while (++i < nme && me[i].mp->cont_level != 0);
1385 	return i;
1386 }
1387 
1388 private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1389 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1390 {
1391 	uint32_t i;
1392 	for (i = 0; i < nme; i++) {
1393 		if (me[i].mp->cont_level == 0 &&
1394 		    me[i].mp->type == FILE_DEFAULT) {
1395 			while (++i < nme)
1396 				if (me[i].mp->cont_level == 0)
1397 					break;
1398 			if (i != nme) {
1399 				/* XXX - Ugh! */
1400 				ms->line = me[i].mp->lineno;
1401 				file_magwarn(ms,
1402 				    "level 0 \"default\" did not sort last");
1403 			}
1404 			return;
1405 		}
1406 	}
1407 }
1408 
1409 private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1410 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1411     struct magic **ma, uint32_t *nma)
1412 {
1413 	uint32_t i, mentrycount = 0;
1414 	size_t slen;
1415 
1416 	for (i = 0; i < nme; i++)
1417 		mentrycount += me[i].cont_count;
1418 
1419 	if (mentrycount == 0) {
1420 		*ma = NULL;
1421 		*nma = 0;
1422 		return 0;
1423 	}
1424 
1425 	slen = sizeof(**ma) * mentrycount;
1426 	if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1427 		file_oomem(ms, slen);
1428 		return -1;
1429 	}
1430 
1431 	mentrycount = 0;
1432 	for (i = 0; i < nme; i++) {
1433 		(void)memcpy(*ma + mentrycount, me[i].mp,
1434 		    me[i].cont_count * sizeof(**ma));
1435 		mentrycount += me[i].cont_count;
1436 	}
1437 	*nma = mentrycount;
1438 	return 0;
1439 }
1440 
1441 private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1442 magic_entry_free(struct magic_entry *me, uint32_t nme)
1443 {
1444 	uint32_t i;
1445 	if (me == NULL)
1446 		return;
1447 	for (i = 0; i < nme; i++)
1448 		efree(me[i].mp);
1449 	efree(me);
1450 }
1451 
1452 private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1453 apprentice_load(struct magic_set *ms, const char *fn, int action)
1454 {
1455 	int errs = 0;
1456 	uint32_t i, j;
1457 	size_t files = 0, maxfiles = 0;
1458 	char **filearr = NULL;
1459 	zend_stat_t st = {0};
1460 	struct magic_map *map;
1461 	struct magic_entry_set mset[MAGIC_SETS];
1462 	php_stream *dir;
1463 	php_stream_dirent d;
1464 
1465 
1466 	memset(mset, 0, sizeof(mset));
1467 	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1468 
1469 
1470 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1471 	{
1472 		file_oomem(ms, sizeof(*map));
1473 		return NULL;
1474 	}
1475 	map->type = MAP_TYPE_MALLOC;
1476 
1477 	/* print silly verbose header for USG compat. */
1478 	if (action == FILE_CHECK)
1479 		(void)fprintf(stderr, "%s\n", usg_hdr);
1480 
1481 	/* load directory or file */
1482 	/* FIXME: Read file names and sort them to prevent
1483 	   non-determinism. See Debian bug #488562. */
1484 	if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1485 		int mflen;
1486 		char mfn[MAXPATHLEN];
1487 
1488 		dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1489 		if (!dir) {
1490 			errs++;
1491 			goto out;
1492 		}
1493 		while (php_stream_readdir(dir, &d)) {
1494 			if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1495 				file_oomem(ms,
1496 				strlen(fn) + strlen(d.d_name) + 2);
1497 				errs++;
1498 				php_stream_closedir(dir);
1499 				goto out;
1500 			}
1501 			if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1502 				continue;
1503 			}
1504 			if (files >= maxfiles) {
1505 				size_t mlen;
1506 				maxfiles = (maxfiles + 1) * 2;
1507 				mlen = maxfiles * sizeof(*filearr);
1508 				if ((filearr = CAST(char **,
1509 				    erealloc(filearr, mlen))) == NULL) {
1510 					file_oomem(ms, mlen);
1511 					php_stream_closedir(dir);
1512 					errs++;
1513 					goto out;
1514 				}
1515 			}
1516 			filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1517 		}
1518 		php_stream_closedir(dir);
1519 		if (filearr) {
1520 			qsort(filearr, files, sizeof(*filearr), cmpstrp);
1521 			for (i = 0; i < files; i++) {
1522 				load_1(ms, action, filearr[i], &errs, mset);
1523 				efree(filearr[i]);
1524 			}
1525 			efree(filearr);
1526 		}
1527 	} else
1528 		load_1(ms, action, fn, &errs, mset);
1529 	if (errs)
1530 		goto out;
1531 
1532 	for (j = 0; j < MAGIC_SETS; j++) {
1533 		/* Set types of tests */
1534 		for (i = 0; i < mset[j].count; ) {
1535 			if (mset[j].me[i].mp->cont_level != 0) {
1536 				i++;
1537 				continue;
1538 			}
1539 			i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1540 		}
1541 		if (mset[j].me)
1542 			qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1543 			    apprentice_sort);
1544 
1545 		/*
1546 		 * Make sure that any level 0 "default" line is last
1547 		 * (if one exists).
1548 		 */
1549 		set_last_default(ms, mset[j].me, mset[j].count);
1550 
1551 		/* coalesce per file arrays into a single one, if needed */
1552 		if (mset[j].count == 0)
1553 			continue;
1554 
1555 		if (coalesce_entries(ms, mset[j].me, mset[j].count,
1556 		    &map->magic[j], &map->nmagic[j]) == -1) {
1557 			errs++;
1558 			goto out;
1559 		}
1560 	}
1561 
1562 out:
1563 	for (j = 0; j < MAGIC_SETS; j++)
1564 		magic_entry_free(mset[j].me, mset[j].count);
1565 
1566 	if (errs) {
1567 		apprentice_unmap(map);
1568 		return NULL;
1569 	}
1570 	return map;
1571 }
1572 
1573 /*
1574  * extend the sign bit if the comparison is to be signed
1575  */
1576 protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1577 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1578 {
1579 	if (!(m->flag & UNSIGNED)) {
1580 		switch(m->type) {
1581 		/*
1582 		 * Do not remove the casts below.  They are
1583 		 * vital.  When later compared with the data,
1584 		 * the sign extension must have happened.
1585 		 */
1586 		case FILE_BYTE:
1587 			v = CAST(signed char,  v);
1588 			break;
1589 		case FILE_SHORT:
1590 		case FILE_BESHORT:
1591 		case FILE_LESHORT:
1592 			v = CAST(short, v);
1593 			break;
1594 		case FILE_DATE:
1595 		case FILE_BEDATE:
1596 		case FILE_LEDATE:
1597 		case FILE_MEDATE:
1598 		case FILE_LDATE:
1599 		case FILE_BELDATE:
1600 		case FILE_LELDATE:
1601 		case FILE_MELDATE:
1602 		case FILE_LONG:
1603 		case FILE_BELONG:
1604 		case FILE_LELONG:
1605 		case FILE_MELONG:
1606 		case FILE_FLOAT:
1607 		case FILE_BEFLOAT:
1608 		case FILE_LEFLOAT:
1609 		case FILE_MSDOSDATE:
1610 		case FILE_BEMSDOSDATE:
1611 		case FILE_LEMSDOSDATE:
1612 		case FILE_MSDOSTIME:
1613 		case FILE_BEMSDOSTIME:
1614 		case FILE_LEMSDOSTIME:
1615 			v = CAST(int32_t, v);
1616 			break;
1617 		case FILE_QUAD:
1618 		case FILE_BEQUAD:
1619 		case FILE_LEQUAD:
1620 		case FILE_QDATE:
1621 		case FILE_QLDATE:
1622 		case FILE_QWDATE:
1623 		case FILE_BEQDATE:
1624 		case FILE_BEQLDATE:
1625 		case FILE_BEQWDATE:
1626 		case FILE_LEQDATE:
1627 		case FILE_LEQLDATE:
1628 		case FILE_LEQWDATE:
1629 		case FILE_DOUBLE:
1630 		case FILE_BEDOUBLE:
1631 		case FILE_LEDOUBLE:
1632 		case FILE_OFFSET:
1633 		case FILE_BEVARINT:
1634 		case FILE_LEVARINT:
1635 			v = CAST(int64_t, v);
1636 			break;
1637 		case FILE_STRING:
1638 		case FILE_PSTRING:
1639 		case FILE_BESTRING16:
1640 		case FILE_LESTRING16:
1641 		case FILE_REGEX:
1642 		case FILE_SEARCH:
1643 		case FILE_DEFAULT:
1644 		case FILE_INDIRECT:
1645 		case FILE_NAME:
1646 		case FILE_USE:
1647 		case FILE_CLEAR:
1648 		case FILE_DER:
1649 		case FILE_GUID:
1650 		case FILE_OCTAL:
1651 			break;
1652 		default:
1653 			if (ms->flags & MAGIC_CHECK)
1654 			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1655 				    m->type);
1656 			return FILE_BADSIZE;
1657 		}
1658 	}
1659 	return v;
1660 }
1661 
1662 private int
string_modifier_check(struct magic_set * ms,struct magic * m)1663 string_modifier_check(struct magic_set *ms, struct magic *m)
1664 {
1665 	if ((ms->flags & MAGIC_CHECK) == 0)
1666 		return 0;
1667 
1668 	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1669 	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1670 		file_magwarn(ms,
1671 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1672 		return -1;
1673 	}
1674 	switch (m->type) {
1675 	case FILE_BESTRING16:
1676 	case FILE_LESTRING16:
1677 		if (m->str_flags != 0) {
1678 			file_magwarn(ms,
1679 			    "no modifiers allowed for 16-bit strings\n");
1680 			return -1;
1681 		}
1682 		break;
1683 	case FILE_STRING:
1684 	case FILE_PSTRING:
1685 		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1686 			file_magwarn(ms,
1687 			    "'/%c' only allowed on regex and search\n",
1688 			    CHAR_REGEX_OFFSET_START);
1689 			return -1;
1690 		}
1691 		break;
1692 	case FILE_SEARCH:
1693 		if (m->str_range == 0) {
1694 			file_magwarn(ms,
1695 			    "missing range; defaulting to %d\n",
1696                             STRING_DEFAULT_RANGE);
1697 			m->str_range = STRING_DEFAULT_RANGE;
1698 			return -1;
1699 		}
1700 		break;
1701 	case FILE_REGEX:
1702 		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1703 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1704 			    CHAR_COMPACT_WHITESPACE);
1705 			return -1;
1706 		}
1707 		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1708 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1709 			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1710 			return -1;
1711 		}
1712 		break;
1713 	default:
1714 		file_magwarn(ms, "coding error: m->type=%d\n",
1715 		    m->type);
1716 		return -1;
1717 	}
1718 	return 0;
1719 }
1720 
1721 private int
get_op(char c)1722 get_op(char c)
1723 {
1724 	switch (c) {
1725 	case '&':
1726 		return FILE_OPAND;
1727 	case '|':
1728 		return FILE_OPOR;
1729 	case '^':
1730 		return FILE_OPXOR;
1731 	case '+':
1732 		return FILE_OPADD;
1733 	case '-':
1734 		return FILE_OPMINUS;
1735 	case '*':
1736 		return FILE_OPMULTIPLY;
1737 	case '/':
1738 		return FILE_OPDIVIDE;
1739 	case '%':
1740 		return FILE_OPMODULO;
1741 	default:
1742 		return -1;
1743 	}
1744 }
1745 
1746 #ifdef ENABLE_CONDITIONALS
1747 private int
get_cond(const char * l,const char ** t)1748 get_cond(const char *l, const char **t)
1749 {
1750 	static const struct cond_tbl_s {
1751 		char name[8];
1752 		size_t len;
1753 		int cond;
1754 	} cond_tbl[] = {
1755 		{ "if",		2,	COND_IF },
1756 		{ "elif",	4,	COND_ELIF },
1757 		{ "else",	4,	COND_ELSE },
1758 		{ "",		0,	COND_NONE },
1759 	};
1760 	const struct cond_tbl_s *p;
1761 
1762 	for (p = cond_tbl; p->len; p++) {
1763 		if (strncmp(l, p->name, p->len) == 0 &&
1764 		    isspace(CAST(unsigned char, l[p->len]))) {
1765 			if (t)
1766 				*t = l + p->len;
1767 			break;
1768 		}
1769 	}
1770 	return p->cond;
1771 }
1772 
1773 private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1774 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1775 {
1776 	int last_cond;
1777 	last_cond = ms->c.li[cont_level].last_cond;
1778 
1779 	switch (cond) {
1780 	case COND_IF:
1781 		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1782 			if (ms->flags & MAGIC_CHECK)
1783 				file_magwarn(ms, "syntax error: `if'");
1784 			return -1;
1785 		}
1786 		last_cond = COND_IF;
1787 		break;
1788 
1789 	case COND_ELIF:
1790 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1791 			if (ms->flags & MAGIC_CHECK)
1792 				file_magwarn(ms, "syntax error: `elif'");
1793 			return -1;
1794 		}
1795 		last_cond = COND_ELIF;
1796 		break;
1797 
1798 	case COND_ELSE:
1799 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1800 			if (ms->flags & MAGIC_CHECK)
1801 				file_magwarn(ms, "syntax error: `else'");
1802 			return -1;
1803 		}
1804 		last_cond = COND_NONE;
1805 		break;
1806 
1807 	case COND_NONE:
1808 		last_cond = COND_NONE;
1809 		break;
1810 	}
1811 
1812 	ms->c.li[cont_level].last_cond = last_cond;
1813 	return 0;
1814 }
1815 #endif /* ENABLE_CONDITIONALS */
1816 
1817 private int
parse_indirect_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1818 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1819 {
1820 	const char *l = *lp;
1821 
1822 	while (!isspace(CAST(unsigned char, *++l)))
1823 		switch (*l) {
1824 		case CHAR_INDIRECT_RELATIVE:
1825 			m->str_flags |= INDIRECT_RELATIVE;
1826 			break;
1827 		default:
1828 			if (ms->flags & MAGIC_CHECK)
1829 				file_magwarn(ms, "indirect modifier `%c' "
1830 					"invalid", *l);
1831 			*lp = l;
1832 			return -1;
1833 		}
1834 	*lp = l;
1835 	return 0;
1836 }
1837 
1838 private void
parse_op_modifier(struct magic_set * ms,struct magic * m,const char ** lp,int op)1839 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1840     int op)
1841 {
1842 	const char *l = *lp;
1843 	char *t;
1844 	uint64_t val;
1845 
1846 	++l;
1847 	m->mask_op |= op;
1848 	val = CAST(uint64_t, strtoull(l, &t, 0));
1849 	l = t;
1850 	m->num_mask = file_signextend(ms, m, val);
1851 	eatsize(&l);
1852 	*lp = l;
1853 }
1854 
1855 private int
parse_string_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1856 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1857 {
1858 	const char *l = *lp;
1859 	char *t;
1860 	int have_range = 0;
1861 
1862 	while (!isspace(CAST(unsigned char, *++l))) {
1863 		switch (*l) {
1864 		case '0':  case '1':  case '2':
1865 		case '3':  case '4':  case '5':
1866 		case '6':  case '7':  case '8':
1867 		case '9':
1868 			if (have_range && (ms->flags & MAGIC_CHECK))
1869 				file_magwarn(ms, "multiple ranges");
1870 			have_range = 1;
1871 			m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1872 			if (m->str_range == 0)
1873 				file_magwarn(ms, "zero range");
1874 			l = t - 1;
1875 			break;
1876 		case CHAR_COMPACT_WHITESPACE:
1877 			m->str_flags |= STRING_COMPACT_WHITESPACE;
1878 			break;
1879 		case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1880 			m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1881 			break;
1882 		case CHAR_IGNORE_LOWERCASE:
1883 			m->str_flags |= STRING_IGNORE_LOWERCASE;
1884 			break;
1885 		case CHAR_IGNORE_UPPERCASE:
1886 			m->str_flags |= STRING_IGNORE_UPPERCASE;
1887 			break;
1888 		case CHAR_REGEX_OFFSET_START:
1889 			m->str_flags |= REGEX_OFFSET_START;
1890 			break;
1891 		case CHAR_BINTEST:
1892 			m->str_flags |= STRING_BINTEST;
1893 			break;
1894 		case CHAR_TEXTTEST:
1895 			m->str_flags |= STRING_TEXTTEST;
1896 			break;
1897 		case CHAR_TRIM:
1898 			m->str_flags |= STRING_TRIM;
1899 			break;
1900 		case CHAR_FULL_WORD:
1901 			m->str_flags |= STRING_FULL_WORD;
1902 			break;
1903 		case CHAR_PSTRING_1_LE:
1904 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1905 			if (m->type != FILE_PSTRING)
1906 				goto bad;
1907 			SET_LENGTH(PSTRING_1_LE);
1908 			break;
1909 		case CHAR_PSTRING_2_BE:
1910 			if (m->type != FILE_PSTRING)
1911 				goto bad;
1912 			SET_LENGTH(PSTRING_2_BE);
1913 			break;
1914 		case CHAR_PSTRING_2_LE:
1915 			if (m->type != FILE_PSTRING)
1916 				goto bad;
1917 			SET_LENGTH(PSTRING_2_LE);
1918 			break;
1919 		case CHAR_PSTRING_4_BE:
1920 			if (m->type != FILE_PSTRING)
1921 				goto bad;
1922 			SET_LENGTH(PSTRING_4_BE);
1923 			break;
1924 		case CHAR_PSTRING_4_LE:
1925 			switch (m->type) {
1926 			case FILE_PSTRING:
1927 			case FILE_REGEX:
1928 				break;
1929 			default:
1930 				goto bad;
1931 			}
1932 			SET_LENGTH(PSTRING_4_LE);
1933 			break;
1934 		case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1935 			if (m->type != FILE_PSTRING)
1936 				goto bad;
1937 			m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1938 			break;
1939 		default:
1940 		bad:
1941 			if (ms->flags & MAGIC_CHECK)
1942 				file_magwarn(ms, "string modifier `%c' "
1943 					"invalid", *l);
1944 			goto out;
1945 		}
1946 		/* allow multiple '/' for readability */
1947 		if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
1948 			l++;
1949 	}
1950 	if (string_modifier_check(ms, m) == -1)
1951 		goto out;
1952 	*lp = l;
1953 	return 0;
1954 out:
1955 	*lp = l;
1956 	return -1;
1957 }
1958 
1959 /*
1960  * parse one line from magic file, put into magic[index++] if valid
1961  */
1962 private int
parse(struct magic_set * ms,struct magic_entry * me,const char * line,size_t lineno,int action)1963 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1964     size_t lineno, int action)
1965 {
1966 #ifdef ENABLE_CONDITIONALS
1967 	static uint32_t last_cont_level = 0;
1968 #endif
1969 	size_t i;
1970 	struct magic *m;
1971 	const char *l = line;
1972 	char *t;
1973 	int op;
1974 	uint32_t cont_level;
1975 	int32_t diff;
1976 
1977 	cont_level = 0;
1978 
1979 	/*
1980 	 * Parse the offset.
1981 	 */
1982 	while (*l == '>') {
1983 		++l;		/* step over */
1984 		cont_level++;
1985 	}
1986 #ifdef ENABLE_CONDITIONALS
1987 	if (cont_level == 0 || cont_level > last_cont_level)
1988 		if (file_check_mem(ms, cont_level) == -1)
1989 			return -1;
1990 	last_cont_level = cont_level;
1991 #endif
1992 	if (cont_level != 0) {
1993 		if (me->mp == NULL) {
1994 			file_magerror(ms, "No current entry for continuation");
1995 			return -1;
1996 		}
1997 		if (me->cont_count == 0) {
1998 			file_magerror(ms, "Continuations present with 0 count");
1999 			return -1;
2000 		}
2001 		m = &me->mp[me->cont_count - 1];
2002 		diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
2003 		if (diff > 1)
2004 			file_magwarn(ms, "New continuation level %u is more "
2005 			    "than one larger than current level %u", cont_level,
2006 			    m->cont_level);
2007 		if (me->cont_count == me->max_count) {
2008 			struct magic *nm;
2009 			size_t cnt = me->max_count + ALLOC_CHUNK;
2010 			if ((nm = CAST(struct magic *, erealloc(me->mp,
2011 			    sizeof(*nm) * cnt))) == NULL) {
2012 				file_oomem(ms, sizeof(*nm) * cnt);
2013 				return -1;
2014 			}
2015 			me->mp = nm;
2016 			me->max_count = CAST(uint32_t, cnt);
2017 		}
2018 		m = &me->mp[me->cont_count++];
2019 		(void)memset(m, 0, sizeof(*m));
2020 		m->cont_level = cont_level;
2021 	} else {
2022 		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
2023 		if (me->mp != NULL)
2024 			return 1;
2025 		if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
2026 			file_oomem(ms, len);
2027 			return -1;
2028 		}
2029 		me->mp = m;
2030 		me->max_count = ALLOC_CHUNK;
2031 		(void)memset(m, 0, sizeof(*m));
2032 		m->factor_op = FILE_FACTOR_OP_NONE;
2033 		m->cont_level = 0;
2034 		me->cont_count = 1;
2035 	}
2036 	m->lineno = CAST(uint32_t, lineno);
2037 
2038 	if (*l == '&') {  /* m->cont_level == 0 checked below. */
2039                 ++l;            /* step over */
2040                 m->flag |= OFFADD;
2041         }
2042 	if (*l == '(') {
2043 		++l;		/* step over */
2044 		m->flag |= INDIR;
2045 		if (m->flag & OFFADD)
2046 			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
2047 
2048 		if (*l == '&') {  /* m->cont_level == 0 checked below */
2049 			++l;            /* step over */
2050 			m->flag |= OFFADD;
2051 		}
2052 	}
2053 	/* Indirect offsets are not valid at level 0. */
2054 	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
2055 		if (ms->flags & MAGIC_CHECK)
2056 			file_magwarn(ms, "relative offset at level 0");
2057 		return -1;
2058 	}
2059 
2060 	/* get offset, then skip over it */
2061 	if (*l == '-') {
2062 		++l;            /* step over */
2063 		m->flag |= OFFNEGATIVE;
2064 	}
2065 	m->offset = CAST(int32_t, strtol(l, &t, 0));
2066         if (l == t) {
2067 		if (ms->flags & MAGIC_CHECK)
2068 			file_magwarn(ms, "offset `%s' invalid", l);
2069 		return -1;
2070 	}
2071 
2072         l = t;
2073 
2074 	if (m->flag & INDIR) {
2075 		m->in_type = FILE_LONG;
2076 		m->in_offset = 0;
2077 		m->in_op = 0;
2078 		/*
2079 		 * read [.,lbs][+-]nnnnn)
2080 		 */
2081 		if (*l == '.' || *l == ',') {
2082 			if (*l == ',')
2083 				m->in_op |= FILE_OPSIGNED;
2084 			l++;
2085 			switch (*l) {
2086 			case 'l':
2087 				m->in_type = FILE_LELONG;
2088 				break;
2089 			case 'L':
2090 				m->in_type = FILE_BELONG;
2091 				break;
2092 			case 'm':
2093 				m->in_type = FILE_MELONG;
2094 				break;
2095 			case 'h':
2096 			case 's':
2097 				m->in_type = FILE_LESHORT;
2098 				break;
2099 			case 'H':
2100 			case 'S':
2101 				m->in_type = FILE_BESHORT;
2102 				break;
2103 			case 'c':
2104 			case 'b':
2105 			case 'C':
2106 			case 'B':
2107 				m->in_type = FILE_BYTE;
2108 				break;
2109 			case 'e':
2110 			case 'f':
2111 			case 'g':
2112 				m->in_type = FILE_LEDOUBLE;
2113 				break;
2114 			case 'E':
2115 			case 'F':
2116 			case 'G':
2117 				m->in_type = FILE_BEDOUBLE;
2118 				break;
2119 			case 'i':
2120 				m->in_type = FILE_LEID3;
2121 				break;
2122 			case 'I':
2123 				m->in_type = FILE_BEID3;
2124 				break;
2125 			case 'o':
2126 				m->in_type = FILE_OCTAL;
2127 				break;
2128 			case 'q':
2129 				m->in_type = FILE_LEQUAD;
2130 				break;
2131 			case 'Q':
2132 				m->in_type = FILE_BEQUAD;
2133 				break;
2134 			default:
2135 				if (ms->flags & MAGIC_CHECK)
2136 					file_magwarn(ms,
2137 					    "indirect offset type `%c' invalid",
2138 					    *l);
2139 				return -1;
2140 			}
2141 			l++;
2142 		}
2143 
2144 		if (*l == '~') {
2145 			m->in_op |= FILE_OPINVERSE;
2146 			l++;
2147 		}
2148 		if ((op = get_op(*l)) != -1) {
2149 			m->in_op |= op;
2150 			l++;
2151 		}
2152 		if (*l == '(') {
2153 			m->in_op |= FILE_OPINDIRECT;
2154 			l++;
2155 		}
2156 		if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
2157 			m->in_offset = CAST(int32_t, strtol(l, &t, 0));
2158 			if (l == t) {
2159 				if (ms->flags & MAGIC_CHECK)
2160 					file_magwarn(ms,
2161 					    "in_offset `%s' invalid", l);
2162 				return -1;
2163 			}
2164 			l = t;
2165 		}
2166 		if (*l++ != ')' ||
2167 		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2168 			if (ms->flags & MAGIC_CHECK)
2169 				file_magwarn(ms,
2170 				    "missing ')' in indirect offset");
2171 			return -1;
2172 		}
2173 	}
2174 	EATAB;
2175 
2176 #ifdef ENABLE_CONDITIONALS
2177 	m->cond = get_cond(l, &l);
2178 	if (check_cond(ms, m->cond, cont_level) == -1)
2179 		return -1;
2180 
2181 	EATAB;
2182 #endif
2183 
2184 	/*
2185 	 * Parse the type.
2186 	 */
2187 	if (*l == 'u') {
2188 		/*
2189 		 * Try it as a keyword type prefixed by "u"; match what
2190 		 * follows the "u".  If that fails, try it as an SUS
2191 		 * integer type.
2192 		 */
2193 		m->type = get_type(type_tbl, l + 1, &l);
2194 		if (m->type == FILE_INVALID) {
2195 			/*
2196 			 * Not a keyword type; parse it as an SUS type,
2197 			 * 'u' possibly followed by a number or C/S/L.
2198 			 */
2199 			m->type = get_standard_integer_type(l, &l);
2200 		}
2201 		/* It's unsigned. */
2202 		if (m->type != FILE_INVALID)
2203 			m->flag |= UNSIGNED;
2204 	} else {
2205 		/*
2206 		 * Try it as a keyword type.  If that fails, try it as
2207 		 * an SUS integer type if it begins with "d" or as an
2208 		 * SUS string type if it begins with "s".  In any case,
2209 		 * it's not unsigned.
2210 		 */
2211 		m->type = get_type(type_tbl, l, &l);
2212 		if (m->type == FILE_INVALID) {
2213 			/*
2214 			 * Not a keyword type; parse it as an SUS type,
2215 			 * either 'd' possibly followed by a number or
2216 			 * C/S/L, or just 's'.
2217 			 */
2218 			if (*l == 'd')
2219 				m->type = get_standard_integer_type(l, &l);
2220 			else if (*l == 's'
2221 			    && !isalpha(CAST(unsigned char, l[1]))) {
2222 				m->type = FILE_STRING;
2223 				++l;
2224 			}
2225 		}
2226 	}
2227 
2228 	if (m->type == FILE_INVALID) {
2229 		/* Not found - try it as a special keyword. */
2230 		m->type = get_type(special_tbl, l, &l);
2231 	}
2232 
2233 	if (m->type == FILE_INVALID) {
2234 		if (ms->flags & MAGIC_CHECK)
2235 			file_magwarn(ms, "type `%s' invalid", l);
2236 		return -1;
2237 	}
2238 
2239 	if (m->type == FILE_NAME && cont_level != 0) {
2240 		if (ms->flags & MAGIC_CHECK)
2241 			file_magwarn(ms, "`name%s' entries can only be "
2242 			    "declared at top level", l);
2243 		return -1;
2244 	}
2245 
2246 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2247 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2248 
2249 	m->mask_op = 0;
2250 	if (*l == '~') {
2251 		if (!IS_LIBMAGIC_STRING(m->type))
2252 			m->mask_op |= FILE_OPINVERSE;
2253 		else if (ms->flags & MAGIC_CHECK)
2254 			file_magwarn(ms, "'~' invalid for string types");
2255 		++l;
2256 	}
2257 	m->str_range = 0;
2258 	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2259 	if ((op = get_op(*l)) != -1) {
2260 		if (IS_LIBMAGIC_STRING(m->type)) {
2261 			int r;
2262 
2263 			if (op != FILE_OPDIVIDE) {
2264 				if (ms->flags & MAGIC_CHECK)
2265 					file_magwarn(ms,
2266 					    "invalid string/indirect op: "
2267 					    "`%c'", *t);
2268 				return -1;
2269 			}
2270 
2271 			if (m->type == FILE_INDIRECT)
2272 				r = parse_indirect_modifier(ms, m, &l);
2273 			else
2274 				r = parse_string_modifier(ms, m, &l);
2275 			if (r == -1)
2276 				return -1;
2277 		} else
2278 			parse_op_modifier(ms, m, &l, op);
2279 	}
2280 
2281 	/*
2282 	 * We used to set mask to all 1's here, instead let's just not do
2283 	 * anything if mask = 0 (unless you have a better idea)
2284 	 */
2285 	EATAB;
2286 
2287 	switch (*l) {
2288 	case '>':
2289 	case '<':
2290   		m->reln = *l;
2291   		++l;
2292 		if (*l == '=') {
2293 			if (ms->flags & MAGIC_CHECK) {
2294 				file_magwarn(ms, "%c= not supported",
2295 				    m->reln);
2296 				return -1;
2297 			}
2298 		   ++l;
2299 		}
2300 		break;
2301 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
2302 	case '&':
2303 	case '^':
2304 	case '=':
2305   		m->reln = *l;
2306   		++l;
2307 		if (*l == '=') {
2308 		   /* HP compat: ignore &= etc. */
2309 		   ++l;
2310 		}
2311 		break;
2312 	case '!':
2313 		m->reln = *l;
2314 		++l;
2315 		break;
2316 	default:
2317   		m->reln = '=';	/* the default relation */
2318 		if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
2319 		    isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2320 			m->reln = *l;
2321 			++l;
2322 		}
2323 		break;
2324 	}
2325 	/*
2326 	 * Grab the value part, except for an 'x' reln.
2327 	 */
2328 	if (m->reln != 'x' && getvalue(ms, m, &l, action))
2329 		return -1;
2330 
2331 	/*
2332 	 * TODO finish this macro and start using it!
2333 	 * #define offsetcheck {if (offset > ms->bytes_max -1)
2334 	 *	magwarn("offset too big"); }
2335 	 */
2336 
2337 	/*
2338 	 * Now get last part - the description
2339 	 */
2340 	EATAB;
2341 	if (l[0] == '\b') {
2342 		++l;
2343 		m->flag |= NOSPACE;
2344 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
2345 		++l;
2346 		++l;
2347 		m->flag |= NOSPACE;
2348 	}
2349 	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2350 		continue;
2351 	if (i == sizeof(m->desc)) {
2352 		m->desc[sizeof(m->desc) - 1] = '\0';
2353 		if (ms->flags & MAGIC_CHECK)
2354 			file_magwarn(ms, "description `%s' truncated", m->desc);
2355 	}
2356 
2357         /*
2358 	 * We only do this check while compiling, or if any of the magic
2359 	 * files were not compiled.
2360          */
2361         if (ms->flags & MAGIC_CHECK) {
2362 		if (check_format(ms, m) == -1)
2363 			return -1;
2364 	}
2365 #ifndef COMPILE_ONLY
2366 	if (action == FILE_CHECK) {
2367 		file_mdump(m);
2368 	}
2369 #endif
2370 	m->mimetype[0] = '\0';		/* initialise MIME type to none */
2371 	return 0;
2372 }
2373 
2374 /*
2375  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2376  * if valid
2377  */
2378 /*ARGSUSED*/
2379 private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2380 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
2381     size_t len __attribute__((__unused__)))
2382 {
2383 	const char *l = line;
2384 	char *el;
2385 	unsigned long factor;
2386 	struct magic *m = &me->mp[0];
2387 
2388 	if (m->factor_op != FILE_FACTOR_OP_NONE) {
2389 		file_magwarn(ms,
2390 		    "Current entry already has a strength type: %c %d",
2391 		    m->factor_op, m->factor);
2392 		return -1;
2393 	}
2394 	if (m->type == FILE_NAME) {
2395 		file_magwarn(ms, "%s: Strength setting is not supported in "
2396 		    "\"name\" magic entries", m->value.s);
2397 		return -1;
2398 	}
2399 	EATAB;
2400 	switch (*l) {
2401 	case FILE_FACTOR_OP_NONE:
2402 	case FILE_FACTOR_OP_PLUS:
2403 	case FILE_FACTOR_OP_MINUS:
2404 	case FILE_FACTOR_OP_TIMES:
2405 	case FILE_FACTOR_OP_DIV:
2406 		m->factor_op = *l++;
2407 		break;
2408 	default:
2409 		file_magwarn(ms, "Unknown factor op `%c'", *l);
2410 		return -1;
2411 	}
2412 	EATAB;
2413 	factor = strtoul(l, &el, 0);
2414 	if (factor > 255) {
2415 		file_magwarn(ms, "Too large factor `%lu'", factor);
2416 		goto out;
2417 	}
2418 	if (*el && !isspace(CAST(unsigned char, *el))) {
2419 		file_magwarn(ms, "Bad factor `%s'", l);
2420 		goto out;
2421 	}
2422 	m->factor = CAST(uint8_t, factor);
2423 	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2424 		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2425 		    m->factor_op, m->factor);
2426 		goto out;
2427 	}
2428 	return 0;
2429 out:
2430 	m->factor_op = FILE_FACTOR_OP_NONE;
2431 	m->factor = 0;
2432 	return -1;
2433 }
2434 
2435 private int
goodchar(unsigned char x,const char * extra)2436 goodchar(unsigned char x, const char *extra)
2437 {
2438 	return (isascii(x) && isalnum(x)) || strchr(extra, x);
2439 }
2440 
2441 private int
parse_extra(struct magic_set * ms,struct magic_entry * me,const char * line,size_t llen,zend_off_t off,size_t len,const char * name,const char * extra,int nt)2442 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2443     size_t llen, zend_off_t off, size_t len, const char *name, const char *extra, int nt)
2444 {
2445 	size_t i;
2446 	const char *l = line;
2447 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2448 	char *buf = CAST(char *, CAST(void *, m)) + off;
2449 
2450 	if (buf[0] != '\0') {
2451 		len = nt ? strlen(buf) : len;
2452 		file_magwarn(ms, "Current entry already has a %s type "
2453 		    "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2454 		return -1;
2455 	}
2456 
2457 	if (*m->desc == '\0') {
2458 		file_magwarn(ms, "Current entry does not yet have a "
2459 		    "description for adding a %s type", name);
2460 		return -1;
2461 	}
2462 
2463 	EATAB;
2464 	for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
2465 	    buf[i++] = *l++)
2466 		continue;
2467 
2468 	if (i == len && *l) {
2469 		if (nt)
2470 			buf[len - 1] = '\0';
2471 		if (ms->flags & MAGIC_CHECK)
2472 			file_magwarn(ms, "%s type `%s' truncated %"
2473 			    SIZE_T_FORMAT "u", name, line, i);
2474 	} else {
2475 		if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2476 			file_magwarn(ms, "%s type `%s' has bad char '%c'",
2477 			    name, line, *l);
2478 		if (nt)
2479 			buf[i] = '\0';
2480 	}
2481 
2482 	if (i > 0)
2483 		return 0;
2484 
2485 	file_magerror(ms, "Bad magic entry '%s'", line);
2486 	return -1;
2487 }
2488 
2489 /*
2490  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2491  * magic[index - 1]
2492  */
2493 private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2494 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
2495     size_t len)
2496 {
2497 	return parse_extra(ms, me, line, len,
2498 	    CAST(off_t, offsetof(struct magic, apple)),
2499 	    sizeof(me->mp[0].apple), "APPLE", "!+-./?", 0);
2500 }
2501 
2502 /*
2503  * Parse a comma-separated list of extensions
2504  */
2505 private int
parse_ext(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2506 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
2507     size_t len)
2508 {
2509 	return parse_extra(ms, me, line, len,
2510 	    CAST(off_t, offsetof(struct magic, ext)),
2511 	    sizeof(me->mp[0].ext), "EXTENSION", ",!+-/@?_$&", 0); /* & for b&w */
2512 }
2513 
2514 /*
2515  * parse a MIME annotation line from magic file, put into magic[index - 1]
2516  * if valid
2517  */
2518 private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2519 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
2520     size_t len)
2521 {
2522 	return parse_extra(ms, me, line, len,
2523 	    CAST(off_t, offsetof(struct magic, mimetype)),
2524 	    sizeof(me->mp[0].mimetype), "MIME", "+-/.$?:{}", 1);
2525 }
2526 
2527 private int
check_format_type(const char * ptr,int type,const char ** estr)2528 check_format_type(const char *ptr, int type, const char **estr)
2529 {
2530 	int quad = 0, h;
2531 	size_t len, cnt;
2532 	if (*ptr == '\0') {
2533 		/* Missing format string; bad */
2534 		*estr = "missing format spec";
2535 		return -1;
2536 	}
2537 
2538 	switch (file_formats[type]) {
2539 	case FILE_FMT_QUAD:
2540 		quad = 1;
2541 		/*FALLTHROUGH*/
2542 	case FILE_FMT_NUM:
2543 		if (quad == 0) {
2544 			switch (type) {
2545 			case FILE_BYTE:
2546 				h = 2;
2547 				break;
2548 			case FILE_SHORT:
2549 			case FILE_BESHORT:
2550 			case FILE_LESHORT:
2551 				h = 1;
2552 				break;
2553 			case FILE_LONG:
2554 			case FILE_BELONG:
2555 			case FILE_LELONG:
2556 			case FILE_MELONG:
2557 			case FILE_LEID3:
2558 			case FILE_BEID3:
2559 			case FILE_INDIRECT:
2560 				h = 0;
2561 				break;
2562 			default:
2563 				fprintf(stderr, "Bad number format %d", type);
2564 				abort();
2565 			}
2566 		} else
2567 			h = 0;
2568 		while (*ptr && strchr("-.#", *ptr) != NULL)
2569 			ptr++;
2570 #define CHECKLEN() do { \
2571 	for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
2572 		len = len * 10 + (*ptr - '0'); \
2573 	if (cnt > 5 || len > 1024) \
2574 		goto toolong; \
2575 } while (/*CONSTCOND*/0)
2576 
2577 		CHECKLEN();
2578 		if (*ptr == '.')
2579 			ptr++;
2580 		CHECKLEN();
2581 		if (quad) {
2582 			if (*ptr++ != 'l')
2583 				goto invalid;
2584 			if (*ptr++ != 'l')
2585 				goto invalid;
2586 		}
2587 
2588 		switch (*ptr++) {
2589 #ifdef STRICT_FORMAT 	/* "long" formats are int formats for us */
2590 		/* so don't accept the 'l' modifier */
2591 		case 'l':
2592 			switch (*ptr++) {
2593 			case 'i':
2594 			case 'd':
2595 			case 'u':
2596 			case 'o':
2597 			case 'x':
2598 			case 'X':
2599 				if (h == 0)
2600 					return 0;
2601 				/*FALLTHROUGH*/
2602 			default:
2603 				goto invalid;
2604 			}
2605 
2606 		/*
2607 		 * Don't accept h and hh modifiers. They make writing
2608 		 * magic entries more complicated, for very little benefit
2609 		 */
2610 		case 'h':
2611 			if (h-- <= 0)
2612 				goto invalid;
2613 			switch (*ptr++) {
2614 			case 'h':
2615 				if (h-- <= 0)
2616 					goto invalid;
2617 				switch (*ptr++) {
2618 				case 'i':
2619 				case 'd':
2620 				case 'u':
2621 				case 'o':
2622 				case 'x':
2623 				case 'X':
2624 					return 0;
2625 				default:
2626 					goto invalid;
2627 				}
2628 			case 'i':
2629 			case 'd':
2630 			case 'u':
2631 			case 'o':
2632 			case 'x':
2633 			case 'X':
2634 				if (h == 0)
2635 					return 0;
2636 				/*FALLTHROUGH*/
2637 			default:
2638 				goto invalid;
2639 			}
2640 #endif
2641 		case 'c':
2642 			if (h == 2)
2643 				return 0;
2644 			goto invalid;
2645 		case 'i':
2646 		case 'd':
2647 		case 'u':
2648 		case 'o':
2649 		case 'x':
2650 		case 'X':
2651 #ifdef STRICT_FORMAT
2652 			if (h == 0)
2653 				return 0;
2654 			/*FALLTHROUGH*/
2655 #else
2656 			return 0;
2657 #endif
2658 		default:
2659 			goto invalid;
2660 		}
2661 
2662 	case FILE_FMT_FLOAT:
2663 	case FILE_FMT_DOUBLE:
2664 		if (*ptr == '-')
2665 			ptr++;
2666 		if (*ptr == '.')
2667 			ptr++;
2668 		CHECKLEN();
2669 		if (*ptr == '.')
2670 			ptr++;
2671 		CHECKLEN();
2672 		switch (*ptr++) {
2673 		case 'e':
2674 		case 'E':
2675 		case 'f':
2676 		case 'F':
2677 		case 'g':
2678 		case 'G':
2679 			return 0;
2680 
2681 		default:
2682 			goto invalid;
2683 		}
2684 
2685 
2686 	case FILE_FMT_STR:
2687 		if (*ptr == '-')
2688 			ptr++;
2689 		while (isdigit(CAST(unsigned char, *ptr)))
2690 			ptr++;
2691 		if (*ptr == '.') {
2692 			ptr++;
2693 			while (isdigit(CAST(unsigned char , *ptr)))
2694 				ptr++;
2695 		}
2696 
2697 		switch (*ptr++) {
2698 		case 's':
2699 			return 0;
2700 		default:
2701 			goto invalid;
2702 		}
2703 
2704 	default:
2705 		/* internal error */
2706 		fprintf(stderr, "Bad file format %d", type);
2707 		abort();
2708 	}
2709 invalid:
2710 	*estr = "not valid";
2711 toolong:
2712 	*estr = "too long";
2713 	return -1;
2714 }
2715 
2716 /*
2717  * Check that the optional printf format in description matches
2718  * the type of the magic.
2719  */
2720 private int
check_format(struct magic_set * ms,struct magic * m)2721 check_format(struct magic_set *ms, struct magic *m)
2722 {
2723 	char *ptr;
2724 	const char *estr;
2725 
2726 	for (ptr = m->desc; *ptr; ptr++)
2727 		if (*ptr == '%')
2728 			break;
2729 	if (*ptr == '\0') {
2730 		/* No format string; ok */
2731 		return 1;
2732 	}
2733 
2734 	assert(file_nformats == file_nnames);
2735 
2736 	if (m->type >= file_nformats) {
2737 		file_magwarn(ms, "Internal error inconsistency between "
2738 		    "m->type and format strings");
2739 		return -1;
2740 	}
2741 	if (file_formats[m->type] == FILE_FMT_NONE) {
2742 		file_magwarn(ms, "No format string for `%s' with description "
2743 		    "`%s'", m->desc, file_names[m->type]);
2744 		return -1;
2745 	}
2746 
2747 	ptr++;
2748 	if (check_format_type(ptr, m->type, &estr) == -1) {
2749 		/*
2750 		 * TODO: this error message is unhelpful if the format
2751 		 * string is not one character long
2752 		 */
2753 		file_magwarn(ms, "Printf format is %s for type "
2754 		    "`%s' in description `%s'", estr,
2755 		    file_names[m->type], m->desc);
2756 		return -1;
2757 	}
2758 
2759 	for (; *ptr; ptr++) {
2760 		if (*ptr == '%') {
2761 			file_magwarn(ms,
2762 			    "Too many format strings (should have at most one) "
2763 			    "for `%s' with description `%s'",
2764 			    file_names[m->type], m->desc);
2765 			return -1;
2766 		}
2767 	}
2768 	return 0;
2769 }
2770 
2771 /*
2772  * Read a numeric value from a pointer, into the value union of a magic
2773  * pointer, according to the magic type.  Update the string pointer to point
2774  * just after the number read.  Return 0 for success, non-zero for failure.
2775  */
2776 private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2777 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2778 {
2779 	char *ep;
2780 	uint64_t ull;
2781 
2782 	switch (m->type) {
2783 	case FILE_BESTRING16:
2784 	case FILE_LESTRING16:
2785 	case FILE_STRING:
2786 	case FILE_PSTRING:
2787 	case FILE_REGEX:
2788 	case FILE_SEARCH:
2789 	case FILE_NAME:
2790 	case FILE_USE:
2791 	case FILE_DER:
2792 	case FILE_OCTAL:
2793 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2794 		if (*p == NULL) {
2795 			if (ms->flags & MAGIC_CHECK)
2796 				file_magwarn(ms, "cannot get string from `%s'",
2797 				    m->value.s);
2798 			return -1;
2799 		}
2800 		if (m->type == FILE_REGEX) {
2801 			zend_string *pattern;
2802 			int options = 0;
2803 			pcre_cache_entry *pce;
2804 
2805 			pattern = convert_libmagic_pattern(m->value.s, strlen(m->value.s), options);
2806 
2807 			if ((pce = pcre_get_compiled_regex_cache(pattern)) == NULL) {
2808 				zend_string_release(pattern);
2809 				return -1;
2810 			}
2811 			zend_string_release(pattern);
2812 
2813 			return 0;
2814 		}
2815 		return 0;
2816 	default:
2817 		if (m->reln == 'x')
2818 			return 0;
2819 		break;
2820 	}
2821 
2822 	switch (m->type) {
2823 	case FILE_FLOAT:
2824 	case FILE_BEFLOAT:
2825 	case FILE_LEFLOAT:
2826 		errno = 0;
2827 #ifdef HAVE_STRTOF
2828 		m->value.f = strtof(*p, &ep);
2829 #else
2830 		m->value.f = (float)strtod(*p, &ep);
2831 #endif
2832 		if (errno == 0)
2833 			*p = ep;
2834 		return 0;
2835 	case FILE_DOUBLE:
2836 	case FILE_BEDOUBLE:
2837 	case FILE_LEDOUBLE:
2838 		errno = 0;
2839 		m->value.d = strtod(*p, &ep);
2840 		if (errno == 0)
2841 			*p = ep;
2842 		return 0;
2843 	case FILE_GUID:
2844 		if (file_parse_guid(*p, m->value.guid) == -1)
2845 			return -1;
2846 		*p += FILE_GUID_SIZE - 1;
2847 		return 0;
2848 	default:
2849 		errno = 0;
2850 		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
2851 		m->value.q = file_signextend(ms, m, ull);
2852 		if (*p == ep) {
2853 			file_magwarn(ms, "Unparsable number `%s'", *p);
2854 		} else {
2855 			size_t ts = typesize(m->type);
2856 			uint64_t x;
2857 			const char *q;
2858 
2859 			if (ts == FILE_BADSIZE) {
2860 				file_magwarn(ms,
2861 				    "Expected numeric type got `%s'",
2862 				    type_tbl[m->type].name);
2863 			}
2864 			for (q = *p; isspace(CAST(unsigned char, *q)); q++)
2865 				continue;
2866 			if (*q == '-')
2867 				ull = -CAST(int64_t, ull);
2868 			switch (ts) {
2869 			case 1:
2870 				x = CAST(uint64_t, ull & ~0xffULL);
2871 				break;
2872 			case 2:
2873 				x = CAST(uint64_t, ull & ~0xffffULL);
2874 				break;
2875 			case 4:
2876 				x = CAST(uint64_t, ull & ~0xffffffffULL);
2877 				break;
2878 			case 8:
2879 				x = 0;
2880 				break;
2881 			default:
2882 				fprintf(stderr, "Bad width %zu", ts);
2883 				abort();
2884 			}
2885 			if (x) {
2886 				file_magwarn(ms, "Overflow for numeric"
2887 				    " type `%s' value %#" PRIx64,
2888 				    type_tbl[m->type].name, ull);
2889 			}
2890 		}
2891 		if (errno == 0) {
2892 			*p = ep;
2893 			eatsize(p);
2894 		}
2895 		return 0;
2896 	}
2897 }
2898 
2899 /*
2900  * Convert a string containing C character escapes.  Stop at an unescaped
2901  * space or tab.
2902  * Copy the converted version to "m->value.s", and the length in m->vallen.
2903  * Return updated scan pointer as function result. Warn if set.
2904  */
2905 private const char *
getstr(struct magic_set * ms,struct magic * m,const char * s,int warn)2906 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2907 {
2908 	const char *origs = s;
2909 	char	*p = m->value.s;
2910 	size_t  plen = sizeof(m->value.s);
2911 	char 	*origp = p;
2912 	char	*pmax = p + plen - 1;
2913 	int	c;
2914 	int	val;
2915 	size_t	bracket_nesting = 0;
2916 
2917 	while ((c = *s++) != '\0') {
2918 		if (isspace(CAST(unsigned char, c)))
2919 			break;
2920 		if (p >= pmax) {
2921 			file_error(ms, 0, "string too long: `%s'", origs);
2922 			return NULL;
2923 		}
2924 		if (c != '\\') {
2925 		    if (c == '[') {
2926 			    bracket_nesting++;
2927 		    }
2928 		    if (c == ']' && bracket_nesting > 0) {
2929 			    bracket_nesting--;
2930 		    }
2931 		    *p++ = CAST(char, c);
2932 		    continue;
2933 		}
2934 		switch(c = *s++) {
2935 
2936 		case '\0':
2937 			if (warn)
2938 				file_magwarn(ms, "incomplete escape");
2939 			s--;
2940 			goto out;
2941 		case '.':
2942 			if (m->type == FILE_REGEX &&
2943 			    bracket_nesting == 0 && warn) {
2944 				file_magwarn(ms, "escaped dot ('.') found, "
2945 				    "use \\\\. instead");
2946 			}
2947 			warn = 0; /* already did */
2948 			/*FALLTHROUGH*/
2949 		case '\t':
2950 			if (warn) {
2951 				file_magwarn(ms,
2952 				    "escaped tab found, use \\\\t instead");
2953 				warn = 0;	/* already did */
2954 			}
2955 			/*FALLTHROUGH*/
2956 		default:
2957 			if (warn) {
2958 				if (isprint(CAST(unsigned char, c))) {
2959 					/* Allow escaping of
2960 					 * ``relations'' */
2961 					if (strchr("<>&^=!", c) == NULL
2962 					    && (m->type != FILE_REGEX ||
2963 					    strchr("[]().*?^$|{}", c)
2964 					    == NULL)) {
2965 						file_magwarn(ms, "no "
2966 						    "need to escape "
2967 						    "`%c'", c);
2968 					}
2969 				} else {
2970 					file_magwarn(ms,
2971 					    "unknown escape sequence: "
2972 					    "\\%03o", c);
2973 				}
2974 			}
2975 			/*FALLTHROUGH*/
2976 		/* space, perhaps force people to use \040? */
2977 		case ' ':
2978 #if 0
2979 		/*
2980 		 * Other things people escape, but shouldn't need to,
2981 		 * so we disallow them
2982 		 */
2983 		case '\'':
2984 		case '"':
2985 		case '?':
2986 #endif
2987 		/* Relations */
2988 		case '>':
2989 		case '<':
2990 		case '&':
2991 		case '^':
2992 		case '=':
2993 		case '!':
2994 		/* and backslash itself */
2995 		case '\\':
2996 			*p++ = CAST(char, c);
2997 			break;
2998 
2999 		case 'a':
3000 			*p++ = '\a';
3001 			break;
3002 
3003 		case 'b':
3004 			*p++ = '\b';
3005 			break;
3006 
3007 		case 'f':
3008 			*p++ = '\f';
3009 			break;
3010 
3011 		case 'n':
3012 			*p++ = '\n';
3013 			break;
3014 
3015 		case 'r':
3016 			*p++ = '\r';
3017 			break;
3018 
3019 		case 't':
3020 			*p++ = '\t';
3021 			break;
3022 
3023 		case 'v':
3024 			*p++ = '\v';
3025 			break;
3026 
3027 		/* \ and up to 3 octal digits */
3028 		case '0':
3029 		case '1':
3030 		case '2':
3031 		case '3':
3032 		case '4':
3033 		case '5':
3034 		case '6':
3035 		case '7':
3036 			val = c - '0';
3037 			c = *s++;  /* try for 2 */
3038 			if (c >= '0' && c <= '7') {
3039 				val = (val << 3) | (c - '0');
3040 				c = *s++;  /* try for 3 */
3041 				if (c >= '0' && c <= '7')
3042 					val = (val << 3) | (c-'0');
3043 				else
3044 					--s;
3045 			}
3046 			else
3047 				--s;
3048 			*p++ = CAST(char, val);
3049 			break;
3050 
3051 		/* \x and up to 2 hex digits */
3052 		case 'x':
3053 			val = 'x';	/* Default if no digits */
3054 			c = hextoint(*s++);	/* Get next char */
3055 			if (c >= 0) {
3056 				val = c;
3057 				c = hextoint(*s++);
3058 				if (c >= 0)
3059 					val = (val << 4) + c;
3060 				else
3061 					--s;
3062 			} else
3063 				--s;
3064 			*p++ = CAST(char, val);
3065 			break;
3066 		}
3067 	}
3068 	--s;
3069 out:
3070 	*p = '\0';
3071 	m->vallen = CAST(unsigned char, (p - origp));
3072 	if (m->type == FILE_PSTRING) {
3073 		size_t l =  file_pstring_length_size(ms, m);
3074 		if (l == FILE_BADSIZE)
3075 			return NULL;
3076 		m->vallen += CAST(unsigned char, l);
3077 	}
3078 	return s;
3079 }
3080 
3081 
3082 /* Single hex char to int; -1 if not a hex char. */
3083 private int
hextoint(int c)3084 hextoint(int c)
3085 {
3086 	if (!isascii(CAST(unsigned char, c)))
3087 		return -1;
3088 	if (isdigit(CAST(unsigned char, c)))
3089 		return c - '0';
3090 	if ((c >= 'a') && (c <= 'f'))
3091 		return c + 10 - 'a';
3092 	if (( c>= 'A') && (c <= 'F'))
3093 		return c + 10 - 'A';
3094 	return -1;
3095 }
3096 
3097 
3098 /*
3099  * Print a string containing C character escapes.
3100  */
3101 protected void
file_showstr(FILE * fp,const char * s,size_t len)3102 file_showstr(FILE *fp, const char *s, size_t len)
3103 {
3104 	char	c;
3105 
3106 	for (;;) {
3107 		if (len == FILE_BADSIZE) {
3108 			c = *s++;
3109 			if (c == '\0')
3110 				break;
3111 		}
3112 		else  {
3113 			if (len-- == 0)
3114 				break;
3115 			c = *s++;
3116 		}
3117 		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
3118 			(void) fputc(c, fp);
3119 		else {
3120 			(void) fputc('\\', fp);
3121 			switch (c) {
3122 			case '\a':
3123 				(void) fputc('a', fp);
3124 				break;
3125 
3126 			case '\b':
3127 				(void) fputc('b', fp);
3128 				break;
3129 
3130 			case '\f':
3131 				(void) fputc('f', fp);
3132 				break;
3133 
3134 			case '\n':
3135 				(void) fputc('n', fp);
3136 				break;
3137 
3138 			case '\r':
3139 				(void) fputc('r', fp);
3140 				break;
3141 
3142 			case '\t':
3143 				(void) fputc('t', fp);
3144 				break;
3145 
3146 			case '\v':
3147 				(void) fputc('v', fp);
3148 				break;
3149 
3150 			default:
3151 				(void) fprintf(fp, "%.3o", c & 0377);
3152 				break;
3153 			}
3154 		}
3155 	}
3156 }
3157 
3158 /*
3159  * eatsize(): Eat the size spec from a number [eg. 10UL]
3160  */
3161 private void
eatsize(const char ** p)3162 eatsize(const char **p)
3163 {
3164 	const char *l = *p;
3165 
3166 	if (LOWCASE(*l) == 'u')
3167 		l++;
3168 
3169 	switch (LOWCASE(*l)) {
3170 	case 'l':    /* long */
3171 	case 's':    /* short */
3172 	case 'h':    /* short */
3173 	case 'b':    /* char/byte */
3174 	case 'c':    /* char/byte */
3175 		l++;
3176 		/*FALLTHROUGH*/
3177 	default:
3178 		break;
3179 	}
3180 
3181 	*p = l;
3182 }
3183 
3184 /*
3185  * handle a compiled file.
3186  */
3187 
3188 private struct magic_map *
apprentice_map(struct magic_set * ms,const char * fn)3189 apprentice_map(struct magic_set *ms, const char *fn)
3190 {
3191 	uint32_t *ptr;
3192 	uint32_t version, entries = 0, nentries;
3193 	int needsbyteswap;
3194 	char *dbname = NULL;
3195 	struct magic_map *map;
3196 	size_t i;
3197 	php_stream *stream = NULL;
3198 	php_stream_statbuf st;
3199 
3200 
3201 
3202 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
3203 		file_oomem(ms, sizeof(*map));
3204 		return NULL;
3205 	}
3206 
3207 	if (fn == NULL) {
3208 		map->p = (void *)&php_magic_database;
3209 		goto internal_loaded;
3210 	}
3211 
3212 #ifdef PHP_WIN32
3213 	/* Don't bother on windows with php_stream_open_wrapper,
3214 	return to give apprentice_load() a chance. */
3215 	if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
3216                if (st.sb.st_mode & S_IFDIR) {
3217                        goto error;
3218                }
3219        }
3220 #endif
3221 
3222 	dbname = mkdbname(ms, fn, 0);
3223 	if (dbname == NULL)
3224 		goto error;
3225 
3226 	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
3227 
3228 	if (!stream) {
3229 		goto error;
3230 	}
3231 
3232 #ifndef PHP_WIN32
3233 	if (php_stream_stat(stream, &st) < 0) {
3234 		file_error(ms, errno, "cannot stat `%s'", dbname);
3235 		goto error;
3236 	}
3237 #endif
3238 	if (st.sb.st_size < 8 || st.sb.st_size > maxoff_t()) {
3239 		file_error(ms, 0, "file `%s' is too %s", dbname,
3240 		    st.sb.st_size < 8 ? "small" : "large");
3241 		goto error;
3242 	}
3243 
3244 	map->type = MAP_TYPE_MALLOC;
3245 	map->len = CAST(size_t, st.sb.st_size);
3246 	map->p = CAST(void *, emalloc(map->len));
3247 
3248 	if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
3249 		file_badread(ms);
3250 		goto error;
3251 	}
3252 
3253 	php_stream_close(stream);
3254 	stream = NULL;
3255 
3256 internal_loaded:
3257 	ptr = (uint32_t *)(void *)map->p;
3258 	if (*ptr != MAGICNO) {
3259 		if (swap4(*ptr) != MAGICNO) {
3260 			file_error(ms, 0, "bad magic in `%s'", dbname);
3261 			goto error;
3262 		}
3263 		needsbyteswap = 1;
3264 	} else
3265 		needsbyteswap = 0;
3266 	if (needsbyteswap)
3267 		version = swap4(ptr[1]);
3268 	else
3269 		version = ptr[1];
3270 	if (version != VERSIONNO) {
3271 		file_error(ms, 0, "File %d supports only version %d magic "
3272 		    "files. `%s' is version %d", MAGIC_VERSION,
3273 		    VERSIONNO, dbname, version);
3274 		goto error;
3275 	}
3276 
3277 	/* php_magic_database is a const, performing writes will segfault. This is for big-endian
3278 	machines only, PPC and Sparc specifically. Consider static variable or MINIT in
3279 	future. */
3280 	if (needsbyteswap && fn == NULL) {
3281 		map->p = emalloc(sizeof(php_magic_database));
3282 		map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
3283 	}
3284 
3285 	if (NULL != fn) {
3286 		nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
3287 		entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
3288 		if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
3289 			file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
3290 				dbname, (unsigned long long)st.sb.st_size,
3291 				sizeof(struct magic));
3292 			goto error;
3293 		}
3294 	}
3295 	map->magic[0] = CAST(struct magic *, map->p) + 1;
3296 	nentries = 0;
3297 	for (i = 0; i < MAGIC_SETS; i++) {
3298 		if (needsbyteswap)
3299 			map->nmagic[i] = swap4(ptr[i + 2]);
3300 		else
3301 			map->nmagic[i] = ptr[i + 2];
3302 		if (i != MAGIC_SETS - 1)
3303 			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3304 		nentries += map->nmagic[i];
3305 	}
3306 	if (NULL != fn && entries != nentries + 1) {
3307 		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3308 		    dbname, entries, nentries + 1);
3309 		goto error;
3310 	}
3311 	if (needsbyteswap)
3312 		for (i = 0; i < MAGIC_SETS; i++)
3313 			byteswap(map->magic[i], map->nmagic[i]);
3314 
3315 	if (dbname) {
3316 		efree(dbname);
3317 	}
3318 	return map;
3319 
3320 error:
3321 	if (stream) {
3322 		php_stream_close(stream);
3323 	}
3324 	apprentice_unmap(map);
3325 	if (dbname) {
3326 		efree(dbname);
3327 	}
3328 	return NULL;
3329 }
3330 
3331 /*
3332  * handle an mmaped file.
3333  */
3334 private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)3335 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3336 {
3337 	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3338 	static const size_t m = sizeof(**map->magic);
3339 	php_stream *stream;
3340 	size_t len;
3341 	char *dbname;
3342 	int rv = -1;
3343 	uint32_t i;
3344 	union {
3345 		struct magic m;
3346 		uint32_t h[2 + MAGIC_SETS];
3347 	} hdr;
3348 
3349 	dbname = mkdbname(ms, fn, 1);
3350 
3351 	if (dbname == NULL)
3352 		goto out;
3353 
3354 	/* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
3355 	stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
3356 
3357 	if (!stream) {
3358 		file_error(ms, errno, "cannot open `%s'", dbname);
3359 		goto out;
3360 	}
3361 	memset(&hdr, 0, sizeof(hdr));
3362 	hdr.h[0] = MAGICNO;
3363 	hdr.h[1] = VERSIONNO;
3364 	memcpy(hdr.h + 2, map->nmagic, nm);
3365 
3366 	if (php_stream_write(stream,(const char *)&hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
3367 		file_error(ms, errno, "error writing `%s'", dbname);
3368 		goto out;
3369 	}
3370 
3371 	for (i = 0; i < MAGIC_SETS; i++) {
3372 		len = m * map->nmagic[i];
3373 		if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
3374 			file_error(ms, errno, "error writing `%s'", dbname);
3375 			goto out;
3376 		}
3377 	}
3378 
3379 	rv = 0;
3380 	if (stream) {
3381 		php_stream_close(stream);
3382 	}
3383 out:
3384 	efree(dbname);
3385 	return rv;
3386 }
3387 
3388 private const char ext[] = ".mgc";
3389 /*
3390  * make a dbname
3391  */
3392 private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)3393 mkdbname(struct magic_set *ms, const char *fn, int strip)
3394 {
3395 	const char *p, *q;
3396 	char *buf;
3397 
3398 	if (strip) {
3399 		if ((p = strrchr(fn, '/')) != NULL)
3400 			fn = ++p;
3401 	}
3402 
3403 	for (q = fn; *q; q++)
3404 		continue;
3405 	/* Look for .mgc */
3406 	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3407 		if (*p != *q)
3408 			break;
3409 
3410 	/* Did not find .mgc, restore q */
3411 	if (p >= ext)
3412 		while (*q)
3413 			q++;
3414 
3415 	q++;
3416 	/* Compatibility with old code that looked in .mime */
3417 	if (ms->flags & MAGIC_MIME) {
3418 		spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", CAST(int, q - fn), fn, ext);
3419 #ifdef PHP_WIN32
3420 		if (VCWD_ACCESS(buf, R_OK) == 0) {
3421 #else
3422 		if (VCWD_ACCESS(buf, R_OK) != -1) {
3423 #endif
3424 			ms->flags &= MAGIC_MIME_TYPE;
3425 			return buf;
3426 		}
3427 		efree(buf);
3428 	}
3429 	spprintf(&buf, MAXPATHLEN, "%.*s%s", CAST(int, q - fn), fn, ext);
3430 
3431 	/* Compatibility with old code that looked in .mime */
3432 	if (strstr(fn, ".mime") != NULL)
3433 		ms->flags &= MAGIC_MIME_TYPE;
3434 	return buf;
3435 }
3436 
3437 /*
3438  * Byteswap an mmap'ed file if needed
3439  */
3440 private void
3441 byteswap(struct magic *magic, uint32_t nmagic)
3442 {
3443 	uint32_t i;
3444 	for (i = 0; i < nmagic; i++)
3445 		bs1(&magic[i]);
3446 }
3447 
3448 #if !defined(HAVE_BYTESWAP_H) && !defined(HAVE_SYS_BSWAP_H)
3449 /*
3450  * swap a short
3451  */
3452 private uint16_t
3453 swap2(uint16_t sv)
3454 {
3455 	uint16_t rv;
3456 	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
3457 	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3458 	d[0] = s[1];
3459 	d[1] = s[0];
3460 	return rv;
3461 }
3462 
3463 /*
3464  * swap an int
3465  */
3466 private uint32_t
3467 swap4(uint32_t sv)
3468 {
3469 	uint32_t rv;
3470 	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
3471 	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3472 	d[0] = s[3];
3473 	d[1] = s[2];
3474 	d[2] = s[1];
3475 	d[3] = s[0];
3476 	return rv;
3477 }
3478 
3479 /*
3480  * swap a quad
3481  */
3482 private uint64_t
3483 swap8(uint64_t sv)
3484 {
3485 	uint64_t rv;
3486 	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
3487 	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3488 # if 0
3489 	d[0] = s[3];
3490 	d[1] = s[2];
3491 	d[2] = s[1];
3492 	d[3] = s[0];
3493 	d[4] = s[7];
3494 	d[5] = s[6];
3495 	d[6] = s[5];
3496 	d[7] = s[4];
3497 # else
3498 	d[0] = s[7];
3499 	d[1] = s[6];
3500 	d[2] = s[5];
3501 	d[3] = s[4];
3502 	d[4] = s[3];
3503 	d[5] = s[2];
3504 	d[6] = s[1];
3505 	d[7] = s[0];
3506 # endif
3507 	return rv;
3508 }
3509 #endif
3510 
3511 protected uintmax_t
3512 file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
3513 {
3514         uintmax_t x = 0;
3515         const unsigned char *c;
3516         if (t == FILE_LEVARINT) {
3517                 for (c = us; *c; c++) {
3518                         if ((*c & 0x80) == 0)
3519                                 break;
3520                 }
3521 		if (l)
3522 			*l = c - us + 1;
3523                 for (; c >= us; c--) {
3524                         x |= *c & 0x7f;
3525                         x <<= 7;
3526                 }
3527         } else {
3528                 for (c = us; *c; c++) {
3529 			x |= *c & 0x7f;
3530 			if ((*c & 0x80) == 0)
3531 				break;
3532 			x <<= 7;
3533                 }
3534 		if (l)
3535 			*l = c - us + 1;
3536         }
3537 	return x;
3538 }
3539 
3540 
3541 /*
3542  * byteswap a single magic entry
3543  */
3544 private void
3545 bs1(struct magic *m)
3546 {
3547 	m->cont_level = swap2(m->cont_level);
3548 	m->offset = swap4(CAST(uint32_t, m->offset));
3549 	m->in_offset = swap4(CAST(uint32_t, m->in_offset));
3550 	m->lineno = swap4(CAST(uint32_t, m->lineno));
3551 	if (IS_LIBMAGIC_STRING(m->type)) {
3552 		m->str_range = swap4(m->str_range);
3553 		m->str_flags = swap4(m->str_flags);
3554 	}
3555 	else {
3556 		m->value.q = swap8(m->value.q);
3557 		m->num_mask = swap8(m->num_mask);
3558 	}
3559 }
3560 
3561 protected size_t
3562 file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3563 {
3564 	switch (m->str_flags & PSTRING_LEN) {
3565 	case PSTRING_1_LE:
3566 		return 1;
3567 	case PSTRING_2_LE:
3568 	case PSTRING_2_BE:
3569 		return 2;
3570 	case PSTRING_4_LE:
3571 	case PSTRING_4_BE:
3572 		return 4;
3573 	default:
3574 		file_error(ms, 0, "corrupt magic file "
3575 		    "(bad pascal string length %d)",
3576 		    m->str_flags & PSTRING_LEN);
3577 		return FILE_BADSIZE;
3578 	}
3579 }
3580 protected size_t
3581 file_pstring_get_length(struct magic_set *ms, const struct magic *m,
3582     const char *ss)
3583 {
3584 	size_t len = 0;
3585 	const unsigned char *s = RCAST(const unsigned char *, ss);
3586 	unsigned int s3, s2, s1, s0;
3587 
3588 	switch (m->str_flags & PSTRING_LEN) {
3589 	case PSTRING_1_LE:
3590 		len = *s;
3591 		break;
3592 	case PSTRING_2_LE:
3593 		s0 = s[0];
3594 		s1 = s[1];
3595 		len = (s1 << 8) | s0;
3596 		break;
3597 	case PSTRING_2_BE:
3598 		s0 = s[0];
3599 		s1 = s[1];
3600 		len = (s0 << 8) | s1;
3601 		break;
3602 	case PSTRING_4_LE:
3603 		s0 = s[0];
3604 		s1 = s[1];
3605 		s2 = s[2];
3606 		s3 = s[3];
3607 		len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3608 		break;
3609 	case PSTRING_4_BE:
3610 		s0 = s[0];
3611 		s1 = s[1];
3612 		s2 = s[2];
3613 		s3 = s[3];
3614 		len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3615 		break;
3616 	default:
3617 		file_error(ms, 0, "corrupt magic file "
3618 		    "(bad pascal string length %d)",
3619 		    m->str_flags & PSTRING_LEN);
3620 		return FILE_BADSIZE;
3621 	}
3622 
3623 	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
3624 		size_t l = file_pstring_length_size(ms, m);
3625 		if (l == FILE_BADSIZE)
3626 			return l;
3627 		len -= l;
3628 	}
3629 
3630 	return len;
3631 }
3632 
3633 protected int
3634 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3635 {
3636 	uint32_t i, j;
3637 	struct mlist *mlist, *ml;
3638 
3639 	mlist = ms->mlist[1];
3640 
3641 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3642 		struct magic *ma = ml->magic;
3643 		for (i = 0; i < ml->nmagic; i++) {
3644 			if (ma[i].type != FILE_NAME)
3645 				continue;
3646 			if (strcmp(ma[i].value.s, name) == 0) {
3647 				v->magic = &ma[i];
3648 				for (j = i + 1; j < ml->nmagic; j++)
3649 				    if (ma[j].cont_level == 0)
3650 					    break;
3651 				v->nmagic = j - i;
3652 				return 0;
3653 			}
3654 		}
3655 	}
3656 	return -1;
3657 }
3658