xref: /php-src/ext/fileinfo/libmagic/apprentice.c (revision b7c5813c)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * apprentice - make one pass through /etc/magic, learning its secrets.
30  */
31 
32 #include "file.h"
33 
34 #ifndef	lint
35 FILE_RCSID("@(#)$File: apprentice.c,v 1.342 2023/07/17 14:38:35 christos Exp $")
36 #endif	/* lint */
37 
38 #include "magic.h"
39 #include <stdlib.h>
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #include <stddef.h>
44 #include <string.h>
45 #include <assert.h>
46 #include <ctype.h>
47 #include <fcntl.h>
48 #ifdef QUICK
49 #include <sys/mman.h>
50 #endif
51 #ifdef HAVE_DIRENT_H
52 #include <dirent.h>
53 #endif
54 #include <limits.h>
55 #ifdef HAVE_BYTESWAP_H
56 #include <byteswap.h>
57 #endif
58 #ifdef HAVE_SYS_BSWAP_H
59 #include <sys/bswap.h>
60 #endif
61 
62 
63 #define	EATAB {while (isascii(CAST(unsigned char, *l)) && \
64 		      isspace(CAST(unsigned char, *l)))  ++l;}
65 #define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \
66 			tolower(CAST(unsigned char, l)) : (l))
67 /*
68  * Work around a bug in headers on Digital Unix.
69  * At least confirmed for: OSF1 V4.0 878
70  */
71 #if defined(__osf__) && defined(__DECC)
72 #ifdef MAP_FAILED
73 #undef MAP_FAILED
74 #endif
75 #endif
76 
77 #ifndef MAP_FAILED
78 #define MAP_FAILED (void *) -1
79 #endif
80 
81 #ifndef MAP_FILE
82 #define MAP_FILE 0
83 #endif
84 
85 #define ALLOC_CHUNK	CAST(size_t, 10)
86 #define ALLOC_INCR	CAST(size_t, 200)
87 
88 #define MAP_TYPE_USER	0
89 #define MAP_TYPE_MALLOC	1
90 #define MAP_TYPE_MMAP	2
91 
/*
 * One sortable magic entry.  The sort callback passes `mp' and
 * `cont_count' to file_magic_strength() to rank entries.
 */
struct magic_entry {
	struct magic *mp;	/* magic record(s) belonging to this entry */
	uint32_t cont_count;	/* presumably: records in use in mp -- TODO confirm */
	uint32_t max_count;	/* presumably: records allocated in mp -- TODO confirm */
};
97 
/*
 * A growable collection of magic_entry records.
 * NOTE(review): count/max look like used/allocated slot counts -- confirm
 * against the loader code that fills this structure.
 */
struct magic_entry_set {
	struct magic_entry *me;	/* entry array */
	uint32_t count;		/* presumably: entries in use */
	uint32_t max;		/* presumably: entries allocated */
};
103 
104 struct magic_map {
105 	void *p;
106 	size_t len;
107 	int type;
108 	struct magic *magic[MAGIC_SETS];
109 	uint32_t nmagic[MAGIC_SETS];
110 };
111 
112 int file_formats[FILE_NAMES_SIZE];
113 const size_t file_nformats = FILE_NAMES_SIZE;
114 const char *file_names[FILE_NAMES_SIZE];
115 const size_t file_nnames = FILE_NAMES_SIZE;
116 
117 file_private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
118 file_private int hextoint(int);
119 file_private const char *getstr(struct magic_set *, struct magic *, const char *,
120     int);
121 file_private int parse(struct magic_set *, struct magic_entry *, const char *,
122     size_t, int);
123 file_private void eatsize(const char **);
124 file_private int apprentice_1(struct magic_set *, const char *, int);
125 file_private ssize_t apprentice_magic_strength_1(const struct magic *);
126 file_private int apprentice_sort(const void *, const void *);
127 file_private void apprentice_list(struct mlist *, int );
128 file_private struct magic_map *apprentice_load(struct magic_set *,
129     const char *, int);
130 file_private struct mlist *mlist_alloc(void);
131 file_private void mlist_free_all(struct magic_set *);
132 file_private void mlist_free(struct mlist *);
133 file_private void byteswap(struct magic *, uint32_t);
134 file_private void bs1(struct magic *);
135 
136 #if defined(HAVE_BYTESWAP_H)
137 #define swap2(x)	bswap_16(x)
138 #define swap4(x)	bswap_32(x)
139 #define swap8(x)	bswap_64(x)
140 #elif defined(HAVE_SYS_BSWAP_H)
141 #define swap2(x)	bswap16(x)
142 #define swap4(x)	bswap32(x)
143 #define swap8(x)	bswap64(x)
144 #else
145 file_private uint16_t swap2(uint16_t);
146 file_private uint32_t swap4(uint32_t);
147 file_private uint64_t swap8(uint64_t);
148 #endif
149 
150 file_private char *mkdbname(struct magic_set *, const char *, int);
151 file_private struct magic_map *apprentice_map(struct magic_set *, const char *);
152 file_private void apprentice_unmap(struct magic_map *);
153 file_private int apprentice_compile(struct magic_set *, struct magic_map *,
154     const char *);
155 file_private int check_format_type(const char *, int, const char **);
156 file_private int check_format(struct magic_set *, struct magic *);
157 file_private int get_op(char);
158 file_private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
159     size_t);
160 file_private int parse_strength(struct magic_set *, struct magic_entry *,
161     const char *, size_t);
162 file_private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
163     size_t);
164 file_private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
165     size_t);
166 
167 
168 file_private size_t magicsize = sizeof(struct magic);
169 
170 file_private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
171 
172 file_private struct {
173 	const char *name;
174 	size_t len;
175 	int (*fun)(struct magic_set *, struct magic_entry *, const char *,
176 	    size_t);
177 } bang[] = {
178 #define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
179 	DECLARE_FIELD(mime),
180 	DECLARE_FIELD(apple),
181 	DECLARE_FIELD(ext),
182 	DECLARE_FIELD(strength),
183 #undef	DECLARE_FIELD
184 	{ NULL, 0, NULL }
185 };
186 
187 #include "../data_file.c"
188 
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone driver: compile the magic file named by the single
 * command-line argument.  Returns 0 on success and 1 on a usage error,
 * when the magic handle cannot be opened, or when compilation fails.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	const char *slash = strrchr(argv[0], '/');
	/* Report errors under the basename of the executable. */
	const char *progname = slash != NULL ? slash + 1 : argv[0];
	int status = 0;

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	if (magic_compile(ms, argv[1]) == -1) {
		status = 1;
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	}
	magic_close(ms);
	return status;
}
#endif /* COMPILE_ONLY */
221 
/*
 * One row of a type-name table.  Rows are written with positional
 * initializers, so the field order must not change.
 */
struct type_tbl_s {
	const char name[16];	/* keyword as written in a magic file */
	const size_t len;	/* strlen(name); 0 marks the sentinel row */
	const int type;		/* FILE_* type code */
	const int format;	/* FILE_FMT_* class */
};
228 
229 /*
230  * XXX - the actual Single UNIX Specification says that "long" means "long",
231  * as in the C data type, but we treat it as meaning "4-byte integer".
232  * Given that the OS X version of file 5.04 did the same, I guess that passes
233  * the actual test; having "long" be dependent on how big a "long" is on
234  * the machine running "file" is silly.
235  */
236 static const struct type_tbl_s type_tbl[] = {
237 # define XX(s)		s, (sizeof(s) - 1)
238 # define XX_NULL	"", 0
239 	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
240 	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
241 	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
242 	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
243 	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
244 	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
245 	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
246 	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
247 	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
248 	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
249 	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
250 	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
251 	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
252 	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
253 	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
254 	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
255 	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
256 	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
257 	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
258 	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
259 	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
260 	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
261 	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
262 	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
263 	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
264 	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
265 	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
266 	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
267 	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
268 	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
269 	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
270 	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
271 	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
272 	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
273 	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
274 	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
275 	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
276 	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
277 	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
278 	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
279 	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
280 	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
281 	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
282 	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
283 	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
284 	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
285 	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
286 	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
287 	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
288 	{ XX("guid"),		FILE_GUID,		FILE_FMT_STR },
289 	{ XX("offset"),		FILE_OFFSET,		FILE_FMT_QUAD },
290 	{ XX("bevarint"),	FILE_BEVARINT,		FILE_FMT_STR },
291 	{ XX("levarint"),	FILE_LEVARINT,		FILE_FMT_STR },
292 	{ XX("msdosdate"),	FILE_MSDOSDATE,		FILE_FMT_STR },
293 	{ XX("lemsdosdate"),	FILE_LEMSDOSDATE,	FILE_FMT_STR },
294 	{ XX("bemsdosdate"),	FILE_BEMSDOSDATE,	FILE_FMT_STR },
295 	{ XX("msdostime"),	FILE_MSDOSTIME,		FILE_FMT_STR },
296 	{ XX("lemsdostime"),	FILE_LEMSDOSTIME,	FILE_FMT_STR },
297 	{ XX("bemsdostime"),	FILE_BEMSDOSTIME,	FILE_FMT_STR },
298 	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
299 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
300 };
301 
302 /*
303  * These are not types, and cannot be preceded by "u" to make them
304  * unsigned.
305  */
306 static const struct type_tbl_s special_tbl[] = {
307 	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
308 	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
309 	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
310 	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
311 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
312 };
313 # undef XX
314 # undef XX_NULL
315 
316 file_private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)317 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
318 {
319 	const struct type_tbl_s *p;
320 
321 	for (p = tbl; p->len; p++) {
322 		if (strncmp(l, p->name, p->len) == 0) {
323 			if (t)
324 				*t = l + p->len;
325 			break;
326 		}
327 	}
328 	return p->type;
329 }
330 
331 file_private off_t
maxoff_t(void)332 maxoff_t(void) {
333 	if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
334 		return CAST(off_t, INT_MAX);
335 	if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
336 		return CAST(off_t, LONG_MAX);
337 	return 0x7fffffff;
338 }
339 
340 file_private int
get_standard_integer_type(const char * l,const char ** t)341 get_standard_integer_type(const char *l, const char **t)
342 {
343 	int type;
344 
345 	if (isalpha(CAST(unsigned char, l[1]))) {
346 		switch (l[1]) {
347 		case 'C':
348 			/* "dC" and "uC" */
349 			type = FILE_BYTE;
350 			break;
351 		case 'S':
352 			/* "dS" and "uS" */
353 			type = FILE_SHORT;
354 			break;
355 		case 'I':
356 		case 'L':
357 			/*
358 			 * "dI", "dL", "uI", and "uL".
359 			 *
360 			 * XXX - the actual Single UNIX Specification says
361 			 * that "L" means "long", as in the C data type,
362 			 * but we treat it as meaning "4-byte integer".
363 			 * Given that the OS X version of file 5.04 did
364 			 * the same, I guess that passes the actual SUS
365 			 * validation suite; having "dL" be dependent on
366 			 * how big a "long" is on the machine running
367 			 * "file" is silly.
368 			 */
369 			type = FILE_LONG;
370 			break;
371 		case 'Q':
372 			/* "dQ" and "uQ" */
373 			type = FILE_QUAD;
374 			break;
375 		default:
376 			/* "d{anything else}", "u{anything else}" */
377 			return FILE_INVALID;
378 		}
379 		l += 2;
380 	} else if (isdigit(CAST(unsigned char, l[1]))) {
381 		/*
382 		 * "d{num}" and "u{num}"; we only support {num} values
383 		 * of 1, 2, 4, and 8 - the Single UNIX Specification
384 		 * doesn't say anything about whether arbitrary
385 		 * values should be supported, but both the Solaris 10
386 		 * and OS X Mountain Lion versions of file passed the
387 		 * Single UNIX Specification validation suite, and
388 		 * neither of them support values bigger than 8 or
389 		 * non-power-of-2 values.
390 		 */
391 		if (isdigit(CAST(unsigned char, l[2]))) {
392 			/* Multi-digit, so > 9 */
393 			return FILE_INVALID;
394 		}
395 		switch (l[1]) {
396 		case '1':
397 			type = FILE_BYTE;
398 			break;
399 		case '2':
400 			type = FILE_SHORT;
401 			break;
402 		case '4':
403 			type = FILE_LONG;
404 			break;
405 		case '8':
406 			type = FILE_QUAD;
407 			break;
408 		default:
409 			/* XXX - what about 3, 5, 6, or 7? */
410 			return FILE_INVALID;
411 		}
412 		l += 2;
413 	} else {
414 		/*
415 		 * "d" or "u" by itself.
416 		 */
417 		type = FILE_LONG;
418 		++l;
419 	}
420 	if (t)
421 		*t = l;
422 	return type;
423 }
424 
425 file_private void
init_file_tables(void)426 init_file_tables(void)
427 {
428 	static int done = 0;
429 	const struct type_tbl_s *p;
430 
431 	if (done)
432 		return;
433 	done++;
434 
435 	for (p = type_tbl; p->len; p++) {
436 		assert(p->type < FILE_NAMES_SIZE);
437 		file_names[p->type] = p->name;
438 		file_formats[p->type] = p->format;
439 	}
440 	assert(p - type_tbl == FILE_NAMES_SIZE);
441 }
442 
443 file_private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)444 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
445 {
446 	struct mlist *ml;
447 
448 	mlp->map = NULL;
449 	if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
450 		return -1;
451 
452 	ml->map = idx == 0 ? map : NULL;
453 	ml->magic = map->magic[idx];
454 	ml->nmagic = map->nmagic[idx];
455 
456 	mlp->prev->next = ml;
457 	ml->prev = mlp->prev;
458 	ml->next = mlp;
459 	mlp->prev = ml;
460 	return 0;
461 }
462 
463 /*
464  * Handle one file or directory.
465  */
466 file_private int
apprentice_1(struct magic_set * ms,const char * fn,int action)467 apprentice_1(struct magic_set *ms, const char *fn, int action)
468 {
469 	struct magic_map *map;
470 #ifndef COMPILE_ONLY
471 	size_t i;
472 #endif
473 
474 	if (magicsize != FILE_MAGICSIZE) {
475 		file_error(ms, 0, "magic element size %lu != %lu",
476 		    CAST(unsigned long, sizeof(*map->magic[0])),
477 		    CAST(unsigned long, FILE_MAGICSIZE));
478 		return -1;
479 	}
480 
481 	if (action == FILE_COMPILE) {
482 		map = apprentice_load(ms, fn, action);
483 		if (map == NULL)
484 			return -1;
485 		return apprentice_compile(ms, map, fn);
486 	}
487 
488 #ifndef COMPILE_ONLY
489 	map = apprentice_map(ms, fn);
490 	if (map == NULL) {
491 		if (ms->flags & MAGIC_CHECK)
492 			file_magwarn(NULL, "using regular magic file `%s'", fn);
493 		map = apprentice_load(ms, fn, action);
494 		if (map == NULL)
495 			return -1;
496 	}
497 
498 	for (i = 0; i < MAGIC_SETS; i++) {
499 		if (add_mlist(ms->mlist[i], map, i) == -1) {
500 			/* failed to add to any list, free explicitly */
501 			if (i == 0)
502 				apprentice_unmap(map);
503 			else
504 				mlist_free_all(ms);
505 			file_oomem(ms, sizeof(*ms->mlist[0]));
506 			return -1;
507 		}
508 	}
509 
510 	if (action == FILE_LIST) {
511 		for (i = 0; i < MAGIC_SETS; i++) {
512 			printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
513 			    i);
514 			apprentice_list(ms->mlist[i], BINTEST);
515 			printf("Text patterns:\n");
516 			apprentice_list(ms->mlist[i], TEXTTEST);
517 		}
518 	}
519 	return 0;
520 #else
521 	return 0;
522 #endif /* COMPILE_ONLY */
523 }
524 
525 file_protected void
file_ms_free(struct magic_set * ms)526 file_ms_free(struct magic_set *ms)
527 {
528 	size_t i;
529 	if (ms == NULL)
530 		return;
531 	for (i = 0; i < MAGIC_SETS; i++)
532 		mlist_free(ms->mlist[i]);
533 	if (ms->o.pbuf) {
534 		efree(ms->o.pbuf);
535 	}
536 	if (ms->o.buf) {
537 		efree(ms->o.buf);
538 	}
539 	if (ms->c.li) {
540 		efree(ms->c.li);
541 	}
542 #ifdef USE_C_LOCALE
543 	freelocale(ms->c_lc_ctype);
544 #endif
545 	efree(ms);
546 }
547 
548 file_protected struct magic_set *
file_ms_alloc(int flags)549 file_ms_alloc(int flags)
550 {
551 	struct magic_set *ms;
552 	size_t i, len;
553 
554 	if ((ms = CAST(struct magic_set *, ecalloc(CAST(size_t, 1u),
555 	    sizeof(*ms)))) == NULL)
556 		return NULL;
557 
558 	if (magic_setflags(ms, flags) == -1) {
559 		errno = EINVAL;
560 		goto free;
561 	}
562 
563 	ms->o.buf = ms->o.pbuf = NULL;
564 	ms->o.blen = 0;
565 	len = (ms->c.len = 10) * sizeof(*ms->c.li);
566 
567 	if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
568 		goto free;
569 
570 	ms->event_flags = 0;
571 	ms->error = -1;
572 	for (i = 0; i < MAGIC_SETS; i++)
573 		ms->mlist[i] = NULL;
574 	ms->file = "unknown";
575 	ms->line = 0;
576 	ms->indir_max = FILE_INDIR_MAX;
577 	ms->name_max = FILE_NAME_MAX;
578 	ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
579 	ms->elf_shsize_max = FILE_ELF_SHSIZE_MAX;
580 	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
581 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
582 	ms->regex_max = FILE_REGEX_MAX;
583 	ms->bytes_max = FILE_BYTES_MAX;
584 	ms->encoding_max = FILE_ENCODING_MAX;
585 #ifdef USE_C_LOCALE
586 	ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
587 	assert(ms->c_lc_ctype != NULL);
588 #endif
589 	return ms;
590 free:
591 	efree(ms);
592 	return NULL;
593 }
594 
595 file_private void
apprentice_unmap(struct magic_map * map)596 apprentice_unmap(struct magic_map *map)
597 {
598 	if (map == NULL)
599 		return;
600 	if (map->p != php_magic_database) {
601 		if (map->p == NULL) {
602 			int j;
603 			for (j = 0; j < MAGIC_SETS; j++) {
604 				if (map->magic[j]) {
605 					efree(map->magic[j]);
606 				}
607 			}
608 		} else {
609 			efree(map->p);
610 		}
611 	}
612 	efree(map);
613 }
614 
615 file_private struct mlist *
mlist_alloc(void)616 mlist_alloc(void)
617 {
618 	struct mlist *mlist;
619 	if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
620 		return NULL;
621 	}
622 	mlist->next = mlist->prev = mlist;
623 	return mlist;
624 }
625 
626 file_private void
mlist_free_all(struct magic_set * ms)627 mlist_free_all(struct magic_set *ms)
628 {
629 	size_t i;
630 
631 	for (i = 0; i < MAGIC_SETS; i++) {
632 		mlist_free(ms->mlist[i]);
633 		ms->mlist[i] = NULL;
634 	}
635 }
636 
637 file_private void
mlist_free_one(struct mlist * ml)638 mlist_free_one(struct mlist *ml)
639 {
640 	if (ml->map)
641 		apprentice_unmap(CAST(struct magic_map *, ml->map));
642 	efree(ml);
643 }
644 
645 file_private void
mlist_free(struct mlist * mlist)646 mlist_free(struct mlist *mlist)
647 {
648 	struct mlist *ml, *next;
649 
650 	if (mlist == NULL)
651 		return;
652 
653 	for (ml = mlist->next; ml != mlist;) {
654 		next = ml->next;
655 		mlist_free_one(ml);
656 		ml = next;
657 	}
658 	mlist_free_one(mlist);
659 }
660 
661 /* const char *fn: list of magic files and directories */
662 file_protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)663 file_apprentice(struct magic_set *ms, const char *fn, int action)
664 {
665 	char *p, *mfn;
666 	int fileerr, errs = -1;
667 	size_t i, j;
668 
669 	(void)file_reset(ms, 0);
670 
671 /* XXX disabling default magic loading so the compiled in data is used */
672 #if 0
673 	if ((fn = magic_getpath(fn, action)) == NULL)
674 		return -1;
675 #endif
676 
677 	init_file_tables();
678 
679 	if (fn == NULL)
680 		fn = getenv("MAGIC");
681 	if (fn == NULL) {
682 		for (i = 0; i < MAGIC_SETS; i++) {
683 			mlist_free(ms->mlist[i]);
684 			if ((ms->mlist[i] = mlist_alloc()) == NULL) {
685 				file_oomem(ms, sizeof(*ms->mlist[i]));
686 				return -1;
687 			}
688 		}
689 		return apprentice_1(ms, fn, action);
690 	}
691 
692 	if ((mfn = estrdup(fn)) == NULL) {
693 		file_oomem(ms, strlen(fn));
694 		return -1;
695 	}
696 
697 	for (i = 0; i < MAGIC_SETS; i++) {
698 		mlist_free(ms->mlist[i]);
699 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
700 			file_oomem(ms, sizeof(*ms->mlist[0]));
701 			for (j = 0; j < i; j++) {
702 				mlist_free(ms->mlist[j]);
703 				ms->mlist[j] = NULL;
704 			}
705 			efree(mfn);
706 			return -1;
707 		}
708 	}
709 	fn = mfn;
710 
711 	while (fn) {
712 		p = CCAST(char *, strchr(fn, PATHSEP));
713 		if (p)
714 			*p++ = '\0';
715 		if (*fn == '\0')
716 			break;
717 		fileerr = apprentice_1(ms, fn, action);
718 		errs = MAX(errs, fileerr);
719 		fn = p;
720 	}
721 
722 	efree(mfn);
723 
724 	if (errs == -1) {
725 		for (i = 0; i < MAGIC_SETS; i++) {
726 			mlist_free(ms->mlist[i]);
727 			ms->mlist[i] = NULL;
728 		}
729 		file_error(ms, 0, "could not find any valid magic files!");
730 		return -1;
731 	}
732 
733 #if 0
734 	/*
735 	 * Always leave the database loaded
736 	 */
737 	if (action == FILE_LOAD)
738 		return 0;
739 
740 	for (i = 0; i < MAGIC_SETS; i++) {
741 		mlist_free(ms->mlist[i]);
742 		ms->mlist[i] = NULL;
743 	}
744 #endif
745 
746 	switch (action) {
747 	case FILE_LOAD:
748 	case FILE_COMPILE:
749 	case FILE_CHECK:
750 	case FILE_LIST:
751 		return 0;
752 	default:
753 		file_error(ms, 0, "Invalid action %d", action);
754 		return -1;
755 	}
756 }
757 
758 /*
759  * Compute the real length of a magic expression, for the purposes
760  * of determining how "strong" a magic expression is (approximating
761  * how specific its matches are):
762  *	- magic characters count 0 unless escaped.
763  *	- [] expressions count 1
764  *	- {} expressions count 0
765  *	- regular characters or escaped magic characters count 1
766  *	- 0 length expressions count as one
767  */
768 file_private size_t
nonmagic(const char * str)769 nonmagic(const char *str)
770 {
771 	const char *p;
772 	size_t rv = 0;
773 
774 	for (p = str; *p; p++)
775 		switch (*p) {
776 		case '\\':	/* Escaped anything counts 1 */
777 			if (!*++p)
778 				p--;
779 			rv++;
780 			continue;
781 		case '?':	/* Magic characters count 0 */
782 		case '*':
783 		case '.':
784 		case '+':
785 		case '^':
786 		case '$':
787 			continue;
788 		case '[':	/* Bracketed expressions count 1 the ']' */
789 			while (*p && *p != ']')
790 				p++;
791 			p--;
792 			continue;
793 		case '{':	/* Braced expressions count 0 */
794 			while (*p && *p != '}')
795 				p++;
796 			if (!*p)
797 				p--;
798 			continue;
799 		default:	/* Anything else counts 1 */
800 			rv++;
801 			continue;
802 		}
803 
804 	return rv == 0 ? 1 : rv;	/* Return at least 1 */
805 }
806 
807 
808 file_private size_t
typesize(int type)809 typesize(int type)
810 {
811 	switch (type) {
812 	case FILE_BYTE:
813 		return 1;
814 
815 	case FILE_SHORT:
816 	case FILE_LESHORT:
817 	case FILE_BESHORT:
818 	case FILE_MSDOSDATE:
819 	case FILE_BEMSDOSDATE:
820 	case FILE_LEMSDOSDATE:
821 	case FILE_MSDOSTIME:
822 	case FILE_BEMSDOSTIME:
823 	case FILE_LEMSDOSTIME:
824 		return 2;
825 
826 	case FILE_LONG:
827 	case FILE_LELONG:
828 	case FILE_BELONG:
829 	case FILE_MELONG:
830 		return 4;
831 
832 	case FILE_DATE:
833 	case FILE_LEDATE:
834 	case FILE_BEDATE:
835 	case FILE_MEDATE:
836 	case FILE_LDATE:
837 	case FILE_LELDATE:
838 	case FILE_BELDATE:
839 	case FILE_MELDATE:
840 	case FILE_FLOAT:
841 	case FILE_BEFLOAT:
842 	case FILE_LEFLOAT:
843 	case FILE_BEID3:
844 	case FILE_LEID3:
845 		return 4;
846 
847 	case FILE_QUAD:
848 	case FILE_BEQUAD:
849 	case FILE_LEQUAD:
850 	case FILE_QDATE:
851 	case FILE_LEQDATE:
852 	case FILE_BEQDATE:
853 	case FILE_QLDATE:
854 	case FILE_LEQLDATE:
855 	case FILE_BEQLDATE:
856 	case FILE_QWDATE:
857 	case FILE_LEQWDATE:
858 	case FILE_BEQWDATE:
859 	case FILE_DOUBLE:
860 	case FILE_BEDOUBLE:
861 	case FILE_LEDOUBLE:
862 	case FILE_OFFSET:
863 	case FILE_BEVARINT:
864 	case FILE_LEVARINT:
865 		return 8;
866 
867 	case FILE_GUID:
868 		return 16;
869 
870 	default:
871 		return FILE_BADSIZE;
872 	}
873 }
874 
875 /*
876  * Get weight of this magic entry, for sorting purposes.
877  */
878 file_private ssize_t
apprentice_magic_strength_1(const struct magic * m)879 apprentice_magic_strength_1(const struct magic *m)
880 {
881 #define MULT 10U
882 	size_t ts, v;
883 	ssize_t val = 2 * MULT;	/* baseline strength */
884 
885 	switch (m->type) {
886 	case FILE_DEFAULT:	/* make sure this sorts last */
887 		if (m->factor_op != FILE_FACTOR_OP_NONE) {
888 			file_magwarn(NULL, "Usupported factor_op in default %d",
889 			    m->factor_op);
890 		}
891 		return 0;
892 
893 	case FILE_BYTE:
894 	case FILE_SHORT:
895 	case FILE_LESHORT:
896 	case FILE_BESHORT:
897 	case FILE_LONG:
898 	case FILE_LELONG:
899 	case FILE_BELONG:
900 	case FILE_MELONG:
901 	case FILE_DATE:
902 	case FILE_LEDATE:
903 	case FILE_BEDATE:
904 	case FILE_MEDATE:
905 	case FILE_LDATE:
906 	case FILE_LELDATE:
907 	case FILE_BELDATE:
908 	case FILE_MELDATE:
909 	case FILE_FLOAT:
910 	case FILE_BEFLOAT:
911 	case FILE_LEFLOAT:
912 	case FILE_QUAD:
913 	case FILE_BEQUAD:
914 	case FILE_LEQUAD:
915 	case FILE_QDATE:
916 	case FILE_LEQDATE:
917 	case FILE_BEQDATE:
918 	case FILE_QLDATE:
919 	case FILE_LEQLDATE:
920 	case FILE_BEQLDATE:
921 	case FILE_QWDATE:
922 	case FILE_LEQWDATE:
923 	case FILE_BEQWDATE:
924 	case FILE_DOUBLE:
925 	case FILE_BEDOUBLE:
926 	case FILE_LEDOUBLE:
927 	case FILE_BEVARINT:
928 	case FILE_LEVARINT:
929 	case FILE_GUID:
930 	case FILE_BEID3:
931 	case FILE_LEID3:
932 	case FILE_OFFSET:
933 	case FILE_MSDOSDATE:
934 	case FILE_BEMSDOSDATE:
935 	case FILE_LEMSDOSDATE:
936 	case FILE_MSDOSTIME:
937 	case FILE_BEMSDOSTIME:
938 	case FILE_LEMSDOSTIME:
939 		ts = typesize(m->type);
940 		if (ts == FILE_BADSIZE) {
941 			(void)fprintf(stderr, "Bad size for type %d\n",
942 			    m->type);
943 			abort();
944 		}
945 		val += ts * MULT;
946 		break;
947 
948 	case FILE_PSTRING:
949 	case FILE_STRING:
950 	case FILE_OCTAL:
951 		val += m->vallen * MULT;
952 		break;
953 
954 	case FILE_BESTRING16:
955 	case FILE_LESTRING16:
956 		val += m->vallen * MULT / 2;
957 		break;
958 
959 	case FILE_SEARCH:
960 		if (m->vallen == 0)
961 			break;
962 		val += m->vallen * MAX(MULT / m->vallen, 1);
963 		break;
964 
965 	case FILE_REGEX:
966 		v = nonmagic(m->value.s);
967 		val += v * MAX(MULT / v, 1);
968 		break;
969 
970 	case FILE_INDIRECT:
971 	case FILE_NAME:
972 	case FILE_USE:
973 	case FILE_CLEAR:
974 		break;
975 
976 	case FILE_DER:
977 		val += MULT;
978 		break;
979 
980 	default:
981 		(void)fprintf(stderr, "Bad type %d\n", m->type);
982 		abort();
983 	}
984 
985 	switch (m->reln) {
986 	case 'x':	/* matches anything penalize */
987 	case '!':       /* matches almost anything penalize */
988 		val = 0;
989 		break;
990 
991 	case '=':	/* Exact match, prefer */
992 		val += MULT;
993 		break;
994 
995 	case '>':
996 	case '<':	/* comparison match reduce strength */
997 		val -= 2 * MULT;
998 		break;
999 
1000 	case '^':
1001 	case '&':	/* masking bits, we could count them too */
1002 		val -= MULT;
1003 		break;
1004 
1005 	default:
1006 		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
1007 		abort();
1008 	}
1009 
1010 	return val;
1011 }
1012 
1013 
1014 /*ARGSUSED*/
1015 file_protected size_t
file_magic_strength(const struct magic * m,size_t nmagic)1016 file_magic_strength(const struct magic *m,
1017     size_t nmagic __attribute__((__unused__)))
1018 {
1019 	ssize_t val = apprentice_magic_strength_1(m);
1020 
1021 #ifdef notyet
1022 	if (m->desc[0] == '\0') {
1023 		size_t i;
1024 		/*
1025 		 * Magic entries with no description get their continuations
1026 		 * added
1027 		 */
1028 		for (i = 1; m[i].cont_level != 0 && i < MIN(nmagic, 3); i++) {
1029 			ssize_t v = apprentice_magic_strength_1(&m[i]) >>
1030 			    (i + 1);
1031 			val += v;
1032 			if (m[i].desc[0] != '\0')
1033 				break;
1034 		}
1035 	}
1036 #endif
1037 
1038 	switch (m->factor_op) {
1039 	case FILE_FACTOR_OP_NONE:
1040 		break;
1041 	case FILE_FACTOR_OP_PLUS:
1042 		val += m->factor;
1043 		break;
1044 	case FILE_FACTOR_OP_MINUS:
1045 		val -= m->factor;
1046 		break;
1047 	case FILE_FACTOR_OP_TIMES:
1048 		val *= m->factor;
1049 		break;
1050 	case FILE_FACTOR_OP_DIV:
1051 		val /= m->factor;
1052 		break;
1053 	default:
1054 		(void)fprintf(stderr, "Bad factor_op %u\n", m->factor_op);
1055 		abort();
1056 	}
1057 
1058 	if (val <= 0)	/* ensure we only return 0 for FILE_DEFAULT */
1059 		val = 1;
1060 
1061 #ifndef notyet
1062 	/*
1063 	 * Magic entries with no description get a bonus because they depend
1064 	 * on subsequent magic entries to print something.
1065 	 */
1066 	if (m->desc[0] == '\0')
1067 		val++;
1068 #endif
1069 
1070 	return val;
1071 }
1072 
1073 /*
1074  * Sort callback for sorting entries by "strength" (basically length)
1075  */
1076 file_private int
apprentice_sort(const void * a,const void * b)1077 apprentice_sort(const void *a, const void *b)
1078 {
1079 	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1080 	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1081 	size_t sa = file_magic_strength(ma->mp, ma->cont_count);
1082 	size_t sb = file_magic_strength(mb->mp, mb->cont_count);
1083 	if (sa == sb)
1084 		return 0;
1085 	else if (sa > sb)
1086 		return -1;
1087 	else
1088 		return 1;
1089 }
1090 
1091 /*
1092  * Shows sorted patterns list in the order which is used for the matching
1093  */
1094 file_private void
apprentice_list(struct mlist * mlist,int mode)1095 apprentice_list(struct mlist *mlist, int mode)
1096 {
1097 	uint32_t magindex, descindex, mimeindex, lineindex;
1098 	struct mlist *ml;
1099 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
1100 		for (magindex = 0; magindex < ml->nmagic; magindex++) {
1101 			struct magic *m = &ml->magic[magindex];
1102 			if ((m->flag & mode) != mode) {
1103 				/* Skip sub-tests */
1104 				while (magindex + 1 < ml->nmagic &&
1105 				       ml->magic[magindex + 1].cont_level != 0)
1106 					++magindex;
1107 				continue; /* Skip to next top-level test*/
1108 			}
1109 
1110 			/*
1111 			 * Try to iterate over the tree until we find item with
1112 			 * description/mimetype.
1113 			 */
1114 			lineindex = descindex = mimeindex = magindex;
1115 			for (; magindex + 1 < ml->nmagic &&
1116 			   ml->magic[magindex + 1].cont_level != 0;
1117 			   magindex++) {
1118 				uint32_t mi = magindex + 1;
1119 				if (*ml->magic[descindex].desc == '\0'
1120 				    && *ml->magic[mi].desc)
1121 					descindex = mi;
1122 				if (*ml->magic[mimeindex].mimetype == '\0'
1123 				    && *ml->magic[mi].mimetype)
1124 					mimeindex = mi;
1125 			}
1126 
1127 			printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1128 			    file_magic_strength(m, ml->nmagic - magindex),
1129 			    ml->magic[lineindex].lineno,
1130 			    ml->magic[descindex].desc,
1131 			    ml->magic[mimeindex].mimetype);
1132 		}
1133 	}
1134 }
1135 
1136 file_private void
set_test_type(struct magic * mstart,struct magic * m)1137 set_test_type(struct magic *mstart, struct magic *m)
1138 {
1139 	switch (m->type) {
1140 	case FILE_BYTE:
1141 	case FILE_SHORT:
1142 	case FILE_LONG:
1143 	case FILE_DATE:
1144 	case FILE_BESHORT:
1145 	case FILE_BELONG:
1146 	case FILE_BEDATE:
1147 	case FILE_LESHORT:
1148 	case FILE_LELONG:
1149 	case FILE_LEDATE:
1150 	case FILE_LDATE:
1151 	case FILE_BELDATE:
1152 	case FILE_LELDATE:
1153 	case FILE_MEDATE:
1154 	case FILE_MELDATE:
1155 	case FILE_MELONG:
1156 	case FILE_QUAD:
1157 	case FILE_LEQUAD:
1158 	case FILE_BEQUAD:
1159 	case FILE_QDATE:
1160 	case FILE_LEQDATE:
1161 	case FILE_BEQDATE:
1162 	case FILE_QLDATE:
1163 	case FILE_LEQLDATE:
1164 	case FILE_BEQLDATE:
1165 	case FILE_QWDATE:
1166 	case FILE_LEQWDATE:
1167 	case FILE_BEQWDATE:
1168 	case FILE_FLOAT:
1169 	case FILE_BEFLOAT:
1170 	case FILE_LEFLOAT:
1171 	case FILE_DOUBLE:
1172 	case FILE_BEDOUBLE:
1173 	case FILE_LEDOUBLE:
1174 	case FILE_BEVARINT:
1175 	case FILE_LEVARINT:
1176 	case FILE_DER:
1177 	case FILE_GUID:
1178 	case FILE_OFFSET:
1179 	case FILE_MSDOSDATE:
1180 	case FILE_BEMSDOSDATE:
1181 	case FILE_LEMSDOSDATE:
1182 	case FILE_MSDOSTIME:
1183 	case FILE_BEMSDOSTIME:
1184 	case FILE_LEMSDOSTIME:
1185 	case FILE_OCTAL:
1186 		mstart->flag |= BINTEST;
1187 		break;
1188 	case FILE_STRING:
1189 	case FILE_PSTRING:
1190 	case FILE_BESTRING16:
1191 	case FILE_LESTRING16:
1192 		/* Allow text overrides */
1193 		if (mstart->str_flags & STRING_TEXTTEST)
1194 			mstart->flag |= TEXTTEST;
1195 		else
1196 			mstart->flag |= BINTEST;
1197 		break;
1198 	case FILE_REGEX:
1199 	case FILE_SEARCH:
1200 		/* Check for override */
1201 		if (mstart->str_flags & STRING_BINTEST)
1202 			mstart->flag |= BINTEST;
1203 		if (mstart->str_flags & STRING_TEXTTEST)
1204 			mstart->flag |= TEXTTEST;
1205 
1206 		if (mstart->flag & (TEXTTEST|BINTEST))
1207 			break;
1208 
1209 		/* binary test if pattern is not text */
1210 		if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1211 		    NULL) <= 0)
1212 			mstart->flag |= BINTEST;
1213 		else
1214 			mstart->flag |= TEXTTEST;
1215 		break;
1216 	case FILE_DEFAULT:
1217 		/* can't deduce anything; we shouldn't see this at the
1218 		   top level anyway */
1219 		break;
1220 	case FILE_INVALID:
1221 	default:
1222 		/* invalid search type, but no need to complain here */
1223 		break;
1224 	}
1225 }
1226 
1227 file_private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry_set * mset)1228 addentry(struct magic_set *ms, struct magic_entry *me,
1229    struct magic_entry_set *mset)
1230 {
1231 	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1232 	if (mset[i].me == NULL || mset[i].count == mset[i].max) {
1233 		struct magic_entry *mp;
1234 
1235 		size_t incr = mset[i].max + ALLOC_INCR;
1236 		if ((mp = CAST(struct magic_entry *,
1237 		    erealloc(mset[i].me, sizeof(*mp) * incr))) ==
1238 		    NULL) {
1239 			file_oomem(ms, sizeof(*mp) * incr);
1240 			return -1;
1241 		}
1242 		(void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1243 		    ALLOC_INCR);
1244 		mset[i].me = mp;
1245 		mset[i].max = CAST(uint32_t, incr);
1246 		assert(mset[i].max == incr);
1247 	}
1248 	mset[i].me[mset[i].count++] = *me;
1249 	memset(me, 0, sizeof(*me));
1250 	return 0;
1251 }
1252 
1253 /*
1254  * Load and parse one file.
1255  */
1256 file_private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry_set * mset)1257 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1258    struct magic_entry_set *mset)
1259 {
1260 	char buffer[BUFSIZ + 1];
1261 	char *line = NULL;
1262 	size_t len;
1263 	size_t lineno = 0;
1264 	struct magic_entry me;
1265 
1266 	php_stream *stream;
1267 
1268 
1269 	ms->file = fn;
1270 	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
1271 
1272 	if (stream == NULL) {
1273 		if (errno != ENOENT)
1274 			file_error(ms, errno, "cannot read magic file `%s'",
1275 				   fn);
1276 		(*errs)++;
1277 		return;
1278 	}
1279 
1280 	memset(&me, 0, sizeof(me));
1281 	/* read and parse this file */
1282 	for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
1283 		if (len == 0) /* null line, garbage, etc */
1284 			continue;
1285 		if (line[len - 1] == '\n') {
1286 			lineno++;
1287 			line[len - 1] = '\0'; /* delete newline */
1288 		}
1289 		switch (line[0]) {
1290 		case '\0':	/* empty, do not parse */
1291 		case '#':	/* comment, do not parse */
1292 			continue;
1293 		case '!':
1294 			if (line[1] == ':') {
1295 				size_t i;
1296 
1297 				for (i = 0; bang[i].name != NULL; i++) {
1298 					if (CAST(size_t, len - 2) > bang[i].len &&
1299 					    memcmp(bang[i].name, line + 2,
1300 					    bang[i].len) == 0)
1301 						break;
1302 				}
1303 				if (bang[i].name == NULL) {
1304 					file_error(ms, 0,
1305 					    "Unknown !: entry `%s'", line);
1306 					(*errs)++;
1307 					continue;
1308 				}
1309 				if (me.mp == NULL) {
1310 					file_error(ms, 0,
1311 					    "No current entry for :!%s type",
1312 						bang[i].name);
1313 					(*errs)++;
1314 					continue;
1315 				}
1316 				if ((*bang[i].fun)(ms, &me,
1317 				    line + bang[i].len + 2,
1318 				    len - bang[i].len - 2) != 0) {
1319 					(*errs)++;
1320 					continue;
1321 				}
1322 				continue;
1323 			}
1324 			/*FALLTHROUGH*/
1325 		default:
1326 		again:
1327 			switch (parse(ms, &me, line, lineno, action)) {
1328 			case 0:
1329 				continue;
1330 			case 1:
1331 				(void)addentry(ms, &me, mset);
1332 				goto again;
1333 			default:
1334 				(*errs)++;
1335 				break;
1336 			}
1337 		}
1338 	}
1339 	if (me.mp)
1340 		(void)addentry(ms, &me, mset);
1341 	efree(line);
1342 	php_stream_close(stream);
1343 }
1344 
1345 /*
1346  * parse a file or directory of files
1347  * const char *fn: name of magic file or directory
1348  */
1349 file_private int
cmpstrp(const void * p1,const void * p2)1350 cmpstrp(const void *p1, const void *p2)
1351 {
1352         return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2));
1353 }
1354 
1355 
1356 file_private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1357 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1358     uint32_t starttest)
1359 {
1360 	static const char text[] = "text";
1361 	static const char binary[] = "binary";
1362 	static const size_t len = sizeof(text);
1363 
1364 	uint32_t i = starttest;
1365 
1366 	do {
1367 		set_test_type(me[starttest].mp, me[i].mp);
1368 		if ((ms->flags & MAGIC_DEBUG) == 0)
1369 			continue;
1370 		(void)fprintf(stderr, "%s%s%s: %s\n",
1371 		    me[i].mp->mimetype,
1372 		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1373 		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1374 		    me[i].mp->flag & BINTEST ? binary : text);
1375 		if (me[i].mp->flag & BINTEST) {
1376 			char *p = strstr(me[i].mp->desc, text);
1377 			if (p && (p == me[i].mp->desc ||
1378 			    isspace(CAST(unsigned char, p[-1]))) &&
1379 			    (p + len - me[i].mp->desc == MAXstring
1380 			    || (p[len] == '\0' ||
1381 			    isspace(CAST(unsigned char, p[len])))))
1382 				(void)fprintf(stderr, "*** Possible "
1383 				    "binary test for text type\n");
1384 		}
1385 	} while (++i < nme && me[i].mp->cont_level != 0);
1386 	return i;
1387 }
1388 
1389 file_private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1390 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1391 {
1392 	uint32_t i;
1393 	for (i = 0; i < nme; i++) {
1394 		if (me[i].mp->cont_level == 0 &&
1395 		    me[i].mp->type == FILE_DEFAULT) {
1396 			while (++i < nme)
1397 				if (me[i].mp->cont_level == 0)
1398 					break;
1399 			if (i != nme) {
1400 				/* XXX - Ugh! */
1401 				ms->line = me[i].mp->lineno;
1402 				file_magwarn(ms,
1403 				    "level 0 \"default\" did not sort last");
1404 			}
1405 			return;
1406 		}
1407 	}
1408 }
1409 
1410 file_private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1411 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1412     struct magic **ma, uint32_t *nma)
1413 {
1414 	uint32_t i, mentrycount = 0;
1415 	size_t slen;
1416 
1417 	for (i = 0; i < nme; i++)
1418 		mentrycount += me[i].cont_count;
1419 
1420 	if (mentrycount == 0) {
1421 		*ma = NULL;
1422 		*nma = 0;
1423 		return 0;
1424 	}
1425 
1426 	slen = sizeof(**ma) * mentrycount;
1427 	if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1428 		file_oomem(ms, slen);
1429 		return -1;
1430 	}
1431 
1432 	mentrycount = 0;
1433 	for (i = 0; i < nme; i++) {
1434 		(void)memcpy(*ma + mentrycount, me[i].mp,
1435 		    me[i].cont_count * sizeof(**ma));
1436 		mentrycount += me[i].cont_count;
1437 	}
1438 	*nma = mentrycount;
1439 	return 0;
1440 }
1441 
1442 file_private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1443 magic_entry_free(struct magic_entry *me, uint32_t nme)
1444 {
1445 	uint32_t i;
1446 	if (me == NULL)
1447 		return;
1448 	for (i = 0; i < nme; i++)
1449 		efree(me[i].mp);
1450 	efree(me);
1451 }
1452 
1453 file_private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1454 apprentice_load(struct magic_set *ms, const char *fn, int action)
1455 {
1456 	int errs = 0;
1457 	uint32_t i, j;
1458 	size_t files = 0, maxfiles = 0;
1459 	char **filearr = NULL;
1460 	zend_stat_t st = {0};
1461 	struct magic_map *map;
1462 	struct magic_entry_set mset[MAGIC_SETS];
1463 	php_stream *dir;
1464 	php_stream_dirent d;
1465 
1466 
1467 	memset(mset, 0, sizeof(mset));
1468 	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1469 
1470 
1471 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1472 	{
1473 		file_oomem(ms, sizeof(*map));
1474 		return NULL;
1475 	}
1476 	map->type = MAP_TYPE_MALLOC;
1477 
1478 	/* print silly verbose header for USG compat. */
1479 	if (action == FILE_CHECK)
1480 		(void)fprintf(stderr, "%s\n", usg_hdr);
1481 
1482 	/* load directory or file */
1483 	/* FIXME: Read file names and sort them to prevent
1484 	   non-determinism. See Debian bug #488562. */
1485 	if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1486 		int mflen;
1487 		char mfn[MAXPATHLEN];
1488 
1489 		dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1490 		if (!dir) {
1491 			errs++;
1492 			goto out;
1493 		}
1494 		while (php_stream_readdir(dir, &d)) {
1495 			if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1496 				file_oomem(ms,
1497 				strlen(fn) + strlen(d.d_name) + 2);
1498 				errs++;
1499 				php_stream_closedir(dir);
1500 				goto out;
1501 			}
1502 			if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1503 				continue;
1504 			}
1505 			if (files >= maxfiles) {
1506 				size_t mlen;
1507 				maxfiles = (maxfiles + 1) * 2;
1508 				mlen = maxfiles * sizeof(*filearr);
1509 				if ((filearr = CAST(char **,
1510 				    erealloc(filearr, mlen))) == NULL) {
1511 					file_oomem(ms, mlen);
1512 					php_stream_closedir(dir);
1513 					errs++;
1514 					goto out;
1515 				}
1516 			}
1517 			filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1518 		}
1519 		php_stream_closedir(dir);
1520 		if (filearr) {
1521 			qsort(filearr, files, sizeof(*filearr), cmpstrp);
1522 			for (i = 0; i < files; i++) {
1523 				load_1(ms, action, filearr[i], &errs, mset);
1524 				efree(filearr[i]);
1525 			}
1526 			efree(filearr);
1527 		}
1528 	} else
1529 		load_1(ms, action, fn, &errs, mset);
1530 	if (errs)
1531 		goto out;
1532 
1533 	for (j = 0; j < MAGIC_SETS; j++) {
1534 		/* Set types of tests */
1535 		for (i = 0; i < mset[j].count; ) {
1536 			if (mset[j].me[i].mp->cont_level != 0) {
1537 				i++;
1538 				continue;
1539 			}
1540 			i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1541 		}
1542 		if (mset[j].me)
1543 			qsort(mset[j].me, mset[j].count, sizeof(*mset[0].me),
1544 			    apprentice_sort);
1545 
1546 		/*
1547 		 * Make sure that any level 0 "default" line is last
1548 		 * (if one exists).
1549 		 */
1550 		set_last_default(ms, mset[j].me, mset[j].count);
1551 
1552 		/* coalesce per file arrays into a single one, if needed */
1553 		if (mset[j].count == 0)
1554 			continue;
1555 
1556 		if (coalesce_entries(ms, mset[j].me, mset[j].count,
1557 		    &map->magic[j], &map->nmagic[j]) == -1) {
1558 			errs++;
1559 			goto out;
1560 		}
1561 	}
1562 
1563 out:
1564 	for (j = 0; j < MAGIC_SETS; j++)
1565 		magic_entry_free(mset[j].me, mset[j].count);
1566 
1567 	if (errs) {
1568 		apprentice_unmap(map);
1569 		return NULL;
1570 	}
1571 	return map;
1572 }
1573 
1574 /*
1575  * extend the sign bit if the comparison is to be signed
1576  */
1577 file_protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1578 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1579 {
1580 	if (!(m->flag & UNSIGNED)) {
1581 		switch(m->type) {
1582 		/*
1583 		 * Do not remove the casts below.  They are
1584 		 * vital.  When later compared with the data,
1585 		 * the sign extension must have happened.
1586 		 */
1587 		case FILE_BYTE:
1588 			v = CAST(signed char,  v);
1589 			break;
1590 		case FILE_SHORT:
1591 		case FILE_BESHORT:
1592 		case FILE_LESHORT:
1593 			v = CAST(short, v);
1594 			break;
1595 		case FILE_DATE:
1596 		case FILE_BEDATE:
1597 		case FILE_LEDATE:
1598 		case FILE_MEDATE:
1599 		case FILE_LDATE:
1600 		case FILE_BELDATE:
1601 		case FILE_LELDATE:
1602 		case FILE_MELDATE:
1603 		case FILE_LONG:
1604 		case FILE_BELONG:
1605 		case FILE_LELONG:
1606 		case FILE_MELONG:
1607 		case FILE_FLOAT:
1608 		case FILE_BEFLOAT:
1609 		case FILE_LEFLOAT:
1610 		case FILE_MSDOSDATE:
1611 		case FILE_BEMSDOSDATE:
1612 		case FILE_LEMSDOSDATE:
1613 		case FILE_MSDOSTIME:
1614 		case FILE_BEMSDOSTIME:
1615 		case FILE_LEMSDOSTIME:
1616 			v = CAST(int32_t, v);
1617 			break;
1618 		case FILE_QUAD:
1619 		case FILE_BEQUAD:
1620 		case FILE_LEQUAD:
1621 		case FILE_QDATE:
1622 		case FILE_QLDATE:
1623 		case FILE_QWDATE:
1624 		case FILE_BEQDATE:
1625 		case FILE_BEQLDATE:
1626 		case FILE_BEQWDATE:
1627 		case FILE_LEQDATE:
1628 		case FILE_LEQLDATE:
1629 		case FILE_LEQWDATE:
1630 		case FILE_DOUBLE:
1631 		case FILE_BEDOUBLE:
1632 		case FILE_LEDOUBLE:
1633 		case FILE_OFFSET:
1634 		case FILE_BEVARINT:
1635 		case FILE_LEVARINT:
1636 			v = CAST(int64_t, v);
1637 			break;
1638 		case FILE_STRING:
1639 		case FILE_PSTRING:
1640 		case FILE_BESTRING16:
1641 		case FILE_LESTRING16:
1642 		case FILE_REGEX:
1643 		case FILE_SEARCH:
1644 		case FILE_DEFAULT:
1645 		case FILE_INDIRECT:
1646 		case FILE_NAME:
1647 		case FILE_USE:
1648 		case FILE_CLEAR:
1649 		case FILE_DER:
1650 		case FILE_GUID:
1651 		case FILE_OCTAL:
1652 			break;
1653 		default:
1654 			if (ms->flags & MAGIC_CHECK)
1655 			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1656 				    m->type);
1657 			return FILE_BADSIZE;
1658 		}
1659 	}
1660 	return v;
1661 }
1662 
1663 file_private int
string_modifier_check(struct magic_set * ms,struct magic * m)1664 string_modifier_check(struct magic_set *ms, struct magic *m)
1665 {
1666 	if ((ms->flags & MAGIC_CHECK) == 0)
1667 		return 0;
1668 
1669 	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1670 	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1671 		file_magwarn(ms,
1672 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1673 		return -1;
1674 	}
1675 	switch (m->type) {
1676 	case FILE_BESTRING16:
1677 	case FILE_LESTRING16:
1678 		if (m->str_flags != 0) {
1679 			file_magwarn(ms,
1680 			    "no modifiers allowed for 16-bit strings\n");
1681 			return -1;
1682 		}
1683 		break;
1684 	case FILE_STRING:
1685 	case FILE_PSTRING:
1686 		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1687 			file_magwarn(ms,
1688 			    "'/%c' only allowed on regex and search\n",
1689 			    CHAR_REGEX_OFFSET_START);
1690 			return -1;
1691 		}
1692 		break;
1693 	case FILE_SEARCH:
1694 		if (m->str_range == 0) {
1695 			file_magwarn(ms,
1696 			    "missing range; defaulting to %d\n",
1697                             STRING_DEFAULT_RANGE);
1698 			m->str_range = STRING_DEFAULT_RANGE;
1699 			return -1;
1700 		}
1701 		break;
1702 	case FILE_REGEX:
1703 		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1704 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1705 			    CHAR_COMPACT_WHITESPACE);
1706 			return -1;
1707 		}
1708 		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1709 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1710 			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1711 			return -1;
1712 		}
1713 		break;
1714 	default:
1715 		file_magwarn(ms, "coding error: m->type=%d\n",
1716 		    m->type);
1717 		return -1;
1718 	}
1719 	return 0;
1720 }
1721 
1722 file_private int
get_op(char c)1723 get_op(char c)
1724 {
1725 	switch (c) {
1726 	case '&':
1727 		return FILE_OPAND;
1728 	case '|':
1729 		return FILE_OPOR;
1730 	case '^':
1731 		return FILE_OPXOR;
1732 	case '+':
1733 		return FILE_OPADD;
1734 	case '-':
1735 		return FILE_OPMINUS;
1736 	case '*':
1737 		return FILE_OPMULTIPLY;
1738 	case '/':
1739 		return FILE_OPDIVIDE;
1740 	case '%':
1741 		return FILE_OPMODULO;
1742 	default:
1743 		return -1;
1744 	}
1745 }
1746 
1747 #ifdef ENABLE_CONDITIONALS
1748 file_private int
get_cond(const char * l,const char ** t)1749 get_cond(const char *l, const char **t)
1750 {
1751 	static const struct cond_tbl_s {
1752 		char name[8];
1753 		size_t len;
1754 		int cond;
1755 	} cond_tbl[] = {
1756 		{ "if",		2,	COND_IF },
1757 		{ "elif",	4,	COND_ELIF },
1758 		{ "else",	4,	COND_ELSE },
1759 		{ "",		0,	COND_NONE },
1760 	};
1761 	const struct cond_tbl_s *p;
1762 
1763 	for (p = cond_tbl; p->len; p++) {
1764 		if (strncmp(l, p->name, p->len) == 0 &&
1765 		    isspace(CAST(unsigned char, l[p->len]))) {
1766 			if (t)
1767 				*t = l + p->len;
1768 			break;
1769 		}
1770 	}
1771 	return p->cond;
1772 }
1773 
1774 file_private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1775 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1776 {
1777 	int last_cond;
1778 	last_cond = ms->c.li[cont_level].last_cond;
1779 
1780 	switch (cond) {
1781 	case COND_IF:
1782 		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1783 			if (ms->flags & MAGIC_CHECK)
1784 				file_magwarn(ms, "syntax error: `if'");
1785 			return -1;
1786 		}
1787 		last_cond = COND_IF;
1788 		break;
1789 
1790 	case COND_ELIF:
1791 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1792 			if (ms->flags & MAGIC_CHECK)
1793 				file_magwarn(ms, "syntax error: `elif'");
1794 			return -1;
1795 		}
1796 		last_cond = COND_ELIF;
1797 		break;
1798 
1799 	case COND_ELSE:
1800 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1801 			if (ms->flags & MAGIC_CHECK)
1802 				file_magwarn(ms, "syntax error: `else'");
1803 			return -1;
1804 		}
1805 		last_cond = COND_NONE;
1806 		break;
1807 
1808 	case COND_NONE:
1809 		last_cond = COND_NONE;
1810 		break;
1811 	}
1812 
1813 	ms->c.li[cont_level].last_cond = last_cond;
1814 	return 0;
1815 }
1816 #endif /* ENABLE_CONDITIONALS */
1817 
1818 file_private int
parse_indirect_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1819 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1820 {
1821 	const char *l = *lp;
1822 
1823 	while (!isspace(CAST(unsigned char, *++l)))
1824 		switch (*l) {
1825 		case CHAR_INDIRECT_RELATIVE:
1826 			m->str_flags |= INDIRECT_RELATIVE;
1827 			break;
1828 		default:
1829 			if (ms->flags & MAGIC_CHECK)
1830 				file_magwarn(ms, "indirect modifier `%c' "
1831 					"invalid", *l);
1832 			*lp = l;
1833 			return -1;
1834 		}
1835 	*lp = l;
1836 	return 0;
1837 }
1838 
1839 file_private void
parse_op_modifier(struct magic_set * ms,struct magic * m,const char ** lp,int op)1840 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1841     int op)
1842 {
1843 	const char *l = *lp;
1844 	char *t;
1845 	uint64_t val;
1846 
1847 	++l;
1848 	m->mask_op |= op;
1849 	val = CAST(uint64_t, strtoull(l, &t, 0));
1850 	l = t;
1851 	m->num_mask = file_signextend(ms, m, val);
1852 	eatsize(&l);
1853 	*lp = l;
1854 }
1855 
1856 file_private int
parse_string_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1857 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1858 {
1859 	const char *l = *lp;
1860 	char *t;
1861 	int have_range = 0;
1862 
1863 	while (!isspace(CAST(unsigned char, *++l))) {
1864 		switch (*l) {
1865 		case '0':  case '1':  case '2':
1866 		case '3':  case '4':  case '5':
1867 		case '6':  case '7':  case '8':
1868 		case '9':
1869 			if (have_range && (ms->flags & MAGIC_CHECK))
1870 				file_magwarn(ms, "multiple ranges");
1871 			have_range = 1;
1872 			m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1873 			if (m->str_range == 0)
1874 				file_magwarn(ms, "zero range");
1875 			l = t - 1;
1876 			break;
1877 		case CHAR_COMPACT_WHITESPACE:
1878 			m->str_flags |= STRING_COMPACT_WHITESPACE;
1879 			break;
1880 		case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1881 			m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1882 			break;
1883 		case CHAR_IGNORE_LOWERCASE:
1884 			m->str_flags |= STRING_IGNORE_LOWERCASE;
1885 			break;
1886 		case CHAR_IGNORE_UPPERCASE:
1887 			m->str_flags |= STRING_IGNORE_UPPERCASE;
1888 			break;
1889 		case CHAR_REGEX_OFFSET_START:
1890 			m->str_flags |= REGEX_OFFSET_START;
1891 			break;
1892 		case CHAR_BINTEST:
1893 			m->str_flags |= STRING_BINTEST;
1894 			break;
1895 		case CHAR_TEXTTEST:
1896 			m->str_flags |= STRING_TEXTTEST;
1897 			break;
1898 		case CHAR_TRIM:
1899 			m->str_flags |= STRING_TRIM;
1900 			break;
1901 		case CHAR_FULL_WORD:
1902 			m->str_flags |= STRING_FULL_WORD;
1903 			break;
1904 		case CHAR_PSTRING_1_LE:
1905 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1906 			if (m->type != FILE_PSTRING)
1907 				goto bad;
1908 			SET_LENGTH(PSTRING_1_LE);
1909 			break;
1910 		case CHAR_PSTRING_2_BE:
1911 			if (m->type != FILE_PSTRING)
1912 				goto bad;
1913 			SET_LENGTH(PSTRING_2_BE);
1914 			break;
1915 		case CHAR_PSTRING_2_LE:
1916 			if (m->type != FILE_PSTRING)
1917 				goto bad;
1918 			SET_LENGTH(PSTRING_2_LE);
1919 			break;
1920 		case CHAR_PSTRING_4_BE:
1921 			if (m->type != FILE_PSTRING)
1922 				goto bad;
1923 			SET_LENGTH(PSTRING_4_BE);
1924 			break;
1925 		case CHAR_PSTRING_4_LE:
1926 			switch (m->type) {
1927 			case FILE_PSTRING:
1928 			case FILE_REGEX:
1929 				break;
1930 			default:
1931 				goto bad;
1932 			}
1933 			SET_LENGTH(PSTRING_4_LE);
1934 			break;
1935 		case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1936 			if (m->type != FILE_PSTRING)
1937 				goto bad;
1938 			m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1939 			break;
1940 		default:
1941 		bad:
1942 			if (ms->flags & MAGIC_CHECK)
1943 				file_magwarn(ms, "string modifier `%c' "
1944 					"invalid", *l);
1945 			goto out;
1946 		}
1947 		/* allow multiple '/' for readability */
1948 		if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
1949 			l++;
1950 	}
1951 	if (string_modifier_check(ms, m) == -1)
1952 		goto out;
1953 	*lp = l;
1954 	return 0;
1955 out:
1956 	*lp = l;
1957 	return -1;
1958 }
1959 
1960 /*
1961  * parse one line from magic file, put into magic[index++] if valid
1962  */
1963 file_private int
parse(struct magic_set * ms,struct magic_entry * me,const char * line,size_t lineno,int action)1964 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1965     size_t lineno, int action)
1966 {
1967 #ifdef ENABLE_CONDITIONALS
1968 	static uint32_t last_cont_level = 0;
1969 #endif
1970 	size_t i;
1971 	struct magic *m;
1972 	const char *l = line;
1973 	char *t;
1974 	int op;
1975 	uint32_t cont_level;
1976 	int32_t diff;
1977 
1978 	cont_level = 0;
1979 
1980 	/*
1981 	 * Parse the offset.
1982 	 */
1983 	while (*l == '>') {
1984 		++l;		/* step over */
1985 		cont_level++;
1986 	}
1987 #ifdef ENABLE_CONDITIONALS
1988 	if (cont_level == 0 || cont_level > last_cont_level)
1989 		if (file_check_mem(ms, cont_level) == -1)
1990 			return -1;
1991 	last_cont_level = cont_level;
1992 #endif
1993 	if (cont_level != 0) {
1994 		if (me->mp == NULL) {
1995 			file_magerror(ms, "No current entry for continuation");
1996 			return -1;
1997 		}
1998 		if (me->cont_count == 0) {
1999 			file_magerror(ms, "Continuations present with 0 count");
2000 			return -1;
2001 		}
2002 		m = &me->mp[me->cont_count - 1];
2003 		diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
2004 		if (diff > 1)
2005 			file_magwarn(ms, "New continuation level %u is more "
2006 			    "than one larger than current level %u", cont_level,
2007 			    m->cont_level);
2008 		if (me->cont_count == me->max_count) {
2009 			struct magic *nm;
2010 			size_t cnt = me->max_count + ALLOC_CHUNK;
2011 			if ((nm = CAST(struct magic *, erealloc(me->mp,
2012 			    sizeof(*nm) * cnt))) == NULL) {
2013 				file_oomem(ms, sizeof(*nm) * cnt);
2014 				return -1;
2015 			}
2016 			me->mp = nm;
2017 			me->max_count = CAST(uint32_t, cnt);
2018 		}
2019 		m = &me->mp[me->cont_count++];
2020 		(void)memset(m, 0, sizeof(*m));
2021 		m->cont_level = cont_level;
2022 	} else {
2023 		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
2024 		if (me->mp != NULL)
2025 			return 1;
2026 		if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
2027 			file_oomem(ms, len);
2028 			return -1;
2029 		}
2030 		me->mp = m;
2031 		me->max_count = ALLOC_CHUNK;
2032 		(void)memset(m, 0, sizeof(*m));
2033 		m->factor_op = FILE_FACTOR_OP_NONE;
2034 		m->cont_level = 0;
2035 		me->cont_count = 1;
2036 	}
2037 	m->lineno = CAST(uint32_t, lineno);
2038 
2039 	if (*l == '&') {  /* m->cont_level == 0 checked below. */
2040                 ++l;            /* step over */
2041                 m->flag |= OFFADD;
2042         }
2043 	if (*l == '(') {
2044 		++l;		/* step over */
2045 		m->flag |= INDIR;
2046 		if (m->flag & OFFADD)
2047 			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
2048 
2049 		if (*l == '&') {  /* m->cont_level == 0 checked below */
2050 			++l;            /* step over */
2051 			m->flag |= OFFADD;
2052 		}
2053 	}
2054 	/* Indirect offsets are not valid at level 0. */
2055 	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
2056 		if (ms->flags & MAGIC_CHECK)
2057 			file_magwarn(ms, "relative offset at level 0");
2058 		return -1;
2059 	}
2060 
2061 	/* get offset, then skip over it */
2062 	if (*l == '-') {
2063 		++l;            /* step over */
2064 		m->flag |= OFFNEGATIVE;
2065 	}
2066 	m->offset = CAST(int32_t, strtol(l, &t, 0));
2067         if (l == t) {
2068 		if (ms->flags & MAGIC_CHECK)
2069 			file_magwarn(ms, "offset `%s' invalid", l);
2070 		return -1;
2071 	}
2072 
2073         l = t;
2074 
2075 	if (m->flag & INDIR) {
2076 		m->in_type = FILE_LONG;
2077 		m->in_offset = 0;
2078 		m->in_op = 0;
2079 		/*
2080 		 * read [.,lbs][+-]nnnnn)
2081 		 */
2082 		if (*l == '.' || *l == ',') {
2083 			if (*l == ',')
2084 				m->in_op |= FILE_OPSIGNED;
2085 			l++;
2086 			switch (*l) {
2087 			case 'l':
2088 				m->in_type = FILE_LELONG;
2089 				break;
2090 			case 'L':
2091 				m->in_type = FILE_BELONG;
2092 				break;
2093 			case 'm':
2094 				m->in_type = FILE_MELONG;
2095 				break;
2096 			case 'h':
2097 			case 's':
2098 				m->in_type = FILE_LESHORT;
2099 				break;
2100 			case 'H':
2101 			case 'S':
2102 				m->in_type = FILE_BESHORT;
2103 				break;
2104 			case 'c':
2105 			case 'b':
2106 			case 'C':
2107 			case 'B':
2108 				m->in_type = FILE_BYTE;
2109 				break;
2110 			case 'e':
2111 			case 'f':
2112 			case 'g':
2113 				m->in_type = FILE_LEDOUBLE;
2114 				break;
2115 			case 'E':
2116 			case 'F':
2117 			case 'G':
2118 				m->in_type = FILE_BEDOUBLE;
2119 				break;
2120 			case 'i':
2121 				m->in_type = FILE_LEID3;
2122 				break;
2123 			case 'I':
2124 				m->in_type = FILE_BEID3;
2125 				break;
2126 			case 'o':
2127 				m->in_type = FILE_OCTAL;
2128 				break;
2129 			case 'q':
2130 				m->in_type = FILE_LEQUAD;
2131 				break;
2132 			case 'Q':
2133 				m->in_type = FILE_BEQUAD;
2134 				break;
2135 			default:
2136 				if (ms->flags & MAGIC_CHECK)
2137 					file_magwarn(ms,
2138 					    "indirect offset type `%c' invalid",
2139 					    *l);
2140 				return -1;
2141 			}
2142 			l++;
2143 		}
2144 
2145 		if (*l == '~') {
2146 			m->in_op |= FILE_OPINVERSE;
2147 			l++;
2148 		}
2149 		if ((op = get_op(*l)) != -1) {
2150 			m->in_op |= op;
2151 			l++;
2152 		}
2153 		if (*l == '(') {
2154 			m->in_op |= FILE_OPINDIRECT;
2155 			l++;
2156 		}
2157 		if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
2158 			m->in_offset = CAST(int32_t, strtol(l, &t, 0));
2159 			if (l == t) {
2160 				if (ms->flags & MAGIC_CHECK)
2161 					file_magwarn(ms,
2162 					    "in_offset `%s' invalid", l);
2163 				return -1;
2164 			}
2165 			l = t;
2166 		}
2167 		if (*l++ != ')' ||
2168 		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2169 			if (ms->flags & MAGIC_CHECK)
2170 				file_magwarn(ms,
2171 				    "missing ')' in indirect offset");
2172 			return -1;
2173 		}
2174 	}
2175 	EATAB;
2176 
2177 #ifdef ENABLE_CONDITIONALS
2178 	m->cond = get_cond(l, &l);
2179 	if (check_cond(ms, m->cond, cont_level) == -1)
2180 		return -1;
2181 
2182 	EATAB;
2183 #endif
2184 
2185 	/*
2186 	 * Parse the type.
2187 	 */
2188 	if (*l == 'u') {
2189 		/*
2190 		 * Try it as a keyword type prefixed by "u"; match what
2191 		 * follows the "u".  If that fails, try it as an SUS
2192 		 * integer type.
2193 		 */
2194 		m->type = get_type(type_tbl, l + 1, &l);
2195 		if (m->type == FILE_INVALID) {
2196 			/*
2197 			 * Not a keyword type; parse it as an SUS type,
2198 			 * 'u' possibly followed by a number or C/S/L.
2199 			 */
2200 			m->type = get_standard_integer_type(l, &l);
2201 		}
2202 		/* It's unsigned. */
2203 		if (m->type != FILE_INVALID)
2204 			m->flag |= UNSIGNED;
2205 	} else {
2206 		/*
2207 		 * Try it as a keyword type.  If that fails, try it as
2208 		 * an SUS integer type if it begins with "d" or as an
2209 		 * SUS string type if it begins with "s".  In any case,
2210 		 * it's not unsigned.
2211 		 */
2212 		m->type = get_type(type_tbl, l, &l);
2213 		if (m->type == FILE_INVALID) {
2214 			/*
2215 			 * Not a keyword type; parse it as an SUS type,
2216 			 * either 'd' possibly followed by a number or
2217 			 * C/S/L, or just 's'.
2218 			 */
2219 			if (*l == 'd')
2220 				m->type = get_standard_integer_type(l, &l);
2221 			else if (*l == 's'
2222 			    && !isalpha(CAST(unsigned char, l[1]))) {
2223 				m->type = FILE_STRING;
2224 				++l;
2225 			}
2226 		}
2227 	}
2228 
2229 	if (m->type == FILE_INVALID) {
2230 		/* Not found - try it as a special keyword. */
2231 		m->type = get_type(special_tbl, l, &l);
2232 	}
2233 
2234 	if (m->type == FILE_INVALID) {
2235 		if (ms->flags & MAGIC_CHECK)
2236 			file_magwarn(ms, "type `%s' invalid", l);
2237 		return -1;
2238 	}
2239 
2240 	if (m->type == FILE_NAME && cont_level != 0) {
2241 		if (ms->flags & MAGIC_CHECK)
2242 			file_magwarn(ms, "`name%s' entries can only be "
2243 			    "declared at top level", l);
2244 		return -1;
2245 	}
2246 
2247 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2248 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2249 
2250 	m->mask_op = 0;
2251 	if (*l == '~') {
2252 		if (!IS_LIBMAGIC_STRING(m->type))
2253 			m->mask_op |= FILE_OPINVERSE;
2254 		else if (ms->flags & MAGIC_CHECK)
2255 			file_magwarn(ms, "'~' invalid for string types");
2256 		++l;
2257 	}
2258 	m->str_range = 0;
2259 	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2260 	if ((op = get_op(*l)) != -1) {
2261 		if (IS_LIBMAGIC_STRING(m->type)) {
2262 			int r;
2263 
2264 			if (op != FILE_OPDIVIDE) {
2265 				if (ms->flags & MAGIC_CHECK)
2266 					file_magwarn(ms,
2267 					    "invalid string/indirect op: "
2268 					    "`%c'", *t);
2269 				return -1;
2270 			}
2271 
2272 			if (m->type == FILE_INDIRECT)
2273 				r = parse_indirect_modifier(ms, m, &l);
2274 			else
2275 				r = parse_string_modifier(ms, m, &l);
2276 			if (r == -1)
2277 				return -1;
2278 		} else
2279 			parse_op_modifier(ms, m, &l, op);
2280 	}
2281 
2282 	/*
2283 	 * We used to set mask to all 1's here, instead let's just not do
2284 	 * anything if mask = 0 (unless you have a better idea)
2285 	 */
2286 	EATAB;
2287 
2288 	switch (*l) {
2289 	case '>':
2290 	case '<':
2291   		m->reln = *l;
2292   		++l;
2293 		if (*l == '=') {
2294 			if (ms->flags & MAGIC_CHECK) {
2295 				file_magwarn(ms, "%c= not supported",
2296 				    m->reln);
2297 				return -1;
2298 			}
2299 		   ++l;
2300 		}
2301 		break;
2302 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
2303 	case '&':
2304 	case '^':
2305 	case '=':
2306   		m->reln = *l;
2307   		++l;
2308 		if (*l == '=') {
2309 		   /* HP compat: ignore &= etc. */
2310 		   ++l;
2311 		}
2312 		break;
2313 	case '!':
2314 		m->reln = *l;
2315 		++l;
2316 		break;
2317 	default:
2318   		m->reln = '=';	/* the default relation */
2319 		if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
2320 		    isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2321 			m->reln = *l;
2322 			++l;
2323 		}
2324 		break;
2325 	}
2326 	/*
2327 	 * Grab the value part, except for an 'x' reln.
2328 	 */
2329 	if (m->reln != 'x' && getvalue(ms, m, &l, action))
2330 		return -1;
2331 
2332 	/*
2333 	 * TODO finish this macro and start using it!
2334 	 * #define offsetcheck {if (offset > ms->bytes_max -1)
2335 	 *	magwarn("offset too big"); }
2336 	 */
2337 
2338 	/*
2339 	 * Now get last part - the description
2340 	 */
2341 	EATAB;
2342 	if (l[0] == '\b') {
2343 		++l;
2344 		m->flag |= NOSPACE;
2345 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
2346 		++l;
2347 		++l;
2348 		m->flag |= NOSPACE;
2349 	}
2350 	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2351 		continue;
2352 	if (i == sizeof(m->desc)) {
2353 		m->desc[sizeof(m->desc) - 1] = '\0';
2354 		if (ms->flags & MAGIC_CHECK)
2355 			file_magwarn(ms, "description `%s' truncated", m->desc);
2356 	}
2357 
2358         /*
2359 	 * We only do this check while compiling, or if any of the magic
2360 	 * files were not compiled.
2361          */
2362         if (ms->flags & MAGIC_CHECK) {
2363 		if (check_format(ms, m) == -1)
2364 			return -1;
2365 	}
2366 #ifndef COMPILE_ONLY
2367 	if (action == FILE_CHECK) {
2368 		file_mdump(m);
2369 	}
2370 #endif
2371 	m->mimetype[0] = '\0';		/* initialise MIME type to none */
2372 	return 0;
2373 }
2374 
2375 /*
2376  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2377  * if valid
2378  */
2379 /*ARGSUSED*/
2380 file_private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2381 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
2382     size_t len __attribute__((__unused__)))
2383 {
2384 	const char *l = line;
2385 	char *el;
2386 	unsigned long factor;
2387 	char sbuf[512];
2388 	struct magic *m = &me->mp[0];
2389 
2390 	if (m->factor_op != FILE_FACTOR_OP_NONE) {
2391 		file_magwarn(ms,
2392 		    "Current entry already has a strength type: %c %d",
2393 		    m->factor_op, m->factor);
2394 		return -1;
2395 	}
2396 	if (m->type == FILE_NAME) {
2397 		file_magwarn(ms, "%s: Strength setting is not supported in "
2398 		    "\"name\" magic entries",
2399 		    file_printable(ms, sbuf, sizeof(sbuf), m->value.s,
2400 		    sizeof(m->value.s)));
2401 		return -1;
2402 	}
2403 	EATAB;
2404 	switch (*l) {
2405 	case FILE_FACTOR_OP_NONE:
2406 		break;
2407 	case FILE_FACTOR_OP_PLUS:
2408 	case FILE_FACTOR_OP_MINUS:
2409 	case FILE_FACTOR_OP_TIMES:
2410 	case FILE_FACTOR_OP_DIV:
2411 		m->factor_op = *l++;
2412 		break;
2413 	default:
2414 		file_magwarn(ms, "Unknown factor op `%c'", *l);
2415 		return -1;
2416 	}
2417 	EATAB;
2418 	factor = strtoul(l, &el, 0);
2419 	if (factor > 255) {
2420 		file_magwarn(ms, "Too large factor `%lu'", factor);
2421 		goto out;
2422 	}
2423 	if (*el && !isspace(CAST(unsigned char, *el))) {
2424 		file_magwarn(ms, "Bad factor `%s'", l);
2425 		goto out;
2426 	}
2427 	m->factor = CAST(uint8_t, factor);
2428 	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2429 		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2430 		    m->factor_op, m->factor);
2431 		goto out;
2432 	}
2433 	return 0;
2434 out:
2435 	m->factor_op = FILE_FACTOR_OP_NONE;
2436 	m->factor = 0;
2437 	return -1;
2438 }
2439 
/*
 * Report whether x is acceptable in an annotation value: ASCII letters
 * and digits always are, and so is any byte present in `extra'.
 * Returns 1 when acceptable, 0 otherwise.
 *
 * Note: because strchr() also matches the terminator of `extra',
 * x == '\0' is reported as acceptable.
 */
file_private int
goodchar(unsigned char x, const char *extra)
{
	if (isascii(x) && isalnum(x))
		return 1;
	return strchr(extra, x) != NULL;
}
2445 
2446 file_private int
parse_extra(struct magic_set * ms,struct magic_entry * me,const char * line,size_t llen,zend_off_t off,size_t len,const char * name,const char * extra,int nt)2447 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2448     size_t llen, zend_off_t off, size_t len, const char *name, const char *extra,
2449     int nt)
2450 {
2451 	size_t i;
2452 	const char *l = line;
2453 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2454 	char *buf = CAST(char *, CAST(void *, m)) + off;
2455 
2456 	if (buf[0] != '\0') {
2457 		len = nt ? strlen(buf) : len;
2458 		file_magwarn(ms, "Current entry already has a %s type "
2459 		    "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2460 		return -1;
2461 	}
2462 
2463 	if (*m->desc == '\0') {
2464 		file_magwarn(ms, "Current entry does not yet have a "
2465 		    "description for adding a %s type", name);
2466 		return -1;
2467 	}
2468 
2469 	EATAB;
2470 	for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
2471 	    buf[i++] = *l++)
2472 		continue;
2473 
2474 	if (i == len && *l) {
2475 		if (nt)
2476 			buf[len - 1] = '\0';
2477 		if (ms->flags & MAGIC_CHECK)
2478 			file_magwarn(ms, "%s type `%s' truncated %"
2479 			    SIZE_T_FORMAT "u", name, line, i);
2480 	} else {
2481 		if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2482 			file_magwarn(ms, "%s type `%s' has bad char '%c'",
2483 			    name, line, *l);
2484 		if (nt)
2485 			buf[i] = '\0';
2486 	}
2487 
2488 	if (i > 0)
2489 		return 0;
2490 
2491 	file_magerror(ms, "Bad magic entry '%s'", line);
2492 	return -1;
2493 }
2494 
2495 /*
2496  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2497  * magic[index - 1]
2498  */
2499 file_private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2500 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
2501     size_t len)
2502 {
2503 	return parse_extra(ms, me, line, len,
2504 	    CAST(off_t, offsetof(struct magic, apple)),
2505 	    sizeof(me->mp[0].apple), "APPLE", "!+-./?", 0);
2506 }
2507 
2508 /*
2509  * Parse a comma-separated list of extensions
2510  */
2511 file_private int
parse_ext(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2512 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
2513     size_t len)
2514 {
2515 	return parse_extra(ms, me, line, len,
2516 	    CAST(off_t, offsetof(struct magic, ext)),
2517 	    sizeof(me->mp[0].ext), "EXTENSION", ",!+-/@?_$&~", 0);
2518 	    /* & for b&w */
2519 	    /* ~ for journal~ */
2520 }
2521 
2522 /*
2523  * parse a MIME annotation line from magic file, put into magic[index - 1]
2524  * if valid
2525  */
2526 file_private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2527 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
2528     size_t len)
2529 {
2530 	return parse_extra(ms, me, line, len,
2531 	    CAST(off_t, offsetof(struct magic, mimetype)),
2532 	    sizeof(me->mp[0].mimetype), "MIME", "+-/.$?:{}", 1);
2533 }
2534 
/*
 * Validate a single printf conversion against a magic type.
 *
 *   ptr  - text immediately after the '%' of the conversion
 *   type - magic type; used to index file_formats[], so the caller must
 *          have range-checked it
 *   estr - receives a short reason string when -1 is returned
 *
 * Returns 0 when the conversion is acceptable for the type's format
 * class, -1 otherwise.  An unknown format class or an unexpected numeric
 * type is treated as an internal error and aborts the process.
 */
file_private int
check_format_type(const char *ptr, int type, const char **estr)
{
	/*
	 * quad: the type belongs to the 64-bit format class.
	 * h: width class of a numeric type (2 = byte, 1 = short, 0 = long
	 * or quad); 'c' is only accepted when h == 2, and under
	 * STRICT_FORMAT it is used to police the 'h'/'hh' modifiers.
	 */
	int quad = 0, h;
	size_t len, cnt;
	if (*ptr == '\0') {
		/* Missing format string; bad */
		*estr = "missing format spec";
		return -1;
	}

	switch (file_formats[type]) {
	case FILE_FMT_QUAD:
		quad = 1;
		/*FALLTHROUGH*/
	case FILE_FMT_NUM:
		if (quad == 0) {
			switch (type) {
			case FILE_BYTE:
				h = 2;
				break;
			case FILE_SHORT:
			case FILE_BESHORT:
			case FILE_LESHORT:
				h = 1;
				break;
			case FILE_LONG:
			case FILE_BELONG:
			case FILE_LELONG:
			case FILE_MELONG:
			case FILE_LEID3:
			case FILE_BEID3:
			case FILE_INDIRECT:
				h = 0;
				break;
			default:
				fprintf(stderr, "Bad number format %d", type);
				abort();
			}
		} else
			h = 0;
		/* Skip any run of the flag characters '-', '.' and '#'. */
		while (*ptr && strchr("-.#", *ptr) != NULL)
			ptr++;
/*
 * Consume a run of decimal digits at ptr; jump to `toolong' if it has
 * more than 5 digits or its value exceeds 1024.
 */
#define CHECKLEN() do { \
	for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
		len = len * 10 + (*ptr - '0'); \
	if (cnt > 5 || len > 1024) \
		goto toolong; \
} while (/*CONSTCOND*/0)

		/* Optional width, optional '.', optional precision. */
		CHECKLEN();
		if (*ptr == '.')
			ptr++;
		CHECKLEN();
		if (quad) {
			/* 64-bit types must use the "ll" length modifier. */
			if (*ptr++ != 'l')
				goto invalid;
			if (*ptr++ != 'l')
				goto invalid;
		}

		switch (*ptr++) {
#ifdef STRICT_FORMAT 	/* "long" formats are int formats for us */
		/* so don't accept the 'l' modifier */
		case 'l':
			switch (*ptr++) {
			case 'i':
			case 'd':
			case 'u':
			case 'o':
			case 'x':
			case 'X':
				if (h == 0)
					return 0;
				/*FALLTHROUGH*/
			default:
				goto invalid;
			}

		/*
		 * Don't accept h and hh modifiers. They make writing
		 * magic entries more complicated, for very little benefit
		 */
		case 'h':
			if (h-- <= 0)
				goto invalid;
			switch (*ptr++) {
			case 'h':
				if (h-- <= 0)
					goto invalid;
				switch (*ptr++) {
				case 'i':
				case 'd':
				case 'u':
				case 'o':
				case 'x':
				case 'X':
					return 0;
				default:
					goto invalid;
				}
			case 'i':
			case 'd':
			case 'u':
			case 'o':
			case 'x':
			case 'X':
				if (h == 0)
					return 0;
				/*FALLTHROUGH*/
			default:
				goto invalid;
			}
#endif
		case 'c':
			/* %c is only valid for byte-sized types. */
			if (h == 2)
				return 0;
			goto invalid;
		case 'i':
		case 'd':
		case 'u':
		case 'o':
		case 'x':
		case 'X':
#ifdef STRICT_FORMAT
			if (h == 0)
				return 0;
			/*FALLTHROUGH*/
#else
			return 0;
#endif
		default:
			goto invalid;
		}

	case FILE_FMT_FLOAT:
	case FILE_FMT_DOUBLE:
		/* Optional '-', optional '.', width, '.', precision. */
		if (*ptr == '-')
			ptr++;
		if (*ptr == '.')
			ptr++;
		CHECKLEN();
		if (*ptr == '.')
			ptr++;
		CHECKLEN();
		switch (*ptr++) {
		case 'e':
		case 'E':
		case 'f':
		case 'F':
		case 'g':
		case 'G':
			return 0;

		default:
			goto invalid;
		}


	case FILE_FMT_STR:
		/* Optional '-', width and ".precision"; must end in 's'. */
		if (*ptr == '-')
			ptr++;
		while (isdigit(CAST(unsigned char, *ptr)))
			ptr++;
		if (*ptr == '.') {
			ptr++;
			while (isdigit(CAST(unsigned char , *ptr)))
				ptr++;
		}

		switch (*ptr++) {
		case 's':
			return 0;
		default:
			goto invalid;
		}

	default:
		/* internal error */
		fprintf(stderr, "Bad file format %d", type);
		abort();
	}
invalid:
	*estr = "not valid";
	return -1;
toolong:
	*estr = "too long";
	return -1;
}
2724 
2725 /*
2726  * Check that the optional printf format in description matches
2727  * the type of the magic.
2728  */
2729 file_private int
check_format(struct magic_set * ms,struct magic * m)2730 check_format(struct magic_set *ms, struct magic *m)
2731 {
2732 	char *ptr;
2733 	const char *estr;
2734 
2735 	for (ptr = m->desc; *ptr; ptr++)
2736 		if (*ptr == '%')
2737 			break;
2738 	if (*ptr == '\0') {
2739 		/* No format string; ok */
2740 		return 1;
2741 	}
2742 
2743 	assert(file_nformats == file_nnames);
2744 
2745 	if (m->type >= file_nformats) {
2746 		file_magwarn(ms, "Internal error inconsistency between "
2747 		    "m->type and format strings");
2748 		return -1;
2749 	}
2750 	if (file_formats[m->type] == FILE_FMT_NONE) {
2751 		file_magwarn(ms, "No format string for `%s' with description "
2752 		    "`%s'", m->desc, file_names[m->type]);
2753 		return -1;
2754 	}
2755 
2756 	ptr++;
2757 	if (check_format_type(ptr, m->type, &estr) == -1) {
2758 		/*
2759 		 * TODO: this error message is unhelpful if the format
2760 		 * string is not one character long
2761 		 */
2762 		file_magwarn(ms, "Printf format is %s for type "
2763 		    "`%s' in description `%s'", estr,
2764 		    file_names[m->type], m->desc);
2765 		return -1;
2766 	}
2767 
2768 	for (; *ptr; ptr++) {
2769 		if (*ptr == '%') {
2770 			file_magwarn(ms,
2771 			    "Too many format strings (should have at most one) "
2772 			    "for `%s' with description `%s'",
2773 			    file_names[m->type], m->desc);
2774 			return -1;
2775 		}
2776 	}
2777 	return 0;
2778 }
2779 
2780 /*
2781  * Read a numeric value from a pointer, into the value union of a magic
2782  * pointer, according to the magic type.  Update the string pointer to point
2783  * just after the number read.  Return 0 for success, non-zero for failure.
2784  */
2785 file_private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2786 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2787 {
2788 	char *ep;
2789 	uint64_t ull;
2790 	int y;
2791 
2792 	switch (m->type) {
2793 	case FILE_BESTRING16:
2794 	case FILE_LESTRING16:
2795 	case FILE_STRING:
2796 	case FILE_PSTRING:
2797 	case FILE_REGEX:
2798 	case FILE_SEARCH:
2799 	case FILE_NAME:
2800 	case FILE_USE:
2801 	case FILE_DER:
2802 	case FILE_OCTAL:
2803 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2804 		if (*p == NULL) {
2805 			if (ms->flags & MAGIC_CHECK)
2806 				file_magwarn(ms, "cannot get string from `%s'",
2807 				    m->value.s);
2808 			return -1;
2809 		}
2810 		if (m->type == FILE_REGEX) {
2811 			zend_string *pattern;
2812 			int options = 0;
2813 			pcre_cache_entry *pce;
2814 
2815 			pattern = convert_libmagic_pattern(m->value.s, strlen(m->value.s), options);
2816 
2817 			if ((pce = pcre_get_compiled_regex_cache(pattern)) == NULL) {
2818 				zend_string_release(pattern);
2819 				return -1;
2820 			}
2821 			zend_string_release(pattern);
2822 
2823 			return 0;
2824 		}
2825 		return 0;
2826 	default:
2827 		if (m->reln == 'x')
2828 			return 0;
2829 		break;
2830 	}
2831 
2832 	switch (m->type) {
2833 	case FILE_FLOAT:
2834 	case FILE_BEFLOAT:
2835 	case FILE_LEFLOAT:
2836 		errno = 0;
2837 #ifdef HAVE_STRTOF
2838 		m->value.f = strtof(*p, &ep);
2839 #else
2840 		m->value.f = (float)strtod(*p, &ep);
2841 #endif
2842 		if (errno == 0)
2843 			*p = ep;
2844 		return 0;
2845 	case FILE_DOUBLE:
2846 	case FILE_BEDOUBLE:
2847 	case FILE_LEDOUBLE:
2848 		errno = 0;
2849 		m->value.d = strtod(*p, &ep);
2850 		if (errno == 0)
2851 			*p = ep;
2852 		return 0;
2853 	case FILE_GUID:
2854 		if (file_parse_guid(*p, m->value.guid) == -1)
2855 			return -1;
2856 		*p += FILE_GUID_SIZE - 1;
2857 		return 0;
2858 	default:
2859 		errno = 0;
2860 		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
2861 		m->value.q = file_signextend(ms, m, ull);
2862 		if (*p == ep) {
2863 			file_magwarn(ms, "Unparsable number `%s'", *p);
2864 			return -1;
2865 		} else {
2866 			size_t ts = typesize(m->type);
2867 			uint64_t x;
2868 			const char *q;
2869 
2870 			if (ts == FILE_BADSIZE) {
2871 				file_magwarn(ms,
2872 				    "Expected numeric type got `%s'",
2873 				    type_tbl[m->type].name);
2874 				return -1;
2875 			}
2876 			for (q = *p; isspace(CAST(unsigned char, *q)); q++)
2877 				continue;
2878 			if (*q == '-' && ull != UINT64_MAX)
2879 				ull = -CAST(int64_t, ull);
2880 			switch (ts) {
2881 			case 1:
2882 				x = CAST(uint64_t, ull & ~0xffULL);
2883 				y = (x & ~0xffULL) != ~0xffULL;
2884 				break;
2885 			case 2:
2886 				x = CAST(uint64_t, ull & ~0xffffULL);
2887 				y = (x & ~0xffffULL) != ~0xffffULL;
2888 				break;
2889 			case 4:
2890 				x = CAST(uint64_t, ull & ~0xffffffffULL);
2891 				y = (x & ~0xffffffffULL) != ~0xffffffffULL;
2892 				break;
2893 			case 8:
2894 				x = 0;
2895 				y = 0;
2896 				break;
2897 			default:
2898 				fprintf(stderr, "Bad width %zu", ts);
2899 				abort();
2900 			}
2901 			if (x && y) {
2902 				file_magwarn(ms, "Overflow for numeric"
2903 				    " type `%s' value %#" PRIx64,
2904 				    type_tbl[m->type].name, ull);
2905 				return -1;
2906 			}
2907 		}
2908 		if (errno == 0) {
2909 			*p = ep;
2910 			eatsize(p);
2911 		}
2912 		return 0;
2913 	}
2914 }
2915 
/*
 * Convert a string containing C character escapes.  Stop at the first
 * unescaped whitespace character or at the end of the input.
 * Copy the converted version to "m->value.s", and the length in m->vallen
 * (for FILE_PSTRING the size of the length prefix is added on top).
 *
 * Returns the updated scan pointer, which points at the character that
 * ended the scan, or NULL if the string does not fit in m->value.s or
 * the pstring length size is bad.  Warnings about questionable escapes
 * are only issued when "warn" is non-zero.
 */
file_private const char *
getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
{
	const char *origs = s;
	char	*p = m->value.s;
	size_t  plen = sizeof(m->value.s);
	char 	*origp = p;
	char	*pmax = p + plen - 1;	/* last slot, kept for the NUL */
	int	c;
	int	val;
	/* Depth of unescaped '[' ... ']'; consulted for the regex dot warning. */
	size_t	bracket_nesting = 0;

	while ((c = *s++) != '\0') {
		if (isspace(CAST(unsigned char, c)))
			break;
		/* Every iteration stores at most one byte, so check once. */
		if (p >= pmax) {
			file_error(ms, 0, "string too long: `%s'", origs);
			return NULL;
		}
		if (c != '\\') {
		    if (c == '[') {
			    bracket_nesting++;
		    }
		    if (c == ']' && bracket_nesting > 0) {
			    bracket_nesting--;
		    }
		    *p++ = CAST(char, c);
		    continue;
		}
		/* c was a backslash: interpret the escaped character. */
		switch(c = *s++) {

		case '\0':
			/* Backslash at end of input: stop on the NUL. */
			if (warn)
				file_magwarn(ms, "incomplete escape");
			s--;
			goto out;
		case '.':
			if (m->type == FILE_REGEX &&
			    bracket_nesting == 0 && warn) {
				file_magwarn(ms, "escaped dot ('.') found, "
				    "use \\\\. instead");
			}
			/*
			 * Cleared unconditionally, which also silences the
			 * warnings in the cases fallen through to below and
			 * for the rest of this string.
			 */
			warn = 0; /* already did */
			/*FALLTHROUGH*/
		case '\t':
			if (warn) {
				file_magwarn(ms,
				    "escaped tab found, use \\\\t instead");
				warn = 0;	/* already did */
			}
			/*FALLTHROUGH*/
		default:
			if (warn) {
				if (isprint(CAST(unsigned char, c))) {
					/* Allow escaping of
					 * ``relations'' */
					if (strchr("<>&^=!", c) == NULL
					    && (m->type != FILE_REGEX ||
					    strchr("[]().*?^$|{}", c)
					    == NULL)) {
						file_magwarn(ms, "no "
						    "need to escape "
						    "`%c'", c);
					}
				} else {
					file_magwarn(ms,
					    "unknown escape sequence: "
					    "\\%03o", c);
				}
			}
			/*FALLTHROUGH*/
		/* space, perhaps force people to use \040? */
		case ' ':
#if 0
		/*
		 * Other things people escape, but shouldn't need to,
		 * so we disallow them
		 */
		case '\'':
		case '"':
		case '?':
#endif
		/* Relations */
		case '>':
		case '<':
		case '&':
		case '^':
		case '=':
		case '!':
		/* and backslash itself */
		case '\\':
			/* All of the above store the escaped character as is. */
			*p++ = CAST(char, c);
			break;

		case 'a':
			*p++ = '\a';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;  /* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val << 3) | (c - '0');
				c = *s++;  /* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val << 3) | (c-'0');
				else
					--s;
			}
			else
				--s;
			*p++ = CAST(char, val);
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = CAST(char, val);
			break;
		}
	}
	/* Step back onto the whitespace or NUL that ended the loop. */
	--s;
out:
	*p = '\0';
	m->vallen = CAST(unsigned char, (p - origp));
	if (m->type == FILE_PSTRING) {
		size_t l =  file_pstring_length_size(ms, m);
		if (l == FILE_BADSIZE)
			return NULL;
		m->vallen += CAST(unsigned char, l);
	}
	return s;
}
3097 
3098 
/*
 * Convert one hexadecimal digit character to its numeric value (0..15).
 * Returns -1 for anything that is not an ASCII hex digit.
 */
file_private int
hextoint(int c)
{
	const unsigned char byte = CAST(unsigned char, c);

	if (isascii(byte)) {
		if (isdigit(byte))
			return c - '0';
		if (c >= 'a' && c <= 'f')
			return 10 + (c - 'a');
		if (c >= 'A' && c <= 'F')
			return 10 + (c - 'A');
	}
	return -1;
}
3113 
3114 
/*
 * Print a string to fp, rendering non-printable bytes as C character
 * escapes.  Bytes in the range 040..0176 are written as is; \a \b \f \n
 * \r \t \v use their symbolic escape and everything else is written as a
 * backslash followed by three octal digits.
 *
 * When len is FILE_BADSIZE the string is taken to be NUL terminated,
 * otherwise exactly len bytes are printed (embedded NULs included).
 */
file_protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	/* Control characters and, position for position, their letters. */
	static const char controls[] = "\a\b\f\n\r\t\v";
	static const char letters[] = "abfnrtv";
	const int until_nul = (len == FILE_BADSIZE);
	const char *hit;
	char ch;

	for (;;) {
		if (until_nul) {
			if (*s == '\0')
				break;
		} else {
			if (len == 0)
				break;
			len--;
		}
		ch = *s++;

		if (ch >= 040 && ch <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(ch, fp);
			continue;
		}

		(void) fputc('\\', fp);
		/* strchr() would match the terminator, so exclude NUL. */
		hit = (ch != '\0') ? strchr(controls, ch) : NULL;
		if (hit != NULL)
			(void) fputc(letters[hit - controls], fp);
		else
			(void) fprintf(fp, "%.3o", ch & 0377);
	}
}
3174 
/*
 * eatsize(): skip the size suffix that may follow a number [eg. 10UL].
 * An optional 'u' is consumed first, then at most one of 'l' (long),
 * 's'/'h' (short) or 'b'/'c' (char/byte); case is ignored.  *p is
 * advanced past whatever was consumed.
 */
file_private void
eatsize(const char **p)
{
	const char *cur = *p;
	int lowered;

	if (LOWCASE(*cur) == 'u')
		cur++;

	lowered = LOWCASE(*cur);
	/* strchr() would match the terminator, so exclude NUL. */
	if (lowered != '\0' && strchr("lshbc", lowered) != NULL)
		cur++;

	*p = cur;
}
3200 
/*
 * Handle a compiled file: load a compiled magic database into a freshly
 * allocated magic_map.
 *
 * With fn == NULL the built-in php_magic_database is used; otherwise fn
 * is opened as a PHP stream and read completely into memory.  The header
 * (magic number, version, per-set entry counts) is validated, byte
 * swapping is applied when the database was written with the opposite
 * byte order, and map->magic[]/map->nmagic[] are set up to point into the
 * loaded image.
 *
 * Returns the map on success, or NULL on failure after releasing the
 * stream, the map and the derived database name.
 */

file_private struct magic_map *
apprentice_map(struct magic_set *ms, const char *fn)
{
	uint32_t *ptr;
	uint32_t version, entries = 0, nentries;
	int needsbyteswap;
	char *dbname = NULL;	/* only used in messages; NULL for the built-in db */
	struct magic_map *map;
	size_t i;
	php_stream *stream = NULL;
	php_stream_statbuf st;



	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
		file_oomem(ms, sizeof(*map));
		return NULL;
	}

	if (fn == NULL) {
		/* Built-in database: nothing to read, go validate it. */
		map->p = (void *)&php_magic_database;
		goto internal_loaded;
	}

#ifdef PHP_WIN32
	/* Don't bother on windows with php_stream_open_wrapper,
	return to give apprentice_load() a chance. */
	if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
               if (st.sb.st_mode & S_IFDIR) {
                       goto error;
               }
       }
#endif

	dbname = mkdbname(ms, fn, 0);
	if (dbname == NULL)
		goto error;

	/* Note that the stream is opened on fn itself, not on dbname. */
	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);

	if (!stream) {
		goto error;
	}

#ifndef PHP_WIN32
	if (php_stream_stat(stream, &st) < 0) {
		file_error(ms, errno, "cannot stat `%s'", dbname);
		goto error;
	}
#endif
	/* NOTE(review): on PHP_WIN32 st is only filled in by the path stat above when that call succeeded - confirm. */
	if (st.sb.st_size < 8 || st.sb.st_size > maxoff_t()) {
		file_error(ms, 0, "file `%s' is too %s", dbname,
		    st.sb.st_size < 8 ? "small" : "large");
		goto error;
	}

	map->type = MAP_TYPE_MALLOC;
	map->len = CAST(size_t, st.sb.st_size);
	map->p = CAST(void *, emalloc(map->len));

	/* Read the whole file; a short read is an error. */
	if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
		file_badread(ms);
		goto error;
	}

	php_stream_close(stream);
	stream = NULL;

internal_loaded:
	/* Word 0 is the magic number; a byte-swapped match is also accepted. */
	ptr = (uint32_t *)(void *)map->p;
	if (*ptr != MAGICNO) {
		if (swap4(*ptr) != MAGICNO) {
			file_error(ms, 0, "bad magic in `%s'", dbname);
			goto error;
		}
		needsbyteswap = 1;
	} else
		needsbyteswap = 0;
	/* Word 1 is the format version. */
	if (needsbyteswap)
		version = swap4(ptr[1]);
	else
		version = ptr[1];
	if (version != VERSIONNO) {
		file_error(ms, 0, "File %d supports only version %d magic "
		    "files. `%s' is version %d", MAGIC_VERSION,
		    VERSIONNO, dbname, version);
		goto error;
	}

	/* php_magic_database is a const, performing writes will segfault. This is for big-endian
	machines only, PPC and Sparc specifically. Consider static variable or MINIT in
	future. */
	if (needsbyteswap && fn == NULL) {
		/* Work on a private, writable copy; ptr still reads the original. */
		map->p = emalloc(sizeof(php_magic_database));
		map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
	}

	if (NULL != fn) {
		/* The file must consist of a whole number of struct magic records. */
		/* nentries is reset to 0 below before it is used. */
		nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
		entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
		if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
			file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
				dbname, (unsigned long long)st.sb.st_size,
				sizeof(struct magic));
			goto error;
		}
	}
	/* The first struct magic-sized slot is the header; sets follow it back to back. */
	map->magic[0] = CAST(struct magic *, map->p) + 1;
	nentries = 0;
	for (i = 0; i < MAGIC_SETS; i++) {
		/* Words 2.. of the header hold the entry count of each set. */
		if (needsbyteswap)
			map->nmagic[i] = swap4(ptr[i + 2]);
		else
			map->nmagic[i] = ptr[i + 2];
		if (i != MAGIC_SETS - 1)
			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
		nentries += map->nmagic[i];
	}
	/* Header slot plus all sets must account for the entire file. */
	if (NULL != fn && entries != nentries + 1) {
		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
		    dbname, entries, nentries + 1);
		goto error;
	}
	if (needsbyteswap)
		for (i = 0; i < MAGIC_SETS; i++)
			byteswap(map->magic[i], map->nmagic[i]);

	if (dbname) {
		efree(dbname);
	}
	return map;

error:
	if (stream) {
		php_stream_close(stream);
	}
	apprentice_unmap(map);
	if (dbname) {
		efree(dbname);
	}
	return NULL;
}
3347 
3348 /*
3349  * handle an mmaped file.
3350  */
3351 file_private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)3352 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3353 {
3354 	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3355 	static const size_t m = sizeof(**map->magic);
3356 	php_stream *stream;
3357 	size_t len;
3358 	char *dbname;
3359 	int rv = -1;
3360 	uint32_t i;
3361 	union {
3362 		struct magic m;
3363 		uint32_t h[2 + MAGIC_SETS];
3364 	} hdr;
3365 
3366 	dbname = mkdbname(ms, fn, 1);
3367 
3368 	if (dbname == NULL)
3369 		goto out;
3370 
3371 	/* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
3372 	stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
3373 
3374 	if (!stream) {
3375 		file_error(ms, errno, "cannot open `%s'", dbname);
3376 		goto out;
3377 	}
3378 	memset(&hdr, 0, sizeof(hdr));
3379 	hdr.h[0] = MAGICNO;
3380 	hdr.h[1] = VERSIONNO;
3381 	memcpy(hdr.h + 2, map->nmagic, nm);
3382 
3383 	if (php_stream_write(stream,(const char *)&hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
3384 		file_error(ms, errno, "error writing `%s'", dbname);
3385 		goto out;
3386 	}
3387 
3388 	for (i = 0; i < MAGIC_SETS; i++) {
3389 		len = m * map->nmagic[i];
3390 		if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
3391 			file_error(ms, errno, "error writing `%s'", dbname);
3392 			goto out;
3393 		}
3394 	}
3395 
3396 	rv = 0;
3397 	if (stream) {
3398 		php_stream_close(stream);
3399 	}
3400 out:
3401 	efree(dbname);
3402 	return rv;
3403 }
3404 
3405 file_private const char ext[] = ".mgc";
3406 /*
3407  * make a dbname
3408  */
3409 file_private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)3410 mkdbname(struct magic_set *ms, const char *fn, int strip)
3411 {
3412 	const char *p, *q;
3413 	char *buf;
3414 
3415 	if (strip) {
3416 		if ((p = strrchr(fn, '/')) != NULL)
3417 			fn = ++p;
3418 	}
3419 
3420 	for (q = fn; *q; q++)
3421 		continue;
3422 	/* Look for .mgc */
3423 	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3424 		if (*p != *q)
3425 			break;
3426 
3427 	/* Did not find .mgc, restore q */
3428 	if (p >= ext)
3429 		while (*q)
3430 			q++;
3431 
3432 	q++;
3433 	/* Compatibility with old code that looked in .mime */
3434 	if (ms->flags & MAGIC_MIME) {
3435 		spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", CAST(int, q - fn), fn, ext);
3436 #ifdef PHP_WIN32
3437 		if (VCWD_ACCESS(buf, R_OK) == 0) {
3438 #else
3439 		if (VCWD_ACCESS(buf, R_OK) != -1) {
3440 #endif
3441 			ms->flags &= MAGIC_MIME_TYPE;
3442 			return buf;
3443 		}
3444 		efree(buf);
3445 	}
3446 	spprintf(&buf, MAXPATHLEN, "%.*s%s", CAST(int, q - fn), fn, ext);
3447 
3448 	/* Compatibility with old code that looked in .mime */
3449 	if (strstr(fn, ".mime") != NULL)
3450 		ms->flags &= MAGIC_MIME_TYPE;
3451 	return buf;
3452 }
3453 
3454 /*
3455  * Byteswap an mmap'ed file if needed
3456  */
3457 file_private void
3458 byteswap(struct magic *magic, uint32_t nmagic)
3459 {
3460 	uint32_t i;
3461 	for (i = 0; i < nmagic; i++)
3462 		bs1(&magic[i]);
3463 }
3464 
3465 #if !defined(HAVE_BYTESWAP_H) && !defined(HAVE_SYS_BSWAP_H)
/*
 * Return sv with its two bytes exchanged.
 */
file_private uint16_t
swap2(uint16_t sv)
{
	const unsigned int lo = sv & 0xffU;
	const unsigned int hi = (sv >> 8) & 0xffU;

	/* The old low byte becomes the high byte and vice versa. */
	return (uint16_t)((lo << 8) | hi);
}
3479 
/*
 * Return sv with the order of its four bytes reversed.
 */
file_private uint32_t
swap4(uint32_t sv)
{
	/* Move each byte to its mirrored position and merge the results. */
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
3495 
/*
 * Return sv with the order of all eight bytes reversed.
 */
file_private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	unsigned int i;

	/*
	 * Peel bytes off the low end of sv and push them onto the low end
	 * of rv; after eight rounds the first byte taken sits at the top.
	 */
	for (i = 0; i < sizeof(sv); i++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
3526 #endif
3527 
/*
 * Decode a variable-length integer made of 7-bit groups.
 *
 * us: first byte of the encoded value.  Bit 7 of each byte is the
 *     continuation flag; the value ends at the first byte that has it
 *     clear.  No length is passed in, so the caller must make sure such
 *     a byte exists inside the buffer.
 * t:  FILE_LEVARINT means the first byte carries the least significant
 *     group; any other value means the first byte carries the most
 *     significant group.
 * l:  if not NULL, receives the number of bytes the encoding occupies.
 *
 * Returns the decoded value.  Groups that do not fit into uintmax_t are
 * shifted out at the top.
 */
file_protected uintmax_t
file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
{
	uintmax_t x = 0;
	size_t last = 0;
	size_t i;

	/* Index of the final byte: the first one without the high bit. */
	while (us[last] & 0x80)
		last++;
	if (l)
		*l = last + 1;

	if (t == FILE_LEVARINT) {
		/*
		 * Most significant group is stored last, so walk backwards.
		 * Shift before merging each group so the final (least
		 * significant) group is not shifted out of place, and count
		 * with an index so no pointer ever moves below us.
		 */
		for (i = last + 1; i-- > 0;)
			x = (x << 7) | (us[i] & 0x7f);
	} else {
		/* Most significant group is stored first. */
		for (i = 0; i <= last; i++)
			x = (x << 7) | (us[i] & 0x7f);
	}
	return x;
}
3556 
3557 
3558 /*
3559  * byteswap a single magic entry
3560  */
3561 file_private void
3562 bs1(struct magic *m)
3563 {
3564 	m->cont_level = swap2(m->cont_level);
3565 	m->offset = swap4(CAST(uint32_t, m->offset));
3566 	m->in_offset = swap4(CAST(uint32_t, m->in_offset));
3567 	m->lineno = swap4(CAST(uint32_t, m->lineno));
3568 	if (IS_LIBMAGIC_STRING(m->type)) {
3569 		m->str_range = swap4(m->str_range);
3570 		m->str_flags = swap4(m->str_flags);
3571 	}
3572 	else {
3573 		m->value.q = swap8(m->value.q);
3574 		m->num_mask = swap8(m->num_mask);
3575 	}
3576 }
3577 
3578 file_protected size_t
3579 file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3580 {
3581 	switch (m->str_flags & PSTRING_LEN) {
3582 	case PSTRING_1_LE:
3583 		return 1;
3584 	case PSTRING_2_LE:
3585 	case PSTRING_2_BE:
3586 		return 2;
3587 	case PSTRING_4_LE:
3588 	case PSTRING_4_BE:
3589 		return 4;
3590 	default:
3591 		file_error(ms, 0, "corrupt magic file "
3592 		    "(bad pascal string length %d)",
3593 		    m->str_flags & PSTRING_LEN);
3594 		return FILE_BADSIZE;
3595 	}
3596 }
3597 file_protected size_t
3598 file_pstring_get_length(struct magic_set *ms, const struct magic *m,
3599     const char *ss)
3600 {
3601 	size_t len = 0;
3602 	const unsigned char *s = RCAST(const unsigned char *, ss);
3603 	unsigned int s3, s2, s1, s0;
3604 
3605 	switch (m->str_flags & PSTRING_LEN) {
3606 	case PSTRING_1_LE:
3607 		len = *s;
3608 		break;
3609 	case PSTRING_2_LE:
3610 		s0 = s[0];
3611 		s1 = s[1];
3612 		len = (s1 << 8) | s0;
3613 		break;
3614 	case PSTRING_2_BE:
3615 		s0 = s[0];
3616 		s1 = s[1];
3617 		len = (s0 << 8) | s1;
3618 		break;
3619 	case PSTRING_4_LE:
3620 		s0 = s[0];
3621 		s1 = s[1];
3622 		s2 = s[2];
3623 		s3 = s[3];
3624 		len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3625 		break;
3626 	case PSTRING_4_BE:
3627 		s0 = s[0];
3628 		s1 = s[1];
3629 		s2 = s[2];
3630 		s3 = s[3];
3631 		len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3632 		break;
3633 	default:
3634 		file_error(ms, 0, "corrupt magic file "
3635 		    "(bad pascal string length %d)",
3636 		    m->str_flags & PSTRING_LEN);
3637 		return FILE_BADSIZE;
3638 	}
3639 
3640 	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
3641 		size_t l = file_pstring_length_size(ms, m);
3642 		if (l == FILE_BADSIZE)
3643 			return l;
3644 		len -= l;
3645 	}
3646 
3647 	return len;
3648 }
3649 
3650 file_protected int
3651 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3652 {
3653 	uint32_t i, j;
3654 	struct mlist *mlist, *ml;
3655 
3656 	mlist = ms->mlist[1];
3657 
3658 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3659 		struct magic *ma = ml->magic;
3660 		for (i = 0; i < ml->nmagic; i++) {
3661 			if (ma[i].type != FILE_NAME)
3662 				continue;
3663 			if (strcmp(ma[i].value.s, name) == 0) {
3664 				v->magic = &ma[i];
3665 				for (j = i + 1; j < ml->nmagic; j++)
3666 				    if (ma[j].cont_level == 0)
3667 					    break;
3668 				v->nmagic = j - i;
3669 				return 0;
3670 			}
3671 		}
3672 	}
3673 	return -1;
3674 }
3675