xref: /PHP-5.5/ext/fileinfo/libmagic/apprentice.c (revision 43652d38)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * apprentice - make one pass through /etc/magic, learning its secrets.
30  */
31 
32 #include "php.h"
33 
34 #include "file.h"
35 
36 #ifndef	lint
37 FILE_RCSID("@(#)$File: apprentice.c,v 1.191 2013/02/26 21:02:48 christos Exp $")
38 #endif	/* lint */
39 
40 #include "magic.h"
41 #include "patchlevel.h"
42 #include <stdlib.h>
43 
44 #if defined(__hpux) && !defined(HAVE_STRTOULL)
45 #if SIZEOF_LONG == 8
46 # define strtoull strtoul
47 #else
48 # define strtoull __strtoull
49 #endif
50 #endif
51 
52 #ifdef PHP_WIN32
53 #include "win32/unistd.h"
54 #if _MSC_VER <= 1300
55 # include "win32/php_strtoi64.h"
56 #endif
57 #define strtoull _strtoui64
58 #else
59 #include <unistd.h>
60 #endif
61 #include <string.h>
62 #include <assert.h>
63 #include <ctype.h>
64 #include <fcntl.h>
65 
66 #define	EATAB {while (isascii((unsigned char) *l) && \
67 		      isspace((unsigned char) *l))  ++l;}
68 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
69 			tolower((unsigned char) (l)) : (l))
70 /*
71  * Work around a bug in headers on Digital Unix.
72  * At least confirmed for: OSF1 V4.0 878
73  */
74 #if defined(__osf__) && defined(__DECC)
75 #ifdef MAP_FAILED
76 #undef MAP_FAILED
77 #endif
78 #endif
79 
80 #ifndef MAP_FAILED
81 #define MAP_FAILED (void *) -1
82 #endif
83 
84 #ifndef MAP_FILE
85 #define MAP_FILE 0
86 #endif
87 
88 #define ALLOC_CHUNK	(size_t)10
89 #define ALLOC_INCR	(size_t)200
90 
/*
 * One magic test being assembled while a magic source file is parsed:
 * a run of `struct magic' records kept together as a unit.
 */
struct magic_entry {
	struct magic *mp;	/* record array; released with efree() in magic_entry_free() */
	uint32_t cont_count;	/* number of records in mp that coalesce_entries() copies out */
	uint32_t max_count;	/* presumably the allocated capacity of mp -- not used in this part of the file, confirm */
};
96 
/*
 * A loaded magic database, split into MAGIC_SETS record arrays.
 */
struct magic_map {
	void *p;	/* backing buffer; NULL means every magic[] array is freed on its own (see apprentice_unmap) */
	size_t len;	/* presumably the size of p in bytes -- not used in this part of the file, confirm */
	struct magic *magic[MAGIC_SETS];	/* record array for each set */
	uint32_t nmagic[MAGIC_SETS];	/* number of records in each magic[] array */
};
103 
104 int file_formats[FILE_NAMES_SIZE];
105 const size_t file_nformats = FILE_NAMES_SIZE;
106 const char *file_names[FILE_NAMES_SIZE];
107 const size_t file_nnames = FILE_NAMES_SIZE;
108 
109 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
110 private int hextoint(int);
111 private const char *getstr(struct magic_set *, struct magic *, const char *,
112     int);
113 private int parse(struct magic_set *, struct magic_entry *, const char *,
114     size_t, int);
115 private void eatsize(const char **);
116 private int apprentice_1(struct magic_set *, const char *, int);
117 private size_t apprentice_magic_strength(const struct magic *);
118 private int apprentice_sort(const void *, const void *);
119 private void apprentice_list(struct mlist *, int );
120 private struct magic_map *apprentice_load(struct magic_set *,
121     const char *, int);
122 private struct mlist *mlist_alloc(void);
123 private void mlist_free(struct mlist *);
124 private void byteswap(struct magic *, uint32_t);
125 private void bs1(struct magic *);
126 private uint16_t swap2(uint16_t);
127 private uint32_t swap4(uint32_t);
128 private uint64_t swap8(uint64_t);
129 private char *mkdbname(struct magic_set *, const char *, int);
130 private struct magic_map *apprentice_map(struct magic_set *, const char *);
131 private void apprentice_unmap(struct magic_map *);
132 private int apprentice_compile(struct magic_set *, struct magic_map *,
133     const char *);
134 private int check_format_type(const char *, int);
135 private int check_format(struct magic_set *, struct magic *);
136 private int get_op(char);
137 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
138 private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
139 private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
140 
141 
142 private size_t maxmagic[MAGIC_SETS] = { 0 };
143 private size_t magicsize = sizeof(struct magic);
144 
145 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
146 
/*
 * Handlers for the "!:" annotation lines of a magic file.  load_1()
 * compares the text that follows "!:" with `name' (`len' bytes) and calls
 * `fun' of the first match with the rest of the line.  The entry with a
 * NULL name ends the table.
 */
private struct {
	const char *name;	/* keyword expected after "!:" */
	size_t len;		/* length of name, without the terminating NUL */
	int (*fun)(struct magic_set *, struct magic_entry *, const char *);	/* parser for the text after the keyword */
} bang[] = {
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};
159 
160 #include "../data_file.c"
161 
/*
 * One row of a type-name table: maps a keyword to its FILE_* type code
 * and FILE_FMT_* format class.
 */
struct type_tbl_s {
	const char name[16];	/* keyword text, NUL terminated */
	const size_t len;	/* length of name; 0 marks the end of a table */
	const int type;		/* FILE_* type code returned by get_type() */
	const int format;	/* FILE_FMT_* class copied into file_formats[] */
};
168 
/*
 * Table of type keywords, scanned in order by get_type() with a prefix
 * comparison and used by init_file_tables() to fill file_names[] and
 * file_formats[].  The row with an empty name ends the table.
 *
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 */
static const struct type_tbl_s type_tbl[] = {
# define XX(s)		s, (sizeof(s) - 1)
# define XX_NULL	"", 0
	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_STR },
	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};

/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.  Note that the format class given here (FILE_FMT_STR)
 * differs from the one type_tbl lists for the same two keywords.
 */
static const struct type_tbl_s special_tbl[] = {
	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
240 
241 #ifndef S_ISDIR
242 #define S_ISDIR(mode) ((mode) & _S_IFDIR)
243 #endif
244 
245 private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)246 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
247 {
248 	const struct type_tbl_s *p;
249 
250 	for (p = tbl; p->len; p++) {
251 		if (strncmp(l, p->name, p->len) == 0) {
252 			if (t)
253 				*t = l + p->len;
254 			break;
255 		}
256 	}
257 	return p->type;
258 }
259 
260 private int
get_standard_integer_type(const char * l,const char ** t)261 get_standard_integer_type(const char *l, const char **t)
262 {
263 	int type;
264 
265 	if (isalpha((unsigned char)l[1])) {
266 		switch (l[1]) {
267 		case 'C':
268 			/* "dC" and "uC" */
269 			type = FILE_BYTE;
270 			break;
271 		case 'S':
272 			/* "dS" and "uS" */
273 			type = FILE_SHORT;
274 			break;
275 		case 'I':
276 		case 'L':
277 			/*
278 			 * "dI", "dL", "uI", and "uL".
279 			 *
280 			 * XXX - the actual Single UNIX Specification says
281 			 * that "L" means "long", as in the C data type,
282 			 * but we treat it as meaning "4-byte integer".
283 			 * Given that the OS X version of file 5.04 did
284 			 * the same, I guess that passes the actual SUS
285 			 * validation suite; having "dL" be dependent on
286 			 * how big a "long" is on the machine running
287 			 * "file" is silly.
288 			 */
289 			type = FILE_LONG;
290 			break;
291 		case 'Q':
292 			/* "dQ" and "uQ" */
293 			type = FILE_QUAD;
294 			break;
295 		default:
296 			/* "d{anything else}", "u{anything else}" */
297 			return FILE_INVALID;
298 		}
299 		l += 2;
300 	} else if (isdigit((unsigned char)l[1])) {
301 		/*
302 		 * "d{num}" and "u{num}"; we only support {num} values
303 		 * of 1, 2, 4, and 8 - the Single UNIX Specification
304 		 * doesn't say anything about whether arbitrary
305 		 * values should be supported, but both the Solaris 10
306 		 * and OS X Mountain Lion versions of file passed the
307 		 * Single UNIX Specification validation suite, and
308 		 * neither of them support values bigger than 8 or
309 		 * non-power-of-2 values.
310 		 */
311 		if (isdigit((unsigned char)l[2])) {
312 			/* Multi-digit, so > 9 */
313 			return FILE_INVALID;
314 		}
315 		switch (l[1]) {
316 		case '1':
317 			type = FILE_BYTE;
318 			break;
319 		case '2':
320 			type = FILE_SHORT;
321 			break;
322 		case '4':
323 			type = FILE_LONG;
324 			break;
325 		case '8':
326 			type = FILE_QUAD;
327 			break;
328 		default:
329 			/* XXX - what about 3, 5, 6, or 7? */
330 			return FILE_INVALID;
331 		}
332 		l += 2;
333 	} else {
334 		/*
335 		 * "d" or "u" by itself.
336 		 */
337 		type = FILE_LONG;
338 		++l;
339 	}
340 	if (t)
341 		*t = l;
342 	return type;
343 }
344 
345 private void
init_file_tables(void)346 init_file_tables(void)
347 {
348 	static int done = 0;
349 	const struct type_tbl_s *p;
350 
351 	if (done)
352 		return;
353 	done++;
354 
355 	for (p = type_tbl; p->len; p++) {
356 		assert(p->type < FILE_NAMES_SIZE);
357 		file_names[p->type] = p->name;
358 		file_formats[p->type] = p->format;
359 	}
360 	assert(p - type_tbl == FILE_NAMES_SIZE);
361 }
362 
363 private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)364 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
365 {
366 	struct mlist *ml;
367 
368 	if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
369 		return -1;
370 
371 	ml->map = idx == 0 ? map : NULL;
372 	ml->magic = map->magic[idx];
373 	ml->nmagic = map->nmagic[idx];
374 
375 	mlp->prev->next = ml;
376 	ml->prev = mlp->prev;
377 	ml->next = mlp;
378 	mlp->prev = ml;
379 	return 0;
380 }
381 
382 /*
383  * Handle one file or directory.
384  */
385 private int
apprentice_1(struct magic_set * ms,const char * fn,int action)386 apprentice_1(struct magic_set *ms, const char *fn, int action)
387 {
388 	struct mlist *ml;
389 	struct magic_map *map;
390 	size_t i;
391 
392 	if (magicsize != FILE_MAGICSIZE) {
393 		file_error(ms, 0, "magic element size %lu != %lu",
394 		    (unsigned long)sizeof(*map->magic[0]),
395 		    (unsigned long)FILE_MAGICSIZE);
396 		return -1;
397 	}
398 
399 	if (action == FILE_COMPILE) {
400 		map = apprentice_load(ms, fn, action);
401 		if (map == NULL)
402 			return -1;
403 		return apprentice_compile(ms, map, fn);
404 	}
405 
406 	map = apprentice_map(ms, fn);
407 	if (map == NULL) {
408 		if (fn) {
409 			if (ms->flags & MAGIC_CHECK)
410 				file_magwarn(ms, "using regular magic file `%s'", fn);
411 			map = apprentice_load(ms, fn, action);
412 		}
413 		if (map == NULL)
414 			return -1;
415 	}
416 
417 	for (i = 0; i < MAGIC_SETS; i++) {
418 		if (add_mlist(ms->mlist[i], map, i) == -1) {
419 			file_oomem(ms, sizeof(*ml));
420 			apprentice_unmap(map);
421 			return -1;
422 		}
423 	}
424 
425 	if (action == FILE_LIST) {
426 		for (i = 0; i < MAGIC_SETS; i++) {
427 			printf("Set %zu:\nBinary patterns:\n", i);
428 			apprentice_list(ms->mlist[i], BINTEST);
429 			printf("Text patterns:\n");
430 			apprentice_list(ms->mlist[i], TEXTTEST);
431 		}
432 	}
433 
434 	return 0;
435 }
436 
437 protected void
file_ms_free(struct magic_set * ms)438 file_ms_free(struct magic_set *ms)
439 {
440 	size_t i;
441 	if (ms == NULL)
442 		return;
443 	for (i = 0; i < MAGIC_SETS; i++)
444 		mlist_free(ms->mlist[i]);
445 	if (ms->o.pbuf) {
446 		efree(ms->o.pbuf);
447 	}
448 	if (ms->o.buf) {
449 		efree(ms->o.buf);
450 	}
451 	if (ms->c.li) {
452 		efree(ms->c.li);
453 	}
454 	efree(ms);
455 }
456 
457 protected struct magic_set *
file_ms_alloc(int flags)458 file_ms_alloc(int flags)
459 {
460 	struct magic_set *ms;
461 	size_t i, len;
462 
463 	if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
464 	    sizeof(struct magic_set)))) == NULL)
465 		return NULL;
466 
467 	if (magic_setflags(ms, flags) == -1) {
468 		errno = EINVAL;
469 		goto free;
470 	}
471 
472 	ms->o.buf = ms->o.pbuf = NULL;
473 	len = (ms->c.len = 10) * sizeof(*ms->c.li);
474 
475 	if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
476 		goto free;
477 
478 	ms->event_flags = 0;
479 	ms->error = -1;
480 	for (i = 0; i < MAGIC_SETS; i++)
481 		ms->mlist[i] = NULL;
482 	ms->file = "unknown";
483 	ms->line = 0;
484 	return ms;
485 free:
486 	efree(ms);
487 	return NULL;
488 }
489 
490 private void
apprentice_unmap(struct magic_map * map)491 apprentice_unmap(struct magic_map *map)
492 {
493 	if (map == NULL)
494 		return;
495 	if (map->p != php_magic_database) {
496 		if (map->p == NULL) {
497 			int j;
498 			for (j = 0; j < MAGIC_SETS; j++) {
499 				if (map->magic[j]) {
500 					efree(map->magic[j]);
501 				}
502 			}
503 		} else {
504 			efree(map->p);
505 		}
506 	}
507 	efree(map);
508 }
509 
510 private struct mlist *
mlist_alloc(void)511 mlist_alloc(void)
512 {
513 	struct mlist *mlist;
514 	if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
515 		return NULL;
516 	}
517 	mlist->next = mlist->prev = mlist;
518 	return mlist;
519 }
520 
521 private void
mlist_free(struct mlist * mlist)522 mlist_free(struct mlist *mlist)
523 {
524 	struct mlist *ml;
525 
526 	if (mlist == NULL)
527 		return;
528 
529 	for (ml = mlist->next; ml != mlist;) {
530 		struct mlist *next = ml->next;
531 		if (ml->map)
532 			apprentice_unmap(ml->map);
533 		efree(ml);
534 		ml = next;
535 	}
536 	efree(ml);
537 }
538 
539 /* const char *fn: list of magic files and directories */
540 protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)541 file_apprentice(struct magic_set *ms, const char *fn, int action)
542 {
543 	char *p, *mfn;
544 	int file_err, errs = -1;
545 	size_t i;
546 /* XXX disabling default magic loading so the compiled in data is used */
547 #if 0
548 	if ((fn = magic_getpath(fn, action)) == NULL)
549 		return -1;
550 #endif
551 
552 	init_file_tables();
553 
554 	if (fn == NULL)
555 		fn = getenv("MAGIC");
556 	if (fn == NULL) {
557 		for (i = 0; i < MAGIC_SETS; i++) {
558 			mlist_free(ms->mlist[i]);
559 			if ((ms->mlist[i] = mlist_alloc()) == NULL) {
560 				file_oomem(ms, sizeof(*ms->mlist[i]));
561 				return -1;
562 			}
563 		}
564 		return apprentice_1(ms, fn, action);
565 	}
566 
567 	if ((mfn = estrdup(fn)) == NULL) {
568 		file_oomem(ms, strlen(fn));
569 		return -1;
570 	}
571 
572 	for (i = 0; i < MAGIC_SETS; i++) {
573 		mlist_free(ms->mlist[i]);
574 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
575 			file_oomem(ms, sizeof(*ms->mlist[i]));
576 			if (i != 0) {
577 				--i;
578 				do
579 					mlist_free(ms->mlist[i]);
580 				while (i != 0);
581 			}
582 			efree(mfn);
583 			return -1;
584 		}
585 	}
586 	fn = mfn;
587 
588 	while (fn) {
589 		p = strchr(fn, PATHSEP);
590 		if (p)
591 			*p++ = '\0';
592 		if (*fn == '\0')
593 			break;
594 		file_err = apprentice_1(ms, fn, action);
595 		errs = MAX(errs, file_err);
596 		fn = p;
597 	}
598 
599 	efree(mfn);
600 
601 	if (errs == -1) {
602 		for (i = 0; i < MAGIC_SETS; i++) {
603 			mlist_free(ms->mlist[i]);
604 			ms->mlist[i] = NULL;
605 		}
606 		file_error(ms, 0, "could not find any valid magic files!");
607 		return -1;
608 	}
609 
610 	if (action == FILE_LOAD)
611 		return 0;
612 
613 	for (i = 0; i < MAGIC_SETS; i++) {
614 		mlist_free(ms->mlist[i]);
615 		ms->mlist[i] = NULL;
616 	}
617 
618 	switch (action) {
619 	case FILE_COMPILE:
620 	case FILE_CHECK:
621 	case FILE_LIST:
622 		return 0;
623 	default:
624 		file_error(ms, 0, "Invalid action %d", action);
625 		return -1;
626 	}
627 }
628 
629 /*
630  * Get weight of this magic entry, for sorting purposes.
631  */
632 private size_t
apprentice_magic_strength(const struct magic * m)633 apprentice_magic_strength(const struct magic *m)
634 {
635 #define MULT 10
636 	size_t val = 2 * MULT;	/* baseline strength */
637 
638 	switch (m->type) {
639 	case FILE_DEFAULT:	/* make sure this sorts last */
640 		if (m->factor_op != FILE_FACTOR_OP_NONE)
641 			abort();
642 		return 0;
643 
644 	case FILE_BYTE:
645 		val += 1 * MULT;
646 		break;
647 
648 	case FILE_SHORT:
649 	case FILE_LESHORT:
650 	case FILE_BESHORT:
651 		val += 2 * MULT;
652 		break;
653 
654 	case FILE_LONG:
655 	case FILE_LELONG:
656 	case FILE_BELONG:
657 	case FILE_MELONG:
658 		val += 4 * MULT;
659 		break;
660 
661 	case FILE_PSTRING:
662 	case FILE_STRING:
663 		val += m->vallen * MULT;
664 		break;
665 
666 	case FILE_BESTRING16:
667 	case FILE_LESTRING16:
668 		val += m->vallen * MULT / 2;
669 		break;
670 
671 	case FILE_SEARCH:
672 	case FILE_REGEX:
673 		val += m->vallen * MAX(MULT / m->vallen, 1);
674 		break;
675 
676 	case FILE_DATE:
677 	case FILE_LEDATE:
678 	case FILE_BEDATE:
679 	case FILE_MEDATE:
680 	case FILE_LDATE:
681 	case FILE_LELDATE:
682 	case FILE_BELDATE:
683 	case FILE_MELDATE:
684 	case FILE_FLOAT:
685 	case FILE_BEFLOAT:
686 	case FILE_LEFLOAT:
687 		val += 4 * MULT;
688 		break;
689 
690 	case FILE_QUAD:
691 	case FILE_BEQUAD:
692 	case FILE_LEQUAD:
693 	case FILE_QDATE:
694 	case FILE_LEQDATE:
695 	case FILE_BEQDATE:
696 	case FILE_QLDATE:
697 	case FILE_LEQLDATE:
698 	case FILE_BEQLDATE:
699 	case FILE_QWDATE:
700 	case FILE_LEQWDATE:
701 	case FILE_BEQWDATE:
702 	case FILE_DOUBLE:
703 	case FILE_BEDOUBLE:
704 	case FILE_LEDOUBLE:
705 		val += 8 * MULT;
706 		break;
707 
708 	case FILE_INDIRECT:
709 	case FILE_NAME:
710 	case FILE_USE:
711 		break;
712 
713 	default:
714 		val = 0;
715 		(void)fprintf(stderr, "Bad type %d\n", m->type);
716 		abort();
717 	}
718 
719 	switch (m->reln) {
720 	case 'x':	/* matches anything penalize */
721 	case '!':       /* matches almost anything penalize */
722 		val = 0;
723 		break;
724 
725 	case '=':	/* Exact match, prefer */
726 		val += MULT;
727 		break;
728 
729 	case '>':
730 	case '<':	/* comparison match reduce strength */
731 		val -= 2 * MULT;
732 		break;
733 
734 	case '^':
735 	case '&':	/* masking bits, we could count them too */
736 		val -= MULT;
737 		break;
738 
739 	default:
740 		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
741 		abort();
742 	}
743 
744 	if (val == 0)	/* ensure we only return 0 for FILE_DEFAULT */
745 		val = 1;
746 
747 	switch (m->factor_op) {
748 	case FILE_FACTOR_OP_NONE:
749 		break;
750 	case FILE_FACTOR_OP_PLUS:
751 		val += m->factor;
752 		break;
753 	case FILE_FACTOR_OP_MINUS:
754 		val -= m->factor;
755 		break;
756 	case FILE_FACTOR_OP_TIMES:
757 		val *= m->factor;
758 		break;
759 	case FILE_FACTOR_OP_DIV:
760 		val /= m->factor;
761 		break;
762 	default:
763 		abort();
764 	}
765 
766 	/*
767 	 * Magic entries with no description get a bonus because they depend
768 	 * on subsequent magic entries to print something.
769 	 */
770 	if (m->desc[0] == '\0')
771 		val++;
772 	return val;
773 }
774 
775 /*
776  * Sort callback for sorting entries by "strength" (basically length)
777  */
778 private int
apprentice_sort(const void * a,const void * b)779 apprentice_sort(const void *a, const void *b)
780 {
781 	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
782 	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
783 	size_t sa = apprentice_magic_strength(ma->mp);
784 	size_t sb = apprentice_magic_strength(mb->mp);
785 	if (sa == sb)
786 		return 0;
787 	else if (sa > sb)
788 		return -1;
789 	else
790 		return 1;
791 }
792 
793 /*
794  * Shows sorted patterns list in the order which is used for the matching
795  */
796 private void
apprentice_list(struct mlist * mlist,int mode)797 apprentice_list(struct mlist *mlist, int mode)
798 {
799 	uint32_t magindex = 0;
800 	struct mlist *ml;
801 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
802 		for (magindex = 0; magindex < ml->nmagic; magindex++) {
803 			struct magic *m = &ml->magic[magindex];
804 			if ((m->flag & mode) != mode) {
805 				/* Skip sub-tests */
806 				while (magindex + 1 < ml->nmagic &&
807 				       ml->magic[magindex + 1].cont_level != 0)
808 					++magindex;
809 				continue; /* Skip to next top-level test*/
810 			}
811 
812 			/*
813 			 * Try to iterate over the tree until we find item with
814 			 * description/mimetype.
815 			 */
816 			while (magindex + 1 < ml->nmagic &&
817 			       ml->magic[magindex + 1].cont_level != 0 &&
818 			       *ml->magic[magindex].desc == '\0' &&
819 			       *ml->magic[magindex].mimetype == '\0')
820 				magindex++;
821 
822 			printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
823 			    apprentice_magic_strength(m),
824 			    ml->magic[magindex].desc,
825 			    ml->magic[magindex].mimetype);
826 		}
827 	}
828 }
829 
830 private void
set_test_type(struct magic * mstart,struct magic * m)831 set_test_type(struct magic *mstart, struct magic *m)
832 {
833 	switch (m->type) {
834 	case FILE_BYTE:
835 	case FILE_SHORT:
836 	case FILE_LONG:
837 	case FILE_DATE:
838 	case FILE_BESHORT:
839 	case FILE_BELONG:
840 	case FILE_BEDATE:
841 	case FILE_LESHORT:
842 	case FILE_LELONG:
843 	case FILE_LEDATE:
844 	case FILE_LDATE:
845 	case FILE_BELDATE:
846 	case FILE_LELDATE:
847 	case FILE_MEDATE:
848 	case FILE_MELDATE:
849 	case FILE_MELONG:
850 	case FILE_QUAD:
851 	case FILE_LEQUAD:
852 	case FILE_BEQUAD:
853 	case FILE_QDATE:
854 	case FILE_LEQDATE:
855 	case FILE_BEQDATE:
856 	case FILE_QLDATE:
857 	case FILE_LEQLDATE:
858 	case FILE_BEQLDATE:
859 	case FILE_QWDATE:
860 	case FILE_LEQWDATE:
861 	case FILE_BEQWDATE:
862 	case FILE_FLOAT:
863 	case FILE_BEFLOAT:
864 	case FILE_LEFLOAT:
865 	case FILE_DOUBLE:
866 	case FILE_BEDOUBLE:
867 	case FILE_LEDOUBLE:
868 		mstart->flag |= BINTEST;
869 		break;
870 	case FILE_STRING:
871 	case FILE_PSTRING:
872 	case FILE_BESTRING16:
873 	case FILE_LESTRING16:
874 		/* Allow text overrides */
875 		if (mstart->str_flags & STRING_TEXTTEST)
876 			mstart->flag |= TEXTTEST;
877 		else
878 			mstart->flag |= BINTEST;
879 		break;
880 	case FILE_REGEX:
881 	case FILE_SEARCH:
882 		/* Check for override */
883 		if (mstart->str_flags & STRING_BINTEST)
884 			mstart->flag |= BINTEST;
885 		if (mstart->str_flags & STRING_TEXTTEST)
886 			mstart->flag |= TEXTTEST;
887 
888 		if (mstart->flag & (TEXTTEST|BINTEST))
889 			break;
890 
891 		/* binary test if pattern is not text */
892 		if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
893 		    NULL) <= 0)
894 			mstart->flag |= BINTEST;
895 		else
896 			mstart->flag |= TEXTTEST;
897 		break;
898 	case FILE_DEFAULT:
899 		/* can't deduce anything; we shouldn't see this at the
900 		   top level anyway */
901 		break;
902 	case FILE_INVALID:
903 	default:
904 		/* invalid search type, but no need to complain here */
905 		break;
906 	}
907 }
908 
909 private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry ** mentry,uint32_t * mentrycount)910 addentry(struct magic_set *ms, struct magic_entry *me,
911    struct magic_entry **mentry, uint32_t *mentrycount)
912 {
913 	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
914 	if (mentrycount[i] == maxmagic[i]) {
915 		struct magic_entry *mp;
916 
917 		maxmagic[i] += ALLOC_INCR;
918 		if ((mp = CAST(struct magic_entry *,
919 		    erealloc(mentry[i], sizeof(*mp) * maxmagic[i]))) ==
920 		    NULL) {
921 			file_oomem(ms, sizeof(*mp) * maxmagic[i]);
922 			return -1;
923 		}
924 		(void)memset(&mp[mentrycount[i]], 0, sizeof(*mp) *
925 		    ALLOC_INCR);
926 		mentry[i] = mp;
927 	}
928 	mentry[i][mentrycount[i]++] = *me;
929 	memset(me, 0, sizeof(*me));
930 	return 0;
931 }
932 
933 /*
934  * Load and parse one file.
935  */
936 private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry ** mentry,uint32_t * mentrycount)937 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
938    struct magic_entry **mentry, uint32_t *mentrycount)
939 {
940 	char buffer[BUFSIZ + 1];
941 	char *line = NULL;
942 	size_t len;
943 	size_t lineno = 0;
944 	struct magic_entry me;
945 
946 	php_stream *stream;
947 
948 	TSRMLS_FETCH();
949 
950 	ms->file = fn;
951 #if PHP_API_VERSION < 20100412
952 	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
953 #else
954 	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
955 #endif
956 
957 	if (stream == NULL) {
958 		if (errno != ENOENT)
959 			file_error(ms, errno, "cannot read magic file `%s'",
960 				   fn);
961 		(*errs)++;
962 		return;
963 	}
964 
965 	memset(&me, 0, sizeof(me));
966 	/* read and parse this file */
967 	for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
968 		if (len == 0) /* null line, garbage, etc */
969 			continue;
970 		if (line[len - 1] == '\n') {
971 			lineno++;
972 			line[len - 1] = '\0'; /* delete newline */
973 		}
974 		switch (line[0]) {
975 		case '\0':	/* empty, do not parse */
976 		case '#':	/* comment, do not parse */
977 			continue;
978 		case '!':
979 			if (line[1] == ':') {
980 				size_t i;
981 
982 				for (i = 0; bang[i].name != NULL; i++) {
983 					if ((size_t)(len - 2) > bang[i].len &&
984 					    memcmp(bang[i].name, line + 2,
985 					    bang[i].len) == 0)
986 						break;
987 				}
988 				if (bang[i].name == NULL) {
989 					file_error(ms, 0,
990 					    "Unknown !: entry `%s'", line);
991 					(*errs)++;
992 					continue;
993 				}
994 				if (me.mp == NULL) {
995 					file_error(ms, 0,
996 					    "No current entry for :!%s type",
997 						bang[i].name);
998 					(*errs)++;
999 					continue;
1000 				}
1001 				if ((*bang[i].fun)(ms, &me,
1002 				    line + bang[i].len + 2) != 0) {
1003 					(*errs)++;
1004 					continue;
1005 				}
1006 				continue;
1007 			}
1008 			/*FALLTHROUGH*/
1009 		default:
1010 		again:
1011 			switch (parse(ms, &me, line, lineno, action)) {
1012 			case 0:
1013 				continue;
1014 			case 1:
1015 				(void)addentry(ms, &me, mentry, mentrycount);
1016 				goto again;
1017 			default:
1018 				(*errs)++;
1019 			break;
1020 		}
1021 	}
1022 	}
1023 	if (me.mp)
1024 		(void)addentry(ms, &me, mentry, mentrycount);
1025 	php_stream_close(stream);
1026 }
1027 
1028 /*
1029  * parse a file or directory of files
1030  * const char *fn: name of magic file or directory
1031  */
/*
 * qsort()-style comparison for an array of C strings: both arguments
 * point at `char *' elements, which are compared with strcmp().
 */
private int
cmpstrp(const void *p1, const void *p2)
{
	char *const *lhs = (char *const *)p1;
	char *const *rhs = (char *const *)p2;

	return strcmp(*lhs, *rhs);
}
1037 
1038 
1039 private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1040 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1041     uint32_t starttest)
1042 {
1043 	static const char text[] = "text";
1044 	static const char binary[] = "binary";
1045 	static const size_t len = sizeof(text);
1046 
1047 	uint32_t i = starttest;
1048 
1049 	do {
1050 		set_test_type(me[starttest].mp, me[i].mp);
1051 		if ((ms->flags & MAGIC_DEBUG) == 0)
1052 			continue;
1053 		(void)fprintf(stderr, "%s%s%s: %s\n",
1054 		    me[i].mp->mimetype,
1055 		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1056 		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1057 		    me[i].mp->flag & BINTEST ? binary : text);
1058 		if (me[i].mp->flag & BINTEST) {
1059 			char *p = strstr(me[i].mp->desc, text);
1060 			if (p && (p == me[i].mp->desc ||
1061 			    isspace((unsigned char)p[-1])) &&
1062 			    (p + len - me[i].mp->desc == MAXstring
1063 			    || (p[len] == '\0' ||
1064 			    isspace((unsigned char)p[len]))))
1065 				(void)fprintf(stderr, "*** Possible "
1066 				    "binary test for text type\n");
1067 		}
1068 	} while (++i < nme && me[i].mp->cont_level != 0);
1069 	return i;
1070 }
1071 
1072 private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1073 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1074 {
1075 	uint32_t i;
1076 	for (i = 0; i < nme; i++) {
1077 		if (me[i].mp->cont_level == 0 &&
1078 		    me[i].mp->type == FILE_DEFAULT) {
1079 			while (++i < nme)
1080 				if (me[i].mp->cont_level == 0)
1081 					break;
1082 			if (i != nme) {
1083 				/* XXX - Ugh! */
1084 				ms->line = me[i].mp->lineno;
1085 				file_magwarn(ms,
1086 				    "level 0 \"default\" did not sort last");
1087 			}
1088 			return;
1089 		}
1090 	}
1091 }
1092 
1093 private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1094 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1095     struct magic **ma, uint32_t *nma)
1096 {
1097 	uint32_t i, mentrycount = 0;
1098 	size_t slen;
1099 
1100 	for (i = 0; i < nme; i++)
1101 		mentrycount += me[i].cont_count;
1102 
1103 	slen = sizeof(**ma) * mentrycount;
1104 	if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1105 		file_oomem(ms, slen);
1106 		return -1;
1107 	}
1108 
1109 	mentrycount = 0;
1110 	for (i = 0; i < nme; i++) {
1111 		(void)memcpy(*ma + mentrycount, me[i].mp,
1112 		    me[i].cont_count * sizeof(**ma));
1113 		mentrycount += me[i].cont_count;
1114 	}
1115 	*nma = mentrycount;
1116 	return 0;
1117 }
1118 
1119 private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1120 magic_entry_free(struct magic_entry *me, uint32_t nme)
1121 {
1122 	uint32_t i;
1123 	if (me == NULL)
1124 		return;
1125 	for (i = 0; i < nme; i++)
1126 		efree(me[i].mp);
1127 	efree(me);
1128 }
1129 
1130 private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1131 apprentice_load(struct magic_set *ms, const char *fn, int action)
1132 {
1133 	int errs = 0;
1134 	struct magic_entry *mentry[MAGIC_SETS] = { NULL };
1135 	uint32_t mentrycount[MAGIC_SETS] = { 0 };
1136 	uint32_t i, j;
1137 	size_t files = 0, maxfiles = 0;
1138 	char **filearr = NULL;
1139 	struct stat st;
1140 	struct magic_map *map;
1141 	php_stream *dir;
1142 	php_stream_dirent d;
1143 
1144 	TSRMLS_FETCH();
1145 
1146 	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1147 
1148 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
1149 		file_oomem(ms, sizeof(*map));
1150 		return NULL;
1151 	}
1152 
1153 	/* print silly verbose header for USG compat. */
1154 	if (action == FILE_CHECK)
1155 		(void)fprintf(stderr, "%s\n", usg_hdr);
1156 
1157 	{
1158 		/* XXX the maxmagic has to be reset each time we load some new magic file.
1159 		Where file commando is used it's not essential as the CLI process
1160 		ends, multiple loading within the same process wouldn't work. */
1161 		int k;
1162 		for (k = 0; k < MAGIC_SETS; k++) {
1163 			maxmagic[k] = 0;
1164 		}
1165 	}
1166 
1167 	/* load directory or file */
1168 	/* FIXME: Read file names and sort them to prevent
1169 	   non-determinism. See Debian bug #488562. */
1170 	if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1171 		int mflen;
1172 		char mfn[MAXPATHLEN];
1173 
1174 		dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1175 		if (!dir) {
1176 			errs++;
1177 			goto out;
1178 		}
1179 		while (php_stream_readdir(dir, &d)) {
1180 			if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1181 				file_oomem(ms,
1182 				strlen(fn) + strlen(d.d_name) + 2);
1183 				errs++;
1184 				php_stream_closedir(dir);
1185 				goto out;
1186 			}
1187 			if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1188 				continue;
1189 			}
1190 			if (files >= maxfiles) {
1191 				size_t mlen;
1192 				maxfiles = (maxfiles + 1) * 2;
1193 				mlen = maxfiles * sizeof(*filearr);
1194 				if ((filearr = CAST(char **,
1195 				    erealloc(filearr, mlen))) == NULL) {
1196 					file_oomem(ms, mlen);
1197 					php_stream_closedir(dir);
1198 					errs++;
1199 					goto out;
1200 				}
1201 			}
1202 			filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1203 		}
1204 		php_stream_closedir(dir);
1205 		qsort(filearr, files, sizeof(*filearr), cmpstrp);
1206 		for (i = 0; i < files; i++) {
1207 			load_1(ms, action, filearr[i], &errs, mentry,
1208 			    mentrycount);
1209 			efree(filearr[i]);
1210 		}
1211 		efree(filearr);
1212 	} else
1213 		load_1(ms, action, fn, &errs, mentry, mentrycount);
1214 	if (errs)
1215 		goto out;
1216 
1217 	for (j = 0; j < MAGIC_SETS; j++) {
1218 		/* Set types of tests */
1219 		for (i = 0; i < mentrycount[j]; ) {
1220 			if (mentry[j][i].mp->cont_level != 0) {
1221 				i++;
1222 				continue;
1223 			}
1224 			i = set_text_binary(ms, mentry[j], mentrycount[j], i);
1225 		}
1226 		qsort(mentry[j], mentrycount[j], sizeof(*mentry[j]),
1227 		    apprentice_sort);
1228 
1229 		/*
1230 		 * Make sure that any level 0 "default" line is last
1231 		 * (if one exists).
1232 		 */
1233 		set_last_default(ms, mentry[j], mentrycount[j]);
1234 
1235 		/* coalesce per file arrays into a single one */
1236 		if (coalesce_entries(ms, mentry[j], mentrycount[j],
1237 		    &map->magic[j], &map->nmagic[j]) == -1) {
1238 			errs++;
1239 			goto out;
1240 		}
1241 	}
1242 
1243 out:
1244 	for (j = 0; j < MAGIC_SETS; j++)
1245 		magic_entry_free(mentry[j], mentrycount[j]);
1246 
1247 	if (errs) {
1248 		for (j = 0; j < MAGIC_SETS; j++) {
1249 			if (map->magic[j])
1250 				efree(map->magic[j]);
1251 		}
1252 		efree(map);
1253 		return NULL;
1254 	}
1255 	return map;
1256 }
1257 
1258 /*
1259  * extend the sign bit if the comparison is to be signed
1260  */
1261 protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1262 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1263 {
1264 	if (!(m->flag & UNSIGNED)) {
1265 		switch(m->type) {
1266 		/*
1267 		 * Do not remove the casts below.  They are
1268 		 * vital.  When later compared with the data,
1269 		 * the sign extension must have happened.
1270 		 */
1271 		case FILE_BYTE:
1272 			v = (signed char) v;
1273 			break;
1274 		case FILE_SHORT:
1275 		case FILE_BESHORT:
1276 		case FILE_LESHORT:
1277 			v = (short) v;
1278 			break;
1279 		case FILE_DATE:
1280 		case FILE_BEDATE:
1281 		case FILE_LEDATE:
1282 		case FILE_MEDATE:
1283 		case FILE_LDATE:
1284 		case FILE_BELDATE:
1285 		case FILE_LELDATE:
1286 		case FILE_MELDATE:
1287 		case FILE_LONG:
1288 		case FILE_BELONG:
1289 		case FILE_LELONG:
1290 		case FILE_MELONG:
1291 		case FILE_FLOAT:
1292 		case FILE_BEFLOAT:
1293 		case FILE_LEFLOAT:
1294 			v = (int32_t) v;
1295 			break;
1296 		case FILE_QUAD:
1297 		case FILE_BEQUAD:
1298 		case FILE_LEQUAD:
1299 		case FILE_QDATE:
1300 		case FILE_QLDATE:
1301 		case FILE_QWDATE:
1302 		case FILE_BEQDATE:
1303 		case FILE_BEQLDATE:
1304 		case FILE_BEQWDATE:
1305 		case FILE_LEQDATE:
1306 		case FILE_LEQLDATE:
1307 		case FILE_LEQWDATE:
1308 		case FILE_DOUBLE:
1309 		case FILE_BEDOUBLE:
1310 		case FILE_LEDOUBLE:
1311 			v = (int64_t) v;
1312 			break;
1313 		case FILE_STRING:
1314 		case FILE_PSTRING:
1315 		case FILE_BESTRING16:
1316 		case FILE_LESTRING16:
1317 		case FILE_REGEX:
1318 		case FILE_SEARCH:
1319 		case FILE_DEFAULT:
1320 		case FILE_INDIRECT:
1321 		case FILE_NAME:
1322 		case FILE_USE:
1323 			break;
1324 		default:
1325 			if (ms->flags & MAGIC_CHECK)
1326 			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1327 				    m->type);
1328 			return ~0U;
1329 		}
1330 	}
1331 	return v;
1332 }
1333 
1334 private int
string_modifier_check(struct magic_set * ms,struct magic * m)1335 string_modifier_check(struct magic_set *ms, struct magic *m)
1336 {
1337 	if ((ms->flags & MAGIC_CHECK) == 0)
1338 		return 0;
1339 
1340 	if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
1341 		file_magwarn(ms,
1342 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1343 		return -1;
1344 	}
1345 	switch (m->type) {
1346 	case FILE_BESTRING16:
1347 	case FILE_LESTRING16:
1348 		if (m->str_flags != 0) {
1349 			file_magwarn(ms,
1350 			    "no modifiers allowed for 16-bit strings\n");
1351 			return -1;
1352 		}
1353 		break;
1354 	case FILE_STRING:
1355 	case FILE_PSTRING:
1356 		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1357 			file_magwarn(ms,
1358 			    "'/%c' only allowed on regex and search\n",
1359 			    CHAR_REGEX_OFFSET_START);
1360 			return -1;
1361 		}
1362 		break;
1363 	case FILE_SEARCH:
1364 		if (m->str_range == 0) {
1365 			file_magwarn(ms,
1366 			    "missing range; defaulting to %d\n",
1367                             STRING_DEFAULT_RANGE);
1368 			m->str_range = STRING_DEFAULT_RANGE;
1369 			return -1;
1370 		}
1371 		break;
1372 	case FILE_REGEX:
1373 		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1374 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1375 			    CHAR_COMPACT_WHITESPACE);
1376 			return -1;
1377 		}
1378 		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1379 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1380 			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1381 			return -1;
1382 		}
1383 		break;
1384 	default:
1385 		file_magwarn(ms, "coding error: m->type=%d\n",
1386 		    m->type);
1387 		return -1;
1388 	}
1389 	return 0;
1390 }
1391 
1392 private int
get_op(char c)1393 get_op(char c)
1394 {
1395 	switch (c) {
1396 	case '&':
1397 		return FILE_OPAND;
1398 	case '|':
1399 		return FILE_OPOR;
1400 	case '^':
1401 		return FILE_OPXOR;
1402 	case '+':
1403 		return FILE_OPADD;
1404 	case '-':
1405 		return FILE_OPMINUS;
1406 	case '*':
1407 		return FILE_OPMULTIPLY;
1408 	case '/':
1409 		return FILE_OPDIVIDE;
1410 	case '%':
1411 		return FILE_OPMODULO;
1412 	default:
1413 		return -1;
1414 	}
1415 }
1416 
1417 #ifdef ENABLE_CONDITIONALS
1418 private int
get_cond(const char * l,const char ** t)1419 get_cond(const char *l, const char **t)
1420 {
1421 	static const struct cond_tbl_s {
1422 		char name[8];
1423 		size_t len;
1424 		int cond;
1425 	} cond_tbl[] = {
1426 		{ "if",		2,	COND_IF },
1427 		{ "elif",	4,	COND_ELIF },
1428 		{ "else",	4,	COND_ELSE },
1429 		{ "",		0,	COND_NONE },
1430 	};
1431 	const struct cond_tbl_s *p;
1432 
1433 	for (p = cond_tbl; p->len; p++) {
1434 		if (strncmp(l, p->name, p->len) == 0 &&
1435 		    isspace((unsigned char)l[p->len])) {
1436 			if (t)
1437 				*t = l + p->len;
1438 			break;
1439 		}
1440 	}
1441 	return p->cond;
1442 }
1443 
1444 private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1445 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1446 {
1447 	int last_cond;
1448 	last_cond = ms->c.li[cont_level].last_cond;
1449 
1450 	switch (cond) {
1451 	case COND_IF:
1452 		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1453 			if (ms->flags & MAGIC_CHECK)
1454 				file_magwarn(ms, "syntax error: `if'");
1455 			return -1;
1456 		}
1457 		last_cond = COND_IF;
1458 		break;
1459 
1460 	case COND_ELIF:
1461 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1462 			if (ms->flags & MAGIC_CHECK)
1463 				file_magwarn(ms, "syntax error: `elif'");
1464 			return -1;
1465 		}
1466 		last_cond = COND_ELIF;
1467 		break;
1468 
1469 	case COND_ELSE:
1470 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1471 			if (ms->flags & MAGIC_CHECK)
1472 				file_magwarn(ms, "syntax error: `else'");
1473 			return -1;
1474 		}
1475 		last_cond = COND_NONE;
1476 		break;
1477 
1478 	case COND_NONE:
1479 		last_cond = COND_NONE;
1480 		break;
1481 	}
1482 
1483 	ms->c.li[cont_level].last_cond = last_cond;
1484 	return 0;
1485 }
1486 #endif /* ENABLE_CONDITIONALS */
1487 
1488 /*
1489  * parse one line from magic file, put into magic[index++] if valid
1490  */
1491 private int
parse(struct magic_set * ms,struct magic_entry * me,const char * line,size_t lineno,int action)1492 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1493     size_t lineno, int action)
1494 {
1495 #ifdef ENABLE_CONDITIONALS
1496 	static uint32_t last_cont_level = 0;
1497 #endif
1498 	size_t i;
1499 	struct magic *m;
1500 	const char *l = line;
1501 	char *t;
1502 	int op;
1503 	uint32_t cont_level;
1504 	int32_t diff;
1505 
1506 	cont_level = 0;
1507 
1508 	/*
1509 	 * Parse the offset.
1510 	 */
1511 	while (*l == '>') {
1512 		++l;		/* step over */
1513 		cont_level++;
1514 	}
1515 #ifdef ENABLE_CONDITIONALS
1516 	if (cont_level == 0 || cont_level > last_cont_level)
1517 		if (file_check_mem(ms, cont_level) == -1)
1518 			return -1;
1519 	last_cont_level = cont_level;
1520 #endif
1521 	if (cont_level != 0) {
1522 		if (me->mp == NULL) {
1523 			file_magerror(ms, "No current entry for continuation");
1524 			return -1;
1525 		}
1526 		if (me->cont_count == 0) {
1527 			file_magerror(ms, "Continuations present with 0 count");
1528 			return -1;
1529 		}
1530 		m = &me->mp[me->cont_count - 1];
1531 		diff = (int32_t)cont_level - (int32_t)m->cont_level;
1532 		if (diff > 1)
1533 			file_magwarn(ms, "New continuation level %u is more "
1534 			    "than one larger than current level %u", cont_level,
1535 			    m->cont_level);
1536 		if (me->cont_count == me->max_count) {
1537 			struct magic *nm;
1538 			size_t cnt = me->max_count + ALLOC_CHUNK;
1539 			if ((nm = CAST(struct magic *, erealloc(me->mp,
1540 			    sizeof(*nm) * cnt))) == NULL) {
1541 				file_oomem(ms, sizeof(*nm) * cnt);
1542 				return -1;
1543 			}
1544 			me->mp = m = nm;
1545 			me->max_count = CAST(uint32_t, cnt);
1546 		}
1547 		m = &me->mp[me->cont_count++];
1548 		(void)memset(m, 0, sizeof(*m));
1549 		m->cont_level = cont_level;
1550 	} else {
1551 		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1552 		if (me->mp != NULL)
1553 			return 1;
1554 		if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1555 			file_oomem(ms, len);
1556 			return -1;
1557 		}
1558 		me->mp = m;
1559 		me->max_count = ALLOC_CHUNK;
1560 		(void)memset(m, 0, sizeof(*m));
1561 		m->factor_op = FILE_FACTOR_OP_NONE;
1562 		m->cont_level = 0;
1563 		me->cont_count = 1;
1564 	}
1565 	m->lineno = CAST(uint32_t, lineno);
1566 
1567 	if (*l == '&') {  /* m->cont_level == 0 checked below. */
1568                 ++l;            /* step over */
1569                 m->flag |= OFFADD;
1570         }
1571 	if (*l == '(') {
1572 		++l;		/* step over */
1573 		m->flag |= INDIR;
1574 		if (m->flag & OFFADD)
1575 			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1576 
1577 		if (*l == '&') {  /* m->cont_level == 0 checked below */
1578 			++l;            /* step over */
1579 			m->flag |= OFFADD;
1580 		}
1581 	}
1582 	/* Indirect offsets are not valid at level 0. */
1583 	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1584 		if (ms->flags & MAGIC_CHECK)
1585 			file_magwarn(ms, "relative offset at level 0");
1586 
1587 	/* get offset, then skip over it */
1588 	m->offset = (uint32_t)strtoul(l, &t, 0);
1589         if (l == t)
1590 		if (ms->flags & MAGIC_CHECK)
1591 			file_magwarn(ms, "offset `%s' invalid", l);
1592         l = t;
1593 
1594 	if (m->flag & INDIR) {
1595 		m->in_type = FILE_LONG;
1596 		m->in_offset = 0;
1597 		/*
1598 		 * read [.lbs][+-]nnnnn)
1599 		 */
1600 		if (*l == '.') {
1601 			l++;
1602 			switch (*l) {
1603 			case 'l':
1604 				m->in_type = FILE_LELONG;
1605 				break;
1606 			case 'L':
1607 				m->in_type = FILE_BELONG;
1608 				break;
1609 			case 'm':
1610 				m->in_type = FILE_MELONG;
1611 				break;
1612 			case 'h':
1613 			case 's':
1614 				m->in_type = FILE_LESHORT;
1615 				break;
1616 			case 'H':
1617 			case 'S':
1618 				m->in_type = FILE_BESHORT;
1619 				break;
1620 			case 'c':
1621 			case 'b':
1622 			case 'C':
1623 			case 'B':
1624 				m->in_type = FILE_BYTE;
1625 				break;
1626 			case 'e':
1627 			case 'f':
1628 			case 'g':
1629 				m->in_type = FILE_LEDOUBLE;
1630 				break;
1631 			case 'E':
1632 			case 'F':
1633 			case 'G':
1634 				m->in_type = FILE_BEDOUBLE;
1635 				break;
1636 			case 'i':
1637 				m->in_type = FILE_LEID3;
1638 				break;
1639 			case 'I':
1640 				m->in_type = FILE_BEID3;
1641 				break;
1642 			default:
1643 				if (ms->flags & MAGIC_CHECK)
1644 					file_magwarn(ms,
1645 					    "indirect offset type `%c' invalid",
1646 					    *l);
1647 				break;
1648 			}
1649 			l++;
1650 		}
1651 
1652 		m->in_op = 0;
1653 		if (*l == '~') {
1654 			m->in_op |= FILE_OPINVERSE;
1655 			l++;
1656 		}
1657 		if ((op = get_op(*l)) != -1) {
1658 			m->in_op |= op;
1659 			l++;
1660 		}
1661 		if (*l == '(') {
1662 			m->in_op |= FILE_OPINDIRECT;
1663 			l++;
1664 		}
1665 		if (isdigit((unsigned char)*l) || *l == '-') {
1666 			m->in_offset = (int32_t)strtol(l, &t, 0);
1667 			if (l == t)
1668 				if (ms->flags & MAGIC_CHECK)
1669 					file_magwarn(ms,
1670 					    "in_offset `%s' invalid", l);
1671 			l = t;
1672 		}
1673 		if (*l++ != ')' ||
1674 		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1675 			if (ms->flags & MAGIC_CHECK)
1676 				file_magwarn(ms,
1677 				    "missing ')' in indirect offset");
1678 	}
1679 	EATAB;
1680 
1681 #ifdef ENABLE_CONDITIONALS
1682 	m->cond = get_cond(l, &l);
1683 	if (check_cond(ms, m->cond, cont_level) == -1)
1684 		return -1;
1685 
1686 	EATAB;
1687 #endif
1688 
1689 	/*
1690 	 * Parse the type.
1691 	 */
1692 	if (*l == 'u') {
1693 		/*
1694 		 * Try it as a keyword type prefixed by "u"; match what
1695 		 * follows the "u".  If that fails, try it as an SUS
1696 		 * integer type.
1697 		 */
1698 		m->type = get_type(type_tbl, l + 1, &l);
1699 		if (m->type == FILE_INVALID) {
1700 			/*
1701 			 * Not a keyword type; parse it as an SUS type,
1702 			 * 'u' possibly followed by a number or C/S/L.
1703 			 */
1704 			m->type = get_standard_integer_type(l, &l);
1705 		}
1706 		// It's unsigned.
1707 		if (m->type != FILE_INVALID)
1708 			m->flag |= UNSIGNED;
1709 	} else {
1710 		/*
1711 		 * Try it as a keyword type.  If that fails, try it as
1712 		 * an SUS integer type if it begins with "d" or as an
1713 		 * SUS string type if it begins with "s".  In any case,
1714 		 * it's not unsigned.
1715 		 */
1716 		m->type = get_type(type_tbl, l, &l);
1717 		if (m->type == FILE_INVALID) {
1718 			/*
1719 			 * Not a keyword type; parse it as an SUS type,
1720 			 * either 'd' possibly followed by a number or
1721 			 * C/S/L, or just 's'.
1722 			 */
1723 			if (*l == 'd')
1724 				m->type = get_standard_integer_type(l, &l);
1725 			else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1726 				m->type = FILE_STRING;
1727 		++l;
1728 			}
1729 		}
1730 	}
1731 
1732 	if (m->type == FILE_INVALID) {
1733 		/* Not found - try it as a special keyword. */
1734 		m->type = get_type(special_tbl, l, &l);
1735 	}
1736 
1737 	if (m->type == FILE_INVALID) {
1738 		if (ms->flags & MAGIC_CHECK)
1739 			file_magwarn(ms, "type `%s' invalid", l);
1740 		if (me->mp) {
1741 			efree(me->mp);
1742 			me->mp = NULL;
1743 		}
1744 		return -1;
1745 	}
1746 
1747 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1748 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1749 
1750 	m->mask_op = 0;
1751 	if (*l == '~') {
1752 		if (!IS_LIBMAGIC_STRING(m->type))
1753 			m->mask_op |= FILE_OPINVERSE;
1754 		else if (ms->flags & MAGIC_CHECK)
1755 			file_magwarn(ms, "'~' invalid for string types");
1756 		++l;
1757 	}
1758 	m->str_range = 0;
1759 	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1760 	if ((op = get_op(*l)) != -1) {
1761 		if (!IS_LIBMAGIC_STRING(m->type)) {
1762 			uint64_t val;
1763 			++l;
1764 			m->mask_op |= op;
1765 			val = (uint64_t)strtoull(l, &t, 0);
1766 			l = t;
1767 			m->num_mask = file_signextend(ms, m, val);
1768 			eatsize(&l);
1769 		}
1770 		else if (op == FILE_OPDIVIDE) {
1771 			int have_range = 0;
1772 			while (!isspace((unsigned char)*++l)) {
1773 				switch (*l) {
1774 				case '0':  case '1':  case '2':
1775 				case '3':  case '4':  case '5':
1776 				case '6':  case '7':  case '8':
1777 				case '9':
1778 					if (have_range &&
1779 					    (ms->flags & MAGIC_CHECK))
1780 						file_magwarn(ms,
1781 						    "multiple ranges");
1782 					have_range = 1;
1783 					m->str_range = CAST(uint32_t,
1784 					    strtoul(l, &t, 0));
1785 					if (m->str_range == 0)
1786 						file_magwarn(ms,
1787 						    "zero range");
1788 					l = t - 1;
1789 					break;
1790 				case CHAR_COMPACT_WHITESPACE:
1791 					m->str_flags |=
1792 					    STRING_COMPACT_WHITESPACE;
1793 					break;
1794 				case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1795 					m->str_flags |=
1796 					    STRING_COMPACT_OPTIONAL_WHITESPACE;
1797 					break;
1798 				case CHAR_IGNORE_LOWERCASE:
1799 					m->str_flags |= STRING_IGNORE_LOWERCASE;
1800 					break;
1801 				case CHAR_IGNORE_UPPERCASE:
1802 					m->str_flags |= STRING_IGNORE_UPPERCASE;
1803 					break;
1804 				case CHAR_REGEX_OFFSET_START:
1805 					m->str_flags |= REGEX_OFFSET_START;
1806 					break;
1807 				case CHAR_BINTEST:
1808 					m->str_flags |= STRING_BINTEST;
1809 					break;
1810 				case CHAR_TEXTTEST:
1811 					m->str_flags |= STRING_TEXTTEST;
1812 					break;
1813 				case CHAR_TRIM:
1814 					m->str_flags |= STRING_TRIM;
1815 					break;
1816 				case CHAR_PSTRING_1_LE:
1817 					if (m->type != FILE_PSTRING)
1818 						goto bad;
1819 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
1820 					break;
1821 				case CHAR_PSTRING_2_BE:
1822 					if (m->type != FILE_PSTRING)
1823 						goto bad;
1824 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
1825 					break;
1826 				case CHAR_PSTRING_2_LE:
1827 					if (m->type != FILE_PSTRING)
1828 						goto bad;
1829 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
1830 					break;
1831 				case CHAR_PSTRING_4_BE:
1832 					if (m->type != FILE_PSTRING)
1833 						goto bad;
1834 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
1835 					break;
1836 				case CHAR_PSTRING_4_LE:
1837 					if (m->type != FILE_PSTRING)
1838 						goto bad;
1839 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
1840 					break;
1841 				case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1842 					if (m->type != FILE_PSTRING)
1843 						goto bad;
1844 					m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1845 					break;
1846 				default:
1847 				bad:
1848 					if (ms->flags & MAGIC_CHECK)
1849 						file_magwarn(ms,
1850 						    "string extension `%c' "
1851 						    "invalid", *l);
1852 					return -1;
1853 				}
1854 				/* allow multiple '/' for readability */
1855 				if (l[1] == '/' &&
1856 				    !isspace((unsigned char)l[2]))
1857 					l++;
1858 			}
1859 			if (string_modifier_check(ms, m) == -1)
1860 				return -1;
1861 		}
1862 		else {
1863 			if (ms->flags & MAGIC_CHECK)
1864 				file_magwarn(ms, "invalid string op: %c", *t);
1865 			return -1;
1866 		}
1867 	}
1868 	/*
1869 	 * We used to set mask to all 1's here, instead let's just not do
1870 	 * anything if mask = 0 (unless you have a better idea)
1871 	 */
1872 	EATAB;
1873 
1874 	switch (*l) {
1875 	case '>':
1876 	case '<':
1877   		m->reln = *l;
1878   		++l;
1879 		if (*l == '=') {
1880 			if (ms->flags & MAGIC_CHECK) {
1881 				file_magwarn(ms, "%c= not supported",
1882 				    m->reln);
1883 				return -1;
1884 			}
1885 		   ++l;
1886 		}
1887 		break;
1888 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
1889 	case '&':
1890 	case '^':
1891 	case '=':
1892   		m->reln = *l;
1893   		++l;
1894 		if (*l == '=') {
1895 		   /* HP compat: ignore &= etc. */
1896 		   ++l;
1897 		}
1898 		break;
1899 	case '!':
1900 		m->reln = *l;
1901 		++l;
1902 		break;
1903 	default:
1904   		m->reln = '=';	/* the default relation */
1905 		if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1906 		    isspace((unsigned char)l[1])) || !l[1])) {
1907 			m->reln = *l;
1908 			++l;
1909 		}
1910 		break;
1911 	}
1912 	/*
1913 	 * Grab the value part, except for an 'x' reln.
1914 	 */
1915 	if (m->reln != 'x' && getvalue(ms, m, &l, action))
1916 		return -1;
1917 
1918 	/*
1919 	 * TODO finish this macro and start using it!
1920 	 * #define offsetcheck {if (offset > HOWMANY-1)
1921 	 *	magwarn("offset too big"); }
1922 	 */
1923 
1924 	/*
1925 	 * Now get last part - the description
1926 	 */
1927 	EATAB;
1928 	if (l[0] == '\b') {
1929 		++l;
1930 		m->flag |= NOSPACE;
1931 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
1932 		++l;
1933 		++l;
1934 		m->flag |= NOSPACE;
1935 	}
1936 	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1937 		continue;
1938 	if (i == sizeof(m->desc)) {
1939 		m->desc[sizeof(m->desc) - 1] = '\0';
1940 		if (ms->flags & MAGIC_CHECK)
1941 			file_magwarn(ms, "description `%s' truncated", m->desc);
1942 	}
1943 
1944         /*
1945 	 * We only do this check while compiling, or if any of the magic
1946 	 * files were not compiled.
1947          */
1948         if (ms->flags & MAGIC_CHECK) {
1949 		if (check_format(ms, m) == -1)
1950 			return -1;
1951 	}
1952 	m->mimetype[0] = '\0';		/* initialise MIME type to none */
1953 	return 0;
1954 }
1955 
1956 /*
1957  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1958  * if valid
1959  */
1960 private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line)1961 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1962 {
1963 	const char *l = line;
1964 	char *el;
1965 	unsigned long factor;
1966 	struct magic *m = &me->mp[0];
1967 
1968 	if (m->factor_op != FILE_FACTOR_OP_NONE) {
1969 		file_magwarn(ms,
1970 		    "Current entry already has a strength type: %c %d",
1971 		    m->factor_op, m->factor);
1972 		return -1;
1973 	}
1974 	EATAB;
1975 	switch (*l) {
1976 	case FILE_FACTOR_OP_NONE:
1977 	case FILE_FACTOR_OP_PLUS:
1978 	case FILE_FACTOR_OP_MINUS:
1979 	case FILE_FACTOR_OP_TIMES:
1980 	case FILE_FACTOR_OP_DIV:
1981 		m->factor_op = *l++;
1982 		break;
1983 	default:
1984 		file_magwarn(ms, "Unknown factor op `%c'", *l);
1985 		return -1;
1986 	}
1987 	EATAB;
1988 	factor = strtoul(l, &el, 0);
1989 	if (factor > 255) {
1990 		file_magwarn(ms, "Too large factor `%lu'", factor);
1991 		goto out;
1992 	}
1993 	if (*el && !isspace((unsigned char)*el)) {
1994 		file_magwarn(ms, "Bad factor `%s'", l);
1995 		goto out;
1996 	}
1997 	m->factor = (uint8_t)factor;
1998 	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
1999 		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2000 		    m->factor_op, m->factor);
2001 		goto out;
2002 	}
2003 	return 0;
2004 out:
2005 	m->factor_op = FILE_FACTOR_OP_NONE;
2006 	m->factor = 0;
2007 	return -1;
2008 }
2009 
2010 /*
2011  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2012  * magic[index - 1]
2013  */
2014 private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line)2015 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2016 {
2017 	size_t i;
2018 	const char *l = line;
2019 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2020 
2021 	if (m->apple[0] != '\0') {
2022 		file_magwarn(ms, "Current entry already has a APPLE type "
2023 		    "`%.8s', new type `%s'", m->mimetype, l);
2024 		return -1;
2025 	}
2026 
2027 	EATAB;
2028 	for (i = 0; *l && ((isascii((unsigned char)*l) &&
2029 	    isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2030 	    i < sizeof(m->apple); m->apple[i++] = *l++)
2031 		continue;
2032 	if (i == sizeof(m->apple) && *l) {
2033 		/* We don't need to NUL terminate here, printing handles it */
2034 		if (ms->flags & MAGIC_CHECK)
2035 			file_magwarn(ms, "APPLE type `%s' truncated %"
2036 			    SIZE_T_FORMAT "u", line, i);
2037 	}
2038 
2039 	if (i > 0)
2040 		return 0;
2041 	else
2042 		return -1;
2043 }
2044 
2045 /*
2046  * parse a MIME annotation line from magic file, put into magic[index - 1]
2047  * if valid
2048  */
2049 private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line)2050 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2051 {
2052 	size_t i;
2053 	const char *l = line;
2054 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2055 
2056 	if (m->mimetype[0] != '\0') {
2057 		file_magwarn(ms, "Current entry already has a MIME type `%s',"
2058 		    " new type `%s'", m->mimetype, l);
2059 		return -1;
2060 	}
2061 
2062 	EATAB;
2063 	for (i = 0; *l && ((isascii((unsigned char)*l) &&
2064 	    isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2065 	    i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
2066 		continue;
2067 	if (i == sizeof(m->mimetype)) {
2068 		m->mimetype[sizeof(m->mimetype) - 1] = '\0';
2069 		if (ms->flags & MAGIC_CHECK)
2070 			file_magwarn(ms, "MIME type `%s' truncated %"
2071 			    SIZE_T_FORMAT "u", m->mimetype, i);
2072 	} else
2073 		m->mimetype[i] = '\0';
2074 
2075 	if (i > 0)
2076 		return 0;
2077 	else
2078 		return -1;
2079 }
2080 
2081 private int
check_format_type(const char * ptr,int type)2082 check_format_type(const char *ptr, int type)
2083 {
2084 	int quad = 0;
2085 	if (*ptr == '\0') {
2086 		/* Missing format string; bad */
2087 		return -1;
2088 	}
2089 
2090 	switch (type) {
2091 	case FILE_FMT_QUAD:
2092 		quad = 1;
2093 		/*FALLTHROUGH*/
2094 	case FILE_FMT_NUM:
2095 		if (*ptr == '-')
2096 			ptr++;
2097 		if (*ptr == '.')
2098 			ptr++;
2099 		while (isdigit((unsigned char)*ptr)) ptr++;
2100 		if (*ptr == '.')
2101 			ptr++;
2102 		while (isdigit((unsigned char)*ptr)) ptr++;
2103 		if (quad) {
2104 			if (*ptr++ != 'l')
2105 				return -1;
2106 			if (*ptr++ != 'l')
2107 				return -1;
2108 		}
2109 
2110 		switch (*ptr++) {
2111 		case 'l':
2112 			switch (*ptr++) {
2113 			case 'i':
2114 			case 'd':
2115 			case 'u':
2116 			case 'o':
2117 			case 'x':
2118 			case 'X':
2119 				return 0;
2120 			default:
2121 				return -1;
2122 			}
2123 
2124 		case 'h':
2125 			switch (*ptr++) {
2126 			case 'h':
2127 				switch (*ptr++) {
2128 				case 'i':
2129 				case 'd':
2130 				case 'u':
2131 				case 'o':
2132 				case 'x':
2133 				case 'X':
2134 					return 0;
2135 				default:
2136 					return -1;
2137 				}
2138 			case 'd':
2139 				return 0;
2140 			default:
2141 				return -1;
2142 			}
2143 
2144 		case 'i':
2145 		case 'c':
2146 		case 'd':
2147 		case 'u':
2148 		case 'o':
2149 		case 'x':
2150 		case 'X':
2151 			return 0;
2152 
2153 		default:
2154 			return -1;
2155 		}
2156 
2157 	case FILE_FMT_FLOAT:
2158 	case FILE_FMT_DOUBLE:
2159 		if (*ptr == '-')
2160 			ptr++;
2161 		if (*ptr == '.')
2162 			ptr++;
2163 		while (isdigit((unsigned char)*ptr)) ptr++;
2164 		if (*ptr == '.')
2165 			ptr++;
2166 		while (isdigit((unsigned char)*ptr)) ptr++;
2167 
2168 		switch (*ptr++) {
2169 		case 'e':
2170 		case 'E':
2171 		case 'f':
2172 		case 'F':
2173 		case 'g':
2174 		case 'G':
2175 			return 0;
2176 
2177 		default:
2178 			return -1;
2179 		}
2180 
2181 
2182 	case FILE_FMT_STR:
2183 		if (*ptr == '-')
2184 			ptr++;
2185 		while (isdigit((unsigned char )*ptr))
2186 			ptr++;
2187 		if (*ptr == '.') {
2188 			ptr++;
2189 			while (isdigit((unsigned char )*ptr))
2190 				ptr++;
2191 		}
2192 
2193 		switch (*ptr++) {
2194 		case 's':
2195 			return 0;
2196 		default:
2197 			return -1;
2198 		}
2199 
2200 	default:
2201 		/* internal error */
2202 		abort();
2203 	}
2204 	/*NOTREACHED*/
2205 	return -1;
2206 }
2207 
2208 /*
2209  * Check that the optional printf format in description matches
2210  * the type of the magic.
2211  */
2212 private int
check_format(struct magic_set * ms,struct magic * m)2213 check_format(struct magic_set *ms, struct magic *m)
2214 {
2215 	char *ptr;
2216 
2217 	for (ptr = m->desc; *ptr; ptr++)
2218 		if (*ptr == '%')
2219 			break;
2220 	if (*ptr == '\0') {
2221 		/* No format string; ok */
2222 		return 1;
2223 	}
2224 
2225 	assert(file_nformats == file_nnames);
2226 
2227 	if (m->type >= file_nformats) {
2228 		file_magwarn(ms, "Internal error inconsistency between "
2229 		    "m->type and format strings");
2230 		return -1;
2231 	}
2232 	if (file_formats[m->type] == FILE_FMT_NONE) {
2233 		file_magwarn(ms, "No format string for `%s' with description "
2234 		    "`%s'", m->desc, file_names[m->type]);
2235 		return -1;
2236 	}
2237 
2238 	ptr++;
2239 	if (check_format_type(ptr, file_formats[m->type]) == -1) {
2240 		/*
2241 		 * TODO: this error message is unhelpful if the format
2242 		 * string is not one character long
2243 		 */
2244 		file_magwarn(ms, "Printf format `%c' is not valid for type "
2245 		    "`%s' in description `%s'", *ptr ? *ptr : '?',
2246 		    file_names[m->type], m->desc);
2247 		return -1;
2248 	}
2249 
2250 	for (; *ptr; ptr++) {
2251 		if (*ptr == '%') {
2252 			file_magwarn(ms,
2253 			    "Too many format strings (should have at most one) "
2254 			    "for `%s' with description `%s'",
2255 			    file_names[m->type], m->desc);
2256 			return -1;
2257 		}
2258 	}
2259 	return 0;
2260 }
2261 
2262 /*
2263  * Read a numeric value from a pointer, into the value union of a magic
2264  * pointer, according to the magic type.  Update the string pointer to point
2265  * just after the number read.  Return 0 for success, non-zero for failure.
2266  */
2267 private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2268 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2269 {
2270 	switch (m->type) {
2271 	case FILE_BESTRING16:
2272 	case FILE_LESTRING16:
2273 	case FILE_STRING:
2274 	case FILE_PSTRING:
2275 	case FILE_REGEX:
2276 	case FILE_SEARCH:
2277 	case FILE_NAME:
2278 	case FILE_USE:
2279 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2280 		if (*p == NULL) {
2281 			if (ms->flags & MAGIC_CHECK)
2282 				file_magwarn(ms, "cannot get string from `%s'",
2283 				    m->value.s);
2284 			return -1;
2285 		}
2286 		return 0;
2287 	case FILE_FLOAT:
2288 	case FILE_BEFLOAT:
2289 	case FILE_LEFLOAT:
2290 		if (m->reln != 'x') {
2291 			char *ep;
2292 #ifdef HAVE_STRTOF
2293 			m->value.f = strtof(*p, &ep);
2294 #else
2295 			m->value.f = (float)strtod(*p, &ep);
2296 #endif
2297 			*p = ep;
2298 		}
2299 		return 0;
2300 	case FILE_DOUBLE:
2301 	case FILE_BEDOUBLE:
2302 	case FILE_LEDOUBLE:
2303 		if (m->reln != 'x') {
2304 			char *ep;
2305 			m->value.d = strtod(*p, &ep);
2306 			*p = ep;
2307 		}
2308 		return 0;
2309 	default:
2310 		if (m->reln != 'x') {
2311 			char *ep;
2312 			m->value.q = file_signextend(ms, m,
2313 			    (uint64_t)strtoull(*p, &ep, 0));
2314 			*p = ep;
2315 			eatsize(p);
2316 		}
2317 		return 0;
2318 	}
2319 }
2320 
2321 /*
2322  * Convert a string containing C character escapes.  Stop at an unescaped
2323  * space or tab.
2324  * Copy the converted version to "m->value.s", and the length in m->vallen.
2325  * Return updated scan pointer as function result. Warn if set.
2326  */
2327 private const char *
getstr(struct magic_set * ms,struct magic * m,const char * s,int warn)2328 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2329 {
2330 	const char *origs = s;
2331 	char	*p = m->value.s;
2332 	size_t  plen = sizeof(m->value.s);
2333 	char 	*origp = p;
2334 	char	*pmax = p + plen - 1;
2335 	int	c;
2336 	int	val;
2337 
2338 	while ((c = *s++) != '\0') {
2339 		if (isspace((unsigned char) c))
2340 			break;
2341 		if (p >= pmax) {
2342 			file_error(ms, 0, "string too long: `%s'", origs);
2343 			return NULL;
2344 		}
2345 		if (c == '\\') {
2346 			switch(c = *s++) {
2347 
2348 			case '\0':
2349 				if (warn)
2350 					file_magwarn(ms, "incomplete escape");
2351 				goto out;
2352 
2353 			case '\t':
2354 				if (warn) {
2355 					file_magwarn(ms,
2356 					    "escaped tab found, use \\t instead");
2357 					warn = 0;	/* already did */
2358 				}
2359 				/*FALLTHROUGH*/
2360 			default:
2361 				if (warn) {
2362 					if (isprint((unsigned char)c)) {
2363 						/* Allow escaping of
2364 						 * ``relations'' */
2365 						if (strchr("<>&^=!", c) == NULL
2366 						    && (m->type != FILE_REGEX ||
2367 						    strchr("[]().*?^$|{}", c)
2368 						    == NULL)) {
2369 							file_magwarn(ms, "no "
2370 							    "need to escape "
2371 							    "`%c'", c);
2372 						}
2373 					} else {
2374 						file_magwarn(ms,
2375 						    "unknown escape sequence: "
2376 						    "\\%03o", c);
2377 					}
2378 				}
2379 				/*FALLTHROUGH*/
2380 			/* space, perhaps force people to use \040? */
2381 			case ' ':
2382 #if 0
2383 			/*
2384 			 * Other things people escape, but shouldn't need to,
2385 			 * so we disallow them
2386 			 */
2387 			case '\'':
2388 			case '"':
2389 			case '?':
2390 #endif
2391 			/* Relations */
2392 			case '>':
2393 			case '<':
2394 			case '&':
2395 			case '^':
2396 			case '=':
2397 			case '!':
2398 			/* and baskslash itself */
2399 			case '\\':
2400 				*p++ = (char) c;
2401 				break;
2402 
2403 			case 'a':
2404 				*p++ = '\a';
2405 				break;
2406 
2407 			case 'b':
2408 				*p++ = '\b';
2409 				break;
2410 
2411 			case 'f':
2412 				*p++ = '\f';
2413 				break;
2414 
2415 			case 'n':
2416 				*p++ = '\n';
2417 				break;
2418 
2419 			case 'r':
2420 				*p++ = '\r';
2421 				break;
2422 
2423 			case 't':
2424 				*p++ = '\t';
2425 				break;
2426 
2427 			case 'v':
2428 				*p++ = '\v';
2429 				break;
2430 
2431 			/* \ and up to 3 octal digits */
2432 			case '0':
2433 			case '1':
2434 			case '2':
2435 			case '3':
2436 			case '4':
2437 			case '5':
2438 			case '6':
2439 			case '7':
2440 				val = c - '0';
2441 				c = *s++;  /* try for 2 */
2442 				if (c >= '0' && c <= '7') {
2443 					val = (val << 3) | (c - '0');
2444 					c = *s++;  /* try for 3 */
2445 					if (c >= '0' && c <= '7')
2446 						val = (val << 3) | (c-'0');
2447 					else
2448 						--s;
2449 				}
2450 				else
2451 					--s;
2452 				*p++ = (char)val;
2453 				break;
2454 
2455 			/* \x and up to 2 hex digits */
2456 			case 'x':
2457 				val = 'x';	/* Default if no digits */
2458 				c = hextoint(*s++);	/* Get next char */
2459 				if (c >= 0) {
2460 					val = c;
2461 					c = hextoint(*s++);
2462 					if (c >= 0)
2463 						val = (val << 4) + c;
2464 					else
2465 						--s;
2466 				} else
2467 					--s;
2468 				*p++ = (char)val;
2469 				break;
2470 			}
2471 		} else
2472 			*p++ = (char)c;
2473 	}
2474 out:
2475 	*p = '\0';
2476 	m->vallen = CAST(unsigned char, (p - origp));
2477 	if (m->type == FILE_PSTRING)
2478 		m->vallen += (unsigned char)file_pstring_length_size(m);
2479 	return s;
2480 }
2481 
2482 
2483 /* Single hex char to int; -1 if not a hex char. */
2484 private int
hextoint(int c)2485 hextoint(int c)
2486 {
2487 	if (!isascii((unsigned char) c))
2488 		return -1;
2489 	if (isdigit((unsigned char) c))
2490 		return c - '0';
2491 	if ((c >= 'a') && (c <= 'f'))
2492 		return c + 10 - 'a';
2493 	if (( c>= 'A') && (c <= 'F'))
2494 		return c + 10 - 'A';
2495 	return -1;
2496 }
2497 
2498 
2499 /*
2500  * Print a string containing C character escapes.
2501  */
2502 protected void
file_showstr(FILE * fp,const char * s,size_t len)2503 file_showstr(FILE *fp, const char *s, size_t len)
2504 {
2505 	char	c;
2506 
2507 	for (;;) {
2508 		if (len == ~0U) {
2509 			c = *s++;
2510 			if (c == '\0')
2511 				break;
2512 		}
2513 		else  {
2514 			if (len-- == 0)
2515 				break;
2516 			c = *s++;
2517 		}
2518 		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
2519 			(void) fputc(c, fp);
2520 		else {
2521 			(void) fputc('\\', fp);
2522 			switch (c) {
2523 			case '\a':
2524 				(void) fputc('a', fp);
2525 				break;
2526 
2527 			case '\b':
2528 				(void) fputc('b', fp);
2529 				break;
2530 
2531 			case '\f':
2532 				(void) fputc('f', fp);
2533 				break;
2534 
2535 			case '\n':
2536 				(void) fputc('n', fp);
2537 				break;
2538 
2539 			case '\r':
2540 				(void) fputc('r', fp);
2541 				break;
2542 
2543 			case '\t':
2544 				(void) fputc('t', fp);
2545 				break;
2546 
2547 			case '\v':
2548 				(void) fputc('v', fp);
2549 				break;
2550 
2551 			default:
2552 				(void) fprintf(fp, "%.3o", c & 0377);
2553 				break;
2554 			}
2555 		}
2556 	}
2557 }
2558 
2559 /*
2560  * eatsize(): Eat the size spec from a number [eg. 10UL]
2561  */
2562 private void
eatsize(const char ** p)2563 eatsize(const char **p)
2564 {
2565 	const char *l = *p;
2566 
2567 	if (LOWCASE(*l) == 'u')
2568 		l++;
2569 
2570 	switch (LOWCASE(*l)) {
2571 	case 'l':    /* long */
2572 	case 's':    /* short */
2573 	case 'h':    /* short */
2574 	case 'b':    /* char/byte */
2575 	case 'c':    /* char/byte */
2576 		l++;
2577 		/*FALLTHROUGH*/
2578 	default:
2579 		break;
2580 	}
2581 
2582 	*p = l;
2583 }
2584 
2585 /*
2586  * handle a compiled file.
2587  */
2588 
2589 private struct magic_map *
apprentice_map(struct magic_set * ms,const char * fn)2590 apprentice_map(struct magic_set *ms, const char *fn)
2591 {
2592 	uint32_t *ptr;
2593 	uint32_t version, entries, nentries;
2594 	int needsbyteswap;
2595 	char *dbname = NULL;
2596 	struct magic_map *map;
2597 	size_t i;
2598 	php_stream *stream = NULL;
2599 	php_stream_statbuf st;
2600 
2601 
2602 	TSRMLS_FETCH();
2603 
2604 	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2605 		file_oomem(ms, sizeof(*map));
2606 		return NULL;
2607 	}
2608 
2609 	if (fn == NULL) {
2610 		map->p = (void *)&php_magic_database;
2611 		goto internal_loaded;
2612 	}
2613 
2614 #ifdef PHP_WIN32
2615 	/* Don't bother on windows with php_stream_open_wrapper,
2616 	return to give apprentice_load() a chance. */
2617 	if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2618                if (st.sb.st_mode & S_IFDIR) {
2619                        return NULL;
2620                }
2621        }
2622 #endif
2623 
2624 	dbname = mkdbname(ms, fn, 0);
2625 	if (dbname == NULL)
2626 		goto error;
2627 
2628 #if PHP_API_VERSION < 20100412
2629 		stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2630 #else
2631 		stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2632 #endif
2633 
2634 	if (!stream) {
2635 		goto error;
2636 	}
2637 
2638 	if (php_stream_stat(stream, &st) < 0) {
2639 		file_error(ms, errno, "cannot stat `%s'", dbname);
2640 		goto error;
2641 	}
2642 
2643 	if (st.sb.st_size < 8) {
2644 		file_error(ms, 0, "file `%s' is too small", dbname);
2645 		goto error;
2646 	}
2647 
2648 	map->len = (size_t)st.sb.st_size;
2649 	if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
2650 		file_oomem(ms, map->len);
2651 		goto error;
2652 	}
2653 	if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
2654 		file_badread(ms);
2655 		goto error;
2656 	}
2657 	map->len = 0;
2658 #define RET	1
2659 
2660 	php_stream_close(stream);
2661 	stream = NULL;
2662 
2663 internal_loaded:
2664 	ptr = (uint32_t *)(void *)map->p;
2665 	if (*ptr != MAGICNO) {
2666 		if (swap4(*ptr) != MAGICNO) {
2667 			file_error(ms, 0, "bad magic in `%s'", dbname);
2668 			goto error;
2669 		}
2670 		needsbyteswap = 1;
2671 	} else
2672 		needsbyteswap = 0;
2673 	if (needsbyteswap)
2674 		version = swap4(ptr[1]);
2675 	else
2676 		version = ptr[1];
2677 	if (version != VERSIONNO) {
2678 		file_error(ms, 0, "File %d.%d supports only version %d magic "
2679 		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2680 		    VERSIONNO, dbname, version);
2681 		goto error;
2682 	}
2683 
2684 	/* php_magic_database is a const, performing writes will segfault. This is for big-endian
2685 	machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2686 	future. */
2687 	if (needsbyteswap && fn == NULL) {
2688 		map->p = emalloc(sizeof(php_magic_database));
2689 		map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
2690 	}
2691 
2692 	if (NULL != fn) {
2693 		nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2694 		entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2695 		if ((off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
2696 			file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
2697 				dbname, (unsigned long long)st.sb.st_size,
2698 				sizeof(struct magic));
2699 			goto error;
2700 		}
2701 	}
2702 	map->magic[0] = CAST(struct magic *, map->p) + 1;
2703 	nentries = 0;
2704 	for (i = 0; i < MAGIC_SETS; i++) {
2705 		if (needsbyteswap)
2706 			map->nmagic[i] = swap4(ptr[i + 2]);
2707 		else
2708 			map->nmagic[i] = ptr[i + 2];
2709 		if (i != MAGIC_SETS - 1)
2710 			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2711 		nentries += map->nmagic[i];
2712 	}
2713 	if (NULL != fn && entries != nentries + 1) {
2714 		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2715 		    dbname, entries, nentries + 1);
2716 		goto error;
2717 	}
2718 
2719 	if (needsbyteswap)
2720 		for (i = 0; i < MAGIC_SETS; i++)
2721 			byteswap(map->magic[i], map->nmagic[i]);
2722 
2723 	if (dbname) {
2724 		efree(dbname);
2725 	}
2726 	return map;
2727 
2728 error:
2729 	if (stream) {
2730 		php_stream_close(stream);
2731 	}
2732 	apprentice_unmap(map);
2733 	if (dbname) {
2734 		efree(dbname);
2735 	}
2736 	return NULL;
2737 }
2738 
2739 private const uint32_t ar[] = {
2740     MAGICNO, VERSIONNO
2741 };
2742 
2743 /*
2744  * handle an mmaped file.
2745  */
2746 private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)2747 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
2748 {
2749 	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
2750 	static const size_t m = sizeof(**map->magic);
2751 	int fd = -1;
2752 	size_t len;
2753 	char *dbname;
2754 	int rv = -1;
2755 	uint32_t i;
2756 	php_stream *stream;
2757 
2758 	TSRMLS_FETCH();
2759 
2760 	dbname = mkdbname(ms, fn, 0);
2761 
2762 	if (dbname == NULL)
2763 		goto out;
2764 
2765 /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
2766 #if PHP_API_VERSION < 20100412
2767 	stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2768 #else
2769 	stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
2770 #endif
2771 
2772 	if (!stream) {
2773 		file_error(ms, errno, "cannot open `%s'", dbname);
2774 		goto out;
2775 	}
2776 
2777 	if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
2778 		file_error(ms, errno, "error writing `%s'", dbname);
2779 		goto out;
2780 	}
2781 
2782 	if (php_stream_write(stream, (const char *)map->nmagic, nm) != (ssize_t)nm) {
2783 		file_error(ms, errno, "error writing `%s'", dbname);
2784 		goto out;
2785 	}
2786 
2787 	assert(nm + sizeof(ar) < m);
2788 
2789 	if (php_stream_seek(stream,(off_t)sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
2790 		file_error(ms, errno, "error seeking `%s'", dbname);
2791 		goto out;
2792 	}
2793 
2794 	for (i = 0; i < MAGIC_SETS; i++) {
2795 		len = m * map->nmagic[i];
2796 		if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
2797 			file_error(ms, errno, "error writing `%s'", dbname);
2798 			goto out;
2799 		}
2800 	}
2801 
2802 	if (stream) {
2803 		php_stream_close(stream);
2804 	}
2805 
2806 	rv = 0;
2807 out:
2808 	efree(dbname);
2809 	return rv;
2810 }
2811 
2812 private const char ext[] = ".mgc";
2813 /*
2814  * make a dbname
2815  */
2816 private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)2817 mkdbname(struct magic_set *ms, const char *fn, int strip)
2818 {
2819 	const char *p, *q;
2820 	char *buf;
2821 	TSRMLS_FETCH();
2822 
2823 	if (strip) {
2824 		if ((p = strrchr(fn, '/')) != NULL)
2825 			fn = ++p;
2826 	}
2827 
2828 	for (q = fn; *q; q++)
2829 		continue;
2830 	/* Look for .mgc */
2831 	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
2832 		if (*p != *q)
2833 			break;
2834 
2835 	/* Did not find .mgc, restore q */
2836 	if (p >= ext)
2837 		while (*q)
2838 			q++;
2839 
2840 	q++;
2841 	/* Compatibility with old code that looked in .mime */
2842 	if (ms->flags & MAGIC_MIME) {
2843 		spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
2844 #ifdef PHP_WIN32
2845 		if (VCWD_ACCESS(buf, R_OK) == 0) {
2846 #else
2847 		if (VCWD_ACCESS(buf, R_OK) != -1) {
2848 #endif
2849 			ms->flags &= MAGIC_MIME_TYPE;
2850 			return buf;
2851 		}
2852 		efree(buf);
2853 	}
2854 	spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
2855 
2856 	/* Compatibility with old code that looked in .mime */
2857 	if (strstr(p, ".mime") != NULL)
2858 		ms->flags &= MAGIC_MIME_TYPE;
2859 	return buf;
2860 }
2861 
2862 /*
2863  * Byteswap an mmap'ed file if needed
2864  */
2865 private void
2866 byteswap(struct magic *magic, uint32_t nmagic)
2867 {
2868 	uint32_t i;
2869 	for (i = 0; i < nmagic; i++)
2870 		bs1(&magic[i]);
2871 }
2872 
2873 /*
2874  * swap a short
2875  */
2876 private uint16_t
2877 swap2(uint16_t sv)
2878 {
2879 	uint16_t rv;
2880 	uint8_t *s = (uint8_t *)(void *)&sv;
2881 	uint8_t *d = (uint8_t *)(void *)&rv;
2882 	d[0] = s[1];
2883 	d[1] = s[0];
2884 	return rv;
2885 }
2886 
2887 /*
2888  * swap an int
2889  */
2890 private uint32_t
2891 swap4(uint32_t sv)
2892 {
2893 	uint32_t rv;
2894 	uint8_t *s = (uint8_t *)(void *)&sv;
2895 	uint8_t *d = (uint8_t *)(void *)&rv;
2896 	d[0] = s[3];
2897 	d[1] = s[2];
2898 	d[2] = s[1];
2899 	d[3] = s[0];
2900 	return rv;
2901 }
2902 
2903 /*
2904  * swap a quad
2905  */
2906 private uint64_t
2907 swap8(uint64_t sv)
2908 {
2909 	uint64_t rv;
2910 	uint8_t *s = (uint8_t *)(void *)&sv;
2911 	uint8_t *d = (uint8_t *)(void *)&rv;
2912 #if 0
2913 	d[0] = s[3];
2914 	d[1] = s[2];
2915 	d[2] = s[1];
2916 	d[3] = s[0];
2917 	d[4] = s[7];
2918 	d[5] = s[6];
2919 	d[6] = s[5];
2920 	d[7] = s[4];
2921 #else
2922 	d[0] = s[7];
2923 	d[1] = s[6];
2924 	d[2] = s[5];
2925 	d[3] = s[4];
2926 	d[4] = s[3];
2927 	d[5] = s[2];
2928 	d[6] = s[1];
2929 	d[7] = s[0];
2930 #endif
2931 	return rv;
2932 }
2933 
2934 /*
2935  * byteswap a single magic entry
2936  */
2937 private void
2938 bs1(struct magic *m)
2939 {
2940 	m->cont_level = swap2(m->cont_level);
2941 	m->offset = swap4((uint32_t)m->offset);
2942 	m->in_offset = swap4((uint32_t)m->in_offset);
2943 	m->lineno = swap4((uint32_t)m->lineno);
2944 	if (IS_LIBMAGIC_STRING(m->type)) {
2945 		m->str_range = swap4(m->str_range);
2946 		m->str_flags = swap4(m->str_flags);
2947 	}
2948 	else {
2949 		m->value.q = swap8(m->value.q);
2950 		m->num_mask = swap8(m->num_mask);
2951 	}
2952 }
2953 
2954 protected size_t
2955 file_pstring_length_size(const struct magic *m)
2956 {
2957 	switch (m->str_flags & PSTRING_LEN) {
2958 	case PSTRING_1_LE:
2959 		return 1;
2960 	case PSTRING_2_LE:
2961 	case PSTRING_2_BE:
2962 		return 2;
2963 	case PSTRING_4_LE:
2964 	case PSTRING_4_BE:
2965 		return 4;
2966 	default:
2967 		abort();	/* Impossible */
2968 		return 1;
2969 	}
2970 }
2971 protected size_t
2972 file_pstring_get_length(const struct magic *m, const char *s)
2973 {
2974 	size_t len = 0;
2975 
2976 	switch (m->str_flags & PSTRING_LEN) {
2977 	case PSTRING_1_LE:
2978 		len = *s;
2979 		break;
2980 	case PSTRING_2_LE:
2981 		len = (s[1] << 8) | s[0];
2982 		break;
2983 	case PSTRING_2_BE:
2984 		len = (s[0] << 8) | s[1];
2985 		break;
2986 	case PSTRING_4_LE:
2987 		len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
2988 		break;
2989 	case PSTRING_4_BE:
2990 		len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
2991 		break;
2992 	default:
2993 		abort();	/* Impossible */
2994 	}
2995 
2996 	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
2997 		len -= file_pstring_length_size(m);
2998 
2999 	return len;
3000 }
3001 
3002 protected int
3003 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3004 {
3005 	uint32_t i, j;
3006 	struct mlist *mlist, *ml;
3007 
3008 	mlist = ms->mlist[1];
3009 
3010 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3011 		struct magic *ma = ml->magic;
3012 		uint32_t nma = ml->nmagic;
3013 		for (i = 0; i < nma; i++) {
3014 			if (ma[i].type != FILE_NAME)
3015 				continue;
3016 			if (strcmp(ma[i].value.s, name) == 0) {
3017 				v->magic = &ma[i];
3018 				for (j = i + 1; j < nma; j++)
3019 				    if (ma[j].cont_level == 0)
3020 					    break;
3021 				v->nmagic = j - i;
3022 				return 0;
3023 			}
3024 		}
3025 	}
3026 	return -1;
3027 }
3028