xref: /PHP-7.2/ext/fileinfo/libmagic/file.h (revision 0e33c282)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * file.h - definitions for file(1) program
30  * @(#)$File: file.h,v 1.182 2017/04/07 19:46:44 christos Exp $
31  */
32 
33 #ifndef __file_h__
34 #define __file_h__
35 
36 #include "config.h"
37 
/*
 * printf length-modifier strings for size_t, int64_t and intmax_t
 * arguments (per the macro names), selected by platform:
 *   - PHP_WIN32: "I64" for the 64-bit types; SIZE_T_FORMAT is "I64" only
 *     when _WIN64 is defined and empty otherwise.
 *   - elsewhere: "z", "ll" and "j".
 * Being string literals they can be pasted into a format string,
 * e.g. "%" SIZE_T_FORMAT "u".
 */
38 #ifdef PHP_WIN32
39   #ifdef _WIN64
40     #define SIZE_T_FORMAT "I64"
41   #else
42     #define SIZE_T_FORMAT ""
43   #endif
44   #define INT64_T_FORMAT "I64"
45   #define INTMAX_T_FORMAT "I64"
46 #else
47   #define SIZE_T_FORMAT "z"
48   #define INT64_T_FORMAT "ll"
49   #define INTMAX_T_FORMAT "j"
50 #endif
51 
52 #include <stdio.h>	/* Include that here, to make sure __P gets defined */
53 #include <errno.h>
54 #include <fcntl.h>	/* For open and flags */
55 #ifdef HAVE_STDINT_H
56 #ifndef __STDC_LIMIT_MACROS
57 #define __STDC_LIMIT_MACROS
58 #endif
59 #include <stdint.h>
60 #endif
61 #ifdef HAVE_INTTYPES_H
62 #include <inttypes.h>
63 #endif
64 #ifdef PHP_WIN32
65 #include "win32/php_stdint.h"
66 #endif
67 
68 #include "php.h"
69 #include "ext/standard/php_string.h"
70 #include "ext/pcre/php_pcre.h"
71 
72 #include <sys/types.h>
73 #ifdef PHP_WIN32
74 #include "win32/param.h"
75 #else
76 #include <sys/param.h>
77 #endif
78 /* Do this here and now, because struct stat gets re-defined on solaris */
79 #include <sys/stat.h>
80 #include <stdarg.h>
81 
82 #define ENABLE_CONDITIONALS
83 
84 #ifndef MAGIC
85 #define MAGIC "/etc/magic"
86 #endif
87 
88 #if defined(__EMX__) || defined(PHP_WIN32)
89 #define PATHSEP	';'
90 #else
91 #define PATHSEP	':'
92 #endif
93 
/*
 * Linkage/visibility keywords used on declarations in this header.
 *   private   - always expands to `static`.
 *   public    - visibility("default") when HAVE_VISIBILITY is nonzero and
 *               WIN32 is not defined; otherwise expands to nothing.
 *   protected - visibility("hidden") under the same condition; otherwise
 *               nothing.  Left alone if `protected` is already a macro.
 */
94 #define private static
95 
96 #if HAVE_VISIBILITY && !defined(WIN32)
97 #define public  __attribute__ ((__visibility__("default")))
98 #ifndef protected
99 #define protected __attribute__ ((__visibility__("hidden")))
100 #endif
101 #else
102 #define public
103 #ifndef protected
104 #define protected
105 #endif
106 #endif
107 
/*
 * __arraycount(a): number of elements in the array object `a`, computed
 * as the size of the whole array divided by the size of its first
 * element.  Only meaningful for true arrays: a pointer argument compiles
 * but yields an unrelated number.
 *
 * The argument is parenthesized inside the element expression so that an
 * arbitrary argument expression is indexed as a unit.  The #ifndef guard
 * lets a definition that is already in scope (for example from a system
 * header) take precedence.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif
111 
/*
 * __GNUC_PREREQ__(x, y): nonzero when __GNUC__ is defined and the
 * compiler version (__GNUC__.__GNUC_MINOR__) is at least x.y; constant 0
 * when __GNUC__ is not defined.  Skipped entirely if the macro already
 * exists.
 */
112 #ifndef __GNUC_PREREQ__
113 #ifdef __GNUC__
114 #define	__GNUC_PREREQ__(x, y)						\
115 	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
116 	 (__GNUC__ > (x)))
117 #else
118 #define	__GNUC_PREREQ__(x, y)	0
119 #endif
120 #endif
121 
/*
 * MIN/MAX: smaller/larger of two values, defined only when not already
 * provided.  Each expands its arguments textually, so the selected
 * argument is evaluated twice -- do not pass expressions with side
 * effects.
 */
122 #ifndef MIN
123 #define	MIN(a,b)	(((a) < (b)) ? (a) : (b))
124 #endif
125 
126 #ifndef MAX
127 #define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
128 #endif
129 
130 #ifndef FILE_BYTES_MAX
131 # define FILE_BYTES_MAX (1024 * 1024)	/* how much of the file to look at (1 MiB unless predefined) */
132 #endif
133 #define MAXMAGIS 8192		/* max entries in any one magic file
134 				   or directory */
135 #define MAXDESC	64		/* max len of text description (size of struct magic's desc[]) */
136 #define MAXMIME	80		/* max len of text MIME type (size of struct magic's mimetype[]) */
137 #define MAXstring 96		/* max len of "string" types (size of VALUETYPE's s[] and us[]) */
138 
/* NOTE(review): presumably the signature and format version of compiled
   magic data -- confirm against the code that loads it. */
139 #define MAGICNO		0xF11E041C
140 #define VERSIONNO	14
/* 344 bytes = 86 four-byte words, which is what the member sizes of
   struct magic in this header add up to. */
141 #define FILE_MAGICSIZE	344
142 
143 #define	FILE_LOAD	0
144 #define FILE_CHECK	1
145 #define FILE_COMPILE	2
146 #define FILE_LIST	3
147 
/*
 * Storage for one value, viewed either as a number of a given width,
 * as raw bytes of a fixed-endian number, as a string, or as a
 * floating-point value.  All members overlap; the string members make
 * the union MAXstring bytes long.
 */
148 union VALUETYPE {
149 	uint8_t b;	/* 8-bit value */
150 	uint16_t h;	/* 16-bit value */
151 	uint32_t l;	/* 32-bit value */
152 	uint64_t q;	/* 64-bit value */
153 	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
154 	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
155 	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
156 	char s[MAXstring];	/* the search string or regex pattern */
157 	unsigned char us[MAXstring];	/* same bytes as s[], viewed unsigned */
158 	float f;
159 	double d;
160 };
161 
/*
 * A single magic entry; `lineno` records the magic-file line it belongs
 * to.  The FILE_*, COND_* and flag macros are defined in between the
 * members.
 *
 * The "Word N" markers count 32-bit words from the start of the struct.
 * With MAXstring = 96, MAXDESC = 64 and MAXMIME = 80 the members add up
 * to 86 words = 344 bytes, which is the value of FILE_MAGICSIZE.
 */
162 struct magic {
163 	/* Word 1 */
164 	uint16_t cont_level;	/* level of ">" */
165 	uint8_t flag;
166 #define INDIR		0x01	/* if '(...)' appears */
167 #define OFFADD		0x02	/* if '>&' or '>...(&' appears */
168 #define INDIROFFADD	0x04	/* if '>&(' appears */
169 #define UNSIGNED	0x08	/* comparison is unsigned */
170 #define NOSPACE		0x10	/* suppress space character before output */
171 #define BINTEST		0x20	/* test is for a binary type (set only
172 				   for top-level tests) */
173 #define TEXTTEST	0x40	/* for passing to file_softmagic */
174 
175 	uint8_t factor;		/* NOTE(review): presumably the operand used with factor_op -- confirm */
176 
177 	/* Word 2 */
178 	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
179 	uint8_t vallen;		/* length of string value, if any */
180 	uint8_t type;		/* comparison type (FILE_*) */
181 	uint8_t in_type;	/* type of indirection */
182 #define 			FILE_INVALID	0
183 #define 			FILE_BYTE	1
184 #define				FILE_SHORT	2
185 #define				FILE_DEFAULT	3
186 #define				FILE_LONG	4
187 #define				FILE_STRING	5
188 #define				FILE_DATE	6
189 #define				FILE_BESHORT	7
190 #define				FILE_BELONG	8
191 #define				FILE_BEDATE	9
192 #define				FILE_LESHORT	10
193 #define				FILE_LELONG	11
194 #define				FILE_LEDATE	12
195 #define				FILE_PSTRING	13
196 #define				FILE_LDATE	14
197 #define				FILE_BELDATE	15
198 #define				FILE_LELDATE	16
199 #define				FILE_REGEX	17
200 #define				FILE_BESTRING16	18
201 #define				FILE_LESTRING16	19
202 #define				FILE_SEARCH	20
203 #define				FILE_MEDATE	21
204 #define				FILE_MELDATE	22
205 #define				FILE_MELONG	23
206 #define				FILE_QUAD	24
207 #define				FILE_LEQUAD	25
208 #define				FILE_BEQUAD	26
209 #define				FILE_QDATE	27
210 #define				FILE_LEQDATE	28
211 #define				FILE_BEQDATE	29
212 #define				FILE_QLDATE	30
213 #define				FILE_LEQLDATE	31
214 #define				FILE_BEQLDATE	32
215 #define				FILE_FLOAT	33
216 #define				FILE_BEFLOAT	34
217 #define				FILE_LEFLOAT	35
218 #define				FILE_DOUBLE	36
219 #define				FILE_BEDOUBLE	37
220 #define				FILE_LEDOUBLE	38
221 #define				FILE_BEID3	39
222 #define				FILE_LEID3	40
223 #define				FILE_INDIRECT	41
224 #define				FILE_QWDATE	42
225 #define				FILE_LEQWDATE	43
226 #define				FILE_BEQWDATE	44
227 #define				FILE_NAME	45
228 #define				FILE_USE	46
229 #define				FILE_CLEAR	47
230 #define				FILE_DER	48
231 #define				FILE_NAMES_SIZE	49 /* size of array to contain all names */
232 
/* Nonzero when t is one of the FILE_* type codes listed here. */
233 #define IS_LIBMAGIC_STRING(t) \
234 	((t) == FILE_STRING || \
235 	 (t) == FILE_PSTRING || \
236 	 (t) == FILE_BESTRING16 || \
237 	 (t) == FILE_LESTRING16 || \
238 	 (t) == FILE_REGEX || \
239 	 (t) == FILE_SEARCH || \
240 	 (t) == FILE_INDIRECT || \
241 	 (t) == FILE_NAME || \
242 	 (t) == FILE_USE)
243 
244 #define FILE_FMT_NONE 0
245 #define FILE_FMT_NUM  1 /* "cduxXi" */
246 #define FILE_FMT_STR  2 /* "s" */
247 #define FILE_FMT_QUAD 3 /* "ll" */
248 #define FILE_FMT_FLOAT 4 /* "eEfFgG" */
249 #define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
250 
251 	/* Word 3 */
252 	uint8_t in_op;		/* operator for indirection */
253 	uint8_t mask_op;	/* operator for mask */
254 #ifdef ENABLE_CONDITIONALS
255 	uint8_t cond;		/* conditional type */
256 #else
257 	uint8_t dummy;		/* keeps the layout identical when conditionals are disabled */
258 #endif
259 	uint8_t factor_op;
260 #define		FILE_FACTOR_OP_PLUS	'+'
261 #define		FILE_FACTOR_OP_MINUS	'-'
262 #define		FILE_FACTOR_OP_TIMES	'*'
263 #define		FILE_FACTOR_OP_DIV	'/'
264 #define		FILE_FACTOR_OP_NONE	'\0'
265 
/* FILE_OP* values 0..7 follow the character order of FILE_OPS;
   the remaining macros are bits above the 3-bit FILE_OPS_MASK. */
266 #define				FILE_OPS	"&|^+-*/%"
267 #define				FILE_OPAND	0
268 #define				FILE_OPOR	1
269 #define				FILE_OPXOR	2
270 #define				FILE_OPADD	3
271 #define				FILE_OPMINUS	4
272 #define				FILE_OPMULTIPLY	5
273 #define				FILE_OPDIVIDE	6
274 #define				FILE_OPMODULO	7
275 #define				FILE_OPS_MASK	0x07 /* mask for above ops */
276 #define				FILE_UNUSED_1	0x08
277 #define				FILE_UNUSED_2	0x10
278 #define				FILE_OPSIGNED	0x20
279 #define				FILE_OPINVERSE	0x40
280 #define				FILE_OPINDIRECT	0x80
281 
282 #ifdef ENABLE_CONDITIONALS
283 #define				COND_NONE	0
284 #define				COND_IF		1
285 #define				COND_ELIF	2
286 #define				COND_ELSE	3
287 #endif /* ENABLE_CONDITIONALS */
288 
289 	/* Word 4 */
290 	uint32_t offset;	/* offset to magic number */
291 	/* Word 5 */
292 	int32_t in_offset;	/* offset from indirection */
293 	/* Word 6 */
294 	uint32_t lineno;	/* line number in magic file */
295 	/* Word 7,8 */
296 	union {
297 		uint64_t _mask;	/* for use with numeric and date types */
298 		struct {
299 			uint32_t _count;	/* repeat/line count */
300 			uint32_t _flags;	/* modifier flags */
301 		} _s;		/* for use with string types */
302 	} _u;
303 #define num_mask _u._mask
304 #define str_range _u._s._count
305 #define str_flags _u._s._flags
306 	/* Words 9-32 (MAXstring = 96 bytes = 24 words) */
307 	union VALUETYPE value;	/* either number or string */
308 	/* Words 33-48 (MAXDESC = 64 bytes) */
309 	char desc[MAXDESC];	/* description */
310 	/* Words 49-68 (MAXMIME = 80 bytes) */
311 	char mimetype[MAXMIME]; /* MIME type */
312 	/* Words 69-70 */
313 	char apple[8];		/* APPLE CREATOR/TYPE */
314 	/* Words 71-86 */
315 	char ext[64];		/* Popular extensions */
316 };
317 
/*
 * Modifier bits (STRING_*, REGEX_*, PSTRING_*) followed by the
 * single-character codes (CHAR_*) that carry matching names.
 * NOTE(review): presumably the bits are stored in struct magic's
 * str_flags and the characters are the magic-file spelling of each
 * modifier -- confirm in the parser.
 *
 * BIT(A) shifts the signed int constant 1, so A must stay below the
 * sign bit (the largest index used here is 13).
 */
318 #define BIT(A)   (1 << (A))
319 #define STRING_COMPACT_WHITESPACE		BIT(0)
320 #define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
321 #define STRING_IGNORE_LOWERCASE			BIT(2)
322 #define STRING_IGNORE_UPPERCASE			BIT(3)
323 #define REGEX_OFFSET_START			BIT(4)
324 #define STRING_TEXTTEST				BIT(5)
325 #define STRING_BINTEST				BIT(6)
/* PSTRING_1_BE and PSTRING_1_LE are the same bit, BIT(7). */
326 #define PSTRING_1_BE				BIT(7)
327 #define PSTRING_1_LE				BIT(7)
328 #define PSTRING_2_BE				BIT(8)
329 #define PSTRING_2_LE				BIT(9)
330 #define PSTRING_4_BE				BIT(10)
/* PSTRING_4_LE and REGEX_LINE_COUNT are the same bit, BIT(11); the two
   cannot be told apart by value alone. */
331 #define PSTRING_4_LE				BIT(11)
332 #define REGEX_LINE_COUNT			BIT(11)
/* Union of the pstring length-width bits; PSTRING_1_LE is covered
   because it equals PSTRING_1_BE. */
333 #define PSTRING_LEN	\
334     (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
335 #define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
336 #define	STRING_TRIM				BIT(13)
337 #define CHAR_COMPACT_WHITESPACE			'W'
338 #define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
339 #define CHAR_IGNORE_LOWERCASE			'c'
340 #define CHAR_IGNORE_UPPERCASE			'C'
341 #define CHAR_REGEX_OFFSET_START			's'
342 #define CHAR_TEXTTEST				't'
343 #define	CHAR_TRIM				'T'
344 #define CHAR_BINTEST				'b'
/* Like the bits they name, the two 1-byte codes are identical ('B'). */
345 #define CHAR_PSTRING_1_BE			'B'
346 #define CHAR_PSTRING_1_LE			'B'
347 #define CHAR_PSTRING_2_BE			'H'
348 #define CHAR_PSTRING_2_LE			'h'
349 #define CHAR_PSTRING_4_BE			'L'
350 #define CHAR_PSTRING_4_LE			'l'
351 #define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'
352 #define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
353 #define STRING_DEFAULT_RANGE		100
354 
/* INDIRECT_RELATIVE has the same value as STRING_COMPACT_WHITESPACE
   (BIT(0)).  NOTE(review): presumably a separate flag namespace used
   only with FILE_INDIRECT entries -- confirm. */
355 #define	INDIRECT_RELATIVE			BIT(0)
356 #define	CHAR_INDIRECT_RELATIVE			'r'
357 
358 /* list of magic entries; nodes are linked in both directions via next/prev */
359 struct mlist {
360 	struct magic *magic;		/* array of magic entries */
361 	uint32_t nmagic;		/* number of entries in array */
362 	void *map;			/* internal resources used by entry */
363 	struct mlist *next, *prev;	/* neighbouring list nodes */
364 };
365 
/*
 * Cast helpers that spell out intent.  Under C++ they map to
 * static_cast, reinterpret_cast and const_cast.  Under C, CAST and RCAST
 * are plain casts, while CCAST converts through uintptr_t first
 * (NOTE(review): presumably so that dropping a qualifier does not
 * trigger a compiler warning -- confirm).
 */
366 #ifdef __cplusplus
367 #define CAST(T, b)	static_cast<T>(b)
368 #define RCAST(T, b)	reinterpret_cast<T>(b)
369 #define CCAST(T, b)	const_cast<T>(b)
370 #else
371 #define CAST(T, b)	((T)(b))
372 #define RCAST(T, b)	((T)(b))
373 #define CCAST(T, b)	((T)(uintptr_t)(b))
374 #endif
375 
/*
 * Per-level bookkeeping; struct magic_set's `c.li` member points to
 * objects of this type.
 * NOTE(review): presumably one element per continuation level
 * (struct magic's cont_level) -- confirm in the matcher.
 */
376 struct level_info {
377 	int32_t off;		/* an offset (signed) kept for this level */
378 	int got_match;
379 #ifdef ENABLE_CONDITIONALS
380 	int last_match;
381 	int last_cond;	/* used for error checking by parse() */
382 #endif
383 };
384 
385 #define MAGIC_SETS	2
386 
/*
 * State shared by the file_*() routines declared in this header; nearly
 * all of them take a `struct magic_set *` as their first argument.
 */
387 struct magic_set {
388 	struct mlist *mlist[MAGIC_SETS];	/* list of regular entries */
389 	struct cont {
390 		size_t len;		/* NOTE(review): presumably the number of elements li points to -- confirm */
391 		struct level_info *li;	/* per-level bookkeeping records */
392 	} c;
393 	struct out {
394 		char *buf;		/* Accumulation buffer */
395 		char *pbuf;		/* Printable buffer */
396 	} o;
397 	uint32_t offset;
398 	int error;
399 	int flags;			/* Control magic tests. */
400 	int event_flags;		/* Note things that happened. */
401 #define 		EVENT_HAD_ERR		0x01
402 	const char *file;
403 	size_t line;			/* current magic line number */
404 
405 	/* data for searches */
406 	struct {
407 		const char *s;		/* start of search in original source */
408 		size_t s_len;		/* length of search region */
409 		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
410 		size_t rm_len;		/* match length */
411 	} search;
412 
413 	/* FIXME: Make the string dynamically allocated so that e.g.
414 	   strings matched in files can be longer than MAXstring */
415 	union VALUETYPE ms_value;	/* either number or string */
	/* Per-instance limits.  NOTE(review): the FILE_*_MAX macros below
	   carry matching names and are presumably their defaults -- confirm
	   in file_ms_alloc(). */
416 	uint16_t indir_max;
417 	uint16_t name_max;
418 	uint16_t elf_shnum_max;
419 	uint16_t elf_phnum_max;
420 	uint16_t elf_notes_max;
421 	uint16_t regex_max;
422 	size_t bytes_max;		/* number of bytes to read from file */
423 #define	FILE_INDIR_MAX			50
424 #define	FILE_NAME_MAX			30
425 #define	FILE_ELF_SHNUM_MAX		32768
426 #define	FILE_ELF_PHNUM_MAX		2048
427 #define	FILE_ELF_NOTES_MAX		256
428 #define	FILE_REGEX_MAX			8192
429 };
430 
431 /* Type for Unicode characters */
432 typedef unsigned long unichar;
433 
434 #define FILE_T_LOCAL	1
435 #define FILE_T_WINDOWS	2
436 protected const char *file_fmttime(uint64_t, int, char *);
437 protected struct magic_set *file_ms_alloc(int);
438 protected void file_ms_free(struct magic_set *);
439 protected int file_buffer(struct magic_set *, php_stream *, const char *, const void *,
440     size_t);
441 protected int file_fsmagic(struct magic_set *, const char *, zend_stat_t *, php_stream *);
442 protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
443 protected size_t file_printedlen(const struct magic_set *);
444 protected int file_replace(struct magic_set *, const char *, const char *);
445 protected int file_printf(struct magic_set *, const char *, ...);
446 protected int file_reset(struct magic_set *);
447 protected int file_trycdf(struct magic_set *, int, const unsigned char *,
448     size_t);
449 #ifdef PHP_FILEINFO_UNCOMPRESS
450 protected int file_zmagic(struct magic_set *, int, const char *,
451     const unsigned char *, size_t);
452 #endif
453 protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
454     int);
455 protected int file_ascmagic_with_encoding(struct magic_set *,
456     const unsigned char *, size_t, unichar *, size_t, const char *,
457     const char *, int);
458 protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
459     unichar **, size_t *, const char **, const char **, const char **);
460 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
461 protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
462     uint16_t *, uint16_t *, int, int);
463 protected int file_apprentice(struct magic_set *, const char *, int);
464 protected int buffer_apprentice(struct magic_set *, struct magic **,
465     size_t *, size_t);
466 protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
467 protected uint64_t file_signextend(struct magic_set *, struct magic *,
468     uint64_t);
469 protected void file_delmagic(struct magic *, int type, size_t entries);
470 protected void file_badread(struct magic_set *);
471 protected void file_badseek(struct magic_set *);
472 protected void file_oomem(struct magic_set *, size_t);
473 protected void file_error(struct magic_set *, int, const char *, ...);
474 protected void file_magerror(struct magic_set *, const char *, ...);
475 protected void file_magwarn(struct magic_set *, const char *, ...);
476 protected void file_showstr(FILE *, const char *, size_t);
477 protected size_t file_mbswidth(const char *);
478 protected const char *file_getbuffer(struct magic_set *);
479 protected ssize_t sread(int, void *, size_t, int);
480 protected int file_check_mem(struct magic_set *, unsigned int);
481 protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
482     size_t *);
483 protected size_t file_pstring_length_size(const struct magic *);
484 protected size_t file_pstring_get_length(const struct magic *, const char *);
485 protected char * file_printable(char *, size_t, const char *);
486 #ifdef __EMX__
487 protected int file_os2_apptype(struct magic_set *, const char *, const void *,
488     size_t);
489 #endif /* __EMX__ */
490 
491 public void
492 convert_libmagic_pattern(zval *pattern, char *val, int len, int options);
493 
494 typedef struct {
495 	char *buf;
496 	uint32_t offset;
497 } file_pushbuf_t;
498 
499 protected file_pushbuf_t *file_push_buffer(struct magic_set *);
500 protected char  *file_pop_buffer(struct magic_set *, file_pushbuf_t *);
501 
502 extern const char *file_names[];
503 extern const size_t file_nnames;
504 
/*
 * Fallback when HAVE_STRERROR is not defined: look the message up in
 * sys_errlist[] when 0 <= e < sys_nerr, otherwise yield
 * "Unknown error".  The argument is evaluated up to three times.
 */
505 #ifndef HAVE_STRERROR
506 extern int sys_nerr;
507 extern char *sys_errlist[];
508 #define strerror(e) \
509 	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
510 #endif
511 
/*
 * Fallback when HAVE_STRTOUL is not defined: route strtoul() calls to
 * strtol().  The result is then a signed long, so the range is that of
 * strtol(), not of a real strtoul().
 */
512 #ifndef HAVE_STRTOUL
513 #define strtoul(a, b, c)	strtol(a, b, c)
514 #endif
515 
516 #ifndef strlcpy
517 size_t strlcpy(char *, const char *, size_t);
518 #endif
519 #ifndef strlcat
520 size_t strlcat(char *, const char *, size_t);
521 #endif
522 #ifndef HAVE_STRCASESTR
523 char *strcasestr(const char *, const char *);
524 #endif
525 #ifndef HAVE_GETLINE
526 ssize_t getline(char **, size_t *, FILE *);
527 ssize_t getdelim(char **, size_t *, int, FILE *);
528 #endif
529 #ifndef HAVE_CTIME_R
530 char   *ctime_r(const time_t *, char *);
531 #endif
532 #ifndef HAVE_ASCTIME_R
533 char   *asctime_r(const struct tm *, char *);
534 #endif
535 
536 #if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
537 #define QUICK
538 #endif
539 
540 #ifndef O_BINARY
541 #define O_BINARY	0
542 #endif
543 
/*
 * FILE_RCSID(id): place the identification string `id` in the
 * translation unit.
 *   - C with GCC >= 3: a static const char array named rcsid, marked
 *     __used__.
 *   - other C compilers: a static function named rcsid whose body
 *     references `id` and calls itself; the macro defines it but does
 *     not call it.  NOTE(review): the self-reference is presumably there
 *     to avoid an "unused" diagnostic -- confirm.
 *   - C++: expands to nothing.
 */
544 #ifndef __cplusplus
545 #if defined(__GNUC__) && (__GNUC__ >= 3)
546 #define FILE_RCSID(id) \
547 static const char rcsid[] __attribute__((__used__)) = id;
548 #else
549 #define FILE_RCSID(id) \
550 static const char *rcsid(const char *p) { \
551 	return rcsid(p = id); \
552 }
553 #endif
554 #else
555 #define FILE_RCSID(id)
556 #endif
557 
558 #ifdef PHP_WIN32
559 #ifdef _WIN64
560 #define FINFO_LSEEK_FUNC _lseeki64
561 #else
562 #define FINFO_LSEEK_FUNC _lseek
563 #endif
564 #define FINFO_READ_FUNC _read
565 #else
566 #define FINFO_LSEEK_FUNC lseek
567 #define FINFO_READ_FUNC read
568 #endif
569 #ifndef __RCSID
570 #define __RCSID(a)
571 #endif
572 
573 #endif /* __file_h__ */
574