xref: /PHP-5.3/ext/fileinfo/libmagic/file.h (revision f13b0ede)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * file.h - definitions for file(1) program
30  * @(#)$File: file.h,v 1.135 2011/09/20 15:30:14 christos Exp $
31  */
32 
33 #ifndef __file_h__
34 #define __file_h__
35 
36 #include "config.h"
37 
/*
 * Length-modifier strings for size_t and 64-bit integers.
 *   PHP_WIN32 + _WIN64: "I64" for both
 *   PHP_WIN32 only:     no modifier for size_t, "I64" for 64-bit integers
 *   everything else:    "z" for size_t, "ll" for 64-bit integers
 * NOTE(review): presumably pasted between '%' and the conversion character of
 * a printf-style format - confirm at the call sites.
 */
38 #ifdef PHP_WIN32
39   #ifdef _WIN64
40     #define SIZE_T_FORMAT "I64"
41   #else
42     #define SIZE_T_FORMAT ""
43   #endif
44   #define INT64_T_FORMAT "I64"
45 #else
46   #define SIZE_T_FORMAT "z"
47   #define INT64_T_FORMAT "ll"
48 #endif
49 
50 #include <stdio.h>	/* Include that here, to make sure __P gets defined */
51 #include <errno.h>
52 #include <fcntl.h>	/* For open and flags */
53 #ifdef HAVE_STDINT_H
54 #ifndef __STDC_LIMIT_MACROS
55 #define __STDC_LIMIT_MACROS
56 #endif
57 #include <stdint.h>
58 #endif
59 #ifdef HAVE_INTTYPES_H
60 #include <inttypes.h>
61 #endif
62 #ifdef PHP_WIN32
63 #include "win32/php_stdint.h"
64 #endif
65 
66 #include "php.h"
67 #include "ext/standard/php_string.h"
68 #include "ext/pcre/php_pcre.h"
69 
70 #include <sys/types.h>
71 #ifdef PHP_WIN32
72 #include "win32/param.h"
73 #else
74 #include <sys/param.h>
75 #endif
76 /* Do this here and now, because struct stat gets re-defined on solaris */
77 #include <sys/stat.h>
78 #include <stdarg.h>
79 
/*
 * Selects the conditional-aware variants in this header: the cond member of
 * struct magic, the COND_* constants and the last_match/last_cond members of
 * struct level_info are all guarded by #ifdef ENABLE_CONDITIONALS.
 */
80 #define ENABLE_CONDITIONALS
81 
/* Value of MAGIC unless it was already defined before this point. */
82 #ifndef MAGIC
83 #define MAGIC "/etc/magic"
84 #endif
85 
/*
 * Separator character: ';' when __EMX__ or PHP_WIN32 is defined, ':' otherwise.
 * NOTE(review): presumably splits a list of magic file paths - confirm at the
 * call sites.
 */
86 #if defined(__EMX__) || defined(PHP_WIN32)
87 #define PATHSEP	';'
88 #else
89 #define PATHSEP	':'
90 #endif
91 
/*
 * Visibility markers written in front of declarations.  Only `private` has an
 * effect (it expands to `static`); `protected` and `public` expand to nothing.
 * `protected` is left alone when something already defined it as a macro.
 */
92 #define private static
93 #ifndef protected
94 #define protected
95 #endif
96 #define public
97 
/*
 * Number of elements in the array `a`, computed from sizeof; skipped when the
 * platform already supplies __arraycount.
 * NOTE(review): `a` is not parenthesised in `a[0]`, so pass a plain array name
 * rather than an expression.  Passing a pointer compiles but yields
 * sizeof(pointer) / sizeof(element), not an element count.
 */
98 #ifndef __arraycount
99 #define __arraycount(a) (sizeof(a) / sizeof(a[0]))
100 #endif
101 
/*
 * __GNUC_PREREQ__(x, y): non-zero when the compiler defines __GNUC__ and its
 * version is at least x.y (same major with minor >= y, or a newer major).
 * Without __GNUC__ the macro is the constant 0.  An existing definition of
 * __GNUC_PREREQ__ is kept as is.
 */
102 #ifndef __GNUC_PREREQ__
103 #ifdef __GNUC__
104 #define	__GNUC_PREREQ__(x, y)						\
105 	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
106 	 (__GNUC__ > (x)))
107 #else
108 #define	__GNUC_PREREQ__(x, y)	0
109 #endif
110 #endif
111 
/*
 * Smaller / larger of two values.  Each macro is defined only when the name is
 * not already taken.  The selected argument is evaluated twice, so arguments
 * with side effects (i++, function calls) must be avoided.
 */
112 #ifndef MIN
113 #define	MIN(a,b)	(((a) < (b)) ? (a) : (b))
114 #endif
115 
116 #ifndef MAX
117 #define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
118 #endif
119 
/* Size limits.  HOWMANY can be overridden by defining it before this header. */
120 #ifndef HOWMANY
121 # define HOWMANY (256 * 1024)	/* how much of the file to look at */
122 #endif
123 #define MAXMAGIS 8192		/* max entries in any one magic file
124 				   or directory */
125 #define MAXDESC	64		/* max length of text description/MIME type */
126 #define MAXstring 64		/* max length of "string" types */
127 
/*
 * NOTE(review): MAGICNO and VERSIONNO presumably identify compiled magic data
 * and its format revision - confirm in the code that reads/writes it.
 * FILE_MAGICSIZE equals the summed member sizes of struct magic (58 32-bit
 * words = 232 bytes), so it has to change whenever that structure does.
 */
128 #define MAGICNO		0xF11E041C
129 #define VERSIONNO	8
130 #define FILE_MAGICSIZE	232
131 
/*
 * NOTE(review): presumably the action codes accepted by file_apprentice()'s
 * int argument - confirm against its definition.
 */
132 #define	FILE_LOAD	0
133 #define FILE_CHECK	1
134 #define FILE_COMPILE	2
135 #define FILE_LIST	3
136 
/*
 * A single value seen through every representation a test may need: native
 * integers of 1/2/4/8 bytes, the same widths as raw byte arrays, a character
 * buffer of MAXstring bytes (signed and unsigned view), and float/double.
 * Used for struct magic's `value` and struct magic_set's `ms_value`.
 */
137 union VALUETYPE {
138 	uint8_t b;
139 	uint16_t h;
140 	uint32_t l;
141 	uint64_t q;
142 	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
143 	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
144 	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
145 	char s[MAXstring];	/* the search string or regex pattern */
146 	unsigned char us[MAXstring];	/* same bytes as s, unsigned view */
147 	float f;
148 	double d;
149 };
150 
/*
 * One magic entry.  The "Word N" markers count 32-bit words from the start of
 * the structure; the members add up to 58 words, i.e. FILE_MAGICSIZE (232)
 * bytes.  The #defines interleaved with the members are ordinary file-scope
 * macros; they are merely written next to the member they accompany.
 */
151 struct magic {
152 	/* Word 1 */
153 	uint16_t cont_level;	/* level of ">" */
154 	uint8_t flag;		/* presumably an OR of the single-bit values below */
155 #define INDIR		0x01	/* if '(...)' appears */
156 #define OFFADD		0x02	/* if '>&' or '>...(&' appears */
157 #define INDIROFFADD	0x04	/* if '>&(' appears */
158 #define UNSIGNED	0x08	/* comparison is unsigned */
159 #define NOSPACE		0x10	/* suppress space character before output */
160 #define BINTEST		0x20	/* test is for a binary type (set only
161 				   for top-level tests) */
162 #define TEXTTEST	0x40	/* for passing to file_softmagic */
163 
164 	uint8_t factor;		/* NOTE(review): meaning not visible here; see factor_op */
165 
166 	/* Word 2 */
167 	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
168 	uint8_t vallen;		/* length of string value, if any */
169 	uint8_t type;		/* comparison type (FILE_*) */
170 	uint8_t in_type;	/* type of indirection */
171 #define 			FILE_INVALID	0
172 #define 			FILE_BYTE	1
173 #define				FILE_SHORT	2
174 #define				FILE_DEFAULT	3
175 #define				FILE_LONG	4
176 #define				FILE_STRING	5
177 #define				FILE_DATE	6
178 #define				FILE_BESHORT	7
179 #define				FILE_BELONG	8
180 #define				FILE_BEDATE	9
181 #define				FILE_LESHORT	10
182 #define				FILE_LELONG	11
183 #define				FILE_LEDATE	12
184 #define				FILE_PSTRING	13
185 #define				FILE_LDATE	14
186 #define				FILE_BELDATE	15
187 #define				FILE_LELDATE	16
188 #define				FILE_REGEX	17
189 #define				FILE_BESTRING16	18
190 #define				FILE_LESTRING16	19
191 #define				FILE_SEARCH	20
192 #define				FILE_MEDATE	21
193 #define				FILE_MELDATE	22
194 #define				FILE_MELONG	23
195 #define				FILE_QUAD	24
196 #define				FILE_LEQUAD	25
197 #define				FILE_BEQUAD	26
198 #define				FILE_QDATE	27
199 #define				FILE_LEQDATE	28
200 #define				FILE_BEQDATE	29
201 #define				FILE_QLDATE	30
202 #define				FILE_LEQLDATE	31
203 #define				FILE_BEQLDATE	32
204 #define				FILE_FLOAT	33
205 #define				FILE_BEFLOAT	34
206 #define				FILE_LEFLOAT	35
207 #define				FILE_DOUBLE	36
208 #define				FILE_BEDOUBLE	37
209 #define				FILE_LEDOUBLE	38
210 #define				FILE_BEID3	39
211 #define				FILE_LEID3	40
212 #define				FILE_INDIRECT	41
213 #define				FILE_NAMES_SIZE	42/* size of array to contain all names */
214 
/*
 * Non-zero when t is one of the seven type codes listed.  t is evaluated up
 * to seven times, so it must be free of side effects.
 */
215 #define IS_LIBMAGIC_STRING(t) \
216 	((t) == FILE_STRING || \
217 	 (t) == FILE_PSTRING || \
218 	 (t) == FILE_BESTRING16 || \
219 	 (t) == FILE_LESTRING16 || \
220 	 (t) == FILE_REGEX || \
221 	 (t) == FILE_SEARCH || \
222 	 (t) == FILE_DEFAULT)
223 
/* Format classes; the quoted text names the printf conversions in each. */
224 #define FILE_FMT_NONE 0
225 #define FILE_FMT_NUM  1 /* "cduxXi" */
226 #define FILE_FMT_STR  2 /* "s" */
227 #define FILE_FMT_QUAD 3 /* "ll" */
228 #define FILE_FMT_FLOAT 4 /* "eEfFgG" */
229 #define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
230 
231 	/* Word 3 */
232 	uint8_t in_op;		/* operator for indirection */
233 	uint8_t mask_op;	/* operator for mask */
234 #ifdef ENABLE_CONDITIONALS
235 	uint8_t cond;		/* conditional type */
236 #else
237 	uint8_t dummy;		/* keeps the layout identical without conditionals */
238 #endif
239 	uint8_t factor_op;	/* presumably one of the FILE_FACTOR_OP_* characters */
240 #define		FILE_FACTOR_OP_PLUS	'+'
241 #define		FILE_FACTOR_OP_MINUS	'-'
242 #define		FILE_FACTOR_OP_TIMES	'*'
243 #define		FILE_FACTOR_OP_DIV	'/'
244 #define		FILE_FACTOR_OP_NONE	'\0'
245 
/*
 * Operator characters and their codes: the character at index i of FILE_OPS
 * lines up with the FILE_OP* constant whose value is i ('&' = FILE_OPAND = 0
 * ... '%' = FILE_OPMODULO = 7).  FILE_OPS_MASK covers those eight codes; the
 * remaining values are single bits above the mask.
 */
246 #define				FILE_OPS	"&|^+-*/%"
247 #define				FILE_OPAND	0
248 #define				FILE_OPOR	1
249 #define				FILE_OPXOR	2
250 #define				FILE_OPADD	3
251 #define				FILE_OPMINUS	4
252 #define				FILE_OPMULTIPLY	5
253 #define				FILE_OPDIVIDE	6
254 #define				FILE_OPMODULO	7
255 #define				FILE_OPS_MASK	0x07 /* mask for above ops */
256 #define				FILE_UNUSED_1	0x08
257 #define				FILE_UNUSED_2	0x10
258 #define				FILE_UNUSED_3	0x20
259 #define				FILE_OPINVERSE	0x40
260 #define				FILE_OPINDIRECT	0x80
261 
/* Values for the cond member. */
262 #ifdef ENABLE_CONDITIONALS
263 #define				COND_NONE	0
264 #define				COND_IF		1
265 #define				COND_ELIF	2
266 #define				COND_ELSE	3
267 #endif /* ENABLE_CONDITIONALS */
268 
269 	/* Word 4 */
270 	uint32_t offset;	/* offset to magic number */
271 	/* Word 5 */
272 	int32_t in_offset;	/* offset from indirection */
273 	/* Word 6 */
274 	uint32_t lineno;	/* line number in magic file */
275 	/* Word 7,8 */
276 	union {
277 		uint64_t _mask;	/* for use with numeric and date types */
278 		struct {
279 			uint32_t _count;	/* repeat/line count */
280 			uint32_t _flags;	/* modifier flags */
281 		} _s;		/* for use with string types */
282 	} _u;
/* Short names for the members of _u. */
283 #define num_mask _u._mask
284 #define str_range _u._s._count
285 #define str_flags _u._s._flags
286 	/* Words 9-24 */
287 	union VALUETYPE value;	/* either number or string */
288 	/* Words 25-40 */
289 	char desc[MAXDESC];	/* description */
290 	/* Words 41-56 */
291 	char mimetype[MAXDESC]; /* MIME type */
292 	/* Words 57-58 */
293 	char apple[8];		/* NOTE(review): presumably Apple creator/type codes - confirm */
294 };
295 
/*
 * BIT(A) is an int with only bit A set.  The STRING_*, REGEX_* and PSTRING_*
 * names below are such single-bit flags; the comment on struct magic's _flags
 * member ("modifier flags") suggests that is where they are stored - confirm.
 */
296 #define BIT(A)   (1 << (A))
297 #define STRING_COMPACT_WHITESPACE		BIT(0)
298 #define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
299 #define STRING_IGNORE_LOWERCASE			BIT(2)
300 #define STRING_IGNORE_UPPERCASE			BIT(3)
301 #define REGEX_OFFSET_START			BIT(4)
302 #define STRING_TEXTTEST				BIT(5)
303 #define STRING_BINTEST				BIT(6)
/* The two 1-byte variants share bit 7: a single byte has no byte order. */
304 #define PSTRING_1_BE				BIT(7)
305 #define PSTRING_1_LE				BIT(7)
306 #define PSTRING_2_BE				BIT(8)
307 #define PSTRING_2_LE				BIT(9)
308 #define PSTRING_4_BE				BIT(10)
309 #define PSTRING_4_LE				BIT(11)
/* All length-width bits; PSTRING_1_LE is covered through PSTRING_1_BE. */
310 #define PSTRING_LEN	\
311     (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
312 #define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
/*
 * One character per flag above, in the same order.
 * NOTE(review): presumably the modifier letters accepted in magic files -
 * confirm in the parser.  CHAR_PSTRING_1_BE and CHAR_PSTRING_1_LE are both
 * 'B', mirroring the shared bit.
 */
313 #define CHAR_COMPACT_WHITESPACE			'W'
314 #define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
315 #define CHAR_IGNORE_LOWERCASE			'c'
316 #define CHAR_IGNORE_UPPERCASE			'C'
317 #define CHAR_REGEX_OFFSET_START			's'
318 #define CHAR_TEXTTEST				't'
319 #define CHAR_BINTEST				'b'
320 #define CHAR_PSTRING_1_BE			'B'
321 #define CHAR_PSTRING_1_LE			'B'
322 #define CHAR_PSTRING_2_BE			'H'
323 #define CHAR_PSTRING_2_LE			'h'
324 #define CHAR_PSTRING_4_BE			'L'
325 #define CHAR_PSTRING_4_LE			'l'
326 #define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'
/* Both case-folding bits together. */
327 #define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
/* NOTE(review): presumably the default for str_range - confirm in the parser. */
328 #define STRING_DEFAULT_RANGE		100
329 
330 
331 /* list of magic entries */
332 struct mlist {
333 	struct magic *magic;		/* array of magic entries */
334 	uint32_t nmagic;			/* number of entries in array */
335 	int mapped;  /* allocation type: 0 => apprentice_file
336 		      *                  1 => apprentice_map + malloc
337 		      *                  2 => apprentice_map + mmap */
338 	struct mlist *next, *prev;	/* neighbouring list nodes */
339 };
340 
/*
 * Cast helpers that name the intent.  Under C++ they map to static_cast and
 * reinterpret_cast; under C both are the same plain cast.
 */
341 #ifdef __cplusplus
342 #define CAST(T, b)	static_cast<T>(b)
343 #define RCAST(T, b)	reinterpret_cast<T>(b)
344 #else
345 #define CAST(T, b)	(T)(b)
346 #define RCAST(T, b)	(T)(b)
347 #endif
348 
/*
 * State record; struct magic_set keeps a pointer to these in c.li together
 * with a length in c.len.
 * NOTE(review): presumably one record per continuation level - confirm where
 * c.li is allocated and indexed.
 */
349 struct level_info {
350 	int32_t off;
351 	int got_match;
352 #ifdef ENABLE_CONDITIONALS
353 	int last_match;
354 	int last_cond;	/* used for error checking by parse() */
355 #endif
356 };
/*
 * Working context handed to almost every file_*() function declared in this
 * header: the loaded magic list, per-level state, output buffers, status
 * flags and scratch space for the current match.
 */
357 struct magic_set {
358 	struct mlist *mlist;		/* list of magic entries */
359 	struct cont {
360 		size_t len;		/* NOTE(review): presumably the element count of li */
361 		struct level_info *li;
362 	} c;
363 	struct out {
364 		char *buf;		/* Accumulation buffer */
365 		char *pbuf;		/* Printable buffer */
366 	} o;
367 	uint32_t offset;
368 	int error;
369 	int flags;			/* Control magic tests. */
370 	int event_flags;		/* Note things that happened. */
371 #define 		EVENT_HAD_ERR		0x01
372 	const char *file;
373 	size_t line;			/* current magic line number */
374 
375 	/* data for searches */
376 	struct {
377 		const char *s;		/* start of search in original source */
378 		size_t s_len;		/* length of search region */
379 		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
380 		size_t rm_len;		/* match length */
381 	} search;
382 
383 	/* FIXME: Make the string dynamically allocated so that e.g.
384 	   strings matched in files can be longer than MAXstring */
385 	union VALUETYPE ms_value;	/* either number or string */
386 };
387 
388 /* Type for Unicode characters; buffers of these are passed to the encoding helpers */
389 typedef unsigned long unichar;
390 
/*
 * Prototypes of the library-internal functions.  `protected` is the empty
 * marker macro defined earlier in this header, so these are ordinary external
 * declarations.  Most take the struct magic_set context as first argument.
 */
391 struct stat;	/* forward declaration for the file_fsmagic() prototype */
392 protected const char *file_fmttime(uint32_t, int);
393 protected int file_buffer(struct magic_set *, php_stream *, const char *, const void *,
394     size_t);
395 protected int file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb, php_stream *stream);
396 protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
397 protected int file_replace(struct magic_set *, const char *, const char *);
398 protected int file_printf(struct magic_set *, const char *, ...);
399 protected int file_reset(struct magic_set *);
400 protected int file_tryelf(struct magic_set *, int, const unsigned char *,
401     size_t);
402 protected int file_trycdf(struct magic_set *, int, const unsigned char *,
403     size_t);
/* Declared only when PHP_FILEINFO_UNCOMPRESS is defined. */
404 #ifdef PHP_FILEINFO_UNCOMPRESS
405 protected int file_zmagic(struct magic_set *, int, const char *,
406     const unsigned char *, size_t);
407 #endif
408 protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
409     int);
410 protected int file_ascmagic_with_encoding(struct magic_set *,
411     const unsigned char *, size_t, unichar *, size_t, const char *,
412     const char *, int);
413 protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
414     unichar **, size_t *, const char **, const char **, const char **);
415 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
416 protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
417     int, int);
418 protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
419 protected uint64_t file_signextend(struct magic_set *, struct magic *,
420     uint64_t);
421 protected void file_delmagic(struct magic *, int type, size_t entries);
422 protected void file_badread(struct magic_set *);
423 protected void file_badseek(struct magic_set *);
424 protected void file_oomem(struct magic_set *, size_t);
425 protected void file_error(struct magic_set *, int, const char *, ...);
426 protected void file_magerror(struct magic_set *, const char *, ...);
427 protected void file_magwarn(struct magic_set *, const char *, ...);
428 protected void file_showstr(FILE *, const char *, size_t);
429 protected size_t file_mbswidth(const char *);
430 protected const char *file_getbuffer(struct magic_set *);
431 protected ssize_t sread(int, void *, size_t, int);
432 protected int file_check_mem(struct magic_set *, unsigned int);
433 protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
434     size_t *);
435 protected size_t file_pstring_length_size(const struct magic *);
436 protected size_t file_pstring_get_length(const struct magic *, const char *);
437 protected size_t file_printedlen(const struct magic_set *ms);
/* Declared only when __EMX__ is defined. */
438 #ifdef __EMX__
439 protected int file_os2_apptype(struct magic_set *, const char *, const void *,
440     size_t);
441 #endif /* __EMX__ */
442 
/*
 * Defined in another translation unit.
 * NOTE(review): presumably the table of type names sized by FILE_NAMES_SIZE
 * and its element count - confirm at the definition.
 */
443 extern const char *file_names[];
444 extern const size_t file_nnames;
445 
/*
 * Replacement for platforms without strerror(): looks the code up in
 * sys_errlist when it lies in [0, sys_nerr), otherwise yields
 * "Unknown error".  `e` is evaluated up to three times.
 */
446 #ifndef HAVE_STRERROR
447 extern int sys_nerr;
448 extern char *sys_errlist[];
449 #define strerror(e) \
450 	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
451 #endif
452 
/*
 * Replacement for platforms without strtoul().
 * NOTE(review): strtol() returns a signed long and saturates at LONG_MAX, so
 * values above LONG_MAX are not converted the way strtoul() would convert them.
 */
453 #ifndef HAVE_STRTOUL
454 #define strtoul(a, b, c)	strtol(a, b, c)
455 #endif
456 
/*
 * Prototypes for string helpers.
 * NOTE(review): `#ifndef strlcpy` / `#ifndef strlcat` only detect a macro of
 * that name, so the prototypes are emitted whenever the names are not macros,
 * even if the C library already declares the functions.
 */
457 #ifndef strlcpy
458 size_t strlcpy(char *dst, const char *src, size_t siz);
459 #endif
460 #ifndef strlcat
461 size_t strlcat(char *dst, const char *src, size_t siz);
462 #endif
/* getline()/getdelim() are declared here when HAVE_GETLINE is not defined. */
463 #ifndef HAVE_GETLINE
464 ssize_t getline(char **dst, size_t *len, FILE *fp);
465 ssize_t getdelim(char **dst, size_t *len, int delimiter, FILE *fp);
466 #endif
467 
/* QUICK is switched on when both HAVE_MMAP and HAVE_SYS_MMAN_H are defined. */
468 #if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
469 #define QUICK
470 #endif
471 
/* Where O_BINARY is not provided, 0 is used; OR-ing 0 into flags changes nothing. */
472 #ifndef O_BINARY
473 #define O_BINARY	0
474 #endif
475 
/*
 * FILE_RCSID(id) places the identification string `id` in the including
 * source file:
 *   - C with GCC 3 or newer: a static char array carrying the __used__
 *     attribute;
 *   - any other C compiler: a static function that refers to itself and to
 *     `id`.  It calls itself unconditionally, so it must never be invoked;
 *   - C++: expands to nothing.
 */
476 #ifndef __cplusplus
477 #if defined(__GNUC__) && (__GNUC__ >= 3)
478 #define FILE_RCSID(id) \
479 static const char rcsid[] __attribute__((__used__)) = id;
480 #else
481 #define FILE_RCSID(id) \
482 static const char *rcsid(const char *p) { \
483 	return rcsid(p = id); \
484 }
485 #endif
486 #else
487 #define FILE_RCSID(id)
488 #endif
489 
/*
 * Names of the seek/read functions to call: the underscore-prefixed
 * _lseek/_read under PHP_WIN32, lseek/read everywhere else.
 */
490 #ifdef PHP_WIN32
491 #define FINFO_LSEEK_FUNC _lseek
492 #define FINFO_READ_FUNC _read
493 #else
494 #define FINFO_LSEEK_FUNC lseek
495 #define FINFO_READ_FUNC read
496 #endif
497 
498 #endif /* __file_h__ */
499