xref: /php-src/ext/fileinfo/libmagic/magic.c (revision b7c5813c)
1 /*
2  * Copyright (c) Christos Zoulas 2003.
3  * All Rights Reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice immediately at the beginning of the file, without modification,
10  *    this list of conditions, and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "file.h"
29 
30 #ifndef	lint
31 FILE_RCSID("@(#)$File: magic.c,v 1.121 2023/02/09 17:45:19 christos Exp $")
32 #endif	/* lint */
33 
34 #include "magic.h"
35 
36 #include <stdlib.h>
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h>
39 #endif
40 #include <string.h>
41 #ifdef QUICK
42 #include <sys/mman.h>
43 #endif
44 #include <limits.h>	/* for PIPE_BUF */
45 
46 #if defined(HAVE_UTIMES)
47 # include <sys/time.h>
48 #elif defined(HAVE_UTIME)
49 # if defined(HAVE_SYS_UTIME_H)
50 #  include <sys/utime.h>
51 # elif defined(HAVE_UTIME_H)
52 #  include <utime.h>
53 # endif
54 #endif
55 
56 #ifdef HAVE_UNISTD_H
57 #include <unistd.h>	/* for read() */
58 #endif
59 
/* Provide PIPE_BUF when the headers included above did not define it. */
#if !defined(PIPE_BUF)
# if defined(_PC_PIPE_BUF)
/* Get the PIPE_BUF from pathconf */
#  define PIPE_BUF pathconf(".", _PC_PIPE_BUF)
# else
/* No pathconf selector available either: use a fixed value. */
#  define PIPE_BUF 512
# endif
#endif
68 
#if defined(PHP_WIN32)
/* On PHP's Windows build, remove any S_IFLNK / S_IFIFO definitions. */
# undef S_IFLNK
# undef S_IFIFO
#endif
73 
74 file_private int unreadable_info(struct magic_set *, mode_t, const char *);
75 file_private const char *file_or_stream(struct magic_set *, const char *, php_stream *);
76 
/* Fallback for platforms whose headers do not define STDIN_FILENO. */
#if !defined(STDIN_FILENO)
# define STDIN_FILENO 0
#endif
80 
81 file_public struct magic_set *
magic_open(int flags)82 magic_open(int flags)
83 {
84 	return file_ms_alloc(flags);
85 }
86 
87 file_private int
unreadable_info(struct magic_set * ms,mode_t md,const char * file)88 unreadable_info(struct magic_set *ms, mode_t md, const char *file)
89 {
90 	if (file) {
91 		/* We cannot open it, but we were able to stat it. */
92 		if (access(file, W_OK) == 0)
93 			if (file_printf(ms, "writable, ") == -1)
94 				return -1;
95 #ifndef WIN32
96 		if (access(file, X_OK) == 0)
97 			if (file_printf(ms, "executable, ") == -1)
98 				return -1;
99 #else
100 		/* X_OK doesn't work well on MS-Windows */
101 		{
102 			const char *p = strrchr(file, '.');
103 			if (p && (stricmp(p, ".exe")
104 				  || stricmp(p, ".dll")
105 				  || stricmp(p, ".bat")
106 				  || stricmp(p, ".cmd")))
107 				if (file_printf(ms, "writable, ") == -1)
108 					return -1;
109 		}
110 #endif
111 	}
112 	if (S_ISREG(md))
113 		if (file_printf(ms, "regular file, ") == -1)
114 			return -1;
115 	if (file_printf(ms, "no read permission") == -1)
116 		return -1;
117 	return 0;
118 }
119 
120 file_public void
magic_close(struct magic_set * ms)121 magic_close(struct magic_set *ms)
122 {
123 	if (ms == NULL)
124 		return;
125 	file_ms_free(ms);
126 }
127 
128 /*
129  * load a magic file
130  */
131 file_public int
magic_load(struct magic_set * ms,const char * magicfile)132 magic_load(struct magic_set *ms, const char *magicfile)
133 {
134 	if (ms == NULL)
135 		return -1;
136 	return file_apprentice(ms, magicfile, FILE_LOAD);
137 }
138 
139 file_public int
magic_compile(struct magic_set * ms,const char * magicfile)140 magic_compile(struct magic_set *ms, const char *magicfile)
141 {
142 	if (ms == NULL)
143 		return -1;
144 	return file_apprentice(ms, magicfile, FILE_COMPILE);
145 }
146 
147 file_public int
magic_check(struct magic_set * ms,const char * magicfile)148 magic_check(struct magic_set *ms, const char *magicfile)
149 {
150 	if (ms == NULL)
151 		return -1;
152 	return file_apprentice(ms, magicfile, FILE_CHECK);
153 }
154 
155 file_public int
magic_list(struct magic_set * ms,const char * magicfile)156 magic_list(struct magic_set *ms, const char *magicfile)
157 {
158 	if (ms == NULL)
159 		return -1;
160 	return file_apprentice(ms, magicfile, FILE_LIST);
161 }
162 
163 #ifndef COMPILE_ONLY
164 
165 /*
166  * find type of descriptor
167  */
168 file_public const char *
magic_descriptor(struct magic_set * ms,int fd)169 magic_descriptor(struct magic_set *ms, int fd)
170 {
171 	if (ms == NULL)
172 		return NULL;
173 	return file_or_stream(ms, NULL, NULL);
174 }
175 
176 /*
177  * find type of named file
178  */
179 file_public const char *
magic_file(struct magic_set * ms,const char * inname)180 magic_file(struct magic_set *ms, const char *inname)
181 {
182 	if (ms == NULL)
183 		return NULL;
184 	return file_or_stream(ms, inname, NULL);
185 }
186 
187 file_public const char *
magic_stream(struct magic_set * ms,php_stream * stream)188 magic_stream(struct magic_set *ms, php_stream *stream)
189 {
190 	if (ms == NULL)
191 		return NULL;
192 	return file_or_stream(ms, NULL, stream);
193 }
194 
195 file_private const char *
file_or_stream(struct magic_set * ms,const char * inname,php_stream * stream)196 file_or_stream(struct magic_set *ms, const char *inname, php_stream *stream)
197 {
198 	int	rv = -1;
199 	unsigned char *buf;
200 	zend_stat_t   sb = {0};
201 	ssize_t nbytes = 0;	/* number of bytes read from a datafile */
202 	int no_in_stream = 0;
203 
204 	if (file_reset(ms, 1) == -1)
205 		goto out;
206 
207 	/*
208 	 * one extra for terminating '\0', and
209 	 * some overlapping space for matches near EOF
210 	 */
211 #define SLOP (1 + sizeof(union VALUETYPE))
212 	if ((buf = CAST(unsigned char *, emalloc(ms->bytes_max + SLOP))) == NULL)
213 		return NULL;
214 
215 	switch (file_fsmagic(ms, inname, &sb)) {
216 	case -1:		/* error */
217 		goto done;
218 	case 0:			/* nothing found */
219 		break;
220 	default:		/* matched it and printed type */
221 		rv = 0;
222 		goto done;
223 	}
224 
225 	errno = 0;
226 
227 	if (inname && !stream) {
228 		no_in_stream = 1;
229 		stream = php_stream_open_wrapper((char *)inname, "rb", REPORT_ERRORS, NULL);
230 		if (!stream) {
231 			if (unreadable_info(ms, sb.st_mode, inname) == -1)
232 				goto done;
233 			rv = -1;
234 			goto done;
235 		}
236 	}
237 
238 	php_stream_statbuf ssb;
239 	if (php_stream_stat(stream, &ssb) < 0) {
240 		if (ms->flags & MAGIC_ERROR) {
241 			file_error(ms, errno, "cannot stat `%s'", inname);
242 			rv = -1;
243 			goto done;
244 		}
245 	}
246 	memcpy(&sb, &ssb.sb, sizeof(zend_stat_t));
247 
248 	/*
249 	 * try looking at the first ms->bytes_max bytes
250 	 */
251 	if ((nbytes = php_stream_read(stream, (char *)buf, ms->bytes_max - nbytes)) < 0) {
252 		file_error(ms, errno, "cannot read `%s'", inname);
253 		goto done;
254 	}
255 
256 	(void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */
257 	if (file_buffer(ms, stream, &sb, inname, buf, CAST(size_t, nbytes)) == -1)
258 		goto done;
259 	rv = 0;
260 done:
261 	efree(buf);
262 
263 	if (no_in_stream && stream) {
264 		php_stream_close(stream);
265 	}
266 out:
267 	return rv == 0 ? file_getbuffer(ms) : NULL;
268 }
269 
270 
271 file_public const char *
magic_buffer(struct magic_set * ms,const void * buf,size_t nb)272 magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
273 {
274 	if (ms == NULL)
275 		return NULL;
276 	if (file_reset(ms, 1) == -1)
277 		return NULL;
278 	/*
279 	 * The main work is done here!
280 	 * We have the file name and/or the data buffer to be identified.
281 	 */
282 	if (file_buffer(ms, NULL, NULL, NULL, buf, nb) == -1) {
283 		return NULL;
284 	}
285 	return file_getbuffer(ms);
286 }
287 #endif
288 
289 file_public const char *
magic_error(struct magic_set * ms)290 magic_error(struct magic_set *ms)
291 {
292 	if (ms == NULL)
293 		return "Magic database is not open";
294 	return (ms->event_flags & EVENT_HAD_ERR) ? ms->o.buf : NULL;
295 }
296 
297 file_public int
magic_errno(struct magic_set * ms)298 magic_errno(struct magic_set *ms)
299 {
300 	if (ms == NULL)
301 		return EINVAL;
302 	return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0;
303 }
304 
305 file_public int
magic_getflags(struct magic_set * ms)306 magic_getflags(struct magic_set *ms)
307 {
308 	if (ms == NULL)
309 		return -1;
310 
311 	return ms->flags;
312 }
313 
314 file_public int
magic_setflags(struct magic_set * ms,int flags)315 magic_setflags(struct magic_set *ms, int flags)
316 {
317 	if (ms == NULL)
318 		return -1;
319 #if !defined(HAVE_UTIME) && !defined(HAVE_UTIMES)
320 	if (flags & MAGIC_PRESERVE_ATIME)
321 		return -1;
322 #endif
323 	ms->flags = flags;
324 	return 0;
325 }
326 
327 file_public int
magic_version(void)328 magic_version(void)
329 {
330 	return MAGIC_VERSION;
331 }
332 
333 file_public int
magic_setparam(struct magic_set * ms,int param,const void * val)334 magic_setparam(struct magic_set *ms, int param, const void *val)
335 {
336 	if (ms == NULL)
337 		return -1;
338 	switch (param) {
339 	case MAGIC_PARAM_INDIR_MAX:
340 		ms->indir_max = CAST(uint16_t, *CAST(const size_t *, val));
341 		return 0;
342 	case MAGIC_PARAM_NAME_MAX:
343 		ms->name_max = CAST(uint16_t, *CAST(const size_t *, val));
344 		return 0;
345 	case MAGIC_PARAM_ELF_PHNUM_MAX:
346 		ms->elf_phnum_max = CAST(uint16_t, *CAST(const size_t *, val));
347 		return 0;
348 	case MAGIC_PARAM_ELF_SHNUM_MAX:
349 		ms->elf_shnum_max = CAST(uint16_t, *CAST(const size_t *, val));
350 		return 0;
351 	case MAGIC_PARAM_ELF_SHSIZE_MAX:
352 		ms->elf_shsize_max = *CAST(const size_t *, val);
353 		return 0;
354 	case MAGIC_PARAM_ELF_NOTES_MAX:
355 		ms->elf_notes_max = CAST(uint16_t, *CAST(const size_t *, val));
356 		return 0;
357 	case MAGIC_PARAM_REGEX_MAX:
358 		ms->regex_max = CAST(uint16_t, *CAST(const size_t *, val));
359 		return 0;
360 	case MAGIC_PARAM_BYTES_MAX:
361 		ms->bytes_max = *CAST(const size_t *, val);
362 		return 0;
363 	case MAGIC_PARAM_ENCODING_MAX:
364 		ms->encoding_max = *CAST(const size_t *, val);
365 		return 0;
366 	default:
367 		errno = EINVAL;
368 		return -1;
369 	}
370 }
371 
372 file_public int
magic_getparam(struct magic_set * ms,int param,void * val)373 magic_getparam(struct magic_set *ms, int param, void *val)
374 {
375 	if (ms == NULL)
376 		return -1;
377 	switch (param) {
378 	case MAGIC_PARAM_INDIR_MAX:
379 		*CAST(size_t *, val) = ms->indir_max;
380 		return 0;
381 	case MAGIC_PARAM_NAME_MAX:
382 		*CAST(size_t *, val) = ms->name_max;
383 		return 0;
384 	case MAGIC_PARAM_ELF_PHNUM_MAX:
385 		*CAST(size_t *, val) = ms->elf_phnum_max;
386 		return 0;
387 	case MAGIC_PARAM_ELF_SHNUM_MAX:
388 		*CAST(size_t *, val) = ms->elf_shnum_max;
389 		return 0;
390 	case MAGIC_PARAM_ELF_SHSIZE_MAX:
391 		*CAST(size_t *, val) = ms->elf_shsize_max;
392 		return 0;
393 	case MAGIC_PARAM_ELF_NOTES_MAX:
394 		*CAST(size_t *, val) = ms->elf_notes_max;
395 		return 0;
396 	case MAGIC_PARAM_REGEX_MAX:
397 		*CAST(size_t *, val) = ms->regex_max;
398 		return 0;
399 	case MAGIC_PARAM_BYTES_MAX:
400 		*CAST(size_t *, val) = ms->bytes_max;
401 		return 0;
402 	case MAGIC_PARAM_ENCODING_MAX:
403 		*CAST(size_t *, val) = ms->encoding_max;
404 		return 0;
405 	default:
406 		errno = EINVAL;
407 		return -1;
408 	}
409 }
410