xref: /PHP-5.3/ext/fileinfo/libmagic/magic.c (revision 06760310)
1 /*
2  * Copyright (c) Christos Zoulas 2003.
3  * All Rights Reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice immediately at the beginning of the file, without modification,
10  *    this list of conditions, and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "file.h"
29 
30 #ifndef	lint
31 FILE_RCSID("@(#)$File: magic.c,v 1.74 2011/05/26 01:27:59 christos Exp $")
32 #endif	/* lint */
33 
34 #include "magic.h"
35 
36 #include <stdlib.h>
37 #ifdef PHP_WIN32
38 #include "win32/unistd.h"
39 #else
40 #include <unistd.h>
41 #endif
42 #include <string.h>
43 #ifdef PHP_WIN32
44 # include "config.w32.h"
45 #else
46 # include "php_config.h"
47 #endif
48 
49 #ifdef PHP_WIN32
50 #include <shlwapi.h>
51 #endif
52 
53 #include <limits.h>	/* for PIPE_BUF */
54 
55 #if defined(HAVE_UTIMES)
56 # include <sys/time.h>
57 #elif defined(HAVE_UTIME)
58 # if defined(HAVE_SYS_UTIME_H)
59 #  include <sys/utime.h>
60 # elif defined(HAVE_UTIME_H)
61 #  include <utime.h>
62 # endif
63 #endif
64 
65 #ifdef HAVE_UNISTD_H
66 #include <unistd.h>	/* for read() */
67 #endif
68 
69 #ifndef PIPE_BUF
70 /* Get the PIPE_BUF from pathconf */
71 #ifdef _PC_PIPE_BUF
72 #define PIPE_BUF pathconf(".", _PC_PIPE_BUF)
73 #else
74 #define PIPE_BUF 512
75 #endif
76 #endif
77 
78 #ifdef PHP_WIN32
79 # undef S_IFLNK
80 # undef S_IFIFO
81 #endif
82 
83 private void free_mlist(struct mlist *);
84 private void close_and_restore(const struct magic_set *, const char *, int,
85     const struct stat *);
86 private int unreadable_info(struct magic_set *, mode_t, const char *);
87 private const char* get_default_magic(void);
88 private const char *file_or_stream(struct magic_set *, const char *, php_stream *);
89 
90 #ifndef	STDIN_FILENO
91 #define	STDIN_FILENO	0
92 #endif
93 
94 /* XXX this functionality is excluded in php, enable it in apprentice.c:340 */
95 #if 0
96 private const char *
97 get_default_magic(void)
98 {
99 	static const char hmagic[] = "/.magic/magic.mgc";
100 	static char *default_magic;
101 	char *home, *hmagicpath;
102 
103 #ifndef PHP_WIN32
104 	struct stat st;
105 
106 	if (default_magic) {
107 		free(default_magic);
108 		default_magic = NULL;
109 	}
110 	if ((home = getenv("HOME")) == NULL)
111 		return MAGIC;
112 
113 	if (asprintf(&hmagicpath, "%s/.magic", home) < 0)
114 		return MAGIC;
115 	if (stat(hmagicpath, &st) == -1)
116 		goto out;
117 	if (S_ISDIR(st.st_mode)) {
118 		free(hmagicpath);
119 		if (asprintf(&hmagicpath, "%s/%s", home, hmagic) < 0)
120 			return MAGIC;
121 		if (access(hmagicpath, R_OK) == -1)
122 			goto out;
123 	}
124 
125 	if (asprintf(&default_magic, "%s:%s", hmagicpath, MAGIC) < 0)
126 		goto out;
127 	free(hmagicpath);
128 	return default_magic;
129 out:
130 	default_magic = NULL;
131 	free(hmagicpath);
132 	return MAGIC;
133 #else
134 	char *hmagicp = hmagicpath;
135 	char *tmppath = NULL;
136 	LPTSTR dllpath;
137 
138 #define APPENDPATH() \
139 	do { \
140 		if (tmppath && access(tmppath, R_OK) != -1) { \
141 			if (hmagicpath == NULL) \
142 				hmagicpath = tmppath; \
143 			else { \
144 				if (asprintf(&hmagicp, "%s%c%s", hmagicpath, \
145 				    PATHSEP, tmppath) >= 0) { \
146 					free(hmagicpath); \
147 					hmagicpath = hmagicp; \
148 				} \
149 				free(tmppath); \
150 			} \
151 			tmppath = NULL; \
152 		} \
153 	} while (/*CONSTCOND*/0)
154 
155 	if (default_magic) {
156 		free(default_magic);
157 		default_magic = NULL;
158 	}
159 
160 	/* First, try to get user-specific magic file */
161 	if ((home = getenv("LOCALAPPDATA")) == NULL) {
162 		if ((home = getenv("USERPROFILE")) != NULL)
163 			if (asprintf(&tmppath,
164 			    "%s/Local Settings/Application Data%s", home,
165 			    hmagic) < 0)
166 				tmppath = NULL;
167 	} else {
168 		if (asprintf(&tmppath, "%s%s", home, hmagic) < 0)
169 			tmppath = NULL;
170 	}
171 
172 	APPENDPATH();
173 
174 	/* Second, try to get a magic file from Common Files */
175 	if ((home = getenv("COMMONPROGRAMFILES")) != NULL) {
176 		if (asprintf(&tmppath, "%s%s", home, hmagic) >= 0)
177 			APPENDPATH();
178 	}
179 
180 	/* Third, try to get magic file relative to dll location */
181 	dllpath = malloc(sizeof(*dllpath) * (MAX_PATH + 1));
182 	dllpath[MAX_PATH] = 0;	/* just in case long path gets truncated and not null terminated */
183 	if (GetModuleFileNameA(NULL, dllpath, MAX_PATH)){
184 		PathRemoveFileSpecA(dllpath);
185 		if (strlen(dllpath) > 3 &&
186 		    stricmp(&dllpath[strlen(dllpath) - 3], "bin") == 0) {
187 			if (asprintf(&tmppath,
188 			    "%s/../share/misc/magic.mgc", dllpath) >= 0)
189 				APPENDPATH();
190 		} else {
191 			if (asprintf(&tmppath,
192 			    "%s/share/misc/magic.mgc", dllpath) >= 0)
193 				APPENDPATH();
194 			else if (asprintf(&tmppath,
195 			    "%s/magic.mgc", dllpath) >= 0)
196 				APPENDPATH();
197 		}
198 	}
199 
200 	/* Don't put MAGIC constant - it likely points to a file within MSys
201 	tree */
202 	default_magic = hmagicpath;
203 	return default_magic;
204 #endif
205 }
206 
207 public const char *
208 magic_getpath(const char *magicfile, int action)
209 {
210 	if (magicfile != NULL)
211 		return magicfile;
212 
213 	magicfile = getenv("MAGIC");
214 	if (magicfile != NULL)
215 		return magicfile;
216 
217 	return action == FILE_LOAD ? get_default_magic() : MAGIC;
218 }
219 #endif
220 
221 public struct magic_set *
magic_open(int flags)222 magic_open(int flags)
223 {
224 	struct magic_set *ms;
225 
226 	ms = ecalloc((size_t)1, sizeof(struct magic_set));
227 
228 	if (magic_setflags(ms, flags) == -1) {
229 		errno = EINVAL;
230 		goto free;
231 	}
232 
233 	ms->o.buf = ms->o.pbuf = NULL;
234 
235 	ms->c.li = emalloc((ms->c.len = 10) * sizeof(*ms->c.li));
236 
237 	ms->event_flags = 0;
238 	ms->error = -1;
239 	ms->mlist = NULL;
240 	ms->file = "unknown";
241 	ms->line = 0;
242 	return ms;
243 free:
244 	efree(ms);
245 	return NULL;
246 }
247 
248 private void
free_mlist(struct mlist * mlist)249 free_mlist(struct mlist *mlist)
250 {
251 	struct mlist *ml;
252 
253 	if (mlist == NULL)
254 		return;
255 
256 	for (ml = mlist->next; ml != mlist;) {
257 		struct mlist *next = ml->next;
258 		struct magic *mg = ml->magic;
259 		file_delmagic(mg, ml->mapped, ml->nmagic);
260 		efree(ml);
261 		ml = next;
262 	}
263 	efree(ml);
264 }
265 
266 private int
unreadable_info(struct magic_set * ms,mode_t md,const char * file)267 unreadable_info(struct magic_set *ms, mode_t md, const char *file)
268 {
269 	/* We cannot open it, but we were able to stat it. */
270 	if (access(file, W_OK) == 0)
271 		if (file_printf(ms, "writable, ") == -1)
272 			return -1;
273 	if (access(file, X_OK) == 0)
274 		if (file_printf(ms, "executable, ") == -1)
275 			return -1;
276 	if (S_ISREG(md))
277 		if (file_printf(ms, "regular file, ") == -1)
278 			return -1;
279 	if (file_printf(ms, "no read permission") == -1)
280 		return -1;
281 	return 0;
282 }
283 
284 public void
magic_close(struct magic_set * ms)285 magic_close(struct magic_set *ms)
286 {
287 	if (ms->mlist) {
288 		free_mlist(ms->mlist);
289 	}
290 	if (ms->o.pbuf) {
291 		efree(ms->o.pbuf);
292 	}
293 	if (ms->o.buf) {
294 		efree(ms->o.buf);
295 	}
296 	if (ms->c.li) {
297 		efree(ms->c.li);
298 	}
299 	efree(ms);
300 }
301 
302 /*
303  * load a magic file
304  */
305 public int
magic_load(struct magic_set * ms,const char * magicfile)306 magic_load(struct magic_set *ms, const char *magicfile)
307 {
308 	struct mlist *ml = file_apprentice(ms, magicfile, FILE_LOAD);
309 	if (ml) {
310 		free_mlist(ms->mlist);
311 		ms->mlist = ml;
312 		return 0;
313 	}
314 	return -1;
315 }
316 
317 public int
magic_compile(struct magic_set * ms,const char * magicfile)318 magic_compile(struct magic_set *ms, const char *magicfile)
319 {
320 	struct mlist *ml = file_apprentice(ms, magicfile, FILE_COMPILE);
321 	free_mlist(ml);
322 	return ml ? 0 : -1;
323 }
324 
325 
326 public int
magic_list(struct magic_set * ms,const char * magicfile)327 magic_list(struct magic_set *ms, const char *magicfile)
328 {
329 	struct mlist *ml = file_apprentice(ms, magicfile, FILE_LIST);
330 	free_mlist(ml);
331 	return ml ? 0 : -1;
332 }
333 
334 private void
close_and_restore(const struct magic_set * ms,const char * name,int fd,const struct stat * sb)335 close_and_restore(const struct magic_set *ms, const char *name, int fd,
336     const struct stat *sb)
337 {
338 
339 	if ((ms->flags & MAGIC_PRESERVE_ATIME) != 0) {
340 		/*
341 		 * Try to restore access, modification times if read it.
342 		 * This is really *bad* because it will modify the status
343 		 * time of the file... And of course this will affect
344 		 * backup programs
345 		 */
346 #ifdef HAVE_UTIMES
347 		struct timeval  utsbuf[2];
348 		(void)memset(utsbuf, 0, sizeof(utsbuf));
349 		utsbuf[0].tv_sec = sb->st_atime;
350 		utsbuf[1].tv_sec = sb->st_mtime;
351 
352 		(void) utimes(name, utsbuf); /* don't care if loses */
353 #elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
354 		struct utimbuf  utbuf;
355 
356 		(void)memset(&utbuf, 0, sizeof(utbuf));
357 		utbuf.actime = sb->st_atime;
358 		utbuf.modtime = sb->st_mtime;
359 		(void) utime(name, &utbuf); /* don't care if loses */
360 #endif
361 	}
362 }
363 
364 
365 /*
366  * find type of descriptor
367  */
368 public const char *
magic_descriptor(struct magic_set * ms,int fd)369 magic_descriptor(struct magic_set *ms, int fd)
370 {
371 	return file_or_stream(ms, NULL, NULL);
372 }
373 
374 /*
375  * find type of named file
376  */
377 public const char *
magic_file(struct magic_set * ms,const char * inname)378 magic_file(struct magic_set *ms, const char *inname)
379 {
380 	return file_or_stream(ms, inname, NULL);
381 }
382 
383 public const char *
magic_stream(struct magic_set * ms,php_stream * stream)384 magic_stream(struct magic_set *ms, php_stream *stream)
385 {
386 	return file_or_stream(ms, NULL, stream);
387 }
388 
389 private const char *
file_or_stream(struct magic_set * ms,const char * inname,php_stream * stream)390 file_or_stream(struct magic_set *ms, const char *inname, php_stream *stream)
391 {
392 	int	rv = -1;
393 	unsigned char *buf;
394 	struct stat	sb;
395 	ssize_t nbytes = 0;	/* number of bytes read from a datafile */
396 	int no_in_stream = 0;
397 	TSRMLS_FETCH();
398 
399 	if (!inname && !stream) {
400 		return NULL;
401 	}
402 
403 	/*
404 	 * one extra for terminating '\0', and
405 	 * some overlapping space for matches near EOF
406 	 */
407 #define SLOP (1 + sizeof(union VALUETYPE))
408 	buf = emalloc(HOWMANY + SLOP);
409 
410 	if (file_reset(ms) == -1)
411 		goto done;
412 
413 	switch (file_fsmagic(ms, inname, &sb, stream)) {
414 	case -1:		/* error */
415 		goto done;
416 	case 0:			/* nothing found */
417 		break;
418 	default:		/* matched it and printed type */
419 		rv = 0;
420 		goto done;
421 	}
422 
423 	errno = 0;
424 
425 	if (!stream && inname) {
426 		no_in_stream = 1;
427 #if PHP_API_VERSION < 20100412
428 		stream = php_stream_open_wrapper((char *)inname, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
429 #else
430 		stream = php_stream_open_wrapper((char *)inname, "rb", REPORT_ERRORS, NULL);
431 #endif
432 	}
433 
434 	if (!stream) {
435 		if (unreadable_info(ms, sb.st_mode, inname) == -1)
436 			goto done;
437 		rv = 0;
438 		goto done;
439 	}
440 
441 #ifdef O_NONBLOCK
442 /* we should be already be in non blocking mode for network socket */
443 #endif
444 
445 	/*
446 	 * try looking at the first HOWMANY bytes
447 	 */
448 	if ((nbytes = php_stream_read(stream, (char *)buf, HOWMANY)) < 0) {
449 		file_error(ms, errno, "cannot read `%s'", inname);
450 		goto done;
451 	}
452 
453 	(void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */
454 	if (file_buffer(ms, stream, inname, buf, (size_t)nbytes) == -1)
455 		goto done;
456 	rv = 0;
457 done:
458 	efree(buf);
459 
460 	if (no_in_stream && stream) {
461 		php_stream_close(stream);
462 	}
463 
464 	close_and_restore(ms, inname, 0, &sb);
465 	return rv == 0 ? file_getbuffer(ms) : NULL;
466 }
467 
468 
469 public const char *
magic_buffer(struct magic_set * ms,const void * buf,size_t nb)470 magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
471 {
472 	if (file_reset(ms) == -1)
473 		return NULL;
474 	/*
475 	 * The main work is done here!
476 	 * We have the file name and/or the data buffer to be identified.
477 	 */
478 	if (file_buffer(ms, NULL, NULL, buf, nb) == -1) {
479 		return NULL;
480 	}
481 	return file_getbuffer(ms);
482 }
483 
484 public const char *
magic_error(struct magic_set * ms)485 magic_error(struct magic_set *ms)
486 {
487 	return (ms->event_flags & EVENT_HAD_ERR) ? ms->o.buf : NULL;
488 }
489 
490 public int
magic_errno(struct magic_set * ms)491 magic_errno(struct magic_set *ms)
492 {
493 	return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0;
494 }
495 
496 public int
magic_setflags(struct magic_set * ms,int flags)497 magic_setflags(struct magic_set *ms, int flags)
498 {
499 #if !defined(HAVE_UTIME) && !defined(HAVE_UTIMES)
500 	if (flags & MAGIC_PRESERVE_ATIME)
501 		return -1;
502 #endif
503 	ms->flags = flags;
504 	return 0;
505 }
506