xref: /PHP-7.1/ext/fileinfo/libmagic/readcdf.c (revision f51d78a3)
1 /*-
2  * Copyright (c) 2008 Christos Zoulas
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 #include "file.h"
27 
28 #ifndef lint
29 FILE_RCSID("@(#)$File: readcdf.c,v 1.50 2015/01/02 21:29:39 christos Exp $")
30 #endif
31 
32 #include <stdlib.h>
33 #ifdef PHP_WIN32
34 #include "win32/unistd.h"
35 #else
36 #include <unistd.h>
37 #endif
38 #include <string.h>
39 #include <time.h>
40 #include <ctype.h>
41 #if defined(HAVE_LOCALE_H)
42 #include <locale.h>
43 #endif
44 
45 #include "cdf.h"
46 #include "magic.h"
47 
/*
 * Number of elements in a true array (not a pointer).  Only defined here
 * when the platform headers have not already provided it.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when none of the MAGIC_MIME flag bits are set on the magic_set. */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
53 
/*
 * A (pattern, result) pair.  Tables of these are terminated by an entry
 * whose pattern is NULL and are searched by substring in cdf_app_to_mime().
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/* Application-name substring -> MIME subtype (appended to "application/"). */
static const struct nv app2mime[] = {
	{ "Word",			"msword"		},
	{ "Excel",			"vnd.ms-excel"		},
	{ "Powerpoint",			"vnd.ms-powerpoint"	},
	{ "Crystal Reports",		"x-rpt"			},
	{ "Advanced Installer",		"vnd.ms-msi"		},
	{ "InstallShield",		"vnd.ms-msi"		},
	{ "Microsoft Patch Compiler",	"vnd.ms-msi"		},
	{ "NAnt",			"vnd.ms-msi"		},
	{ "Windows Installer",		"vnd.ms-msi"		},
	{ NULL,				NULL			}
};

/* Directory-entry name substring -> MIME subtype. */
static const struct nv name2mime[] = {
	{ "WordDocument",		"msword"		},
	{ "PowerPoint",			"vnd.ms-powerpoint"	},
	{ "DigitalSignature",		"vnd.ms-msi"		},
	{ NULL,				NULL			}
};

/* Directory-entry name substring -> human readable description. */
static const struct nv name2desc[] = {
	{ "WordDocument",		"Microsoft Office Word"	},
	{ "PowerPoint",			"Microsoft PowerPoint"	},
	{ "DigitalSignature",		"Microsoft Installer"	},
	{ NULL,				NULL			}
};
79 
/*
 * On PHP_WIN32 builds strcasestr is mapped to plain strstr, so the
 * pattern search in cdf_app_to_mime() is case-sensitive there.
 * NOTE(review): presumably because strcasestr() is unavailable on that
 * platform -- confirm.
 */
80 #ifdef PHP_WIN32
81 # define strcasestr strstr
82 #endif
83 
/*
 * A 128-bit class id, stored as two 64-bit halves, and the string it maps
 * to.  Tables of these are terminated by an entry whose mime is NULL and
 * are searched by cdf_clsid_to_mime().
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Class id -> MIME subtype (appended to "application/"). */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL }
};

/* Class id -> human readable description. */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL }
};
104 
105 private const char *
cdf_clsid_to_mime(const uint64_t clsid[2],const struct cv * cv)106 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
107 {
108 	size_t i;
109 	for (i = 0; cv[i].mime != NULL; i++) {
110 		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
111 			return cv[i].mime;
112 	}
113 	return NULL;
114 }
115 
116 private const char *
cdf_app_to_mime(const char * vbuf,const struct nv * nv)117 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
118 {
119 	size_t i;
120 	const char *rv = NULL;
121 
122 	(void)setlocale(LC_CTYPE, "C");
123 	for (i = 0; nv[i].pattern != NULL; i++)
124 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
125 			rv = nv[i].mime;
126 			break;
127 		}
128 	(void)setlocale(LC_CTYPE, "");
129 	return rv;
130 }
131 
132 private int
cdf_file_property_info(struct magic_set * ms,const cdf_property_info_t * info,size_t count,const cdf_directory_t * root_storage)133 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
134     size_t count, const cdf_directory_t *root_storage)
135 {
136         size_t i;
137         cdf_timestamp_t tp;
138         struct timeval ts;
139         char buf[64];
140         const char *str = NULL;
141         const char *s;
142         int len;
143 
144 	memset(&ts, 0, sizeof(ts));
145 
146         if (!NOTMIME(ms) && root_storage)
147 		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
148 		    clsid2mime);
149 
150         for (i = 0; i < count; i++) {
151                 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
152                 switch (info[i].pi_type) {
153                 case CDF_NULL:
154                         break;
155                 case CDF_SIGNED16:
156                         if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
157                             info[i].pi_s16) == -1)
158                                 return -1;
159                         break;
160                 case CDF_SIGNED32:
161                         if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
162                             info[i].pi_s32) == -1)
163                                 return -1;
164                         break;
165                 case CDF_UNSIGNED32:
166                         if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
167                             info[i].pi_u32) == -1)
168                                 return -1;
169                         break;
170                 case CDF_FLOAT:
171                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
172                             info[i].pi_f) == -1)
173                                 return -1;
174                         break;
175                 case CDF_DOUBLE:
176                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
177                             info[i].pi_d) == -1)
178                                 return -1;
179                         break;
180                 case CDF_LENGTH32_STRING:
181                 case CDF_LENGTH32_WSTRING:
182                         len = info[i].pi_str.s_len;
183                         if (len > 1) {
184                                 char vbuf[1024];
185                                 size_t j, k = 1;
186 
187                                 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
188                                     k++;
189                                 s = info[i].pi_str.s_buf;
190                                 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
191                                         if (*s == '\0')
192                                                 break;
193                                         if (isprint((unsigned char)*s))
194                                                 vbuf[j++] = *s;
195                                 }
196                                 if (j == sizeof(vbuf))
197                                         --j;
198                                 vbuf[j] = '\0';
199                                 if (NOTMIME(ms)) {
200                                         if (vbuf[0]) {
201                                                 if (file_printf(ms, ", %s: %s",
202                                                     buf, vbuf) == -1)
203                                                         return -1;
204                                         }
205                                 } else if (str == NULL && info[i].pi_id ==
206 				    CDF_PROPERTY_NAME_OF_APPLICATION) {
207 					str = cdf_app_to_mime(vbuf, app2mime);
208 				}
209 			}
210                         break;
211                 case CDF_FILETIME:
212                         tp = info[i].pi_tp;
213                         if (tp != 0) {
214 				char tbuf[64];
215                                 if (tp < 1000000000000000LL) {
216                                         cdf_print_elapsed_time(tbuf,
217                                             sizeof(tbuf), tp);
218                                         if (NOTMIME(ms) && file_printf(ms,
219                                             ", %s: %s", buf, tbuf) == -1)
220                                                 return -1;
221                                 } else {
222                                         char *c, *ec;
223 					const time_t sec = ts.tv_sec;
224                                         if (cdf_timestamp_to_timespec(&ts, tp) == -1) {
225 											return -1;
226 										}
227                                         c = cdf_ctime(&sec, tbuf);
228                                         if (c != NULL &&
229 					    (ec = strchr(c, '\n')) != NULL)
230 						*ec = '\0';
231 
232                                         if (NOTMIME(ms) && file_printf(ms,
233                                             ", %s: %s", buf, c) == -1)
234                                                 return -1;
235                                 }
236                         }
237                         break;
238                 case CDF_CLIPBOARD:
239                         break;
240                 default:
241                         return -1;
242                 }
243         }
244         if (!NOTMIME(ms)) {
245 		if (str == NULL)
246 			return 0;
247                 if (file_printf(ms, "application/%s", str) == -1)
248                         return -1;
249         }
250         return 1;
251 }
252 
253 private int
cdf_file_catalog(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst)254 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
255     const cdf_stream_t *sst)
256 {
257 	cdf_catalog_t *cat;
258 	size_t i;
259 	char buf[256];
260 	cdf_catalog_entry_t *ce;
261 
262         if (NOTMIME(ms)) {
263 		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
264 			return -1;
265 		if (cdf_unpack_catalog(h, sst, &cat) == -1)
266 			return -1;
267 		ce = cat->cat_e;
268 		/* skip first entry since it has a , or paren */
269 		for (i = 1; i < cat->cat_num; i++)
270 			if (file_printf(ms, "%s%s",
271 			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
272 			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
273 				free(cat);
274 				return -1;
275 			}
276 		free(cat);
277 	} else {
278 		if (file_printf(ms, "application/CDFV2") == -1)
279 			return -1;
280 	}
281 	return 1;
282 }
283 
284 private int
cdf_file_summary_info(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst,const cdf_directory_t * root_storage)285 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
286     const cdf_stream_t *sst, const cdf_directory_t *root_storage)
287 {
288         cdf_summary_info_header_t si;
289         cdf_property_info_t *info;
290         size_t count;
291         int m;
292 
293         if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
294                 return -1;
295 
296         if (NOTMIME(ms)) {
297 		const char *str;
298 
299                 if (file_printf(ms, "Composite Document File V2 Document")
300 		    == -1)
301                         return -1;
302 
303                 if (file_printf(ms, ", %s Endian",
304                     si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
305                         return -2;
306                 switch (si.si_os) {
307                 case 2:
308                         if (file_printf(ms, ", Os: Windows, Version %d.%d",
309                             si.si_os_version & 0xff,
310                             (uint32_t)si.si_os_version >> 8) == -1)
311                                 return -2;
312                         break;
313                 case 1:
314                         if (file_printf(ms, ", Os: MacOS, Version %d.%d",
315                             (uint32_t)si.si_os_version >> 8,
316                             si.si_os_version & 0xff) == -1)
317                                 return -2;
318                         break;
319                 default:
320                         if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
321                             si.si_os_version & 0xff,
322                             (uint32_t)si.si_os_version >> 8) == -1)
323                                 return -2;
324                         break;
325                 }
326 		if (root_storage) {
327 			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
328 			    clsid2desc);
329 			if (str) {
330 				if (file_printf(ms, ", %s", str) == -1)
331 					return -2;
332 			}
333 		}
334 	}
335 
336         m = cdf_file_property_info(ms, info, count, root_storage);
337         free(info);
338 
339         return m == -1 ? -2 : m;
340 }
341 
#ifdef notdef
/*
 * Format a class id, given as two 64-bit halves, into buf as five
 * dash-separated hexadecimal groups of 8, 4, 4, 4 and 12 digits.
 *
 * The final group is the low 48 bits of uuid[1]; the mask must stay at
 * 48 bits so it does not overlap the group taken from the top 16 bits.
 *
 * Returns buf.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
355 
356 private int
cdf_file_catalog_info(struct magic_set * ms,const cdf_info_t * info,const cdf_header_t * h,const cdf_sat_t * sat,const cdf_sat_t * ssat,const cdf_stream_t * sst,const cdf_dir_t * dir,cdf_stream_t * scn)357 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
358     const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
359     const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
360 {
361 	int i;
362 
363 	if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
364 	    dir, "Catalog", scn)) <= 0)
365 		return i;
366 #ifdef CDF_DEBUG
367 	cdf_dump_catalog(&h, &scn);
368 #endif
369 	if ((i = cdf_file_catalog(ms, h, scn)) == -1)
370 		return -1;
371 	return i;
372 }
373 
374 private struct sinfo {
375 	const char *name;
376 	const char *mime;
377 	const char *sections[5];
378 	const int  types[5];
379 } sectioninfo[] = {
380 	{ "Encrypted", "encrypted",
381 		{
382 			"EncryptedPackage", NULL, NULL, NULL, NULL,
383 		},
384 		{
385 			CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
386 
387 		},
388 	},
389 	{ "QuickBooks", "quickbooks",
390 		{
391 #if 0
392 			"TaxForms", "PDFTaxForms", "modulesInBackup",
393 #endif
394 			"mfbu_header", NULL, NULL, NULL, NULL,
395 		},
396 		{
397 #if 0
398 			CDF_DIR_TYPE_USER_STORAGE,
399 			CDF_DIR_TYPE_USER_STORAGE,
400 			CDF_DIR_TYPE_USER_STREAM,
401 #endif
402 			CDF_DIR_TYPE_USER_STREAM,
403 			0, 0, 0, 0
404 		},
405 	},
406 };
407 
408 private int
cdf_file_dir_info(struct magic_set * ms,const cdf_dir_t * dir)409 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
410 {
411 	size_t sd, j;
412 
413 	for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
414 		const struct sinfo *si = &sectioninfo[sd];
415 		for (j = 0; si->sections[j]; j++) {
416 			if (cdf_find_stream(dir, si->sections[j], si->types[j])
417 			    <= 0) {
418 #ifdef CDF_DEBUG
419 				fprintf(stderr, "Can't read %s\n",
420 				    si->sections[j]);
421 #endif
422 				break;
423 			}
424 		}
425 		if (si->sections[j] != NULL)
426 			continue;
427 		if (NOTMIME(ms)) {
428 			if (file_printf(ms, "CDFV2 %s", si->name) == -1)
429 				return -1;
430 		} else {
431 			if (file_printf(ms, "application/CDFV2-%s",
432 			    si->mime) == -1)
433 				return -1;
434 		}
435 		return 1;
436 	}
437 	return -1;
438 }
439 
440 protected int
file_trycdf(struct magic_set * ms,int fd,const unsigned char * buf,size_t nbytes)441 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
442     size_t nbytes)
443 {
444         cdf_info_t info;
445         cdf_header_t h;
446         cdf_sat_t sat, ssat;
447         cdf_stream_t sst, scn;
448         cdf_dir_t dir;
449         int i;
450         const char *expn = "";
451         const cdf_directory_t *root_storage;
452 
453         info.i_fd = fd;
454         info.i_buf = buf;
455         info.i_len = nbytes;
456         if (ms->flags & MAGIC_APPLE)
457                 return 0;
458         if (cdf_read_header(&info, &h) == -1)
459                 return 0;
460 #ifdef CDF_DEBUG
461         cdf_dump_header(&h);
462 #endif
463 
464         if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
465                 expn = "Can't read SAT";
466                 goto out0;
467         }
468 #ifdef CDF_DEBUG
469         cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
470 #endif
471 
472         if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
473                 expn = "Can't read SSAT";
474                 goto out1;
475         }
476 #ifdef CDF_DEBUG
477         cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
478 #endif
479 
480         if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
481                 expn = "Can't read directory";
482                 goto out2;
483         }
484 
485         if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
486 	    &root_storage)) == -1) {
487                 expn = "Cannot read short stream";
488                 goto out3;
489         }
490 #ifdef CDF_DEBUG
491         cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
492 #endif
493 #ifdef notdef
494 	if (root_storage) {
495 		if (NOTMIME(ms)) {
496 			char clsbuf[128];
497 			if (file_printf(ms, "CLSID %s, ",
498 			    format_clsid(clsbuf, sizeof(clsbuf),
499 			    root_storage->d_storage_uuid)) == -1)
500 				return -1;
501 		}
502 	}
503 #endif
504 
505 	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
506 	    "FileHeader", &scn)) != -1) {
507 #define HWP5_SIGNATURE "HWP Document File"
508 		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
509 		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
510 		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
511 		    if (NOTMIME(ms)) {
512 			if (file_printf(ms,
513 			    "Hangul (Korean) Word Processor File 5.x") == -1)
514 			    return -1;
515 		    } else {
516 			if (file_printf(ms, "application/x-hwp") == -1)
517 			    return -1;
518 		    }
519 		    i = 1;
520 		    goto out5;
521 		} else {
522 		    free(scn.sst_tab);
523 		    scn.sst_tab = NULL;
524 		    scn.sst_len = 0;
525 		    scn.sst_dirlen = 0;
526 		}
527 	}
528 
529         if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
530             &scn)) == -1) {
531                 if (errno != ESRCH) {
532                         expn = "Cannot read summary info";
533 			goto out4;
534 		}
535 		i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
536 		    &dir, &scn);
537 		if (i > 0)
538 			goto out4;
539 		i = cdf_file_dir_info(ms, &dir);
540 		if (i < 0)
541                         expn = "Cannot read section info";
542 		goto out4;
543 	}
544 
545 
546 #ifdef CDF_DEBUG
547         cdf_dump_summary_info(&h, &scn);
548 #endif
549         if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
550             expn = "Can't expand summary_info";
551 
552 	if (i == 0) {
553 		const char *str = NULL;
554 		cdf_directory_t *d;
555 		char name[__arraycount(d->d_name)];
556 		size_t j, k;
557 
558 		for (j = 0; str == NULL && j < dir.dir_len; j++) {
559 			d = &dir.dir_tab[j];
560 			for (k = 0; k < sizeof(name); k++)
561 				name[k] = (char)cdf_tole2(d->d_name[k]);
562 			str = cdf_app_to_mime(name,
563 			    NOTMIME(ms) ? name2desc : name2mime);
564 		}
565 		if (NOTMIME(ms)) {
566 			if (str != NULL) {
567 				if (file_printf(ms, "%s", str) == -1)
568 					return -1;
569 				i = 1;
570 			}
571 		} else {
572 			if (str == NULL)
573 				str = "vnd.ms-office";
574 			if (file_printf(ms, "application/%s", str) == -1)
575 				return -1;
576 			i = 1;
577 		}
578 	}
579 out5:
580         free(scn.sst_tab);
581 out4:
582         free(sst.sst_tab);
583 out3:
584         free(dir.dir_tab);
585 out2:
586         free(ssat.sat_tab);
587 out1:
588         free(sat.sat_tab);
589 out0:
590 	if (i == -1) {
591 	    if (NOTMIME(ms)) {
592 		if (file_printf(ms,
593 		    "Composite Document File V2 Document") == -1)
594 		    return -1;
595 		if (*expn)
596 		    if (file_printf(ms, ", %s", expn) == -1)
597 			return -1;
598 	    } else {
599 		if (file_printf(ms, "application/CDFV2-unknown") == -1)
600 		    return -1;
601 	    }
602 	    i = 1;
603 	}
604         return i;
605 }
606