1 /*-
2 * Copyright (c) 2008 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26 #include "file.h"
27
28 #ifndef lint
29 FILE_RCSID("@(#)$File: readcdf.c,v 1.50 2015/01/02 21:29:39 christos Exp $")
30 #endif
31
32 #include <stdlib.h>
33 #ifdef PHP_WIN32
34 #include "win32/unistd.h"
35 #else
36 #include <unistd.h>
37 #endif
38 #include <string.h>
39 #include <time.h>
40 #include <ctype.h>
41 #if defined(HAVE_LOCALE_H)
42 #include <locale.h>
43 #endif
44
45 #include "cdf.h"
46 #include "magic.h"
47
/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized so that any array-valued expression can be passed.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when the caller did not ask for MIME output (no MAGIC_MIME bits). */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
53
/*
 * One row of a substring lookup table.  Every table below ends with a row
 * whose pattern is NULL, which is where cdf_app_to_mime() stops scanning.
 */
struct nv {
	const char *pattern;	/* substring to look for */
	const char *mime;	/* string returned when the pattern matches */
};

/* Application-name substrings and the MIME subtype reported for each. */
static const struct nv app2mime[] = {
	{ "Word",			"msword" },
	{ "Excel",			"vnd.ms-excel" },
	{ "Powerpoint",			"vnd.ms-powerpoint" },
	{ "Crystal Reports",		"x-rpt" },
	{ "Advanced Installer",		"vnd.ms-msi" },
	{ "InstallShield",		"vnd.ms-msi" },
	{ "Microsoft Patch Compiler",	"vnd.ms-msi" },
	{ "NAnt",			"vnd.ms-msi" },
	{ "Windows Installer",		"vnd.ms-msi" },
	{ NULL,				NULL },
};

/* Directory-entry name substrings and the MIME subtype reported for each. */
static const struct nv name2mime[] = {
	{ "WordDocument",		"msword" },
	{ "PowerPoint",			"vnd.ms-powerpoint" },
	{ "DigitalSignature",		"vnd.ms-msi" },
	{ NULL,				NULL },
};

/* Directory-entry name substrings and the text description for each. */
static const struct nv name2desc[] = {
	{ "WordDocument",		"Microsoft Office Word" },
	{ "PowerPoint",			"Microsoft PowerPoint" },
	{ "DigitalSignature",		"Microsoft Installer" },
	{ NULL,				NULL },
};
79
80 #ifdef PHP_WIN32
81 # define strcasestr strstr
82 #endif
83
/*
 * One row of a class-id lookup table: a 128-bit id held as two 64-bit
 * halves plus the string reported for it.  Each table ends with a row
 * whose mime field is NULL, which is where cdf_clsid_to_mime() stops.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Class ids and the MIME subtype reported for each. */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL },
};

/* Class ids and the text description reported for each. */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL },
};
104
105 private const char *
cdf_clsid_to_mime(const uint64_t clsid[2],const struct cv * cv)106 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
107 {
108 size_t i;
109 for (i = 0; cv[i].mime != NULL; i++) {
110 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
111 return cv[i].mime;
112 }
113 return NULL;
114 }
115
116 private const char *
cdf_app_to_mime(const char * vbuf,const struct nv * nv)117 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
118 {
119 size_t i;
120 const char *rv = NULL;
121
122 (void)setlocale(LC_CTYPE, "C");
123 for (i = 0; nv[i].pattern != NULL; i++)
124 if (strcasestr(vbuf, nv[i].pattern) != NULL) {
125 rv = nv[i].mime;
126 break;
127 }
128 (void)setlocale(LC_CTYPE, "");
129 return rv;
130 }
131
132 private int
cdf_file_property_info(struct magic_set * ms,const cdf_property_info_t * info,size_t count,const cdf_directory_t * root_storage)133 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
134 size_t count, const cdf_directory_t *root_storage)
135 {
136 size_t i;
137 cdf_timestamp_t tp;
138 struct timeval ts;
139 char buf[64];
140 const char *str = NULL;
141 const char *s;
142 int len;
143
144 memset(&ts, 0, sizeof(ts));
145
146 if (!NOTMIME(ms) && root_storage)
147 str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
148 clsid2mime);
149
150 for (i = 0; i < count; i++) {
151 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
152 switch (info[i].pi_type) {
153 case CDF_NULL:
154 break;
155 case CDF_SIGNED16:
156 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
157 info[i].pi_s16) == -1)
158 return -1;
159 break;
160 case CDF_SIGNED32:
161 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
162 info[i].pi_s32) == -1)
163 return -1;
164 break;
165 case CDF_UNSIGNED32:
166 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
167 info[i].pi_u32) == -1)
168 return -1;
169 break;
170 case CDF_FLOAT:
171 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
172 info[i].pi_f) == -1)
173 return -1;
174 break;
175 case CDF_DOUBLE:
176 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
177 info[i].pi_d) == -1)
178 return -1;
179 break;
180 case CDF_LENGTH32_STRING:
181 case CDF_LENGTH32_WSTRING:
182 len = info[i].pi_str.s_len;
183 if (len > 1) {
184 char vbuf[1024];
185 size_t j, k = 1;
186
187 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
188 k++;
189 s = info[i].pi_str.s_buf;
190 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
191 if (*s == '\0')
192 break;
193 if (isprint((unsigned char)*s))
194 vbuf[j++] = *s;
195 }
196 if (j == sizeof(vbuf))
197 --j;
198 vbuf[j] = '\0';
199 if (NOTMIME(ms)) {
200 if (vbuf[0]) {
201 if (file_printf(ms, ", %s: %s",
202 buf, vbuf) == -1)
203 return -1;
204 }
205 } else if (str == NULL && info[i].pi_id ==
206 CDF_PROPERTY_NAME_OF_APPLICATION) {
207 str = cdf_app_to_mime(vbuf, app2mime);
208 }
209 }
210 break;
211 case CDF_FILETIME:
212 tp = info[i].pi_tp;
213 if (tp != 0) {
214 char tbuf[64];
215 if (tp < 1000000000000000LL) {
216 cdf_print_elapsed_time(tbuf,
217 sizeof(tbuf), tp);
218 if (NOTMIME(ms) && file_printf(ms,
219 ", %s: %s", buf, tbuf) == -1)
220 return -1;
221 } else {
222 char *c, *ec;
223 const time_t sec = ts.tv_sec;
224 if (cdf_timestamp_to_timespec(&ts, tp) == -1) {
225 return -1;
226 }
227 c = cdf_ctime(&sec, tbuf);
228 if (c != NULL &&
229 (ec = strchr(c, '\n')) != NULL)
230 *ec = '\0';
231
232 if (NOTMIME(ms) && file_printf(ms,
233 ", %s: %s", buf, c) == -1)
234 return -1;
235 }
236 }
237 break;
238 case CDF_CLIPBOARD:
239 break;
240 default:
241 return -1;
242 }
243 }
244 if (!NOTMIME(ms)) {
245 if (str == NULL)
246 return 0;
247 if (file_printf(ms, "application/%s", str) == -1)
248 return -1;
249 }
250 return 1;
251 }
252
253 private int
cdf_file_catalog(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst)254 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
255 const cdf_stream_t *sst)
256 {
257 cdf_catalog_t *cat;
258 size_t i;
259 char buf[256];
260 cdf_catalog_entry_t *ce;
261
262 if (NOTMIME(ms)) {
263 if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
264 return -1;
265 if (cdf_unpack_catalog(h, sst, &cat) == -1)
266 return -1;
267 ce = cat->cat_e;
268 /* skip first entry since it has a , or paren */
269 for (i = 1; i < cat->cat_num; i++)
270 if (file_printf(ms, "%s%s",
271 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
272 i == cat->cat_num - 1 ? "]" : ", ") == -1) {
273 free(cat);
274 return -1;
275 }
276 free(cat);
277 } else {
278 if (file_printf(ms, "application/CDFV2") == -1)
279 return -1;
280 }
281 return 1;
282 }
283
284 private int
cdf_file_summary_info(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst,const cdf_directory_t * root_storage)285 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
286 const cdf_stream_t *sst, const cdf_directory_t *root_storage)
287 {
288 cdf_summary_info_header_t si;
289 cdf_property_info_t *info;
290 size_t count;
291 int m;
292
293 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
294 return -1;
295
296 if (NOTMIME(ms)) {
297 const char *str;
298
299 if (file_printf(ms, "Composite Document File V2 Document")
300 == -1)
301 return -1;
302
303 if (file_printf(ms, ", %s Endian",
304 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1)
305 return -2;
306 switch (si.si_os) {
307 case 2:
308 if (file_printf(ms, ", Os: Windows, Version %d.%d",
309 si.si_os_version & 0xff,
310 (uint32_t)si.si_os_version >> 8) == -1)
311 return -2;
312 break;
313 case 1:
314 if (file_printf(ms, ", Os: MacOS, Version %d.%d",
315 (uint32_t)si.si_os_version >> 8,
316 si.si_os_version & 0xff) == -1)
317 return -2;
318 break;
319 default:
320 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
321 si.si_os_version & 0xff,
322 (uint32_t)si.si_os_version >> 8) == -1)
323 return -2;
324 break;
325 }
326 if (root_storage) {
327 str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
328 clsid2desc);
329 if (str) {
330 if (file_printf(ms, ", %s", str) == -1)
331 return -2;
332 }
333 }
334 }
335
336 m = cdf_file_property_info(ms, info, count, root_storage);
337 free(info);
338
339 return m == -1 ? -2 : m;
340 }
341
342 #ifdef notdef
343 private char *
format_clsid(char * buf,size_t len,const uint64_t uuid[2])344 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
345 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
346 PRIx64 "-%.12" PRIx64,
347 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL,
348 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL,
349 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL,
350 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL,
351 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL);
352 return buf;
353 }
354 #endif
355
356 private int
cdf_file_catalog_info(struct magic_set * ms,const cdf_info_t * info,const cdf_header_t * h,const cdf_sat_t * sat,const cdf_sat_t * ssat,const cdf_stream_t * sst,const cdf_dir_t * dir,cdf_stream_t * scn)357 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
358 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
359 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
360 {
361 int i;
362
363 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
364 dir, "Catalog", scn)) <= 0)
365 return i;
366 #ifdef CDF_DEBUG
367 cdf_dump_catalog(&h, &scn);
368 #endif
369 if ((i = cdf_file_catalog(ms, h, scn)) == -1)
370 return -1;
371 return i;
372 }
373
374 private struct sinfo {
375 const char *name;
376 const char *mime;
377 const char *sections[5];
378 const int types[5];
379 } sectioninfo[] = {
380 { "Encrypted", "encrypted",
381 {
382 "EncryptedPackage", NULL, NULL, NULL, NULL,
383 },
384 {
385 CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
386
387 },
388 },
389 { "QuickBooks", "quickbooks",
390 {
391 #if 0
392 "TaxForms", "PDFTaxForms", "modulesInBackup",
393 #endif
394 "mfbu_header", NULL, NULL, NULL, NULL,
395 },
396 {
397 #if 0
398 CDF_DIR_TYPE_USER_STORAGE,
399 CDF_DIR_TYPE_USER_STORAGE,
400 CDF_DIR_TYPE_USER_STREAM,
401 #endif
402 CDF_DIR_TYPE_USER_STREAM,
403 0, 0, 0, 0
404 },
405 },
406 };
407
408 private int
cdf_file_dir_info(struct magic_set * ms,const cdf_dir_t * dir)409 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
410 {
411 size_t sd, j;
412
413 for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
414 const struct sinfo *si = §ioninfo[sd];
415 for (j = 0; si->sections[j]; j++) {
416 if (cdf_find_stream(dir, si->sections[j], si->types[j])
417 <= 0) {
418 #ifdef CDF_DEBUG
419 fprintf(stderr, "Can't read %s\n",
420 si->sections[j]);
421 #endif
422 break;
423 }
424 }
425 if (si->sections[j] != NULL)
426 continue;
427 if (NOTMIME(ms)) {
428 if (file_printf(ms, "CDFV2 %s", si->name) == -1)
429 return -1;
430 } else {
431 if (file_printf(ms, "application/CDFV2-%s",
432 si->mime) == -1)
433 return -1;
434 }
435 return 1;
436 }
437 return -1;
438 }
439
440 protected int
file_trycdf(struct magic_set * ms,int fd,const unsigned char * buf,size_t nbytes)441 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
442 size_t nbytes)
443 {
444 cdf_info_t info;
445 cdf_header_t h;
446 cdf_sat_t sat, ssat;
447 cdf_stream_t sst, scn;
448 cdf_dir_t dir;
449 int i;
450 const char *expn = "";
451 const cdf_directory_t *root_storage;
452
453 info.i_fd = fd;
454 info.i_buf = buf;
455 info.i_len = nbytes;
456 if (ms->flags & MAGIC_APPLE)
457 return 0;
458 if (cdf_read_header(&info, &h) == -1)
459 return 0;
460 #ifdef CDF_DEBUG
461 cdf_dump_header(&h);
462 #endif
463
464 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
465 expn = "Can't read SAT";
466 goto out0;
467 }
468 #ifdef CDF_DEBUG
469 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
470 #endif
471
472 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
473 expn = "Can't read SSAT";
474 goto out1;
475 }
476 #ifdef CDF_DEBUG
477 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
478 #endif
479
480 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
481 expn = "Can't read directory";
482 goto out2;
483 }
484
485 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
486 &root_storage)) == -1) {
487 expn = "Cannot read short stream";
488 goto out3;
489 }
490 #ifdef CDF_DEBUG
491 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
492 #endif
493 #ifdef notdef
494 if (root_storage) {
495 if (NOTMIME(ms)) {
496 char clsbuf[128];
497 if (file_printf(ms, "CLSID %s, ",
498 format_clsid(clsbuf, sizeof(clsbuf),
499 root_storage->d_storage_uuid)) == -1)
500 return -1;
501 }
502 }
503 #endif
504
505 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
506 "FileHeader", &scn)) != -1) {
507 #define HWP5_SIGNATURE "HWP Document File"
508 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
509 && memcmp(scn.sst_tab, HWP5_SIGNATURE,
510 sizeof(HWP5_SIGNATURE) - 1) == 0) {
511 if (NOTMIME(ms)) {
512 if (file_printf(ms,
513 "Hangul (Korean) Word Processor File 5.x") == -1)
514 return -1;
515 } else {
516 if (file_printf(ms, "application/x-hwp") == -1)
517 return -1;
518 }
519 i = 1;
520 goto out5;
521 } else {
522 free(scn.sst_tab);
523 scn.sst_tab = NULL;
524 scn.sst_len = 0;
525 scn.sst_dirlen = 0;
526 }
527 }
528
529 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
530 &scn)) == -1) {
531 if (errno != ESRCH) {
532 expn = "Cannot read summary info";
533 goto out4;
534 }
535 i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
536 &dir, &scn);
537 if (i > 0)
538 goto out4;
539 i = cdf_file_dir_info(ms, &dir);
540 if (i < 0)
541 expn = "Cannot read section info";
542 goto out4;
543 }
544
545
546 #ifdef CDF_DEBUG
547 cdf_dump_summary_info(&h, &scn);
548 #endif
549 if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
550 expn = "Can't expand summary_info";
551
552 if (i == 0) {
553 const char *str = NULL;
554 cdf_directory_t *d;
555 char name[__arraycount(d->d_name)];
556 size_t j, k;
557
558 for (j = 0; str == NULL && j < dir.dir_len; j++) {
559 d = &dir.dir_tab[j];
560 for (k = 0; k < sizeof(name); k++)
561 name[k] = (char)cdf_tole2(d->d_name[k]);
562 str = cdf_app_to_mime(name,
563 NOTMIME(ms) ? name2desc : name2mime);
564 }
565 if (NOTMIME(ms)) {
566 if (str != NULL) {
567 if (file_printf(ms, "%s", str) == -1)
568 return -1;
569 i = 1;
570 }
571 } else {
572 if (str == NULL)
573 str = "vnd.ms-office";
574 if (file_printf(ms, "application/%s", str) == -1)
575 return -1;
576 i = 1;
577 }
578 }
579 out5:
580 free(scn.sst_tab);
581 out4:
582 free(sst.sst_tab);
583 out3:
584 free(dir.dir_tab);
585 out2:
586 free(ssat.sat_tab);
587 out1:
588 free(sat.sat_tab);
589 out0:
590 if (i == -1) {
591 if (NOTMIME(ms)) {
592 if (file_printf(ms,
593 "Composite Document File V2 Document") == -1)
594 return -1;
595 if (*expn)
596 if (file_printf(ms, ", %s", expn) == -1)
597 return -1;
598 } else {
599 if (file_printf(ms, "application/CDFV2-unknown") == -1)
600 return -1;
601 }
602 i = 1;
603 }
604 return i;
605 }
606