1 /*-
2 * Copyright (c) 2018 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /*
28 * Parse JSON object serialization format (RFC-7159)
29 */
30
31 #ifndef TEST
32 #include "file.h"
33
34 #ifndef lint
35 FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
36 #endif
37
38 #include "magic.h"
39 #else
40 #include <stdio.h>
41 #include <stddef.h>
42 #endif
43 #include <string.h>
44
/*
 * Debug tracing support.
 *
 * With DEBUG defined, DPRINTF(a, b, c) prints the label `a' indented by
 * `lvl' columns, the byte at `b' (in hex and as a character) and up to
 * b - c bytes of text starting at `c'.  The macro refers to a variable
 * named `lvl' that must exist in the calling scope, and it dereferences
 * `b' unconditionally.
 *
 * Without DEBUG, DPRINTF expands to an empty statement and
 * __file_debugused marks the otherwise unused `lvl' parameters so the
 * compiler does not warn about them.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	(int)(b - c), (const char *)(c))
#define __file_debugused
#else
#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
#define __file_debugused __attribute__((__unused__))
#endif
55
/*
 * Slots of the per-type statistics array filled in by the parser;
 * JSON_MAX is the number of slots.
 */
enum {
	JSON_ARRAY = 0,
	JSON_CONSTANT = 1,
	JSON_NUMBER = 2,
	JSON_OBJECT = 3,
	JSON_STRING = 4,
	JSON_ARRAYN = 5,
	JSON_MAX = 6
};
63
/*
 * JSON_COUNT is a compile-time switch; it defaults to 0 unless the build
 * defines it.
 *
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 *
 * In this file a nonzero value compiles in the early return at the top
 * of json_parse() and the per-type statistics printed by file_is_json().
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
73
74 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
75 size_t);
76
/*
 * Return 1 if uc is a space, line feed, carriage return or horizontal
 * tab, and 0 for every other byte.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90
/*
 * Return 1 if uc is one of the decimal digits '0'..'9', otherwise 0.
 */
static int
json_isdigit(unsigned char uc)
{
	/* the C standard guarantees that '0'..'9' are consecutive */
	return uc >= '0' && uc <= '9';
}
102
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f or A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	if (uc >= 'a' && uc <= 'f')
		return 1;
	if (uc >= 'A' && uc <= 'F')
		return 1;
	return 0;
}
116
/*
 * Advance uc over any run of JSON whitespace, never going past ue.
 * Returns the first position that is not whitespace, or ue.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
124
125 /*ARGSUSED*/
126 static int
json_parse_string(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)127 json_parse_string(const unsigned char **ucp, const unsigned char *ue,
128 size_t lvl __file_debugused)
129 {
130 const unsigned char *uc = *ucp;
131 size_t i;
132
133 DPRINTF("Parse string: ", uc, *ucp);
134 while (uc < ue) {
135 switch (*uc++) {
136 case '\0':
137 goto out;
138 case '\\':
139 if (uc == ue)
140 goto out;
141 switch (*uc++) {
142 case '\0':
143 goto out;
144 case '"':
145 case '\\':
146 case '/':
147 case 'b':
148 case 'f':
149 case 'n':
150 case 'r':
151 case 't':
152 continue;
153 case 'u':
154 if (ue - uc < 4) {
155 uc = ue;
156 goto out;
157 }
158 for (i = 0; i < 4; i++)
159 if (!json_isxdigit(*uc++))
160 goto out;
161 continue;
162 default:
163 goto out;
164 }
165 case '"':
166 DPRINTF("Good string: ", uc, *ucp);
167 *ucp = uc;
168 return 1;
169 default:
170 continue;
171 }
172 }
173 out:
174 DPRINTF("Bad string: ", uc, *ucp);
175 *ucp = uc;
176 return 0;
177 }
178
179 static int
json_parse_array(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)180 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
181 size_t *st, size_t lvl)
182 {
183 const unsigned char *uc = *ucp;
184
185 DPRINTF("Parse array: ", uc, *ucp);
186 while (uc < ue) {
187 uc = json_skip_space(uc, ue);
188 if (uc == ue)
189 goto out;
190 if (*uc == ']')
191 goto done;
192 if (!json_parse(&uc, ue, st, lvl + 1))
193 goto out;
194 if (uc == ue)
195 goto out;
196 switch (*uc) {
197 case ',':
198 uc++;
199 continue;
200 case ']':
201 done:
202 st[JSON_ARRAYN]++;
203 DPRINTF("Good array: ", uc, *ucp);
204 *ucp = uc + 1;
205 return 1;
206 default:
207 goto out;
208 }
209 }
210 out:
211 DPRINTF("Bad array: ", uc, *ucp);
212 *ucp = uc;
213 return 0;
214 }
215
/*
 * Parse the members of an object; on entry *ucp points just past the
 * opening brace.
 *
 * Each member must be a quoted string, a ':' and a value parsed by
 * json_parse(), followed by ',' (another member) or the closing brace.
 * Returns 1 with *ucp just past the closing brace, or 0 with *ucp at the
 * position where parsing stopped (including when the buffer ends first).
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;
	DPRINTF("Parse object: ", uc, *ucp);
	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		/*
		 * A closing brace is accepted wherever a member name may
		 * start, i.e. for an empty object and also after a ','.
		 */
		if (*uc == '}') {
			uc++;
			goto done;
		}
		/* the member name must start with a quote; step past it */
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue, lvl)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		/* json_parse() skips whitespace on both sides of the value */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
		done:
			/* uc is already past the closing brace here */
			DPRINTF("Good object: ", uc, *ucp);
			*ucp = uc;
			return 1;
		default:
			DPRINTF("not more", uc, *ucp);
			/*
			 * Point *ucp at the unexpected byte; this only
			 * changes what the trace at `out' prints, because
			 * *ucp is overwritten with uc there.
			 */
			*ucp = uc - 1;
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
271
272 /*ARGSUSED*/
273 static int
json_parse_number(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)274 json_parse_number(const unsigned char **ucp, const unsigned char *ue,
275 size_t lvl __file_debugused)
276 {
277 const unsigned char *uc = *ucp;
278 int got = 0;
279
280 DPRINTF("Parse number: ", uc, *ucp);
281 if (uc == ue)
282 return 0;
283 if (*uc == '-')
284 uc++;
285
286 for (; uc < ue; uc++) {
287 if (!json_isdigit(*uc))
288 break;
289 got = 1;
290 }
291 if (uc == ue)
292 goto out;
293 if (*uc == '.')
294 uc++;
295 for (; uc < ue; uc++) {
296 if (!json_isdigit(*uc))
297 break;
298 got = 1;
299 }
300 if (uc == ue)
301 goto out;
302 if (got && (*uc == 'e' || *uc == 'E')) {
303 uc++;
304 got = 0;
305 if (uc == ue)
306 goto out;
307 if (*uc == '+' || *uc == '-')
308 uc++;
309 for (; uc < ue; uc++) {
310 if (!json_isdigit(*uc))
311 break;
312 got = 1;
313 }
314 }
315 out:
316 if (!got)
317 DPRINTF("Bad number: ", uc, *ucp);
318 else
319 DPRINTF("Good number: ", uc, *ucp);
320 *ucp = uc;
321 return got;
322 }
323
324 /*ARGSUSED*/
325 static int
json_parse_const(const unsigned char ** ucp,const unsigned char * ue,const char * str,size_t len,size_t lvl __file_debugused)326 json_parse_const(const unsigned char **ucp, const unsigned char *ue,
327 const char *str, size_t len, size_t lvl __file_debugused)
328 {
329 const unsigned char *uc = *ucp;
330
331 DPRINTF("Parse const: ", uc, *ucp);
332 *ucp += --len - 1;
333 if (*ucp > ue)
334 *ucp = ue;
335 for (; uc < ue && --len;) {
336 if (*uc++ != *++str) {
337 DPRINTF("Bad const: ", uc, *ucp);
338 return 0;
339 }
340 }
341 DPRINTF("Good const: ", uc, *ucp);
342 return 1;
343 }
344
/*
 * Parse one JSON value starting at *ucp, skipping whitespace before and
 * after it.  The first byte selects the sub-parser: '"' string,
 * '[' array, '{' object, 't'/'f'/'n' constant; anything else is tried as
 * a number.  On success the matching st[] counter is incremented.
 * *ucp is set to the final scan position, except on the two early
 * returns below, which leave it untouched.
 *
 * For lvl > 0 the return value is the sub-parser's result (nonzero on
 * success, 0 on failure or empty input).
 *
 * For lvl == 0 the return value is:
 *	0	the value did not parse, or no array/object was counted, or
 *		trailing data does not look like a second value
 *	1	the value extends to the end of the buffer
 *	2	the value is followed by a byte equal to its own first byte
 *		and one further value parses successfully from there
 */
static int
json_parse(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc, *ouc;
	int rv = 0;
	int t;

	/* ouc remembers where the value starts for the lvl == 0 check below */
	ouc = uc = json_skip_space(*ucp, ue);
	if (uc == ue)
		goto out;

	/*
	 * Bound the recursion depth.  Containers pass lvl + 1 to their
	 * element parser and this function adds another 1 when it
	 * dispatches, so each nesting level costs two.
	 */
	if (lvl > 500) {
		DPRINTF("Too many levels", uc, *ucp);
		return 0;
	}
#if JSON_COUNT
	/*
	 * bail quickly if not counting
	 * NOTE(review): this guard is compiled only when JSON_COUNT is
	 * nonzero, which does not match the wording above, and it returns
	 * without consuming any input -- confirm the intent.
	 */
	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
		return 1;
#endif

	DPRINTF("Parse general: ", uc, *ucp);
	/* every arm sets both rv and t, so t is defined whenever rv != 0 */
	switch (*uc++) {
	case '"':
		rv = json_parse_string(&uc, ue, lvl + 1);
		t = JSON_STRING;
		break;
	case '[':
		rv = json_parse_array(&uc, ue, st, lvl + 1);
		t = JSON_ARRAY;
		break;
	case '{': /* '}' */
		rv = json_parse_object(&uc, ue, st, lvl + 1);
		t = JSON_OBJECT;
		break;
	case 't':
		rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
		t = JSON_CONSTANT;
		break;
	case 'f':
		rv = json_parse_const(&uc, ue, "false", sizeof("false"),
		    lvl + 1);
		t = JSON_CONSTANT;
		break;
	case 'n':
		rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
		t = JSON_CONSTANT;
		break;
	default:
		/* the number parser needs to see the first byte itself */
		--uc;
		rv = json_parse_number(&uc, ue, lvl + 1);
		t = JSON_NUMBER;
		break;
	}
	if (rv)
		st[t]++;
	uc = json_skip_space(uc, ue);
out:
	DPRINTF("End general: ", uc, *ucp);
	*ucp = uc;
	if (lvl == 0) {
		if (!rv)
			return 0;
		/* whole buffer consumed: JSON only if it held an array/object */
		if (uc == ue)
			return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
		/*
		 * More data follows.  If it starts with the same byte as the
		 * first value and one more value parses, report 2.  Only
		 * that one additional value is examined, and the local uc
		 * advanced here is not written back to *ucp.
		 */
		if (*ouc == *uc && json_parse(&uc, ue, st, 1))
			return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
		else
			return 0;
	}
	return rv;
}
419
420 #ifndef TEST
421 int
file_is_json(struct magic_set * ms,const struct buffer * b)422 file_is_json(struct magic_set *ms, const struct buffer *b)
423 {
424 const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
425 const unsigned char *ue = uc + b->flen;
426 size_t st[JSON_MAX];
427 int mime = ms->flags & MAGIC_MIME;
428 int jt;
429
430
431 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
432 return 0;
433
434 memset(st, 0, sizeof(st));
435
436 if ((jt = json_parse(&uc, ue, st, 0)) == 0)
437 return 0;
438
439 if (mime == MAGIC_MIME_ENCODING)
440 return 1;
441 if (mime) {
442 if (file_printf(ms, "application/%s",
443 jt == 1 ? "json" : "x-ndjson") == -1)
444 return -1;
445 return 1;
446 }
447 if (file_printf(ms, "%sJSON text data",
448 jt == 1 ? "" : "New Line Delimited ") == -1)
449 return -1;
450 #if JSON_COUNT
451 #define P(n) st[n], st[n] > 1 ? "s" : ""
452 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
453 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
454 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
455 "u >1array%s)",
456 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
457 P(JSON_NUMBER), P(JSON_ARRAYN))
458 == -1)
459 return -1;
460 #endif
461 return 1;
462 }
463
464 #else
465
466 #include <sys/types.h>
467 #include <sys/stat.h>
468 #include <stdio.h>
469 #include <fcntl.h>
470 #include <unistd.h>
471 #include <stdlib.h>
472 #include <stdint.h>
473 #include <err.h>
474
475 int
main(int argc,char * argv[])476 main(int argc, char *argv[])
477 {
478 int fd;
479 struct stat st;
480 unsigned char *p;
481 size_t stats[JSON_MAX];
482
483 if ((fd = open(argv[1], O_RDONLY)) == -1)
484 err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
485
486 if (fstat(fd, &st) == -1)
487 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
488
489 if ((p = CAST(char *, malloc(st.st_size))) == NULL)
490 err(EXIT_FAILURE, "Can't allocate %jd bytes",
491 (intmax_t)st.st_size);
492 if (read(fd, p, st.st_size) != st.st_size)
493 err(EXIT_FAILURE, "Can't read %jd bytes",
494 (intmax_t)st.st_size);
495 memset(stats, 0, sizeof(stats));
496 printf("is json %d\n", json_parse((const unsigned char **)&p,
497 p + st.st_size, stats, 0));
498 return 0;
499 }
500 #endif
501