1 /*-
2 * Copyright (c) 2018 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /*
28 * Parse JSON object serialization format (RFC-7159)
29 */
30
31 #ifndef TEST
32 #include "file.h"
33
34 #ifndef lint
35 FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $")
36 #endif
37
38 #include <string.h>
39 #include "magic.h"
40 #endif
41
42 #ifdef DEBUG
43 #include <stdio.h>
44 #define DPRINTF(a, b, c) \
45 printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
46 #else
47 #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
48 #endif
49
/*
 * Indices into the statistics array that json_parse() and its helpers
 * update.  JSON_MAX is the number of slots, i.e. the required array size.
 */
enum {
	JSON_ARRAY = 0,		/* bumped by json_parse() per parsed array */
	JSON_CONSTANT = 1,	/* true / false / null */
	JSON_NUMBER = 2,
	JSON_OBJECT = 3,
	JSON_STRING = 4,
	JSON_ARRAYN = 5,	/* bumped by json_parse_array() on ']' */
	JSON_MAX = 6
};
57
58 /*
59 * if JSON_COUNT != 0:
60 * count all the objects, require that we have the whole data file
61 * otherwise:
62 * stop if we find an object or an array
63 */
64 #ifndef JSON_COUNT
65 #define JSON_COUNT 0
66 #endif
67
68 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
69 size_t);
70
/*
 * Return 1 if uc is one of the four whitespace bytes this parser skips
 * (space, line feed, carriage return, horizontal tab), 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
84
/*
 * Return 1 if uc is a decimal digit '0'..'9', 0 otherwise.
 * Locale independent, unlike isdigit(3).
 */
static int
json_isdigit(unsigned char uc)
{
	/* C guarantees that '0'..'9' are consecutive code points. */
	return uc >= '0' && uc <= '9';
}
96
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise.
 * Locale independent, unlike isxdigit(3).
 */
static int
json_isxdigit(unsigned char uc)
{
	if (uc >= '0' && uc <= '9')
		return 1;
	if (uc >= 'a' && uc <= 'f')
		return 1;
	return uc >= 'A' && uc <= 'F';
}
110
/*
 * Advance uc past any run of JSON whitespace, never going beyond ue.
 * Returns the first non-whitespace position, or ue if none is left.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
118
/*
 * Parse the remainder of a JSON string.  On entry *ucp points just past
 * the opening double quote (the callers consume it); ue is the end of
 * the buffer.
 *
 * Returns 1 and leaves *ucp just past the closing quote on success.
 * Returns 0 on a NUL byte, an unknown or truncated escape sequence, or
 * when the buffer ends before the closing quote; *ucp is then set to
 * the position where scanning stopped.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	unsigned char ch;
	size_t n;

	DPRINTF("Parse string: ", p, *ucp);
	while (p < ue) {
		ch = *p++;
		if (ch == '"') {
			*ucp = p;
			DPRINTF("Good string: ", p, *ucp);
			return 1;
		}
		if (ch == '\0')
			break;
		if (ch != '\\')
			continue;

		/* Backslash: at least one more byte must follow. */
		if (p == ue)
			break;
		ch = *p++;
		if (ch == 'u') {
			/* \u must be followed by exactly four hex digits. */
			if (ue - p < 4) {
				p = ue;
				break;
			}
			for (n = 0; n < 4; n++)
				if (!json_isxdigit(*p++))
					goto bad;
			continue;
		}
		switch (ch) {
		case '"': case '\\': case '/':
		case 'b': case 'f': case 'n': case 'r': case 't':
			continue;
		default:
			/* Includes an embedded NUL after the backslash. */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", p, *ucp);
	*ucp = p;
	return 0;
}
170
171 static int
json_parse_array(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)172 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
173 size_t *st, size_t lvl)
174 {
175 const unsigned char *uc = *ucp;
176
177 DPRINTF("Parse array: ", uc, *ucp);
178 while (uc < ue) {
179 if (*uc == ']')
180 goto done;
181 if (!json_parse(&uc, ue, st, lvl + 1))
182 goto out;
183 if (uc == ue)
184 goto out;
185 switch (*uc) {
186 case ',':
187 uc++;
188 continue;
189 case ']':
190 done:
191 st[JSON_ARRAYN]++;
192 *ucp = uc + 1;
193 DPRINTF("Good array: ", uc, *ucp);
194 return 1;
195 default:
196 goto out;
197 }
198 }
199 out:
200 DPRINTF("Bad array: ", uc, *ucp);
201 *ucp = uc;
202 return 0;
203 }
204
/*
 * Parse the remainder of a JSON object.  On entry *ucp points just past
 * the opening '{' (json_parse() consumes it); ue is the end of the
 * buffer, st the statistics array and lvl the current nesting level.
 *
 * Each member must be a quoted name, a ':' and a value, with members
 * separated by ','.  Returns 1 with *ucp just past the closing '}' on
 * success; returns 0 with *ucp at the position where parsing stopped
 * otherwise.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *p = *ucp;
	unsigned char ch;

	DPRINTF("Parse object: ", p, *ucp);
	while (p < ue) {
		p = json_skip_space(p, ue);
		if (p == ue)
			break;

		/* Either the end of the object or the start of a name. */
		ch = *p++;
		if (ch == '}')
			goto good;
		if (ch != '"') {
			DPRINTF("not string", p, *ucp);
			break;
		}
		DPRINTF("next field", p, *ucp);
		if (!json_parse_string(&p, ue)) {
			DPRINTF("not string", p, *ucp);
			break;
		}

		/* Name/value separator. */
		p = json_skip_space(p, ue);
		if (p == ue)
			break;
		ch = *p++;
		if (ch != ':') {
			DPRINTF("not colon", p, *ucp);
			break;
		}

		/* Member value; json_parse() skips surrounding whitespace. */
		if (!json_parse(&p, ue, st, lvl + 1)) {
			DPRINTF("not json", p, *ucp);
			break;
		}
		if (p == ue)
			break;

		ch = *p++;
		if (ch == ',')
			continue;
		if (ch == '}')	/* { */
			goto good;
		*ucp = p - 1;
		DPRINTF("not more", p, *ucp);
		break;
	}
	DPRINTF("Bad object: ", p, *ucp);
	*ucp = p;
	return 0;
good:
	*ucp = p;
	DPRINTF("Good object: ", p, *ucp);
	return 1;
}
260
/*
 * Parse a number starting at *ucp: an optional '-', digits, an optional
 * '.' followed by digits, and an optional exponent ('e' or 'E', optional
 * sign, digits).  The exponent is only considered once at least one
 * digit has been seen, and then requires digits of its own.
 *
 * Returns 1 if the text scanned counts as a number, 0 otherwise.  Apart
 * from the empty-buffer case, *ucp is left at the first byte that was
 * not consumed.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *cur = *ucp;
	int seen_digit = 0;

	DPRINTF("Parse number: ", cur, *ucp);
	if (cur == ue)
		return 0;

	/* Optional leading minus sign. */
	if (*cur == '-')
		cur++;

	/* Integer part. */
	while (cur < ue && json_isdigit(*cur)) {
		seen_digit = 1;
		cur++;
	}
	if (cur == ue)
		goto finish;

	/* Optional fractional part. */
	if (*cur == '.')
		cur++;
	while (cur < ue && json_isdigit(*cur)) {
		seen_digit = 1;
		cur++;
	}
	if (cur == ue)
		goto finish;

	/* Optional exponent; it needs at least one digit of its own. */
	if (seen_digit && (*cur == 'e' || *cur == 'E')) {
		cur++;
		seen_digit = 0;
		if (cur == ue)
			goto finish;
		if (*cur == '+' || *cur == '-')
			cur++;
		while (cur < ue && json_isdigit(*cur)) {
			seen_digit = 1;
			cur++;
		}
	}
finish:
	if (seen_digit)
		DPRINTF("Good number: ", cur, *ucp);
	else
		DPRINTF("Bad number: ", cur, *ucp);
	*ucp = cur;
	return seen_digit;
}
310
/*
 * Match the tail of a literal constant ("true", "false" or "null").
 *
 * The caller has already consumed the first character, so *ucp points
 * at what should be the second one.  str is the full literal and len
 * is sizeof(literal), i.e. it counts the terminating NUL.
 *
 * Returns 1 and leaves *ucp just past the literal when every remaining
 * character matches.  Returns 0 on the first mismatch (with *ucp at the
 * offending byte) or when the buffer ends early (with *ucp == ue).
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	/* Drop the NUL and the first character; guard against underflow. */
	size_t remaining = len > 2 ? len - 2 : 0;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	for (i = 0; i < remaining; i++) {
		/* A mismatch or a truncated literal is not a constant. */
		if (uc == ue || *uc != (unsigned char)str[i + 1]) {
			DPRINTF("Bad const: ", uc, *ucp);
			*ucp = uc;
			return 0;
		}
		uc++;
	}
	*ucp = uc;
	return 1;
}
327
328 static int
json_parse(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)329 json_parse(const unsigned char **ucp, const unsigned char *ue,
330 size_t *st, size_t lvl)
331 {
332 const unsigned char *uc;
333 int rv = 0;
334 int t;
335
336 uc = json_skip_space(*ucp, ue);
337 if (uc == ue)
338 goto out;
339
340 // Avoid recursion
341 if (lvl > 20)
342 return 0;
343 #if JSON_COUNT
344 /* bail quickly if not counting */
345 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
346 return 1;
347 #endif
348
349 DPRINTF("Parse general: ", uc, *ucp);
350 switch (*uc++) {
351 case '"':
352 rv = json_parse_string(&uc, ue);
353 t = JSON_STRING;
354 break;
355 case '[':
356 rv = json_parse_array(&uc, ue, st, lvl + 1);
357 t = JSON_ARRAY;
358 break;
359 case '{': /* '}' */
360 rv = json_parse_object(&uc, ue, st, lvl + 1);
361 t = JSON_OBJECT;
362 break;
363 case 't':
364 rv = json_parse_const(&uc, ue, "true", sizeof("true"));
365 t = JSON_CONSTANT;
366 break;
367 case 'f':
368 rv = json_parse_const(&uc, ue, "false", sizeof("false"));
369 t = JSON_CONSTANT;
370 break;
371 case 'n':
372 rv = json_parse_const(&uc, ue, "null", sizeof("null"));
373 t = JSON_CONSTANT;
374 break;
375 default:
376 --uc;
377 rv = json_parse_number(&uc, ue);
378 t = JSON_NUMBER;
379 break;
380 }
381 if (rv)
382 st[t]++;
383 uc = json_skip_space(uc, ue);
384 out:
385 *ucp = uc;
386 DPRINTF("End general: ", uc, *ucp);
387 if (lvl == 0)
388 return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
389 return rv;
390 }
391
392 #ifndef TEST
393 int
file_is_json(struct magic_set * ms,const struct buffer * b)394 file_is_json(struct magic_set *ms, const struct buffer *b)
395 {
396 const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
397 const unsigned char *ue = uc + b->flen;
398 size_t st[JSON_MAX];
399 int mime = ms->flags & MAGIC_MIME;
400
401
402 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
403 return 0;
404
405 memset(st, 0, sizeof(st));
406
407 if (!json_parse(&uc, ue, st, 0))
408 return 0;
409
410 if (mime == MAGIC_MIME_ENCODING)
411 return 1;
412 if (mime) {
413 if (file_printf(ms, "application/json") == -1)
414 return -1;
415 return 1;
416 }
417 if (file_printf(ms, "JSON data") == -1)
418 return -1;
419 #if JSON_COUNT
420 #define P(n) st[n], st[n] > 1 ? "s" : ""
421 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
422 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
423 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
424 "u >1array%s)",
425 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
426 P(JSON_NUMBER), P(JSON_ARRAYN))
427 == -1)
428 return -1;
429 #endif
430 return 1;
431 }
432
433 #else
434
435 #include <sys/types.h>
436 #include <sys/stat.h>
437 #include <stdio.h>
438 #include <fcntl.h>
439 #include <unistd.h>
440 #include <stdlib.h>
441 #include <stdint.h>
442 #include <err.h>
443
444 int
main(int argc,char * argv[])445 main(int argc, char *argv[])
446 {
447 int fd, rv;
448 struct stat st;
449 unsigned char *p;
450 size_t stats[JSON_MAX];
451
452 if ((fd = open(argv[1], O_RDONLY)) == -1)
453 err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
454
455 if (fstat(fd, &st) == -1)
456 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
457
458 if ((p = malloc(st.st_size)) == NULL)
459 err(EXIT_FAILURE, "Can't allocate %jd bytes",
460 (intmax_t)st.st_size);
461 if (read(fd, p, st.st_size) != st.st_size)
462 err(EXIT_FAILURE, "Can't read %jd bytes",
463 (intmax_t)st.st_size);
464 memset(stats, 0, sizeof(stats));
465 printf("is json %d\n", json_parse((const unsigned char **)&p,
466 p + st.st_size, stats, 0));
467 return 0;
468 }
469 #endif
470