1 /*-
2 * Copyright (c) 2018 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /*
28 * Parse JSON object serialization format (RFC-7159)
29 */
30
31 #ifndef TEST
32 #include "file.h"
33
34 #ifndef lint
35 FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
36 #endif
37
38 #include <string.h>
39 #include "magic.h"
40 #endif
41
42 #ifdef DEBUG
43 #include <stdio.h>
44 #define DPRINTF(a, b, c) \
45 printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
46 #else
47 #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
48 #endif
49
/*
 * Value categories.  Each one is an index into the statistics array that
 * the parser functions update; JSON_MAX is the number of slots.
 * JSON_ARRAYN counts arrays that hold more than one element.
 */
enum {
	JSON_ARRAY = 0,
	JSON_CONSTANT = 1,
	JSON_NUMBER = 2,
	JSON_OBJECT = 3,
	JSON_STRING = 4,
	JSON_ARRAYN = 5,
	JSON_MAX = 6
};
57
58 /*
59 * if JSON_COUNT != 0:
60 * count all the objects, require that we have the whole data file
61 * otherwise:
62 * stop if we find an object or an array
63 */
64 #ifndef JSON_COUNT
65 #define JSON_COUNT 0
66 #endif
67
68 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
69 size_t);
70
/*
 * Return 1 if uc is one of the four JSON whitespace bytes (space, line
 * feed, carriage return, horizontal tab), 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
84
/*
 * Return 1 if uc is a decimal digit '0'..'9', 0 otherwise.
 * The C standard guarantees the digit characters are contiguous.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
96
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (uc >= '0' && uc <= '9')
		return 1;
	if (uc >= 'a' && uc <= 'f')
		return 1;
	if (uc >= 'A' && uc <= 'F')
		return 1;
	return 0;
}
110
/*
 * Advance past any run of JSON whitespace starting at uc, never going
 * beyond ue.  Returns the first position that is either not whitespace
 * or equal to ue.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
118
/*
 * Parse the remainder of a JSON string; *ucp points just past the opening
 * double quote.  Scans up to ue for the closing quote, validating escape
 * sequences on the way: the single-character escapes \" \\ \/ \b \f \n \r
 * \t and \u followed by exactly four hexadecimal digits.  A NUL byte, an
 * unknown escape or running out of input makes the string invalid.
 *
 * Returns 1 with *ucp just past the closing quote on success, or 0 with
 * *ucp at the position where scanning stopped.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	unsigned char ch;
	size_t ndigits;

	DPRINTF("Parse string: ", p, *ucp);
	while (p < ue) {
		ch = *p++;
		if (ch == '"') {
			/* Closing quote: the string is complete. */
			*ucp = p;
			return 1;
		}
		if (ch == '\0')
			break;
		if (ch != '\\')
			continue;

		/* Backslash: the escape character must follow. */
		if (p == ue)
			break;
		ch = *p++;
		if (ch == 'u') {
			if (ue - p < 4) {
				/* Not enough input left for 4 hex digits. */
				p = ue;
				break;
			}
			/*
			 * Each examined byte is consumed, including the
			 * one that turns out not to be a hex digit.
			 */
			for (ndigits = 0;
			    ndigits < 4 && json_isxdigit(*p++); ndigits++)
				continue;
			if (ndigits < 4)
				break;
			continue;
		}
		if (ch != '"' && ch != '\\' && ch != '/' && ch != 'b' &&
		    ch != 'f' && ch != 'n' && ch != 'r' && ch != 't')
			break;
	}
	DPRINTF("Bad string: ", p, *ucp);
	*ucp = p;
	return 0;
}
169
170 static int
json_parse_array(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)171 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
172 size_t *st, size_t lvl)
173 {
174 const unsigned char *uc = *ucp;
175 int more = 0; /* Array has more than 1 element */
176
177 DPRINTF("Parse array: ", uc, *ucp);
178 while (uc < ue) {
179 if (!json_parse(&uc, ue, st, lvl + 1))
180 goto out;
181 if (uc == ue)
182 goto out;
183 switch (*uc) {
184 case ',':
185 more++;
186 uc++;
187 continue;
188 case ']':
189 if (more)
190 st[JSON_ARRAYN]++;
191 *ucp = uc + 1;
192 return 1;
193 default:
194 goto out;
195 }
196 }
197 out:
198 DPRINTF("Bad array: ", uc, *ucp);
199 *ucp = uc;
200 return 0;
201 }
202
/*
 * Parse the remainder of a JSON object; *ucp points just past the opening
 * '{'.  Each member is a string key, a ':' and a value parsed with
 * json_parse() at nesting level lvl + 1; members are separated by ',' and
 * the object is terminated by '}'.  An empty object "{}" (optionally
 * containing whitespace) is accepted.
 *
 * Returns 1 with *ucp just past the closing '}' on success, or 0 with
 * *ucp at the position where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);

	/*
	 * Handle the empty object up front.  Only checking here (and not
	 * after each comma) keeps a trailing comma such as {"a":1,} invalid.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') {	/* { */
		*ucp = uc + 1;
		DPRINTF("Good object: ", uc, *ucp);
		return 1;
	}

	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		/* Member name: must be a string. */
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		/* Member value; json_parse() skips surrounding whitespace. */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}':	/* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			/* Step back so the failure position is the bad byte. */
			uc--;
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
253
/*
 * Parse a number starting at *ucp: an optional '-', a run of digits, an
 * optional '.' followed by a run of digits, and, if at least one digit
 * was seen, an optional exponent ('e' or 'E', optional sign, digits).
 * The check is lenient: digits on either side of the '.' are enough.
 * An exponent marker must be followed by at least one digit.
 *
 * Returns 1 if a number was recognised, 0 otherwise.  *ucp is advanced
 * to where scanning stopped, except when the input is already empty, in
 * which case 0 is returned and *ucp is left untouched.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	int seen = 0;	/* at least one digit in the current part */

	DPRINTF("Parse number: ", p, *ucp);
	if (p == ue)
		return 0;
	if (*p == '-')
		p++;

	/* Integer part. */
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}

	if (p != ue) {
		/* Fractional part. */
		if (*p == '.')
			p++;
		while (p < ue && json_isdigit(*p)) {
			seen = 1;
			p++;
		}

		/* Exponent: digits are required again after the marker. */
		if (p != ue && seen && (*p == 'e' || *p == 'E')) {
			p++;
			seen = 0;
			if (p != ue) {
				if (*p == '+' || *p == '-')
					p++;
				while (p < ue && json_isdigit(*p)) {
					seen = 1;
					p++;
				}
			}
		}
	}

	if (seen) {
		DPRINTF("Good number: ", p, *ucp);
	} else {
		DPRINTF("Bad number: ", p, *ucp);
	}
	*ucp = p;
	return seen;
}
303
/*
 * Parse the remainder of a literal constant ("true", "false" or "null").
 *
 * The caller has already consumed the first character, so *ucp points at
 * what should be the second character of str.  len is the size of the
 * literal including its terminating NUL (callers pass sizeof("...")), so
 * len - 2 further characters must match.
 *
 * Returns 1 with *ucp just past the constant when every remaining
 * character matches, or 0 with *ucp at the first mismatch (or at ue if
 * the input ended early).
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t want = len >= 2 ? len - 2 : 0;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	for (i = 0; i < want; i++) {
		/*
		 * Reject truncated input and, unlike the previous code
		 * which ignored the comparison result, any mismatch.
		 */
		if (uc == ue || *uc != (unsigned char)str[i + 1]) {
			DPRINTF("Bad const: ", uc, *ucp);
			*ucp = uc;
			return 0;
		}
		uc++;
	}
	*ucp = uc;
	return 1;
}
320
321 static int
json_parse(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)322 json_parse(const unsigned char **ucp, const unsigned char *ue,
323 size_t *st, size_t lvl)
324 {
325 const unsigned char *uc;
326 int rv = 0;
327 int t;
328
329 uc = json_skip_space(*ucp, ue);
330 if (uc == ue)
331 goto out;
332
333 // Avoid recursion
334 if (lvl > 20)
335 return 0;
336 #if JSON_COUNT
337 /* bail quickly if not counting */
338 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
339 return 1;
340 #endif
341
342 DPRINTF("Parse general: ", uc, *ucp);
343 switch (*uc++) {
344 case '"':
345 rv = json_parse_string(&uc, ue);
346 t = JSON_STRING;
347 break;
348 case '[':
349 rv = json_parse_array(&uc, ue, st, lvl + 1);
350 t = JSON_ARRAY;
351 break;
352 case '{': /* '}' */
353 rv = json_parse_object(&uc, ue, st, lvl + 1);
354 t = JSON_OBJECT;
355 break;
356 case 't':
357 rv = json_parse_const(&uc, ue, "true", sizeof("true"));
358 t = JSON_CONSTANT;
359 break;
360 case 'f':
361 rv = json_parse_const(&uc, ue, "false", sizeof("false"));
362 t = JSON_CONSTANT;
363 break;
364 case 'n':
365 rv = json_parse_const(&uc, ue, "null", sizeof("null"));
366 t = JSON_CONSTANT;
367 break;
368 default:
369 --uc;
370 rv = json_parse_number(&uc, ue);
371 t = JSON_NUMBER;
372 break;
373 }
374 if (rv)
375 st[t]++;
376 uc = json_skip_space(uc, ue);
377 out:
378 *ucp = uc;
379 DPRINTF("End general: ", uc, *ucp);
380 if (lvl == 0)
381 return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
382 return rv;
383 }
384
385 #ifndef TEST
386 int
file_is_json(struct magic_set * ms,const struct buffer * b)387 file_is_json(struct magic_set *ms, const struct buffer *b)
388 {
389 const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
390 const unsigned char *ue = uc + b->flen;
391 size_t st[JSON_MAX];
392 int mime = ms->flags & MAGIC_MIME;
393
394
395 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
396 return 0;
397
398 memset(st, 0, sizeof(st));
399
400 if (!json_parse(&uc, ue, st, 0))
401 return 0;
402
403 if (mime == MAGIC_MIME_ENCODING)
404 return 1;
405 if (mime) {
406 if (file_printf(ms, "application/json") == -1)
407 return -1;
408 return 1;
409 }
410 if (file_printf(ms, "JSON data") == -1)
411 return -1;
412 #if JSON_COUNT
413 #define P(n) st[n], st[n] > 1 ? "s" : ""
414 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
415 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
416 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
417 "u >1array%s)",
418 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
419 P(JSON_NUMBER), P(JSON_ARRAYN))
420 == -1)
421 return -1;
422 #endif
423 return 1;
424 }
425
426 #else
427
428 #include <sys/types.h>
429 #include <sys/stat.h>
430 #include <stdio.h>
431 #include <fcntl.h>
432 #include <unistd.h>
433 #include <stdlib.h>
434 #include <stdint.h>
435 #include <err.h>
436
437 int
main(int argc,char * argv[])438 main(int argc, char *argv[])
439 {
440 int fd, rv;
441 struct stat st;
442 unsigned char *p;
443 size_t stats[JSON_MAX];
444
445 if ((fd = open(argv[1], O_RDONLY)) == -1)
446 err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
447
448 if (fstat(fd, &st) == -1)
449 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
450
451 if ((p = malloc(st.st_size)) == NULL)
452 err(EXIT_FAILURE, "Can't allocate %jd bytes",
453 (intmax_t)st.st_size);
454 if (read(fd, p, st.st_size) != st.st_size)
455 err(EXIT_FAILURE, "Can't read %jd bytes",
456 (intmax_t)st.st_size);
457 memset(stats, 0, sizeof(stats));
458 printf("is json %d\n", json_parse((const unsigned char **)&p,
459 p + st.st_size, stats, 0));
460 return 0;
461 }
462 #endif
463