xref: /PHP-7.0/ext/json/json_scanner.re (revision 478f119a)
1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2017 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Jakub Zelenka <bukka@php.net>                                |
16  +----------------------------------------------------------------------+
17*/
18
19#include "php.h"
20#include "php_json_scanner.h"
21#include "php_json_scanner_defs.h"
22#include "php_json_parser.h"
23#include "json_parser.tab.h"
24
/*
 * re2c interface macros: the generated scanner reads its input through the
 * cursor/limit/marker fields of the php_json_scanner pointed to by `s`.
 */
25#define	YYCTYPE     php_json_ctype
26#define	YYCURSOR    s->cursor
27#define	YYLIMIT     s->limit
28#define	YYMARKER    s->marker
29#define	YYCTXMARKER s->ctxmarker
30
/* The current re2c condition is kept in s->state. */
31#define YYGETCONDITION()        s->state
32#define YYSETCONDITION(yystate) s->state = yystate
33
/* Expands to nothing: the scanner never refills its input buffer. */
34#define	YYFILL(n)
35
/*
 * PHP_JSON_CONDITION_SET stores the condition constant yyc<condition> in
 * s->state; PHP_JSON_CONDITION_GOTO jumps straight to the yyc_<condition>
 * label (NOTE(review): presumably the per-condition label emitted by re2c -
 * confirm against the generated scanner).
 */
36#define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition)
37#define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition
38
/*
 * Flush the literal bytes between s->str_start and the token just matched.
 * php_json_scanner_copy_string() excludes (esc_size + 1) bytes counted back
 * from the cursor, so the argument is the matched token length minus one:
 *   0  - a one byte match (lone backslash or closing quote)
 *   5  - a six byte \uXXXX escape
 *   11 - a twelve byte \uXXXX\uXXXX surrogate pair
 */
39#define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0)
40#define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5)
41#define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11)
42
/*
 * Digit count at which the INT rule starts checking whether the literal must
 * be treated as a big integer.
 */
43#define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1)
44
45
46static void php_json_scanner_copy_string(php_json_scanner *s, int esc_size)
47{
48	size_t len = s->cursor - s->str_start - esc_size - 1;
49	if (len) {
50		memcpy(s->pstr, s->str_start, len);
51		s->pstr += len;
52	}
53}
54
/*
 * Convert one hexadecimal digit character to its numeric value (0-15).
 * Both upper and lower case letters are accepted; any other character
 * yields -1.
 */
static int php_json_hex_to_int(char code)
{
	if ('0' <= code && code <= '9') {
		return code - '0';
	}
	if ('A' <= code && code <= 'F') {
		return code - 'A' + 10;
	}
	if ('a' <= code && code <= 'f') {
		return code - 'a' + 10;
	}

	/* this should never happen (just to suppress compiler warning) */
	return -1;
}
68
69static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start)
70{
71	int i, code = 0;
72	php_json_ctype *pc = s->cursor - start;
73	for (i = 0; i < size; i++) {
74		code |= php_json_hex_to_int(*(pc--)) << (i * 4);
75	}
76	return code;
77}
78
79static int php_json_ucs2_to_int(php_json_scanner *s, int size)
80{
81	return php_json_ucs2_to_int_ex(s, size, 1);
82}
83
84void php_json_scanner_init(php_json_scanner *s, char *str, size_t str_len, int options)
85{
86	s->cursor = (php_json_ctype *) str;
87	s->limit = (php_json_ctype *) str + str_len;
88	s->options = options;
89	PHP_JSON_CONDITION_SET(JS);
90}
91
/*
 * Scan the next JSON token from the input attached to `s`.
 *
 * s->value is reset to NULL on entry and filled in by rules whose token
 * carries a value. The return value is the token id: the character itself
 * for '{', '}', '[', ']', ':' and ',', otherwise one of the PHP_JSON_T_*
 * constants. On failure PHP_JSON_T_ERROR is returned and s->errcode is set
 * to the matching PHP_JSON_ERROR_* code.
 *
 * The rules are split over three re2c conditions kept in s->state:
 *
 *   JS     - everything outside of string literals.
 *            * An INT is returned as PHP_JSON_T_INT unless its digit count
 *              reaches PHP_JSON_INT_MAX_LENGTH and the comparison against
 *              LONG_MIN_DIGITS marks it as too big (NOTE(review): presumably
 *              the digits of the smallest zend_long - confirm). A too big
 *              integer becomes a string when PHP_JSON_BIGINT_AS_STRING is
 *              set in s->options, otherwise a double.
 *            * NL and WS are skipped by jumping back to `std`.
 *            * A NUL byte is PHP_JSON_T_EOI only if the cursor ends up past
 *              s->limit after consuming it; otherwise it is reported as
 *              PHP_JSON_ERROR_CTRL_CHAR.
 *            * An opening quote records s->str_start, clears s->str_esc and
 *              continues in STR_P1.
 *
 *   STR_P1 - first pass over a string body: validation and sizing only.
 *            s->str_esc accumulates how many bytes shorter the decoded
 *            string is than the source text: 5, 4 or 3 for a six byte
 *            \uXXXX escape stored as 1, 2 or 3 bytes, 8 for a twelve byte
 *            surrogate pair stored as 4 bytes, 1 for a two byte escape.
 *            At the closing quote an empty body yields PHP_JSON_T_ESTRING.
 *            Otherwise a zend_string of the exact decoded length is
 *            allocated; without escapes the body is copied in one memcpy
 *            and PHP_JSON_T_STRING is returned, with escapes the cursor is
 *            rewound to s->str_start and scanning continues in STR_P2.
 *
 *   STR_P2 - second pass, only for strings that contain escapes. Literal
 *            runs are flushed with the PHP_JSON_SCANNER_COPY_* macros and
 *            each escape is decoded into s->pstr; s->str_start is moved
 *            behind every decoded escape. The closing quote flushes the
 *            last run, switches back to JS and returns PHP_JSON_T_STRING.
 *
 * NOTE(review): in the STR_P2 UTF16_4 rule the local names are swapped with
 * respect to what they hold. `utf16_hi` is read from the four hex digits
 * directly before the cursor, i.e. the second escape ([dD][c-fC-F]..), and
 * `utf16_lo` from the first escape ([dD][89abAB]..). The formula shifts
 * `utf16_lo` by 10 bits, so the computed code point is still correct.
 */
92int php_json_scan(php_json_scanner *s)
93{
	/* start every call with a NULL value; rules overwrite it when needed */
94	ZVAL_NULL(&s->value);
95
	/* re-entry point: the whitespace rule jumps here to begin a new token */
96std:
97	s->token = s->cursor;
98
99/*!re2c
100	re2c:indent:top = 1;
101	re2c:yyfill:enable = 0;
102
103	DIGIT   = [0-9] ;
104	DIGITNZ = [1-9] ;
105	UINT    = "0" | ( DIGITNZ DIGIT* ) ;
106	INT     = "-"? UINT ;
107	HEX     = DIGIT | [a-fA-F] ;
108	HEXNZ   = DIGITNZ | [a-fA-F] ;
109	HEX7    = [0-7] ;
110	HEXC    = DIGIT | [a-cA-C] ;
111	FLOAT   = INT "." DIGIT+ ;
112	EXP     = ( INT | FLOAT ) [eE] [+-]? DIGIT+ ;
113	NL      = "\r"? "\n" ;
114	WS      = [ \t\r]+ ;
115	EOI     = "\000";
116	CTRL    = [\x00-\x1F] ;
117	UTF8T   = [\x80-\xBF] ;
118	UTF8_1  = [\x00-\x7F] ;
119	UTF8_2  = [\xC2-\xDF] UTF8T ;
120	UTF8_3A = "\xE0" [\xA0-\xBF] UTF8T ;
121	UTF8_3B = [\xE1-\xEC] UTF8T{2} ;
122	UTF8_3C = "\xED" [\x80-\x9F] UTF8T ;
123	UTF8_3D = [\xEE-\xEF] UTF8T{2} ;
124	UTF8_3  = UTF8_3A | UTF8_3B | UTF8_3C | UTF8_3D ;
125	UTF8_4A = "\xF0"[\x90-\xBF] UTF8T{2} ;
126	UTF8_4B = [\xF1-\xF3] UTF8T{3} ;
127	UTF8_4C = "\xF4" [\x80-\x8F] UTF8T{2} ;
128	UTF8_4  = UTF8_4A | UTF8_4B | UTF8_4C ;
129	UTF8    = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 ;
130	ANY     = [^] ;
131	ESCPREF = "\\" ;
132	ESCSYM  = ( "\"" | "\\" | "/" | [bfnrt] ) ;
133	ESC     = ESCPREF ESCSYM ;
134	UTFSYM  = "u" ;
135	UTFPREF = ESCPREF UTFSYM ;
136	UCS2    = UTFPREF HEX{4} ;
137	UTF16_1 = UTFPREF "00" HEX7 HEX ;
138	UTF16_2 = UTFPREF "0" HEX7 HEX{2} ;
139	UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ;
140	UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ;
141
142	<JS>"{"                  { return '{'; }
143	<JS>"}"                  { return '}'; }
144	<JS>"["                  { return '['; }
145	<JS>"]"                  { return ']'; }
146	<JS>":"                  { return ':'; }
147	<JS>","                  { return ','; }
148	<JS>"null"               {
149		ZVAL_NULL(&s->value);
150		return PHP_JSON_T_NUL;
151	}
152	<JS>"true"               {
153		ZVAL_TRUE(&s->value);
154		return PHP_JSON_T_TRUE;
155	}
156	<JS>"false"              {
157		ZVAL_FALSE(&s->value);
158		return PHP_JSON_T_FALSE;
159	}
160	<JS>INT                  {
161		zend_bool bigint = 0, negative = s->token[0] == '-';
162		size_t digits = (size_t) (s->cursor - s->token - negative);
163		if (digits >= PHP_JSON_INT_MAX_LENGTH) {
164			if (digits == PHP_JSON_INT_MAX_LENGTH) {
165				int cmp = strncmp((char *) (s->token + negative), LONG_MIN_DIGITS, PHP_JSON_INT_MAX_LENGTH);
166				if (!(cmp < 0 || (cmp == 0 && negative))) {
167					bigint = 1;
168				}
169			} else {
170				bigint = 1;
171			}
172		}
173		if (!bigint) {
174			ZVAL_LONG(&s->value, ZEND_STRTOL((char *) s->token, NULL, 10));
175			return PHP_JSON_T_INT;
176		} else if (s->options & PHP_JSON_BIGINT_AS_STRING) {
177			ZVAL_STRINGL(&s->value, (char *) s->token, s->cursor - s->token);
178			return PHP_JSON_T_STRING;
179		} else {
180			ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL));
181			return PHP_JSON_T_DOUBLE;
182		}
183	}
184	<JS>FLOAT|EXP            {
185		ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL));
186		return PHP_JSON_T_DOUBLE;
187	}
188	<JS>NL|WS                { goto std; }
189	<JS>EOI                  {
190		if (s->limit < s->cursor) {
191			return PHP_JSON_T_EOI;
192		} else {
193			s->errcode = PHP_JSON_ERROR_CTRL_CHAR;
194			return PHP_JSON_T_ERROR;
195		}
196	}
197	<JS>["]                  {
198		s->str_start = s->cursor;
199		s->str_esc = 0;
200		PHP_JSON_CONDITION_SET(STR_P1);
201		PHP_JSON_CONDITION_GOTO(STR_P1);
202	}
203	<JS>CTRL                 {
204		s->errcode = PHP_JSON_ERROR_CTRL_CHAR;
205		return PHP_JSON_T_ERROR;
206	}
207	<JS>UTF8                 {
208		s->errcode = PHP_JSON_ERROR_SYNTAX;
209		return PHP_JSON_T_ERROR;
210	}
211	<JS>ANY                  {
212		s->errcode = PHP_JSON_ERROR_UTF8;
213		return PHP_JSON_T_ERROR;
214	}
215
216	<STR_P1>CTRL             {
217		s->errcode = PHP_JSON_ERROR_CTRL_CHAR;
218		return PHP_JSON_T_ERROR;
219	}
220	<STR_P1>UTF16_1          {
221		s->str_esc += 5;
222		PHP_JSON_CONDITION_GOTO(STR_P1);
223	}
224	<STR_P1>UTF16_2          {
225		s->str_esc += 4;
226		PHP_JSON_CONDITION_GOTO(STR_P1);
227	}
228	<STR_P1>UTF16_3          {
229		s->str_esc += 3;
230		PHP_JSON_CONDITION_GOTO(STR_P1);
231	}
232	<STR_P1>UTF16_4          {
233		s->str_esc += 8;
234		PHP_JSON_CONDITION_GOTO(STR_P1);
235	}
236	<STR_P1>UCS2             {
237		s->errcode = PHP_JSON_ERROR_UTF16;
238		return PHP_JSON_T_ERROR;
239	}
240	<STR_P1>ESC              {
241		s->str_esc++;
242		PHP_JSON_CONDITION_GOTO(STR_P1);
243	}
244	<STR_P1>ESCPREF           {
245		s->errcode = PHP_JSON_ERROR_SYNTAX;
246		return PHP_JSON_T_ERROR;
247	}
248	<STR_P1>["]              {
249		zend_string *str;
250		size_t len = s->cursor - s->str_start - s->str_esc - 1;
251		if (len == 0) {
252			PHP_JSON_CONDITION_SET(JS);
253			ZVAL_EMPTY_STRING(&s->value);
254			return PHP_JSON_T_ESTRING;
255		}
256		str = zend_string_alloc(len, 0);
257		ZSTR_VAL(str)[len] = '\0';
258		ZVAL_STR(&s->value, str);
259		if (s->str_esc) {
260			s->pstr = (php_json_ctype *) Z_STRVAL(s->value);
261			s->cursor = s->str_start;
262			PHP_JSON_CONDITION_SET(STR_P2);
263			PHP_JSON_CONDITION_GOTO(STR_P2);
264		} else {
265			memcpy(Z_STRVAL(s->value), s->str_start, len);
266			PHP_JSON_CONDITION_SET(JS);
267			return PHP_JSON_T_STRING;
268		}
269	}
270	<STR_P1>UTF8             { PHP_JSON_CONDITION_GOTO(STR_P1); }
271	<STR_P1>ANY              {
272		s->errcode = PHP_JSON_ERROR_UTF8;
273		return PHP_JSON_T_ERROR;
274	}
275
276	<STR_P2>UTF16_1             {
277		int utf16 = php_json_ucs2_to_int(s, 2);
278		PHP_JSON_SCANNER_COPY_UTF();
279		*(s->pstr++) = (char) utf16;
280		s->str_start = s->cursor;
281		PHP_JSON_CONDITION_GOTO(STR_P2);
282	}
283	<STR_P2>UTF16_2             {
284		int utf16 = php_json_ucs2_to_int(s, 3);
285		PHP_JSON_SCANNER_COPY_UTF();
286		*(s->pstr++) = (char) (0xc0 | (utf16 >> 6));
287		*(s->pstr++) = (char) (0x80 | (utf16 & 0x3f));
288		s->str_start = s->cursor;
289		PHP_JSON_CONDITION_GOTO(STR_P2);
290	}
291	<STR_P2>UTF16_3             {
292		int utf16 = php_json_ucs2_to_int(s, 4);
293		PHP_JSON_SCANNER_COPY_UTF();
294		*(s->pstr++) = (char) (0xe0 | (utf16 >> 12));
295		*(s->pstr++) = (char) (0x80 | ((utf16 >> 6) & 0x3f));
296		*(s->pstr++) = (char) (0x80 | (utf16 & 0x3f));
297		s->str_start = s->cursor;
298		PHP_JSON_CONDITION_GOTO(STR_P2);
299	}
300	<STR_P2>UTF16_4             {
301		int utf32, utf16_hi, utf16_lo;
302		utf16_hi = php_json_ucs2_to_int(s, 4);
303		utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7);
304		utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000;
305		PHP_JSON_SCANNER_COPY_UTF_SP();
306		*(s->pstr++) = (char) (0xf0 | (utf32 >> 18));
307		*(s->pstr++) = (char) (0x80 | ((utf32 >> 12) & 0x3f));
308		*(s->pstr++) = (char) (0x80 | ((utf32 >> 6) & 0x3f));
309		*(s->pstr++) = (char) (0x80 | (utf32 & 0x3f));
310		s->str_start = s->cursor;
311		PHP_JSON_CONDITION_GOTO(STR_P2);
312	}
313	<STR_P2>ESCPREF          {
314		char esc;
315		PHP_JSON_SCANNER_COPY_ESC();
316		switch (*s->cursor) {
317			case 'b':
318				esc = '\b';
319				break;
320			case 'f':
321				esc = '\f';
322				break;
323			case 'n':
324				esc = '\n';
325				break;
326			case 'r':
327				esc = '\r';
328				break;
329			case 't':
330				esc = '\t';
331				break;
332			case '\\':
333			case '/':
334			case '"':
335				esc = *s->cursor;
336				break;
337			default:
338				s->errcode = PHP_JSON_ERROR_SYNTAX;
339				return PHP_JSON_T_ERROR;
340		}
341		*(s->pstr++) = esc;
342		++YYCURSOR;
343		s->str_start = s->cursor;
344		PHP_JSON_CONDITION_GOTO(STR_P2);
345	}
346	<STR_P2>["] => JS        {
347		PHP_JSON_SCANNER_COPY_ESC();
348		return PHP_JSON_T_STRING;
349	}
350	<STR_P2>ANY              { PHP_JSON_CONDITION_GOTO(STR_P2); }
351
352	<*>ANY                   {
353		s->errcode = PHP_JSON_ERROR_SYNTAX;
354		return PHP_JSON_T_ERROR;
355	}
356*/
357
358}
359
360