1/* 2 +----------------------------------------------------------------------+ 3 | Copyright (c) The PHP Group | 4 +----------------------------------------------------------------------+ 5 | This source file is subject to version 3.01 of the PHP license, | 6 | that is bundled with this package in the file LICENSE, and is | 7 | available through the world-wide-web at the following url: | 8 | https://www.php.net/license/3_01.txt | 9 | If you did not receive a copy of the PHP license and are unable to | 10 | obtain it through the world-wide-web, please send a note to | 11 | license@php.net so we can mail you a copy immediately. | 12 +----------------------------------------------------------------------+ 13 | Author: Jakub Zelenka <bukka@php.net> | 14 +----------------------------------------------------------------------+ 15*/ 16 17#include "php.h" 18#include "php_json_scanner.h" 19#include "php_json_scanner_defs.h" 20#include "php_json_parser.h" 21#include "json_parser.tab.h" 22 23#define YYCTYPE php_json_ctype 24#define YYCURSOR s->cursor 25#define YYLIMIT s->limit 26#define YYMARKER s->marker 27#define YYCTXMARKER s->ctxmarker 28 29#define YYGETCONDITION() s->state 30#define YYSETCONDITION(yystate) s->state = yystate 31 32#define YYFILL(n) 33 34#define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition) 35#define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition 36#define PHP_JSON_CONDITION_SET_AND_GOTO(condition) \ 37 PHP_JSON_CONDITION_SET(condition); \ 38 PHP_JSON_CONDITION_GOTO(condition) 39#define PHP_JSON_CONDITION_GOTO_STR_P2() \ 40 do { \ 41 if (s->utf8_invalid) { \ 42 PHP_JSON_CONDITION_GOTO(STR_P2_BIN); \ 43 } else { \ 44 PHP_JSON_CONDITION_GOTO(STR_P2_UTF); \ 45 } \ 46 } while(0) 47 48 49#define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0) 50#define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5) 51#define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11) 52 53#define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1) 54 55 56static void php_json_scanner_copy_string(php_json_scanner *s, size_t esc_size) 57{ 58 size_t len = (size_t)(s->cursor - s->str_start - esc_size - 1); 59 if (len) { 60 memcpy(s->pstr, s->str_start, len); 61 s->pstr += len; 62 } 63} 64 65static int php_json_hex_to_int(unsigned char code) 66{ 67 if (code >= '0' && code <= '9') { 68 return code - '0'; 69 } else if (code >= 'A' && code <= 'F') { 70 return code - ('A' - 10); 71 } else if (code >= 'a' && code <= 'f') { 72 return code - ('a' - 10); 73 } else { 74 /* this should never happened (just to suppress compiler warning) */ 75 return -1; 76 } 77} 78 79static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start) 80{ 81 int i, code = 0; 82 php_json_ctype *pc = s->cursor - start; 83 for (i = 0; i < size; i++) { 84 code |= php_json_hex_to_int(*(pc--)) << (i * 4); 85 } 86 return code; 87} 88 89static int php_json_ucs2_to_int(php_json_scanner *s, int size) 90{ 91 return php_json_ucs2_to_int_ex(s, size, 1); 92} 93 94void php_json_scanner_init(php_json_scanner *s, const char *str, size_t str_len, int options) 95{ 96 s->cursor = (php_json_ctype *) str; 97 s->limit = (php_json_ctype *) str + str_len; 98 s->options = options; 99 PHP_JSON_CONDITION_SET(JS); 100} 101 102int php_json_scan(php_json_scanner *s) 103{ 104 ZVAL_NULL(&s->value); 105 106std: 107 s->token = s->cursor; 108 109/*!re2c 110 re2c:indent:top = 1; 111 re2c:yyfill:enable = 0; 112 113 DIGIT = [0-9] ; 114 DIGITNZ = [1-9] ; 115 UINT = "0" | ( DIGITNZ DIGIT* ) ; 116 INT = "-"? UINT ; 117 HEX = DIGIT | [a-fA-F] ; 118 HEXNZ = DIGITNZ | [a-fA-F] ; 119 HEX7 = [0-7] ; 120 HEXC = DIGIT | [a-cA-C] ; 121 FLOAT = INT "." DIGIT+ ; 122 EXP = ( INT | FLOAT ) [eE] [+-]? DIGIT+ ; 123 NL = "\r"? "\n" ; 124 WS = [ \t\r]+ ; 125 EOI = "\000"; 126 CTRL = [\x00-\x1F] ; 127 UTF8T = [\x80-\xBF] ; 128 UTF8_1 = [\x00-\x7F] ; 129 UTF8_2 = [\xC2-\xDF] UTF8T ; 130 UTF8_3A = "\xE0" [\xA0-\xBF] UTF8T ; 131 UTF8_3B = [\xE1-\xEC] UTF8T{2} ; 132 UTF8_3C = "\xED" [\x80-\x9F] UTF8T ; 133 UTF8_3D = [\xEE-\xEF] UTF8T{2} ; 134 UTF8_3 = UTF8_3A | UTF8_3B | UTF8_3C | UTF8_3D ; 135 UTF8_4A = "\xF0"[\x90-\xBF] UTF8T{2} ; 136 UTF8_4B = [\xF1-\xF3] UTF8T{3} ; 137 UTF8_4C = "\xF4" [\x80-\x8F] UTF8T{2} ; 138 UTF8_4 = UTF8_4A | UTF8_4B | UTF8_4C ; 139 UTF8 = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 ; 140 ANY = [^] ; 141 ESCPREF = "\\" ; 142 ESCSYM = ( "\"" | "\\" | "/" | [bfnrt] ) ; 143 ESC = ESCPREF ESCSYM ; 144 UTFSYM = "u" ; 145 UTFPREF = ESCPREF UTFSYM ; 146 UCS2 = UTFPREF HEX{4} ; 147 UTF16_1 = UTFPREF "00" HEX7 HEX ; 148 UTF16_2 = UTFPREF "0" HEX7 HEX{2} ; 149 UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ; 150 UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ; 151 152 <JS>"{" { return '{'; } 153 <JS>"}" { return '}'; } 154 <JS>"[" { return '['; } 155 <JS>"]" { return ']'; } 156 <JS>":" { return ':'; } 157 <JS>"," { return ','; } 158 <JS>"null" { 159 ZVAL_NULL(&s->value); 160 return PHP_JSON_T_NUL; 161 } 162 <JS>"true" { 163 ZVAL_TRUE(&s->value); 164 return PHP_JSON_T_TRUE; 165 } 166 <JS>"false" { 167 ZVAL_FALSE(&s->value); 168 return PHP_JSON_T_FALSE; 169 } 170 <JS>INT { 171 bool bigint = 0, negative = s->token[0] == '-'; 172 size_t digits = (size_t) (s->cursor - s->token - negative); 173 if (digits >= PHP_JSON_INT_MAX_LENGTH) { 174 if (digits == PHP_JSON_INT_MAX_LENGTH) { 175 int cmp = strncmp((char *) (s->token + negative), LONG_MIN_DIGITS, PHP_JSON_INT_MAX_LENGTH); 176 if (!(cmp < 0 || (cmp == 0 && negative))) { 177 bigint = 1; 178 } 179 } else { 180 bigint = 1; 181 } 182 } 183 if (!bigint) { 184 ZVAL_LONG(&s->value, ZEND_STRTOL((char *) s->token, NULL, 10)); 185 return PHP_JSON_T_INT; 186 } else if (s->options & PHP_JSON_BIGINT_AS_STRING) { 187 ZVAL_STRINGL(&s->value, (char *) s->token, (size_t)(s->cursor - s->token)); 188 return PHP_JSON_T_STRING; 189 } else { 190 ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); 191 return PHP_JSON_T_DOUBLE; 192 } 193 } 194 <JS>FLOAT|EXP { 195 ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); 196 return PHP_JSON_T_DOUBLE; 197 } 198 <JS>NL|WS { goto std; } 199 <JS>EOI { 200 if (s->limit < s->cursor) { 201 return PHP_JSON_T_EOI; 202 } else { 203 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 204 return PHP_JSON_T_ERROR; 205 } 206 } 207 <JS>["] { 208 s->str_start = s->cursor; 209 s->str_esc = 0; 210 s->utf8_invalid = 0; 211 s->utf8_invalid_count = 0; 212 PHP_JSON_CONDITION_SET_AND_GOTO(STR_P1); 213 } 214 <JS>CTRL { 215 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 216 return PHP_JSON_T_ERROR; 217 } 218 <JS>UTF8 { 219 s->errcode = PHP_JSON_ERROR_SYNTAX; 220 return PHP_JSON_T_ERROR; 221 } 222 <JS>ANY { 223 s->errcode = PHP_JSON_ERROR_UTF8; 224 return PHP_JSON_T_ERROR; 225 } 226 227 <STR_P1>CTRL { 228 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 229 return PHP_JSON_T_ERROR; 230 } 231 <STR_P1>UTF16_1 { 232 s->str_esc += 5; 233 PHP_JSON_CONDITION_GOTO(STR_P1); 234 } 235 <STR_P1>UTF16_2 { 236 s->str_esc += 4; 237 PHP_JSON_CONDITION_GOTO(STR_P1); 238 } 239 <STR_P1>UTF16_3 { 240 s->str_esc += 3; 241 PHP_JSON_CONDITION_GOTO(STR_P1); 242 } 243 <STR_P1>UTF16_4 { 244 s->str_esc += 8; 245 PHP_JSON_CONDITION_GOTO(STR_P1); 246 } 247 <STR_P1>UCS2 { 248 s->errcode = PHP_JSON_ERROR_UTF16; 249 return PHP_JSON_T_ERROR; 250 } 251 <STR_P1>ESC { 252 s->str_esc++; 253 PHP_JSON_CONDITION_GOTO(STR_P1); 254 } 255 <STR_P1>ESCPREF { 256 s->errcode = PHP_JSON_ERROR_SYNTAX; 257 return PHP_JSON_T_ERROR; 258 } 259 <STR_P1>["] { 260 zend_string *str; 261 size_t len = (size_t)(s->cursor - s->str_start - s->str_esc - 1 + s->utf8_invalid_count); 262 if (len == 0) { 263 PHP_JSON_CONDITION_SET(JS); 264 ZVAL_EMPTY_STRING(&s->value); 265 return PHP_JSON_T_ESTRING; 266 } 267 str = zend_string_alloc(len, 0); 268 ZSTR_VAL(str)[len] = '\0'; 269 ZVAL_STR(&s->value, str); 270 if (s->str_esc || s->utf8_invalid) { 271 s->pstr = (php_json_ctype *) Z_STRVAL(s->value); 272 s->cursor = s->str_start; 273 PHP_JSON_CONDITION_GOTO_STR_P2(); 274 } else { 275 memcpy(Z_STRVAL(s->value), s->str_start, len); 276 PHP_JSON_CONDITION_SET(JS); 277 return PHP_JSON_T_STRING; 278 } 279 } 280 <STR_P1>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P1); } 281 <STR_P1>ANY { 282 if (s->options & (PHP_JSON_INVALID_UTF8_IGNORE | PHP_JSON_INVALID_UTF8_SUBSTITUTE)) { 283 if (s->options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { 284 if (s->utf8_invalid_count > INT_MAX - 2) { 285 s->errcode = PHP_JSON_ERROR_UTF8; 286 return PHP_JSON_T_ERROR; 287 } 288 s->utf8_invalid_count += 2; 289 } else { 290 s->utf8_invalid_count--; 291 } 292 s->utf8_invalid = 1; 293 PHP_JSON_CONDITION_GOTO(STR_P1); 294 } 295 s->errcode = PHP_JSON_ERROR_UTF8; 296 return PHP_JSON_T_ERROR; 297 } 298 299 <STR_P2_UTF,STR_P2_BIN>UTF16_1 { 300 int utf16 = php_json_ucs2_to_int(s, 2); 301 PHP_JSON_SCANNER_COPY_UTF(); 302 *(s->pstr++) = (unsigned char) utf16; 303 s->str_start = s->cursor; 304 PHP_JSON_CONDITION_GOTO_STR_P2(); 305 } 306 <STR_P2_UTF,STR_P2_BIN>UTF16_2 { 307 int utf16 = php_json_ucs2_to_int(s, 3); 308 PHP_JSON_SCANNER_COPY_UTF(); 309 *(s->pstr++) = (unsigned char) (0xc0 | (utf16 >> 6)); 310 *(s->pstr++) = (unsigned char) (0x80 | (utf16 & 0x3f)); 311 s->str_start = s->cursor; 312 PHP_JSON_CONDITION_GOTO_STR_P2(); 313 } 314 <STR_P2_UTF,STR_P2_BIN>UTF16_3 { 315 int utf16 = php_json_ucs2_to_int(s, 4); 316 PHP_JSON_SCANNER_COPY_UTF(); 317 *(s->pstr++) = (unsigned char) (0xe0 | (utf16 >> 12)); 318 *(s->pstr++) = (unsigned char) (0x80 | ((utf16 >> 6) & 0x3f)); 319 *(s->pstr++) = (unsigned char) (0x80 | (utf16 & 0x3f)); 320 s->str_start = s->cursor; 321 PHP_JSON_CONDITION_GOTO_STR_P2(); 322 } 323 <STR_P2_UTF,STR_P2_BIN>UTF16_4 { 324 int utf32, utf16_hi, utf16_lo; 325 utf16_hi = php_json_ucs2_to_int(s, 4); 326 utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7); 327 utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000; 328 PHP_JSON_SCANNER_COPY_UTF_SP(); 329 *(s->pstr++) = (unsigned char) (0xf0 | (utf32 >> 18)); 330 *(s->pstr++) = (unsigned char) (0x80 | ((utf32 >> 12) & 0x3f)); 331 *(s->pstr++) = (unsigned char) (0x80 | ((utf32 >> 6) & 0x3f)); 332 *(s->pstr++) = (unsigned char) (0x80 | (utf32 & 0x3f)); 333 s->str_start = s->cursor; 334 PHP_JSON_CONDITION_GOTO_STR_P2(); 335 } 336 <STR_P2_UTF,STR_P2_BIN>ESCPREF { 337 unsigned char esc; 338 PHP_JSON_SCANNER_COPY_ESC(); 339 switch (*s->cursor) { 340 case 'b': 341 esc = '\b'; 342 break; 343 case 'f': 344 esc = '\f'; break; 345 case 'n': 346 esc = '\n'; 347 break; 348 case 'r': 349 esc = '\r'; 350 break; 351 case 't': 352 esc = '\t'; 353 break; 354 case '\\': 355 case '/': 356 case '"': 357 esc = *s->cursor; 358 break; 359 default: 360 s->errcode = PHP_JSON_ERROR_SYNTAX; 361 return PHP_JSON_T_ERROR; 362 } 363 *(s->pstr++) = esc; 364 ++YYCURSOR; 365 s->str_start = s->cursor; 366 PHP_JSON_CONDITION_GOTO_STR_P2(); 367 } 368 <STR_P2_UTF,STR_P2_BIN>["] => JS { 369 PHP_JSON_SCANNER_COPY_ESC(); 370 return PHP_JSON_T_STRING; 371 } 372 <STR_P2_BIN>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P2_BIN); } 373 <STR_P2_BIN>ANY { 374 if (s->utf8_invalid) { 375 PHP_JSON_SCANNER_COPY_ESC(); 376 if (s->options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { 377 *(s->pstr++) = (unsigned char) (0xe0 | (0xfffd >> 12)); 378 *(s->pstr++) = (unsigned char) (0x80 | ((0xfffd >> 6) & 0x3f)); 379 *(s->pstr++) = (unsigned char) (0x80 | (0xfffd & 0x3f)); 380 } 381 s->str_start = s->cursor; 382 } 383 PHP_JSON_CONDITION_GOTO(STR_P2_BIN); 384 } 385 <STR_P2_UTF>ANY { PHP_JSON_CONDITION_GOTO(STR_P2_UTF); } 386 387 <*>ANY { 388 s->errcode = PHP_JSON_ERROR_SYNTAX; 389 return PHP_JSON_T_ERROR; 390 } 391*/ 392 393} 394