1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 7 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Jakub Zelenka <bukka@php.net> | 16 +----------------------------------------------------------------------+ 17*/ 18 19#include "php.h" 20#include "php_json_scanner.h" 21#include "php_json_scanner_defs.h" 22#include "php_json_parser.h" 23#include "json_parser.tab.h" 24 25#define YYCTYPE php_json_ctype 26#define YYCURSOR s->cursor 27#define YYLIMIT s->limit 28#define YYMARKER s->marker 29#define YYCTXMARKER s->ctxmarker 30 31#define YYGETCONDITION() s->state 32#define YYSETCONDITION(yystate) s->state = yystate 33 34#define YYFILL(n) 35 36#define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition) 37#define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition 38#define PHP_JSON_CONDITION_SET_AND_GOTO(condition) \ 39 PHP_JSON_CONDITION_SET(condition); \ 40 PHP_JSON_CONDITION_GOTO(condition) 41#define PHP_JSON_CONDITION_GOTO_STR_P2() \ 42 do { \ 43 if (s->utf8_invalid) { \ 44 PHP_JSON_CONDITION_GOTO(STR_P2_BIN); \ 45 } else { \ 46 PHP_JSON_CONDITION_GOTO(STR_P2_UTF); \ 47 } \ 48 } while(0) 49 50 51#define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0) 52#define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5) 53#define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11) 54 55#define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1) 56 57 58static void php_json_scanner_copy_string(php_json_scanner *s, int esc_size) 59{ 60 size_t len = s->cursor - s->str_start - esc_size - 1; 61 if (len) { 62 memcpy(s->pstr, s->str_start, len); 63 s->pstr += len; 64 } 65} 66 67static int php_json_hex_to_int(char code) 68{ 69 if (code >= '0' && code <= '9') { 70 return code - '0'; 71 } else if (code >= 'A' && code <= 'F') { 72 return code - ('A' - 10); 73 } else if (code >= 'a' && code <= 'f') { 74 return code - ('a' - 10); 75 } else { 76 /* this should never happened (just to suppress compiler warning) */ 77 return -1; 78 } 79} 80 81static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start) 82{ 83 int i, code = 0; 84 php_json_ctype *pc = s->cursor - start; 85 for (i = 0; i < size; i++) { 86 code |= php_json_hex_to_int(*(pc--)) << (i * 4); 87 } 88 return code; 89} 90 91static int php_json_ucs2_to_int(php_json_scanner *s, int size) 92{ 93 return php_json_ucs2_to_int_ex(s, size, 1); 94} 95 96void php_json_scanner_init(php_json_scanner *s, char *str, size_t str_len, int options) 97{ 98 s->cursor = (php_json_ctype *) str; 99 s->limit = (php_json_ctype *) str + str_len; 100 s->options = options; 101 PHP_JSON_CONDITION_SET(JS); 102} 103 104int php_json_scan(php_json_scanner *s) 105{ 106 ZVAL_NULL(&s->value); 107 108std: 109 s->token = s->cursor; 110 111/*!re2c 112 re2c:indent:top = 1; 113 re2c:yyfill:enable = 0; 114 115 DIGIT = [0-9] ; 116 DIGITNZ = [1-9] ; 117 UINT = "0" | ( DIGITNZ DIGIT* ) ; 118 INT = "-"? UINT ; 119 HEX = DIGIT | [a-fA-F] ; 120 HEXNZ = DIGITNZ | [a-fA-F] ; 121 HEX7 = [0-7] ; 122 HEXC = DIGIT | [a-cA-C] ; 123 FLOAT = INT "." DIGIT+ ; 124 EXP = ( INT | FLOAT ) [eE] [+-]? DIGIT+ ; 125 NL = "\r"? "\n" ; 126 WS = [ \t\r]+ ; 127 EOI = "\000"; 128 CTRL = [\x00-\x1F] ; 129 UTF8T = [\x80-\xBF] ; 130 UTF8_1 = [\x00-\x7F] ; 131 UTF8_2 = [\xC2-\xDF] UTF8T ; 132 UTF8_3A = "\xE0" [\xA0-\xBF] UTF8T ; 133 UTF8_3B = [\xE1-\xEC] UTF8T{2} ; 134 UTF8_3C = "\xED" [\x80-\x9F] UTF8T ; 135 UTF8_3D = [\xEE-\xEF] UTF8T{2} ; 136 UTF8_3 = UTF8_3A | UTF8_3B | UTF8_3C | UTF8_3D ; 137 UTF8_4A = "\xF0"[\x90-\xBF] UTF8T{2} ; 138 UTF8_4B = [\xF1-\xF3] UTF8T{3} ; 139 UTF8_4C = "\xF4" [\x80-\x8F] UTF8T{2} ; 140 UTF8_4 = UTF8_4A | UTF8_4B | UTF8_4C ; 141 UTF8 = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 ; 142 ANY = [^] ; 143 ESCPREF = "\\" ; 144 ESCSYM = ( "\"" | "\\" | "/" | [bfnrt] ) ; 145 ESC = ESCPREF ESCSYM ; 146 UTFSYM = "u" ; 147 UTFPREF = ESCPREF UTFSYM ; 148 UCS2 = UTFPREF HEX{4} ; 149 UTF16_1 = UTFPREF "00" HEX7 HEX ; 150 UTF16_2 = UTFPREF "0" HEX7 HEX{2} ; 151 UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ; 152 UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ; 153 154 <JS>"{" { return '{'; } 155 <JS>"}" { return '}'; } 156 <JS>"[" { return '['; } 157 <JS>"]" { return ']'; } 158 <JS>":" { return ':'; } 159 <JS>"," { return ','; } 160 <JS>"null" { 161 ZVAL_NULL(&s->value); 162 return PHP_JSON_T_NUL; 163 } 164 <JS>"true" { 165 ZVAL_TRUE(&s->value); 166 return PHP_JSON_T_TRUE; 167 } 168 <JS>"false" { 169 ZVAL_FALSE(&s->value); 170 return PHP_JSON_T_FALSE; 171 } 172 <JS>INT { 173 zend_bool bigint = 0, negative = s->token[0] == '-'; 174 size_t digits = (size_t) (s->cursor - s->token - negative); 175 if (digits >= PHP_JSON_INT_MAX_LENGTH) { 176 if (digits == PHP_JSON_INT_MAX_LENGTH) { 177 int cmp = strncmp((char *) (s->token + negative), LONG_MIN_DIGITS, PHP_JSON_INT_MAX_LENGTH); 178 if (!(cmp < 0 || (cmp == 0 && negative))) { 179 bigint = 1; 180 } 181 } else { 182 bigint = 1; 183 } 184 } 185 if (!bigint) { 186 ZVAL_LONG(&s->value, ZEND_STRTOL((char *) s->token, NULL, 10)); 187 return PHP_JSON_T_INT; 188 } else if (s->options & PHP_JSON_BIGINT_AS_STRING) { 189 ZVAL_STRINGL(&s->value, (char *) s->token, s->cursor - s->token); 190 return PHP_JSON_T_STRING; 191 } else { 192 ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); 193 return PHP_JSON_T_DOUBLE; 194 } 195 } 196 <JS>FLOAT|EXP { 197 ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); 198 return PHP_JSON_T_DOUBLE; 199 } 200 <JS>NL|WS { goto std; } 201 <JS>EOI { 202 if (s->limit < s->cursor) { 203 return PHP_JSON_T_EOI; 204 } else { 205 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 206 return PHP_JSON_T_ERROR; 207 } 208 } 209 <JS>["] { 210 s->str_start = s->cursor; 211 s->str_esc = 0; 212 s->utf8_invalid = 0; 213 s->utf8_invalid_count = 0; 214 PHP_JSON_CONDITION_SET_AND_GOTO(STR_P1); 215 } 216 <JS>CTRL { 217 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 218 return PHP_JSON_T_ERROR; 219 } 220 <JS>UTF8 { 221 s->errcode = PHP_JSON_ERROR_SYNTAX; 222 return PHP_JSON_T_ERROR; 223 } 224 <JS>ANY { 225 s->errcode = PHP_JSON_ERROR_UTF8; 226 return PHP_JSON_T_ERROR; 227 } 228 229 <STR_P1>CTRL { 230 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 231 return PHP_JSON_T_ERROR; 232 } 233 <STR_P1>UTF16_1 { 234 s->str_esc += 5; 235 PHP_JSON_CONDITION_GOTO(STR_P1); 236 } 237 <STR_P1>UTF16_2 { 238 s->str_esc += 4; 239 PHP_JSON_CONDITION_GOTO(STR_P1); 240 } 241 <STR_P1>UTF16_3 { 242 s->str_esc += 3; 243 PHP_JSON_CONDITION_GOTO(STR_P1); 244 } 245 <STR_P1>UTF16_4 { 246 s->str_esc += 8; 247 PHP_JSON_CONDITION_GOTO(STR_P1); 248 } 249 <STR_P1>UCS2 { 250 s->errcode = PHP_JSON_ERROR_UTF16; 251 return PHP_JSON_T_ERROR; 252 } 253 <STR_P1>ESC { 254 s->str_esc++; 255 PHP_JSON_CONDITION_GOTO(STR_P1); 256 } 257 <STR_P1>ESCPREF { 258 s->errcode = PHP_JSON_ERROR_SYNTAX; 259 return PHP_JSON_T_ERROR; 260 } 261 <STR_P1>["] { 262 zend_string *str; 263 size_t len = s->cursor - s->str_start - s->str_esc - 1 + s->utf8_invalid_count; 264 if (len == 0) { 265 PHP_JSON_CONDITION_SET(JS); 266 ZVAL_EMPTY_STRING(&s->value); 267 return PHP_JSON_T_ESTRING; 268 } 269 str = zend_string_alloc(len, 0); 270 ZSTR_VAL(str)[len] = '\0'; 271 ZVAL_STR(&s->value, str); 272 if (s->str_esc || s->utf8_invalid) { 273 s->pstr = (php_json_ctype *) Z_STRVAL(s->value); 274 s->cursor = s->str_start; 275 PHP_JSON_CONDITION_GOTO_STR_P2(); 276 } else { 277 memcpy(Z_STRVAL(s->value), s->str_start, len); 278 PHP_JSON_CONDITION_SET(JS); 279 return PHP_JSON_T_STRING; 280 } 281 } 282 <STR_P1>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P1); } 283 <STR_P1>ANY { 284 if (s->options & (PHP_JSON_INVALID_UTF8_IGNORE | PHP_JSON_INVALID_UTF8_SUBSTITUTE)) { 285 if (s->options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { 286 if (s->utf8_invalid_count > INT_MAX - 2) { 287 s->errcode = PHP_JSON_ERROR_UTF8; 288 return PHP_JSON_T_ERROR; 289 } 290 s->utf8_invalid_count += 2; 291 } else { 292 s->utf8_invalid_count--; 293 } 294 s->utf8_invalid = 1; 295 PHP_JSON_CONDITION_GOTO(STR_P1); 296 } 297 s->errcode = PHP_JSON_ERROR_UTF8; 298 return PHP_JSON_T_ERROR; 299 } 300 301 <STR_P2_UTF,STR_P2_BIN>UTF16_1 { 302 int utf16 = php_json_ucs2_to_int(s, 2); 303 PHP_JSON_SCANNER_COPY_UTF(); 304 *(s->pstr++) = (char) utf16; 305 s->str_start = s->cursor; 306 PHP_JSON_CONDITION_GOTO_STR_P2(); 307 } 308 <STR_P2_UTF,STR_P2_BIN>UTF16_2 { 309 int utf16 = php_json_ucs2_to_int(s, 3); 310 PHP_JSON_SCANNER_COPY_UTF(); 311 *(s->pstr++) = (char) (0xc0 | (utf16 >> 6)); 312 *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); 313 s->str_start = s->cursor; 314 PHP_JSON_CONDITION_GOTO_STR_P2(); 315 } 316 <STR_P2_UTF,STR_P2_BIN>UTF16_3 { 317 int utf16 = php_json_ucs2_to_int(s, 4); 318 PHP_JSON_SCANNER_COPY_UTF(); 319 *(s->pstr++) = (char) (0xe0 | (utf16 >> 12)); 320 *(s->pstr++) = (char) (0x80 | ((utf16 >> 6) & 0x3f)); 321 *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); 322 s->str_start = s->cursor; 323 PHP_JSON_CONDITION_GOTO_STR_P2(); 324 } 325 <STR_P2_UTF,STR_P2_BIN>UTF16_4 { 326 int utf32, utf16_hi, utf16_lo; 327 utf16_hi = php_json_ucs2_to_int(s, 4); 328 utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7); 329 utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000; 330 PHP_JSON_SCANNER_COPY_UTF_SP(); 331 *(s->pstr++) = (char) (0xf0 | (utf32 >> 18)); 332 *(s->pstr++) = (char) (0x80 | ((utf32 >> 12) & 0x3f)); 333 *(s->pstr++) = (char) (0x80 | ((utf32 >> 6) & 0x3f)); 334 *(s->pstr++) = (char) (0x80 | (utf32 & 0x3f)); 335 s->str_start = s->cursor; 336 PHP_JSON_CONDITION_GOTO_STR_P2(); 337 } 338 <STR_P2_UTF,STR_P2_BIN>ESCPREF { 339 char esc; 340 PHP_JSON_SCANNER_COPY_ESC(); 341 switch (*s->cursor) { 342 case 'b': 343 esc = '\b'; 344 break; 345 case 'f': 346 esc = '\f'; break; 347 case 'n': 348 esc = '\n'; 349 break; 350 case 'r': 351 esc = '\r'; 352 break; 353 case 't': 354 esc = '\t'; 355 break; 356 case '\\': 357 case '/': 358 case '"': 359 esc = *s->cursor; 360 break; 361 default: 362 s->errcode = PHP_JSON_ERROR_SYNTAX; 363 return PHP_JSON_T_ERROR; 364 } 365 *(s->pstr++) = esc; 366 ++YYCURSOR; 367 s->str_start = s->cursor; 368 PHP_JSON_CONDITION_GOTO_STR_P2(); 369 } 370 <STR_P2_UTF,STR_P2_BIN>["] => JS { 371 PHP_JSON_SCANNER_COPY_ESC(); 372 return PHP_JSON_T_STRING; 373 } 374 <STR_P2_BIN>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P2_BIN); } 375 <STR_P2_BIN>ANY { 376 if (s->utf8_invalid) { 377 PHP_JSON_SCANNER_COPY_ESC(); 378 if (s->options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { 379 *(s->pstr++) = (char) (0xe0 | (0xfffd >> 12)); 380 *(s->pstr++) = (char) (0x80 | ((0xfffd >> 6) & 0x3f)); 381 *(s->pstr++) = (char) (0x80 | (0xfffd & 0x3f)); 382 } 383 s->str_start = s->cursor; 384 } 385 PHP_JSON_CONDITION_GOTO(STR_P2_BIN); 386 } 387 <STR_P2_UTF>ANY { PHP_JSON_CONDITION_GOTO(STR_P2_UTF); } 388 389 <*>ANY { 390 s->errcode = PHP_JSON_ERROR_SYNTAX; 391 return PHP_JSON_T_ERROR; 392 } 393*/ 394 395} 396