1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 7 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2017 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Jakub Zelenka <bukka@php.net> | 16 +----------------------------------------------------------------------+ 17*/ 18 19#include "php.h" 20#include "php_json_scanner.h" 21#include "php_json_scanner_defs.h" 22#include "php_json_parser.h" 23#include "json_parser.tab.h" 24 25#define YYCTYPE php_json_ctype 26#define YYCURSOR s->cursor 27#define YYLIMIT s->limit 28#define YYMARKER s->marker 29#define YYCTXMARKER s->ctxmarker 30 31#define YYGETCONDITION() s->state 32#define YYSETCONDITION(yystate) s->state = yystate 33 34#define YYFILL(n) 35 36#define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition) 37#define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition 38 39#define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0) 40#define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5) 41#define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11) 42 43#define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1) 44 45 46static void php_json_scanner_copy_string(php_json_scanner *s, int esc_size) 47{ 48 size_t len = s->cursor - s->str_start - esc_size - 1; 49 if (len) { 50 memcpy(s->pstr, s->str_start, len); 51 s->pstr += len; 52 } 53} 54 55static int php_json_hex_to_int(char code) 56{ 57 if (code >= '0' && code <= '9') { 58 return code - '0'; 59 } else if (code >= 'A' && code <= 'F') { 60 return code - ('A' - 10); 61 } else if (code >= 'a' && code <= 'f') { 62 return code - ('a' - 10); 63 } else { 64 /* this should never happened (just to suppress compiler warning) */ 65 return -1; 66 } 67} 68 69static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start) 70{ 71 int i, code = 0; 72 php_json_ctype *pc = s->cursor - start; 73 for (i = 0; i < size; i++) { 74 code |= php_json_hex_to_int(*(pc--)) << (i * 4); 75 } 76 return code; 77} 78 79static int php_json_ucs2_to_int(php_json_scanner *s, int size) 80{ 81 return php_json_ucs2_to_int_ex(s, size, 1); 82} 83 84void php_json_scanner_init(php_json_scanner *s, char *str, size_t str_len, int options) 85{ 86 s->cursor = (php_json_ctype *) str; 87 s->limit = (php_json_ctype *) str + str_len; 88 s->options = options; 89 PHP_JSON_CONDITION_SET(JS); 90} 91 92int php_json_scan(php_json_scanner *s) 93{ 94 ZVAL_NULL(&s->value); 95 96std: 97 s->token = s->cursor; 98 99/*!re2c 100 re2c:indent:top = 1; 101 re2c:yyfill:enable = 0; 102 103 DIGIT = [0-9] ; 104 DIGITNZ = [1-9] ; 105 UINT = "0" | ( DIGITNZ DIGIT* ) ; 106 INT = "-"? UINT ; 107 HEX = DIGIT | [a-fA-F] ; 108 HEXNZ = DIGITNZ | [a-fA-F] ; 109 HEX7 = [0-7] ; 110 HEXC = DIGIT | [a-cA-C] ; 111 FLOAT = INT "." DIGIT+ ; 112 EXP = ( INT | FLOAT ) [eE] [+-]? DIGIT+ ; 113 NL = "\r"? "\n" ; 114 WS = [ \t\r]+ ; 115 EOI = "\000"; 116 CTRL = [\x00-\x1F] ; 117 UTF8T = [\x80-\xBF] ; 118 UTF8_1 = [\x00-\x7F] ; 119 UTF8_2 = [\xC2-\xDF] UTF8T ; 120 UTF8_3A = "\xE0" [\xA0-\xBF] UTF8T ; 121 UTF8_3B = [\xE1-\xEC] UTF8T{2} ; 122 UTF8_3C = "\xED" [\x80-\x9F] UTF8T ; 123 UTF8_3D = [\xEE-\xEF] UTF8T{2} ; 124 UTF8_3 = UTF8_3A | UTF8_3B | UTF8_3C | UTF8_3D ; 125 UTF8_4A = "\xF0"[\x90-\xBF] UTF8T{2} ; 126 UTF8_4B = [\xF1-\xF3] UTF8T{3} ; 127 UTF8_4C = "\xF4" [\x80-\x8F] UTF8T{2} ; 128 UTF8_4 = UTF8_4A | UTF8_4B | UTF8_4C ; 129 UTF8 = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 ; 130 ANY = [^] ; 131 ESCPREF = "\\" ; 132 ESCSYM = ( "\"" | "\\" | "/" | [bfnrt] ) ; 133 ESC = ESCPREF ESCSYM ; 134 UTFSYM = "u" ; 135 UTFPREF = ESCPREF UTFSYM ; 136 UCS2 = UTFPREF HEX{4} ; 137 UTF16_1 = UTFPREF "00" HEX7 HEX ; 138 UTF16_2 = UTFPREF "0" HEX7 HEX{2} ; 139 UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ; 140 UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ; 141 142 <JS>"{" { return '{'; } 143 <JS>"}" { return '}'; } 144 <JS>"[" { return '['; } 145 <JS>"]" { return ']'; } 146 <JS>":" { return ':'; } 147 <JS>"," { return ','; } 148 <JS>"null" { 149 ZVAL_NULL(&s->value); 150 return PHP_JSON_T_NUL; 151 } 152 <JS>"true" { 153 ZVAL_TRUE(&s->value); 154 return PHP_JSON_T_TRUE; 155 } 156 <JS>"false" { 157 ZVAL_FALSE(&s->value); 158 return PHP_JSON_T_FALSE; 159 } 160 <JS>INT { 161 zend_bool bigint = 0, negative = s->token[0] == '-'; 162 size_t digits = (size_t) (s->cursor - s->token - negative); 163 if (digits >= PHP_JSON_INT_MAX_LENGTH) { 164 if (digits == PHP_JSON_INT_MAX_LENGTH) { 165 int cmp = strncmp((char *) (s->token + negative), LONG_MIN_DIGITS, PHP_JSON_INT_MAX_LENGTH); 166 if (!(cmp < 0 || (cmp == 0 && negative))) { 167 bigint = 1; 168 } 169 } else { 170 bigint = 1; 171 } 172 } 173 if (!bigint) { 174 ZVAL_LONG(&s->value, ZEND_STRTOL((char *) s->token, NULL, 10)); 175 return PHP_JSON_T_INT; 176 } else if (s->options & PHP_JSON_BIGINT_AS_STRING) { 177 ZVAL_STRINGL(&s->value, (char *) s->token, s->cursor - s->token); 178 return PHP_JSON_T_STRING; 179 } else { 180 ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); 181 return PHP_JSON_T_DOUBLE; 182 } 183 } 184 <JS>FLOAT|EXP { 185 ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); 186 return PHP_JSON_T_DOUBLE; 187 } 188 <JS>NL|WS { goto std; } 189 <JS>EOI { 190 if (s->limit < s->cursor) { 191 return PHP_JSON_T_EOI; 192 } else { 193 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 194 return PHP_JSON_T_ERROR; 195 } 196 } 197 <JS>["] { 198 s->str_start = s->cursor; 199 s->str_esc = 0; 200 PHP_JSON_CONDITION_SET(STR_P1); 201 PHP_JSON_CONDITION_GOTO(STR_P1); 202 } 203 <JS>CTRL { 204 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 205 return PHP_JSON_T_ERROR; 206 } 207 <JS>UTF8 { 208 s->errcode = PHP_JSON_ERROR_SYNTAX; 209 return PHP_JSON_T_ERROR; 210 } 211 <JS>ANY { 212 s->errcode = PHP_JSON_ERROR_UTF8; 213 return PHP_JSON_T_ERROR; 214 } 215 216 <STR_P1>CTRL { 217 s->errcode = PHP_JSON_ERROR_CTRL_CHAR; 218 return PHP_JSON_T_ERROR; 219 } 220 <STR_P1>UTF16_1 { 221 s->str_esc += 5; 222 PHP_JSON_CONDITION_GOTO(STR_P1); 223 } 224 <STR_P1>UTF16_2 { 225 s->str_esc += 4; 226 PHP_JSON_CONDITION_GOTO(STR_P1); 227 } 228 <STR_P1>UTF16_3 { 229 s->str_esc += 3; 230 PHP_JSON_CONDITION_GOTO(STR_P1); 231 } 232 <STR_P1>UTF16_4 { 233 s->str_esc += 8; 234 PHP_JSON_CONDITION_GOTO(STR_P1); 235 } 236 <STR_P1>UCS2 { 237 s->errcode = PHP_JSON_ERROR_UTF16; 238 return PHP_JSON_T_ERROR; 239 } 240 <STR_P1>ESC { 241 s->str_esc++; 242 PHP_JSON_CONDITION_GOTO(STR_P1); 243 } 244 <STR_P1>ESCPREF { 245 s->errcode = PHP_JSON_ERROR_SYNTAX; 246 return PHP_JSON_T_ERROR; 247 } 248 <STR_P1>["] { 249 zend_string *str; 250 size_t len = s->cursor - s->str_start - s->str_esc - 1; 251 if (len == 0) { 252 PHP_JSON_CONDITION_SET(JS); 253 ZVAL_EMPTY_STRING(&s->value); 254 return PHP_JSON_T_ESTRING; 255 } 256 str = zend_string_alloc(len, 0); 257 ZSTR_VAL(str)[len] = '\0'; 258 ZVAL_STR(&s->value, str); 259 if (s->str_esc) { 260 s->pstr = (php_json_ctype *) Z_STRVAL(s->value); 261 s->cursor = s->str_start; 262 PHP_JSON_CONDITION_SET(STR_P2); 263 PHP_JSON_CONDITION_GOTO(STR_P2); 264 } else { 265 memcpy(Z_STRVAL(s->value), s->str_start, len); 266 PHP_JSON_CONDITION_SET(JS); 267 return PHP_JSON_T_STRING; 268 } 269 } 270 <STR_P1>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P1); } 271 <STR_P1>ANY { 272 s->errcode = PHP_JSON_ERROR_UTF8; 273 return PHP_JSON_T_ERROR; 274 } 275 276 <STR_P2>UTF16_1 { 277 int utf16 = php_json_ucs2_to_int(s, 2); 278 PHP_JSON_SCANNER_COPY_UTF(); 279 *(s->pstr++) = (char) utf16; 280 s->str_start = s->cursor; 281 PHP_JSON_CONDITION_GOTO(STR_P2); 282 } 283 <STR_P2>UTF16_2 { 284 int utf16 = php_json_ucs2_to_int(s, 3); 285 PHP_JSON_SCANNER_COPY_UTF(); 286 *(s->pstr++) = (char) (0xc0 | (utf16 >> 6)); 287 *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); 288 s->str_start = s->cursor; 289 PHP_JSON_CONDITION_GOTO(STR_P2); 290 } 291 <STR_P2>UTF16_3 { 292 int utf16 = php_json_ucs2_to_int(s, 4); 293 PHP_JSON_SCANNER_COPY_UTF(); 294 *(s->pstr++) = (char) (0xe0 | (utf16 >> 12)); 295 *(s->pstr++) = (char) (0x80 | ((utf16 >> 6) & 0x3f)); 296 *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); 297 s->str_start = s->cursor; 298 PHP_JSON_CONDITION_GOTO(STR_P2); 299 } 300 <STR_P2>UTF16_4 { 301 int utf32, utf16_hi, utf16_lo; 302 utf16_hi = php_json_ucs2_to_int(s, 4); 303 utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7); 304 utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000; 305 PHP_JSON_SCANNER_COPY_UTF_SP(); 306 *(s->pstr++) = (char) (0xf0 | (utf32 >> 18)); 307 *(s->pstr++) = (char) (0x80 | ((utf32 >> 12) & 0x3f)); 308 *(s->pstr++) = (char) (0x80 | ((utf32 >> 6) & 0x3f)); 309 *(s->pstr++) = (char) (0x80 | (utf32 & 0x3f)); 310 s->str_start = s->cursor; 311 PHP_JSON_CONDITION_GOTO(STR_P2); 312 } 313 <STR_P2>ESCPREF { 314 char esc; 315 PHP_JSON_SCANNER_COPY_ESC(); 316 switch (*s->cursor) { 317 case 'b': 318 esc = '\b'; 319 break; 320 case 'f': 321 esc = '\f'; 322 break; 323 case 'n': 324 esc = '\n'; 325 break; 326 case 'r': 327 esc = '\r'; 328 break; 329 case 't': 330 esc = '\t'; 331 break; 332 case '\\': 333 case '/': 334 case '"': 335 esc = *s->cursor; 336 break; 337 default: 338 s->errcode = PHP_JSON_ERROR_SYNTAX; 339 return PHP_JSON_T_ERROR; 340 } 341 *(s->pstr++) = esc; 342 ++YYCURSOR; 343 s->str_start = s->cursor; 344 PHP_JSON_CONDITION_GOTO(STR_P2); 345 } 346 <STR_P2>["] => JS { 347 PHP_JSON_SCANNER_COPY_ESC(); 348 return PHP_JSON_T_STRING; 349 } 350 <STR_P2>ANY { PHP_JSON_CONDITION_GOTO(STR_P2); } 351 352 <*>ANY { 353 s->errcode = PHP_JSON_ERROR_SYNTAX; 354 return PHP_JSON_T_ERROR; 355 } 356*/ 357 358} 359 360