1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Marcus Boerger <helly@php.net> |
16 | Nuno Lopes <nlopess@php.net> |
17 | Scott MacVicar <scottmac@php.net> |
18 | Flex version authors: |
19 | Andi Gutmans <andi@php.net> |
20 | Zeev Suraski <zeev@php.net> |
21 +----------------------------------------------------------------------+
22 */
23
/* Scanner tracing hook. With the condition below left at 0, YYDEBUG expands
 * to nothing; flipping it to 1 makes YYDEBUG print the state number and the
 * current character via printf. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
29
30 #include "zend_language_scanner_defs.h"
31
32 #include <errno.h>
33 #include "zend.h"
34 #ifdef ZEND_WIN32
35 # include <Winuser.h>
36 #endif
37 #include "zend_alloc.h"
38 #include <zend_language_parser.h>
39 #include "zend_compile.h"
40 #include "zend_language_scanner.h"
41 #include "zend_highlight.h"
42 #include "zend_constants.h"
43 #include "zend_variables.h"
44 #include "zend_operators.h"
45 #include "zend_API.h"
46 #include "zend_strtod.h"
47 #include "zend_exceptions.h"
48 #include "zend_virtual_cwd.h"
49
/* Scanner interface macros: character type, refill hook, cursor/limit/marker. */
#define YYCTYPE   unsigned char
/* No refilling is done: once the cursor would move n bytes to or past
 * YYLIMIT + ZEND_MMAP_AHEAD, the enclosing function returns 0. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* The current start condition lives in SCNG(yy_state). */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Maps a bare condition name to its yyc-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x bytes of the current token: the cursor is moved back
 * to yytext + x and yyleng becomes x. The argument is parenthesized so that
 * compound expressions (a + b, c ? d : e, ...) are used as a whole. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
69
70 /* perform sanity check. If this message is triggered you should
71 increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
72 /*!max:re2c */
73 #if ZEND_MMAP_AHEAD < YYMAXFILL
74 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
75 #endif
76
77 #include <stdarg.h>
78
79 #ifdef HAVE_UNISTD_H
80 # include <unistd.h>
81 #endif
82
/* Globals Macros */
/* SCNG is this file's shorthand for LANG_SCNG; every scanner-global access
 * below goes through it. */
#define SCNG	LANG_SCNG
#ifdef ZTS
/* ZTS builds define an id and an offset for the scanner globals
 * (NOTE(review): presumably resolved through LANG_SCNG per thread - confirm). */
ZEND_API ts_rsrc_id language_scanner_globals_id;
ZEND_API size_t language_scanner_globals_offset;
#else
/* Non-ZTS builds define one scanner-globals object. */
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
91
/* Walk the l bytes starting at s and bump CG(zend_lineno) once per line
 * break. A "\n" always counts; a "\r" counts only when the byte after it is
 * not "\n", so a "\r\n" pair is counted once (by its "\n"). Note that the
 * byte following the last one may be inspected. */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *nl_cur = (s); \
	char *nl_end = nl_cur + (l); \
 \
	for (; nl_cur < nl_end; nl_cur++) { \
		if (*nl_cur == '\n' || (*nl_cur == '\r' && nl_cur[1] != '\n')) { \
			CG(zend_lineno)++; \
		} \
	} \
} while (0)
103
/* Bump CG(zend_lineno) when the single character c is "\n" or "\r".
 * c is parenthesized so that a compound argument (for example a conditional
 * expression) is compared as a whole; note that c may be evaluated twice. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
110
/* To save initial string length after scanning to first variable.
 * The value is kept in SCNG(scanned_string_len): the SET macro stores it,
 * the GET macro reads it back. */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)
114
/* Character classes used by the scanner. Each macro yields 1 or 0 and may
 * evaluate its argument several times, so pass side-effect-free expressions. */

/* First byte of a label: '_', an ASCII letter, or any value >= 0x80. */
#define IS_LABEL_START(c) ((c) == '_' || (c) >= 0x80 || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
/* Following bytes of a label: anything IS_LABEL_START accepts, or a digit. */
#define IS_LABEL_SUCCESSOR(c) (IS_LABEL_START(c) || ((c) >= '0' && (c) <= '9'))

/* Octal digit 0-7. */
#define ZEND_IS_OCT(c)  ((c) <= '7' && (c) >= '0')
/* Hexadecimal digit, either case. */
#define ZEND_IS_HEX(c)  (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f') || ((c) >= '0' && (c) <= '9'))
120
BEGIN_EXTERN_C()121 BEGIN_EXTERN_C()
122
/* Remove every '_' from the NUL-terminated string `str`, compacting it in
 * place, and decrement *len once for each character removed. */
static void strip_underscores(char *str, size_t *len)
{
	char *out = str;
	const char *in;

	for (in = str; *in != '\0'; in++) {
		if (*in == '_') {
			(*len)--;
			continue;
		}
		*out++ = *in;
	}
	*out = '\0';
}
137
encoding_filter_script_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)138 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139 {
140 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
141 ZEND_ASSERT(internal_encoding);
142 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
143 }
144
encoding_filter_script_to_intermediate(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)145 static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
146 {
147 return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
148 }
149
encoding_filter_intermediate_to_script(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)150 static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
151 {
152 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
153 LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
154 }
155
encoding_filter_intermediate_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)156 static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
157 {
158 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
159 ZEND_ASSERT(internal_encoding);
160 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
161 internal_encoding, zend_multibyte_encoding_utf8);
162 }
163
164
_yy_push_state(int new_state)165 static void _yy_push_state(int new_state)
166 {
167 zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
168 YYSETCONDITION(new_state);
169 }
170
171 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
172
yy_pop_state(void)173 static void yy_pop_state(void)
174 {
175 int *stack_state = zend_stack_top(&SCNG(state_stack));
176 YYSETCONDITION(*stack_state);
177 zend_stack_del_top(&SCNG(state_stack));
178 }
179
yy_scan_buffer(char * str,size_t len)180 static void yy_scan_buffer(char *str, size_t len)
181 {
182 YYCURSOR = (YYCTYPE*)str;
183 YYLIMIT = YYCURSOR + len;
184 if (!SCNG(yy_start)) {
185 SCNG(yy_start) = YYCURSOR;
186 }
187 }
188
startup_scanner(void)189 void startup_scanner(void)
190 {
191 CG(parse_error) = 0;
192 CG(doc_comment) = NULL;
193 CG(extra_fn_flags) = 0;
194 zend_stack_init(&SCNG(state_stack), sizeof(int));
195 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
196 SCNG(heredoc_scan_ahead) = 0;
197 }
198
/* Element destructor for the heredoc label stack: frees the label string
 * held by the entry (the entry itself is not freed here). */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
202
shutdown_scanner(void)203 void shutdown_scanner(void)
204 {
205 CG(parse_error) = 0;
206 RESET_DOC_COMMENT();
207 zend_stack_destroy(&SCNG(state_stack));
208 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
209 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
210 SCNG(heredoc_scan_ahead) = 0;
211 SCNG(on_event) = NULL;
212 }
213
zend_save_lexical_state(zend_lex_state * lex_state)214 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
215 {
216 lex_state->yy_leng = SCNG(yy_leng);
217 lex_state->yy_start = SCNG(yy_start);
218 lex_state->yy_text = SCNG(yy_text);
219 lex_state->yy_cursor = SCNG(yy_cursor);
220 lex_state->yy_marker = SCNG(yy_marker);
221 lex_state->yy_limit = SCNG(yy_limit);
222
223 lex_state->state_stack = SCNG(state_stack);
224 zend_stack_init(&SCNG(state_stack), sizeof(int));
225
226 lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
227 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
228
229 lex_state->in = SCNG(yy_in);
230 lex_state->yy_state = YYSTATE;
231 lex_state->filename = zend_get_compiled_filename();
232 lex_state->lineno = CG(zend_lineno);
233
234 lex_state->script_org = SCNG(script_org);
235 lex_state->script_org_size = SCNG(script_org_size);
236 lex_state->script_filtered = SCNG(script_filtered);
237 lex_state->script_filtered_size = SCNG(script_filtered_size);
238 lex_state->input_filter = SCNG(input_filter);
239 lex_state->output_filter = SCNG(output_filter);
240 lex_state->script_encoding = SCNG(script_encoding);
241
242 lex_state->on_event = SCNG(on_event);
243 lex_state->on_event_context = SCNG(on_event_context);
244
245 lex_state->ast = CG(ast);
246 lex_state->ast_arena = CG(ast_arena);
247 }
248
zend_restore_lexical_state(zend_lex_state * lex_state)249 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
250 {
251 SCNG(yy_leng) = lex_state->yy_leng;
252 SCNG(yy_start) = lex_state->yy_start;
253 SCNG(yy_text) = lex_state->yy_text;
254 SCNG(yy_cursor) = lex_state->yy_cursor;
255 SCNG(yy_marker) = lex_state->yy_marker;
256 SCNG(yy_limit) = lex_state->yy_limit;
257
258 zend_stack_destroy(&SCNG(state_stack));
259 SCNG(state_stack) = lex_state->state_stack;
260
261 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
262 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
263 SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
264
265 SCNG(yy_in) = lex_state->in;
266 YYSETCONDITION(lex_state->yy_state);
267 CG(zend_lineno) = lex_state->lineno;
268 zend_restore_compiled_filename(lex_state->filename);
269
270 if (SCNG(script_filtered)) {
271 efree(SCNG(script_filtered));
272 SCNG(script_filtered) = NULL;
273 }
274 SCNG(script_org) = lex_state->script_org;
275 SCNG(script_org_size) = lex_state->script_org_size;
276 SCNG(script_filtered) = lex_state->script_filtered;
277 SCNG(script_filtered_size) = lex_state->script_filtered_size;
278 SCNG(input_filter) = lex_state->input_filter;
279 SCNG(output_filter) = lex_state->output_filter;
280 SCNG(script_encoding) = lex_state->script_encoding;
281
282 SCNG(on_event) = lex_state->on_event;
283 SCNG(on_event_context) = lex_state->on_event_context;
284
285 CG(ast) = lex_state->ast;
286 CG(ast_arena) = lex_state->ast_arena;
287
288 RESET_DOC_COMMENT();
289 }
290
zend_destroy_file_handle(zend_file_handle * file_handle)291 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
292 {
293 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
294 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
295 file_handle->opened_path = NULL;
296 if (file_handle->free_filename) {
297 file_handle->filename = NULL;
298 }
299 }
300
zend_lex_tstring(zval * zv)301 ZEND_API void zend_lex_tstring(zval *zv)
302 {
303 if (SCNG(on_event)) {
304 SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
305 }
306
307 ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
308 }
309
/* Byte-order-mark byte sequences compared against the start of a script.
 * These are string literals, so sizeof() counts one extra byte for the
 * terminating NUL; the code below compares sizeof(...)-1 bytes. */
#define BOM_UTF32_BE	"\x00\x00\xfe\xff"
#define BOM_UTF32_LE	"\xff\xfe\x00\x00"
#define BOM_UTF16_BE	"\xfe\xff"
#define BOM_UTF16_LE	"\xff\xfe"
#define BOM_UTF8		"\xef\xbb\xbf"
315
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)316 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
317 {
318 const unsigned char *p;
319 int wchar_size = 2;
320 int le = 0;
321
322 /* utf-16 or utf-32? */
323 p = script;
324 assert(p >= script);
325 while ((size_t)(p-script) < script_size) {
326 p = memchr(p, 0, script_size-(p-script)-2);
327 if (!p) {
328 break;
329 }
330 if (*(p+1) == '\0' && *(p+2) == '\0') {
331 wchar_size = 4;
332 break;
333 }
334
335 /* searching for UTF-32 specific byte orders, so this will do */
336 p += 4;
337 }
338
339 /* BE or LE? */
340 p = script;
341 assert(p >= script);
342 while ((size_t)(p-script) < script_size) {
343 if (*p == '\0' && *(p+wchar_size-1) != '\0') {
344 /* BE */
345 le = 0;
346 break;
347 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
348 /* LE* */
349 le = 1;
350 break;
351 }
352 p += wchar_size;
353 }
354
355 if (wchar_size == 2) {
356 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
357 } else {
358 return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
359 }
360
361 return NULL;
362 }
363
zend_multibyte_detect_unicode(void)364 static const zend_encoding* zend_multibyte_detect_unicode(void)
365 {
366 const zend_encoding *script_encoding = NULL;
367 int bom_size;
368 unsigned char *pos1, *pos2;
369
370 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
371 return NULL;
372 }
373
374 /* check out BOM */
375 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
376 script_encoding = zend_multibyte_encoding_utf32be;
377 bom_size = sizeof(BOM_UTF32_BE)-1;
378 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
379 script_encoding = zend_multibyte_encoding_utf32le;
380 bom_size = sizeof(BOM_UTF32_LE)-1;
381 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
382 script_encoding = zend_multibyte_encoding_utf16be;
383 bom_size = sizeof(BOM_UTF16_BE)-1;
384 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
385 script_encoding = zend_multibyte_encoding_utf16le;
386 bom_size = sizeof(BOM_UTF16_LE)-1;
387 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
388 script_encoding = zend_multibyte_encoding_utf8;
389 bom_size = sizeof(BOM_UTF8)-1;
390 }
391
392 if (script_encoding) {
393 /* remove BOM */
394 LANG_SCNG(script_org) += bom_size;
395 LANG_SCNG(script_org_size) -= bom_size;
396
397 return script_encoding;
398 }
399
400 /* script contains NULL bytes -> auto-detection */
401 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
402 /* check if the NULL byte is after the __HALT_COMPILER(); */
403 pos2 = LANG_SCNG(script_org);
404
405 while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
406 pos2 = memchr(pos2, '_', pos1 - pos2);
407 if (!pos2) break;
408 pos2++;
409 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
410 pos2 += sizeof("_HALT_COMPILER")-1;
411 while (*pos2 == ' ' ||
412 *pos2 == '\t' ||
413 *pos2 == '\r' ||
414 *pos2 == '\n') {
415 pos2++;
416 }
417 if (*pos2 == '(') {
418 pos2++;
419 while (*pos2 == ' ' ||
420 *pos2 == '\t' ||
421 *pos2 == '\r' ||
422 *pos2 == '\n') {
423 pos2++;
424 }
425 if (*pos2 == ')') {
426 pos2++;
427 while (*pos2 == ' ' ||
428 *pos2 == '\t' ||
429 *pos2 == '\r' ||
430 *pos2 == '\n') {
431 pos2++;
432 }
433 if (*pos2 == ';') {
434 return NULL;
435 }
436 }
437 }
438 }
439 }
440 /* make best effort if BOM is missing */
441 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
442 }
443
444 return NULL;
445 }
446
zend_multibyte_find_script_encoding(void)447 static const zend_encoding* zend_multibyte_find_script_encoding(void)
448 {
449 const zend_encoding *script_encoding;
450
451 if (CG(detect_unicode)) {
452 /* check out bom(byte order mark) and see if containing wchars */
453 script_encoding = zend_multibyte_detect_unicode();
454 if (script_encoding != NULL) {
455 /* bom or wchar detection is prior to 'script_encoding' option */
456 return script_encoding;
457 }
458 }
459
460 /* if no script_encoding specified, just leave alone */
461 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
462 return NULL;
463 }
464
465 /* if multiple encodings specified, detect automagically */
466 if (CG(script_encoding_list_size) > 1) {
467 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
468 }
469
470 return CG(script_encoding_list)[0];
471 }
472
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)473 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
474 {
475 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
476 const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
477
478 if (!script_encoding) {
479 return FAILURE;
480 }
481
482 /* judge input/output filter */
483 LANG_SCNG(script_encoding) = script_encoding;
484 LANG_SCNG(input_filter) = NULL;
485 LANG_SCNG(output_filter) = NULL;
486
487 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
488 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
489 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
490 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
491 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
492 } else {
493 LANG_SCNG(input_filter) = NULL;
494 LANG_SCNG(output_filter) = NULL;
495 }
496 return SUCCESS;
497 }
498
499 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
500 LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
501 LANG_SCNG(output_filter) = NULL;
502 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
503 LANG_SCNG(input_filter) = NULL;
504 LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
505 } else {
506 /* both script and internal encodings are incompatible w/ flex */
507 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
508 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
509 }
510
511 return 0;
512 }
513
open_file_for_scanning(zend_file_handle * file_handle)514 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
515 {
516 char *buf;
517 size_t size;
518 zend_string *compiled_filename;
519
520 if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
521 /* Still add it to open_files to make destroy_file_handle work */
522 zend_llist_add_element(&CG(open_files), file_handle);
523 return FAILURE;
524 }
525
526 ZEND_ASSERT(!EG(exception) && "stream_fixup() should have failed");
527 zend_llist_add_element(&CG(open_files), file_handle);
528 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
529 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
530 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
531 fh->handle.stream.handle = (void*)(((char*)fh) + diff);
532 file_handle->handle.stream.handle = fh->handle.stream.handle;
533 }
534
535 /* Reset the scanner for scanning the new file */
536 SCNG(yy_in) = file_handle;
537 SCNG(yy_start) = NULL;
538
539 if (size != (size_t)-1) {
540 if (CG(multibyte)) {
541 SCNG(script_org) = (unsigned char*)buf;
542 SCNG(script_org_size) = size;
543 SCNG(script_filtered) = NULL;
544
545 zend_multibyte_set_filter(NULL);
546
547 if (SCNG(input_filter)) {
548 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
549 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
550 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
551 }
552 buf = (char*)SCNG(script_filtered);
553 size = SCNG(script_filtered_size);
554 }
555 }
556 SCNG(yy_start) = (unsigned char *)buf;
557 yy_scan_buffer(buf, size);
558 } else {
559 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
560 }
561
562 if (CG(skip_shebang)) {
563 CG(skip_shebang) = 0;
564 BEGIN(SHEBANG);
565 } else {
566 BEGIN(INITIAL);
567 }
568
569 if (file_handle->opened_path) {
570 compiled_filename = zend_string_copy(file_handle->opened_path);
571 } else {
572 compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
573 }
574
575 zend_set_compiled_filename(compiled_filename);
576 zend_string_release_ex(compiled_filename, 0);
577
578 RESET_DOC_COMMENT();
579 CG(zend_lineno) = 1;
580 CG(increment_lineno) = 0;
581 return SUCCESS;
582 }
END_EXTERN_C()583 END_EXTERN_C()
584
585 static zend_op_array *zend_compile(int type)
586 {
587 zend_op_array *op_array = NULL;
588 zend_bool original_in_compilation = CG(in_compilation);
589
590 CG(in_compilation) = 1;
591 CG(ast) = NULL;
592 CG(ast_arena) = zend_arena_create(1024 * 32);
593
594 if (!zendparse()) {
595 int last_lineno = CG(zend_lineno);
596 zend_file_context original_file_context;
597 zend_oparray_context original_oparray_context;
598 zend_op_array *original_active_op_array = CG(active_op_array);
599
600 op_array = emalloc(sizeof(zend_op_array));
601 init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
602 CG(active_op_array) = op_array;
603
604 /* Use heap to not waste arena memory */
605 op_array->fn_flags |= ZEND_ACC_HEAP_RT_CACHE;
606
607 if (zend_ast_process) {
608 zend_ast_process(CG(ast));
609 }
610
611 zend_file_context_begin(&original_file_context);
612 zend_oparray_context_begin(&original_oparray_context);
613 zend_compile_top_stmt(CG(ast));
614 CG(zend_lineno) = last_lineno;
615 zend_emit_final_return(type == ZEND_USER_FUNCTION);
616 op_array->line_start = 1;
617 op_array->line_end = last_lineno;
618 pass_two(op_array);
619 zend_oparray_context_end(&original_oparray_context);
620 zend_file_context_end(&original_file_context);
621
622 CG(active_op_array) = original_active_op_array;
623 }
624
625 zend_ast_destroy(CG(ast));
626 zend_arena_destroy(CG(ast_arena));
627
628 CG(in_compilation) = original_in_compilation;
629
630 return op_array;
631 }
632
compile_file(zend_file_handle * file_handle,int type)633 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
634 {
635 zend_lex_state original_lex_state;
636 zend_op_array *op_array = NULL;
637 zend_save_lexical_state(&original_lex_state);
638
639 if (open_file_for_scanning(file_handle)==FAILURE) {
640 if (!EG(exception)) {
641 if (type==ZEND_REQUIRE) {
642 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
643 zend_bailout();
644 } else {
645 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
646 }
647 }
648 } else {
649 op_array = zend_compile(ZEND_USER_FUNCTION);
650 }
651
652 zend_restore_lexical_state(&original_lex_state);
653 return op_array;
654 }
655
656
compile_filename(int type,zval * filename)657 zend_op_array *compile_filename(int type, zval *filename)
658 {
659 zend_file_handle file_handle;
660 zval tmp;
661 zend_op_array *retval;
662 zend_string *opened_path = NULL;
663
664 if (Z_TYPE_P(filename) != IS_STRING) {
665 ZVAL_STR(&tmp, zval_get_string(filename));
666 filename = &tmp;
667 }
668 zend_stream_init_filename(&file_handle, Z_STRVAL_P(filename));
669
670 retval = zend_compile_file(&file_handle, type);
671 if (retval && file_handle.handle.stream.handle) {
672 if (!file_handle.opened_path) {
673 file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
674 }
675
676 zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
677
678 if (opened_path) {
679 zend_string_release_ex(opened_path, 0);
680 }
681 }
682 zend_destroy_file_handle(&file_handle);
683
684 if (UNEXPECTED(filename == &tmp)) {
685 zval_ptr_dtor(&tmp);
686 }
687 return retval;
688 }
689
zend_prepare_string_for_scanning(zval * str,char * filename)690 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
691 {
692 char *buf;
693 size_t size, old_len;
694 zend_string *new_compiled_filename;
695
696 /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
697 old_len = Z_STRLEN_P(str);
698 Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
699 Z_TYPE_INFO_P(str) = IS_STRING_EX;
700 memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
701
702 SCNG(yy_in) = NULL;
703 SCNG(yy_start) = NULL;
704
705 buf = Z_STRVAL_P(str);
706 size = old_len;
707
708 if (CG(multibyte)) {
709 SCNG(script_org) = (unsigned char*)buf;
710 SCNG(script_org_size) = size;
711 SCNG(script_filtered) = NULL;
712
713 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
714
715 if (SCNG(input_filter)) {
716 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
717 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
718 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
719 }
720 buf = (char*)SCNG(script_filtered);
721 size = SCNG(script_filtered_size);
722 }
723 }
724
725 yy_scan_buffer(buf, size);
726
727 new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
728 zend_set_compiled_filename(new_compiled_filename);
729 zend_string_release_ex(new_compiled_filename, 0);
730 CG(zend_lineno) = 1;
731 CG(increment_lineno) = 0;
732 RESET_DOC_COMMENT();
733 return SUCCESS;
734 }
735
736
zend_get_scanned_file_offset(void)737 ZEND_API size_t zend_get_scanned_file_offset(void)
738 {
739 size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
740 if (SCNG(input_filter)) {
741 size_t original_offset = offset, length = 0;
742 do {
743 unsigned char *p = NULL;
744 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
745 return (size_t)-1;
746 }
747 efree(p);
748 if (length > original_offset) {
749 offset--;
750 } else if (length < original_offset) {
751 offset++;
752 }
753 } while (original_offset != length);
754 }
755 return offset;
756 }
757
compile_string(zval * source_string,char * filename)758 zend_op_array *compile_string(zval *source_string, char *filename)
759 {
760 zend_lex_state original_lex_state;
761 zend_op_array *op_array = NULL;
762 zval tmp;
763
764 if (UNEXPECTED(Z_TYPE_P(source_string) != IS_STRING)) {
765 ZVAL_STR(&tmp, zval_get_string_func(source_string));
766 } else {
767 ZVAL_COPY(&tmp, source_string);
768 }
769
770 if (Z_STRLEN(tmp)==0) {
771 zval_ptr_dtor(&tmp);
772 return NULL;
773 }
774
775 zend_save_lexical_state(&original_lex_state);
776 if (zend_prepare_string_for_scanning(&tmp, filename) == SUCCESS) {
777 BEGIN(ST_IN_SCRIPTING);
778 op_array = zend_compile(ZEND_EVAL_CODE);
779 }
780
781 zend_restore_lexical_state(&original_lex_state);
782 zval_ptr_dtor(&tmp);
783
784 return op_array;
785 }
786
787
BEGIN_EXTERN_C()788 BEGIN_EXTERN_C()
789 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
790 {
791 zend_lex_state original_lex_state;
792 zend_file_handle file_handle;
793
794 zend_stream_init_filename(&file_handle, filename);
795 zend_save_lexical_state(&original_lex_state);
796 if (open_file_for_scanning(&file_handle)==FAILURE) {
797 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
798 zend_restore_lexical_state(&original_lex_state);
799 return FAILURE;
800 }
801 zend_highlight(syntax_highlighter_ini);
802 if (SCNG(script_filtered)) {
803 efree(SCNG(script_filtered));
804 SCNG(script_filtered) = NULL;
805 }
806 zend_destroy_file_handle(&file_handle);
807 zend_restore_lexical_state(&original_lex_state);
808 return SUCCESS;
809 }
810
/* Syntax-highlight the PHP source in `str` using the colours in
 * `syntax_highlighter_ini`; `str_name` is used as the compiled filename.
 *
 * A non-string `str` is converted into a temporary string zval that is
 * released before returning. Scanning starts in the INITIAL condition. The
 * lexical state is saved on entry and restored on every exit path.
 * Returns SUCCESS, or FAILURE when the string cannot be prepared. */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zend_lex_state saved_lex_state;
	zval tmp;
	int result;

	if (UNEXPECTED(Z_TYPE_P(str) != IS_STRING)) {
		ZVAL_STR(&tmp, zval_get_string_func(str));
		str = &tmp;
	}

	zend_save_lexical_state(&saved_lex_state);
	if (zend_prepare_string_for_scanning(str, str_name) == FAILURE) {
		result = FAILURE;
	} else {
		BEGIN(INITIAL);
		zend_highlight(syntax_highlighter_ini);

		/* Release the converted script buffer, if the input filter made one. */
		if (SCNG(script_filtered)) {
			efree(SCNG(script_filtered));
			SCNG(script_filtered) = NULL;
		}
		result = SUCCESS;
	}
	zend_restore_lexical_state(&saved_lex_state);

	if (UNEXPECTED(str == &tmp)) {
		zval_ptr_dtor(&tmp);
	}
	return result;
}
840
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)841 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
842 {
843 size_t length;
844 unsigned char *new_yy_start;
845
846 /* convert and set */
847 if (!SCNG(input_filter)) {
848 if (SCNG(script_filtered)) {
849 efree(SCNG(script_filtered));
850 SCNG(script_filtered) = NULL;
851 }
852 SCNG(script_filtered_size) = 0;
853 length = SCNG(script_org_size);
854 new_yy_start = SCNG(script_org);
855 } else {
856 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
857 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
858 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
859 }
860 if (SCNG(script_filtered)) {
861 efree(SCNG(script_filtered));
862 }
863 SCNG(script_filtered) = new_yy_start;
864 SCNG(script_filtered_size) = length;
865 }
866
867 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
868 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
869 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
870 SCNG(yy_limit) = new_yy_start + length;
871
872 SCNG(yy_start) = new_yy_start;
873 }
874
875
// TODO: avoid reallocation ???
/* Fill the zval `zendlval` with a string made from the `yyleng` bytes at
 * `yytext`:
 *  - with an output filter installed, the bytes are passed through it, the
 *    result is copied into the zval and the filter's buffer is freed (the
 *    filter's return value is not checked);
 *  - a one-byte value is taken from ZSTR_CHAR() and stored with
 *    ZVAL_INTERNED_STR;
 *  - anything else is copied with ZVAL_STRINGL.
 * The macro expands to a bare if / else-if / else chain, not a single
 * do { } while (0) statement. */
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (SCNG(output_filter)) { \
		size_t sz = 0; \
		char *s = NULL; \
		SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \
		ZVAL_STRINGL(zendlval, s, sz); \
		efree(s); \
	} else if (yyleng == 1) { \
		ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \
	} else { \
		ZVAL_STRINGL(zendlval, yytext, yyleng); \
	}
889
/*
 * Append the UTF-8 encoding of `codepoint` at `out` and return the position
 * just past the last byte written. Values above 0x10FFFF write nothing; the
 * caller rejects them before calling this helper.
 */
static char *zend_scan_write_utf8(char *out, unsigned long codepoint)
{
	/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
	if (codepoint < 0x80) {
		*out++ = (char) codepoint;
	} else if (codepoint <= 0x7FF) {
		*out++ = (char) ((codepoint >> 6) + 0xC0);
		*out++ = (char) ((codepoint & 0x3F) + 0x80);
	} else if (codepoint <= 0xFFFF) {
		*out++ = (char) ((codepoint >> 12) + 0xE0);
		*out++ = (char) (((codepoint >> 6) & 0x3F) + 0x80);
		*out++ = (char) ((codepoint & 0x3F) + 0x80);
	} else if (codepoint <= 0x10FFFF) {
		*out++ = (char) ((codepoint >> 18) + 0xF0);
		*out++ = (char) (((codepoint >> 12) & 0x3F) + 0x80);
		*out++ = (char) (((codepoint >> 6) & 0x3F) + 0x80);
		*out++ = (char) ((codepoint & 0x3F) + 0x80);
	}
	return out;
}

/*
 * Build the value of a string segment that supports backslash escapes.
 *
 * The `len` bytes at `str` are copied into zendlval and the copy is rewritten
 * in place. Recognized escapes: \n \r \t \f \v \e, \\ and \$, \xH / \xHH,
 * \u{H...} (written as UTF-8) and octal \N / \NN / \NNN. \" and \` are
 * unescaped only when the character equals quote_type; otherwise, like any
 * unrecognized escape, they are kept verbatim including the backslash.
 * No escape produces more bytes than it consumes, so the write cursor never
 * overtakes the read cursor.
 *
 * CG(zend_lineno) is advanced for each line break found in the segment
 * ("\r\n" counts once). When SCNG(output_filter) is set, the final bytes are
 * passed through it and zendlval is replaced by the filtered string.
 *
 * Returns SUCCESS, or FAILURE after throwing a parse error for a malformed or
 * too large \u{...} escape; in that case zendlval is released and left UNDEF.
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *s, *t;
	char *end;

	/* Empty and one-byte segments cannot contain an escape sequence. */
	if (len <= 1) {
		if (len < 1) {
			ZVAL_EMPTY_STRING(zendlval);
		} else {
			zend_uchar c = (zend_uchar)*str;
			if (c == '\n' || c == '\r') {
				CG(zend_lineno)++;
			}
			ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
		}
		goto skip_escape_conversion;
	}

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences */
	s = Z_STRVAL_P(zendlval);
	end = s+Z_STRLEN_P(zendlval);

	/* Fast path: nothing has to move until the first backslash is found. */
	while (1) {
		if (UNEXPECTED(*s=='\\')) {
			break;
		}
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
		if (s == end) {
			goto skip_escape_conversion;
		}
	}

	/* s is the read cursor, t the write cursor; both walk the same buffer. */
	t = s;
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* lone backslash at the very end: keep it */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					break;
				case 'r':
					*t++ = '\r';
					break;
				case 't':
					*t++ = '\t';
					break;
				case 'f':
					*t++ = '\f';
					break;
				case 'v':
					*t++ = '\v';
					break;
				case 'e':
					/* ESC (0x1B), spelled as an octal escape because '\e' is
					 * a compiler extension rather than standard C */
					*t++ = '\033';
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*t++ = *s;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						hex_buf[0] = *(++s);
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
						}
						*t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
					{
						/* cache where we started so we can parse after validating */
						char *start = s + 1;
						/* number of bytes in "{...}", both braces included */
						size_t seq_len;
						zend_bool valid = 1;
						unsigned long codepoint;

						if (*start != '{') {
							/* we silently let this pass to avoid breaking code
							 * with JSON in string literals (e.g. "\"\u202e\""
							 */
							*t++ = '\\';
							*t++ = 'u';
							break;
						}

						/* on the other hand, invalid \u{blah} errors */
						s += 2; /* step over "u{" onto the first candidate digit */
						seq_len = 1;
						while (*s != '}') {
							/* the terminating NUL is not a hex digit, so this
							 * cannot run past the end of the buffer */
							if (!ZEND_IS_HEX(*s)) {
								valid = 0;
								break;
							}
							seq_len++;
							s++;
						}
						if (valid) {
							/* the loop stopped on the closing brace */
							seq_len++;
						}

						/* \u{} is invalid */
						if (seq_len <= 2) {
							valid = 0;
						}

						if (!valid) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						errno = 0;
						codepoint = strtoul(start + 1, NULL, 16);

						/* per RFC 3629, UTF-8 can only represent 21 bits */
						if (codepoint > 0x10FFFF || errno) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						t = zend_scan_write_utf8(t, codepoint);
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
							}
						}
						if (octal_buf[2] && (octal_buf[0] > '3') && !SCNG(heredoc_scan_ahead)) {
							/* 3 octit values must not overflow 0xFF (\377) */
							zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
						}

						*t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* s rests on the last byte consumed by this iteration */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);

skip_escape_conversion:
	if (SCNG(output_filter)) {
		size_t sz = 0;
		unsigned char *filtered;
		// TODO: avoid reallocation ???
		s = Z_STRVAL_P(zendlval);
		SCNG(output_filter)(&filtered, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, sz);
		efree(filtered);
	}
	return SUCCESS;
}
1112
/* Markers for the kind of whitespace found in a heredoc/nowdoc indentation.
 * NOTE(review): the distinct bit values (1 and 2) suggest they are OR-ed into
 * one variable so that mixed indentation can be detected - confirm at the
 * use sites. */
#define HEREDOC_USING_SPACES 1
#define HEREDOC_USING_TABS 2
1115
/*
 * Locate the first line terminator in [str, end).
 *
 * Returns a pointer to the terminator and stores its length in *newline_len:
 * 2 for "\r\n" (only when the '\n' also lies before `end`), 1 for a lone
 * '\r' or '\n'. When the range holds no terminator, returns NULL and stores 0.
 */
static const char *next_newline(const char *str, const char *end, size_t *newline_len) {
	const char *p = str;

	while (p < end) {
		const char ch = *p;

		if (ch == '\n') {
			*newline_len = 1;
			return p;
		}
		if (ch == '\r') {
			/* Treat CR LF as one terminator, but never look at or past `end`. */
			const int crlf = (p + 1 < end) && (p[1] == '\n');

			*newline_len = crlf ? 2 : 1;
			return p;
		}
		p++;
	}

	*newline_len = 0;
	return NULL;
}
1129
/*
 * Strip `indentation` leading whitespace characters from the lines of the
 * string held in zendlval, compacting the string buffer in place.
 *
 * using_spaces:     non-zero if the indentation has to be made of spaces,
 *                   zero if it has to be made of tabs.
 * newline_at_start: zero means the string does not begin at a line start, so
 *                   everything up to and including the first line terminator
 *                   is left untouched (and a string without any terminator is
 *                   returned unchanged).
 * newline_at_end:   non-zero lets the final, unterminated segment be handled
 *                   like a line that ends at the end of the buffer.
 *
 * A line that ends before `indentation` characters were skipped is accepted
 * (whitespace-only lines need not be fully indented).
 *
 * Returns 1 on success. On failure a parse error is thrown with
 * CG(zend_lineno) advanced by the number of lines already processed, the
 * string is released, zendlval is set to UNDEF and 0 is returned.
 */
static zend_bool strip_multiline_string_indentation(
	zval *zendlval, int indentation, zend_bool using_spaces,
	zend_bool newline_at_start, zend_bool newline_at_end)
{
	char *const buf = Z_STRVAL_P(zendlval);
	const char *const limit = buf + Z_STRLEN_P(zendlval);
	const char *src = buf; /* read cursor */
	char *dst = buf;       /* write cursor, never ahead of src */
	const char *eol;
	size_t eol_len;
	int lines_seen = 0;

	if (newline_at_start) {
		eol = src;
	} else {
		eol = next_newline(src, limit, &eol_len);
		if (eol == NULL) {
			return 1;
		}

		src = eol + eol_len;
		dst = (char *) eol + eol_len;
		lines_seen = 1;
	}

	/* "<=" is intentional: src == limit still gets one pass through the
	 * indentation check below. */
	while (src <= limit && eol != NULL) {
		size_t skipped = 0;
		size_t chunk;

		eol = next_newline(src, limit, &eol_len);
		if (eol == NULL && newline_at_end) {
			eol = limit;
		}

		/* Try to skip indentation */
		while (skipped < indentation) {
			char c;

			if (src == eol) {
				/* Don't require full indentation on whitespace-only lines */
				break;
			}
			if (src == limit) {
				goto bad_level;
			}

			c = *src;
			if (c != ' ' && c != '\t') {
				goto bad_level;
			}
			if (using_spaces ? (c == '\t') : (c == ' ')) {
				goto mixed_whitespace;
			}

			skipped++;
			src++;
		}

		if (src == limit) {
			break;
		}

		/* Move the rest of the line, terminator included, down to dst. */
		if (eol != NULL) {
			chunk = eol - src + eol_len;
		} else {
			chunk = limit - src;
		}
		memmove(dst, src, chunk);
		src += chunk;
		dst += chunk;
		lines_seen++;
	}

	*dst = '\0';
	Z_STRLEN_P(zendlval) = dst - Z_STRVAL_P(zendlval);
	return 1;

bad_level:
	CG(zend_lineno) += lines_seen;
	zend_throw_exception_ex(zend_ce_parse_error, 0,
		"Invalid body indentation level (expecting an indentation level of at least %d)", indentation);
	goto release;

mixed_whitespace:
	CG(zend_lineno) += lines_seen;
	zend_throw_exception(zend_ce_parse_error,
		"Invalid indentation - tabs and spaces cannot be mixed", 0);

release:
	zval_ptr_dtor_str(zendlval);
	ZVAL_UNDEF(zendlval);

	return 0;
}
1205
copy_heredoc_label_stack(void * void_heredoc_label)1206 static void copy_heredoc_label_stack(void *void_heredoc_label)
1207 {
1208 zend_heredoc_label *heredoc_label = void_heredoc_label;
1209 zend_heredoc_label *new_heredoc_label = emalloc(sizeof(zend_heredoc_label));
1210
1211 *new_heredoc_label = *heredoc_label;
1212 new_heredoc_label->label = estrndup(heredoc_label->label, heredoc_label->length);
1213
1214 zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
1215 }
1216
/*
 * Helper macros for the scanner function below. They rely on names that must
 * be in scope where they are expanded: the variables `elem`, `token` and
 * `offset`, and the labels `emit_token`, `emit_token_with_val`,
 * `emit_token_with_str` and `skip_token`.
 */

/* Tests whether `elem` is non-NULL, wrapped in the EXPECTED() branch hint. */
#define PARSER_MODE() \
	EXPECTED(elem != NULL)

/* Record _token in `token` and jump to the `emit_token` label. */
#define RETURN_TOKEN(_token) do { \
		token = _token; \
		goto emit_token; \
	} while (0)

/* Record _token in `token` and jump to the `emit_token_with_val` label. */
#define RETURN_TOKEN_WITH_VAL(_token) do { \
		token = _token; \
		goto emit_token_with_val; \
	} while (0)

/* Record _token in `token` and _offset in `offset`, then jump to the
 * `emit_token_with_str` label.
 * NOTE(review): _offset looks like the number of leading bytes of the matched
 * text to leave out of the string value (1 is passed for "$name") - confirm
 * at the label. */
#define RETURN_TOKEN_WITH_STR(_token, _offset) do { \
		token = _token; \
		offset = _offset; \
		goto emit_token_with_str; \
	} while (0)

/* Record _token in `token`; jump to `skip_token` when PARSER_MODE() holds,
 * otherwise to `emit_token`. */
#define RETURN_OR_SKIP_TOKEN(_token) do { \
		token = _token; \
		if (PARSER_MODE()) { \
			goto skip_token; \
		} \
		goto emit_token; \
	} while (0)
1243
lex_scan(zval * zendlval,zend_parser_stack_elem * elem)1244 int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
1245 {
1246 int token;
1247 int offset;
1248 int start_line = CG(zend_lineno);
1249
1250 ZVAL_UNDEF(zendlval);
1251 restart:
1252 SCNG(yy_text) = YYCURSOR;
1253
1254 /*!re2c
1255 re2c:yyfill:check = 0;
1256 LNUM [0-9]+(_[0-9]+)*
1257 DNUM ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
1258 EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1259 HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
1260 BNUM "0b"[01]+(_[01]+)*
1261 LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1262 WHITESPACE [ \n\r\t]+
1263 TABS_AND_SPACES [ \t]*
1264 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1265 ANY_CHAR [^]
1266 NEWLINE ("\r"|"\n"|"\r\n")
1267
1268 /* compute yyleng before each rule */
1269 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1270
1271 <ST_IN_SCRIPTING>"exit" {
1272 RETURN_TOKEN(T_EXIT);
1273 }
1274
1275 <ST_IN_SCRIPTING>"die" {
1276 RETURN_TOKEN(T_EXIT);
1277 }
1278
1279 <ST_IN_SCRIPTING>"fn" {
1280 RETURN_TOKEN(T_FN);
1281 }
1282
1283 <ST_IN_SCRIPTING>"function" {
1284 RETURN_TOKEN(T_FUNCTION);
1285 }
1286
1287 <ST_IN_SCRIPTING>"const" {
1288 RETURN_TOKEN(T_CONST);
1289 }
1290
1291 <ST_IN_SCRIPTING>"return" {
1292 RETURN_TOKEN(T_RETURN);
1293 }
1294
1295 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
1296 yyless(yyleng - 1);
1297 HANDLE_NEWLINES(yytext, yyleng);
1298 RETURN_TOKEN(T_YIELD_FROM);
1299 }
1300
1301 <ST_IN_SCRIPTING>"yield" {
1302 RETURN_TOKEN(T_YIELD);
1303 }
1304
1305 <ST_IN_SCRIPTING>"try" {
1306 RETURN_TOKEN(T_TRY);
1307 }
1308
1309 <ST_IN_SCRIPTING>"catch" {
1310 RETURN_TOKEN(T_CATCH);
1311 }
1312
1313 <ST_IN_SCRIPTING>"finally" {
1314 RETURN_TOKEN(T_FINALLY);
1315 }
1316
1317 <ST_IN_SCRIPTING>"throw" {
1318 RETURN_TOKEN(T_THROW);
1319 }
1320
1321 <ST_IN_SCRIPTING>"if" {
1322 RETURN_TOKEN(T_IF);
1323 }
1324
1325 <ST_IN_SCRIPTING>"elseif" {
1326 RETURN_TOKEN(T_ELSEIF);
1327 }
1328
1329 <ST_IN_SCRIPTING>"endif" {
1330 RETURN_TOKEN(T_ENDIF);
1331 }
1332
1333 <ST_IN_SCRIPTING>"else" {
1334 RETURN_TOKEN(T_ELSE);
1335 }
1336
1337 <ST_IN_SCRIPTING>"while" {
1338 RETURN_TOKEN(T_WHILE);
1339 }
1340
1341 <ST_IN_SCRIPTING>"endwhile" {
1342 RETURN_TOKEN(T_ENDWHILE);
1343 }
1344
1345 <ST_IN_SCRIPTING>"do" {
1346 RETURN_TOKEN(T_DO);
1347 }
1348
1349 <ST_IN_SCRIPTING>"for" {
1350 RETURN_TOKEN(T_FOR);
1351 }
1352
1353 <ST_IN_SCRIPTING>"endfor" {
1354 RETURN_TOKEN(T_ENDFOR);
1355 }
1356
1357 <ST_IN_SCRIPTING>"foreach" {
1358 RETURN_TOKEN(T_FOREACH);
1359 }
1360
1361 <ST_IN_SCRIPTING>"endforeach" {
1362 RETURN_TOKEN(T_ENDFOREACH);
1363 }
1364
1365 <ST_IN_SCRIPTING>"declare" {
1366 RETURN_TOKEN(T_DECLARE);
1367 }
1368
1369 <ST_IN_SCRIPTING>"enddeclare" {
1370 RETURN_TOKEN(T_ENDDECLARE);
1371 }
1372
1373 <ST_IN_SCRIPTING>"instanceof" {
1374 RETURN_TOKEN(T_INSTANCEOF);
1375 }
1376
1377 <ST_IN_SCRIPTING>"as" {
1378 RETURN_TOKEN(T_AS);
1379 }
1380
1381 <ST_IN_SCRIPTING>"switch" {
1382 RETURN_TOKEN(T_SWITCH);
1383 }
1384
1385 <ST_IN_SCRIPTING>"endswitch" {
1386 RETURN_TOKEN(T_ENDSWITCH);
1387 }
1388
1389 <ST_IN_SCRIPTING>"case" {
1390 RETURN_TOKEN(T_CASE);
1391 }
1392
1393 <ST_IN_SCRIPTING>"default" {
1394 RETURN_TOKEN(T_DEFAULT);
1395 }
1396
1397 <ST_IN_SCRIPTING>"break" {
1398 RETURN_TOKEN(T_BREAK);
1399 }
1400
1401 <ST_IN_SCRIPTING>"continue" {
1402 RETURN_TOKEN(T_CONTINUE);
1403 }
1404
1405 <ST_IN_SCRIPTING>"goto" {
1406 RETURN_TOKEN(T_GOTO);
1407 }
1408
1409 <ST_IN_SCRIPTING>"echo" {
1410 RETURN_TOKEN(T_ECHO);
1411 }
1412
1413 <ST_IN_SCRIPTING>"print" {
1414 RETURN_TOKEN(T_PRINT);
1415 }
1416
1417 <ST_IN_SCRIPTING>"class" {
1418 RETURN_TOKEN(T_CLASS);
1419 }
1420
1421 <ST_IN_SCRIPTING>"interface" {
1422 RETURN_TOKEN(T_INTERFACE);
1423 }
1424
1425 <ST_IN_SCRIPTING>"trait" {
1426 RETURN_TOKEN(T_TRAIT);
1427 }
1428
1429 <ST_IN_SCRIPTING>"extends" {
1430 RETURN_TOKEN(T_EXTENDS);
1431 }
1432
1433 <ST_IN_SCRIPTING>"implements" {
1434 RETURN_TOKEN(T_IMPLEMENTS);
1435 }
1436
1437 <ST_IN_SCRIPTING>"->" {
1438 yy_push_state(ST_LOOKING_FOR_PROPERTY);
1439 RETURN_TOKEN(T_OBJECT_OPERATOR);
1440 }
1441
1442 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1443 goto return_whitespace;
1444 }
1445
1446 <ST_LOOKING_FOR_PROPERTY>"->" {
1447 RETURN_TOKEN(T_OBJECT_OPERATOR);
1448 }
1449
1450 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1451 yy_pop_state();
1452 RETURN_TOKEN_WITH_STR(T_STRING, 0);
1453 }
1454
1455 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1456 yyless(0);
1457 yy_pop_state();
1458 goto restart;
1459 }
1460
1461 <ST_IN_SCRIPTING>"::" {
1462 RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
1463 }
1464
1465 <ST_IN_SCRIPTING>"\\" {
1466 RETURN_TOKEN(T_NS_SEPARATOR);
1467 }
1468
1469 <ST_IN_SCRIPTING>"..." {
1470 RETURN_TOKEN(T_ELLIPSIS);
1471 }
1472
1473 <ST_IN_SCRIPTING>"??" {
1474 RETURN_TOKEN(T_COALESCE);
1475 }
1476
1477 <ST_IN_SCRIPTING>"new" {
1478 RETURN_TOKEN(T_NEW);
1479 }
1480
1481 <ST_IN_SCRIPTING>"clone" {
1482 RETURN_TOKEN(T_CLONE);
1483 }
1484
1485 <ST_IN_SCRIPTING>"var" {
1486 RETURN_TOKEN(T_VAR);
1487 }
1488
1489 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1490 RETURN_TOKEN(T_INT_CAST);
1491 }
1492
1493 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("double"|"float"){TABS_AND_SPACES}")" {
1494 RETURN_TOKEN(T_DOUBLE_CAST);
1495 }
1496
1497 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"real"{TABS_AND_SPACES}")" {
1498 if (PARSER_MODE()) {
1499 zend_error(E_DEPRECATED, "The (real) cast is deprecated, use (float) instead");
1500 }
1501 RETURN_TOKEN(T_DOUBLE_CAST);
1502 }
1503
1504 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1505 RETURN_TOKEN(T_STRING_CAST);
1506 }
1507
1508 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1509 RETURN_TOKEN(T_ARRAY_CAST);
1510 }
1511
1512 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1513 RETURN_TOKEN(T_OBJECT_CAST);
1514 }
1515
1516 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1517 RETURN_TOKEN(T_BOOL_CAST);
1518 }
1519
1520 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1521 RETURN_TOKEN(T_UNSET_CAST);
1522 }
1523
1524 <ST_IN_SCRIPTING>"eval" {
1525 RETURN_TOKEN(T_EVAL);
1526 }
1527
1528 <ST_IN_SCRIPTING>"include" {
1529 RETURN_TOKEN(T_INCLUDE);
1530 }
1531
1532 <ST_IN_SCRIPTING>"include_once" {
1533 RETURN_TOKEN(T_INCLUDE_ONCE);
1534 }
1535
1536 <ST_IN_SCRIPTING>"require" {
1537 RETURN_TOKEN(T_REQUIRE);
1538 }
1539
1540 <ST_IN_SCRIPTING>"require_once" {
1541 RETURN_TOKEN(T_REQUIRE_ONCE);
1542 }
1543
1544 <ST_IN_SCRIPTING>"namespace" {
1545 RETURN_TOKEN(T_NAMESPACE);
1546 }
1547
1548 <ST_IN_SCRIPTING>"use" {
1549 RETURN_TOKEN(T_USE);
1550 }
1551
1552 <ST_IN_SCRIPTING>"insteadof" {
1553 RETURN_TOKEN(T_INSTEADOF);
1554 }
1555
1556 <ST_IN_SCRIPTING>"global" {
1557 RETURN_TOKEN(T_GLOBAL);
1558 }
1559
1560 <ST_IN_SCRIPTING>"isset" {
1561 RETURN_TOKEN(T_ISSET);
1562 }
1563
1564 <ST_IN_SCRIPTING>"empty" {
1565 RETURN_TOKEN(T_EMPTY);
1566 }
1567
1568 <ST_IN_SCRIPTING>"__halt_compiler" {
1569 RETURN_TOKEN(T_HALT_COMPILER);
1570 }
1571
1572 <ST_IN_SCRIPTING>"static" {
1573 RETURN_TOKEN(T_STATIC);
1574 }
1575
1576 <ST_IN_SCRIPTING>"abstract" {
1577 RETURN_TOKEN(T_ABSTRACT);
1578 }
1579
1580 <ST_IN_SCRIPTING>"final" {
1581 RETURN_TOKEN(T_FINAL);
1582 }
1583
1584 <ST_IN_SCRIPTING>"private" {
1585 RETURN_TOKEN(T_PRIVATE);
1586 }
1587
1588 <ST_IN_SCRIPTING>"protected" {
1589 RETURN_TOKEN(T_PROTECTED);
1590 }
1591
1592 <ST_IN_SCRIPTING>"public" {
1593 RETURN_TOKEN(T_PUBLIC);
1594 }
1595
1596 <ST_IN_SCRIPTING>"unset" {
1597 RETURN_TOKEN(T_UNSET);
1598 }
1599
1600 <ST_IN_SCRIPTING>"=>" {
1601 RETURN_TOKEN(T_DOUBLE_ARROW);
1602 }
1603
1604 <ST_IN_SCRIPTING>"list" {
1605 RETURN_TOKEN(T_LIST);
1606 }
1607
1608 <ST_IN_SCRIPTING>"array" {
1609 RETURN_TOKEN(T_ARRAY);
1610 }
1611
1612 <ST_IN_SCRIPTING>"callable" {
1613 RETURN_TOKEN(T_CALLABLE);
1614 }
1615
1616 <ST_IN_SCRIPTING>"++" {
1617 RETURN_TOKEN(T_INC);
1618 }
1619
1620 <ST_IN_SCRIPTING>"--" {
1621 RETURN_TOKEN(T_DEC);
1622 }
1623
1624 <ST_IN_SCRIPTING>"===" {
1625 RETURN_TOKEN(T_IS_IDENTICAL);
1626 }
1627
1628 <ST_IN_SCRIPTING>"!==" {
1629 RETURN_TOKEN(T_IS_NOT_IDENTICAL);
1630 }
1631
1632 <ST_IN_SCRIPTING>"==" {
1633 RETURN_TOKEN(T_IS_EQUAL);
1634 }
1635
1636 <ST_IN_SCRIPTING>"!="|"<>" {
1637 RETURN_TOKEN(T_IS_NOT_EQUAL);
1638 }
1639
1640 <ST_IN_SCRIPTING>"<=>" {
1641 RETURN_TOKEN(T_SPACESHIP);
1642 }
1643
1644 <ST_IN_SCRIPTING>"<=" {
1645 RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
1646 }
1647
1648 <ST_IN_SCRIPTING>">=" {
1649 RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
1650 }
1651
1652 <ST_IN_SCRIPTING>"+=" {
1653 RETURN_TOKEN(T_PLUS_EQUAL);
1654 }
1655
1656 <ST_IN_SCRIPTING>"-=" {
1657 RETURN_TOKEN(T_MINUS_EQUAL);
1658 }
1659
1660 <ST_IN_SCRIPTING>"*=" {
1661 RETURN_TOKEN(T_MUL_EQUAL);
1662 }
1663
1664 <ST_IN_SCRIPTING>"*\*" {
1665 RETURN_TOKEN(T_POW);
1666 }
1667
1668 <ST_IN_SCRIPTING>"*\*=" {
1669 RETURN_TOKEN(T_POW_EQUAL);
1670 }
1671
1672 <ST_IN_SCRIPTING>"/=" {
1673 RETURN_TOKEN(T_DIV_EQUAL);
1674 }
1675
1676 <ST_IN_SCRIPTING>".=" {
1677 RETURN_TOKEN(T_CONCAT_EQUAL);
1678 }
1679
1680 <ST_IN_SCRIPTING>"%=" {
1681 RETURN_TOKEN(T_MOD_EQUAL);
1682 }
1683
1684 <ST_IN_SCRIPTING>"<<=" {
1685 RETURN_TOKEN(T_SL_EQUAL);
1686 }
1687
1688 <ST_IN_SCRIPTING>">>=" {
1689 RETURN_TOKEN(T_SR_EQUAL);
1690 }
1691
1692 <ST_IN_SCRIPTING>"&=" {
1693 RETURN_TOKEN(T_AND_EQUAL);
1694 }
1695
1696 <ST_IN_SCRIPTING>"|=" {
1697 RETURN_TOKEN(T_OR_EQUAL);
1698 }
1699
1700 <ST_IN_SCRIPTING>"^=" {
1701 RETURN_TOKEN(T_XOR_EQUAL);
1702 }
1703
1704 <ST_IN_SCRIPTING>"??=" {
1705 RETURN_TOKEN(T_COALESCE_EQUAL);
1706 }
1707
1708 <ST_IN_SCRIPTING>"||" {
1709 RETURN_TOKEN(T_BOOLEAN_OR);
1710 }
1711
1712 <ST_IN_SCRIPTING>"&&" {
1713 RETURN_TOKEN(T_BOOLEAN_AND);
1714 }
1715
1716 <ST_IN_SCRIPTING>"OR" {
1717 RETURN_TOKEN(T_LOGICAL_OR);
1718 }
1719
1720 <ST_IN_SCRIPTING>"AND" {
1721 RETURN_TOKEN(T_LOGICAL_AND);
1722 }
1723
1724 <ST_IN_SCRIPTING>"XOR" {
1725 RETURN_TOKEN(T_LOGICAL_XOR);
1726 }
1727
1728 <ST_IN_SCRIPTING>"<<" {
1729 RETURN_TOKEN(T_SL);
1730 }
1731
1732 <ST_IN_SCRIPTING>">>" {
1733 RETURN_TOKEN(T_SR);
1734 }
1735
1736 <ST_IN_SCRIPTING>{TOKENS} {
1737 RETURN_TOKEN(yytext[0]);
1738 }
1739
1740
1741 <ST_IN_SCRIPTING>"{" {
1742 yy_push_state(ST_IN_SCRIPTING);
1743 RETURN_TOKEN('{');
1744 }
1745
1746
1747 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1748 yy_push_state(ST_LOOKING_FOR_VARNAME);
1749 RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
1750 }
1751
1752
1753 <ST_IN_SCRIPTING>"}" {
1754 RESET_DOC_COMMENT();
1755 if (!zend_stack_is_empty(&SCNG(state_stack))) {
1756 yy_pop_state();
1757 }
1758 RETURN_TOKEN('}');
1759 }
1760
1761
1762 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1763 yyless(yyleng - 1);
1764 yy_pop_state();
1765 yy_push_state(ST_IN_SCRIPTING);
1766 RETURN_TOKEN_WITH_STR(T_STRING_VARNAME, 0);
1767 }
1768
1769
1770 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1771 yyless(0);
1772 yy_pop_state();
1773 yy_push_state(ST_IN_SCRIPTING);
1774 goto restart;
1775 }
1776
1777 <ST_IN_SCRIPTING>{BNUM} {
1778 /* The +/- 2 skips "0b" */
1779 size_t len = yyleng - 2;
1780 char *end, *bin = yytext + 2;
1781 zend_bool contains_underscores;
1782
1783 /* Skip any leading 0s */
1784 while (len > 0 && (*bin == '0' || *bin == '_')) {
1785 ++bin;
1786 --len;
1787 }
1788
1789 contains_underscores = (memchr(bin, '_', len) != NULL);
1790
1791 if (contains_underscores) {
1792 bin = estrndup(bin, len);
1793 strip_underscores(bin, &len);
1794 }
1795
1796 if (len < SIZEOF_ZEND_LONG * 8) {
1797 if (len == 0) {
1798 ZVAL_LONG(zendlval, 0);
1799 } else {
1800 errno = 0;
1801 ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1802 ZEND_ASSERT(!errno && end == bin + len);
1803 }
1804 if (contains_underscores) {
1805 efree(bin);
1806 }
1807 RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1808 } else {
1809 ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1810 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1811 ZEND_ASSERT(end == bin + len);
1812 if (contains_underscores) {
1813 efree(bin);
1814 }
1815 RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1816 }
1817 }
1818
1819 <ST_IN_SCRIPTING>{LNUM} {
1820 size_t len = yyleng;
1821 char *end, *lnum = yytext;
1822 zend_bool is_octal = lnum[0] == '0';
1823 zend_bool contains_underscores = (memchr(lnum, '_', len) != NULL);
1824
1825 if (contains_underscores) {
1826 lnum = estrndup(lnum, len);
1827 strip_underscores(lnum, &len);
1828 }
1829
1830 /* Digits 8 and 9 are illegal in octal literals. */
1831 if (is_octal) {
1832 size_t i;
1833 for (i = 0; i < len; i++) {
1834 if (lnum[i] == '8' || lnum[i] == '9') {
1835 zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1836 if (PARSER_MODE()) {
1837 if (contains_underscores) {
1838 efree(lnum);
1839 }
1840 ZVAL_UNDEF(zendlval);
1841 RETURN_TOKEN(T_ERROR);
1842 }
1843
1844 /* Continue in order to determine if this is T_LNUMBER or T_DNUMBER. */
1845 len = i;
1846 break;
1847 }
1848 }
1849 }
1850
1851
1852 if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1853 errno = 0;
1854 /* base must be passed explicitly for correct parse error on Windows */
1855 ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, is_octal ? 8 : 10));
1856 ZEND_ASSERT(end == lnum + len);
1857 } else {
1858 errno = 0;
1859 ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
1860 if (errno == ERANGE) { /* Overflow */
1861 errno = 0;
1862 if (is_octal) { /* octal overflow */
1863 ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
1864 } else {
1865 ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
1866 }
1867 ZEND_ASSERT(end == lnum + len);
1868 if (contains_underscores) {
1869 efree(lnum);
1870 }
1871 RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1872 }
1873 ZEND_ASSERT(end == lnum + len);
1874 }
1875 ZEND_ASSERT(!errno);
1876 if (contains_underscores) {
1877 efree(lnum);
1878 }
1879 RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1880 }
1881
1882 <ST_IN_SCRIPTING>{HNUM} {
1883 /* The +/- 2 skips "0x" */
1884 size_t len = yyleng - 2;
1885 char *end, *hex = yytext + 2;
1886 zend_bool contains_underscores;
1887
1888 /* Skip any leading 0s */
1889 while (len > 0 && (*hex == '0' || *hex == '_')) {
1890 ++hex;
1891 --len;
1892 }
1893
1894 contains_underscores = (memchr(hex, '_', len) != NULL);
1895
1896 if (contains_underscores) {
1897 hex = estrndup(hex, len);
1898 strip_underscores(hex, &len);
1899 }
1900
1901 if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1902 if (len == 0) {
1903 ZVAL_LONG(zendlval, 0);
1904 } else {
1905 errno = 0;
1906 ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1907 ZEND_ASSERT(!errno && end == hex + len);
1908 }
1909 if (contains_underscores) {
1910 efree(hex);
1911 }
1912 RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1913 } else {
1914 ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1915 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1916 ZEND_ASSERT(end == hex + len);
1917 if (contains_underscores) {
1918 efree(hex);
1919 }
1920 RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1921 }
1922 }
1923
1924 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1925 if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1926 char *end;
1927 errno = 0;
1928 ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1929 if (errno == ERANGE) {
1930 goto string;
1931 }
1932 ZEND_ASSERT(end == yytext + yyleng);
1933 } else {
1934 string:
1935 ZVAL_STRINGL(zendlval, yytext, yyleng);
1936 }
1937 RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
1938 }
1939
1940 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1941 if (yyleng == 1) {
1942 ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext)));
1943 } else {
1944 ZVAL_STRINGL(zendlval, yytext, yyleng);
1945 }
1946 RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
1947 }
1948
1949 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1950 const char *end;
1951 size_t len = yyleng;
1952 char *dnum = yytext;
1953 zend_bool contains_underscores = (memchr(dnum, '_', len) != NULL);
1954
1955 if (contains_underscores) {
1956 dnum = estrndup(dnum, len);
1957 strip_underscores(dnum, &len);
1958 }
1959
1960 ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
1961 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1962 ZEND_ASSERT(end == dnum + len);
1963 if (contains_underscores) {
1964 efree(dnum);
1965 }
1966 RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1967 }
1968
1969 <ST_IN_SCRIPTING>"__CLASS__" {
1970 RETURN_TOKEN(T_CLASS_C);
1971 }
1972
1973 <ST_IN_SCRIPTING>"__TRAIT__" {
1974 RETURN_TOKEN(T_TRAIT_C);
1975 }
1976
1977 <ST_IN_SCRIPTING>"__FUNCTION__" {
1978 RETURN_TOKEN(T_FUNC_C);
1979 }
1980
1981 <ST_IN_SCRIPTING>"__METHOD__" {
1982 RETURN_TOKEN(T_METHOD_C);
1983 }
1984
1985 <ST_IN_SCRIPTING>"__LINE__" {
1986 RETURN_TOKEN(T_LINE);
1987 }
1988
1989 <ST_IN_SCRIPTING>"__FILE__" {
1990 RETURN_TOKEN(T_FILE);
1991 }
1992
1993 <ST_IN_SCRIPTING>"__DIR__" {
1994 RETURN_TOKEN(T_DIR);
1995 }
1996
1997 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1998 RETURN_TOKEN(T_NS_C);
1999 }
2000
2001 <SHEBANG>"#!" .* {NEWLINE} {
2002 CG(zend_lineno)++;
2003 BEGIN(INITIAL);
2004 goto restart;
2005 }
2006
2007 <SHEBANG>{ANY_CHAR} {
2008 yyless(0);
2009 BEGIN(INITIAL);
2010 goto restart;
2011 }
2012
2013 <INITIAL>"<?=" {
2014 BEGIN(ST_IN_SCRIPTING);
2015 if (PARSER_MODE()) {
2016 RETURN_TOKEN(T_ECHO);
2017 }
2018 RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
2019 }
2020
2021
2022 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
2023 HANDLE_NEWLINE(yytext[yyleng-1]);
2024 BEGIN(ST_IN_SCRIPTING);
2025 RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
2026 }
2027
2028 <INITIAL>"<?php" {
2029 /* Allow <?php followed by end of file. */
2030 if (YYCURSOR == YYLIMIT) {
2031 BEGIN(ST_IN_SCRIPTING);
2032 RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
2033 }
2034 /* Degenerate case: <?phpX is interpreted as <? phpX with short tags. */
2035 if (CG(short_tags)) {
2036 yyless(2);
2037 BEGIN(ST_IN_SCRIPTING);
2038 RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
2039 }
2040 goto inline_char_handler;
2041 }
2042
2043 <INITIAL>"<?" {
2044 if (CG(short_tags)) {
2045 BEGIN(ST_IN_SCRIPTING);
2046 RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
2047 } else {
2048 goto inline_char_handler;
2049 }
2050 }
2051
2052 <INITIAL>{ANY_CHAR} {
2053 if (YYCURSOR > YYLIMIT) {
2054 RETURN_TOKEN(END);
2055 }
2056
2057 inline_char_handler:
2058
2059 while (1) {
2060 YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
2061
2062 YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
2063
2064 if (YYCURSOR >= YYLIMIT) {
2065 break;
2066 }
2067
2068 if (*YYCURSOR == '?') {
2069 if (CG(short_tags) /* <? */
2070 || (*(YYCURSOR + 1) == '=') /* <?= */
2071 || (!strncasecmp((char*)YYCURSOR + 1, "php", 3) && /* <?php[ \t\r\n] */
2072 (YYCURSOR + 4 == YYLIMIT ||
2073 YYCURSOR[4] == ' ' || YYCURSOR[4] == '\t' ||
2074 YYCURSOR[4] == '\n' || YYCURSOR[4] == '\r'))
2075 ) {
2076 YYCURSOR--;
2077 break;
2078 }
2079 }
2080 }
2081
2082 yyleng = YYCURSOR - SCNG(yy_text);
2083
2084 if (SCNG(output_filter)) {
2085 size_t readsize;
2086 char *s = NULL;
2087 size_t sz = 0;
2088 // TODO: avoid reallocation ???
2089 readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
2090 ZVAL_STRINGL(zendlval, s, sz);
2091 efree(s);
2092 if (readsize < yyleng) {
2093 yyless(readsize);
2094 }
2095 } else if (yyleng == 1) {
2096 ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext));
2097 } else {
2098 ZVAL_STRINGL(zendlval, yytext, yyleng);
2099 }
2100 HANDLE_NEWLINES(yytext, yyleng);
2101 RETURN_TOKEN_WITH_VAL(T_INLINE_HTML);
2102 }
2103
2104
2105 /* Make sure a label character follows "->", otherwise there is no property
2106 * and "->" will be taken literally
2107 */
2108 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
2109 yyless(yyleng - 3);
2110 yy_push_state(ST_LOOKING_FOR_PROPERTY);
2111 RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
2112 }
2113
2114 /* A [ always designates a variable offset, regardless of what follows
2115 */
2116 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
2117 yyless(yyleng - 1);
2118 yy_push_state(ST_VAR_OFFSET);
2119 RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
2120 }
2121
2122 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
2123 RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
2124 }
2125
2126 <ST_VAR_OFFSET>"]" {
2127 yy_pop_state();
2128 RETURN_TOKEN(']');
2129 }
2130
2131 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
2132 /* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
2133 RETURN_TOKEN(yytext[0]);
2134 }
2135
2136 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
2137 /* Invalid rule to return a more explicit parse error with proper line number */
2138 yyless(0);
2139 yy_pop_state();
2140 ZVAL_NULL(zendlval);
2141 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2142 }
2143
2144 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
2145 RETURN_TOKEN_WITH_STR(T_STRING, 0);
2146 }
2147
2148
2149 <ST_IN_SCRIPTING>"#"|"//" {
2150 while (YYCURSOR < YYLIMIT) {
2151 switch (*YYCURSOR++) {
2152 case '\r':
2153 if (*YYCURSOR == '\n') {
2154 YYCURSOR++;
2155 }
2156 /* fall through */
2157 case '\n':
2158 CG(zend_lineno)++;
2159 break;
2160 case '?':
2161 if (*YYCURSOR == '>') {
2162 YYCURSOR--;
2163 break;
2164 }
2165 /* fall through */
2166 default:
2167 continue;
2168 }
2169
2170 break;
2171 }
2172
2173 yyleng = YYCURSOR - SCNG(yy_text);
2174 RETURN_OR_SKIP_TOKEN(T_COMMENT);
2175 }
2176
2177 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
2178 int doc_com;
2179
2180 if (yyleng > 2) {
2181 doc_com = 1;
2182 RESET_DOC_COMMENT();
2183 } else {
2184 doc_com = 0;
2185 }
2186
2187 while (YYCURSOR < YYLIMIT) {
2188 if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
2189 break;
2190 }
2191 }
2192
2193 if (YYCURSOR < YYLIMIT) {
2194 YYCURSOR++;
2195 } else if (!SCNG(heredoc_scan_ahead)) {
2196 zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
2197 }
2198
2199 yyleng = YYCURSOR - SCNG(yy_text);
2200 HANDLE_NEWLINES(yytext, yyleng);
2201
2202 if (doc_com) {
2203 CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
2204 RETURN_OR_SKIP_TOKEN(T_DOC_COMMENT);
2205 }
2206
2207 RETURN_OR_SKIP_TOKEN(T_COMMENT);
2208 }
2209
2210 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
2211 BEGIN(INITIAL);
2212 if (yytext[yyleng-1] != '>') {
2213 CG(increment_lineno) = 1;
2214 }
2215 if (PARSER_MODE()) {
2216 RETURN_TOKEN(';'); /* implicit ';' at php-end tag */
2217 }
2218 RETURN_TOKEN(T_CLOSE_TAG);
2219 }
2220
2221
2222 <ST_IN_SCRIPTING>b?['] {
2223 register char *s, *t;
2224 char *end;
2225 int bprefix = (yytext[0] != '\'') ? 1 : 0;
2226
2227 while (1) {
2228 if (YYCURSOR < YYLIMIT) {
2229 if (*YYCURSOR == '\'') {
2230 YYCURSOR++;
2231 yyleng = YYCURSOR - SCNG(yy_text);
2232
2233 break;
2234 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2235 YYCURSOR++;
2236 }
2237 } else {
2238 yyleng = YYLIMIT - SCNG(yy_text);
2239
2240 /* Unclosed single quotes; treat similar to double quotes, but without a separate token
2241 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2242 * rule, which continued in ST_IN_SCRIPTING state after the quote */
2243 ZVAL_NULL(zendlval);
2244 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2245 }
2246 }
2247
2248 if (yyleng-bprefix-2 <= 1) {
2249 if (yyleng-bprefix-2 < 1) {
2250 ZVAL_EMPTY_STRING(zendlval);
2251 } else {
2252 zend_uchar c = (zend_uchar)*(yytext+bprefix+1);
2253 if (c == '\n' || c == '\r') {
2254 CG(zend_lineno)++;
2255 }
2256 ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
2257 }
2258 goto skip_escape_conversion;
2259 }
2260 ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
2261
2262 /* convert escape sequences */
2263 s = Z_STRVAL_P(zendlval);
2264 end = s+Z_STRLEN_P(zendlval);
2265 while (1) {
2266 if (UNEXPECTED(*s=='\\')) {
2267 break;
2268 }
2269 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2270 CG(zend_lineno)++;
2271 }
2272 s++;
2273 if (s == end) {
2274 goto skip_escape_conversion;
2275 }
2276 }
2277
2278 t = s;
2279 while (s<end) {
2280 if (*s=='\\') {
2281 s++;
2282 if (*s == '\\' || *s == '\'') {
2283 *t++ = *s;
2284 } else {
2285 *t++ = '\\';
2286 *t++ = *s;
2287 }
2288 } else {
2289 *t++ = *s;
2290 }
2291 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2292 CG(zend_lineno)++;
2293 }
2294 s++;
2295 }
2296 *t = 0;
2297 Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
2298
2299 skip_escape_conversion:
2300 if (SCNG(output_filter)) {
2301 size_t sz = 0;
2302 char *str = NULL;
2303 zend_string *new_str;
2304 s = Z_STRVAL_P(zendlval);
2305 // TODO: avoid reallocation ???
2306 SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2307 new_str = zend_string_init(str, sz, 0);
2308 if (str != s) {
2309 efree(str);
2310 }
2311 zend_string_release_ex(Z_STR_P(zendlval), 0);
2312 ZVAL_STR(zendlval, new_str);
2313 }
2314 RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
2315 }
2316
2317
2318 <ST_IN_SCRIPTING>b?["] {
2319 int bprefix = (yytext[0] != '"') ? 1 : 0;
2320
2321 while (YYCURSOR < YYLIMIT) {
2322 switch (*YYCURSOR++) {
2323 case '"':
2324 yyleng = YYCURSOR - SCNG(yy_text);
2325 if (EXPECTED(zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == SUCCESS)
2326 || !PARSER_MODE()) {
2327 RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
2328 } else {
2329 RETURN_TOKEN(T_ERROR);
2330 }
2331 case '$':
2332 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2333 break;
2334 }
2335 continue;
2336 case '{':
2337 if (*YYCURSOR == '$') {
2338 break;
2339 }
2340 continue;
2341 case '\\':
2342 if (YYCURSOR < YYLIMIT) {
2343 YYCURSOR++;
2344 }
2345 /* fall through */
2346 default:
2347 continue;
2348 }
2349
2350 YYCURSOR--;
2351 break;
2352 }
2353
2354 /* Remember how much was scanned to save rescanning */
2355 SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2356
2357 YYCURSOR = SCNG(yy_text) + yyleng;
2358
2359 BEGIN(ST_DOUBLE_QUOTES);
2360 RETURN_TOKEN('"');
2361 }
2362
2363
2364 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2365 char *s;
2366 unsigned char *saved_cursor;
2367 int bprefix = (yytext[0] != '<') ? 1 : 0, spacing = 0, indentation = 0;
2368 zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2369 zend_bool is_heredoc = 1;
2370
2371 CG(zend_lineno)++;
2372 heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2373 s = yytext+bprefix+3;
2374 while ((*s == ' ') || (*s == '\t')) {
2375 s++;
2376 heredoc_label->length--;
2377 }
2378
2379 if (*s == '\'') {
2380 s++;
2381 heredoc_label->length -= 2;
2382 is_heredoc = 0;
2383
2384 BEGIN(ST_NOWDOC);
2385 } else {
2386 if (*s == '"') {
2387 s++;
2388 heredoc_label->length -= 2;
2389 }
2390
2391 BEGIN(ST_HEREDOC);
2392 }
2393
2394 heredoc_label->label = estrndup(s, heredoc_label->length);
2395 heredoc_label->indentation = 0;
2396 saved_cursor = YYCURSOR;
2397
2398 zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2399
2400 while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
2401 if (*YYCURSOR == '\t') {
2402 spacing |= HEREDOC_USING_TABS;
2403 } else {
2404 spacing |= HEREDOC_USING_SPACES;
2405 }
2406 ++YYCURSOR;
2407 ++indentation;
2408 }
2409
2410 if (YYCURSOR == YYLIMIT) {
2411 YYCURSOR = saved_cursor;
2412 RETURN_TOKEN(T_START_HEREDOC);
2413 }
2414
2415 /* Check for ending label on the next line */
2416 if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2417 if (!IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
2418 if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
2419 zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
2420 if (PARSER_MODE()) {
2421 RETURN_TOKEN(T_ERROR);
2422 }
2423 }
2424
2425 YYCURSOR = saved_cursor;
2426 heredoc_label->indentation = indentation;
2427
2428 BEGIN(ST_END_HEREDOC);
2429 RETURN_TOKEN(T_START_HEREDOC);
2430 }
2431 }
2432
2433 YYCURSOR = saved_cursor;
2434
2435 if (is_heredoc && !SCNG(heredoc_scan_ahead)) {
2436 zend_lex_state current_state;
2437 zend_string *saved_doc_comment = CG(doc_comment);
2438 int heredoc_nesting_level = 1;
2439 int first_token = 0;
2440 int error = 0;
2441
2442 zend_save_lexical_state(¤t_state);
2443
2444 SCNG(heredoc_scan_ahead) = 1;
2445 SCNG(heredoc_indentation) = 0;
2446 SCNG(heredoc_indentation_uses_spaces) = 0;
2447 LANG_SCNG(on_event) = NULL;
2448 CG(doc_comment) = NULL;
2449
2450 zend_ptr_stack_reverse_apply(¤t_state.heredoc_label_stack, copy_heredoc_label_stack);
2451
2452 zend_exception_save();
2453 while (heredoc_nesting_level) {
2454 zval zv;
2455 int retval;
2456
2457 ZVAL_UNDEF(&zv);
2458 retval = lex_scan(&zv, NULL);
2459 zval_ptr_dtor_nogc(&zv);
2460
2461 if (EG(exception)) {
2462 zend_clear_exception();
2463 break;
2464 }
2465
2466 if (!first_token) {
2467 first_token = retval;
2468 }
2469
2470 switch (retval) {
2471 case T_START_HEREDOC:
2472 ++heredoc_nesting_level;
2473 break;
2474 case T_END_HEREDOC:
2475 --heredoc_nesting_level;
2476 break;
2477 case END:
2478 heredoc_nesting_level = 0;
2479 }
2480 }
2481 zend_exception_restore();
2482
2483 if (
2484 (first_token == T_VARIABLE
2485 || first_token == T_DOLLAR_OPEN_CURLY_BRACES
2486 || first_token == T_CURLY_OPEN
2487 ) && SCNG(heredoc_indentation)) {
2488 zend_throw_exception_ex(zend_ce_parse_error, 0, "Invalid body indentation level (expecting an indentation level of at least %d)", SCNG(heredoc_indentation));
2489 error = 1;
2490 }
2491
2492 heredoc_label->indentation = SCNG(heredoc_indentation);
2493 heredoc_label->indentation_uses_spaces = SCNG(heredoc_indentation_uses_spaces);
2494
2495 zend_restore_lexical_state(¤t_state);
2496 SCNG(heredoc_scan_ahead) = 0;
2497 CG(increment_lineno) = 0;
2498 CG(doc_comment) = saved_doc_comment;
2499
2500 if (PARSER_MODE() && error) {
2501 RETURN_TOKEN(T_ERROR);
2502 }
2503 }
2504
2505 RETURN_TOKEN(T_START_HEREDOC);
2506 }
2507
2508
2509 <ST_IN_SCRIPTING>[`] {
2510 BEGIN(ST_BACKQUOTE);
2511 RETURN_TOKEN('`');
2512 }
2513
2514
2515 <ST_END_HEREDOC>{ANY_CHAR} {
2516 zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2517
2518 yyleng = heredoc_label->indentation + heredoc_label->length;
2519 YYCURSOR += yyleng - 1;
2520
2521 heredoc_label_dtor(heredoc_label);
2522 efree(heredoc_label);
2523
2524 BEGIN(ST_IN_SCRIPTING);
2525 RETURN_TOKEN(T_END_HEREDOC);
2526 }
2527
2528
2529 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2530 yy_push_state(ST_IN_SCRIPTING);
2531 yyless(1);
2532 RETURN_TOKEN(T_CURLY_OPEN);
2533 }
2534
2535
2536 <ST_DOUBLE_QUOTES>["] {
2537 BEGIN(ST_IN_SCRIPTING);
2538 RETURN_TOKEN('"');
2539 }
2540
2541 <ST_BACKQUOTE>[`] {
2542 BEGIN(ST_IN_SCRIPTING);
2543 RETURN_TOKEN('`');
2544 }
2545
2546
2547 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2548 if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2549 YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2550 SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2551
2552 goto double_quotes_scan_done;
2553 }
2554
2555 if (YYCURSOR > YYLIMIT) {
2556 RETURN_TOKEN(END);
2557 }
2558 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2559 YYCURSOR++;
2560 }
2561
2562 while (YYCURSOR < YYLIMIT) {
2563 switch (*YYCURSOR++) {
2564 case '"':
2565 break;
2566 case '$':
2567 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2568 break;
2569 }
2570 continue;
2571 case '{':
2572 if (*YYCURSOR == '$') {
2573 break;
2574 }
2575 continue;
2576 case '\\':
2577 if (YYCURSOR < YYLIMIT) {
2578 YYCURSOR++;
2579 }
2580 /* fall through */
2581 default:
2582 continue;
2583 }
2584
2585 YYCURSOR--;
2586 break;
2587 }
2588
2589 double_quotes_scan_done:
2590 yyleng = YYCURSOR - SCNG(yy_text);
2591
2592 if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '"') == SUCCESS)
2593 || !PARSER_MODE()) {
2594 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2595 } else {
2596 RETURN_TOKEN(T_ERROR);
2597 }
2598 }
2599
2600
2601 <ST_BACKQUOTE>{ANY_CHAR} {
2602 if (YYCURSOR > YYLIMIT) {
2603 RETURN_TOKEN(END);
2604 }
2605 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2606 YYCURSOR++;
2607 }
2608
2609 while (YYCURSOR < YYLIMIT) {
2610 switch (*YYCURSOR++) {
2611 case '`':
2612 break;
2613 case '$':
2614 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2615 break;
2616 }
2617 continue;
2618 case '{':
2619 if (*YYCURSOR == '$') {
2620 break;
2621 }
2622 continue;
2623 case '\\':
2624 if (YYCURSOR < YYLIMIT) {
2625 YYCURSOR++;
2626 }
2627 /* fall through */
2628 default:
2629 continue;
2630 }
2631
2632 YYCURSOR--;
2633 break;
2634 }
2635
2636 yyleng = YYCURSOR - SCNG(yy_text);
2637
2638 if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '`') == SUCCESS)
2639 || !PARSER_MODE()) {
2640 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2641 } else {
2642 RETURN_TOKEN(T_ERROR);
2643 }
2644 }
2645
2646
2647 <ST_HEREDOC>{ANY_CHAR} {
2648 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2649 int newline = 0, indentation = 0, spacing = 0;
2650
2651 if (YYCURSOR > YYLIMIT) {
2652 RETURN_TOKEN(END);
2653 }
2654
2655 YYCURSOR--;
2656
2657 while (YYCURSOR < YYLIMIT) {
2658 switch (*YYCURSOR++) {
2659 case '\r':
2660 if (*YYCURSOR == '\n') {
2661 YYCURSOR++;
2662 }
2663 /* fall through */
2664 case '\n':
2665 indentation = spacing = 0;
2666
2667 while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
2668 if (*YYCURSOR == '\t') {
2669 spacing |= HEREDOC_USING_TABS;
2670 } else {
2671 spacing |= HEREDOC_USING_SPACES;
2672 }
2673 ++YYCURSOR;
2674 ++indentation;
2675 }
2676
2677 if (YYCURSOR == YYLIMIT) {
2678 yyleng = YYCURSOR - SCNG(yy_text);
2679 HANDLE_NEWLINES(yytext, yyleng);
2680 ZVAL_NULL(zendlval);
2681 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2682 }
2683
2684 /* Check for ending label on the next line */
2685 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2686 if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
2687 continue;
2688 }
2689
2690 if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
2691 zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
2692 if (PARSER_MODE()) {
2693 RETURN_TOKEN(T_ERROR);
2694 }
2695 }
2696
2697 /* newline before label will be subtracted from returned text, but
2698 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2699 if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
2700 newline = 2; /* Windows newline */
2701 } else {
2702 newline = 1;
2703 }
2704
2705 CG(increment_lineno) = 1; /* For newline before label */
2706
2707 if (SCNG(heredoc_scan_ahead)) {
2708 SCNG(heredoc_indentation) = indentation;
2709 SCNG(heredoc_indentation_uses_spaces) = (spacing == HEREDOC_USING_SPACES);
2710 } else {
2711 YYCURSOR -= indentation;
2712 }
2713
2714 BEGIN(ST_END_HEREDOC);
2715
2716 goto heredoc_scan_done;
2717 }
2718 continue;
2719 case '$':
2720 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2721 break;
2722 }
2723 continue;
2724 case '{':
2725 if (*YYCURSOR == '$') {
2726 break;
2727 }
2728 continue;
2729 case '\\':
2730 if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2731 YYCURSOR++;
2732 }
2733 /* fall through */
2734 default:
2735 continue;
2736 }
2737
2738 YYCURSOR--;
2739 break;
2740 }
2741
2742 heredoc_scan_done:
2743
2744 yyleng = YYCURSOR - SCNG(yy_text);
2745 ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
2746
2747 if (!SCNG(heredoc_scan_ahead) && !EG(exception) && PARSER_MODE()) {
2748 zend_bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
2749 zend_string *copy = Z_STR_P(zendlval);
2750
2751 if (!strip_multiline_string_indentation(
2752 zendlval, heredoc_label->indentation, heredoc_label->indentation_uses_spaces,
2753 newline_at_start, newline != 0)) {
2754 RETURN_TOKEN(T_ERROR);
2755 }
2756
2757 if (UNEXPECTED(zend_scan_escape_string(zendlval, ZSTR_VAL(copy), ZSTR_LEN(copy), 0) != SUCCESS)) {
2758 zend_string_efree(copy);
2759 RETURN_TOKEN(T_ERROR);
2760 }
2761
2762 zend_string_efree(copy);
2763 } else {
2764 HANDLE_NEWLINES(yytext, yyleng - newline);
2765 }
2766
2767 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2768 }
2769
2770
2771 <ST_NOWDOC>{ANY_CHAR} {
2772 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2773 int newline = 0, indentation = 0, spacing = -1;
2774
2775 if (YYCURSOR > YYLIMIT) {
2776 RETURN_TOKEN(END);
2777 }
2778
2779 YYCURSOR--;
2780
2781 while (YYCURSOR < YYLIMIT) {
2782 switch (*YYCURSOR++) {
2783 case '\r':
2784 if (*YYCURSOR == '\n') {
2785 YYCURSOR++;
2786 }
2787 /* fall through */
2788 case '\n':
2789 indentation = spacing = 0;
2790
2791 while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
2792 if (*YYCURSOR == '\t') {
2793 spacing |= HEREDOC_USING_TABS;
2794 } else {
2795 spacing |= HEREDOC_USING_SPACES;
2796 }
2797 ++YYCURSOR;
2798 ++indentation;
2799 }
2800
2801 if (YYCURSOR == YYLIMIT) {
2802 yyleng = YYCURSOR - SCNG(yy_text);
2803 HANDLE_NEWLINES(yytext, yyleng);
2804 ZVAL_NULL(zendlval);
2805 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2806 }
2807
2808 /* Check for ending label on the next line */
2809 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2810 if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
2811 continue;
2812 }
2813
2814 if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
2815 zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
2816 if (PARSER_MODE()) {
2817 RETURN_TOKEN(T_ERROR);
2818 }
2819 }
2820
2821 /* newline before label will be subtracted from returned text, but
2822 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2823 if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
2824 newline = 2; /* Windows newline */
2825 } else {
2826 newline = 1;
2827 }
2828
2829 CG(increment_lineno) = 1; /* For newline before label */
2830
2831 YYCURSOR -= indentation;
2832 heredoc_label->indentation = indentation;
2833
2834 BEGIN(ST_END_HEREDOC);
2835
2836 goto nowdoc_scan_done;
2837 }
2838 /* fall through */
2839 default:
2840 continue;
2841 }
2842 }
2843
2844 nowdoc_scan_done:
2845 yyleng = YYCURSOR - SCNG(yy_text);
2846 ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
2847
2848 if (!EG(exception) && spacing != -1 && PARSER_MODE()) {
2849 zend_bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
2850 if (!strip_multiline_string_indentation(
2851 zendlval, indentation, spacing == HEREDOC_USING_SPACES,
2852 newline_at_start, newline != 0)) {
2853 RETURN_TOKEN(T_ERROR);
2854 }
2855 }
2856
2857 HANDLE_NEWLINES(yytext, yyleng - newline);
2858 RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2859 }
2860
2861
2862 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2863 if (YYCURSOR > YYLIMIT) {
2864 RETURN_TOKEN(END);
2865 }
2866
2867 if (!SCNG(heredoc_scan_ahead)) {
2868 zend_error(E_COMPILE_WARNING, "Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2869 }
2870 if (PARSER_MODE()) {
2871 goto restart;
2872 } else {
2873 RETURN_TOKEN(T_BAD_CHARACTER);
2874 }
2875 }
2876
2877 */
2878
2879 emit_token_with_str:
2880 zend_copy_value(zendlval, (yytext + offset), (yyleng - offset));
2881
2882 emit_token_with_val:
2883 if (PARSER_MODE()) {
2884 ZEND_ASSERT(Z_TYPE_P(zendlval) != IS_UNDEF);
2885 elem->ast = zend_ast_create_zval_with_lineno(zendlval, start_line);
2886 }
2887
2888 emit_token:
2889 if (SCNG(on_event)) {
2890 SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
2891 }
2892 return token;
2893
2894 return_whitespace:
2895 HANDLE_NEWLINES(yytext, yyleng);
2896 if (SCNG(on_event)) {
2897 SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context));
2898 }
2899 if (PARSER_MODE()) {
2900 start_line = CG(zend_lineno);
2901 goto restart;
2902 } else {
2903 return T_WHITESPACE;
2904 }
2905
2906 skip_token:
2907 if (SCNG(on_event)) {
2908 SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
2909 }
2910 start_line = CG(zend_lineno);
2911 goto restart;
2912 }
2913