xref: /PHP-7.3/Zend/zend_language_scanner.l (revision e8d36ce7)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Marcus Boerger <helly@php.net>                              |
16    |          Nuno Lopes <nlopess@php.net>                                |
17    |          Scott MacVicar <scottmac@php.net>                           |
18    | Flex version authors:                                                |
19    |          Andi Gutmans <andi@php.net>                                 |
20    |          Zeev Suraski <zeev@php.net>                                 |
21    +----------------------------------------------------------------------+
22 */
23 
/* Scanner trace hook. Change the condition to 1 to make every YYDEBUG(s, c)
 * call print the state number and the character; as shipped the macro
 * expands to nothing. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
29 
30 #include "zend_language_scanner_defs.h"
31 
32 #include <errno.h>
33 #include "zend.h"
34 #ifdef ZEND_WIN32
35 # include <Winuser.h>
36 #endif
37 #include "zend_alloc.h"
38 #include <zend_language_parser.h>
39 #include "zend_compile.h"
40 #include "zend_language_scanner.h"
41 #include "zend_highlight.h"
42 #include "zend_constants.h"
43 #include "zend_variables.h"
44 #include "zend_operators.h"
45 #include "zend_API.h"
46 #include "zend_strtod.h"
47 #include "zend_exceptions.h"
48 #include "zend_virtual_cwd.h"
49 #include "tsrm_config_common.h"
50 
/* re2c interface: the input character type plus the cursor, limit and marker
   pointers, all stored in the scanner globals. */
#define YYCTYPE   unsigned char
/* Give up (return 0) once the requested look-ahead of n characters would reach
   the limit plus the ZEND_MMAP_AHEAD slack. The argument is parenthesised so
   that an expression argument is added as a whole. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* The current start condition is kept in SCNG(yy_state). */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s

/* Map a bare condition name to its yyc-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x characters of the current token: the cursor is moved
   back to yytext + x and yyleng becomes x. x is parenthesised so that the
   cast and the addition apply to the whole argument expression. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
70 
71 /* perform sanity check. If this message is triggered you should
72    increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
73 /*!max:re2c */
74 #if ZEND_MMAP_AHEAD < YYMAXFILL
75 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
76 #endif
77 
78 #ifdef HAVE_STDARG_H
79 # include <stdarg.h>
80 #endif
81 
82 #ifdef HAVE_UNISTD_H
83 # include <unistd.h>
84 #endif
85 
/* Globals Macros */
/* SCNG(x) is shorthand for LANG_SCNG(x). */
#define SCNG	LANG_SCNG
#ifdef ZTS
/* ZTS builds: only the resource id of the scanner globals is defined here. */
ZEND_API ts_rsrc_id language_scanner_globals_id;
#else
/* Non-ZTS builds: the scanner globals are a single global structure. */
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
93 
/* Increment CG(zend_lineno) once per line break found in the l bytes starting
   at s. A '\n' always counts; a '\r' counts only when the byte after it is not
   '\n', so a "\r\n" pair is counted once. */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *p, *boundary; \
 \
	for (p = (s), boundary = p+(l); p<boundary; p++) { \
		if (*p == '\r' ? p[1] != '\n' : *p == '\n') { \
			CG(zend_lineno)++; \
		} \
	} \
} while (0)
105 
/* Increment CG(zend_lineno) when the single character c is '\n' or '\r'.
   c is parenthesised so that an expression argument (for example a
   conditional expression) is compared as a whole. The expansion stays a plain
   brace block, as before, so existing call sites keep compiling. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
112 
/* Store / fetch the initial string length recorded after scanning up to the
   first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* A label starts with an ASCII letter, '_' or any value >= 0x80; later
   characters may also be decimal digits */
#define IS_LABEL_START(c) ((c) == '_' || ((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) >= 0x80)
#define IS_LABEL_SUCCESSOR(c) (IS_LABEL_START(c) || ((c) >= '0' && (c) <= '9'))

/* Octal and hexadecimal digit tests */
#define ZEND_IS_OCT(c)  ((c)<='7' && (c)>='0')
#define ZEND_IS_HEX(c)  (((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F') || ((c)>='0' && (c)<='9'))
122 
BEGIN_EXTERN_C()123 BEGIN_EXTERN_C()
124 
125 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
126 {
127 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
128 	ZEND_ASSERT(internal_encoding);
129 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
130 }
131 
encoding_filter_script_to_intermediate(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)132 static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
133 {
134 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
135 }
136 
encoding_filter_intermediate_to_script(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)137 static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
138 {
139 	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
140 LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
141 }
142 
encoding_filter_intermediate_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)143 static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
144 {
145 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
146 	ZEND_ASSERT(internal_encoding);
147 	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
148 internal_encoding, zend_multibyte_encoding_utf8);
149 }
150 
151 
_yy_push_state(int new_state)152 static void _yy_push_state(int new_state)
153 {
154 	zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
155 	YYSETCONDITION(new_state);
156 }
157 
158 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
159 
yy_pop_state(void)160 static void yy_pop_state(void)
161 {
162 	int *stack_state = zend_stack_top(&SCNG(state_stack));
163 	YYSETCONDITION(*stack_state);
164 	zend_stack_del_top(&SCNG(state_stack));
165 }
166 
yy_scan_buffer(char * str,unsigned int len)167 static void yy_scan_buffer(char *str, unsigned int len)
168 {
169 	YYCURSOR       = (YYCTYPE*)str;
170 	YYLIMIT        = YYCURSOR + len;
171 	if (!SCNG(yy_start)) {
172 		SCNG(yy_start) = YYCURSOR;
173 	}
174 }
175 
startup_scanner(void)176 void startup_scanner(void)
177 {
178 	CG(parse_error) = 0;
179 	CG(doc_comment) = NULL;
180 	CG(extra_fn_flags) = 0;
181 	zend_stack_init(&SCNG(state_stack), sizeof(int));
182 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183 	SCNG(heredoc_scan_ahead) = 0;
184 }
185 
/* Element destructor for the heredoc label stack: frees the label string. */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
189 
shutdown_scanner(void)190 void shutdown_scanner(void)
191 {
192 	CG(parse_error) = 0;
193 	RESET_DOC_COMMENT();
194 	zend_stack_destroy(&SCNG(state_stack));
195 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
196 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
197 	SCNG(heredoc_scan_ahead) = 0;
198 	SCNG(on_event) = NULL;
199 }
200 
zend_save_lexical_state(zend_lex_state * lex_state)201 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
202 {
203 	lex_state->yy_leng   = SCNG(yy_leng);
204 	lex_state->yy_start  = SCNG(yy_start);
205 	lex_state->yy_text   = SCNG(yy_text);
206 	lex_state->yy_cursor = SCNG(yy_cursor);
207 	lex_state->yy_marker = SCNG(yy_marker);
208 	lex_state->yy_limit  = SCNG(yy_limit);
209 
210 	lex_state->state_stack = SCNG(state_stack);
211 	zend_stack_init(&SCNG(state_stack), sizeof(int));
212 
213 	lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
214 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
215 
216 	lex_state->in = SCNG(yy_in);
217 	lex_state->yy_state = YYSTATE;
218 	lex_state->filename = zend_get_compiled_filename();
219 	lex_state->lineno = CG(zend_lineno);
220 
221 	lex_state->script_org = SCNG(script_org);
222 	lex_state->script_org_size = SCNG(script_org_size);
223 	lex_state->script_filtered = SCNG(script_filtered);
224 	lex_state->script_filtered_size = SCNG(script_filtered_size);
225 	lex_state->input_filter = SCNG(input_filter);
226 	lex_state->output_filter = SCNG(output_filter);
227 	lex_state->script_encoding = SCNG(script_encoding);
228 
229 	lex_state->on_event = SCNG(on_event);
230 	lex_state->on_event_context = SCNG(on_event_context);
231 
232 	lex_state->ast = CG(ast);
233 	lex_state->ast_arena = CG(ast_arena);
234 }
235 
zend_restore_lexical_state(zend_lex_state * lex_state)236 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
237 {
238 	SCNG(yy_leng)   = lex_state->yy_leng;
239 	SCNG(yy_start)  = lex_state->yy_start;
240 	SCNG(yy_text)   = lex_state->yy_text;
241 	SCNG(yy_cursor) = lex_state->yy_cursor;
242 	SCNG(yy_marker) = lex_state->yy_marker;
243 	SCNG(yy_limit)  = lex_state->yy_limit;
244 
245 	zend_stack_destroy(&SCNG(state_stack));
246 	SCNG(state_stack) = lex_state->state_stack;
247 
248 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
249 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
250 	SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
251 
252 	SCNG(yy_in) = lex_state->in;
253 	YYSETCONDITION(lex_state->yy_state);
254 	CG(zend_lineno) = lex_state->lineno;
255 	zend_restore_compiled_filename(lex_state->filename);
256 
257 	if (SCNG(script_filtered)) {
258 		efree(SCNG(script_filtered));
259 		SCNG(script_filtered) = NULL;
260 	}
261 	SCNG(script_org) = lex_state->script_org;
262 	SCNG(script_org_size) = lex_state->script_org_size;
263 	SCNG(script_filtered) = lex_state->script_filtered;
264 	SCNG(script_filtered_size) = lex_state->script_filtered_size;
265 	SCNG(input_filter) = lex_state->input_filter;
266 	SCNG(output_filter) = lex_state->output_filter;
267 	SCNG(script_encoding) = lex_state->script_encoding;
268 
269 	SCNG(on_event) = lex_state->on_event;
270 	SCNG(on_event_context) = lex_state->on_event_context;
271 
272 	CG(ast) = lex_state->ast;
273 	CG(ast_arena) = lex_state->ast_arena;
274 
275 	RESET_DOC_COMMENT();
276 }
277 
zend_destroy_file_handle(zend_file_handle * file_handle)278 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
279 {
280 	zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
281 	/* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
282 	file_handle->opened_path = NULL;
283 	if (file_handle->free_filename) {
284 		file_handle->filename = NULL;
285 	}
286 }
287 
zend_lex_tstring(zval * zv)288 ZEND_API void zend_lex_tstring(zval *zv)
289 {
290 	if (SCNG(on_event)) {
291 		SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
292 	}
293 
294 	ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
295 }
296 
/* Byte order marks as string literals; use sizeof(BOM_x)-1 for the length,
   because the UTF-32 marks contain NUL bytes. */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8     "\xef\xbb\xbf"
302 
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)303 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
304 {
305 	const unsigned char *p;
306 	int wchar_size = 2;
307 	int le = 0;
308 
309 	/* utf-16 or utf-32? */
310 	p = script;
311 	assert(p >= script);
312 	while ((size_t)(p-script) < script_size) {
313 		p = memchr(p, 0, script_size-(p-script)-2);
314 		if (!p) {
315 			break;
316 		}
317 		if (*(p+1) == '\0' && *(p+2) == '\0') {
318 			wchar_size = 4;
319 			break;
320 		}
321 
322 		/* searching for UTF-32 specific byte orders, so this will do */
323 		p += 4;
324 	}
325 
326 	/* BE or LE? */
327 	p = script;
328 	assert(p >= script);
329 	while ((size_t)(p-script) < script_size) {
330 		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
331 			/* BE */
332 			le = 0;
333 			break;
334 		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
335 			/* LE* */
336 			le = 1;
337 			break;
338 		}
339 		p += wchar_size;
340 	}
341 
342 	if (wchar_size == 2) {
343 		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
344 	} else {
345 		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
346 	}
347 
348 	return NULL;
349 }
350 
zend_multibyte_detect_unicode(void)351 static const zend_encoding* zend_multibyte_detect_unicode(void)
352 {
353 	const zend_encoding *script_encoding = NULL;
354 	int bom_size;
355 	unsigned char *pos1, *pos2;
356 
357 	if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
358 		return NULL;
359 	}
360 
361 	/* check out BOM */
362 	if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
363 		script_encoding = zend_multibyte_encoding_utf32be;
364 		bom_size = sizeof(BOM_UTF32_BE)-1;
365 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
366 		script_encoding = zend_multibyte_encoding_utf32le;
367 		bom_size = sizeof(BOM_UTF32_LE)-1;
368 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
369 		script_encoding = zend_multibyte_encoding_utf16be;
370 		bom_size = sizeof(BOM_UTF16_BE)-1;
371 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
372 		script_encoding = zend_multibyte_encoding_utf16le;
373 		bom_size = sizeof(BOM_UTF16_LE)-1;
374 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
375 		script_encoding = zend_multibyte_encoding_utf8;
376 		bom_size = sizeof(BOM_UTF8)-1;
377 	}
378 
379 	if (script_encoding) {
380 		/* remove BOM */
381 		LANG_SCNG(script_org) += bom_size;
382 		LANG_SCNG(script_org_size) -= bom_size;
383 
384 		return script_encoding;
385 	}
386 
387 	/* script contains NULL bytes -> auto-detection */
388 	if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
389 		/* check if the NULL byte is after the __HALT_COMPILER(); */
390 		pos2 = LANG_SCNG(script_org);
391 
392 		while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
393 			pos2 = memchr(pos2, '_', pos1 - pos2);
394 			if (!pos2) break;
395 			pos2++;
396 			if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
397 				pos2 += sizeof("_HALT_COMPILER")-1;
398 				while (*pos2 == ' '  ||
399 					   *pos2 == '\t' ||
400 					   *pos2 == '\r' ||
401 					   *pos2 == '\n') {
402 					pos2++;
403 				}
404 				if (*pos2 == '(') {
405 					pos2++;
406 					while (*pos2 == ' '  ||
407 						   *pos2 == '\t' ||
408 						   *pos2 == '\r' ||
409 						   *pos2 == '\n') {
410 						pos2++;
411 					}
412 					if (*pos2 == ')') {
413 						pos2++;
414 						while (*pos2 == ' '  ||
415 							   *pos2 == '\t' ||
416 							   *pos2 == '\r' ||
417 							   *pos2 == '\n') {
418 							pos2++;
419 						}
420 						if (*pos2 == ';') {
421 							return NULL;
422 						}
423 					}
424 				}
425 			}
426 		}
427 		/* make best effort if BOM is missing */
428 		return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
429 	}
430 
431 	return NULL;
432 }
433 
zend_multibyte_find_script_encoding(void)434 static const zend_encoding* zend_multibyte_find_script_encoding(void)
435 {
436 	const zend_encoding *script_encoding;
437 
438 	if (CG(detect_unicode)) {
439 		/* check out bom(byte order mark) and see if containing wchars */
440 		script_encoding = zend_multibyte_detect_unicode();
441 		if (script_encoding != NULL) {
442 			/* bom or wchar detection is prior to 'script_encoding' option */
443 			return script_encoding;
444 		}
445 	}
446 
447 	/* if no script_encoding specified, just leave alone */
448 	if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
449 		return NULL;
450 	}
451 
452 	/* if multiple encodings specified, detect automagically */
453 	if (CG(script_encoding_list_size) > 1) {
454 		return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
455 	}
456 
457 	return CG(script_encoding_list)[0];
458 }
459 
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)460 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
461 {
462 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
463 	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
464 
465 	if (!script_encoding) {
466 		return FAILURE;
467 	}
468 
469 	/* judge input/output filter */
470 	LANG_SCNG(script_encoding) = script_encoding;
471 	LANG_SCNG(input_filter) = NULL;
472 	LANG_SCNG(output_filter) = NULL;
473 
474 	if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
475 		if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
476 			/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
477 			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
478 			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
479 		} else {
480 			LANG_SCNG(input_filter) = NULL;
481 			LANG_SCNG(output_filter) = NULL;
482 		}
483 		return SUCCESS;
484 	}
485 
486 	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
487 		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
488 		LANG_SCNG(output_filter) = NULL;
489 	} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
490 		LANG_SCNG(input_filter) = NULL;
491 		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
492 	} else {
493 		/* both script and internal encodings are incompatible w/ flex */
494 		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
495 		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
496 	}
497 
498 	return 0;
499 }
500 
open_file_for_scanning(zend_file_handle * file_handle)501 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
502 {
503 	char *buf;
504 	size_t size, offset = 0;
505 	zend_string *compiled_filename;
506 
507 	/* The shebang line was read, get the current position to obtain the buffer start */
508 	if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
509 		if ((offset = ftell(file_handle->handle.fp)) == (size_t)-1) {
510 			offset = 0;
511 		}
512 	}
513 
514 	if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
515 		return FAILURE;
516 	}
517 
518 	zend_llist_add_element(&CG(open_files), file_handle);
519 	if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
520 		zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
521 		size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
522 		fh->handle.stream.handle = (void*)(((char*)fh) + diff);
523 		file_handle->handle.stream.handle = fh->handle.stream.handle;
524 	}
525 
526 	/* Reset the scanner for scanning the new file */
527 	SCNG(yy_in) = file_handle;
528 	SCNG(yy_start) = NULL;
529 
530 	if (size != (size_t)-1) {
531 		if (CG(multibyte)) {
532 			SCNG(script_org) = (unsigned char*)buf;
533 			SCNG(script_org_size) = size;
534 			SCNG(script_filtered) = NULL;
535 
536 			zend_multibyte_set_filter(NULL);
537 
538 			if (SCNG(input_filter)) {
539 				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
540 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
541 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
542 				}
543 				buf = (char*)SCNG(script_filtered);
544 				size = SCNG(script_filtered_size);
545 			}
546 		}
547 		SCNG(yy_start) = (unsigned char *)buf - offset;
548 		yy_scan_buffer(buf, (unsigned int)size);
549 	} else {
550 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
551 	}
552 
553 	BEGIN(INITIAL);
554 
555 	if (file_handle->opened_path) {
556 		compiled_filename = zend_string_copy(file_handle->opened_path);
557 	} else {
558 		compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
559 	}
560 
561 	zend_set_compiled_filename(compiled_filename);
562 	zend_string_release_ex(compiled_filename, 0);
563 
564 	if (CG(start_lineno)) {
565 		CG(zend_lineno) = CG(start_lineno);
566 		CG(start_lineno) = 0;
567 	} else {
568 		CG(zend_lineno) = 1;
569 	}
570 
571 	RESET_DOC_COMMENT();
572 	CG(increment_lineno) = 0;
573 	return SUCCESS;
574 }
END_EXTERN_C()575 END_EXTERN_C()
576 
577 static zend_op_array *zend_compile(int type)
578 {
579 	zend_op_array *op_array = NULL;
580 	zend_bool original_in_compilation = CG(in_compilation);
581 
582 	CG(in_compilation) = 1;
583 	CG(ast) = NULL;
584 	CG(ast_arena) = zend_arena_create(1024 * 32);
585 
586 	if (!zendparse()) {
587 		int last_lineno = CG(zend_lineno);
588 		zend_file_context original_file_context;
589 		zend_oparray_context original_oparray_context;
590 		zend_op_array *original_active_op_array = CG(active_op_array);
591 
592 		op_array = emalloc(sizeof(zend_op_array));
593 		init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
594 		CG(active_op_array) = op_array;
595 
596 		if (zend_ast_process) {
597 			zend_ast_process(CG(ast));
598 		}
599 
600 		zend_file_context_begin(&original_file_context);
601 		zend_oparray_context_begin(&original_oparray_context);
602 		zend_compile_top_stmt(CG(ast));
603 		CG(zend_lineno) = last_lineno;
604 		zend_emit_final_return(type == ZEND_USER_FUNCTION);
605 		op_array->line_start = 1;
606 		op_array->line_end = last_lineno;
607 		pass_two(op_array);
608 		zend_oparray_context_end(&original_oparray_context);
609 		zend_file_context_end(&original_file_context);
610 
611 		CG(active_op_array) = original_active_op_array;
612 	}
613 
614 	zend_ast_destroy(CG(ast));
615 	zend_arena_destroy(CG(ast_arena));
616 
617 	CG(in_compilation) = original_in_compilation;
618 
619 	return op_array;
620 }
621 
compile_file(zend_file_handle * file_handle,int type)622 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
623 {
624 	zend_lex_state original_lex_state;
625 	zend_op_array *op_array = NULL;
626 	zend_save_lexical_state(&original_lex_state);
627 
628 	if (open_file_for_scanning(file_handle)==FAILURE) {
629 		if (type==ZEND_REQUIRE) {
630 			zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
631 			zend_bailout();
632 		} else {
633 			zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
634 		}
635 	} else {
636 		op_array = zend_compile(ZEND_USER_FUNCTION);
637 	}
638 
639 	zend_restore_lexical_state(&original_lex_state);
640 	return op_array;
641 }
642 
643 
compile_filename(int type,zval * filename)644 zend_op_array *compile_filename(int type, zval *filename)
645 {
646 	zend_file_handle file_handle;
647 	zval tmp;
648 	zend_op_array *retval;
649 	zend_string *opened_path = NULL;
650 
651 	if (Z_TYPE_P(filename) != IS_STRING) {
652 		ZVAL_STR(&tmp, zval_get_string(filename));
653 		filename = &tmp;
654 	}
655 	file_handle.filename = Z_STRVAL_P(filename);
656 	file_handle.free_filename = 0;
657 	file_handle.type = ZEND_HANDLE_FILENAME;
658 	file_handle.opened_path = NULL;
659 	file_handle.handle.fp = NULL;
660 
661 	retval = zend_compile_file(&file_handle, type);
662 	if (retval && file_handle.handle.stream.handle) {
663 		if (!file_handle.opened_path) {
664 			file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
665 		}
666 
667 		zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
668 
669 		if (opened_path) {
670 			zend_string_release_ex(opened_path, 0);
671 		}
672 	}
673 	zend_destroy_file_handle(&file_handle);
674 
675 	if (UNEXPECTED(filename == &tmp)) {
676 		zval_ptr_dtor(&tmp);
677 	}
678 	return retval;
679 }
680 
zend_prepare_string_for_scanning(zval * str,char * filename)681 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
682 {
683 	char *buf;
684 	size_t size, old_len;
685 	zend_string *new_compiled_filename;
686 
687 	/* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
688 	old_len = Z_STRLEN_P(str);
689 	Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
690 	Z_TYPE_INFO_P(str) = IS_STRING_EX;
691 	memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
692 
693 	SCNG(yy_in) = NULL;
694 	SCNG(yy_start) = NULL;
695 
696 	buf = Z_STRVAL_P(str);
697 	size = old_len;
698 
699 	if (CG(multibyte)) {
700 		SCNG(script_org) = (unsigned char*)buf;
701 		SCNG(script_org_size) = size;
702 		SCNG(script_filtered) = NULL;
703 
704 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
705 
706 		if (SCNG(input_filter)) {
707 			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
708 				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
709 						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
710 			}
711 			buf = (char*)SCNG(script_filtered);
712 			size = SCNG(script_filtered_size);
713 		}
714 	}
715 
716 	yy_scan_buffer(buf, (unsigned int)size);
717 
718 	new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
719 	zend_set_compiled_filename(new_compiled_filename);
720 	zend_string_release_ex(new_compiled_filename, 0);
721 	CG(zend_lineno) = 1;
722 	CG(increment_lineno) = 0;
723 	RESET_DOC_COMMENT();
724 	return SUCCESS;
725 }
726 
727 
zend_get_scanned_file_offset(void)728 ZEND_API size_t zend_get_scanned_file_offset(void)
729 {
730 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
731 	if (SCNG(input_filter)) {
732 		size_t original_offset = offset, length = 0;
733 		do {
734 			unsigned char *p = NULL;
735 			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
736 				return (size_t)-1;
737 			}
738 			efree(p);
739 			if (length > original_offset) {
740 				offset--;
741 			} else if (length < original_offset) {
742 				offset++;
743 			}
744 		} while (original_offset != length);
745 	}
746 	return offset;
747 }
748 
compile_string(zval * source_string,char * filename)749 zend_op_array *compile_string(zval *source_string, char *filename)
750 {
751 	zend_lex_state original_lex_state;
752 	zend_op_array *op_array = NULL;
753 	zval tmp;
754 
755 	if (UNEXPECTED(Z_TYPE_P(source_string) != IS_STRING)) {
756 		ZVAL_STR(&tmp, zval_get_string_func(source_string));
757 	} else {
758 		ZVAL_COPY(&tmp, source_string);
759 	}
760 
761 	if (Z_STRLEN(tmp)==0) {
762 		zval_ptr_dtor(&tmp);
763 		return NULL;
764 	}
765 
766 	zend_save_lexical_state(&original_lex_state);
767 	if (zend_prepare_string_for_scanning(&tmp, filename) == SUCCESS) {
768 		BEGIN(ST_IN_SCRIPTING);
769 		op_array = zend_compile(ZEND_EVAL_CODE);
770 	}
771 
772 	zend_restore_lexical_state(&original_lex_state);
773 	zval_ptr_dtor(&tmp);
774 
775 	return op_array;
776 }
777 
778 
BEGIN_EXTERN_C()779 BEGIN_EXTERN_C()
780 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
781 {
782 	zend_lex_state original_lex_state;
783 	zend_file_handle file_handle;
784 
785 	file_handle.type = ZEND_HANDLE_FILENAME;
786 	file_handle.filename = filename;
787 	file_handle.free_filename = 0;
788 	file_handle.opened_path = NULL;
789 	zend_save_lexical_state(&original_lex_state);
790 	if (open_file_for_scanning(&file_handle)==FAILURE) {
791 		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
792 		zend_restore_lexical_state(&original_lex_state);
793 		return FAILURE;
794 	}
795 	zend_highlight(syntax_highlighter_ini);
796 	if (SCNG(script_filtered)) {
797 		efree(SCNG(script_filtered));
798 		SCNG(script_filtered) = NULL;
799 	}
800 	zend_destroy_file_handle(&file_handle);
801 	zend_restore_lexical_state(&original_lex_state);
802 	return SUCCESS;
803 }
804 
/*
 * Syntax-highlight the source held in str; str_name is used as the compiled
 * file name.
 *
 * A non-string str is converted into a temporary string that is released
 * before returning. Returns FAILURE when the string cannot be prepared for
 * scanning, SUCCESS otherwise. The lexical state is saved and restored around
 * the operation.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zend_lex_state saved_lex_state;
	int status = FAILURE;
	zval tmp;

	if (UNEXPECTED(Z_TYPE_P(str) != IS_STRING)) {
		ZVAL_STR(&tmp, zval_get_string_func(str));
		str = &tmp;
	}

	zend_save_lexical_state(&saved_lex_state);

	if (zend_prepare_string_for_scanning(str, str_name) != FAILURE) {
		BEGIN(INITIAL);
		zend_highlight(syntax_highlighter_ini);
		if (SCNG(script_filtered)) {
			efree(SCNG(script_filtered));
			SCNG(script_filtered) = NULL;
		}
		status = SUCCESS;
	}

	zend_restore_lexical_state(&saved_lex_state);

	if (UNEXPECTED(str == &tmp)) {
		zval_ptr_dtor(&tmp);
	}
	return status;
}
834 
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)835 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
836 {
837 	size_t length;
838 	unsigned char *new_yy_start;
839 
840 	/* convert and set */
841 	if (!SCNG(input_filter)) {
842 		if (SCNG(script_filtered)) {
843 			efree(SCNG(script_filtered));
844 			SCNG(script_filtered) = NULL;
845 		}
846 		SCNG(script_filtered_size) = 0;
847 		length = SCNG(script_org_size);
848 		new_yy_start = SCNG(script_org);
849 	} else {
850 		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
851 			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
852 					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
853 		}
854 		if (SCNG(script_filtered)) {
855 			efree(SCNG(script_filtered));
856 		}
857 		SCNG(script_filtered) = new_yy_start;
858 		SCNG(script_filtered_size) = length;
859 	}
860 
861 	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
862 	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
863 	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
864 	SCNG(yy_limit) = new_yy_start + length;
865 
866 	SCNG(yy_start) = new_yy_start;
867 }
868 
869 
/* Copy yyleng bytes of yytext into zendlval as a string.
 * - With an output filter set, the text goes through the filter first and the
 *   buffer it produced is freed after copying.
 *   NOTE(review): the filter's return value is not checked here.
 * - Without a filter, a single byte uses ZSTR_CHAR() instead of allocating.
 * The text and length arguments are parenthesised wherever a cast, dereference
 * or comparison is applied to them, so expression arguments expand as a whole.
 * The expansion remains a bare if/else chain, as before.
 * TODO: avoid reallocation ??? */
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (SCNG(output_filter)) { \
		size_t sz = 0; \
		char *s = NULL; \
		SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng)); \
		ZVAL_STRINGL(zendlval, s, sz); \
		efree(s); \
	} else if ((yyleng) == 1) { \
		ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \
	} else { \
		ZVAL_STRINGL(zendlval, yytext, yyleng); \
	}
883 
/*
 * Copy `len` bytes of `str` into `zendlval` and decode backslash escape
 * sequences in place.
 *
 * Recognised escapes: \n \r \t \f \v \e \\ \$, \xH[H] / \XH[H], up to three
 * octal digits, and \u{H+} (emitted as UTF-8). \" and \` are only unescaped
 * when they match `quote_type`; otherwise the backslash is kept. Any other
 * backslash sequence is copied through unchanged.
 *
 * CG(zend_lineno) is advanced for every line break seen in the input. If an
 * output filter is installed, the decoded string is passed through it before
 * returning.
 *
 * Returns SUCCESS, or FAILURE after throwing a parse error for a malformed or
 * out-of-range \u{...} escape (in which case `zendlval` is left UNDEF).
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *s, *t;
	char *end;

	/* Zero or one byte cannot contain an escape sequence. */
	if (len <= 1) {
		if (len < 1) {
			ZVAL_EMPTY_STRING(zendlval);
		} else {
			zend_uchar c = (zend_uchar)*str;
			if (c == '\n' || c == '\r') {
				CG(zend_lineno)++;
			}
			ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
		}
		goto skip_escape_conversion;
	}

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences */
	s = Z_STRVAL_P(zendlval);
	end = s+Z_STRLEN_P(zendlval);

	/* Fast path: nothing needs rewriting until the first backslash. */
	while (1) {
		if (UNEXPECTED(*s=='\\')) {
			break;
		}
		/* "\r\n" counts once, on the '\n' */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
		if (s == end) {
			goto skip_escape_conversion;
		}
	}

	/* s reads, t writes; decoding never grows the string, so t <= s. */
	t = s;
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* lone trailing backslash is kept literally */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					break;
				case 'r':
					*t++ = '\r';
					break;
				case 't':
					*t++ = '\t';
					break;
				case 'f':
					*t++ = '\f';
					break;
				case 'v':
					*t++ = '\v';
					break;
				case 'e':
					/* NOTE(review): presumably '\e' is unavailable with the
					 * Windows compiler, hence VK_ESCAPE there - confirm */
#ifdef ZEND_WIN32
					*t++ = VK_ESCAPE;
#else
					*t++ = '\e';
#endif
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						/* not our delimiter: keep the backslash */
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*t++ = *s;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						hex_buf[0] = *(++s);
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
						}
						*t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
					{
						/* cache where we started so we can parse after validating */
						char *start = s + 1;
						/* number of bytes in "{...}" including both braces */
						size_t esc_len = 0;
						zend_bool valid = 1;
						unsigned long codepoint;

						if (*start != '{') {
							/* we silently let this pass to avoid breaking code
							 * with JSON in string literals (e.g. "\"\u202e\"")
							 */
							*t++ = '\\';
							*t++ = 'u';
							break;
						} else {
							/* on the other hand, invalid \u{blah} errors */
							s++;
							esc_len++;
							s++;
							/* the buffer is NUL-terminated and NUL is not a
							 * hex digit, so this cannot run past the end */
							while (*s != '}') {
								if (!ZEND_IS_HEX(*s)) {
									valid = 0;
									break;
								} else {
									esc_len++;
								}
								s++;
							}
							if (*s == '}') {
								esc_len++;
							}
						}

						/* \u{} is invalid */
						if (esc_len <= 2) {
							valid = 0;
						}

						if (!valid) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						errno = 0;
						codepoint = strtoul(start + 1, NULL, 16);

						/* per RFC 3629, UTF-8 can only represent 21 bits */
						if (codepoint > 0x10FFFF || errno) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
						if (codepoint < 0x80) {
							*t++ = codepoint;
						} else if (codepoint <= 0x7FF) {
							*t++ = (codepoint >> 6) + 0xC0;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0xFFFF) {
							*t++ = (codepoint >> 12) + 0xE0;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else {
							/* codepoint <= 0x10FFFF, guaranteed by the range check above */
							*t++ = (codepoint >> 18) + 0xF0;
							*t++ = ((codepoint >> 12) & 0x3F) + 0x80;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						}
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
							}
						}
						if (octal_buf[2] && (octal_buf[0] > '3') && !SCNG(heredoc_scan_ahead)) {
							/* 3 octit values must not overflow 0xFF (\377) */
							zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
						}

						*t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* s now sits on the last input byte consumed by this iteration */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);

skip_escape_conversion:
	if (SCNG(output_filter)) {
		/* Start from NULL/0, as the other output-filter call sites in this
		 * file do, so a filter that leaves its outputs untouched never hands
		 * an indeterminate pointer to efree(). */
		size_t sz = 0;
		unsigned char *filtered = NULL;
		/* TODO: avoid reallocation ??? */
		s = Z_STRVAL_P(zendlval);
		SCNG(output_filter)(&filtered, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, sz);
		efree(filtered);
	}
	return SUCCESS;
}
1106 
/* Markers for the whitespace character used to indent a heredoc/nowdoc body.
 * NOTE(review): not referenced in this part of the file; presumably combined
 * as bit flags by the heredoc scanning rules - confirm against their use. */
#define HEREDOC_USING_SPACES 1
#define HEREDOC_USING_TABS 2
1109 
/*
 * Locate the first line break in [str, end).
 *
 * Returns a pointer to the first byte of the line break and stores its length
 * in *newline_len: 2 for "\r\n" (when the '\n' lies inside the range), 1 for
 * a lone '\r' or '\n'. Returns NULL with *newline_len set to 0 when the range
 * contains no line break.
 */
static const char *next_newline(const char *str, const char *end, size_t *newline_len) {
	const char *cur = str;

	while (cur < end) {
		switch (*cur) {
			case '\n':
				*newline_len = 1;
				return cur;
			case '\r':
				/* only treat it as CRLF if the LF is still inside the range */
				if (cur + 1 < end && cur[1] == '\n') {
					*newline_len = 2;
				} else {
					*newline_len = 1;
				}
				return cur;
			default:
				cur++;
				break;
		}
	}

	*newline_len = 0;
	return NULL;
}
1123 
/*
 * Remove up to `indentation` leading whitespace bytes from each line of the
 * string held in `zendlval`, compacting the string in place.
 *
 * - newline_at_start: the string begins at the start of a line, so its first
 *   line is stripped too; otherwise everything up to and including the first
 *   line break is left untouched.
 * - newline_at_end: the end of the string is treated as the end of a line.
 * - using_spaces: the indentation must consist of spaces (tabs otherwise);
 *   encountering the other character is an error.
 *
 * Lines that end before the full indentation is consumed are accepted as
 * long as every byte consumed so far was valid indentation.
 *
 * Returns 1 on success. On an indentation error a parse error is thrown,
 * CG(zend_lineno) is advanced by the number of lines already processed,
 * the string is released, `zendlval` is set to UNDEF and 0 is returned.
 */
static zend_bool strip_multiline_string_indentation(
	zval *zendlval, int indentation, zend_bool using_spaces,
	zend_bool newline_at_start, zend_bool newline_at_end)
{
	char *const buf = Z_STRVAL_P(zendlval);
	const char *const limit = buf + Z_STRLEN_P(zendlval);
	const char *src = buf;   /* read position */
	char *dst = buf;         /* write position, never ahead of src */
	const char *line_end;
	size_t eol_len;
	int lines_done = 0;

	if (newline_at_start) {
		line_end = src;
	} else {
		/* The partial first line is kept verbatim. */
		line_end = next_newline(src, limit, &eol_len);
		if (line_end == NULL) {
			return 1;
		}

		src = line_end + eol_len;
		dst = (char *) line_end + eol_len;
		lines_done = 1;
	}

	/* "<=" is deliberate: src may legitimately equal limit here */
	while (src <= limit && line_end != NULL) {
		size_t stripped;
		size_t chunk;

		line_end = next_newline(src, limit, &eol_len);
		if (line_end == NULL && newline_at_end) {
			line_end = limit;
		}

		/* Consume indentation; stop early if the line ends first. */
		for (stripped = 0; stripped < indentation && src != line_end; stripped++, src++) {
			if (src == limit || (*src != ' ' && *src != '\t')) {
				CG(zend_lineno) += lines_done;
				zend_throw_exception_ex(zend_ce_parse_error, 0,
					"Invalid body indentation level (expecting an indentation level of at least %d)", indentation);
				goto error;
			}

			if (*src == (using_spaces ? '\t' : ' ')) {
				CG(zend_lineno) += lines_done;
				zend_throw_exception(zend_ce_parse_error,
					"Invalid indentation - tabs and spaces cannot be mixed", 0);
				goto error;
			}
		}

		if (src == limit) {
			break;
		}

		/* Move the rest of the line (with its line break, if any) down. */
		if (line_end != NULL) {
			chunk = line_end - src + eol_len;
		} else {
			chunk = limit - src;
		}
		memmove(dst, src, chunk);
		src += chunk;
		dst += chunk;
		lines_done++;
	}

	*dst = '\0';
	Z_STRLEN_P(zendlval) = dst - Z_STRVAL_P(zendlval);
	return 1;

error:
	zval_ptr_dtor_str(zendlval);
	ZVAL_UNDEF(zendlval);

	return 0;
}
1199 
copy_heredoc_label_stack(void * void_heredoc_label)1200 static void copy_heredoc_label_stack(void *void_heredoc_label)
1201 {
1202 	zend_heredoc_label *heredoc_label = void_heredoc_label;
1203 	zend_heredoc_label *new_heredoc_label = emalloc(sizeof(zend_heredoc_label));
1204 
1205 	*new_heredoc_label = *heredoc_label;
1206 	new_heredoc_label->label = estrndup(heredoc_label->label, heredoc_label->length);
1207 
1208 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
1209 }
1210 
/*
 * Token-emission helpers. They expand inside a function that provides an
 * `elem` pointer, the locals `token` and `offset`, and the labels
 * emit_token, emit_token_with_val, emit_token_with_str and skip_token.
 */

/* True when a parser stack element was supplied (elem is non-NULL). */
#define PARSER_MODE() EXPECTED(elem != NULL)

/* Record the token id and jump to the plain emit path. */
#define RETURN_TOKEN(tok_id) \
	do { token = (tok_id); goto emit_token; } while (0)

/* Record the token id and jump to the emit path used once a value is set. */
#define RETURN_TOKEN_WITH_VAL(tok_id) \
	do { token = (tok_id); goto emit_token_with_val; } while (0)

/* Record the token id plus an offset and jump to the string emit path. */
#define RETURN_TOKEN_WITH_STR(tok_id, str_offset) \
	do { token = (tok_id); offset = (str_offset); goto emit_token_with_str; } while (0)

/* Record the token id and jump to the skip path. */
#define SKIP_TOKEN(tok_id) \
	do { token = (tok_id); goto skip_token; } while (0)
1234 
lex_scan(zval * zendlval,zend_parser_stack_elem * elem)1235 int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
1236 {
1237 int token;
1238 int offset;
1239 int start_line = CG(zend_lineno);
1240 
1241 	ZVAL_UNDEF(zendlval);
1242 restart:
1243 	SCNG(yy_text) = YYCURSOR;
1244 
1245 /*!re2c
1246 re2c:yyfill:check = 0;
1247 LNUM	[0-9]+
1248 DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1249 EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
1250 HNUM	"0x"[0-9a-fA-F]+
1251 BNUM	"0b"[01]+
1252 LABEL	[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1253 WHITESPACE [ \n\r\t]+
1254 TABS_AND_SPACES [ \t]*
1255 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1256 ANY_CHAR [^]
1257 NEWLINE ("\r"|"\n"|"\r\n")
1258 
1259 /* compute yyleng before each rule */
1260 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1261 
1262 <ST_IN_SCRIPTING>"exit" {
1263 	RETURN_TOKEN(T_EXIT);
1264 }
1265 
1266 <ST_IN_SCRIPTING>"die" {
1267 	RETURN_TOKEN(T_EXIT);
1268 }
1269 
1270 <ST_IN_SCRIPTING>"function" {
1271 	RETURN_TOKEN(T_FUNCTION);
1272 }
1273 
1274 <ST_IN_SCRIPTING>"const" {
1275 	RETURN_TOKEN(T_CONST);
1276 }
1277 
1278 <ST_IN_SCRIPTING>"return" {
1279 	RETURN_TOKEN(T_RETURN);
1280 }
1281 
1282 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
1283 	yyless(yyleng - 1);
1284 	HANDLE_NEWLINES(yytext, yyleng);
1285 	RETURN_TOKEN(T_YIELD_FROM);
1286 }
1287 
1288 <ST_IN_SCRIPTING>"yield" {
1289 	RETURN_TOKEN(T_YIELD);
1290 }
1291 
1292 <ST_IN_SCRIPTING>"try" {
1293 	RETURN_TOKEN(T_TRY);
1294 }
1295 
1296 <ST_IN_SCRIPTING>"catch" {
1297 	RETURN_TOKEN(T_CATCH);
1298 }
1299 
1300 <ST_IN_SCRIPTING>"finally" {
1301 	RETURN_TOKEN(T_FINALLY);
1302 }
1303 
1304 <ST_IN_SCRIPTING>"throw" {
1305 	RETURN_TOKEN(T_THROW);
1306 }
1307 
1308 <ST_IN_SCRIPTING>"if" {
1309 	RETURN_TOKEN(T_IF);
1310 }
1311 
1312 <ST_IN_SCRIPTING>"elseif" {
1313 	RETURN_TOKEN(T_ELSEIF);
1314 }
1315 
1316 <ST_IN_SCRIPTING>"endif" {
1317 	RETURN_TOKEN(T_ENDIF);
1318 }
1319 
1320 <ST_IN_SCRIPTING>"else" {
1321 	RETURN_TOKEN(T_ELSE);
1322 }
1323 
1324 <ST_IN_SCRIPTING>"while" {
1325 	RETURN_TOKEN(T_WHILE);
1326 }
1327 
1328 <ST_IN_SCRIPTING>"endwhile" {
1329 	RETURN_TOKEN(T_ENDWHILE);
1330 }
1331 
1332 <ST_IN_SCRIPTING>"do" {
1333 	RETURN_TOKEN(T_DO);
1334 }
1335 
1336 <ST_IN_SCRIPTING>"for" {
1337 	RETURN_TOKEN(T_FOR);
1338 }
1339 
1340 <ST_IN_SCRIPTING>"endfor" {
1341 	RETURN_TOKEN(T_ENDFOR);
1342 }
1343 
1344 <ST_IN_SCRIPTING>"foreach" {
1345 	RETURN_TOKEN(T_FOREACH);
1346 }
1347 
1348 <ST_IN_SCRIPTING>"endforeach" {
1349 	RETURN_TOKEN(T_ENDFOREACH);
1350 }
1351 
1352 <ST_IN_SCRIPTING>"declare" {
1353 	RETURN_TOKEN(T_DECLARE);
1354 }
1355 
1356 <ST_IN_SCRIPTING>"enddeclare" {
1357 	RETURN_TOKEN(T_ENDDECLARE);
1358 }
1359 
1360 <ST_IN_SCRIPTING>"instanceof" {
1361 	RETURN_TOKEN(T_INSTANCEOF);
1362 }
1363 
1364 <ST_IN_SCRIPTING>"as" {
1365 	RETURN_TOKEN(T_AS);
1366 }
1367 
1368 <ST_IN_SCRIPTING>"switch" {
1369 	RETURN_TOKEN(T_SWITCH);
1370 }
1371 
1372 <ST_IN_SCRIPTING>"endswitch" {
1373 	RETURN_TOKEN(T_ENDSWITCH);
1374 }
1375 
1376 <ST_IN_SCRIPTING>"case" {
1377 	RETURN_TOKEN(T_CASE);
1378 }
1379 
1380 <ST_IN_SCRIPTING>"default" {
1381 	RETURN_TOKEN(T_DEFAULT);
1382 }
1383 
1384 <ST_IN_SCRIPTING>"break" {
1385 	RETURN_TOKEN(T_BREAK);
1386 }
1387 
1388 <ST_IN_SCRIPTING>"continue" {
1389 	RETURN_TOKEN(T_CONTINUE);
1390 }
1391 
1392 <ST_IN_SCRIPTING>"goto" {
1393 	RETURN_TOKEN(T_GOTO);
1394 }
1395 
1396 <ST_IN_SCRIPTING>"echo" {
1397 	RETURN_TOKEN(T_ECHO);
1398 }
1399 
1400 <ST_IN_SCRIPTING>"print" {
1401 	RETURN_TOKEN(T_PRINT);
1402 }
1403 
1404 <ST_IN_SCRIPTING>"class" {
1405 	RETURN_TOKEN(T_CLASS);
1406 }
1407 
1408 <ST_IN_SCRIPTING>"interface" {
1409 	RETURN_TOKEN(T_INTERFACE);
1410 }
1411 
1412 <ST_IN_SCRIPTING>"trait" {
1413 	RETURN_TOKEN(T_TRAIT);
1414 }
1415 
1416 <ST_IN_SCRIPTING>"extends" {
1417 	RETURN_TOKEN(T_EXTENDS);
1418 }
1419 
1420 <ST_IN_SCRIPTING>"implements" {
1421 	RETURN_TOKEN(T_IMPLEMENTS);
1422 }
1423 
1424 <ST_IN_SCRIPTING>"->" {
1425 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
1426 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1427 }
1428 
1429 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1430 	goto return_whitespace;
1431 }
1432 
1433 <ST_LOOKING_FOR_PROPERTY>"->" {
1434 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1435 }
1436 
1437 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1438 	yy_pop_state();
1439 	RETURN_TOKEN_WITH_STR(T_STRING, 0);
1440 }
1441 
1442 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1443 	yyless(0);
1444 	yy_pop_state();
1445 	goto restart;
1446 }
1447 
1448 <ST_IN_SCRIPTING>"::" {
1449 	RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
1450 }
1451 
1452 <ST_IN_SCRIPTING>"\\" {
1453 	RETURN_TOKEN(T_NS_SEPARATOR);
1454 }
1455 
1456 <ST_IN_SCRIPTING>"..." {
1457 	RETURN_TOKEN(T_ELLIPSIS);
1458 }
1459 
1460 <ST_IN_SCRIPTING>"??" {
1461 	RETURN_TOKEN(T_COALESCE);
1462 }
1463 
1464 <ST_IN_SCRIPTING>"new" {
1465 	RETURN_TOKEN(T_NEW);
1466 }
1467 
1468 <ST_IN_SCRIPTING>"clone" {
1469 	RETURN_TOKEN(T_CLONE);
1470 }
1471 
1472 <ST_IN_SCRIPTING>"var" {
1473 	RETURN_TOKEN(T_VAR);
1474 }
1475 
1476 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1477 	RETURN_TOKEN(T_INT_CAST);
1478 }
1479 
1480 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1481 	RETURN_TOKEN(T_DOUBLE_CAST);
1482 }
1483 
1484 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1485 	RETURN_TOKEN(T_STRING_CAST);
1486 }
1487 
1488 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1489 	RETURN_TOKEN(T_ARRAY_CAST);
1490 }
1491 
1492 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1493 	RETURN_TOKEN(T_OBJECT_CAST);
1494 }
1495 
1496 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1497 	RETURN_TOKEN(T_BOOL_CAST);
1498 }
1499 
1500 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1501 	RETURN_TOKEN(T_UNSET_CAST);
1502 }
1503 
1504 <ST_IN_SCRIPTING>"eval" {
1505 	RETURN_TOKEN(T_EVAL);
1506 }
1507 
1508 <ST_IN_SCRIPTING>"include" {
1509 	RETURN_TOKEN(T_INCLUDE);
1510 }
1511 
1512 <ST_IN_SCRIPTING>"include_once" {
1513 	RETURN_TOKEN(T_INCLUDE_ONCE);
1514 }
1515 
1516 <ST_IN_SCRIPTING>"require" {
1517 	RETURN_TOKEN(T_REQUIRE);
1518 }
1519 
1520 <ST_IN_SCRIPTING>"require_once" {
1521 	RETURN_TOKEN(T_REQUIRE_ONCE);
1522 }
1523 
1524 <ST_IN_SCRIPTING>"namespace" {
1525 	RETURN_TOKEN(T_NAMESPACE);
1526 }
1527 
1528 <ST_IN_SCRIPTING>"use" {
1529 	RETURN_TOKEN(T_USE);
1530 }
1531 
1532 <ST_IN_SCRIPTING>"insteadof" {
1533     RETURN_TOKEN(T_INSTEADOF);
1534 }
1535 
1536 <ST_IN_SCRIPTING>"global" {
1537 	RETURN_TOKEN(T_GLOBAL);
1538 }
1539 
1540 <ST_IN_SCRIPTING>"isset" {
1541 	RETURN_TOKEN(T_ISSET);
1542 }
1543 
1544 <ST_IN_SCRIPTING>"empty" {
1545 	RETURN_TOKEN(T_EMPTY);
1546 }
1547 
1548 <ST_IN_SCRIPTING>"__halt_compiler" {
1549 	RETURN_TOKEN(T_HALT_COMPILER);
1550 }
1551 
1552 <ST_IN_SCRIPTING>"static" {
1553 	RETURN_TOKEN(T_STATIC);
1554 }
1555 
1556 <ST_IN_SCRIPTING>"abstract" {
1557 	RETURN_TOKEN(T_ABSTRACT);
1558 }
1559 
1560 <ST_IN_SCRIPTING>"final" {
1561 	RETURN_TOKEN(T_FINAL);
1562 }
1563 
1564 <ST_IN_SCRIPTING>"private" {
1565 	RETURN_TOKEN(T_PRIVATE);
1566 }
1567 
1568 <ST_IN_SCRIPTING>"protected" {
1569 	RETURN_TOKEN(T_PROTECTED);
1570 }
1571 
1572 <ST_IN_SCRIPTING>"public" {
1573 	RETURN_TOKEN(T_PUBLIC);
1574 }
1575 
1576 <ST_IN_SCRIPTING>"unset" {
1577 	RETURN_TOKEN(T_UNSET);
1578 }
1579 
1580 <ST_IN_SCRIPTING>"=>" {
1581 	RETURN_TOKEN(T_DOUBLE_ARROW);
1582 }
1583 
1584 <ST_IN_SCRIPTING>"list" {
1585 	RETURN_TOKEN(T_LIST);
1586 }
1587 
1588 <ST_IN_SCRIPTING>"array" {
1589 	RETURN_TOKEN(T_ARRAY);
1590 }
1591 
1592 <ST_IN_SCRIPTING>"callable" {
1593 	RETURN_TOKEN(T_CALLABLE);
1594 }
1595 
1596 <ST_IN_SCRIPTING>"++" {
1597 	RETURN_TOKEN(T_INC);
1598 }
1599 
1600 <ST_IN_SCRIPTING>"--" {
1601 	RETURN_TOKEN(T_DEC);
1602 }
1603 
1604 <ST_IN_SCRIPTING>"===" {
1605 	RETURN_TOKEN(T_IS_IDENTICAL);
1606 }
1607 
1608 <ST_IN_SCRIPTING>"!==" {
1609 	RETURN_TOKEN(T_IS_NOT_IDENTICAL);
1610 }
1611 
1612 <ST_IN_SCRIPTING>"==" {
1613 	RETURN_TOKEN(T_IS_EQUAL);
1614 }
1615 
1616 <ST_IN_SCRIPTING>"!="|"<>" {
1617 	RETURN_TOKEN(T_IS_NOT_EQUAL);
1618 }
1619 
1620 <ST_IN_SCRIPTING>"<=>" {
1621 	RETURN_TOKEN(T_SPACESHIP);
1622 }
1623 
1624 <ST_IN_SCRIPTING>"<=" {
1625 	RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
1626 }
1627 
1628 <ST_IN_SCRIPTING>">=" {
1629 	RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
1630 }
1631 
1632 <ST_IN_SCRIPTING>"+=" {
1633 	RETURN_TOKEN(T_PLUS_EQUAL);
1634 }
1635 
1636 <ST_IN_SCRIPTING>"-=" {
1637 	RETURN_TOKEN(T_MINUS_EQUAL);
1638 }
1639 
1640 <ST_IN_SCRIPTING>"*=" {
1641 	RETURN_TOKEN(T_MUL_EQUAL);
1642 }
1643 
1644 <ST_IN_SCRIPTING>"*\*" {
1645 	RETURN_TOKEN(T_POW);
1646 }
1647 
1648 <ST_IN_SCRIPTING>"*\*=" {
1649 	RETURN_TOKEN(T_POW_EQUAL);
1650 }
1651 
1652 <ST_IN_SCRIPTING>"/=" {
1653 	RETURN_TOKEN(T_DIV_EQUAL);
1654 }
1655 
1656 <ST_IN_SCRIPTING>".=" {
1657 	RETURN_TOKEN(T_CONCAT_EQUAL);
1658 }
1659 
1660 <ST_IN_SCRIPTING>"%=" {
1661 	RETURN_TOKEN(T_MOD_EQUAL);
1662 }
1663 
1664 <ST_IN_SCRIPTING>"<<=" {
1665 	RETURN_TOKEN(T_SL_EQUAL);
1666 }
1667 
1668 <ST_IN_SCRIPTING>">>=" {
1669 	RETURN_TOKEN(T_SR_EQUAL);
1670 }
1671 
1672 <ST_IN_SCRIPTING>"&=" {
1673 	RETURN_TOKEN(T_AND_EQUAL);
1674 }
1675 
1676 <ST_IN_SCRIPTING>"|=" {
1677 	RETURN_TOKEN(T_OR_EQUAL);
1678 }
1679 
1680 <ST_IN_SCRIPTING>"^=" {
1681 	RETURN_TOKEN(T_XOR_EQUAL);
1682 }
1683 
1684 <ST_IN_SCRIPTING>"||" {
1685 	RETURN_TOKEN(T_BOOLEAN_OR);
1686 }
1687 
1688 <ST_IN_SCRIPTING>"&&" {
1689 	RETURN_TOKEN(T_BOOLEAN_AND);
1690 }
1691 
1692 <ST_IN_SCRIPTING>"OR" {
1693 	RETURN_TOKEN(T_LOGICAL_OR);
1694 }
1695 
1696 <ST_IN_SCRIPTING>"AND" {
1697 	RETURN_TOKEN(T_LOGICAL_AND);
1698 }
1699 
1700 <ST_IN_SCRIPTING>"XOR" {
1701 	RETURN_TOKEN(T_LOGICAL_XOR);
1702 }
1703 
1704 <ST_IN_SCRIPTING>"<<" {
1705 	RETURN_TOKEN(T_SL);
1706 }
1707 
1708 <ST_IN_SCRIPTING>">>" {
1709 	RETURN_TOKEN(T_SR);
1710 }
1711 
1712 <ST_IN_SCRIPTING>{TOKENS} {
1713 	RETURN_TOKEN(yytext[0]);
1714 }
1715 
1716 
1717 <ST_IN_SCRIPTING>"{" {
1718 	yy_push_state(ST_IN_SCRIPTING);
1719 	RETURN_TOKEN('{');
1720 }
1721 
1722 
1723 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1724 	yy_push_state(ST_LOOKING_FOR_VARNAME);
1725 	RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
1726 }
1727 
1728 
1729 <ST_IN_SCRIPTING>"}" {
1730 	RESET_DOC_COMMENT();
1731 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
1732 		yy_pop_state();
1733 	}
1734 	RETURN_TOKEN('}');
1735 }
1736 
1737 
1738 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1739 	yyless(yyleng - 1);
1740 	yy_pop_state();
1741 	yy_push_state(ST_IN_SCRIPTING);
1742 	RETURN_TOKEN_WITH_STR(T_STRING_VARNAME, 0);
1743 }
1744 
1745 
1746 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1747 	yyless(0);
1748 	yy_pop_state();
1749 	yy_push_state(ST_IN_SCRIPTING);
1750 	goto restart;
1751 }
1752 
1753 <ST_IN_SCRIPTING>{BNUM} {
1754 	char *bin = yytext + 2; /* Skip "0b" */
1755 	int len = yyleng - 2;
1756 	char *end;
1757 
1758 	/* Skip any leading 0s */
1759 	while (*bin == '0') {
1760 		++bin;
1761 		--len;
1762 	}
1763 
1764 	if (len < SIZEOF_ZEND_LONG * 8) {
1765 		if (len == 0) {
1766 			ZVAL_LONG(zendlval, 0);
1767 		} else {
1768 			errno = 0;
1769 			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1770 			ZEND_ASSERT(!errno && end == yytext + yyleng);
1771 		}
1772 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1773 	} else {
1774 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1775 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1776 		ZEND_ASSERT(end == yytext + yyleng);
1777 		RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1778 	}
1779 }
1780 
1781 <ST_IN_SCRIPTING>{LNUM} {
1782 	char *end;
1783 	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1784 		errno = 0;
1785 		/* base must be passed explicitly for correct parse error on Windows */
1786 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
1787 		/* This isn't an assert, we need to ensure 019 isn't valid octal
1788 		 * Because the lexing itself doesn't do that for us
1789 		 */
1790 		if (end != yytext + yyleng) {
1791 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1792 			ZVAL_UNDEF(zendlval);
1793 			if (PARSER_MODE()) {
1794 				RETURN_TOKEN(T_ERROR);
1795 			}
1796 			RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1797 		}
1798 	} else {
1799 		errno = 0;
1800 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1801 		if (errno == ERANGE) { /* Overflow */
1802 			errno = 0;
1803 			if (yytext[0] == '0') { /* octal overflow */
1804 				ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1805 			} else {
1806 				ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1807 			}
1808 			/* Also not an assert for the same reason */
1809 			if (end != yytext + yyleng) {
1810 				zend_throw_exception(zend_ce_parse_error,
1811 					"Invalid numeric literal", 0);
1812 				ZVAL_UNDEF(zendlval);
1813 				if (PARSER_MODE()) {
1814 					RETURN_TOKEN(T_ERROR);
1815 				}
1816 			}
1817 			RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1818 		}
1819 		/* Also not an assert for the same reason */
1820 		if (end != yytext + yyleng) {
1821 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1822 			ZVAL_UNDEF(zendlval);
1823 			if (PARSER_MODE()) {
1824 				RETURN_TOKEN(T_ERROR);
1825 			}
1826 			RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1827 		}
1828 	}
1829 	ZEND_ASSERT(!errno);
1830 	RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1831 }
1832 
1833 <ST_IN_SCRIPTING>{HNUM} {
1834 	char *hex = yytext + 2; /* Skip "0x" */
1835 	int len = yyleng - 2;
1836 	char *end;
1837 
1838 	/* Skip any leading 0s */
1839 	while (*hex == '0') {
1840 		hex++;
1841 		len--;
1842 	}
1843 
1844 	if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1845 		if (len == 0) {
1846 			ZVAL_LONG(zendlval, 0);
1847 		} else {
1848 			errno = 0;
1849 			ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1850 			ZEND_ASSERT(!errno && end == hex + len);
1851 		}
1852 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1853 	} else {
1854 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1855 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1856 		ZEND_ASSERT(end == hex + len);
1857 		RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1858 	}
1859 }
1860 
1861 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1862 	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1863 		char *end;
1864 		errno = 0;
1865 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1866 		if (errno == ERANGE) {
1867 			goto string;
1868 		}
1869 		ZEND_ASSERT(end == yytext + yyleng);
1870 	} else {
1871 string:
1872 		ZVAL_STRINGL(zendlval, yytext, yyleng);
1873 	}
1874 	RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
1875 }
1876 
1877 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1878 	if (yyleng == 1) {
1879 		ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext)));
1880 	} else {
1881 		ZVAL_STRINGL(zendlval, yytext, yyleng);
1882 	}
1883 	RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
1884 }
1885 
1886 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1887 	const char *end;
1888 
1889 	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1890 	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1891 	ZEND_ASSERT(end == yytext + yyleng);
1892 	RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1893 }
1894 
1895 <ST_IN_SCRIPTING>"__CLASS__" {
1896 	RETURN_TOKEN(T_CLASS_C);
1897 }
1898 
1899 <ST_IN_SCRIPTING>"__TRAIT__" {
1900 	RETURN_TOKEN(T_TRAIT_C);
1901 }
1902 
1903 <ST_IN_SCRIPTING>"__FUNCTION__" {
1904 	RETURN_TOKEN(T_FUNC_C);
1905 }
1906 
1907 <ST_IN_SCRIPTING>"__METHOD__" {
1908 	RETURN_TOKEN(T_METHOD_C);
1909 }
1910 
1911 <ST_IN_SCRIPTING>"__LINE__" {
1912 	RETURN_TOKEN(T_LINE);
1913 }
1914 
1915 <ST_IN_SCRIPTING>"__FILE__" {
1916 	RETURN_TOKEN(T_FILE);
1917 }
1918 
1919 <ST_IN_SCRIPTING>"__DIR__" {
1920 	RETURN_TOKEN(T_DIR);
1921 }
1922 
1923 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1924 	RETURN_TOKEN(T_NS_C);
1925 }
1926 
1927 
1928 <INITIAL>"<?=" {
1929 	BEGIN(ST_IN_SCRIPTING);
1930 	if (PARSER_MODE()) {
1931 		RETURN_TOKEN(T_ECHO);
1932 	}
1933 	RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
1934 }
1935 
1936 
1937 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1938 	HANDLE_NEWLINE(yytext[yyleng-1]);
1939 	BEGIN(ST_IN_SCRIPTING);
1940 	if (PARSER_MODE()) {
1941 		SKIP_TOKEN(T_OPEN_TAG);
1942 	}
1943 	RETURN_TOKEN(T_OPEN_TAG);
1944 }
1945 
1946 
1947 <INITIAL>"<?" {
1948 	if (CG(short_tags)) {
1949 		BEGIN(ST_IN_SCRIPTING);
1950 		if (PARSER_MODE()) {
1951 			SKIP_TOKEN(T_OPEN_TAG);
1952 		}
1953 		RETURN_TOKEN(T_OPEN_TAG);
1954 	} else {
1955 		goto inline_char_handler;
1956 	}
1957 }
1958 
1959 <INITIAL>{ANY_CHAR} {
1960 	if (YYCURSOR > YYLIMIT) {
1961 		RETURN_TOKEN(END);
1962 	}
1963 
1964 inline_char_handler:
1965 
1966 	while (1) {
1967 		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1968 
1969 		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1970 
1971 		if (YYCURSOR >= YYLIMIT) {
1972 			break;
1973 		}
1974 
1975 		if (*YYCURSOR == '?') {
1976 			if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1977 
1978 				YYCURSOR--;
1979 				break;
1980 			}
1981 		}
1982 	}
1983 
1984 	yyleng = YYCURSOR - SCNG(yy_text);
1985 
1986 	if (SCNG(output_filter)) {
1987 		size_t readsize;
1988 		char *s = NULL;
1989 		size_t sz = 0;
1990 		// TODO: avoid reallocation ???
1991 		readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1992 		ZVAL_STRINGL(zendlval, s, sz);
1993 		efree(s);
1994 		if (readsize < yyleng) {
1995 			yyless(readsize);
1996 		}
1997 	} else if (yyleng == 1) {
1998 		ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext));
1999 	} else {
2000 		ZVAL_STRINGL(zendlval, yytext, yyleng);
2001 	}
2002 	HANDLE_NEWLINES(yytext, yyleng);
2003 	RETURN_TOKEN_WITH_VAL(T_INLINE_HTML);
2004 }
2005 
2006 
2007 /* Make sure a label character follows "->", otherwise there is no property
2008  * and "->" will be taken literally
2009  */
2010 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
2011 	yyless(yyleng - 3);
2012 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
2013 	RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
2014 }
2015 
2016 /* A [ always designates a variable offset, regardless of what follows
2017  */
2018 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
2019 	yyless(yyleng - 1);
2020 	yy_push_state(ST_VAR_OFFSET);
2021 	RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
2022 }
2023 
2024 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
2025 	RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
2026 }
2027 
2028 <ST_VAR_OFFSET>"]" {
2029 	yy_pop_state();
2030 	RETURN_TOKEN(']');
2031 }
2032 
2033 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
2034 	/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
2035 	RETURN_TOKEN(yytext[0]);
2036 }
2037 
2038 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
2039 	/* Invalid rule to return a more explicit parse error with proper line number */
2040 	yyless(0);
2041 	yy_pop_state();
2042 	ZVAL_NULL(zendlval);
2043 	RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2044 }
2045 
2046 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
2047 	RETURN_TOKEN_WITH_STR(T_STRING, 0);
2048 }
2049 
2050 
2051 <ST_IN_SCRIPTING>"#"|"//" {
2052 	while (YYCURSOR < YYLIMIT) {
2053 		switch (*YYCURSOR++) {
2054 			case '\r':
2055 				if (*YYCURSOR == '\n') {
2056 					YYCURSOR++;
2057 				}
2058 				/* fall through */
2059 			case '\n':
2060 				CG(zend_lineno)++;
2061 				break;
2062 			case '?':
2063 				if (*YYCURSOR == '>') {
2064 					YYCURSOR--;
2065 					break;
2066 				}
2067 				/* fall through */
2068 			default:
2069 				continue;
2070 		}
2071 
2072 		break;
2073 	}
2074 
2075 	yyleng = YYCURSOR - SCNG(yy_text);
2076 
2077 	if (PARSER_MODE()) {
2078 		SKIP_TOKEN(T_COMMENT);
2079 	}
2080 	RETURN_TOKEN(T_COMMENT);
2081 }
2082 
2083 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
2084 	int doc_com;
2085 
2086 	if (yyleng > 2) {
2087 		doc_com = 1;
2088 		RESET_DOC_COMMENT();
2089 	} else {
2090 		doc_com = 0;
2091 	}
2092 
2093 	while (YYCURSOR < YYLIMIT) {
2094 		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
2095 			break;
2096 		}
2097 	}
2098 
2099 	if (YYCURSOR < YYLIMIT) {
2100 		YYCURSOR++;
2101 	} else if (!SCNG(heredoc_scan_ahead)) {
2102 		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
2103 	}
2104 
2105 	yyleng = YYCURSOR - SCNG(yy_text);
2106 	HANDLE_NEWLINES(yytext, yyleng);
2107 
2108 	if (doc_com) {
2109 		CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
2110 		if (PARSER_MODE()) {
2111 			SKIP_TOKEN(T_DOC_COMMENT);
2112 		}
2113 		RETURN_TOKEN(T_DOC_COMMENT);
2114 	}
2115 
2116 	if (PARSER_MODE()) {
2117 		SKIP_TOKEN(T_COMMENT);
2118 	}
2119 	RETURN_TOKEN(T_COMMENT);
2120 }
2121 
2122 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
2123 	BEGIN(INITIAL);
2124 	if (yytext[yyleng-1] != '>') {
2125 		CG(increment_lineno) = 1;
2126 	}
2127 	if (PARSER_MODE()) {
2128 		RETURN_TOKEN(';');  /* implicit ';' at php-end tag */
2129 	}
2130 	RETURN_TOKEN(T_CLOSE_TAG);
2131 }
2132 
2133 
2134 <ST_IN_SCRIPTING>b?['] {
	/* Single-quoted string literal (optionally b-prefixed). The rule matches only
	 * the opener; the closing quote is located by hand, then the two escapes
	 * recognised below are converted in place in the copied string. */
2135 	register char *s, *t;
2136 	char *end;
2137 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
2138 
	/* Find the closing quote; the character following a backslash is skipped and
	 * therefore never treated as the terminator. */
2139 	while (1) {
2140 		if (YYCURSOR < YYLIMIT) {
2141 			if (*YYCURSOR == '\'') {
2142 				YYCURSOR++;
2143 				yyleng = YYCURSOR - SCNG(yy_text);
2144 
2145 				break;
2146 			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2147 				YYCURSOR++;
2148 			}
2149 		} else {
2150 			yyleng = YYLIMIT - SCNG(yy_text);
2151 
2152 			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
2153 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2154 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
2155 			ZVAL_NULL(zendlval);
2156 			RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2157 		}
2158 	}
2159 
	/* yyleng-bprefix-2 is the length of the text between the quotes. A body of
	 * zero or one character cannot hold an escape sequence, so it is served from
	 * the empty string / single-character interned string without copying. */
2160 	if (yyleng-bprefix-2 <= 1) {
2161 		if (yyleng-bprefix-2 < 1) {
2162 			ZVAL_EMPTY_STRING(zendlval);
2163 		} else {
2164 			zend_uchar c = (zend_uchar)*(yytext+bprefix+1);
2165 			if (c == '\n' || c == '\r') {
2166 				CG(zend_lineno)++;
2167 			}
2168 			ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
2169 		}
2170 		goto skip_escape_conversion;
2171 	}
2172 	ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
2173 
2174 	/* convert escape sequences */
2175 	s = Z_STRVAL_P(zendlval);
2176 	end = s+Z_STRLEN_P(zendlval);
	/* First pass: count line breaks up to the first backslash ("\r" directly
	 * followed by "\n" is counted once). With no backslash at all the copy is
	 * already final. */
2177 	while (1) {
2178 		if (UNEXPECTED(*s=='\\')) {
2179 			break;
2180 		}
2181 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2182 			CG(zend_lineno)++;
2183 		}
2184 		s++;
2185 		if (s == end) {
2186 			goto skip_escape_conversion;
2187 		}
2188 	}
2189 
	/* Second pass: compact the remainder in place (t writes, s reads). A backslash
	 * followed by a backslash or a quote collapses to that character; any other
	 * backslash is kept together with the character after it. */
2190 	t = s;
2191 	while (s<end) {
2192 		if (*s=='\\') {
2193 			s++;
2194 			if (*s == '\\' || *s == '\'') {
2195 				*t++ = *s;
2196 			} else {
2197 				*t++ = '\\';
2198 				*t++ = *s;
2199 			}
2200 		} else {
2201 			*t++ = *s;
2202 		}
2203 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2204 			CG(zend_lineno)++;
2205 		}
2206 		s++;
2207 	}
2208 	*t = 0;
2209 	Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
2210 
2211 skip_escape_conversion:
2212 	if (SCNG(output_filter)) {
2213 		size_t sz = 0;
2214 		char *str = NULL;
		zend_string *filtered;
2215 		s = Z_STRVAL_P(zendlval);
2216 		// TODO: avoid reallocation ???
2217 		SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
		/* The filtered bytes are copied into a fresh zend_string, so both the
		 * buffer handed back by the filter and the string previously held by
		 * zendlval have to be released here; overwriting zendlval directly
		 * leaked both. Releasing is a no-op for the interned strings set on the
		 * short-body path above.
		 * NOTE(review): assumes the filter buffer comes from the Zend allocator
		 * (hence efree) - confirm against the registered converter. */
		filtered = zend_string_init(str, sz, 0);
		if (str != NULL && str != s) {
			efree(str);
		}
		zval_ptr_dtor_nogc(zendlval);
		ZVAL_STR(zendlval, filtered);
2219 	}
2220 	RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
2221 }
2222 
2223 
2224 <ST_IN_SCRIPTING>b?["] {
	/* Double-quoted string opener (optionally b-prefixed). The body is scanned
	 * ahead: when the closing quote is reached before anything that starts an
	 * interpolation, the whole literal is returned as one token; otherwise only
	 * the opening quote is returned and the ST_DOUBLE_QUOTES state takes over. */
2225 	int bprefix = (yytext[0] != '"') ? 1 : 0;
2226 
2227 	while (YYCURSOR < YYLIMIT) {
2228 		switch (*YYCURSOR++) {
2229 			case '"':
				/* Closed without interpolation: pass the text between the quotes
				 * to zend_scan_escape_string(). A failure only produces T_ERROR
				 * in parser mode. */
2230 				yyleng = YYCURSOR - SCNG(yy_text);
2231 				if (EXPECTED(zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == SUCCESS)
2232 				 || !PARSER_MODE()) {
2233 					RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
2234 				} else {
2235 					RETURN_TOKEN(T_ERROR);
2236 				}
2237 			case '$':
				/* '$' followed by a label start or '{' stops the look-ahead. */
2238 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2239 					break;
2240 				}
2241 				continue;
2242 			case '{':
				/* '{' directly followed by '$' stops the look-ahead. */
2243 				if (*YYCURSOR == '$') {
2244 					break;
2245 				}
2246 				continue;
2247 			case '\\':
				/* Skip the escaped character so it is not examined by the cases above. */
2248 				if (YYCURSOR < YYLIMIT) {
2249 					YYCURSOR++;
2250 				}
2251 				/* fall through */
2252 			default:
2253 				continue;
2254 		}
2255 
		/* Reached only via the breaks above: step back onto the '$' or '{'. */
2256 		YYCURSOR--;
2257 		break;
2258 	}
2259 
2260 	/* Remember how much was scanned to save rescanning */
	/* yyleng is still the length of the opener matched by the rule here, so the
	 * stored value is the distance scanned past the opener. */
2261 	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2262 
	/* Rewind to just past the opener; only the quote itself is returned now. */
2263 	YYCURSOR = SCNG(yy_text) + yyleng;
2264 
2265 	BEGIN(ST_DOUBLE_QUOTES);
2266 	RETURN_TOKEN('"');
2267 }
2268 
2269 
2270 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
	/* Heredoc / nowdoc opener. Extracts the label, pushes it on the heredoc label
	 * stack, selects the follow-up state and, for heredocs, scans ahead to learn
	 * the indentation of the closing label before any body text is returned. */
2271 	char *s;
2272 	unsigned char *saved_cursor;
2273 	int bprefix = (yytext[0] != '<') ? 1 : 0, spacing = 0, indentation = 0;
2274 	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2275 	zend_bool is_heredoc = 1;
2276 
	/* The matched opener always ends with a line break. */
2277 	CG(zend_lineno)++;
	/* Length left after removing the optional prefix, "<<<" and the line break
	 * (one extra character when the break is "\r\n"). */
2278 	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2279 	s = yytext+bprefix+3;
	/* Skip blanks between "<<<" and the label. */
2280 	while ((*s == ' ') || (*s == '\t')) {
2281 		s++;
2282 		heredoc_label->length--;
2283 	}
2284 
	/* A single-quoted label selects nowdoc; a bare or double-quoted label selects
	 * heredoc. The surrounding quotes are not part of the label. */
2285 	if (*s == '\'') {
2286 		s++;
2287 		heredoc_label->length -= 2;
2288 		is_heredoc = 0;
2289 
2290 		BEGIN(ST_NOWDOC);
2291 	} else {
2292 		if (*s == '"') {
2293 			s++;
2294 			heredoc_label->length -= 2;
2295 		}
2296 
2297 		BEGIN(ST_HEREDOC);
2298 	}
2299 
2300 	heredoc_label->label = estrndup(s, heredoc_label->length);
2301 	heredoc_label->indentation = 0;
2302 	saved_cursor = YYCURSOR;
2303 
2304 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2305 
	/* Measure the leading blanks of the first body line and note whether tabs,
	 * spaces or both were used. The cursor is restored from saved_cursor below. */
2306 	while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
2307 		if (*YYCURSOR == '\t') {
2308 			spacing |= HEREDOC_USING_TABS;
2309 		} else {
2310 			spacing |= HEREDOC_USING_SPACES;
2311 		}
2312 		++YYCURSOR;
2313 		++indentation;
2314 	}
2315 
	/* Nothing but blanks up to the end of input. */
2316 	if (YYCURSOR == YYLIMIT) {
2317 		YYCURSOR = saved_cursor;
2318 		RETURN_TOKEN(T_START_HEREDOC);
2319 	}
2320 
2321 	/* Check for ending label on the next line */
2322 	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
		/* The label must not continue as a longer identifier. */
2323 		if (!IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
2324 			if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
2325 				zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
2326 				if (PARSER_MODE()) {
2327 					RETURN_TOKEN(T_ERROR);
2328 				}
2329 			}
2330 
			/* Empty body: record the closing label's indentation and let
			 * ST_END_HEREDOC consume indentation plus label. */
2331 			YYCURSOR = saved_cursor;
2332 			heredoc_label->indentation = indentation;
2333 
2334 			BEGIN(ST_END_HEREDOC);
2335 			RETURN_TOKEN(T_START_HEREDOC);
2336 		}
2337 	}
2338 
2339 	YYCURSOR = saved_cursor;
2340 
	/* Heredoc only, and not while already scanning ahead: run the scanner forward
	 * on a saved state until the matching end token, so the closing label's
	 * indentation is known up front. */
2341 	if (is_heredoc && !SCNG(heredoc_scan_ahead)) {
2342 		zend_lex_state current_state;
2343 		zend_string *saved_doc_comment = CG(doc_comment);
2344 		int heredoc_nesting_level = 1;
2345 		int first_token = 0;
2346 		int error = 0;
2347 
2348 		zend_save_lexical_state(&current_state);
2349 
		/* Scan-ahead runs with the event callback and doc comment cleared; the
		 * doc comment is put back from saved_doc_comment afterwards. */
2350 		SCNG(heredoc_scan_ahead) = 1;
2351 		SCNG(heredoc_indentation) = 0;
2352 		SCNG(heredoc_indentation_uses_spaces) = 0;
2353 		LANG_SCNG(on_event) = NULL;
2354 		CG(doc_comment) = NULL;
2355 
		/* NOTE(review): presumably gives the saved state its own copies of the
		 * labels so the look-ahead cannot free them - confirm against
		 * copy_heredoc_label_stack(). */
2356 		zend_ptr_stack_reverse_apply(&current_state.heredoc_label_stack, copy_heredoc_label_stack);
2357 
2358 		zend_exception_save();
2359 		while (heredoc_nesting_level) {
2360 			zval zv;
2361 			int retval;
2362 
2363 			ZVAL_UNDEF(&zv);
2364 			retval = lex_scan(&zv, NULL);
2365 			zval_ptr_dtor_nogc(&zv);
2366 
			/* An exception raised during look-ahead is discarded and ends it. */
2367 			if (EG(exception)) {
2368 				zend_clear_exception();
2369 				break;
2370 			}
2371 
			/* Remember the first token of the body for the check below. */
2372 			if (!first_token) {
2373 				first_token = retval;
2374 			}
2375 
			/* Track nested heredocs; END stops the look-ahead. */
2376 			switch (retval) {
2377 				case T_START_HEREDOC:
2378 					++heredoc_nesting_level;
2379 					break;
2380 				case T_END_HEREDOC:
2381 					--heredoc_nesting_level;
2382 					break;
2383 				case END:
2384 					heredoc_nesting_level = 0;
2385 			}
2386 		}
2387 		zend_exception_restore();
2388 
		/* The body begins directly with an interpolation token while the closing
		 * label is indented: raise a parse error. */
2389 		if (
2390 		    (first_token == T_VARIABLE
2391 		     || first_token == T_DOLLAR_OPEN_CURLY_BRACES
2392 		     || first_token == T_CURLY_OPEN
2393 		    ) && SCNG(heredoc_indentation)) {
2394 			zend_throw_exception_ex(zend_ce_parse_error, 0, "Invalid body indentation level (expecting an indentation level of at least %d)", SCNG(heredoc_indentation));
2395 			error = 1;
2396 		}
2397 
		/* Store what the look-ahead found in the label for the body rule. */
2398 		heredoc_label->indentation = SCNG(heredoc_indentation);
2399 		heredoc_label->indentation_uses_spaces = SCNG(heredoc_indentation_uses_spaces);
2400 
		/* Return to the scanner state saved before the look-ahead. */
2401 		zend_restore_lexical_state(&current_state);
2402 		SCNG(heredoc_scan_ahead) = 0;
2403 		CG(increment_lineno) = 0;
2404 		CG(doc_comment) = saved_doc_comment;
2405 
2406 		if (PARSER_MODE() && error) {
2407 			RETURN_TOKEN(T_ERROR);
2408 		}
2409 	}
2410 
2411 	RETURN_TOKEN(T_START_HEREDOC);
2412 }
2413 
2414 
2415 <ST_IN_SCRIPTING>[`] {
	/* Opening backtick: switch to the ST_BACKQUOTE state and return the backtick itself. */
2416 	BEGIN(ST_BACKQUOTE);
2417 	RETURN_TOKEN('`');
2418 }
2419 
2420 
2421 <ST_END_HEREDOC>{ANY_CHAR} {
	/* Closing label of a heredoc/nowdoc: pop its record and consume the label line. */
2422 	zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2423 
	/* The token covers the indentation plus the label; the rule has already
	 * consumed one character, hence the -1. */
2424 	yyleng = heredoc_label->indentation + heredoc_label->length;
2425 	YYCURSOR += yyleng - 1;
2426 
2427 	heredoc_label_dtor(heredoc_label);
2428 	efree(heredoc_label);
2429 
2430 	BEGIN(ST_IN_SCRIPTING);
2431 	RETURN_TOKEN(T_END_HEREDOC);
2432 }
2433 
2434 
2435 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
	/* "{$" inside an interpolating string: push ST_IN_SCRIPTING and return T_CURLY_OPEN.
	 * NOTE(review): yyless(1) presumably keeps only the '{' in this token so the
	 * '$' is scanned again in the new state - confirm against the macro. */
2436 	yy_push_state(ST_IN_SCRIPTING);
2437 	yyless(1);
2438 	RETURN_TOKEN(T_CURLY_OPEN);
2439 }
2440 
2441 
2442 <ST_DOUBLE_QUOTES>["] {
	/* Closing double quote: go back to ST_IN_SCRIPTING and return the quote itself. */
2443 	BEGIN(ST_IN_SCRIPTING);
2444 	RETURN_TOKEN('"');
2445 }
2446 
2447 <ST_BACKQUOTE>[`] {
	/* Closing backtick: go back to ST_IN_SCRIPTING and return the backtick itself. */
2448 	BEGIN(ST_IN_SCRIPTING);
2449 	RETURN_TOKEN('`');
2450 }
2451 
2452 
2453 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
	/* Literal text inside a double-quoted string, up to the closing quote or the
	 * next thing that starts an interpolation. */
	/* Reuse the distance measured when the opening quote was scanned; one
	 * character has already been consumed by this rule, hence the -1. */
2454 	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2455 		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2456 		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2457 
2458 		goto double_quotes_scan_done;
2459 	}
2460 
	/* The cursor moved past the limit: report the end of input. */
2461 	if (YYCURSOR > YYLIMIT) {
2462 		RETURN_TOKEN(END);
2463 	}
	/* The matched character is a backslash: consume the character after it too. */
2464 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2465 		YYCURSOR++;
2466 	}
2467 
2468 	while (YYCURSOR < YYLIMIT) {
2469 		switch (*YYCURSOR++) {
2470 			case '"':
2471 				break;
2472 			case '$':
				/* '$' followed by a label start or '{' ends this text run. */
2473 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2474 					break;
2475 				}
2476 				continue;
2477 			case '{':
				/* '{' directly followed by '$' ends this text run. */
2478 				if (*YYCURSOR == '$') {
2479 					break;
2480 				}
2481 				continue;
2482 			case '\\':
				/* Skip the escaped character so it is not examined by the cases above. */
2483 				if (YYCURSOR < YYLIMIT) {
2484 					YYCURSOR++;
2485 				}
2486 				/* fall through */
2487 			default:
2488 				continue;
2489 		}
2490 
		/* Leave the terminating character for the next rule. */
2491 		YYCURSOR--;
2492 		break;
2493 	}
2494 
2495 double_quotes_scan_done:
2496 	yyleng = YYCURSOR - SCNG(yy_text);
2497 
	/* A failed escape conversion only produces T_ERROR in parser mode. */
2498 	if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '"') == SUCCESS)
2499 	 || !PARSER_MODE()) {
2500 		RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2501 	} else {
2502 		RETURN_TOKEN(T_ERROR);
2503 	}
2504 }
2505 
2506 
2507 <ST_BACKQUOTE>{ANY_CHAR} {
	/* Literal text inside a backtick string, up to the closing backtick or the
	 * next thing that starts an interpolation. */
	/* The cursor moved past the limit: report the end of input. */
2508 	if (YYCURSOR > YYLIMIT) {
2509 		RETURN_TOKEN(END);
2510 	}
	/* The matched character is a backslash: consume the character after it too. */
2511 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2512 		YYCURSOR++;
2513 	}
2514 
2515 	while (YYCURSOR < YYLIMIT) {
2516 		switch (*YYCURSOR++) {
2517 			case '`':
2518 				break;
2519 			case '$':
				/* '$' followed by a label start or '{' ends this text run. */
2520 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2521 					break;
2522 				}
2523 				continue;
2524 			case '{':
				/* '{' directly followed by '$' ends this text run. */
2525 				if (*YYCURSOR == '$') {
2526 					break;
2527 				}
2528 				continue;
2529 			case '\\':
				/* Skip the escaped character so it is not examined by the cases above. */
2530 				if (YYCURSOR < YYLIMIT) {
2531 					YYCURSOR++;
2532 				}
2533 				/* fall through */
2534 			default:
2535 				continue;
2536 		}
2537 
		/* Leave the terminating character for the next rule. */
2538 		YYCURSOR--;
2539 		break;
2540 	}
2541 
2542 	yyleng = YYCURSOR - SCNG(yy_text);
2543 
	/* A failed escape conversion only produces T_ERROR in parser mode. */
2544 	if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '`') == SUCCESS)
2545 	 || !PARSER_MODE()) {
2546 		RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2547 	} else {
2548 		RETURN_TOKEN(T_ERROR);
2549 	}
2550 }
2551 
2552 
2553 <ST_HEREDOC>{ANY_CHAR} {
	/* Heredoc body text, up to the next thing that starts an interpolation or the
	 * line that holds the closing label (taken from the top of the label stack). */
2554 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2555 	int newline = 0, indentation = 0, spacing = 0;
2556 
	/* The cursor moved past the limit: report the end of input. */
2557 	if (YYCURSOR > YYLIMIT) {
2558 		RETURN_TOKEN(END);
2559 	}
2560 
	/* Step back so the character matched by the rule is examined by the loop too. */
2561 	YYCURSOR--;
2562 
2563 	while (YYCURSOR < YYLIMIT) {
2564 		switch (*YYCURSOR++) {
2565 			case '\r':
				/* "\r\n" is handled as one line break. */
2566 				if (*YYCURSOR == '\n') {
2567 					YYCURSOR++;
2568 				}
2569 				/* fall through */
2570 			case '\n':
				/* Measure the blanks opening the new line and note whether
				 * tabs, spaces or both were used. */
2571 				indentation = spacing = 0;
2572 
2573 				while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
2574 					if (*YYCURSOR == '\t') {
2575 						spacing |= HEREDOC_USING_TABS;
2576 					} else {
2577 						spacing |= HEREDOC_USING_SPACES;
2578 					}
2579 					++YYCURSOR;
2580 					++indentation;
2581 				}
2582 
				/* Input ended after the blanks: return what was scanned, with a NULL value. */
2583 				if (YYCURSOR == YYLIMIT) {
2584 					yyleng = YYCURSOR - SCNG(yy_text);
2585 					HANDLE_NEWLINES(yytext, yyleng);
2586 					ZVAL_NULL(zendlval);
2587 					RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2588 				}
2589 
2590 				/* Check for ending label on the next line */
2591 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
					/* The label is only the start of a longer identifier: keep scanning. */
2592 					if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
2593 						continue;
2594 					}
2595 
2596 					if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
2597 						zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
2598 						if (PARSER_MODE()) {
2599 							RETURN_TOKEN(T_ERROR);
2600 						}
2601 					}
2602 
2603 					/* newline before label will be subtracted from returned text, but
2604 					 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2605 					if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
2606 						newline = 2; /* Windows newline */
2607 					} else {
2608 						newline = 1;
2609 					}
2610 
2611 					CG(increment_lineno) = 1; /* For newline before label */
2612 
					/* During scan-ahead only publish the closing label's indentation;
					 * otherwise rewind so the indentation is consumed together with
					 * the label in ST_END_HEREDOC. */
2613 					if (SCNG(heredoc_scan_ahead)) {
2614 						SCNG(heredoc_indentation) = indentation;
2615 						SCNG(heredoc_indentation_uses_spaces) = (spacing == HEREDOC_USING_SPACES);
2616 					} else {
2617 						YYCURSOR -= indentation;
2618 					}
2619 
2620 					BEGIN(ST_END_HEREDOC);
2621 
2622 					goto heredoc_scan_done;
2623 				}
2624 				continue;
2625 			case '$':
				/* '$' followed by a label start or '{' ends this text run. */
2626 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2627 					break;
2628 				}
2629 				continue;
2630 			case '{':
				/* '{' directly followed by '$' ends this text run. */
2631 				if (*YYCURSOR == '$') {
2632 					break;
2633 				}
2634 				continue;
2635 			case '\\':
				/* Skip the escaped character, except a line break, which must
				 * still reach the line-break cases above. */
2636 				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2637 					YYCURSOR++;
2638 				}
2639 				/* fall through */
2640 			default:
2641 				continue;
2642 		}
2643 
		/* Leave the '$' or '{' for the next rule. */
2644 		YYCURSOR--;
2645 		break;
2646 	}
2647 
2648 heredoc_scan_done:
2649 
	/* newline is non-zero only when the closing label was found; that line break
	 * stays in yyleng but is left out of the value. */
2650 	yyleng = YYCURSOR - SCNG(yy_text);
2651 	ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
2652 
	/* Parser mode, outside scan-ahead and with no pending exception: strip the
	 * indentation recorded in the label, then run escape processing. */
2653 	if (!SCNG(heredoc_scan_ahead) && !EG(exception) && PARSER_MODE()) {
		/* True when the character just before this token is a line break. */
2654 		zend_bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
		/* copy is the string held by zendlval at this point; it is freed once
		 * zend_scan_escape_string() has been handed its contents. */
2655 		zend_string *copy = Z_STR_P(zendlval);
2656 
2657 		if (!strip_multiline_string_indentation(
2658 				zendlval, heredoc_label->indentation, heredoc_label->indentation_uses_spaces,
2659 				newline_at_start, newline != 0)) {
2660 			RETURN_TOKEN(T_ERROR);
2661 		}
2662 
2663 		if (UNEXPECTED(zend_scan_escape_string(zendlval, ZSTR_VAL(copy), ZSTR_LEN(copy), 0) != SUCCESS)) {
2664 			zend_string_efree(copy);
2665 			RETURN_TOKEN(T_ERROR);
2666 		}
2667 
2668 		zend_string_efree(copy);
2669 	} else {
2670 		HANDLE_NEWLINES(yytext, yyleng - newline);
2671 	}
2672 
2673 	RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2674 }
2675 
2676 
2677 <ST_NOWDOC>{ANY_CHAR} {
	/* Nowdoc body text: scanned up to the line that holds the closing label
	 * (taken from the top of the label stack) or to the end of input. */
2678 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
	/* spacing stays -1 until a line break has been seen in this scan. */
2679 	int newline = 0, indentation = 0, spacing = -1;
2680 
	/* The cursor moved past the limit: report the end of input. */
2681 	if (YYCURSOR > YYLIMIT) {
2682 		RETURN_TOKEN(END);
2683 	}
2684 
	/* Step back so the character matched by the rule is examined by the loop too. */
2685 	YYCURSOR--;
2686 
2687 	while (YYCURSOR < YYLIMIT) {
2688 		switch (*YYCURSOR++) {
2689 			case '\r':
				/* "\r\n" is handled as one line break. */
2690 				if (*YYCURSOR == '\n') {
2691 					YYCURSOR++;
2692 				}
2693 				/* fall through */
2694 			case '\n':
				/* Measure the blanks opening the new line and note whether
				 * tabs, spaces or both were used. */
2695 				indentation = spacing = 0;
2696 
2697 				while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
2698 					if (*YYCURSOR == '\t') {
2699 						spacing |= HEREDOC_USING_TABS;
2700 					} else {
2701 						spacing |= HEREDOC_USING_SPACES;
2702 					}
2703 					++YYCURSOR;
2704 					++indentation;
2705 				}
2706 
				/* Input ended after the blanks: return what was scanned, with a NULL value. */
2707 				if (YYCURSOR == YYLIMIT) {
2708 					yyleng = YYCURSOR - SCNG(yy_text);
2709 					HANDLE_NEWLINES(yytext, yyleng);
2710 					ZVAL_NULL(zendlval);
2711 					RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2712 				}
2713 
2714 				/* Check for ending label on the next line */
2715 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
					/* The label is only the start of a longer identifier: keep scanning. */
2716 					if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
2717 						continue;
2718 					}
2719 
2720 					if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
2721 						zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
2722 						if (PARSER_MODE()) {
2723 							RETURN_TOKEN(T_ERROR);
2724 						}
2725 					}
2726 
2727 					/* newline before label will be subtracted from returned text, but
2728 					 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2729 					if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
2730 						newline = 2; /* Windows newline */
2731 					} else {
2732 						newline = 1;
2733 					}
2734 
2735 					CG(increment_lineno) = 1; /* For newline before label */
2736 
					/* Rewind before the indentation and store it in the label, so
					 * ST_END_HEREDOC consumes indentation plus label. */
2737 					YYCURSOR -= indentation;
2738 					heredoc_label->indentation = indentation;
2739 
2740 					BEGIN(ST_END_HEREDOC);
2741 
2742 					goto nowdoc_scan_done;
2743 				}
2744 				/* fall through */
2745 			default:
2746 				continue;
2747 		}
2748 	}
2749 
2750 nowdoc_scan_done:
	/* newline is non-zero only when the closing label was found; that line break
	 * stays in yyleng but is left out of the value. */
2751 	yyleng = YYCURSOR - SCNG(yy_text);
2752 	ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
2753 
	/* Parser mode, no pending exception, and at least one line break was seen:
	 * strip the indentation measured on the last line examined. */
2754 	if (!EG(exception) && spacing != -1 && PARSER_MODE()) {
		/* True when the character just before this token is a line break. */
2755 		zend_bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
2756 		if (!strip_multiline_string_indentation(
2757 				zendlval, indentation, spacing == HEREDOC_USING_SPACES,
2758 				newline_at_start, newline != 0)) {
2759 			RETURN_TOKEN(T_ERROR);
2760 		}
2761 	}
2762 
2763 	HANDLE_NEWLINES(yytext, yyleng - newline);
2764 	RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
2765 }
2766 
2767 
2768 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
	/* Single-character catch-all for these two states. */
	/* The cursor moved past the limit: report the end of input. */
2769 	if (YYCURSOR > YYLIMIT) {
2770 		RETURN_TOKEN(END);
2771 	}
2772 
	/* Warn about the character (not while scanning ahead inside a heredoc),
	 * then resume scanning at the restart label; no token is returned for it. */
2773 	if (!SCNG(heredoc_scan_ahead)) {
2774 		zend_error(E_COMPILE_WARNING, "Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2775 	}
2776 	goto restart;
2777 }
2778 
2779 */
2780 
2781 emit_token_with_str:
2782 	zend_copy_value(zendlval, (yytext + offset), (yyleng - offset));
2783 
2784 emit_token_with_val:
2785 	if (PARSER_MODE()) {
2786 		ZEND_ASSERT(Z_TYPE_P(zendlval) != IS_UNDEF);
2787 		elem->ast = zend_ast_create_zval_with_lineno(zendlval, start_line);
2788 	}
2789 
2790 emit_token:
2791 	if (SCNG(on_event)) {
2792 		SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
2793 	}
2794 	return token;
2795 
2796 return_whitespace:
2797 	HANDLE_NEWLINES(yytext, yyleng);
2798 	if (SCNG(on_event)) {
2799 		SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context));
2800 	}
2801 	if (PARSER_MODE()) {
2802 		start_line = CG(zend_lineno);
2803 		goto restart;
2804 	} else {
2805 		return T_WHITESPACE;
2806 	}
2807 
2808 skip_token:
2809 	if (SCNG(on_event)) {
2810 		SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
2811 	}
2812 	start_line = CG(zend_lineno);
2813 	goto restart;
2814 }
2815