xref: /PHP-7.1/Zend/zend_language_scanner.l (revision ccd4716e)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Marcus Boerger <helly@php.net>                              |
16    |          Nuno Lopes <nlopess@php.net>                                |
17    |          Scott MacVicar <scottmac@php.net>                           |
18    | Flex version authors:                                                |
19    |          Andi Gutmans <andi@zend.com>                                |
20    |          Zeev Suraski <zeev@zend.com>                                |
21    +----------------------------------------------------------------------+
22 */
23 
24 /* $Id$ */
25 
26 #if 0
27 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28 #else
29 # define YYDEBUG(s, c)
30 #endif
31 
32 #include "zend_language_scanner_defs.h"
33 
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef ZEND_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52 
/* re2c interface: the generated scanner reads YYCTYPE units through the
 * cursor/limit/marker pointers kept in the scanner globals. */
#define YYCTYPE   unsigned char
/* Requests n more input units.  The buffer is never refilled: when the request
 * would reach ZEND_MMAP_AHEAD units past YYLIMIT, the enclosing function
 * returns 0. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* The active start condition is stored in SCNG(yy_state). */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* STATE(FOO) spells the condition constant yycFOO. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Shrink the current token to its first x bytes: the cursor is moved back to
 * yytext + x and yyleng becomes x.  x is evaluated twice; it is parenthesized
 * so that any expression may be passed. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
/* Jumps to the yymore_restart label, which the using function must provide. */
#define yymore()     goto yymore_restart
72 
73 /* perform sanity check. If this message is triggered you should
74    increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79 
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83 
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87 
88 /* Globals Macros */
89 #define SCNG	LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95 
/* Advance CG(zend_lineno) once per line break found in the l bytes starting
 * at s.  "\n", a lone "\r" and the pair "\r\n" each count as one break: a
 * '\r' directly followed by '\n' is skipped because the '\n' is counted on
 * the next iteration.  When the last byte is '\r' the byte at s[l] is
 * inspected as well. */
#define HANDLE_NEWLINES(s, l)													\
do {																			\
	char *cur = (s), *stop = cur + (l);											\
																				\
	for (; cur < stop; cur++) {													\
		if (*cur == '\r' && *(cur + 1) == '\n') {								\
			continue;															\
		}																		\
		if (*cur == '\n' || *cur == '\r') {										\
			CG(zend_lineno)++;													\
		}																		\
	}																			\
} while (0)
107 
/* Single-character variant: bump CG(zend_lineno) when c is '\n' or '\r'.
 * c may be evaluated twice. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno) += 1; \
	} \
}
114 
115 /* To save initial string length after scanning to first variable */
116 #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
117 #define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)
118 
/* True for a byte that may begin a label: an ASCII letter, '_', or any value
 * of 0x80 and above. */
#define IS_LABEL_START(c) \
	(('a' <= (c) && (c) <= 'z') || \
	 ('A' <= (c) && (c) <= 'Z') || \
	 (c) == '_' || \
	 (c) >= 0x80)

/* Digit classification for octal and hexadecimal escapes. */
#define ZEND_IS_OCT(c)  ('0' <= (c) && (c) <= '7')
#define ZEND_IS_HEX(c)  (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
123 
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125 
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127 {
128 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129 	ZEND_ASSERT(internal_encoding);
130 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131 }
132 
encoding_filter_script_to_intermediate(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)133 static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134 {
135 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136 }
137 
encoding_filter_intermediate_to_script(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)138 static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139 {
140 	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141 LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142 }
143 
encoding_filter_intermediate_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)144 static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145 {
146 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147 	ZEND_ASSERT(internal_encoding);
148 	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149 internal_encoding, zend_multibyte_encoding_utf8);
150 }
151 
152 
_yy_push_state(int new_state)153 static void _yy_push_state(int new_state)
154 {
155 	zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156 	YYSETCONDITION(new_state);
157 }
158 
159 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160 
yy_pop_state(void)161 static void yy_pop_state(void)
162 {
163 	int *stack_state = zend_stack_top(&SCNG(state_stack));
164 	YYSETCONDITION(*stack_state);
165 	zend_stack_del_top(&SCNG(state_stack));
166 }
167 
yy_scan_buffer(char * str,unsigned int len)168 static void yy_scan_buffer(char *str, unsigned int len)
169 {
170 	YYCURSOR       = (YYCTYPE*)str;
171 	YYLIMIT        = YYCURSOR + len;
172 	if (!SCNG(yy_start)) {
173 		SCNG(yy_start) = YYCURSOR;
174 	}
175 }
176 
startup_scanner(void)177 void startup_scanner(void)
178 {
179 	CG(parse_error) = 0;
180 	CG(doc_comment) = NULL;
181 	CG(extra_fn_flags) = 0;
182 	zend_stack_init(&SCNG(state_stack), sizeof(int));
183 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
184 }
185 
/* Element destructor for the heredoc label stack: frees the label string
 * owned by heredoc_label (the structure itself is not freed here). */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
189 
shutdown_scanner(void)190 void shutdown_scanner(void)
191 {
192 	CG(parse_error) = 0;
193 	RESET_DOC_COMMENT();
194 	zend_stack_destroy(&SCNG(state_stack));
195 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
196 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
197 	SCNG(on_event) = NULL;
198 }
199 
zend_save_lexical_state(zend_lex_state * lex_state)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
201 {
202 	lex_state->yy_leng   = SCNG(yy_leng);
203 	lex_state->yy_start  = SCNG(yy_start);
204 	lex_state->yy_text   = SCNG(yy_text);
205 	lex_state->yy_cursor = SCNG(yy_cursor);
206 	lex_state->yy_marker = SCNG(yy_marker);
207 	lex_state->yy_limit  = SCNG(yy_limit);
208 
209 	lex_state->state_stack = SCNG(state_stack);
210 	zend_stack_init(&SCNG(state_stack), sizeof(int));
211 
212 	lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214 
215 	lex_state->in = SCNG(yy_in);
216 	lex_state->yy_state = YYSTATE;
217 	lex_state->filename = zend_get_compiled_filename();
218 	lex_state->lineno = CG(zend_lineno);
219 
220 	lex_state->script_org = SCNG(script_org);
221 	lex_state->script_org_size = SCNG(script_org_size);
222 	lex_state->script_filtered = SCNG(script_filtered);
223 	lex_state->script_filtered_size = SCNG(script_filtered_size);
224 	lex_state->input_filter = SCNG(input_filter);
225 	lex_state->output_filter = SCNG(output_filter);
226 	lex_state->script_encoding = SCNG(script_encoding);
227 
228 	lex_state->on_event = SCNG(on_event);
229 	lex_state->on_event_context = SCNG(on_event_context);
230 
231 	lex_state->ast = CG(ast);
232 	lex_state->ast_arena = CG(ast_arena);
233 }
234 
zend_restore_lexical_state(zend_lex_state * lex_state)235 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
236 {
237 	SCNG(yy_leng)   = lex_state->yy_leng;
238 	SCNG(yy_start)  = lex_state->yy_start;
239 	SCNG(yy_text)   = lex_state->yy_text;
240 	SCNG(yy_cursor) = lex_state->yy_cursor;
241 	SCNG(yy_marker) = lex_state->yy_marker;
242 	SCNG(yy_limit)  = lex_state->yy_limit;
243 
244 	zend_stack_destroy(&SCNG(state_stack));
245 	SCNG(state_stack) = lex_state->state_stack;
246 
247 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
248 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
249 	SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
250 
251 	SCNG(yy_in) = lex_state->in;
252 	YYSETCONDITION(lex_state->yy_state);
253 	CG(zend_lineno) = lex_state->lineno;
254 	zend_restore_compiled_filename(lex_state->filename);
255 
256 	if (SCNG(script_filtered)) {
257 		efree(SCNG(script_filtered));
258 		SCNG(script_filtered) = NULL;
259 	}
260 	SCNG(script_org) = lex_state->script_org;
261 	SCNG(script_org_size) = lex_state->script_org_size;
262 	SCNG(script_filtered) = lex_state->script_filtered;
263 	SCNG(script_filtered_size) = lex_state->script_filtered_size;
264 	SCNG(input_filter) = lex_state->input_filter;
265 	SCNG(output_filter) = lex_state->output_filter;
266 	SCNG(script_encoding) = lex_state->script_encoding;
267 
268 	SCNG(on_event) = lex_state->on_event;
269 	SCNG(on_event_context) = lex_state->on_event_context;
270 
271 	CG(ast) = lex_state->ast;
272 	CG(ast_arena) = lex_state->ast_arena;
273 
274 	RESET_DOC_COMMENT();
275 }
276 
zend_destroy_file_handle(zend_file_handle * file_handle)277 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
278 {
279 	zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
280 	/* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
281 	file_handle->opened_path = NULL;
282 	if (file_handle->free_filename) {
283 		file_handle->filename = NULL;
284 	}
285 }
286 
zend_lex_tstring(zval * zv)287 ZEND_API void zend_lex_tstring(zval *zv)
288 {
289 	if (SCNG(on_event)) {
290 		SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
291 	}
292 
293 	ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
294 }
295 
296 #define BOM_UTF32_BE	"\x00\x00\xfe\xff"
297 #define	BOM_UTF32_LE	"\xff\xfe\x00\x00"
298 #define	BOM_UTF16_BE	"\xfe\xff"
299 #define	BOM_UTF16_LE	"\xff\xfe"
300 #define	BOM_UTF8		"\xef\xbb\xbf"
301 
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)302 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
303 {
304 	const unsigned char *p;
305 	int wchar_size = 2;
306 	int le = 0;
307 
308 	/* utf-16 or utf-32? */
309 	p = script;
310 	assert(p >= script);
311 	while ((size_t)(p-script) < script_size) {
312 		p = memchr(p, 0, script_size-(p-script)-2);
313 		if (!p) {
314 			break;
315 		}
316 		if (*(p+1) == '\0' && *(p+2) == '\0') {
317 			wchar_size = 4;
318 			break;
319 		}
320 
321 		/* searching for UTF-32 specific byte orders, so this will do */
322 		p += 4;
323 	}
324 
325 	/* BE or LE? */
326 	p = script;
327 	assert(p >= script);
328 	while ((size_t)(p-script) < script_size) {
329 		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
330 			/* BE */
331 			le = 0;
332 			break;
333 		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
334 			/* LE* */
335 			le = 1;
336 			break;
337 		}
338 		p += wchar_size;
339 	}
340 
341 	if (wchar_size == 2) {
342 		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
343 	} else {
344 		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
345 	}
346 
347 	return NULL;
348 }
349 
zend_multibyte_detect_unicode(void)350 static const zend_encoding* zend_multibyte_detect_unicode(void)
351 {
352 	const zend_encoding *script_encoding = NULL;
353 	int bom_size;
354 	unsigned char *pos1, *pos2;
355 
356 	if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
357 		return NULL;
358 	}
359 
360 	/* check out BOM */
361 	if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
362 		script_encoding = zend_multibyte_encoding_utf32be;
363 		bom_size = sizeof(BOM_UTF32_BE)-1;
364 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
365 		script_encoding = zend_multibyte_encoding_utf32le;
366 		bom_size = sizeof(BOM_UTF32_LE)-1;
367 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
368 		script_encoding = zend_multibyte_encoding_utf16be;
369 		bom_size = sizeof(BOM_UTF16_BE)-1;
370 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
371 		script_encoding = zend_multibyte_encoding_utf16le;
372 		bom_size = sizeof(BOM_UTF16_LE)-1;
373 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
374 		script_encoding = zend_multibyte_encoding_utf8;
375 		bom_size = sizeof(BOM_UTF8)-1;
376 	}
377 
378 	if (script_encoding) {
379 		/* remove BOM */
380 		LANG_SCNG(script_org) += bom_size;
381 		LANG_SCNG(script_org_size) -= bom_size;
382 
383 		return script_encoding;
384 	}
385 
386 	/* script contains NULL bytes -> auto-detection */
387 	if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
388 		/* check if the NULL byte is after the __HALT_COMPILER(); */
389 		pos2 = LANG_SCNG(script_org);
390 
391 		while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
392 			pos2 = memchr(pos2, '_', pos1 - pos2);
393 			if (!pos2) break;
394 			pos2++;
395 			if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
396 				pos2 += sizeof("_HALT_COMPILER")-1;
397 				while (*pos2 == ' '  ||
398 					   *pos2 == '\t' ||
399 					   *pos2 == '\r' ||
400 					   *pos2 == '\n') {
401 					pos2++;
402 				}
403 				if (*pos2 == '(') {
404 					pos2++;
405 					while (*pos2 == ' '  ||
406 						   *pos2 == '\t' ||
407 						   *pos2 == '\r' ||
408 						   *pos2 == '\n') {
409 						pos2++;
410 					}
411 					if (*pos2 == ')') {
412 						pos2++;
413 						while (*pos2 == ' '  ||
414 							   *pos2 == '\t' ||
415 							   *pos2 == '\r' ||
416 							   *pos2 == '\n') {
417 							pos2++;
418 						}
419 						if (*pos2 == ';') {
420 							return NULL;
421 						}
422 					}
423 				}
424 			}
425 		}
426 		/* make best effort if BOM is missing */
427 		return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
428 	}
429 
430 	return NULL;
431 }
432 
zend_multibyte_find_script_encoding(void)433 static const zend_encoding* zend_multibyte_find_script_encoding(void)
434 {
435 	const zend_encoding *script_encoding;
436 
437 	if (CG(detect_unicode)) {
438 		/* check out bom(byte order mark) and see if containing wchars */
439 		script_encoding = zend_multibyte_detect_unicode();
440 		if (script_encoding != NULL) {
441 			/* bom or wchar detection is prior to 'script_encoding' option */
442 			return script_encoding;
443 		}
444 	}
445 
446 	/* if no script_encoding specified, just leave alone */
447 	if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
448 		return NULL;
449 	}
450 
451 	/* if multiple encodings specified, detect automagically */
452 	if (CG(script_encoding_list_size) > 1) {
453 		return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
454 	}
455 
456 	return CG(script_encoding_list)[0];
457 }
458 
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)459 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
460 {
461 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
462 	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
463 
464 	if (!script_encoding) {
465 		return FAILURE;
466 	}
467 
468 	/* judge input/output filter */
469 	LANG_SCNG(script_encoding) = script_encoding;
470 	LANG_SCNG(input_filter) = NULL;
471 	LANG_SCNG(output_filter) = NULL;
472 
473 	if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
474 		if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
475 			/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
476 			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
477 			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
478 		} else {
479 			LANG_SCNG(input_filter) = NULL;
480 			LANG_SCNG(output_filter) = NULL;
481 		}
482 		return SUCCESS;
483 	}
484 
485 	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
486 		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
487 		LANG_SCNG(output_filter) = NULL;
488 	} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
489 		LANG_SCNG(input_filter) = NULL;
490 		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
491 	} else {
492 		/* both script and internal encodings are incompatible w/ flex */
493 		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
494 		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
495 	}
496 
497 	return 0;
498 }
499 
open_file_for_scanning(zend_file_handle * file_handle)500 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
501 {
502 	char *buf;
503 	size_t size, offset = 0;
504 	zend_string *compiled_filename;
505 
506 	/* The shebang line was read, get the current position to obtain the buffer start */
507 	if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
508 		if ((offset = ftell(file_handle->handle.fp)) == (size_t)-1) {
509 			offset = 0;
510 		}
511 	}
512 
513 	if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
514 		return FAILURE;
515 	}
516 
517 	zend_llist_add_element(&CG(open_files), file_handle);
518 	if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
519 		zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
520 		size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
521 		fh->handle.stream.handle = (void*)(((char*)fh) + diff);
522 		file_handle->handle.stream.handle = fh->handle.stream.handle;
523 	}
524 
525 	/* Reset the scanner for scanning the new file */
526 	SCNG(yy_in) = file_handle;
527 	SCNG(yy_start) = NULL;
528 
529 	if (size != (size_t)-1) {
530 		if (CG(multibyte)) {
531 			SCNG(script_org) = (unsigned char*)buf;
532 			SCNG(script_org_size) = size;
533 			SCNG(script_filtered) = NULL;
534 
535 			zend_multibyte_set_filter(NULL);
536 
537 			if (SCNG(input_filter)) {
538 				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
539 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
540 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
541 				}
542 				buf = (char*)SCNG(script_filtered);
543 				size = SCNG(script_filtered_size);
544 			}
545 		}
546 		SCNG(yy_start) = (unsigned char *)buf - offset;
547 		yy_scan_buffer(buf, (unsigned int)size);
548 	} else {
549 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
550 	}
551 
552 	BEGIN(INITIAL);
553 
554 	if (file_handle->opened_path) {
555 		compiled_filename = zend_string_copy(file_handle->opened_path);
556 	} else {
557 		compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
558 	}
559 
560 	zend_set_compiled_filename(compiled_filename);
561 	zend_string_release(compiled_filename);
562 
563 	if (CG(start_lineno)) {
564 		CG(zend_lineno) = CG(start_lineno);
565 		CG(start_lineno) = 0;
566 	} else {
567 		CG(zend_lineno) = 1;
568 	}
569 
570 	RESET_DOC_COMMENT();
571 	CG(increment_lineno) = 0;
572 	return SUCCESS;
573 }
END_EXTERN_C()574 END_EXTERN_C()
575 
576 static zend_op_array *zend_compile(int type)
577 {
578 	zend_op_array *op_array = NULL;
579 	zend_bool original_in_compilation = CG(in_compilation);
580 
581 	CG(in_compilation) = 1;
582 	CG(ast) = NULL;
583 	CG(ast_arena) = zend_arena_create(1024 * 32);
584 
585 	if (!zendparse()) {
586 		int last_lineno = CG(zend_lineno);
587 		zend_file_context original_file_context;
588 		zend_oparray_context original_oparray_context;
589 		zend_op_array *original_active_op_array = CG(active_op_array);
590 
591 		op_array = emalloc(sizeof(zend_op_array));
592 		init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
593 		CG(active_op_array) = op_array;
594 
595 		if (zend_ast_process) {
596 			zend_ast_process(CG(ast));
597 		}
598 
599 		zend_file_context_begin(&original_file_context);
600 		zend_oparray_context_begin(&original_oparray_context);
601 		zend_compile_top_stmt(CG(ast));
602 		CG(zend_lineno) = last_lineno;
603 		zend_emit_final_return(type == ZEND_USER_FUNCTION);
604 		op_array->line_start = 1;
605 		op_array->line_end = last_lineno;
606 		pass_two(op_array);
607 		zend_oparray_context_end(&original_oparray_context);
608 		zend_file_context_end(&original_file_context);
609 
610 		CG(active_op_array) = original_active_op_array;
611 	}
612 
613 	zend_ast_destroy(CG(ast));
614 	zend_arena_destroy(CG(ast_arena));
615 
616 	CG(in_compilation) = original_in_compilation;
617 
618 	return op_array;
619 }
620 
compile_file(zend_file_handle * file_handle,int type)621 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
622 {
623 	zend_lex_state original_lex_state;
624 	zend_op_array *op_array = NULL;
625 	zend_save_lexical_state(&original_lex_state);
626 
627 	if (open_file_for_scanning(file_handle)==FAILURE) {
628 		if (type==ZEND_REQUIRE) {
629 			zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
630 			zend_bailout();
631 		} else {
632 			zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
633 		}
634 	} else {
635 		op_array = zend_compile(ZEND_USER_FUNCTION);
636 	}
637 
638 	zend_restore_lexical_state(&original_lex_state);
639 	return op_array;
640 }
641 
642 
compile_filename(int type,zval * filename)643 zend_op_array *compile_filename(int type, zval *filename)
644 {
645 	zend_file_handle file_handle;
646 	zval tmp;
647 	zend_op_array *retval;
648 	zend_string *opened_path = NULL;
649 
650 	if (Z_TYPE_P(filename) != IS_STRING) {
651 		tmp = *filename;
652 		zval_copy_ctor(&tmp);
653 		convert_to_string(&tmp);
654 		filename = &tmp;
655 	}
656 	file_handle.filename = Z_STRVAL_P(filename);
657 	file_handle.free_filename = 0;
658 	file_handle.type = ZEND_HANDLE_FILENAME;
659 	file_handle.opened_path = NULL;
660 	file_handle.handle.fp = NULL;
661 
662 	retval = zend_compile_file(&file_handle, type);
663 	if (retval && file_handle.handle.stream.handle) {
664 		if (!file_handle.opened_path) {
665 			file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
666 		}
667 
668 		zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
669 
670 		if (opened_path) {
671 			zend_string_release(opened_path);
672 		}
673 	}
674 	zend_destroy_file_handle(&file_handle);
675 
676 	if (filename==&tmp) {
677 		zval_dtor(&tmp);
678 	}
679 	return retval;
680 }
681 
zend_prepare_string_for_scanning(zval * str,char * filename)682 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
683 {
684 	char *buf;
685 	size_t size, old_len;
686 	zend_string *new_compiled_filename;
687 
688 	/* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
689 	old_len = Z_STRLEN_P(str);
690 	Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
691 	Z_TYPE_INFO_P(str) = IS_STRING_EX;
692 	memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
693 
694 	SCNG(yy_in) = NULL;
695 	SCNG(yy_start) = NULL;
696 
697 	buf = Z_STRVAL_P(str);
698 	size = old_len;
699 
700 	if (CG(multibyte)) {
701 		SCNG(script_org) = (unsigned char*)buf;
702 		SCNG(script_org_size) = size;
703 		SCNG(script_filtered) = NULL;
704 
705 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
706 
707 		if (SCNG(input_filter)) {
708 			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
709 				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
710 						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
711 			}
712 			buf = (char*)SCNG(script_filtered);
713 			size = SCNG(script_filtered_size);
714 		}
715 	}
716 
717 	yy_scan_buffer(buf, (unsigned int)size);
718 
719 	new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
720 	zend_set_compiled_filename(new_compiled_filename);
721 	zend_string_release(new_compiled_filename);
722 	CG(zend_lineno) = 1;
723 	CG(increment_lineno) = 0;
724 	RESET_DOC_COMMENT();
725 	return SUCCESS;
726 }
727 
728 
zend_get_scanned_file_offset(void)729 ZEND_API size_t zend_get_scanned_file_offset(void)
730 {
731 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
732 	if (SCNG(input_filter)) {
733 		size_t original_offset = offset, length = 0;
734 		do {
735 			unsigned char *p = NULL;
736 			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
737 				return (size_t)-1;
738 			}
739 			efree(p);
740 			if (length > original_offset) {
741 				offset--;
742 			} else if (length < original_offset) {
743 				offset++;
744 			}
745 		} while (original_offset != length);
746 	}
747 	return offset;
748 }
749 
compile_string(zval * source_string,char * filename)750 zend_op_array *compile_string(zval *source_string, char *filename)
751 {
752 	zend_lex_state original_lex_state;
753 	zend_op_array *op_array = NULL;
754 	zval tmp;
755 
756 	if (Z_STRLEN_P(source_string)==0) {
757 		return NULL;
758 	}
759 
760 	ZVAL_DUP(&tmp, source_string);
761 	convert_to_string(&tmp);
762 	source_string = &tmp;
763 
764 	zend_save_lexical_state(&original_lex_state);
765 	if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
766 		BEGIN(ST_IN_SCRIPTING);
767 		op_array = zend_compile(ZEND_EVAL_CODE);
768 	}
769 
770 	zend_restore_lexical_state(&original_lex_state);
771 	zval_dtor(&tmp);
772 
773 	return op_array;
774 }
775 
776 
BEGIN_EXTERN_C()777 BEGIN_EXTERN_C()
778 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
779 {
780 	zend_lex_state original_lex_state;
781 	zend_file_handle file_handle;
782 
783 	file_handle.type = ZEND_HANDLE_FILENAME;
784 	file_handle.filename = filename;
785 	file_handle.free_filename = 0;
786 	file_handle.opened_path = NULL;
787 	zend_save_lexical_state(&original_lex_state);
788 	if (open_file_for_scanning(&file_handle)==FAILURE) {
789 		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
790 		zend_restore_lexical_state(&original_lex_state);
791 		return FAILURE;
792 	}
793 	zend_highlight(syntax_highlighter_ini);
794 	if (SCNG(script_filtered)) {
795 		efree(SCNG(script_filtered));
796 		SCNG(script_filtered) = NULL;
797 	}
798 	zend_destroy_file_handle(&file_handle);
799 	zend_restore_lexical_state(&original_lex_state);
800 	return SUCCESS;
801 }
802 
/* Syntax-highlight the code in `str`, reported under the name str_name.
 *
 * Works on a private copy of the value, so the caller's zval is not modified.
 * Scanning starts in the INITIAL condition.  Returns FAILURE when the string
 * cannot be prepared for scanning, SUCCESS otherwise; the scanner state is
 * restored on both paths. */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zval source_copy = *str;
	zend_lex_state original_lex_state;

	zval_copy_ctor(&source_copy);

	zend_save_lexical_state(&original_lex_state);
	if (zend_prepare_string_for_scanning(&source_copy, str_name) == FAILURE) {
		zend_restore_lexical_state(&original_lex_state);
		return FAILURE;
	}

	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini);

	/* drop the converted copy made for this string, if any */
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}

	zend_restore_lexical_state(&original_lex_state);
	zval_dtor(&source_copy);
	return SUCCESS;
}
825 
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)826 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
827 {
828 	size_t length;
829 	unsigned char *new_yy_start;
830 
831 	/* convert and set */
832 	if (!SCNG(input_filter)) {
833 		if (SCNG(script_filtered)) {
834 			efree(SCNG(script_filtered));
835 			SCNG(script_filtered) = NULL;
836 		}
837 		SCNG(script_filtered_size) = 0;
838 		length = SCNG(script_org_size);
839 		new_yy_start = SCNG(script_org);
840 	} else {
841 		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
842 			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
843 					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
844 		}
845 		if (SCNG(script_filtered)) {
846 			efree(SCNG(script_filtered));
847 		}
848 		SCNG(script_filtered) = new_yy_start;
849 		SCNG(script_filtered_size) = length;
850 	}
851 
852 	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
853 	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
854 	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
855 	SCNG(yy_limit) = new_yy_start + length;
856 
857 	SCNG(yy_start) = new_yy_start;
858 }
859 
860 
/* Store yyleng bytes starting at yytext in zendlval as a string.  When an
 * output filter is installed the bytes are run through it first and the
 * temporary converted buffer is freed after copying.
 * The expansion is a bare if/else statement, not a do { } while (0) block.
 * TODO: avoid reallocation ??? */
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (!SCNG(output_filter)) { \
		ZVAL_STRINGL(zendlval, yytext, yyleng); \
	} else { \
		char *filtered = NULL; \
		size_t filtered_len = 0; \
		SCNG(output_filter)((unsigned char **)&filtered, &filtered_len, (unsigned char *)yytext, (size_t)yyleng); \
		ZVAL_STRINGL(zendlval, filtered, filtered_len); \
		efree(filtered); \
	}
872 
/*
 * Copies the len bytes at str into zendlval as a string and decodes its
 * backslash escape sequences in place.
 *
 * Recognised sequences: \n \r \t \f \v \e, \\ and \$, the escaped form of
 * quote_type (only when quote_type is '"' or '`'), \xH / \xHH (also \X),
 * \u{H...} (encoded as UTF-8) and one to three octal digits. Any other
 * backslash sequence is copied through unchanged, including a lone
 * trailing backslash.
 *
 * zendlval   receives the decoded string
 * str, len   raw bytes of the string fragment (escapes not yet decoded)
 * quote_type quote character whose escaped form is reduced to the bare
 *            character; the other of '"' / '`' keeps its backslash
 *
 * Side effects: CG(zend_lineno) is advanced for every line break seen in
 * the fragment; an octal escape above \377 raises E_COMPILE_WARNING. When
 * an output filter is installed the decoded string is passed through it
 * and zendlval is replaced with the filtered bytes.
 *
 * Returns SUCCESS, or FAILURE after throwing a parse error for a malformed
 * or out-of-range \u{...} sequence; zendlval is left UNDEF in that case.
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *s, *t;
	char *end;

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences in place: s is the read position, t the
	 * write position; no sequence decodes to more bytes than it occupies */
	s = t = Z_STRVAL_P(zendlval);
	end = s+Z_STRLEN_P(zendlval);
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* lone backslash at the very end: keep it literally */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'r':
					*t++ = '\r';
					Z_STRLEN_P(zendlval)--;
					break;
				case 't':
					*t++ = '\t';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'f':
					*t++ = '\f';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'v':
					*t++ = '\v';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'e':
					/* ESC (0x1B), spelled as an octal escape because
					 * '\e' is a compiler extension, not standard C */
					*t++ = '\033';
					Z_STRLEN_P(zendlval)--;
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						/* not our delimiter: the backslash stays */
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*t++ = *s;
					Z_STRLEN_P(zendlval)--;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						Z_STRLEN_P(zendlval)--; /* for the 'x' */

						hex_buf[0] = *(++s);
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
						}
						*t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
					} else {
						/* "\x" without a hex digit is copied as-is */
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
					{
						/* cache where we started so we can parse after validating */
						char *start = s + 1;
						/* number of bytes in "{...}", braces included */
						size_t seq_len = 0;
						zend_bool valid = 1;
						unsigned long codepoint;
						size_t byte_len = 0;

						if (*start != '{') {
							/* we silently let this pass to avoid breaking code
							 * with JSON in string literals (e.g. "\"\u202e\""
							 */
							*t++ = '\\';
							*t++ = 'u';
							break;
						} else {
							/* on the other hand, invalid \u{blah} errors */
							s++;
							seq_len++;
							s++;
							/* the string is NUL-terminated and NUL is not a
							 * hex digit, so this scan stops at the terminator
							 * at the latest */
							while (*s != '}') {
								if (!ZEND_IS_HEX(*s)) {
									valid = 0;
									break;
								} else {
									seq_len++;
								}
								s++;
							}
							if (*s == '}') {
								valid = 1;
								seq_len++;
							}
						}

						/* \u{} is invalid */
						if (seq_len <= 2) {
							valid = 0;
						}

						if (!valid) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						errno = 0;
						codepoint = strtoul(start + 1, NULL, 16);

						/* per RFC 3629, UTF-8 can only represent 21 bits */
						if (codepoint > 0x10FFFF || errno) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
						if (codepoint < 0x80) {
							byte_len = 1;
							*t++ = codepoint;
						} else if (codepoint <= 0x7FF) {
							byte_len = 2;
							*t++ = (codepoint >> 6) + 0xC0;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0xFFFF) {
							byte_len = 3;
							*t++ = (codepoint >> 12) + 0xE0;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0x10FFFF) {
							byte_len = 4;
							*t++ = (codepoint >> 18) + 0xF0;
							*t++ = ((codepoint >> 12) & 0x3F) + 0x80;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						}

						Z_STRLEN_P(zendlval) -= 2; /* \u */
						Z_STRLEN_P(zendlval) -= (seq_len - byte_len);
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
								Z_STRLEN_P(zendlval)--;
							}
						}
						if (octal_buf[2] &&
						    (octal_buf[0] > '3')) {
							/* 3 octit values must not overflow 0xFF (\377) */
							zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
						}

						*t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
					} else {
						/* unknown escape: keep backslash and character */
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* count "\n", a lone "\r", and "\r\n" (on its "\n") as one line each */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	if (SCNG(output_filter)) {
		size_t sz = 0;
		unsigned char *filtered = NULL;
		// TODO: avoid reallocation ???
		s = Z_STRVAL_P(zendlval);
		SCNG(output_filter)(&filtered, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
		/* replace the decoded string with its filtered form */
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, sz);
		efree(filtered);
	}
	return SUCCESS;
}
1088 
emit_token(int token,int token_line)1089 static zend_always_inline int emit_token(int token, int token_line)
1090 {
1091 	if (SCNG(on_event)) {
1092 		SCNG(on_event)(ON_TOKEN, token, token_line, SCNG(on_event_context));
1093 	}
1094 
1095 	return token;
1096 }
1097 
1098 #define RETURN_TOKEN(token) return emit_token(token, start_line);
1099 
lex_scan(zval * zendlval)1100 int lex_scan(zval *zendlval)
1101 {
1102 
1103 int start_line = CG(zend_lineno);
1104 
1105 restart:
1106 	SCNG(yy_text) = YYCURSOR;
1107 
1108 /*!re2c
1109 re2c:yyfill:check = 0;
1110 LNUM	[0-9]+
1111 DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1112 EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
1113 HNUM	"0x"[0-9a-fA-F]+
1114 BNUM	"0b"[01]+
1115 LABEL	[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1116 WHITESPACE [ \n\r\t]+
1117 TABS_AND_SPACES [ \t]*
1118 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1119 ANY_CHAR [^]
1120 NEWLINE ("\r"|"\n"|"\r\n")
1121 
1122 /* compute yyleng before each rule */
1123 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1124 
1125 <ST_IN_SCRIPTING>"exit" {
1126 	RETURN_TOKEN(T_EXIT);
1127 }
1128 
1129 <ST_IN_SCRIPTING>"die" {
1130 	RETURN_TOKEN(T_EXIT);
1131 }
1132 
1133 <ST_IN_SCRIPTING>"function" {
1134 	RETURN_TOKEN(T_FUNCTION);
1135 }
1136 
1137 <ST_IN_SCRIPTING>"const" {
1138 	RETURN_TOKEN(T_CONST);
1139 }
1140 
1141 <ST_IN_SCRIPTING>"return" {
1142 	RETURN_TOKEN(T_RETURN);
1143 }
1144 
1145 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
1146 	yyless(yyleng - 1);
1147 	HANDLE_NEWLINES(yytext, yyleng);
1148 	RETURN_TOKEN(T_YIELD_FROM);
1149 }
1150 
1151 <ST_IN_SCRIPTING>"yield" {
1152 	RETURN_TOKEN(T_YIELD);
1153 }
1154 
1155 <ST_IN_SCRIPTING>"try" {
1156 	RETURN_TOKEN(T_TRY);
1157 }
1158 
1159 <ST_IN_SCRIPTING>"catch" {
1160 	RETURN_TOKEN(T_CATCH);
1161 }
1162 
1163 <ST_IN_SCRIPTING>"finally" {
1164 	RETURN_TOKEN(T_FINALLY);
1165 }
1166 
1167 <ST_IN_SCRIPTING>"throw" {
1168 	RETURN_TOKEN(T_THROW);
1169 }
1170 
1171 <ST_IN_SCRIPTING>"if" {
1172 	RETURN_TOKEN(T_IF);
1173 }
1174 
1175 <ST_IN_SCRIPTING>"elseif" {
1176 	RETURN_TOKEN(T_ELSEIF);
1177 }
1178 
1179 <ST_IN_SCRIPTING>"endif" {
1180 	RETURN_TOKEN(T_ENDIF);
1181 }
1182 
1183 <ST_IN_SCRIPTING>"else" {
1184 	RETURN_TOKEN(T_ELSE);
1185 }
1186 
1187 <ST_IN_SCRIPTING>"while" {
1188 	RETURN_TOKEN(T_WHILE);
1189 }
1190 
1191 <ST_IN_SCRIPTING>"endwhile" {
1192 	RETURN_TOKEN(T_ENDWHILE);
1193 }
1194 
1195 <ST_IN_SCRIPTING>"do" {
1196 	RETURN_TOKEN(T_DO);
1197 }
1198 
1199 <ST_IN_SCRIPTING>"for" {
1200 	RETURN_TOKEN(T_FOR);
1201 }
1202 
1203 <ST_IN_SCRIPTING>"endfor" {
1204 	RETURN_TOKEN(T_ENDFOR);
1205 }
1206 
1207 <ST_IN_SCRIPTING>"foreach" {
1208 	RETURN_TOKEN(T_FOREACH);
1209 }
1210 
1211 <ST_IN_SCRIPTING>"endforeach" {
1212 	RETURN_TOKEN(T_ENDFOREACH);
1213 }
1214 
1215 <ST_IN_SCRIPTING>"declare" {
1216 	RETURN_TOKEN(T_DECLARE);
1217 }
1218 
1219 <ST_IN_SCRIPTING>"enddeclare" {
1220 	RETURN_TOKEN(T_ENDDECLARE);
1221 }
1222 
1223 <ST_IN_SCRIPTING>"instanceof" {
1224 	RETURN_TOKEN(T_INSTANCEOF);
1225 }
1226 
1227 <ST_IN_SCRIPTING>"as" {
1228 	RETURN_TOKEN(T_AS);
1229 }
1230 
1231 <ST_IN_SCRIPTING>"switch" {
1232 	RETURN_TOKEN(T_SWITCH);
1233 }
1234 
1235 <ST_IN_SCRIPTING>"endswitch" {
1236 	RETURN_TOKEN(T_ENDSWITCH);
1237 }
1238 
1239 <ST_IN_SCRIPTING>"case" {
1240 	RETURN_TOKEN(T_CASE);
1241 }
1242 
1243 <ST_IN_SCRIPTING>"default" {
1244 	RETURN_TOKEN(T_DEFAULT);
1245 }
1246 
1247 <ST_IN_SCRIPTING>"break" {
1248 	RETURN_TOKEN(T_BREAK);
1249 }
1250 
1251 <ST_IN_SCRIPTING>"continue" {
1252 	RETURN_TOKEN(T_CONTINUE);
1253 }
1254 
1255 <ST_IN_SCRIPTING>"goto" {
1256 	RETURN_TOKEN(T_GOTO);
1257 }
1258 
1259 <ST_IN_SCRIPTING>"echo" {
1260 	RETURN_TOKEN(T_ECHO);
1261 }
1262 
1263 <ST_IN_SCRIPTING>"print" {
1264 	RETURN_TOKEN(T_PRINT);
1265 }
1266 
1267 <ST_IN_SCRIPTING>"class" {
1268 	RETURN_TOKEN(T_CLASS);
1269 }
1270 
1271 <ST_IN_SCRIPTING>"interface" {
1272 	RETURN_TOKEN(T_INTERFACE);
1273 }
1274 
1275 <ST_IN_SCRIPTING>"trait" {
1276 	RETURN_TOKEN(T_TRAIT);
1277 }
1278 
1279 <ST_IN_SCRIPTING>"extends" {
1280 	RETURN_TOKEN(T_EXTENDS);
1281 }
1282 
1283 <ST_IN_SCRIPTING>"implements" {
1284 	RETURN_TOKEN(T_IMPLEMENTS);
1285 }
1286 
1287 <ST_IN_SCRIPTING>"->" {
1288 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
1289 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1290 }
1291 
1292 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1293 	HANDLE_NEWLINES(yytext, yyleng);
1294 	RETURN_TOKEN(T_WHITESPACE);
1295 }
1296 
1297 <ST_LOOKING_FOR_PROPERTY>"->" {
1298 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1299 }
1300 
1301 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1302 	yy_pop_state();
1303 	zend_copy_value(zendlval, yytext, yyleng);
1304 	RETURN_TOKEN(T_STRING);
1305 }
1306 
1307 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1308 	yyless(0);
1309 	yy_pop_state();
1310 	goto restart;
1311 }
1312 
1313 <ST_IN_SCRIPTING>"::" {
1314 	RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
1315 }
1316 
1317 <ST_IN_SCRIPTING>"\\" {
1318 	RETURN_TOKEN(T_NS_SEPARATOR);
1319 }
1320 
1321 <ST_IN_SCRIPTING>"..." {
1322 	RETURN_TOKEN(T_ELLIPSIS);
1323 }
1324 
1325 <ST_IN_SCRIPTING>"??" {
1326 	RETURN_TOKEN(T_COALESCE);
1327 }
1328 
1329 <ST_IN_SCRIPTING>"new" {
1330 	RETURN_TOKEN(T_NEW);
1331 }
1332 
1333 <ST_IN_SCRIPTING>"clone" {
1334 	RETURN_TOKEN(T_CLONE);
1335 }
1336 
1337 <ST_IN_SCRIPTING>"var" {
1338 	RETURN_TOKEN(T_VAR);
1339 }
1340 
1341 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1342 	RETURN_TOKEN(T_INT_CAST);
1343 }
1344 
1345 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1346 	RETURN_TOKEN(T_DOUBLE_CAST);
1347 }
1348 
1349 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1350 	RETURN_TOKEN(T_STRING_CAST);
1351 }
1352 
1353 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1354 	RETURN_TOKEN(T_ARRAY_CAST);
1355 }
1356 
1357 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1358 	RETURN_TOKEN(T_OBJECT_CAST);
1359 }
1360 
1361 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1362 	RETURN_TOKEN(T_BOOL_CAST);
1363 }
1364 
1365 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1366 	RETURN_TOKEN(T_UNSET_CAST);
1367 }
1368 
1369 <ST_IN_SCRIPTING>"eval" {
1370 	RETURN_TOKEN(T_EVAL);
1371 }
1372 
1373 <ST_IN_SCRIPTING>"include" {
1374 	RETURN_TOKEN(T_INCLUDE);
1375 }
1376 
1377 <ST_IN_SCRIPTING>"include_once" {
1378 	RETURN_TOKEN(T_INCLUDE_ONCE);
1379 }
1380 
1381 <ST_IN_SCRIPTING>"require" {
1382 	RETURN_TOKEN(T_REQUIRE);
1383 }
1384 
1385 <ST_IN_SCRIPTING>"require_once" {
1386 	RETURN_TOKEN(T_REQUIRE_ONCE);
1387 }
1388 
1389 <ST_IN_SCRIPTING>"namespace" {
1390 	RETURN_TOKEN(T_NAMESPACE);
1391 }
1392 
1393 <ST_IN_SCRIPTING>"use" {
1394 	RETURN_TOKEN(T_USE);
1395 }
1396 
1397 <ST_IN_SCRIPTING>"insteadof" {
1398     RETURN_TOKEN(T_INSTEADOF);
1399 }
1400 
1401 <ST_IN_SCRIPTING>"global" {
1402 	RETURN_TOKEN(T_GLOBAL);
1403 }
1404 
1405 <ST_IN_SCRIPTING>"isset" {
1406 	RETURN_TOKEN(T_ISSET);
1407 }
1408 
1409 <ST_IN_SCRIPTING>"empty" {
1410 	RETURN_TOKEN(T_EMPTY);
1411 }
1412 
1413 <ST_IN_SCRIPTING>"__halt_compiler" {
1414 	RETURN_TOKEN(T_HALT_COMPILER);
1415 }
1416 
1417 <ST_IN_SCRIPTING>"static" {
1418 	RETURN_TOKEN(T_STATIC);
1419 }
1420 
1421 <ST_IN_SCRIPTING>"abstract" {
1422 	RETURN_TOKEN(T_ABSTRACT);
1423 }
1424 
1425 <ST_IN_SCRIPTING>"final" {
1426 	RETURN_TOKEN(T_FINAL);
1427 }
1428 
1429 <ST_IN_SCRIPTING>"private" {
1430 	RETURN_TOKEN(T_PRIVATE);
1431 }
1432 
1433 <ST_IN_SCRIPTING>"protected" {
1434 	RETURN_TOKEN(T_PROTECTED);
1435 }
1436 
1437 <ST_IN_SCRIPTING>"public" {
1438 	RETURN_TOKEN(T_PUBLIC);
1439 }
1440 
1441 <ST_IN_SCRIPTING>"unset" {
1442 	RETURN_TOKEN(T_UNSET);
1443 }
1444 
1445 <ST_IN_SCRIPTING>"=>" {
1446 	RETURN_TOKEN(T_DOUBLE_ARROW);
1447 }
1448 
1449 <ST_IN_SCRIPTING>"list" {
1450 	RETURN_TOKEN(T_LIST);
1451 }
1452 
1453 <ST_IN_SCRIPTING>"array" {
1454 	RETURN_TOKEN(T_ARRAY);
1455 }
1456 
1457 <ST_IN_SCRIPTING>"callable" {
1458 	RETURN_TOKEN(T_CALLABLE);
1459 }
1460 
1461 <ST_IN_SCRIPTING>"++" {
1462 	RETURN_TOKEN(T_INC);
1463 }
1464 
1465 <ST_IN_SCRIPTING>"--" {
1466 	RETURN_TOKEN(T_DEC);
1467 }
1468 
1469 <ST_IN_SCRIPTING>"===" {
1470 	RETURN_TOKEN(T_IS_IDENTICAL);
1471 }
1472 
1473 <ST_IN_SCRIPTING>"!==" {
1474 	RETURN_TOKEN(T_IS_NOT_IDENTICAL);
1475 }
1476 
1477 <ST_IN_SCRIPTING>"==" {
1478 	RETURN_TOKEN(T_IS_EQUAL);
1479 }
1480 
1481 <ST_IN_SCRIPTING>"!="|"<>" {
1482 	RETURN_TOKEN(T_IS_NOT_EQUAL);
1483 }
1484 
1485 <ST_IN_SCRIPTING>"<=>" {
1486 	RETURN_TOKEN(T_SPACESHIP);
1487 }
1488 
1489 <ST_IN_SCRIPTING>"<=" {
1490 	RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
1491 }
1492 
1493 <ST_IN_SCRIPTING>">=" {
1494 	RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
1495 }
1496 
1497 <ST_IN_SCRIPTING>"+=" {
1498 	RETURN_TOKEN(T_PLUS_EQUAL);
1499 }
1500 
1501 <ST_IN_SCRIPTING>"-=" {
1502 	RETURN_TOKEN(T_MINUS_EQUAL);
1503 }
1504 
1505 <ST_IN_SCRIPTING>"*=" {
1506 	RETURN_TOKEN(T_MUL_EQUAL);
1507 }
1508 
1509 <ST_IN_SCRIPTING>"*\*" {
1510 	RETURN_TOKEN(T_POW);
1511 }
1512 
1513 <ST_IN_SCRIPTING>"*\*=" {
1514 	RETURN_TOKEN(T_POW_EQUAL);
1515 }
1516 
1517 <ST_IN_SCRIPTING>"/=" {
1518 	RETURN_TOKEN(T_DIV_EQUAL);
1519 }
1520 
1521 <ST_IN_SCRIPTING>".=" {
1522 	RETURN_TOKEN(T_CONCAT_EQUAL);
1523 }
1524 
1525 <ST_IN_SCRIPTING>"%=" {
1526 	RETURN_TOKEN(T_MOD_EQUAL);
1527 }
1528 
1529 <ST_IN_SCRIPTING>"<<=" {
1530 	RETURN_TOKEN(T_SL_EQUAL);
1531 }
1532 
1533 <ST_IN_SCRIPTING>">>=" {
1534 	RETURN_TOKEN(T_SR_EQUAL);
1535 }
1536 
1537 <ST_IN_SCRIPTING>"&=" {
1538 	RETURN_TOKEN(T_AND_EQUAL);
1539 }
1540 
1541 <ST_IN_SCRIPTING>"|=" {
1542 	RETURN_TOKEN(T_OR_EQUAL);
1543 }
1544 
1545 <ST_IN_SCRIPTING>"^=" {
1546 	RETURN_TOKEN(T_XOR_EQUAL);
1547 }
1548 
1549 <ST_IN_SCRIPTING>"||" {
1550 	RETURN_TOKEN(T_BOOLEAN_OR);
1551 }
1552 
1553 <ST_IN_SCRIPTING>"&&" {
1554 	RETURN_TOKEN(T_BOOLEAN_AND);
1555 }
1556 
1557 <ST_IN_SCRIPTING>"OR" {
1558 	RETURN_TOKEN(T_LOGICAL_OR);
1559 }
1560 
1561 <ST_IN_SCRIPTING>"AND" {
1562 	RETURN_TOKEN(T_LOGICAL_AND);
1563 }
1564 
1565 <ST_IN_SCRIPTING>"XOR" {
1566 	RETURN_TOKEN(T_LOGICAL_XOR);
1567 }
1568 
1569 <ST_IN_SCRIPTING>"<<" {
1570 	RETURN_TOKEN(T_SL);
1571 }
1572 
1573 <ST_IN_SCRIPTING>">>" {
1574 	RETURN_TOKEN(T_SR);
1575 }
1576 
1577 <ST_IN_SCRIPTING>{TOKENS} {
1578 	RETURN_TOKEN(yytext[0]);
1579 }
1580 
1581 
1582 <ST_IN_SCRIPTING>"{" {
1583 	yy_push_state(ST_IN_SCRIPTING);
1584 	RETURN_TOKEN('{');
1585 }
1586 
1587 
1588 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1589 	yy_push_state(ST_LOOKING_FOR_VARNAME);
1590 	RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
1591 }
1592 
1593 
1594 <ST_IN_SCRIPTING>"}" {
1595 	RESET_DOC_COMMENT();
1596 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
1597 		yy_pop_state();
1598 	}
1599 	RETURN_TOKEN('}');
1600 }
1601 
1602 
1603 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1604 	yyless(yyleng - 1);
1605 	zend_copy_value(zendlval, yytext, yyleng);
1606 	yy_pop_state();
1607 	yy_push_state(ST_IN_SCRIPTING);
1608 	RETURN_TOKEN(T_STRING_VARNAME);
1609 }
1610 
1611 
1612 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1613 	yyless(0);
1614 	yy_pop_state();
1615 	yy_push_state(ST_IN_SCRIPTING);
1616 	goto restart;
1617 }
1618 
1619 <ST_IN_SCRIPTING>{BNUM} {
1620 	char *bin = yytext + 2; /* Skip "0b" */
1621 	int len = yyleng - 2;
1622 	char *end;
1623 
1624 	/* Skip any leading 0s */
1625 	while (*bin == '0') {
1626 		++bin;
1627 		--len;
1628 	}
1629 
1630 	if (len < SIZEOF_ZEND_LONG * 8) {
1631 		if (len == 0) {
1632 			ZVAL_LONG(zendlval, 0);
1633 		} else {
1634 			errno = 0;
1635 			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1636 			ZEND_ASSERT(!errno && end == yytext + yyleng);
1637 		}
1638 		RETURN_TOKEN(T_LNUMBER);
1639 	} else {
1640 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1641 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1642 		ZEND_ASSERT(end == yytext + yyleng);
1643 		RETURN_TOKEN(T_DNUMBER);
1644 	}
1645 }
1646 
1647 <ST_IN_SCRIPTING>{LNUM} {
1648 	char *end;
1649 	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1650 		errno = 0;
1651 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1652 		/* This isn't an assert, we need to ensure 019 isn't valid octal
1653 		 * Because the lexing itself doesn't do that for us
1654 		 */
1655 		if (end != yytext + yyleng) {
1656 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1657 			ZVAL_UNDEF(zendlval);
1658 			RETURN_TOKEN(T_LNUMBER);
1659 		}
1660 	} else {
1661 		errno = 0;
1662 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1663 		if (errno == ERANGE) { /* Overflow */
1664 			errno = 0;
1665 			if (yytext[0] == '0') { /* octal overflow */
1666 				ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1667 			} else {
1668 				ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1669 			}
1670 			/* Also not an assert for the same reason */
1671 			if (end != yytext + yyleng) {
1672 				zend_throw_exception(zend_ce_parse_error,
1673 					"Invalid numeric literal", 0);
1674 				ZVAL_UNDEF(zendlval);
1675 				RETURN_TOKEN(T_DNUMBER);
1676 			}
1677 			RETURN_TOKEN(T_DNUMBER);
1678 		}
1679 		/* Also not an assert for the same reason */
1680 		if (end != yytext + yyleng) {
1681 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1682 			ZVAL_UNDEF(zendlval);
1683 			RETURN_TOKEN(T_DNUMBER);
1684 		}
1685 	}
1686 	ZEND_ASSERT(!errno);
1687 	RETURN_TOKEN(T_LNUMBER);
1688 }
1689 
1690 <ST_IN_SCRIPTING>{HNUM} {
1691 	char *hex = yytext + 2; /* Skip "0x" */
1692 	int len = yyleng - 2;
1693 	char *end;
1694 
1695 	/* Skip any leading 0s */
1696 	while (*hex == '0') {
1697 		hex++;
1698 		len--;
1699 	}
1700 
1701 	if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1702 		if (len == 0) {
1703 			ZVAL_LONG(zendlval, 0);
1704 		} else {
1705 			errno = 0;
1706 			ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1707 			ZEND_ASSERT(!errno && end == hex + len);
1708 		}
1709 		RETURN_TOKEN(T_LNUMBER);
1710 	} else {
1711 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1712 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1713 		ZEND_ASSERT(end == hex + len);
1714 		RETURN_TOKEN(T_DNUMBER);
1715 	}
1716 }
1717 
1718 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1719 	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1720 		char *end;
1721 		errno = 0;
1722 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1723 		if (errno == ERANGE) {
1724 			goto string;
1725 		}
1726 		ZEND_ASSERT(end == yytext + yyleng);
1727 	} else {
1728 string:
1729 		ZVAL_STRINGL(zendlval, yytext, yyleng);
1730 	}
1731 	RETURN_TOKEN(T_NUM_STRING);
1732 }
1733 
1734 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1735 	ZVAL_STRINGL(zendlval, yytext, yyleng);
1736 	RETURN_TOKEN(T_NUM_STRING);
1737 }
1738 
1739 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1740 	const char *end;
1741 
1742 	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1743 	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1744 	ZEND_ASSERT(end == yytext + yyleng);
1745 	RETURN_TOKEN(T_DNUMBER);
1746 }
1747 
1748 <ST_IN_SCRIPTING>"__CLASS__" {
1749 	RETURN_TOKEN(T_CLASS_C);
1750 }
1751 
1752 <ST_IN_SCRIPTING>"__TRAIT__" {
1753 	RETURN_TOKEN(T_TRAIT_C);
1754 }
1755 
1756 <ST_IN_SCRIPTING>"__FUNCTION__" {
1757 	RETURN_TOKEN(T_FUNC_C);
1758 }
1759 
1760 <ST_IN_SCRIPTING>"__METHOD__" {
1761 	RETURN_TOKEN(T_METHOD_C);
1762 }
1763 
1764 <ST_IN_SCRIPTING>"__LINE__" {
1765 	RETURN_TOKEN(T_LINE);
1766 }
1767 
1768 <ST_IN_SCRIPTING>"__FILE__" {
1769 	RETURN_TOKEN(T_FILE);
1770 }
1771 
1772 <ST_IN_SCRIPTING>"__DIR__" {
1773 	RETURN_TOKEN(T_DIR);
1774 }
1775 
1776 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1777 	RETURN_TOKEN(T_NS_C);
1778 }
1779 
1780 
1781 <INITIAL>"<?=" {
1782 	BEGIN(ST_IN_SCRIPTING);
1783 	RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
1784 }
1785 
1786 
1787 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1788 	HANDLE_NEWLINE(yytext[yyleng-1]);
1789 	BEGIN(ST_IN_SCRIPTING);
1790 	RETURN_TOKEN(T_OPEN_TAG);
1791 }
1792 
1793 
1794 <INITIAL>"<?" {
1795 	if (CG(short_tags)) {
1796 		BEGIN(ST_IN_SCRIPTING);
1797 		RETURN_TOKEN(T_OPEN_TAG);
1798 	} else {
1799 		goto inline_char_handler;
1800 	}
1801 }
1802 
1803 <INITIAL>{ANY_CHAR} {
1804 	if (YYCURSOR > YYLIMIT) {
1805 		RETURN_TOKEN(END);
1806 	}
1807 
1808 inline_char_handler:
1809 
1810 	while (1) {
1811 		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1812 
1813 		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1814 
1815 		if (YYCURSOR >= YYLIMIT) {
1816 			break;
1817 		}
1818 
1819 		if (*YYCURSOR == '?') {
1820 			if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1821 
1822 				YYCURSOR--;
1823 				break;
1824 			}
1825 		}
1826 	}
1827 
1828 	yyleng = YYCURSOR - SCNG(yy_text);
1829 
1830 	if (SCNG(output_filter)) {
1831 		size_t readsize;
1832 		char *s = NULL;
1833 		size_t sz = 0;
1834 		// TODO: avoid reallocation ???
1835 		readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1836 		ZVAL_STRINGL(zendlval, s, sz);
1837 		efree(s);
1838 		if (readsize < yyleng) {
1839 			yyless(readsize);
1840 		}
1841 	} else {
1842 	  ZVAL_STRINGL(zendlval, yytext, yyleng);
1843 	}
1844 	HANDLE_NEWLINES(yytext, yyleng);
1845 	RETURN_TOKEN(T_INLINE_HTML);
1846 }
1847 
1848 
1849 /* Make sure a label character follows "->", otherwise there is no property
1850  * and "->" will be taken literally
1851  */
1852 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
1853 	yyless(yyleng - 3);
1854 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
1855 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1856 	RETURN_TOKEN(T_VARIABLE);
1857 }
1858 
1859 /* A [ always designates a variable offset, regardless of what follows
1860  */
1861 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1862 	yyless(yyleng - 1);
1863 	yy_push_state(ST_VAR_OFFSET);
1864 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1865 	RETURN_TOKEN(T_VARIABLE);
1866 }
1867 
1868 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1869 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1870 	RETURN_TOKEN(T_VARIABLE);
1871 }
1872 
1873 <ST_VAR_OFFSET>"]" {
1874 	yy_pop_state();
1875 	RETURN_TOKEN(']');
1876 }
1877 
1878 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1879 	/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
1880 	RETURN_TOKEN(yytext[0]);
1881 }
1882 
1883 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1884 	/* Invalid rule to return a more explicit parse error with proper line number */
1885 	yyless(0);
1886 	yy_pop_state();
1887 	ZVAL_NULL(zendlval);
1888 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1889 }
1890 
1891 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1892 	zend_copy_value(zendlval, yytext, yyleng);
1893 	RETURN_TOKEN(T_STRING);
1894 }
1895 
1896 
1897 <ST_IN_SCRIPTING>"#"|"//" {
1898 	while (YYCURSOR < YYLIMIT) {
1899 		switch (*YYCURSOR++) {
1900 			case '\r':
1901 				if (*YYCURSOR == '\n') {
1902 					YYCURSOR++;
1903 				}
1904 				/* fall through */
1905 			case '\n':
1906 				CG(zend_lineno)++;
1907 				break;
1908 			case '?':
1909 				if (*YYCURSOR == '>') {
1910 					YYCURSOR--;
1911 					break;
1912 				}
1913 				/* fall through */
1914 			default:
1915 				continue;
1916 		}
1917 
1918 		break;
1919 	}
1920 
1921 	yyleng = YYCURSOR - SCNG(yy_text);
1922 
1923 	RETURN_TOKEN(T_COMMENT);
1924 }
1925 
1926 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1927 	int doc_com;
1928 
1929 	if (yyleng > 2) {
1930 		doc_com = 1;
1931 		RESET_DOC_COMMENT();
1932 	} else {
1933 		doc_com = 0;
1934 	}
1935 
1936 	while (YYCURSOR < YYLIMIT) {
1937 		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1938 			break;
1939 		}
1940 	}
1941 
1942 	if (YYCURSOR < YYLIMIT) {
1943 		YYCURSOR++;
1944 	} else {
1945 		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1946 	}
1947 
1948 	yyleng = YYCURSOR - SCNG(yy_text);
1949 	HANDLE_NEWLINES(yytext, yyleng);
1950 
1951 	if (doc_com) {
1952 		CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1953 		RETURN_TOKEN(T_DOC_COMMENT);
1954 	}
1955 
1956 	RETURN_TOKEN(T_COMMENT);
1957 }
1958 
1959 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1960 	BEGIN(INITIAL);
1961 	RETURN_TOKEN(T_CLOSE_TAG);  /* implicit ';' at php-end tag */
1962 }
1963 
1964 
1965 <ST_IN_SCRIPTING>b?['] {
1966 	register char *s, *t;
1967 	char *end;
1968 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
1969 
1970 	while (1) {
1971 		if (YYCURSOR < YYLIMIT) {
1972 			if (*YYCURSOR == '\'') {
1973 				YYCURSOR++;
1974 				yyleng = YYCURSOR - SCNG(yy_text);
1975 
1976 				break;
1977 			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1978 				YYCURSOR++;
1979 			}
1980 		} else {
1981 			yyleng = YYLIMIT - SCNG(yy_text);
1982 
1983 			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
1984 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1985 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1986 			ZVAL_NULL(zendlval);
1987 			RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1988 		}
1989 	}
1990 
1991 	ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1992 
1993 	/* convert escape sequences */
1994 	s = t = Z_STRVAL_P(zendlval);
1995 	end = s+Z_STRLEN_P(zendlval);
1996 	while (s<end) {
1997 		if (*s=='\\') {
1998 			s++;
1999 
2000 			switch(*s) {
2001 				case '\\':
2002 				case '\'':
2003 					*t++ = *s;
2004 					Z_STRLEN_P(zendlval)--;
2005 					break;
2006 				default:
2007 					*t++ = '\\';
2008 					*t++ = *s;
2009 					break;
2010 			}
2011 		} else {
2012 			*t++ = *s;
2013 		}
2014 
2015 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2016 			CG(zend_lineno)++;
2017 		}
2018 		s++;
2019 	}
2020 	*t = 0;
2021 
2022 	if (SCNG(output_filter)) {
2023 		size_t sz = 0;
2024 		char *str = NULL;
2025 		s = Z_STRVAL_P(zendlval);
2026 		// TODO: avoid reallocation ???
2027 		SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2028 		ZVAL_STRINGL(zendlval, str, sz);
2029 	}
2030 	RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2031 }
2032 
2033 
2034 <ST_IN_SCRIPTING>b?["] {
2035 	int bprefix = (yytext[0] != '"') ? 1 : 0;
2036 
2037 	while (YYCURSOR < YYLIMIT) {
2038 		switch (*YYCURSOR++) {
2039 			case '"':
2040 				yyleng = YYCURSOR - SCNG(yy_text);
2041 				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
2042 				RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2043 			case '$':
2044 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2045 					break;
2046 				}
2047 				continue;
2048 			case '{':
2049 				if (*YYCURSOR == '$') {
2050 					break;
2051 				}
2052 				continue;
2053 			case '\\':
2054 				if (YYCURSOR < YYLIMIT) {
2055 					YYCURSOR++;
2056 				}
2057 				/* fall through */
2058 			default:
2059 				continue;
2060 		}
2061 
2062 		YYCURSOR--;
2063 		break;
2064 	}
2065 
2066 	/* Remember how much was scanned to save rescanning */
2067 	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2068 
2069 	YYCURSOR = SCNG(yy_text) + yyleng;
2070 
2071 	BEGIN(ST_DOUBLE_QUOTES);
2072 	RETURN_TOKEN('"');
2073 }
2074 
2075 
2076 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2077 	char *s;
2078 	int bprefix = (yytext[0] != '<') ? 1 : 0;
2079 	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2080 
2081 	CG(zend_lineno)++;
2082 	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2083 	s = yytext+bprefix+3;
2084 	while ((*s == ' ') || (*s == '\t')) {
2085 		s++;
2086 		heredoc_label->length--;
2087 	}
2088 
2089 	if (*s == '\'') {
2090 		s++;
2091 		heredoc_label->length -= 2;
2092 
2093 		BEGIN(ST_NOWDOC);
2094 	} else {
2095 		if (*s == '"') {
2096 			s++;
2097 			heredoc_label->length -= 2;
2098 		}
2099 
2100 		BEGIN(ST_HEREDOC);
2101 	}
2102 
2103 	heredoc_label->label = estrndup(s, heredoc_label->length);
2104 
2105 	/* Check for ending label on the next line */
2106 	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2107 		YYCTYPE *end = YYCURSOR + heredoc_label->length;
2108 
2109 		if (*end == ';') {
2110 			end++;
2111 		}
2112 
2113 		if (*end == '\n' || *end == '\r') {
2114 			BEGIN(ST_END_HEREDOC);
2115 		}
2116 	}
2117 
2118 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2119 
2120 	RETURN_TOKEN(T_START_HEREDOC);
2121 }
2122 
2123 
2124 <ST_IN_SCRIPTING>[`] {
2125 	BEGIN(ST_BACKQUOTE);
2126 	RETURN_TOKEN('`');
2127 }
2128 
2129 
2130 <ST_END_HEREDOC>{ANY_CHAR} {
2131 	zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2132 
2133 	YYCURSOR += heredoc_label->length - 1;
2134 	yyleng = heredoc_label->length;
2135 
2136 	heredoc_label_dtor(heredoc_label);
2137 	efree(heredoc_label);
2138 
2139 	BEGIN(ST_IN_SCRIPTING);
2140 	RETURN_TOKEN(T_END_HEREDOC);
2141 }
2142 
2143 
2144 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2145 	Z_LVAL_P(zendlval) = (zend_long) '{';
2146 	yy_push_state(ST_IN_SCRIPTING);
2147 	yyless(1);
2148 	RETURN_TOKEN(T_CURLY_OPEN);
2149 }
2150 
2151 
2152 <ST_DOUBLE_QUOTES>["] {
2153 	BEGIN(ST_IN_SCRIPTING);
2154 	RETURN_TOKEN('"');
2155 }
2156 
2157 <ST_BACKQUOTE>[`] {
2158 	BEGIN(ST_IN_SCRIPTING);
2159 	RETURN_TOKEN('`');
2160 }
2161 
2162 
2163 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2164 	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2165 		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2166 		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2167 
2168 		goto double_quotes_scan_done;
2169 	}
2170 
2171 	if (YYCURSOR > YYLIMIT) {
2172 		RETURN_TOKEN(END);
2173 	}
2174 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2175 		YYCURSOR++;
2176 	}
2177 
2178 	while (YYCURSOR < YYLIMIT) {
2179 		switch (*YYCURSOR++) {
2180 			case '"':
2181 				break;
2182 			case '$':
2183 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2184 					break;
2185 				}
2186 				continue;
2187 			case '{':
2188 				if (*YYCURSOR == '$') {
2189 					break;
2190 				}
2191 				continue;
2192 			case '\\':
2193 				if (YYCURSOR < YYLIMIT) {
2194 					YYCURSOR++;
2195 				}
2196 				/* fall through */
2197 			default:
2198 				continue;
2199 		}
2200 
2201 		YYCURSOR--;
2202 		break;
2203 	}
2204 
2205 double_quotes_scan_done:
2206 	yyleng = YYCURSOR - SCNG(yy_text);
2207 
2208 	zend_scan_escape_string(zendlval, yytext, yyleng, '"');
2209 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2210 }
2211 
2212 
2213 <ST_BACKQUOTE>{ANY_CHAR} {
2214 	if (YYCURSOR > YYLIMIT) {
2215 		RETURN_TOKEN(END);
2216 	}
2217 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2218 		YYCURSOR++;
2219 	}
2220 
2221 	while (YYCURSOR < YYLIMIT) {
2222 		switch (*YYCURSOR++) {
2223 			case '`':
2224 				break;
2225 			case '$':
2226 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2227 					break;
2228 				}
2229 				continue;
2230 			case '{':
2231 				if (*YYCURSOR == '$') {
2232 					break;
2233 				}
2234 				continue;
2235 			case '\\':
2236 				if (YYCURSOR < YYLIMIT) {
2237 					YYCURSOR++;
2238 				}
2239 				/* fall through */
2240 			default:
2241 				continue;
2242 		}
2243 
2244 		YYCURSOR--;
2245 		break;
2246 	}
2247 
2248 	yyleng = YYCURSOR - SCNG(yy_text);
2249 
2250 	zend_scan_escape_string(zendlval, yytext, yyleng, '`');
2251 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2252 }
2253 
2254 
2255 <ST_HEREDOC>{ANY_CHAR} {
2256 	int newline = 0;
2257 
2258 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2259 
2260 	if (YYCURSOR > YYLIMIT) {
2261 		RETURN_TOKEN(END);
2262 	}
2263 
2264 	YYCURSOR--;
2265 
2266 	while (YYCURSOR < YYLIMIT) {
2267 		switch (*YYCURSOR++) {
2268 			case '\r':
2269 				if (*YYCURSOR == '\n') {
2270 					YYCURSOR++;
2271 				}
2272 				/* fall through */
2273 			case '\n':
2274 				/* Check for ending label on the next line */
2275 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2276 					YYCTYPE *end = YYCURSOR + heredoc_label->length;
2277 
2278 					if (*end == ';') {
2279 						end++;
2280 					}
2281 
2282 					if (*end == '\n' || *end == '\r') {
2283 						/* newline before label will be subtracted from returned text, but
2284 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2285 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2286 							newline = 2; /* Windows newline */
2287 						} else {
2288 							newline = 1;
2289 						}
2290 
2291 						CG(increment_lineno) = 1; /* For newline before label */
2292 						BEGIN(ST_END_HEREDOC);
2293 
2294 						goto heredoc_scan_done;
2295 					}
2296 				}
2297 				continue;
2298 			case '$':
2299 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2300 					break;
2301 				}
2302 				continue;
2303 			case '{':
2304 				if (*YYCURSOR == '$') {
2305 					break;
2306 				}
2307 				continue;
2308 			case '\\':
2309 				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2310 					YYCURSOR++;
2311 				}
2312 				/* fall through */
2313 			default:
2314 				continue;
2315 		}
2316 
2317 		YYCURSOR--;
2318 		break;
2319 	}
2320 
2321 heredoc_scan_done:
2322 	yyleng = YYCURSOR - SCNG(yy_text);
2323 
2324 	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
2325 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2326 }
2327 
2328 
2329 <ST_NOWDOC>{ANY_CHAR} {
2330 	int newline = 0;
2331 
2332 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2333 
2334 	if (YYCURSOR > YYLIMIT) {
2335 		RETURN_TOKEN(END);
2336 	}
2337 
2338 	YYCURSOR--;
2339 
2340 	while (YYCURSOR < YYLIMIT) {
2341 		switch (*YYCURSOR++) {
2342 			case '\r':
2343 				if (*YYCURSOR == '\n') {
2344 					YYCURSOR++;
2345 				}
2346 				/* fall through */
2347 			case '\n':
2348 				/* Check for ending label on the next line */
2349 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2350 					YYCTYPE *end = YYCURSOR + heredoc_label->length;
2351 
2352 					if (*end == ';') {
2353 						end++;
2354 					}
2355 
2356 					if (*end == '\n' || *end == '\r') {
2357 						/* newline before label will be subtracted from returned text, but
2358 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2359 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2360 							newline = 2; /* Windows newline */
2361 						} else {
2362 							newline = 1;
2363 						}
2364 
2365 						CG(increment_lineno) = 1; /* For newline before label */
2366 						BEGIN(ST_END_HEREDOC);
2367 
2368 						goto nowdoc_scan_done;
2369 					}
2370 				}
2371 				/* fall through */
2372 			default:
2373 				continue;
2374 		}
2375 	}
2376 
2377 nowdoc_scan_done:
2378 	yyleng = YYCURSOR - SCNG(yy_text);
2379 
2380 	zend_copy_value(zendlval, yytext, yyleng - newline);
2381 	HANDLE_NEWLINES(yytext, yyleng - newline);
2382 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2383 }
2384 
2385 
2386 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2387 	if (YYCURSOR > YYLIMIT) {
2388 		RETURN_TOKEN(END);
2389 	}
2390 
2391 	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2392 	goto restart;
2393 }
2394 
2395 */
2396 }
2397