xref: /PHP-7.2/Zend/zend_language_scanner.l (revision b6b15fc6)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Marcus Boerger <helly@php.net>                              |
16    |          Nuno Lopes <nlopess@php.net>                                |
17    |          Scott MacVicar <scottmac@php.net>                           |
18    | Flex version authors:                                                |
19    |          Andi Gutmans <andi@zend.com>                                |
20    |          Zeev Suraski <zeev@zend.com>                                |
21    +----------------------------------------------------------------------+
22 */
23 
24 /* $Id$ */
25 
/* Scanner tracing hook. Change the condition below to 1 to make YYDEBUG
 * print the state and character it is given; otherwise it expands to nothing. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31 
32 #include "zend_language_scanner_defs.h"
33 
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef ZEND_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52 
/* re2c interface: input character type and the cursor/limit/marker pointers,
 * all of which live in the scanner globals. */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 once cursor + n would reach the end of
 * the buffer plus its ZEND_MMAP_AHEAD slack. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current start condition of the scanner. */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Maps a condition name onto its yyc-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Truncates the current token to its first x bytes and rewinds the cursor to
 * match. x is parenthesized so expression arguments are cast and added as a
 * whole; note that it is still evaluated twice. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72 
73 /* perform sanity check. If this message is triggered you should
74    increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79 
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83 
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87 
88 /* Globals Macros */
89 #define SCNG	LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95 
/* Adds to CG(zend_lineno) one line for every "\n" and for every "\r" that is
 * not immediately followed by "\n" within the l bytes starting at s.
 * The look-ahead for a trailing "\r" reads the byte at s[l]; this assumes the
 * caller's buffer is readable one byte past the range - TODO confirm.
 * Locals carry a prefix so they cannot capture names used in the arguments. */
#define HANDLE_NEWLINES(s, l)													\
do {																			\
	char *zend_hn_cur = (s), *zend_hn_end = zend_hn_cur + (l);					\
																				\
	while (zend_hn_cur < zend_hn_end) {											\
		if (*zend_hn_cur == '\n'												\
		 || (*zend_hn_cur == '\r' && (*(zend_hn_cur + 1) != '\n'))) {			\
			CG(zend_lineno)++;													\
		}																		\
		zend_hn_cur++;															\
	}																			\
} while (0)

/* Adds one line to CG(zend_lineno) when c is "\n" or "\r".
 * c is evaluated exactly once. */
#define HANDLE_NEWLINE(c) \
{ \
	const int zend_hn_ch = (c); \
	if (zend_hn_ch == '\n' || zend_hn_ch == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114 
/* Remember / recall the length scanned so far in a double-quoted string,
 * i.e. the initial string length up to the first variable. */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) (SCNG(scanned_string_len) = (len))
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    (SCNG(scanned_string_len))
118 
/* True when c may begin a label: underscore, ASCII letter, or any value >= 0x80. */
#define IS_LABEL_START(c) ((c) == '_' || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || (c) >= 0x80)

/* Digit classification for octal and hexadecimal escapes. */
#define ZEND_IS_OCT(c)  ((c) <= '7' && (c) >= '0')
#define ZEND_IS_HEX(c)  (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f') || ((c) >= '0' && (c) <= '9'))
123 
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125 
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127 {
128 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129 	ZEND_ASSERT(internal_encoding);
130 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131 }
132 
encoding_filter_script_to_intermediate(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)133 static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134 {
135 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136 }
137 
encoding_filter_intermediate_to_script(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)138 static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139 {
140 	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141 LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142 }
143 
encoding_filter_intermediate_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)144 static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145 {
146 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147 	ZEND_ASSERT(internal_encoding);
148 	return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149 internal_encoding, zend_multibyte_encoding_utf8);
150 }
151 
152 
_yy_push_state(int new_state)153 static void _yy_push_state(int new_state)
154 {
155 	zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156 	YYSETCONDITION(new_state);
157 }
158 
159 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160 
yy_pop_state(void)161 static void yy_pop_state(void)
162 {
163 	int *stack_state = zend_stack_top(&SCNG(state_stack));
164 	YYSETCONDITION(*stack_state);
165 	zend_stack_del_top(&SCNG(state_stack));
166 }
167 
yy_scan_buffer(char * str,unsigned int len)168 static void yy_scan_buffer(char *str, unsigned int len)
169 {
170 	YYCURSOR       = (YYCTYPE*)str;
171 	YYLIMIT        = YYCURSOR + len;
172 	if (!SCNG(yy_start)) {
173 		SCNG(yy_start) = YYCURSOR;
174 	}
175 }
176 
startup_scanner(void)177 void startup_scanner(void)
178 {
179 	CG(parse_error) = 0;
180 	CG(doc_comment) = NULL;
181 	CG(extra_fn_flags) = 0;
182 	zend_stack_init(&SCNG(state_stack), sizeof(int));
183 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
184 }
185 
/* Element destructor for the heredoc label stack: frees the label text. */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
189 
shutdown_scanner(void)190 void shutdown_scanner(void)
191 {
192 	CG(parse_error) = 0;
193 	RESET_DOC_COMMENT();
194 	zend_stack_destroy(&SCNG(state_stack));
195 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
196 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
197 	SCNG(on_event) = NULL;
198 }
199 
zend_save_lexical_state(zend_lex_state * lex_state)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
201 {
202 	lex_state->yy_leng   = SCNG(yy_leng);
203 	lex_state->yy_start  = SCNG(yy_start);
204 	lex_state->yy_text   = SCNG(yy_text);
205 	lex_state->yy_cursor = SCNG(yy_cursor);
206 	lex_state->yy_marker = SCNG(yy_marker);
207 	lex_state->yy_limit  = SCNG(yy_limit);
208 
209 	lex_state->state_stack = SCNG(state_stack);
210 	zend_stack_init(&SCNG(state_stack), sizeof(int));
211 
212 	lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214 
215 	lex_state->in = SCNG(yy_in);
216 	lex_state->yy_state = YYSTATE;
217 	lex_state->filename = zend_get_compiled_filename();
218 	lex_state->lineno = CG(zend_lineno);
219 
220 	lex_state->script_org = SCNG(script_org);
221 	lex_state->script_org_size = SCNG(script_org_size);
222 	lex_state->script_filtered = SCNG(script_filtered);
223 	lex_state->script_filtered_size = SCNG(script_filtered_size);
224 	lex_state->input_filter = SCNG(input_filter);
225 	lex_state->output_filter = SCNG(output_filter);
226 	lex_state->script_encoding = SCNG(script_encoding);
227 
228 	lex_state->on_event = SCNG(on_event);
229 	lex_state->on_event_context = SCNG(on_event_context);
230 
231 	lex_state->ast = CG(ast);
232 	lex_state->ast_arena = CG(ast_arena);
233 }
234 
zend_restore_lexical_state(zend_lex_state * lex_state)235 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
236 {
237 	SCNG(yy_leng)   = lex_state->yy_leng;
238 	SCNG(yy_start)  = lex_state->yy_start;
239 	SCNG(yy_text)   = lex_state->yy_text;
240 	SCNG(yy_cursor) = lex_state->yy_cursor;
241 	SCNG(yy_marker) = lex_state->yy_marker;
242 	SCNG(yy_limit)  = lex_state->yy_limit;
243 
244 	zend_stack_destroy(&SCNG(state_stack));
245 	SCNG(state_stack) = lex_state->state_stack;
246 
247 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
248 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
249 	SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
250 
251 	SCNG(yy_in) = lex_state->in;
252 	YYSETCONDITION(lex_state->yy_state);
253 	CG(zend_lineno) = lex_state->lineno;
254 	zend_restore_compiled_filename(lex_state->filename);
255 
256 	if (SCNG(script_filtered)) {
257 		efree(SCNG(script_filtered));
258 		SCNG(script_filtered) = NULL;
259 	}
260 	SCNG(script_org) = lex_state->script_org;
261 	SCNG(script_org_size) = lex_state->script_org_size;
262 	SCNG(script_filtered) = lex_state->script_filtered;
263 	SCNG(script_filtered_size) = lex_state->script_filtered_size;
264 	SCNG(input_filter) = lex_state->input_filter;
265 	SCNG(output_filter) = lex_state->output_filter;
266 	SCNG(script_encoding) = lex_state->script_encoding;
267 
268 	SCNG(on_event) = lex_state->on_event;
269 	SCNG(on_event_context) = lex_state->on_event_context;
270 
271 	CG(ast) = lex_state->ast;
272 	CG(ast_arena) = lex_state->ast_arena;
273 
274 	RESET_DOC_COMMENT();
275 }
276 
zend_destroy_file_handle(zend_file_handle * file_handle)277 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
278 {
279 	zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
280 	/* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
281 	file_handle->opened_path = NULL;
282 	if (file_handle->free_filename) {
283 		file_handle->filename = NULL;
284 	}
285 }
286 
zend_lex_tstring(zval * zv)287 ZEND_API void zend_lex_tstring(zval *zv)
288 {
289 	if (SCNG(on_event)) {
290 		SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
291 	}
292 
293 	ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
294 }
295 
/* Byte order marks, as string literals so that sizeof(BOM_x)-1 is the BOM length. */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8     "\xef\xbb\xbf"
301 
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)302 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
303 {
304 	const unsigned char *p;
305 	int wchar_size = 2;
306 	int le = 0;
307 
308 	/* utf-16 or utf-32? */
309 	p = script;
310 	assert(p >= script);
311 	while ((size_t)(p-script) < script_size) {
312 		p = memchr(p, 0, script_size-(p-script)-2);
313 		if (!p) {
314 			break;
315 		}
316 		if (*(p+1) == '\0' && *(p+2) == '\0') {
317 			wchar_size = 4;
318 			break;
319 		}
320 
321 		/* searching for UTF-32 specific byte orders, so this will do */
322 		p += 4;
323 	}
324 
325 	/* BE or LE? */
326 	p = script;
327 	assert(p >= script);
328 	while ((size_t)(p-script) < script_size) {
329 		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
330 			/* BE */
331 			le = 0;
332 			break;
333 		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
334 			/* LE* */
335 			le = 1;
336 			break;
337 		}
338 		p += wchar_size;
339 	}
340 
341 	if (wchar_size == 2) {
342 		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
343 	} else {
344 		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
345 	}
346 
347 	return NULL;
348 }
349 
zend_multibyte_detect_unicode(void)350 static const zend_encoding* zend_multibyte_detect_unicode(void)
351 {
352 	const zend_encoding *script_encoding = NULL;
353 	int bom_size;
354 	unsigned char *pos1, *pos2;
355 
356 	if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
357 		return NULL;
358 	}
359 
360 	/* check out BOM */
361 	if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
362 		script_encoding = zend_multibyte_encoding_utf32be;
363 		bom_size = sizeof(BOM_UTF32_BE)-1;
364 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
365 		script_encoding = zend_multibyte_encoding_utf32le;
366 		bom_size = sizeof(BOM_UTF32_LE)-1;
367 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
368 		script_encoding = zend_multibyte_encoding_utf16be;
369 		bom_size = sizeof(BOM_UTF16_BE)-1;
370 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
371 		script_encoding = zend_multibyte_encoding_utf16le;
372 		bom_size = sizeof(BOM_UTF16_LE)-1;
373 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
374 		script_encoding = zend_multibyte_encoding_utf8;
375 		bom_size = sizeof(BOM_UTF8)-1;
376 	}
377 
378 	if (script_encoding) {
379 		/* remove BOM */
380 		LANG_SCNG(script_org) += bom_size;
381 		LANG_SCNG(script_org_size) -= bom_size;
382 
383 		return script_encoding;
384 	}
385 
386 	/* script contains NULL bytes -> auto-detection */
387 	if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
388 		/* check if the NULL byte is after the __HALT_COMPILER(); */
389 		pos2 = LANG_SCNG(script_org);
390 
391 		while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
392 			pos2 = memchr(pos2, '_', pos1 - pos2);
393 			if (!pos2) break;
394 			pos2++;
395 			if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
396 				pos2 += sizeof("_HALT_COMPILER")-1;
397 				while (*pos2 == ' '  ||
398 					   *pos2 == '\t' ||
399 					   *pos2 == '\r' ||
400 					   *pos2 == '\n') {
401 					pos2++;
402 				}
403 				if (*pos2 == '(') {
404 					pos2++;
405 					while (*pos2 == ' '  ||
406 						   *pos2 == '\t' ||
407 						   *pos2 == '\r' ||
408 						   *pos2 == '\n') {
409 						pos2++;
410 					}
411 					if (*pos2 == ')') {
412 						pos2++;
413 						while (*pos2 == ' '  ||
414 							   *pos2 == '\t' ||
415 							   *pos2 == '\r' ||
416 							   *pos2 == '\n') {
417 							pos2++;
418 						}
419 						if (*pos2 == ';') {
420 							return NULL;
421 						}
422 					}
423 				}
424 			}
425 		}
426 		/* make best effort if BOM is missing */
427 		return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
428 	}
429 
430 	return NULL;
431 }
432 
zend_multibyte_find_script_encoding(void)433 static const zend_encoding* zend_multibyte_find_script_encoding(void)
434 {
435 	const zend_encoding *script_encoding;
436 
437 	if (CG(detect_unicode)) {
438 		/* check out bom(byte order mark) and see if containing wchars */
439 		script_encoding = zend_multibyte_detect_unicode();
440 		if (script_encoding != NULL) {
441 			/* bom or wchar detection is prior to 'script_encoding' option */
442 			return script_encoding;
443 		}
444 	}
445 
446 	/* if no script_encoding specified, just leave alone */
447 	if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
448 		return NULL;
449 	}
450 
451 	/* if multiple encodings specified, detect automagically */
452 	if (CG(script_encoding_list_size) > 1) {
453 		return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
454 	}
455 
456 	return CG(script_encoding_list)[0];
457 }
458 
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)459 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
460 {
461 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
462 	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
463 
464 	if (!script_encoding) {
465 		return FAILURE;
466 	}
467 
468 	/* judge input/output filter */
469 	LANG_SCNG(script_encoding) = script_encoding;
470 	LANG_SCNG(input_filter) = NULL;
471 	LANG_SCNG(output_filter) = NULL;
472 
473 	if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
474 		if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
475 			/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
476 			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
477 			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
478 		} else {
479 			LANG_SCNG(input_filter) = NULL;
480 			LANG_SCNG(output_filter) = NULL;
481 		}
482 		return SUCCESS;
483 	}
484 
485 	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
486 		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
487 		LANG_SCNG(output_filter) = NULL;
488 	} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
489 		LANG_SCNG(input_filter) = NULL;
490 		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
491 	} else {
492 		/* both script and internal encodings are incompatible w/ flex */
493 		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
494 		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
495 	}
496 
497 	return 0;
498 }
499 
open_file_for_scanning(zend_file_handle * file_handle)500 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
501 {
502 	char *buf;
503 	size_t size, offset = 0;
504 	zend_string *compiled_filename;
505 
506 	/* The shebang line was read, get the current position to obtain the buffer start */
507 	if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
508 		if ((offset = ftell(file_handle->handle.fp)) == (size_t)-1) {
509 			offset = 0;
510 		}
511 	}
512 
513 	if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
514 		return FAILURE;
515 	}
516 
517 	zend_llist_add_element(&CG(open_files), file_handle);
518 	if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
519 		zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
520 		size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
521 		fh->handle.stream.handle = (void*)(((char*)fh) + diff);
522 		file_handle->handle.stream.handle = fh->handle.stream.handle;
523 	}
524 
525 	/* Reset the scanner for scanning the new file */
526 	SCNG(yy_in) = file_handle;
527 	SCNG(yy_start) = NULL;
528 
529 	if (size != (size_t)-1) {
530 		if (CG(multibyte)) {
531 			SCNG(script_org) = (unsigned char*)buf;
532 			SCNG(script_org_size) = size;
533 			SCNG(script_filtered) = NULL;
534 
535 			zend_multibyte_set_filter(NULL);
536 
537 			if (SCNG(input_filter)) {
538 				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
539 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
540 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
541 				}
542 				buf = (char*)SCNG(script_filtered);
543 				size = SCNG(script_filtered_size);
544 			}
545 		}
546 		SCNG(yy_start) = (unsigned char *)buf - offset;
547 		yy_scan_buffer(buf, (unsigned int)size);
548 	} else {
549 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
550 	}
551 
552 	BEGIN(INITIAL);
553 
554 	if (file_handle->opened_path) {
555 		compiled_filename = zend_string_copy(file_handle->opened_path);
556 	} else {
557 		compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
558 	}
559 
560 	zend_set_compiled_filename(compiled_filename);
561 	zend_string_release(compiled_filename);
562 
563 	if (CG(start_lineno)) {
564 		CG(zend_lineno) = CG(start_lineno);
565 		CG(start_lineno) = 0;
566 	} else {
567 		CG(zend_lineno) = 1;
568 	}
569 
570 	RESET_DOC_COMMENT();
571 	CG(increment_lineno) = 0;
572 	return SUCCESS;
573 }
END_EXTERN_C()574 END_EXTERN_C()
575 
576 static zend_op_array *zend_compile(int type)
577 {
578 	zend_op_array *op_array = NULL;
579 	zend_bool original_in_compilation = CG(in_compilation);
580 
581 	CG(in_compilation) = 1;
582 	CG(ast) = NULL;
583 	CG(ast_arena) = zend_arena_create(1024 * 32);
584 
585 	if (!zendparse()) {
586 		int last_lineno = CG(zend_lineno);
587 		zend_file_context original_file_context;
588 		zend_oparray_context original_oparray_context;
589 		zend_op_array *original_active_op_array = CG(active_op_array);
590 
591 		op_array = emalloc(sizeof(zend_op_array));
592 		init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
593 		CG(active_op_array) = op_array;
594 
595 		if (zend_ast_process) {
596 			zend_ast_process(CG(ast));
597 		}
598 
599 		zend_file_context_begin(&original_file_context);
600 		zend_oparray_context_begin(&original_oparray_context);
601 		zend_compile_top_stmt(CG(ast));
602 		CG(zend_lineno) = last_lineno;
603 		zend_emit_final_return(type == ZEND_USER_FUNCTION);
604 		op_array->line_start = 1;
605 		op_array->line_end = last_lineno;
606 		pass_two(op_array);
607 		zend_oparray_context_end(&original_oparray_context);
608 		zend_file_context_end(&original_file_context);
609 
610 		CG(active_op_array) = original_active_op_array;
611 	}
612 
613 	zend_ast_destroy(CG(ast));
614 	zend_arena_destroy(CG(ast_arena));
615 
616 	CG(in_compilation) = original_in_compilation;
617 
618 	return op_array;
619 }
620 
compile_file(zend_file_handle * file_handle,int type)621 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
622 {
623 	zend_lex_state original_lex_state;
624 	zend_op_array *op_array = NULL;
625 	zend_save_lexical_state(&original_lex_state);
626 
627 	if (open_file_for_scanning(file_handle)==FAILURE) {
628 		if (type==ZEND_REQUIRE) {
629 			zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
630 			zend_bailout();
631 		} else {
632 			zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
633 		}
634 	} else {
635 		op_array = zend_compile(ZEND_USER_FUNCTION);
636 	}
637 
638 	zend_restore_lexical_state(&original_lex_state);
639 	return op_array;
640 }
641 
642 
compile_filename(int type,zval * filename)643 zend_op_array *compile_filename(int type, zval *filename)
644 {
645 	zend_file_handle file_handle;
646 	zval tmp;
647 	zend_op_array *retval;
648 	zend_string *opened_path = NULL;
649 
650 	if (Z_TYPE_P(filename) != IS_STRING) {
651 		tmp = *filename;
652 		zval_copy_ctor(&tmp);
653 		convert_to_string(&tmp);
654 		filename = &tmp;
655 	}
656 	file_handle.filename = Z_STRVAL_P(filename);
657 	file_handle.free_filename = 0;
658 	file_handle.type = ZEND_HANDLE_FILENAME;
659 	file_handle.opened_path = NULL;
660 	file_handle.handle.fp = NULL;
661 
662 	retval = zend_compile_file(&file_handle, type);
663 	if (retval && file_handle.handle.stream.handle) {
664 		if (!file_handle.opened_path) {
665 			file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
666 		}
667 
668 		zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
669 
670 		if (opened_path) {
671 			zend_string_release(opened_path);
672 		}
673 	}
674 	zend_destroy_file_handle(&file_handle);
675 
676 	if (filename==&tmp) {
677 		zval_dtor(&tmp);
678 	}
679 	return retval;
680 }
681 
zend_prepare_string_for_scanning(zval * str,char * filename)682 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
683 {
684 	char *buf;
685 	size_t size, old_len;
686 	zend_string *new_compiled_filename;
687 
688 	/* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
689 	old_len = Z_STRLEN_P(str);
690 	Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
691 	Z_TYPE_INFO_P(str) = IS_STRING_EX;
692 	memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
693 
694 	SCNG(yy_in) = NULL;
695 	SCNG(yy_start) = NULL;
696 
697 	buf = Z_STRVAL_P(str);
698 	size = old_len;
699 
700 	if (CG(multibyte)) {
701 		SCNG(script_org) = (unsigned char*)buf;
702 		SCNG(script_org_size) = size;
703 		SCNG(script_filtered) = NULL;
704 
705 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
706 
707 		if (SCNG(input_filter)) {
708 			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
709 				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
710 						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
711 			}
712 			buf = (char*)SCNG(script_filtered);
713 			size = SCNG(script_filtered_size);
714 		}
715 	}
716 
717 	yy_scan_buffer(buf, (unsigned int)size);
718 
719 	new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
720 	zend_set_compiled_filename(new_compiled_filename);
721 	zend_string_release(new_compiled_filename);
722 	CG(zend_lineno) = 1;
723 	CG(increment_lineno) = 0;
724 	RESET_DOC_COMMENT();
725 	return SUCCESS;
726 }
727 
728 
zend_get_scanned_file_offset(void)729 ZEND_API size_t zend_get_scanned_file_offset(void)
730 {
731 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
732 	if (SCNG(input_filter)) {
733 		size_t original_offset = offset, length = 0;
734 		do {
735 			unsigned char *p = NULL;
736 			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
737 				return (size_t)-1;
738 			}
739 			efree(p);
740 			if (length > original_offset) {
741 				offset--;
742 			} else if (length < original_offset) {
743 				offset++;
744 			}
745 		} while (original_offset != length);
746 	}
747 	return offset;
748 }
749 
compile_string(zval * source_string,char * filename)750 zend_op_array *compile_string(zval *source_string, char *filename)
751 {
752 	zend_lex_state original_lex_state;
753 	zend_op_array *op_array = NULL;
754 	zval tmp;
755 
756 	if (Z_STRLEN_P(source_string)==0) {
757 		return NULL;
758 	}
759 
760 	ZVAL_DUP(&tmp, source_string);
761 	convert_to_string(&tmp);
762 	source_string = &tmp;
763 
764 	zend_save_lexical_state(&original_lex_state);
765 	if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
766 		BEGIN(ST_IN_SCRIPTING);
767 		op_array = zend_compile(ZEND_EVAL_CODE);
768 	}
769 
770 	zend_restore_lexical_state(&original_lex_state);
771 	zval_dtor(&tmp);
772 
773 	return op_array;
774 }
775 
776 
BEGIN_EXTERN_C()777 BEGIN_EXTERN_C()
778 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
779 {
780 	zend_lex_state original_lex_state;
781 	zend_file_handle file_handle;
782 
783 	file_handle.type = ZEND_HANDLE_FILENAME;
784 	file_handle.filename = filename;
785 	file_handle.free_filename = 0;
786 	file_handle.opened_path = NULL;
787 	zend_save_lexical_state(&original_lex_state);
788 	if (open_file_for_scanning(&file_handle)==FAILURE) {
789 		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
790 		zend_restore_lexical_state(&original_lex_state);
791 		return FAILURE;
792 	}
793 	zend_highlight(syntax_highlighter_ini);
794 	if (SCNG(script_filtered)) {
795 		efree(SCNG(script_filtered));
796 		SCNG(script_filtered) = NULL;
797 	}
798 	zend_destroy_file_handle(&file_handle);
799 	zend_restore_lexical_state(&original_lex_state);
800 	return SUCCESS;
801 }
802 
/* Syntax-highlights the code held in str using syntax_highlighter_ini.
 *
 * str:      the code; the function works on its own copy.
 * str_name: name installed as the compiled filename while scanning.
 * Returns FAILURE if the string cannot be prepared for scanning, SUCCESS
 * otherwise. The private copy is destroyed on both paths and the scanner state
 * is saved before and restored after. */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state);
	if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state);
		/* do not leak the copy made above */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini);
	/* the converted script buffer is no longer needed */
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state);
	zval_dtor(str);
	return SUCCESS;
}
825 
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)826 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
827 {
828 	size_t length;
829 	unsigned char *new_yy_start;
830 
831 	/* convert and set */
832 	if (!SCNG(input_filter)) {
833 		if (SCNG(script_filtered)) {
834 			efree(SCNG(script_filtered));
835 			SCNG(script_filtered) = NULL;
836 		}
837 		SCNG(script_filtered_size) = 0;
838 		length = SCNG(script_org_size);
839 		new_yy_start = SCNG(script_org);
840 	} else {
841 		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
842 			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
843 					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
844 		}
845 		if (SCNG(script_filtered)) {
846 			efree(SCNG(script_filtered));
847 		}
848 		SCNG(script_filtered) = new_yy_start;
849 		SCNG(script_filtered_size) = length;
850 	}
851 
852 	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
853 	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
854 	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
855 	SCNG(yy_limit) = new_yy_start + length;
856 
857 	SCNG(yy_start) = new_yy_start;
858 }
859 
860 
/* Stores yyleng bytes of yytext in zendlval as a string, passing them through
 * the scanner's output filter first when one is installed.
 * The expansion is a bare if/else statement (kept for compatibility with
 * existing call sites), so do not use it as the body of an unbraced if.
 * Arguments are parenthesized and the temporaries carry a prefix so they
 * cannot capture names used in the arguments. */
// TODO: avoid reallocation ???
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (SCNG(output_filter)) { \
		size_t zend_cv_len = 0; \
		char *zend_cv_buf = NULL; \
		SCNG(output_filter)((unsigned char **)&zend_cv_buf, &zend_cv_len, (unsigned char *)(yytext), (size_t)(yyleng)); \
		ZVAL_STRINGL((zendlval), zend_cv_buf, zend_cv_len); \
		efree(zend_cv_buf); \
	} else { \
		ZVAL_STRINGL((zendlval), (yytext), (yyleng)); \
	}
872 
/* Copy `len` bytes from `str` into `zendlval` as a string and decode the
 * backslash escape sequences of a double-quoted / backquoted / heredoc style
 * literal in place.
 *
 * zendlval   - receives the decoded string
 * str, len   - raw literal bytes (without the surrounding quotes)
 * quote_type - the quote character of the literal; an escaped '"' or '`' is
 *              only unescaped when it equals quote_type, otherwise the
 *              backslash is kept
 *
 * Handled escapes: \n \r \t \f \v \e, \\ and \$, \xH[H] / \XH[H], one to three
 * octal digits, and \u{H...}. Any other escape is copied through together
 * with its backslash.
 *
 * CG(zend_lineno) is incremented for every "\n" and for every "\r" that is
 * not followed by "\n" at the scan position.
 *
 * If SCNG(output_filter) is set, the decoded bytes are passed through it and
 * the zval is replaced by the filtered result.
 *
 * Returns SUCCESS, or FAILURE after throwing a parse error for an invalid or
 * too-large \u{...} sequence; in that case zendlval is destroyed and left
 * UNDEF.
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *s, *t;
	char *end;

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences; `s` reads, `t` writes, both in the zval's own buffer */
	s = t = Z_STRVAL_P(zendlval);
	end = s+Z_STRLEN_P(zendlval);
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* lone trailing backslash: keep it literally */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'r':
					*t++ = '\r';
					Z_STRLEN_P(zendlval)--;
					break;
				case 't':
					*t++ = '\t';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'f':
					*t++ = '\f';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'v':
					*t++ = '\v';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'e':
					/* ESC (0x1B); written as an octal escape because '\e' is not standard C */
					*t++ = '\033';
					Z_STRLEN_P(zendlval)--;
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						/* a quote character other than the delimiter keeps its backslash */
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*t++ = *s;
					Z_STRLEN_P(zendlval)--;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						Z_STRLEN_P(zendlval)--; /* for the 'x' */

						hex_buf[0] = *(++s);
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
						}
						*t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
					{
						/* cache where we started so we can parse after validating */
						char *start = s + 1;
						/* number of bytes in "{...}" including both braces */
						size_t seq_len = 0;
						zend_bool valid = 1;
						unsigned long codepoint;
						size_t byte_len = 0;

						if (*start != '{') {
							/* we silently let this pass to avoid breaking code
							 * with JSON in string literals (e.g. "\"\u202e\"")
							 */
							*t++ = '\\';
							*t++ = 'u';
							break;
						} else {
							/* on the other hand, invalid \u{blah} errors */
							s++;
							seq_len++;
							s++;
							while (*s != '}') {
								if (!ZEND_IS_HEX(*s)) {
									valid = 0;
									break;
								} else {
									seq_len++;
								}
								s++;
							}
							if (*s == '}') {
								valid = 1;
								seq_len++;
							}
						}

						/* \u{} is invalid */
						if (seq_len <= 2) {
							valid = 0;
						}

						if (!valid) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						errno = 0;
						codepoint = strtoul(start + 1, NULL, 16);

						/* per RFC 3629, UTF-8 can only represent 21 bits */
						if (codepoint > 0x10FFFF || errno) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
						if (codepoint < 0x80) {
							byte_len = 1;
							*t++ = codepoint;
						} else if (codepoint <= 0x7FF) {
							byte_len = 2;
							*t++ = (codepoint >> 6) + 0xC0;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0xFFFF) {
							byte_len = 3;
							*t++ = (codepoint >> 12) + 0xE0;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0x10FFFF) {
							byte_len = 4;
							*t++ = (codepoint >> 18) + 0xF0;
							*t++ = ((codepoint >> 12) & 0x3F) + 0x80;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						}

						Z_STRLEN_P(zendlval) -= 2; /* \u */
						Z_STRLEN_P(zendlval) -= (seq_len - byte_len);
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
								Z_STRLEN_P(zendlval)--;
							}
						}
						if (octal_buf[2] &&
						    (octal_buf[0] > '3')) {
							/* 3 octit values must not overflow 0xFF (\377) */
							zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
						}

						*t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* count a line for "\n" and for a "\r" that does not start "\r\n" */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	if (SCNG(output_filter)) {
		size_t sz = 0;
		/* start from NULL so the pointer is never indeterminate if the filter
		 * leaves it untouched (same convention as zend_copy_value) */
		unsigned char *filtered = NULL;
		// TODO: avoid reallocation ???
		s = Z_STRVAL_P(zendlval);
		SCNG(output_filter)(&filtered, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, sz);
		efree(filtered);
	}
	return SUCCESS;
}
1088 
emit_token(int token,int token_line)1089 static zend_always_inline int emit_token(int token, int token_line)
1090 {
1091 	if (SCNG(on_event)) {
1092 		SCNG(on_event)(ON_TOKEN, token, token_line, SCNG(on_event_context));
1093 	}
1094 
1095 	return token;
1096 }
1097 
/* Return `token` from the enclosing function after passing it through
 * emit_token(). Requires a local variable `start_line` to be in scope at the
 * point of use. The expansion already ends in a semicolon.
 */
#define RETURN_TOKEN(token) \
	return emit_token((token), start_line);
1099 
lex_scan(zval * zendlval)1100 int lex_scan(zval *zendlval)
1101 {
1102 
1103 int start_line = CG(zend_lineno);
1104 
1105 restart:
1106 	SCNG(yy_text) = YYCURSOR;
1107 
1108 /*!re2c
1109 re2c:yyfill:check = 0;
1110 LNUM	[0-9]+
1111 DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1112 EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
1113 HNUM	"0x"[0-9a-fA-F]+
1114 BNUM	"0b"[01]+
1115 LABEL	[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1116 WHITESPACE [ \n\r\t]+
1117 TABS_AND_SPACES [ \t]*
1118 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1119 ANY_CHAR [^]
1120 NEWLINE ("\r"|"\n"|"\r\n")
1121 
1122 /* compute yyleng before each rule */
1123 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1124 
1125 <ST_IN_SCRIPTING>"exit" {
1126 	RETURN_TOKEN(T_EXIT);
1127 }
1128 
1129 <ST_IN_SCRIPTING>"die" {
1130 	RETURN_TOKEN(T_EXIT);
1131 }
1132 
1133 <ST_IN_SCRIPTING>"function" {
1134 	RETURN_TOKEN(T_FUNCTION);
1135 }
1136 
1137 <ST_IN_SCRIPTING>"const" {
1138 	RETURN_TOKEN(T_CONST);
1139 }
1140 
1141 <ST_IN_SCRIPTING>"return" {
1142 	RETURN_TOKEN(T_RETURN);
1143 }
1144 
1145 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
1146 	yyless(yyleng - 1);
1147 	HANDLE_NEWLINES(yytext, yyleng);
1148 	RETURN_TOKEN(T_YIELD_FROM);
1149 }
1150 
1151 <ST_IN_SCRIPTING>"yield" {
1152 	RETURN_TOKEN(T_YIELD);
1153 }
1154 
1155 <ST_IN_SCRIPTING>"try" {
1156 	RETURN_TOKEN(T_TRY);
1157 }
1158 
1159 <ST_IN_SCRIPTING>"catch" {
1160 	RETURN_TOKEN(T_CATCH);
1161 }
1162 
1163 <ST_IN_SCRIPTING>"finally" {
1164 	RETURN_TOKEN(T_FINALLY);
1165 }
1166 
1167 <ST_IN_SCRIPTING>"throw" {
1168 	RETURN_TOKEN(T_THROW);
1169 }
1170 
1171 <ST_IN_SCRIPTING>"if" {
1172 	RETURN_TOKEN(T_IF);
1173 }
1174 
1175 <ST_IN_SCRIPTING>"elseif" {
1176 	RETURN_TOKEN(T_ELSEIF);
1177 }
1178 
1179 <ST_IN_SCRIPTING>"endif" {
1180 	RETURN_TOKEN(T_ENDIF);
1181 }
1182 
1183 <ST_IN_SCRIPTING>"else" {
1184 	RETURN_TOKEN(T_ELSE);
1185 }
1186 
1187 <ST_IN_SCRIPTING>"while" {
1188 	RETURN_TOKEN(T_WHILE);
1189 }
1190 
1191 <ST_IN_SCRIPTING>"endwhile" {
1192 	RETURN_TOKEN(T_ENDWHILE);
1193 }
1194 
1195 <ST_IN_SCRIPTING>"do" {
1196 	RETURN_TOKEN(T_DO);
1197 }
1198 
1199 <ST_IN_SCRIPTING>"for" {
1200 	RETURN_TOKEN(T_FOR);
1201 }
1202 
1203 <ST_IN_SCRIPTING>"endfor" {
1204 	RETURN_TOKEN(T_ENDFOR);
1205 }
1206 
1207 <ST_IN_SCRIPTING>"foreach" {
1208 	RETURN_TOKEN(T_FOREACH);
1209 }
1210 
1211 <ST_IN_SCRIPTING>"endforeach" {
1212 	RETURN_TOKEN(T_ENDFOREACH);
1213 }
1214 
1215 <ST_IN_SCRIPTING>"declare" {
1216 	RETURN_TOKEN(T_DECLARE);
1217 }
1218 
1219 <ST_IN_SCRIPTING>"enddeclare" {
1220 	RETURN_TOKEN(T_ENDDECLARE);
1221 }
1222 
1223 <ST_IN_SCRIPTING>"instanceof" {
1224 	RETURN_TOKEN(T_INSTANCEOF);
1225 }
1226 
1227 <ST_IN_SCRIPTING>"as" {
1228 	RETURN_TOKEN(T_AS);
1229 }
1230 
1231 <ST_IN_SCRIPTING>"switch" {
1232 	RETURN_TOKEN(T_SWITCH);
1233 }
1234 
1235 <ST_IN_SCRIPTING>"endswitch" {
1236 	RETURN_TOKEN(T_ENDSWITCH);
1237 }
1238 
1239 <ST_IN_SCRIPTING>"case" {
1240 	RETURN_TOKEN(T_CASE);
1241 }
1242 
1243 <ST_IN_SCRIPTING>"default" {
1244 	RETURN_TOKEN(T_DEFAULT);
1245 }
1246 
1247 <ST_IN_SCRIPTING>"break" {
1248 	RETURN_TOKEN(T_BREAK);
1249 }
1250 
1251 <ST_IN_SCRIPTING>"continue" {
1252 	RETURN_TOKEN(T_CONTINUE);
1253 }
1254 
1255 <ST_IN_SCRIPTING>"goto" {
1256 	RETURN_TOKEN(T_GOTO);
1257 }
1258 
1259 <ST_IN_SCRIPTING>"echo" {
1260 	RETURN_TOKEN(T_ECHO);
1261 }
1262 
1263 <ST_IN_SCRIPTING>"print" {
1264 	RETURN_TOKEN(T_PRINT);
1265 }
1266 
1267 <ST_IN_SCRIPTING>"class" {
1268 	RETURN_TOKEN(T_CLASS);
1269 }
1270 
1271 <ST_IN_SCRIPTING>"interface" {
1272 	RETURN_TOKEN(T_INTERFACE);
1273 }
1274 
1275 <ST_IN_SCRIPTING>"trait" {
1276 	RETURN_TOKEN(T_TRAIT);
1277 }
1278 
1279 <ST_IN_SCRIPTING>"extends" {
1280 	RETURN_TOKEN(T_EXTENDS);
1281 }
1282 
1283 <ST_IN_SCRIPTING>"implements" {
1284 	RETURN_TOKEN(T_IMPLEMENTS);
1285 }
1286 
1287 <ST_IN_SCRIPTING>"->" {
1288 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
1289 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1290 }
1291 
1292 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1293 	HANDLE_NEWLINES(yytext, yyleng);
1294 	RETURN_TOKEN(T_WHITESPACE);
1295 }
1296 
1297 <ST_LOOKING_FOR_PROPERTY>"->" {
1298 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1299 }
1300 
1301 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1302 	yy_pop_state();
1303 	zend_copy_value(zendlval, yytext, yyleng);
1304 	RETURN_TOKEN(T_STRING);
1305 }
1306 
1307 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1308 	yyless(0);
1309 	yy_pop_state();
1310 	goto restart;
1311 }
1312 
1313 <ST_IN_SCRIPTING>"::" {
1314 	RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
1315 }
1316 
1317 <ST_IN_SCRIPTING>"\\" {
1318 	RETURN_TOKEN(T_NS_SEPARATOR);
1319 }
1320 
1321 <ST_IN_SCRIPTING>"..." {
1322 	RETURN_TOKEN(T_ELLIPSIS);
1323 }
1324 
1325 <ST_IN_SCRIPTING>"??" {
1326 	RETURN_TOKEN(T_COALESCE);
1327 }
1328 
1329 <ST_IN_SCRIPTING>"new" {
1330 	RETURN_TOKEN(T_NEW);
1331 }
1332 
1333 <ST_IN_SCRIPTING>"clone" {
1334 	RETURN_TOKEN(T_CLONE);
1335 }
1336 
1337 <ST_IN_SCRIPTING>"var" {
1338 	RETURN_TOKEN(T_VAR);
1339 }
1340 
1341 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1342 	RETURN_TOKEN(T_INT_CAST);
1343 }
1344 
1345 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1346 	RETURN_TOKEN(T_DOUBLE_CAST);
1347 }
1348 
1349 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1350 	RETURN_TOKEN(T_STRING_CAST);
1351 }
1352 
1353 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1354 	RETURN_TOKEN(T_ARRAY_CAST);
1355 }
1356 
1357 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1358 	RETURN_TOKEN(T_OBJECT_CAST);
1359 }
1360 
1361 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1362 	RETURN_TOKEN(T_BOOL_CAST);
1363 }
1364 
1365 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1366 	RETURN_TOKEN(T_UNSET_CAST);
1367 }
1368 
1369 <ST_IN_SCRIPTING>"eval" {
1370 	RETURN_TOKEN(T_EVAL);
1371 }
1372 
1373 <ST_IN_SCRIPTING>"include" {
1374 	RETURN_TOKEN(T_INCLUDE);
1375 }
1376 
1377 <ST_IN_SCRIPTING>"include_once" {
1378 	RETURN_TOKEN(T_INCLUDE_ONCE);
1379 }
1380 
1381 <ST_IN_SCRIPTING>"require" {
1382 	RETURN_TOKEN(T_REQUIRE);
1383 }
1384 
1385 <ST_IN_SCRIPTING>"require_once" {
1386 	RETURN_TOKEN(T_REQUIRE_ONCE);
1387 }
1388 
1389 <ST_IN_SCRIPTING>"namespace" {
1390 	RETURN_TOKEN(T_NAMESPACE);
1391 }
1392 
1393 <ST_IN_SCRIPTING>"use" {
1394 	RETURN_TOKEN(T_USE);
1395 }
1396 
1397 <ST_IN_SCRIPTING>"insteadof" {
1398     RETURN_TOKEN(T_INSTEADOF);
1399 }
1400 
1401 <ST_IN_SCRIPTING>"global" {
1402 	RETURN_TOKEN(T_GLOBAL);
1403 }
1404 
1405 <ST_IN_SCRIPTING>"isset" {
1406 	RETURN_TOKEN(T_ISSET);
1407 }
1408 
1409 <ST_IN_SCRIPTING>"empty" {
1410 	RETURN_TOKEN(T_EMPTY);
1411 }
1412 
1413 <ST_IN_SCRIPTING>"__halt_compiler" {
1414 	RETURN_TOKEN(T_HALT_COMPILER);
1415 }
1416 
1417 <ST_IN_SCRIPTING>"static" {
1418 	RETURN_TOKEN(T_STATIC);
1419 }
1420 
1421 <ST_IN_SCRIPTING>"abstract" {
1422 	RETURN_TOKEN(T_ABSTRACT);
1423 }
1424 
1425 <ST_IN_SCRIPTING>"final" {
1426 	RETURN_TOKEN(T_FINAL);
1427 }
1428 
1429 <ST_IN_SCRIPTING>"private" {
1430 	RETURN_TOKEN(T_PRIVATE);
1431 }
1432 
1433 <ST_IN_SCRIPTING>"protected" {
1434 	RETURN_TOKEN(T_PROTECTED);
1435 }
1436 
1437 <ST_IN_SCRIPTING>"public" {
1438 	RETURN_TOKEN(T_PUBLIC);
1439 }
1440 
1441 <ST_IN_SCRIPTING>"unset" {
1442 	RETURN_TOKEN(T_UNSET);
1443 }
1444 
1445 <ST_IN_SCRIPTING>"=>" {
1446 	RETURN_TOKEN(T_DOUBLE_ARROW);
1447 }
1448 
1449 <ST_IN_SCRIPTING>"list" {
1450 	RETURN_TOKEN(T_LIST);
1451 }
1452 
1453 <ST_IN_SCRIPTING>"array" {
1454 	RETURN_TOKEN(T_ARRAY);
1455 }
1456 
1457 <ST_IN_SCRIPTING>"callable" {
1458 	RETURN_TOKEN(T_CALLABLE);
1459 }
1460 
1461 <ST_IN_SCRIPTING>"++" {
1462 	RETURN_TOKEN(T_INC);
1463 }
1464 
1465 <ST_IN_SCRIPTING>"--" {
1466 	RETURN_TOKEN(T_DEC);
1467 }
1468 
1469 <ST_IN_SCRIPTING>"===" {
1470 	RETURN_TOKEN(T_IS_IDENTICAL);
1471 }
1472 
1473 <ST_IN_SCRIPTING>"!==" {
1474 	RETURN_TOKEN(T_IS_NOT_IDENTICAL);
1475 }
1476 
1477 <ST_IN_SCRIPTING>"==" {
1478 	RETURN_TOKEN(T_IS_EQUAL);
1479 }
1480 
1481 <ST_IN_SCRIPTING>"!="|"<>" {
1482 	RETURN_TOKEN(T_IS_NOT_EQUAL);
1483 }
1484 
1485 <ST_IN_SCRIPTING>"<=>" {
1486 	RETURN_TOKEN(T_SPACESHIP);
1487 }
1488 
1489 <ST_IN_SCRIPTING>"<=" {
1490 	RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
1491 }
1492 
1493 <ST_IN_SCRIPTING>">=" {
1494 	RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
1495 }
1496 
1497 <ST_IN_SCRIPTING>"+=" {
1498 	RETURN_TOKEN(T_PLUS_EQUAL);
1499 }
1500 
1501 <ST_IN_SCRIPTING>"-=" {
1502 	RETURN_TOKEN(T_MINUS_EQUAL);
1503 }
1504 
1505 <ST_IN_SCRIPTING>"*=" {
1506 	RETURN_TOKEN(T_MUL_EQUAL);
1507 }
1508 
1509 <ST_IN_SCRIPTING>"*\*" {
1510 	RETURN_TOKEN(T_POW);
1511 }
1512 
1513 <ST_IN_SCRIPTING>"*\*=" {
1514 	RETURN_TOKEN(T_POW_EQUAL);
1515 }
1516 
1517 <ST_IN_SCRIPTING>"/=" {
1518 	RETURN_TOKEN(T_DIV_EQUAL);
1519 }
1520 
1521 <ST_IN_SCRIPTING>".=" {
1522 	RETURN_TOKEN(T_CONCAT_EQUAL);
1523 }
1524 
1525 <ST_IN_SCRIPTING>"%=" {
1526 	RETURN_TOKEN(T_MOD_EQUAL);
1527 }
1528 
1529 <ST_IN_SCRIPTING>"<<=" {
1530 	RETURN_TOKEN(T_SL_EQUAL);
1531 }
1532 
1533 <ST_IN_SCRIPTING>">>=" {
1534 	RETURN_TOKEN(T_SR_EQUAL);
1535 }
1536 
1537 <ST_IN_SCRIPTING>"&=" {
1538 	RETURN_TOKEN(T_AND_EQUAL);
1539 }
1540 
1541 <ST_IN_SCRIPTING>"|=" {
1542 	RETURN_TOKEN(T_OR_EQUAL);
1543 }
1544 
1545 <ST_IN_SCRIPTING>"^=" {
1546 	RETURN_TOKEN(T_XOR_EQUAL);
1547 }
1548 
1549 <ST_IN_SCRIPTING>"||" {
1550 	RETURN_TOKEN(T_BOOLEAN_OR);
1551 }
1552 
1553 <ST_IN_SCRIPTING>"&&" {
1554 	RETURN_TOKEN(T_BOOLEAN_AND);
1555 }
1556 
1557 <ST_IN_SCRIPTING>"OR" {
1558 	RETURN_TOKEN(T_LOGICAL_OR);
1559 }
1560 
1561 <ST_IN_SCRIPTING>"AND" {
1562 	RETURN_TOKEN(T_LOGICAL_AND);
1563 }
1564 
1565 <ST_IN_SCRIPTING>"XOR" {
1566 	RETURN_TOKEN(T_LOGICAL_XOR);
1567 }
1568 
1569 <ST_IN_SCRIPTING>"<<" {
1570 	RETURN_TOKEN(T_SL);
1571 }
1572 
1573 <ST_IN_SCRIPTING>">>" {
1574 	RETURN_TOKEN(T_SR);
1575 }
1576 
1577 <ST_IN_SCRIPTING>{TOKENS} {
1578 	RETURN_TOKEN(yytext[0]);
1579 }
1580 
1581 
1582 <ST_IN_SCRIPTING>"{" {
1583 	yy_push_state(ST_IN_SCRIPTING);
1584 	RETURN_TOKEN('{');
1585 }
1586 
1587 
1588 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1589 	yy_push_state(ST_LOOKING_FOR_VARNAME);
1590 	RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
1591 }
1592 
1593 
1594 <ST_IN_SCRIPTING>"}" {
1595 	RESET_DOC_COMMENT();
1596 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
1597 		yy_pop_state();
1598 	}
1599 	RETURN_TOKEN('}');
1600 }
1601 
1602 
1603 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1604 	yyless(yyleng - 1);
1605 	zend_copy_value(zendlval, yytext, yyleng);
1606 	yy_pop_state();
1607 	yy_push_state(ST_IN_SCRIPTING);
1608 	RETURN_TOKEN(T_STRING_VARNAME);
1609 }
1610 
1611 
1612 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1613 	yyless(0);
1614 	yy_pop_state();
1615 	yy_push_state(ST_IN_SCRIPTING);
1616 	goto restart;
1617 }
1618 
1619 <ST_IN_SCRIPTING>{BNUM} {
1620 	char *bin = yytext + 2; /* Skip "0b" */
1621 	int len = yyleng - 2;
1622 	char *end;
1623 
1624 	/* Skip any leading 0s */
1625 	while (*bin == '0') {
1626 		++bin;
1627 		--len;
1628 	}
1629 
1630 	if (len < SIZEOF_ZEND_LONG * 8) {
1631 		if (len == 0) {
1632 			ZVAL_LONG(zendlval, 0);
1633 		} else {
1634 			errno = 0;
1635 			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1636 			ZEND_ASSERT(!errno && end == yytext + yyleng);
1637 		}
1638 		RETURN_TOKEN(T_LNUMBER);
1639 	} else {
1640 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1641 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1642 		ZEND_ASSERT(end == yytext + yyleng);
1643 		RETURN_TOKEN(T_DNUMBER);
1644 	}
1645 }
1646 
1647 <ST_IN_SCRIPTING>{LNUM} {
1648 	char *end;
1649 	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1650 		errno = 0;
1651 		/* base must be passed explicitly for correct parse error on Windows */
1652 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
1653 		/* This isn't an assert, we need to ensure 019 isn't valid octal
1654 		 * Because the lexing itself doesn't do that for us
1655 		 */
1656 		if (end != yytext + yyleng) {
1657 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1658 			ZVAL_UNDEF(zendlval);
1659 			RETURN_TOKEN(T_LNUMBER);
1660 		}
1661 	} else {
1662 		errno = 0;
1663 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1664 		if (errno == ERANGE) { /* Overflow */
1665 			errno = 0;
1666 			if (yytext[0] == '0') { /* octal overflow */
1667 				ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1668 			} else {
1669 				ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1670 			}
1671 			/* Also not an assert for the same reason */
1672 			if (end != yytext + yyleng) {
1673 				zend_throw_exception(zend_ce_parse_error,
1674 					"Invalid numeric literal", 0);
1675 				ZVAL_UNDEF(zendlval);
1676 				RETURN_TOKEN(T_DNUMBER);
1677 			}
1678 			RETURN_TOKEN(T_DNUMBER);
1679 		}
1680 		/* Also not an assert for the same reason */
1681 		if (end != yytext + yyleng) {
1682 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1683 			ZVAL_UNDEF(zendlval);
1684 			RETURN_TOKEN(T_DNUMBER);
1685 		}
1686 	}
1687 	ZEND_ASSERT(!errno);
1688 	RETURN_TOKEN(T_LNUMBER);
1689 }
1690 
1691 <ST_IN_SCRIPTING>{HNUM} {
1692 	char *hex = yytext + 2; /* Skip "0x" */
1693 	int len = yyleng - 2;
1694 	char *end;
1695 
1696 	/* Skip any leading 0s */
1697 	while (*hex == '0') {
1698 		hex++;
1699 		len--;
1700 	}
1701 
1702 	if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1703 		if (len == 0) {
1704 			ZVAL_LONG(zendlval, 0);
1705 		} else {
1706 			errno = 0;
1707 			ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1708 			ZEND_ASSERT(!errno && end == hex + len);
1709 		}
1710 		RETURN_TOKEN(T_LNUMBER);
1711 	} else {
1712 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1713 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1714 		ZEND_ASSERT(end == hex + len);
1715 		RETURN_TOKEN(T_DNUMBER);
1716 	}
1717 }
1718 
1719 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1720 	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1721 		char *end;
1722 		errno = 0;
1723 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1724 		if (errno == ERANGE) {
1725 			goto string;
1726 		}
1727 		ZEND_ASSERT(end == yytext + yyleng);
1728 	} else {
1729 string:
1730 		ZVAL_STRINGL(zendlval, yytext, yyleng);
1731 	}
1732 	RETURN_TOKEN(T_NUM_STRING);
1733 }
1734 
1735 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1736 	ZVAL_STRINGL(zendlval, yytext, yyleng);
1737 	RETURN_TOKEN(T_NUM_STRING);
1738 }
1739 
1740 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1741 	const char *end;
1742 
1743 	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1744 	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1745 	ZEND_ASSERT(end == yytext + yyleng);
1746 	RETURN_TOKEN(T_DNUMBER);
1747 }
1748 
1749 <ST_IN_SCRIPTING>"__CLASS__" {
1750 	RETURN_TOKEN(T_CLASS_C);
1751 }
1752 
1753 <ST_IN_SCRIPTING>"__TRAIT__" {
1754 	RETURN_TOKEN(T_TRAIT_C);
1755 }
1756 
1757 <ST_IN_SCRIPTING>"__FUNCTION__" {
1758 	RETURN_TOKEN(T_FUNC_C);
1759 }
1760 
1761 <ST_IN_SCRIPTING>"__METHOD__" {
1762 	RETURN_TOKEN(T_METHOD_C);
1763 }
1764 
1765 <ST_IN_SCRIPTING>"__LINE__" {
1766 	RETURN_TOKEN(T_LINE);
1767 }
1768 
1769 <ST_IN_SCRIPTING>"__FILE__" {
1770 	RETURN_TOKEN(T_FILE);
1771 }
1772 
1773 <ST_IN_SCRIPTING>"__DIR__" {
1774 	RETURN_TOKEN(T_DIR);
1775 }
1776 
1777 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1778 	RETURN_TOKEN(T_NS_C);
1779 }
1780 
1781 
1782 <INITIAL>"<?=" {
1783 	BEGIN(ST_IN_SCRIPTING);
1784 	RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
1785 }
1786 
1787 
1788 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1789 	HANDLE_NEWLINE(yytext[yyleng-1]);
1790 	BEGIN(ST_IN_SCRIPTING);
1791 	RETURN_TOKEN(T_OPEN_TAG);
1792 }
1793 
1794 
1795 <INITIAL>"<?" {
1796 	if (CG(short_tags)) {
1797 		BEGIN(ST_IN_SCRIPTING);
1798 		RETURN_TOKEN(T_OPEN_TAG);
1799 	} else {
1800 		goto inline_char_handler;
1801 	}
1802 }
1803 
1804 <INITIAL>{ANY_CHAR} {
1805 	if (YYCURSOR > YYLIMIT) {
1806 		RETURN_TOKEN(END);
1807 	}
1808 
1809 inline_char_handler:
1810 
1811 	while (1) {
1812 		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1813 
1814 		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1815 
1816 		if (YYCURSOR >= YYLIMIT) {
1817 			break;
1818 		}
1819 
1820 		if (*YYCURSOR == '?') {
1821 			if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1822 
1823 				YYCURSOR--;
1824 				break;
1825 			}
1826 		}
1827 	}
1828 
1829 	yyleng = YYCURSOR - SCNG(yy_text);
1830 
1831 	if (SCNG(output_filter)) {
1832 		size_t readsize;
1833 		char *s = NULL;
1834 		size_t sz = 0;
1835 		// TODO: avoid reallocation ???
1836 		readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1837 		ZVAL_STRINGL(zendlval, s, sz);
1838 		efree(s);
1839 		if (readsize < yyleng) {
1840 			yyless(readsize);
1841 		}
1842 	} else {
1843 	  ZVAL_STRINGL(zendlval, yytext, yyleng);
1844 	}
1845 	HANDLE_NEWLINES(yytext, yyleng);
1846 	RETURN_TOKEN(T_INLINE_HTML);
1847 }
1848 
1849 
1850 /* Make sure a label character follows "->", otherwise there is no property
1851  * and "->" will be taken literally
1852  */
1853 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
1854 	yyless(yyleng - 3);
1855 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
1856 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1857 	RETURN_TOKEN(T_VARIABLE);
1858 }
1859 
1860 /* A [ always designates a variable offset, regardless of what follows
1861  */
1862 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1863 	yyless(yyleng - 1);
1864 	yy_push_state(ST_VAR_OFFSET);
1865 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1866 	RETURN_TOKEN(T_VARIABLE);
1867 }
1868 
1869 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1870 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1871 	RETURN_TOKEN(T_VARIABLE);
1872 }
1873 
1874 <ST_VAR_OFFSET>"]" {
1875 	yy_pop_state();
1876 	RETURN_TOKEN(']');
1877 }
1878 
1879 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1880 	/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
1881 	RETURN_TOKEN(yytext[0]);
1882 }
1883 
1884 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1885 	/* Invalid rule to return a more explicit parse error with proper line number */
1886 	yyless(0);
1887 	yy_pop_state();
1888 	ZVAL_NULL(zendlval);
1889 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1890 }
1891 
1892 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1893 	zend_copy_value(zendlval, yytext, yyleng);
1894 	RETURN_TOKEN(T_STRING);
1895 }
1896 
1897 
1898 <ST_IN_SCRIPTING>"#"|"//" {
1899 	while (YYCURSOR < YYLIMIT) {
1900 		switch (*YYCURSOR++) {
1901 			case '\r':
1902 				if (*YYCURSOR == '\n') {
1903 					YYCURSOR++;
1904 				}
1905 				/* fall through */
1906 			case '\n':
1907 				CG(zend_lineno)++;
1908 				break;
1909 			case '?':
1910 				if (*YYCURSOR == '>') {
1911 					YYCURSOR--;
1912 					break;
1913 				}
1914 				/* fall through */
1915 			default:
1916 				continue;
1917 		}
1918 
1919 		break;
1920 	}
1921 
1922 	yyleng = YYCURSOR - SCNG(yy_text);
1923 
1924 	RETURN_TOKEN(T_COMMENT);
1925 }
1926 
1927 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1928 	int doc_com;
1929 
1930 	if (yyleng > 2) {
1931 		doc_com = 1;
1932 		RESET_DOC_COMMENT();
1933 	} else {
1934 		doc_com = 0;
1935 	}
1936 
1937 	while (YYCURSOR < YYLIMIT) {
1938 		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1939 			break;
1940 		}
1941 	}
1942 
1943 	if (YYCURSOR < YYLIMIT) {
1944 		YYCURSOR++;
1945 	} else {
1946 		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1947 	}
1948 
1949 	yyleng = YYCURSOR - SCNG(yy_text);
1950 	HANDLE_NEWLINES(yytext, yyleng);
1951 
1952 	if (doc_com) {
1953 		CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1954 		RETURN_TOKEN(T_DOC_COMMENT);
1955 	}
1956 
1957 	RETURN_TOKEN(T_COMMENT);
1958 }
1959 
1960 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1961 	BEGIN(INITIAL);
1962 	if (yytext[yyleng-1] != '>') {
1963 		CG(increment_lineno) = 1;
1964 	}
1965 	RETURN_TOKEN(T_CLOSE_TAG);  /* implicit ';' at php-end tag */
1966 }
1967 
1968 
1969 <ST_IN_SCRIPTING>b?['] {
1970 	register char *s, *t;
1971 	char *end;
1972 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
1973 
1974 	while (1) {
1975 		if (YYCURSOR < YYLIMIT) {
1976 			if (*YYCURSOR == '\'') {
1977 				YYCURSOR++;
1978 				yyleng = YYCURSOR - SCNG(yy_text);
1979 
1980 				break;
1981 			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1982 				YYCURSOR++;
1983 			}
1984 		} else {
1985 			yyleng = YYLIMIT - SCNG(yy_text);
1986 
1987 			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
1988 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1989 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1990 			ZVAL_NULL(zendlval);
1991 			RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1992 		}
1993 	}
1994 
1995 	ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1996 
1997 	/* convert escape sequences */
1998 	s = t = Z_STRVAL_P(zendlval);
1999 	end = s+Z_STRLEN_P(zendlval);
2000 	while (s<end) {
2001 		if (*s=='\\') {
2002 			s++;
2003 
2004 			switch(*s) {
2005 				case '\\':
2006 				case '\'':
2007 					*t++ = *s;
2008 					Z_STRLEN_P(zendlval)--;
2009 					break;
2010 				default:
2011 					*t++ = '\\';
2012 					*t++ = *s;
2013 					break;
2014 			}
2015 		} else {
2016 			*t++ = *s;
2017 		}
2018 
2019 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2020 			CG(zend_lineno)++;
2021 		}
2022 		s++;
2023 	}
2024 	*t = 0;
2025 
2026 	if (SCNG(output_filter)) {
2027 		size_t sz = 0;
2028 		char *str = NULL;
2029 		s = Z_STRVAL_P(zendlval);
2030 		// TODO: avoid reallocation ???
2031 		SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2032 		ZVAL_STRINGL(zendlval, str, sz);
2033 	}
2034 	RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2035 }
2036 
2037 
2038 <ST_IN_SCRIPTING>b?["] {
2039 	int bprefix = (yytext[0] != '"') ? 1 : 0;
2040 
2041 	while (YYCURSOR < YYLIMIT) {
2042 		switch (*YYCURSOR++) {
2043 			case '"':
2044 				yyleng = YYCURSOR - SCNG(yy_text);
2045 				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
2046 				RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2047 			case '$':
2048 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2049 					break;
2050 				}
2051 				continue;
2052 			case '{':
2053 				if (*YYCURSOR == '$') {
2054 					break;
2055 				}
2056 				continue;
2057 			case '\\':
2058 				if (YYCURSOR < YYLIMIT) {
2059 					YYCURSOR++;
2060 				}
2061 				/* fall through */
2062 			default:
2063 				continue;
2064 		}
2065 
2066 		YYCURSOR--;
2067 		break;
2068 	}
2069 
2070 	/* Remember how much was scanned to save rescanning */
2071 	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2072 
2073 	YYCURSOR = SCNG(yy_text) + yyleng;
2074 
2075 	BEGIN(ST_DOUBLE_QUOTES);
2076 	RETURN_TOKEN('"');
2077 }
2078 
2079 
2080 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2081 	char *s;
2082 	int bprefix = (yytext[0] != '<') ? 1 : 0;
2083 	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2084 
2085 	CG(zend_lineno)++;
2086 	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2087 	s = yytext+bprefix+3;
2088 	while ((*s == ' ') || (*s == '\t')) {
2089 		s++;
2090 		heredoc_label->length--;
2091 	}
2092 
2093 	if (*s == '\'') {
2094 		s++;
2095 		heredoc_label->length -= 2;
2096 
2097 		BEGIN(ST_NOWDOC);
2098 	} else {
2099 		if (*s == '"') {
2100 			s++;
2101 			heredoc_label->length -= 2;
2102 		}
2103 
2104 		BEGIN(ST_HEREDOC);
2105 	}
2106 
2107 	heredoc_label->label = estrndup(s, heredoc_label->length);
2108 
2109 	/* Check for ending label on the next line */
2110 	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2111 		YYCTYPE *end = YYCURSOR + heredoc_label->length;
2112 
2113 		if (*end == ';') {
2114 			end++;
2115 		}
2116 
2117 		if (*end == '\n' || *end == '\r') {
2118 			BEGIN(ST_END_HEREDOC);
2119 		}
2120 	}
2121 
2122 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2123 
2124 	RETURN_TOKEN(T_START_HEREDOC);
2125 }
2126 
2127 
2128 <ST_IN_SCRIPTING>[`] {
2129 	BEGIN(ST_BACKQUOTE);
2130 	RETURN_TOKEN('`');
2131 }
2132 
2133 
2134 <ST_END_HEREDOC>{ANY_CHAR} {
2135 	zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2136 
2137 	YYCURSOR += heredoc_label->length - 1;
2138 	yyleng = heredoc_label->length;
2139 
2140 	heredoc_label_dtor(heredoc_label);
2141 	efree(heredoc_label);
2142 
2143 	BEGIN(ST_IN_SCRIPTING);
2144 	RETURN_TOKEN(T_END_HEREDOC);
2145 }
2146 
2147 
2148 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2149 	Z_LVAL_P(zendlval) = (zend_long) '{';
2150 	yy_push_state(ST_IN_SCRIPTING);
2151 	yyless(1);
2152 	RETURN_TOKEN(T_CURLY_OPEN);
2153 }
2154 
2155 
2156 <ST_DOUBLE_QUOTES>["] {
2157 	BEGIN(ST_IN_SCRIPTING);
2158 	RETURN_TOKEN('"');
2159 }
2160 
2161 <ST_BACKQUOTE>[`] {
2162 	BEGIN(ST_IN_SCRIPTING);
2163 	RETURN_TOKEN('`');
2164 }
2165 
2166 
2167 <ST_DOUBLE_QUOTES>{ANY_CHAR} { /* Literal text in ST_DOUBLE_QUOTES: scan forward to the closing quote or the next interpolation start and return the run as T_ENCAPSED_AND_WHITESPACE. */
2168 	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) { /* a nonzero length was recorded outside this rule: reuse it instead of scanning again */
2169 		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; /* minus one presumably accounts for the char ANY_CHAR already consumed -- confirm */
2170 		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0); /* clear it so the recorded length is used only once */
2171 
2172 		goto double_quotes_scan_done;
2173 	}
2174 
2175 	if (YYCURSOR > YYLIMIT) { /* cursor is already past the input limit: report END instead of scanning */
2176 		RETURN_TOKEN(END);
2177 	}
2178 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { /* the token starts with a backslash: also consume the char after it so it cannot stop the scan */
2179 		YYCURSOR++;
2180 	}
2181 
2182 	while (YYCURSOR < YYLIMIT) {
2183 		switch (*YYCURSOR++) { /* break leaves the switch and reaches the step-back code below; continue keeps scanning */
2184 			case '"': /* closing quote: stop */
2185 				break;
2186 			case '$': /* dollar sign stops the scan only when a label start char or an opening brace follows */
2187 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2188 					break;
2189 				}
2190 				continue;
2191 			case '{': /* opening brace stops the scan only when a dollar sign follows */
2192 				if (*YYCURSOR == '$') {
2193 					break;
2194 				}
2195 				continue;
2196 			case '\\': /* backslash: skip the next char, if any, without inspecting it */
2197 				if (YYCURSOR < YYLIMIT) {
2198 					YYCURSOR++;
2199 				}
2200 				/* fall through */
2201 			default:
2202 				continue;
2203 		}
2204 
2205 		YYCURSOR--; /* step back so the stopping char is left for the next token */
2206 		break; /* leaves the while loop */
2207 	}
2208 
2209 double_quotes_scan_done:
2210 	yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from SCNG(yy_text) to the cursor */
2211 
2212 	zend_scan_escape_string(zendlval, yytext, yyleng, '"'); /* NOTE(review): presumably stores the escape-processed text in zendlval; any status it returns is ignored here -- confirm that is intended */
2213 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2214 }
2215 
2216 
2217 <ST_BACKQUOTE>{ANY_CHAR} { /* Literal text in ST_BACKQUOTE: scan forward to the closing backtick or the next interpolation start and return the run as T_ENCAPSED_AND_WHITESPACE. */
2218 	if (YYCURSOR > YYLIMIT) { /* cursor is already past the input limit: report END instead of scanning */
2219 		RETURN_TOKEN(END);
2220 	}
2221 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { /* the token starts with a backslash: also consume the char after it so it cannot stop the scan */
2222 		YYCURSOR++;
2223 	}
2224 
2225 	while (YYCURSOR < YYLIMIT) {
2226 		switch (*YYCURSOR++) { /* break leaves the switch and reaches the step-back code below; continue keeps scanning */
2227 			case '`': /* closing backtick: stop */
2228 				break;
2229 			case '$': /* dollar sign stops the scan only when a label start char or an opening brace follows */
2230 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2231 					break;
2232 				}
2233 				continue;
2234 			case '{': /* opening brace stops the scan only when a dollar sign follows */
2235 				if (*YYCURSOR == '$') {
2236 					break;
2237 				}
2238 				continue;
2239 			case '\\': /* backslash: skip the next char, if any, without inspecting it */
2240 				if (YYCURSOR < YYLIMIT) {
2241 					YYCURSOR++;
2242 				}
2243 				/* fall through */
2244 			default:
2245 				continue;
2246 		}
2247 
2248 		YYCURSOR--; /* step back so the stopping char is left for the next token */
2249 		break; /* leaves the while loop */
2250 	}
2251 
2252 	yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from SCNG(yy_text) to the cursor */
2253 
2254 	zend_scan_escape_string(zendlval, yytext, yyleng, '`'); /* NOTE(review): presumably stores the escape-processed text in zendlval; any status it returns is ignored here -- confirm that is intended */
2255 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2256 }
2257 
2258 
2259 <ST_HEREDOC>{ANY_CHAR} { /* Heredoc body text: scan to the next interpolation start or to the line holding the closing label and return the run as T_ENCAPSED_AND_WHITESPACE. */
2260 	int newline = 0; /* bytes of the line break before the closing label (0, 1 or 2) that are left out of the returned string */
2261 
2262 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); /* read the current label without popping it; the ST_END_HEREDOC rule pops it */
2263 
2264 	if (YYCURSOR > YYLIMIT) { /* cursor is already past the input limit: report END instead of scanning */
2265 		RETURN_TOKEN(END);
2266 	}
2267 
2268 	YYCURSOR--; /* step back one char so the loop below examines the char that was just matched as well */
2269 
2270 	while (YYCURSOR < YYLIMIT) {
2271 		switch (*YYCURSOR++) { /* break leaves the switch and reaches the step-back code below; continue keeps scanning */
2272 			case '\r': /* a carriage return followed by a line feed counts as one line break */
2273 				if (*YYCURSOR == '\n') {
2274 					YYCURSOR++;
2275 				}
2276 				/* fall through */
2277 			case '\n':
2278 				/* Check for ending label on the next line */
2279 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2280 					YYCTYPE *end = YYCURSOR + heredoc_label->length; /* first char after the candidate label */
2281 
2282 					if (*end == ';') { /* one semicolon may directly follow the label */
2283 						end++;
2284 					}
2285 
2286 					if (*end == '\n' || *end == '\r') { /* only a label followed by a line break ends the heredoc */
2287 						/* newline before label will be subtracted from returned text, but
2288 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2289 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2290 							newline = 2; /* Windows newline */
2291 						} else {
2292 							newline = 1;
2293 						}
2294 
2295 						CG(increment_lineno) = 1; /* For newline before label */
2296 						BEGIN(ST_END_HEREDOC);
2297 
2298 						goto heredoc_scan_done; /* cursor stays at the first char of the label, which the ST_END_HEREDOC rule then consumes */
2299 					}
2300 				}
2301 				continue;
2302 			case '$': /* dollar sign stops the scan only when a label start char or an opening brace follows */
2303 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2304 					break;
2305 				}
2306 				continue;
2307 			case '{': /* opening brace stops the scan only when a dollar sign follows */
2308 				if (*YYCURSOR == '$') {
2309 					break;
2310 				}
2311 				continue;
2312 			case '\\': /* backslash: skip the next char unless it is a line break char, so that line break still goes through the closing label check */
2313 				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2314 					YYCURSOR++;
2315 				}
2316 				/* fall through */
2317 			default:
2318 				continue;
2319 		}
2320 
2321 		YYCURSOR--; /* step back so the stopping char is left for the next token */
2322 		break; /* leaves the while loop */
2323 	}
2324 
2325 heredoc_scan_done:
2326 	yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from SCNG(yy_text) to the cursor, line break before the label included */
2327 
2328 	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0); /* NOTE(review): presumably stores the escape-processed text in zendlval; quote type 0 is passed and any returned status is ignored -- confirm that is intended */
2329 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2330 }
2331 
2332 
2333 <ST_NOWDOC>{ANY_CHAR} { /* Nowdoc body text: scan to the line holding the closing label, with no stop at dollar signs, braces or backslashes, and return the run as T_ENCAPSED_AND_WHITESPACE. */
2334 	int newline = 0; /* bytes of the line break before the closing label (0, 1 or 2) that are left out of the returned string */
2335 
2336 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); /* read the current label without popping it; the ST_END_HEREDOC rule pops it */
2337 
2338 	if (YYCURSOR > YYLIMIT) { /* cursor is already past the input limit: report END instead of scanning */
2339 		RETURN_TOKEN(END);
2340 	}
2341 
2342 	YYCURSOR--; /* step back one char so the loop below examines the char that was just matched as well */
2343 
2344 	while (YYCURSOR < YYLIMIT) {
2345 		switch (*YYCURSOR++) {
2346 			case '\r': /* a carriage return followed by a line feed counts as one line break */
2347 				if (*YYCURSOR == '\n') {
2348 					YYCURSOR++;
2349 				}
2350 				/* fall through */
2351 			case '\n':
2352 				/* Check for ending label on the next line */
2353 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2354 					YYCTYPE *end = YYCURSOR + heredoc_label->length; /* first char after the candidate label */
2355 
2356 					if (*end == ';') { /* one semicolon may directly follow the label */
2357 						end++;
2358 					}
2359 
2360 					if (*end == '\n' || *end == '\r') { /* only a label followed by a line break ends the nowdoc */
2361 						/* newline before label will be subtracted from returned text, but
2362 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2363 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2364 							newline = 2; /* Windows newline */
2365 						} else {
2366 							newline = 1;
2367 						}
2368 
2369 						CG(increment_lineno) = 1; /* For newline before label */
2370 						BEGIN(ST_END_HEREDOC);
2371 
2372 						goto nowdoc_scan_done; /* cursor stays at the first char of the label, which the ST_END_HEREDOC rule then consumes */
2373 					}
2374 				}
2375 				/* fall through */
2376 			default: /* every other char is plain text */
2377 				continue;
2378 		}
2379 	}
2380 
2381 nowdoc_scan_done:
2382 	yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from SCNG(yy_text) to the cursor, line break before the label included */
2383 
2384 	zend_copy_value(zendlval, yytext, yyleng - newline); /* presumably copies the text into zendlval as is; this rule makes no escape processing call -- confirm against zend_copy_value */
2385 	HANDLE_NEWLINES(yytext, yyleng - newline); /* NOTE(review): presumably accounts for the line breaks inside the text -- confirm against the macro */
2386 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2387 }
2388 
2389 
2390 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { /* A single char in ST_IN_SCRIPTING or ST_VAR_OFFSET that reaches this rule (presumably the catch-all for these states): end of input, or an unexpected char that is reported and dropped. */
2391 	if (YYCURSOR > YYLIMIT) { /* cursor is past the input limit: report END */
2392 		RETURN_TOKEN(END);
2393 	}
2394 
2395 	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); /* warning shows the char, its numeric code and the current scanner state */
2396 	goto restart; /* no token is returned for this char; control jumps to the restart label defined outside this rule */
2397 }
2398 
2399 */
2400 }
2401