xref: /PHP-7.0/Zend/zend_language_scanner.l (revision 95d29088)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2017 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Marcus Boerger <helly@php.net>                              |
16    |          Nuno Lopes <nlopess@php.net>                                |
17    |          Scott MacVicar <scottmac@php.net>                           |
18    | Flex version authors:                                                |
19    |          Andi Gutmans <andi@zend.com>                                |
20    |          Zeev Suraski <zeev@zend.com>                                |
21    +----------------------------------------------------------------------+
22 */
23 
24 /* $Id$ */
25 
26 #if 0
27 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28 #else
29 # define YYDEBUG(s, c)
30 #endif
31 
32 #include "zend_language_scanner_defs.h"
33 
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef ZEND_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52 
/* re2c interface: input character type plus cursor/limit/marker accessors.
 * YYFILL(n) makes the enclosing function return 0 once cursor + n would
 * reach YYLIMIT + ZEND_MMAP_AHEAD. The argument is parenthesized so that a
 * compound expression is added to the cursor as a whole. */
#define YYCTYPE   unsigned char
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* accessors for the scanner's current start condition */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s

/* maps a bare condition name onto its yyc-prefixed identifier */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* yyless(x): move the cursor to yytext + x and set the token length to x.
   x is parenthesized so that compound arguments (e.g. a conditional
   expression) apply as a whole to both assignments. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72 
73 /* perform sanity check. If this message is triggered you should
74    increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79 
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83 
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87 
88 /* Globals Macros */
89 #define SCNG	LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95 
/* Walks the l bytes starting at s and increments CG(zend_lineno) once per
 * line terminator: for every '\n', and for every '\r' that is not directly
 * followed by '\n' (so "\r\n" is counted once, on its '\n'). */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *nl_cur_ = (s); \
	char *nl_end_ = nl_cur_ + (l); \
 \
	for (; nl_cur_ < nl_end_; nl_cur_++) { \
		if (*nl_cur_ == '\n') { \
			CG(zend_lineno)++; \
		} else if (*nl_cur_ == '\r' && nl_cur_[1] != '\n') { \
			CG(zend_lineno)++; \
		} \
	} \
} while (0)
107 
/* Increments CG(zend_lineno) when the single character c is '\n' or '\r'.
 * c is parenthesized so that an expression argument is compared as a whole;
 * it may still be evaluated twice, so it must not have side effects.
 * The plain-brace form is kept unchanged for existing call sites. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114 
/* Accessors for the length scanned in a double-quoted string up to its first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* true for an ASCII letter, an underscore, or any value >= 0x7F */
#define IS_LABEL_START(c) \
	(((c) >= 'a' && (c) <= 'z') \
	 || ((c) >= 'A' && (c) <= 'Z') \
	 || (c) == '_' \
	 || (c) >= 0x7F)

/* octal and hexadecimal digit tests */
#define ZEND_IS_OCT(c)  ((c) >= '0' && (c) <= '7')
#define ZEND_IS_HEX(c) \
	(((c) >= '0' && (c) <= '9') \
	 || ((c) >= 'a' && (c) <= 'f') \
	 || ((c) >= 'A' && (c) <= 'F'))
123 
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125 
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127 {
128 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129 	ZEND_ASSERT(internal_encoding);
130 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131 }
132 
/* Encoding filter: converts `from` (script encoding) into UTF-8, which is
 * used as the intermediate encoding. */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
	const zend_encoding *source = LANG_SCNG(script_encoding);

	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		zend_multibyte_encoding_utf8, source);
}
137 
/* Encoding filter: converts `from` (intermediate UTF-8) back into the
 * script encoding. */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
	const zend_encoding *target = LANG_SCNG(script_encoding);

	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		target, zend_multibyte_encoding_utf8);
}
143 
/* Encoding filter: converts `from` (intermediate UTF-8) into the internal
 * encoding. The internal encoding must be configured (asserted). */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
	const zend_encoding *target = zend_multibyte_get_internal_encoding();

	ZEND_ASSERT(target);
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		target, zend_multibyte_encoding_utf8);
}
151 
152 
_yy_push_state(int new_state)153 static void _yy_push_state(int new_state)
154 {
155 	zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156 	YYSETCONDITION(new_state);
157 }
158 
159 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160 
yy_pop_state(void)161 static void yy_pop_state(void)
162 {
163 	int *stack_state = zend_stack_top(&SCNG(state_stack));
164 	YYSETCONDITION(*stack_state);
165 	zend_stack_del_top(&SCNG(state_stack));
166 }
167 
yy_scan_buffer(char * str,unsigned int len)168 static void yy_scan_buffer(char *str, unsigned int len)
169 {
170 	YYCURSOR       = (YYCTYPE*)str;
171 	YYLIMIT        = YYCURSOR + len;
172 	if (!SCNG(yy_start)) {
173 		SCNG(yy_start) = YYCURSOR;
174 	}
175 }
176 
startup_scanner(void)177 void startup_scanner(void)
178 {
179 	CG(parse_error) = 0;
180 	CG(doc_comment) = NULL;
181 	zend_stack_init(&SCNG(state_stack), sizeof(int));
182 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183 }
184 
/* Element destructor for the heredoc label stack: frees the label string. */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
188 
shutdown_scanner(void)189 void shutdown_scanner(void)
190 {
191 	CG(parse_error) = 0;
192 	RESET_DOC_COMMENT();
193 	zend_stack_destroy(&SCNG(state_stack));
194 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196 	SCNG(on_event) = NULL;
197 }
198 
zend_save_lexical_state(zend_lex_state * lex_state)199 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
200 {
201 	lex_state->yy_leng   = SCNG(yy_leng);
202 	lex_state->yy_start  = SCNG(yy_start);
203 	lex_state->yy_text   = SCNG(yy_text);
204 	lex_state->yy_cursor = SCNG(yy_cursor);
205 	lex_state->yy_marker = SCNG(yy_marker);
206 	lex_state->yy_limit  = SCNG(yy_limit);
207 
208 	lex_state->state_stack = SCNG(state_stack);
209 	zend_stack_init(&SCNG(state_stack), sizeof(int));
210 
211 	lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
212 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
213 
214 	lex_state->in = SCNG(yy_in);
215 	lex_state->yy_state = YYSTATE;
216 	lex_state->filename = zend_get_compiled_filename();
217 	lex_state->lineno = CG(zend_lineno);
218 
219 	lex_state->script_org = SCNG(script_org);
220 	lex_state->script_org_size = SCNG(script_org_size);
221 	lex_state->script_filtered = SCNG(script_filtered);
222 	lex_state->script_filtered_size = SCNG(script_filtered_size);
223 	lex_state->input_filter = SCNG(input_filter);
224 	lex_state->output_filter = SCNG(output_filter);
225 	lex_state->script_encoding = SCNG(script_encoding);
226 
227 	lex_state->on_event = SCNG(on_event);
228 
229 	lex_state->ast = CG(ast);
230 	lex_state->ast_arena = CG(ast_arena);
231 }
232 
zend_restore_lexical_state(zend_lex_state * lex_state)233 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
234 {
235 	SCNG(yy_leng)   = lex_state->yy_leng;
236 	SCNG(yy_start)  = lex_state->yy_start;
237 	SCNG(yy_text)   = lex_state->yy_text;
238 	SCNG(yy_cursor) = lex_state->yy_cursor;
239 	SCNG(yy_marker) = lex_state->yy_marker;
240 	SCNG(yy_limit)  = lex_state->yy_limit;
241 
242 	zend_stack_destroy(&SCNG(state_stack));
243 	SCNG(state_stack) = lex_state->state_stack;
244 
245 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
246 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
247 	SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
248 
249 	SCNG(yy_in) = lex_state->in;
250 	YYSETCONDITION(lex_state->yy_state);
251 	CG(zend_lineno) = lex_state->lineno;
252 	zend_restore_compiled_filename(lex_state->filename);
253 
254 	if (SCNG(script_filtered)) {
255 		efree(SCNG(script_filtered));
256 		SCNG(script_filtered) = NULL;
257 	}
258 	SCNG(script_org) = lex_state->script_org;
259 	SCNG(script_org_size) = lex_state->script_org_size;
260 	SCNG(script_filtered) = lex_state->script_filtered;
261 	SCNG(script_filtered_size) = lex_state->script_filtered_size;
262 	SCNG(input_filter) = lex_state->input_filter;
263 	SCNG(output_filter) = lex_state->output_filter;
264 	SCNG(script_encoding) = lex_state->script_encoding;
265 
266 	SCNG(on_event) = lex_state->on_event;
267 
268 	CG(ast) = lex_state->ast;
269 	CG(ast_arena) = lex_state->ast_arena;
270 
271 	RESET_DOC_COMMENT();
272 }
273 
zend_destroy_file_handle(zend_file_handle * file_handle)274 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
275 {
276 	zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
277 	/* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
278 	file_handle->opened_path = NULL;
279 	if (file_handle->free_filename) {
280 		file_handle->filename = NULL;
281 	}
282 }
283 
zend_lex_tstring(zval * zv)284 ZEND_API void zend_lex_tstring(zval *zv)
285 {
286 	if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0);
287 
288 	ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
289 }
290 
/* Byte order marks; sizeof(BOM_x)-1 is the length of the mark in bytes */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8     "\xef\xbb\xbf"
296 
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)297 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
298 {
299 	const unsigned char *p;
300 	int wchar_size = 2;
301 	int le = 0;
302 
303 	/* utf-16 or utf-32? */
304 	p = script;
305 	assert(p >= script);
306 	while ((size_t)(p-script) < script_size) {
307 		p = memchr(p, 0, script_size-(p-script)-2);
308 		if (!p) {
309 			break;
310 		}
311 		if (*(p+1) == '\0' && *(p+2) == '\0') {
312 			wchar_size = 4;
313 			break;
314 		}
315 
316 		/* searching for UTF-32 specific byte orders, so this will do */
317 		p += 4;
318 	}
319 
320 	/* BE or LE? */
321 	p = script;
322 	assert(p >= script);
323 	while ((size_t)(p-script) < script_size) {
324 		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
325 			/* BE */
326 			le = 0;
327 			break;
328 		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
329 			/* LE* */
330 			le = 1;
331 			break;
332 		}
333 		p += wchar_size;
334 	}
335 
336 	if (wchar_size == 2) {
337 		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
338 	} else {
339 		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
340 	}
341 
342 	return NULL;
343 }
344 
zend_multibyte_detect_unicode(void)345 static const zend_encoding* zend_multibyte_detect_unicode(void)
346 {
347 	const zend_encoding *script_encoding = NULL;
348 	int bom_size;
349 	unsigned char *pos1, *pos2;
350 
351 	if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
352 		return NULL;
353 	}
354 
355 	/* check out BOM */
356 	if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
357 		script_encoding = zend_multibyte_encoding_utf32be;
358 		bom_size = sizeof(BOM_UTF32_BE)-1;
359 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
360 		script_encoding = zend_multibyte_encoding_utf32le;
361 		bom_size = sizeof(BOM_UTF32_LE)-1;
362 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
363 		script_encoding = zend_multibyte_encoding_utf16be;
364 		bom_size = sizeof(BOM_UTF16_BE)-1;
365 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
366 		script_encoding = zend_multibyte_encoding_utf16le;
367 		bom_size = sizeof(BOM_UTF16_LE)-1;
368 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
369 		script_encoding = zend_multibyte_encoding_utf8;
370 		bom_size = sizeof(BOM_UTF8)-1;
371 	}
372 
373 	if (script_encoding) {
374 		/* remove BOM */
375 		LANG_SCNG(script_org) += bom_size;
376 		LANG_SCNG(script_org_size) -= bom_size;
377 
378 		return script_encoding;
379 	}
380 
381 	/* script contains NULL bytes -> auto-detection */
382 	if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
383 		/* check if the NULL byte is after the __HALT_COMPILER(); */
384 		pos2 = LANG_SCNG(script_org);
385 
386 		while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
387 			pos2 = memchr(pos2, '_', pos1 - pos2);
388 			if (!pos2) break;
389 			pos2++;
390 			if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
391 				pos2 += sizeof("_HALT_COMPILER")-1;
392 				while (*pos2 == ' '  ||
393 					   *pos2 == '\t' ||
394 					   *pos2 == '\r' ||
395 					   *pos2 == '\n') {
396 					pos2++;
397 				}
398 				if (*pos2 == '(') {
399 					pos2++;
400 					while (*pos2 == ' '  ||
401 						   *pos2 == '\t' ||
402 						   *pos2 == '\r' ||
403 						   *pos2 == '\n') {
404 						pos2++;
405 					}
406 					if (*pos2 == ')') {
407 						pos2++;
408 						while (*pos2 == ' '  ||
409 							   *pos2 == '\t' ||
410 							   *pos2 == '\r' ||
411 							   *pos2 == '\n') {
412 							pos2++;
413 						}
414 						if (*pos2 == ';') {
415 							return NULL;
416 						}
417 					}
418 				}
419 			}
420 		}
421 		/* make best effort if BOM is missing */
422 		return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
423 	}
424 
425 	return NULL;
426 }
427 
zend_multibyte_find_script_encoding(void)428 static const zend_encoding* zend_multibyte_find_script_encoding(void)
429 {
430 	const zend_encoding *script_encoding;
431 
432 	if (CG(detect_unicode)) {
433 		/* check out bom(byte order mark) and see if containing wchars */
434 		script_encoding = zend_multibyte_detect_unicode();
435 		if (script_encoding != NULL) {
436 			/* bom or wchar detection is prior to 'script_encoding' option */
437 			return script_encoding;
438 		}
439 	}
440 
441 	/* if no script_encoding specified, just leave alone */
442 	if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
443 		return NULL;
444 	}
445 
446 	/* if multiple encodings specified, detect automagically */
447 	if (CG(script_encoding_list_size) > 1) {
448 		return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
449 	}
450 
451 	return CG(script_encoding_list)[0];
452 }
453 
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)454 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
455 {
456 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
457 	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
458 
459 	if (!script_encoding) {
460 		return FAILURE;
461 	}
462 
463 	/* judge input/output filter */
464 	LANG_SCNG(script_encoding) = script_encoding;
465 	LANG_SCNG(input_filter) = NULL;
466 	LANG_SCNG(output_filter) = NULL;
467 
468 	if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
469 		if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
470 			/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
471 			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
472 			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
473 		} else {
474 			LANG_SCNG(input_filter) = NULL;
475 			LANG_SCNG(output_filter) = NULL;
476 		}
477 		return SUCCESS;
478 	}
479 
480 	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
481 		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
482 		LANG_SCNG(output_filter) = NULL;
483 	} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
484 		LANG_SCNG(input_filter) = NULL;
485 		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
486 	} else {
487 		/* both script and internal encodings are incompatible w/ flex */
488 		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
489 		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
490 	}
491 
492 	return 0;
493 }
494 
open_file_for_scanning(zend_file_handle * file_handle)495 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
496 {
497 	char *buf;
498 	size_t size, offset = 0;
499 	zend_string *compiled_filename;
500 
501 	/* The shebang line was read, get the current position to obtain the buffer start */
502 	if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
503 		if ((offset = ftell(file_handle->handle.fp)) == -1) {
504 			offset = 0;
505 		}
506 	}
507 
508 	if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
509 		return FAILURE;
510 	}
511 
512 	zend_llist_add_element(&CG(open_files), file_handle);
513 	if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
514 		zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
515 		size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
516 		fh->handle.stream.handle = (void*)(((char*)fh) + diff);
517 		file_handle->handle.stream.handle = fh->handle.stream.handle;
518 	}
519 
520 	/* Reset the scanner for scanning the new file */
521 	SCNG(yy_in) = file_handle;
522 	SCNG(yy_start) = NULL;
523 
524 	if (size != -1) {
525 		if (CG(multibyte)) {
526 			SCNG(script_org) = (unsigned char*)buf;
527 			SCNG(script_org_size) = size;
528 			SCNG(script_filtered) = NULL;
529 
530 			zend_multibyte_set_filter(NULL);
531 
532 			if (SCNG(input_filter)) {
533 				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
534 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
535 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
536 				}
537 				buf = (char*)SCNG(script_filtered);
538 				size = SCNG(script_filtered_size);
539 			}
540 		}
541 		SCNG(yy_start) = (unsigned char *)buf - offset;
542 		yy_scan_buffer(buf, (unsigned int)size);
543 	} else {
544 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
545 	}
546 
547 	BEGIN(INITIAL);
548 
549 	if (file_handle->opened_path) {
550 		compiled_filename = zend_string_copy(file_handle->opened_path);
551 	} else {
552 		compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
553 	}
554 
555 	zend_set_compiled_filename(compiled_filename);
556 	zend_string_release(compiled_filename);
557 
558 	if (CG(start_lineno)) {
559 		CG(zend_lineno) = CG(start_lineno);
560 		CG(start_lineno) = 0;
561 	} else {
562 		CG(zend_lineno) = 1;
563 	}
564 
565 	RESET_DOC_COMMENT();
566 	CG(increment_lineno) = 0;
567 	return SUCCESS;
568 }
END_EXTERN_C()569 END_EXTERN_C()
570 
571 
572 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
573 {
574 	zend_lex_state original_lex_state;
575 	zend_op_array *op_array = NULL;
576 	zend_save_lexical_state(&original_lex_state);
577 
578 	if (open_file_for_scanning(file_handle)==FAILURE) {
579 		if (type==ZEND_REQUIRE) {
580 			zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
581 			zend_bailout();
582 		} else {
583 			zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
584 		}
585 	} else {
586 		zend_bool original_in_compilation = CG(in_compilation);
587 		CG(in_compilation) = 1;
588 
589 		CG(ast) = NULL;
590 		CG(ast_arena) = zend_arena_create(1024 * 32);
591 		if (!zendparse()) {
592 			int last_lineno = CG(zend_lineno);
593 			zval retval_zv;
594 			zend_file_context original_file_context;
595 			zend_oparray_context original_oparray_context;
596 			zend_op_array *original_active_op_array = CG(active_op_array);
597 			op_array = emalloc(sizeof(zend_op_array));
598 			init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE);
599 			CG(active_op_array) = op_array;
600 			ZVAL_LONG(&retval_zv, 1);
601 
602 			if (zend_ast_process) {
603 				zend_ast_process(CG(ast));
604 			}
605 
606 			zend_file_context_begin(&original_file_context);
607 			zend_oparray_context_begin(&original_oparray_context);
608 			zend_compile_top_stmt(CG(ast));
609 			CG(zend_lineno) = last_lineno;
610 			zend_emit_final_return(&retval_zv);
611 			op_array->line_start = 1;
612 			op_array->line_end = last_lineno;
613 			pass_two(op_array);
614 			zend_oparray_context_end(&original_oparray_context);
615 			zend_file_context_end(&original_file_context);
616 
617 			CG(active_op_array) = original_active_op_array;
618 		}
619 
620 		zend_ast_destroy(CG(ast));
621 		zend_arena_destroy(CG(ast_arena));
622 		CG(in_compilation) = original_in_compilation;
623 	}
624 
625 	zend_restore_lexical_state(&original_lex_state);
626 	return op_array;
627 }
628 
629 
compile_filename(int type,zval * filename)630 zend_op_array *compile_filename(int type, zval *filename)
631 {
632 	zend_file_handle file_handle;
633 	zval tmp;
634 	zend_op_array *retval;
635 	zend_string *opened_path = NULL;
636 
637 	if (Z_TYPE_P(filename) != IS_STRING) {
638 		tmp = *filename;
639 		zval_copy_ctor(&tmp);
640 		convert_to_string(&tmp);
641 		filename = &tmp;
642 	}
643 	file_handle.filename = Z_STRVAL_P(filename);
644 	file_handle.free_filename = 0;
645 	file_handle.type = ZEND_HANDLE_FILENAME;
646 	file_handle.opened_path = NULL;
647 	file_handle.handle.fp = NULL;
648 
649 	retval = zend_compile_file(&file_handle, type);
650 	if (retval && file_handle.handle.stream.handle) {
651 		if (!file_handle.opened_path) {
652 			file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
653 		}
654 
655 		zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
656 
657 		if (opened_path) {
658 			zend_string_release(opened_path);
659 		}
660 	}
661 	zend_destroy_file_handle(&file_handle);
662 
663 	if (filename==&tmp) {
664 		zval_dtor(&tmp);
665 	}
666 	return retval;
667 }
668 
zend_prepare_string_for_scanning(zval * str,char * filename)669 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
670 {
671 	char *buf;
672 	size_t size, old_len;
673 	zend_string *new_compiled_filename;
674 
675 	/* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
676 	old_len = Z_STRLEN_P(str);
677 	Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
678 	Z_TYPE_INFO_P(str) = IS_STRING_EX;
679 	memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
680 
681 	SCNG(yy_in) = NULL;
682 	SCNG(yy_start) = NULL;
683 
684 	buf = Z_STRVAL_P(str);
685 	size = old_len;
686 
687 	if (CG(multibyte)) {
688 		SCNG(script_org) = (unsigned char*)buf;
689 		SCNG(script_org_size) = size;
690 		SCNG(script_filtered) = NULL;
691 
692 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
693 
694 		if (SCNG(input_filter)) {
695 			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
696 				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
697 						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
698 			}
699 			buf = (char*)SCNG(script_filtered);
700 			size = SCNG(script_filtered_size);
701 		}
702 	}
703 
704 	yy_scan_buffer(buf, (unsigned int)size);
705 
706 	new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
707 	zend_set_compiled_filename(new_compiled_filename);
708 	zend_string_release(new_compiled_filename);
709 	CG(zend_lineno) = 1;
710 	CG(increment_lineno) = 0;
711 	RESET_DOC_COMMENT();
712 	return SUCCESS;
713 }
714 
715 
zend_get_scanned_file_offset(void)716 ZEND_API size_t zend_get_scanned_file_offset(void)
717 {
718 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
719 	if (SCNG(input_filter)) {
720 		size_t original_offset = offset, length = 0;
721 		do {
722 			unsigned char *p = NULL;
723 			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
724 				return (size_t)-1;
725 			}
726 			efree(p);
727 			if (length > original_offset) {
728 				offset--;
729 			} else if (length < original_offset) {
730 				offset++;
731 			}
732 		} while (original_offset != length);
733 	}
734 	return offset;
735 }
736 
737 
compile_string(zval * source_string,char * filename)738 zend_op_array *compile_string(zval *source_string, char *filename)
739 {
740 	zend_lex_state original_lex_state;
741 	zend_op_array *op_array = NULL;
742 	zval tmp;
743 	zend_bool original_in_compilation = CG(in_compilation);
744 
745 	if (Z_STRLEN_P(source_string)==0) {
746 		return NULL;
747 	}
748 
749 	ZVAL_DUP(&tmp, source_string);
750 	convert_to_string(&tmp);
751 	source_string = &tmp;
752 
753 	CG(in_compilation) = 1;
754 	zend_save_lexical_state(&original_lex_state);
755 	if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
756 		CG(ast) = NULL;
757 		CG(ast_arena) = zend_arena_create(1024 * 32);
758 		BEGIN(ST_IN_SCRIPTING);
759 
760 		if (!zendparse()) {
761 			int last_lineno = CG(zend_lineno);
762 			zend_file_context original_file_context;
763 			zend_oparray_context original_oparray_context;
764 			zend_op_array *original_active_op_array = CG(active_op_array);
765 			op_array = emalloc(sizeof(zend_op_array));
766 			init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE);
767 			CG(active_op_array) = op_array;
768 
769 			if (zend_ast_process) {
770 				zend_ast_process(CG(ast));
771 			}
772 
773 			zend_file_context_begin(&original_file_context);
774 			zend_oparray_context_begin(&original_oparray_context);
775 			zend_compile_top_stmt(CG(ast));
776 			CG(zend_lineno) = last_lineno;
777 			zend_emit_final_return(NULL);
778 			op_array->line_start = 1;
779 			op_array->line_end = last_lineno;
780 			pass_two(op_array);
781 			zend_oparray_context_end(&original_oparray_context);
782 			zend_file_context_end(&original_file_context);
783 
784 			CG(active_op_array) = original_active_op_array;
785 		}
786 
787 		zend_ast_destroy(CG(ast));
788 		zend_arena_destroy(CG(ast_arena));
789 	}
790 
791 	zend_restore_lexical_state(&original_lex_state);
792 	zval_dtor(&tmp);
793 	CG(in_compilation) = original_in_compilation;
794 	return op_array;
795 }
796 
797 
BEGIN_EXTERN_C()798 BEGIN_EXTERN_C()
799 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
800 {
801 	zend_lex_state original_lex_state;
802 	zend_file_handle file_handle;
803 
804 	file_handle.type = ZEND_HANDLE_FILENAME;
805 	file_handle.filename = filename;
806 	file_handle.free_filename = 0;
807 	file_handle.opened_path = NULL;
808 	zend_save_lexical_state(&original_lex_state);
809 	if (open_file_for_scanning(&file_handle)==FAILURE) {
810 		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
811 		zend_restore_lexical_state(&original_lex_state);
812 		return FAILURE;
813 	}
814 	zend_highlight(syntax_highlighter_ini);
815 	if (SCNG(script_filtered)) {
816 		efree(SCNG(script_filtered));
817 		SCNG(script_filtered) = NULL;
818 	}
819 	zend_destroy_file_handle(&file_handle);
820 	zend_restore_lexical_state(&original_lex_state);
821 	return SUCCESS;
822 }
823 
/*
 * Syntax-highlights the code in `str` using the colours in
 * syntax_highlighter_ini; str_name is reported as the compiled filename.
 * The work is done on a private copy of `str`, and the caller's lexical
 * state is saved and restored around the operation.
 * Returns FAILURE if the string cannot be prepared for scanning, SUCCESS
 * otherwise. The private copy is released on both paths.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	/* scanning extends the string in place, so never touch the caller's zval */
	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state);
	if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state);
		/* the copy made above must not leak on the error path */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini);
	/* release the converted copy of the script, if one was made */
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state);
	zval_dtor(str);
	return SUCCESS;
}
846 
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)847 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
848 {
849 	size_t length;
850 	unsigned char *new_yy_start;
851 
852 	/* convert and set */
853 	if (!SCNG(input_filter)) {
854 		if (SCNG(script_filtered)) {
855 			efree(SCNG(script_filtered));
856 			SCNG(script_filtered) = NULL;
857 		}
858 		SCNG(script_filtered_size) = 0;
859 		length = SCNG(script_org_size);
860 		new_yy_start = SCNG(script_org);
861 	} else {
862 		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
863 			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
864 					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
865 		}
866 		if (SCNG(script_filtered)) {
867 			efree(SCNG(script_filtered));
868 		}
869 		SCNG(script_filtered) = new_yy_start;
870 		SCNG(script_filtered_size) = length;
871 	}
872 
873 	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
874 	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
875 	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
876 	SCNG(yy_limit) = new_yy_start + length;
877 
878 	SCNG(yy_start) = new_yy_start;
879 }
880 
881 
/*
 * Store `yyleng` bytes starting at `yytext` into `zendlval` as a new string.
 * When SCNG(output_filter) is installed the bytes are passed through it
 * first and the filter's temporary buffer is freed after copying.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else); invoke it with a trailing semicolon.
 * The text/length arguments are parenthesised before being cast.
 */
// TODO: avoid reallocation ???
# define zend_copy_value(zendlval, yytext, yyleng) \
	do { \
		if (SCNG(output_filter)) { \
			size_t sz = 0; \
			char *s = NULL; \
			SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng)); \
			ZVAL_STRINGL(zendlval, s, sz); \
			efree(s); \
		} else { \
			ZVAL_STRINGL(zendlval, yytext, yyleng); \
		} \
	} while (0)
893 
/*
 * Copy `len` bytes from `str` into a new string stored in `zendlval` and
 * decode its backslash escape sequences in place.
 *
 * Handled escapes: \n \r \t \f \v \e, \\ and \$, the quote character given
 * by `quote_type` (a '"' or '`' that is not the active quote keeps its
 * backslash), \x / \X followed by one or two hex digits, \u{hex...} encoded
 * as UTF-8, and one to three octal digits. Any other escape is copied
 * through unchanged, backslash included.
 *
 * CG(zend_lineno) is advanced for every "\n" and every "\r" not followed by
 * "\n" that is seen while walking the input. If SCNG(output_filter) is set,
 * the decoded string is finally replaced by the filter's output.
 *
 * Returns SUCCESS. On a malformed or too large \u{...} sequence a parse
 * error exception is thrown, `zendlval` is destroyed and set to UNDEF, and
 * FAILURE is returned.
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *s, *t;
	char *end;

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences: `s` reads, `t` writes; t never overtakes s */
	s = t = Z_STRVAL_P(zendlval);
	end = s+Z_STRLEN_P(zendlval);
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* lone backslash at the very end is kept literally */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'r':
					*t++ = '\r';
					Z_STRLEN_P(zendlval)--;
					break;
				case 't':
					*t++ = '\t';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'f':
					*t++ = '\f';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'v':
					*t++ = '\v';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'e':
#ifdef ZEND_WIN32
					*t++ = VK_ESCAPE;
#else
					*t++ = '\e';
#endif
					Z_STRLEN_P(zendlval)--;
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						/* not the active quote: keep the backslash */
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*t++ = *s;
					Z_STRLEN_P(zendlval)--;
					break;
				case 'x':
				case 'X':
					/* The one-byte lookahead may reach the byte at `end`;
					 * presumably the NUL terminator of the string created
					 * by ZVAL_STRINGL above. */
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						Z_STRLEN_P(zendlval)--; /* for the 'x' */

						hex_buf[0] = *(++s);
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
						}
						*t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
					{
						/* cache where we started so we can parse after validating */
						char *start = s + 1;
						/* number of bytes in "{...}", braces included */
						size_t seq_len = 0;
						zend_bool valid = 1;
						unsigned long codepoint;
						size_t byte_len = 0;

						if (*start != '{') {
							/* we silently let this pass to avoid breaking code
							 * with JSON in string literals (e.g. "\"\u202e\"")
							 */
							*t++ = '\\';
							*t++ = 'u';
							break;
						} else {
							/* on the other hand, invalid \u{blah} errors */
							s++;
							seq_len++;
							s++;
							/* Stops at '}' or at the first non-hex byte */
							while (*s != '}') {
								if (!ZEND_IS_HEX(*s)) {
									valid = 0;
									break;
								} else {
									seq_len++;
								}
								s++;
							}
							if (*s == '}') {
								seq_len++;
							}
						}

						/* \u{} is invalid */
						if (seq_len <= 2) {
							valid = 0;
						}

						if (!valid) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						errno = 0;
						codepoint = strtoul(start + 1, NULL, 16);

						/* per RFC 3629, UTF-8 can only represent 21 bits */
						if (codepoint > 0x10FFFF || errno) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
						if (codepoint < 0x80) {
							byte_len = 1;
							*t++ = codepoint;
						} else if (codepoint <= 0x7FF) {
							byte_len = 2;
							*t++ = (codepoint >> 6) + 0xC0;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0xFFFF) {
							byte_len = 3;
							*t++ = (codepoint >> 12) + 0xE0;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0x10FFFF) {
							byte_len = 4;
							*t++ = (codepoint >> 18) + 0xF0;
							*t++ = ((codepoint >> 12) & 0x3F) + 0x80;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						}

						Z_STRLEN_P(zendlval) -= 2; /* \u */
						Z_STRLEN_P(zendlval) -= (seq_len - byte_len);
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
								Z_STRLEN_P(zendlval)--;
							}
						}
						*t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* Count source lines: "\n", or a "\r" that is not part of "\r\n" */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	if (SCNG(output_filter)) {
		size_t sz = 0;
		/* Start from NULL (as zend_copy_value does) so the pointer is never
		 * read uninitialised if the filter leaves it untouched. */
		unsigned char *filtered = NULL;
		// TODO: avoid reallocation ???
		s = Z_STRVAL_P(zendlval);
		SCNG(output_filter)(&filtered, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, sz);
		efree(filtered);
	}
	return SUCCESS;
}
1103 
emit_token(int token,int token_line)1104 static zend_always_inline int emit_token(int token, int token_line)
1105 {
1106 	if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line);
1107 
1108 	return token;
1109 }
1110 
1111 #define RETURN_TOKEN(token) return emit_token(token, start_line);
1112 
lex_scan(zval * zendlval)1113 int lex_scan(zval *zendlval)
1114 {
1115 
1116 int start_line = CG(zend_lineno);
1117 
1118 restart:
1119 	SCNG(yy_text) = YYCURSOR;
1120 
1121 /*!re2c
1122 re2c:yyfill:check = 0;
1123 LNUM	[0-9]+
1124 DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1125 EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
1126 HNUM	"0x"[0-9a-fA-F]+
1127 BNUM	"0b"[01]+
1128 LABEL	[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1129 WHITESPACE [ \n\r\t]+
1130 TABS_AND_SPACES [ \t]*
1131 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1132 ANY_CHAR [^]
1133 NEWLINE ("\r"|"\n"|"\r\n")
1134 
1135 /* compute yyleng before each rule */
1136 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1137 
1138 <ST_IN_SCRIPTING>"exit" {
1139 	RETURN_TOKEN(T_EXIT);
1140 }
1141 
1142 <ST_IN_SCRIPTING>"die" {
1143 	RETURN_TOKEN(T_EXIT);
1144 }
1145 
1146 <ST_IN_SCRIPTING>"function" {
1147 	RETURN_TOKEN(T_FUNCTION);
1148 }
1149 
1150 <ST_IN_SCRIPTING>"const" {
1151 	RETURN_TOKEN(T_CONST);
1152 }
1153 
1154 <ST_IN_SCRIPTING>"return" {
1155 	RETURN_TOKEN(T_RETURN);
1156 }
1157 
1158 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
1159 	yyless(yyleng - 1);
1160 	HANDLE_NEWLINES(yytext, yyleng);
1161 	RETURN_TOKEN(T_YIELD_FROM);
1162 }
1163 
1164 <ST_IN_SCRIPTING>"yield" {
1165 	RETURN_TOKEN(T_YIELD);
1166 }
1167 
1168 <ST_IN_SCRIPTING>"try" {
1169 	RETURN_TOKEN(T_TRY);
1170 }
1171 
1172 <ST_IN_SCRIPTING>"catch" {
1173 	RETURN_TOKEN(T_CATCH);
1174 }
1175 
1176 <ST_IN_SCRIPTING>"finally" {
1177 	RETURN_TOKEN(T_FINALLY);
1178 }
1179 
1180 <ST_IN_SCRIPTING>"throw" {
1181 	RETURN_TOKEN(T_THROW);
1182 }
1183 
1184 <ST_IN_SCRIPTING>"if" {
1185 	RETURN_TOKEN(T_IF);
1186 }
1187 
1188 <ST_IN_SCRIPTING>"elseif" {
1189 	RETURN_TOKEN(T_ELSEIF);
1190 }
1191 
1192 <ST_IN_SCRIPTING>"endif" {
1193 	RETURN_TOKEN(T_ENDIF);
1194 }
1195 
1196 <ST_IN_SCRIPTING>"else" {
1197 	RETURN_TOKEN(T_ELSE);
1198 }
1199 
1200 <ST_IN_SCRIPTING>"while" {
1201 	RETURN_TOKEN(T_WHILE);
1202 }
1203 
1204 <ST_IN_SCRIPTING>"endwhile" {
1205 	RETURN_TOKEN(T_ENDWHILE);
1206 }
1207 
1208 <ST_IN_SCRIPTING>"do" {
1209 	RETURN_TOKEN(T_DO);
1210 }
1211 
1212 <ST_IN_SCRIPTING>"for" {
1213 	RETURN_TOKEN(T_FOR);
1214 }
1215 
1216 <ST_IN_SCRIPTING>"endfor" {
1217 	RETURN_TOKEN(T_ENDFOR);
1218 }
1219 
1220 <ST_IN_SCRIPTING>"foreach" {
1221 	RETURN_TOKEN(T_FOREACH);
1222 }
1223 
1224 <ST_IN_SCRIPTING>"endforeach" {
1225 	RETURN_TOKEN(T_ENDFOREACH);
1226 }
1227 
1228 <ST_IN_SCRIPTING>"declare" {
1229 	RETURN_TOKEN(T_DECLARE);
1230 }
1231 
1232 <ST_IN_SCRIPTING>"enddeclare" {
1233 	RETURN_TOKEN(T_ENDDECLARE);
1234 }
1235 
1236 <ST_IN_SCRIPTING>"instanceof" {
1237 	RETURN_TOKEN(T_INSTANCEOF);
1238 }
1239 
1240 <ST_IN_SCRIPTING>"as" {
1241 	RETURN_TOKEN(T_AS);
1242 }
1243 
1244 <ST_IN_SCRIPTING>"switch" {
1245 	RETURN_TOKEN(T_SWITCH);
1246 }
1247 
1248 <ST_IN_SCRIPTING>"endswitch" {
1249 	RETURN_TOKEN(T_ENDSWITCH);
1250 }
1251 
1252 <ST_IN_SCRIPTING>"case" {
1253 	RETURN_TOKEN(T_CASE);
1254 }
1255 
1256 <ST_IN_SCRIPTING>"default" {
1257 	RETURN_TOKEN(T_DEFAULT);
1258 }
1259 
1260 <ST_IN_SCRIPTING>"break" {
1261 	RETURN_TOKEN(T_BREAK);
1262 }
1263 
1264 <ST_IN_SCRIPTING>"continue" {
1265 	RETURN_TOKEN(T_CONTINUE);
1266 }
1267 
1268 <ST_IN_SCRIPTING>"goto" {
1269 	RETURN_TOKEN(T_GOTO);
1270 }
1271 
1272 <ST_IN_SCRIPTING>"echo" {
1273 	RETURN_TOKEN(T_ECHO);
1274 }
1275 
1276 <ST_IN_SCRIPTING>"print" {
1277 	RETURN_TOKEN(T_PRINT);
1278 }
1279 
1280 <ST_IN_SCRIPTING>"class" {
1281 	RETURN_TOKEN(T_CLASS);
1282 }
1283 
1284 <ST_IN_SCRIPTING>"interface" {
1285 	RETURN_TOKEN(T_INTERFACE);
1286 }
1287 
1288 <ST_IN_SCRIPTING>"trait" {
1289 	RETURN_TOKEN(T_TRAIT);
1290 }
1291 
1292 <ST_IN_SCRIPTING>"extends" {
1293 	RETURN_TOKEN(T_EXTENDS);
1294 }
1295 
1296 <ST_IN_SCRIPTING>"implements" {
1297 	RETURN_TOKEN(T_IMPLEMENTS);
1298 }
1299 
1300 <ST_IN_SCRIPTING>"->" {
1301 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
1302 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1303 }
1304 
1305 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1306 	HANDLE_NEWLINES(yytext, yyleng);
1307 	RETURN_TOKEN(T_WHITESPACE);
1308 }
1309 
1310 <ST_LOOKING_FOR_PROPERTY>"->" {
1311 	RETURN_TOKEN(T_OBJECT_OPERATOR);
1312 }
1313 
1314 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1315 	yy_pop_state();
1316 	zend_copy_value(zendlval, yytext, yyleng);
1317 	RETURN_TOKEN(T_STRING);
1318 }
1319 
1320 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1321 	yyless(0);
1322 	yy_pop_state();
1323 	goto restart;
1324 }
1325 
1326 <ST_IN_SCRIPTING>"::" {
1327 	RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
1328 }
1329 
1330 <ST_IN_SCRIPTING>"\\" {
1331 	RETURN_TOKEN(T_NS_SEPARATOR);
1332 }
1333 
1334 <ST_IN_SCRIPTING>"..." {
1335 	RETURN_TOKEN(T_ELLIPSIS);
1336 }
1337 
1338 <ST_IN_SCRIPTING>"??" {
1339 	RETURN_TOKEN(T_COALESCE);
1340 }
1341 
1342 <ST_IN_SCRIPTING>"new" {
1343 	RETURN_TOKEN(T_NEW);
1344 }
1345 
1346 <ST_IN_SCRIPTING>"clone" {
1347 	RETURN_TOKEN(T_CLONE);
1348 }
1349 
1350 <ST_IN_SCRIPTING>"var" {
1351 	RETURN_TOKEN(T_VAR);
1352 }
1353 
1354 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1355 	RETURN_TOKEN(T_INT_CAST);
1356 }
1357 
1358 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1359 	RETURN_TOKEN(T_DOUBLE_CAST);
1360 }
1361 
1362 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1363 	RETURN_TOKEN(T_STRING_CAST);
1364 }
1365 
1366 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1367 	RETURN_TOKEN(T_ARRAY_CAST);
1368 }
1369 
1370 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1371 	RETURN_TOKEN(T_OBJECT_CAST);
1372 }
1373 
1374 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1375 	RETURN_TOKEN(T_BOOL_CAST);
1376 }
1377 
1378 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1379 	RETURN_TOKEN(T_UNSET_CAST);
1380 }
1381 
1382 <ST_IN_SCRIPTING>"eval" {
1383 	RETURN_TOKEN(T_EVAL);
1384 }
1385 
1386 <ST_IN_SCRIPTING>"include" {
1387 	RETURN_TOKEN(T_INCLUDE);
1388 }
1389 
1390 <ST_IN_SCRIPTING>"include_once" {
1391 	RETURN_TOKEN(T_INCLUDE_ONCE);
1392 }
1393 
1394 <ST_IN_SCRIPTING>"require" {
1395 	RETURN_TOKEN(T_REQUIRE);
1396 }
1397 
1398 <ST_IN_SCRIPTING>"require_once" {
1399 	RETURN_TOKEN(T_REQUIRE_ONCE);
1400 }
1401 
1402 <ST_IN_SCRIPTING>"namespace" {
1403 	RETURN_TOKEN(T_NAMESPACE);
1404 }
1405 
1406 <ST_IN_SCRIPTING>"use" {
1407 	RETURN_TOKEN(T_USE);
1408 }
1409 
1410 <ST_IN_SCRIPTING>"insteadof" {
1411     RETURN_TOKEN(T_INSTEADOF);
1412 }
1413 
1414 <ST_IN_SCRIPTING>"global" {
1415 	RETURN_TOKEN(T_GLOBAL);
1416 }
1417 
1418 <ST_IN_SCRIPTING>"isset" {
1419 	RETURN_TOKEN(T_ISSET);
1420 }
1421 
1422 <ST_IN_SCRIPTING>"empty" {
1423 	RETURN_TOKEN(T_EMPTY);
1424 }
1425 
1426 <ST_IN_SCRIPTING>"__halt_compiler" {
1427 	RETURN_TOKEN(T_HALT_COMPILER);
1428 }
1429 
1430 <ST_IN_SCRIPTING>"static" {
1431 	RETURN_TOKEN(T_STATIC);
1432 }
1433 
1434 <ST_IN_SCRIPTING>"abstract" {
1435 	RETURN_TOKEN(T_ABSTRACT);
1436 }
1437 
1438 <ST_IN_SCRIPTING>"final" {
1439 	RETURN_TOKEN(T_FINAL);
1440 }
1441 
1442 <ST_IN_SCRIPTING>"private" {
1443 	RETURN_TOKEN(T_PRIVATE);
1444 }
1445 
1446 <ST_IN_SCRIPTING>"protected" {
1447 	RETURN_TOKEN(T_PROTECTED);
1448 }
1449 
1450 <ST_IN_SCRIPTING>"public" {
1451 	RETURN_TOKEN(T_PUBLIC);
1452 }
1453 
1454 <ST_IN_SCRIPTING>"unset" {
1455 	RETURN_TOKEN(T_UNSET);
1456 }
1457 
1458 <ST_IN_SCRIPTING>"=>" {
1459 	RETURN_TOKEN(T_DOUBLE_ARROW);
1460 }
1461 
1462 <ST_IN_SCRIPTING>"list" {
1463 	RETURN_TOKEN(T_LIST);
1464 }
1465 
1466 <ST_IN_SCRIPTING>"array" {
1467 	RETURN_TOKEN(T_ARRAY);
1468 }
1469 
1470 <ST_IN_SCRIPTING>"callable" {
1471 	RETURN_TOKEN(T_CALLABLE);
1472 }
1473 
1474 <ST_IN_SCRIPTING>"++" {
1475 	RETURN_TOKEN(T_INC);
1476 }
1477 
1478 <ST_IN_SCRIPTING>"--" {
1479 	RETURN_TOKEN(T_DEC);
1480 }
1481 
1482 <ST_IN_SCRIPTING>"===" {
1483 	RETURN_TOKEN(T_IS_IDENTICAL);
1484 }
1485 
1486 <ST_IN_SCRIPTING>"!==" {
1487 	RETURN_TOKEN(T_IS_NOT_IDENTICAL);
1488 }
1489 
1490 <ST_IN_SCRIPTING>"==" {
1491 	RETURN_TOKEN(T_IS_EQUAL);
1492 }
1493 
1494 <ST_IN_SCRIPTING>"!="|"<>" {
1495 	RETURN_TOKEN(T_IS_NOT_EQUAL);
1496 }
1497 
1498 <ST_IN_SCRIPTING>"<=>" {
1499 	RETURN_TOKEN(T_SPACESHIP);
1500 }
1501 
1502 <ST_IN_SCRIPTING>"<=" {
1503 	RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
1504 }
1505 
1506 <ST_IN_SCRIPTING>">=" {
1507 	RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
1508 }
1509 
1510 <ST_IN_SCRIPTING>"+=" {
1511 	RETURN_TOKEN(T_PLUS_EQUAL);
1512 }
1513 
1514 <ST_IN_SCRIPTING>"-=" {
1515 	RETURN_TOKEN(T_MINUS_EQUAL);
1516 }
1517 
1518 <ST_IN_SCRIPTING>"*=" {
1519 	RETURN_TOKEN(T_MUL_EQUAL);
1520 }
1521 
1522 <ST_IN_SCRIPTING>"*\*" {
1523 	RETURN_TOKEN(T_POW);
1524 }
1525 
1526 <ST_IN_SCRIPTING>"*\*=" {
1527 	RETURN_TOKEN(T_POW_EQUAL);
1528 }
1529 
1530 <ST_IN_SCRIPTING>"/=" {
1531 	RETURN_TOKEN(T_DIV_EQUAL);
1532 }
1533 
1534 <ST_IN_SCRIPTING>".=" {
1535 	RETURN_TOKEN(T_CONCAT_EQUAL);
1536 }
1537 
1538 <ST_IN_SCRIPTING>"%=" {
1539 	RETURN_TOKEN(T_MOD_EQUAL);
1540 }
1541 
1542 <ST_IN_SCRIPTING>"<<=" {
1543 	RETURN_TOKEN(T_SL_EQUAL);
1544 }
1545 
1546 <ST_IN_SCRIPTING>">>=" {
1547 	RETURN_TOKEN(T_SR_EQUAL);
1548 }
1549 
1550 <ST_IN_SCRIPTING>"&=" {
1551 	RETURN_TOKEN(T_AND_EQUAL);
1552 }
1553 
1554 <ST_IN_SCRIPTING>"|=" {
1555 	RETURN_TOKEN(T_OR_EQUAL);
1556 }
1557 
1558 <ST_IN_SCRIPTING>"^=" {
1559 	RETURN_TOKEN(T_XOR_EQUAL);
1560 }
1561 
1562 <ST_IN_SCRIPTING>"||" {
1563 	RETURN_TOKEN(T_BOOLEAN_OR);
1564 }
1565 
1566 <ST_IN_SCRIPTING>"&&" {
1567 	RETURN_TOKEN(T_BOOLEAN_AND);
1568 }
1569 
1570 <ST_IN_SCRIPTING>"OR" {
1571 	RETURN_TOKEN(T_LOGICAL_OR);
1572 }
1573 
1574 <ST_IN_SCRIPTING>"AND" {
1575 	RETURN_TOKEN(T_LOGICAL_AND);
1576 }
1577 
1578 <ST_IN_SCRIPTING>"XOR" {
1579 	RETURN_TOKEN(T_LOGICAL_XOR);
1580 }
1581 
1582 <ST_IN_SCRIPTING>"<<" {
1583 	RETURN_TOKEN(T_SL);
1584 }
1585 
1586 <ST_IN_SCRIPTING>">>" {
1587 	RETURN_TOKEN(T_SR);
1588 }
1589 
1590 <ST_IN_SCRIPTING>{TOKENS} {
1591 	RETURN_TOKEN(yytext[0]);
1592 }
1593 
1594 
1595 <ST_IN_SCRIPTING>"{" {
1596 	yy_push_state(ST_IN_SCRIPTING);
1597 	RETURN_TOKEN('{');
1598 }
1599 
1600 
1601 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1602 	yy_push_state(ST_LOOKING_FOR_VARNAME);
1603 	RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
1604 }
1605 
1606 
1607 <ST_IN_SCRIPTING>"}" {
1608 	RESET_DOC_COMMENT();
1609 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
1610 		yy_pop_state();
1611 	}
1612 	RETURN_TOKEN('}');
1613 }
1614 
1615 
1616 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1617 	yyless(yyleng - 1);
1618 	zend_copy_value(zendlval, yytext, yyleng);
1619 	yy_pop_state();
1620 	yy_push_state(ST_IN_SCRIPTING);
1621 	RETURN_TOKEN(T_STRING_VARNAME);
1622 }
1623 
1624 
1625 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1626 	yyless(0);
1627 	yy_pop_state();
1628 	yy_push_state(ST_IN_SCRIPTING);
1629 	goto restart;
1630 }
1631 
1632 <ST_IN_SCRIPTING>{BNUM} {
1633 	char *bin = yytext + 2; /* Skip "0b" */
1634 	int len = yyleng - 2;
1635 	char *end;
1636 
1637 	/* Skip any leading 0s */
1638 	while (*bin == '0') {
1639 		++bin;
1640 		--len;
1641 	}
1642 
1643 	if (len < SIZEOF_ZEND_LONG * 8) {
1644 		if (len == 0) {
1645 			ZVAL_LONG(zendlval, 0);
1646 		} else {
1647 			errno = 0;
1648 			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1649 			ZEND_ASSERT(!errno && end == yytext + yyleng);
1650 		}
1651 		RETURN_TOKEN(T_LNUMBER);
1652 	} else {
1653 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1654 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1655 		ZEND_ASSERT(end == yytext + yyleng);
1656 		RETURN_TOKEN(T_DNUMBER);
1657 	}
1658 }
1659 
1660 <ST_IN_SCRIPTING>{LNUM} {
1661 	char *end;
1662 	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1663 		errno = 0;
1664 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1665 		/* This isn't an assert, we need to ensure 019 isn't valid octal
1666 		 * Because the lexing itself doesn't do that for us
1667 		 */
1668 		if (end != yytext + yyleng) {
1669 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1670 			ZVAL_UNDEF(zendlval);
1671 			RETURN_TOKEN(T_LNUMBER);
1672 		}
1673 	} else {
1674 		errno = 0;
1675 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1676 		if (errno == ERANGE) { /* Overflow */
1677 			errno = 0;
1678 			if (yytext[0] == '0') { /* octal overflow */
1679 				ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1680 			} else {
1681 				ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1682 			}
1683 			/* Also not an assert for the same reason */
1684 			if (end != yytext + yyleng) {
1685 				zend_throw_exception(zend_ce_parse_error,
1686 					"Invalid numeric literal", 0);
1687 				ZVAL_UNDEF(zendlval);
1688 				RETURN_TOKEN(T_DNUMBER);
1689 			}
1690 			RETURN_TOKEN(T_DNUMBER);
1691 		}
1692 		/* Also not an assert for the same reason */
1693 		if (end != yytext + yyleng) {
1694 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1695 			ZVAL_UNDEF(zendlval);
1696 			RETURN_TOKEN(T_DNUMBER);
1697 		}
1698 	}
1699 	ZEND_ASSERT(!errno);
1700 	RETURN_TOKEN(T_LNUMBER);
1701 }
1702 
1703 <ST_IN_SCRIPTING>{HNUM} {
1704 	char *hex = yytext + 2; /* Skip "0x" */
1705 	int len = yyleng - 2;
1706 	char *end;
1707 
1708 	/* Skip any leading 0s */
1709 	while (*hex == '0') {
1710 		hex++;
1711 		len--;
1712 	}
1713 
1714 	if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1715 		if (len == 0) {
1716 			ZVAL_LONG(zendlval, 0);
1717 		} else {
1718 			errno = 0;
1719 			ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1720 			ZEND_ASSERT(!errno && end == hex + len);
1721 		}
1722 		RETURN_TOKEN(T_LNUMBER);
1723 	} else {
1724 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1725 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1726 		ZEND_ASSERT(end == hex + len);
1727 		RETURN_TOKEN(T_DNUMBER);
1728 	}
1729 }
1730 
1731 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1732 	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1733 		char *end;
1734 		errno = 0;
1735 		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1736 		if (errno == ERANGE) {
1737 			goto string;
1738 		}
1739 		ZEND_ASSERT(end == yytext + yyleng);
1740 	} else {
1741 string:
1742 		ZVAL_STRINGL(zendlval, yytext, yyleng);
1743 	}
1744 	RETURN_TOKEN(T_NUM_STRING);
1745 }
1746 
1747 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1748 	ZVAL_STRINGL(zendlval, yytext, yyleng);
1749 	RETURN_TOKEN(T_NUM_STRING);
1750 }
1751 
1752 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1753 	const char *end;
1754 
1755 	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1756 	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1757 	ZEND_ASSERT(end == yytext + yyleng);
1758 	RETURN_TOKEN(T_DNUMBER);
1759 }
1760 
1761 <ST_IN_SCRIPTING>"__CLASS__" {
1762 	RETURN_TOKEN(T_CLASS_C);
1763 }
1764 
1765 <ST_IN_SCRIPTING>"__TRAIT__" {
1766 	RETURN_TOKEN(T_TRAIT_C);
1767 }
1768 
1769 <ST_IN_SCRIPTING>"__FUNCTION__" {
1770 	RETURN_TOKEN(T_FUNC_C);
1771 }
1772 
1773 <ST_IN_SCRIPTING>"__METHOD__" {
1774 	RETURN_TOKEN(T_METHOD_C);
1775 }
1776 
1777 <ST_IN_SCRIPTING>"__LINE__" {
1778 	RETURN_TOKEN(T_LINE);
1779 }
1780 
1781 <ST_IN_SCRIPTING>"__FILE__" {
1782 	RETURN_TOKEN(T_FILE);
1783 }
1784 
1785 <ST_IN_SCRIPTING>"__DIR__" {
1786 	RETURN_TOKEN(T_DIR);
1787 }
1788 
1789 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1790 	RETURN_TOKEN(T_NS_C);
1791 }
1792 
1793 
1794 <INITIAL>"<?=" {
1795 	BEGIN(ST_IN_SCRIPTING);
1796 	RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
1797 }
1798 
1799 
1800 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1801 	HANDLE_NEWLINE(yytext[yyleng-1]);
1802 	BEGIN(ST_IN_SCRIPTING);
1803 	RETURN_TOKEN(T_OPEN_TAG);
1804 }
1805 
1806 
1807 <INITIAL>"<?" {
1808 	if (CG(short_tags)) {
1809 		BEGIN(ST_IN_SCRIPTING);
1810 		RETURN_TOKEN(T_OPEN_TAG);
1811 	} else {
1812 		goto inline_char_handler;
1813 	}
1814 }
1815 
1816 <INITIAL>{ANY_CHAR} {
1817 	if (YYCURSOR > YYLIMIT) {
1818 		RETURN_TOKEN(END);
1819 	}
1820 
1821 inline_char_handler:
1822 
1823 	while (1) {
1824 		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1825 
1826 		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1827 
1828 		if (YYCURSOR >= YYLIMIT) {
1829 			break;
1830 		}
1831 
1832 		if (*YYCURSOR == '?') {
1833 			if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1834 
1835 				YYCURSOR--;
1836 				break;
1837 			}
1838 		}
1839 	}
1840 
1841 	yyleng = YYCURSOR - SCNG(yy_text);
1842 
1843 	if (SCNG(output_filter)) {
1844 		size_t readsize;
1845 		char *s = NULL;
1846 		size_t sz = 0;
1847 		// TODO: avoid reallocation ???
1848 		readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1849 		ZVAL_STRINGL(zendlval, s, sz);
1850 		efree(s);
1851 		if (readsize < yyleng) {
1852 			yyless(readsize);
1853 		}
1854 	} else {
1855 	  ZVAL_STRINGL(zendlval, yytext, yyleng);
1856 	}
1857 	HANDLE_NEWLINES(yytext, yyleng);
1858 	RETURN_TOKEN(T_INLINE_HTML);
1859 }
1860 
1861 
1862 /* Make sure a label character follows "->", otherwise there is no property
1863  * and "->" will be taken literally
1864  */
1865 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1866 	yyless(yyleng - 3);
1867 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
1868 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1869 	RETURN_TOKEN(T_VARIABLE);
1870 }
1871 
1872 /* A [ always designates a variable offset, regardless of what follows
1873  */
1874 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1875 	yyless(yyleng - 1);
1876 	yy_push_state(ST_VAR_OFFSET);
1877 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1878 	RETURN_TOKEN(T_VARIABLE);
1879 }
1880 
1881 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1882 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1883 	RETURN_TOKEN(T_VARIABLE);
1884 }
1885 
1886 <ST_VAR_OFFSET>"]" {
1887 	yy_pop_state();
1888 	RETURN_TOKEN(']');
1889 }
1890 
1891 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1892 	/* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1893 	RETURN_TOKEN(yytext[0]);
1894 }
1895 
1896 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1897 	/* Invalid rule to return a more explicit parse error with proper line number */
1898 	yyless(0);
1899 	yy_pop_state();
1900 	ZVAL_NULL(zendlval);
1901 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1902 }
1903 
1904 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1905 	zend_copy_value(zendlval, yytext, yyleng);
1906 	RETURN_TOKEN(T_STRING);
1907 }
1908 
1909 
1910 <ST_IN_SCRIPTING>"#"|"//" {
1911 	while (YYCURSOR < YYLIMIT) {
1912 		switch (*YYCURSOR++) {
1913 			case '\r':
1914 				if (*YYCURSOR == '\n') {
1915 					YYCURSOR++;
1916 				}
1917 				/* fall through */
1918 			case '\n':
1919 				CG(zend_lineno)++;
1920 				break;
1921 			case '?':
1922 				if (*YYCURSOR == '>') {
1923 					YYCURSOR--;
1924 					break;
1925 				}
1926 				/* fall through */
1927 			default:
1928 				continue;
1929 		}
1930 
1931 		break;
1932 	}
1933 
1934 	yyleng = YYCURSOR - SCNG(yy_text);
1935 
1936 	RETURN_TOKEN(T_COMMENT);
1937 }
1938 
1939 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1940 	int doc_com;
1941 
1942 	if (yyleng > 2) {
1943 		doc_com = 1;
1944 		RESET_DOC_COMMENT();
1945 	} else {
1946 		doc_com = 0;
1947 	}
1948 
1949 	while (YYCURSOR < YYLIMIT) {
1950 		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1951 			break;
1952 		}
1953 	}
1954 
1955 	if (YYCURSOR < YYLIMIT) {
1956 		YYCURSOR++;
1957 	} else {
1958 		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1959 	}
1960 
1961 	yyleng = YYCURSOR - SCNG(yy_text);
1962 	HANDLE_NEWLINES(yytext, yyleng);
1963 
1964 	if (doc_com) {
1965 		CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1966 		RETURN_TOKEN(T_DOC_COMMENT);
1967 	}
1968 
1969 	RETURN_TOKEN(T_COMMENT);
1970 }
1971 
1972 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1973 	BEGIN(INITIAL);
1974 	RETURN_TOKEN(T_CLOSE_TAG);  /* implicit ';' at php-end tag */
1975 }
1976 
1977 
1978 <ST_IN_SCRIPTING>b?['] {
1979 	register char *s, *t;
1980 	char *end;
1981 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
1982 
1983 	while (1) {
1984 		if (YYCURSOR < YYLIMIT) {
1985 			if (*YYCURSOR == '\'') {
1986 				YYCURSOR++;
1987 				yyleng = YYCURSOR - SCNG(yy_text);
1988 
1989 				break;
1990 			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1991 				YYCURSOR++;
1992 			}
1993 		} else {
1994 			yyleng = YYLIMIT - SCNG(yy_text);
1995 
1996 			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
1997 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1998 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1999 			ZVAL_NULL(zendlval);
2000 			RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2001 		}
2002 	}
2003 
2004 	ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
2005 
2006 	/* convert escape sequences */
2007 	s = t = Z_STRVAL_P(zendlval);
2008 	end = s+Z_STRLEN_P(zendlval);
2009 	while (s<end) {
2010 		if (*s=='\\') {
2011 			s++;
2012 
2013 			switch(*s) {
2014 				case '\\':
2015 				case '\'':
2016 					*t++ = *s;
2017 					Z_STRLEN_P(zendlval)--;
2018 					break;
2019 				default:
2020 					*t++ = '\\';
2021 					*t++ = *s;
2022 					break;
2023 			}
2024 		} else {
2025 			*t++ = *s;
2026 		}
2027 
2028 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2029 			CG(zend_lineno)++;
2030 		}
2031 		s++;
2032 	}
2033 	*t = 0;
2034 
2035 	if (SCNG(output_filter)) {
2036 		size_t sz = 0;
2037 		char *str = NULL;
2038 		s = Z_STRVAL_P(zendlval);
2039 		// TODO: avoid reallocation ???
2040 		SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2041 		ZVAL_STRINGL(zendlval, str, sz);
2042 	}
2043 	RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2044 }
2045 
2046 
2047 <ST_IN_SCRIPTING>b?["] {
2048 	int bprefix = (yytext[0] != '"') ? 1 : 0;
2049 
2050 	while (YYCURSOR < YYLIMIT) {
2051 		switch (*YYCURSOR++) {
2052 			case '"':
2053 				yyleng = YYCURSOR - SCNG(yy_text);
2054 				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
2055 				RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2056 			case '$':
2057 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2058 					break;
2059 				}
2060 				continue;
2061 			case '{':
2062 				if (*YYCURSOR == '$') {
2063 					break;
2064 				}
2065 				continue;
2066 			case '\\':
2067 				if (YYCURSOR < YYLIMIT) {
2068 					YYCURSOR++;
2069 				}
2070 				/* fall through */
2071 			default:
2072 				continue;
2073 		}
2074 
2075 		YYCURSOR--;
2076 		break;
2077 	}
2078 
2079 	/* Remember how much was scanned to save rescanning */
2080 	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2081 
2082 	YYCURSOR = SCNG(yy_text) + yyleng;
2083 
2084 	BEGIN(ST_DOUBLE_QUOTES);
2085 	RETURN_TOKEN('"');
2086 }
2087 
2088 
2089 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2090 	char *s;
2091 	int bprefix = (yytext[0] != '<') ? 1 : 0;
2092 	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2093 
2094 	CG(zend_lineno)++;
2095 	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2096 	s = yytext+bprefix+3;
2097 	while ((*s == ' ') || (*s == '\t')) {
2098 		s++;
2099 		heredoc_label->length--;
2100 	}
2101 
2102 	if (*s == '\'') {
2103 		s++;
2104 		heredoc_label->length -= 2;
2105 
2106 		BEGIN(ST_NOWDOC);
2107 	} else {
2108 		if (*s == '"') {
2109 			s++;
2110 			heredoc_label->length -= 2;
2111 		}
2112 
2113 		BEGIN(ST_HEREDOC);
2114 	}
2115 
2116 	heredoc_label->label = estrndup(s, heredoc_label->length);
2117 
2118 	/* Check for ending label on the next line */
2119 	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2120 		YYCTYPE *end = YYCURSOR + heredoc_label->length;
2121 
2122 		if (*end == ';') {
2123 			end++;
2124 		}
2125 
2126 		if (*end == '\n' || *end == '\r') {
2127 			BEGIN(ST_END_HEREDOC);
2128 		}
2129 	}
2130 
2131 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2132 
2133 	RETURN_TOKEN(T_START_HEREDOC);
2134 }
2135 
2136 
2137 <ST_IN_SCRIPTING>[`] {
2138 	BEGIN(ST_BACKQUOTE);
2139 	RETURN_TOKEN('`');
2140 }
2141 
2142 
2143 <ST_END_HEREDOC>{ANY_CHAR} {
2144 	zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2145 
2146 	YYCURSOR += heredoc_label->length - 1;
2147 	yyleng = heredoc_label->length;
2148 
2149 	heredoc_label_dtor(heredoc_label);
2150 	efree(heredoc_label);
2151 
2152 	BEGIN(ST_IN_SCRIPTING);
2153 	RETURN_TOKEN(T_END_HEREDOC);
2154 }
2155 
2156 
2157 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2158 	Z_LVAL_P(zendlval) = (zend_long) '{';
2159 	yy_push_state(ST_IN_SCRIPTING);
2160 	yyless(1);
2161 	RETURN_TOKEN(T_CURLY_OPEN);
2162 }
2163 
2164 
2165 <ST_DOUBLE_QUOTES>["] {
2166 	BEGIN(ST_IN_SCRIPTING);
2167 	RETURN_TOKEN('"');
2168 }
2169 
2170 <ST_BACKQUOTE>[`] {
2171 	BEGIN(ST_IN_SCRIPTING);
2172 	RETURN_TOKEN('`');
2173 }
2174 
2175 
2176 <ST_DOUBLE_QUOTES>{ANY_CHAR} { /* Scan a run of literal text inside a double-quoted string and return it as T_ENCAPSED_AND_WHITESPACE */
2177 	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) { /* nonzero: a length for this run was recorded earlier -- NOTE(review): presumably by the rule that opened the string, which is outside this chunk */
2178 		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; /* jump to the recorded end; -1 assumes ANY_CHAR already consumed one character -- TODO confirm */
2179 		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0); /* clear the record so the next run is scanned by the loop below */
2180 
2181 		goto double_quotes_scan_done; /* bypass the scanning loop */
2182 	}
2183 
2184 	if (YYCURSOR > YYLIMIT) { /* cursor is past the input limit: report end of input */
2185 		RETURN_TOKEN(END);
2186 	}
2187 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { /* the matched character is a backslash: also step over the character after it */
2188 		YYCURSOR++;
2189 	}
2190 
2191 	while (YYCURSOR < YYLIMIT) { /* consume characters until a stop condition breaks out of the switch */
2192 		switch (*YYCURSOR++) {
2193 			case '"': /* closing quote: stop */
2194 				break;
2195 			case '$':
2196 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { /* $ followed by a label start or an opening brace: stop */
2197 					break;
2198 				}
2199 				continue; /* any other $ is ordinary text */
2200 			case '{':
2201 				if (*YYCURSOR == '$') { /* opening brace followed by $: stop */
2202 					break;
2203 				}
2204 				continue; /* any other brace is ordinary text */
2205 			case '\\':
2206 				if (YYCURSOR < YYLIMIT) { /* step over the character after the backslash so the cases above never inspect it */
2207 					YYCURSOR++;
2208 				}
2209 				/* fall through */
2210 			default:
2211 				continue;
2212 		}
2213 
2214 		YYCURSOR--; /* reached only by a break from the switch: give back the stopping character so it starts the next token */
2215 		break; /* leave the while loop */
2216 	}
2217 
2218 double_quotes_scan_done:
2219 	yyleng = YYCURSOR - SCNG(yy_text); /* token length measured from SCNG(yy_text) to the cursor */
2220 
2221 	zend_scan_escape_string(zendlval, yytext, yyleng, '"'); /* NOTE(review): presumably fills zendlval with the text after escape processing, with the double quote as quote type -- helper not visible here */
2222 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2223 }
2224 
2225 
2226 <ST_BACKQUOTE>{ANY_CHAR} { /* Scan a run of literal text inside a backquoted string and return it as T_ENCAPSED_AND_WHITESPACE */
2227 	if (YYCURSOR > YYLIMIT) { /* cursor is past the input limit: report end of input */
2228 		RETURN_TOKEN(END);
2229 	}
2230 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { /* the matched character is a backslash: also step over the character after it */
2231 		YYCURSOR++;
2232 	}
2233 
2234 	while (YYCURSOR < YYLIMIT) { /* consume characters until a stop condition breaks out of the switch */
2235 		switch (*YYCURSOR++) {
2236 			case '`': /* closing backtick: stop */
2237 				break;
2238 			case '$':
2239 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { /* $ followed by a label start or an opening brace: stop */
2240 					break;
2241 				}
2242 				continue; /* any other $ is ordinary text */
2243 			case '{':
2244 				if (*YYCURSOR == '$') { /* opening brace followed by $: stop */
2245 					break;
2246 				}
2247 				continue; /* any other brace is ordinary text */
2248 			case '\\':
2249 				if (YYCURSOR < YYLIMIT) { /* step over the character after the backslash so the cases above never inspect it */
2250 					YYCURSOR++;
2251 				}
2252 				/* fall through */
2253 			default:
2254 				continue;
2255 		}
2256 
2257 		YYCURSOR--; /* reached only by a break from the switch: give back the stopping character so it starts the next token */
2258 		break; /* leave the while loop */
2259 	}
2260 
2261 	yyleng = YYCURSOR - SCNG(yy_text); /* token length measured from SCNG(yy_text) to the cursor */
2262 
2263 	zend_scan_escape_string(zendlval, yytext, yyleng, '`'); /* NOTE(review): presumably fills zendlval with the text after escape processing, with the backtick as quote type -- helper not visible here */
2264 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2265 }
2266 
2267 
2268 <ST_HEREDOC>{ANY_CHAR} { /* Scan a run of heredoc body text, stopping at an interpolation or at the closing label, and return it as T_ENCAPSED_AND_WHITESPACE */
2269 	int newline = 0; /* number of bytes of the line break that precedes the closing label: 0, 1 or 2 */
2270 
2271 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); /* label of the innermost open heredoc; it stays on the stack here */
2272 
2273 	if (YYCURSOR > YYLIMIT) { /* cursor is past the input limit: report end of input */
2274 		RETURN_TOKEN(END);
2275 	}
2276 
2277 	YYCURSOR--; /* step back so the character matched by the rule is examined by the switch below as well */
2278 
2279 	while (YYCURSOR < YYLIMIT) { /* consume characters until a stop condition is met */
2280 		switch (*YYCURSOR++) {
2281 			case '\r':
2282 				if (*YYCURSOR == '\n') { /* treat a CR LF pair as one line break */
2283 					YYCURSOR++;
2284 				}
2285 				/* fall through */
2286 			case '\n':
2287 				/* Check for ending label on the next line */
2288 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) { /* the label must fit before YYLIMIT and match byte for byte */
2289 					YYCTYPE *end = YYCURSOR + heredoc_label->length; /* first byte after the candidate label */
2290 
2291 					if (*end == ';') { /* one semicolon is allowed directly after the label */
2292 						end++;
2293 					}
2294 
2295 					if (*end == '\n' || *end == '\r') { /* only a label that is followed by a line break closes the heredoc */
2296 						/* newline before label will be subtracted from returned text, but
2297 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2298 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2299 							newline = 2; /* Windows newline */
2300 						} else {
2301 							newline = 1;
2302 						}
2303 
2304 						CG(increment_lineno) = 1; /* For newline before label */
2305 						BEGIN(ST_END_HEREDOC); /* the next token is produced by the ST_END_HEREDOC rule */
2306 
2307 						goto heredoc_scan_done; /* the cursor stays at the start of the label */
2308 					}
2309 				}
2310 				continue; /* no closing label here: the line break is ordinary body text */
2311 			case '$':
2312 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { /* $ followed by a label start or an opening brace: stop */
2313 					break;
2314 				}
2315 				continue; /* any other $ is ordinary text */
2316 			case '{':
2317 				if (*YYCURSOR == '$') { /* opening brace followed by $: stop */
2318 					break;
2319 				}
2320 				continue; /* any other brace is ordinary text */
2321 			case '\\':
2322 				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') { /* step over the next character, except a line break, which must still reach the label check above */
2323 					YYCURSOR++;
2324 				}
2325 				/* fall through */
2326 			default:
2327 				continue;
2328 		}
2329 
2330 		YYCURSOR--; /* reached only by a break from the switch: give back the stopping character so it starts the next token */
2331 		break; /* leave the while loop */
2332 	}
2333 
2334 heredoc_scan_done:
2335 	yyleng = YYCURSOR - SCNG(yy_text); /* token length measured from SCNG(yy_text) to the cursor, line break before the label included */
2336 
2337 	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0); /* the value leaves out the line break before the label; NOTE(review): presumably performs escape processing with quote type 0 -- helper not visible here */
2338 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2339 }
2340 
2341 
2342 <ST_NOWDOC>{ANY_CHAR} { /* Scan nowdoc body text up to the closing label or the end of input and return it as T_ENCAPSED_AND_WHITESPACE */
2343 	int newline = 0; /* number of bytes of the line break that precedes the closing label: 0, 1 or 2 */
2344 
2345 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); /* label of the innermost open nowdoc; it stays on the stack here */
2346 
2347 	if (YYCURSOR > YYLIMIT) { /* cursor is past the input limit: report end of input */
2348 		RETURN_TOKEN(END);
2349 	}
2350 
2351 	YYCURSOR--; /* step back so the character matched by the rule is examined by the switch below as well */
2352 
2353 	while (YYCURSOR < YYLIMIT) { /* only line breaks are inspected; every other character is skipped */
2354 		switch (*YYCURSOR++) {
2355 			case '\r':
2356 				if (*YYCURSOR == '\n') { /* treat a CR LF pair as one line break */
2357 					YYCURSOR++;
2358 				}
2359 				/* fall through */
2360 			case '\n':
2361 				/* Check for ending label on the next line */
2362 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) { /* the label must fit before YYLIMIT and match byte for byte */
2363 					YYCTYPE *end = YYCURSOR + heredoc_label->length; /* first byte after the candidate label */
2364 
2365 					if (*end == ';') { /* one semicolon is allowed directly after the label */
2366 						end++;
2367 					}
2368 
2369 					if (*end == '\n' || *end == '\r') { /* only a label that is followed by a line break closes the nowdoc */
2370 						/* newline before label will be subtracted from returned text, but
2371 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2372 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2373 							newline = 2; /* Windows newline */
2374 						} else {
2375 							newline = 1;
2376 						}
2377 
2378 						CG(increment_lineno) = 1; /* For newline before label */
2379 						BEGIN(ST_END_HEREDOC); /* the next token is produced by the ST_END_HEREDOC rule */
2380 
2381 						goto nowdoc_scan_done; /* the cursor stays at the start of the label */
2382 					}
2383 				}
2384 				/* fall through */
2385 			default:
2386 				continue;
2387 		}
2388 	}
2389 
2390 nowdoc_scan_done:
2391 	yyleng = YYCURSOR - SCNG(yy_text); /* token length measured from SCNG(yy_text) to the cursor, line break before the label included */
2392 
2393 	zend_copy_value(zendlval, yytext, yyleng - newline); /* the value leaves out the line break before the label; no zend_scan_escape_string call is made in this rule */
2394 	HANDLE_NEWLINES(yytext, yyleng - newline); /* NOTE(review): presumably accounts for the line breaks inside the copied text in the line counter -- macro not visible here */
2395 	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2396 }
2397 
2398 
2399 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { /* Rule for a character matched by ANY_CHAR in these two states: warn about it and keep scanning */
2400 	if (YYCURSOR > YYLIMIT) { /* cursor is past the input limit: report end of input instead of warning */
2401 		RETURN_TOKEN(END);
2402 	}
2403 
2404 	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); /* the byte is printed both as a character and as a number; NOTE(review): if yytext has plain char type, bytes above 0x7f may print as negative numbers -- confirm */
2405 	goto restart; /* no token is returned for this character; the restart label is defined outside this chunk */
2406 }
2407 
2408 */
2409 }
2410