xref: /PHP-5.6/Zend/zend_language_scanner.l (revision 3537e95d)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2016 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Marcus Boerger <helly@php.net>                              |
16    |          Nuno Lopes <nlopess@php.net>                                |
17    |          Scott MacVicar <scottmac@php.net>                           |
18    | Flex version authors:                                                |
19    |          Andi Gutmans <andi@zend.com>                                |
20    |          Zeev Suraski <zeev@zend.com>                                |
21    +----------------------------------------------------------------------+
22 */
23 
24 /* $Id$ */
25 
/* Scanner tracing hook.  Flip the "#if 0" below to "#if 1" to have YYDEBUG
   print the state number and character it is given; otherwise it expands to
   nothing. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31 
32 #include "zend_language_scanner_defs.h"
33 
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef PHP_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52 
/*
 * re2c interface.  The cursor, limit and marker all live in the scanner
 * globals (SCNG).  YYFILL() performs no refill: it makes the enclosing
 * function return 0 once the cursor plus the requested look-ahead reaches
 * YYLIMIT + ZEND_MMAP_AHEAD.
 *
 * Macro arguments are parenthesized so that compound expressions (for example
 * a conditional expression) expand with the intended precedence.
 */
#define YYCTYPE   unsigned char
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Start-condition accessors; YYGETCONDITION() stays an lvalue because its
   address is taken when the condition is pushed on the state stack. */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Map a bare condition name onto the yyc-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Shrink the current token to its first x bytes and move the cursor back to
   the end of that prefix. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72 
73 /* perform sanity check. If this message is triggered you should
74    increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79 
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83 
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87 
88 /* Globals Macros */
89 #define SCNG	LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95 
/*
 * Increment CG(zend_lineno) once for every line break in the l bytes that
 * start at s.  A "\n" and a lone "\r" each count as one break; the "\r" of a
 * "\r\n" pair is skipped so that the pair is counted once, by its "\n".
 * The byte following a "\r" is inspected even when that "\r" is the last byte
 * of the range.
 */
#define HANDLE_NEWLINES(s, l)													\
do {																			\
	char *nl_cur = (s);															\
	char *nl_end = nl_cur + (l);												\
																				\
	for (; nl_cur < nl_end; nl_cur++) {											\
		if (*nl_cur == '\r' && *(nl_cur + 1) == '\n') {							\
			continue;															\
		}																		\
		if (*nl_cur == '\n' || *nl_cur == '\r') {								\
			CG(zend_lineno)++;													\
		}																		\
	}																			\
} while (0)
107 
/*
 * Increment CG(zend_lineno) when the single character c is "\n" or "\r".
 *
 * The argument is evaluated exactly once and captured in a local, so that
 * expressions with side effects (e.g. *p++) or low-precedence operators
 * (e.g. a conditional expression) behave as a function argument would.
 */
#define HANDLE_NEWLINE(c) \
{ \
	int handle_newline_ch = (c); \
	if (handle_newline_ch == '\n' || handle_newline_ch == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114 
/* The length already scanned up to the first variable of a double-quoted
   string is kept in CG(doc_comment_len), which is reused for that purpose. */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) (CG(doc_comment_len) = (len))
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)
118 
/* True when c can start a label: an ASCII letter, '_' or any value >= 0x7F. */
#define IS_LABEL_START(c) \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 (c) == '_' || \
	 (c) >= 0x7F)

/* True for the octal digits '0'..'7'. */
#define ZEND_IS_OCT(c) \
	((c) >= '0' && (c) <= '7')
/* True for hexadecimal digits in either letter case. */
#define ZEND_IS_HEX(c) \
	(((c) >= '0' && (c) <= '9') || \
	 ((c) >= 'a' && (c) <= 'f') || \
	 ((c) >= 'A' && (c) <= 'F'))
123 
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125 
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127 {
128 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129 	assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131 }
132 
/*
 * Encoding filter: runs zend_multibyte_encoding_converter() with UTF-8
 * (zend_multibyte_encoding_utf8) and LANG_SCNG(script_encoding) as its
 * encoding pair.  Returns whatever the converter returns.
 */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	size_t converted;

	converted = zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);

	return converted;
}
137 
/*
 * Encoding filter: the mirror image of encoding_filter_script_to_intermediate,
 * i.e. the converter is called with LANG_SCNG(script_encoding) first and
 * UTF-8 second.  Returns whatever the converter returns.
 */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	size_t converted;

	converted = zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);

	return converted;
}
143 
/*
 * Encoding filter: calls the converter with the internal encoding first and
 * UTF-8 second.  Asserts that an internal encoding is set and that it passes
 * zend_multibyte_check_lexer_compatibility().
 * Returns whatever the converter returns.
 */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *target_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);

	assert(target_encoding && zend_multibyte_check_lexer_compatibility(target_encoding));

	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		target_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
}
151 
152 
/*
 * Hand the current start condition (address and sizeof(int)) to
 * zend_stack_push() on SCNG(state_stack), then switch the scanner to
 * new_state.
 */
static void _yy_push_state(int new_state TSRMLS_DC)
{
	zend_stack_push(&SCNG(state_stack), (void *) &SCNG(yy_state), sizeof(int));
	SCNG(yy_state) = new_state;
}
158 
/* The single argument carries the bare condition name followed by the TSRM
   tail; the yyc prefix is pasted onto its first token. */
#define yy_push_state(cond_and_tsrm) _yy_push_state(yyc##cond_and_tsrm)
160 
yy_pop_state(TSRMLS_D)161 static void yy_pop_state(TSRMLS_D)
162 {
163 	int *stack_state;
164 	zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165 	YYSETCONDITION(*stack_state);
166 	zend_stack_del_top(&SCNG(state_stack));
167 }
168 
/*
 * Point the scanner at the len bytes starting at str: the cursor is placed on
 * the first byte and the limit just past the last one.  SCNG(yy_start) is only
 * set here when it is still NULL, so a start chosen by the caller is kept.
 */
static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
{
	YYCTYPE *first = (YYCTYPE*)str;

	SCNG(yy_cursor) = first;
	SCNG(yy_limit)  = first + len;

	if (SCNG(yy_start) == NULL) {
		SCNG(yy_start) = first;
	}
}
177 
startup_scanner(TSRMLS_D)178 void startup_scanner(TSRMLS_D)
179 {
180 	CG(parse_error) = 0;
181 	CG(doc_comment) = NULL;
182 	CG(doc_comment_len) = 0;
183 	zend_stack_init(&SCNG(state_stack));
184 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
185 }
186 
/* Element destructor for the heredoc label stack: frees the label string held
   by the entry (not the entry itself). */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
190 
shutdown_scanner(TSRMLS_D)191 void shutdown_scanner(TSRMLS_D)
192 {
193 	CG(parse_error) = 0;
194 	RESET_DOC_COMMENT();
195 	zend_stack_destroy(&SCNG(state_stack));
196 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
197 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
198 }
199 
zend_save_lexical_state(zend_lex_state * lex_state TSRMLS_DC)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
201 {
202 	lex_state->yy_leng   = SCNG(yy_leng);
203 	lex_state->yy_start  = SCNG(yy_start);
204 	lex_state->yy_text   = SCNG(yy_text);
205 	lex_state->yy_cursor = SCNG(yy_cursor);
206 	lex_state->yy_marker = SCNG(yy_marker);
207 	lex_state->yy_limit  = SCNG(yy_limit);
208 
209 	lex_state->state_stack = SCNG(state_stack);
210 	zend_stack_init(&SCNG(state_stack));
211 
212 	lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214 
215 	lex_state->in = SCNG(yy_in);
216 	lex_state->yy_state = YYSTATE;
217 	lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
218 	lex_state->lineno = CG(zend_lineno);
219 
220 	lex_state->script_org = SCNG(script_org);
221 	lex_state->script_org_size = SCNG(script_org_size);
222 	lex_state->script_filtered = SCNG(script_filtered);
223 	lex_state->script_filtered_size = SCNG(script_filtered_size);
224 	lex_state->input_filter = SCNG(input_filter);
225 	lex_state->output_filter = SCNG(output_filter);
226 	lex_state->script_encoding = SCNG(script_encoding);
227 }
228 
zend_restore_lexical_state(zend_lex_state * lex_state TSRMLS_DC)229 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
230 {
231 	SCNG(yy_leng)   = lex_state->yy_leng;
232 	SCNG(yy_start)  = lex_state->yy_start;
233 	SCNG(yy_text)   = lex_state->yy_text;
234 	SCNG(yy_cursor) = lex_state->yy_cursor;
235 	SCNG(yy_marker) = lex_state->yy_marker;
236 	SCNG(yy_limit)  = lex_state->yy_limit;
237 
238 	zend_stack_destroy(&SCNG(state_stack));
239 	SCNG(state_stack) = lex_state->state_stack;
240 
241 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
242 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
243 	SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
244 
245 	SCNG(yy_in) = lex_state->in;
246 	YYSETCONDITION(lex_state->yy_state);
247 	CG(zend_lineno) = lex_state->lineno;
248 	zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
249 
250 	if (SCNG(script_filtered)) {
251 		efree(SCNG(script_filtered));
252 		SCNG(script_filtered) = NULL;
253 	}
254 	SCNG(script_org) = lex_state->script_org;
255 	SCNG(script_org_size) = lex_state->script_org_size;
256 	SCNG(script_filtered) = lex_state->script_filtered;
257 	SCNG(script_filtered_size) = lex_state->script_filtered_size;
258 	SCNG(input_filter) = lex_state->input_filter;
259 	SCNG(output_filter) = lex_state->output_filter;
260 	SCNG(script_encoding) = lex_state->script_encoding;
261 
262 	RESET_DOC_COMMENT();
263 }
264 
zend_destroy_file_handle(zend_file_handle * file_handle TSRMLS_DC)265 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
266 {
267 	zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
268 	/* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
269 	file_handle->opened_path = NULL;
270 	if (file_handle->free_filename) {
271 		file_handle->filename = NULL;
272 	}
273 }
274 
/* Byte-order marks, as string literals; use sizeof(...)-1 for their length. */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8     "\xef\xbb\xbf"
280 
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size TSRMLS_DC)281 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
282 {
283 	const unsigned char *p;
284 	int wchar_size = 2;
285 	int le = 0;
286 
287 	/* utf-16 or utf-32? */
288 	p = script;
289 	while ((p-script) < script_size) {
290 		p = memchr(p, 0, script_size-(p-script)-2);
291 		if (!p) {
292 			break;
293 		}
294 		if (*(p+1) == '\0' && *(p+2) == '\0') {
295 			wchar_size = 4;
296 			break;
297 		}
298 
299 		/* searching for UTF-32 specific byte orders, so this will do */
300 		p += 4;
301 	}
302 
303 	/* BE or LE? */
304 	p = script;
305 	while ((p-script) < script_size) {
306 		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
307 			/* BE */
308 			le = 0;
309 			break;
310 		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
311 			/* LE* */
312 			le = 1;
313 			break;
314 		}
315 		p += wchar_size;
316 	}
317 
318 	if (wchar_size == 2) {
319 		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
320 	} else {
321 		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
322 	}
323 
324 	return NULL;
325 }
326 
zend_multibyte_detect_unicode(TSRMLS_D)327 static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
328 {
329 	const zend_encoding *script_encoding = NULL;
330 	int bom_size;
331 	unsigned char *pos1, *pos2;
332 
333 	if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
334 		return NULL;
335 	}
336 
337 	/* check out BOM */
338 	if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
339 		script_encoding = zend_multibyte_encoding_utf32be;
340 		bom_size = sizeof(BOM_UTF32_BE)-1;
341 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
342 		script_encoding = zend_multibyte_encoding_utf32le;
343 		bom_size = sizeof(BOM_UTF32_LE)-1;
344 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
345 		script_encoding = zend_multibyte_encoding_utf16be;
346 		bom_size = sizeof(BOM_UTF16_BE)-1;
347 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
348 		script_encoding = zend_multibyte_encoding_utf16le;
349 		bom_size = sizeof(BOM_UTF16_LE)-1;
350 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
351 		script_encoding = zend_multibyte_encoding_utf8;
352 		bom_size = sizeof(BOM_UTF8)-1;
353 	}
354 
355 	if (script_encoding) {
356 		/* remove BOM */
357 		LANG_SCNG(script_org) += bom_size;
358 		LANG_SCNG(script_org_size) -= bom_size;
359 
360 		return script_encoding;
361 	}
362 
363 	/* script contains NULL bytes -> auto-detection */
364 	if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
365 		/* check if the NULL byte is after the __HALT_COMPILER(); */
366 		pos2 = LANG_SCNG(script_org);
367 
368 		while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
369 			pos2 = memchr(pos2, '_', pos1 - pos2);
370 			if (!pos2) break;
371 			pos2++;
372 			if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
373 				pos2 += sizeof("_HALT_COMPILER")-1;
374 				while (*pos2 == ' '  ||
375 					   *pos2 == '\t' ||
376 					   *pos2 == '\r' ||
377 					   *pos2 == '\n') {
378 					pos2++;
379 				}
380 				if (*pos2 == '(') {
381 					pos2++;
382 					while (*pos2 == ' '  ||
383 						   *pos2 == '\t' ||
384 						   *pos2 == '\r' ||
385 						   *pos2 == '\n') {
386 						pos2++;
387 					}
388 					if (*pos2 == ')') {
389 						pos2++;
390 						while (*pos2 == ' '  ||
391 							   *pos2 == '\t' ||
392 							   *pos2 == '\r' ||
393 							   *pos2 == '\n') {
394 							pos2++;
395 						}
396 						if (*pos2 == ';') {
397 							return NULL;
398 						}
399 					}
400 				}
401 			}
402 		}
403 		/* make best effort if BOM is missing */
404 		return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
405 	}
406 
407 	return NULL;
408 }
409 
zend_multibyte_find_script_encoding(TSRMLS_D)410 static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
411 {
412 	const zend_encoding *script_encoding;
413 
414 	if (CG(detect_unicode)) {
415 		/* check out bom(byte order mark) and see if containing wchars */
416 		script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
417 		if (script_encoding != NULL) {
418 			/* bom or wchar detection is prior to 'script_encoding' option */
419 			return script_encoding;
420 		}
421 	}
422 
423 	/* if no script_encoding specified, just leave alone */
424 	if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
425 		return NULL;
426 	}
427 
428 	/* if multiple encodings specified, detect automagically */
429 	if (CG(script_encoding_list_size) > 1) {
430 		return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
431 	}
432 
433 	return CG(script_encoding_list)[0];
434 }
435 
zend_multibyte_set_filter(const zend_encoding * onetime_encoding TSRMLS_DC)436 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
437 {
438 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
439 	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
440 
441 	if (!script_encoding) {
442 		return FAILURE;
443 	}
444 
445 	/* judge input/output filter */
446 	LANG_SCNG(script_encoding) = script_encoding;
447 	LANG_SCNG(input_filter) = NULL;
448 	LANG_SCNG(output_filter) = NULL;
449 
450 	if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
451 		if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
452 			/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
453 			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
454 			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
455 		} else {
456 			LANG_SCNG(input_filter) = NULL;
457 			LANG_SCNG(output_filter) = NULL;
458 		}
459 		return SUCCESS;
460 	}
461 
462 	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
463 		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
464 		LANG_SCNG(output_filter) = NULL;
465 	} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
466 		LANG_SCNG(input_filter) = NULL;
467 		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
468 	} else {
469 		/* both script and internal encodings are incompatible w/ flex */
470 		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
471 		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
472 	}
473 
474 	return 0;
475 }
476 
open_file_for_scanning(zend_file_handle * file_handle TSRMLS_DC)477 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
478 {
479 	const char *file_path = NULL;
480 	char *buf;
481 	size_t size, offset = 0;
482 
483 	/* The shebang line was read, get the current position to obtain the buffer start */
484 	if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
485 		if ((offset = ftell(file_handle->handle.fp)) == -1) {
486 			offset = 0;
487 		}
488 	}
489 
490 	if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
491 		return FAILURE;
492 	}
493 
494 	zend_llist_add_element(&CG(open_files), file_handle);
495 	if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
496 		zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
497 		size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
498 		fh->handle.stream.handle = (void*)(((char*)fh) + diff);
499 		file_handle->handle.stream.handle = fh->handle.stream.handle;
500 	}
501 
502 	/* Reset the scanner for scanning the new file */
503 	SCNG(yy_in) = file_handle;
504 	SCNG(yy_start) = NULL;
505 
506 	if (size != -1) {
507 		if (CG(multibyte)) {
508 			SCNG(script_org) = (unsigned char*)buf;
509 			SCNG(script_org_size) = size;
510 			SCNG(script_filtered) = NULL;
511 
512 			zend_multibyte_set_filter(NULL TSRMLS_CC);
513 
514 			if (SCNG(input_filter)) {
515 				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
516 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
517 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
518 				}
519 				buf = (char*)SCNG(script_filtered);
520 				size = SCNG(script_filtered_size);
521 			}
522 		}
523 		SCNG(yy_start) = (unsigned char *)buf - offset;
524 		yy_scan_buffer(buf, size TSRMLS_CC);
525 	} else {
526 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
527 	}
528 
529 	BEGIN(INITIAL);
530 
531 	if (file_handle->opened_path) {
532 		file_path = file_handle->opened_path;
533 	} else {
534 		file_path = file_handle->filename;
535 	}
536 
537 	zend_set_compiled_filename(file_path TSRMLS_CC);
538 
539 	if (CG(start_lineno)) {
540 		CG(zend_lineno) = CG(start_lineno);
541 		CG(start_lineno) = 0;
542 	} else {
543 		CG(zend_lineno) = 1;
544 	}
545 
546 	RESET_DOC_COMMENT();
547 	CG(increment_lineno) = 0;
548 	return SUCCESS;
549 }
END_EXTERN_C()550 END_EXTERN_C()
551 
552 
553 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
554 {
555 	zend_lex_state original_lex_state;
556 	zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
557 	zend_op_array *original_active_op_array = CG(active_op_array);
558 	zend_op_array *retval=NULL;
559 	int compiler_result;
560 	zend_bool compilation_successful=0;
561 	znode retval_znode;
562 	zend_bool original_in_compilation = CG(in_compilation);
563 
564 	retval_znode.op_type = IS_CONST;
565 	INIT_PZVAL(&retval_znode.u.constant);
566 	ZVAL_LONG(&retval_znode.u.constant, 1);
567 
568 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
569 
570 	retval = op_array; /* success oriented */
571 
572 	if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
573 		if (type==ZEND_REQUIRE) {
574 			zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
575 			zend_bailout();
576 		} else {
577 			zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
578 		}
579 		compilation_successful=0;
580 	} else {
581 		init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
582 		CG(in_compilation) = 1;
583 		CG(active_op_array) = op_array;
584 		zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
585 		zend_init_compiler_context(TSRMLS_C);
586 		compiler_result = zendparse(TSRMLS_C);
587 		zend_do_return(&retval_znode, 0 TSRMLS_CC);
588 		CG(in_compilation) = original_in_compilation;
589 		if (compiler_result != 0) { /* parser error */
590 			zend_bailout();
591 		}
592 		compilation_successful=1;
593 	}
594 
595 	if (retval) {
596 		CG(active_op_array) = original_active_op_array;
597 		if (compilation_successful) {
598 			pass_two(op_array TSRMLS_CC);
599 			zend_release_labels(0 TSRMLS_CC);
600 		} else {
601 			efree(op_array);
602 			retval = NULL;
603 		}
604 	}
605 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
606 	return retval;
607 }
608 
609 
compile_filename(int type,zval * filename TSRMLS_DC)610 zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
611 {
612 	zend_file_handle file_handle;
613 	zval tmp;
614 	zend_op_array *retval;
615 	char *opened_path = NULL;
616 
617 	if (filename->type != IS_STRING) {
618 		tmp = *filename;
619 		zval_copy_ctor(&tmp);
620 		convert_to_string(&tmp);
621 		filename = &tmp;
622 	}
623 	file_handle.filename = Z_STRVAL_P(filename);
624 	file_handle.free_filename = 0;
625 	file_handle.type = ZEND_HANDLE_FILENAME;
626 	file_handle.opened_path = NULL;
627 	file_handle.handle.fp = NULL;
628 
629 	retval = zend_compile_file(&file_handle, type TSRMLS_CC);
630 	if (retval && file_handle.handle.stream.handle) {
631 		int dummy = 1;
632 
633 		if (!file_handle.opened_path) {
634 			file_handle.opened_path = opened_path = estrndup(Z_STRVAL_P(filename), Z_STRLEN_P(filename));
635 		}
636 
637 		zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
638 
639 		if (opened_path) {
640 			efree(opened_path);
641 		}
642 	}
643 	zend_destroy_file_handle(&file_handle TSRMLS_CC);
644 
645 	if (filename==&tmp) {
646 		zval_dtor(&tmp);
647 	}
648 	return retval;
649 }
650 
zend_prepare_string_for_scanning(zval * str,char * filename TSRMLS_DC)651 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
652 {
653 	char *buf;
654 	size_t size;
655 
656 	/* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
657 	Z_STRVAL_P(str) = str_erealloc(Z_STRVAL_P(str), Z_STRLEN_P(str) + ZEND_MMAP_AHEAD);
658 	memset(Z_STRVAL_P(str) + Z_STRLEN_P(str), 0, ZEND_MMAP_AHEAD);
659 
660 	SCNG(yy_in) = NULL;
661 	SCNG(yy_start) = NULL;
662 
663 	buf = Z_STRVAL_P(str);
664 	size = Z_STRLEN_P(str);
665 
666 	if (CG(multibyte)) {
667 		SCNG(script_org) = (unsigned char*)buf;
668 		SCNG(script_org_size) = size;
669 		SCNG(script_filtered) = NULL;
670 
671 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
672 
673 		if (SCNG(input_filter)) {
674 			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
675 				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
676 						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
677 			}
678 			buf = (char*)SCNG(script_filtered);
679 			size = SCNG(script_filtered_size);
680 		}
681 	}
682 
683 	yy_scan_buffer(buf, size TSRMLS_CC);
684 
685 	zend_set_compiled_filename(filename TSRMLS_CC);
686 	CG(zend_lineno) = 1;
687 	CG(increment_lineno) = 0;
688 	RESET_DOC_COMMENT();
689 	return SUCCESS;
690 }
691 
692 
zend_get_scanned_file_offset(TSRMLS_D)693 ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
694 {
695 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
696 	if (SCNG(input_filter)) {
697 		size_t original_offset = offset, length = 0;
698 		do {
699 			unsigned char *p = NULL;
700 			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
701 				return (size_t)-1;
702 			}
703 			efree(p);
704 			if (length > original_offset) {
705 				offset--;
706 			} else if (length < original_offset) {
707 				offset++;
708 			}
709 		} while (original_offset != length);
710 	}
711 	return offset;
712 }
713 
714 
compile_string(zval * source_string,char * filename TSRMLS_DC)715 zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
716 {
717 	zend_lex_state original_lex_state;
718 	zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
719 	zend_op_array *original_active_op_array = CG(active_op_array);
720 	zend_op_array *retval;
721 	zval tmp;
722 	int compiler_result;
723 	zend_bool original_in_compilation = CG(in_compilation);
724 
725 	if (Z_STRLEN_P(source_string)==0) {
726 		efree(op_array);
727 		return NULL;
728 	}
729 
730 	CG(in_compilation) = 1;
731 
732 	tmp = *source_string;
733 	zval_copy_ctor(&tmp);
734 	convert_to_string(&tmp);
735 	source_string = &tmp;
736 
737 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
738 	if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
739 		efree(op_array);
740 		retval = NULL;
741 	} else {
742 		zend_bool orig_interactive = CG(interactive);
743 
744 		CG(interactive) = 0;
745 		init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
746 		CG(interactive) = orig_interactive;
747 		CG(active_op_array) = op_array;
748 		zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
749 		zend_init_compiler_context(TSRMLS_C);
750 		BEGIN(ST_IN_SCRIPTING);
751 		compiler_result = zendparse(TSRMLS_C);
752 
753 		if (SCNG(script_filtered)) {
754 			efree(SCNG(script_filtered));
755 			SCNG(script_filtered) = NULL;
756 		}
757 
758 		if (compiler_result != 0) {
759 			CG(active_op_array) = original_active_op_array;
760 			CG(unclean_shutdown)=1;
761 			destroy_op_array(op_array TSRMLS_CC);
762 			efree(op_array);
763 			retval = NULL;
764 		} else {
765 			zend_do_return(NULL, 0 TSRMLS_CC);
766 			CG(active_op_array) = original_active_op_array;
767 			pass_two(op_array TSRMLS_CC);
768 			zend_release_labels(0 TSRMLS_CC);
769 			retval = op_array;
770 		}
771 	}
772 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
773 	zval_dtor(&tmp);
774 	CG(in_compilation) = original_in_compilation;
775 	return retval;
776 }
777 
778 
BEGIN_EXTERN_C()779 BEGIN_EXTERN_C()
780 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
781 {
782 	zend_lex_state original_lex_state;
783 	zend_file_handle file_handle;
784 
785 	file_handle.type = ZEND_HANDLE_FILENAME;
786 	file_handle.filename = filename;
787 	file_handle.free_filename = 0;
788 	file_handle.opened_path = NULL;
789 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
790 	if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
791 		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
792 		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
793 		return FAILURE;
794 	}
795 	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
796 	if (SCNG(script_filtered)) {
797 		efree(SCNG(script_filtered));
798 		SCNG(script_filtered) = NULL;
799 	}
800 	zend_destroy_file_handle(&file_handle TSRMLS_CC);
801 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
802 	return SUCCESS;
803 }
804 
/*
 * Syntax-highlight the PHP source held in str using the given highlighter
 * settings.
 *
 * str       source text; it is highlighted from a private copy, the caller's
 *           zval is not modified
 * str_name  name recorded as the compiled filename while scanning
 *
 * Returns FAILURE when the string cannot be prepared for scanning, SUCCESS
 * otherwise.  The private copy is released and the caller's lexical state is
 * restored on both paths.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
	if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
		/* the duplicated string is owned here, so it must not outlive the
		   failure path either */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
	/* drop the converted copy of the script, if one was made */
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
	zval_dtor(str);
	return SUCCESS;
}
827 
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding TSRMLS_DC)828 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
829 {
830 	size_t length;
831 	unsigned char *new_yy_start;
832 
833 	/* convert and set */
834 	if (!SCNG(input_filter)) {
835 		if (SCNG(script_filtered)) {
836 			efree(SCNG(script_filtered));
837 			SCNG(script_filtered) = NULL;
838 		}
839 		SCNG(script_filtered_size) = 0;
840 		length = SCNG(script_org_size);
841 		new_yy_start = SCNG(script_org);
842 	} else {
843 		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
844 			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
845 					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
846 		}
847 		SCNG(script_filtered) = new_yy_start;
848 		SCNG(script_filtered_size) = length;
849 	}
850 
851 	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
852 	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
853 	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
854 	SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
855 
856 	SCNG(yy_start) = new_yy_start;
857 }
858 
859 
/*
 * Stores a copy of the `yyleng` bytes at `yytext` as the string value of
 * `zendlval`. When an output filter is installed the filter produces the
 * buffer and its length; otherwise the bytes are duplicated with estrndup().
 * The zval's type is not touched here - callers assign it themselves.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement and stays safe inside an unbraced if/else; invoke it with a
 * trailing semicolon.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
	do { \
		if (SCNG(output_filter)) { \
			size_t sz = 0; \
			SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
			Z_STRLEN_P(zendlval) = sz; \
		} else { \
			Z_STRVAL_P(zendlval) = (char *) estrndup((yytext), (yyleng)); \
			Z_STRLEN_P(zendlval) = (yyleng); \
		} \
	} while (0)
869 
/*
 * Copies `len` bytes of `str` into `zendlval` as a string and decodes the
 * backslash escape sequences of that copy in place.
 *
 * Recognised: \n \r \t \f \v \e, \\ and \$, the quote character given in
 * `quote_type`, \x or \X followed by one or two hex digits, and one to three
 * octal digits. Any other sequence - including a backslash in front of the
 * quote character that is not `quote_type`, and a backslash as the very last
 * byte - is kept verbatim.
 *
 * CG(zend_lineno) is advanced for every "\n" and for every "\r" that is not
 * followed by "\n" among the bytes consumed. If an output filter is installed
 * the decoded buffer is run through it and then freed.
 */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
	register char *src, *dst;
	char *limit;

	ZVAL_STRINGL(zendlval, str, len, 1);

	/* decode in place: the write position never overtakes the read position */
	src = dst = Z_STRVAL_P(zendlval);
	limit = src + Z_STRLEN_P(zendlval);

	for (; src < limit; src++) {
		if (*src != '\\') {
			*dst++ = *src;
		} else {
			/* number of bytes the decoded form is shorter than the escape */
			int shrink = 0;

			src++;
			if (src >= limit) {
				/* lone trailing backslash: keep it and stop */
				*dst++ = '\\';
				break;
			}

			switch (*src) {
				case 'n':
					*dst++ = '\n';
					shrink = 1;
					break;
				case 'r':
					*dst++ = '\r';
					shrink = 1;
					break;
				case 't':
					*dst++ = '\t';
					shrink = 1;
					break;
				case 'f':
					*dst++ = '\f';
					shrink = 1;
					break;
				case 'v':
					*dst++ = '\v';
					shrink = 1;
					break;
				case 'e':
#ifdef PHP_WIN32
					*dst++ = VK_ESCAPE;
#else
					*dst++ = '\e';
#endif
					shrink = 1;
					break;
				case '"':
				case '`':
				case '\\':
				case '$':
					if ((*src == '"' || *src == '`') && *src != quote_type) {
						/* a quote character other than quote_type keeps its backslash */
						*dst++ = '\\';
						*dst++ = *src;
					} else {
						*dst++ = *src;
						shrink = 1;
					}
					break;
				case 'x':
				case 'X':
					if (!ZEND_IS_HEX(src[1])) {
						*dst++ = '\\';
						*dst++ = *src;
					} else {
						char hex_buf[3] = { 0, 0, 0 };

						hex_buf[0] = *++src;
						shrink = 2; /* the 'x' and the first digit */
						if (ZEND_IS_HEX(src[1])) {
							hex_buf[1] = *++src;
							shrink = 3;
						}
						*dst++ = (char) strtol(hex_buf, NULL, 16);
					}
					break;
				default:
					/* anything left is either an octal sequence or not an escape */
					if (!ZEND_IS_OCT(*src)) {
						*dst++ = '\\';
						*dst++ = *src;
					} else {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *src;
						shrink = 1;
						if (ZEND_IS_OCT(src[1])) {
							octal_buf[1] = *++src;
							shrink = 2;
							if (ZEND_IS_OCT(src[1])) {
								octal_buf[2] = *++src;
								shrink = 3;
							}
						}
						*dst++ = (char) strtol(octal_buf, NULL, 8);
					}
					break;
			}

			/* the stored length is not read inside the loop, so one adjustment per escape is enough */
			Z_STRLEN_P(zendlval) -= shrink;
		}

		/* src is on the last byte consumed in this round */
		if (*src == '\n' || (*src == '\r' && src[1] != '\n')) {
			CG(zend_lineno)++;
		}
	}
	*dst = 0;

	if (SCNG(output_filter)) {
		size_t sz = 0;

		/* the filter stores a new buffer in the zval; the decoded one is released afterwards */
		src = Z_STRVAL_P(zendlval);
		SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)src, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
		Z_STRLEN_P(zendlval) = sz;
		efree(src);
	}
}
988 
989 
lex_scan(zval * zendlval TSRMLS_DC)990 int lex_scan(zval *zendlval TSRMLS_DC)
991 {
992 restart:
993 	SCNG(yy_text) = YYCURSOR;
994 
995 yymore_restart:
996 
997 /*!re2c
998 re2c:yyfill:check = 0;
999 LNUM	[0-9]+
1000 DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1001 EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
1002 HNUM	"0x"[0-9a-fA-F]+
1003 BNUM	"0b"[01]+
1004 LABEL	[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1005 WHITESPACE [ \n\r\t]+
1006 TABS_AND_SPACES [ \t]*
1007 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1008 ANY_CHAR [^]
1009 NEWLINE ("\r"|"\n"|"\r\n")
1010 
1011 /* compute yyleng before each rule */
1012 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1013 
1014 <ST_IN_SCRIPTING>"exit" {
1015 	return T_EXIT;
1016 }
1017 
1018 <ST_IN_SCRIPTING>"die" {
1019 	return T_EXIT;
1020 }
1021 
1022 <ST_IN_SCRIPTING>"function" {
1023 	return T_FUNCTION;
1024 }
1025 
1026 <ST_IN_SCRIPTING>"const" {
1027 	return T_CONST;
1028 }
1029 
1030 <ST_IN_SCRIPTING>"return" {
1031 	return T_RETURN;
1032 }
1033 
1034 <ST_IN_SCRIPTING>"yield" {
1035 	return T_YIELD;
1036 }
1037 
1038 <ST_IN_SCRIPTING>"try" {
1039 	return T_TRY;
1040 }
1041 
1042 <ST_IN_SCRIPTING>"catch" {
1043 	return T_CATCH;
1044 }
1045 
1046 <ST_IN_SCRIPTING>"finally" {
1047 	return T_FINALLY;
1048 }
1049 
1050 <ST_IN_SCRIPTING>"throw" {
1051 	return T_THROW;
1052 }
1053 
1054 <ST_IN_SCRIPTING>"if" {
1055 	return T_IF;
1056 }
1057 
1058 <ST_IN_SCRIPTING>"elseif" {
1059 	return T_ELSEIF;
1060 }
1061 
1062 <ST_IN_SCRIPTING>"endif" {
1063 	return T_ENDIF;
1064 }
1065 
1066 <ST_IN_SCRIPTING>"else" {
1067 	return T_ELSE;
1068 }
1069 
1070 <ST_IN_SCRIPTING>"while" {
1071 	return T_WHILE;
1072 }
1073 
1074 <ST_IN_SCRIPTING>"endwhile" {
1075 	return T_ENDWHILE;
1076 }
1077 
1078 <ST_IN_SCRIPTING>"do" {
1079 	return T_DO;
1080 }
1081 
1082 <ST_IN_SCRIPTING>"for" {
1083 	return T_FOR;
1084 }
1085 
1086 <ST_IN_SCRIPTING>"endfor" {
1087 	return T_ENDFOR;
1088 }
1089 
1090 <ST_IN_SCRIPTING>"foreach" {
1091 	return T_FOREACH;
1092 }
1093 
1094 <ST_IN_SCRIPTING>"endforeach" {
1095 	return T_ENDFOREACH;
1096 }
1097 
1098 <ST_IN_SCRIPTING>"declare" {
1099 	return T_DECLARE;
1100 }
1101 
1102 <ST_IN_SCRIPTING>"enddeclare" {
1103 	return T_ENDDECLARE;
1104 }
1105 
1106 <ST_IN_SCRIPTING>"instanceof" {
1107 	return T_INSTANCEOF;
1108 }
1109 
1110 <ST_IN_SCRIPTING>"as" {
1111 	return T_AS;
1112 }
1113 
1114 <ST_IN_SCRIPTING>"switch" {
1115 	return T_SWITCH;
1116 }
1117 
1118 <ST_IN_SCRIPTING>"endswitch" {
1119 	return T_ENDSWITCH;
1120 }
1121 
1122 <ST_IN_SCRIPTING>"case" {
1123 	return T_CASE;
1124 }
1125 
1126 <ST_IN_SCRIPTING>"default" {
1127 	return T_DEFAULT;
1128 }
1129 
1130 <ST_IN_SCRIPTING>"break" {
1131 	return T_BREAK;
1132 }
1133 
1134 <ST_IN_SCRIPTING>"continue" {
1135 	return T_CONTINUE;
1136 }
1137 
1138 <ST_IN_SCRIPTING>"goto" {
1139 	return T_GOTO;
1140 }
1141 
1142 <ST_IN_SCRIPTING>"echo" {
1143 	return T_ECHO;
1144 }
1145 
1146 <ST_IN_SCRIPTING>"print" {
1147 	return T_PRINT;
1148 }
1149 
1150 <ST_IN_SCRIPTING>"class" {
1151 	return T_CLASS;
1152 }
1153 
1154 <ST_IN_SCRIPTING>"interface" {
1155 	return T_INTERFACE;
1156 }
1157 
1158 <ST_IN_SCRIPTING>"trait" {
1159 	return T_TRAIT;
1160 }
1161 
1162 <ST_IN_SCRIPTING>"extends" {
1163 	return T_EXTENDS;
1164 }
1165 
1166 <ST_IN_SCRIPTING>"implements" {
1167 	return T_IMPLEMENTS;
1168 }
1169 
1170 <ST_IN_SCRIPTING>"->" {
1171 	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1172 	return T_OBJECT_OPERATOR;
1173 }
1174 
1175 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1176 	ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1177 	HANDLE_NEWLINES(yytext, yyleng);
1178 	return T_WHITESPACE;
1179 }
1180 
1181 <ST_LOOKING_FOR_PROPERTY>"->" {
1182 	return T_OBJECT_OPERATOR;
1183 }
1184 
1185 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1186 	yy_pop_state(TSRMLS_C);
1187 	zend_copy_value(zendlval, yytext, yyleng);
1188 	zendlval->type = IS_STRING;
1189 	return T_STRING;
1190 }
1191 
1192 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1193 	yyless(0);
1194 	yy_pop_state(TSRMLS_C);
1195 	goto restart;
1196 }
1197 
1198 <ST_IN_SCRIPTING>"::" {
1199 	return T_PAAMAYIM_NEKUDOTAYIM;
1200 }
1201 
1202 <ST_IN_SCRIPTING>"\\" {
1203 	return T_NS_SEPARATOR;
1204 }
1205 
1206 <ST_IN_SCRIPTING>"..." {
1207 	return T_ELLIPSIS;
1208 }
1209 
1210 <ST_IN_SCRIPTING>"new" {
1211 	return T_NEW;
1212 }
1213 
1214 <ST_IN_SCRIPTING>"clone" {
1215 	return T_CLONE;
1216 }
1217 
1218 <ST_IN_SCRIPTING>"var" {
1219 	return T_VAR;
1220 }
1221 
1222 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1223 	return T_INT_CAST;
1224 }
1225 
1226 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1227 	return T_DOUBLE_CAST;
1228 }
1229 
1230 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1231 	return T_STRING_CAST;
1232 }
1233 
1234 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1235 	return T_ARRAY_CAST;
1236 }
1237 
1238 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1239 	return T_OBJECT_CAST;
1240 }
1241 
1242 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1243 	return T_BOOL_CAST;
1244 }
1245 
1246 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1247 	return T_UNSET_CAST;
1248 }
1249 
1250 <ST_IN_SCRIPTING>"eval" {
1251 	return T_EVAL;
1252 }
1253 
1254 <ST_IN_SCRIPTING>"include" {
1255 	return T_INCLUDE;
1256 }
1257 
1258 <ST_IN_SCRIPTING>"include_once" {
1259 	return T_INCLUDE_ONCE;
1260 }
1261 
1262 <ST_IN_SCRIPTING>"require" {
1263 	return T_REQUIRE;
1264 }
1265 
1266 <ST_IN_SCRIPTING>"require_once" {
1267 	return T_REQUIRE_ONCE;
1268 }
1269 
1270 <ST_IN_SCRIPTING>"namespace" {
1271 	return T_NAMESPACE;
1272 }
1273 
1274 <ST_IN_SCRIPTING>"use" {
1275 	return T_USE;
1276 }
1277 
1278 <ST_IN_SCRIPTING>"insteadof" {
1279         return T_INSTEADOF;
1280 }
1281 
1282 <ST_IN_SCRIPTING>"global" {
1283 	return T_GLOBAL;
1284 }
1285 
1286 <ST_IN_SCRIPTING>"isset" {
1287 	return T_ISSET;
1288 }
1289 
1290 <ST_IN_SCRIPTING>"empty" {
1291 	return T_EMPTY;
1292 }
1293 
1294 <ST_IN_SCRIPTING>"__halt_compiler" {
1295 	return T_HALT_COMPILER;
1296 }
1297 
1298 <ST_IN_SCRIPTING>"static" {
1299 	return T_STATIC;
1300 }
1301 
1302 <ST_IN_SCRIPTING>"abstract" {
1303 	return T_ABSTRACT;
1304 }
1305 
1306 <ST_IN_SCRIPTING>"final" {
1307 	return T_FINAL;
1308 }
1309 
1310 <ST_IN_SCRIPTING>"private" {
1311 	return T_PRIVATE;
1312 }
1313 
1314 <ST_IN_SCRIPTING>"protected" {
1315 	return T_PROTECTED;
1316 }
1317 
1318 <ST_IN_SCRIPTING>"public" {
1319 	return T_PUBLIC;
1320 }
1321 
1322 <ST_IN_SCRIPTING>"unset" {
1323 	return T_UNSET;
1324 }
1325 
1326 <ST_IN_SCRIPTING>"=>" {
1327 	return T_DOUBLE_ARROW;
1328 }
1329 
1330 <ST_IN_SCRIPTING>"list" {
1331 	return T_LIST;
1332 }
1333 
1334 <ST_IN_SCRIPTING>"array" {
1335 	return T_ARRAY;
1336 }
1337 
1338 <ST_IN_SCRIPTING>"callable" {
1339  return T_CALLABLE;
1340 }
1341 
1342 <ST_IN_SCRIPTING>"++" {
1343 	return T_INC;
1344 }
1345 
1346 <ST_IN_SCRIPTING>"--" {
1347 	return T_DEC;
1348 }
1349 
1350 <ST_IN_SCRIPTING>"===" {
1351 	return T_IS_IDENTICAL;
1352 }
1353 
1354 <ST_IN_SCRIPTING>"!==" {
1355 	return T_IS_NOT_IDENTICAL;
1356 }
1357 
1358 <ST_IN_SCRIPTING>"==" {
1359 	return T_IS_EQUAL;
1360 }
1361 
1362 <ST_IN_SCRIPTING>"!="|"<>" {
1363 	return T_IS_NOT_EQUAL;
1364 }
1365 
1366 <ST_IN_SCRIPTING>"<=" {
1367 	return T_IS_SMALLER_OR_EQUAL;
1368 }
1369 
1370 <ST_IN_SCRIPTING>">=" {
1371 	return T_IS_GREATER_OR_EQUAL;
1372 }
1373 
1374 <ST_IN_SCRIPTING>"+=" {
1375 	return T_PLUS_EQUAL;
1376 }
1377 
1378 <ST_IN_SCRIPTING>"-=" {
1379 	return T_MINUS_EQUAL;
1380 }
1381 
1382 <ST_IN_SCRIPTING>"*=" {
1383 	return T_MUL_EQUAL;
1384 }
1385 
1386 <ST_IN_SCRIPTING>"*\*" {
1387 	return T_POW;
1388 }
1389 
1390 <ST_IN_SCRIPTING>"*\*=" {
1391 	return T_POW_EQUAL;
1392 }
1393 
1394 <ST_IN_SCRIPTING>"/=" {
1395 	return T_DIV_EQUAL;
1396 }
1397 
1398 <ST_IN_SCRIPTING>".=" {
1399 	return T_CONCAT_EQUAL;
1400 }
1401 
1402 <ST_IN_SCRIPTING>"%=" {
1403 	return T_MOD_EQUAL;
1404 }
1405 
1406 <ST_IN_SCRIPTING>"<<=" {
1407 	return T_SL_EQUAL;
1408 }
1409 
1410 <ST_IN_SCRIPTING>">>=" {
1411 	return T_SR_EQUAL;
1412 }
1413 
1414 <ST_IN_SCRIPTING>"&=" {
1415 	return T_AND_EQUAL;
1416 }
1417 
1418 <ST_IN_SCRIPTING>"|=" {
1419 	return T_OR_EQUAL;
1420 }
1421 
1422 <ST_IN_SCRIPTING>"^=" {
1423 	return T_XOR_EQUAL;
1424 }
1425 
1426 <ST_IN_SCRIPTING>"||" {
1427 	return T_BOOLEAN_OR;
1428 }
1429 
1430 <ST_IN_SCRIPTING>"&&" {
1431 	return T_BOOLEAN_AND;
1432 }
1433 
1434 <ST_IN_SCRIPTING>"OR" {
1435 	return T_LOGICAL_OR;
1436 }
1437 
1438 <ST_IN_SCRIPTING>"AND" {
1439 	return T_LOGICAL_AND;
1440 }
1441 
1442 <ST_IN_SCRIPTING>"XOR" {
1443 	return T_LOGICAL_XOR;
1444 }
1445 
1446 <ST_IN_SCRIPTING>"<<" {
1447 	return T_SL;
1448 }
1449 
1450 <ST_IN_SCRIPTING>">>" {
1451 	return T_SR;
1452 }
1453 
1454 <ST_IN_SCRIPTING>{TOKENS} {
1455 	return yytext[0];
1456 }
1457 
1458 
1459 <ST_IN_SCRIPTING>"{" {
1460 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1461 	return '{';
1462 }
1463 
1464 
1465 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1466 	yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1467 	return T_DOLLAR_OPEN_CURLY_BRACES;
1468 }
1469 
1470 
1471 <ST_IN_SCRIPTING>"}" {
1472 	RESET_DOC_COMMENT();
1473 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
1474 		yy_pop_state(TSRMLS_C);
1475 	}
1476 	return '}';
1477 }
1478 
1479 
1480 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1481 	yyless(yyleng - 1);
1482 	zend_copy_value(zendlval, yytext, yyleng);
1483 	zendlval->type = IS_STRING;
1484 	yy_pop_state(TSRMLS_C);
1485 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1486 	return T_STRING_VARNAME;
1487 }
1488 
1489 
1490 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1491 	yyless(0);
1492 	yy_pop_state(TSRMLS_C);
1493 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1494 	goto restart;
1495 }
1496 
1497 <ST_IN_SCRIPTING>{BNUM} {
1498 	char *bin = yytext + 2; /* Skip "0b" */
1499 	int len = yyleng - 2;
1500 
1501 	/* Skip any leading 0s */
1502 	while (*bin == '0') {
1503 		++bin;
1504 		--len;
1505 	}
1506 
1507 	if (len < SIZEOF_LONG * 8) {
1508 		if (len == 0) {
1509 			Z_LVAL_P(zendlval) = 0;
1510 		} else {
1511 			Z_LVAL_P(zendlval) = strtol(bin, NULL, 2);
1512 		}
1513 		zendlval->type = IS_LONG;
1514 		return T_LNUMBER;
1515 	} else {
1516 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, NULL));
1517 		return T_DNUMBER;
1518 	}
1519 }
1520 
1521 <ST_IN_SCRIPTING>{LNUM} {
1522 	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1523 		Z_LVAL_P(zendlval) = strtol(yytext, NULL, 0);
1524 	} else {
1525 		errno = 0;
1526 		Z_LVAL_P(zendlval) = strtol(yytext, NULL, 0);
1527 		if (errno == ERANGE) { /* Overflow */
1528 			if (yytext[0] == '0') { /* octal overflow */
1529 				Z_DVAL_P(zendlval) = zend_oct_strtod(yytext, NULL);
1530 			} else {
1531 				Z_DVAL_P(zendlval) = zend_strtod(yytext, NULL);
1532 			}
1533 			zendlval->type = IS_DOUBLE;
1534 			return T_DNUMBER;
1535 		}
1536 	}
1537 
1538 	zendlval->type = IS_LONG;
1539 	return T_LNUMBER;
1540 }
1541 
1542 <ST_IN_SCRIPTING>{HNUM} {
1543 	char *hex = yytext + 2; /* Skip "0x" */
1544 	int len = yyleng - 2;
1545 
1546 	/* Skip any leading 0s */
1547 	while (*hex == '0') {
1548 		hex++;
1549 		len--;
1550 	}
1551 
1552 	if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1553 		if (len == 0) {
1554 			Z_LVAL_P(zendlval) = 0;
1555 		} else {
1556 			Z_LVAL_P(zendlval) = strtol(hex, NULL, 16);
1557 		}
1558 		zendlval->type = IS_LONG;
1559 		return T_LNUMBER;
1560 	} else {
1561 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, NULL));
1562 		return T_DNUMBER;
1563 	}
1564 }
1565 
1566 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1567 	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1568 		ZVAL_LONG(zendlval, strtol(yytext, NULL, 10));
1569 	} else {
1570 		ZVAL_STRINGL(zendlval, yytext, yyleng, 1);
1571 	}
1572 	return T_NUM_STRING;
1573 }
1574 
1575 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1576 	ZVAL_STRINGL(zendlval, yytext, yyleng, 1);
1577 	return T_NUM_STRING;
1578 }
1579 
1580 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1581 	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1582 	return T_DNUMBER;
1583 }
1584 
1585 <ST_IN_SCRIPTING>"__CLASS__" {
1586 	zend_class_entry *ce = CG(active_class_entry);
1587 	if (ce && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1588 		/* We create a special __CLASS__ constant that is going to be resolved
1589 		   at run-time */
1590 		Z_STRLEN_P(zendlval) = sizeof("__CLASS__")-1;
1591 		Z_STRVAL_P(zendlval) = estrndup("__CLASS__", Z_STRLEN_P(zendlval));
1592 		zendlval->type = IS_CONSTANT;
1593 	} else {
1594 		if (ce && ce->name) {
1595 			ZVAL_STRINGL(zendlval, ce->name, ce->name_length, 1);
1596 		} else {
1597 			ZVAL_EMPTY_STRING(zendlval);
1598 		}
1599 	}
1600 	return T_CLASS_C;
1601 }
1602 
1603 <ST_IN_SCRIPTING>"__TRAIT__" {
1604 	zend_class_entry *ce = CG(active_class_entry);
1605 	if (ce && ce->name && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1606 		ZVAL_STRINGL(zendlval, ce->name, ce->name_length, 1);
1607 	} else {
1608 		ZVAL_EMPTY_STRING(zendlval);
1609 	}
1610 	return T_TRAIT_C;
1611 }
1612 
1613 <ST_IN_SCRIPTING>"__FUNCTION__" {
1614 	zend_op_array *op_array = CG(active_op_array);
1615 	if (op_array && op_array->function_name) {
1616 		ZVAL_STRING(zendlval, op_array->function_name, 1);
1617 	} else {
1618 		ZVAL_EMPTY_STRING(zendlval);
1619 	}
1620 	return T_FUNC_C;
1621 }
1622 
1623 <ST_IN_SCRIPTING>"__METHOD__" {
1624 	const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1625 	const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1626 
1627 	Z_STRLEN_P(zendlval) = zend_spprintf(&Z_STRVAL_P(zendlval), 0, "%s%s%s",
1628 		class_name ? class_name : "",
1629 		class_name && func_name ? "::" : "",
1630 		func_name ? func_name : ""
1631 		);
1632 	zendlval->type = IS_STRING;
1633 	return T_METHOD_C;
1634 }
1635 
1636 <ST_IN_SCRIPTING>"__LINE__" {
1637 	ZVAL_LONG(zendlval, CG(zend_lineno));
1638 	return T_LINE;
1639 }
1640 
1641 <ST_IN_SCRIPTING>"__FILE__" {
1642 	char *filename = zend_get_compiled_filename(TSRMLS_C);
1643 
1644 	if (!filename) {
1645 		filename = "";
1646 	}
1647 	ZVAL_STRING(zendlval, filename, 1);
1648 	return T_FILE;
1649 }
1650 
1651 <ST_IN_SCRIPTING>"__DIR__" {
1652 	char *filename = zend_get_compiled_filename(TSRMLS_C);
1653 	const size_t filename_len = strlen(filename);
1654 	char *dirname;
1655 
1656 	if (!filename) {
1657 		filename = "";
1658 	}
1659 
1660 	dirname = estrndup(filename, filename_len);
1661 	zend_dirname(dirname, filename_len);
1662 
1663 	if (strcmp(dirname, ".") == 0) {
1664 		dirname = erealloc(dirname, MAXPATHLEN);
1665 #if HAVE_GETCWD
1666 		VCWD_GETCWD(dirname, MAXPATHLEN);
1667 #elif HAVE_GETWD
1668 		VCWD_GETWD(dirname);
1669 #endif
1670 	}
1671 
1672 	ZVAL_STRING(zendlval, dirname, 0);
1673 	return T_DIR;
1674 }
1675 
1676 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1677 	if (CG(current_namespace)) {
1678 		*zendlval = *CG(current_namespace);
1679 		zval_copy_ctor(zendlval);
1680 	} else {
1681 		ZVAL_EMPTY_STRING(zendlval);
1682 	}
1683 	return T_NS_C;
1684 }
1685 
1686 <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1687 	YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1688 
1689 	if (bracket != SCNG(yy_text)) {
1690 		/* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1691 		YYCURSOR = bracket;
1692 		goto inline_html;
1693 	}
1694 
1695 	HANDLE_NEWLINES(yytext, yyleng);
1696 	ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1697 	BEGIN(ST_IN_SCRIPTING);
1698 	return T_OPEN_TAG;
1699 }
1700 
1701 
1702 <INITIAL>"<%=" {
1703 	if (CG(asp_tags)) {
1704 		ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1705 		BEGIN(ST_IN_SCRIPTING);
1706 		return T_OPEN_TAG_WITH_ECHO;
1707 	} else {
1708 		goto inline_char_handler;
1709 	}
1710 }
1711 
1712 
1713 <INITIAL>"<?=" {
1714 	ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1715 	BEGIN(ST_IN_SCRIPTING);
1716 	return T_OPEN_TAG_WITH_ECHO;
1717 }
1718 
1719 
1720 <INITIAL>"<%" {
1721 	if (CG(asp_tags)) {
1722 		ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1723 		BEGIN(ST_IN_SCRIPTING);
1724 		return T_OPEN_TAG;
1725 	} else {
1726 		goto inline_char_handler;
1727 	}
1728 }
1729 
1730 
1731 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1732 	ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1733 	HANDLE_NEWLINE(yytext[yyleng-1]);
1734 	BEGIN(ST_IN_SCRIPTING);
1735 	return T_OPEN_TAG;
1736 }
1737 
1738 
1739 <INITIAL>"<?" {
1740 	if (CG(short_tags)) {
1741 		ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1742 		BEGIN(ST_IN_SCRIPTING);
1743 		return T_OPEN_TAG;
1744 	} else {
1745 		goto inline_char_handler;
1746 	}
1747 }
1748 
1749 <INITIAL>{ANY_CHAR} {
1750 	if (YYCURSOR > YYLIMIT) {
1751 		return 0;
1752 	}
1753 
1754 inline_char_handler:
1755 
1756 	while (1) {
1757 		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1758 
1759 		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1760 
1761 		if (YYCURSOR < YYLIMIT) {
1762 			switch (*YYCURSOR) {
1763 				case '?':
1764 					if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1765 						break;
1766 					}
1767 					continue;
1768 				case '%':
1769 					if (CG(asp_tags)) {
1770 						break;
1771 					}
1772 					continue;
1773 				case 's':
1774 				case 'S':
1775 					/* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1776 					 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1777 					YYCURSOR--;
1778 					yymore();
1779 				default:
1780 					continue;
1781 			}
1782 
1783 			YYCURSOR--;
1784 		}
1785 
1786 		break;
1787 	}
1788 
1789 inline_html:
1790 	yyleng = YYCURSOR - SCNG(yy_text);
1791 
1792 	if (SCNG(output_filter)) {
1793 		int readsize;
1794 		size_t sz = 0;
1795 		readsize = SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1796 		Z_STRLEN_P(zendlval) = sz;
1797 		if (readsize < yyleng) {
1798 			yyless(readsize);
1799 		}
1800 	} else {
1801 	  Z_STRVAL_P(zendlval) = (char *) estrndup(yytext, yyleng);
1802 	  Z_STRLEN_P(zendlval) = yyleng;
1803 	}
1804 	zendlval->type = IS_STRING;
1805 	HANDLE_NEWLINES(yytext, yyleng);
1806 	return T_INLINE_HTML;
1807 }
1808 
1809 
1810 /* Make sure a label character follows "->", otherwise there is no property
1811  * and "->" will be taken literally
1812  */
1813 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1814 	yyless(yyleng - 3);
1815 	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1816 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1817 	zendlval->type = IS_STRING;
1818 	return T_VARIABLE;
1819 }
1820 
1821 /* A [ always designates a variable offset, regardless of what follows
1822  */
1823 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1824 	yyless(yyleng - 1);
1825 	yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1826 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1827 	zendlval->type = IS_STRING;
1828 	return T_VARIABLE;
1829 }
1830 
1831 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1832 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1833 	zendlval->type = IS_STRING;
1834 	return T_VARIABLE;
1835 }
1836 
1837 <ST_VAR_OFFSET>"]" {
1838 	yy_pop_state(TSRMLS_C);
1839 	return ']';
1840 }
1841 
1842 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1843 	/* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1844 	return yytext[0];
1845 }
1846 
1847 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1848 	/* Invalid rule to return a more explicit parse error with proper line number */
1849 	yyless(0);
1850 	yy_pop_state(TSRMLS_C);
1851 	return T_ENCAPSED_AND_WHITESPACE;
1852 }
1853 
1854 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1855 	zend_copy_value(zendlval, yytext, yyleng);
1856 	zendlval->type = IS_STRING;
1857 	return T_STRING;
1858 }
1859 
1860 
1861 <ST_IN_SCRIPTING>"#"|"//" {
1862 	while (YYCURSOR < YYLIMIT) {
1863 		switch (*YYCURSOR++) {
1864 			case '\r':
1865 				if (*YYCURSOR == '\n') {
1866 					YYCURSOR++;
1867 				}
1868 				/* fall through */
1869 			case '\n':
1870 				CG(zend_lineno)++;
1871 				break;
1872 			case '%':
1873 				if (!CG(asp_tags)) {
1874 					continue;
1875 				}
1876 				/* fall through */
1877 			case '?':
1878 				if (*YYCURSOR == '>') {
1879 					YYCURSOR--;
1880 					break;
1881 				}
1882 				/* fall through */
1883 			default:
1884 				continue;
1885 		}
1886 
1887 		break;
1888 	}
1889 
1890 	yyleng = YYCURSOR - SCNG(yy_text);
1891 
1892 	return T_COMMENT;
1893 }
1894 
1895 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1896 	int doc_com;
1897 
1898 	if (yyleng > 2) {
1899 		doc_com = 1;
1900 		RESET_DOC_COMMENT();
1901 	} else {
1902 		doc_com = 0;
1903 	}
1904 
1905 	while (YYCURSOR < YYLIMIT) {
1906 		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1907 			break;
1908 		}
1909 	}
1910 
1911 	if (YYCURSOR < YYLIMIT) {
1912 		YYCURSOR++;
1913 	} else {
1914 		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1915 	}
1916 
1917 	yyleng = YYCURSOR - SCNG(yy_text);
1918 	HANDLE_NEWLINES(yytext, yyleng);
1919 
1920 	if (doc_com) {
1921 		CG(doc_comment) = estrndup(yytext, yyleng);
1922 		CG(doc_comment_len) = yyleng;
1923 		return T_DOC_COMMENT;
1924 	}
1925 
1926 	return T_COMMENT;
1927 }
1928 
1929 <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1930 	ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1931 	BEGIN(INITIAL);
1932 	return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1933 }
1934 
1935 
1936 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1937 	if (CG(asp_tags)) {
1938 		BEGIN(INITIAL);
1939 		ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1940 		return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1941 	} else {
1942 		yyless(1);
1943 		return yytext[0];
1944 	}
1945 }
1946 
1947 
1948 <ST_IN_SCRIPTING>b?['] {
1949 	register char *s, *t;
1950 	char *end;
1951 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
1952 
1953 	while (1) {
1954 		if (YYCURSOR < YYLIMIT) {
1955 			if (*YYCURSOR == '\'') {
1956 				YYCURSOR++;
1957 				yyleng = YYCURSOR - SCNG(yy_text);
1958 
1959 				break;
1960 			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1961 				YYCURSOR++;
1962 			}
1963 		} else {
1964 			yyleng = YYLIMIT - SCNG(yy_text);
1965 
1966 			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
1967 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1968 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1969 			return T_ENCAPSED_AND_WHITESPACE;
1970 		}
1971 	}
1972 
1973 	ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2, 1);
1974 
1975 	/* convert escape sequences */
1976 	s = t = Z_STRVAL_P(zendlval);
1977 	end = s+Z_STRLEN_P(zendlval);
1978 	while (s<end) {
1979 		if (*s=='\\') {
1980 			s++;
1981 
1982 			switch(*s) {
1983 				case '\\':
1984 				case '\'':
1985 					*t++ = *s;
1986 					Z_STRLEN_P(zendlval)--;
1987 					break;
1988 				default:
1989 					*t++ = '\\';
1990 					*t++ = *s;
1991 					break;
1992 			}
1993 		} else {
1994 			*t++ = *s;
1995 		}
1996 
1997 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1998 			CG(zend_lineno)++;
1999 		}
2000 		s++;
2001 	}
2002 	*t = 0;
2003 
2004 	if (SCNG(output_filter)) {
2005 		size_t sz = 0;
2006 		s = Z_STRVAL_P(zendlval);
2007 		SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
2008 		Z_STRLEN_P(zendlval) = sz;
2009 		efree(s);
2010 	}
2011 	return T_CONSTANT_ENCAPSED_STRING;
2012 }
2013 
2014 
2015 <ST_IN_SCRIPTING>b?["] {
2016 	int bprefix = (yytext[0] != '"') ? 1 : 0;
2017 
2018 	while (YYCURSOR < YYLIMIT) {
2019 		switch (*YYCURSOR++) {
2020 			case '"':
2021 				yyleng = YYCURSOR - SCNG(yy_text);
2022 				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2023 				return T_CONSTANT_ENCAPSED_STRING;
2024 			case '$':
2025 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2026 					break;
2027 				}
2028 				continue;
2029 			case '{':
2030 				if (*YYCURSOR == '$') {
2031 					break;
2032 				}
2033 				continue;
2034 			case '\\':
2035 				if (YYCURSOR < YYLIMIT) {
2036 					YYCURSOR++;
2037 				}
2038 				/* fall through */
2039 			default:
2040 				continue;
2041 		}
2042 
2043 		YYCURSOR--;
2044 		break;
2045 	}
2046 
2047 	/* Remember how much was scanned to save rescanning */
2048 	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2049 
2050 	YYCURSOR = SCNG(yy_text) + yyleng;
2051 
2052 	BEGIN(ST_DOUBLE_QUOTES);
2053 	return '"';
2054 }
2055 
2056 
2057 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2058 	char *s;
2059 	int bprefix = (yytext[0] != '<') ? 1 : 0;
2060 	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2061 
2062 	CG(zend_lineno)++;
2063 	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2064 	s = yytext+bprefix+3;
2065 	while ((*s == ' ') || (*s == '\t')) {
2066 		s++;
2067 		heredoc_label->length--;
2068 	}
2069 
2070 	if (*s == '\'') {
2071 		s++;
2072 		heredoc_label->length -= 2;
2073 
2074 		BEGIN(ST_NOWDOC);
2075 	} else {
2076 		if (*s == '"') {
2077 			s++;
2078 			heredoc_label->length -= 2;
2079 		}
2080 
2081 		BEGIN(ST_HEREDOC);
2082 	}
2083 
2084 	heredoc_label->label = estrndup(s, heredoc_label->length);
2085 
2086 	/* Check for ending label on the next line */
2087 	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2088 		YYCTYPE *end = YYCURSOR + heredoc_label->length;
2089 
2090 		if (*end == ';') {
2091 			end++;
2092 		}
2093 
2094 		if (*end == '\n' || *end == '\r') {
2095 			BEGIN(ST_END_HEREDOC);
2096 		}
2097 	}
2098 
2099 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2100 
2101 	return T_START_HEREDOC;
2102 }
2103 
2104 
2105 <ST_IN_SCRIPTING>[`] {
2106 	BEGIN(ST_BACKQUOTE);
2107 	return '`';
2108 }
2109 
2110 
2111 <ST_END_HEREDOC>{ANY_CHAR} {
2112 	zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2113 
2114 	YYCURSOR += heredoc_label->length - 1;
2115 	yyleng = heredoc_label->length;
2116 
2117 	heredoc_label_dtor(heredoc_label);
2118 	efree(heredoc_label);
2119 
2120 	BEGIN(ST_IN_SCRIPTING);
2121 	return T_END_HEREDOC;
2122 }
2123 
2124 
2125 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2126 	Z_LVAL_P(zendlval) = (long) '{';
2127 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2128 	yyless(1);
2129 	return T_CURLY_OPEN;
2130 }
2131 
2132 
2133 <ST_DOUBLE_QUOTES>["] {
2134 	BEGIN(ST_IN_SCRIPTING);
2135 	return '"';
2136 }
2137 
2138 <ST_BACKQUOTE>[`] {
2139 	BEGIN(ST_IN_SCRIPTING);
2140 	return '`';
2141 }
2142 
2143 
2144 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2145 	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2146 		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2147 		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2148 
2149 		goto double_quotes_scan_done;
2150 	}
2151 
2152 	if (YYCURSOR > YYLIMIT) {
2153 		return 0;
2154 	}
2155 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2156 		YYCURSOR++;
2157 	}
2158 
2159 	while (YYCURSOR < YYLIMIT) {
2160 		switch (*YYCURSOR++) {
2161 			case '"':
2162 				break;
2163 			case '$':
2164 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2165 					break;
2166 				}
2167 				continue;
2168 			case '{':
2169 				if (*YYCURSOR == '$') {
2170 					break;
2171 				}
2172 				continue;
2173 			case '\\':
2174 				if (YYCURSOR < YYLIMIT) {
2175 					YYCURSOR++;
2176 				}
2177 				/* fall through */
2178 			default:
2179 				continue;
2180 		}
2181 
2182 		YYCURSOR--;
2183 		break;
2184 	}
2185 
2186 double_quotes_scan_done:
2187 	yyleng = YYCURSOR - SCNG(yy_text);
2188 
2189 	zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2190 	return T_ENCAPSED_AND_WHITESPACE;
2191 }
2192 
2193 
2194 <ST_BACKQUOTE>{ANY_CHAR} {
2195 	if (YYCURSOR > YYLIMIT) {
2196 		return 0;
2197 	}
2198 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2199 		YYCURSOR++;
2200 	}
2201 
2202 	while (YYCURSOR < YYLIMIT) {
2203 		switch (*YYCURSOR++) {
2204 			case '`':
2205 				break;
2206 			case '$':
2207 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2208 					break;
2209 				}
2210 				continue;
2211 			case '{':
2212 				if (*YYCURSOR == '$') {
2213 					break;
2214 				}
2215 				continue;
2216 			case '\\':
2217 				if (YYCURSOR < YYLIMIT) {
2218 					YYCURSOR++;
2219 				}
2220 				/* fall through */
2221 			default:
2222 				continue;
2223 		}
2224 
2225 		YYCURSOR--;
2226 		break;
2227 	}
2228 
2229 	yyleng = YYCURSOR - SCNG(yy_text);
2230 
2231 	zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2232 	return T_ENCAPSED_AND_WHITESPACE;
2233 }
2234 
2235 
2236 <ST_HEREDOC>{ANY_CHAR} {
2237 	int newline = 0;
2238 
2239 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2240 
2241 	if (YYCURSOR > YYLIMIT) {
2242 		return 0;
2243 	}
2244 
2245 	YYCURSOR--;
2246 
2247 	while (YYCURSOR < YYLIMIT) {
2248 		switch (*YYCURSOR++) {
2249 			case '\r':
2250 				if (*YYCURSOR == '\n') {
2251 					YYCURSOR++;
2252 				}
2253 				/* fall through */
2254 			case '\n':
2255 				/* Check for ending label on the next line */
2256 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2257 					YYCTYPE *end = YYCURSOR + heredoc_label->length;
2258 
2259 					if (*end == ';') {
2260 						end++;
2261 					}
2262 
2263 					if (*end == '\n' || *end == '\r') {
2264 						/* newline before label will be subtracted from returned text, but
2265 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2266 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2267 							newline = 2; /* Windows newline */
2268 						} else {
2269 							newline = 1;
2270 						}
2271 
2272 						CG(increment_lineno) = 1; /* For newline before label */
2273 						BEGIN(ST_END_HEREDOC);
2274 
2275 						goto heredoc_scan_done;
2276 					}
2277 				}
2278 				continue;
2279 			case '$':
2280 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2281 					break;
2282 				}
2283 				continue;
2284 			case '{':
2285 				if (*YYCURSOR == '$') {
2286 					break;
2287 				}
2288 				continue;
2289 			case '\\':
2290 				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2291 					YYCURSOR++;
2292 				}
2293 				/* fall through */
2294 			default:
2295 				continue;
2296 		}
2297 
2298 		YYCURSOR--;
2299 		break;
2300 	}
2301 
2302 heredoc_scan_done:
2303 	yyleng = YYCURSOR - SCNG(yy_text);
2304 
2305 	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2306 	return T_ENCAPSED_AND_WHITESPACE;
2307 }
2308 
2309 
2310 <ST_NOWDOC>{ANY_CHAR} {
2311 	int newline = 0;
2312 
2313 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2314 
2315 	if (YYCURSOR > YYLIMIT) {
2316 		return 0;
2317 	}
2318 
2319 	YYCURSOR--;
2320 
2321 	while (YYCURSOR < YYLIMIT) {
2322 		switch (*YYCURSOR++) {
2323 			case '\r':
2324 				if (*YYCURSOR == '\n') {
2325 					YYCURSOR++;
2326 				}
2327 				/* fall through */
2328 			case '\n':
2329 				/* Check for ending label on the next line */
2330 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2331 					YYCTYPE *end = YYCURSOR + heredoc_label->length;
2332 
2333 					if (*end == ';') {
2334 						end++;
2335 					}
2336 
2337 					if (*end == '\n' || *end == '\r') {
2338 						/* newline before label will be subtracted from returned text, but
2339 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2340 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2341 							newline = 2; /* Windows newline */
2342 						} else {
2343 							newline = 1;
2344 						}
2345 
2346 						CG(increment_lineno) = 1; /* For newline before label */
2347 						BEGIN(ST_END_HEREDOC);
2348 
2349 						goto nowdoc_scan_done;
2350 					}
2351 				}
2352 				/* fall through */
2353 			default:
2354 				continue;
2355 		}
2356 	}
2357 
2358 nowdoc_scan_done:
2359 	yyleng = YYCURSOR - SCNG(yy_text);
2360 
2361 	zend_copy_value(zendlval, yytext, yyleng - newline);
2362 	zendlval->type = IS_STRING;
2363 	HANDLE_NEWLINES(yytext, yyleng - newline);
2364 	return T_ENCAPSED_AND_WHITESPACE;
2365 }
2366 
2367 
2368 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2369 	if (YYCURSOR > YYLIMIT) {
2370 		return 0;
2371 	}
2372 
2373 	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2374 	goto restart;
2375 }
2376 
2377 */
2378 }
2379