xref: /PHP-5.5/Zend/zend_language_scanner.l (revision 73c1be26)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2015 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Marcus Boerger <helly@php.net>                              |
16    |          Nuno Lopes <nlopess@php.net>                                |
17    |          Scott MacVicar <scottmac@php.net>                           |
18    | Flex version authors:                                                |
19    |          Andi Gutmans <andi@zend.com>                                |
20    |          Zeev Suraski <zeev@zend.com>                                |
21    +----------------------------------------------------------------------+
22 */
23 
24 /* $Id$ */
25 
26 #if 0
27 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28 #else
29 # define YYDEBUG(s, c)
30 #endif
31 
32 #include "zend_language_scanner_defs.h"
33 
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef PHP_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "tsrm_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52 
/*
 * Glue between the re2c-generated scanner code and the scanner globals.
 * YYCURSOR, YYLIMIT and YYMARKER alias the matching SCNG() fields and
 * YYCTYPE is the type of one input unit.
 */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 once YYCURSOR + n reaches
 * YYLIMIT + ZEND_MMAP_AHEAD. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* YYGETCONDITION() must stay a plain lvalue: its address is taken when the
 * current state is pushed on the state stack. */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Rewinds the cursor so that only the first x bytes of the current token
 * remain consumed.  x is evaluated twice; pass an expression without side
 * effects. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72 
73 /* perform sanity check. If this message is triggered you should
74    increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79 
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83 
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87 
88 /* Globals Macros */
89 #define SCNG	LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95 
/*
 * Adds to CG(zend_lineno) one for every line break in the l bytes starting
 * at s.  A "\n" always counts; a "\r" counts only when the following byte is
 * not "\n", so "\r\n" is one line break.  For a "\r" in the last position the
 * following byte lies just past the span and is still read.
 */
#define HANDLE_NEWLINES(s, l)													\
do {																			\
	char *nl_cursor = (s), *nl_end = nl_cursor+(l);								\
																				\
	while (nl_cursor<nl_end) {													\
		if (*nl_cursor == '\n' || (*nl_cursor == '\r' && (*(nl_cursor+1) != '\n'))) {	\
			CG(zend_lineno)++;													\
		}																		\
		nl_cursor++;															\
	}																			\
} while (0)

/* Bumps CG(zend_lineno) when the single character c is "\n" or "\r".
 * c may be evaluated twice. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}

/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)

/* True for ASCII letters, '_' and any value from 0x7F upwards. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)

/* Digit classification for octal and hexadecimal digits. */
#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c)  (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123 
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125 
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127 {
128 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129 	assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131 }
132 
/*
 * Encoding filter: runs zend_multibyte_encoding_converter() on the input,
 * passing UTF-8 followed by the script encoding.  Returns whatever the
 * converter returns.
 */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	size_t converted;

	converted = zend_multibyte_encoding_converter(to, to_length, from, from_length,
			zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
	return converted;
}
137 
/*
 * Encoding filter: runs zend_multibyte_encoding_converter() on the input,
 * passing the script encoding followed by UTF-8.  Returns whatever the
 * converter returns.
 */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	size_t converted;

	converted = zend_multibyte_encoding_converter(to, to_length, from, from_length,
			LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
	return converted;
}
143 
/*
 * Encoding filter: runs zend_multibyte_encoding_converter() on the input,
 * passing the current internal encoding followed by UTF-8.  The internal
 * encoding has to exist and be lexer compatible; that is only enforced
 * through assert().  Returns whatever the converter returns.
 */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	size_t converted;
	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);

	assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));

	converted = zend_multibyte_encoding_converter(to, to_length, from, from_length,
			internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
	return converted;
}
151 
152 
/*
 * Pushes a copy of the current scanner condition onto SCNG(state_stack) and
 * then makes new_state the current condition.
 */
static void _yy_push_state(int new_state TSRMLS_DC)
{
	zend_stack_push(&SCNG(state_stack), (void *) &SCNG(yy_state), sizeof(int));
	SCNG(yy_state) = new_state;
}

/* Convenience wrapper: the argument is token-pasted onto "yyc", so callers
 * pass the bare condition name (followed by the TSRM argument macro). */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160 
yy_pop_state(TSRMLS_D)161 static void yy_pop_state(TSRMLS_D)
162 {
163 	int *stack_state;
164 	zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165 	YYSETCONDITION(*stack_state);
166 	zend_stack_del_top(&SCNG(state_stack));
167 }
168 
/*
 * Points the scanner at the len bytes starting at str: the cursor is set to
 * the first byte and the limit to one past the last.  SCNG(yy_start) is only
 * set when it is still NULL, so a start chosen by the caller is kept.
 */
static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
{
	YYCTYPE *first = (YYCTYPE*)str;

	SCNG(yy_cursor) = first;
	SCNG(yy_limit)  = first + len;

	if (SCNG(yy_start) == NULL) {
		SCNG(yy_start) = first;
	}
}
177 
startup_scanner(TSRMLS_D)178 void startup_scanner(TSRMLS_D)
179 {
180 	CG(parse_error) = 0;
181 	CG(doc_comment) = NULL;
182 	CG(doc_comment_len) = 0;
183 	zend_stack_init(&SCNG(state_stack));
184 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
185 }
186 
/* Element destructor for the heredoc label stack: frees the label string
 * held by the entry (the entry itself is not freed here). */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
190 
shutdown_scanner(TSRMLS_D)191 void shutdown_scanner(TSRMLS_D)
192 {
193 	CG(parse_error) = 0;
194 	RESET_DOC_COMMENT();
195 	zend_stack_destroy(&SCNG(state_stack));
196 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
197 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
198 }
199 
zend_save_lexical_state(zend_lex_state * lex_state TSRMLS_DC)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
201 {
202 	lex_state->yy_leng   = SCNG(yy_leng);
203 	lex_state->yy_start  = SCNG(yy_start);
204 	lex_state->yy_text   = SCNG(yy_text);
205 	lex_state->yy_cursor = SCNG(yy_cursor);
206 	lex_state->yy_marker = SCNG(yy_marker);
207 	lex_state->yy_limit  = SCNG(yy_limit);
208 
209 	lex_state->state_stack = SCNG(state_stack);
210 	zend_stack_init(&SCNG(state_stack));
211 
212 	lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 	zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214 
215 	lex_state->in = SCNG(yy_in);
216 	lex_state->yy_state = YYSTATE;
217 	lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
218 	lex_state->lineno = CG(zend_lineno);
219 
220 	lex_state->script_org = SCNG(script_org);
221 	lex_state->script_org_size = SCNG(script_org_size);
222 	lex_state->script_filtered = SCNG(script_filtered);
223 	lex_state->script_filtered_size = SCNG(script_filtered_size);
224 	lex_state->input_filter = SCNG(input_filter);
225 	lex_state->output_filter = SCNG(output_filter);
226 	lex_state->script_encoding = SCNG(script_encoding);
227 }
228 
zend_restore_lexical_state(zend_lex_state * lex_state TSRMLS_DC)229 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
230 {
231 	SCNG(yy_leng)   = lex_state->yy_leng;
232 	SCNG(yy_start)  = lex_state->yy_start;
233 	SCNG(yy_text)   = lex_state->yy_text;
234 	SCNG(yy_cursor) = lex_state->yy_cursor;
235 	SCNG(yy_marker) = lex_state->yy_marker;
236 	SCNG(yy_limit)  = lex_state->yy_limit;
237 
238 	zend_stack_destroy(&SCNG(state_stack));
239 	SCNG(state_stack) = lex_state->state_stack;
240 
241 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
242 	zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
243 	SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
244 
245 	SCNG(yy_in) = lex_state->in;
246 	YYSETCONDITION(lex_state->yy_state);
247 	CG(zend_lineno) = lex_state->lineno;
248 	zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
249 
250 	if (SCNG(script_filtered)) {
251 		efree(SCNG(script_filtered));
252 		SCNG(script_filtered) = NULL;
253 	}
254 	SCNG(script_org) = lex_state->script_org;
255 	SCNG(script_org_size) = lex_state->script_org_size;
256 	SCNG(script_filtered) = lex_state->script_filtered;
257 	SCNG(script_filtered_size) = lex_state->script_filtered_size;
258 	SCNG(input_filter) = lex_state->input_filter;
259 	SCNG(output_filter) = lex_state->output_filter;
260 	SCNG(script_encoding) = lex_state->script_encoding;
261 
262 	RESET_DOC_COMMENT();
263 }
264 
zend_destroy_file_handle(zend_file_handle * file_handle TSRMLS_DC)265 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
266 {
267 	zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
268 	/* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
269 	file_handle->opened_path = NULL;
270 	if (file_handle->free_filename) {
271 		file_handle->filename = NULL;
272 	}
273 }
274 
/* Byte order marks, as string literals; use sizeof(...)-1 for their length.
 * The 4-byte UTF-32 LE mark starts with the 2-byte UTF-16 LE mark. */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8     "\xef\xbb\xbf"
280 
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size TSRMLS_DC)281 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
282 {
283 	const unsigned char *p;
284 	int wchar_size = 2;
285 	int le = 0;
286 
287 	/* utf-16 or utf-32? */
288 	p = script;
289 	while ((p-script) < script_size) {
290 		p = memchr(p, 0, script_size-(p-script)-2);
291 		if (!p) {
292 			break;
293 		}
294 		if (*(p+1) == '\0' && *(p+2) == '\0') {
295 			wchar_size = 4;
296 			break;
297 		}
298 
299 		/* searching for UTF-32 specific byte orders, so this will do */
300 		p += 4;
301 	}
302 
303 	/* BE or LE? */
304 	p = script;
305 	while ((p-script) < script_size) {
306 		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
307 			/* BE */
308 			le = 0;
309 			break;
310 		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
311 			/* LE* */
312 			le = 1;
313 			break;
314 		}
315 		p += wchar_size;
316 	}
317 
318 	if (wchar_size == 2) {
319 		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
320 	} else {
321 		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
322 	}
323 
324 	return NULL;
325 }
326 
zend_multibyte_detect_unicode(TSRMLS_D)327 static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
328 {
329 	const zend_encoding *script_encoding = NULL;
330 	int bom_size;
331 	unsigned char *pos1, *pos2;
332 
333 	if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
334 		return NULL;
335 	}
336 
337 	/* check out BOM */
338 	if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
339 		script_encoding = zend_multibyte_encoding_utf32be;
340 		bom_size = sizeof(BOM_UTF32_BE)-1;
341 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
342 		script_encoding = zend_multibyte_encoding_utf32le;
343 		bom_size = sizeof(BOM_UTF32_LE)-1;
344 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
345 		script_encoding = zend_multibyte_encoding_utf16be;
346 		bom_size = sizeof(BOM_UTF16_BE)-1;
347 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
348 		script_encoding = zend_multibyte_encoding_utf16le;
349 		bom_size = sizeof(BOM_UTF16_LE)-1;
350 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
351 		script_encoding = zend_multibyte_encoding_utf8;
352 		bom_size = sizeof(BOM_UTF8)-1;
353 	}
354 
355 	if (script_encoding) {
356 		/* remove BOM */
357 		LANG_SCNG(script_org) += bom_size;
358 		LANG_SCNG(script_org_size) -= bom_size;
359 
360 		return script_encoding;
361 	}
362 
363 	/* script contains NULL bytes -> auto-detection */
364 	if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
365 		/* check if the NULL byte is after the __HALT_COMPILER(); */
366 		pos2 = LANG_SCNG(script_org);
367 
368 		while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
369 			pos2 = memchr(pos2, '_', pos1 - pos2);
370 			if (!pos2) break;
371 			pos2++;
372 			if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
373 				pos2 += sizeof("_HALT_COMPILER")-1;
374 				while (*pos2 == ' '  ||
375 					   *pos2 == '\t' ||
376 					   *pos2 == '\r' ||
377 					   *pos2 == '\n') {
378 					pos2++;
379 				}
380 				if (*pos2 == '(') {
381 					pos2++;
382 					while (*pos2 == ' '  ||
383 						   *pos2 == '\t' ||
384 						   *pos2 == '\r' ||
385 						   *pos2 == '\n') {
386 						pos2++;
387 					}
388 					if (*pos2 == ')') {
389 						pos2++;
390 						while (*pos2 == ' '  ||
391 							   *pos2 == '\t' ||
392 							   *pos2 == '\r' ||
393 							   *pos2 == '\n') {
394 							pos2++;
395 						}
396 						if (*pos2 == ';') {
397 							return NULL;
398 						}
399 					}
400 				}
401 			}
402 		}
403 		/* make best effort if BOM is missing */
404 		return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
405 	}
406 
407 	return NULL;
408 }
409 
zend_multibyte_find_script_encoding(TSRMLS_D)410 static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
411 {
412 	const zend_encoding *script_encoding;
413 
414 	if (CG(detect_unicode)) {
415 		/* check out bom(byte order mark) and see if containing wchars */
416 		script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
417 		if (script_encoding != NULL) {
418 			/* bom or wchar detection is prior to 'script_encoding' option */
419 			return script_encoding;
420 		}
421 	}
422 
423 	/* if no script_encoding specified, just leave alone */
424 	if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
425 		return NULL;
426 	}
427 
428 	/* if multiple encodings specified, detect automagically */
429 	if (CG(script_encoding_list_size) > 1) {
430 		return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
431 	}
432 
433 	return CG(script_encoding_list)[0];
434 }
435 
zend_multibyte_set_filter(const zend_encoding * onetime_encoding TSRMLS_DC)436 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
437 {
438 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
439 	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
440 
441 	if (!script_encoding) {
442 		return FAILURE;
443 	}
444 
445 	/* judge input/output filter */
446 	LANG_SCNG(script_encoding) = script_encoding;
447 	LANG_SCNG(input_filter) = NULL;
448 	LANG_SCNG(output_filter) = NULL;
449 
450 	if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
451 		if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
452 			/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
453 			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
454 			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
455 		} else {
456 			LANG_SCNG(input_filter) = NULL;
457 			LANG_SCNG(output_filter) = NULL;
458 		}
459 		return SUCCESS;
460 	}
461 
462 	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
463 		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
464 		LANG_SCNG(output_filter) = NULL;
465 	} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
466 		LANG_SCNG(input_filter) = NULL;
467 		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
468 	} else {
469 		/* both script and internal encodings are incompatible w/ flex */
470 		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
471 		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
472 	}
473 
474 	return 0;
475 }
476 
open_file_for_scanning(zend_file_handle * file_handle TSRMLS_DC)477 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
478 {
479 	const char *file_path = NULL;
480 	char *buf;
481 	size_t size, offset = 0;
482 
483 	/* The shebang line was read, get the current position to obtain the buffer start */
484 	if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
485 		if ((offset = ftell(file_handle->handle.fp)) == -1) {
486 			offset = 0;
487 		}
488 	}
489 
490 	if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
491 		return FAILURE;
492 	}
493 
494 	zend_llist_add_element(&CG(open_files), file_handle);
495 	if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
496 		zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
497 		size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
498 		fh->handle.stream.handle = (void*)(((char*)fh) + diff);
499 		file_handle->handle.stream.handle = fh->handle.stream.handle;
500 	}
501 
502 	/* Reset the scanner for scanning the new file */
503 	SCNG(yy_in) = file_handle;
504 	SCNG(yy_start) = NULL;
505 
506 	if (size != -1) {
507 		if (CG(multibyte)) {
508 			SCNG(script_org) = (unsigned char*)buf;
509 			SCNG(script_org_size) = size;
510 			SCNG(script_filtered) = NULL;
511 
512 			zend_multibyte_set_filter(NULL TSRMLS_CC);
513 
514 			if (SCNG(input_filter)) {
515 				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
516 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
517 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
518 				}
519 				buf = (char*)SCNG(script_filtered);
520 				size = SCNG(script_filtered_size);
521 			}
522 		}
523 		SCNG(yy_start) = (unsigned char *)buf - offset;
524 		yy_scan_buffer(buf, size TSRMLS_CC);
525 	} else {
526 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
527 	}
528 
529 	BEGIN(INITIAL);
530 
531 	if (file_handle->opened_path) {
532 		file_path = file_handle->opened_path;
533 	} else {
534 		file_path = file_handle->filename;
535 	}
536 
537 	zend_set_compiled_filename(file_path TSRMLS_CC);
538 
539 	if (CG(start_lineno)) {
540 		CG(zend_lineno) = CG(start_lineno);
541 		CG(start_lineno) = 0;
542 	} else {
543 		CG(zend_lineno) = 1;
544 	}
545 
546 	RESET_DOC_COMMENT();
547 	CG(increment_lineno) = 0;
548 	return SUCCESS;
549 }
END_EXTERN_C()550 END_EXTERN_C()
551 
552 
553 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
554 {
555 	zend_lex_state original_lex_state;
556 	zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
557 	zend_op_array *original_active_op_array = CG(active_op_array);
558 	zend_op_array *retval=NULL;
559 	int compiler_result;
560 	zend_bool compilation_successful=0;
561 	znode retval_znode;
562 	zend_bool original_in_compilation = CG(in_compilation);
563 
564 	retval_znode.op_type = IS_CONST;
565 	retval_znode.u.constant.type = IS_LONG;
566 	retval_znode.u.constant.value.lval = 1;
567 	Z_UNSET_ISREF(retval_znode.u.constant);
568 	Z_SET_REFCOUNT(retval_znode.u.constant, 1);
569 
570 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
571 
572 	retval = op_array; /* success oriented */
573 
574 	if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
575 		if (type==ZEND_REQUIRE) {
576 			zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
577 			zend_bailout();
578 		} else {
579 			zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
580 		}
581 		compilation_successful=0;
582 	} else {
583 		init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
584 		CG(in_compilation) = 1;
585 		CG(active_op_array) = op_array;
586 		zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
587 		zend_init_compiler_context(TSRMLS_C);
588 		compiler_result = zendparse(TSRMLS_C);
589 		zend_do_return(&retval_znode, 0 TSRMLS_CC);
590 		CG(in_compilation) = original_in_compilation;
591 		if (compiler_result != 0) { /* parser error */
592 			zend_bailout();
593 		}
594 		compilation_successful=1;
595 	}
596 
597 	if (retval) {
598 		CG(active_op_array) = original_active_op_array;
599 		if (compilation_successful) {
600 			pass_two(op_array TSRMLS_CC);
601 			zend_release_labels(0 TSRMLS_CC);
602 		} else {
603 			efree(op_array);
604 			retval = NULL;
605 		}
606 	}
607 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
608 	return retval;
609 }
610 
611 
compile_filename(int type,zval * filename TSRMLS_DC)612 zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
613 {
614 	zend_file_handle file_handle;
615 	zval tmp;
616 	zend_op_array *retval;
617 	char *opened_path = NULL;
618 
619 	if (filename->type != IS_STRING) {
620 		tmp = *filename;
621 		zval_copy_ctor(&tmp);
622 		convert_to_string(&tmp);
623 		filename = &tmp;
624 	}
625 	file_handle.filename = filename->value.str.val;
626 	file_handle.free_filename = 0;
627 	file_handle.type = ZEND_HANDLE_FILENAME;
628 	file_handle.opened_path = NULL;
629 	file_handle.handle.fp = NULL;
630 
631 	retval = zend_compile_file(&file_handle, type TSRMLS_CC);
632 	if (retval && file_handle.handle.stream.handle) {
633 		int dummy = 1;
634 
635 		if (!file_handle.opened_path) {
636 			file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
637 		}
638 
639 		zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
640 
641 		if (opened_path) {
642 			efree(opened_path);
643 		}
644 	}
645 	zend_destroy_file_handle(&file_handle TSRMLS_CC);
646 
647 	if (filename==&tmp) {
648 		zval_dtor(&tmp);
649 	}
650 	return retval;
651 }
652 
zend_prepare_string_for_scanning(zval * str,char * filename TSRMLS_DC)653 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
654 {
655 	char *buf;
656 	size_t size;
657 
658 	/* enforce two trailing NULLs for flex... */
659 	if (IS_INTERNED(str->value.str.val)) {
660 		char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
661 		memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
662 		str->value.str.val = tmp;
663 	} else {
664 		str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
665 	}
666 
667 	memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
668 
669 	SCNG(yy_in) = NULL;
670 	SCNG(yy_start) = NULL;
671 
672 	buf = str->value.str.val;
673 	size = str->value.str.len;
674 
675 	if (CG(multibyte)) {
676 		SCNG(script_org) = (unsigned char*)buf;
677 		SCNG(script_org_size) = size;
678 		SCNG(script_filtered) = NULL;
679 
680 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
681 
682 		if (SCNG(input_filter)) {
683 			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
684 				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
685 						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
686 			}
687 			buf = (char*)SCNG(script_filtered);
688 			size = SCNG(script_filtered_size);
689 		}
690 	}
691 
692 	yy_scan_buffer(buf, size TSRMLS_CC);
693 
694 	zend_set_compiled_filename(filename TSRMLS_CC);
695 	CG(zend_lineno) = 1;
696 	CG(increment_lineno) = 0;
697 	RESET_DOC_COMMENT();
698 	return SUCCESS;
699 }
700 
701 
zend_get_scanned_file_offset(TSRMLS_D)702 ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
703 {
704 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
705 	if (SCNG(input_filter)) {
706 		size_t original_offset = offset, length = 0;
707 		do {
708 			unsigned char *p = NULL;
709 			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
710 				return (size_t)-1;
711 			}
712 			efree(p);
713 			if (length > original_offset) {
714 				offset--;
715 			} else if (length < original_offset) {
716 				offset++;
717 			}
718 		} while (original_offset != length);
719 	}
720 	return offset;
721 }
722 
723 
compile_string(zval * source_string,char * filename TSRMLS_DC)724 zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
725 {
726 	zend_lex_state original_lex_state;
727 	zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
728 	zend_op_array *original_active_op_array = CG(active_op_array);
729 	zend_op_array *retval;
730 	zval tmp;
731 	int compiler_result;
732 	zend_bool original_in_compilation = CG(in_compilation);
733 
734 	if (source_string->value.str.len==0) {
735 		efree(op_array);
736 		return NULL;
737 	}
738 
739 	CG(in_compilation) = 1;
740 
741 	tmp = *source_string;
742 	zval_copy_ctor(&tmp);
743 	convert_to_string(&tmp);
744 	source_string = &tmp;
745 
746 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
747 	if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
748 		efree(op_array);
749 		retval = NULL;
750 	} else {
751 		zend_bool orig_interactive = CG(interactive);
752 
753 		CG(interactive) = 0;
754 		init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
755 		CG(interactive) = orig_interactive;
756 		CG(active_op_array) = op_array;
757 		zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
758 		zend_init_compiler_context(TSRMLS_C);
759 		BEGIN(ST_IN_SCRIPTING);
760 		compiler_result = zendparse(TSRMLS_C);
761 
762 		if (SCNG(script_filtered)) {
763 			efree(SCNG(script_filtered));
764 			SCNG(script_filtered) = NULL;
765 		}
766 
767 		if (compiler_result != 0) {
768 			CG(active_op_array) = original_active_op_array;
769 			CG(unclean_shutdown)=1;
770 			destroy_op_array(op_array TSRMLS_CC);
771 			efree(op_array);
772 			retval = NULL;
773 		} else {
774 			zend_do_return(NULL, 0 TSRMLS_CC);
775 			CG(active_op_array) = original_active_op_array;
776 			pass_two(op_array TSRMLS_CC);
777 			zend_release_labels(0 TSRMLS_CC);
778 			retval = op_array;
779 		}
780 	}
781 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
782 	zval_dtor(&tmp);
783 	CG(in_compilation) = original_in_compilation;
784 	return retval;
785 }
786 
787 
BEGIN_EXTERN_C()788 BEGIN_EXTERN_C()
789 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
790 {
791 	zend_lex_state original_lex_state;
792 	zend_file_handle file_handle;
793 
794 	file_handle.type = ZEND_HANDLE_FILENAME;
795 	file_handle.filename = filename;
796 	file_handle.free_filename = 0;
797 	file_handle.opened_path = NULL;
798 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
799 	if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
800 		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
801 		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
802 		return FAILURE;
803 	}
804 	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
805 	if (SCNG(script_filtered)) {
806 		efree(SCNG(script_filtered));
807 		SCNG(script_filtered) = NULL;
808 	}
809 	zend_destroy_file_handle(&file_handle TSRMLS_CC);
810 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
811 	return SUCCESS;
812 }
813 
/*
 * Syntax-highlights the code in str with the given highlighter settings;
 * str_name is used as the compiled filename.
 *
 * The function works on a private copy of *str, which is released on every
 * return path.  The lexical state is saved before and restored after the
 * run.  Scanning starts in the INITIAL condition.
 *
 * Returns FAILURE when the string cannot be prepared for scanning, SUCCESS
 * otherwise.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
	if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
		/* the private copy must not outlive this call */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
	zval_dtor(str);
	return SUCCESS;
}
836 
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding TSRMLS_DC)837 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
838 {
839 	size_t length;
840 	unsigned char *new_yy_start;
841 
842 	/* convert and set */
843 	if (!SCNG(input_filter)) {
844 		if (SCNG(script_filtered)) {
845 			efree(SCNG(script_filtered));
846 			SCNG(script_filtered) = NULL;
847 		}
848 		SCNG(script_filtered_size) = 0;
849 		length = SCNG(script_org_size);
850 		new_yy_start = SCNG(script_org);
851 	} else {
852 		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
853 			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
854 					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
855 		}
856 		SCNG(script_filtered) = new_yy_start;
857 		SCNG(script_filtered_size) = length;
858 	}
859 
860 	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
861 	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
862 	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
863 	SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
864 
865 	SCNG(yy_start) = new_yy_start;
866 }
867 
868 
/*
 * Store a copy of the `yyleng` bytes starting at `yytext` in the string slot
 * of `zendlval`.  When an output filter is installed, the filter produces the
 * stored buffer and its length; otherwise the bytes are duplicated with
 * estrndup().  Only value.str is written; the caller sets zendlval->type.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement and stays correct inside an unbraced if/else.  Note that `yyleng`
 * is evaluated more than once on the unfiltered path.
 */
#define zend_copy_value(zendlval, yytext, yyleng) \
	do { \
		if (SCNG(output_filter)) { \
			size_t sz = 0; \
			SCNG(output_filter)((unsigned char **)&((zendlval)->value.str.val), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
			(zendlval)->value.str.len = sz; \
		} else { \
			(zendlval)->value.str.val = (char *) estrndup((yytext), (yyleng)); \
			(zendlval)->value.str.len = (yyleng); \
		} \
	} while (0)
878 
/*
 * Initialise `zendlval` as a string from the `len` bytes at `str` and decode
 * its backslash escape sequences.
 *
 * Decoding is done in place inside zendlval's own buffer: `s` is the read
 * cursor, `t` the write cursor, and value.str.len is decremented once for
 * every byte an escape sequence removes.  Recognised sequences:
 *   \n \r \t \f \v \e      single control characters
 *   \\ \$                  the literal character
 *   \" \`                  the literal character, but only when it equals
 *                          `quote_type`; otherwise the backslash is kept
 *   \xH, \xHH (also \X)    one byte from one or two hex digits
 *   \O, \OO, \OOO          one byte from one to three octal digits
 * Any other backslash sequence, and a lone backslash at the very end, is
 * copied through unchanged.
 *
 * CG(zend_lineno) is advanced for every '\n', and for every '\r' that is not
 * followed by '\n', found at the read cursor.
 *
 * If an output filter is installed, the decoded string is passed through it
 * and the unfiltered buffer is freed.
 */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
	register char *s, *t;
	char *end;

	/* NOTE(review): the trailing 1 presumably asks for a private duplicate of str -- confirm against the ZVAL_STRINGL definition */
	ZVAL_STRINGL(zendlval, str, len, 1);

	/* convert escape sequences */
	s = t = zendlval->value.str.val;
	end = s+zendlval->value.str.len;
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* trailing backslash with nothing after it: keep it literally */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					zendlval->value.str.len--;
					break;
				case 'r':
					*t++ = '\r';
					zendlval->value.str.len--;
					break;
				case 't':
					*t++ = '\t';
					zendlval->value.str.len--;
					break;
				case 'f':
					*t++ = '\f';
					zendlval->value.str.len--;
					break;
				case 'v':
					*t++ = '\v';
					zendlval->value.str.len--;
					break;
				case 'e':
					/* ESC (0x1B), spelled as an octal escape: '\e' is a compiler
					 * extension and VK_ESCAPE is a virtual-key code, not a
					 * character constant */
					*t++ = '\033';
					zendlval->value.str.len--;
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						/* not the active delimiter: the backslash stays */
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*t++ = *s;
					zendlval->value.str.len--;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						zendlval->value.str.len--; /* for the 'x' */

						hex_buf[0] = *(++s);
						zendlval->value.str.len--;
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
							zendlval->value.str.len--;
						}
						*t++ = (char) strtol(hex_buf, NULL, 16);
					} else {
						/* "\x" without a hex digit is not an escape */
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						zendlval->value.str.len--;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							zendlval->value.str.len--;
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
								zendlval->value.str.len--;
							}
						}
						*t++ = (char) strtol(octal_buf, NULL, 8);
					} else {
						/* unknown escape: copy backslash and character as-is */
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* line accounting looks at the byte under the read cursor */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	if (SCNG(output_filter)) {
		size_t sz = 0;
		s = zendlval->value.str.val;
		SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
		zendlval->value.str.len = sz;
		/* the filter stored a new buffer in zendlval; release the old one */
		efree(s);
	}
}
997 
998 
lex_scan(zval * zendlval TSRMLS_DC)999 int lex_scan(zval *zendlval TSRMLS_DC)
1000 {
1001 restart:
1002 	SCNG(yy_text) = YYCURSOR;
1003 
1004 yymore_restart:
1005 
1006 /*!re2c
1007 re2c:yyfill:check = 0;
1008 LNUM	[0-9]+
1009 DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1010 EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
1011 HNUM	"0x"[0-9a-fA-F]+
1012 BNUM	"0b"[01]+
1013 LABEL	[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1014 WHITESPACE [ \n\r\t]+
1015 TABS_AND_SPACES [ \t]*
1016 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1017 ANY_CHAR [^]
1018 NEWLINE ("\r"|"\n"|"\r\n")
1019 
1020 /* compute yyleng before each rule */
1021 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1022 
1023 
1024 <ST_IN_SCRIPTING>"exit" {
1025 	return T_EXIT;
1026 }
1027 
1028 <ST_IN_SCRIPTING>"die" {
1029 	return T_EXIT;
1030 }
1031 
1032 <ST_IN_SCRIPTING>"function" {
1033 	return T_FUNCTION;
1034 }
1035 
1036 <ST_IN_SCRIPTING>"const" {
1037 	return T_CONST;
1038 }
1039 
1040 <ST_IN_SCRIPTING>"return" {
1041 	return T_RETURN;
1042 }
1043 
1044 <ST_IN_SCRIPTING>"yield" {
1045 	return T_YIELD;
1046 }
1047 
1048 <ST_IN_SCRIPTING>"try" {
1049 	return T_TRY;
1050 }
1051 
1052 <ST_IN_SCRIPTING>"catch" {
1053 	return T_CATCH;
1054 }
1055 
1056 <ST_IN_SCRIPTING>"finally" {
1057 	return T_FINALLY;
1058 }
1059 
1060 <ST_IN_SCRIPTING>"throw" {
1061 	return T_THROW;
1062 }
1063 
1064 <ST_IN_SCRIPTING>"if" {
1065 	return T_IF;
1066 }
1067 
1068 <ST_IN_SCRIPTING>"elseif" {
1069 	return T_ELSEIF;
1070 }
1071 
1072 <ST_IN_SCRIPTING>"endif" {
1073 	return T_ENDIF;
1074 }
1075 
1076 <ST_IN_SCRIPTING>"else" {
1077 	return T_ELSE;
1078 }
1079 
1080 <ST_IN_SCRIPTING>"while" {
1081 	return T_WHILE;
1082 }
1083 
1084 <ST_IN_SCRIPTING>"endwhile" {
1085 	return T_ENDWHILE;
1086 }
1087 
1088 <ST_IN_SCRIPTING>"do" {
1089 	return T_DO;
1090 }
1091 
1092 <ST_IN_SCRIPTING>"for" {
1093 	return T_FOR;
1094 }
1095 
1096 <ST_IN_SCRIPTING>"endfor" {
1097 	return T_ENDFOR;
1098 }
1099 
1100 <ST_IN_SCRIPTING>"foreach" {
1101 	return T_FOREACH;
1102 }
1103 
1104 <ST_IN_SCRIPTING>"endforeach" {
1105 	return T_ENDFOREACH;
1106 }
1107 
1108 <ST_IN_SCRIPTING>"declare" {
1109 	return T_DECLARE;
1110 }
1111 
1112 <ST_IN_SCRIPTING>"enddeclare" {
1113 	return T_ENDDECLARE;
1114 }
1115 
1116 <ST_IN_SCRIPTING>"instanceof" {
1117 	return T_INSTANCEOF;
1118 }
1119 
1120 <ST_IN_SCRIPTING>"as" {
1121 	return T_AS;
1122 }
1123 
1124 <ST_IN_SCRIPTING>"switch" {
1125 	return T_SWITCH;
1126 }
1127 
1128 <ST_IN_SCRIPTING>"endswitch" {
1129 	return T_ENDSWITCH;
1130 }
1131 
1132 <ST_IN_SCRIPTING>"case" {
1133 	return T_CASE;
1134 }
1135 
1136 <ST_IN_SCRIPTING>"default" {
1137 	return T_DEFAULT;
1138 }
1139 
1140 <ST_IN_SCRIPTING>"break" {
1141 	return T_BREAK;
1142 }
1143 
1144 <ST_IN_SCRIPTING>"continue" {
1145 	return T_CONTINUE;
1146 }
1147 
1148 <ST_IN_SCRIPTING>"goto" {
1149 	return T_GOTO;
1150 }
1151 
1152 <ST_IN_SCRIPTING>"echo" {
1153 	return T_ECHO;
1154 }
1155 
1156 <ST_IN_SCRIPTING>"print" {
1157 	return T_PRINT;
1158 }
1159 
1160 <ST_IN_SCRIPTING>"class" {
1161 	return T_CLASS;
1162 }
1163 
1164 <ST_IN_SCRIPTING>"interface" {
1165 	return T_INTERFACE;
1166 }
1167 
1168 <ST_IN_SCRIPTING>"trait" {
1169 	return T_TRAIT;
1170 }
1171 
1172 <ST_IN_SCRIPTING>"extends" {
1173 	return T_EXTENDS;
1174 }
1175 
1176 <ST_IN_SCRIPTING>"implements" {
1177 	return T_IMPLEMENTS;
1178 }
1179 
1180 <ST_IN_SCRIPTING>"->" {
1181 	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1182 	return T_OBJECT_OPERATOR;
1183 }
1184 
1185 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1186 	zendlval->value.str.val = yytext; /* no copying - intentional */
1187 	zendlval->value.str.len = yyleng;
1188 	zendlval->type = IS_STRING;
1189 	HANDLE_NEWLINES(yytext, yyleng);
1190 	return T_WHITESPACE;
1191 }
1192 
1193 <ST_LOOKING_FOR_PROPERTY>"->" {
1194 	return T_OBJECT_OPERATOR;
1195 }
1196 
1197 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1198 	yy_pop_state(TSRMLS_C);
1199 	zend_copy_value(zendlval, yytext, yyleng);
1200 	zendlval->type = IS_STRING;
1201 	return T_STRING;
1202 }
1203 
1204 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1205 	yyless(0);
1206 	yy_pop_state(TSRMLS_C);
1207 	goto restart;
1208 }
1209 
1210 <ST_IN_SCRIPTING>"::" {
1211 	return T_PAAMAYIM_NEKUDOTAYIM;
1212 }
1213 
1214 <ST_IN_SCRIPTING>"\\" {
1215 	return T_NS_SEPARATOR;
1216 }
1217 
1218 <ST_IN_SCRIPTING>"new" {
1219 	return T_NEW;
1220 }
1221 
1222 <ST_IN_SCRIPTING>"clone" {
1223 	return T_CLONE;
1224 }
1225 
1226 <ST_IN_SCRIPTING>"var" {
1227 	return T_VAR;
1228 }
1229 
1230 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1231 	return T_INT_CAST;
1232 }
1233 
1234 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1235 	return T_DOUBLE_CAST;
1236 }
1237 
1238 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1239 	return T_STRING_CAST;
1240 }
1241 
1242 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1243 	return T_ARRAY_CAST;
1244 }
1245 
1246 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1247 	return T_OBJECT_CAST;
1248 }
1249 
1250 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1251 	return T_BOOL_CAST;
1252 }
1253 
1254 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1255 	return T_UNSET_CAST;
1256 }
1257 
1258 <ST_IN_SCRIPTING>"eval" {
1259 	return T_EVAL;
1260 }
1261 
1262 <ST_IN_SCRIPTING>"include" {
1263 	return T_INCLUDE;
1264 }
1265 
1266 <ST_IN_SCRIPTING>"include_once" {
1267 	return T_INCLUDE_ONCE;
1268 }
1269 
1270 <ST_IN_SCRIPTING>"require" {
1271 	return T_REQUIRE;
1272 }
1273 
1274 <ST_IN_SCRIPTING>"require_once" {
1275 	return T_REQUIRE_ONCE;
1276 }
1277 
1278 <ST_IN_SCRIPTING>"namespace" {
1279 	return T_NAMESPACE;
1280 }
1281 
1282 <ST_IN_SCRIPTING>"use" {
1283 	return T_USE;
1284 }
1285 
1286 <ST_IN_SCRIPTING>"insteadof" {
1287         return T_INSTEADOF;
1288 }
1289 
1290 <ST_IN_SCRIPTING>"global" {
1291 	return T_GLOBAL;
1292 }
1293 
1294 <ST_IN_SCRIPTING>"isset" {
1295 	return T_ISSET;
1296 }
1297 
1298 <ST_IN_SCRIPTING>"empty" {
1299 	return T_EMPTY;
1300 }
1301 
1302 <ST_IN_SCRIPTING>"__halt_compiler" {
1303 	return T_HALT_COMPILER;
1304 }
1305 
1306 <ST_IN_SCRIPTING>"static" {
1307 	return T_STATIC;
1308 }
1309 
1310 <ST_IN_SCRIPTING>"abstract" {
1311 	return T_ABSTRACT;
1312 }
1313 
1314 <ST_IN_SCRIPTING>"final" {
1315 	return T_FINAL;
1316 }
1317 
1318 <ST_IN_SCRIPTING>"private" {
1319 	return T_PRIVATE;
1320 }
1321 
1322 <ST_IN_SCRIPTING>"protected" {
1323 	return T_PROTECTED;
1324 }
1325 
1326 <ST_IN_SCRIPTING>"public" {
1327 	return T_PUBLIC;
1328 }
1329 
1330 <ST_IN_SCRIPTING>"unset" {
1331 	return T_UNSET;
1332 }
1333 
1334 <ST_IN_SCRIPTING>"=>" {
1335 	return T_DOUBLE_ARROW;
1336 }
1337 
1338 <ST_IN_SCRIPTING>"list" {
1339 	return T_LIST;
1340 }
1341 
1342 <ST_IN_SCRIPTING>"array" {
1343 	return T_ARRAY;
1344 }
1345 
1346 <ST_IN_SCRIPTING>"callable" {
1347  return T_CALLABLE;
1348 }
1349 
1350 <ST_IN_SCRIPTING>"++" {
1351 	return T_INC;
1352 }
1353 
1354 <ST_IN_SCRIPTING>"--" {
1355 	return T_DEC;
1356 }
1357 
1358 <ST_IN_SCRIPTING>"===" {
1359 	return T_IS_IDENTICAL;
1360 }
1361 
1362 <ST_IN_SCRIPTING>"!==" {
1363 	return T_IS_NOT_IDENTICAL;
1364 }
1365 
1366 <ST_IN_SCRIPTING>"==" {
1367 	return T_IS_EQUAL;
1368 }
1369 
1370 <ST_IN_SCRIPTING>"!="|"<>" {
1371 	return T_IS_NOT_EQUAL;
1372 }
1373 
1374 <ST_IN_SCRIPTING>"<=" {
1375 	return T_IS_SMALLER_OR_EQUAL;
1376 }
1377 
1378 <ST_IN_SCRIPTING>">=" {
1379 	return T_IS_GREATER_OR_EQUAL;
1380 }
1381 
1382 <ST_IN_SCRIPTING>"+=" {
1383 	return T_PLUS_EQUAL;
1384 }
1385 
1386 <ST_IN_SCRIPTING>"-=" {
1387 	return T_MINUS_EQUAL;
1388 }
1389 
1390 <ST_IN_SCRIPTING>"*=" {
1391 	return T_MUL_EQUAL;
1392 }
1393 
1394 <ST_IN_SCRIPTING>"/=" {
1395 	return T_DIV_EQUAL;
1396 }
1397 
1398 <ST_IN_SCRIPTING>".=" {
1399 	return T_CONCAT_EQUAL;
1400 }
1401 
1402 <ST_IN_SCRIPTING>"%=" {
1403 	return T_MOD_EQUAL;
1404 }
1405 
1406 <ST_IN_SCRIPTING>"<<=" {
1407 	return T_SL_EQUAL;
1408 }
1409 
1410 <ST_IN_SCRIPTING>">>=" {
1411 	return T_SR_EQUAL;
1412 }
1413 
1414 <ST_IN_SCRIPTING>"&=" {
1415 	return T_AND_EQUAL;
1416 }
1417 
1418 <ST_IN_SCRIPTING>"|=" {
1419 	return T_OR_EQUAL;
1420 }
1421 
1422 <ST_IN_SCRIPTING>"^=" {
1423 	return T_XOR_EQUAL;
1424 }
1425 
1426 <ST_IN_SCRIPTING>"||" {
1427 	return T_BOOLEAN_OR;
1428 }
1429 
1430 <ST_IN_SCRIPTING>"&&" {
1431 	return T_BOOLEAN_AND;
1432 }
1433 
1434 <ST_IN_SCRIPTING>"OR" {
1435 	return T_LOGICAL_OR;
1436 }
1437 
1438 <ST_IN_SCRIPTING>"AND" {
1439 	return T_LOGICAL_AND;
1440 }
1441 
1442 <ST_IN_SCRIPTING>"XOR" {
1443 	return T_LOGICAL_XOR;
1444 }
1445 
1446 <ST_IN_SCRIPTING>"<<" {
1447 	return T_SL;
1448 }
1449 
1450 <ST_IN_SCRIPTING>">>" {
1451 	return T_SR;
1452 }
1453 
1454 <ST_IN_SCRIPTING>{TOKENS} {
1455 	return yytext[0];
1456 }
1457 
1458 
1459 <ST_IN_SCRIPTING>"{" {
1460 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1461 	return '{';
1462 }
1463 
1464 
1465 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1466 	yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1467 	return T_DOLLAR_OPEN_CURLY_BRACES;
1468 }
1469 
1470 
1471 <ST_IN_SCRIPTING>"}" {
1472 	RESET_DOC_COMMENT();
1473 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
1474 		yy_pop_state(TSRMLS_C);
1475 	}
1476 	return '}';
1477 }
1478 
1479 
1480 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1481 	yyless(yyleng - 1);
1482 	zend_copy_value(zendlval, yytext, yyleng);
1483 	zendlval->type = IS_STRING;
1484 	yy_pop_state(TSRMLS_C);
1485 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1486 	return T_STRING_VARNAME;
1487 }
1488 
1489 
1490 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1491 	yyless(0);
1492 	yy_pop_state(TSRMLS_C);
1493 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1494 	goto restart;
1495 }
1496 
1497 <ST_IN_SCRIPTING>{BNUM} {
1498 	char *bin = yytext + 2; /* Skip "0b" */
1499 	int len = yyleng - 2;
1500 
1501 	/* Skip any leading 0s */
1502 	while (*bin == '0') {
1503 		++bin;
1504 		--len;
1505 	}
1506 
1507 	if (len < SIZEOF_LONG * 8) {
1508 		if (len == 0) {
1509 			zendlval->value.lval = 0;
1510 		} else {
1511 			zendlval->value.lval = strtol(bin, NULL, 2);
1512 		}
1513 		zendlval->type = IS_LONG;
1514 		return T_LNUMBER;
1515 	} else {
1516 		zendlval->value.dval = zend_bin_strtod(bin, NULL);
1517 		zendlval->type = IS_DOUBLE;
1518 		return T_DNUMBER;
1519 	}
1520 }
1521 
1522 <ST_IN_SCRIPTING>{LNUM} {
1523 	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1524 		zendlval->value.lval = strtol(yytext, NULL, 0);
1525 	} else {
1526 		errno = 0;
1527 		zendlval->value.lval = strtol(yytext, NULL, 0);
1528 		if (errno == ERANGE) { /* Overflow */
1529 			if (yytext[0] == '0') { /* octal overflow */
1530 				zendlval->value.dval = zend_oct_strtod(yytext, NULL);
1531 			} else {
1532 				zendlval->value.dval = zend_strtod(yytext, NULL);
1533 			}
1534 			zendlval->type = IS_DOUBLE;
1535 			return T_DNUMBER;
1536 		}
1537 	}
1538 
1539 	zendlval->type = IS_LONG;
1540 	return T_LNUMBER;
1541 }
1542 
1543 <ST_IN_SCRIPTING>{HNUM} {
1544 	char *hex = yytext + 2; /* Skip "0x" */
1545 	int len = yyleng - 2;
1546 
1547 	/* Skip any leading 0s */
1548 	while (*hex == '0') {
1549 		hex++;
1550 		len--;
1551 	}
1552 
1553 	if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1554 		if (len == 0) {
1555 			zendlval->value.lval = 0;
1556 		} else {
1557 			zendlval->value.lval = strtol(hex, NULL, 16);
1558 		}
1559 		zendlval->type = IS_LONG;
1560 		return T_LNUMBER;
1561 	} else {
1562 		zendlval->value.dval = zend_hex_strtod(hex, NULL);
1563 		zendlval->type = IS_DOUBLE;
1564 		return T_DNUMBER;
1565 	}
1566 }
1567 
1568 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1569 	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1570 		zendlval->value.lval = strtol(yytext, NULL, 10);
1571 		zendlval->type = IS_LONG;
1572 	} else {
1573 		zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1574 		zendlval->value.str.len = yyleng;
1575 		zendlval->type = IS_STRING;
1576 	}
1577 	return T_NUM_STRING;
1578 }
1579 
1580 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1581 	zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1582 	zendlval->value.str.len = yyleng;
1583 	zendlval->type = IS_STRING;
1584 	return T_NUM_STRING;
1585 }
1586 
1587 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1588 	zendlval->value.dval = zend_strtod(yytext, NULL);
1589 	zendlval->type = IS_DOUBLE;
1590 	return T_DNUMBER;
1591 }
1592 
1593 <ST_IN_SCRIPTING>"__CLASS__" {
1594 	const char *class_name = NULL;
1595 
1596 	if (CG(active_class_entry)
1597 		&& (ZEND_ACC_TRAIT ==
1598 			(CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1599 		/* We create a special __CLASS__ constant that is going to be resolved
1600 		   at run-time */
1601 		zendlval->value.str.len = sizeof("__CLASS__")-1;
1602 		zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len);
1603 		zendlval->type = IS_CONSTANT;
1604 	} else {
1605 		if (CG(active_class_entry)) {
1606 			class_name = CG(active_class_entry)->name;
1607 		}
1608 
1609 		if (!class_name) {
1610 			class_name = "";
1611 		}
1612 
1613 		zendlval->value.str.len = strlen(class_name);
1614 		zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
1615 		zendlval->type = IS_STRING;
1616 	}
1617 	return T_CLASS_C;
1618 }
1619 
1620 <ST_IN_SCRIPTING>"__TRAIT__" {
1621 	const char *trait_name = NULL;
1622 
1623 	if (CG(active_class_entry)
1624 		&& (ZEND_ACC_TRAIT ==
1625 			(CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1626 		trait_name = CG(active_class_entry)->name;
1627 	}
1628 
1629 	if (!trait_name) {
1630 		trait_name = "";
1631 	}
1632 
1633 	zendlval->value.str.len = strlen(trait_name);
1634 	zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
1635 	zendlval->type = IS_STRING;
1636 
1637 	return T_TRAIT_C;
1638 }
1639 
1640 <ST_IN_SCRIPTING>"__FUNCTION__" {
1641 	const char *func_name = NULL;
1642 
1643 	if (CG(active_op_array)) {
1644 		func_name = CG(active_op_array)->function_name;
1645 	}
1646 
1647 	if (!func_name) {
1648 		func_name = "";
1649 	}
1650 	zendlval->value.str.len = strlen(func_name);
1651 	zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
1652 	zendlval->type = IS_STRING;
1653 	return T_FUNC_C;
1654 }
1655 
1656 <ST_IN_SCRIPTING>"__METHOD__" {
1657 	const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1658 	const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1659 	size_t len = 0;
1660 
1661 	if (class_name) {
1662 		len += strlen(class_name) + 2;
1663 	}
1664 	if (func_name) {
1665 		len += strlen(func_name);
1666 	}
1667 
1668 	zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
1669 		class_name ? class_name : "",
1670 		class_name && func_name ? "::" : "",
1671 		func_name ? func_name : ""
1672 		);
1673 	zendlval->type = IS_STRING;
1674 	return T_METHOD_C;
1675 }
1676 
1677 <ST_IN_SCRIPTING>"__LINE__" {
1678 	zendlval->value.lval = CG(zend_lineno);
1679 	zendlval->type = IS_LONG;
1680 	return T_LINE;
1681 }
1682 
1683 <ST_IN_SCRIPTING>"__FILE__" {
1684 	char *filename = zend_get_compiled_filename(TSRMLS_C);
1685 
1686 	if (!filename) {
1687 		filename = "";
1688 	}
1689 	zendlval->value.str.len = strlen(filename);
1690 	zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
1691 	zendlval->type = IS_STRING;
1692 	return T_FILE;
1693 }
1694 
1695 <ST_IN_SCRIPTING>"__DIR__" {
1696 	char *filename = zend_get_compiled_filename(TSRMLS_C);
1697 	const size_t filename_len = strlen(filename);
1698 	char *dirname;
1699 
1700 	if (!filename) {
1701 		filename = "";
1702 	}
1703 
1704 	dirname = estrndup(filename, filename_len);
1705 	zend_dirname(dirname, filename_len);
1706 
1707 	if (strcmp(dirname, ".") == 0) {
1708 		dirname = erealloc(dirname, MAXPATHLEN);
1709 #if HAVE_GETCWD
1710 		VCWD_GETCWD(dirname, MAXPATHLEN);
1711 #elif HAVE_GETWD
1712 		VCWD_GETWD(dirname);
1713 #endif
1714 	}
1715 
1716 	zendlval->value.str.len = strlen(dirname);
1717 	zendlval->value.str.val = dirname;
1718 	zendlval->type = IS_STRING;
1719 	return T_DIR;
1720 }
1721 
1722 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1723 	if (CG(current_namespace)) {
1724 		*zendlval = *CG(current_namespace);
1725 		zval_copy_ctor(zendlval);
1726 	} else {
1727 		ZVAL_EMPTY_STRING(zendlval);
1728 	}
1729 	return T_NS_C;
1730 }
1731 
1732 <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1733 	YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1734 
1735 	if (bracket != SCNG(yy_text)) {
1736 		/* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1737 		YYCURSOR = bracket;
1738 		goto inline_html;
1739 	}
1740 
1741 	HANDLE_NEWLINES(yytext, yyleng);
1742 	zendlval->value.str.val = yytext; /* no copying - intentional */
1743 	zendlval->value.str.len = yyleng;
1744 	zendlval->type = IS_STRING;
1745 	BEGIN(ST_IN_SCRIPTING);
1746 	return T_OPEN_TAG;
1747 }
1748 
1749 
1750 <INITIAL>"<%=" {
1751 	if (CG(asp_tags)) {
1752 		zendlval->value.str.val = yytext; /* no copying - intentional */
1753 		zendlval->value.str.len = yyleng;
1754 		zendlval->type = IS_STRING;
1755 		BEGIN(ST_IN_SCRIPTING);
1756 		return T_OPEN_TAG_WITH_ECHO;
1757 	} else {
1758 		goto inline_char_handler;
1759 	}
1760 }
1761 
1762 
1763 <INITIAL>"<?=" {
1764 	zendlval->value.str.val = yytext; /* no copying - intentional */
1765 	zendlval->value.str.len = yyleng;
1766 	zendlval->type = IS_STRING;
1767 	BEGIN(ST_IN_SCRIPTING);
1768 	return T_OPEN_TAG_WITH_ECHO;
1769 }
1770 
1771 
1772 <INITIAL>"<%" {
1773 	if (CG(asp_tags)) {
1774 		zendlval->value.str.val = yytext; /* no copying - intentional */
1775 		zendlval->value.str.len = yyleng;
1776 		zendlval->type = IS_STRING;
1777 		BEGIN(ST_IN_SCRIPTING);
1778 		return T_OPEN_TAG;
1779 	} else {
1780 		goto inline_char_handler;
1781 	}
1782 }
1783 
1784 
1785 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1786 	zendlval->value.str.val = yytext; /* no copying - intentional */
1787 	zendlval->value.str.len = yyleng;
1788 	zendlval->type = IS_STRING;
1789 	HANDLE_NEWLINE(yytext[yyleng-1]);
1790 	BEGIN(ST_IN_SCRIPTING);
1791 	return T_OPEN_TAG;
1792 }
1793 
1794 
1795 <INITIAL>"<?" {
1796 	if (CG(short_tags)) {
1797 		zendlval->value.str.val = yytext; /* no copying - intentional */
1798 		zendlval->value.str.len = yyleng;
1799 		zendlval->type = IS_STRING;
1800 		BEGIN(ST_IN_SCRIPTING);
1801 		return T_OPEN_TAG;
1802 	} else {
1803 		goto inline_char_handler;
1804 	}
1805 }
1806 
1807 <INITIAL>{ANY_CHAR} {
1808 	if (YYCURSOR > YYLIMIT) {
1809 		return 0;
1810 	}
1811 
1812 inline_char_handler:
1813 
1814 	while (1) {
1815 		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1816 
1817 		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1818 
1819 		if (YYCURSOR < YYLIMIT) {
1820 			switch (*YYCURSOR) {
1821 				case '?':
1822 					if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1823 						break;
1824 					}
1825 					continue;
1826 				case '%':
1827 					if (CG(asp_tags)) {
1828 						break;
1829 					}
1830 					continue;
1831 				case 's':
1832 				case 'S':
1833 					/* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1834 					 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1835 					YYCURSOR--;
1836 					yymore();
1837 				default:
1838 					continue;
1839 			}
1840 
1841 			YYCURSOR--;
1842 		}
1843 
1844 		break;
1845 	}
1846 
1847 inline_html:
1848 	yyleng = YYCURSOR - SCNG(yy_text);
1849 
1850 	if (SCNG(output_filter)) {
1851 		int readsize;
1852 		size_t sz = 0;
1853 		readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1854 		zendlval->value.str.len = sz;
1855 		if (readsize < yyleng) {
1856 			yyless(readsize);
1857 		}
1858 	} else {
1859 	  zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
1860 	  zendlval->value.str.len = yyleng;
1861 	}
1862 	zendlval->type = IS_STRING;
1863 	HANDLE_NEWLINES(yytext, yyleng);
1864 	return T_INLINE_HTML;
1865 }
1866 
1867 
1868 /* Make sure a label character follows "->", otherwise there is no property
1869  * and "->" will be taken literally
1870  */
1871 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1872 	yyless(yyleng - 3);
1873 	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1874 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1875 	zendlval->type = IS_STRING;
1876 	return T_VARIABLE;
1877 }
1878 
1879 /* A [ always designates a variable offset, regardless of what follows
1880  */
1881 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1882 	yyless(yyleng - 1);
1883 	yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1884 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1885 	zendlval->type = IS_STRING;
1886 	return T_VARIABLE;
1887 }
1888 
1889 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1890 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1891 	zendlval->type = IS_STRING;
1892 	return T_VARIABLE;
1893 }
1894 
1895 <ST_VAR_OFFSET>"]" {
1896 	yy_pop_state(TSRMLS_C);
1897 	return ']';
1898 }
1899 
1900 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1901 	/* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1902 	return yytext[0];
1903 }
1904 
1905 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1906 	/* Invalid rule to return a more explicit parse error with proper line number */
1907 	yyless(0);
1908 	yy_pop_state(TSRMLS_C);
1909 	return T_ENCAPSED_AND_WHITESPACE;
1910 }
1911 
1912 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1913 	zend_copy_value(zendlval, yytext, yyleng);
1914 	zendlval->type = IS_STRING;
1915 	return T_STRING;
1916 }
1917 
1918 
1919 <ST_IN_SCRIPTING>"#"|"//" {
1920 	while (YYCURSOR < YYLIMIT) {
1921 		switch (*YYCURSOR++) {
1922 			case '\r':
1923 				if (*YYCURSOR == '\n') {
1924 					YYCURSOR++;
1925 				}
1926 				/* fall through */
1927 			case '\n':
1928 				CG(zend_lineno)++;
1929 				break;
1930 			case '%':
1931 				if (!CG(asp_tags)) {
1932 					continue;
1933 				}
1934 				/* fall through */
1935 			case '?':
1936 				if (*YYCURSOR == '>') {
1937 					YYCURSOR--;
1938 					break;
1939 				}
1940 				/* fall through */
1941 			default:
1942 				continue;
1943 		}
1944 
1945 		break;
1946 	}
1947 
1948 	yyleng = YYCURSOR - SCNG(yy_text);
1949 
1950 	return T_COMMENT;
1951 }
1952 
1953 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1954 	int doc_com;
1955 
1956 	if (yyleng > 2) {
1957 		doc_com = 1;
1958 		RESET_DOC_COMMENT();
1959 	} else {
1960 		doc_com = 0;
1961 	}
1962 
1963 	while (YYCURSOR < YYLIMIT) {
1964 		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1965 			break;
1966 		}
1967 	}
1968 
1969 	if (YYCURSOR < YYLIMIT) {
1970 		YYCURSOR++;
1971 	} else {
1972 		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1973 	}
1974 
1975 	yyleng = YYCURSOR - SCNG(yy_text);
1976 	HANDLE_NEWLINES(yytext, yyleng);
1977 
1978 	if (doc_com) {
1979 		CG(doc_comment) = estrndup(yytext, yyleng);
1980 		CG(doc_comment_len) = yyleng;
1981 		return T_DOC_COMMENT;
1982 	}
1983 
1984 	return T_COMMENT;
1985 }
1986 
1987 <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1988 	zendlval->value.str.val = yytext; /* no copying - intentional */
1989 	zendlval->value.str.len = yyleng;
1990 	zendlval->type = IS_STRING;
1991 	BEGIN(INITIAL);
1992 	return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1993 }
1994 
1995 
1996 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1997 	if (CG(asp_tags)) {
1998 		BEGIN(INITIAL);
1999 		zendlval->value.str.len = yyleng;
2000 		zendlval->type = IS_STRING;
2001 		zendlval->value.str.val = yytext; /* no copying - intentional */
2002 		return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
2003 	} else {
2004 		yyless(1);
2005 		return yytext[0];
2006 	}
2007 }
2008 
2009 
2010 <ST_IN_SCRIPTING>b?['] {
2011 	register char *s, *t;
2012 	char *end;
2013 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
2014 
2015 	while (1) {
2016 		if (YYCURSOR < YYLIMIT) {
2017 			if (*YYCURSOR == '\'') {
2018 				YYCURSOR++;
2019 				yyleng = YYCURSOR - SCNG(yy_text);
2020 
2021 				break;
2022 			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2023 				YYCURSOR++;
2024 			}
2025 		} else {
2026 			yyleng = YYLIMIT - SCNG(yy_text);
2027 
2028 			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
2029 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2030 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
2031 			return T_ENCAPSED_AND_WHITESPACE;
2032 		}
2033 	}
2034 
2035 	zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
2036 	zendlval->value.str.len = yyleng-bprefix-2;
2037 	zendlval->type = IS_STRING;
2038 
2039 	/* convert escape sequences */
2040 	s = t = zendlval->value.str.val;
2041 	end = s+zendlval->value.str.len;
2042 	while (s<end) {
2043 		if (*s=='\\') {
2044 			s++;
2045 
2046 			switch(*s) {
2047 				case '\\':
2048 				case '\'':
2049 					*t++ = *s;
2050 					zendlval->value.str.len--;
2051 					break;
2052 				default:
2053 					*t++ = '\\';
2054 					*t++ = *s;
2055 					break;
2056 			}
2057 		} else {
2058 			*t++ = *s;
2059 		}
2060 
2061 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2062 			CG(zend_lineno)++;
2063 		}
2064 		s++;
2065 	}
2066 	*t = 0;
2067 
2068 	if (SCNG(output_filter)) {
2069 		size_t sz = 0;
2070 		s = zendlval->value.str.val;
2071 		SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
2072 		zendlval->value.str.len = sz;
2073 		efree(s);
2074 	}
2075 	return T_CONSTANT_ENCAPSED_STRING;
2076 }
2077 
2078 
2079 <ST_IN_SCRIPTING>b?["] {
2080 	int bprefix = (yytext[0] != '"') ? 1 : 0;
2081 
2082 	while (YYCURSOR < YYLIMIT) {
2083 		switch (*YYCURSOR++) {
2084 			case '"':
2085 				yyleng = YYCURSOR - SCNG(yy_text);
2086 				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2087 				return T_CONSTANT_ENCAPSED_STRING;
2088 			case '$':
2089 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2090 					break;
2091 				}
2092 				continue;
2093 			case '{':
2094 				if (*YYCURSOR == '$') {
2095 					break;
2096 				}
2097 				continue;
2098 			case '\\':
2099 				if (YYCURSOR < YYLIMIT) {
2100 					YYCURSOR++;
2101 				}
2102 				/* fall through */
2103 			default:
2104 				continue;
2105 		}
2106 
2107 		YYCURSOR--;
2108 		break;
2109 	}
2110 
2111 	/* Remember how much was scanned to save rescanning */
2112 	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2113 
2114 	YYCURSOR = SCNG(yy_text) + yyleng;
2115 
2116 	BEGIN(ST_DOUBLE_QUOTES);
2117 	return '"';
2118 }
2119 
2120 
/* Start of a heredoc/nowdoc: optional `b` prefix, "<<<", optional tabs/spaces,
 * a label (bare, single-quoted or double-quoted) and a newline.
 * Extracts the label into a freshly allocated zend_heredoc_label, selects
 * ST_NOWDOC (single-quoted label) or ST_HEREDOC (otherwise), switches directly
 * to ST_END_HEREDOC when the closing label follows immediately, pushes the
 * label on SCNG(heredoc_label_stack) and returns T_START_HEREDOC.
 */
2121 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2122 	char *s;
2123 	int bprefix = (yytext[0] != '<') ? 1 : 0; /* 1 when the match does not start with '<' (the `b` prefix) */
2124 	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2125 
2126 	CG(zend_lineno)++;
2127 	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0); /* drop the prefix, the 3-char "<<<", the final newline char and a '\r' right before it, if any */
2128 	s = yytext+bprefix+3;
2129 	while ((*s == ' ') || (*s == '\t')) { /* skip blanks between "<<<" and the label */
2130 		s++;
2131 		heredoc_label->length--;
2132 	}
2133 
2134 	if (*s == '\'') {
2135 		s++;
2136 		heredoc_label->length -= 2; /* exclude both quote characters */
2137 
2138 		BEGIN(ST_NOWDOC);
2139 	} else {
2140 		if (*s == '"') {
2141 			s++;
2142 			heredoc_label->length -= 2; /* exclude both quote characters */
2143 		}
2144 
2145 		BEGIN(ST_HEREDOC);
2146 	}
2147 
2148 	heredoc_label->label = estrndup(s, heredoc_label->length);
2149 
2150 	/* Check for ending label on the next line */
2151 	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2152 		YYCTYPE *end = YYCURSOR + heredoc_label->length;
2153 
2154 		if (*end == ';') { /* an optional ';' may follow the label */
2155 			end++;
2156 		}
2157 
2158 		if (*end == '\n' || *end == '\r') {
2159 			BEGIN(ST_END_HEREDOC); /* overrides the ST_HEREDOC/ST_NOWDOC state chosen above */
2160 		}
2161 	}
2162 
2163 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2164 
2165 	return T_START_HEREDOC;
2166 }
2167 
2168 
/* Opening backquote in scripting mode: switch to ST_BACKQUOTE and return the
 * backquote character itself as the token.
 */
2169 <ST_IN_SCRIPTING>[`] {
2170 	BEGIN(ST_BACKQUOTE);
2171 	return '`';
2172 }
2173 
2174 
/* Closing heredoc/nowdoc label. Pops the current label from
 * SCNG(heredoc_label_stack), advances the cursor over the label text, releases
 * the label, returns to ST_IN_SCRIPTING and yields T_END_HEREDOC.
 */
2175 <ST_END_HEREDOC>{ANY_CHAR} {
2176 	zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2177 
2178 	YYCURSOR += heredoc_label->length - 1; /* the -1 presumably accounts for the one character already matched by {ANY_CHAR} */
2179 	yyleng = heredoc_label->length;
2180 
2181 	heredoc_label_dtor(heredoc_label);
2182 	efree(heredoc_label);
2183 
2184 	BEGIN(ST_IN_SCRIPTING);
2185 	return T_END_HEREDOC;
2186 }
2187 
2188 
/* "{$" inside a double-quoted, backquoted or heredoc string: stores '{' in
 * zendlval, pushes ST_IN_SCRIPTING as the new scanner state and returns
 * T_CURLY_OPEN.
 */
2189 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2190 	zendlval->value.lval = (long) '{';
2191 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2192 	yyless(1); /* presumably keeps only the '{' in the token so the '$' is scanned again -- confirm against the yyless definition */
2193 	return T_CURLY_OPEN;
2194 }
2195 
2196 
/* Closing double quote of an interpolated string: go back to ST_IN_SCRIPTING
 * and return the quote character as the token.
 */
2197 <ST_DOUBLE_QUOTES>["] {
2198 	BEGIN(ST_IN_SCRIPTING);
2199 	return '"';
2200 }
2201 
/* Closing backquote: go back to ST_IN_SCRIPTING and return the backquote
 * character as the token.
 */
2202 <ST_BACKQUOTE>[`] {
2203 	BEGIN(ST_IN_SCRIPTING);
2204 	return '`';
2205 }
2206 
2207 
/* Literal text inside a double-quoted string. Consumes characters up to (but
 * not including) the next '"', `$` + label start, `${` or `{$`, unescapes the
 * run with zend_scan_escape_string() and returns T_ENCAPSED_AND_WHITESPACE.
 * When a scanned length was saved earlier, that length is reused instead of
 * scanning again.
 */
2208 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2209 	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2210 		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; /* the -1 presumably accounts for the character already matched by {ANY_CHAR} */
2211 		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0); /* the saved length is used only once */
2212 
2213 		goto double_quotes_scan_done;
2214 	}
2215 
2216 	if (YYCURSOR > YYLIMIT) {
2217 		return 0; /* cursor is past the buffer limit; 0 presumably signals end of input */
2218 	}
2219 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2220 		YYCURSOR++; /* the matched character was a backslash: skip the character after it */
2221 	}
2222 
2223 	while (YYCURSOR < YYLIMIT) {
2224 		switch (*YYCURSOR++) {
2225 			case '"':
2226 				break; /* leaves the switch only; scanning stops below */
2227 			case '$':
2228 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2229 					break;
2230 				}
2231 				continue;
2232 			case '{':
2233 				if (*YYCURSOR == '$') {
2234 					break;
2235 				}
2236 				continue;
2237 			case '\\':
2238 				if (YYCURSOR < YYLIMIT) {
2239 					YYCURSOR++; /* skip the character following the backslash */
2240 				}
2241 				/* fall through */
2242 			default:
2243 				continue;
2244 		}
2245 
2246 		YYCURSOR--; /* step back so the stopping character is left for the next token */
2247 		break;
2248 	}
2249 
2250 double_quotes_scan_done:
2251 	yyleng = YYCURSOR - SCNG(yy_text);
2252 
2253 	zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2254 	return T_ENCAPSED_AND_WHITESPACE;
2255 }
2256 
2257 
/* Literal text inside a backquoted string. Consumes characters up to (but not
 * including) the next backquote, `$` + label start, `${` or `{$`, unescapes the
 * run with zend_scan_escape_string() using '`' as the quote type and returns
 * T_ENCAPSED_AND_WHITESPACE.
 */
2258 <ST_BACKQUOTE>{ANY_CHAR} {
2259 	if (YYCURSOR > YYLIMIT) {
2260 		return 0; /* cursor is past the buffer limit; 0 presumably signals end of input */
2261 	}
2262 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2263 		YYCURSOR++; /* the matched character was a backslash: skip the character after it */
2264 	}
2265 
2266 	while (YYCURSOR < YYLIMIT) {
2267 		switch (*YYCURSOR++) {
2268 			case '`':
2269 				break; /* leaves the switch only; scanning stops below */
2270 			case '$':
2271 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2272 					break;
2273 				}
2274 				continue;
2275 			case '{':
2276 				if (*YYCURSOR == '$') {
2277 					break;
2278 				}
2279 				continue;
2280 			case '\\':
2281 				if (YYCURSOR < YYLIMIT) {
2282 					YYCURSOR++; /* skip the character following the backslash */
2283 				}
2284 				/* fall through */
2285 			default:
2286 				continue;
2287 		}
2288 
2289 		YYCURSOR--; /* step back so the stopping character is left for the next token */
2290 		break;
2291 	}
2292 
2293 	yyleng = YYCURSOR - SCNG(yy_text);
2294 
2295 	zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2296 	return T_ENCAPSED_AND_WHITESPACE;
2297 }
2298 
2299 
/* Body text of a heredoc. Consumes characters until either an interpolation
 * start (`$` + label start, `${`, `{$`) or a line that begins with the label on
 * top of SCNG(heredoc_label_stack), optionally followed by ';', and then a
 * newline. The text is unescaped with zend_scan_escape_string() (quote type 0)
 * and returned as T_ENCAPSED_AND_WHITESPACE; the newline right before the
 * closing label is counted in yyleng but excluded from the returned string.
 */
2300 <ST_HEREDOC>{ANY_CHAR} {
2301 	int newline = 0; /* number of newline bytes (0, 1 or 2) to exclude from the returned text */
2302 
2303 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2304 
2305 	if (YYCURSOR > YYLIMIT) {
2306 		return 0; /* cursor is past the buffer limit; 0 presumably signals end of input */
2307 	}
2308 
2309 	YYCURSOR--; /* presumably un-consumes the character matched by {ANY_CHAR} so the loop below inspects it too */
2310 
2311 	while (YYCURSOR < YYLIMIT) {
2312 		switch (*YYCURSOR++) {
2313 			case '\r':
2314 				if (*YYCURSOR == '\n') {
2315 					YYCURSOR++; /* treat "\r\n" as a single line break */
2316 				}
2317 				/* fall through */
2318 			case '\n':
2319 				/* Check for ending label on the next line */
2320 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2321 					YYCTYPE *end = YYCURSOR + heredoc_label->length;
2322 
2323 					if (*end == ';') { /* an optional ';' may follow the label */
2324 						end++;
2325 					}
2326 
2327 					if (*end == '\n' || *end == '\r') {
2328 						/* newline before label will be subtracted from returned text, but
2329 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2330 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2331 							newline = 2; /* Windows newline */
2332 						} else {
2333 							newline = 1;
2334 						}
2335 
2336 						CG(increment_lineno) = 1; /* For newline before label */
2337 						BEGIN(ST_END_HEREDOC);
2338 
2339 						goto heredoc_scan_done;
2340 					}
2341 				}
2342 				continue;
2343 			case '$':
2344 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2345 					break; /* leaves the switch only; scanning stops below */
2346 				}
2347 				continue;
2348 			case '{':
2349 				if (*YYCURSOR == '$') {
2350 					break; /* leaves the switch only; scanning stops below */
2351 				}
2352 				continue;
2353 			case '\\':
2354 				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') { /* a newline after the backslash is not skipped, so the newline cases above still see it */
2355 					YYCURSOR++;
2356 				}
2357 				/* fall through */
2358 			default:
2359 				continue;
2360 		}
2361 
2362 		YYCURSOR--; /* step back so the `$` or `{` is left for the next token */
2363 		break;
2364 	}
2365 
2366 heredoc_scan_done:
2367 	yyleng = YYCURSOR - SCNG(yy_text);
2368 
2369 	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2370 	return T_ENCAPSED_AND_WHITESPACE;
2371 }
2372 
2373 
/* Body text of a nowdoc. Consumes characters until a line that begins with the
 * label on top of SCNG(heredoc_label_stack), optionally followed by ';', and
 * then a newline; `$`, `{` and backslash get no special treatment here. The
 * text is copied verbatim with zend_copy_value() and returned as
 * T_ENCAPSED_AND_WHITESPACE; the newline right before the closing label is
 * counted in yyleng but excluded from the returned string.
 */
2374 <ST_NOWDOC>{ANY_CHAR} {
2375 	int newline = 0; /* number of newline bytes (0, 1 or 2) to exclude from the returned text */
2376 
2377 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2378 
2379 	if (YYCURSOR > YYLIMIT) {
2380 		return 0; /* cursor is past the buffer limit; 0 presumably signals end of input */
2381 	}
2382 
2383 	YYCURSOR--; /* presumably un-consumes the character matched by {ANY_CHAR} so the loop below inspects it too */
2384 
2385 	while (YYCURSOR < YYLIMIT) {
2386 		switch (*YYCURSOR++) {
2387 			case '\r':
2388 				if (*YYCURSOR == '\n') {
2389 					YYCURSOR++; /* treat "\r\n" as a single line break */
2390 				}
2391 				/* fall through */
2392 			case '\n':
2393 				/* Check for ending label on the next line */
2394 				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2395 					YYCTYPE *end = YYCURSOR + heredoc_label->length;
2396 
2397 					if (*end == ';') { /* an optional ';' may follow the label */
2398 						end++;
2399 					}
2400 
2401 					if (*end == '\n' || *end == '\r') {
2402 						/* newline before label will be subtracted from returned text, but
2403 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2404 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2405 							newline = 2; /* Windows newline */
2406 						} else {
2407 							newline = 1;
2408 						}
2409 
2410 						CG(increment_lineno) = 1; /* For newline before label */
2411 						BEGIN(ST_END_HEREDOC);
2412 
2413 						goto nowdoc_scan_done;
2414 					}
2415 				}
2416 				/* fall through */
2417 			default:
2418 				continue;
2419 		}
2420 	}
2421 
2422 nowdoc_scan_done:
2423 	yyleng = YYCURSOR - SCNG(yy_text);
2424 
2425 	zend_copy_value(zendlval, yytext, yyleng - newline);
2426 	zendlval->type = IS_STRING;
2427 	HANDLE_NEWLINES(yytext, yyleng - newline);
2428 	return T_ENCAPSED_AND_WHITESPACE;
2429 }
2430 
2431 
/* Catch-all for ST_IN_SCRIPTING and ST_VAR_OFFSET: any character no other rule
 * matched. Returns 0 when the cursor is past the buffer limit; otherwise raises
 * an E_COMPILE_WARNING naming the character and the scanner state, then jumps
 * to the `restart` label (defined outside this rule).
 */
2432 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2433 	if (YYCURSOR > YYLIMIT) {
2434 		return 0; /* 0 presumably signals end of input to the caller */
2435 	}
2436 
2437 	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2438 	goto restart;
2439 }
2440 
2441 */
2442 }
2443