xref: /PHP-5.4/Zend/zend_language_scanner.l (revision c0d060f5)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Marcus Boerger <helly@php.net>                              |
16    |          Nuno Lopes <nlopess@php.net>                                |
17    |          Scott MacVicar <scottmac@php.net>                           |
18    | Flex version authors:                                                |
19    |          Andi Gutmans <andi@zend.com>                                |
20    |          Zeev Suraski <zeev@zend.com>                                |
21    +----------------------------------------------------------------------+
22 */
23 
24 /* $Id$ */
25 
26 #if 0
27 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28 #else
29 # define YYDEBUG(s, c)
30 #endif
31 
32 #include "zend_language_scanner_defs.h"
33 
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef PHP_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "tsrm_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52 
/* re2c interface: input character type and accessors for the scanner's
   cursor, limit and backtracking marker, all kept in the scanner globals. */
#define YYCTYPE   unsigned char
/* Invoked by the generated scanner when it needs n more characters: returns 0
   from the enclosing function once the cursor plus n would reach past
   YYLIMIT + ZEND_MMAP_AHEAD. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current start condition; YYGETCONDITION() must stay an lvalue because its
   address is taken when the condition is pushed on the state stack. */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) (SCNG(yy_state) = (s))

/* Maps a condition name to the re2c-generated yyc<name> constant. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Truncates the current token to its first x characters. The argument is
   parenthesized so that compound expressions are added/cast as a whole. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72 
73 /* perform sanity check. If this message is triggered you should
74    increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79 
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83 
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87 
88 /* Globals Macros */
89 #define SCNG	LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95 
/* Advances CG(zend_lineno) once per line break found in the l bytes starting
   at s. "\n" and a lone "\r" each count as one break; for "\r\n" only the
   "\n" is counted. Note that the byte following the last one is inspected
   when the text ends in "\r". */
#define HANDLE_NEWLINES(s, l)													\
do {																			\
	char *p = (s), *boundary = p+(l);											\
																				\
	while (p<boundary) {														\
		if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) {					\
			CG(zend_lineno)++;													\
		}																		\
		p++;																	\
	}																			\
} while (0)

/* Advances CG(zend_lineno) when the single character c is "\n" or "\r".
   c is parenthesized so that expression arguments compare as a whole; it may
   still be evaluated twice, so it must not have side effects. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114 
/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
/* The setter is wrapped in parentheses so it behaves as one expression. */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) (CG(doc_comment_len) = (len))
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)

/* True for ASCII letters, '_' and any value >= 0x7F. The last test only
   matches high bytes when c is an unsigned character value. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)

/* Octal / hexadecimal digit tests on a single character. */
#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c)  (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123 
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125 
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127 {
128 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129 	assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130 	return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131 }
132 
/* Encoding filter: converts `from` (script encoding) into the UTF-8
   intermediate form. */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
}
137 
/* Encoding filter: converts `from` (UTF-8 intermediate form) back into the
   script encoding. */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
}
143 
/* Encoding filter: converts `from` (UTF-8 intermediate form) into the
   internal encoding. */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *target = zend_multibyte_get_internal_encoding(TSRMLS_C);

	/* same precondition as checked by encoding_filter_script_to_internal() */
	assert(target && zend_multibyte_check_lexer_compatibility(target));

	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		target, zend_multibyte_encoding_utf8 TSRMLS_CC);
}
151 
152 
/* Saves the current start condition on the scanner's state stack and then
   switches the scanner to new_state. */
static void _yy_push_state(int new_state TSRMLS_DC)
{
	/* the stack copies sizeof(int) bytes from the address of the live condition */
	zend_stack_push(&SCNG(state_stack), (void *) &SCNG(yy_state), sizeof(int));
	SCNG(yy_state) = new_state;
}

/* Takes a bare condition name (plus the TSRM argument when present) and
   prefixes it with yyc to form the generated condition constant. */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160 
yy_pop_state(TSRMLS_D)161 static void yy_pop_state(TSRMLS_D)
162 {
163 	int *stack_state;
164 	zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165 	YYSETCONDITION(*stack_state);
166 	zend_stack_del_top(&SCNG(state_stack));
167 }
168 
/* Points the scanner at the len bytes starting at str: the cursor goes to the
   first byte and the limit just past the last one. The recorded buffer start
   is only set when no start has been recorded yet. */
static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
{
	YYCTYPE *first = (YYCTYPE*)str;

	YYCURSOR = first;
	YYLIMIT  = first + len;

	if (SCNG(yy_start) == NULL) {
		SCNG(yy_start) = first;
	}
}
177 
startup_scanner(TSRMLS_D)178 void startup_scanner(TSRMLS_D)
179 {
180 	CG(parse_error) = 0;
181 	CG(heredoc) = NULL;
182 	CG(heredoc_len) = 0;
183 	CG(doc_comment) = NULL;
184 	CG(doc_comment_len) = 0;
185 	zend_stack_init(&SCNG(state_stack));
186 }
187 
shutdown_scanner(TSRMLS_D)188 void shutdown_scanner(TSRMLS_D)
189 {
190 	if (CG(heredoc)) {
191 		efree(CG(heredoc));
192 		CG(heredoc_len)=0;
193 	}
194 	CG(parse_error) = 0;
195 	zend_stack_destroy(&SCNG(state_stack));
196 	RESET_DOC_COMMENT();
197 }
198 
zend_save_lexical_state(zend_lex_state * lex_state TSRMLS_DC)199 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
200 {
201 	lex_state->yy_leng   = SCNG(yy_leng);
202 	lex_state->yy_start  = SCNG(yy_start);
203 	lex_state->yy_text   = SCNG(yy_text);
204 	lex_state->yy_cursor = SCNG(yy_cursor);
205 	lex_state->yy_marker = SCNG(yy_marker);
206 	lex_state->yy_limit  = SCNG(yy_limit);
207 
208 	lex_state->state_stack = SCNG(state_stack);
209 	zend_stack_init(&SCNG(state_stack));
210 
211 	lex_state->in = SCNG(yy_in);
212 	lex_state->yy_state = YYSTATE;
213 	lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
214 	lex_state->lineno = CG(zend_lineno);
215 
216 	lex_state->script_org = SCNG(script_org);
217 	lex_state->script_org_size = SCNG(script_org_size);
218 	lex_state->script_filtered = SCNG(script_filtered);
219 	lex_state->script_filtered_size = SCNG(script_filtered_size);
220 	lex_state->input_filter = SCNG(input_filter);
221 	lex_state->output_filter = SCNG(output_filter);
222 	lex_state->script_encoding = SCNG(script_encoding);
223 }
224 
zend_restore_lexical_state(zend_lex_state * lex_state TSRMLS_DC)225 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
226 {
227 	SCNG(yy_leng)   = lex_state->yy_leng;
228 	SCNG(yy_start)  = lex_state->yy_start;
229 	SCNG(yy_text)   = lex_state->yy_text;
230 	SCNG(yy_cursor) = lex_state->yy_cursor;
231 	SCNG(yy_marker) = lex_state->yy_marker;
232 	SCNG(yy_limit)  = lex_state->yy_limit;
233 
234 	zend_stack_destroy(&SCNG(state_stack));
235 	SCNG(state_stack) = lex_state->state_stack;
236 
237 	SCNG(yy_in) = lex_state->in;
238 	YYSETCONDITION(lex_state->yy_state);
239 	CG(zend_lineno) = lex_state->lineno;
240 	zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
241 
242 	if (SCNG(script_filtered)) {
243 		efree(SCNG(script_filtered));
244 		SCNG(script_filtered) = NULL;
245 	}
246 	SCNG(script_org) = lex_state->script_org;
247 	SCNG(script_org_size) = lex_state->script_org_size;
248 	SCNG(script_filtered) = lex_state->script_filtered;
249 	SCNG(script_filtered_size) = lex_state->script_filtered_size;
250 	SCNG(input_filter) = lex_state->input_filter;
251 	SCNG(output_filter) = lex_state->output_filter;
252 	SCNG(script_encoding) = lex_state->script_encoding;
253 
254 	if (CG(heredoc)) {
255 		efree(CG(heredoc));
256 		CG(heredoc) = NULL;
257 		CG(heredoc_len) = 0;
258 	}
259 	RESET_DOC_COMMENT();
260 }
261 
zend_destroy_file_handle(zend_file_handle * file_handle TSRMLS_DC)262 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
263 {
264 	zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
265 	/* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
266 	file_handle->opened_path = NULL;
267 	if (file_handle->free_filename) {
268 		file_handle->filename = NULL;
269 	}
270 }
271 
/* Byte order marks, as string literals. The UTF-32 marks contain NUL bytes,
   so their length must be taken with sizeof(...)-1, never strlen(). */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8     "\xef\xbb\xbf"
277 
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size TSRMLS_DC)278 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
279 {
280 	const unsigned char *p;
281 	int wchar_size = 2;
282 	int le = 0;
283 
284 	/* utf-16 or utf-32? */
285 	p = script;
286 	while ((p-script) < script_size) {
287 		p = memchr(p, 0, script_size-(p-script)-2);
288 		if (!p) {
289 			break;
290 		}
291 		if (*(p+1) == '\0' && *(p+2) == '\0') {
292 			wchar_size = 4;
293 			break;
294 		}
295 
296 		/* searching for UTF-32 specific byte orders, so this will do */
297 		p += 4;
298 	}
299 
300 	/* BE or LE? */
301 	p = script;
302 	while ((p-script) < script_size) {
303 		if (*p == '\0' && *(p+wchar_size-1) != '\0') {
304 			/* BE */
305 			le = 0;
306 			break;
307 		} else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
308 			/* LE* */
309 			le = 1;
310 			break;
311 		}
312 		p += wchar_size;
313 	}
314 
315 	if (wchar_size == 2) {
316 		return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
317 	} else {
318 		return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
319 	}
320 
321 	return NULL;
322 }
323 
zend_multibyte_detect_unicode(TSRMLS_D)324 static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
325 {
326 	const zend_encoding *script_encoding = NULL;
327 	int bom_size;
328 	unsigned char *pos1, *pos2;
329 
330 	if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
331 		return NULL;
332 	}
333 
334 	/* check out BOM */
335 	if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
336 		script_encoding = zend_multibyte_encoding_utf32be;
337 		bom_size = sizeof(BOM_UTF32_BE)-1;
338 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
339 		script_encoding = zend_multibyte_encoding_utf32le;
340 		bom_size = sizeof(BOM_UTF32_LE)-1;
341 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
342 		script_encoding = zend_multibyte_encoding_utf16be;
343 		bom_size = sizeof(BOM_UTF16_BE)-1;
344 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
345 		script_encoding = zend_multibyte_encoding_utf16le;
346 		bom_size = sizeof(BOM_UTF16_LE)-1;
347 	} else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
348 		script_encoding = zend_multibyte_encoding_utf8;
349 		bom_size = sizeof(BOM_UTF8)-1;
350 	}
351 
352 	if (script_encoding) {
353 		/* remove BOM */
354 		LANG_SCNG(script_org) += bom_size;
355 		LANG_SCNG(script_org_size) -= bom_size;
356 
357 		return script_encoding;
358 	}
359 
360 	/* script contains NULL bytes -> auto-detection */
361 	if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
362 		/* check if the NULL byte is after the __HALT_COMPILER(); */
363 		pos2 = LANG_SCNG(script_org);
364 
365 		while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
366 			pos2 = memchr(pos2, '_', pos1 - pos2);
367 			if (!pos2) break;
368 			pos2++;
369 			if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
370 				pos2 += sizeof("_HALT_COMPILER")-1;
371 				while (*pos2 == ' '  ||
372 					   *pos2 == '\t' ||
373 					   *pos2 == '\r' ||
374 					   *pos2 == '\n') {
375 					pos2++;
376 				}
377 				if (*pos2 == '(') {
378 					pos2++;
379 					while (*pos2 == ' '  ||
380 						   *pos2 == '\t' ||
381 						   *pos2 == '\r' ||
382 						   *pos2 == '\n') {
383 						pos2++;
384 					}
385 					if (*pos2 == ')') {
386 						pos2++;
387 						while (*pos2 == ' '  ||
388 							   *pos2 == '\t' ||
389 							   *pos2 == '\r' ||
390 							   *pos2 == '\n') {
391 							pos2++;
392 						}
393 						if (*pos2 == ';') {
394 							return NULL;
395 						}
396 					}
397 				}
398 			}
399 		}
400 		/* make best effort if BOM is missing */
401 		return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
402 	}
403 
404 	return NULL;
405 }
406 
zend_multibyte_find_script_encoding(TSRMLS_D)407 static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
408 {
409 	const zend_encoding *script_encoding;
410 
411 	if (CG(detect_unicode)) {
412 		/* check out bom(byte order mark) and see if containing wchars */
413 		script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
414 		if (script_encoding != NULL) {
415 			/* bom or wchar detection is prior to 'script_encoding' option */
416 			return script_encoding;
417 		}
418 	}
419 
420 	/* if no script_encoding specified, just leave alone */
421 	if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
422 		return NULL;
423 	}
424 
425 	/* if multiple encodings specified, detect automagically */
426 	if (CG(script_encoding_list_size) > 1) {
427 		return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
428 	}
429 
430 	return CG(script_encoding_list)[0];
431 }
432 
zend_multibyte_set_filter(const zend_encoding * onetime_encoding TSRMLS_DC)433 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
434 {
435 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
436 	const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
437 
438 	if (!script_encoding) {
439 		return FAILURE;
440 	}
441 
442 	/* judge input/output filter */
443 	LANG_SCNG(script_encoding) = script_encoding;
444 	LANG_SCNG(input_filter) = NULL;
445 	LANG_SCNG(output_filter) = NULL;
446 
447 	if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
448 		if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
449 			/* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
450 			LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
451 			LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
452 		} else {
453 			LANG_SCNG(input_filter) = NULL;
454 			LANG_SCNG(output_filter) = NULL;
455 		}
456 		return SUCCESS;
457 	}
458 
459 	if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
460 		LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
461 		LANG_SCNG(output_filter) = NULL;
462 	} else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
463 		LANG_SCNG(input_filter) = NULL;
464 		LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
465 	} else {
466 		/* both script and internal encodings are incompatible w/ flex */
467 		LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
468 		LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
469 	}
470 
471 	return 0;
472 }
473 
open_file_for_scanning(zend_file_handle * file_handle TSRMLS_DC)474 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
475 {
476 	const char *file_path = NULL;
477 	char *buf;
478 	size_t size, offset = 0;
479 
480 	/* The shebang line was read, get the current position to obtain the buffer start */
481 	if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
482 		if ((offset = ftell(file_handle->handle.fp)) == -1) {
483 			offset = 0;
484 		}
485 	}
486 
487 	if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
488 		return FAILURE;
489 	}
490 
491 	zend_llist_add_element(&CG(open_files), file_handle);
492 	if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
493 		zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
494 		size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
495 		fh->handle.stream.handle = (void*)(((char*)fh) + diff);
496 		file_handle->handle.stream.handle = fh->handle.stream.handle;
497 	}
498 
499 	/* Reset the scanner for scanning the new file */
500 	SCNG(yy_in) = file_handle;
501 	SCNG(yy_start) = NULL;
502 
503 	if (size != -1) {
504 		if (CG(multibyte)) {
505 			SCNG(script_org) = (unsigned char*)buf;
506 			SCNG(script_org_size) = size;
507 			SCNG(script_filtered) = NULL;
508 
509 			zend_multibyte_set_filter(NULL TSRMLS_CC);
510 
511 			if (SCNG(input_filter)) {
512 				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
513 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
514 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
515 				}
516 				buf = (char*)SCNG(script_filtered);
517 				size = SCNG(script_filtered_size);
518 			}
519 		}
520 		SCNG(yy_start) = (unsigned char *)buf - offset;
521 		yy_scan_buffer(buf, size TSRMLS_CC);
522 	} else {
523 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
524 	}
525 
526 	BEGIN(INITIAL);
527 
528 	if (file_handle->opened_path) {
529 		file_path = file_handle->opened_path;
530 	} else {
531 		file_path = file_handle->filename;
532 	}
533 
534 	zend_set_compiled_filename(file_path TSRMLS_CC);
535 
536 	if (CG(start_lineno)) {
537 		CG(zend_lineno) = CG(start_lineno);
538 		CG(start_lineno) = 0;
539 	} else {
540 		CG(zend_lineno) = 1;
541 	}
542 
543 	RESET_DOC_COMMENT();
544 	CG(increment_lineno) = 0;
545 	return SUCCESS;
546 }
END_EXTERN_C()547 END_EXTERN_C()
548 
549 
550 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
551 {
552 	zend_lex_state original_lex_state;
553 	zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
554 	zend_op_array *original_active_op_array = CG(active_op_array);
555 	zend_op_array *retval=NULL;
556 	int compiler_result;
557 	zend_bool compilation_successful=0;
558 	znode retval_znode;
559 	zend_bool original_in_compilation = CG(in_compilation);
560 
561 	retval_znode.op_type = IS_CONST;
562 	retval_znode.u.constant.type = IS_LONG;
563 	retval_znode.u.constant.value.lval = 1;
564 	Z_UNSET_ISREF(retval_znode.u.constant);
565 	Z_SET_REFCOUNT(retval_znode.u.constant, 1);
566 
567 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
568 
569 	retval = op_array; /* success oriented */
570 
571 	if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
572 		if (type==ZEND_REQUIRE) {
573 			zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
574 			zend_bailout();
575 		} else {
576 			zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
577 		}
578 		compilation_successful=0;
579 	} else {
580 		init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
581 		CG(in_compilation) = 1;
582 		CG(active_op_array) = op_array;
583 		zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
584 		zend_init_compiler_context(TSRMLS_C);
585 		compiler_result = zendparse(TSRMLS_C);
586 		zend_do_return(&retval_znode, 0 TSRMLS_CC);
587 		CG(in_compilation) = original_in_compilation;
588 		if (compiler_result != 0) { /* parser error */
589 			zend_bailout();
590 		}
591 		compilation_successful=1;
592 	}
593 
594 	if (retval) {
595 		CG(active_op_array) = original_active_op_array;
596 		if (compilation_successful) {
597 			pass_two(op_array TSRMLS_CC);
598 			zend_release_labels(0 TSRMLS_CC);
599 		} else {
600 			efree(op_array);
601 			retval = NULL;
602 		}
603 	}
604 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
605 	return retval;
606 }
607 
608 
compile_filename(int type,zval * filename TSRMLS_DC)609 zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
610 {
611 	zend_file_handle file_handle;
612 	zval tmp;
613 	zend_op_array *retval;
614 	char *opened_path = NULL;
615 
616 	if (filename->type != IS_STRING) {
617 		tmp = *filename;
618 		zval_copy_ctor(&tmp);
619 		convert_to_string(&tmp);
620 		filename = &tmp;
621 	}
622 	file_handle.filename = filename->value.str.val;
623 	file_handle.free_filename = 0;
624 	file_handle.type = ZEND_HANDLE_FILENAME;
625 	file_handle.opened_path = NULL;
626 	file_handle.handle.fp = NULL;
627 
628 	retval = zend_compile_file(&file_handle, type TSRMLS_CC);
629 	if (retval && file_handle.handle.stream.handle) {
630 		int dummy = 1;
631 
632 		if (!file_handle.opened_path) {
633 			file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
634 		}
635 
636 		zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
637 
638 		if (opened_path) {
639 			efree(opened_path);
640 		}
641 	}
642 	zend_destroy_file_handle(&file_handle TSRMLS_CC);
643 
644 	if (filename==&tmp) {
645 		zval_dtor(&tmp);
646 	}
647 	return retval;
648 }
649 
zend_prepare_string_for_scanning(zval * str,char * filename TSRMLS_DC)650 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
651 {
652 	char *buf;
653 	size_t size;
654 
655 	/* enforce two trailing NULLs for flex... */
656 	if (IS_INTERNED(str->value.str.val)) {
657 		char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
658 		memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
659 		str->value.str.val = tmp;
660 	} else {
661 		str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
662 	}
663 
664 	memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
665 
666 	SCNG(yy_in) = NULL;
667 	SCNG(yy_start) = NULL;
668 
669 	buf = str->value.str.val;
670 	size = str->value.str.len;
671 
672 	if (CG(multibyte)) {
673 		SCNG(script_org) = (unsigned char*)buf;
674 		SCNG(script_org_size) = size;
675 		SCNG(script_filtered) = NULL;
676 
677 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
678 
679 		if (SCNG(input_filter)) {
680 			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
681 				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
682 						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
683 			}
684 			buf = (char*)SCNG(script_filtered);
685 			size = SCNG(script_filtered_size);
686 		}
687 	}
688 
689 	yy_scan_buffer(buf, size TSRMLS_CC);
690 
691 	zend_set_compiled_filename(filename TSRMLS_CC);
692 	CG(zend_lineno) = 1;
693 	CG(increment_lineno) = 0;
694 	RESET_DOC_COMMENT();
695 	return SUCCESS;
696 }
697 
698 
zend_get_scanned_file_offset(TSRMLS_D)699 ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
700 {
701 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
702 	if (SCNG(input_filter)) {
703 		size_t original_offset = offset, length = 0;
704 		do {
705 			unsigned char *p = NULL;
706 			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
707 				return (size_t)-1;
708 			}
709 			efree(p);
710 			if (length > original_offset) {
711 				offset--;
712 			} else if (length < original_offset) {
713 				offset++;
714 			}
715 		} while (original_offset != length);
716 	}
717 	return offset;
718 }
719 
720 
compile_string(zval * source_string,char * filename TSRMLS_DC)721 zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
722 {
723 	zend_lex_state original_lex_state;
724 	zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
725 	zend_op_array *original_active_op_array = CG(active_op_array);
726 	zend_op_array *retval;
727 	zval tmp;
728 	int compiler_result;
729 	zend_bool original_in_compilation = CG(in_compilation);
730 
731 	if (source_string->value.str.len==0) {
732 		efree(op_array);
733 		return NULL;
734 	}
735 
736 	CG(in_compilation) = 1;
737 
738 	tmp = *source_string;
739 	zval_copy_ctor(&tmp);
740 	convert_to_string(&tmp);
741 	source_string = &tmp;
742 
743 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
744 	if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
745 		efree(op_array);
746 		retval = NULL;
747 	} else {
748 		zend_bool orig_interactive = CG(interactive);
749 
750 		CG(interactive) = 0;
751 		init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
752 		CG(interactive) = orig_interactive;
753 		CG(active_op_array) = op_array;
754 		zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
755 		zend_init_compiler_context(TSRMLS_C);
756 		BEGIN(ST_IN_SCRIPTING);
757 		compiler_result = zendparse(TSRMLS_C);
758 
759 		if (SCNG(script_filtered)) {
760 			efree(SCNG(script_filtered));
761 			SCNG(script_filtered) = NULL;
762 		}
763 
764 		if (compiler_result != 0) {
765 			CG(active_op_array) = original_active_op_array;
766 			CG(unclean_shutdown)=1;
767 			destroy_op_array(op_array TSRMLS_CC);
768 			efree(op_array);
769 			retval = NULL;
770 		} else {
771 			zend_do_return(NULL, 0 TSRMLS_CC);
772 			CG(active_op_array) = original_active_op_array;
773 			pass_two(op_array TSRMLS_CC);
774 			zend_release_labels(0 TSRMLS_CC);
775 			retval = op_array;
776 		}
777 	}
778 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
779 	zval_dtor(&tmp);
780 	CG(in_compilation) = original_in_compilation;
781 	return retval;
782 }
783 
784 
BEGIN_EXTERN_C()785 BEGIN_EXTERN_C()
786 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
787 {
788 	zend_lex_state original_lex_state;
789 	zend_file_handle file_handle;
790 
791 	file_handle.type = ZEND_HANDLE_FILENAME;
792 	file_handle.filename = filename;
793 	file_handle.free_filename = 0;
794 	file_handle.opened_path = NULL;
795 	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
796 	if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
797 		zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
798 		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
799 		return FAILURE;
800 	}
801 	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
802 	if (SCNG(script_filtered)) {
803 		efree(SCNG(script_filtered));
804 		SCNG(script_filtered) = NULL;
805 	}
806 	zend_destroy_file_handle(&file_handle TSRMLS_CC);
807 	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
808 	return SUCCESS;
809 }
810 
/*
 * Syntax-highlights the PHP code in str using the settings in
 * syntax_highlighter_ini; str_name is used as the compiled filename.
 *
 * The code is highlighted from a private copy of str, which is released on
 * every return path. The scanner state active on entry is saved and restored
 * around the operation.
 *
 * Returns FAILURE when the string cannot be prepared for scanning, SUCCESS
 * otherwise.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
	if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
		/* the copy made above must not outlive this call */
		zval_dtor(str);
		return FAILURE;
	}
	/* unlike eval'd code, highlighted text starts outside of PHP tags */
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
	zval_dtor(str);
	return SUCCESS;
}
833 
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding TSRMLS_DC)834 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
835 {
836 	size_t length;
837 	unsigned char *new_yy_start;
838 
839 	/* convert and set */
840 	if (!SCNG(input_filter)) {
841 		if (SCNG(script_filtered)) {
842 			efree(SCNG(script_filtered));
843 			SCNG(script_filtered) = NULL;
844 		}
845 		SCNG(script_filtered_size) = 0;
846 		length = SCNG(script_org_size);
847 		new_yy_start = SCNG(script_org);
848 	} else {
849 		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
850 			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
851 					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
852 		}
853 		SCNG(script_filtered) = new_yy_start;
854 		SCNG(script_filtered_size) = length;
855 	}
856 
857 	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
858 	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
859 	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
860 	SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
861 
862 	SCNG(yy_start) = new_yy_start;
863 }
864 
865 
/*
 * Stores the yyleng bytes starting at yytext as the string value of zendlval.
 * When SCNG(output_filter) is set, the filter produces the stored buffer and
 * its length; otherwise the bytes are duplicated with estrndup().
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, and every argument is parenthesised.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
	do { \
		if (SCNG(output_filter)) { \
			size_t sz = 0; \
			SCNG(output_filter)((unsigned char **)&((zendlval)->value.str.val), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
			(zendlval)->value.str.len = sz; \
		} else { \
			(zendlval)->value.str.val = (char *) estrndup((yytext), (yyleng)); \
			(zendlval)->value.str.len = (yyleng); \
		} \
	} while (0)
875 
/*
 * Initialises zendlval as a string zval holding a duplicate of the first len
 * bytes of str, then decodes backslash escape sequences in that duplicate, in
 * place:
 *
 *   \n \r \t \f \v                 the matching control character
 *   \e                             '\e' (VK_ESCAPE under PHP_WIN32)
 *   \\  \$  \<quote_type>          the character without its backslash
 *   \x or \X + 1-2 hex digits      the byte with that value
 *   \ + 1-3 octal digits           the byte with that value
 *
 * Every other sequence is kept verbatim, including \" or \` when that
 * character is not quote_type, and a backslash that is the last byte.
 * The stored length is reduced by one for every byte dropped.
 *
 * CG(zend_lineno) is incremented for each "\n" and for each "\r" that is not
 * followed by "\n" found in the text being scanned.
 *
 * If SCNG(output_filter) is set, the decoded buffer is passed through it,
 * replaced by the filter's output, and then freed.
 */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
	char *src, *dst, *limit;

	ZVAL_STRINGL(zendlval, str, len, 1);

	/* decode in place: read through src, write through dst */
	src = dst = Z_STRVAL_P(zendlval);
	limit = src + Z_STRLEN_P(zendlval);

	for (; src < limit; src++) {
		if (*src != '\\') {
			*dst++ = *src;
		} else if (++src >= limit) {
			/* a backslash as the very last byte stays as it is */
			*dst++ = '\\';
			break;
		} else {
			switch (*src) {
				case 'n':
					*dst++ = '\n';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'r':
					*dst++ = '\r';
					Z_STRLEN_P(zendlval)--;
					break;
				case 't':
					*dst++ = '\t';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'f':
					*dst++ = '\f';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'v':
					*dst++ = '\v';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'e':
#ifdef PHP_WIN32
					*dst++ = VK_ESCAPE;
#else
					*dst++ = '\e';
#endif
					Z_STRLEN_P(zendlval)--;
					break;
				case '"':
				case '`':
					if (*src != quote_type) {
						/* not the delimiter in use: keep the backslash */
						*dst++ = '\\';
						*dst++ = *src;
						break;
					}
					/* escaped delimiter: fall through */
				case '\\':
				case '$':
					*dst++ = *src;
					Z_STRLEN_P(zendlval)--;
					break;
				case 'x':
				case 'X':
					if (!ZEND_IS_HEX(src[1])) {
						/* no hex digit follows: keep the sequence verbatim */
						*dst++ = '\\';
						*dst++ = *src;
					} else {
						char digits[3] = { 0, 0, 0 };
						int n = 0;

						Z_STRLEN_P(zendlval)--; /* for the 'x' */

						/* collect one or two hex digits */
						do {
							digits[n++] = *(++src);
							Z_STRLEN_P(zendlval)--;
						} while (n < 2 && ZEND_IS_HEX(src[1]));

						*dst++ = (char) strtol(digits, NULL, 16);
					}
					break;
				default:
					if (!ZEND_IS_OCT(*src)) {
						/* unknown escape: keep the sequence verbatim */
						*dst++ = '\\';
						*dst++ = *src;
					} else {
						char digits[4] = { 0, 0, 0, 0 };
						int n = 0;

						/* the current byte is the first of up to three octal digits */
						digits[n++] = *src;
						Z_STRLEN_P(zendlval)--;
						while (n < 3 && ZEND_IS_OCT(src[1])) {
							digits[n++] = *(++src);
							Z_STRLEN_P(zendlval)--;
						}

						*dst++ = (char) strtol(digits, NULL, 8);
					}
					break;
			}
		}

		/* line accounting looks at the last source byte consumed */
		if (*src == '\n' || (*src == '\r' && src[1] != '\n')) {
			CG(zend_lineno)++;
		}
	}
	*dst = 0;

	if (SCNG(output_filter)) {
		size_t filtered_len = 0;
		char *decoded = Z_STRVAL_P(zendlval);

		SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &filtered_len, (unsigned char *)decoded, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
		Z_STRLEN_P(zendlval) = filtered_len;
		efree(decoded);
	}
}
994 
995 
lex_scan(zval * zendlval TSRMLS_DC)996 int lex_scan(zval *zendlval TSRMLS_DC)
997 {
998 restart:
999 	SCNG(yy_text) = YYCURSOR;
1000 
1001 yymore_restart:
1002 
1003 /*!re2c
1004 re2c:yyfill:check = 0;
1005 LNUM	[0-9]+
1006 DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1007 EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
1008 HNUM	"0x"[0-9a-fA-F]+
1009 BNUM	"0b"[01]+
1010 LABEL	[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1011 WHITESPACE [ \n\r\t]+
1012 TABS_AND_SPACES [ \t]*
1013 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1014 ANY_CHAR [^]
1015 NEWLINE ("\r"|"\n"|"\r\n")
1016 
1017 /* compute yyleng before each rule */
1018 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1019 
1020 
1021 <ST_IN_SCRIPTING>"exit" {
1022 	return T_EXIT;
1023 }
1024 
1025 <ST_IN_SCRIPTING>"die" {
1026 	return T_EXIT;
1027 }
1028 
1029 <ST_IN_SCRIPTING>"function" {
1030 	return T_FUNCTION;
1031 }
1032 
1033 <ST_IN_SCRIPTING>"const" {
1034 	return T_CONST;
1035 }
1036 
1037 <ST_IN_SCRIPTING>"return" {
1038 	return T_RETURN;
1039 }
1040 
1041 <ST_IN_SCRIPTING>"try" {
1042 	return T_TRY;
1043 }
1044 
1045 <ST_IN_SCRIPTING>"catch" {
1046 	return T_CATCH;
1047 }
1048 
1049 <ST_IN_SCRIPTING>"throw" {
1050 	return T_THROW;
1051 }
1052 
1053 <ST_IN_SCRIPTING>"if" {
1054 	return T_IF;
1055 }
1056 
1057 <ST_IN_SCRIPTING>"elseif" {
1058 	return T_ELSEIF;
1059 }
1060 
1061 <ST_IN_SCRIPTING>"endif" {
1062 	return T_ENDIF;
1063 }
1064 
1065 <ST_IN_SCRIPTING>"else" {
1066 	return T_ELSE;
1067 }
1068 
1069 <ST_IN_SCRIPTING>"while" {
1070 	return T_WHILE;
1071 }
1072 
1073 <ST_IN_SCRIPTING>"endwhile" {
1074 	return T_ENDWHILE;
1075 }
1076 
1077 <ST_IN_SCRIPTING>"do" {
1078 	return T_DO;
1079 }
1080 
1081 <ST_IN_SCRIPTING>"for" {
1082 	return T_FOR;
1083 }
1084 
1085 <ST_IN_SCRIPTING>"endfor" {
1086 	return T_ENDFOR;
1087 }
1088 
1089 <ST_IN_SCRIPTING>"foreach" {
1090 	return T_FOREACH;
1091 }
1092 
1093 <ST_IN_SCRIPTING>"endforeach" {
1094 	return T_ENDFOREACH;
1095 }
1096 
1097 <ST_IN_SCRIPTING>"declare" {
1098 	return T_DECLARE;
1099 }
1100 
1101 <ST_IN_SCRIPTING>"enddeclare" {
1102 	return T_ENDDECLARE;
1103 }
1104 
1105 <ST_IN_SCRIPTING>"instanceof" {
1106 	return T_INSTANCEOF;
1107 }
1108 
1109 <ST_IN_SCRIPTING>"as" {
1110 	return T_AS;
1111 }
1112 
1113 <ST_IN_SCRIPTING>"switch" {
1114 	return T_SWITCH;
1115 }
1116 
1117 <ST_IN_SCRIPTING>"endswitch" {
1118 	return T_ENDSWITCH;
1119 }
1120 
1121 <ST_IN_SCRIPTING>"case" {
1122 	return T_CASE;
1123 }
1124 
1125 <ST_IN_SCRIPTING>"default" {
1126 	return T_DEFAULT;
1127 }
1128 
1129 <ST_IN_SCRIPTING>"break" {
1130 	return T_BREAK;
1131 }
1132 
1133 <ST_IN_SCRIPTING>"continue" {
1134 	return T_CONTINUE;
1135 }
1136 
1137 <ST_IN_SCRIPTING>"goto" {
1138 	return T_GOTO;
1139 }
1140 
1141 <ST_IN_SCRIPTING>"echo" {
1142 	return T_ECHO;
1143 }
1144 
1145 <ST_IN_SCRIPTING>"print" {
1146 	return T_PRINT;
1147 }
1148 
1149 <ST_IN_SCRIPTING>"class" {
1150 	return T_CLASS;
1151 }
1152 
1153 <ST_IN_SCRIPTING>"interface" {
1154 	return T_INTERFACE;
1155 }
1156 
1157 <ST_IN_SCRIPTING>"trait" {
1158 	return T_TRAIT;
1159 }
1160 
1161 <ST_IN_SCRIPTING>"extends" {
1162 	return T_EXTENDS;
1163 }
1164 
1165 <ST_IN_SCRIPTING>"implements" {
1166 	return T_IMPLEMENTS;
1167 }
1168 
1169 <ST_IN_SCRIPTING>"->" {
1170 	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1171 	return T_OBJECT_OPERATOR;
1172 }
1173 
1174 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1175 	zendlval->value.str.val = yytext; /* no copying - intentional */
1176 	zendlval->value.str.len = yyleng;
1177 	zendlval->type = IS_STRING;
1178 	HANDLE_NEWLINES(yytext, yyleng);
1179 	return T_WHITESPACE;
1180 }
1181 
1182 <ST_LOOKING_FOR_PROPERTY>"->" {
1183 	return T_OBJECT_OPERATOR;
1184 }
1185 
1186 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1187 	yy_pop_state(TSRMLS_C);
1188 	zend_copy_value(zendlval, yytext, yyleng);
1189 	zendlval->type = IS_STRING;
1190 	return T_STRING;
1191 }
1192 
1193 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1194 	yyless(0);
1195 	yy_pop_state(TSRMLS_C);
1196 	goto restart;
1197 }
1198 
1199 <ST_IN_SCRIPTING>"::" {
1200 	return T_PAAMAYIM_NEKUDOTAYIM;
1201 }
1202 
1203 <ST_IN_SCRIPTING>"\\" {
1204 	return T_NS_SEPARATOR;
1205 }
1206 
1207 <ST_IN_SCRIPTING>"new" {
1208 	return T_NEW;
1209 }
1210 
1211 <ST_IN_SCRIPTING>"clone" {
1212 	return T_CLONE;
1213 }
1214 
1215 <ST_IN_SCRIPTING>"var" {
1216 	return T_VAR;
1217 }
1218 
1219 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1220 	return T_INT_CAST;
1221 }
1222 
1223 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1224 	return T_DOUBLE_CAST;
1225 }
1226 
1227 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1228 	return T_STRING_CAST;
1229 }
1230 
1231 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1232 	return T_ARRAY_CAST;
1233 }
1234 
1235 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1236 	return T_OBJECT_CAST;
1237 }
1238 
1239 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1240 	return T_BOOL_CAST;
1241 }
1242 
1243 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1244 	return T_UNSET_CAST;
1245 }
1246 
1247 <ST_IN_SCRIPTING>"eval" {
1248 	return T_EVAL;
1249 }
1250 
1251 <ST_IN_SCRIPTING>"include" {
1252 	return T_INCLUDE;
1253 }
1254 
1255 <ST_IN_SCRIPTING>"include_once" {
1256 	return T_INCLUDE_ONCE;
1257 }
1258 
1259 <ST_IN_SCRIPTING>"require" {
1260 	return T_REQUIRE;
1261 }
1262 
1263 <ST_IN_SCRIPTING>"require_once" {
1264 	return T_REQUIRE_ONCE;
1265 }
1266 
1267 <ST_IN_SCRIPTING>"namespace" {
1268 	return T_NAMESPACE;
1269 }
1270 
1271 <ST_IN_SCRIPTING>"use" {
1272 	return T_USE;
1273 }
1274 
1275 <ST_IN_SCRIPTING>"insteadof" {
1276         return T_INSTEADOF;
1277 }
1278 
1279 <ST_IN_SCRIPTING>"global" {
1280 	return T_GLOBAL;
1281 }
1282 
1283 <ST_IN_SCRIPTING>"isset" {
1284 	return T_ISSET;
1285 }
1286 
1287 <ST_IN_SCRIPTING>"empty" {
1288 	return T_EMPTY;
1289 }
1290 
1291 <ST_IN_SCRIPTING>"__halt_compiler" {
1292 	return T_HALT_COMPILER;
1293 }
1294 
1295 <ST_IN_SCRIPTING>"static" {
1296 	return T_STATIC;
1297 }
1298 
1299 <ST_IN_SCRIPTING>"abstract" {
1300 	return T_ABSTRACT;
1301 }
1302 
1303 <ST_IN_SCRIPTING>"final" {
1304 	return T_FINAL;
1305 }
1306 
1307 <ST_IN_SCRIPTING>"private" {
1308 	return T_PRIVATE;
1309 }
1310 
1311 <ST_IN_SCRIPTING>"protected" {
1312 	return T_PROTECTED;
1313 }
1314 
1315 <ST_IN_SCRIPTING>"public" {
1316 	return T_PUBLIC;
1317 }
1318 
1319 <ST_IN_SCRIPTING>"unset" {
1320 	return T_UNSET;
1321 }
1322 
1323 <ST_IN_SCRIPTING>"=>" {
1324 	return T_DOUBLE_ARROW;
1325 }
1326 
1327 <ST_IN_SCRIPTING>"list" {
1328 	return T_LIST;
1329 }
1330 
1331 <ST_IN_SCRIPTING>"array" {
1332 	return T_ARRAY;
1333 }
1334 
1335 <ST_IN_SCRIPTING>"callable" {
1336  return T_CALLABLE;
1337 }
1338 
1339 <ST_IN_SCRIPTING>"++" {
1340 	return T_INC;
1341 }
1342 
1343 <ST_IN_SCRIPTING>"--" {
1344 	return T_DEC;
1345 }
1346 
1347 <ST_IN_SCRIPTING>"===" {
1348 	return T_IS_IDENTICAL;
1349 }
1350 
1351 <ST_IN_SCRIPTING>"!==" {
1352 	return T_IS_NOT_IDENTICAL;
1353 }
1354 
1355 <ST_IN_SCRIPTING>"==" {
1356 	return T_IS_EQUAL;
1357 }
1358 
1359 <ST_IN_SCRIPTING>"!="|"<>" {
1360 	return T_IS_NOT_EQUAL;
1361 }
1362 
1363 <ST_IN_SCRIPTING>"<=" {
1364 	return T_IS_SMALLER_OR_EQUAL;
1365 }
1366 
1367 <ST_IN_SCRIPTING>">=" {
1368 	return T_IS_GREATER_OR_EQUAL;
1369 }
1370 
1371 <ST_IN_SCRIPTING>"+=" {
1372 	return T_PLUS_EQUAL;
1373 }
1374 
1375 <ST_IN_SCRIPTING>"-=" {
1376 	return T_MINUS_EQUAL;
1377 }
1378 
1379 <ST_IN_SCRIPTING>"*=" {
1380 	return T_MUL_EQUAL;
1381 }
1382 
1383 <ST_IN_SCRIPTING>"/=" {
1384 	return T_DIV_EQUAL;
1385 }
1386 
1387 <ST_IN_SCRIPTING>".=" {
1388 	return T_CONCAT_EQUAL;
1389 }
1390 
1391 <ST_IN_SCRIPTING>"%=" {
1392 	return T_MOD_EQUAL;
1393 }
1394 
1395 <ST_IN_SCRIPTING>"<<=" {
1396 	return T_SL_EQUAL;
1397 }
1398 
1399 <ST_IN_SCRIPTING>">>=" {
1400 	return T_SR_EQUAL;
1401 }
1402 
1403 <ST_IN_SCRIPTING>"&=" {
1404 	return T_AND_EQUAL;
1405 }
1406 
1407 <ST_IN_SCRIPTING>"|=" {
1408 	return T_OR_EQUAL;
1409 }
1410 
1411 <ST_IN_SCRIPTING>"^=" {
1412 	return T_XOR_EQUAL;
1413 }
1414 
1415 <ST_IN_SCRIPTING>"||" {
1416 	return T_BOOLEAN_OR;
1417 }
1418 
1419 <ST_IN_SCRIPTING>"&&" {
1420 	return T_BOOLEAN_AND;
1421 }
1422 
1423 <ST_IN_SCRIPTING>"OR" {
1424 	return T_LOGICAL_OR;
1425 }
1426 
1427 <ST_IN_SCRIPTING>"AND" {
1428 	return T_LOGICAL_AND;
1429 }
1430 
1431 <ST_IN_SCRIPTING>"XOR" {
1432 	return T_LOGICAL_XOR;
1433 }
1434 
1435 <ST_IN_SCRIPTING>"<<" {
1436 	return T_SL;
1437 }
1438 
1439 <ST_IN_SCRIPTING>">>" {
1440 	return T_SR;
1441 }
1442 
1443 <ST_IN_SCRIPTING>{TOKENS} {
1444 	return yytext[0];
1445 }
1446 
1447 
1448 <ST_IN_SCRIPTING>"{" {
1449 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1450 	return '{';
1451 }
1452 
1453 
1454 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1455 	yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1456 	return T_DOLLAR_OPEN_CURLY_BRACES;
1457 }
1458 
1459 
1460 <ST_IN_SCRIPTING>"}" {
1461 	RESET_DOC_COMMENT();
1462 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
1463 		yy_pop_state(TSRMLS_C);
1464 	}
1465 	return '}';
1466 }
1467 
1468 
1469 <ST_LOOKING_FOR_VARNAME>{LABEL} {
1470 	zend_copy_value(zendlval, yytext, yyleng);
1471 	zendlval->type = IS_STRING;
1472 	yy_pop_state(TSRMLS_C);
1473 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1474 	return T_STRING_VARNAME;
1475 }
1476 
1477 
1478 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1479 	yyless(0);
1480 	yy_pop_state(TSRMLS_C);
1481 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1482 	goto restart;
1483 }
1484 
1485 <ST_IN_SCRIPTING>{BNUM} {
1486 	char *bin = yytext + 2; /* Skip "0b" */
1487 	int len = yyleng - 2;
1488 
1489 	/* Skip any leading 0s */
1490 	while (*bin == '0') {
1491 		++bin;
1492 		--len;
1493 	}
1494 
1495 	if (len < SIZEOF_LONG * 8) {
1496 		if (len == 0) {
1497 			zendlval->value.lval = 0;
1498 		} else {
1499 			zendlval->value.lval = strtol(bin, NULL, 2);
1500 		}
1501 		zendlval->type = IS_LONG;
1502 		return T_LNUMBER;
1503 	} else {
1504 		zendlval->value.dval = zend_bin_strtod(bin, NULL);
1505 		zendlval->type = IS_DOUBLE;
1506 		return T_DNUMBER;
1507 	}
1508 }
1509 
1510 <ST_IN_SCRIPTING>{LNUM} {
1511 	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1512 		zendlval->value.lval = strtol(yytext, NULL, 0);
1513 	} else {
1514 		errno = 0;
1515 		zendlval->value.lval = strtol(yytext, NULL, 0);
1516 		if (errno == ERANGE) { /* Overflow */
1517 			if (yytext[0] == '0') { /* octal overflow */
1518 				zendlval->value.dval = zend_oct_strtod(yytext, NULL);
1519 			} else {
1520 				zendlval->value.dval = zend_strtod(yytext, NULL);
1521 			}
1522 			zendlval->type = IS_DOUBLE;
1523 			return T_DNUMBER;
1524 		}
1525 	}
1526 
1527 	zendlval->type = IS_LONG;
1528 	return T_LNUMBER;
1529 }
1530 
1531 <ST_IN_SCRIPTING>{HNUM} {
1532 	char *hex = yytext + 2; /* Skip "0x" */
1533 	int len = yyleng - 2;
1534 
1535 	/* Skip any leading 0s */
1536 	while (*hex == '0') {
1537 		hex++;
1538 		len--;
1539 	}
1540 
1541 	if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1542 		if (len == 0) {
1543 			zendlval->value.lval = 0;
1544 		} else {
1545 			zendlval->value.lval = strtol(hex, NULL, 16);
1546 		}
1547 		zendlval->type = IS_LONG;
1548 		return T_LNUMBER;
1549 	} else {
1550 		zendlval->value.dval = zend_hex_strtod(hex, NULL);
1551 		zendlval->type = IS_DOUBLE;
1552 		return T_DNUMBER;
1553 	}
1554 }
1555 
1556 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1557 	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1558 		zendlval->value.lval = strtol(yytext, NULL, 10);
1559 		zendlval->type = IS_LONG;
1560 	} else {
1561 		zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1562 		zendlval->value.str.len = yyleng;
1563 		zendlval->type = IS_STRING;
1564 	}
1565 	return T_NUM_STRING;
1566 }
1567 
1568 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1569 	zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1570 	zendlval->value.str.len = yyleng;
1571 	zendlval->type = IS_STRING;
1572 	return T_NUM_STRING;
1573 }
1574 
1575 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1576 	zendlval->value.dval = zend_strtod(yytext, NULL);
1577 	zendlval->type = IS_DOUBLE;
1578 	return T_DNUMBER;
1579 }
1580 
1581 <ST_IN_SCRIPTING>"__CLASS__" {
1582 	const char *class_name = NULL;
1583 
1584 	if (CG(active_class_entry)
1585 		&& (ZEND_ACC_TRAIT ==
1586 			(CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1587 		/* We create a special __CLASS__ constant that is going to be resolved
1588 		   at run-time */
1589 		zendlval->value.str.len = sizeof("__CLASS__")-1;
1590 		zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len);
1591 		zendlval->type = IS_CONSTANT;
1592 	} else {
1593 		if (CG(active_class_entry)) {
1594 			class_name = CG(active_class_entry)->name;
1595 		}
1596 
1597 		if (!class_name) {
1598 			class_name = "";
1599 		}
1600 
1601 		zendlval->value.str.len = strlen(class_name);
1602 		zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
1603 		zendlval->type = IS_STRING;
1604 	}
1605 	return T_CLASS_C;
1606 }
1607 
1608 <ST_IN_SCRIPTING>"__TRAIT__" {
1609 	const char *trait_name = NULL;
1610 
1611 	if (CG(active_class_entry)
1612 		&& (ZEND_ACC_TRAIT ==
1613 			(CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1614 		trait_name = CG(active_class_entry)->name;
1615 	}
1616 
1617 	if (!trait_name) {
1618 		trait_name = "";
1619 	}
1620 
1621 	zendlval->value.str.len = strlen(trait_name);
1622 	zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
1623 	zendlval->type = IS_STRING;
1624 
1625 	return T_TRAIT_C;
1626 }
1627 
1628 <ST_IN_SCRIPTING>"__FUNCTION__" {
1629 	const char *func_name = NULL;
1630 
1631 	if (CG(active_op_array)) {
1632 		func_name = CG(active_op_array)->function_name;
1633 	}
1634 
1635 	if (!func_name) {
1636 		func_name = "";
1637 	}
1638 	zendlval->value.str.len = strlen(func_name);
1639 	zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
1640 	zendlval->type = IS_STRING;
1641 	return T_FUNC_C;
1642 }
1643 
1644 <ST_IN_SCRIPTING>"__METHOD__" {
1645 	const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1646 	const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1647 	size_t len = 0;
1648 
1649 	if (class_name) {
1650 		len += strlen(class_name) + 2;
1651 	}
1652 	if (func_name) {
1653 		len += strlen(func_name);
1654 	}
1655 
1656 	zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
1657 		class_name ? class_name : "",
1658 		class_name && func_name ? "::" : "",
1659 		func_name ? func_name : ""
1660 		);
1661 	zendlval->type = IS_STRING;
1662 	return T_METHOD_C;
1663 }
1664 
1665 <ST_IN_SCRIPTING>"__LINE__" {
1666 	zendlval->value.lval = CG(zend_lineno);
1667 	zendlval->type = IS_LONG;
1668 	return T_LINE;
1669 }
1670 
1671 <ST_IN_SCRIPTING>"__FILE__" {
1672 	char *filename = zend_get_compiled_filename(TSRMLS_C);
1673 
1674 	if (!filename) {
1675 		filename = "";
1676 	}
1677 	zendlval->value.str.len = strlen(filename);
1678 	zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
1679 	zendlval->type = IS_STRING;
1680 	return T_FILE;
1681 }
1682 
1683 <ST_IN_SCRIPTING>"__DIR__" {
1684 	char *filename = zend_get_compiled_filename(TSRMLS_C);
1685 	const size_t filename_len = strlen(filename);
1686 	char *dirname;
1687 
1688 	if (!filename) {
1689 		filename = "";
1690 	}
1691 
1692 	dirname = estrndup(filename, filename_len);
1693 	zend_dirname(dirname, filename_len);
1694 
1695 	if (strcmp(dirname, ".") == 0) {
1696 		dirname = erealloc(dirname, MAXPATHLEN);
1697 #if HAVE_GETCWD
1698 		VCWD_GETCWD(dirname, MAXPATHLEN);
1699 #elif HAVE_GETWD
1700 		VCWD_GETWD(dirname);
1701 #endif
1702 	}
1703 
1704 	zendlval->value.str.len = strlen(dirname);
1705 	zendlval->value.str.val = dirname;
1706 	zendlval->type = IS_STRING;
1707 	return T_DIR;
1708 }
1709 
1710 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1711 	if (CG(current_namespace)) {
1712 		*zendlval = *CG(current_namespace);
1713 		zval_copy_ctor(zendlval);
1714 	} else {
1715 		ZVAL_EMPTY_STRING(zendlval);
1716 	}
1717 	return T_NS_C;
1718 }
1719 
1720 <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1721 	YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1722 
1723 	if (bracket != SCNG(yy_text)) {
1724 		/* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1725 		YYCURSOR = bracket;
1726 		goto inline_html;
1727 	}
1728 
1729 	HANDLE_NEWLINES(yytext, yyleng);
1730 	zendlval->value.str.val = yytext; /* no copying - intentional */
1731 	zendlval->value.str.len = yyleng;
1732 	zendlval->type = IS_STRING;
1733 	BEGIN(ST_IN_SCRIPTING);
1734 	return T_OPEN_TAG;
1735 }
1736 
1737 
1738 <INITIAL>"<%=" {
1739 	if (CG(asp_tags)) {
1740 		zendlval->value.str.val = yytext; /* no copying - intentional */
1741 		zendlval->value.str.len = yyleng;
1742 		zendlval->type = IS_STRING;
1743 		BEGIN(ST_IN_SCRIPTING);
1744 		return T_OPEN_TAG_WITH_ECHO;
1745 	} else {
1746 		goto inline_char_handler;
1747 	}
1748 }
1749 
1750 
1751 <INITIAL>"<?=" {
1752 	zendlval->value.str.val = yytext; /* no copying - intentional */
1753 	zendlval->value.str.len = yyleng;
1754 	zendlval->type = IS_STRING;
1755 	BEGIN(ST_IN_SCRIPTING);
1756 	return T_OPEN_TAG_WITH_ECHO;
1757 }
1758 
1759 
1760 <INITIAL>"<%" {
1761 	if (CG(asp_tags)) {
1762 		zendlval->value.str.val = yytext; /* no copying - intentional */
1763 		zendlval->value.str.len = yyleng;
1764 		zendlval->type = IS_STRING;
1765 		BEGIN(ST_IN_SCRIPTING);
1766 		return T_OPEN_TAG;
1767 	} else {
1768 		goto inline_char_handler;
1769 	}
1770 }
1771 
1772 
1773 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1774 	zendlval->value.str.val = yytext; /* no copying - intentional */
1775 	zendlval->value.str.len = yyleng;
1776 	zendlval->type = IS_STRING;
1777 	HANDLE_NEWLINE(yytext[yyleng-1]);
1778 	BEGIN(ST_IN_SCRIPTING);
1779 	return T_OPEN_TAG;
1780 }
1781 
1782 
1783 <INITIAL>"<?" {
1784 	if (CG(short_tags)) {
1785 		zendlval->value.str.val = yytext; /* no copying - intentional */
1786 		zendlval->value.str.len = yyleng;
1787 		zendlval->type = IS_STRING;
1788 		BEGIN(ST_IN_SCRIPTING);
1789 		return T_OPEN_TAG;
1790 	} else {
1791 		goto inline_char_handler;
1792 	}
1793 }
1794 
1795 <INITIAL>{ANY_CHAR} {
1796 	if (YYCURSOR > YYLIMIT) {
1797 		return 0;
1798 	}
1799 
1800 inline_char_handler:
1801 
1802 	while (1) {
1803 		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1804 
1805 		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1806 
1807 		if (YYCURSOR < YYLIMIT) {
1808 			switch (*YYCURSOR) {
1809 				case '?':
1810 					if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1811 						break;
1812 					}
1813 					continue;
1814 				case '%':
1815 					if (CG(asp_tags)) {
1816 						break;
1817 					}
1818 					continue;
1819 				case 's':
1820 				case 'S':
1821 					/* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1822 					 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1823 					YYCURSOR--;
1824 					yymore();
1825 				default:
1826 					continue;
1827 			}
1828 
1829 			YYCURSOR--;
1830 		}
1831 
1832 		break;
1833 	}
1834 
1835 inline_html:
1836 	yyleng = YYCURSOR - SCNG(yy_text);
1837 
1838 	if (SCNG(output_filter)) {
1839 		int readsize;
1840 		size_t sz = 0;
1841 		readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1842 		zendlval->value.str.len = sz;
1843 		if (readsize < yyleng) {
1844 			yyless(readsize);
1845 		}
1846 	} else {
1847 	  zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
1848 	  zendlval->value.str.len = yyleng;
1849 	}
1850 	zendlval->type = IS_STRING;
1851 	HANDLE_NEWLINES(yytext, yyleng);
1852 	return T_INLINE_HTML;
1853 }
1854 
1855 
1856 /* Make sure a label character follows "->", otherwise there is no property
1857  * and "->" will be taken literally
1858  */
1859 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1860 	yyless(yyleng - 3);
1861 	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1862 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1863 	zendlval->type = IS_STRING;
1864 	return T_VARIABLE;
1865 }
1866 
1867 /* A [ always designates a variable offset, regardless of what follows
1868  */
1869 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1870 	yyless(yyleng - 1);
1871 	yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1872 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1873 	zendlval->type = IS_STRING;
1874 	return T_VARIABLE;
1875 }
1876 
1877 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1878 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1879 	zendlval->type = IS_STRING;
1880 	return T_VARIABLE;
1881 }
1882 
1883 <ST_VAR_OFFSET>"]" {
1884 	yy_pop_state(TSRMLS_C);
1885 	return ']';
1886 }
1887 
1888 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1889 	/* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1890 	return yytext[0];
1891 }
1892 
1893 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1894 	/* Invalid rule to return a more explicit parse error with proper line number */
1895 	yyless(0);
1896 	yy_pop_state(TSRMLS_C);
1897 	return T_ENCAPSED_AND_WHITESPACE;
1898 }
1899 
1900 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1901 	zend_copy_value(zendlval, yytext, yyleng);
1902 	zendlval->type = IS_STRING;
1903 	return T_STRING;
1904 }
1905 
1906 
1907 <ST_IN_SCRIPTING>"#"|"//" {
1908 	while (YYCURSOR < YYLIMIT) {
1909 		switch (*YYCURSOR++) {
1910 			case '\r':
1911 				if (*YYCURSOR == '\n') {
1912 					YYCURSOR++;
1913 				}
1914 				/* fall through */
1915 			case '\n':
1916 				CG(zend_lineno)++;
1917 				break;
1918 			case '%':
1919 				if (!CG(asp_tags)) {
1920 					continue;
1921 				}
1922 				/* fall through */
1923 			case '?':
1924 				if (*YYCURSOR == '>') {
1925 					YYCURSOR--;
1926 					break;
1927 				}
1928 				/* fall through */
1929 			default:
1930 				continue;
1931 		}
1932 
1933 		break;
1934 	}
1935 
1936 	yyleng = YYCURSOR - SCNG(yy_text);
1937 
1938 	return T_COMMENT;
1939 }
1940 
1941 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1942 	int doc_com;
1943 
1944 	if (yyleng > 2) {
1945 		doc_com = 1;
1946 		RESET_DOC_COMMENT();
1947 	} else {
1948 		doc_com = 0;
1949 	}
1950 
1951 	while (YYCURSOR < YYLIMIT) {
1952 		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1953 			break;
1954 		}
1955 	}
1956 
1957 	if (YYCURSOR < YYLIMIT) {
1958 		YYCURSOR++;
1959 	} else {
1960 		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1961 	}
1962 
1963 	yyleng = YYCURSOR - SCNG(yy_text);
1964 	HANDLE_NEWLINES(yytext, yyleng);
1965 
1966 	if (doc_com) {
1967 		CG(doc_comment) = estrndup(yytext, yyleng);
1968 		CG(doc_comment_len) = yyleng;
1969 		return T_DOC_COMMENT;
1970 	}
1971 
1972 	return T_COMMENT;
1973 }
1974 
1975 <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1976 	zendlval->value.str.val = yytext; /* no copying - intentional */
1977 	zendlval->value.str.len = yyleng;
1978 	zendlval->type = IS_STRING;
1979 	BEGIN(INITIAL);
1980 	return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1981 }
1982 
1983 
1984 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1985 	if (CG(asp_tags)) {
1986 		BEGIN(INITIAL);
1987 		zendlval->value.str.len = yyleng;
1988 		zendlval->type = IS_STRING;
1989 		zendlval->value.str.val = yytext; /* no copying - intentional */
1990 		return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1991 	} else {
1992 		yyless(1);
1993 		return yytext[0];
1994 	}
1995 }
1996 
1997 
1998 <ST_IN_SCRIPTING>b?['] {
1999 	register char *s, *t;
2000 	char *end;
2001 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
2002 
2003 	while (1) {
2004 		if (YYCURSOR < YYLIMIT) {
2005 			if (*YYCURSOR == '\'') {
2006 				YYCURSOR++;
2007 				yyleng = YYCURSOR - SCNG(yy_text);
2008 
2009 				break;
2010 			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2011 				YYCURSOR++;
2012 			}
2013 		} else {
2014 			yyleng = YYLIMIT - SCNG(yy_text);
2015 
2016 			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
2017 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2018 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
2019 			return T_ENCAPSED_AND_WHITESPACE;
2020 		}
2021 	}
2022 
2023 	zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
2024 	zendlval->value.str.len = yyleng-bprefix-2;
2025 	zendlval->type = IS_STRING;
2026 
2027 	/* convert escape sequences */
2028 	s = t = zendlval->value.str.val;
2029 	end = s+zendlval->value.str.len;
2030 	while (s<end) {
2031 		if (*s=='\\') {
2032 			s++;
2033 
2034 			switch(*s) {
2035 				case '\\':
2036 				case '\'':
2037 					*t++ = *s;
2038 					zendlval->value.str.len--;
2039 					break;
2040 				default:
2041 					*t++ = '\\';
2042 					*t++ = *s;
2043 					break;
2044 			}
2045 		} else {
2046 			*t++ = *s;
2047 		}
2048 
2049 		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2050 			CG(zend_lineno)++;
2051 		}
2052 		s++;
2053 	}
2054 	*t = 0;
2055 
2056 	if (SCNG(output_filter)) {
2057 		size_t sz = 0;
2058 		s = zendlval->value.str.val;
2059 		SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
2060 		zendlval->value.str.len = sz;
2061 		efree(s);
2062 	}
2063 	return T_CONSTANT_ENCAPSED_STRING;
2064 }
2065 
2066 
2067 <ST_IN_SCRIPTING>b?["] {
2068 	int bprefix = (yytext[0] != '"') ? 1 : 0;
2069 
2070 	while (YYCURSOR < YYLIMIT) {
2071 		switch (*YYCURSOR++) {
2072 			case '"':
2073 				yyleng = YYCURSOR - SCNG(yy_text);
2074 				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2075 				return T_CONSTANT_ENCAPSED_STRING;
2076 			case '$':
2077 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2078 					break;
2079 				}
2080 				continue;
2081 			case '{':
2082 				if (*YYCURSOR == '$') {
2083 					break;
2084 				}
2085 				continue;
2086 			case '\\':
2087 				if (YYCURSOR < YYLIMIT) {
2088 					YYCURSOR++;
2089 				}
2090 				/* fall through */
2091 			default:
2092 				continue;
2093 		}
2094 
2095 		YYCURSOR--;
2096 		break;
2097 	}
2098 
2099 	/* Remember how much was scanned to save rescanning */
2100 	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2101 
2102 	YYCURSOR = SCNG(yy_text) + yyleng;
2103 
2104 	BEGIN(ST_DOUBLE_QUOTES);
2105 	return '"';
2106 }
2107 
2108 
2109 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2110 	char *s;
2111 	int bprefix = (yytext[0] != '<') ? 1 : 0;
2112 
2113 	/* save old heredoc label */
2114 	Z_STRVAL_P(zendlval) = CG(heredoc);
2115 	Z_STRLEN_P(zendlval) = CG(heredoc_len);
2116 
2117 	CG(zend_lineno)++;
2118 	CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2119 	s = yytext+bprefix+3;
2120 	while ((*s == ' ') || (*s == '\t')) {
2121 		s++;
2122 		CG(heredoc_len)--;
2123 	}
2124 
2125 	if (*s == '\'') {
2126 		s++;
2127 		CG(heredoc_len) -= 2;
2128 
2129 		BEGIN(ST_NOWDOC);
2130 	} else {
2131 		if (*s == '"') {
2132 			s++;
2133 			CG(heredoc_len) -= 2;
2134 		}
2135 
2136 		BEGIN(ST_HEREDOC);
2137 	}
2138 
2139 	CG(heredoc) = estrndup(s, CG(heredoc_len));
2140 
2141 	/* Check for ending label on the next line */
2142 	if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
2143 		YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2144 
2145 		if (*end == ';') {
2146 			end++;
2147 		}
2148 
2149 		if (*end == '\n' || *end == '\r') {
2150 			BEGIN(ST_END_HEREDOC);
2151 		}
2152 	}
2153 
2154 	return T_START_HEREDOC;
2155 }
2156 
2157 
2158 <ST_IN_SCRIPTING>[`] {
2159 	BEGIN(ST_BACKQUOTE);
2160 	return '`';
2161 }
2162 
2163 
2164 <ST_END_HEREDOC>{ANY_CHAR} {
	/* Emits T_END_HEREDOC for the closing label. The token is made to span
	 * CG(heredoc_len) characters; the - 1 presumably accounts for the one
	 * character {ANY_CHAR} has already consumed. */
2165 	YYCURSOR += CG(heredoc_len) - 1;
2166 	yyleng = CG(heredoc_len);
2167 
	/* Expose the stored label through the token value, then reset the
	 * compiler-global heredoc fields and go back to scripting state.
	 * NOTE(review): the label pointer is not freed here; it looks like
	 * ownership moves to zendlval - confirm who releases it. */
2168 	Z_STRVAL_P(zendlval) = CG(heredoc);
2169 	Z_STRLEN_P(zendlval) = CG(heredoc_len);
2170 	CG(heredoc) = NULL;
2171 	CG(heredoc_len) = 0;
2172 	BEGIN(ST_IN_SCRIPTING);
2173 	return T_END_HEREDOC;
2174 }
2175 
2176 
2177 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
	/* "{$" inside a double-quoted string, backquote string or heredoc:
	 * stores '{' as the token's integer value, pushes ST_IN_SCRIPTING on the
	 * state stack and returns T_CURLY_OPEN.
	 * NOTE(review): yyless(1) presumably trims the match to its first
	 * character so the '$' is scanned again - confirm against the macro. */
2178 	zendlval->value.lval = (long) '{';
2179 	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2180 	yyless(1);
2181 	return T_CURLY_OPEN;
2182 }
2183 
2184 
2185 <ST_DOUBLE_QUOTES>["] {
	/* A double quote seen in ST_DOUBLE_QUOTES: return to scripting state and
	 * return the quote character itself as the token. */
2186 	BEGIN(ST_IN_SCRIPTING);
2187 	return '"';
2188 }
2189 
2190 <ST_BACKQUOTE>[`] {
	/* A backquote seen in ST_BACKQUOTE: return to scripting state and return
	 * the backquote character itself as the token. */
2191 	BEGIN(ST_IN_SCRIPTING);
2192 	return '`';
2193 }
2194 
2195 
2196 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
	/* Literal text inside a double-quoted string. Extends the match up to
	 * (not including) the next '"', "$" + label start, "${" or "{$", passes
	 * it to zend_scan_escape_string() and returns T_ENCAPSED_AND_WHITESPACE. */

	/* A non-zero cached length means this stretch was already measured:
	 * jump straight to its end (- 1 for the character already matched)
	 * and clear the cache so it is used only once. */
2197 	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2198 		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2199 		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2200 
2201 		goto double_quotes_scan_done;
2202 	}
2203 
	/* NOTE(review): cursor beyond the limit presumably means end of input */
2204 	if (YYCURSOR > YYLIMIT) {
2205 		return 0;
2206 	}
	/* The matched character was a backslash: also take the character it
	 * escapes so it cannot end the token. */
2207 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2208 		YYCURSOR++;
2209 	}
2210 
	/* Inside the switch, "break" means "stop here" and falls to the code
	 * after the switch; "continue" keeps scanning with the next character. */
2211 	while (YYCURSOR < YYLIMIT) {
2212 		switch (*YYCURSOR++) {
2213 			case '"':
2214 				break;
2215 			case '$':
				/* Stop only when '$' begins a variable or "${" */
2216 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2217 					break;
2218 				}
2219 				continue;
2220 			case '{':
				/* Stop only for "{$" */
2221 				if (*YYCURSOR == '$') {
2222 					break;
2223 				}
2224 				continue;
2225 			case '\\':
				/* Skip the escaped character, if there is one */
2226 				if (YYCURSOR < YYLIMIT) {
2227 					YYCURSOR++;
2228 				}
2229 				/* fall through */
2230 			default:
2231 				continue;
2232 		}
2233 
		/* Give back the character that stopped the scan; it is not part of
		 * this token. */
2234 		YYCURSOR--;
2235 		break;
2236 	}
2237 
2238 double_quotes_scan_done:
2239 	yyleng = YYCURSOR - SCNG(yy_text);
2240 
2241 	zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2242 	return T_ENCAPSED_AND_WHITESPACE;
2243 }
2244 
2245 
2246 <ST_BACKQUOTE>{ANY_CHAR} {
	/* Literal text inside a backquote string. Extends the match up to (not
	 * including) the next '`', "$" + label start, "${" or "{$", passes it to
	 * zend_scan_escape_string() and returns T_ENCAPSED_AND_WHITESPACE. */

	/* NOTE(review): cursor beyond the limit presumably means end of input */
2247 	if (YYCURSOR > YYLIMIT) {
2248 		return 0;
2249 	}
	/* The matched character was a backslash: also take the character it
	 * escapes so it cannot end the token. */
2250 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2251 		YYCURSOR++;
2252 	}
2253 
	/* Inside the switch, "break" means "stop here" and falls to the code
	 * after the switch; "continue" keeps scanning with the next character. */
2254 	while (YYCURSOR < YYLIMIT) {
2255 		switch (*YYCURSOR++) {
2256 			case '`':
2257 				break;
2258 			case '$':
				/* Stop only when '$' begins a variable or "${" */
2259 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2260 					break;
2261 				}
2262 				continue;
2263 			case '{':
				/* Stop only for "{$" */
2264 				if (*YYCURSOR == '$') {
2265 					break;
2266 				}
2267 				continue;
2268 			case '\\':
				/* Skip the escaped character, if there is one */
2269 				if (YYCURSOR < YYLIMIT) {
2270 					YYCURSOR++;
2271 				}
2272 				/* fall through */
2273 			default:
2274 				continue;
2275 		}
2276 
		/* Give back the character that stopped the scan; it is not part of
		 * this token. */
2277 		YYCURSOR--;
2278 		break;
2279 	}
2280 
2281 	yyleng = YYCURSOR - SCNG(yy_text);
2282 
2283 	zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2284 	return T_ENCAPSED_AND_WHITESPACE;
2285 }
2286 
2287 
2288 <ST_HEREDOC>{ANY_CHAR} {
	/* Literal text inside a heredoc body. Scans until the line break that
	 * precedes the closing label, or until "$" + label start, "${" or "{$",
	 * passes the text to zend_scan_escape_string() and returns
	 * T_ENCAPSED_AND_WHITESPACE. */

	/* Length (0, 1 or 2) of the line break directly before the closing
	 * label; it stays in yyleng but is left out of the returned string. */
2289 	int newline = 0;
2290 
	/* NOTE(review): cursor beyond the limit presumably means end of input */
2291 	if (YYCURSOR > YYLIMIT) {
2292 		return 0;
2293 	}
2294 
	/* Step back so the loop below also examines the first matched character */
2295 	YYCURSOR--;
2296 
2297 	while (YYCURSOR < YYLIMIT) {
2298 		switch (*YYCURSOR++) {
2299 			case '\r':
				/* Treat "\r\n" as a single line break */
2300 				if (*YYCURSOR == '\n') {
2301 					YYCURSOR++;
2302 				}
2303 				/* fall through */
2304 			case '\n':
2305 				/* Check for ending label on the next line */
2306 				if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
2307 					YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2308 
					/* An optional ';' may directly follow the label */
2309 					if (*end == ';') {
2310 						end++;
2311 					}
2312 
					/* The label only closes the body when a line break follows */
2313 					if (*end == '\n' || *end == '\r') {
2314 						/* newline before label will be subtracted from returned text, but
2315 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2316 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2317 							newline = 2; /* Windows newline */
2318 						} else {
2319 							newline = 1;
2320 						}
2321 
2322 						CG(increment_lineno) = 1; /* For newline before label */
2323 						BEGIN(ST_END_HEREDOC);
2324 
2325 						goto heredoc_scan_done;
2326 					}
2327 				}
2328 				continue;
2329 			case '$':
				/* Stop only when '$' begins a variable or "${" */
2330 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2331 					break;
2332 				}
2333 				continue;
2334 			case '{':
				/* Stop only for "{$" */
2335 				if (*YYCURSOR == '$') {
2336 					break;
2337 				}
2338 				continue;
2339 			case '\\':
				/* Skip the escaped character, except a line break: that one
				 * is left for the next iteration so the '\r'/'\n' cases
				 * above still get to look for the closing label. */
2340 				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2341 					YYCURSOR++;
2342 				}
2343 				/* fall through */
2344 			default:
2345 				continue;
2346 		}
2347 
		/* Reached via "break" in the switch: give back the character that
		 * stopped the scan and leave the loop. */
2348 		YYCURSOR--;
2349 		break;
2350 	}
2351 
2352 heredoc_scan_done:
2353 	yyleng = YYCURSOR - SCNG(yy_text);
2354 
	/* The trailing line break (if any) is excluded from the string value */
2355 	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2356 	return T_ENCAPSED_AND_WHITESPACE;
2357 }
2358 
2359 
2360 <ST_NOWDOC>{ANY_CHAR} {
	/* Literal text of a nowdoc body. Scans until the line break that
	 * precedes the closing label (or until the limit); '$', '{' and '\\' get
	 * no special treatment here. The text is copied as-is into zendlval and
	 * T_ENCAPSED_AND_WHITESPACE is returned. */

	/* Length (0, 1 or 2) of the line break directly before the closing
	 * label; it stays in yyleng but is left out of the returned string. */
2361 	int newline = 0;
2362 
	/* NOTE(review): cursor beyond the limit presumably means end of input */
2363 	if (YYCURSOR > YYLIMIT) {
2364 		return 0;
2365 	}
2366 
	/* Step back so the loop below also examines the first matched character */
2367 	YYCURSOR--;
2368 
2369 	while (YYCURSOR < YYLIMIT) {
2370 		switch (*YYCURSOR++) {
2371 			case '\r':
				/* Treat "\r\n" as a single line break */
2372 				if (*YYCURSOR == '\n') {
2373 					YYCURSOR++;
2374 				}
2375 				/* fall through */
2376 			case '\n':
2377 				/* Check for ending label on the next line */
2378 				if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
2379 					YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2380 
					/* An optional ';' may directly follow the label */
2381 					if (*end == ';') {
2382 						end++;
2383 					}
2384 
					/* The label only closes the body when a line break follows */
2385 					if (*end == '\n' || *end == '\r') {
2386 						/* newline before label will be subtracted from returned text, but
2387 						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2388 						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2389 							newline = 2; /* Windows newline */
2390 						} else {
2391 							newline = 1;
2392 						}
2393 
2394 						CG(increment_lineno) = 1; /* For newline before label */
2395 						BEGIN(ST_END_HEREDOC);
2396 
2397 						goto nowdoc_scan_done;
2398 					}
2399 				}
2400 				/* fall through */
2401 			default:
2402 				continue;
2403 		}
2404 	}
2405 
2406 nowdoc_scan_done:
2407 	yyleng = YYCURSOR - SCNG(yy_text);
2408 
	/* Copy the raw text, without the trailing line break, as a string value.
	 * NOTE(review): HANDLE_NEWLINES presumably updates the line counter for
	 * the line breaks inside the copied text - confirm against the macro. */
2409 	zend_copy_value(zendlval, yytext, yyleng - newline);
2410 	zendlval->type = IS_STRING;
2411 	HANDLE_NEWLINES(yytext, yyleng - newline);
2412 	return T_ENCAPSED_AND_WHITESPACE;
2413 }
2414 
2415 
2416 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
	/* Catch-all for a character no other rule in these states accepted:
	 * raise a compile warning naming the character, its numeric value and the
	 * current scanner state, then jump to the "restart" label (defined
	 * outside this rule) without returning a token. */

	/* NOTE(review): cursor beyond the limit presumably means end of input */
2417 	if (YYCURSOR > YYLIMIT) {
2418 		return 0;
2419 	}
2420 
2421 	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2422 	goto restart;
2423 }
2424 
2425 */
2426 }
2427