1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Marcus Boerger <helly@php.net> |
16 | Nuno Lopes <nlopess@php.net> |
17 | Scott MacVicar <scottmac@php.net> |
18 | Flex version authors: |
19 | Andi Gutmans <andi@zend.com> |
20 | Zeev Suraski <zeev@zend.com> |
21 +----------------------------------------------------------------------+
22 */
23
24 /* $Id$ */
25
/*
 * Scanner trace hook. With the condition below left at 0, YYDEBUG(s, c)
 * expands to nothing; changing it to 1 makes every use print the state
 * number and the character it was given.
 * NOTE(review): presumably invoked by the re2c-generated scanner code, which
 * is not visible in this part of the file.
 */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31
32 #include "zend_language_scanner_defs.h"
33
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef ZEND_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52
/*
 * Glue between the generated scanner and the scanner globals (SCNG).
 *
 * YYCURSOR / YYLIMIT / YYMARKER map onto the yy_cursor / yy_limit / yy_marker
 * scanner globals, and the scanner condition lives in SCNG(yy_state).
 * YYFILL(n) does not refill anything: it makes the enclosing function return 0
 * once the cursor plus n would reach YYLIMIT + ZEND_MMAP_AHEAD.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. a ternary) expand with the intended precedence.
 */
#define YYCTYPE   unsigned char
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Map a condition name onto its yyc-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Shrink the current token to its first x bytes and move the cursor back there. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
/* Jumps to a label named yymore_restart, which must exist in the enclosing function. */
#define yymore()     goto yymore_restart
72
73 /* perform sanity check. If this message is triggered you should
74 increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87
/* Globals Macros */
/* SCNG is the short spelling of LANG_SCNG used throughout this file. */
#define SCNG LANG_SCNG
/*
 * Storage for the scanner globals: a ts_rsrc_id when ZTS is defined,
 * otherwise the zend_php_scanner_globals structure itself.
 */
#ifdef ZTS
ZEND_API ts_rsrc_id language_scanner_globals_id;
#else
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
95
/*
 * Bump CG(zend_lineno) once for every line break found in the l bytes
 * starting at s. A '\n' always counts; a '\r' counts only when the byte after
 * it is not '\n', so a "\r\n" pair is counted once (via its '\n').
 * Note that the '\r' test looks one byte ahead, so s[l] is read when the last
 * byte of the range is '\r'.
 */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *p = (s), *boundary = p+(l); \
	\
	for (; p < boundary; p++) { \
		if (*p == '\n') { \
			CG(zend_lineno)++; \
		} else if (*p == '\r' && *(p+1) != '\n') { \
			CG(zend_lineno)++; \
		} \
	} \
} while (0)
107
/*
 * Bump CG(zend_lineno) when the single character c is '\n' or '\r'.
 * The argument is parenthesized so that an expression argument (for example
 * a ternary) is compared as a whole; it may still be evaluated twice, so it
 * must not have side effects.
 */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114
/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() SCNG(scanned_string_len)

/*
 * True for an ASCII letter, '_' or any value >= 0x80.
 * The argument is evaluated several times, so it must be free of side effects.
 * NOTE(review): the >= 0x80 test can only succeed for an unsigned (or wider)
 * argument; confirm callers never pass a plain signed char.
 */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x80)

/* Digit classifiers; both evaluate their argument more than once. */
#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127 {
128 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129 ZEND_ASSERT(internal_encoding);
130 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131 }
132
encoding_filter_script_to_intermediate(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)133 static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134 {
135 return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136 }
137
encoding_filter_intermediate_to_script(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)138 static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139 {
140 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141 LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142 }
143
encoding_filter_intermediate_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)144 static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145 {
146 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147 ZEND_ASSERT(internal_encoding);
148 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149 internal_encoding, zend_multibyte_encoding_utf8);
150 }
151
152
_yy_push_state(int new_state)153 static void _yy_push_state(int new_state)
154 {
155 zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156 YYSETCONDITION(new_state);
157 }
158
159 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
yy_pop_state(void)161 static void yy_pop_state(void)
162 {
163 int *stack_state = zend_stack_top(&SCNG(state_stack));
164 YYSETCONDITION(*stack_state);
165 zend_stack_del_top(&SCNG(state_stack));
166 }
167
yy_scan_buffer(char * str,unsigned int len)168 static void yy_scan_buffer(char *str, unsigned int len)
169 {
170 YYCURSOR = (YYCTYPE*)str;
171 YYLIMIT = YYCURSOR + len;
172 if (!SCNG(yy_start)) {
173 SCNG(yy_start) = YYCURSOR;
174 }
175 }
176
startup_scanner(void)177 void startup_scanner(void)
178 {
179 CG(parse_error) = 0;
180 CG(doc_comment) = NULL;
181 CG(extra_fn_flags) = 0;
182 zend_stack_init(&SCNG(state_stack), sizeof(int));
183 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
184 }
185
/*
 * Element destructor for the heredoc label stack: frees the label string held
 * by heredoc_label. It does not free the zend_heredoc_label itself.
 */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
189
shutdown_scanner(void)190 void shutdown_scanner(void)
191 {
192 CG(parse_error) = 0;
193 RESET_DOC_COMMENT();
194 zend_stack_destroy(&SCNG(state_stack));
195 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
196 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
197 SCNG(on_event) = NULL;
198 }
199
zend_save_lexical_state(zend_lex_state * lex_state)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
201 {
202 lex_state->yy_leng = SCNG(yy_leng);
203 lex_state->yy_start = SCNG(yy_start);
204 lex_state->yy_text = SCNG(yy_text);
205 lex_state->yy_cursor = SCNG(yy_cursor);
206 lex_state->yy_marker = SCNG(yy_marker);
207 lex_state->yy_limit = SCNG(yy_limit);
208
209 lex_state->state_stack = SCNG(state_stack);
210 zend_stack_init(&SCNG(state_stack), sizeof(int));
211
212 lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214
215 lex_state->in = SCNG(yy_in);
216 lex_state->yy_state = YYSTATE;
217 lex_state->filename = zend_get_compiled_filename();
218 lex_state->lineno = CG(zend_lineno);
219
220 lex_state->script_org = SCNG(script_org);
221 lex_state->script_org_size = SCNG(script_org_size);
222 lex_state->script_filtered = SCNG(script_filtered);
223 lex_state->script_filtered_size = SCNG(script_filtered_size);
224 lex_state->input_filter = SCNG(input_filter);
225 lex_state->output_filter = SCNG(output_filter);
226 lex_state->script_encoding = SCNG(script_encoding);
227
228 lex_state->on_event = SCNG(on_event);
229 lex_state->on_event_context = SCNG(on_event_context);
230
231 lex_state->ast = CG(ast);
232 lex_state->ast_arena = CG(ast_arena);
233 }
234
zend_restore_lexical_state(zend_lex_state * lex_state)235 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
236 {
237 SCNG(yy_leng) = lex_state->yy_leng;
238 SCNG(yy_start) = lex_state->yy_start;
239 SCNG(yy_text) = lex_state->yy_text;
240 SCNG(yy_cursor) = lex_state->yy_cursor;
241 SCNG(yy_marker) = lex_state->yy_marker;
242 SCNG(yy_limit) = lex_state->yy_limit;
243
244 zend_stack_destroy(&SCNG(state_stack));
245 SCNG(state_stack) = lex_state->state_stack;
246
247 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
248 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
249 SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
250
251 SCNG(yy_in) = lex_state->in;
252 YYSETCONDITION(lex_state->yy_state);
253 CG(zend_lineno) = lex_state->lineno;
254 zend_restore_compiled_filename(lex_state->filename);
255
256 if (SCNG(script_filtered)) {
257 efree(SCNG(script_filtered));
258 SCNG(script_filtered) = NULL;
259 }
260 SCNG(script_org) = lex_state->script_org;
261 SCNG(script_org_size) = lex_state->script_org_size;
262 SCNG(script_filtered) = lex_state->script_filtered;
263 SCNG(script_filtered_size) = lex_state->script_filtered_size;
264 SCNG(input_filter) = lex_state->input_filter;
265 SCNG(output_filter) = lex_state->output_filter;
266 SCNG(script_encoding) = lex_state->script_encoding;
267
268 SCNG(on_event) = lex_state->on_event;
269 SCNG(on_event_context) = lex_state->on_event_context;
270
271 CG(ast) = lex_state->ast;
272 CG(ast_arena) = lex_state->ast_arena;
273
274 RESET_DOC_COMMENT();
275 }
276
zend_destroy_file_handle(zend_file_handle * file_handle)277 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
278 {
279 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
280 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
281 file_handle->opened_path = NULL;
282 if (file_handle->free_filename) {
283 file_handle->filename = NULL;
284 }
285 }
286
zend_lex_tstring(zval * zv)287 ZEND_API void zend_lex_tstring(zval *zv)
288 {
289 if (SCNG(on_event)) {
290 SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
291 }
292
293 ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
294 }
295
/*
 * Byte-order-mark signatures, written as string literals. Users must compare
 * with sizeof(BOM_x)-1 bytes so the literal's terminating NUL is excluded.
 * The UTF-32 LE mark starts with the UTF-16 LE mark, so UTF-32 has to be
 * tested before UTF-16.
 */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8 "\xef\xbb\xbf"
301
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)302 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
303 {
304 const unsigned char *p;
305 int wchar_size = 2;
306 int le = 0;
307
308 /* utf-16 or utf-32? */
309 p = script;
310 assert(p >= script);
311 while ((size_t)(p-script) < script_size) {
312 p = memchr(p, 0, script_size-(p-script)-2);
313 if (!p) {
314 break;
315 }
316 if (*(p+1) == '\0' && *(p+2) == '\0') {
317 wchar_size = 4;
318 break;
319 }
320
321 /* searching for UTF-32 specific byte orders, so this will do */
322 p += 4;
323 }
324
325 /* BE or LE? */
326 p = script;
327 assert(p >= script);
328 while ((size_t)(p-script) < script_size) {
329 if (*p == '\0' && *(p+wchar_size-1) != '\0') {
330 /* BE */
331 le = 0;
332 break;
333 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
334 /* LE* */
335 le = 1;
336 break;
337 }
338 p += wchar_size;
339 }
340
341 if (wchar_size == 2) {
342 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
343 } else {
344 return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
345 }
346
347 return NULL;
348 }
349
zend_multibyte_detect_unicode(void)350 static const zend_encoding* zend_multibyte_detect_unicode(void)
351 {
352 const zend_encoding *script_encoding = NULL;
353 int bom_size;
354 unsigned char *pos1, *pos2;
355
356 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
357 return NULL;
358 }
359
360 /* check out BOM */
361 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
362 script_encoding = zend_multibyte_encoding_utf32be;
363 bom_size = sizeof(BOM_UTF32_BE)-1;
364 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
365 script_encoding = zend_multibyte_encoding_utf32le;
366 bom_size = sizeof(BOM_UTF32_LE)-1;
367 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
368 script_encoding = zend_multibyte_encoding_utf16be;
369 bom_size = sizeof(BOM_UTF16_BE)-1;
370 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
371 script_encoding = zend_multibyte_encoding_utf16le;
372 bom_size = sizeof(BOM_UTF16_LE)-1;
373 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
374 script_encoding = zend_multibyte_encoding_utf8;
375 bom_size = sizeof(BOM_UTF8)-1;
376 }
377
378 if (script_encoding) {
379 /* remove BOM */
380 LANG_SCNG(script_org) += bom_size;
381 LANG_SCNG(script_org_size) -= bom_size;
382
383 return script_encoding;
384 }
385
386 /* script contains NULL bytes -> auto-detection */
387 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
388 /* check if the NULL byte is after the __HALT_COMPILER(); */
389 pos2 = LANG_SCNG(script_org);
390
391 while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
392 pos2 = memchr(pos2, '_', pos1 - pos2);
393 if (!pos2) break;
394 pos2++;
395 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
396 pos2 += sizeof("_HALT_COMPILER")-1;
397 while (*pos2 == ' ' ||
398 *pos2 == '\t' ||
399 *pos2 == '\r' ||
400 *pos2 == '\n') {
401 pos2++;
402 }
403 if (*pos2 == '(') {
404 pos2++;
405 while (*pos2 == ' ' ||
406 *pos2 == '\t' ||
407 *pos2 == '\r' ||
408 *pos2 == '\n') {
409 pos2++;
410 }
411 if (*pos2 == ')') {
412 pos2++;
413 while (*pos2 == ' ' ||
414 *pos2 == '\t' ||
415 *pos2 == '\r' ||
416 *pos2 == '\n') {
417 pos2++;
418 }
419 if (*pos2 == ';') {
420 return NULL;
421 }
422 }
423 }
424 }
425 }
426 /* make best effort if BOM is missing */
427 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
428 }
429
430 return NULL;
431 }
432
zend_multibyte_find_script_encoding(void)433 static const zend_encoding* zend_multibyte_find_script_encoding(void)
434 {
435 const zend_encoding *script_encoding;
436
437 if (CG(detect_unicode)) {
438 /* check out bom(byte order mark) and see if containing wchars */
439 script_encoding = zend_multibyte_detect_unicode();
440 if (script_encoding != NULL) {
441 /* bom or wchar detection is prior to 'script_encoding' option */
442 return script_encoding;
443 }
444 }
445
446 /* if no script_encoding specified, just leave alone */
447 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
448 return NULL;
449 }
450
451 /* if multiple encodings specified, detect automagically */
452 if (CG(script_encoding_list_size) > 1) {
453 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
454 }
455
456 return CG(script_encoding_list)[0];
457 }
458
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)459 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
460 {
461 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
462 const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
463
464 if (!script_encoding) {
465 return FAILURE;
466 }
467
468 /* judge input/output filter */
469 LANG_SCNG(script_encoding) = script_encoding;
470 LANG_SCNG(input_filter) = NULL;
471 LANG_SCNG(output_filter) = NULL;
472
473 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
474 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
475 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
476 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
477 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
478 } else {
479 LANG_SCNG(input_filter) = NULL;
480 LANG_SCNG(output_filter) = NULL;
481 }
482 return SUCCESS;
483 }
484
485 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
486 LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
487 LANG_SCNG(output_filter) = NULL;
488 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
489 LANG_SCNG(input_filter) = NULL;
490 LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
491 } else {
492 /* both script and internal encodings are incompatible w/ flex */
493 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
494 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
495 }
496
497 return 0;
498 }
499
open_file_for_scanning(zend_file_handle * file_handle)500 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
501 {
502 char *buf;
503 size_t size, offset = 0;
504 zend_string *compiled_filename;
505
506 /* The shebang line was read, get the current position to obtain the buffer start */
507 if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
508 if ((offset = ftell(file_handle->handle.fp)) == (size_t)-1) {
509 offset = 0;
510 }
511 }
512
513 if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
514 return FAILURE;
515 }
516
517 zend_llist_add_element(&CG(open_files), file_handle);
518 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
519 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
520 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
521 fh->handle.stream.handle = (void*)(((char*)fh) + diff);
522 file_handle->handle.stream.handle = fh->handle.stream.handle;
523 }
524
525 /* Reset the scanner for scanning the new file */
526 SCNG(yy_in) = file_handle;
527 SCNG(yy_start) = NULL;
528
529 if (size != (size_t)-1) {
530 if (CG(multibyte)) {
531 SCNG(script_org) = (unsigned char*)buf;
532 SCNG(script_org_size) = size;
533 SCNG(script_filtered) = NULL;
534
535 zend_multibyte_set_filter(NULL);
536
537 if (SCNG(input_filter)) {
538 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
539 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
540 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
541 }
542 buf = (char*)SCNG(script_filtered);
543 size = SCNG(script_filtered_size);
544 }
545 }
546 SCNG(yy_start) = (unsigned char *)buf - offset;
547 yy_scan_buffer(buf, (unsigned int)size);
548 } else {
549 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
550 }
551
552 BEGIN(INITIAL);
553
554 if (file_handle->opened_path) {
555 compiled_filename = zend_string_copy(file_handle->opened_path);
556 } else {
557 compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
558 }
559
560 zend_set_compiled_filename(compiled_filename);
561 zend_string_release(compiled_filename);
562
563 if (CG(start_lineno)) {
564 CG(zend_lineno) = CG(start_lineno);
565 CG(start_lineno) = 0;
566 } else {
567 CG(zend_lineno) = 1;
568 }
569
570 RESET_DOC_COMMENT();
571 CG(increment_lineno) = 0;
572 return SUCCESS;
573 }
END_EXTERN_C()574 END_EXTERN_C()
575
576 static zend_op_array *zend_compile(int type)
577 {
578 zend_op_array *op_array = NULL;
579 zend_bool original_in_compilation = CG(in_compilation);
580
581 CG(in_compilation) = 1;
582 CG(ast) = NULL;
583 CG(ast_arena) = zend_arena_create(1024 * 32);
584
585 if (!zendparse()) {
586 int last_lineno = CG(zend_lineno);
587 zend_file_context original_file_context;
588 zend_oparray_context original_oparray_context;
589 zend_op_array *original_active_op_array = CG(active_op_array);
590
591 op_array = emalloc(sizeof(zend_op_array));
592 init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
593 CG(active_op_array) = op_array;
594
595 if (zend_ast_process) {
596 zend_ast_process(CG(ast));
597 }
598
599 zend_file_context_begin(&original_file_context);
600 zend_oparray_context_begin(&original_oparray_context);
601 zend_compile_top_stmt(CG(ast));
602 CG(zend_lineno) = last_lineno;
603 zend_emit_final_return(type == ZEND_USER_FUNCTION);
604 op_array->line_start = 1;
605 op_array->line_end = last_lineno;
606 pass_two(op_array);
607 zend_oparray_context_end(&original_oparray_context);
608 zend_file_context_end(&original_file_context);
609
610 CG(active_op_array) = original_active_op_array;
611 }
612
613 zend_ast_destroy(CG(ast));
614 zend_arena_destroy(CG(ast_arena));
615
616 CG(in_compilation) = original_in_compilation;
617
618 return op_array;
619 }
620
compile_file(zend_file_handle * file_handle,int type)621 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
622 {
623 zend_lex_state original_lex_state;
624 zend_op_array *op_array = NULL;
625 zend_save_lexical_state(&original_lex_state);
626
627 if (open_file_for_scanning(file_handle)==FAILURE) {
628 if (type==ZEND_REQUIRE) {
629 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
630 zend_bailout();
631 } else {
632 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
633 }
634 } else {
635 op_array = zend_compile(ZEND_USER_FUNCTION);
636 }
637
638 zend_restore_lexical_state(&original_lex_state);
639 return op_array;
640 }
641
642
compile_filename(int type,zval * filename)643 zend_op_array *compile_filename(int type, zval *filename)
644 {
645 zend_file_handle file_handle;
646 zval tmp;
647 zend_op_array *retval;
648 zend_string *opened_path = NULL;
649
650 if (Z_TYPE_P(filename) != IS_STRING) {
651 tmp = *filename;
652 zval_copy_ctor(&tmp);
653 convert_to_string(&tmp);
654 filename = &tmp;
655 }
656 file_handle.filename = Z_STRVAL_P(filename);
657 file_handle.free_filename = 0;
658 file_handle.type = ZEND_HANDLE_FILENAME;
659 file_handle.opened_path = NULL;
660 file_handle.handle.fp = NULL;
661
662 retval = zend_compile_file(&file_handle, type);
663 if (retval && file_handle.handle.stream.handle) {
664 if (!file_handle.opened_path) {
665 file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
666 }
667
668 zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
669
670 if (opened_path) {
671 zend_string_release(opened_path);
672 }
673 }
674 zend_destroy_file_handle(&file_handle);
675
676 if (filename==&tmp) {
677 zval_dtor(&tmp);
678 }
679 return retval;
680 }
681
zend_prepare_string_for_scanning(zval * str,char * filename)682 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
683 {
684 char *buf;
685 size_t size, old_len;
686 zend_string *new_compiled_filename;
687
688 /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
689 old_len = Z_STRLEN_P(str);
690 Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
691 Z_TYPE_INFO_P(str) = IS_STRING_EX;
692 memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
693
694 SCNG(yy_in) = NULL;
695 SCNG(yy_start) = NULL;
696
697 buf = Z_STRVAL_P(str);
698 size = old_len;
699
700 if (CG(multibyte)) {
701 SCNG(script_org) = (unsigned char*)buf;
702 SCNG(script_org_size) = size;
703 SCNG(script_filtered) = NULL;
704
705 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
706
707 if (SCNG(input_filter)) {
708 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
709 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
710 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
711 }
712 buf = (char*)SCNG(script_filtered);
713 size = SCNG(script_filtered_size);
714 }
715 }
716
717 yy_scan_buffer(buf, (unsigned int)size);
718
719 new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
720 zend_set_compiled_filename(new_compiled_filename);
721 zend_string_release(new_compiled_filename);
722 CG(zend_lineno) = 1;
723 CG(increment_lineno) = 0;
724 RESET_DOC_COMMENT();
725 return SUCCESS;
726 }
727
728
zend_get_scanned_file_offset(void)729 ZEND_API size_t zend_get_scanned_file_offset(void)
730 {
731 size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
732 if (SCNG(input_filter)) {
733 size_t original_offset = offset, length = 0;
734 do {
735 unsigned char *p = NULL;
736 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
737 return (size_t)-1;
738 }
739 efree(p);
740 if (length > original_offset) {
741 offset--;
742 } else if (length < original_offset) {
743 offset++;
744 }
745 } while (original_offset != length);
746 }
747 return offset;
748 }
749
compile_string(zval * source_string,char * filename)750 zend_op_array *compile_string(zval *source_string, char *filename)
751 {
752 zend_lex_state original_lex_state;
753 zend_op_array *op_array = NULL;
754 zval tmp;
755
756 if (Z_STRLEN_P(source_string)==0) {
757 return NULL;
758 }
759
760 ZVAL_DUP(&tmp, source_string);
761 convert_to_string(&tmp);
762 source_string = &tmp;
763
764 zend_save_lexical_state(&original_lex_state);
765 if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
766 BEGIN(ST_IN_SCRIPTING);
767 op_array = zend_compile(ZEND_EVAL_CODE);
768 }
769
770 zend_restore_lexical_state(&original_lex_state);
771 zval_dtor(&tmp);
772
773 return op_array;
774 }
775
776
BEGIN_EXTERN_C()777 BEGIN_EXTERN_C()
778 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
779 {
780 zend_lex_state original_lex_state;
781 zend_file_handle file_handle;
782
783 file_handle.type = ZEND_HANDLE_FILENAME;
784 file_handle.filename = filename;
785 file_handle.free_filename = 0;
786 file_handle.opened_path = NULL;
787 zend_save_lexical_state(&original_lex_state);
788 if (open_file_for_scanning(&file_handle)==FAILURE) {
789 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
790 zend_restore_lexical_state(&original_lex_state);
791 return FAILURE;
792 }
793 zend_highlight(syntax_highlighter_ini);
794 if (SCNG(script_filtered)) {
795 efree(SCNG(script_filtered));
796 SCNG(script_filtered) = NULL;
797 }
798 zend_destroy_file_handle(&file_handle);
799 zend_restore_lexical_state(&original_lex_state);
800 return SUCCESS;
801 }
802
/*
 * Syntax-highlight the PHP source held in the zval `str` using the colours in
 * syntax_highlighter_ini; str_name is recorded as the compiled filename.
 *
 * The function works on a private copy of *str, which is destroyed on every
 * return path, and saves/restores the scanner state around the run.
 *
 * Returns FAILURE when the string cannot be prepared for scanning, SUCCESS
 * otherwise.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	/* zend_prepare_string_for_scanning() resizes the string, so scan a copy */
	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state);
	if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state);
		/* the private copy must not leak on the error path either */
		zval_dtor(str);
		return FAILURE;
	}
	/* start outside of PHP mode, like a file would */
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini);
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state);
	zval_dtor(str);
	return SUCCESS;
}
825
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)826 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
827 {
828 size_t length;
829 unsigned char *new_yy_start;
830
831 /* convert and set */
832 if (!SCNG(input_filter)) {
833 if (SCNG(script_filtered)) {
834 efree(SCNG(script_filtered));
835 SCNG(script_filtered) = NULL;
836 }
837 SCNG(script_filtered_size) = 0;
838 length = SCNG(script_org_size);
839 new_yy_start = SCNG(script_org);
840 } else {
841 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
842 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
843 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
844 }
845 if (SCNG(script_filtered)) {
846 efree(SCNG(script_filtered));
847 }
848 SCNG(script_filtered) = new_yy_start;
849 SCNG(script_filtered_size) = length;
850 }
851
852 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
853 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
854 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
855 SCNG(yy_limit) = new_yy_start + length;
856
857 SCNG(yy_start) = new_yy_start;
858 }
859
860
/*
 * Store the yyleng bytes at yytext in zendlval as a string.
 * When an output filter is installed the bytes are converted through it first;
 * the converted buffer is copied into the zval and then freed.
 *
 * The yytext / yyleng arguments are parenthesized under their casts so that
 * expression arguments keep their meaning.
 * NOTE(review): the expansion is a bare if/else, not do { } while (0); it must
 * not be used as the unbraced body of another if.
 */
// TODO: avoid reallocation ???
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (SCNG(output_filter)) { \
		size_t sz = 0; \
		char *s = NULL; \
		SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng)); \
		ZVAL_STRINGL(zendlval, s, sz); \
		efree(s); \
	} else { \
		ZVAL_STRINGL(zendlval, yytext, yyleng); \
	}
872
/* Translate one of the single-letter control escapes (n, r, t, f, v, e)
 * into the byte it denotes. Callers only pass one of those six letters;
 * anything else is treated like 'e'. */
static char zend_scan_control_escape(char letter)
{
	switch (letter) {
		case 'n':
			return '\n';
		case 'r':
			return '\r';
		case 't':
			return '\t';
		case 'f':
			return '\f';
		case 'v':
			return '\v';
		default: /* 'e' */
#ifdef ZEND_WIN32
			return VK_ESCAPE;
#else
			return '\e';
#endif
	}
}

/* Store the UTF-8 encoding of `codepoint` at `out` and return how many bytes
 * were written (0 if the value is above 0x10FFFF, in which case nothing is
 * stored).
 * based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
static size_t zend_scan_put_utf8(char *out, unsigned long codepoint)
{
	if (codepoint < 0x80) {
		out[0] = codepoint;
		return 1;
	}
	if (codepoint <= 0x7FF) {
		out[0] = (codepoint >> 6) + 0xC0;
		out[1] = (codepoint & 0x3F) + 0x80;
		return 2;
	}
	if (codepoint <= 0xFFFF) {
		out[0] = (codepoint >> 12) + 0xE0;
		out[1] = ((codepoint >> 6) & 0x3F) + 0x80;
		out[2] = (codepoint & 0x3F) + 0x80;
		return 3;
	}
	if (codepoint <= 0x10FFFF) {
		out[0] = (codepoint >> 18) + 0xF0;
		out[1] = ((codepoint >> 12) & 0x3F) + 0x80;
		out[2] = ((codepoint >> 6) & 0x3F) + 0x80;
		out[3] = (codepoint & 0x3F) + 0x80;
		return 4;
	}
	return 0;
}

/* Build the value of an interpolating string fragment.
 *
 * The `len` bytes at `str` are copied into `zendlval` as a new string, which
 * is then rewritten in place with its escape sequences resolved; the stored
 * string length is reduced by every byte an escape saves. CG(zend_lineno) is
 * bumped for each line break found in the source text. If an output filter is
 * installed, the resolved string is passed through it and `zendlval` is
 * replaced by the filter result.
 *
 * `quote_type` selects which of \" and \` is unescaped: only the one equal to
 * `quote_type` loses its backslash, the other keeps it.
 *
 * Returns SUCCESS, or FAILURE after throwing a zend_ce_parse_error for a
 * malformed or too large \u{...} escape; on FAILURE `zendlval` has been
 * destroyed and set to UNDEF.
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *src, *dst, *limit;

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences; dst never runs ahead of src */
	src = dst = Z_STRVAL_P(zendlval);
	limit = src + Z_STRLEN_P(zendlval);

	for (; src < limit; src++) {
		if (*src != '\\') {
			*dst++ = *src;
		} else {
			src++;
			if (src >= limit) {
				/* a backslash that ends the fragment is kept literally */
				*dst++ = '\\';
				break;
			}

			switch (*src) {
				case 'n':
				case 'r':
				case 't':
				case 'f':
				case 'v':
				case 'e':
					*dst++ = zend_scan_control_escape(*src);
					Z_STRLEN_P(zendlval)--;
					break;
				case '"':
				case '`':
					if (*src != quote_type) {
						/* not the delimiter of this string: keep the backslash */
						*dst++ = '\\';
						*dst++ = *src;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*dst++ = *src;
					Z_STRLEN_P(zendlval)--;
					break;
				case 'x':
				case 'X':
				{
					char hex_digits[3] = { 0, 0, 0 };

					if (!ZEND_IS_HEX(*(src+1))) {
						/* no digit follows: leave the two characters untouched */
						*dst++ = '\\';
						*dst++ = *src;
						break;
					}

					Z_STRLEN_P(zendlval) -= 2; /* the 'x' and the first digit */
					hex_digits[0] = *(++src);
					if (ZEND_IS_HEX(*(src+1))) {
						hex_digits[1] = *(++src);
						Z_STRLEN_P(zendlval)--;
					}
					*dst++ = (char) ZEND_STRTOL(hex_digits, NULL, 16);
					break;
				}
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
				{
					/* remember where the sequence starts so it can be parsed after validation */
					char *brace = src + 1;
					size_t seq_len;   /* bytes in "{", the hex digits and "}" */
					size_t utf8_len;
					unsigned long codepoint;

					if (*brace != '{') {
						/* silently accepted to avoid breaking code with JSON
						 * in string literals (e.g. "\"\u202e\"") */
						*dst++ = '\\';
						*dst++ = 'u';
						break;
					}

					/* on the other hand, invalid \u{blah} errors */
					seq_len = 1;
					src = brace + 1;
					while (ZEND_IS_HEX(*src)) {
						src++;
						seq_len++;
					}
					if (*src == '}') {
						seq_len++;
					}

					/* unterminated, non-hex content, or the empty \u{} */
					if (*src != '}' || seq_len <= 2) {
						zend_throw_exception(zend_ce_parse_error,
							"Invalid UTF-8 codepoint escape sequence", 0);
						zval_ptr_dtor(zendlval);
						ZVAL_UNDEF(zendlval);
						return FAILURE;
					}

					errno = 0;
					codepoint = strtoul(brace + 1, NULL, 16);

					/* per RFC 3629, UTF-8 can only represent 21 bits */
					if (codepoint > 0x10FFFF || errno) {
						zend_throw_exception(zend_ce_parse_error,
							"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
						zval_ptr_dtor(zendlval);
						ZVAL_UNDEF(zendlval);
						return FAILURE;
					}

					utf8_len = zend_scan_put_utf8(dst, codepoint);
					dst += utf8_len;

					Z_STRLEN_P(zendlval) -= 2; /* \u */
					Z_STRLEN_P(zendlval) -= (seq_len - utf8_len);
					break;
				}
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*src)) {
						char oct_digits[4] = { 0, 0, 0, 0 };
						int count = 0;

						/* one mandatory digit plus up to two more */
						oct_digits[count++] = *src;
						while (count < 3 && ZEND_IS_OCT(*(src+1))) {
							oct_digits[count++] = *(++src);
						}
						Z_STRLEN_P(zendlval) -= count;

						if (oct_digits[2] &&
						    (oct_digits[0] > '3')) {
							/* 3 octit values must not overflow 0xFF (\377) */
							zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", oct_digits);
						}

						*dst++ = (char) ZEND_STRTOL(oct_digits, NULL, 8);
					} else {
						/* unknown escape: both characters are kept */
						*dst++ = '\\';
						*dst++ = *src;
					}
					break;
			}
		}

		/* count "\n", and "\r" unless it is the first half of "\r\n" */
		if (*src == '\n' || (*src == '\r' && (*(src+1) != '\n'))) {
			CG(zend_lineno)++;
		}
	}
	*dst = 0;

	if (SCNG(output_filter)) {
		size_t filtered_len = 0;
		unsigned char *filtered;

		// TODO: avoid reallocation ???
		SCNG(output_filter)(&filtered, &filtered_len, (unsigned char *)Z_STRVAL_P(zendlval), (size_t)Z_STRLEN_P(zendlval));
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, filtered_len);
		efree(filtered);
	}
	return SUCCESS;
}
1088
emit_token(int token,int token_line)1089 static zend_always_inline int emit_token(int token, int token_line)
1090 {
1091 if (SCNG(on_event)) {
1092 SCNG(on_event)(ON_TOKEN, token, token_line, SCNG(on_event_context));
1093 }
1094
1095 return token;
1096 }
1097
/* Leave lex_scan() with `token`, routing it through emit_token() along with
 * the local `start_line` (which must be in scope at the point of use).
 * Wrapped in do/while(0) so the macro expands to exactly one statement and
 * stays correct inside an unbraced if/else; each use supplies its own ';'. */
#define RETURN_TOKEN(token) do { return emit_token((token), start_line); } while (0)
1099
lex_scan(zval * zendlval)1100 int lex_scan(zval *zendlval)
1101 {
1102
1103 int start_line = CG(zend_lineno);
1104
1105 restart:
1106 SCNG(yy_text) = YYCURSOR;
1107
1108 /*!re2c
1109 re2c:yyfill:check = 0;
1110 LNUM [0-9]+
1111 DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1112 EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1113 HNUM "0x"[0-9a-fA-F]+
1114 BNUM "0b"[01]+
1115 LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1116 WHITESPACE [ \n\r\t]+
1117 TABS_AND_SPACES [ \t]*
1118 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1119 ANY_CHAR [^]
1120 NEWLINE ("\r"|"\n"|"\r\n")
1121
1122 /* compute yyleng before each rule */
1123 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1124
1125 <ST_IN_SCRIPTING>"exit" {
1126 RETURN_TOKEN(T_EXIT);
1127 }
1128
1129 <ST_IN_SCRIPTING>"die" {
1130 RETURN_TOKEN(T_EXIT);
1131 }
1132
1133 <ST_IN_SCRIPTING>"function" {
1134 RETURN_TOKEN(T_FUNCTION);
1135 }
1136
1137 <ST_IN_SCRIPTING>"const" {
1138 RETURN_TOKEN(T_CONST);
1139 }
1140
1141 <ST_IN_SCRIPTING>"return" {
1142 RETURN_TOKEN(T_RETURN);
1143 }
1144
1145 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
1146 yyless(yyleng - 1);
1147 HANDLE_NEWLINES(yytext, yyleng);
1148 RETURN_TOKEN(T_YIELD_FROM);
1149 }
1150
1151 <ST_IN_SCRIPTING>"yield" {
1152 RETURN_TOKEN(T_YIELD);
1153 }
1154
1155 <ST_IN_SCRIPTING>"try" {
1156 RETURN_TOKEN(T_TRY);
1157 }
1158
1159 <ST_IN_SCRIPTING>"catch" {
1160 RETURN_TOKEN(T_CATCH);
1161 }
1162
1163 <ST_IN_SCRIPTING>"finally" {
1164 RETURN_TOKEN(T_FINALLY);
1165 }
1166
1167 <ST_IN_SCRIPTING>"throw" {
1168 RETURN_TOKEN(T_THROW);
1169 }
1170
1171 <ST_IN_SCRIPTING>"if" {
1172 RETURN_TOKEN(T_IF);
1173 }
1174
1175 <ST_IN_SCRIPTING>"elseif" {
1176 RETURN_TOKEN(T_ELSEIF);
1177 }
1178
1179 <ST_IN_SCRIPTING>"endif" {
1180 RETURN_TOKEN(T_ENDIF);
1181 }
1182
1183 <ST_IN_SCRIPTING>"else" {
1184 RETURN_TOKEN(T_ELSE);
1185 }
1186
1187 <ST_IN_SCRIPTING>"while" {
1188 RETURN_TOKEN(T_WHILE);
1189 }
1190
1191 <ST_IN_SCRIPTING>"endwhile" {
1192 RETURN_TOKEN(T_ENDWHILE);
1193 }
1194
1195 <ST_IN_SCRIPTING>"do" {
1196 RETURN_TOKEN(T_DO);
1197 }
1198
1199 <ST_IN_SCRIPTING>"for" {
1200 RETURN_TOKEN(T_FOR);
1201 }
1202
1203 <ST_IN_SCRIPTING>"endfor" {
1204 RETURN_TOKEN(T_ENDFOR);
1205 }
1206
1207 <ST_IN_SCRIPTING>"foreach" {
1208 RETURN_TOKEN(T_FOREACH);
1209 }
1210
1211 <ST_IN_SCRIPTING>"endforeach" {
1212 RETURN_TOKEN(T_ENDFOREACH);
1213 }
1214
1215 <ST_IN_SCRIPTING>"declare" {
1216 RETURN_TOKEN(T_DECLARE);
1217 }
1218
1219 <ST_IN_SCRIPTING>"enddeclare" {
1220 RETURN_TOKEN(T_ENDDECLARE);
1221 }
1222
1223 <ST_IN_SCRIPTING>"instanceof" {
1224 RETURN_TOKEN(T_INSTANCEOF);
1225 }
1226
1227 <ST_IN_SCRIPTING>"as" {
1228 RETURN_TOKEN(T_AS);
1229 }
1230
1231 <ST_IN_SCRIPTING>"switch" {
1232 RETURN_TOKEN(T_SWITCH);
1233 }
1234
1235 <ST_IN_SCRIPTING>"endswitch" {
1236 RETURN_TOKEN(T_ENDSWITCH);
1237 }
1238
1239 <ST_IN_SCRIPTING>"case" {
1240 RETURN_TOKEN(T_CASE);
1241 }
1242
1243 <ST_IN_SCRIPTING>"default" {
1244 RETURN_TOKEN(T_DEFAULT);
1245 }
1246
1247 <ST_IN_SCRIPTING>"break" {
1248 RETURN_TOKEN(T_BREAK);
1249 }
1250
1251 <ST_IN_SCRIPTING>"continue" {
1252 RETURN_TOKEN(T_CONTINUE);
1253 }
1254
1255 <ST_IN_SCRIPTING>"goto" {
1256 RETURN_TOKEN(T_GOTO);
1257 }
1258
1259 <ST_IN_SCRIPTING>"echo" {
1260 RETURN_TOKEN(T_ECHO);
1261 }
1262
1263 <ST_IN_SCRIPTING>"print" {
1264 RETURN_TOKEN(T_PRINT);
1265 }
1266
1267 <ST_IN_SCRIPTING>"class" {
1268 RETURN_TOKEN(T_CLASS);
1269 }
1270
1271 <ST_IN_SCRIPTING>"interface" {
1272 RETURN_TOKEN(T_INTERFACE);
1273 }
1274
1275 <ST_IN_SCRIPTING>"trait" {
1276 RETURN_TOKEN(T_TRAIT);
1277 }
1278
1279 <ST_IN_SCRIPTING>"extends" {
1280 RETURN_TOKEN(T_EXTENDS);
1281 }
1282
1283 <ST_IN_SCRIPTING>"implements" {
1284 RETURN_TOKEN(T_IMPLEMENTS);
1285 }
1286
1287 <ST_IN_SCRIPTING>"->" {
1288 yy_push_state(ST_LOOKING_FOR_PROPERTY);
1289 RETURN_TOKEN(T_OBJECT_OPERATOR);
1290 }
1291
1292 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1293 HANDLE_NEWLINES(yytext, yyleng);
1294 RETURN_TOKEN(T_WHITESPACE);
1295 }
1296
1297 <ST_LOOKING_FOR_PROPERTY>"->" {
1298 RETURN_TOKEN(T_OBJECT_OPERATOR);
1299 }
1300
1301 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1302 yy_pop_state();
1303 zend_copy_value(zendlval, yytext, yyleng);
1304 RETURN_TOKEN(T_STRING);
1305 }
1306
1307 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1308 yyless(0);
1309 yy_pop_state();
1310 goto restart;
1311 }
1312
1313 <ST_IN_SCRIPTING>"::" {
1314 RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
1315 }
1316
1317 <ST_IN_SCRIPTING>"\\" {
1318 RETURN_TOKEN(T_NS_SEPARATOR);
1319 }
1320
1321 <ST_IN_SCRIPTING>"..." {
1322 RETURN_TOKEN(T_ELLIPSIS);
1323 }
1324
1325 <ST_IN_SCRIPTING>"??" {
1326 RETURN_TOKEN(T_COALESCE);
1327 }
1328
1329 <ST_IN_SCRIPTING>"new" {
1330 RETURN_TOKEN(T_NEW);
1331 }
1332
1333 <ST_IN_SCRIPTING>"clone" {
1334 RETURN_TOKEN(T_CLONE);
1335 }
1336
1337 <ST_IN_SCRIPTING>"var" {
1338 RETURN_TOKEN(T_VAR);
1339 }
1340
1341 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1342 RETURN_TOKEN(T_INT_CAST);
1343 }
1344
1345 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1346 RETURN_TOKEN(T_DOUBLE_CAST);
1347 }
1348
1349 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1350 RETURN_TOKEN(T_STRING_CAST);
1351 }
1352
1353 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1354 RETURN_TOKEN(T_ARRAY_CAST);
1355 }
1356
1357 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1358 RETURN_TOKEN(T_OBJECT_CAST);
1359 }
1360
1361 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1362 RETURN_TOKEN(T_BOOL_CAST);
1363 }
1364
1365 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1366 RETURN_TOKEN(T_UNSET_CAST);
1367 }
1368
1369 <ST_IN_SCRIPTING>"eval" {
1370 RETURN_TOKEN(T_EVAL);
1371 }
1372
1373 <ST_IN_SCRIPTING>"include" {
1374 RETURN_TOKEN(T_INCLUDE);
1375 }
1376
1377 <ST_IN_SCRIPTING>"include_once" {
1378 RETURN_TOKEN(T_INCLUDE_ONCE);
1379 }
1380
1381 <ST_IN_SCRIPTING>"require" {
1382 RETURN_TOKEN(T_REQUIRE);
1383 }
1384
1385 <ST_IN_SCRIPTING>"require_once" {
1386 RETURN_TOKEN(T_REQUIRE_ONCE);
1387 }
1388
1389 <ST_IN_SCRIPTING>"namespace" {
1390 RETURN_TOKEN(T_NAMESPACE);
1391 }
1392
1393 <ST_IN_SCRIPTING>"use" {
1394 RETURN_TOKEN(T_USE);
1395 }
1396
1397 <ST_IN_SCRIPTING>"insteadof" {
1398 RETURN_TOKEN(T_INSTEADOF);
1399 }
1400
1401 <ST_IN_SCRIPTING>"global" {
1402 RETURN_TOKEN(T_GLOBAL);
1403 }
1404
1405 <ST_IN_SCRIPTING>"isset" {
1406 RETURN_TOKEN(T_ISSET);
1407 }
1408
1409 <ST_IN_SCRIPTING>"empty" {
1410 RETURN_TOKEN(T_EMPTY);
1411 }
1412
1413 <ST_IN_SCRIPTING>"__halt_compiler" {
1414 RETURN_TOKEN(T_HALT_COMPILER);
1415 }
1416
1417 <ST_IN_SCRIPTING>"static" {
1418 RETURN_TOKEN(T_STATIC);
1419 }
1420
1421 <ST_IN_SCRIPTING>"abstract" {
1422 RETURN_TOKEN(T_ABSTRACT);
1423 }
1424
1425 <ST_IN_SCRIPTING>"final" {
1426 RETURN_TOKEN(T_FINAL);
1427 }
1428
1429 <ST_IN_SCRIPTING>"private" {
1430 RETURN_TOKEN(T_PRIVATE);
1431 }
1432
1433 <ST_IN_SCRIPTING>"protected" {
1434 RETURN_TOKEN(T_PROTECTED);
1435 }
1436
1437 <ST_IN_SCRIPTING>"public" {
1438 RETURN_TOKEN(T_PUBLIC);
1439 }
1440
1441 <ST_IN_SCRIPTING>"unset" {
1442 RETURN_TOKEN(T_UNSET);
1443 }
1444
1445 <ST_IN_SCRIPTING>"=>" {
1446 RETURN_TOKEN(T_DOUBLE_ARROW);
1447 }
1448
1449 <ST_IN_SCRIPTING>"list" {
1450 RETURN_TOKEN(T_LIST);
1451 }
1452
1453 <ST_IN_SCRIPTING>"array" {
1454 RETURN_TOKEN(T_ARRAY);
1455 }
1456
1457 <ST_IN_SCRIPTING>"callable" {
1458 RETURN_TOKEN(T_CALLABLE);
1459 }
1460
1461 <ST_IN_SCRIPTING>"++" {
1462 RETURN_TOKEN(T_INC);
1463 }
1464
1465 <ST_IN_SCRIPTING>"--" {
1466 RETURN_TOKEN(T_DEC);
1467 }
1468
1469 <ST_IN_SCRIPTING>"===" {
1470 RETURN_TOKEN(T_IS_IDENTICAL);
1471 }
1472
1473 <ST_IN_SCRIPTING>"!==" {
1474 RETURN_TOKEN(T_IS_NOT_IDENTICAL);
1475 }
1476
1477 <ST_IN_SCRIPTING>"==" {
1478 RETURN_TOKEN(T_IS_EQUAL);
1479 }
1480
1481 <ST_IN_SCRIPTING>"!="|"<>" {
1482 RETURN_TOKEN(T_IS_NOT_EQUAL);
1483 }
1484
1485 <ST_IN_SCRIPTING>"<=>" {
1486 RETURN_TOKEN(T_SPACESHIP);
1487 }
1488
1489 <ST_IN_SCRIPTING>"<=" {
1490 RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
1491 }
1492
1493 <ST_IN_SCRIPTING>">=" {
1494 RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
1495 }
1496
1497 <ST_IN_SCRIPTING>"+=" {
1498 RETURN_TOKEN(T_PLUS_EQUAL);
1499 }
1500
1501 <ST_IN_SCRIPTING>"-=" {
1502 RETURN_TOKEN(T_MINUS_EQUAL);
1503 }
1504
1505 <ST_IN_SCRIPTING>"*=" {
1506 RETURN_TOKEN(T_MUL_EQUAL);
1507 }
1508
1509 <ST_IN_SCRIPTING>"*\*" {
1510 RETURN_TOKEN(T_POW);
1511 }
1512
1513 <ST_IN_SCRIPTING>"*\*=" {
1514 RETURN_TOKEN(T_POW_EQUAL);
1515 }
1516
1517 <ST_IN_SCRIPTING>"/=" {
1518 RETURN_TOKEN(T_DIV_EQUAL);
1519 }
1520
1521 <ST_IN_SCRIPTING>".=" {
1522 RETURN_TOKEN(T_CONCAT_EQUAL);
1523 }
1524
1525 <ST_IN_SCRIPTING>"%=" {
1526 RETURN_TOKEN(T_MOD_EQUAL);
1527 }
1528
1529 <ST_IN_SCRIPTING>"<<=" {
1530 RETURN_TOKEN(T_SL_EQUAL);
1531 }
1532
1533 <ST_IN_SCRIPTING>">>=" {
1534 RETURN_TOKEN(T_SR_EQUAL);
1535 }
1536
1537 <ST_IN_SCRIPTING>"&=" {
1538 RETURN_TOKEN(T_AND_EQUAL);
1539 }
1540
1541 <ST_IN_SCRIPTING>"|=" {
1542 RETURN_TOKEN(T_OR_EQUAL);
1543 }
1544
1545 <ST_IN_SCRIPTING>"^=" {
1546 RETURN_TOKEN(T_XOR_EQUAL);
1547 }
1548
1549 <ST_IN_SCRIPTING>"||" {
1550 RETURN_TOKEN(T_BOOLEAN_OR);
1551 }
1552
1553 <ST_IN_SCRIPTING>"&&" {
1554 RETURN_TOKEN(T_BOOLEAN_AND);
1555 }
1556
1557 <ST_IN_SCRIPTING>"OR" {
1558 RETURN_TOKEN(T_LOGICAL_OR);
1559 }
1560
1561 <ST_IN_SCRIPTING>"AND" {
1562 RETURN_TOKEN(T_LOGICAL_AND);
1563 }
1564
1565 <ST_IN_SCRIPTING>"XOR" {
1566 RETURN_TOKEN(T_LOGICAL_XOR);
1567 }
1568
1569 <ST_IN_SCRIPTING>"<<" {
1570 RETURN_TOKEN(T_SL);
1571 }
1572
1573 <ST_IN_SCRIPTING>">>" {
1574 RETURN_TOKEN(T_SR);
1575 }
1576
1577 <ST_IN_SCRIPTING>{TOKENS} {
1578 RETURN_TOKEN(yytext[0]);
1579 }
1580
1581
1582 <ST_IN_SCRIPTING>"{" {
1583 yy_push_state(ST_IN_SCRIPTING);
1584 RETURN_TOKEN('{');
1585 }
1586
1587
1588 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1589 yy_push_state(ST_LOOKING_FOR_VARNAME);
1590 RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
1591 }
1592
1593
1594 <ST_IN_SCRIPTING>"}" {
1595 RESET_DOC_COMMENT();
1596 if (!zend_stack_is_empty(&SCNG(state_stack))) {
1597 yy_pop_state();
1598 }
1599 RETURN_TOKEN('}');
1600 }
1601
1602
1603 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1604 yyless(yyleng - 1);
1605 zend_copy_value(zendlval, yytext, yyleng);
1606 yy_pop_state();
1607 yy_push_state(ST_IN_SCRIPTING);
1608 RETURN_TOKEN(T_STRING_VARNAME);
1609 }
1610
1611
1612 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1613 yyless(0);
1614 yy_pop_state();
1615 yy_push_state(ST_IN_SCRIPTING);
1616 goto restart;
1617 }
1618
1619 <ST_IN_SCRIPTING>{BNUM} {
1620 char *bin = yytext + 2; /* Skip "0b" */
1621 int len = yyleng - 2;
1622 char *end;
1623
1624 /* Skip any leading 0s */
1625 while (*bin == '0') {
1626 ++bin;
1627 --len;
1628 }
1629
1630 if (len < SIZEOF_ZEND_LONG * 8) {
1631 if (len == 0) {
1632 ZVAL_LONG(zendlval, 0);
1633 } else {
1634 errno = 0;
1635 ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1636 ZEND_ASSERT(!errno && end == yytext + yyleng);
1637 }
1638 RETURN_TOKEN(T_LNUMBER);
1639 } else {
1640 ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1641 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1642 ZEND_ASSERT(end == yytext + yyleng);
1643 RETURN_TOKEN(T_DNUMBER);
1644 }
1645 }
1646
1647 <ST_IN_SCRIPTING>{LNUM} {
1648 char *end;
1649 if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1650 errno = 0;
1651 /* base must be passed explicitly for correct parse error on Windows */
1652 ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
1653 /* This isn't an assert, we need to ensure 019 isn't valid octal
1654 * Because the lexing itself doesn't do that for us
1655 */
1656 if (end != yytext + yyleng) {
1657 zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1658 ZVAL_UNDEF(zendlval);
1659 RETURN_TOKEN(T_LNUMBER);
1660 }
1661 } else {
1662 errno = 0;
1663 ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1664 if (errno == ERANGE) { /* Overflow */
1665 errno = 0;
1666 if (yytext[0] == '0') { /* octal overflow */
1667 ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1668 } else {
1669 ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1670 }
1671 /* Also not an assert for the same reason */
1672 if (end != yytext + yyleng) {
1673 zend_throw_exception(zend_ce_parse_error,
1674 "Invalid numeric literal", 0);
1675 ZVAL_UNDEF(zendlval);
1676 RETURN_TOKEN(T_DNUMBER);
1677 }
1678 RETURN_TOKEN(T_DNUMBER);
1679 }
1680 /* Also not an assert for the same reason */
1681 if (end != yytext + yyleng) {
1682 zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1683 ZVAL_UNDEF(zendlval);
1684 RETURN_TOKEN(T_DNUMBER);
1685 }
1686 }
1687 ZEND_ASSERT(!errno);
1688 RETURN_TOKEN(T_LNUMBER);
1689 }
1690
1691 <ST_IN_SCRIPTING>{HNUM} {
1692 char *hex = yytext + 2; /* Skip "0x" */
1693 int len = yyleng - 2;
1694 char *end;
1695
1696 /* Skip any leading 0s */
1697 while (*hex == '0') {
1698 hex++;
1699 len--;
1700 }
1701
1702 if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1703 if (len == 0) {
1704 ZVAL_LONG(zendlval, 0);
1705 } else {
1706 errno = 0;
1707 ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1708 ZEND_ASSERT(!errno && end == hex + len);
1709 }
1710 RETURN_TOKEN(T_LNUMBER);
1711 } else {
1712 ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1713 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1714 ZEND_ASSERT(end == hex + len);
1715 RETURN_TOKEN(T_DNUMBER);
1716 }
1717 }
1718
1719 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1720 if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1721 char *end;
1722 errno = 0;
1723 ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1724 if (errno == ERANGE) {
1725 goto string;
1726 }
1727 ZEND_ASSERT(end == yytext + yyleng);
1728 } else {
1729 string:
1730 ZVAL_STRINGL(zendlval, yytext, yyleng);
1731 }
1732 RETURN_TOKEN(T_NUM_STRING);
1733 }
1734
1735 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1736 ZVAL_STRINGL(zendlval, yytext, yyleng);
1737 RETURN_TOKEN(T_NUM_STRING);
1738 }
1739
1740 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1741 const char *end;
1742
1743 ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1744 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1745 ZEND_ASSERT(end == yytext + yyleng);
1746 RETURN_TOKEN(T_DNUMBER);
1747 }
1748
1749 <ST_IN_SCRIPTING>"__CLASS__" {
1750 RETURN_TOKEN(T_CLASS_C);
1751 }
1752
1753 <ST_IN_SCRIPTING>"__TRAIT__" {
1754 RETURN_TOKEN(T_TRAIT_C);
1755 }
1756
1757 <ST_IN_SCRIPTING>"__FUNCTION__" {
1758 RETURN_TOKEN(T_FUNC_C);
1759 }
1760
1761 <ST_IN_SCRIPTING>"__METHOD__" {
1762 RETURN_TOKEN(T_METHOD_C);
1763 }
1764
1765 <ST_IN_SCRIPTING>"__LINE__" {
1766 RETURN_TOKEN(T_LINE);
1767 }
1768
1769 <ST_IN_SCRIPTING>"__FILE__" {
1770 RETURN_TOKEN(T_FILE);
1771 }
1772
1773 <ST_IN_SCRIPTING>"__DIR__" {
1774 RETURN_TOKEN(T_DIR);
1775 }
1776
1777 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1778 RETURN_TOKEN(T_NS_C);
1779 }
1780
1781
1782 <INITIAL>"<?=" {
1783 BEGIN(ST_IN_SCRIPTING);
1784 RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
1785 }
1786
1787
1788 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1789 HANDLE_NEWLINE(yytext[yyleng-1]);
1790 BEGIN(ST_IN_SCRIPTING);
1791 RETURN_TOKEN(T_OPEN_TAG);
1792 }
1793
1794
1795 <INITIAL>"<?" {
1796 if (CG(short_tags)) {
1797 BEGIN(ST_IN_SCRIPTING);
1798 RETURN_TOKEN(T_OPEN_TAG);
1799 } else {
1800 goto inline_char_handler;
1801 }
1802 }
1803
1804 <INITIAL>{ANY_CHAR} {
1805 if (YYCURSOR > YYLIMIT) {
1806 RETURN_TOKEN(END);
1807 }
1808
1809 inline_char_handler:
1810
1811 while (1) {
1812 YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1813
1814 YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1815
1816 if (YYCURSOR >= YYLIMIT) {
1817 break;
1818 }
1819
1820 if (*YYCURSOR == '?') {
1821 if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1822
1823 YYCURSOR--;
1824 break;
1825 }
1826 }
1827 }
1828
1829 yyleng = YYCURSOR - SCNG(yy_text);
1830
1831 if (SCNG(output_filter)) {
1832 size_t readsize;
1833 char *s = NULL;
1834 size_t sz = 0;
1835 // TODO: avoid reallocation ???
1836 readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1837 ZVAL_STRINGL(zendlval, s, sz);
1838 efree(s);
1839 if (readsize < yyleng) {
1840 yyless(readsize);
1841 }
1842 } else {
1843 ZVAL_STRINGL(zendlval, yytext, yyleng);
1844 }
1845 HANDLE_NEWLINES(yytext, yyleng);
1846 RETURN_TOKEN(T_INLINE_HTML);
1847 }
1848
1849
1850 /* Make sure a label character follows "->", otherwise there is no property
1851 * and "->" will be taken literally
1852 */
1853 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
1854 yyless(yyleng - 3);
1855 yy_push_state(ST_LOOKING_FOR_PROPERTY);
1856 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1857 RETURN_TOKEN(T_VARIABLE);
1858 }
1859
1860 /* A [ always designates a variable offset, regardless of what follows
1861 */
1862 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1863 yyless(yyleng - 1);
1864 yy_push_state(ST_VAR_OFFSET);
1865 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1866 RETURN_TOKEN(T_VARIABLE);
1867 }
1868
1869 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1870 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1871 RETURN_TOKEN(T_VARIABLE);
1872 }
1873
1874 <ST_VAR_OFFSET>"]" {
1875 yy_pop_state();
1876 RETURN_TOKEN(']');
1877 }
1878
1879 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1880 /* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
1881 RETURN_TOKEN(yytext[0]);
1882 }
1883
1884 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1885 /* Invalid rule to return a more explicit parse error with proper line number */
1886 yyless(0);
1887 yy_pop_state();
1888 ZVAL_NULL(zendlval);
1889 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1890 }
1891
1892 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1893 zend_copy_value(zendlval, yytext, yyleng);
1894 RETURN_TOKEN(T_STRING);
1895 }
1896
1897
1898 <ST_IN_SCRIPTING>"#"|"//" {
1899 while (YYCURSOR < YYLIMIT) {
1900 switch (*YYCURSOR++) {
1901 case '\r':
1902 if (*YYCURSOR == '\n') {
1903 YYCURSOR++;
1904 }
1905 /* fall through */
1906 case '\n':
1907 CG(zend_lineno)++;
1908 break;
1909 case '?':
1910 if (*YYCURSOR == '>') {
1911 YYCURSOR--;
1912 break;
1913 }
1914 /* fall through */
1915 default:
1916 continue;
1917 }
1918
1919 break;
1920 }
1921
1922 yyleng = YYCURSOR - SCNG(yy_text);
1923
1924 RETURN_TOKEN(T_COMMENT);
1925 }
1926
1927 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1928 int doc_com;
1929
1930 if (yyleng > 2) {
1931 doc_com = 1;
1932 RESET_DOC_COMMENT();
1933 } else {
1934 doc_com = 0;
1935 }
1936
1937 while (YYCURSOR < YYLIMIT) {
1938 if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1939 break;
1940 }
1941 }
1942
1943 if (YYCURSOR < YYLIMIT) {
1944 YYCURSOR++;
1945 } else {
1946 zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1947 }
1948
1949 yyleng = YYCURSOR - SCNG(yy_text);
1950 HANDLE_NEWLINES(yytext, yyleng);
1951
1952 if (doc_com) {
1953 CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1954 RETURN_TOKEN(T_DOC_COMMENT);
1955 }
1956
1957 RETURN_TOKEN(T_COMMENT);
1958 }
1959
1960 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1961 BEGIN(INITIAL);
1962 if (yytext[yyleng-1] != '>') {
1963 CG(increment_lineno) = 1;
1964 }
1965 RETURN_TOKEN(T_CLOSE_TAG); /* implicit ';' at php-end tag */
1966 }
1967
1968
1969 <ST_IN_SCRIPTING>b?['] {
1970 register char *s, *t;
1971 char *end;
1972 int bprefix = (yytext[0] != '\'') ? 1 : 0;
1973
1974 while (1) {
1975 if (YYCURSOR < YYLIMIT) {
1976 if (*YYCURSOR == '\'') {
1977 YYCURSOR++;
1978 yyleng = YYCURSOR - SCNG(yy_text);
1979
1980 break;
1981 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1982 YYCURSOR++;
1983 }
1984 } else {
1985 yyleng = YYLIMIT - SCNG(yy_text);
1986
1987 /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1988 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1989 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1990 ZVAL_NULL(zendlval);
1991 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1992 }
1993 }
1994
1995 ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1996
1997 /* convert escape sequences */
1998 s = t = Z_STRVAL_P(zendlval);
1999 end = s+Z_STRLEN_P(zendlval);
2000 while (s<end) {
2001 if (*s=='\\') {
2002 s++;
2003
2004 switch(*s) {
2005 case '\\':
2006 case '\'':
2007 *t++ = *s;
2008 Z_STRLEN_P(zendlval)--;
2009 break;
2010 default:
2011 *t++ = '\\';
2012 *t++ = *s;
2013 break;
2014 }
2015 } else {
2016 *t++ = *s;
2017 }
2018
2019 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2020 CG(zend_lineno)++;
2021 }
2022 s++;
2023 }
2024 *t = 0;
2025
2026 if (SCNG(output_filter)) {
2027 size_t sz = 0;
2028 char *str = NULL;
2029 s = Z_STRVAL_P(zendlval);
2030 // TODO: avoid reallocation ???
2031 SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2032 ZVAL_STRINGL(zendlval, str, sz);
2033 }
2034 RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2035 }
2036
2037
2038 <ST_IN_SCRIPTING>b?["] {
2039 int bprefix = (yytext[0] != '"') ? 1 : 0;
2040
2041 while (YYCURSOR < YYLIMIT) {
2042 switch (*YYCURSOR++) {
2043 case '"':
2044 yyleng = YYCURSOR - SCNG(yy_text);
2045 zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
2046 RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2047 case '$':
2048 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2049 break;
2050 }
2051 continue;
2052 case '{':
2053 if (*YYCURSOR == '$') {
2054 break;
2055 }
2056 continue;
2057 case '\\':
2058 if (YYCURSOR < YYLIMIT) {
2059 YYCURSOR++;
2060 }
2061 /* fall through */
2062 default:
2063 continue;
2064 }
2065
2066 YYCURSOR--;
2067 break;
2068 }
2069
2070 /* Remember how much was scanned to save rescanning */
2071 SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2072
2073 YYCURSOR = SCNG(yy_text) + yyleng;
2074
2075 BEGIN(ST_DOUBLE_QUOTES);
2076 RETURN_TOKEN('"');
2077 }
2078
2079
2080 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2081 char *s;
2082 int bprefix = (yytext[0] != '<') ? 1 : 0;
2083 zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2084
2085 CG(zend_lineno)++;
2086 heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2087 s = yytext+bprefix+3;
2088 while ((*s == ' ') || (*s == '\t')) {
2089 s++;
2090 heredoc_label->length--;
2091 }
2092
2093 if (*s == '\'') {
2094 s++;
2095 heredoc_label->length -= 2;
2096
2097 BEGIN(ST_NOWDOC);
2098 } else {
2099 if (*s == '"') {
2100 s++;
2101 heredoc_label->length -= 2;
2102 }
2103
2104 BEGIN(ST_HEREDOC);
2105 }
2106
2107 heredoc_label->label = estrndup(s, heredoc_label->length);
2108
2109 /* Check for ending label on the next line */
2110 if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2111 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2112
2113 if (*end == ';') {
2114 end++;
2115 }
2116
2117 if (*end == '\n' || *end == '\r') {
2118 BEGIN(ST_END_HEREDOC);
2119 }
2120 }
2121
2122 zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2123
2124 RETURN_TOKEN(T_START_HEREDOC);
2125 }
2126
2127
2128 <ST_IN_SCRIPTING>[`] {
2129 BEGIN(ST_BACKQUOTE);
2130 RETURN_TOKEN('`');
2131 }
2132
2133
2134 <ST_END_HEREDOC>{ANY_CHAR} {
2135 zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2136
2137 YYCURSOR += heredoc_label->length - 1;
2138 yyleng = heredoc_label->length;
2139
2140 heredoc_label_dtor(heredoc_label);
2141 efree(heredoc_label);
2142
2143 BEGIN(ST_IN_SCRIPTING);
2144 RETURN_TOKEN(T_END_HEREDOC);
2145 }
2146
2147
2148 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2149 Z_LVAL_P(zendlval) = (zend_long) '{';
2150 yy_push_state(ST_IN_SCRIPTING);
2151 yyless(1);
2152 RETURN_TOKEN(T_CURLY_OPEN);
2153 }
2154
2155
2156 <ST_DOUBLE_QUOTES>["] {
2157 BEGIN(ST_IN_SCRIPTING);
2158 RETURN_TOKEN('"');
2159 }
2160
2161 <ST_BACKQUOTE>[`] {
2162 BEGIN(ST_IN_SCRIPTING);
2163 RETURN_TOKEN('`');
2164 }
2165
2166
2167 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2168 if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2169 YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2170 SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2171
2172 goto double_quotes_scan_done;
2173 }
2174
2175 if (YYCURSOR > YYLIMIT) {
2176 RETURN_TOKEN(END);
2177 }
2178 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2179 YYCURSOR++;
2180 }
2181
2182 while (YYCURSOR < YYLIMIT) {
2183 switch (*YYCURSOR++) {
2184 case '"':
2185 break;
2186 case '$':
2187 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2188 break;
2189 }
2190 continue;
2191 case '{':
2192 if (*YYCURSOR == '$') {
2193 break;
2194 }
2195 continue;
2196 case '\\':
2197 if (YYCURSOR < YYLIMIT) {
2198 YYCURSOR++;
2199 }
2200 /* fall through */
2201 default:
2202 continue;
2203 }
2204
2205 YYCURSOR--;
2206 break;
2207 }
2208
2209 double_quotes_scan_done:
2210 yyleng = YYCURSOR - SCNG(yy_text);
2211
2212 zend_scan_escape_string(zendlval, yytext, yyleng, '"');
2213 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2214 }
2215
2216
2217 <ST_BACKQUOTE>{ANY_CHAR} {
2218 if (YYCURSOR > YYLIMIT) {
2219 RETURN_TOKEN(END);
2220 }
2221 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2222 YYCURSOR++;
2223 }
2224
2225 while (YYCURSOR < YYLIMIT) {
2226 switch (*YYCURSOR++) {
2227 case '`':
2228 break;
2229 case '$':
2230 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2231 break;
2232 }
2233 continue;
2234 case '{':
2235 if (*YYCURSOR == '$') {
2236 break;
2237 }
2238 continue;
2239 case '\\':
2240 if (YYCURSOR < YYLIMIT) {
2241 YYCURSOR++;
2242 }
2243 /* fall through */
2244 default:
2245 continue;
2246 }
2247
2248 YYCURSOR--;
2249 break;
2250 }
2251
2252 yyleng = YYCURSOR - SCNG(yy_text);
2253
2254 zend_scan_escape_string(zendlval, yytext, yyleng, '`');
2255 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2256 }
2257
2258
2259 <ST_HEREDOC>{ANY_CHAR} {
2260 int newline = 0;
2261
2262 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2263
2264 if (YYCURSOR > YYLIMIT) {
2265 RETURN_TOKEN(END);
2266 }
2267
2268 YYCURSOR--;
2269
2270 while (YYCURSOR < YYLIMIT) {
2271 switch (*YYCURSOR++) {
2272 case '\r':
2273 if (*YYCURSOR == '\n') {
2274 YYCURSOR++;
2275 }
2276 /* fall through */
2277 case '\n':
2278 /* Check for ending label on the next line */
2279 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2280 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2281
2282 if (*end == ';') {
2283 end++;
2284 }
2285
2286 if (*end == '\n' || *end == '\r') {
2287 /* newline before label will be subtracted from returned text, but
2288 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2289 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2290 newline = 2; /* Windows newline */
2291 } else {
2292 newline = 1;
2293 }
2294
2295 CG(increment_lineno) = 1; /* For newline before label */
2296 BEGIN(ST_END_HEREDOC);
2297
2298 goto heredoc_scan_done;
2299 }
2300 }
2301 continue;
2302 case '$':
2303 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2304 break;
2305 }
2306 continue;
2307 case '{':
2308 if (*YYCURSOR == '$') {
2309 break;
2310 }
2311 continue;
2312 case '\\':
2313 if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2314 YYCURSOR++;
2315 }
2316 /* fall through */
2317 default:
2318 continue;
2319 }
2320
2321 YYCURSOR--;
2322 break;
2323 }
2324
2325 heredoc_scan_done:
2326 yyleng = YYCURSOR - SCNG(yy_text);
2327
2328 zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
2329 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2330 }
2331
2332
2333 <ST_NOWDOC>{ANY_CHAR} {
2334 int newline = 0;
2335
2336 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2337
2338 if (YYCURSOR > YYLIMIT) {
2339 RETURN_TOKEN(END);
2340 }
2341
2342 YYCURSOR--;
2343
2344 while (YYCURSOR < YYLIMIT) {
2345 switch (*YYCURSOR++) {
2346 case '\r':
2347 if (*YYCURSOR == '\n') {
2348 YYCURSOR++;
2349 }
2350 /* fall through */
2351 case '\n':
2352 /* Check for ending label on the next line */
2353 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2354 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2355
2356 if (*end == ';') {
2357 end++;
2358 }
2359
2360 if (*end == '\n' || *end == '\r') {
2361 /* newline before label will be subtracted from returned text, but
2362 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2363 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2364 newline = 2; /* Windows newline */
2365 } else {
2366 newline = 1;
2367 }
2368
2369 CG(increment_lineno) = 1; /* For newline before label */
2370 BEGIN(ST_END_HEREDOC);
2371
2372 goto nowdoc_scan_done;
2373 }
2374 }
2375 /* fall through */
2376 default:
2377 continue;
2378 }
2379 }
2380
2381 nowdoc_scan_done:
2382 yyleng = YYCURSOR - SCNG(yy_text);
2383
2384 zend_copy_value(zendlval, yytext, yyleng - newline);
2385 HANDLE_NEWLINES(yytext, yyleng - newline);
2386 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2387 }
2388
2389
2390 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2391 if (YYCURSOR > YYLIMIT) {
2392 RETURN_TOKEN(END);
2393 }
2394
2395 zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2396 goto restart;
2397 }
2398
2399 */
2400 }
2401