1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1998-2017 Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Marcus Boerger <helly@php.net> |
16 | Nuno Lopes <nlopess@php.net> |
17 | Scott MacVicar <scottmac@php.net> |
18 | Flex version authors: |
19 | Andi Gutmans <andi@zend.com> |
20 | Zeev Suraski <zeev@zend.com> |
21 +----------------------------------------------------------------------+
22 */
23
24 /* $Id$ */
25
26 #if 0
27 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28 #else
29 # define YYDEBUG(s, c)
30 #endif
31
32 #include "zend_language_scanner_defs.h"
33
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef ZEND_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52
/*
 * re2c scanner interface. The scanner reads its input through these
 * macros; each of them is backed by a field of the scanner globals.
 * Macro arguments are parenthesized so expression arguments expand safely.
 */
#define YYCTYPE   unsigned char
/* Leave the scanner (returning 0) once the cursor plus n would reach
 * YYLIMIT + ZEND_MMAP_AHEAD. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x bytes of the current token: move the cursor back
 * to yytext + x and shrink yyleng accordingly. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
73 /* perform sanity check. If this message is triggered you should
74 increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87
88 /* Globals Macros */
89 #define SCNG LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95
/*
 * Walk the l bytes starting at s and increment CG(zend_lineno) once per
 * line break: every '\n', and every '\r' that is not followed by '\n'
 * (so "\r\n" is counted only once, on its '\n').
 */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *p = (s), *boundary = p+(l); \
	\
	for (; p < boundary; p++) { \
		if (*p == '\n') { \
			CG(zend_lineno)++; \
		} else if (*p == '\r' && *(p+1) != '\n') { \
			CG(zend_lineno)++; \
		} \
	} \
} while (0)
107
/*
 * Increment CG(zend_lineno) when the single character c is '\n' or '\r'.
 * The argument is parenthesized so that expression arguments expand
 * safely; note that c may be evaluated twice.
 */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114
/* Remember the initial string length once scanning has reached the first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* Non-zero when c is '_', an ASCII letter, or any value >= 0x7F */
#define IS_LABEL_START(c) \
	((c) == '_' || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || (c) >= 0x7F)

/* Non-zero when c is an octal digit ('0'..'7') */
#define ZEND_IS_OCT(c) ((c) >= '0' && (c) <= '7')
/* Non-zero when c is a hexadecimal digit (either letter case) */
#define ZEND_IS_HEX(c) \
	(((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f') || ((c) >= '0' && (c) <= '9'))
123
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127 {
128 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129 ZEND_ASSERT(internal_encoding);
130 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131 }
132
encoding_filter_script_to_intermediate(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)133 static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134 {
135 return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136 }
137
encoding_filter_intermediate_to_script(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)138 static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139 {
140 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141 LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142 }
143
encoding_filter_intermediate_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)144 static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145 {
146 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147 ZEND_ASSERT(internal_encoding);
148 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149 internal_encoding, zend_multibyte_encoding_utf8);
150 }
151
152
_yy_push_state(int new_state)153 static void _yy_push_state(int new_state)
154 {
155 zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156 YYSETCONDITION(new_state);
157 }
158
159 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
yy_pop_state(void)161 static void yy_pop_state(void)
162 {
163 int *stack_state = zend_stack_top(&SCNG(state_stack));
164 YYSETCONDITION(*stack_state);
165 zend_stack_del_top(&SCNG(state_stack));
166 }
167
yy_scan_buffer(char * str,unsigned int len)168 static void yy_scan_buffer(char *str, unsigned int len)
169 {
170 YYCURSOR = (YYCTYPE*)str;
171 YYLIMIT = YYCURSOR + len;
172 if (!SCNG(yy_start)) {
173 SCNG(yy_start) = YYCURSOR;
174 }
175 }
176
startup_scanner(void)177 void startup_scanner(void)
178 {
179 CG(parse_error) = 0;
180 CG(doc_comment) = NULL;
181 zend_stack_init(&SCNG(state_stack), sizeof(int));
182 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183 }
184
/* Element destructor for the heredoc label stack: releases the label
 * string owned by the entry (the entry itself is not freed here). */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
188
shutdown_scanner(void)189 void shutdown_scanner(void)
190 {
191 CG(parse_error) = 0;
192 RESET_DOC_COMMENT();
193 zend_stack_destroy(&SCNG(state_stack));
194 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196 SCNG(on_event) = NULL;
197 }
198
zend_save_lexical_state(zend_lex_state * lex_state)199 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
200 {
201 lex_state->yy_leng = SCNG(yy_leng);
202 lex_state->yy_start = SCNG(yy_start);
203 lex_state->yy_text = SCNG(yy_text);
204 lex_state->yy_cursor = SCNG(yy_cursor);
205 lex_state->yy_marker = SCNG(yy_marker);
206 lex_state->yy_limit = SCNG(yy_limit);
207
208 lex_state->state_stack = SCNG(state_stack);
209 zend_stack_init(&SCNG(state_stack), sizeof(int));
210
211 lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
212 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
213
214 lex_state->in = SCNG(yy_in);
215 lex_state->yy_state = YYSTATE;
216 lex_state->filename = zend_get_compiled_filename();
217 lex_state->lineno = CG(zend_lineno);
218
219 lex_state->script_org = SCNG(script_org);
220 lex_state->script_org_size = SCNG(script_org_size);
221 lex_state->script_filtered = SCNG(script_filtered);
222 lex_state->script_filtered_size = SCNG(script_filtered_size);
223 lex_state->input_filter = SCNG(input_filter);
224 lex_state->output_filter = SCNG(output_filter);
225 lex_state->script_encoding = SCNG(script_encoding);
226
227 lex_state->on_event = SCNG(on_event);
228
229 lex_state->ast = CG(ast);
230 lex_state->ast_arena = CG(ast_arena);
231 }
232
zend_restore_lexical_state(zend_lex_state * lex_state)233 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
234 {
235 SCNG(yy_leng) = lex_state->yy_leng;
236 SCNG(yy_start) = lex_state->yy_start;
237 SCNG(yy_text) = lex_state->yy_text;
238 SCNG(yy_cursor) = lex_state->yy_cursor;
239 SCNG(yy_marker) = lex_state->yy_marker;
240 SCNG(yy_limit) = lex_state->yy_limit;
241
242 zend_stack_destroy(&SCNG(state_stack));
243 SCNG(state_stack) = lex_state->state_stack;
244
245 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
246 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
247 SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
248
249 SCNG(yy_in) = lex_state->in;
250 YYSETCONDITION(lex_state->yy_state);
251 CG(zend_lineno) = lex_state->lineno;
252 zend_restore_compiled_filename(lex_state->filename);
253
254 if (SCNG(script_filtered)) {
255 efree(SCNG(script_filtered));
256 SCNG(script_filtered) = NULL;
257 }
258 SCNG(script_org) = lex_state->script_org;
259 SCNG(script_org_size) = lex_state->script_org_size;
260 SCNG(script_filtered) = lex_state->script_filtered;
261 SCNG(script_filtered_size) = lex_state->script_filtered_size;
262 SCNG(input_filter) = lex_state->input_filter;
263 SCNG(output_filter) = lex_state->output_filter;
264 SCNG(script_encoding) = lex_state->script_encoding;
265
266 SCNG(on_event) = lex_state->on_event;
267
268 CG(ast) = lex_state->ast;
269 CG(ast_arena) = lex_state->ast_arena;
270
271 RESET_DOC_COMMENT();
272 }
273
zend_destroy_file_handle(zend_file_handle * file_handle)274 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
275 {
276 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
277 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
278 file_handle->opened_path = NULL;
279 if (file_handle->free_filename) {
280 file_handle->filename = NULL;
281 }
282 }
283
zend_lex_tstring(zval * zv)284 ZEND_API void zend_lex_tstring(zval *zv)
285 {
286 if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0);
287
288 ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
289 }
290
291 #define BOM_UTF32_BE "\x00\x00\xfe\xff"
292 #define BOM_UTF32_LE "\xff\xfe\x00\x00"
293 #define BOM_UTF16_BE "\xfe\xff"
294 #define BOM_UTF16_LE "\xff\xfe"
295 #define BOM_UTF8 "\xef\xbb\xbf"
296
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)297 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
298 {
299 const unsigned char *p;
300 int wchar_size = 2;
301 int le = 0;
302
303 /* utf-16 or utf-32? */
304 p = script;
305 assert(p >= script);
306 while ((size_t)(p-script) < script_size) {
307 p = memchr(p, 0, script_size-(p-script)-2);
308 if (!p) {
309 break;
310 }
311 if (*(p+1) == '\0' && *(p+2) == '\0') {
312 wchar_size = 4;
313 break;
314 }
315
316 /* searching for UTF-32 specific byte orders, so this will do */
317 p += 4;
318 }
319
320 /* BE or LE? */
321 p = script;
322 assert(p >= script);
323 while ((size_t)(p-script) < script_size) {
324 if (*p == '\0' && *(p+wchar_size-1) != '\0') {
325 /* BE */
326 le = 0;
327 break;
328 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
329 /* LE* */
330 le = 1;
331 break;
332 }
333 p += wchar_size;
334 }
335
336 if (wchar_size == 2) {
337 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
338 } else {
339 return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
340 }
341
342 return NULL;
343 }
344
zend_multibyte_detect_unicode(void)345 static const zend_encoding* zend_multibyte_detect_unicode(void)
346 {
347 const zend_encoding *script_encoding = NULL;
348 int bom_size;
349 unsigned char *pos1, *pos2;
350
351 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
352 return NULL;
353 }
354
355 /* check out BOM */
356 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
357 script_encoding = zend_multibyte_encoding_utf32be;
358 bom_size = sizeof(BOM_UTF32_BE)-1;
359 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
360 script_encoding = zend_multibyte_encoding_utf32le;
361 bom_size = sizeof(BOM_UTF32_LE)-1;
362 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
363 script_encoding = zend_multibyte_encoding_utf16be;
364 bom_size = sizeof(BOM_UTF16_BE)-1;
365 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
366 script_encoding = zend_multibyte_encoding_utf16le;
367 bom_size = sizeof(BOM_UTF16_LE)-1;
368 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
369 script_encoding = zend_multibyte_encoding_utf8;
370 bom_size = sizeof(BOM_UTF8)-1;
371 }
372
373 if (script_encoding) {
374 /* remove BOM */
375 LANG_SCNG(script_org) += bom_size;
376 LANG_SCNG(script_org_size) -= bom_size;
377
378 return script_encoding;
379 }
380
381 /* script contains NULL bytes -> auto-detection */
382 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
383 /* check if the NULL byte is after the __HALT_COMPILER(); */
384 pos2 = LANG_SCNG(script_org);
385
386 while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
387 pos2 = memchr(pos2, '_', pos1 - pos2);
388 if (!pos2) break;
389 pos2++;
390 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
391 pos2 += sizeof("_HALT_COMPILER")-1;
392 while (*pos2 == ' ' ||
393 *pos2 == '\t' ||
394 *pos2 == '\r' ||
395 *pos2 == '\n') {
396 pos2++;
397 }
398 if (*pos2 == '(') {
399 pos2++;
400 while (*pos2 == ' ' ||
401 *pos2 == '\t' ||
402 *pos2 == '\r' ||
403 *pos2 == '\n') {
404 pos2++;
405 }
406 if (*pos2 == ')') {
407 pos2++;
408 while (*pos2 == ' ' ||
409 *pos2 == '\t' ||
410 *pos2 == '\r' ||
411 *pos2 == '\n') {
412 pos2++;
413 }
414 if (*pos2 == ';') {
415 return NULL;
416 }
417 }
418 }
419 }
420 }
421 /* make best effort if BOM is missing */
422 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
423 }
424
425 return NULL;
426 }
427
zend_multibyte_find_script_encoding(void)428 static const zend_encoding* zend_multibyte_find_script_encoding(void)
429 {
430 const zend_encoding *script_encoding;
431
432 if (CG(detect_unicode)) {
433 /* check out bom(byte order mark) and see if containing wchars */
434 script_encoding = zend_multibyte_detect_unicode();
435 if (script_encoding != NULL) {
436 /* bom or wchar detection is prior to 'script_encoding' option */
437 return script_encoding;
438 }
439 }
440
441 /* if no script_encoding specified, just leave alone */
442 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
443 return NULL;
444 }
445
446 /* if multiple encodings specified, detect automagically */
447 if (CG(script_encoding_list_size) > 1) {
448 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
449 }
450
451 return CG(script_encoding_list)[0];
452 }
453
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)454 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
455 {
456 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
457 const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
458
459 if (!script_encoding) {
460 return FAILURE;
461 }
462
463 /* judge input/output filter */
464 LANG_SCNG(script_encoding) = script_encoding;
465 LANG_SCNG(input_filter) = NULL;
466 LANG_SCNG(output_filter) = NULL;
467
468 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
469 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
470 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
471 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
472 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
473 } else {
474 LANG_SCNG(input_filter) = NULL;
475 LANG_SCNG(output_filter) = NULL;
476 }
477 return SUCCESS;
478 }
479
480 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
481 LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
482 LANG_SCNG(output_filter) = NULL;
483 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
484 LANG_SCNG(input_filter) = NULL;
485 LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
486 } else {
487 /* both script and internal encodings are incompatible w/ flex */
488 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
489 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
490 }
491
492 return 0;
493 }
494
open_file_for_scanning(zend_file_handle * file_handle)495 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
496 {
497 char *buf;
498 size_t size, offset = 0;
499 zend_string *compiled_filename;
500
501 /* The shebang line was read, get the current position to obtain the buffer start */
502 if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
503 if ((offset = ftell(file_handle->handle.fp)) == -1) {
504 offset = 0;
505 }
506 }
507
508 if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
509 return FAILURE;
510 }
511
512 zend_llist_add_element(&CG(open_files), file_handle);
513 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
514 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
515 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
516 fh->handle.stream.handle = (void*)(((char*)fh) + diff);
517 file_handle->handle.stream.handle = fh->handle.stream.handle;
518 }
519
520 /* Reset the scanner for scanning the new file */
521 SCNG(yy_in) = file_handle;
522 SCNG(yy_start) = NULL;
523
524 if (size != -1) {
525 if (CG(multibyte)) {
526 SCNG(script_org) = (unsigned char*)buf;
527 SCNG(script_org_size) = size;
528 SCNG(script_filtered) = NULL;
529
530 zend_multibyte_set_filter(NULL);
531
532 if (SCNG(input_filter)) {
533 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
534 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
535 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
536 }
537 buf = (char*)SCNG(script_filtered);
538 size = SCNG(script_filtered_size);
539 }
540 }
541 SCNG(yy_start) = (unsigned char *)buf - offset;
542 yy_scan_buffer(buf, (unsigned int)size);
543 } else {
544 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
545 }
546
547 BEGIN(INITIAL);
548
549 if (file_handle->opened_path) {
550 compiled_filename = zend_string_copy(file_handle->opened_path);
551 } else {
552 compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
553 }
554
555 zend_set_compiled_filename(compiled_filename);
556 zend_string_release(compiled_filename);
557
558 if (CG(start_lineno)) {
559 CG(zend_lineno) = CG(start_lineno);
560 CG(start_lineno) = 0;
561 } else {
562 CG(zend_lineno) = 1;
563 }
564
565 RESET_DOC_COMMENT();
566 CG(increment_lineno) = 0;
567 return SUCCESS;
568 }
END_EXTERN_C()569 END_EXTERN_C()
570
571
572 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
573 {
574 zend_lex_state original_lex_state;
575 zend_op_array *op_array = NULL;
576 zend_save_lexical_state(&original_lex_state);
577
578 if (open_file_for_scanning(file_handle)==FAILURE) {
579 if (type==ZEND_REQUIRE) {
580 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
581 zend_bailout();
582 } else {
583 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
584 }
585 } else {
586 zend_bool original_in_compilation = CG(in_compilation);
587 CG(in_compilation) = 1;
588
589 CG(ast) = NULL;
590 CG(ast_arena) = zend_arena_create(1024 * 32);
591 if (!zendparse()) {
592 int last_lineno = CG(zend_lineno);
593 zval retval_zv;
594 zend_file_context original_file_context;
595 zend_oparray_context original_oparray_context;
596 zend_op_array *original_active_op_array = CG(active_op_array);
597 op_array = emalloc(sizeof(zend_op_array));
598 init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE);
599 CG(active_op_array) = op_array;
600 ZVAL_LONG(&retval_zv, 1);
601
602 if (zend_ast_process) {
603 zend_ast_process(CG(ast));
604 }
605
606 zend_file_context_begin(&original_file_context);
607 zend_oparray_context_begin(&original_oparray_context);
608 zend_compile_top_stmt(CG(ast));
609 CG(zend_lineno) = last_lineno;
610 zend_emit_final_return(&retval_zv);
611 op_array->line_start = 1;
612 op_array->line_end = last_lineno;
613 pass_two(op_array);
614 zend_oparray_context_end(&original_oparray_context);
615 zend_file_context_end(&original_file_context);
616
617 CG(active_op_array) = original_active_op_array;
618 }
619
620 zend_ast_destroy(CG(ast));
621 zend_arena_destroy(CG(ast_arena));
622 CG(in_compilation) = original_in_compilation;
623 }
624
625 zend_restore_lexical_state(&original_lex_state);
626 return op_array;
627 }
628
629
compile_filename(int type,zval * filename)630 zend_op_array *compile_filename(int type, zval *filename)
631 {
632 zend_file_handle file_handle;
633 zval tmp;
634 zend_op_array *retval;
635 zend_string *opened_path = NULL;
636
637 if (Z_TYPE_P(filename) != IS_STRING) {
638 tmp = *filename;
639 zval_copy_ctor(&tmp);
640 convert_to_string(&tmp);
641 filename = &tmp;
642 }
643 file_handle.filename = Z_STRVAL_P(filename);
644 file_handle.free_filename = 0;
645 file_handle.type = ZEND_HANDLE_FILENAME;
646 file_handle.opened_path = NULL;
647 file_handle.handle.fp = NULL;
648
649 retval = zend_compile_file(&file_handle, type);
650 if (retval && file_handle.handle.stream.handle) {
651 if (!file_handle.opened_path) {
652 file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
653 }
654
655 zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
656
657 if (opened_path) {
658 zend_string_release(opened_path);
659 }
660 }
661 zend_destroy_file_handle(&file_handle);
662
663 if (filename==&tmp) {
664 zval_dtor(&tmp);
665 }
666 return retval;
667 }
668
zend_prepare_string_for_scanning(zval * str,char * filename)669 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
670 {
671 char *buf;
672 size_t size, old_len;
673 zend_string *new_compiled_filename;
674
675 /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
676 old_len = Z_STRLEN_P(str);
677 Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
678 Z_TYPE_INFO_P(str) = IS_STRING_EX;
679 memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
680
681 SCNG(yy_in) = NULL;
682 SCNG(yy_start) = NULL;
683
684 buf = Z_STRVAL_P(str);
685 size = old_len;
686
687 if (CG(multibyte)) {
688 SCNG(script_org) = (unsigned char*)buf;
689 SCNG(script_org_size) = size;
690 SCNG(script_filtered) = NULL;
691
692 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
693
694 if (SCNG(input_filter)) {
695 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
696 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
697 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
698 }
699 buf = (char*)SCNG(script_filtered);
700 size = SCNG(script_filtered_size);
701 }
702 }
703
704 yy_scan_buffer(buf, (unsigned int)size);
705
706 new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
707 zend_set_compiled_filename(new_compiled_filename);
708 zend_string_release(new_compiled_filename);
709 CG(zend_lineno) = 1;
710 CG(increment_lineno) = 0;
711 RESET_DOC_COMMENT();
712 return SUCCESS;
713 }
714
715
zend_get_scanned_file_offset(void)716 ZEND_API size_t zend_get_scanned_file_offset(void)
717 {
718 size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
719 if (SCNG(input_filter)) {
720 size_t original_offset = offset, length = 0;
721 do {
722 unsigned char *p = NULL;
723 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
724 return (size_t)-1;
725 }
726 efree(p);
727 if (length > original_offset) {
728 offset--;
729 } else if (length < original_offset) {
730 offset++;
731 }
732 } while (original_offset != length);
733 }
734 return offset;
735 }
736
737
compile_string(zval * source_string,char * filename)738 zend_op_array *compile_string(zval *source_string, char *filename)
739 {
740 zend_lex_state original_lex_state;
741 zend_op_array *op_array = NULL;
742 zval tmp;
743 zend_bool original_in_compilation = CG(in_compilation);
744
745 if (Z_STRLEN_P(source_string)==0) {
746 return NULL;
747 }
748
749 ZVAL_DUP(&tmp, source_string);
750 convert_to_string(&tmp);
751 source_string = &tmp;
752
753 CG(in_compilation) = 1;
754 zend_save_lexical_state(&original_lex_state);
755 if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
756 CG(ast) = NULL;
757 CG(ast_arena) = zend_arena_create(1024 * 32);
758 BEGIN(ST_IN_SCRIPTING);
759
760 if (!zendparse()) {
761 int last_lineno = CG(zend_lineno);
762 zend_file_context original_file_context;
763 zend_oparray_context original_oparray_context;
764 zend_op_array *original_active_op_array = CG(active_op_array);
765 op_array = emalloc(sizeof(zend_op_array));
766 init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE);
767 CG(active_op_array) = op_array;
768
769 if (zend_ast_process) {
770 zend_ast_process(CG(ast));
771 }
772
773 zend_file_context_begin(&original_file_context);
774 zend_oparray_context_begin(&original_oparray_context);
775 zend_compile_top_stmt(CG(ast));
776 CG(zend_lineno) = last_lineno;
777 zend_emit_final_return(NULL);
778 op_array->line_start = 1;
779 op_array->line_end = last_lineno;
780 pass_two(op_array);
781 zend_oparray_context_end(&original_oparray_context);
782 zend_file_context_end(&original_file_context);
783
784 CG(active_op_array) = original_active_op_array;
785 }
786
787 zend_ast_destroy(CG(ast));
788 zend_arena_destroy(CG(ast_arena));
789 }
790
791 zend_restore_lexical_state(&original_lex_state);
792 zval_dtor(&tmp);
793 CG(in_compilation) = original_in_compilation;
794 return op_array;
795 }
796
797
BEGIN_EXTERN_C()798 BEGIN_EXTERN_C()
799 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
800 {
801 zend_lex_state original_lex_state;
802 zend_file_handle file_handle;
803
804 file_handle.type = ZEND_HANDLE_FILENAME;
805 file_handle.filename = filename;
806 file_handle.free_filename = 0;
807 file_handle.opened_path = NULL;
808 zend_save_lexical_state(&original_lex_state);
809 if (open_file_for_scanning(&file_handle)==FAILURE) {
810 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
811 zend_restore_lexical_state(&original_lex_state);
812 return FAILURE;
813 }
814 zend_highlight(syntax_highlighter_ini);
815 if (SCNG(script_filtered)) {
816 efree(SCNG(script_filtered));
817 SCNG(script_filtered) = NULL;
818 }
819 zend_destroy_file_handle(&file_handle);
820 zend_restore_lexical_state(&original_lex_state);
821 return SUCCESS;
822 }
823
/*
 * Syntax-highlight the code in `str` using the colours in
 * syntax_highlighter_ini; str_name is used as the compiled filename.
 *
 * The work is done on a private copy of the zval, which is released on
 * every exit path (including the FAILURE one). The lexical state is saved
 * around the operation. Returns FAILURE when the string cannot be prepared
 * for scanning, SUCCESS otherwise.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	/* zend_prepare_string_for_scanning() modifies the string, so use a copy */
	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state);
	if (zend_prepare_string_for_scanning(str, str_name) == FAILURE) {
		zend_restore_lexical_state(&original_lex_state);
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini);
	/* release the converted copy of the script, if one was made */
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state);
	zval_dtor(str);
	return SUCCESS;
}
846
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)847 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
848 {
849 size_t length;
850 unsigned char *new_yy_start;
851
852 /* convert and set */
853 if (!SCNG(input_filter)) {
854 if (SCNG(script_filtered)) {
855 efree(SCNG(script_filtered));
856 SCNG(script_filtered) = NULL;
857 }
858 SCNG(script_filtered_size) = 0;
859 length = SCNG(script_org_size);
860 new_yy_start = SCNG(script_org);
861 } else {
862 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
863 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
864 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
865 }
866 if (SCNG(script_filtered)) {
867 efree(SCNG(script_filtered));
868 }
869 SCNG(script_filtered) = new_yy_start;
870 SCNG(script_filtered_size) = length;
871 }
872
873 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
874 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
875 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
876 SCNG(yy_limit) = new_yy_start + length;
877
878 SCNG(yy_start) = new_yy_start;
879 }
880
881
/* TODO: avoid reallocation ??? */
/*
 * Store yyleng bytes of yytext in zendlval as a string. When an output
 * filter is installed the bytes are passed through it first and the
 * filter's buffer is freed after it has been copied into the zval.
 * Expands to a bare if/else statement.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (!SCNG(output_filter)) { \
		ZVAL_STRINGL(zendlval, yytext, yyleng); \
	} else { \
		size_t sz = 0; \
		char *s = NULL; \
		SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \
		ZVAL_STRINGL(zendlval, s, sz); \
		efree(s); \
	}
893
/*
 * Decode the escape sequences of a double-quoted, backquoted or heredoc
 * string segment.
 *
 * The len bytes at str are copied into zendlval as a new string, which is then
 * rewritten in place: \n \r \t \f \v \e, \\, \$, the escaped quote_type
 * character, \xH[H], \N[N[N]] (octal) and \u{H...} are replaced by the bytes
 * they denote and the string length is reduced to match. Any other backslash
 * sequence is kept verbatim, backslash included. CG(zend_lineno) is
 * incremented for each "\n", and each "\r" not followed by "\n", found at the
 * read position. If an output filter is installed the decoded bytes are
 * finally passed through it.
 *
 * quote_type selects which of \" and \` collapses to the bare quote; the
 * other one (or both, when quote_type matches neither) stays escaped.
 *
 * Returns SUCCESS, or FAILURE after throwing a parse error for a malformed or
 * too-large \u{...} sequence; in that case zendlval is destroyed and set to
 * UNDEF.
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *s, *t; /* s: read cursor, t: write cursor into the same buffer */
	char *end;

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences */
	s = t = Z_STRVAL_P(zendlval);
	end = s+Z_STRLEN_P(zendlval);
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* lone backslash at the very end: keep it as is */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'r':
					*t++ = '\r';
					Z_STRLEN_P(zendlval)--;
					break;
				case 't':
					*t++ = '\t';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'f':
					*t++ = '\f';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'v':
					*t++ = '\v';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'e':
#ifdef ZEND_WIN32
					*t++ = VK_ESCAPE;
#else
					*t++ = '\e';
#endif
					Z_STRLEN_P(zendlval)--;
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						/* not the active quote character: leave the escape untouched */
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fall through */
				case '\\':
				case '$':
					*t++ = *s;
					Z_STRLEN_P(zendlval)--;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						Z_STRLEN_P(zendlval)--; /* for the 'x' */

						hex_buf[0] = *(++s);
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
						}
						*t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
					{
						/* cache where we started so we can parse after validating */
						char *start = s + 1;
						size_t seq_len = 0; /* bytes in "{...}", both braces included */
						zend_bool valid = 1;
						unsigned long codepoint;
						size_t byte_len = 0;

						if (*start != '{') {
							/* we silently let this pass to avoid breaking code
							 * with JSON in string literals (e.g. "\"\u202e\"")
							 */
							*t++ = '\\';
							*t++ = 'u';
							break;
						} else {
							/* on the other hand, invalid \u{blah} errors */
							s++;
							seq_len++;
							s++;
							while (*s != '}') {
								if (!ZEND_IS_HEX(*s)) {
									valid = 0;
									break;
								} else {
									seq_len++;
								}
								s++;
							}
							if (*s == '}') {
								valid = 1;
								seq_len++;
							}
						}

						/* \u{} is invalid */
						if (seq_len <= 2) {
							valid = 0;
						}

						if (!valid) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						errno = 0;
						codepoint = strtoul(start + 1, NULL, 16);

						/* per RFC 3629, UTF-8 can only represent 21 bits */
						if (codepoint > 0x10FFFF || errno) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
						if (codepoint < 0x80) {
							byte_len = 1;
							*t++ = codepoint;
						} else if (codepoint <= 0x7FF) {
							byte_len = 2;
							*t++ = (codepoint >> 6) + 0xC0;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0xFFFF) {
							byte_len = 3;
							*t++ = (codepoint >> 12) + 0xE0;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0x10FFFF) {
							byte_len = 4;
							*t++ = (codepoint >> 18) + 0xF0;
							*t++ = ((codepoint >> 12) & 0x3F) + 0x80;
							*t++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*t++ = (codepoint & 0x3F) + 0x80;
						}

						Z_STRLEN_P(zendlval) -= 2; /* \u */
						Z_STRLEN_P(zendlval) -= (seq_len - byte_len);
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
								Z_STRLEN_P(zendlval)--;
							}
						}
						*t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
					} else {
						/* unknown escape: keep backslash and character */
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* count a line for "\n" and for a "\r" that is not part of "\r\n" */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	if (SCNG(output_filter)) {
		size_t sz = 0;
		/* start from NULL like the other filter call sites, so a filter that
		 * writes nothing does not leave a garbage pointer to copy and free */
		unsigned char *filtered = NULL;
		// TODO: avoid reallocation ???
		s = Z_STRVAL_P(zendlval);
		SCNG(output_filter)(&filtered, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, sz);
		efree(filtered);
	}
	return SUCCESS;
}
1103
emit_token(int token,int token_line)1104 static zend_always_inline int emit_token(int token, int token_line)
1105 {
1106 if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line);
1107
1108 return token;
1109 }
1110
/* Return `token` from the enclosing function through emit_token(). The
 * enclosing function must have a `start_line` variable in scope. Note that
 * the expansion already ends with ';'. */
#define RETURN_TOKEN(token) return emit_token(token, start_line);
1112
lex_scan(zval * zendlval)1113 int lex_scan(zval *zendlval)
1114 {
1115
1116 int start_line = CG(zend_lineno);
1117
1118 restart:
1119 SCNG(yy_text) = YYCURSOR;
1120
1121 /*!re2c
1122 re2c:yyfill:check = 0;
1123 LNUM [0-9]+
1124 DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1125 EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1126 HNUM "0x"[0-9a-fA-F]+
1127 BNUM "0b"[01]+
1128 LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1129 WHITESPACE [ \n\r\t]+
1130 TABS_AND_SPACES [ \t]*
1131 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1132 ANY_CHAR [^]
1133 NEWLINE ("\r"|"\n"|"\r\n")
1134
1135 /* compute yyleng before each rule */
1136 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1137
/* Reserved words recognised in ST_IN_SCRIPTING: every rule in this run simply
 * returns the parser token for one keyword. */

<ST_IN_SCRIPTING>"exit" {
	RETURN_TOKEN(T_EXIT);
}

/* "die" yields the same token as "exit" */
<ST_IN_SCRIPTING>"die" {
	RETURN_TOKEN(T_EXIT);
}

<ST_IN_SCRIPTING>"function" {
	RETURN_TOKEN(T_FUNCTION);
}

<ST_IN_SCRIPTING>"const" {
	RETURN_TOKEN(T_CONST);
}

<ST_IN_SCRIPTING>"return" {
	RETURN_TOKEN(T_RETURN);
}

/* "yield from" is one token. The pattern also consumes a single character
 * that cannot continue a label after "from"; yyless() gives that character
 * back. HANDLE_NEWLINES is defined outside this chunk -- presumably it
 * accounts for newlines inside the matched whitespace (confirm). */
<ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
	yyless(yyleng - 1);
	HANDLE_NEWLINES(yytext, yyleng);
	RETURN_TOKEN(T_YIELD_FROM);
}

<ST_IN_SCRIPTING>"yield" {
	RETURN_TOKEN(T_YIELD);
}

<ST_IN_SCRIPTING>"try" {
	RETURN_TOKEN(T_TRY);
}

<ST_IN_SCRIPTING>"catch" {
	RETURN_TOKEN(T_CATCH);
}

<ST_IN_SCRIPTING>"finally" {
	RETURN_TOKEN(T_FINALLY);
}

<ST_IN_SCRIPTING>"throw" {
	RETURN_TOKEN(T_THROW);
}

<ST_IN_SCRIPTING>"if" {
	RETURN_TOKEN(T_IF);
}

<ST_IN_SCRIPTING>"elseif" {
	RETURN_TOKEN(T_ELSEIF);
}

<ST_IN_SCRIPTING>"endif" {
	RETURN_TOKEN(T_ENDIF);
}

<ST_IN_SCRIPTING>"else" {
	RETURN_TOKEN(T_ELSE);
}

<ST_IN_SCRIPTING>"while" {
	RETURN_TOKEN(T_WHILE);
}

<ST_IN_SCRIPTING>"endwhile" {
	RETURN_TOKEN(T_ENDWHILE);
}

<ST_IN_SCRIPTING>"do" {
	RETURN_TOKEN(T_DO);
}

<ST_IN_SCRIPTING>"for" {
	RETURN_TOKEN(T_FOR);
}

<ST_IN_SCRIPTING>"endfor" {
	RETURN_TOKEN(T_ENDFOR);
}

<ST_IN_SCRIPTING>"foreach" {
	RETURN_TOKEN(T_FOREACH);
}

<ST_IN_SCRIPTING>"endforeach" {
	RETURN_TOKEN(T_ENDFOREACH);
}

<ST_IN_SCRIPTING>"declare" {
	RETURN_TOKEN(T_DECLARE);
}

<ST_IN_SCRIPTING>"enddeclare" {
	RETURN_TOKEN(T_ENDDECLARE);
}

<ST_IN_SCRIPTING>"instanceof" {
	RETURN_TOKEN(T_INSTANCEOF);
}

<ST_IN_SCRIPTING>"as" {
	RETURN_TOKEN(T_AS);
}

<ST_IN_SCRIPTING>"switch" {
	RETURN_TOKEN(T_SWITCH);
}

<ST_IN_SCRIPTING>"endswitch" {
	RETURN_TOKEN(T_ENDSWITCH);
}

<ST_IN_SCRIPTING>"case" {
	RETURN_TOKEN(T_CASE);
}

<ST_IN_SCRIPTING>"default" {
	RETURN_TOKEN(T_DEFAULT);
}

<ST_IN_SCRIPTING>"break" {
	RETURN_TOKEN(T_BREAK);
}

<ST_IN_SCRIPTING>"continue" {
	RETURN_TOKEN(T_CONTINUE);
}

<ST_IN_SCRIPTING>"goto" {
	RETURN_TOKEN(T_GOTO);
}

<ST_IN_SCRIPTING>"echo" {
	RETURN_TOKEN(T_ECHO);
}

<ST_IN_SCRIPTING>"print" {
	RETURN_TOKEN(T_PRINT);
}

<ST_IN_SCRIPTING>"class" {
	RETURN_TOKEN(T_CLASS);
}

<ST_IN_SCRIPTING>"interface" {
	RETURN_TOKEN(T_INTERFACE);
}

<ST_IN_SCRIPTING>"trait" {
	RETURN_TOKEN(T_TRAIT);
}

<ST_IN_SCRIPTING>"extends" {
	RETURN_TOKEN(T_EXTENDS);
}

<ST_IN_SCRIPTING>"implements" {
	RETURN_TOKEN(T_IMPLEMENTS);
}
1299
/* "->" in normal code: push ST_LOOKING_FOR_PROPERTY so that the next label is
 * returned as a plain T_STRING (see the {LABEL} rule below). */
<ST_IN_SCRIPTING>"->" {
	yy_push_state(ST_LOOKING_FOR_PROPERTY);
	RETURN_TOKEN(T_OBJECT_OPERATOR);
}

/* Whitespace is a token of its own in both states. */
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
	HANDLE_NEWLINES(yytext, yyleng);
	RETURN_TOKEN(T_WHITESPACE);
}

/* A further "->" while already looking for a property: stay in the state. */
<ST_LOOKING_FOR_PROPERTY>"->" {
	RETURN_TOKEN(T_OBJECT_OPERATOR);
}

/* The property name itself: leave the state and return the label text. */
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
	yy_pop_state();
	zend_copy_value(zendlval, yytext, yyleng);
	RETURN_TOKEN(T_STRING);
}

/* Anything else: un-read the character, leave the state and rescan it under
 * the state that was active before. */
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
	yyless(0);
	yy_pop_state();
	goto restart;
}
1325
/* Multi-character operators, cast expressions and further keywords in
 * ST_IN_SCRIPTING. Every rule in this run returns a fixed token and has no
 * other effect; the final {TOKENS} rule returns a single punctuation
 * character as its own token value. */

<ST_IN_SCRIPTING>"::" {
	RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
}

<ST_IN_SCRIPTING>"\\" {
	RETURN_TOKEN(T_NS_SEPARATOR);
}

<ST_IN_SCRIPTING>"..." {
	RETURN_TOKEN(T_ELLIPSIS);
}

<ST_IN_SCRIPTING>"??" {
	RETURN_TOKEN(T_COALESCE);
}

<ST_IN_SCRIPTING>"new" {
	RETURN_TOKEN(T_NEW);
}

<ST_IN_SCRIPTING>"clone" {
	RETURN_TOKEN(T_CLONE);
}

<ST_IN_SCRIPTING>"var" {
	RETURN_TOKEN(T_VAR);
}

/* Casts: "(" type ")" with optional spaces/tabs around the type name is
 * matched as one token. */
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
	RETURN_TOKEN(T_INT_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
	RETURN_TOKEN(T_DOUBLE_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
	RETURN_TOKEN(T_STRING_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
	RETURN_TOKEN(T_ARRAY_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
	RETURN_TOKEN(T_OBJECT_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
	RETURN_TOKEN(T_BOOL_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
	RETURN_TOKEN(T_UNSET_CAST);
}

<ST_IN_SCRIPTING>"eval" {
	RETURN_TOKEN(T_EVAL);
}

<ST_IN_SCRIPTING>"include" {
	RETURN_TOKEN(T_INCLUDE);
}

<ST_IN_SCRIPTING>"include_once" {
	RETURN_TOKEN(T_INCLUDE_ONCE);
}

<ST_IN_SCRIPTING>"require" {
	RETURN_TOKEN(T_REQUIRE);
}

<ST_IN_SCRIPTING>"require_once" {
	RETURN_TOKEN(T_REQUIRE_ONCE);
}

<ST_IN_SCRIPTING>"namespace" {
	RETURN_TOKEN(T_NAMESPACE);
}

<ST_IN_SCRIPTING>"use" {
	RETURN_TOKEN(T_USE);
}

<ST_IN_SCRIPTING>"insteadof" {
	RETURN_TOKEN(T_INSTEADOF);
}

<ST_IN_SCRIPTING>"global" {
	RETURN_TOKEN(T_GLOBAL);
}

<ST_IN_SCRIPTING>"isset" {
	RETURN_TOKEN(T_ISSET);
}

<ST_IN_SCRIPTING>"empty" {
	RETURN_TOKEN(T_EMPTY);
}

<ST_IN_SCRIPTING>"__halt_compiler" {
	RETURN_TOKEN(T_HALT_COMPILER);
}

<ST_IN_SCRIPTING>"static" {
	RETURN_TOKEN(T_STATIC);
}

<ST_IN_SCRIPTING>"abstract" {
	RETURN_TOKEN(T_ABSTRACT);
}

<ST_IN_SCRIPTING>"final" {
	RETURN_TOKEN(T_FINAL);
}

<ST_IN_SCRIPTING>"private" {
	RETURN_TOKEN(T_PRIVATE);
}

<ST_IN_SCRIPTING>"protected" {
	RETURN_TOKEN(T_PROTECTED);
}

<ST_IN_SCRIPTING>"public" {
	RETURN_TOKEN(T_PUBLIC);
}

<ST_IN_SCRIPTING>"unset" {
	RETURN_TOKEN(T_UNSET);
}

<ST_IN_SCRIPTING>"=>" {
	RETURN_TOKEN(T_DOUBLE_ARROW);
}

<ST_IN_SCRIPTING>"list" {
	RETURN_TOKEN(T_LIST);
}

<ST_IN_SCRIPTING>"array" {
	RETURN_TOKEN(T_ARRAY);
}

<ST_IN_SCRIPTING>"callable" {
	RETURN_TOKEN(T_CALLABLE);
}

<ST_IN_SCRIPTING>"++" {
	RETURN_TOKEN(T_INC);
}

<ST_IN_SCRIPTING>"--" {
	RETURN_TOKEN(T_DEC);
}

<ST_IN_SCRIPTING>"===" {
	RETURN_TOKEN(T_IS_IDENTICAL);
}

<ST_IN_SCRIPTING>"!==" {
	RETURN_TOKEN(T_IS_NOT_IDENTICAL);
}

<ST_IN_SCRIPTING>"==" {
	RETURN_TOKEN(T_IS_EQUAL);
}

/* both spellings of "not equal" share one token */
<ST_IN_SCRIPTING>"!="|"<>" {
	RETURN_TOKEN(T_IS_NOT_EQUAL);
}

<ST_IN_SCRIPTING>"<=>" {
	RETURN_TOKEN(T_SPACESHIP);
}

<ST_IN_SCRIPTING>"<=" {
	RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
}

<ST_IN_SCRIPTING>">=" {
	RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
}

<ST_IN_SCRIPTING>"+=" {
	RETURN_TOKEN(T_PLUS_EQUAL);
}

<ST_IN_SCRIPTING>"-=" {
	RETURN_TOKEN(T_MINUS_EQUAL);
}

<ST_IN_SCRIPTING>"*=" {
	RETURN_TOKEN(T_MUL_EQUAL);
}

<ST_IN_SCRIPTING>"*\*" {
	RETURN_TOKEN(T_POW);
}

<ST_IN_SCRIPTING>"*\*=" {
	RETURN_TOKEN(T_POW_EQUAL);
}

<ST_IN_SCRIPTING>"/=" {
	RETURN_TOKEN(T_DIV_EQUAL);
}

<ST_IN_SCRIPTING>".=" {
	RETURN_TOKEN(T_CONCAT_EQUAL);
}

<ST_IN_SCRIPTING>"%=" {
	RETURN_TOKEN(T_MOD_EQUAL);
}

<ST_IN_SCRIPTING>"<<=" {
	RETURN_TOKEN(T_SL_EQUAL);
}

<ST_IN_SCRIPTING>">>=" {
	RETURN_TOKEN(T_SR_EQUAL);
}

<ST_IN_SCRIPTING>"&=" {
	RETURN_TOKEN(T_AND_EQUAL);
}

<ST_IN_SCRIPTING>"|=" {
	RETURN_TOKEN(T_OR_EQUAL);
}

<ST_IN_SCRIPTING>"^=" {
	RETURN_TOKEN(T_XOR_EQUAL);
}

<ST_IN_SCRIPTING>"||" {
	RETURN_TOKEN(T_BOOLEAN_OR);
}

<ST_IN_SCRIPTING>"&&" {
	RETURN_TOKEN(T_BOOLEAN_AND);
}

<ST_IN_SCRIPTING>"OR" {
	RETURN_TOKEN(T_LOGICAL_OR);
}

<ST_IN_SCRIPTING>"AND" {
	RETURN_TOKEN(T_LOGICAL_AND);
}

<ST_IN_SCRIPTING>"XOR" {
	RETURN_TOKEN(T_LOGICAL_XOR);
}

<ST_IN_SCRIPTING>"<<" {
	RETURN_TOKEN(T_SL);
}

<ST_IN_SCRIPTING>">>" {
	RETURN_TOKEN(T_SR);
}

/* single-character punctuation: the character code is the token */
<ST_IN_SCRIPTING>{TOKENS} {
	RETURN_TOKEN(yytext[0]);
}
1593
1594
/* "{" in code pushes another ST_IN_SCRIPTING level, so the matching "}" pops
 * back to whatever state was active before (e.g. a string state). */
<ST_IN_SCRIPTING>"{" {
	yy_push_state(ST_IN_SCRIPTING);
	RETURN_TOKEN('{');
}


/* "${" inside an interpolating string: look for a variable name next. */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
	yy_push_state(ST_LOOKING_FOR_VARNAME);
	RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}


/* "}" pops one state, unless the state stack is already empty. */
<ST_IN_SCRIPTING>"}" {
	RESET_DOC_COMMENT();
	if (!zend_stack_is_empty(&SCNG(state_stack))) {
		yy_pop_state();
	}
	RETURN_TOKEN('}');
}


/* After "${": a label directly followed by "[" or "}" is a variable name.
 * The trailing character is given back with yyless() before the value is
 * copied, and scanning continues in ST_IN_SCRIPTING. */
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
	yyless(yyleng - 1);
	zend_copy_value(zendlval, yytext, yyleng);
	yy_pop_state();
	yy_push_state(ST_IN_SCRIPTING);
	RETURN_TOKEN(T_STRING_VARNAME);
}


/* After "${": anything else is an expression; un-read the character and
 * rescan it in ST_IN_SCRIPTING. */
<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
	yyless(0);
	yy_pop_state();
	yy_push_state(ST_IN_SCRIPTING);
	goto restart;
}
1631
1632 <ST_IN_SCRIPTING>{BNUM} {
1633 char *bin = yytext + 2; /* Skip "0b" */
1634 int len = yyleng - 2;
1635 char *end;
1636
1637 /* Skip any leading 0s */
1638 while (*bin == '0') {
1639 ++bin;
1640 --len;
1641 }
1642
1643 if (len < SIZEOF_ZEND_LONG * 8) {
1644 if (len == 0) {
1645 ZVAL_LONG(zendlval, 0);
1646 } else {
1647 errno = 0;
1648 ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1649 ZEND_ASSERT(!errno && end == yytext + yyleng);
1650 }
1651 RETURN_TOKEN(T_LNUMBER);
1652 } else {
1653 ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1654 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1655 ZEND_ASSERT(end == yytext + yyleng);
1656 RETURN_TOKEN(T_DNUMBER);
1657 }
1658 }
1659
/* Decimal/octal integer literal. ZEND_STRTOL is called with base 0, so a
 * leading "0" selects octal. Short literals cannot overflow; longer ones are
 * converted to a double when the integer conversion reports ERANGE. In every
 * path, a conversion that stops before the end of the match (e.g. "019")
 * throws a parse error and leaves zendlval UNDEF. */
<ST_IN_SCRIPTING>{LNUM} {
	char *end;
	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
		errno = 0;
		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
		/* This isn't an assert, we need to ensure 019 isn't valid octal
		 * Because the lexing itself doesn't do that for us
		 */
		if (end != yytext + yyleng) {
			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
			ZVAL_UNDEF(zendlval);
			RETURN_TOKEN(T_LNUMBER);
		}
	} else {
		errno = 0;
		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
		if (errno == ERANGE) { /* Overflow */
			errno = 0;
			if (yytext[0] == '0') { /* octal overflow */
				ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
			} else {
				ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
			}
			/* Also not an assert for the same reason */
			if (end != yytext + yyleng) {
				zend_throw_exception(zend_ce_parse_error,
					"Invalid numeric literal", 0);
				ZVAL_UNDEF(zendlval);
				RETURN_TOKEN(T_DNUMBER);
			}
			RETURN_TOKEN(T_DNUMBER);
		}
		/* Also not an assert for the same reason */
		if (end != yytext + yyleng) {
			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
			ZVAL_UNDEF(zendlval);
			/* NOTE(review): this error path reports T_DNUMBER although the
			 * value was parsed as an integer -- confirm this is intended */
			RETURN_TOKEN(T_DNUMBER);
		}
	}
	ZEND_ASSERT(!errno);
	RETURN_TOKEN(T_LNUMBER);
}
1702
1703 <ST_IN_SCRIPTING>{HNUM} {
1704 char *hex = yytext + 2; /* Skip "0x" */
1705 int len = yyleng - 2;
1706 char *end;
1707
1708 /* Skip any leading 0s */
1709 while (*hex == '0') {
1710 hex++;
1711 len--;
1712 }
1713
1714 if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1715 if (len == 0) {
1716 ZVAL_LONG(zendlval, 0);
1717 } else {
1718 errno = 0;
1719 ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1720 ZEND_ASSERT(!errno && end == hex + len);
1721 }
1722 RETURN_TOKEN(T_LNUMBER);
1723 } else {
1724 ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1725 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1726 ZEND_ASSERT(end == hex + len);
1727 RETURN_TOKEN(T_DNUMBER);
1728 }
1729 }
1730
/* Array offset inside an interpolated string, written as a canonical decimal
 * ("0" or no leading zero). It becomes an integer when it is short enough, or
 * has the maximum length and compares below long_min_digits; otherwise, or
 * if the conversion reports ERANGE, the text is kept as a string. The token
 * is T_NUM_STRING either way. */
<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
		char *end;
		errno = 0;
		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
		if (errno == ERANGE) {
			/* jumps into the else branch below */
			goto string;
		}
		ZEND_ASSERT(end == yytext + yyleng);
	} else {
string:
		ZVAL_STRINGL(zendlval, yytext, yyleng);
	}
	RETURN_TOKEN(T_NUM_STRING);
}

/* Any other numeric spelling of an offset (leading zeros, hex, binary) is
 * never converted; the raw text is returned. */
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
	ZVAL_STRINGL(zendlval, yytext, yyleng);
	RETURN_TOKEN(T_NUM_STRING);
}

/* Floating point literal, with or without exponent. */
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
	const char *end;

	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
	ZEND_ASSERT(end == yytext + yyleng);
	RETURN_TOKEN(T_DNUMBER);
}
1760
/* Magic constants: each rule returns a dedicated token; no value is stored
 * in zendlval here. */

<ST_IN_SCRIPTING>"__CLASS__" {
	RETURN_TOKEN(T_CLASS_C);
}

<ST_IN_SCRIPTING>"__TRAIT__" {
	RETURN_TOKEN(T_TRAIT_C);
}

<ST_IN_SCRIPTING>"__FUNCTION__" {
	RETURN_TOKEN(T_FUNC_C);
}

<ST_IN_SCRIPTING>"__METHOD__" {
	RETURN_TOKEN(T_METHOD_C);
}

<ST_IN_SCRIPTING>"__LINE__" {
	RETURN_TOKEN(T_LINE);
}

<ST_IN_SCRIPTING>"__FILE__" {
	RETURN_TOKEN(T_FILE);
}

<ST_IN_SCRIPTING>"__DIR__" {
	RETURN_TOKEN(T_DIR);
}

<ST_IN_SCRIPTING>"__NAMESPACE__" {
	RETURN_TOKEN(T_NS_C);
}
1792
1793
/* INITIAL state: text outside of PHP tags. The three open-tag rules switch
 * to ST_IN_SCRIPTING; everything else is collected as inline HTML. */

<INITIAL>"<?=" {
	BEGIN(ST_IN_SCRIPTING);
	RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
}


/* "<?php" must be followed by a space, tab or newline, which is part of the
 * token; HANDLE_NEWLINE is given that last character. */
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
	HANDLE_NEWLINE(yytext[yyleng-1]);
	BEGIN(ST_IN_SCRIPTING);
	RETURN_TOKEN(T_OPEN_TAG);
}


/* A bare "<?" is an open tag only when short_tags is enabled; otherwise it
 * is ordinary text and handled by the inline HTML scanner below. */
<INITIAL>"<?" {
	if (CG(short_tags)) {
		BEGIN(ST_IN_SCRIPTING);
		RETURN_TOKEN(T_OPEN_TAG);
	} else {
		goto inline_char_handler;
	}
}

/* Inline HTML: extend the token up to (not including) the next "<?" that
 * would be recognised as an open tag, or to the end of the input. */
<INITIAL>{ANY_CHAR} {
	if (YYCURSOR > YYLIMIT) {
		RETURN_TOKEN(END);
	}

inline_char_handler:

	while (1) {
		/* jump to the next '<' instead of scanning byte by byte */
		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);

		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;

		if (YYCURSOR >= YYLIMIT) {
			break;
		}

		if (*YYCURSOR == '?') {
			if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */

				/* step back onto the '<' so the tag starts the next token */
				YYCURSOR--;
				break;
			}
		}
	}

	yyleng = YYCURSOR - SCNG(yy_text);

	if (SCNG(output_filter)) {
		size_t readsize;
		char *s = NULL;
		size_t sz = 0;
		// TODO: avoid reallocation ???
		readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
		ZVAL_STRINGL(zendlval, s, sz);
		efree(s);
		/* the filter consumed fewer bytes than offered: give the rest back */
		if (readsize < yyleng) {
			yyless(readsize);
		}
	} else {
		ZVAL_STRINGL(zendlval, yytext, yyleng);
	}
	HANDLE_NEWLINES(yytext, yyleng);
	RETURN_TOKEN(T_INLINE_HTML);
}
1860
1861
/* Make sure a label character follows "->", otherwise there is no property
 * and "->" will be taken literally
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
	/* give back "->" and the first property character (3 bytes) */
	yyless(yyleng - 3);
	yy_push_state(ST_LOOKING_FOR_PROPERTY);
	/* the stored name excludes the leading '$' */
	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
	RETURN_TOKEN(T_VARIABLE);
}

/* A [ always designates a variable offset, regardless of what follows
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
	/* give back the '[' so it is scanned in ST_VAR_OFFSET */
	yyless(yyleng - 1);
	yy_push_state(ST_VAR_OFFSET);
	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
	RETURN_TOKEN(T_VARIABLE);
}

/* Plain variable: the value is the name without the leading '$'. */
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
	RETURN_TOKEN(T_VARIABLE);
}

/* "]" ends the offset and returns to the enclosing string state. */
<ST_VAR_OFFSET>"]" {
	yy_pop_state();
	RETURN_TOKEN(']');
}

<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
	/* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
	RETURN_TOKEN(yytext[0]);
}

<ST_VAR_OFFSET>[ \n\r\t\\'#] {
	/* Invalid rule to return a more explicit parse error with proper line number */
	yyless(0);
	yy_pop_state();
	ZVAL_NULL(zendlval);
	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}

/* Bare identifier (in code, or as an unquoted offset key). */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
	zend_copy_value(zendlval, yytext, yyleng);
	RETURN_TOKEN(T_STRING);
}
1908
1909
/* Single-line comment ("#" or "//"). The comment runs up to and including
 * the next line break, or up to (not including) a "?>" close tag, or to the
 * end of the input. */
<ST_IN_SCRIPTING>"#"|"//" {
	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '\r':
				/* treat "\r\n" as a single line break */
				if (*YYCURSOR == '\n') {
					YYCURSOR++;
				}
				/* fall through */
			case '\n':
				CG(zend_lineno)++;
				break;
			case '?':
				if (*YYCURSOR == '>') {
					/* leave "?>" for the close-tag rule */
					YYCURSOR--;
					break;
				}
				/* fall through */
			default:
				continue;
		}

		/* reached only via `break` in the switch: the comment is complete */
		break;
	}

	yyleng = YYCURSOR - SCNG(yy_text);

	RETURN_TOKEN(T_COMMENT);
}
1938
1939 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1940 int doc_com;
1941
1942 if (yyleng > 2) {
1943 doc_com = 1;
1944 RESET_DOC_COMMENT();
1945 } else {
1946 doc_com = 0;
1947 }
1948
1949 while (YYCURSOR < YYLIMIT) {
1950 if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1951 break;
1952 }
1953 }
1954
1955 if (YYCURSOR < YYLIMIT) {
1956 YYCURSOR++;
1957 } else {
1958 zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1959 }
1960
1961 yyleng = YYCURSOR - SCNG(yy_text);
1962 HANDLE_NEWLINES(yytext, yyleng);
1963
1964 if (doc_com) {
1965 CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1966 RETURN_TOKEN(T_DOC_COMMENT);
1967 }
1968
1969 RETURN_TOKEN(T_COMMENT);
1970 }
1971
/* Close tag, together with one directly following newline if present;
 * scanning continues in INITIAL (inline HTML). */
<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
	BEGIN(INITIAL);
	RETURN_TOKEN(T_CLOSE_TAG);  /* implicit ';' at php-end tag */
}
1976
1977
1978 <ST_IN_SCRIPTING>b?['] {
1979 register char *s, *t;
1980 char *end;
1981 int bprefix = (yytext[0] != '\'') ? 1 : 0;
1982
1983 while (1) {
1984 if (YYCURSOR < YYLIMIT) {
1985 if (*YYCURSOR == '\'') {
1986 YYCURSOR++;
1987 yyleng = YYCURSOR - SCNG(yy_text);
1988
1989 break;
1990 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1991 YYCURSOR++;
1992 }
1993 } else {
1994 yyleng = YYLIMIT - SCNG(yy_text);
1995
1996 /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1997 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1998 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1999 ZVAL_NULL(zendlval);
2000 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2001 }
2002 }
2003
2004 ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
2005
2006 /* convert escape sequences */
2007 s = t = Z_STRVAL_P(zendlval);
2008 end = s+Z_STRLEN_P(zendlval);
2009 while (s<end) {
2010 if (*s=='\\') {
2011 s++;
2012
2013 switch(*s) {
2014 case '\\':
2015 case '\'':
2016 *t++ = *s;
2017 Z_STRLEN_P(zendlval)--;
2018 break;
2019 default:
2020 *t++ = '\\';
2021 *t++ = *s;
2022 break;
2023 }
2024 } else {
2025 *t++ = *s;
2026 }
2027
2028 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2029 CG(zend_lineno)++;
2030 }
2031 s++;
2032 }
2033 *t = 0;
2034
2035 if (SCNG(output_filter)) {
2036 size_t sz = 0;
2037 char *str = NULL;
2038 s = Z_STRVAL_P(zendlval);
2039 // TODO: avoid reallocation ???
2040 SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2041 ZVAL_STRINGL(zendlval, str, sz);
2042 }
2043 RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2044 }
2045
2046
/* Opening double quote, optionally prefixed with "b". The action scans
 * ahead: if the closing quote is reached before any interpolation start
 * ("$" + label start, "${" or "{$"), the whole literal is returned at once as
 * T_CONSTANT_ENCAPSED_STRING. Otherwise only the quote itself is returned,
 * the state becomes ST_DOUBLE_QUOTES, and the length already scanned is
 * remembered for the next call. */
<ST_IN_SCRIPTING>b?["] {
	int bprefix = (yytext[0] != '"') ? 1 : 0;

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '"':
				yyleng = YYCURSOR - SCNG(yy_text);
				/* NOTE(review): the SUCCESS/FAILURE result is not checked here */
				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
				RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
			case '$':
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				/* skip the escaped character */
				if (YYCURSOR < YYLIMIT) {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* interpolation found: step back onto its first character and stop */
		YYCURSOR--;
		break;
	}

	/* Remember how much was scanned to save rescanning */
	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);

	/* rewind to just after the opening quote; only the quote is returned now */
	YYCURSOR = SCNG(yy_text) + yyleng;

	BEGIN(ST_DOUBLE_QUOTES);
	RETURN_TOKEN('"');
}
2087
2088
/* Heredoc/nowdoc opener: optional "b", "<<<", optional spaces/tabs, a label
 * (bare, single-quoted or double-quoted) and a newline. The label is saved
 * on the heredoc label stack. A single-quoted label selects ST_NOWDOC,
 * anything else ST_HEREDOC; if the closing label follows immediately on the
 * next line, the state goes straight to ST_END_HEREDOC. */
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
	char *s;
	int bprefix = (yytext[0] != '<') ? 1 : 0;
	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));

	CG(zend_lineno)++;
	/* match length minus prefix, "<<<" (3) and the newline (1, or 2 for "\r\n") */
	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
	s = yytext+bprefix+3;
	/* skip spaces/tabs between "<<<" and the label */
	while ((*s == ' ') || (*s == '\t')) {
		s++;
		heredoc_label->length--;
	}

	if (*s == '\'') {
		/* quoted label: drop both quotes from the length */
		s++;
		heredoc_label->length -= 2;

		BEGIN(ST_NOWDOC);
	} else {
		if (*s == '"') {
			s++;
			heredoc_label->length -= 2;
		}

		BEGIN(ST_HEREDOC);
	}

	heredoc_label->label = estrndup(s, heredoc_label->length);

	/* Check for ending label on the next line */
	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
		YYCTYPE *end = YYCURSOR + heredoc_label->length;

		/* an optional ';' may follow the label */
		if (*end == ';') {
			end++;
		}

		if (*end == '\n' || *end == '\r') {
			BEGIN(ST_END_HEREDOC);
		}
	}

	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);

	RETURN_TOKEN(T_START_HEREDOC);
}
2135
2136
/* Opening backquote: switch to ST_BACKQUOTE. */
<ST_IN_SCRIPTING>[`] {
	BEGIN(ST_BACKQUOTE);
	RETURN_TOKEN('`');
}


/* Closing heredoc/nowdoc label. The rule matches only one character; the
 * cursor is advanced over the rest of the label using the length stored on
 * the label stack, and the stack entry is popped and freed. */
<ST_END_HEREDOC>{ANY_CHAR} {
	zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));

	/* -1: one character of the label was already consumed by the match */
	YYCURSOR += heredoc_label->length - 1;
	yyleng = heredoc_label->length;

	heredoc_label_dtor(heredoc_label);
	efree(heredoc_label);

	BEGIN(ST_IN_SCRIPTING);
	RETURN_TOKEN(T_END_HEREDOC);
}


/* "{$" inside an interpolating string: only the "{" is consumed (yyless(1)
 * gives the "$" back) and the expression is scanned in ST_IN_SCRIPTING. */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
	Z_LVAL_P(zendlval) = (zend_long) '{';
	yy_push_state(ST_IN_SCRIPTING);
	yyless(1);
	RETURN_TOKEN(T_CURLY_OPEN);
}


/* Closing double quote. */
<ST_DOUBLE_QUOTES>["] {
	BEGIN(ST_IN_SCRIPTING);
	RETURN_TOKEN('"');
}

/* Closing backquote. */
<ST_BACKQUOTE>[`] {
	BEGIN(ST_IN_SCRIPTING);
	RETURN_TOKEN('`');
}
2174
2175
/* Literal text inside a double-quoted string, up to (not including) the
 * closing quote or the next interpolation start. The text is unescaped by
 * zend_scan_escape_string() and returned as T_ENCAPSED_AND_WHITESPACE. */
<ST_DOUBLE_QUOTES>{ANY_CHAR} {
	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
		/* a length was recorded earlier: reuse it instead of rescanning
		 * (-1 because one character was already consumed by the match) */
		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);

		goto double_quotes_scan_done;
	}

	if (YYCURSOR > YYLIMIT) {
		RETURN_TOKEN(END);
	}
	/* the matched character was a backslash: skip what it escapes */
	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '"':
				break;
			case '$':
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				if (YYCURSOR < YYLIMIT) {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* stop character found: leave it for the next token */
		YYCURSOR--;
		break;
	}

double_quotes_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	/* NOTE(review): the SUCCESS/FAILURE result is not checked here */
	zend_scan_escape_string(zendlval, yytext, yyleng, '"');
	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
2224
2225
/* Literal text inside a backquoted string, up to (not including) the closing
 * backquote or the next interpolation start. The text is unescaped by
 * zend_scan_escape_string() and returned as T_ENCAPSED_AND_WHITESPACE. */
<ST_BACKQUOTE>{ANY_CHAR} {
	if (YYCURSOR > YYLIMIT) {
		RETURN_TOKEN(END);
	}
	/* the matched character was a backslash: skip what it escapes */
	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '`':
				break;
			case '$':
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				if (YYCURSOR < YYLIMIT) {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* stop character found: leave it for the next token */
		YYCURSOR--;
		break;
	}

	yyleng = YYCURSOR - SCNG(yy_text);

	/* NOTE(review): the SUCCESS/FAILURE result is not checked here */
	zend_scan_escape_string(zendlval, yytext, yyleng, '`');
	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
2266
2267
/* Literal text inside a heredoc, up to the next interpolation start or up to
 * the line break that precedes the closing label. In the latter case the
 * token text (yytext/yyleng) includes that line break but the returned
 * string value does not, and the state becomes ST_END_HEREDOC. */
<ST_HEREDOC>{ANY_CHAR} {
	int newline = 0; /* bytes of the line break before the closing label */

	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));

	if (YYCURSOR > YYLIMIT) {
		RETURN_TOKEN(END);
	}

	/* re-examine the character consumed by the match inside the loop */
	YYCURSOR--;

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '\r':
				if (*YYCURSOR == '\n') {
					YYCURSOR++;
				}
				/* fall through */
			case '\n':
				/* Check for ending label on the next line */
				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
					YYCTYPE *end = YYCURSOR + heredoc_label->length;

					/* an optional ';' may follow the label */
					if (*end == ';') {
						end++;
					}

					if (*end == '\n' || *end == '\r') {
						/* newline before label will be subtracted from returned text, but
						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
							newline = 2; /* Windows newline */
						} else {
							newline = 1;
						}

						CG(increment_lineno) = 1; /* For newline before label */
						BEGIN(ST_END_HEREDOC);

						goto heredoc_scan_done;
					}
				}
				continue;
			case '$':
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				/* skip the escaped character, but never a line break, so the
				 * closing-label check above still sees it */
				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* interpolation start found: leave it for the next token */
		YYCURSOR--;
		break;
	}

heredoc_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	/* quote_type 0: neither \" nor \` is collapsed in heredoc text.
	 * NOTE(review): the SUCCESS/FAILURE result is not checked here */
	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
2340
2341
/* Nowdoc body: raw text up to the line break that precedes the closing label
 * (or to the end of the input). No escapes or interpolation are processed;
 * the text is copied as is. As for heredocs, the token text includes the
 * final line break but the returned value does not. */
<ST_NOWDOC>{ANY_CHAR} {
	int newline = 0; /* bytes of the line break before the closing label */

	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));

	if (YYCURSOR > YYLIMIT) {
		RETURN_TOKEN(END);
	}

	/* re-examine the character consumed by the match inside the loop */
	YYCURSOR--;

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '\r':
				if (*YYCURSOR == '\n') {
					YYCURSOR++;
				}
				/* fall through */
			case '\n':
				/* Check for ending label on the next line */
				if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
					YYCTYPE *end = YYCURSOR + heredoc_label->length;

					/* an optional ';' may follow the label */
					if (*end == ';') {
						end++;
					}

					if (*end == '\n' || *end == '\r') {
						/* newline before label will be subtracted from returned text, but
						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
							newline = 2; /* Windows newline */
						} else {
							newline = 1;
						}

						CG(increment_lineno) = 1; /* For newline before label */
						BEGIN(ST_END_HEREDOC);

						goto nowdoc_scan_done;
					}
				}
				/* fall through */
			default:
				continue;
		}
	}

nowdoc_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	zend_copy_value(zendlval, yytext, yyleng - newline);
	HANDLE_NEWLINES(yytext, yyleng - newline);
	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
2397
2398
/* Fallback for any character no other rule accepts: report END once the
 * cursor has moved past the limit, otherwise emit a compile warning, drop
 * the character and scan again. */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
	if (YYCURSOR > YYLIMIT) {
		RETURN_TOKEN(END);
	}

	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
	goto restart;
}
2407
2408 */
2409 }
2410