1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Marcus Boerger <helly@php.net> |
16 | Nuno Lopes <nlopess@php.net> |
17 | Scott MacVicar <scottmac@php.net> |
18 | Flex version authors: |
19 | Andi Gutmans <andi@zend.com> |
20 | Zeev Suraski <zeev@zend.com> |
21 +----------------------------------------------------------------------+
22 */
23
24 /* $Id$ */
25
/* Scanner trace hook. It expands to nothing by default; change the condition
 * below to 0 to print every state/character pair with printf(). */
#if 1
# define YYDEBUG(s, c)
#else
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#endif
31
32 #include "zend_language_scanner_defs.h"
33
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef ZEND_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52
/*
 * re2c interface. All input pointers live in the scanner globals and are
 * reached through SCNG().
 */
#define YYCTYPE   unsigned char
/* There is no refill step: the enclosing function returns 0 as soon as
 * cursor + n reaches limit + ZEND_MMAP_AHEAD. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current start condition; the argument is parenthesized so that any
 * expression can be assigned safely. */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Maps a bare condition name to its yyc-prefixed identifier. */
#define STATE(name)	yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x bytes of the current token: move the cursor to
 * yytext + x and set the token length to x. x is evaluated twice. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
/* Compile-time sanity check: ZEND_MMAP_AHEAD must not be smaller than
   YYMAXFILL. If the error below fires, increase the ZEND_MMAP_AHEAD value
   in the zend_streams.h file. */
/*!max:re2c */
#if YYMAXFILL > ZEND_MMAP_AHEAD
# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
#endif
79
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87
88 /* Globals Macros */
89 #define SCNG LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95
/*
 * Increment CG(zend_lineno) for every line break in the l bytes starting at s.
 * A "\n" always counts; a "\r" counts only when the next byte is not "\n".
 * Note that the byte following a "\r" is inspected even when that "\r" is the
 * last byte of the range.
 */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *p = (s), *boundary = p+(l); \
	\
	while (p<boundary) { \
		if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \
			CG(zend_lineno)++; \
		} \
		p++; \
	} \
} while (0)

/* Increment CG(zend_lineno) when the single character c is "\n" or "\r".
 * c is evaluated up to two times. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}

/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* True for ASCII letters, '_' and any byte >= 0x80. The byte test goes through
 * unsigned char so that it also holds when c is a (possibly signed) plain char. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (unsigned char)(c) >= 0x80)

#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c)  (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127 {
128 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129 ZEND_ASSERT(internal_encoding);
130 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131 }
132
encoding_filter_script_to_intermediate(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)133 static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134 {
135 return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136 }
137
encoding_filter_intermediate_to_script(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)138 static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139 {
140 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141 LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142 }
143
encoding_filter_intermediate_to_internal(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length)144 static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145 {
146 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147 ZEND_ASSERT(internal_encoding);
148 return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149 internal_encoding, zend_multibyte_encoding_utf8);
150 }
151
152
_yy_push_state(int new_state)153 static void _yy_push_state(int new_state)
154 {
155 zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156 YYSETCONDITION(new_state);
157 }
158
159 #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
yy_pop_state(void)161 static void yy_pop_state(void)
162 {
163 int *stack_state = zend_stack_top(&SCNG(state_stack));
164 YYSETCONDITION(*stack_state);
165 zend_stack_del_top(&SCNG(state_stack));
166 }
167
yy_scan_buffer(char * str,unsigned int len)168 static void yy_scan_buffer(char *str, unsigned int len)
169 {
170 YYCURSOR = (YYCTYPE*)str;
171 YYLIMIT = YYCURSOR + len;
172 if (!SCNG(yy_start)) {
173 SCNG(yy_start) = YYCURSOR;
174 }
175 }
176
startup_scanner(void)177 void startup_scanner(void)
178 {
179 CG(parse_error) = 0;
180 CG(doc_comment) = NULL;
181 CG(extra_fn_flags) = 0;
182 zend_stack_init(&SCNG(state_stack), sizeof(int));
183 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
184 }
185
/* Element destructor for the heredoc label stack: frees the label string
 * held by the entry (the entry itself is not freed here). */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
189
shutdown_scanner(void)190 void shutdown_scanner(void)
191 {
192 CG(parse_error) = 0;
193 RESET_DOC_COMMENT();
194 zend_stack_destroy(&SCNG(state_stack));
195 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
196 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
197 SCNG(on_event) = NULL;
198 }
199
zend_save_lexical_state(zend_lex_state * lex_state)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
201 {
202 lex_state->yy_leng = SCNG(yy_leng);
203 lex_state->yy_start = SCNG(yy_start);
204 lex_state->yy_text = SCNG(yy_text);
205 lex_state->yy_cursor = SCNG(yy_cursor);
206 lex_state->yy_marker = SCNG(yy_marker);
207 lex_state->yy_limit = SCNG(yy_limit);
208
209 lex_state->state_stack = SCNG(state_stack);
210 zend_stack_init(&SCNG(state_stack), sizeof(int));
211
212 lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214
215 lex_state->in = SCNG(yy_in);
216 lex_state->yy_state = YYSTATE;
217 lex_state->filename = zend_get_compiled_filename();
218 lex_state->lineno = CG(zend_lineno);
219
220 lex_state->script_org = SCNG(script_org);
221 lex_state->script_org_size = SCNG(script_org_size);
222 lex_state->script_filtered = SCNG(script_filtered);
223 lex_state->script_filtered_size = SCNG(script_filtered_size);
224 lex_state->input_filter = SCNG(input_filter);
225 lex_state->output_filter = SCNG(output_filter);
226 lex_state->script_encoding = SCNG(script_encoding);
227
228 lex_state->on_event = SCNG(on_event);
229 lex_state->on_event_context = SCNG(on_event_context);
230
231 lex_state->ast = CG(ast);
232 lex_state->ast_arena = CG(ast_arena);
233 }
234
zend_restore_lexical_state(zend_lex_state * lex_state)235 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
236 {
237 SCNG(yy_leng) = lex_state->yy_leng;
238 SCNG(yy_start) = lex_state->yy_start;
239 SCNG(yy_text) = lex_state->yy_text;
240 SCNG(yy_cursor) = lex_state->yy_cursor;
241 SCNG(yy_marker) = lex_state->yy_marker;
242 SCNG(yy_limit) = lex_state->yy_limit;
243
244 zend_stack_destroy(&SCNG(state_stack));
245 SCNG(state_stack) = lex_state->state_stack;
246
247 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
248 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
249 SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
250
251 SCNG(yy_in) = lex_state->in;
252 YYSETCONDITION(lex_state->yy_state);
253 CG(zend_lineno) = lex_state->lineno;
254 zend_restore_compiled_filename(lex_state->filename);
255
256 if (SCNG(script_filtered)) {
257 efree(SCNG(script_filtered));
258 SCNG(script_filtered) = NULL;
259 }
260 SCNG(script_org) = lex_state->script_org;
261 SCNG(script_org_size) = lex_state->script_org_size;
262 SCNG(script_filtered) = lex_state->script_filtered;
263 SCNG(script_filtered_size) = lex_state->script_filtered_size;
264 SCNG(input_filter) = lex_state->input_filter;
265 SCNG(output_filter) = lex_state->output_filter;
266 SCNG(script_encoding) = lex_state->script_encoding;
267
268 SCNG(on_event) = lex_state->on_event;
269 SCNG(on_event_context) = lex_state->on_event_context;
270
271 CG(ast) = lex_state->ast;
272 CG(ast_arena) = lex_state->ast_arena;
273
274 RESET_DOC_COMMENT();
275 }
276
zend_destroy_file_handle(zend_file_handle * file_handle)277 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
278 {
279 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
280 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
281 file_handle->opened_path = NULL;
282 if (file_handle->free_filename) {
283 file_handle->filename = NULL;
284 }
285 }
286
zend_lex_tstring(zval * zv)287 ZEND_API void zend_lex_tstring(zval *zv)
288 {
289 if (SCNG(on_event)) {
290 SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
291 }
292
293 ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
294 }
295
/* Byte order marks as string literals; use sizeof(...)-1 for their length,
 * because the UTF-32 marks contain NUL bytes. */
#define BOM_UTF8      "\xef\xbb\xbf"
#define BOM_UTF16_LE  "\xff\xfe"
#define BOM_UTF16_BE  "\xfe\xff"
#define BOM_UTF32_LE  "\xff\xfe\x00\x00"
#define BOM_UTF32_BE  "\x00\x00\xfe\xff"
301
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size)302 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
303 {
304 const unsigned char *p;
305 int wchar_size = 2;
306 int le = 0;
307
308 /* utf-16 or utf-32? */
309 p = script;
310 assert(p >= script);
311 while ((size_t)(p-script) < script_size) {
312 p = memchr(p, 0, script_size-(p-script)-2);
313 if (!p) {
314 break;
315 }
316 if (*(p+1) == '\0' && *(p+2) == '\0') {
317 wchar_size = 4;
318 break;
319 }
320
321 /* searching for UTF-32 specific byte orders, so this will do */
322 p += 4;
323 }
324
325 /* BE or LE? */
326 p = script;
327 assert(p >= script);
328 while ((size_t)(p-script) < script_size) {
329 if (*p == '\0' && *(p+wchar_size-1) != '\0') {
330 /* BE */
331 le = 0;
332 break;
333 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
334 /* LE* */
335 le = 1;
336 break;
337 }
338 p += wchar_size;
339 }
340
341 if (wchar_size == 2) {
342 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
343 } else {
344 return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
345 }
346
347 return NULL;
348 }
349
zend_multibyte_detect_unicode(void)350 static const zend_encoding* zend_multibyte_detect_unicode(void)
351 {
352 const zend_encoding *script_encoding = NULL;
353 int bom_size;
354 unsigned char *pos1, *pos2;
355
356 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
357 return NULL;
358 }
359
360 /* check out BOM */
361 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
362 script_encoding = zend_multibyte_encoding_utf32be;
363 bom_size = sizeof(BOM_UTF32_BE)-1;
364 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
365 script_encoding = zend_multibyte_encoding_utf32le;
366 bom_size = sizeof(BOM_UTF32_LE)-1;
367 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
368 script_encoding = zend_multibyte_encoding_utf16be;
369 bom_size = sizeof(BOM_UTF16_BE)-1;
370 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
371 script_encoding = zend_multibyte_encoding_utf16le;
372 bom_size = sizeof(BOM_UTF16_LE)-1;
373 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
374 script_encoding = zend_multibyte_encoding_utf8;
375 bom_size = sizeof(BOM_UTF8)-1;
376 }
377
378 if (script_encoding) {
379 /* remove BOM */
380 LANG_SCNG(script_org) += bom_size;
381 LANG_SCNG(script_org_size) -= bom_size;
382
383 return script_encoding;
384 }
385
386 /* script contains NULL bytes -> auto-detection */
387 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
388 /* check if the NULL byte is after the __HALT_COMPILER(); */
389 pos2 = LANG_SCNG(script_org);
390
391 while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
392 pos2 = memchr(pos2, '_', pos1 - pos2);
393 if (!pos2) break;
394 pos2++;
395 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
396 pos2 += sizeof("_HALT_COMPILER")-1;
397 while (*pos2 == ' ' ||
398 *pos2 == '\t' ||
399 *pos2 == '\r' ||
400 *pos2 == '\n') {
401 pos2++;
402 }
403 if (*pos2 == '(') {
404 pos2++;
405 while (*pos2 == ' ' ||
406 *pos2 == '\t' ||
407 *pos2 == '\r' ||
408 *pos2 == '\n') {
409 pos2++;
410 }
411 if (*pos2 == ')') {
412 pos2++;
413 while (*pos2 == ' ' ||
414 *pos2 == '\t' ||
415 *pos2 == '\r' ||
416 *pos2 == '\n') {
417 pos2++;
418 }
419 if (*pos2 == ';') {
420 return NULL;
421 }
422 }
423 }
424 }
425 }
426 /* make best effort if BOM is missing */
427 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
428 }
429
430 return NULL;
431 }
432
zend_multibyte_find_script_encoding(void)433 static const zend_encoding* zend_multibyte_find_script_encoding(void)
434 {
435 const zend_encoding *script_encoding;
436
437 if (CG(detect_unicode)) {
438 /* check out bom(byte order mark) and see if containing wchars */
439 script_encoding = zend_multibyte_detect_unicode();
440 if (script_encoding != NULL) {
441 /* bom or wchar detection is prior to 'script_encoding' option */
442 return script_encoding;
443 }
444 }
445
446 /* if no script_encoding specified, just leave alone */
447 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
448 return NULL;
449 }
450
451 /* if multiple encodings specified, detect automagically */
452 if (CG(script_encoding_list_size) > 1) {
453 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
454 }
455
456 return CG(script_encoding_list)[0];
457 }
458
zend_multibyte_set_filter(const zend_encoding * onetime_encoding)459 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
460 {
461 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
462 const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
463
464 if (!script_encoding) {
465 return FAILURE;
466 }
467
468 /* judge input/output filter */
469 LANG_SCNG(script_encoding) = script_encoding;
470 LANG_SCNG(input_filter) = NULL;
471 LANG_SCNG(output_filter) = NULL;
472
473 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
474 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
475 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
476 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
477 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
478 } else {
479 LANG_SCNG(input_filter) = NULL;
480 LANG_SCNG(output_filter) = NULL;
481 }
482 return SUCCESS;
483 }
484
485 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
486 LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
487 LANG_SCNG(output_filter) = NULL;
488 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
489 LANG_SCNG(input_filter) = NULL;
490 LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
491 } else {
492 /* both script and internal encodings are incompatible w/ flex */
493 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
494 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
495 }
496
497 return 0;
498 }
499
open_file_for_scanning(zend_file_handle * file_handle)500 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
501 {
502 char *buf;
503 size_t size, offset = 0;
504 zend_string *compiled_filename;
505
506 /* The shebang line was read, get the current position to obtain the buffer start */
507 if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
508 if ((offset = ftell(file_handle->handle.fp)) == (size_t)-1) {
509 offset = 0;
510 }
511 }
512
513 if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
514 return FAILURE;
515 }
516
517 zend_llist_add_element(&CG(open_files), file_handle);
518 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
519 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
520 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
521 fh->handle.stream.handle = (void*)(((char*)fh) + diff);
522 file_handle->handle.stream.handle = fh->handle.stream.handle;
523 }
524
525 /* Reset the scanner for scanning the new file */
526 SCNG(yy_in) = file_handle;
527 SCNG(yy_start) = NULL;
528
529 if (size != (size_t)-1) {
530 if (CG(multibyte)) {
531 SCNG(script_org) = (unsigned char*)buf;
532 SCNG(script_org_size) = size;
533 SCNG(script_filtered) = NULL;
534
535 zend_multibyte_set_filter(NULL);
536
537 if (SCNG(input_filter)) {
538 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
539 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
540 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
541 }
542 buf = (char*)SCNG(script_filtered);
543 size = SCNG(script_filtered_size);
544 }
545 }
546 SCNG(yy_start) = (unsigned char *)buf - offset;
547 yy_scan_buffer(buf, (unsigned int)size);
548 } else {
549 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
550 }
551
552 BEGIN(INITIAL);
553
554 if (file_handle->opened_path) {
555 compiled_filename = zend_string_copy(file_handle->opened_path);
556 } else {
557 compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
558 }
559
560 zend_set_compiled_filename(compiled_filename);
561 zend_string_release(compiled_filename);
562
563 if (CG(start_lineno)) {
564 CG(zend_lineno) = CG(start_lineno);
565 CG(start_lineno) = 0;
566 } else {
567 CG(zend_lineno) = 1;
568 }
569
570 RESET_DOC_COMMENT();
571 CG(increment_lineno) = 0;
572 return SUCCESS;
573 }
END_EXTERN_C()574 END_EXTERN_C()
575
576 static zend_op_array *zend_compile(int type)
577 {
578 zend_op_array *op_array = NULL;
579 zend_bool original_in_compilation = CG(in_compilation);
580
581 CG(in_compilation) = 1;
582 CG(ast) = NULL;
583 CG(ast_arena) = zend_arena_create(1024 * 32);
584
585 if (!zendparse()) {
586 int last_lineno = CG(zend_lineno);
587 zend_file_context original_file_context;
588 zend_oparray_context original_oparray_context;
589 zend_op_array *original_active_op_array = CG(active_op_array);
590
591 op_array = emalloc(sizeof(zend_op_array));
592 init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
593 CG(active_op_array) = op_array;
594
595 if (zend_ast_process) {
596 zend_ast_process(CG(ast));
597 }
598
599 zend_file_context_begin(&original_file_context);
600 zend_oparray_context_begin(&original_oparray_context);
601 zend_compile_top_stmt(CG(ast));
602 CG(zend_lineno) = last_lineno;
603 zend_emit_final_return(type == ZEND_USER_FUNCTION);
604 op_array->line_start = 1;
605 op_array->line_end = last_lineno;
606 pass_two(op_array);
607 zend_oparray_context_end(&original_oparray_context);
608 zend_file_context_end(&original_file_context);
609
610 CG(active_op_array) = original_active_op_array;
611 }
612
613 zend_ast_destroy(CG(ast));
614 zend_arena_destroy(CG(ast_arena));
615
616 CG(in_compilation) = original_in_compilation;
617
618 return op_array;
619 }
620
compile_file(zend_file_handle * file_handle,int type)621 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
622 {
623 zend_lex_state original_lex_state;
624 zend_op_array *op_array = NULL;
625 zend_save_lexical_state(&original_lex_state);
626
627 if (open_file_for_scanning(file_handle)==FAILURE) {
628 if (type==ZEND_REQUIRE) {
629 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
630 zend_bailout();
631 } else {
632 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
633 }
634 } else {
635 op_array = zend_compile(ZEND_USER_FUNCTION);
636 }
637
638 zend_restore_lexical_state(&original_lex_state);
639 return op_array;
640 }
641
642
compile_filename(int type,zval * filename)643 zend_op_array *compile_filename(int type, zval *filename)
644 {
645 zend_file_handle file_handle;
646 zval tmp;
647 zend_op_array *retval;
648 zend_string *opened_path = NULL;
649
650 if (Z_TYPE_P(filename) != IS_STRING) {
651 tmp = *filename;
652 zval_copy_ctor(&tmp);
653 convert_to_string(&tmp);
654 filename = &tmp;
655 }
656 file_handle.filename = Z_STRVAL_P(filename);
657 file_handle.free_filename = 0;
658 file_handle.type = ZEND_HANDLE_FILENAME;
659 file_handle.opened_path = NULL;
660 file_handle.handle.fp = NULL;
661
662 retval = zend_compile_file(&file_handle, type);
663 if (retval && file_handle.handle.stream.handle) {
664 if (!file_handle.opened_path) {
665 file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
666 }
667
668 zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
669
670 if (opened_path) {
671 zend_string_release(opened_path);
672 }
673 }
674 zend_destroy_file_handle(&file_handle);
675
676 if (filename==&tmp) {
677 zval_dtor(&tmp);
678 }
679 return retval;
680 }
681
zend_prepare_string_for_scanning(zval * str,char * filename)682 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
683 {
684 char *buf;
685 size_t size, old_len;
686 zend_string *new_compiled_filename;
687
688 /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
689 old_len = Z_STRLEN_P(str);
690 Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
691 Z_TYPE_INFO_P(str) = IS_STRING_EX;
692 memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
693
694 SCNG(yy_in) = NULL;
695 SCNG(yy_start) = NULL;
696
697 buf = Z_STRVAL_P(str);
698 size = old_len;
699
700 if (CG(multibyte)) {
701 SCNG(script_org) = (unsigned char*)buf;
702 SCNG(script_org_size) = size;
703 SCNG(script_filtered) = NULL;
704
705 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
706
707 if (SCNG(input_filter)) {
708 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
709 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
710 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
711 }
712 buf = (char*)SCNG(script_filtered);
713 size = SCNG(script_filtered_size);
714 }
715 }
716
717 yy_scan_buffer(buf, (unsigned int)size);
718
719 new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
720 zend_set_compiled_filename(new_compiled_filename);
721 zend_string_release(new_compiled_filename);
722 CG(zend_lineno) = 1;
723 CG(increment_lineno) = 0;
724 RESET_DOC_COMMENT();
725 return SUCCESS;
726 }
727
728
zend_get_scanned_file_offset(void)729 ZEND_API size_t zend_get_scanned_file_offset(void)
730 {
731 size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
732 if (SCNG(input_filter)) {
733 size_t original_offset = offset, length = 0;
734 do {
735 unsigned char *p = NULL;
736 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
737 return (size_t)-1;
738 }
739 efree(p);
740 if (length > original_offset) {
741 offset--;
742 } else if (length < original_offset) {
743 offset++;
744 }
745 } while (original_offset != length);
746 }
747 return offset;
748 }
749
compile_string(zval * source_string,char * filename)750 zend_op_array *compile_string(zval *source_string, char *filename)
751 {
752 zend_lex_state original_lex_state;
753 zend_op_array *op_array = NULL;
754 zval tmp;
755
756 if (Z_STRLEN_P(source_string)==0) {
757 return NULL;
758 }
759
760 ZVAL_DUP(&tmp, source_string);
761 convert_to_string(&tmp);
762 source_string = &tmp;
763
764 zend_save_lexical_state(&original_lex_state);
765 if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
766 BEGIN(ST_IN_SCRIPTING);
767 op_array = zend_compile(ZEND_EVAL_CODE);
768 }
769
770 zend_restore_lexical_state(&original_lex_state);
771 zval_dtor(&tmp);
772
773 return op_array;
774 }
775
776
BEGIN_EXTERN_C()777 BEGIN_EXTERN_C()
778 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
779 {
780 zend_lex_state original_lex_state;
781 zend_file_handle file_handle;
782
783 file_handle.type = ZEND_HANDLE_FILENAME;
784 file_handle.filename = filename;
785 file_handle.free_filename = 0;
786 file_handle.opened_path = NULL;
787 zend_save_lexical_state(&original_lex_state);
788 if (open_file_for_scanning(&file_handle)==FAILURE) {
789 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
790 zend_restore_lexical_state(&original_lex_state);
791 return FAILURE;
792 }
793 zend_highlight(syntax_highlighter_ini);
794 if (SCNG(script_filtered)) {
795 efree(SCNG(script_filtered));
796 SCNG(script_filtered) = NULL;
797 }
798 zend_destroy_file_handle(&file_handle);
799 zend_restore_lexical_state(&original_lex_state);
800 return SUCCESS;
801 }
802
/*
 * Syntax-highlight the code in str using the given colour settings.
 *
 * The function works on a private copy of str, which is destroyed on every
 * return path, including the failure path. The scanner state is saved
 * before and restored after the run, and scanning starts in INITIAL.
 *
 * @param str                     code to highlight (not modified)
 * @param syntax_highlighter_ini  colour settings passed to zend_highlight()
 * @param str_name                name recorded as the compiled filename
 * @return FAILURE when the string cannot be prepared for scanning,
 *         SUCCESS otherwise.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state);
	if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state);
		/* do not leak the copy made above */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini);
	/* release the converted script buffer, if one was created */
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state);
	zval_dtor(str);
	return SUCCESS;
}
825
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding)826 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
827 {
828 size_t length;
829 unsigned char *new_yy_start;
830
831 /* convert and set */
832 if (!SCNG(input_filter)) {
833 if (SCNG(script_filtered)) {
834 efree(SCNG(script_filtered));
835 SCNG(script_filtered) = NULL;
836 }
837 SCNG(script_filtered_size) = 0;
838 length = SCNG(script_org_size);
839 new_yy_start = SCNG(script_org);
840 } else {
841 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
842 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
843 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
844 }
845 if (SCNG(script_filtered)) {
846 efree(SCNG(script_filtered));
847 }
848 SCNG(script_filtered) = new_yy_start;
849 SCNG(script_filtered_size) = length;
850 }
851
852 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
853 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
854 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
855 SCNG(yy_limit) = new_yy_start + length;
856
857 SCNG(yy_start) = new_yy_start;
858 }
859
860
/*
 * Store yyleng bytes of yytext in zendlval as a string. When an output
 * filter is installed the bytes are converted first and the temporary
 * conversion buffer is freed after copying.
 *
 * Expands to a bare if/else statement, so use it only as a complete
 * statement. The locals s and sz are visible to the argument expressions.
 *
 * TODO: avoid reallocation ???
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (!SCNG(output_filter)) { \
		ZVAL_STRINGL(zendlval, yytext, yyleng); \
	} else { \
		size_t sz = 0; \
		char *s = NULL; \
		SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \
		ZVAL_STRINGL(zendlval, s, sz); \
		efree(s); \
	}
872
/*
 * Decode the backslash escape sequences of a double-quoted, backquoted or
 * heredoc string fragment and store the result in zendlval.
 *
 *   zendlval   - receives a newly created string holding the decoded bytes;
 *                left UNDEF when FAILURE is returned
 *   str, len   - the raw fragment as it appears in the source text
 *   quote_type - quote character whose escaped form (\" or \`) is unescaped;
 *                the heredoc rule in this file passes 0, so both stay verbatim
 *
 * Handled escapes: \n \r \t \f \v \e \\ \$, \x or \X followed by one or two
 * hex digits, \u{hex digits} (written out as UTF-8) and a backslash followed
 * by one to three octal digits. Every other backslash sequence is copied
 * through unchanged, including the backslash.
 *
 * CG(zend_lineno) is incremented for each line break found in the fragment.
 * If SCNG(output_filter) is set, the decoded bytes are passed through it and
 * the filter output replaces the value.
 *
 * Returns SUCCESS, or FAILURE after throwing a parse error for a malformed or
 * too large \u{...} sequence. Note that the call sites in this file do not
 * inspect the return value.
 */
static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
	char *src, *dst;
	char *end;

	ZVAL_STRINGL(zendlval, str, len);

	/* convert escape sequences in place; no escape decodes to more bytes
	 * than it occupies in the source, so dst never overtakes src */
	src = dst = Z_STRVAL_P(zendlval);
	end = src + Z_STRLEN_P(zendlval);
	while (src < end) {
		if (*src == '\\') {
			src++;
			if (src >= end) {
				/* lone backslash at the very end of the fragment */
				*dst++ = '\\';
				break;
			}

			switch (*src) {
				case 'n':
					*dst++ = '\n';
					break;
				case 'r':
					*dst++ = '\r';
					break;
				case 't':
					*dst++ = '\t';
					break;
				case 'f':
					*dst++ = '\f';
					break;
				case 'v':
					*dst++ = '\v';
					break;
				case 'e':
#ifdef ZEND_WIN32
					*dst++ = VK_ESCAPE;
#else
					*dst++ = '\e';
#endif
					break;
				case '"':
				case '`':
					if (*src != quote_type) {
						/* not the active quote character: keep the backslash */
						*dst++ = '\\';
						*dst++ = *src;
						break;
					}
					/* fall through: the active quote is unescaped like \\ and \$ */
				case '\\':
				case '$':
					*dst++ = *src;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(src+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						hex_buf[0] = *(++src);
						if (ZEND_IS_HEX(*(src+1))) {
							hex_buf[1] = *(++src);
						}
						*dst++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
					} else {
						*dst++ = '\\';
						*dst++ = *src;
					}
					break;
				/* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
				case 'u':
					{
						/* remember where the '{' must be so the digits can be
						 * parsed after the sequence has been validated */
						char *brace = src + 1;
						size_t digits = 0;
						unsigned long codepoint;

						if (*brace != '{') {
							/* we silently let this pass to avoid breaking code
							 * with JSON in string literals (e.g. "\"\u202e\"")
							 */
							*dst++ = '\\';
							*dst++ = 'u';
							break;
						}

						/* on the other hand, invalid \u{blah} errors */
						src += 2; /* step over 'u' and '{' */
						while (ZEND_IS_HEX(*src)) {
							digits++;
							src++;
						}

						/* the digits must be closed by '}', and \u{} is invalid */
						if (*src != '}' || digits == 0) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						errno = 0;
						codepoint = strtoul(brace + 1, NULL, 16);

						/* per RFC 3629, UTF-8 can only represent 21 bits */
						if (codepoint > 0x10FFFF || errno) {
							zend_throw_exception(zend_ce_parse_error,
								"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
							zval_ptr_dtor(zendlval);
							ZVAL_UNDEF(zendlval);
							return FAILURE;
						}

						/* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
						if (codepoint < 0x80) {
							*dst++ = codepoint;
						} else if (codepoint <= 0x7FF) {
							*dst++ = (codepoint >> 6) + 0xC0;
							*dst++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0xFFFF) {
							*dst++ = (codepoint >> 12) + 0xE0;
							*dst++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*dst++ = (codepoint & 0x3F) + 0x80;
						} else if (codepoint <= 0x10FFFF) {
							*dst++ = (codepoint >> 18) + 0xF0;
							*dst++ = ((codepoint >> 12) & 0x3F) + 0x80;
							*dst++ = ((codepoint >> 6) & 0x3F) + 0x80;
							*dst++ = (codepoint & 0x3F) + 0x80;
						}
						/* src is left on the closing '}' */
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*src)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *src;
						if (ZEND_IS_OCT(*(src+1))) {
							octal_buf[1] = *(++src);
							if (ZEND_IS_OCT(*(src+1))) {
								octal_buf[2] = *(++src);
							}
						}
						if (octal_buf[2] &&
						    (octal_buf[0] > '3')) {
							/* 3 octit values must not overflow 0xFF (\377) */
							zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
						}

						*dst++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
					} else {
						*dst++ = '\\';
						*dst++ = *src;
					}
					break;
			}
		} else {
			*dst++ = *src;
		}

		/* count "\n", and "\r" unless it is the first half of "\r\n" */
		if (*src == '\n' || (*src == '\r' && (*(src+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		src++;
	}
	*dst = 0;
	/* the decoded length is exactly the number of bytes written */
	Z_STRLEN_P(zendlval) = dst - Z_STRVAL_P(zendlval);

	if (SCNG(output_filter)) {
		size_t filtered_len = 0;
		unsigned char *filtered;
		// TODO: avoid reallocation ???
		SCNG(output_filter)(&filtered, &filtered_len, (unsigned char *) Z_STRVAL_P(zendlval), (size_t) Z_STRLEN_P(zendlval));
		zval_ptr_dtor(zendlval);
		ZVAL_STRINGL(zendlval, (char *) filtered, filtered_len);
		efree(filtered);
	}
	return SUCCESS;
}
1088
emit_token(int token,int token_line)1089 static zend_always_inline int emit_token(int token, int token_line)
1090 {
1091 if (SCNG(on_event)) {
1092 SCNG(on_event)(ON_TOKEN, token, token_line, SCNG(on_event_context));
1093 }
1094
1095 return token;
1096 }
1097
1098 #define RETURN_TOKEN(token) return emit_token(token, start_line);
1099
/*
 * Scan the next token from the current scanner input.
 *
 * zendlval receives the token's value for the rules that produce one (string
 * contents, numbers, identifiers). The return value is the token id, passed
 * through emit_token() by RETURN_TOKEN().
 */
int lex_scan(zval *zendlval)
{

/* line number at entry; RETURN_TOKEN() forwards it to emit_token() */
int start_line = CG(zend_lineno);

/* rules that give their input back without producing a token jump here */
restart:
	SCNG(yy_text) = YYCURSOR;

/*!re2c
re2c:yyfill:check = 0;
LNUM	[0-9]+
DNUM	([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
EXPONENT_DNUM	(({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM	"0x"[0-9a-fA-F]+
BNUM	"0b"[01]+
LABEL	[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")

/* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1124
1125 <ST_IN_SCRIPTING>"exit" {
1126 RETURN_TOKEN(T_EXIT);
1127 }
1128
1129 <ST_IN_SCRIPTING>"die" {
1130 RETURN_TOKEN(T_EXIT);
1131 }
1132
1133 <ST_IN_SCRIPTING>"function" {
1134 RETURN_TOKEN(T_FUNCTION);
1135 }
1136
1137 <ST_IN_SCRIPTING>"const" {
1138 RETURN_TOKEN(T_CONST);
1139 }
1140
1141 <ST_IN_SCRIPTING>"return" {
1142 RETURN_TOKEN(T_RETURN);
1143 }
1144
1145 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
1146 yyless(yyleng - 1);
1147 HANDLE_NEWLINES(yytext, yyleng);
1148 RETURN_TOKEN(T_YIELD_FROM);
1149 }
1150
1151 <ST_IN_SCRIPTING>"yield" {
1152 RETURN_TOKEN(T_YIELD);
1153 }
1154
1155 <ST_IN_SCRIPTING>"try" {
1156 RETURN_TOKEN(T_TRY);
1157 }
1158
1159 <ST_IN_SCRIPTING>"catch" {
1160 RETURN_TOKEN(T_CATCH);
1161 }
1162
1163 <ST_IN_SCRIPTING>"finally" {
1164 RETURN_TOKEN(T_FINALLY);
1165 }
1166
1167 <ST_IN_SCRIPTING>"throw" {
1168 RETURN_TOKEN(T_THROW);
1169 }
1170
1171 <ST_IN_SCRIPTING>"if" {
1172 RETURN_TOKEN(T_IF);
1173 }
1174
1175 <ST_IN_SCRIPTING>"elseif" {
1176 RETURN_TOKEN(T_ELSEIF);
1177 }
1178
1179 <ST_IN_SCRIPTING>"endif" {
1180 RETURN_TOKEN(T_ENDIF);
1181 }
1182
1183 <ST_IN_SCRIPTING>"else" {
1184 RETURN_TOKEN(T_ELSE);
1185 }
1186
1187 <ST_IN_SCRIPTING>"while" {
1188 RETURN_TOKEN(T_WHILE);
1189 }
1190
1191 <ST_IN_SCRIPTING>"endwhile" {
1192 RETURN_TOKEN(T_ENDWHILE);
1193 }
1194
1195 <ST_IN_SCRIPTING>"do" {
1196 RETURN_TOKEN(T_DO);
1197 }
1198
1199 <ST_IN_SCRIPTING>"for" {
1200 RETURN_TOKEN(T_FOR);
1201 }
1202
1203 <ST_IN_SCRIPTING>"endfor" {
1204 RETURN_TOKEN(T_ENDFOR);
1205 }
1206
1207 <ST_IN_SCRIPTING>"foreach" {
1208 RETURN_TOKEN(T_FOREACH);
1209 }
1210
1211 <ST_IN_SCRIPTING>"endforeach" {
1212 RETURN_TOKEN(T_ENDFOREACH);
1213 }
1214
1215 <ST_IN_SCRIPTING>"declare" {
1216 RETURN_TOKEN(T_DECLARE);
1217 }
1218
1219 <ST_IN_SCRIPTING>"enddeclare" {
1220 RETURN_TOKEN(T_ENDDECLARE);
1221 }
1222
1223 <ST_IN_SCRIPTING>"instanceof" {
1224 RETURN_TOKEN(T_INSTANCEOF);
1225 }
1226
1227 <ST_IN_SCRIPTING>"as" {
1228 RETURN_TOKEN(T_AS);
1229 }
1230
1231 <ST_IN_SCRIPTING>"switch" {
1232 RETURN_TOKEN(T_SWITCH);
1233 }
1234
1235 <ST_IN_SCRIPTING>"endswitch" {
1236 RETURN_TOKEN(T_ENDSWITCH);
1237 }
1238
1239 <ST_IN_SCRIPTING>"case" {
1240 RETURN_TOKEN(T_CASE);
1241 }
1242
1243 <ST_IN_SCRIPTING>"default" {
1244 RETURN_TOKEN(T_DEFAULT);
1245 }
1246
1247 <ST_IN_SCRIPTING>"break" {
1248 RETURN_TOKEN(T_BREAK);
1249 }
1250
1251 <ST_IN_SCRIPTING>"continue" {
1252 RETURN_TOKEN(T_CONTINUE);
1253 }
1254
1255 <ST_IN_SCRIPTING>"goto" {
1256 RETURN_TOKEN(T_GOTO);
1257 }
1258
1259 <ST_IN_SCRIPTING>"echo" {
1260 RETURN_TOKEN(T_ECHO);
1261 }
1262
1263 <ST_IN_SCRIPTING>"print" {
1264 RETURN_TOKEN(T_PRINT);
1265 }
1266
1267 <ST_IN_SCRIPTING>"class" {
1268 RETURN_TOKEN(T_CLASS);
1269 }
1270
1271 <ST_IN_SCRIPTING>"interface" {
1272 RETURN_TOKEN(T_INTERFACE);
1273 }
1274
1275 <ST_IN_SCRIPTING>"trait" {
1276 RETURN_TOKEN(T_TRAIT);
1277 }
1278
1279 <ST_IN_SCRIPTING>"extends" {
1280 RETURN_TOKEN(T_EXTENDS);
1281 }
1282
1283 <ST_IN_SCRIPTING>"implements" {
1284 RETURN_TOKEN(T_IMPLEMENTS);
1285 }
1286
1287 <ST_IN_SCRIPTING>"->" {
1288 yy_push_state(ST_LOOKING_FOR_PROPERTY);
1289 RETURN_TOKEN(T_OBJECT_OPERATOR);
1290 }
1291
1292 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1293 HANDLE_NEWLINES(yytext, yyleng);
1294 RETURN_TOKEN(T_WHITESPACE);
1295 }
1296
1297 <ST_LOOKING_FOR_PROPERTY>"->" {
1298 RETURN_TOKEN(T_OBJECT_OPERATOR);
1299 }
1300
1301 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1302 yy_pop_state();
1303 zend_copy_value(zendlval, yytext, yyleng);
1304 RETURN_TOKEN(T_STRING);
1305 }
1306
1307 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1308 yyless(0);
1309 yy_pop_state();
1310 goto restart;
1311 }
1312
1313 <ST_IN_SCRIPTING>"::" {
1314 RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
1315 }
1316
1317 <ST_IN_SCRIPTING>"\\" {
1318 RETURN_TOKEN(T_NS_SEPARATOR);
1319 }
1320
1321 <ST_IN_SCRIPTING>"..." {
1322 RETURN_TOKEN(T_ELLIPSIS);
1323 }
1324
1325 <ST_IN_SCRIPTING>"??" {
1326 RETURN_TOKEN(T_COALESCE);
1327 }
1328
1329 <ST_IN_SCRIPTING>"new" {
1330 RETURN_TOKEN(T_NEW);
1331 }
1332
1333 <ST_IN_SCRIPTING>"clone" {
1334 RETURN_TOKEN(T_CLONE);
1335 }
1336
1337 <ST_IN_SCRIPTING>"var" {
1338 RETURN_TOKEN(T_VAR);
1339 }
1340
1341 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1342 RETURN_TOKEN(T_INT_CAST);
1343 }
1344
1345 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1346 RETURN_TOKEN(T_DOUBLE_CAST);
1347 }
1348
1349 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1350 RETURN_TOKEN(T_STRING_CAST);
1351 }
1352
1353 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1354 RETURN_TOKEN(T_ARRAY_CAST);
1355 }
1356
1357 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1358 RETURN_TOKEN(T_OBJECT_CAST);
1359 }
1360
1361 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1362 RETURN_TOKEN(T_BOOL_CAST);
1363 }
1364
1365 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1366 RETURN_TOKEN(T_UNSET_CAST);
1367 }
1368
1369 <ST_IN_SCRIPTING>"eval" {
1370 RETURN_TOKEN(T_EVAL);
1371 }
1372
1373 <ST_IN_SCRIPTING>"include" {
1374 RETURN_TOKEN(T_INCLUDE);
1375 }
1376
1377 <ST_IN_SCRIPTING>"include_once" {
1378 RETURN_TOKEN(T_INCLUDE_ONCE);
1379 }
1380
1381 <ST_IN_SCRIPTING>"require" {
1382 RETURN_TOKEN(T_REQUIRE);
1383 }
1384
1385 <ST_IN_SCRIPTING>"require_once" {
1386 RETURN_TOKEN(T_REQUIRE_ONCE);
1387 }
1388
1389 <ST_IN_SCRIPTING>"namespace" {
1390 RETURN_TOKEN(T_NAMESPACE);
1391 }
1392
1393 <ST_IN_SCRIPTING>"use" {
1394 RETURN_TOKEN(T_USE);
1395 }
1396
1397 <ST_IN_SCRIPTING>"insteadof" {
1398 RETURN_TOKEN(T_INSTEADOF);
1399 }
1400
1401 <ST_IN_SCRIPTING>"global" {
1402 RETURN_TOKEN(T_GLOBAL);
1403 }
1404
1405 <ST_IN_SCRIPTING>"isset" {
1406 RETURN_TOKEN(T_ISSET);
1407 }
1408
1409 <ST_IN_SCRIPTING>"empty" {
1410 RETURN_TOKEN(T_EMPTY);
1411 }
1412
1413 <ST_IN_SCRIPTING>"__halt_compiler" {
1414 RETURN_TOKEN(T_HALT_COMPILER);
1415 }
1416
1417 <ST_IN_SCRIPTING>"static" {
1418 RETURN_TOKEN(T_STATIC);
1419 }
1420
1421 <ST_IN_SCRIPTING>"abstract" {
1422 RETURN_TOKEN(T_ABSTRACT);
1423 }
1424
1425 <ST_IN_SCRIPTING>"final" {
1426 RETURN_TOKEN(T_FINAL);
1427 }
1428
1429 <ST_IN_SCRIPTING>"private" {
1430 RETURN_TOKEN(T_PRIVATE);
1431 }
1432
1433 <ST_IN_SCRIPTING>"protected" {
1434 RETURN_TOKEN(T_PROTECTED);
1435 }
1436
1437 <ST_IN_SCRIPTING>"public" {
1438 RETURN_TOKEN(T_PUBLIC);
1439 }
1440
1441 <ST_IN_SCRIPTING>"unset" {
1442 RETURN_TOKEN(T_UNSET);
1443 }
1444
1445 <ST_IN_SCRIPTING>"=>" {
1446 RETURN_TOKEN(T_DOUBLE_ARROW);
1447 }
1448
1449 <ST_IN_SCRIPTING>"list" {
1450 RETURN_TOKEN(T_LIST);
1451 }
1452
1453 <ST_IN_SCRIPTING>"array" {
1454 RETURN_TOKEN(T_ARRAY);
1455 }
1456
1457 <ST_IN_SCRIPTING>"callable" {
1458 RETURN_TOKEN(T_CALLABLE);
1459 }
1460
1461 <ST_IN_SCRIPTING>"++" {
1462 RETURN_TOKEN(T_INC);
1463 }
1464
1465 <ST_IN_SCRIPTING>"--" {
1466 RETURN_TOKEN(T_DEC);
1467 }
1468
1469 <ST_IN_SCRIPTING>"===" {
1470 RETURN_TOKEN(T_IS_IDENTICAL);
1471 }
1472
1473 <ST_IN_SCRIPTING>"!==" {
1474 RETURN_TOKEN(T_IS_NOT_IDENTICAL);
1475 }
1476
1477 <ST_IN_SCRIPTING>"==" {
1478 RETURN_TOKEN(T_IS_EQUAL);
1479 }
1480
1481 <ST_IN_SCRIPTING>"!="|"<>" {
1482 RETURN_TOKEN(T_IS_NOT_EQUAL);
1483 }
1484
1485 <ST_IN_SCRIPTING>"<=>" {
1486 RETURN_TOKEN(T_SPACESHIP);
1487 }
1488
1489 <ST_IN_SCRIPTING>"<=" {
1490 RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
1491 }
1492
1493 <ST_IN_SCRIPTING>">=" {
1494 RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
1495 }
1496
1497 <ST_IN_SCRIPTING>"+=" {
1498 RETURN_TOKEN(T_PLUS_EQUAL);
1499 }
1500
1501 <ST_IN_SCRIPTING>"-=" {
1502 RETURN_TOKEN(T_MINUS_EQUAL);
1503 }
1504
1505 <ST_IN_SCRIPTING>"*=" {
1506 RETURN_TOKEN(T_MUL_EQUAL);
1507 }
1508
1509 <ST_IN_SCRIPTING>"*\*" {
1510 RETURN_TOKEN(T_POW);
1511 }
1512
1513 <ST_IN_SCRIPTING>"*\*=" {
1514 RETURN_TOKEN(T_POW_EQUAL);
1515 }
1516
1517 <ST_IN_SCRIPTING>"/=" {
1518 RETURN_TOKEN(T_DIV_EQUAL);
1519 }
1520
1521 <ST_IN_SCRIPTING>".=" {
1522 RETURN_TOKEN(T_CONCAT_EQUAL);
1523 }
1524
1525 <ST_IN_SCRIPTING>"%=" {
1526 RETURN_TOKEN(T_MOD_EQUAL);
1527 }
1528
1529 <ST_IN_SCRIPTING>"<<=" {
1530 RETURN_TOKEN(T_SL_EQUAL);
1531 }
1532
1533 <ST_IN_SCRIPTING>">>=" {
1534 RETURN_TOKEN(T_SR_EQUAL);
1535 }
1536
1537 <ST_IN_SCRIPTING>"&=" {
1538 RETURN_TOKEN(T_AND_EQUAL);
1539 }
1540
1541 <ST_IN_SCRIPTING>"|=" {
1542 RETURN_TOKEN(T_OR_EQUAL);
1543 }
1544
1545 <ST_IN_SCRIPTING>"^=" {
1546 RETURN_TOKEN(T_XOR_EQUAL);
1547 }
1548
1549 <ST_IN_SCRIPTING>"||" {
1550 RETURN_TOKEN(T_BOOLEAN_OR);
1551 }
1552
1553 <ST_IN_SCRIPTING>"&&" {
1554 RETURN_TOKEN(T_BOOLEAN_AND);
1555 }
1556
1557 <ST_IN_SCRIPTING>"OR" {
1558 RETURN_TOKEN(T_LOGICAL_OR);
1559 }
1560
1561 <ST_IN_SCRIPTING>"AND" {
1562 RETURN_TOKEN(T_LOGICAL_AND);
1563 }
1564
1565 <ST_IN_SCRIPTING>"XOR" {
1566 RETURN_TOKEN(T_LOGICAL_XOR);
1567 }
1568
1569 <ST_IN_SCRIPTING>"<<" {
1570 RETURN_TOKEN(T_SL);
1571 }
1572
1573 <ST_IN_SCRIPTING>">>" {
1574 RETURN_TOKEN(T_SR);
1575 }
1576
1577 <ST_IN_SCRIPTING>{TOKENS} {
1578 RETURN_TOKEN(yytext[0]);
1579 }
1580
1581
1582 <ST_IN_SCRIPTING>"{" {
1583 yy_push_state(ST_IN_SCRIPTING);
1584 RETURN_TOKEN('{');
1585 }
1586
1587
1588 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1589 yy_push_state(ST_LOOKING_FOR_VARNAME);
1590 RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
1591 }
1592
1593
1594 <ST_IN_SCRIPTING>"}" {
1595 RESET_DOC_COMMENT();
1596 if (!zend_stack_is_empty(&SCNG(state_stack))) {
1597 yy_pop_state();
1598 }
1599 RETURN_TOKEN('}');
1600 }
1601
1602
1603 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1604 yyless(yyleng - 1);
1605 zend_copy_value(zendlval, yytext, yyleng);
1606 yy_pop_state();
1607 yy_push_state(ST_IN_SCRIPTING);
1608 RETURN_TOKEN(T_STRING_VARNAME);
1609 }
1610
1611
1612 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1613 yyless(0);
1614 yy_pop_state();
1615 yy_push_state(ST_IN_SCRIPTING);
1616 goto restart;
1617 }
1618
1619 <ST_IN_SCRIPTING>{BNUM} {
1620 char *bin = yytext + 2; /* Skip "0b" */
1621 int len = yyleng - 2;
1622 char *end;
1623
1624 /* Skip any leading 0s */
1625 while (*bin == '0') {
1626 ++bin;
1627 --len;
1628 }
1629
1630 if (len < SIZEOF_ZEND_LONG * 8) {
1631 if (len == 0) {
1632 ZVAL_LONG(zendlval, 0);
1633 } else {
1634 errno = 0;
1635 ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1636 ZEND_ASSERT(!errno && end == yytext + yyleng);
1637 }
1638 RETURN_TOKEN(T_LNUMBER);
1639 } else {
1640 ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1641 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1642 ZEND_ASSERT(end == yytext + yyleng);
1643 RETURN_TOKEN(T_DNUMBER);
1644 }
1645 }
1646
1647 <ST_IN_SCRIPTING>{LNUM} {
1648 char *end;
1649 if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1650 errno = 0;
1651 ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1652 /* This isn't an assert, we need to ensure 019 isn't valid octal
1653 * Because the lexing itself doesn't do that for us
1654 */
1655 if (end != yytext + yyleng) {
1656 zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1657 ZVAL_UNDEF(zendlval);
1658 RETURN_TOKEN(T_LNUMBER);
1659 }
1660 } else {
1661 errno = 0;
1662 ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1663 if (errno == ERANGE) { /* Overflow */
1664 errno = 0;
1665 if (yytext[0] == '0') { /* octal overflow */
1666 ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1667 } else {
1668 ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1669 }
1670 /* Also not an assert for the same reason */
1671 if (end != yytext + yyleng) {
1672 zend_throw_exception(zend_ce_parse_error,
1673 "Invalid numeric literal", 0);
1674 ZVAL_UNDEF(zendlval);
1675 RETURN_TOKEN(T_DNUMBER);
1676 }
1677 RETURN_TOKEN(T_DNUMBER);
1678 }
1679 /* Also not an assert for the same reason */
1680 if (end != yytext + yyleng) {
1681 zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1682 ZVAL_UNDEF(zendlval);
1683 RETURN_TOKEN(T_DNUMBER);
1684 }
1685 }
1686 ZEND_ASSERT(!errno);
1687 RETURN_TOKEN(T_LNUMBER);
1688 }
1689
1690 <ST_IN_SCRIPTING>{HNUM} {
1691 char *hex = yytext + 2; /* Skip "0x" */
1692 int len = yyleng - 2;
1693 char *end;
1694
1695 /* Skip any leading 0s */
1696 while (*hex == '0') {
1697 hex++;
1698 len--;
1699 }
1700
1701 if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1702 if (len == 0) {
1703 ZVAL_LONG(zendlval, 0);
1704 } else {
1705 errno = 0;
1706 ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1707 ZEND_ASSERT(!errno && end == hex + len);
1708 }
1709 RETURN_TOKEN(T_LNUMBER);
1710 } else {
1711 ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1712 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1713 ZEND_ASSERT(end == hex + len);
1714 RETURN_TOKEN(T_DNUMBER);
1715 }
1716 }
1717
1718 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1719 if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1720 char *end;
1721 errno = 0;
1722 ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1723 if (errno == ERANGE) {
1724 goto string;
1725 }
1726 ZEND_ASSERT(end == yytext + yyleng);
1727 } else {
1728 string:
1729 ZVAL_STRINGL(zendlval, yytext, yyleng);
1730 }
1731 RETURN_TOKEN(T_NUM_STRING);
1732 }
1733
1734 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1735 ZVAL_STRINGL(zendlval, yytext, yyleng);
1736 RETURN_TOKEN(T_NUM_STRING);
1737 }
1738
1739 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1740 const char *end;
1741
1742 ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1743 /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1744 ZEND_ASSERT(end == yytext + yyleng);
1745 RETURN_TOKEN(T_DNUMBER);
1746 }
1747
1748 <ST_IN_SCRIPTING>"__CLASS__" {
1749 RETURN_TOKEN(T_CLASS_C);
1750 }
1751
1752 <ST_IN_SCRIPTING>"__TRAIT__" {
1753 RETURN_TOKEN(T_TRAIT_C);
1754 }
1755
1756 <ST_IN_SCRIPTING>"__FUNCTION__" {
1757 RETURN_TOKEN(T_FUNC_C);
1758 }
1759
1760 <ST_IN_SCRIPTING>"__METHOD__" {
1761 RETURN_TOKEN(T_METHOD_C);
1762 }
1763
1764 <ST_IN_SCRIPTING>"__LINE__" {
1765 RETURN_TOKEN(T_LINE);
1766 }
1767
1768 <ST_IN_SCRIPTING>"__FILE__" {
1769 RETURN_TOKEN(T_FILE);
1770 }
1771
1772 <ST_IN_SCRIPTING>"__DIR__" {
1773 RETURN_TOKEN(T_DIR);
1774 }
1775
1776 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1777 RETURN_TOKEN(T_NS_C);
1778 }
1779
1780
1781 <INITIAL>"<?=" {
1782 BEGIN(ST_IN_SCRIPTING);
1783 RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
1784 }
1785
1786
1787 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1788 HANDLE_NEWLINE(yytext[yyleng-1]);
1789 BEGIN(ST_IN_SCRIPTING);
1790 RETURN_TOKEN(T_OPEN_TAG);
1791 }
1792
1793
1794 <INITIAL>"<?" {
1795 if (CG(short_tags)) {
1796 BEGIN(ST_IN_SCRIPTING);
1797 RETURN_TOKEN(T_OPEN_TAG);
1798 } else {
1799 goto inline_char_handler;
1800 }
1801 }
1802
1803 <INITIAL>{ANY_CHAR} {
1804 if (YYCURSOR > YYLIMIT) {
1805 RETURN_TOKEN(END);
1806 }
1807
1808 inline_char_handler:
1809
1810 while (1) {
1811 YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1812
1813 YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1814
1815 if (YYCURSOR >= YYLIMIT) {
1816 break;
1817 }
1818
1819 if (*YYCURSOR == '?') {
1820 if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1821
1822 YYCURSOR--;
1823 break;
1824 }
1825 }
1826 }
1827
1828 yyleng = YYCURSOR - SCNG(yy_text);
1829
1830 if (SCNG(output_filter)) {
1831 size_t readsize;
1832 char *s = NULL;
1833 size_t sz = 0;
1834 // TODO: avoid reallocation ???
1835 readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1836 ZVAL_STRINGL(zendlval, s, sz);
1837 efree(s);
1838 if (readsize < yyleng) {
1839 yyless(readsize);
1840 }
1841 } else {
1842 ZVAL_STRINGL(zendlval, yytext, yyleng);
1843 }
1844 HANDLE_NEWLINES(yytext, yyleng);
1845 RETURN_TOKEN(T_INLINE_HTML);
1846 }
1847
1848
1849 /* Make sure a label character follows "->", otherwise there is no property
1850 * and "->" will be taken literally
1851 */
1852 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
1853 yyless(yyleng - 3);
1854 yy_push_state(ST_LOOKING_FOR_PROPERTY);
1855 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1856 RETURN_TOKEN(T_VARIABLE);
1857 }
1858
1859 /* A [ always designates a variable offset, regardless of what follows
1860 */
1861 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1862 yyless(yyleng - 1);
1863 yy_push_state(ST_VAR_OFFSET);
1864 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1865 RETURN_TOKEN(T_VARIABLE);
1866 }
1867
1868 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1869 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1870 RETURN_TOKEN(T_VARIABLE);
1871 }
1872
1873 <ST_VAR_OFFSET>"]" {
1874 yy_pop_state();
1875 RETURN_TOKEN(']');
1876 }
1877
1878 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1879 /* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
1880 RETURN_TOKEN(yytext[0]);
1881 }
1882
1883 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1884 /* Invalid rule to return a more explicit parse error with proper line number */
1885 yyless(0);
1886 yy_pop_state();
1887 ZVAL_NULL(zendlval);
1888 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1889 }
1890
1891 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1892 zend_copy_value(zendlval, yytext, yyleng);
1893 RETURN_TOKEN(T_STRING);
1894 }
1895
1896
1897 <ST_IN_SCRIPTING>"#"|"//" {
1898 while (YYCURSOR < YYLIMIT) {
1899 switch (*YYCURSOR++) {
1900 case '\r':
1901 if (*YYCURSOR == '\n') {
1902 YYCURSOR++;
1903 }
1904 /* fall through */
1905 case '\n':
1906 CG(zend_lineno)++;
1907 break;
1908 case '?':
1909 if (*YYCURSOR == '>') {
1910 YYCURSOR--;
1911 break;
1912 }
1913 /* fall through */
1914 default:
1915 continue;
1916 }
1917
1918 break;
1919 }
1920
1921 yyleng = YYCURSOR - SCNG(yy_text);
1922
1923 RETURN_TOKEN(T_COMMENT);
1924 }
1925
1926 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1927 int doc_com;
1928
1929 if (yyleng > 2) {
1930 doc_com = 1;
1931 RESET_DOC_COMMENT();
1932 } else {
1933 doc_com = 0;
1934 }
1935
1936 while (YYCURSOR < YYLIMIT) {
1937 if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1938 break;
1939 }
1940 }
1941
1942 if (YYCURSOR < YYLIMIT) {
1943 YYCURSOR++;
1944 } else {
1945 zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1946 }
1947
1948 yyleng = YYCURSOR - SCNG(yy_text);
1949 HANDLE_NEWLINES(yytext, yyleng);
1950
1951 if (doc_com) {
1952 CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1953 RETURN_TOKEN(T_DOC_COMMENT);
1954 }
1955
1956 RETURN_TOKEN(T_COMMENT);
1957 }
1958
1959 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1960 BEGIN(INITIAL);
1961 RETURN_TOKEN(T_CLOSE_TAG); /* implicit ';' at php-end tag */
1962 }
1963
1964
1965 <ST_IN_SCRIPTING>b?['] {
1966 register char *s, *t;
1967 char *end;
1968 int bprefix = (yytext[0] != '\'') ? 1 : 0;
1969
1970 while (1) {
1971 if (YYCURSOR < YYLIMIT) {
1972 if (*YYCURSOR == '\'') {
1973 YYCURSOR++;
1974 yyleng = YYCURSOR - SCNG(yy_text);
1975
1976 break;
1977 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1978 YYCURSOR++;
1979 }
1980 } else {
1981 yyleng = YYLIMIT - SCNG(yy_text);
1982
1983 /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1984 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1985 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1986 ZVAL_NULL(zendlval);
1987 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1988 }
1989 }
1990
1991 ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1992
1993 /* convert escape sequences */
1994 s = t = Z_STRVAL_P(zendlval);
1995 end = s+Z_STRLEN_P(zendlval);
1996 while (s<end) {
1997 if (*s=='\\') {
1998 s++;
1999
2000 switch(*s) {
2001 case '\\':
2002 case '\'':
2003 *t++ = *s;
2004 Z_STRLEN_P(zendlval)--;
2005 break;
2006 default:
2007 *t++ = '\\';
2008 *t++ = *s;
2009 break;
2010 }
2011 } else {
2012 *t++ = *s;
2013 }
2014
2015 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2016 CG(zend_lineno)++;
2017 }
2018 s++;
2019 }
2020 *t = 0;
2021
2022 if (SCNG(output_filter)) {
2023 size_t sz = 0;
2024 char *str = NULL;
2025 s = Z_STRVAL_P(zendlval);
2026 // TODO: avoid reallocation ???
2027 SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2028 ZVAL_STRINGL(zendlval, str, sz);
2029 }
2030 RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2031 }
2032
2033
2034 <ST_IN_SCRIPTING>b?["] {
2035 int bprefix = (yytext[0] != '"') ? 1 : 0;
2036
2037 while (YYCURSOR < YYLIMIT) {
2038 switch (*YYCURSOR++) {
2039 case '"':
2040 yyleng = YYCURSOR - SCNG(yy_text);
2041 zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
2042 RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2043 case '$':
2044 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2045 break;
2046 }
2047 continue;
2048 case '{':
2049 if (*YYCURSOR == '$') {
2050 break;
2051 }
2052 continue;
2053 case '\\':
2054 if (YYCURSOR < YYLIMIT) {
2055 YYCURSOR++;
2056 }
2057 /* fall through */
2058 default:
2059 continue;
2060 }
2061
2062 YYCURSOR--;
2063 break;
2064 }
2065
2066 /* Remember how much was scanned to save rescanning */
2067 SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2068
2069 YYCURSOR = SCNG(yy_text) + yyleng;
2070
2071 BEGIN(ST_DOUBLE_QUOTES);
2072 RETURN_TOKEN('"');
2073 }
2074
2075
2076 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2077 char *s;
2078 int bprefix = (yytext[0] != '<') ? 1 : 0;
2079 zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2080
2081 CG(zend_lineno)++;
2082 heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2083 s = yytext+bprefix+3;
2084 while ((*s == ' ') || (*s == '\t')) {
2085 s++;
2086 heredoc_label->length--;
2087 }
2088
2089 if (*s == '\'') {
2090 s++;
2091 heredoc_label->length -= 2;
2092
2093 BEGIN(ST_NOWDOC);
2094 } else {
2095 if (*s == '"') {
2096 s++;
2097 heredoc_label->length -= 2;
2098 }
2099
2100 BEGIN(ST_HEREDOC);
2101 }
2102
2103 heredoc_label->label = estrndup(s, heredoc_label->length);
2104
2105 /* Check for ending label on the next line */
2106 if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2107 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2108
2109 if (*end == ';') {
2110 end++;
2111 }
2112
2113 if (*end == '\n' || *end == '\r') {
2114 BEGIN(ST_END_HEREDOC);
2115 }
2116 }
2117
2118 zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2119
2120 RETURN_TOKEN(T_START_HEREDOC);
2121 }
2122
2123
2124 <ST_IN_SCRIPTING>[`] {
2125 BEGIN(ST_BACKQUOTE);
2126 RETURN_TOKEN('`');
2127 }
2128
2129
2130 <ST_END_HEREDOC>{ANY_CHAR} {
2131 zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2132
2133 YYCURSOR += heredoc_label->length - 1;
2134 yyleng = heredoc_label->length;
2135
2136 heredoc_label_dtor(heredoc_label);
2137 efree(heredoc_label);
2138
2139 BEGIN(ST_IN_SCRIPTING);
2140 RETURN_TOKEN(T_END_HEREDOC);
2141 }
2142
2143
2144 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2145 Z_LVAL_P(zendlval) = (zend_long) '{';
2146 yy_push_state(ST_IN_SCRIPTING);
2147 yyless(1);
2148 RETURN_TOKEN(T_CURLY_OPEN);
2149 }
2150
2151
2152 <ST_DOUBLE_QUOTES>["] {
2153 BEGIN(ST_IN_SCRIPTING);
2154 RETURN_TOKEN('"');
2155 }
2156
2157 <ST_BACKQUOTE>[`] {
2158 BEGIN(ST_IN_SCRIPTING);
2159 RETURN_TOKEN('`');
2160 }
2161
2162
2163 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2164 if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2165 YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2166 SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2167
2168 goto double_quotes_scan_done;
2169 }
2170
2171 if (YYCURSOR > YYLIMIT) {
2172 RETURN_TOKEN(END);
2173 }
2174 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2175 YYCURSOR++;
2176 }
2177
2178 while (YYCURSOR < YYLIMIT) {
2179 switch (*YYCURSOR++) {
2180 case '"':
2181 break;
2182 case '$':
2183 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2184 break;
2185 }
2186 continue;
2187 case '{':
2188 if (*YYCURSOR == '$') {
2189 break;
2190 }
2191 continue;
2192 case '\\':
2193 if (YYCURSOR < YYLIMIT) {
2194 YYCURSOR++;
2195 }
2196 /* fall through */
2197 default:
2198 continue;
2199 }
2200
2201 YYCURSOR--;
2202 break;
2203 }
2204
2205 double_quotes_scan_done:
2206 yyleng = YYCURSOR - SCNG(yy_text);
2207
2208 zend_scan_escape_string(zendlval, yytext, yyleng, '"');
2209 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2210 }
2211
2212
2213 <ST_BACKQUOTE>{ANY_CHAR} {
2214 if (YYCURSOR > YYLIMIT) {
2215 RETURN_TOKEN(END);
2216 }
2217 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2218 YYCURSOR++;
2219 }
2220
2221 while (YYCURSOR < YYLIMIT) {
2222 switch (*YYCURSOR++) {
2223 case '`':
2224 break;
2225 case '$':
2226 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2227 break;
2228 }
2229 continue;
2230 case '{':
2231 if (*YYCURSOR == '$') {
2232 break;
2233 }
2234 continue;
2235 case '\\':
2236 if (YYCURSOR < YYLIMIT) {
2237 YYCURSOR++;
2238 }
2239 /* fall through */
2240 default:
2241 continue;
2242 }
2243
2244 YYCURSOR--;
2245 break;
2246 }
2247
2248 yyleng = YYCURSOR - SCNG(yy_text);
2249
2250 zend_scan_escape_string(zendlval, yytext, yyleng, '`');
2251 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2252 }
2253
2254
2255 <ST_HEREDOC>{ANY_CHAR} {
2256 int newline = 0;
2257
2258 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2259
2260 if (YYCURSOR > YYLIMIT) {
2261 RETURN_TOKEN(END);
2262 }
2263
2264 YYCURSOR--;
2265
2266 while (YYCURSOR < YYLIMIT) {
2267 switch (*YYCURSOR++) {
2268 case '\r':
2269 if (*YYCURSOR == '\n') {
2270 YYCURSOR++;
2271 }
2272 /* fall through */
2273 case '\n':
2274 /* Check for ending label on the next line */
2275 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2276 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2277
2278 if (*end == ';') {
2279 end++;
2280 }
2281
2282 if (*end == '\n' || *end == '\r') {
2283 /* newline before label will be subtracted from returned text, but
2284 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2285 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2286 newline = 2; /* Windows newline */
2287 } else {
2288 newline = 1;
2289 }
2290
2291 CG(increment_lineno) = 1; /* For newline before label */
2292 BEGIN(ST_END_HEREDOC);
2293
2294 goto heredoc_scan_done;
2295 }
2296 }
2297 continue;
2298 case '$':
2299 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2300 break;
2301 }
2302 continue;
2303 case '{':
2304 if (*YYCURSOR == '$') {
2305 break;
2306 }
2307 continue;
2308 case '\\':
2309 if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2310 YYCURSOR++;
2311 }
2312 /* fall through */
2313 default:
2314 continue;
2315 }
2316
2317 YYCURSOR--;
2318 break;
2319 }
2320
2321 heredoc_scan_done:
2322 yyleng = YYCURSOR - SCNG(yy_text);
2323
2324 zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
2325 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2326 }
2327
2328
2329 <ST_NOWDOC>{ANY_CHAR} {
2330 int newline = 0;
2331
2332 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2333
2334 if (YYCURSOR > YYLIMIT) {
2335 RETURN_TOKEN(END);
2336 }
2337
2338 YYCURSOR--;
2339
2340 while (YYCURSOR < YYLIMIT) {
2341 switch (*YYCURSOR++) {
2342 case '\r':
2343 if (*YYCURSOR == '\n') {
2344 YYCURSOR++;
2345 }
2346 /* fall through */
2347 case '\n':
2348 /* Check for ending label on the next line */
2349 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2350 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2351
2352 if (*end == ';') {
2353 end++;
2354 }
2355
2356 if (*end == '\n' || *end == '\r') {
2357 /* newline before label will be subtracted from returned text, but
2358 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2359 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2360 newline = 2; /* Windows newline */
2361 } else {
2362 newline = 1;
2363 }
2364
2365 CG(increment_lineno) = 1; /* For newline before label */
2366 BEGIN(ST_END_HEREDOC);
2367
2368 goto nowdoc_scan_done;
2369 }
2370 }
2371 /* fall through */
2372 default:
2373 continue;
2374 }
2375 }
2376
2377 nowdoc_scan_done:
2378 yyleng = YYCURSOR - SCNG(yy_text);
2379
2380 zend_copy_value(zendlval, yytext, yyleng - newline);
2381 HANDLE_NEWLINES(yytext, yyleng - newline);
2382 RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2383 }
2384
2385
2386 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2387 if (YYCURSOR > YYLIMIT) {
2388 RETURN_TOKEN(END);
2389 }
2390
2391 zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2392 goto restart;
2393 }
2394
2395 */
2396 }
2397