1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1998-2016 Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Marcus Boerger <helly@php.net> |
16 | Nuno Lopes <nlopess@php.net> |
17 | Scott MacVicar <scottmac@php.net> |
18 | Flex version authors: |
19 | Andi Gutmans <andi@zend.com> |
20 | Zeev Suraski <zeev@zend.com> |
21 +----------------------------------------------------------------------+
22 */
23
24 /* $Id$ */
25
/* Scanner tracing hook taking a state number and the current character.
 * Change the condition to 1 to print every (state, char) pair.
 * NOTE(review): presumably invoked by the re2c-generated scanner code. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", (s), (c))
#else
/* Expands to a harmless expression so that "YYDEBUG(a, b);" is always a
 * well-formed statement, even as the sole body of an if/else. */
# define YYDEBUG(s, c) ((void)0)
#endif
31
32 #include "zend_language_scanner_defs.h"
33
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef PHP_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "zend_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52
/* re2c interface: input character type and buffer accessors */
#define YYCTYPE unsigned char
/* Leave the enclosing function with 0 once cursor+n reaches the limit plus the
 * ZEND_MMAP_AHEAD padding. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)

/* current start condition of the scanner */
#define YYGETCONDITION() SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* map a condition name onto its yyc-prefixed identifier */
#define STATE(name) yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE YYGETCONDITION()
#define yytext ((char*)SCNG(yy_text))
#define yyleng SCNG(yy_leng)
/* Keep only the first x bytes of the current token: move the cursor back to
 * yytext + x and set yyleng to x. The argument is evaluated twice. */
#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + (x); \
                       yyleng = (unsigned int)(x); } while(0)
#define yymore() goto yymore_restart
72
73 /* perform sanity check. If this message is triggered you should
74 increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87
/* Globals Macros */
/* SCNG(field) is shorthand for LANG_SCNG(field) throughout this file. */
#define SCNG LANG_SCNG
#ifdef ZTS
/* ZTS builds: only a resource id of type ts_rsrc_id is defined here. */
ZEND_API ts_rsrc_id language_scanner_globals_id;
#else
/* Non-ZTS builds: the scanner globals are a single object defined here. */
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
95
/* Increment CG(zend_lineno) once for every line break in the l bytes starting
 * at s. A '\r' directly followed by '\n' is not counted on its own, so "\r\n"
 * counts as one break. The byte after the last one may be inspected. */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *p = (s), *boundary = p+(l); \
 \
	while (p<boundary) { \
		if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \
			CG(zend_lineno)++; \
		} \
		p++; \
	} \
} while (0)

/* Increment CG(zend_lineno) when the single character c is '\n' or '\r'.
 * c is evaluated twice; it is parenthesized so that compound expressions
 * (for example a conditional expression) compare as a whole. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114
/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len)

/* True when c is an ASCII letter, '_' or any value >= 0x7F. c is evaluated several times. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)

/* Digit predicates for octal ('0'-'7') and hexadecimal ('0'-'9', 'a'-'f', 'A'-'F') characters */
#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127 {
128 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129 assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131 }
132
/* Encoding filter: hands the buffer to zend_multibyte_encoding_converter() with
 * UTF-8 and the current script encoding (script -> UTF-8 intermediate, going by
 * the function name). Returns whatever the converter returns. */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
}
137
/* Encoding filter: hands the buffer to zend_multibyte_encoding_converter() with
 * the current script encoding and UTF-8 (UTF-8 intermediate -> script, going by
 * the function name). Returns whatever the converter returns. */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
}
143
/* Encoding filter: hands the buffer to zend_multibyte_encoding_converter() with
 * the internal encoding and UTF-8 (UTF-8 intermediate -> internal, going by the
 * function name). Asserts that an internal encoding is configured and is lexer
 * compatible. Returns whatever the converter returns. */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *target = zend_multibyte_get_internal_encoding(TSRMLS_C);

	assert(target && zend_multibyte_check_lexer_compatibility(target));

	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		target, zend_multibyte_encoding_utf8 TSRMLS_CC);
}
151
152
/* Push the current scanner condition onto SCNG(state_stack), then make
 * new_state the current condition. */
static void _yy_push_state(int new_state TSRMLS_DC)
{
	void *current = (void *) &YYGETCONDITION();

	zend_stack_push(&SCNG(state_stack), current, sizeof(int));
	YYSETCONDITION(new_state);
}

/* Convenience wrapper: pastes the yyc prefix onto its argument before calling
 * _yy_push_state(). */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
yy_pop_state(TSRMLS_D)161 static void yy_pop_state(TSRMLS_D)
162 {
163 int *stack_state;
164 zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165 YYSETCONDITION(*stack_state);
166 zend_stack_del_top(&SCNG(state_stack));
167 }
168
/* Point the scanner at the len bytes starting at str: the cursor is set to the
 * first byte and the limit to one past the last. SCNG(yy_start) is only set
 * when it has not been set already. */
static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
{
	YYCTYPE *first = (YYCTYPE*)str;

	YYCURSOR = first;
	YYLIMIT = first + len;

	if (SCNG(yy_start) == NULL) {
		SCNG(yy_start) = first;
	}
}
177
startup_scanner(TSRMLS_D)178 void startup_scanner(TSRMLS_D)
179 {
180 CG(parse_error) = 0;
181 CG(doc_comment) = NULL;
182 CG(doc_comment_len) = 0;
183 zend_stack_init(&SCNG(state_stack));
184 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
185 }
186
/* Element destructor for the heredoc label stack: frees the label string
 * held by the entry (the entry itself is not freed here). */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
190
shutdown_scanner(TSRMLS_D)191 void shutdown_scanner(TSRMLS_D)
192 {
193 CG(parse_error) = 0;
194 RESET_DOC_COMMENT();
195 zend_stack_destroy(&SCNG(state_stack));
196 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
197 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
198 }
199
zend_save_lexical_state(zend_lex_state * lex_state TSRMLS_DC)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
201 {
202 lex_state->yy_leng = SCNG(yy_leng);
203 lex_state->yy_start = SCNG(yy_start);
204 lex_state->yy_text = SCNG(yy_text);
205 lex_state->yy_cursor = SCNG(yy_cursor);
206 lex_state->yy_marker = SCNG(yy_marker);
207 lex_state->yy_limit = SCNG(yy_limit);
208
209 lex_state->state_stack = SCNG(state_stack);
210 zend_stack_init(&SCNG(state_stack));
211
212 lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214
215 lex_state->in = SCNG(yy_in);
216 lex_state->yy_state = YYSTATE;
217 lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
218 lex_state->lineno = CG(zend_lineno);
219
220 lex_state->script_org = SCNG(script_org);
221 lex_state->script_org_size = SCNG(script_org_size);
222 lex_state->script_filtered = SCNG(script_filtered);
223 lex_state->script_filtered_size = SCNG(script_filtered_size);
224 lex_state->input_filter = SCNG(input_filter);
225 lex_state->output_filter = SCNG(output_filter);
226 lex_state->script_encoding = SCNG(script_encoding);
227 }
228
zend_restore_lexical_state(zend_lex_state * lex_state TSRMLS_DC)229 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
230 {
231 SCNG(yy_leng) = lex_state->yy_leng;
232 SCNG(yy_start) = lex_state->yy_start;
233 SCNG(yy_text) = lex_state->yy_text;
234 SCNG(yy_cursor) = lex_state->yy_cursor;
235 SCNG(yy_marker) = lex_state->yy_marker;
236 SCNG(yy_limit) = lex_state->yy_limit;
237
238 zend_stack_destroy(&SCNG(state_stack));
239 SCNG(state_stack) = lex_state->state_stack;
240
241 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
242 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
243 SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
244
245 SCNG(yy_in) = lex_state->in;
246 YYSETCONDITION(lex_state->yy_state);
247 CG(zend_lineno) = lex_state->lineno;
248 zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
249
250 if (SCNG(script_filtered)) {
251 efree(SCNG(script_filtered));
252 SCNG(script_filtered) = NULL;
253 }
254 SCNG(script_org) = lex_state->script_org;
255 SCNG(script_org_size) = lex_state->script_org_size;
256 SCNG(script_filtered) = lex_state->script_filtered;
257 SCNG(script_filtered_size) = lex_state->script_filtered_size;
258 SCNG(input_filter) = lex_state->input_filter;
259 SCNG(output_filter) = lex_state->output_filter;
260 SCNG(script_encoding) = lex_state->script_encoding;
261
262 RESET_DOC_COMMENT();
263 }
264
zend_destroy_file_handle(zend_file_handle * file_handle TSRMLS_DC)265 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
266 {
267 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
268 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
269 file_handle->opened_path = NULL;
270 if (file_handle->free_filename) {
271 file_handle->filename = NULL;
272 }
273 }
274
/* Byte order marks compared against the start of the script. As string
 * literals they carry an implicit trailing NUL, hence the sizeof(...)-1 where
 * they are used. */
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8 "\xef\xbb\xbf"
280
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size TSRMLS_DC)281 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
282 {
283 const unsigned char *p;
284 int wchar_size = 2;
285 int le = 0;
286
287 /* utf-16 or utf-32? */
288 p = script;
289 while ((p-script) < script_size) {
290 p = memchr(p, 0, script_size-(p-script)-2);
291 if (!p) {
292 break;
293 }
294 if (*(p+1) == '\0' && *(p+2) == '\0') {
295 wchar_size = 4;
296 break;
297 }
298
299 /* searching for UTF-32 specific byte orders, so this will do */
300 p += 4;
301 }
302
303 /* BE or LE? */
304 p = script;
305 while ((p-script) < script_size) {
306 if (*p == '\0' && *(p+wchar_size-1) != '\0') {
307 /* BE */
308 le = 0;
309 break;
310 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
311 /* LE* */
312 le = 1;
313 break;
314 }
315 p += wchar_size;
316 }
317
318 if (wchar_size == 2) {
319 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
320 } else {
321 return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
322 }
323
324 return NULL;
325 }
326
zend_multibyte_detect_unicode(TSRMLS_D)327 static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
328 {
329 const zend_encoding *script_encoding = NULL;
330 int bom_size;
331 unsigned char *pos1, *pos2;
332
333 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
334 return NULL;
335 }
336
337 /* check out BOM */
338 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
339 script_encoding = zend_multibyte_encoding_utf32be;
340 bom_size = sizeof(BOM_UTF32_BE)-1;
341 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
342 script_encoding = zend_multibyte_encoding_utf32le;
343 bom_size = sizeof(BOM_UTF32_LE)-1;
344 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
345 script_encoding = zend_multibyte_encoding_utf16be;
346 bom_size = sizeof(BOM_UTF16_BE)-1;
347 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
348 script_encoding = zend_multibyte_encoding_utf16le;
349 bom_size = sizeof(BOM_UTF16_LE)-1;
350 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
351 script_encoding = zend_multibyte_encoding_utf8;
352 bom_size = sizeof(BOM_UTF8)-1;
353 }
354
355 if (script_encoding) {
356 /* remove BOM */
357 LANG_SCNG(script_org) += bom_size;
358 LANG_SCNG(script_org_size) -= bom_size;
359
360 return script_encoding;
361 }
362
363 /* script contains NULL bytes -> auto-detection */
364 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
365 /* check if the NULL byte is after the __HALT_COMPILER(); */
366 pos2 = LANG_SCNG(script_org);
367
368 while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
369 pos2 = memchr(pos2, '_', pos1 - pos2);
370 if (!pos2) break;
371 pos2++;
372 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
373 pos2 += sizeof("_HALT_COMPILER")-1;
374 while (*pos2 == ' ' ||
375 *pos2 == '\t' ||
376 *pos2 == '\r' ||
377 *pos2 == '\n') {
378 pos2++;
379 }
380 if (*pos2 == '(') {
381 pos2++;
382 while (*pos2 == ' ' ||
383 *pos2 == '\t' ||
384 *pos2 == '\r' ||
385 *pos2 == '\n') {
386 pos2++;
387 }
388 if (*pos2 == ')') {
389 pos2++;
390 while (*pos2 == ' ' ||
391 *pos2 == '\t' ||
392 *pos2 == '\r' ||
393 *pos2 == '\n') {
394 pos2++;
395 }
396 if (*pos2 == ';') {
397 return NULL;
398 }
399 }
400 }
401 }
402 }
403 /* make best effort if BOM is missing */
404 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
405 }
406
407 return NULL;
408 }
409
zend_multibyte_find_script_encoding(TSRMLS_D)410 static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
411 {
412 const zend_encoding *script_encoding;
413
414 if (CG(detect_unicode)) {
415 /* check out bom(byte order mark) and see if containing wchars */
416 script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
417 if (script_encoding != NULL) {
418 /* bom or wchar detection is prior to 'script_encoding' option */
419 return script_encoding;
420 }
421 }
422
423 /* if no script_encoding specified, just leave alone */
424 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
425 return NULL;
426 }
427
428 /* if multiple encodings specified, detect automagically */
429 if (CG(script_encoding_list_size) > 1) {
430 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
431 }
432
433 return CG(script_encoding_list)[0];
434 }
435
zend_multibyte_set_filter(const zend_encoding * onetime_encoding TSRMLS_DC)436 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
437 {
438 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
439 const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
440
441 if (!script_encoding) {
442 return FAILURE;
443 }
444
445 /* judge input/output filter */
446 LANG_SCNG(script_encoding) = script_encoding;
447 LANG_SCNG(input_filter) = NULL;
448 LANG_SCNG(output_filter) = NULL;
449
450 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
451 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
452 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
453 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
454 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
455 } else {
456 LANG_SCNG(input_filter) = NULL;
457 LANG_SCNG(output_filter) = NULL;
458 }
459 return SUCCESS;
460 }
461
462 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
463 LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
464 LANG_SCNG(output_filter) = NULL;
465 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
466 LANG_SCNG(input_filter) = NULL;
467 LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
468 } else {
469 /* both script and internal encodings are incompatible w/ flex */
470 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
471 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
472 }
473
474 return 0;
475 }
476
open_file_for_scanning(zend_file_handle * file_handle TSRMLS_DC)477 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
478 {
479 const char *file_path = NULL;
480 char *buf;
481 size_t size, offset = 0;
482
483 /* The shebang line was read, get the current position to obtain the buffer start */
484 if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
485 if ((offset = ftell(file_handle->handle.fp)) == -1) {
486 offset = 0;
487 }
488 }
489
490 if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
491 return FAILURE;
492 }
493
494 zend_llist_add_element(&CG(open_files), file_handle);
495 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
496 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
497 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
498 fh->handle.stream.handle = (void*)(((char*)fh) + diff);
499 file_handle->handle.stream.handle = fh->handle.stream.handle;
500 }
501
502 /* Reset the scanner for scanning the new file */
503 SCNG(yy_in) = file_handle;
504 SCNG(yy_start) = NULL;
505
506 if (size != -1) {
507 if (CG(multibyte)) {
508 SCNG(script_org) = (unsigned char*)buf;
509 SCNG(script_org_size) = size;
510 SCNG(script_filtered) = NULL;
511
512 zend_multibyte_set_filter(NULL TSRMLS_CC);
513
514 if (SCNG(input_filter)) {
515 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
516 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
517 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
518 }
519 buf = (char*)SCNG(script_filtered);
520 size = SCNG(script_filtered_size);
521 }
522 }
523 SCNG(yy_start) = (unsigned char *)buf - offset;
524 yy_scan_buffer(buf, size TSRMLS_CC);
525 } else {
526 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
527 }
528
529 BEGIN(INITIAL);
530
531 if (file_handle->opened_path) {
532 file_path = file_handle->opened_path;
533 } else {
534 file_path = file_handle->filename;
535 }
536
537 zend_set_compiled_filename(file_path TSRMLS_CC);
538
539 if (CG(start_lineno)) {
540 CG(zend_lineno) = CG(start_lineno);
541 CG(start_lineno) = 0;
542 } else {
543 CG(zend_lineno) = 1;
544 }
545
546 RESET_DOC_COMMENT();
547 CG(increment_lineno) = 0;
548 return SUCCESS;
549 }
END_EXTERN_C()550 END_EXTERN_C()
551
552
553 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
554 {
555 zend_lex_state original_lex_state;
556 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
557 zend_op_array *original_active_op_array = CG(active_op_array);
558 zend_op_array *retval=NULL;
559 int compiler_result;
560 zend_bool compilation_successful=0;
561 znode retval_znode;
562 zend_bool original_in_compilation = CG(in_compilation);
563
564 retval_znode.op_type = IS_CONST;
565 INIT_PZVAL(&retval_znode.u.constant);
566 ZVAL_LONG(&retval_znode.u.constant, 1);
567
568 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
569
570 retval = op_array; /* success oriented */
571
572 if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
573 if (type==ZEND_REQUIRE) {
574 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
575 zend_bailout();
576 } else {
577 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
578 }
579 compilation_successful=0;
580 } else {
581 init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
582 CG(in_compilation) = 1;
583 CG(active_op_array) = op_array;
584 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
585 zend_init_compiler_context(TSRMLS_C);
586 compiler_result = zendparse(TSRMLS_C);
587 zend_do_return(&retval_znode, 0 TSRMLS_CC);
588 CG(in_compilation) = original_in_compilation;
589 if (compiler_result != 0) { /* parser error */
590 zend_bailout();
591 }
592 compilation_successful=1;
593 }
594
595 if (retval) {
596 CG(active_op_array) = original_active_op_array;
597 if (compilation_successful) {
598 pass_two(op_array TSRMLS_CC);
599 zend_release_labels(0 TSRMLS_CC);
600 } else {
601 efree(op_array);
602 retval = NULL;
603 }
604 }
605 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
606 return retval;
607 }
608
609
compile_filename(int type,zval * filename TSRMLS_DC)610 zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
611 {
612 zend_file_handle file_handle;
613 zval tmp;
614 zend_op_array *retval;
615 char *opened_path = NULL;
616
617 if (filename->type != IS_STRING) {
618 tmp = *filename;
619 zval_copy_ctor(&tmp);
620 convert_to_string(&tmp);
621 filename = &tmp;
622 }
623 file_handle.filename = Z_STRVAL_P(filename);
624 file_handle.free_filename = 0;
625 file_handle.type = ZEND_HANDLE_FILENAME;
626 file_handle.opened_path = NULL;
627 file_handle.handle.fp = NULL;
628
629 retval = zend_compile_file(&file_handle, type TSRMLS_CC);
630 if (retval && file_handle.handle.stream.handle) {
631 int dummy = 1;
632
633 if (!file_handle.opened_path) {
634 file_handle.opened_path = opened_path = estrndup(Z_STRVAL_P(filename), Z_STRLEN_P(filename));
635 }
636
637 zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
638
639 if (opened_path) {
640 efree(opened_path);
641 }
642 }
643 zend_destroy_file_handle(&file_handle TSRMLS_CC);
644
645 if (filename==&tmp) {
646 zval_dtor(&tmp);
647 }
648 return retval;
649 }
650
zend_prepare_string_for_scanning(zval * str,char * filename TSRMLS_DC)651 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
652 {
653 char *buf;
654 size_t size;
655
656 /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
657 Z_STRVAL_P(str) = str_erealloc(Z_STRVAL_P(str), Z_STRLEN_P(str) + ZEND_MMAP_AHEAD);
658 memset(Z_STRVAL_P(str) + Z_STRLEN_P(str), 0, ZEND_MMAP_AHEAD);
659
660 SCNG(yy_in) = NULL;
661 SCNG(yy_start) = NULL;
662
663 buf = Z_STRVAL_P(str);
664 size = Z_STRLEN_P(str);
665
666 if (CG(multibyte)) {
667 SCNG(script_org) = (unsigned char*)buf;
668 SCNG(script_org_size) = size;
669 SCNG(script_filtered) = NULL;
670
671 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
672
673 if (SCNG(input_filter)) {
674 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
675 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
676 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
677 }
678 buf = (char*)SCNG(script_filtered);
679 size = SCNG(script_filtered_size);
680 }
681 }
682
683 yy_scan_buffer(buf, size TSRMLS_CC);
684
685 zend_set_compiled_filename(filename TSRMLS_CC);
686 CG(zend_lineno) = 1;
687 CG(increment_lineno) = 0;
688 RESET_DOC_COMMENT();
689 return SUCCESS;
690 }
691
692
zend_get_scanned_file_offset(TSRMLS_D)693 ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
694 {
695 size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
696 if (SCNG(input_filter)) {
697 size_t original_offset = offset, length = 0;
698 do {
699 unsigned char *p = NULL;
700 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
701 return (size_t)-1;
702 }
703 efree(p);
704 if (length > original_offset) {
705 offset--;
706 } else if (length < original_offset) {
707 offset++;
708 }
709 } while (original_offset != length);
710 }
711 return offset;
712 }
713
714
compile_string(zval * source_string,char * filename TSRMLS_DC)715 zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
716 {
717 zend_lex_state original_lex_state;
718 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
719 zend_op_array *original_active_op_array = CG(active_op_array);
720 zend_op_array *retval;
721 zval tmp;
722 int compiler_result;
723 zend_bool original_in_compilation = CG(in_compilation);
724
725 if (Z_STRLEN_P(source_string)==0) {
726 efree(op_array);
727 return NULL;
728 }
729
730 CG(in_compilation) = 1;
731
732 tmp = *source_string;
733 zval_copy_ctor(&tmp);
734 convert_to_string(&tmp);
735 source_string = &tmp;
736
737 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
738 if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
739 efree(op_array);
740 retval = NULL;
741 } else {
742 zend_bool orig_interactive = CG(interactive);
743
744 CG(interactive) = 0;
745 init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
746 CG(interactive) = orig_interactive;
747 CG(active_op_array) = op_array;
748 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
749 zend_init_compiler_context(TSRMLS_C);
750 BEGIN(ST_IN_SCRIPTING);
751 compiler_result = zendparse(TSRMLS_C);
752
753 if (SCNG(script_filtered)) {
754 efree(SCNG(script_filtered));
755 SCNG(script_filtered) = NULL;
756 }
757
758 if (compiler_result != 0) {
759 CG(active_op_array) = original_active_op_array;
760 CG(unclean_shutdown)=1;
761 destroy_op_array(op_array TSRMLS_CC);
762 efree(op_array);
763 retval = NULL;
764 } else {
765 zend_do_return(NULL, 0 TSRMLS_CC);
766 CG(active_op_array) = original_active_op_array;
767 pass_two(op_array TSRMLS_CC);
768 zend_release_labels(0 TSRMLS_CC);
769 retval = op_array;
770 }
771 }
772 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
773 zval_dtor(&tmp);
774 CG(in_compilation) = original_in_compilation;
775 return retval;
776 }
777
778
BEGIN_EXTERN_C()779 BEGIN_EXTERN_C()
780 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
781 {
782 zend_lex_state original_lex_state;
783 zend_file_handle file_handle;
784
785 file_handle.type = ZEND_HANDLE_FILENAME;
786 file_handle.filename = filename;
787 file_handle.free_filename = 0;
788 file_handle.opened_path = NULL;
789 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
790 if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
791 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
792 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
793 return FAILURE;
794 }
795 zend_highlight(syntax_highlighter_ini TSRMLS_CC);
796 if (SCNG(script_filtered)) {
797 efree(SCNG(script_filtered));
798 SCNG(script_filtered) = NULL;
799 }
800 zend_destroy_file_handle(&file_handle TSRMLS_CC);
801 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
802 return SUCCESS;
803 }
804
/* Syntax-highlight the PHP code held in str using the given highlighter
 * settings; str_name is used as the compiled filename.
 *
 * Works on a private copy of str, which is destroyed on every path (including
 * the failure path). The lexical state is saved on entry and restored before
 * returning. Returns FAILURE when the string cannot be prepared for scanning,
 * SUCCESS otherwise. */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
	if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
		/* do not leak the private copy when bailing out early */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
	zval_dtor(str);
	return SUCCESS;
}
827
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding TSRMLS_DC)828 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
829 {
830 size_t length;
831 unsigned char *new_yy_start;
832
833 /* convert and set */
834 if (!SCNG(input_filter)) {
835 if (SCNG(script_filtered)) {
836 efree(SCNG(script_filtered));
837 SCNG(script_filtered) = NULL;
838 }
839 SCNG(script_filtered_size) = 0;
840 length = SCNG(script_org_size);
841 new_yy_start = SCNG(script_org);
842 } else {
843 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
844 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
845 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
846 }
847 SCNG(script_filtered) = new_yy_start;
848 SCNG(script_filtered_size) = length;
849 }
850
851 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
852 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
853 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
854 SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
855
856 SCNG(yy_start) = new_yy_start;
857 }
858
859
/* Store a copy of the yyleng bytes at yytext as the string value of zendlval.
 * With an output filter set, the filter produces the string and its length;
 * otherwise the bytes are duplicated with estrndup().
 * Expands to a bare if/else statement: use it as a statement of its own and
 * not as the unbraced body of another if. Arguments are evaluated more than
 * once. */
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (SCNG(output_filter)) { \
		size_t sz = 0; \
		SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
		Z_STRLEN_P(zendlval) = sz; \
	} else { \
		Z_STRVAL_P(zendlval) = (char *) estrndup((yytext), (yyleng)); \
		Z_STRLEN_P(zendlval) = (yyleng); \
	}
869
/* Build the string value of zendlval from the len bytes at str, resolving
 * escape sequences.
 *
 * The text is first duplicated into zendlval and then rewritten in place: s is
 * the read position, t the write position (t never overtakes s), and the
 * string length is decremented for every byte an escape sequence saves.
 *
 * Handled after a backslash: n r t f v e, the backslash itself, '$', the active
 * quote character (quote_type, for '"' and '`'), \x or \X with one or two hex
 * digits, and one to three octal digits. Anything else is kept verbatim
 * together with its backslash. Line breaks passed over are counted into
 * CG(zend_lineno). Finally the result is run through the output filter, if one
 * is set.
 *
 * The look-ahead reads at s+1 rely on the duplicated buffer being
 * NUL-terminated -- presumably guaranteed by ZVAL_STRINGL; TODO confirm. */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
	register char *s, *t;
	char *end;

	ZVAL_STRINGL(zendlval, str, len, 1);

	/* convert escape sequences */
	s = t = Z_STRVAL_P(zendlval);
	end = s+Z_STRLEN_P(zendlval);
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				/* lone backslash at the very end is kept */
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'r':
					*t++ = '\r';
					Z_STRLEN_P(zendlval)--;
					break;
				case 't':
					*t++ = '\t';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'f':
					*t++ = '\f';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'v':
					*t++ = '\v';
					Z_STRLEN_P(zendlval)--;
					break;
				case 'e':
					/* ESC (0x1B); written in octal because '\e' is not standard C */
					*t++ = '\033';
					Z_STRLEN_P(zendlval)--;
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						/* not the active quote character: keep the backslash */
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fallthrough: escaped active quote character */
				case '\\':
				case '$':
					*t++ = *s;
					Z_STRLEN_P(zendlval)--;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						Z_STRLEN_P(zendlval)--; /* for the 'x' */

						hex_buf[0] = *(++s);
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
						}
						*t++ = (char) strtol(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						Z_STRLEN_P(zendlval)--;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							Z_STRLEN_P(zendlval)--;
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
								Z_STRLEN_P(zendlval)--;
							}
						}
						*t++ = (char) strtol(octal_buf, NULL, 8);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* count the line break at the read position ("\r\n" counts once) */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	if (SCNG(output_filter)) {
		size_t sz = 0;
		/* the filter stores a new buffer in zendlval; free the unfiltered one */
		s = Z_STRVAL_P(zendlval);
		SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
		Z_STRLEN_P(zendlval) = sz;
		efree(s);
	}
}
988
989
lex_scan(zval * zendlval TSRMLS_DC)990 int lex_scan(zval *zendlval TSRMLS_DC)
991 {
992 restart:
993 SCNG(yy_text) = YYCURSOR;
994
995 yymore_restart:
996
997 /*!re2c
998 re2c:yyfill:check = 0;
999 LNUM [0-9]+
1000 DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1001 EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1002 HNUM "0x"[0-9a-fA-F]+
1003 BNUM "0b"[01]+
1004 LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1005 WHITESPACE [ \n\r\t]+
1006 TABS_AND_SPACES [ \t]*
1007 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1008 ANY_CHAR [^]
1009 NEWLINE ("\r"|"\n"|"\r\n")
1010
1011 /* compute yyleng before each rule */
1012 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1013
1014 <ST_IN_SCRIPTING>"exit" {
1015 return T_EXIT;
1016 }
1017
1018 <ST_IN_SCRIPTING>"die" {
1019 return T_EXIT;
1020 }
1021
1022 <ST_IN_SCRIPTING>"function" {
1023 return T_FUNCTION;
1024 }
1025
1026 <ST_IN_SCRIPTING>"const" {
1027 return T_CONST;
1028 }
1029
1030 <ST_IN_SCRIPTING>"return" {
1031 return T_RETURN;
1032 }
1033
1034 <ST_IN_SCRIPTING>"yield" {
1035 return T_YIELD;
1036 }
1037
1038 <ST_IN_SCRIPTING>"try" {
1039 return T_TRY;
1040 }
1041
1042 <ST_IN_SCRIPTING>"catch" {
1043 return T_CATCH;
1044 }
1045
1046 <ST_IN_SCRIPTING>"finally" {
1047 return T_FINALLY;
1048 }
1049
1050 <ST_IN_SCRIPTING>"throw" {
1051 return T_THROW;
1052 }
1053
1054 <ST_IN_SCRIPTING>"if" {
1055 return T_IF;
1056 }
1057
1058 <ST_IN_SCRIPTING>"elseif" {
1059 return T_ELSEIF;
1060 }
1061
1062 <ST_IN_SCRIPTING>"endif" {
1063 return T_ENDIF;
1064 }
1065
1066 <ST_IN_SCRIPTING>"else" {
1067 return T_ELSE;
1068 }
1069
1070 <ST_IN_SCRIPTING>"while" {
1071 return T_WHILE;
1072 }
1073
1074 <ST_IN_SCRIPTING>"endwhile" {
1075 return T_ENDWHILE;
1076 }
1077
1078 <ST_IN_SCRIPTING>"do" {
1079 return T_DO;
1080 }
1081
1082 <ST_IN_SCRIPTING>"for" {
1083 return T_FOR;
1084 }
1085
1086 <ST_IN_SCRIPTING>"endfor" {
1087 return T_ENDFOR;
1088 }
1089
1090 <ST_IN_SCRIPTING>"foreach" {
1091 return T_FOREACH;
1092 }
1093
1094 <ST_IN_SCRIPTING>"endforeach" {
1095 return T_ENDFOREACH;
1096 }
1097
1098 <ST_IN_SCRIPTING>"declare" {
1099 return T_DECLARE;
1100 }
1101
1102 <ST_IN_SCRIPTING>"enddeclare" {
1103 return T_ENDDECLARE;
1104 }
1105
1106 <ST_IN_SCRIPTING>"instanceof" {
1107 return T_INSTANCEOF;
1108 }
1109
1110 <ST_IN_SCRIPTING>"as" {
1111 return T_AS;
1112 }
1113
1114 <ST_IN_SCRIPTING>"switch" {
1115 return T_SWITCH;
1116 }
1117
1118 <ST_IN_SCRIPTING>"endswitch" {
1119 return T_ENDSWITCH;
1120 }
1121
1122 <ST_IN_SCRIPTING>"case" {
1123 return T_CASE;
1124 }
1125
1126 <ST_IN_SCRIPTING>"default" {
1127 return T_DEFAULT;
1128 }
1129
1130 <ST_IN_SCRIPTING>"break" {
1131 return T_BREAK;
1132 }
1133
1134 <ST_IN_SCRIPTING>"continue" {
1135 return T_CONTINUE;
1136 }
1137
1138 <ST_IN_SCRIPTING>"goto" {
1139 return T_GOTO;
1140 }
1141
1142 <ST_IN_SCRIPTING>"echo" {
1143 return T_ECHO;
1144 }
1145
1146 <ST_IN_SCRIPTING>"print" {
1147 return T_PRINT;
1148 }
1149
1150 <ST_IN_SCRIPTING>"class" {
1151 return T_CLASS;
1152 }
1153
1154 <ST_IN_SCRIPTING>"interface" {
1155 return T_INTERFACE;
1156 }
1157
1158 <ST_IN_SCRIPTING>"trait" {
1159 return T_TRAIT;
1160 }
1161
1162 <ST_IN_SCRIPTING>"extends" {
1163 return T_EXTENDS;
1164 }
1165
1166 <ST_IN_SCRIPTING>"implements" {
1167 return T_IMPLEMENTS;
1168 }
1169
1170 <ST_IN_SCRIPTING>"->" {
1171 yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1172 return T_OBJECT_OPERATOR;
1173 }
1174
1175 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1176 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1177 HANDLE_NEWLINES(yytext, yyleng);
1178 return T_WHITESPACE;
1179 }
1180
1181 <ST_LOOKING_FOR_PROPERTY>"->" {
1182 return T_OBJECT_OPERATOR;
1183 }
1184
1185 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1186 yy_pop_state(TSRMLS_C);
1187 zend_copy_value(zendlval, yytext, yyleng);
1188 zendlval->type = IS_STRING;
1189 return T_STRING;
1190 }
1191
1192 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1193 yyless(0);
1194 yy_pop_state(TSRMLS_C);
1195 goto restart;
1196 }
1197
1198 <ST_IN_SCRIPTING>"::" {
1199 return T_PAAMAYIM_NEKUDOTAYIM;
1200 }
1201
1202 <ST_IN_SCRIPTING>"\\" {
1203 return T_NS_SEPARATOR;
1204 }
1205
1206 <ST_IN_SCRIPTING>"..." {
1207 return T_ELLIPSIS;
1208 }
1209
1210 <ST_IN_SCRIPTING>"new" {
1211 return T_NEW;
1212 }
1213
1214 <ST_IN_SCRIPTING>"clone" {
1215 return T_CLONE;
1216 }
1217
1218 <ST_IN_SCRIPTING>"var" {
1219 return T_VAR;
1220 }
1221
1222 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1223 return T_INT_CAST;
1224 }
1225
1226 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1227 return T_DOUBLE_CAST;
1228 }
1229
1230 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1231 return T_STRING_CAST;
1232 }
1233
1234 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1235 return T_ARRAY_CAST;
1236 }
1237
1238 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1239 return T_OBJECT_CAST;
1240 }
1241
1242 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1243 return T_BOOL_CAST;
1244 }
1245
1246 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1247 return T_UNSET_CAST;
1248 }
1249
1250 <ST_IN_SCRIPTING>"eval" {
1251 return T_EVAL;
1252 }
1253
1254 <ST_IN_SCRIPTING>"include" {
1255 return T_INCLUDE;
1256 }
1257
1258 <ST_IN_SCRIPTING>"include_once" {
1259 return T_INCLUDE_ONCE;
1260 }
1261
1262 <ST_IN_SCRIPTING>"require" {
1263 return T_REQUIRE;
1264 }
1265
1266 <ST_IN_SCRIPTING>"require_once" {
1267 return T_REQUIRE_ONCE;
1268 }
1269
1270 <ST_IN_SCRIPTING>"namespace" {
1271 return T_NAMESPACE;
1272 }
1273
1274 <ST_IN_SCRIPTING>"use" {
1275 return T_USE;
1276 }
1277
1278 <ST_IN_SCRIPTING>"insteadof" {
1279 return T_INSTEADOF;
1280 }
1281
1282 <ST_IN_SCRIPTING>"global" {
1283 return T_GLOBAL;
1284 }
1285
1286 <ST_IN_SCRIPTING>"isset" {
1287 return T_ISSET;
1288 }
1289
1290 <ST_IN_SCRIPTING>"empty" {
1291 return T_EMPTY;
1292 }
1293
1294 <ST_IN_SCRIPTING>"__halt_compiler" {
1295 return T_HALT_COMPILER;
1296 }
1297
1298 <ST_IN_SCRIPTING>"static" {
1299 return T_STATIC;
1300 }
1301
1302 <ST_IN_SCRIPTING>"abstract" {
1303 return T_ABSTRACT;
1304 }
1305
1306 <ST_IN_SCRIPTING>"final" {
1307 return T_FINAL;
1308 }
1309
1310 <ST_IN_SCRIPTING>"private" {
1311 return T_PRIVATE;
1312 }
1313
1314 <ST_IN_SCRIPTING>"protected" {
1315 return T_PROTECTED;
1316 }
1317
1318 <ST_IN_SCRIPTING>"public" {
1319 return T_PUBLIC;
1320 }
1321
1322 <ST_IN_SCRIPTING>"unset" {
1323 return T_UNSET;
1324 }
1325
1326 <ST_IN_SCRIPTING>"=>" {
1327 return T_DOUBLE_ARROW;
1328 }
1329
1330 <ST_IN_SCRIPTING>"list" {
1331 return T_LIST;
1332 }
1333
1334 <ST_IN_SCRIPTING>"array" {
1335 return T_ARRAY;
1336 }
1337
1338 <ST_IN_SCRIPTING>"callable" {
1339 return T_CALLABLE;
1340 }
1341
1342 <ST_IN_SCRIPTING>"++" {
1343 return T_INC;
1344 }
1345
1346 <ST_IN_SCRIPTING>"--" {
1347 return T_DEC;
1348 }
1349
1350 <ST_IN_SCRIPTING>"===" {
1351 return T_IS_IDENTICAL;
1352 }
1353
1354 <ST_IN_SCRIPTING>"!==" {
1355 return T_IS_NOT_IDENTICAL;
1356 }
1357
1358 <ST_IN_SCRIPTING>"==" {
1359 return T_IS_EQUAL;
1360 }
1361
1362 <ST_IN_SCRIPTING>"!="|"<>" {
1363 return T_IS_NOT_EQUAL;
1364 }
1365
1366 <ST_IN_SCRIPTING>"<=" {
1367 return T_IS_SMALLER_OR_EQUAL;
1368 }
1369
1370 <ST_IN_SCRIPTING>">=" {
1371 return T_IS_GREATER_OR_EQUAL;
1372 }
1373
1374 <ST_IN_SCRIPTING>"+=" {
1375 return T_PLUS_EQUAL;
1376 }
1377
1378 <ST_IN_SCRIPTING>"-=" {
1379 return T_MINUS_EQUAL;
1380 }
1381
1382 <ST_IN_SCRIPTING>"*=" {
1383 return T_MUL_EQUAL;
1384 }
1385
1386 <ST_IN_SCRIPTING>"*\*" {
1387 return T_POW;
1388 }
1389
1390 <ST_IN_SCRIPTING>"*\*=" {
1391 return T_POW_EQUAL;
1392 }
1393
1394 <ST_IN_SCRIPTING>"/=" {
1395 return T_DIV_EQUAL;
1396 }
1397
1398 <ST_IN_SCRIPTING>".=" {
1399 return T_CONCAT_EQUAL;
1400 }
1401
1402 <ST_IN_SCRIPTING>"%=" {
1403 return T_MOD_EQUAL;
1404 }
1405
1406 <ST_IN_SCRIPTING>"<<=" {
1407 return T_SL_EQUAL;
1408 }
1409
1410 <ST_IN_SCRIPTING>">>=" {
1411 return T_SR_EQUAL;
1412 }
1413
1414 <ST_IN_SCRIPTING>"&=" {
1415 return T_AND_EQUAL;
1416 }
1417
1418 <ST_IN_SCRIPTING>"|=" {
1419 return T_OR_EQUAL;
1420 }
1421
1422 <ST_IN_SCRIPTING>"^=" {
1423 return T_XOR_EQUAL;
1424 }
1425
1426 <ST_IN_SCRIPTING>"||" {
1427 return T_BOOLEAN_OR;
1428 }
1429
1430 <ST_IN_SCRIPTING>"&&" {
1431 return T_BOOLEAN_AND;
1432 }
1433
1434 <ST_IN_SCRIPTING>"OR" {
1435 return T_LOGICAL_OR;
1436 }
1437
1438 <ST_IN_SCRIPTING>"AND" {
1439 return T_LOGICAL_AND;
1440 }
1441
1442 <ST_IN_SCRIPTING>"XOR" {
1443 return T_LOGICAL_XOR;
1444 }
1445
1446 <ST_IN_SCRIPTING>"<<" {
1447 return T_SL;
1448 }
1449
1450 <ST_IN_SCRIPTING>">>" {
1451 return T_SR;
1452 }
1453
1454 <ST_IN_SCRIPTING>{TOKENS} {
1455 return yytext[0];
1456 }
1457
1458
1459 <ST_IN_SCRIPTING>"{" {
1460 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1461 return '{';
1462 }
1463
1464
1465 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1466 yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1467 return T_DOLLAR_OPEN_CURLY_BRACES;
1468 }
1469
1470
1471 <ST_IN_SCRIPTING>"}" {
1472 RESET_DOC_COMMENT();
1473 if (!zend_stack_is_empty(&SCNG(state_stack))) {
1474 yy_pop_state(TSRMLS_C);
1475 }
1476 return '}';
1477 }
1478
1479
1480 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1481 yyless(yyleng - 1);
1482 zend_copy_value(zendlval, yytext, yyleng);
1483 zendlval->type = IS_STRING;
1484 yy_pop_state(TSRMLS_C);
1485 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1486 return T_STRING_VARNAME;
1487 }
1488
1489
1490 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1491 yyless(0);
1492 yy_pop_state(TSRMLS_C);
1493 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1494 goto restart;
1495 }
1496
1497 <ST_IN_SCRIPTING>{BNUM} {
1498 char *bin = yytext + 2; /* Skip "0b" */
1499 int len = yyleng - 2;
1500
1501 /* Skip any leading 0s */
1502 while (*bin == '0') {
1503 ++bin;
1504 --len;
1505 }
1506
1507 if (len < SIZEOF_LONG * 8) {
1508 if (len == 0) {
1509 Z_LVAL_P(zendlval) = 0;
1510 } else {
1511 Z_LVAL_P(zendlval) = strtol(bin, NULL, 2);
1512 }
1513 zendlval->type = IS_LONG;
1514 return T_LNUMBER;
1515 } else {
1516 ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, NULL));
1517 return T_DNUMBER;
1518 }
1519 }
1520
1521 <ST_IN_SCRIPTING>{LNUM} {
1522 if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1523 Z_LVAL_P(zendlval) = strtol(yytext, NULL, 0);
1524 } else {
1525 errno = 0;
1526 Z_LVAL_P(zendlval) = strtol(yytext, NULL, 0);
1527 if (errno == ERANGE) { /* Overflow */
1528 if (yytext[0] == '0') { /* octal overflow */
1529 Z_DVAL_P(zendlval) = zend_oct_strtod(yytext, NULL);
1530 } else {
1531 Z_DVAL_P(zendlval) = zend_strtod(yytext, NULL);
1532 }
1533 zendlval->type = IS_DOUBLE;
1534 return T_DNUMBER;
1535 }
1536 }
1537
1538 zendlval->type = IS_LONG;
1539 return T_LNUMBER;
1540 }
1541
1542 <ST_IN_SCRIPTING>{HNUM} {
1543 char *hex = yytext + 2; /* Skip "0x" */
1544 int len = yyleng - 2;
1545
1546 /* Skip any leading 0s */
1547 while (*hex == '0') {
1548 hex++;
1549 len--;
1550 }
1551
1552 if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1553 if (len == 0) {
1554 Z_LVAL_P(zendlval) = 0;
1555 } else {
1556 Z_LVAL_P(zendlval) = strtol(hex, NULL, 16);
1557 }
1558 zendlval->type = IS_LONG;
1559 return T_LNUMBER;
1560 } else {
1561 ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, NULL));
1562 return T_DNUMBER;
1563 }
1564 }
1565
1566 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1567 if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1568 ZVAL_LONG(zendlval, strtol(yytext, NULL, 10));
1569 } else {
1570 ZVAL_STRINGL(zendlval, yytext, yyleng, 1);
1571 }
1572 return T_NUM_STRING;
1573 }
1574
1575 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1576 ZVAL_STRINGL(zendlval, yytext, yyleng, 1);
1577 return T_NUM_STRING;
1578 }
1579
1580 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1581 ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1582 return T_DNUMBER;
1583 }
1584
1585 <ST_IN_SCRIPTING>"__CLASS__" {
1586 zend_class_entry *ce = CG(active_class_entry);
1587 if (ce && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1588 /* We create a special __CLASS__ constant that is going to be resolved
1589 at run-time */
1590 Z_STRLEN_P(zendlval) = sizeof("__CLASS__")-1;
1591 Z_STRVAL_P(zendlval) = estrndup("__CLASS__", Z_STRLEN_P(zendlval));
1592 zendlval->type = IS_CONSTANT;
1593 } else {
1594 if (ce && ce->name) {
1595 ZVAL_STRINGL(zendlval, ce->name, ce->name_length, 1);
1596 } else {
1597 ZVAL_EMPTY_STRING(zendlval);
1598 }
1599 }
1600 return T_CLASS_C;
1601 }
1602
1603 <ST_IN_SCRIPTING>"__TRAIT__" {
1604 zend_class_entry *ce = CG(active_class_entry);
1605 if (ce && ce->name && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1606 ZVAL_STRINGL(zendlval, ce->name, ce->name_length, 1);
1607 } else {
1608 ZVAL_EMPTY_STRING(zendlval);
1609 }
1610 return T_TRAIT_C;
1611 }
1612
1613 <ST_IN_SCRIPTING>"__FUNCTION__" {
1614 zend_op_array *op_array = CG(active_op_array);
1615 if (op_array && op_array->function_name) {
1616 ZVAL_STRING(zendlval, op_array->function_name, 1);
1617 } else {
1618 ZVAL_EMPTY_STRING(zendlval);
1619 }
1620 return T_FUNC_C;
1621 }
1622
1623 <ST_IN_SCRIPTING>"__METHOD__" {
1624 const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1625 const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1626
1627 Z_STRLEN_P(zendlval) = zend_spprintf(&Z_STRVAL_P(zendlval), 0, "%s%s%s",
1628 class_name ? class_name : "",
1629 class_name && func_name ? "::" : "",
1630 func_name ? func_name : ""
1631 );
1632 zendlval->type = IS_STRING;
1633 return T_METHOD_C;
1634 }
1635
1636 <ST_IN_SCRIPTING>"__LINE__" {
1637 ZVAL_LONG(zendlval, CG(zend_lineno));
1638 return T_LINE;
1639 }
1640
1641 <ST_IN_SCRIPTING>"__FILE__" {
1642 char *filename = zend_get_compiled_filename(TSRMLS_C);
1643
1644 if (!filename) {
1645 filename = "";
1646 }
1647 ZVAL_STRING(zendlval, filename, 1);
1648 return T_FILE;
1649 }
1650
1651 <ST_IN_SCRIPTING>"__DIR__" {
1652 char *filename = zend_get_compiled_filename(TSRMLS_C);
1653 const size_t filename_len = strlen(filename);
1654 char *dirname;
1655
1656 if (!filename) {
1657 filename = "";
1658 }
1659
1660 dirname = estrndup(filename, filename_len);
1661 zend_dirname(dirname, filename_len);
1662
1663 if (strcmp(dirname, ".") == 0) {
1664 dirname = erealloc(dirname, MAXPATHLEN);
1665 #if HAVE_GETCWD
1666 VCWD_GETCWD(dirname, MAXPATHLEN);
1667 #elif HAVE_GETWD
1668 VCWD_GETWD(dirname);
1669 #endif
1670 }
1671
1672 ZVAL_STRING(zendlval, dirname, 0);
1673 return T_DIR;
1674 }
1675
1676 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1677 if (CG(current_namespace)) {
1678 *zendlval = *CG(current_namespace);
1679 zval_copy_ctor(zendlval);
1680 } else {
1681 ZVAL_EMPTY_STRING(zendlval);
1682 }
1683 return T_NS_C;
1684 }
1685
1686 <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1687 YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1688
1689 if (bracket != SCNG(yy_text)) {
1690 /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1691 YYCURSOR = bracket;
1692 goto inline_html;
1693 }
1694
1695 HANDLE_NEWLINES(yytext, yyleng);
1696 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1697 BEGIN(ST_IN_SCRIPTING);
1698 return T_OPEN_TAG;
1699 }
1700
1701
1702 <INITIAL>"<%=" {
1703 if (CG(asp_tags)) {
1704 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1705 BEGIN(ST_IN_SCRIPTING);
1706 return T_OPEN_TAG_WITH_ECHO;
1707 } else {
1708 goto inline_char_handler;
1709 }
1710 }
1711
1712
1713 <INITIAL>"<?=" {
1714 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1715 BEGIN(ST_IN_SCRIPTING);
1716 return T_OPEN_TAG_WITH_ECHO;
1717 }
1718
1719
1720 <INITIAL>"<%" {
1721 if (CG(asp_tags)) {
1722 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1723 BEGIN(ST_IN_SCRIPTING);
1724 return T_OPEN_TAG;
1725 } else {
1726 goto inline_char_handler;
1727 }
1728 }
1729
1730
1731 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1732 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1733 HANDLE_NEWLINE(yytext[yyleng-1]);
1734 BEGIN(ST_IN_SCRIPTING);
1735 return T_OPEN_TAG;
1736 }
1737
1738
1739 <INITIAL>"<?" {
1740 if (CG(short_tags)) {
1741 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1742 BEGIN(ST_IN_SCRIPTING);
1743 return T_OPEN_TAG;
1744 } else {
1745 goto inline_char_handler;
1746 }
1747 }
1748
1749 <INITIAL>{ANY_CHAR} {
1750 if (YYCURSOR > YYLIMIT) {
1751 return 0;
1752 }
1753
1754 inline_char_handler:
1755
1756 while (1) {
1757 YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1758
1759 YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1760
1761 if (YYCURSOR < YYLIMIT) {
1762 switch (*YYCURSOR) {
1763 case '?':
1764 if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1765 break;
1766 }
1767 continue;
1768 case '%':
1769 if (CG(asp_tags)) {
1770 break;
1771 }
1772 continue;
1773 case 's':
1774 case 'S':
1775 /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1776 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1777 YYCURSOR--;
1778 yymore();
1779 default:
1780 continue;
1781 }
1782
1783 YYCURSOR--;
1784 }
1785
1786 break;
1787 }
1788
1789 inline_html:
1790 yyleng = YYCURSOR - SCNG(yy_text);
1791
1792 if (SCNG(output_filter)) {
1793 int readsize;
1794 size_t sz = 0;
1795 readsize = SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1796 Z_STRLEN_P(zendlval) = sz;
1797 if (readsize < yyleng) {
1798 yyless(readsize);
1799 }
1800 } else {
1801 Z_STRVAL_P(zendlval) = (char *) estrndup(yytext, yyleng);
1802 Z_STRLEN_P(zendlval) = yyleng;
1803 }
1804 zendlval->type = IS_STRING;
1805 HANDLE_NEWLINES(yytext, yyleng);
1806 return T_INLINE_HTML;
1807 }
1808
1809
1810 /* Make sure a label character follows "->", otherwise there is no property
1811 * and "->" will be taken literally
1812 */
1813 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1814 yyless(yyleng - 3);
1815 yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1816 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1817 zendlval->type = IS_STRING;
1818 return T_VARIABLE;
1819 }
1820
1821 /* A [ always designates a variable offset, regardless of what follows
1822 */
1823 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1824 yyless(yyleng - 1);
1825 yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1826 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1827 zendlval->type = IS_STRING;
1828 return T_VARIABLE;
1829 }
1830
1831 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1832 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1833 zendlval->type = IS_STRING;
1834 return T_VARIABLE;
1835 }
1836
1837 <ST_VAR_OFFSET>"]" {
1838 yy_pop_state(TSRMLS_C);
1839 return ']';
1840 }
1841
1842 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1843 /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1844 return yytext[0];
1845 }
1846
1847 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1848 /* Invalid rule to return a more explicit parse error with proper line number */
1849 yyless(0);
1850 yy_pop_state(TSRMLS_C);
1851 return T_ENCAPSED_AND_WHITESPACE;
1852 }
1853
1854 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1855 zend_copy_value(zendlval, yytext, yyleng);
1856 zendlval->type = IS_STRING;
1857 return T_STRING;
1858 }
1859
1860
/* One-line comment: the rule matches only the "#" or "//" opener, the action
 * below consumes the rest by hand. The comment ends after the first line
 * break (which becomes part of the token), right before a "?>" (or a "%>"
 * when asp_tags is on), or at YYLIMIT.
 */
<ST_IN_SCRIPTING>"#"|"//" {
	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '\r':
				/* swallow the "\n" of a "\r\n" pair so it counts as one line break */
				if (*YYCURSOR == '\n') {
					YYCURSOR++;
				}
				/* fall through */
			case '\n':
				CG(zend_lineno)++;
				break;
			case '%':
				/* '%' can only start a closing tag when asp_tags is enabled */
				if (!CG(asp_tags)) {
					continue;
				}
				/* fall through */
			case '?':
				if (*YYCURSOR == '>') {
					/* step back onto the '?' / '%' so the closing tag is left unconsumed */
					YYCURSOR--;
					break;
				}
				/* fall through */
			default:
				continue;
		}

		/* reached only through a `break` inside the switch: the comment is complete */
		break;
	}

	/* the cursor was moved by hand, so the token length has to be recomputed */
	yyleng = YYCURSOR - SCNG(yy_text);

	return T_COMMENT;
}
1894
/* Block comment: the rule matches only the opener - either the two characters
 * of a plain comment, or the doc-comment opener followed by whitespace - and
 * the action scans forward by hand for the terminator.
 */
<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
	int doc_com;

	/* more than two characters matched means the doc-comment alternative was taken */
	if (yyleng > 2) {
		doc_com = 1;
		RESET_DOC_COMMENT();
	} else {
		doc_com = 0;
	}

	/* stop with the cursor ON the '/' that follows a '*' */
	while (YYCURSOR < YYLIMIT) {
		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
			break;
		}
	}

	if (YYCURSOR < YYLIMIT) {
		/* terminator found: step past its '/' */
		YYCURSOR++;
	} else {
		/* ran into YYLIMIT: warn, the token then extends to the end of the input */
		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
	}

	/* the cursor was moved by hand, so the token length has to be recomputed */
	yyleng = YYCURSOR - SCNG(yy_text);
	HANDLE_NEWLINES(yytext, yyleng);

	if (doc_com) {
		/* keep a copy of the whole comment text in the compiler globals */
		CG(doc_comment) = estrndup(yytext, yyleng);
		CG(doc_comment_len) = yyleng;
		return T_DOC_COMMENT;
	}

	return T_COMMENT;
}
1928
1929 <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1930 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1931 BEGIN(INITIAL);
1932 return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1933 }
1934
1935
1936 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1937 if (CG(asp_tags)) {
1938 BEGIN(INITIAL);
1939 ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1940 return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1941 } else {
1942 yyless(1);
1943 return yytext[0];
1944 }
1945 }
1946
1947
1948 <ST_IN_SCRIPTING>b?['] {
1949 register char *s, *t;
1950 char *end;
1951 int bprefix = (yytext[0] != '\'') ? 1 : 0;
1952
1953 while (1) {
1954 if (YYCURSOR < YYLIMIT) {
1955 if (*YYCURSOR == '\'') {
1956 YYCURSOR++;
1957 yyleng = YYCURSOR - SCNG(yy_text);
1958
1959 break;
1960 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1961 YYCURSOR++;
1962 }
1963 } else {
1964 yyleng = YYLIMIT - SCNG(yy_text);
1965
1966 /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1967 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1968 * rule, which continued in ST_IN_SCRIPTING state after the quote */
1969 return T_ENCAPSED_AND_WHITESPACE;
1970 }
1971 }
1972
1973 ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2, 1);
1974
1975 /* convert escape sequences */
1976 s = t = Z_STRVAL_P(zendlval);
1977 end = s+Z_STRLEN_P(zendlval);
1978 while (s<end) {
1979 if (*s=='\\') {
1980 s++;
1981
1982 switch(*s) {
1983 case '\\':
1984 case '\'':
1985 *t++ = *s;
1986 Z_STRLEN_P(zendlval)--;
1987 break;
1988 default:
1989 *t++ = '\\';
1990 *t++ = *s;
1991 break;
1992 }
1993 } else {
1994 *t++ = *s;
1995 }
1996
1997 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1998 CG(zend_lineno)++;
1999 }
2000 s++;
2001 }
2002 *t = 0;
2003
2004 if (SCNG(output_filter)) {
2005 size_t sz = 0;
2006 s = Z_STRVAL_P(zendlval);
2007 SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
2008 Z_STRLEN_P(zendlval) = sz;
2009 efree(s);
2010 }
2011 return T_CONSTANT_ENCAPSED_STRING;
2012 }
2013
2014
2015 <ST_IN_SCRIPTING>b?["] {
2016 int bprefix = (yytext[0] != '"') ? 1 : 0;
2017
2018 while (YYCURSOR < YYLIMIT) {
2019 switch (*YYCURSOR++) {
2020 case '"':
2021 yyleng = YYCURSOR - SCNG(yy_text);
2022 zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2023 return T_CONSTANT_ENCAPSED_STRING;
2024 case '$':
2025 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2026 break;
2027 }
2028 continue;
2029 case '{':
2030 if (*YYCURSOR == '$') {
2031 break;
2032 }
2033 continue;
2034 case '\\':
2035 if (YYCURSOR < YYLIMIT) {
2036 YYCURSOR++;
2037 }
2038 /* fall through */
2039 default:
2040 continue;
2041 }
2042
2043 YYCURSOR--;
2044 break;
2045 }
2046
2047 /* Remember how much was scanned to save rescanning */
2048 SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2049
2050 YYCURSOR = SCNG(yy_text) + yyleng;
2051
2052 BEGIN(ST_DOUBLE_QUOTES);
2053 return '"';
2054 }
2055
2056
/* Heredoc / nowdoc opener: optional "b" prefix, "<<<", optional blanks, a
 * label that is bare, single-quoted or double-quoted, then a line break.
 * Records the label on SCNG(heredoc_label_stack) and switches to ST_NOWDOC
 * (single-quoted label), ST_HEREDOC (otherwise), or directly to
 * ST_END_HEREDOC when the body is empty.
 */
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
	char *s;
	/* 1 when the token does not start with '<', i.e. the optional "b" prefix is present */
	int bprefix = (yytext[0] != '<') ? 1 : 0;
	zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));

	/* the opener always ends with a line break */
	CG(zend_lineno)++;
	/* token length minus prefix, minus "<<<" (3), minus the line break
	 * (1, or 2 when the last two bytes are "\r\n") */
	heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
	s = yytext+bprefix+3;
	/* blanks between "<<<" and the label are not part of the label */
	while ((*s == ' ') || (*s == '\t')) {
		s++;
		heredoc_label->length--;
	}

	if (*s == '\'') {
		/* single-quoted label: strip both quotes, nowdoc */
		s++;
		heredoc_label->length -= 2;

		BEGIN(ST_NOWDOC);
	} else {
		if (*s == '"') {
			/* double-quoted label: strip both quotes */
			s++;
			heredoc_label->length -= 2;
		}

		BEGIN(ST_HEREDOC);
	}

	/* own copy of the label, independent of the scan buffer */
	heredoc_label->label = estrndup(s, heredoc_label->length);

	/* Check for ending label on the next line */
	if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
		YYCTYPE *end = YYCURSOR + heredoc_label->length;

		/* an optional ';' may follow the closing label */
		if (*end == ';') {
			end++;
		}

		/* label directly followed by a line break: the body is empty */
		if (*end == '\n' || *end == '\r') {
			BEGIN(ST_END_HEREDOC);
		}
	}

	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);

	return T_START_HEREDOC;
}
2103
2104
2105 <ST_IN_SCRIPTING>[`] {
2106 BEGIN(ST_BACKQUOTE);
2107 return '`';
2108 }
2109
2110
2111 <ST_END_HEREDOC>{ANY_CHAR} {
2112 zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2113
2114 YYCURSOR += heredoc_label->length - 1;
2115 yyleng = heredoc_label->length;
2116
2117 heredoc_label_dtor(heredoc_label);
2118 efree(heredoc_label);
2119
2120 BEGIN(ST_IN_SCRIPTING);
2121 return T_END_HEREDOC;
2122 }
2123
2124
2125 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2126 Z_LVAL_P(zendlval) = (long) '{';
2127 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2128 yyless(1);
2129 return T_CURLY_OPEN;
2130 }
2131
2132
2133 <ST_DOUBLE_QUOTES>["] {
2134 BEGIN(ST_IN_SCRIPTING);
2135 return '"';
2136 }
2137
2138 <ST_BACKQUOTE>[`] {
2139 BEGIN(ST_IN_SCRIPTING);
2140 return '`';
2141 }
2142
2143
2144 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2145 if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2146 YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2147 SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2148
2149 goto double_quotes_scan_done;
2150 }
2151
2152 if (YYCURSOR > YYLIMIT) {
2153 return 0;
2154 }
2155 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2156 YYCURSOR++;
2157 }
2158
2159 while (YYCURSOR < YYLIMIT) {
2160 switch (*YYCURSOR++) {
2161 case '"':
2162 break;
2163 case '$':
2164 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2165 break;
2166 }
2167 continue;
2168 case '{':
2169 if (*YYCURSOR == '$') {
2170 break;
2171 }
2172 continue;
2173 case '\\':
2174 if (YYCURSOR < YYLIMIT) {
2175 YYCURSOR++;
2176 }
2177 /* fall through */
2178 default:
2179 continue;
2180 }
2181
2182 YYCURSOR--;
2183 break;
2184 }
2185
2186 double_quotes_scan_done:
2187 yyleng = YYCURSOR - SCNG(yy_text);
2188
2189 zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2190 return T_ENCAPSED_AND_WHITESPACE;
2191 }
2192
2193
2194 <ST_BACKQUOTE>{ANY_CHAR} {
2195 if (YYCURSOR > YYLIMIT) {
2196 return 0;
2197 }
2198 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2199 YYCURSOR++;
2200 }
2201
2202 while (YYCURSOR < YYLIMIT) {
2203 switch (*YYCURSOR++) {
2204 case '`':
2205 break;
2206 case '$':
2207 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2208 break;
2209 }
2210 continue;
2211 case '{':
2212 if (*YYCURSOR == '$') {
2213 break;
2214 }
2215 continue;
2216 case '\\':
2217 if (YYCURSOR < YYLIMIT) {
2218 YYCURSOR++;
2219 }
2220 /* fall through */
2221 default:
2222 continue;
2223 }
2224
2225 YYCURSOR--;
2226 break;
2227 }
2228
2229 yyleng = YYCURSOR - SCNG(yy_text);
2230
2231 zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2232 return T_ENCAPSED_AND_WHITESPACE;
2233 }
2234
2235
2236 <ST_HEREDOC>{ANY_CHAR} {
2237 int newline = 0;
2238
2239 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2240
2241 if (YYCURSOR > YYLIMIT) {
2242 return 0;
2243 }
2244
2245 YYCURSOR--;
2246
2247 while (YYCURSOR < YYLIMIT) {
2248 switch (*YYCURSOR++) {
2249 case '\r':
2250 if (*YYCURSOR == '\n') {
2251 YYCURSOR++;
2252 }
2253 /* fall through */
2254 case '\n':
2255 /* Check for ending label on the next line */
2256 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2257 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2258
2259 if (*end == ';') {
2260 end++;
2261 }
2262
2263 if (*end == '\n' || *end == '\r') {
2264 /* newline before label will be subtracted from returned text, but
2265 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2266 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2267 newline = 2; /* Windows newline */
2268 } else {
2269 newline = 1;
2270 }
2271
2272 CG(increment_lineno) = 1; /* For newline before label */
2273 BEGIN(ST_END_HEREDOC);
2274
2275 goto heredoc_scan_done;
2276 }
2277 }
2278 continue;
2279 case '$':
2280 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2281 break;
2282 }
2283 continue;
2284 case '{':
2285 if (*YYCURSOR == '$') {
2286 break;
2287 }
2288 continue;
2289 case '\\':
2290 if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2291 YYCURSOR++;
2292 }
2293 /* fall through */
2294 default:
2295 continue;
2296 }
2297
2298 YYCURSOR--;
2299 break;
2300 }
2301
2302 heredoc_scan_done:
2303 yyleng = YYCURSOR - SCNG(yy_text);
2304
2305 zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2306 return T_ENCAPSED_AND_WHITESPACE;
2307 }
2308
2309
2310 <ST_NOWDOC>{ANY_CHAR} {
2311 int newline = 0;
2312
2313 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2314
2315 if (YYCURSOR > YYLIMIT) {
2316 return 0;
2317 }
2318
2319 YYCURSOR--;
2320
2321 while (YYCURSOR < YYLIMIT) {
2322 switch (*YYCURSOR++) {
2323 case '\r':
2324 if (*YYCURSOR == '\n') {
2325 YYCURSOR++;
2326 }
2327 /* fall through */
2328 case '\n':
2329 /* Check for ending label on the next line */
2330 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2331 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2332
2333 if (*end == ';') {
2334 end++;
2335 }
2336
2337 if (*end == '\n' || *end == '\r') {
2338 /* newline before label will be subtracted from returned text, but
2339 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2340 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2341 newline = 2; /* Windows newline */
2342 } else {
2343 newline = 1;
2344 }
2345
2346 CG(increment_lineno) = 1; /* For newline before label */
2347 BEGIN(ST_END_HEREDOC);
2348
2349 goto nowdoc_scan_done;
2350 }
2351 }
2352 /* fall through */
2353 default:
2354 continue;
2355 }
2356 }
2357
2358 nowdoc_scan_done:
2359 yyleng = YYCURSOR - SCNG(yy_text);
2360
2361 zend_copy_value(zendlval, yytext, yyleng - newline);
2362 zendlval->type = IS_STRING;
2363 HANDLE_NEWLINES(yytext, yyleng - newline);
2364 return T_ENCAPSED_AND_WHITESPACE;
2365 }
2366
2367
2368 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2369 if (YYCURSOR > YYLIMIT) {
2370 return 0;
2371 }
2372
2373 zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2374 goto restart;
2375 }
2376
2377 */
2378 }
2379