1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1998-2015 Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Marcus Boerger <helly@php.net> |
16 | Nuno Lopes <nlopess@php.net> |
17 | Scott MacVicar <scottmac@php.net> |
18 | Flex version authors: |
19 | Andi Gutmans <andi@zend.com> |
20 | Zeev Suraski <zeev@zend.com> |
21 +----------------------------------------------------------------------+
22 */
23
24 /* $Id$ */
25
26 #if 0
27 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28 #else
29 # define YYDEBUG(s, c)
30 #endif
31
32 #include "zend_language_scanner_defs.h"
33
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef PHP_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "tsrm_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52
/* re2c interface: the generated scanner reaches its input through these. */
#define YYCTYPE unsigned char
/*
 * No refilling is done here: when the scanner asks for n more bytes and
 * YYCURSOR + n would reach YYLIMIT + ZEND_MMAP_AHEAD, the enclosing
 * function returns 0.
 */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)

/* YYGETCONDITION() must stay an lvalue: its address is taken when a state is pushed. */
#define YYGETCONDITION() SCNG(yy_state)
#define YYSETCONDITION(s) (SCNG(yy_state) = (s))

/* Maps a condition name to its yyc-prefixed identifier. */
#define STATE(name) yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE YYGETCONDITION()
#define yytext ((char*)SCNG(yy_text))
#define yyleng SCNG(yy_leng)
/* Keep only the first x bytes of the current token; x is evaluated twice. */
#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + (x); \
                       yyleng = (unsigned int)(x); } while(0)
#define yymore() goto yymore_restart
72
73 /* perform sanity check. If this message is triggered you should
74 increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87
88 /* Globals Macros */
89 #define SCNG LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95
/*
 * Increment CG(zend_lineno) once per line terminator found in the l bytes
 * starting at s. A '\r' that is directly followed by '\n' is not counted on
 * its own, so "\r\n" counts as one line. Note that for a '\r' in the last
 * position the byte at s[l] is inspected.
 */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *p = (s), *boundary = p+(l); \
	\
	while (p<boundary) { \
		if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \
			CG(zend_lineno)++; \
		} \
		p++; \
	} \
} while (0)

/*
 * Increment CG(zend_lineno) when the single character c is '\n' or '\r'.
 * c is evaluated twice. The expansion is a bare brace block (not a
 * do/while), which is kept so existing call sites keep compiling.
 */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114
/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len)

/*
 * True for ASCII letters, '_' and every byte value >= 0x7F.
 * The high-byte test goes through unsigned char so that the result does not
 * depend on whether the argument is a (possibly signed) char or an
 * unsigned char. The argument is evaluated several times.
 */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (unsigned char)(c) >= 0x7F)

/* True for the octal digits '0'..'7'. */
#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
/* True for '0'..'9', 'a'..'f' and 'A'..'F'. */
#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127 {
128 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129 assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131 }
132
/*
 * Encoding filter: hands the buffer to zend_multibyte_encoding_converter()
 * with UTF-8 and the current script encoding (in that order) and returns
 * the converter's result.
 */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		zend_multibyte_encoding_utf8,
		LANG_SCNG(script_encoding)
		TSRMLS_CC);
}
137
/*
 * Encoding filter: hands the buffer to zend_multibyte_encoding_converter()
 * with the current script encoding and UTF-8 (in that order) and returns
 * the converter's result.
 */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		LANG_SCNG(script_encoding),
		zend_multibyte_encoding_utf8
		TSRMLS_CC);
}
143
/*
 * Encoding filter: hands the buffer to zend_multibyte_encoding_converter()
 * with the engine's internal encoding and UTF-8 (in that order) and returns
 * the converter's result.
 * The internal encoding must exist and be lexer-compatible (asserted).
 */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *internal_encoding;

	internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
	assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));

	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		internal_encoding,
		zend_multibyte_encoding_utf8
		TSRMLS_CC);
}
151
152
/*
 * Saves the current scanner condition on SCNG(state_stack) and then makes
 * new_state the current condition.
 */
static void _yy_push_state(int new_state TSRMLS_DC)
{
	void *current_state = (void *) &YYGETCONDITION();

	zend_stack_push(&SCNG(state_stack), current_state, sizeof(int));
	YYSETCONDITION(new_state);
}

/*
 * Convenience wrapper: prefixes its argument with yyc before calling
 * _yy_push_state(). Going by the parameter name, the argument is expected
 * to carry the condition name followed by the TSRM argument macro.
 */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
yy_pop_state(TSRMLS_D)161 static void yy_pop_state(TSRMLS_D)
162 {
163 int *stack_state;
164 zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165 YYSETCONDITION(*stack_state);
166 zend_stack_del_top(&SCNG(state_stack));
167 }
168
/*
 * Points the scanner at the len bytes starting at str: the cursor is placed
 * at the first byte and the limit just past the last one. SCNG(yy_start) is
 * only set when it has not been set already.
 */
static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
{
	YYCTYPE *first = (YYCTYPE*)str;

	YYCURSOR = first;
	YYLIMIT = first + len;

	if (SCNG(yy_start) == NULL) {
		SCNG(yy_start) = first;
	}
}
177
startup_scanner(TSRMLS_D)178 void startup_scanner(TSRMLS_D)
179 {
180 CG(parse_error) = 0;
181 CG(doc_comment) = NULL;
182 CG(doc_comment_len) = 0;
183 zend_stack_init(&SCNG(state_stack));
184 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
185 }
186
/* Element destructor for the heredoc label stack: frees the label string. */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label)
{
	efree(heredoc_label->label);
}
190
shutdown_scanner(TSRMLS_D)191 void shutdown_scanner(TSRMLS_D)
192 {
193 CG(parse_error) = 0;
194 RESET_DOC_COMMENT();
195 zend_stack_destroy(&SCNG(state_stack));
196 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
197 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
198 }
199
zend_save_lexical_state(zend_lex_state * lex_state TSRMLS_DC)200 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
201 {
202 lex_state->yy_leng = SCNG(yy_leng);
203 lex_state->yy_start = SCNG(yy_start);
204 lex_state->yy_text = SCNG(yy_text);
205 lex_state->yy_cursor = SCNG(yy_cursor);
206 lex_state->yy_marker = SCNG(yy_marker);
207 lex_state->yy_limit = SCNG(yy_limit);
208
209 lex_state->state_stack = SCNG(state_stack);
210 zend_stack_init(&SCNG(state_stack));
211
212 lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213 zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214
215 lex_state->in = SCNG(yy_in);
216 lex_state->yy_state = YYSTATE;
217 lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
218 lex_state->lineno = CG(zend_lineno);
219
220 lex_state->script_org = SCNG(script_org);
221 lex_state->script_org_size = SCNG(script_org_size);
222 lex_state->script_filtered = SCNG(script_filtered);
223 lex_state->script_filtered_size = SCNG(script_filtered_size);
224 lex_state->input_filter = SCNG(input_filter);
225 lex_state->output_filter = SCNG(output_filter);
226 lex_state->script_encoding = SCNG(script_encoding);
227 }
228
zend_restore_lexical_state(zend_lex_state * lex_state TSRMLS_DC)229 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
230 {
231 SCNG(yy_leng) = lex_state->yy_leng;
232 SCNG(yy_start) = lex_state->yy_start;
233 SCNG(yy_text) = lex_state->yy_text;
234 SCNG(yy_cursor) = lex_state->yy_cursor;
235 SCNG(yy_marker) = lex_state->yy_marker;
236 SCNG(yy_limit) = lex_state->yy_limit;
237
238 zend_stack_destroy(&SCNG(state_stack));
239 SCNG(state_stack) = lex_state->state_stack;
240
241 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
242 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
243 SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
244
245 SCNG(yy_in) = lex_state->in;
246 YYSETCONDITION(lex_state->yy_state);
247 CG(zend_lineno) = lex_state->lineno;
248 zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
249
250 if (SCNG(script_filtered)) {
251 efree(SCNG(script_filtered));
252 SCNG(script_filtered) = NULL;
253 }
254 SCNG(script_org) = lex_state->script_org;
255 SCNG(script_org_size) = lex_state->script_org_size;
256 SCNG(script_filtered) = lex_state->script_filtered;
257 SCNG(script_filtered_size) = lex_state->script_filtered_size;
258 SCNG(input_filter) = lex_state->input_filter;
259 SCNG(output_filter) = lex_state->output_filter;
260 SCNG(script_encoding) = lex_state->script_encoding;
261
262 RESET_DOC_COMMENT();
263 }
264
zend_destroy_file_handle(zend_file_handle * file_handle TSRMLS_DC)265 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
266 {
267 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
268 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
269 file_handle->opened_path = NULL;
270 if (file_handle->free_filename) {
271 file_handle->filename = NULL;
272 }
273 }
274
/* Byte order marks, as raw byte strings (use sizeof(...)-1 for their length). */
#define BOM_UTF8     "\xef\xbb\xbf"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
280
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size TSRMLS_DC)281 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
282 {
283 const unsigned char *p;
284 int wchar_size = 2;
285 int le = 0;
286
287 /* utf-16 or utf-32? */
288 p = script;
289 while ((p-script) < script_size) {
290 p = memchr(p, 0, script_size-(p-script)-2);
291 if (!p) {
292 break;
293 }
294 if (*(p+1) == '\0' && *(p+2) == '\0') {
295 wchar_size = 4;
296 break;
297 }
298
299 /* searching for UTF-32 specific byte orders, so this will do */
300 p += 4;
301 }
302
303 /* BE or LE? */
304 p = script;
305 while ((p-script) < script_size) {
306 if (*p == '\0' && *(p+wchar_size-1) != '\0') {
307 /* BE */
308 le = 0;
309 break;
310 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
311 /* LE* */
312 le = 1;
313 break;
314 }
315 p += wchar_size;
316 }
317
318 if (wchar_size == 2) {
319 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
320 } else {
321 return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
322 }
323
324 return NULL;
325 }
326
zend_multibyte_detect_unicode(TSRMLS_D)327 static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
328 {
329 const zend_encoding *script_encoding = NULL;
330 int bom_size;
331 unsigned char *pos1, *pos2;
332
333 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
334 return NULL;
335 }
336
337 /* check out BOM */
338 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
339 script_encoding = zend_multibyte_encoding_utf32be;
340 bom_size = sizeof(BOM_UTF32_BE)-1;
341 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
342 script_encoding = zend_multibyte_encoding_utf32le;
343 bom_size = sizeof(BOM_UTF32_LE)-1;
344 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
345 script_encoding = zend_multibyte_encoding_utf16be;
346 bom_size = sizeof(BOM_UTF16_BE)-1;
347 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
348 script_encoding = zend_multibyte_encoding_utf16le;
349 bom_size = sizeof(BOM_UTF16_LE)-1;
350 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
351 script_encoding = zend_multibyte_encoding_utf8;
352 bom_size = sizeof(BOM_UTF8)-1;
353 }
354
355 if (script_encoding) {
356 /* remove BOM */
357 LANG_SCNG(script_org) += bom_size;
358 LANG_SCNG(script_org_size) -= bom_size;
359
360 return script_encoding;
361 }
362
363 /* script contains NULL bytes -> auto-detection */
364 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
365 /* check if the NULL byte is after the __HALT_COMPILER(); */
366 pos2 = LANG_SCNG(script_org);
367
368 while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
369 pos2 = memchr(pos2, '_', pos1 - pos2);
370 if (!pos2) break;
371 pos2++;
372 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
373 pos2 += sizeof("_HALT_COMPILER")-1;
374 while (*pos2 == ' ' ||
375 *pos2 == '\t' ||
376 *pos2 == '\r' ||
377 *pos2 == '\n') {
378 pos2++;
379 }
380 if (*pos2 == '(') {
381 pos2++;
382 while (*pos2 == ' ' ||
383 *pos2 == '\t' ||
384 *pos2 == '\r' ||
385 *pos2 == '\n') {
386 pos2++;
387 }
388 if (*pos2 == ')') {
389 pos2++;
390 while (*pos2 == ' ' ||
391 *pos2 == '\t' ||
392 *pos2 == '\r' ||
393 *pos2 == '\n') {
394 pos2++;
395 }
396 if (*pos2 == ';') {
397 return NULL;
398 }
399 }
400 }
401 }
402 }
403 /* make best effort if BOM is missing */
404 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
405 }
406
407 return NULL;
408 }
409
zend_multibyte_find_script_encoding(TSRMLS_D)410 static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
411 {
412 const zend_encoding *script_encoding;
413
414 if (CG(detect_unicode)) {
415 /* check out bom(byte order mark) and see if containing wchars */
416 script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
417 if (script_encoding != NULL) {
418 /* bom or wchar detection is prior to 'script_encoding' option */
419 return script_encoding;
420 }
421 }
422
423 /* if no script_encoding specified, just leave alone */
424 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
425 return NULL;
426 }
427
428 /* if multiple encodings specified, detect automagically */
429 if (CG(script_encoding_list_size) > 1) {
430 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
431 }
432
433 return CG(script_encoding_list)[0];
434 }
435
zend_multibyte_set_filter(const zend_encoding * onetime_encoding TSRMLS_DC)436 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
437 {
438 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
439 const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
440
441 if (!script_encoding) {
442 return FAILURE;
443 }
444
445 /* judge input/output filter */
446 LANG_SCNG(script_encoding) = script_encoding;
447 LANG_SCNG(input_filter) = NULL;
448 LANG_SCNG(output_filter) = NULL;
449
450 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
451 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
452 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
453 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
454 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
455 } else {
456 LANG_SCNG(input_filter) = NULL;
457 LANG_SCNG(output_filter) = NULL;
458 }
459 return SUCCESS;
460 }
461
462 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
463 LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
464 LANG_SCNG(output_filter) = NULL;
465 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
466 LANG_SCNG(input_filter) = NULL;
467 LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
468 } else {
469 /* both script and internal encodings are incompatible w/ flex */
470 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
471 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
472 }
473
474 return 0;
475 }
476
open_file_for_scanning(zend_file_handle * file_handle TSRMLS_DC)477 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
478 {
479 const char *file_path = NULL;
480 char *buf;
481 size_t size, offset = 0;
482
483 /* The shebang line was read, get the current position to obtain the buffer start */
484 if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
485 if ((offset = ftell(file_handle->handle.fp)) == -1) {
486 offset = 0;
487 }
488 }
489
490 if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
491 return FAILURE;
492 }
493
494 zend_llist_add_element(&CG(open_files), file_handle);
495 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
496 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
497 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
498 fh->handle.stream.handle = (void*)(((char*)fh) + diff);
499 file_handle->handle.stream.handle = fh->handle.stream.handle;
500 }
501
502 /* Reset the scanner for scanning the new file */
503 SCNG(yy_in) = file_handle;
504 SCNG(yy_start) = NULL;
505
506 if (size != -1) {
507 if (CG(multibyte)) {
508 SCNG(script_org) = (unsigned char*)buf;
509 SCNG(script_org_size) = size;
510 SCNG(script_filtered) = NULL;
511
512 zend_multibyte_set_filter(NULL TSRMLS_CC);
513
514 if (SCNG(input_filter)) {
515 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
516 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
517 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
518 }
519 buf = (char*)SCNG(script_filtered);
520 size = SCNG(script_filtered_size);
521 }
522 }
523 SCNG(yy_start) = (unsigned char *)buf - offset;
524 yy_scan_buffer(buf, size TSRMLS_CC);
525 } else {
526 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
527 }
528
529 BEGIN(INITIAL);
530
531 if (file_handle->opened_path) {
532 file_path = file_handle->opened_path;
533 } else {
534 file_path = file_handle->filename;
535 }
536
537 zend_set_compiled_filename(file_path TSRMLS_CC);
538
539 if (CG(start_lineno)) {
540 CG(zend_lineno) = CG(start_lineno);
541 CG(start_lineno) = 0;
542 } else {
543 CG(zend_lineno) = 1;
544 }
545
546 RESET_DOC_COMMENT();
547 CG(increment_lineno) = 0;
548 return SUCCESS;
549 }
END_EXTERN_C()550 END_EXTERN_C()
551
552
553 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
554 {
555 zend_lex_state original_lex_state;
556 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
557 zend_op_array *original_active_op_array = CG(active_op_array);
558 zend_op_array *retval=NULL;
559 int compiler_result;
560 zend_bool compilation_successful=0;
561 znode retval_znode;
562 zend_bool original_in_compilation = CG(in_compilation);
563
564 retval_znode.op_type = IS_CONST;
565 retval_znode.u.constant.type = IS_LONG;
566 retval_znode.u.constant.value.lval = 1;
567 Z_UNSET_ISREF(retval_znode.u.constant);
568 Z_SET_REFCOUNT(retval_znode.u.constant, 1);
569
570 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
571
572 retval = op_array; /* success oriented */
573
574 if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
575 if (type==ZEND_REQUIRE) {
576 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
577 zend_bailout();
578 } else {
579 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
580 }
581 compilation_successful=0;
582 } else {
583 init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
584 CG(in_compilation) = 1;
585 CG(active_op_array) = op_array;
586 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
587 zend_init_compiler_context(TSRMLS_C);
588 compiler_result = zendparse(TSRMLS_C);
589 zend_do_return(&retval_znode, 0 TSRMLS_CC);
590 CG(in_compilation) = original_in_compilation;
591 if (compiler_result != 0) { /* parser error */
592 zend_bailout();
593 }
594 compilation_successful=1;
595 }
596
597 if (retval) {
598 CG(active_op_array) = original_active_op_array;
599 if (compilation_successful) {
600 pass_two(op_array TSRMLS_CC);
601 zend_release_labels(0 TSRMLS_CC);
602 } else {
603 efree(op_array);
604 retval = NULL;
605 }
606 }
607 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
608 return retval;
609 }
610
611
compile_filename(int type,zval * filename TSRMLS_DC)612 zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
613 {
614 zend_file_handle file_handle;
615 zval tmp;
616 zend_op_array *retval;
617 char *opened_path = NULL;
618
619 if (filename->type != IS_STRING) {
620 tmp = *filename;
621 zval_copy_ctor(&tmp);
622 convert_to_string(&tmp);
623 filename = &tmp;
624 }
625 file_handle.filename = filename->value.str.val;
626 file_handle.free_filename = 0;
627 file_handle.type = ZEND_HANDLE_FILENAME;
628 file_handle.opened_path = NULL;
629 file_handle.handle.fp = NULL;
630
631 retval = zend_compile_file(&file_handle, type TSRMLS_CC);
632 if (retval && file_handle.handle.stream.handle) {
633 int dummy = 1;
634
635 if (!file_handle.opened_path) {
636 file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
637 }
638
639 zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
640
641 if (opened_path) {
642 efree(opened_path);
643 }
644 }
645 zend_destroy_file_handle(&file_handle TSRMLS_CC);
646
647 if (filename==&tmp) {
648 zval_dtor(&tmp);
649 }
650 return retval;
651 }
652
zend_prepare_string_for_scanning(zval * str,char * filename TSRMLS_DC)653 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
654 {
655 char *buf;
656 size_t size;
657
658 /* enforce two trailing NULLs for flex... */
659 if (IS_INTERNED(str->value.str.val)) {
660 char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
661 memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
662 str->value.str.val = tmp;
663 } else {
664 str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
665 }
666
667 memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
668
669 SCNG(yy_in) = NULL;
670 SCNG(yy_start) = NULL;
671
672 buf = str->value.str.val;
673 size = str->value.str.len;
674
675 if (CG(multibyte)) {
676 SCNG(script_org) = (unsigned char*)buf;
677 SCNG(script_org_size) = size;
678 SCNG(script_filtered) = NULL;
679
680 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
681
682 if (SCNG(input_filter)) {
683 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
684 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
685 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
686 }
687 buf = (char*)SCNG(script_filtered);
688 size = SCNG(script_filtered_size);
689 }
690 }
691
692 yy_scan_buffer(buf, size TSRMLS_CC);
693
694 zend_set_compiled_filename(filename TSRMLS_CC);
695 CG(zend_lineno) = 1;
696 CG(increment_lineno) = 0;
697 RESET_DOC_COMMENT();
698 return SUCCESS;
699 }
700
701
zend_get_scanned_file_offset(TSRMLS_D)702 ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
703 {
704 size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
705 if (SCNG(input_filter)) {
706 size_t original_offset = offset, length = 0;
707 do {
708 unsigned char *p = NULL;
709 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
710 return (size_t)-1;
711 }
712 efree(p);
713 if (length > original_offset) {
714 offset--;
715 } else if (length < original_offset) {
716 offset++;
717 }
718 } while (original_offset != length);
719 }
720 return offset;
721 }
722
723
compile_string(zval * source_string,char * filename TSRMLS_DC)724 zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
725 {
726 zend_lex_state original_lex_state;
727 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
728 zend_op_array *original_active_op_array = CG(active_op_array);
729 zend_op_array *retval;
730 zval tmp;
731 int compiler_result;
732 zend_bool original_in_compilation = CG(in_compilation);
733
734 if (source_string->value.str.len==0) {
735 efree(op_array);
736 return NULL;
737 }
738
739 CG(in_compilation) = 1;
740
741 tmp = *source_string;
742 zval_copy_ctor(&tmp);
743 convert_to_string(&tmp);
744 source_string = &tmp;
745
746 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
747 if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
748 efree(op_array);
749 retval = NULL;
750 } else {
751 zend_bool orig_interactive = CG(interactive);
752
753 CG(interactive) = 0;
754 init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
755 CG(interactive) = orig_interactive;
756 CG(active_op_array) = op_array;
757 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
758 zend_init_compiler_context(TSRMLS_C);
759 BEGIN(ST_IN_SCRIPTING);
760 compiler_result = zendparse(TSRMLS_C);
761
762 if (SCNG(script_filtered)) {
763 efree(SCNG(script_filtered));
764 SCNG(script_filtered) = NULL;
765 }
766
767 if (compiler_result != 0) {
768 CG(active_op_array) = original_active_op_array;
769 CG(unclean_shutdown)=1;
770 destroy_op_array(op_array TSRMLS_CC);
771 efree(op_array);
772 retval = NULL;
773 } else {
774 zend_do_return(NULL, 0 TSRMLS_CC);
775 CG(active_op_array) = original_active_op_array;
776 pass_two(op_array TSRMLS_CC);
777 zend_release_labels(0 TSRMLS_CC);
778 retval = op_array;
779 }
780 }
781 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
782 zval_dtor(&tmp);
783 CG(in_compilation) = original_in_compilation;
784 return retval;
785 }
786
787
BEGIN_EXTERN_C()788 BEGIN_EXTERN_C()
789 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
790 {
791 zend_lex_state original_lex_state;
792 zend_file_handle file_handle;
793
794 file_handle.type = ZEND_HANDLE_FILENAME;
795 file_handle.filename = filename;
796 file_handle.free_filename = 0;
797 file_handle.opened_path = NULL;
798 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
799 if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
800 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
801 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
802 return FAILURE;
803 }
804 zend_highlight(syntax_highlighter_ini TSRMLS_CC);
805 if (SCNG(script_filtered)) {
806 efree(SCNG(script_filtered));
807 SCNG(script_filtered) = NULL;
808 }
809 zend_destroy_file_handle(&file_handle TSRMLS_CC);
810 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
811 return SUCCESS;
812 }
813
/*
 * Syntax-highlights the source code in the zval str using the colours in
 * syntax_highlighter_ini; str_name is used as the compiled filename.
 *
 * Works on a private copy of the zval, which is released on every return
 * path. The lexical state is saved before and restored after the operation,
 * and scanning starts in the INITIAL condition.
 *
 * Returns SUCCESS, or FAILURE when the string cannot be prepared for
 * scanning.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
	if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
		/* the copy made above must not outlive this call */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
	zval_dtor(str);
	return SUCCESS;
}
836
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding TSRMLS_DC)837 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
838 {
839 size_t length;
840 unsigned char *new_yy_start;
841
842 /* convert and set */
843 if (!SCNG(input_filter)) {
844 if (SCNG(script_filtered)) {
845 efree(SCNG(script_filtered));
846 SCNG(script_filtered) = NULL;
847 }
848 SCNG(script_filtered_size) = 0;
849 length = SCNG(script_org_size);
850 new_yy_start = SCNG(script_org);
851 } else {
852 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
853 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
854 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
855 }
856 SCNG(script_filtered) = new_yy_start;
857 SCNG(script_filtered_size) = length;
858 }
859
860 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
861 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
862 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
863 SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
864
865 SCNG(yy_start) = new_yy_start;
866 }
867
868
/*
 * Stores a copy of the text_len bytes at text as the string value of the
 * zval pointer zv: through SCNG(output_filter) when one is set, otherwise
 * with estrndup(). text and text_len are evaluated more than once.
 *
 * The expansion is a bare if/else (not a do/while), kept that way so existing
 * call sites continue to compile; do not use it as the body of an unbraced
 * if that has its own else.
 */
# define zend_copy_value(zv, text, text_len) \
	if (SCNG(output_filter)) { \
		size_t sz = 0; \
		SCNG(output_filter)((unsigned char **)&((zv)->value.str.val), &sz, (unsigned char *)(text), (size_t)(text_len) TSRMLS_CC); \
		(zv)->value.str.len = sz; \
	} else { \
		(zv)->value.str.val = (char *) estrndup((text), (text_len)); \
		(zv)->value.str.len = (text_len); \
	}
878
/*
 * Stores in zendlval a copy of the len bytes at str with backslash escape
 * sequences resolved, and advances CG(zend_lineno) for every line break
 * encountered in the source text.
 *
 * Recognised sequences: \n \r \t \f \v \e, \\ and \$, \x or \X followed by
 * one or two hex digits, and a backslash followed by one to three octal
 * digits. \" and \` are resolved only when the character equals quote_type.
 * Every other sequence, and a backslash at the very end, is copied through
 * unchanged.
 *
 * The conversion happens in place on the copy (the output never outgrows the
 * input). If an output filter is set, the result is passed through it and
 * replaced by the filter's output.
 */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
	register char *s, *t;
	char *end;

	ZVAL_STRINGL(zendlval, str, len, 1);

	/* convert escape sequences; s reads, t writes, and t never overtakes s */
	s = t = zendlval->value.str.val;
	end = s+zendlval->value.str.len;
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					break;
				case 'r':
					*t++ = '\r';
					break;
				case 't':
					*t++ = '\t';
					break;
				case 'f':
					*t++ = '\f';
					break;
				case 'v':
					*t++ = '\v';
					break;
				case 'e':
					/* ESC (0x1B); written in octal because '\e' is not standard C */
					*t++ = '\033';
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fallthrough: the escaped character is this string's delimiter */
				case '\\':
				case '$':
					*t++ = *s;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						hex_buf[0] = *(++s);
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
						}
						*t++ = (char) strtol(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
							}
						}
						*t++ = (char) strtol(octal_buf, NULL, 8);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* s is on the last source byte consumed in this round */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	/* the resulting length is exactly the number of bytes written */
	zendlval->value.str.len = (int)(t - zendlval->value.str.val);

	if (SCNG(output_filter)) {
		size_t sz = 0;
		s = zendlval->value.str.val;
		SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
		zendlval->value.str.len = sz;
		efree(s);
	}
}
997
998
lex_scan(zval * zendlval TSRMLS_DC)999 int lex_scan(zval *zendlval TSRMLS_DC)
1000 {
1001 restart:
1002 SCNG(yy_text) = YYCURSOR;
1003
1004 yymore_restart:
1005
1006 /*!re2c
1007 re2c:yyfill:check = 0;
1008 LNUM [0-9]+
1009 DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1010 EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1011 HNUM "0x"[0-9a-fA-F]+
1012 BNUM "0b"[01]+
1013 LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1014 WHITESPACE [ \n\r\t]+
1015 TABS_AND_SPACES [ \t]*
1016 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1017 ANY_CHAR [^]
1018 NEWLINE ("\r"|"\n"|"\r\n")
1019
1020 /* compute yyleng before each rule */
1021 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1022
1023
1024 <ST_IN_SCRIPTING>"exit" {
1025 return T_EXIT;
1026 }
1027
1028 <ST_IN_SCRIPTING>"die" {
1029 return T_EXIT;
1030 }
1031
1032 <ST_IN_SCRIPTING>"function" {
1033 return T_FUNCTION;
1034 }
1035
1036 <ST_IN_SCRIPTING>"const" {
1037 return T_CONST;
1038 }
1039
1040 <ST_IN_SCRIPTING>"return" {
1041 return T_RETURN;
1042 }
1043
1044 <ST_IN_SCRIPTING>"yield" {
1045 return T_YIELD;
1046 }
1047
1048 <ST_IN_SCRIPTING>"try" {
1049 return T_TRY;
1050 }
1051
1052 <ST_IN_SCRIPTING>"catch" {
1053 return T_CATCH;
1054 }
1055
1056 <ST_IN_SCRIPTING>"finally" {
1057 return T_FINALLY;
1058 }
1059
1060 <ST_IN_SCRIPTING>"throw" {
1061 return T_THROW;
1062 }
1063
1064 <ST_IN_SCRIPTING>"if" {
1065 return T_IF;
1066 }
1067
1068 <ST_IN_SCRIPTING>"elseif" {
1069 return T_ELSEIF;
1070 }
1071
1072 <ST_IN_SCRIPTING>"endif" {
1073 return T_ENDIF;
1074 }
1075
1076 <ST_IN_SCRIPTING>"else" {
1077 return T_ELSE;
1078 }
1079
1080 <ST_IN_SCRIPTING>"while" {
1081 return T_WHILE;
1082 }
1083
1084 <ST_IN_SCRIPTING>"endwhile" {
1085 return T_ENDWHILE;
1086 }
1087
1088 <ST_IN_SCRIPTING>"do" {
1089 return T_DO;
1090 }
1091
1092 <ST_IN_SCRIPTING>"for" {
1093 return T_FOR;
1094 }
1095
1096 <ST_IN_SCRIPTING>"endfor" {
1097 return T_ENDFOR;
1098 }
1099
1100 <ST_IN_SCRIPTING>"foreach" {
1101 return T_FOREACH;
1102 }
1103
1104 <ST_IN_SCRIPTING>"endforeach" {
1105 return T_ENDFOREACH;
1106 }
1107
1108 <ST_IN_SCRIPTING>"declare" {
1109 return T_DECLARE;
1110 }
1111
1112 <ST_IN_SCRIPTING>"enddeclare" {
1113 return T_ENDDECLARE;
1114 }
1115
1116 <ST_IN_SCRIPTING>"instanceof" {
1117 return T_INSTANCEOF;
1118 }
1119
1120 <ST_IN_SCRIPTING>"as" {
1121 return T_AS;
1122 }
1123
1124 <ST_IN_SCRIPTING>"switch" {
1125 return T_SWITCH;
1126 }
1127
1128 <ST_IN_SCRIPTING>"endswitch" {
1129 return T_ENDSWITCH;
1130 }
1131
1132 <ST_IN_SCRIPTING>"case" {
1133 return T_CASE;
1134 }
1135
1136 <ST_IN_SCRIPTING>"default" {
1137 return T_DEFAULT;
1138 }
1139
1140 <ST_IN_SCRIPTING>"break" {
1141 return T_BREAK;
1142 }
1143
1144 <ST_IN_SCRIPTING>"continue" {
1145 return T_CONTINUE;
1146 }
1147
1148 <ST_IN_SCRIPTING>"goto" {
1149 return T_GOTO;
1150 }
1151
1152 <ST_IN_SCRIPTING>"echo" {
1153 return T_ECHO;
1154 }
1155
1156 <ST_IN_SCRIPTING>"print" {
1157 return T_PRINT;
1158 }
1159
1160 <ST_IN_SCRIPTING>"class" {
1161 return T_CLASS;
1162 }
1163
1164 <ST_IN_SCRIPTING>"interface" {
1165 return T_INTERFACE;
1166 }
1167
1168 <ST_IN_SCRIPTING>"trait" {
1169 return T_TRAIT;
1170 }
1171
1172 <ST_IN_SCRIPTING>"extends" {
1173 return T_EXTENDS;
1174 }
1175
1176 <ST_IN_SCRIPTING>"implements" {
1177 return T_IMPLEMENTS;
1178 }
1179
1180 <ST_IN_SCRIPTING>"->" {
1181 yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1182 return T_OBJECT_OPERATOR;
1183 }
1184
1185 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1186 zendlval->value.str.val = yytext; /* no copying - intentional */
1187 zendlval->value.str.len = yyleng;
1188 zendlval->type = IS_STRING;
1189 HANDLE_NEWLINES(yytext, yyleng);
1190 return T_WHITESPACE;
1191 }
1192
1193 <ST_LOOKING_FOR_PROPERTY>"->" {
1194 return T_OBJECT_OPERATOR;
1195 }
1196
1197 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1198 yy_pop_state(TSRMLS_C);
1199 zend_copy_value(zendlval, yytext, yyleng);
1200 zendlval->type = IS_STRING;
1201 return T_STRING;
1202 }
1203
1204 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1205 yyless(0);
1206 yy_pop_state(TSRMLS_C);
1207 goto restart;
1208 }
1209
1210 <ST_IN_SCRIPTING>"::" {
1211 return T_PAAMAYIM_NEKUDOTAYIM;
1212 }
1213
1214 <ST_IN_SCRIPTING>"\\" {
1215 return T_NS_SEPARATOR;
1216 }
1217
1218 <ST_IN_SCRIPTING>"new" {
1219 return T_NEW;
1220 }
1221
1222 <ST_IN_SCRIPTING>"clone" {
1223 return T_CLONE;
1224 }
1225
1226 <ST_IN_SCRIPTING>"var" {
1227 return T_VAR;
1228 }
1229
1230 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1231 return T_INT_CAST;
1232 }
1233
1234 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1235 return T_DOUBLE_CAST;
1236 }
1237
1238 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1239 return T_STRING_CAST;
1240 }
1241
1242 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1243 return T_ARRAY_CAST;
1244 }
1245
1246 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1247 return T_OBJECT_CAST;
1248 }
1249
1250 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1251 return T_BOOL_CAST;
1252 }
1253
1254 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1255 return T_UNSET_CAST;
1256 }
1257
1258 <ST_IN_SCRIPTING>"eval" {
1259 return T_EVAL;
1260 }
1261
1262 <ST_IN_SCRIPTING>"include" {
1263 return T_INCLUDE;
1264 }
1265
1266 <ST_IN_SCRIPTING>"include_once" {
1267 return T_INCLUDE_ONCE;
1268 }
1269
1270 <ST_IN_SCRIPTING>"require" {
1271 return T_REQUIRE;
1272 }
1273
1274 <ST_IN_SCRIPTING>"require_once" {
1275 return T_REQUIRE_ONCE;
1276 }
1277
1278 <ST_IN_SCRIPTING>"namespace" {
1279 return T_NAMESPACE;
1280 }
1281
1282 <ST_IN_SCRIPTING>"use" {
1283 return T_USE;
1284 }
1285
1286 <ST_IN_SCRIPTING>"insteadof" {
1287 return T_INSTEADOF;
1288 }
1289
1290 <ST_IN_SCRIPTING>"global" {
1291 return T_GLOBAL;
1292 }
1293
1294 <ST_IN_SCRIPTING>"isset" {
1295 return T_ISSET;
1296 }
1297
1298 <ST_IN_SCRIPTING>"empty" {
1299 return T_EMPTY;
1300 }
1301
1302 <ST_IN_SCRIPTING>"__halt_compiler" {
1303 return T_HALT_COMPILER;
1304 }
1305
1306 <ST_IN_SCRIPTING>"static" {
1307 return T_STATIC;
1308 }
1309
1310 <ST_IN_SCRIPTING>"abstract" {
1311 return T_ABSTRACT;
1312 }
1313
1314 <ST_IN_SCRIPTING>"final" {
1315 return T_FINAL;
1316 }
1317
1318 <ST_IN_SCRIPTING>"private" {
1319 return T_PRIVATE;
1320 }
1321
1322 <ST_IN_SCRIPTING>"protected" {
1323 return T_PROTECTED;
1324 }
1325
1326 <ST_IN_SCRIPTING>"public" {
1327 return T_PUBLIC;
1328 }
1329
1330 <ST_IN_SCRIPTING>"unset" {
1331 return T_UNSET;
1332 }
1333
1334 <ST_IN_SCRIPTING>"=>" {
1335 return T_DOUBLE_ARROW;
1336 }
1337
1338 <ST_IN_SCRIPTING>"list" {
1339 return T_LIST;
1340 }
1341
1342 <ST_IN_SCRIPTING>"array" {
1343 return T_ARRAY;
1344 }
1345
1346 <ST_IN_SCRIPTING>"callable" {
1347 return T_CALLABLE;
1348 }
1349
1350 <ST_IN_SCRIPTING>"++" {
1351 return T_INC;
1352 }
1353
1354 <ST_IN_SCRIPTING>"--" {
1355 return T_DEC;
1356 }
1357
1358 <ST_IN_SCRIPTING>"===" {
1359 return T_IS_IDENTICAL;
1360 }
1361
1362 <ST_IN_SCRIPTING>"!==" {
1363 return T_IS_NOT_IDENTICAL;
1364 }
1365
1366 <ST_IN_SCRIPTING>"==" {
1367 return T_IS_EQUAL;
1368 }
1369
1370 <ST_IN_SCRIPTING>"!="|"<>" {
1371 return T_IS_NOT_EQUAL;
1372 }
1373
1374 <ST_IN_SCRIPTING>"<=" {
1375 return T_IS_SMALLER_OR_EQUAL;
1376 }
1377
1378 <ST_IN_SCRIPTING>">=" {
1379 return T_IS_GREATER_OR_EQUAL;
1380 }
1381
1382 <ST_IN_SCRIPTING>"+=" {
1383 return T_PLUS_EQUAL;
1384 }
1385
1386 <ST_IN_SCRIPTING>"-=" {
1387 return T_MINUS_EQUAL;
1388 }
1389
1390 <ST_IN_SCRIPTING>"*=" {
1391 return T_MUL_EQUAL;
1392 }
1393
1394 <ST_IN_SCRIPTING>"/=" {
1395 return T_DIV_EQUAL;
1396 }
1397
1398 <ST_IN_SCRIPTING>".=" {
1399 return T_CONCAT_EQUAL;
1400 }
1401
1402 <ST_IN_SCRIPTING>"%=" {
1403 return T_MOD_EQUAL;
1404 }
1405
1406 <ST_IN_SCRIPTING>"<<=" {
1407 return T_SL_EQUAL;
1408 }
1409
1410 <ST_IN_SCRIPTING>">>=" {
1411 return T_SR_EQUAL;
1412 }
1413
1414 <ST_IN_SCRIPTING>"&=" {
1415 return T_AND_EQUAL;
1416 }
1417
1418 <ST_IN_SCRIPTING>"|=" {
1419 return T_OR_EQUAL;
1420 }
1421
1422 <ST_IN_SCRIPTING>"^=" {
1423 return T_XOR_EQUAL;
1424 }
1425
1426 <ST_IN_SCRIPTING>"||" {
1427 return T_BOOLEAN_OR;
1428 }
1429
1430 <ST_IN_SCRIPTING>"&&" {
1431 return T_BOOLEAN_AND;
1432 }
1433
1434 <ST_IN_SCRIPTING>"OR" {
1435 return T_LOGICAL_OR;
1436 }
1437
1438 <ST_IN_SCRIPTING>"AND" {
1439 return T_LOGICAL_AND;
1440 }
1441
1442 <ST_IN_SCRIPTING>"XOR" {
1443 return T_LOGICAL_XOR;
1444 }
1445
1446 <ST_IN_SCRIPTING>"<<" {
1447 return T_SL;
1448 }
1449
1450 <ST_IN_SCRIPTING>">>" {
1451 return T_SR;
1452 }
1453
1454 <ST_IN_SCRIPTING>{TOKENS} {
1455 return yytext[0];
1456 }
1457
1458
1459 <ST_IN_SCRIPTING>"{" {
1460 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1461 return '{';
1462 }
1463
1464
1465 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1466 yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1467 return T_DOLLAR_OPEN_CURLY_BRACES;
1468 }
1469
1470
1471 <ST_IN_SCRIPTING>"}" {
1472 RESET_DOC_COMMENT();
1473 if (!zend_stack_is_empty(&SCNG(state_stack))) {
1474 yy_pop_state(TSRMLS_C);
1475 }
1476 return '}';
1477 }
1478
1479
1480 <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1481 yyless(yyleng - 1);
1482 zend_copy_value(zendlval, yytext, yyleng);
1483 zendlval->type = IS_STRING;
1484 yy_pop_state(TSRMLS_C);
1485 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1486 return T_STRING_VARNAME;
1487 }
1488
1489
1490 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1491 yyless(0);
1492 yy_pop_state(TSRMLS_C);
1493 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1494 goto restart;
1495 }
1496
1497 <ST_IN_SCRIPTING>{BNUM} {
1498 char *bin = yytext + 2; /* Skip "0b" */
1499 int len = yyleng - 2;
1500
1501 /* Skip any leading 0s */
1502 while (*bin == '0') {
1503 ++bin;
1504 --len;
1505 }
1506
1507 if (len < SIZEOF_LONG * 8) {
1508 if (len == 0) {
1509 zendlval->value.lval = 0;
1510 } else {
1511 zendlval->value.lval = strtol(bin, NULL, 2);
1512 }
1513 zendlval->type = IS_LONG;
1514 return T_LNUMBER;
1515 } else {
1516 zendlval->value.dval = zend_bin_strtod(bin, NULL);
1517 zendlval->type = IS_DOUBLE;
1518 return T_DNUMBER;
1519 }
1520 }
1521
1522 <ST_IN_SCRIPTING>{LNUM} {
1523 if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1524 zendlval->value.lval = strtol(yytext, NULL, 0);
1525 } else {
1526 errno = 0;
1527 zendlval->value.lval = strtol(yytext, NULL, 0);
1528 if (errno == ERANGE) { /* Overflow */
1529 if (yytext[0] == '0') { /* octal overflow */
1530 zendlval->value.dval = zend_oct_strtod(yytext, NULL);
1531 } else {
1532 zendlval->value.dval = zend_strtod(yytext, NULL);
1533 }
1534 zendlval->type = IS_DOUBLE;
1535 return T_DNUMBER;
1536 }
1537 }
1538
1539 zendlval->type = IS_LONG;
1540 return T_LNUMBER;
1541 }
1542
1543 <ST_IN_SCRIPTING>{HNUM} {
1544 char *hex = yytext + 2; /* Skip "0x" */
1545 int len = yyleng - 2;
1546
1547 /* Skip any leading 0s */
1548 while (*hex == '0') {
1549 hex++;
1550 len--;
1551 }
1552
1553 if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1554 if (len == 0) {
1555 zendlval->value.lval = 0;
1556 } else {
1557 zendlval->value.lval = strtol(hex, NULL, 16);
1558 }
1559 zendlval->type = IS_LONG;
1560 return T_LNUMBER;
1561 } else {
1562 zendlval->value.dval = zend_hex_strtod(hex, NULL);
1563 zendlval->type = IS_DOUBLE;
1564 return T_DNUMBER;
1565 }
1566 }
1567
1568 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1569 if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1570 zendlval->value.lval = strtol(yytext, NULL, 10);
1571 zendlval->type = IS_LONG;
1572 } else {
1573 zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1574 zendlval->value.str.len = yyleng;
1575 zendlval->type = IS_STRING;
1576 }
1577 return T_NUM_STRING;
1578 }
1579
1580 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1581 zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1582 zendlval->value.str.len = yyleng;
1583 zendlval->type = IS_STRING;
1584 return T_NUM_STRING;
1585 }
1586
1587 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1588 zendlval->value.dval = zend_strtod(yytext, NULL);
1589 zendlval->type = IS_DOUBLE;
1590 return T_DNUMBER;
1591 }
1592
1593 <ST_IN_SCRIPTING>"__CLASS__" {
1594 const char *class_name = NULL;
1595
1596 if (CG(active_class_entry)
1597 && (ZEND_ACC_TRAIT ==
1598 (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1599 /* We create a special __CLASS__ constant that is going to be resolved
1600 at run-time */
1601 zendlval->value.str.len = sizeof("__CLASS__")-1;
1602 zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len);
1603 zendlval->type = IS_CONSTANT;
1604 } else {
1605 if (CG(active_class_entry)) {
1606 class_name = CG(active_class_entry)->name;
1607 }
1608
1609 if (!class_name) {
1610 class_name = "";
1611 }
1612
1613 zendlval->value.str.len = strlen(class_name);
1614 zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
1615 zendlval->type = IS_STRING;
1616 }
1617 return T_CLASS_C;
1618 }
1619
1620 <ST_IN_SCRIPTING>"__TRAIT__" {
1621 const char *trait_name = NULL;
1622
1623 if (CG(active_class_entry)
1624 && (ZEND_ACC_TRAIT ==
1625 (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1626 trait_name = CG(active_class_entry)->name;
1627 }
1628
1629 if (!trait_name) {
1630 trait_name = "";
1631 }
1632
1633 zendlval->value.str.len = strlen(trait_name);
1634 zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
1635 zendlval->type = IS_STRING;
1636
1637 return T_TRAIT_C;
1638 }
1639
1640 <ST_IN_SCRIPTING>"__FUNCTION__" {
1641 const char *func_name = NULL;
1642
1643 if (CG(active_op_array)) {
1644 func_name = CG(active_op_array)->function_name;
1645 }
1646
1647 if (!func_name) {
1648 func_name = "";
1649 }
1650 zendlval->value.str.len = strlen(func_name);
1651 zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
1652 zendlval->type = IS_STRING;
1653 return T_FUNC_C;
1654 }
1655
1656 <ST_IN_SCRIPTING>"__METHOD__" {
1657 const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1658 const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1659 size_t len = 0;
1660
1661 if (class_name) {
1662 len += strlen(class_name) + 2;
1663 }
1664 if (func_name) {
1665 len += strlen(func_name);
1666 }
1667
1668 zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
1669 class_name ? class_name : "",
1670 class_name && func_name ? "::" : "",
1671 func_name ? func_name : ""
1672 );
1673 zendlval->type = IS_STRING;
1674 return T_METHOD_C;
1675 }
1676
1677 <ST_IN_SCRIPTING>"__LINE__" {
1678 zendlval->value.lval = CG(zend_lineno);
1679 zendlval->type = IS_LONG;
1680 return T_LINE;
1681 }
1682
1683 <ST_IN_SCRIPTING>"__FILE__" {
1684 char *filename = zend_get_compiled_filename(TSRMLS_C);
1685
1686 if (!filename) {
1687 filename = "";
1688 }
1689 zendlval->value.str.len = strlen(filename);
1690 zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
1691 zendlval->type = IS_STRING;
1692 return T_FILE;
1693 }
1694
1695 <ST_IN_SCRIPTING>"__DIR__" {
1696 char *filename = zend_get_compiled_filename(TSRMLS_C);
1697 const size_t filename_len = strlen(filename);
1698 char *dirname;
1699
1700 if (!filename) {
1701 filename = "";
1702 }
1703
1704 dirname = estrndup(filename, filename_len);
1705 zend_dirname(dirname, filename_len);
1706
1707 if (strcmp(dirname, ".") == 0) {
1708 dirname = erealloc(dirname, MAXPATHLEN);
1709 #if HAVE_GETCWD
1710 VCWD_GETCWD(dirname, MAXPATHLEN);
1711 #elif HAVE_GETWD
1712 VCWD_GETWD(dirname);
1713 #endif
1714 }
1715
1716 zendlval->value.str.len = strlen(dirname);
1717 zendlval->value.str.val = dirname;
1718 zendlval->type = IS_STRING;
1719 return T_DIR;
1720 }
1721
1722 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1723 if (CG(current_namespace)) {
1724 *zendlval = *CG(current_namespace);
1725 zval_copy_ctor(zendlval);
1726 } else {
1727 ZVAL_EMPTY_STRING(zendlval);
1728 }
1729 return T_NS_C;
1730 }
1731
1732 <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1733 YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1734
1735 if (bracket != SCNG(yy_text)) {
1736 /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1737 YYCURSOR = bracket;
1738 goto inline_html;
1739 }
1740
1741 HANDLE_NEWLINES(yytext, yyleng);
1742 zendlval->value.str.val = yytext; /* no copying - intentional */
1743 zendlval->value.str.len = yyleng;
1744 zendlval->type = IS_STRING;
1745 BEGIN(ST_IN_SCRIPTING);
1746 return T_OPEN_TAG;
1747 }
1748
1749
1750 <INITIAL>"<%=" {
1751 if (CG(asp_tags)) {
1752 zendlval->value.str.val = yytext; /* no copying - intentional */
1753 zendlval->value.str.len = yyleng;
1754 zendlval->type = IS_STRING;
1755 BEGIN(ST_IN_SCRIPTING);
1756 return T_OPEN_TAG_WITH_ECHO;
1757 } else {
1758 goto inline_char_handler;
1759 }
1760 }
1761
1762
1763 <INITIAL>"<?=" {
1764 zendlval->value.str.val = yytext; /* no copying - intentional */
1765 zendlval->value.str.len = yyleng;
1766 zendlval->type = IS_STRING;
1767 BEGIN(ST_IN_SCRIPTING);
1768 return T_OPEN_TAG_WITH_ECHO;
1769 }
1770
1771
1772 <INITIAL>"<%" {
1773 if (CG(asp_tags)) {
1774 zendlval->value.str.val = yytext; /* no copying - intentional */
1775 zendlval->value.str.len = yyleng;
1776 zendlval->type = IS_STRING;
1777 BEGIN(ST_IN_SCRIPTING);
1778 return T_OPEN_TAG;
1779 } else {
1780 goto inline_char_handler;
1781 }
1782 }
1783
1784
1785 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1786 zendlval->value.str.val = yytext; /* no copying - intentional */
1787 zendlval->value.str.len = yyleng;
1788 zendlval->type = IS_STRING;
1789 HANDLE_NEWLINE(yytext[yyleng-1]);
1790 BEGIN(ST_IN_SCRIPTING);
1791 return T_OPEN_TAG;
1792 }
1793
1794
1795 <INITIAL>"<?" {
1796 if (CG(short_tags)) {
1797 zendlval->value.str.val = yytext; /* no copying - intentional */
1798 zendlval->value.str.len = yyleng;
1799 zendlval->type = IS_STRING;
1800 BEGIN(ST_IN_SCRIPTING);
1801 return T_OPEN_TAG;
1802 } else {
1803 goto inline_char_handler;
1804 }
1805 }
1806
1807 <INITIAL>{ANY_CHAR} {
1808 if (YYCURSOR > YYLIMIT) {
1809 return 0;
1810 }
1811
1812 inline_char_handler:
1813
1814 while (1) {
1815 YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1816
1817 YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1818
1819 if (YYCURSOR < YYLIMIT) {
1820 switch (*YYCURSOR) {
1821 case '?':
1822 if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1823 break;
1824 }
1825 continue;
1826 case '%':
1827 if (CG(asp_tags)) {
1828 break;
1829 }
1830 continue;
1831 case 's':
1832 case 'S':
1833 /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1834 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1835 YYCURSOR--;
1836 yymore();
1837 default:
1838 continue;
1839 }
1840
1841 YYCURSOR--;
1842 }
1843
1844 break;
1845 }
1846
1847 inline_html:
1848 yyleng = YYCURSOR - SCNG(yy_text);
1849
1850 if (SCNG(output_filter)) {
1851 int readsize;
1852 size_t sz = 0;
1853 readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1854 zendlval->value.str.len = sz;
1855 if (readsize < yyleng) {
1856 yyless(readsize);
1857 }
1858 } else {
1859 zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
1860 zendlval->value.str.len = yyleng;
1861 }
1862 zendlval->type = IS_STRING;
1863 HANDLE_NEWLINES(yytext, yyleng);
1864 return T_INLINE_HTML;
1865 }
1866
1867
1868 /* Make sure a label character follows "->", otherwise there is no property
1869 * and "->" will be taken literally
1870 */
1871 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1872 yyless(yyleng - 3);
1873 yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1874 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1875 zendlval->type = IS_STRING;
1876 return T_VARIABLE;
1877 }
1878
1879 /* A [ always designates a variable offset, regardless of what follows
1880 */
1881 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1882 yyless(yyleng - 1);
1883 yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1884 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1885 zendlval->type = IS_STRING;
1886 return T_VARIABLE;
1887 }
1888
1889 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1890 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1891 zendlval->type = IS_STRING;
1892 return T_VARIABLE;
1893 }
1894
1895 <ST_VAR_OFFSET>"]" {
1896 yy_pop_state(TSRMLS_C);
1897 return ']';
1898 }
1899
1900 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1901 /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1902 return yytext[0];
1903 }
1904
1905 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1906 /* Invalid rule to return a more explicit parse error with proper line number */
1907 yyless(0);
1908 yy_pop_state(TSRMLS_C);
1909 return T_ENCAPSED_AND_WHITESPACE;
1910 }
1911
1912 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1913 zend_copy_value(zendlval, yytext, yyleng);
1914 zendlval->type = IS_STRING;
1915 return T_STRING;
1916 }
1917
1918
1919 <ST_IN_SCRIPTING>"#"|"//" {
1920 while (YYCURSOR < YYLIMIT) {
1921 switch (*YYCURSOR++) {
1922 case '\r':
1923 if (*YYCURSOR == '\n') {
1924 YYCURSOR++;
1925 }
1926 /* fall through */
1927 case '\n':
1928 CG(zend_lineno)++;
1929 break;
1930 case '%':
1931 if (!CG(asp_tags)) {
1932 continue;
1933 }
1934 /* fall through */
1935 case '?':
1936 if (*YYCURSOR == '>') {
1937 YYCURSOR--;
1938 break;
1939 }
1940 /* fall through */
1941 default:
1942 continue;
1943 }
1944
1945 break;
1946 }
1947
1948 yyleng = YYCURSOR - SCNG(yy_text);
1949
1950 return T_COMMENT;
1951 }
1952
1953 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1954 int doc_com;
1955
1956 if (yyleng > 2) {
1957 doc_com = 1;
1958 RESET_DOC_COMMENT();
1959 } else {
1960 doc_com = 0;
1961 }
1962
1963 while (YYCURSOR < YYLIMIT) {
1964 if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1965 break;
1966 }
1967 }
1968
1969 if (YYCURSOR < YYLIMIT) {
1970 YYCURSOR++;
1971 } else {
1972 zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1973 }
1974
1975 yyleng = YYCURSOR - SCNG(yy_text);
1976 HANDLE_NEWLINES(yytext, yyleng);
1977
1978 if (doc_com) {
1979 CG(doc_comment) = estrndup(yytext, yyleng);
1980 CG(doc_comment_len) = yyleng;
1981 return T_DOC_COMMENT;
1982 }
1983
1984 return T_COMMENT;
1985 }
1986
1987 <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1988 zendlval->value.str.val = yytext; /* no copying - intentional */
1989 zendlval->value.str.len = yyleng;
1990 zendlval->type = IS_STRING;
1991 BEGIN(INITIAL);
1992 return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1993 }
1994
1995
1996 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1997 if (CG(asp_tags)) {
1998 BEGIN(INITIAL);
1999 zendlval->value.str.len = yyleng;
2000 zendlval->type = IS_STRING;
2001 zendlval->value.str.val = yytext; /* no copying - intentional */
2002 return T_CLOSE_TAG; /* implicit ';' at php-end tag */
2003 } else {
2004 yyless(1);
2005 return yytext[0];
2006 }
2007 }
2008
2009
2010 <ST_IN_SCRIPTING>b?['] {
2011 register char *s, *t;
2012 char *end;
2013 int bprefix = (yytext[0] != '\'') ? 1 : 0;
2014
2015 while (1) {
2016 if (YYCURSOR < YYLIMIT) {
2017 if (*YYCURSOR == '\'') {
2018 YYCURSOR++;
2019 yyleng = YYCURSOR - SCNG(yy_text);
2020
2021 break;
2022 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2023 YYCURSOR++;
2024 }
2025 } else {
2026 yyleng = YYLIMIT - SCNG(yy_text);
2027
2028 /* Unclosed single quotes; treat similar to double quotes, but without a separate token
2029 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2030 * rule, which continued in ST_IN_SCRIPTING state after the quote */
2031 return T_ENCAPSED_AND_WHITESPACE;
2032 }
2033 }
2034
2035 zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
2036 zendlval->value.str.len = yyleng-bprefix-2;
2037 zendlval->type = IS_STRING;
2038
2039 /* convert escape sequences */
2040 s = t = zendlval->value.str.val;
2041 end = s+zendlval->value.str.len;
2042 while (s<end) {
2043 if (*s=='\\') {
2044 s++;
2045
2046 switch(*s) {
2047 case '\\':
2048 case '\'':
2049 *t++ = *s;
2050 zendlval->value.str.len--;
2051 break;
2052 default:
2053 *t++ = '\\';
2054 *t++ = *s;
2055 break;
2056 }
2057 } else {
2058 *t++ = *s;
2059 }
2060
2061 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2062 CG(zend_lineno)++;
2063 }
2064 s++;
2065 }
2066 *t = 0;
2067
2068 if (SCNG(output_filter)) {
2069 size_t sz = 0;
2070 s = zendlval->value.str.val;
2071 SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
2072 zendlval->value.str.len = sz;
2073 efree(s);
2074 }
2075 return T_CONSTANT_ENCAPSED_STRING;
2076 }
2077
2078
2079 <ST_IN_SCRIPTING>b?["] {
2080 int bprefix = (yytext[0] != '"') ? 1 : 0;
2081
2082 while (YYCURSOR < YYLIMIT) {
2083 switch (*YYCURSOR++) {
2084 case '"':
2085 yyleng = YYCURSOR - SCNG(yy_text);
2086 zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2087 return T_CONSTANT_ENCAPSED_STRING;
2088 case '$':
2089 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2090 break;
2091 }
2092 continue;
2093 case '{':
2094 if (*YYCURSOR == '$') {
2095 break;
2096 }
2097 continue;
2098 case '\\':
2099 if (YYCURSOR < YYLIMIT) {
2100 YYCURSOR++;
2101 }
2102 /* fall through */
2103 default:
2104 continue;
2105 }
2106
2107 YYCURSOR--;
2108 break;
2109 }
2110
2111 /* Remember how much was scanned to save rescanning */
2112 SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2113
2114 YYCURSOR = SCNG(yy_text) + yyleng;
2115
2116 BEGIN(ST_DOUBLE_QUOTES);
2117 return '"';
2118 }
2119
2120
2121 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2122 char *s;
2123 int bprefix = (yytext[0] != '<') ? 1 : 0;
2124 zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2125
2126 CG(zend_lineno)++;
2127 heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2128 s = yytext+bprefix+3;
2129 while ((*s == ' ') || (*s == '\t')) {
2130 s++;
2131 heredoc_label->length--;
2132 }
2133
2134 if (*s == '\'') {
2135 s++;
2136 heredoc_label->length -= 2;
2137
2138 BEGIN(ST_NOWDOC);
2139 } else {
2140 if (*s == '"') {
2141 s++;
2142 heredoc_label->length -= 2;
2143 }
2144
2145 BEGIN(ST_HEREDOC);
2146 }
2147
2148 heredoc_label->label = estrndup(s, heredoc_label->length);
2149
2150 /* Check for ending label on the next line */
2151 if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2152 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2153
2154 if (*end == ';') {
2155 end++;
2156 }
2157
2158 if (*end == '\n' || *end == '\r') {
2159 BEGIN(ST_END_HEREDOC);
2160 }
2161 }
2162
2163 zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2164
2165 return T_START_HEREDOC;
2166 }
2167
2168
2169 <ST_IN_SCRIPTING>[`] {
2170 BEGIN(ST_BACKQUOTE);
2171 return '`';
2172 }
2173
2174
2175 <ST_END_HEREDOC>{ANY_CHAR} {
2176 zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2177
2178 YYCURSOR += heredoc_label->length - 1;
2179 yyleng = heredoc_label->length;
2180
2181 heredoc_label_dtor(heredoc_label);
2182 efree(heredoc_label);
2183
2184 BEGIN(ST_IN_SCRIPTING);
2185 return T_END_HEREDOC;
2186 }
2187
2188
2189 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2190 zendlval->value.lval = (long) '{';
2191 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2192 yyless(1);
2193 return T_CURLY_OPEN;
2194 }
2195
2196
2197 <ST_DOUBLE_QUOTES>["] {
2198 BEGIN(ST_IN_SCRIPTING);
2199 return '"';
2200 }
2201
2202 <ST_BACKQUOTE>[`] {
2203 BEGIN(ST_IN_SCRIPTING);
2204 return '`';
2205 }
2206
2207
2208 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2209 if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2210 YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2211 SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2212
2213 goto double_quotes_scan_done;
2214 }
2215
2216 if (YYCURSOR > YYLIMIT) {
2217 return 0;
2218 }
2219 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2220 YYCURSOR++;
2221 }
2222
2223 while (YYCURSOR < YYLIMIT) {
2224 switch (*YYCURSOR++) {
2225 case '"':
2226 break;
2227 case '$':
2228 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2229 break;
2230 }
2231 continue;
2232 case '{':
2233 if (*YYCURSOR == '$') {
2234 break;
2235 }
2236 continue;
2237 case '\\':
2238 if (YYCURSOR < YYLIMIT) {
2239 YYCURSOR++;
2240 }
2241 /* fall through */
2242 default:
2243 continue;
2244 }
2245
2246 YYCURSOR--;
2247 break;
2248 }
2249
2250 double_quotes_scan_done:
2251 yyleng = YYCURSOR - SCNG(yy_text);
2252
2253 zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2254 return T_ENCAPSED_AND_WHITESPACE;
2255 }
2256
2257
2258 <ST_BACKQUOTE>{ANY_CHAR} {
2259 if (YYCURSOR > YYLIMIT) {
2260 return 0;
2261 }
2262 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2263 YYCURSOR++;
2264 }
2265
2266 while (YYCURSOR < YYLIMIT) {
2267 switch (*YYCURSOR++) {
2268 case '`':
2269 break;
2270 case '$':
2271 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2272 break;
2273 }
2274 continue;
2275 case '{':
2276 if (*YYCURSOR == '$') {
2277 break;
2278 }
2279 continue;
2280 case '\\':
2281 if (YYCURSOR < YYLIMIT) {
2282 YYCURSOR++;
2283 }
2284 /* fall through */
2285 default:
2286 continue;
2287 }
2288
2289 YYCURSOR--;
2290 break;
2291 }
2292
2293 yyleng = YYCURSOR - SCNG(yy_text);
2294
2295 zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2296 return T_ENCAPSED_AND_WHITESPACE;
2297 }
2298
2299
2300 <ST_HEREDOC>{ANY_CHAR} {
2301 int newline = 0;
2302
2303 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2304
2305 if (YYCURSOR > YYLIMIT) {
2306 return 0;
2307 }
2308
2309 YYCURSOR--;
2310
2311 while (YYCURSOR < YYLIMIT) {
2312 switch (*YYCURSOR++) {
2313 case '\r':
2314 if (*YYCURSOR == '\n') {
2315 YYCURSOR++;
2316 }
2317 /* fall through */
2318 case '\n':
2319 /* Check for ending label on the next line */
2320 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2321 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2322
2323 if (*end == ';') {
2324 end++;
2325 }
2326
2327 if (*end == '\n' || *end == '\r') {
2328 /* newline before label will be subtracted from returned text, but
2329 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2330 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2331 newline = 2; /* Windows newline */
2332 } else {
2333 newline = 1;
2334 }
2335
2336 CG(increment_lineno) = 1; /* For newline before label */
2337 BEGIN(ST_END_HEREDOC);
2338
2339 goto heredoc_scan_done;
2340 }
2341 }
2342 continue;
2343 case '$':
2344 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2345 break;
2346 }
2347 continue;
2348 case '{':
2349 if (*YYCURSOR == '$') {
2350 break;
2351 }
2352 continue;
2353 case '\\':
2354 if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2355 YYCURSOR++;
2356 }
2357 /* fall through */
2358 default:
2359 continue;
2360 }
2361
2362 YYCURSOR--;
2363 break;
2364 }
2365
2366 heredoc_scan_done:
2367 yyleng = YYCURSOR - SCNG(yy_text);
2368
2369 zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2370 return T_ENCAPSED_AND_WHITESPACE;
2371 }
2372
2373
2374 <ST_NOWDOC>{ANY_CHAR} {
2375 int newline = 0;
2376
2377 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2378
2379 if (YYCURSOR > YYLIMIT) {
2380 return 0;
2381 }
2382
2383 YYCURSOR--;
2384
2385 while (YYCURSOR < YYLIMIT) {
2386 switch (*YYCURSOR++) {
2387 case '\r':
2388 if (*YYCURSOR == '\n') {
2389 YYCURSOR++;
2390 }
2391 /* fall through */
2392 case '\n':
2393 /* Check for ending label on the next line */
2394 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2395 YYCTYPE *end = YYCURSOR + heredoc_label->length;
2396
2397 if (*end == ';') {
2398 end++;
2399 }
2400
2401 if (*end == '\n' || *end == '\r') {
2402 /* newline before label will be subtracted from returned text, but
2403 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2404 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2405 newline = 2; /* Windows newline */
2406 } else {
2407 newline = 1;
2408 }
2409
2410 CG(increment_lineno) = 1; /* For newline before label */
2411 BEGIN(ST_END_HEREDOC);
2412
2413 goto nowdoc_scan_done;
2414 }
2415 }
2416 /* fall through */
2417 default:
2418 continue;
2419 }
2420 }
2421
2422 nowdoc_scan_done:
2423 yyleng = YYCURSOR - SCNG(yy_text);
2424
2425 zend_copy_value(zendlval, yytext, yyleng - newline);
2426 zendlval->type = IS_STRING;
2427 HANDLE_NEWLINES(yytext, yyleng - newline);
2428 return T_ENCAPSED_AND_WHITESPACE;
2429 }
2430
2431
2432 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2433 if (YYCURSOR > YYLIMIT) {
2434 return 0;
2435 }
2436
2437 zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2438 goto restart;
2439 }
2440
2441 */
2442 }
2443