1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Marcus Boerger <helly@php.net> |
16 | Nuno Lopes <nlopess@php.net> |
17 | Scott MacVicar <scottmac@php.net> |
18 | Flex version authors: |
19 | Andi Gutmans <andi@zend.com> |
20 | Zeev Suraski <zeev@zend.com> |
21 +----------------------------------------------------------------------+
22 */
23
24 /* $Id$ */
25
26 #if 0
27 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28 #else
29 # define YYDEBUG(s, c)
30 #endif
31
32 #include "zend_language_scanner_defs.h"
33
34 #include <errno.h>
35 #include "zend.h"
36 #ifdef PHP_WIN32
37 # include <Winuser.h>
38 #endif
39 #include "zend_alloc.h"
40 #include <zend_language_parser.h>
41 #include "zend_compile.h"
42 #include "zend_language_scanner.h"
43 #include "zend_highlight.h"
44 #include "zend_constants.h"
45 #include "zend_variables.h"
46 #include "zend_operators.h"
47 #include "zend_API.h"
48 #include "zend_strtod.h"
49 #include "zend_exceptions.h"
50 #include "tsrm_virtual_cwd.h"
51 #include "tsrm_config_common.h"
52
/*
 * Scanner interface macros. They bind the generated scanner's cursor, limit,
 * marker and condition to the scanner globals reached through SCNG().
 * Macro arguments are parenthesized so that compound expressions
 * (e.g. `a - b` or `c ? x : y`) keep their meaning after expansion.
 */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 when cursor + n reaches the limit plus ZEND_MMAP_AHEAD. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Pastes "yyc" in front of a condition name. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Move the cursor to yytext + x and set the token length to x. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
/* Jumps to the yymore_restart label, which must exist in the function using it. */
#define yymore()     goto yymore_restart
72
73 /* perform sanity check. If this message is triggered you should
74 increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75 /*!max:re2c */
76 #if ZEND_MMAP_AHEAD < YYMAXFILL
77 # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78 #endif
79
80 #ifdef HAVE_STDARG_H
81 # include <stdarg.h>
82 #endif
83
84 #ifdef HAVE_UNISTD_H
85 # include <unistd.h>
86 #endif
87
88 /* Globals Macros */
89 #define SCNG LANG_SCNG
90 #ifdef ZTS
91 ZEND_API ts_rsrc_id language_scanner_globals_id;
92 #else
93 ZEND_API zend_php_scanner_globals language_scanner_globals;
94 #endif
95
/*
 * Walk the l bytes starting at s and bump CG(zend_lineno) once per line
 * break: every "\n", and every "\r" that is not immediately followed by "\n".
 * Note that the byte after the last one inspected may be read (look-ahead
 * for the "\r\n" check).
 */
#define HANDLE_NEWLINES(s, l) \
do { \
	char *nl_cur_ = (s); \
	char *nl_end_ = nl_cur_ + (l); \
	\
	for (; nl_cur_ < nl_end_; nl_cur_++) { \
		if (*nl_cur_ == '\n' || (*nl_cur_ == '\r' && nl_cur_[1] != '\n')) { \
			CG(zend_lineno)++; \
		} \
	} \
} while (0)
107
/*
 * Bump CG(zend_lineno) when the single character c is "\n" or "\r".
 * The argument is evaluated exactly once and is parenthesized, so
 * expressions with side effects or low-precedence operators are safe.
 * The expansion is deliberately kept as a plain brace block (not
 * do/while) so existing call sites keep compiling unchanged.
 */
#define HANDLE_NEWLINE(c) \
{ \
	const int nl_char_ = (c); \
	if (nl_char_ == '\n' || nl_char_ == '\r') { \
		CG(zend_lineno)++; \
	} \
}
114
/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)

/* Non-zero for ASCII letters, '_' and any value >= 0x7F. */
#define IS_LABEL_START(c) \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 (c) == '_' || \
	 (c) >= 0x7F)

/* Non-zero for the octal digits '0'..'7'. */
#define ZEND_IS_OCT(c) \
	((c) >= '0' && (c) <= '7')

/* Non-zero for hexadecimal digits in either letter case. */
#define ZEND_IS_HEX(c) \
	(((c) >= '0' && (c) <= '9') || \
	 ((c) >= 'a' && (c) <= 'f') || \
	 ((c) >= 'A' && (c) <= 'F'))
123
BEGIN_EXTERN_C()124 BEGIN_EXTERN_C()
125
126 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127 {
128 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129 assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131 }
132
/*
 * Encoding filter: hands the buffer to zend_multibyte_encoding_converter()
 * with zend_multibyte_encoding_utf8 followed by LANG_SCNG(script_encoding).
 * Returns the converter's result unchanged.
 */
static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
}
137
/*
 * Encoding filter: hands the buffer to zend_multibyte_encoding_converter()
 * with LANG_SCNG(script_encoding) followed by zend_multibyte_encoding_utf8.
 * Returns the converter's result unchanged.
 */
static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
}
143
/*
 * Encoding filter: hands the buffer to zend_multibyte_encoding_converter()
 * with the internal encoding followed by zend_multibyte_encoding_utf8.
 * The internal encoding must be set and lexer-compatible (asserted).
 * Returns the converter's result unchanged.
 */
static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
	const zend_encoding *internal = zend_multibyte_get_internal_encoding(TSRMLS_C);

	assert(internal && zend_multibyte_check_lexer_compatibility(internal));

	return zend_multibyte_encoding_converter(
		to, to_length,
		from, from_length,
		internal, zend_multibyte_encoding_utf8 TSRMLS_CC);
}
151
152
/* Push the current scanner condition onto the state stack, then switch to new_state. */
static void _yy_push_state(int new_state TSRMLS_DC)
{
	zend_stack_push(&SCNG(state_stack), (void *) &SCNG(yy_state), sizeof(int));
	SCNG(yy_state) = new_state;
}

/* The whole argument (condition name plus the TSRM argument macro) is pasted after "yyc". */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
yy_pop_state(TSRMLS_D)161 static void yy_pop_state(TSRMLS_D)
162 {
163 int *stack_state;
164 zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165 YYSETCONDITION(*stack_state);
166 zend_stack_del_top(&SCNG(state_stack));
167 }
168
/*
 * Point the scanner at the len bytes starting at str: the cursor goes to
 * the first byte and the limit just past the last one. yy_start is only
 * set here when it has not been set already.
 */
static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
{
	YYCTYPE *first = (YYCTYPE*)str;

	YYCURSOR = first;
	YYLIMIT  = first + len;

	if (SCNG(yy_start) == NULL) {
		SCNG(yy_start) = first;
	}
}
177
startup_scanner(TSRMLS_D)178 void startup_scanner(TSRMLS_D)
179 {
180 CG(parse_error) = 0;
181 CG(heredoc) = NULL;
182 CG(heredoc_len) = 0;
183 CG(doc_comment) = NULL;
184 CG(doc_comment_len) = 0;
185 zend_stack_init(&SCNG(state_stack));
186 }
187
shutdown_scanner(TSRMLS_D)188 void shutdown_scanner(TSRMLS_D)
189 {
190 if (CG(heredoc)) {
191 efree(CG(heredoc));
192 CG(heredoc_len)=0;
193 }
194 CG(parse_error) = 0;
195 zend_stack_destroy(&SCNG(state_stack));
196 RESET_DOC_COMMENT();
197 }
198
zend_save_lexical_state(zend_lex_state * lex_state TSRMLS_DC)199 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
200 {
201 lex_state->yy_leng = SCNG(yy_leng);
202 lex_state->yy_start = SCNG(yy_start);
203 lex_state->yy_text = SCNG(yy_text);
204 lex_state->yy_cursor = SCNG(yy_cursor);
205 lex_state->yy_marker = SCNG(yy_marker);
206 lex_state->yy_limit = SCNG(yy_limit);
207
208 lex_state->state_stack = SCNG(state_stack);
209 zend_stack_init(&SCNG(state_stack));
210
211 lex_state->in = SCNG(yy_in);
212 lex_state->yy_state = YYSTATE;
213 lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
214 lex_state->lineno = CG(zend_lineno);
215
216 lex_state->script_org = SCNG(script_org);
217 lex_state->script_org_size = SCNG(script_org_size);
218 lex_state->script_filtered = SCNG(script_filtered);
219 lex_state->script_filtered_size = SCNG(script_filtered_size);
220 lex_state->input_filter = SCNG(input_filter);
221 lex_state->output_filter = SCNG(output_filter);
222 lex_state->script_encoding = SCNG(script_encoding);
223 }
224
zend_restore_lexical_state(zend_lex_state * lex_state TSRMLS_DC)225 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
226 {
227 SCNG(yy_leng) = lex_state->yy_leng;
228 SCNG(yy_start) = lex_state->yy_start;
229 SCNG(yy_text) = lex_state->yy_text;
230 SCNG(yy_cursor) = lex_state->yy_cursor;
231 SCNG(yy_marker) = lex_state->yy_marker;
232 SCNG(yy_limit) = lex_state->yy_limit;
233
234 zend_stack_destroy(&SCNG(state_stack));
235 SCNG(state_stack) = lex_state->state_stack;
236
237 SCNG(yy_in) = lex_state->in;
238 YYSETCONDITION(lex_state->yy_state);
239 CG(zend_lineno) = lex_state->lineno;
240 zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
241
242 if (SCNG(script_filtered)) {
243 efree(SCNG(script_filtered));
244 SCNG(script_filtered) = NULL;
245 }
246 SCNG(script_org) = lex_state->script_org;
247 SCNG(script_org_size) = lex_state->script_org_size;
248 SCNG(script_filtered) = lex_state->script_filtered;
249 SCNG(script_filtered_size) = lex_state->script_filtered_size;
250 SCNG(input_filter) = lex_state->input_filter;
251 SCNG(output_filter) = lex_state->output_filter;
252 SCNG(script_encoding) = lex_state->script_encoding;
253
254 if (CG(heredoc)) {
255 efree(CG(heredoc));
256 CG(heredoc) = NULL;
257 CG(heredoc_len) = 0;
258 }
259 RESET_DOC_COMMENT();
260 }
261
zend_destroy_file_handle(zend_file_handle * file_handle TSRMLS_DC)262 ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
263 {
264 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
265 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
266 file_handle->opened_path = NULL;
267 if (file_handle->free_filename) {
268 file_handle->filename = NULL;
269 }
270 }
271
272 #define BOM_UTF32_BE "\x00\x00\xfe\xff"
273 #define BOM_UTF32_LE "\xff\xfe\x00\x00"
274 #define BOM_UTF16_BE "\xfe\xff"
275 #define BOM_UTF16_LE "\xff\xfe"
276 #define BOM_UTF8 "\xef\xbb\xbf"
277
zend_multibyte_detect_utf_encoding(const unsigned char * script,size_t script_size TSRMLS_DC)278 static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
279 {
280 const unsigned char *p;
281 int wchar_size = 2;
282 int le = 0;
283
284 /* utf-16 or utf-32? */
285 p = script;
286 while ((p-script) < script_size) {
287 p = memchr(p, 0, script_size-(p-script)-2);
288 if (!p) {
289 break;
290 }
291 if (*(p+1) == '\0' && *(p+2) == '\0') {
292 wchar_size = 4;
293 break;
294 }
295
296 /* searching for UTF-32 specific byte orders, so this will do */
297 p += 4;
298 }
299
300 /* BE or LE? */
301 p = script;
302 while ((p-script) < script_size) {
303 if (*p == '\0' && *(p+wchar_size-1) != '\0') {
304 /* BE */
305 le = 0;
306 break;
307 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
308 /* LE* */
309 le = 1;
310 break;
311 }
312 p += wchar_size;
313 }
314
315 if (wchar_size == 2) {
316 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
317 } else {
318 return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
319 }
320
321 return NULL;
322 }
323
zend_multibyte_detect_unicode(TSRMLS_D)324 static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
325 {
326 const zend_encoding *script_encoding = NULL;
327 int bom_size;
328 unsigned char *pos1, *pos2;
329
330 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
331 return NULL;
332 }
333
334 /* check out BOM */
335 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
336 script_encoding = zend_multibyte_encoding_utf32be;
337 bom_size = sizeof(BOM_UTF32_BE)-1;
338 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
339 script_encoding = zend_multibyte_encoding_utf32le;
340 bom_size = sizeof(BOM_UTF32_LE)-1;
341 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
342 script_encoding = zend_multibyte_encoding_utf16be;
343 bom_size = sizeof(BOM_UTF16_BE)-1;
344 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
345 script_encoding = zend_multibyte_encoding_utf16le;
346 bom_size = sizeof(BOM_UTF16_LE)-1;
347 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
348 script_encoding = zend_multibyte_encoding_utf8;
349 bom_size = sizeof(BOM_UTF8)-1;
350 }
351
352 if (script_encoding) {
353 /* remove BOM */
354 LANG_SCNG(script_org) += bom_size;
355 LANG_SCNG(script_org_size) -= bom_size;
356
357 return script_encoding;
358 }
359
360 /* script contains NULL bytes -> auto-detection */
361 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
362 /* check if the NULL byte is after the __HALT_COMPILER(); */
363 pos2 = LANG_SCNG(script_org);
364
365 while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
366 pos2 = memchr(pos2, '_', pos1 - pos2);
367 if (!pos2) break;
368 pos2++;
369 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
370 pos2 += sizeof("_HALT_COMPILER")-1;
371 while (*pos2 == ' ' ||
372 *pos2 == '\t' ||
373 *pos2 == '\r' ||
374 *pos2 == '\n') {
375 pos2++;
376 }
377 if (*pos2 == '(') {
378 pos2++;
379 while (*pos2 == ' ' ||
380 *pos2 == '\t' ||
381 *pos2 == '\r' ||
382 *pos2 == '\n') {
383 pos2++;
384 }
385 if (*pos2 == ')') {
386 pos2++;
387 while (*pos2 == ' ' ||
388 *pos2 == '\t' ||
389 *pos2 == '\r' ||
390 *pos2 == '\n') {
391 pos2++;
392 }
393 if (*pos2 == ';') {
394 return NULL;
395 }
396 }
397 }
398 }
399 }
400 /* make best effort if BOM is missing */
401 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
402 }
403
404 return NULL;
405 }
406
zend_multibyte_find_script_encoding(TSRMLS_D)407 static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
408 {
409 const zend_encoding *script_encoding;
410
411 if (CG(detect_unicode)) {
412 /* check out bom(byte order mark) and see if containing wchars */
413 script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
414 if (script_encoding != NULL) {
415 /* bom or wchar detection is prior to 'script_encoding' option */
416 return script_encoding;
417 }
418 }
419
420 /* if no script_encoding specified, just leave alone */
421 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
422 return NULL;
423 }
424
425 /* if multiple encodings specified, detect automagically */
426 if (CG(script_encoding_list_size) > 1) {
427 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
428 }
429
430 return CG(script_encoding_list)[0];
431 }
432
zend_multibyte_set_filter(const zend_encoding * onetime_encoding TSRMLS_DC)433 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
434 {
435 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
436 const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
437
438 if (!script_encoding) {
439 return FAILURE;
440 }
441
442 /* judge input/output filter */
443 LANG_SCNG(script_encoding) = script_encoding;
444 LANG_SCNG(input_filter) = NULL;
445 LANG_SCNG(output_filter) = NULL;
446
447 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
448 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
449 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
450 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
451 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
452 } else {
453 LANG_SCNG(input_filter) = NULL;
454 LANG_SCNG(output_filter) = NULL;
455 }
456 return SUCCESS;
457 }
458
459 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
460 LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
461 LANG_SCNG(output_filter) = NULL;
462 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
463 LANG_SCNG(input_filter) = NULL;
464 LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
465 } else {
466 /* both script and internal encodings are incompatible w/ flex */
467 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
468 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
469 }
470
471 return 0;
472 }
473
open_file_for_scanning(zend_file_handle * file_handle TSRMLS_DC)474 ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
475 {
476 const char *file_path = NULL;
477 char *buf;
478 size_t size, offset = 0;
479
480 /* The shebang line was read, get the current position to obtain the buffer start */
481 if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
482 if ((offset = ftell(file_handle->handle.fp)) == -1) {
483 offset = 0;
484 }
485 }
486
487 if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
488 return FAILURE;
489 }
490
491 zend_llist_add_element(&CG(open_files), file_handle);
492 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
493 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
494 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
495 fh->handle.stream.handle = (void*)(((char*)fh) + diff);
496 file_handle->handle.stream.handle = fh->handle.stream.handle;
497 }
498
499 /* Reset the scanner for scanning the new file */
500 SCNG(yy_in) = file_handle;
501 SCNG(yy_start) = NULL;
502
503 if (size != -1) {
504 if (CG(multibyte)) {
505 SCNG(script_org) = (unsigned char*)buf;
506 SCNG(script_org_size) = size;
507 SCNG(script_filtered) = NULL;
508
509 zend_multibyte_set_filter(NULL TSRMLS_CC);
510
511 if (SCNG(input_filter)) {
512 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
513 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
514 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
515 }
516 buf = (char*)SCNG(script_filtered);
517 size = SCNG(script_filtered_size);
518 }
519 }
520 SCNG(yy_start) = (unsigned char *)buf - offset;
521 yy_scan_buffer(buf, size TSRMLS_CC);
522 } else {
523 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
524 }
525
526 BEGIN(INITIAL);
527
528 if (file_handle->opened_path) {
529 file_path = file_handle->opened_path;
530 } else {
531 file_path = file_handle->filename;
532 }
533
534 zend_set_compiled_filename(file_path TSRMLS_CC);
535
536 if (CG(start_lineno)) {
537 CG(zend_lineno) = CG(start_lineno);
538 CG(start_lineno) = 0;
539 } else {
540 CG(zend_lineno) = 1;
541 }
542
543 RESET_DOC_COMMENT();
544 CG(increment_lineno) = 0;
545 return SUCCESS;
546 }
END_EXTERN_C()547 END_EXTERN_C()
548
549
550 ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
551 {
552 zend_lex_state original_lex_state;
553 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
554 zend_op_array *original_active_op_array = CG(active_op_array);
555 zend_op_array *retval=NULL;
556 int compiler_result;
557 zend_bool compilation_successful=0;
558 znode retval_znode;
559 zend_bool original_in_compilation = CG(in_compilation);
560
561 retval_znode.op_type = IS_CONST;
562 retval_znode.u.constant.type = IS_LONG;
563 retval_znode.u.constant.value.lval = 1;
564 Z_UNSET_ISREF(retval_znode.u.constant);
565 Z_SET_REFCOUNT(retval_znode.u.constant, 1);
566
567 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
568
569 retval = op_array; /* success oriented */
570
571 if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
572 if (type==ZEND_REQUIRE) {
573 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
574 zend_bailout();
575 } else {
576 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
577 }
578 compilation_successful=0;
579 } else {
580 init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
581 CG(in_compilation) = 1;
582 CG(active_op_array) = op_array;
583 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
584 zend_init_compiler_context(TSRMLS_C);
585 compiler_result = zendparse(TSRMLS_C);
586 zend_do_return(&retval_znode, 0 TSRMLS_CC);
587 CG(in_compilation) = original_in_compilation;
588 if (compiler_result != 0) { /* parser error */
589 zend_bailout();
590 }
591 compilation_successful=1;
592 }
593
594 if (retval) {
595 CG(active_op_array) = original_active_op_array;
596 if (compilation_successful) {
597 pass_two(op_array TSRMLS_CC);
598 zend_release_labels(0 TSRMLS_CC);
599 } else {
600 efree(op_array);
601 retval = NULL;
602 }
603 }
604 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
605 return retval;
606 }
607
608
compile_filename(int type,zval * filename TSRMLS_DC)609 zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
610 {
611 zend_file_handle file_handle;
612 zval tmp;
613 zend_op_array *retval;
614 char *opened_path = NULL;
615
616 if (filename->type != IS_STRING) {
617 tmp = *filename;
618 zval_copy_ctor(&tmp);
619 convert_to_string(&tmp);
620 filename = &tmp;
621 }
622 file_handle.filename = filename->value.str.val;
623 file_handle.free_filename = 0;
624 file_handle.type = ZEND_HANDLE_FILENAME;
625 file_handle.opened_path = NULL;
626 file_handle.handle.fp = NULL;
627
628 retval = zend_compile_file(&file_handle, type TSRMLS_CC);
629 if (retval && file_handle.handle.stream.handle) {
630 int dummy = 1;
631
632 if (!file_handle.opened_path) {
633 file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
634 }
635
636 zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
637
638 if (opened_path) {
639 efree(opened_path);
640 }
641 }
642 zend_destroy_file_handle(&file_handle TSRMLS_CC);
643
644 if (filename==&tmp) {
645 zval_dtor(&tmp);
646 }
647 return retval;
648 }
649
zend_prepare_string_for_scanning(zval * str,char * filename TSRMLS_DC)650 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
651 {
652 char *buf;
653 size_t size;
654
655 /* enforce two trailing NULLs for flex... */
656 if (IS_INTERNED(str->value.str.val)) {
657 char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
658 memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
659 str->value.str.val = tmp;
660 } else {
661 str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
662 }
663
664 memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
665
666 SCNG(yy_in) = NULL;
667 SCNG(yy_start) = NULL;
668
669 buf = str->value.str.val;
670 size = str->value.str.len;
671
672 if (CG(multibyte)) {
673 SCNG(script_org) = (unsigned char*)buf;
674 SCNG(script_org_size) = size;
675 SCNG(script_filtered) = NULL;
676
677 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
678
679 if (SCNG(input_filter)) {
680 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
681 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
682 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
683 }
684 buf = (char*)SCNG(script_filtered);
685 size = SCNG(script_filtered_size);
686 }
687 }
688
689 yy_scan_buffer(buf, size TSRMLS_CC);
690
691 zend_set_compiled_filename(filename TSRMLS_CC);
692 CG(zend_lineno) = 1;
693 CG(increment_lineno) = 0;
694 RESET_DOC_COMMENT();
695 return SUCCESS;
696 }
697
698
zend_get_scanned_file_offset(TSRMLS_D)699 ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
700 {
701 size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
702 if (SCNG(input_filter)) {
703 size_t original_offset = offset, length = 0;
704 do {
705 unsigned char *p = NULL;
706 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
707 return (size_t)-1;
708 }
709 efree(p);
710 if (length > original_offset) {
711 offset--;
712 } else if (length < original_offset) {
713 offset++;
714 }
715 } while (original_offset != length);
716 }
717 return offset;
718 }
719
720
compile_string(zval * source_string,char * filename TSRMLS_DC)721 zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
722 {
723 zend_lex_state original_lex_state;
724 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
725 zend_op_array *original_active_op_array = CG(active_op_array);
726 zend_op_array *retval;
727 zval tmp;
728 int compiler_result;
729 zend_bool original_in_compilation = CG(in_compilation);
730
731 if (source_string->value.str.len==0) {
732 efree(op_array);
733 return NULL;
734 }
735
736 CG(in_compilation) = 1;
737
738 tmp = *source_string;
739 zval_copy_ctor(&tmp);
740 convert_to_string(&tmp);
741 source_string = &tmp;
742
743 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
744 if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
745 efree(op_array);
746 retval = NULL;
747 } else {
748 zend_bool orig_interactive = CG(interactive);
749
750 CG(interactive) = 0;
751 init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
752 CG(interactive) = orig_interactive;
753 CG(active_op_array) = op_array;
754 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
755 zend_init_compiler_context(TSRMLS_C);
756 BEGIN(ST_IN_SCRIPTING);
757 compiler_result = zendparse(TSRMLS_C);
758
759 if (SCNG(script_filtered)) {
760 efree(SCNG(script_filtered));
761 SCNG(script_filtered) = NULL;
762 }
763
764 if (compiler_result != 0) {
765 CG(active_op_array) = original_active_op_array;
766 CG(unclean_shutdown)=1;
767 destroy_op_array(op_array TSRMLS_CC);
768 efree(op_array);
769 retval = NULL;
770 } else {
771 zend_do_return(NULL, 0 TSRMLS_CC);
772 CG(active_op_array) = original_active_op_array;
773 pass_two(op_array TSRMLS_CC);
774 zend_release_labels(0 TSRMLS_CC);
775 retval = op_array;
776 }
777 }
778 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
779 zval_dtor(&tmp);
780 CG(in_compilation) = original_in_compilation;
781 return retval;
782 }
783
784
BEGIN_EXTERN_C()785 BEGIN_EXTERN_C()
786 int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
787 {
788 zend_lex_state original_lex_state;
789 zend_file_handle file_handle;
790
791 file_handle.type = ZEND_HANDLE_FILENAME;
792 file_handle.filename = filename;
793 file_handle.free_filename = 0;
794 file_handle.opened_path = NULL;
795 zend_save_lexical_state(&original_lex_state TSRMLS_CC);
796 if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
797 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
798 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
799 return FAILURE;
800 }
801 zend_highlight(syntax_highlighter_ini TSRMLS_CC);
802 if (SCNG(script_filtered)) {
803 efree(SCNG(script_filtered));
804 SCNG(script_filtered) = NULL;
805 }
806 zend_destroy_file_handle(&file_handle TSRMLS_CC);
807 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
808 return SUCCESS;
809 }
810
/*
 * Syntax-highlight the PHP source held in the zval str.
 *
 * str_name: name recorded as the compiled filename while highlighting.
 *
 * Works on a private copy of str, which is released on every path
 * (including the FAILURE path). The lexical state in effect on entry is
 * restored before returning. Returns SUCCESS, or FAILURE when the string
 * cannot be prepared for scanning.
 */
int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
{
	zend_lex_state original_lex_state;
	zval tmp = *str;

	str = &tmp;
	zval_copy_ctor(str);
	zend_save_lexical_state(&original_lex_state TSRMLS_CC);
	if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
		zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
		/* the copy made above must not leak when we bail out early */
		zval_dtor(str);
		return FAILURE;
	}
	BEGIN(INITIAL);
	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
		SCNG(script_filtered) = NULL;
	}
	zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
	zval_dtor(str);
	return SUCCESS;
}
833
zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter,const zend_encoding * old_encoding TSRMLS_DC)834 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
835 {
836 size_t length;
837 unsigned char *new_yy_start;
838
839 /* convert and set */
840 if (!SCNG(input_filter)) {
841 if (SCNG(script_filtered)) {
842 efree(SCNG(script_filtered));
843 SCNG(script_filtered) = NULL;
844 }
845 SCNG(script_filtered_size) = 0;
846 length = SCNG(script_org_size);
847 new_yy_start = SCNG(script_org);
848 } else {
849 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
850 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
851 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
852 }
853 SCNG(script_filtered) = new_yy_start;
854 SCNG(script_filtered_size) = length;
855 }
856
857 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
858 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
859 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
860 SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
861
862 SCNG(yy_start) = new_yy_start;
863 }
864
865
/*
 * Store a copy of the yyleng bytes at yytext as the string value of
 * zendlval. With an output filter installed, the filter produces the
 * stored buffer and length; otherwise the bytes are duplicated with
 * estrndup(). The filter's return value is not checked.
 *
 * Arguments are parenthesized so compound expressions expand safely.
 * The expansion is a bare if/else statement: do not use it as the
 * unbraced body of another if.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
	if (SCNG(output_filter)) { \
		size_t sz = 0; \
		SCNG(output_filter)((unsigned char **)&((zendlval)->value.str.val), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
		(zendlval)->value.str.len = sz; \
	} else { \
		(zendlval)->value.str.val = (char *) estrndup((yytext), (yyleng)); \
		(zendlval)->value.str.len = (yyleng); \
	}
875
/*
 * Copy the len bytes at str into zendlval as a string and decode the
 * backslash escapes in place.
 *
 * Recognized escapes: \n \r \t \f \v \e, \\ and \$, the quote character
 * given in quote_type (only when it is '"' or '`'), \x or \X followed by
 * one or two hex digits, and one to three octal digits. Any other
 * backslash sequence - including the quote character that does not match
 * quote_type - is kept verbatim, backslash included. A lone backslash at
 * the very end is kept as well.
 *
 * CG(zend_lineno) is advanced for every line break seen in the source
 * text. When an output filter is installed, the decoded string is passed
 * through it and replaced by the filter's result.
 */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
	register char *s, *t;
	char *end;

	ZVAL_STRINGL(zendlval, str, len, 1);

	/* convert escape sequences; s reads, t writes, and t never overtakes s */
	s = t = zendlval->value.str.val;
	end = s+zendlval->value.str.len;
	while (s<end) {
		if (*s=='\\') {
			s++;
			if (s >= end) {
				*t++ = '\\';
				break;
			}

			switch(*s) {
				case 'n':
					*t++ = '\n';
					zendlval->value.str.len--;
					break;
				case 'r':
					*t++ = '\r';
					zendlval->value.str.len--;
					break;
				case 't':
					*t++ = '\t';
					zendlval->value.str.len--;
					break;
				case 'f':
					*t++ = '\f';
					zendlval->value.str.len--;
					break;
				case 'v':
					*t++ = '\v';
					zendlval->value.str.len--;
					break;
				case 'e':
					/* ESC (0x1B), spelled as an octal escape because '\e' is not standard C */
					*t++ = '\033';
					zendlval->value.str.len--;
					break;
				case '"':
				case '`':
					if (*s != quote_type) {
						*t++ = '\\';
						*t++ = *s;
						break;
					}
					/* fallthrough: the matching quote is unescaped like '\\' and '$' */
				case '\\':
				case '$':
					*t++ = *s;
					zendlval->value.str.len--;
					break;
				case 'x':
				case 'X':
					if (ZEND_IS_HEX(*(s+1))) {
						char hex_buf[3] = { 0, 0, 0 };

						zendlval->value.str.len--; /* for the 'x' */

						hex_buf[0] = *(++s);
						zendlval->value.str.len--;
						if (ZEND_IS_HEX(*(s+1))) {
							hex_buf[1] = *(++s);
							zendlval->value.str.len--;
						}
						*t++ = (char) strtol(hex_buf, NULL, 16);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
				default:
					/* check for an octal */
					if (ZEND_IS_OCT(*s)) {
						char octal_buf[4] = { 0, 0, 0, 0 };

						octal_buf[0] = *s;
						zendlval->value.str.len--;
						if (ZEND_IS_OCT(*(s+1))) {
							octal_buf[1] = *(++s);
							zendlval->value.str.len--;
							if (ZEND_IS_OCT(*(s+1))) {
								octal_buf[2] = *(++s);
								zendlval->value.str.len--;
							}
						}
						*t++ = (char) strtol(octal_buf, NULL, 8);
					} else {
						*t++ = '\\';
						*t++ = *s;
					}
					break;
			}
		} else {
			*t++ = *s;
		}

		/* count line breaks in the source text: "\n", or "\r" not followed by "\n" */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;
	if (SCNG(output_filter)) {
		size_t sz = 0;
		s = zendlval->value.str.val;
		SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
		zendlval->value.str.len = sz;
		/* the unfiltered buffer has been replaced by the filter's output */
		efree(s);
	}
}
994
995
lex_scan(zval * zendlval TSRMLS_DC)996 int lex_scan(zval *zendlval TSRMLS_DC)
997 {
998 restart:
999 SCNG(yy_text) = YYCURSOR;
1000
1001 yymore_restart:
1002
1003 /*!re2c
1004 re2c:yyfill:check = 0;
1005 LNUM [0-9]+
1006 DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1007 EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1008 HNUM "0x"[0-9a-fA-F]+
1009 BNUM "0b"[01]+
1010 LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1011 WHITESPACE [ \n\r\t]+
1012 TABS_AND_SPACES [ \t]*
1013 TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1014 ANY_CHAR [^]
1015 NEWLINE ("\r"|"\n"|"\r\n")
1016
1017 /* compute yyleng before each rule */
1018 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1019
1020
1021 <ST_IN_SCRIPTING>"exit" {
1022 return T_EXIT;
1023 }
1024
1025 <ST_IN_SCRIPTING>"die" {
1026 return T_EXIT;
1027 }
1028
1029 <ST_IN_SCRIPTING>"function" {
1030 return T_FUNCTION;
1031 }
1032
1033 <ST_IN_SCRIPTING>"const" {
1034 return T_CONST;
1035 }
1036
1037 <ST_IN_SCRIPTING>"return" {
1038 return T_RETURN;
1039 }
1040
1041 <ST_IN_SCRIPTING>"try" {
1042 return T_TRY;
1043 }
1044
1045 <ST_IN_SCRIPTING>"catch" {
1046 return T_CATCH;
1047 }
1048
1049 <ST_IN_SCRIPTING>"throw" {
1050 return T_THROW;
1051 }
1052
1053 <ST_IN_SCRIPTING>"if" {
1054 return T_IF;
1055 }
1056
1057 <ST_IN_SCRIPTING>"elseif" {
1058 return T_ELSEIF;
1059 }
1060
1061 <ST_IN_SCRIPTING>"endif" {
1062 return T_ENDIF;
1063 }
1064
1065 <ST_IN_SCRIPTING>"else" {
1066 return T_ELSE;
1067 }
1068
1069 <ST_IN_SCRIPTING>"while" {
1070 return T_WHILE;
1071 }
1072
1073 <ST_IN_SCRIPTING>"endwhile" {
1074 return T_ENDWHILE;
1075 }
1076
1077 <ST_IN_SCRIPTING>"do" {
1078 return T_DO;
1079 }
1080
1081 <ST_IN_SCRIPTING>"for" {
1082 return T_FOR;
1083 }
1084
1085 <ST_IN_SCRIPTING>"endfor" {
1086 return T_ENDFOR;
1087 }
1088
1089 <ST_IN_SCRIPTING>"foreach" {
1090 return T_FOREACH;
1091 }
1092
1093 <ST_IN_SCRIPTING>"endforeach" {
1094 return T_ENDFOREACH;
1095 }
1096
1097 <ST_IN_SCRIPTING>"declare" {
1098 return T_DECLARE;
1099 }
1100
1101 <ST_IN_SCRIPTING>"enddeclare" {
1102 return T_ENDDECLARE;
1103 }
1104
1105 <ST_IN_SCRIPTING>"instanceof" {
1106 return T_INSTANCEOF;
1107 }
1108
1109 <ST_IN_SCRIPTING>"as" {
1110 return T_AS;
1111 }
1112
1113 <ST_IN_SCRIPTING>"switch" {
1114 return T_SWITCH;
1115 }
1116
1117 <ST_IN_SCRIPTING>"endswitch" {
1118 return T_ENDSWITCH;
1119 }
1120
1121 <ST_IN_SCRIPTING>"case" {
1122 return T_CASE;
1123 }
1124
1125 <ST_IN_SCRIPTING>"default" {
1126 return T_DEFAULT;
1127 }
1128
1129 <ST_IN_SCRIPTING>"break" {
1130 return T_BREAK;
1131 }
1132
1133 <ST_IN_SCRIPTING>"continue" {
1134 return T_CONTINUE;
1135 }
1136
1137 <ST_IN_SCRIPTING>"goto" {
1138 return T_GOTO;
1139 }
1140
1141 <ST_IN_SCRIPTING>"echo" {
1142 return T_ECHO;
1143 }
1144
1145 <ST_IN_SCRIPTING>"print" {
1146 return T_PRINT;
1147 }
1148
1149 <ST_IN_SCRIPTING>"class" {
1150 return T_CLASS;
1151 }
1152
1153 <ST_IN_SCRIPTING>"interface" {
1154 return T_INTERFACE;
1155 }
1156
1157 <ST_IN_SCRIPTING>"trait" {
1158 return T_TRAIT;
1159 }
1160
1161 <ST_IN_SCRIPTING>"extends" {
1162 return T_EXTENDS;
1163 }
1164
1165 <ST_IN_SCRIPTING>"implements" {
1166 return T_IMPLEMENTS;
1167 }
1168
1169 <ST_IN_SCRIPTING>"->" {
1170 yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1171 return T_OBJECT_OPERATOR;
1172 }
1173
1174 <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1175 zendlval->value.str.val = yytext; /* no copying - intentional */
1176 zendlval->value.str.len = yyleng;
1177 zendlval->type = IS_STRING;
1178 HANDLE_NEWLINES(yytext, yyleng);
1179 return T_WHITESPACE;
1180 }
1181
1182 <ST_LOOKING_FOR_PROPERTY>"->" {
1183 return T_OBJECT_OPERATOR;
1184 }
1185
1186 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1187 yy_pop_state(TSRMLS_C);
1188 zend_copy_value(zendlval, yytext, yyleng);
1189 zendlval->type = IS_STRING;
1190 return T_STRING;
1191 }
1192
1193 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1194 yyless(0);
1195 yy_pop_state(TSRMLS_C);
1196 goto restart;
1197 }
1198
1199 <ST_IN_SCRIPTING>"::" {
1200 return T_PAAMAYIM_NEKUDOTAYIM;
1201 }
1202
1203 <ST_IN_SCRIPTING>"\\" {
1204 return T_NS_SEPARATOR;
1205 }
1206
1207 <ST_IN_SCRIPTING>"new" {
1208 return T_NEW;
1209 }
1210
1211 <ST_IN_SCRIPTING>"clone" {
1212 return T_CLONE;
1213 }
1214
1215 <ST_IN_SCRIPTING>"var" {
1216 return T_VAR;
1217 }
1218
1219 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1220 return T_INT_CAST;
1221 }
1222
1223 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1224 return T_DOUBLE_CAST;
1225 }
1226
1227 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1228 return T_STRING_CAST;
1229 }
1230
1231 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1232 return T_ARRAY_CAST;
1233 }
1234
1235 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1236 return T_OBJECT_CAST;
1237 }
1238
1239 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1240 return T_BOOL_CAST;
1241 }
1242
1243 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1244 return T_UNSET_CAST;
1245 }
1246
1247 <ST_IN_SCRIPTING>"eval" {
1248 return T_EVAL;
1249 }
1250
1251 <ST_IN_SCRIPTING>"include" {
1252 return T_INCLUDE;
1253 }
1254
1255 <ST_IN_SCRIPTING>"include_once" {
1256 return T_INCLUDE_ONCE;
1257 }
1258
1259 <ST_IN_SCRIPTING>"require" {
1260 return T_REQUIRE;
1261 }
1262
1263 <ST_IN_SCRIPTING>"require_once" {
1264 return T_REQUIRE_ONCE;
1265 }
1266
1267 <ST_IN_SCRIPTING>"namespace" {
1268 return T_NAMESPACE;
1269 }
1270
1271 <ST_IN_SCRIPTING>"use" {
1272 return T_USE;
1273 }
1274
1275 <ST_IN_SCRIPTING>"insteadof" {
1276 return T_INSTEADOF;
1277 }
1278
1279 <ST_IN_SCRIPTING>"global" {
1280 return T_GLOBAL;
1281 }
1282
1283 <ST_IN_SCRIPTING>"isset" {
1284 return T_ISSET;
1285 }
1286
1287 <ST_IN_SCRIPTING>"empty" {
1288 return T_EMPTY;
1289 }
1290
1291 <ST_IN_SCRIPTING>"__halt_compiler" {
1292 return T_HALT_COMPILER;
1293 }
1294
1295 <ST_IN_SCRIPTING>"static" {
1296 return T_STATIC;
1297 }
1298
1299 <ST_IN_SCRIPTING>"abstract" {
1300 return T_ABSTRACT;
1301 }
1302
1303 <ST_IN_SCRIPTING>"final" {
1304 return T_FINAL;
1305 }
1306
1307 <ST_IN_SCRIPTING>"private" {
1308 return T_PRIVATE;
1309 }
1310
1311 <ST_IN_SCRIPTING>"protected" {
1312 return T_PROTECTED;
1313 }
1314
1315 <ST_IN_SCRIPTING>"public" {
1316 return T_PUBLIC;
1317 }
1318
1319 <ST_IN_SCRIPTING>"unset" {
1320 return T_UNSET;
1321 }
1322
1323 <ST_IN_SCRIPTING>"=>" {
1324 return T_DOUBLE_ARROW;
1325 }
1326
1327 <ST_IN_SCRIPTING>"list" {
1328 return T_LIST;
1329 }
1330
1331 <ST_IN_SCRIPTING>"array" {
1332 return T_ARRAY;
1333 }
1334
1335 <ST_IN_SCRIPTING>"callable" {
1336 return T_CALLABLE;
1337 }
1338
1339 <ST_IN_SCRIPTING>"++" {
1340 return T_INC;
1341 }
1342
1343 <ST_IN_SCRIPTING>"--" {
1344 return T_DEC;
1345 }
1346
1347 <ST_IN_SCRIPTING>"===" {
1348 return T_IS_IDENTICAL;
1349 }
1350
1351 <ST_IN_SCRIPTING>"!==" {
1352 return T_IS_NOT_IDENTICAL;
1353 }
1354
1355 <ST_IN_SCRIPTING>"==" {
1356 return T_IS_EQUAL;
1357 }
1358
1359 <ST_IN_SCRIPTING>"!="|"<>" {
1360 return T_IS_NOT_EQUAL;
1361 }
1362
1363 <ST_IN_SCRIPTING>"<=" {
1364 return T_IS_SMALLER_OR_EQUAL;
1365 }
1366
1367 <ST_IN_SCRIPTING>">=" {
1368 return T_IS_GREATER_OR_EQUAL;
1369 }
1370
1371 <ST_IN_SCRIPTING>"+=" {
1372 return T_PLUS_EQUAL;
1373 }
1374
1375 <ST_IN_SCRIPTING>"-=" {
1376 return T_MINUS_EQUAL;
1377 }
1378
1379 <ST_IN_SCRIPTING>"*=" {
1380 return T_MUL_EQUAL;
1381 }
1382
1383 <ST_IN_SCRIPTING>"/=" {
1384 return T_DIV_EQUAL;
1385 }
1386
1387 <ST_IN_SCRIPTING>".=" {
1388 return T_CONCAT_EQUAL;
1389 }
1390
1391 <ST_IN_SCRIPTING>"%=" {
1392 return T_MOD_EQUAL;
1393 }
1394
1395 <ST_IN_SCRIPTING>"<<=" {
1396 return T_SL_EQUAL;
1397 }
1398
1399 <ST_IN_SCRIPTING>">>=" {
1400 return T_SR_EQUAL;
1401 }
1402
1403 <ST_IN_SCRIPTING>"&=" {
1404 return T_AND_EQUAL;
1405 }
1406
1407 <ST_IN_SCRIPTING>"|=" {
1408 return T_OR_EQUAL;
1409 }
1410
1411 <ST_IN_SCRIPTING>"^=" {
1412 return T_XOR_EQUAL;
1413 }
1414
1415 <ST_IN_SCRIPTING>"||" {
1416 return T_BOOLEAN_OR;
1417 }
1418
1419 <ST_IN_SCRIPTING>"&&" {
1420 return T_BOOLEAN_AND;
1421 }
1422
1423 <ST_IN_SCRIPTING>"OR" {
1424 return T_LOGICAL_OR;
1425 }
1426
1427 <ST_IN_SCRIPTING>"AND" {
1428 return T_LOGICAL_AND;
1429 }
1430
1431 <ST_IN_SCRIPTING>"XOR" {
1432 return T_LOGICAL_XOR;
1433 }
1434
1435 <ST_IN_SCRIPTING>"<<" {
1436 return T_SL;
1437 }
1438
1439 <ST_IN_SCRIPTING>">>" {
1440 return T_SR;
1441 }
1442
1443 <ST_IN_SCRIPTING>{TOKENS} {
1444 return yytext[0];
1445 }
1446
1447
1448 <ST_IN_SCRIPTING>"{" {
1449 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1450 return '{';
1451 }
1452
1453
1454 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1455 yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1456 return T_DOLLAR_OPEN_CURLY_BRACES;
1457 }
1458
1459
1460 <ST_IN_SCRIPTING>"}" {
1461 RESET_DOC_COMMENT();
1462 if (!zend_stack_is_empty(&SCNG(state_stack))) {
1463 yy_pop_state(TSRMLS_C);
1464 }
1465 return '}';
1466 }
1467
1468
1469 <ST_LOOKING_FOR_VARNAME>{LABEL} {
1470 zend_copy_value(zendlval, yytext, yyleng);
1471 zendlval->type = IS_STRING;
1472 yy_pop_state(TSRMLS_C);
1473 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1474 return T_STRING_VARNAME;
1475 }
1476
1477
1478 <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1479 yyless(0);
1480 yy_pop_state(TSRMLS_C);
1481 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1482 goto restart;
1483 }
1484
1485 <ST_IN_SCRIPTING>{BNUM} {
1486 char *bin = yytext + 2; /* Skip "0b" */
1487 int len = yyleng - 2;
1488
1489 /* Skip any leading 0s */
1490 while (*bin == '0') {
1491 ++bin;
1492 --len;
1493 }
1494
1495 if (len < SIZEOF_LONG * 8) {
1496 if (len == 0) {
1497 zendlval->value.lval = 0;
1498 } else {
1499 zendlval->value.lval = strtol(bin, NULL, 2);
1500 }
1501 zendlval->type = IS_LONG;
1502 return T_LNUMBER;
1503 } else {
1504 zendlval->value.dval = zend_bin_strtod(bin, NULL);
1505 zendlval->type = IS_DOUBLE;
1506 return T_DNUMBER;
1507 }
1508 }
1509
1510 <ST_IN_SCRIPTING>{LNUM} {
1511 if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1512 zendlval->value.lval = strtol(yytext, NULL, 0);
1513 } else {
1514 errno = 0;
1515 zendlval->value.lval = strtol(yytext, NULL, 0);
1516 if (errno == ERANGE) { /* Overflow */
1517 if (yytext[0] == '0') { /* octal overflow */
1518 zendlval->value.dval = zend_oct_strtod(yytext, NULL);
1519 } else {
1520 zendlval->value.dval = zend_strtod(yytext, NULL);
1521 }
1522 zendlval->type = IS_DOUBLE;
1523 return T_DNUMBER;
1524 }
1525 }
1526
1527 zendlval->type = IS_LONG;
1528 return T_LNUMBER;
1529 }
1530
1531 <ST_IN_SCRIPTING>{HNUM} {
1532 char *hex = yytext + 2; /* Skip "0x" */
1533 int len = yyleng - 2;
1534
1535 /* Skip any leading 0s */
1536 while (*hex == '0') {
1537 hex++;
1538 len--;
1539 }
1540
1541 if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1542 if (len == 0) {
1543 zendlval->value.lval = 0;
1544 } else {
1545 zendlval->value.lval = strtol(hex, NULL, 16);
1546 }
1547 zendlval->type = IS_LONG;
1548 return T_LNUMBER;
1549 } else {
1550 zendlval->value.dval = zend_hex_strtod(hex, NULL);
1551 zendlval->type = IS_DOUBLE;
1552 return T_DNUMBER;
1553 }
1554 }
1555
1556 <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1557 if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1558 zendlval->value.lval = strtol(yytext, NULL, 10);
1559 zendlval->type = IS_LONG;
1560 } else {
1561 zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1562 zendlval->value.str.len = yyleng;
1563 zendlval->type = IS_STRING;
1564 }
1565 return T_NUM_STRING;
1566 }
1567
1568 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1569 zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1570 zendlval->value.str.len = yyleng;
1571 zendlval->type = IS_STRING;
1572 return T_NUM_STRING;
1573 }
1574
1575 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1576 zendlval->value.dval = zend_strtod(yytext, NULL);
1577 zendlval->type = IS_DOUBLE;
1578 return T_DNUMBER;
1579 }
1580
1581 <ST_IN_SCRIPTING>"__CLASS__" {
1582 const char *class_name = NULL;
1583
1584 if (CG(active_class_entry)
1585 && (ZEND_ACC_TRAIT ==
1586 (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1587 /* We create a special __CLASS__ constant that is going to be resolved
1588 at run-time */
1589 zendlval->value.str.len = sizeof("__CLASS__")-1;
1590 zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len);
1591 zendlval->type = IS_CONSTANT;
1592 } else {
1593 if (CG(active_class_entry)) {
1594 class_name = CG(active_class_entry)->name;
1595 }
1596
1597 if (!class_name) {
1598 class_name = "";
1599 }
1600
1601 zendlval->value.str.len = strlen(class_name);
1602 zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
1603 zendlval->type = IS_STRING;
1604 }
1605 return T_CLASS_C;
1606 }
1607
1608 <ST_IN_SCRIPTING>"__TRAIT__" {
1609 const char *trait_name = NULL;
1610
1611 if (CG(active_class_entry)
1612 && (ZEND_ACC_TRAIT ==
1613 (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1614 trait_name = CG(active_class_entry)->name;
1615 }
1616
1617 if (!trait_name) {
1618 trait_name = "";
1619 }
1620
1621 zendlval->value.str.len = strlen(trait_name);
1622 zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
1623 zendlval->type = IS_STRING;
1624
1625 return T_TRAIT_C;
1626 }
1627
1628 <ST_IN_SCRIPTING>"__FUNCTION__" {
1629 const char *func_name = NULL;
1630
1631 if (CG(active_op_array)) {
1632 func_name = CG(active_op_array)->function_name;
1633 }
1634
1635 if (!func_name) {
1636 func_name = "";
1637 }
1638 zendlval->value.str.len = strlen(func_name);
1639 zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
1640 zendlval->type = IS_STRING;
1641 return T_FUNC_C;
1642 }
1643
1644 <ST_IN_SCRIPTING>"__METHOD__" {
1645 const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1646 const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1647 size_t len = 0;
1648
1649 if (class_name) {
1650 len += strlen(class_name) + 2;
1651 }
1652 if (func_name) {
1653 len += strlen(func_name);
1654 }
1655
1656 zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
1657 class_name ? class_name : "",
1658 class_name && func_name ? "::" : "",
1659 func_name ? func_name : ""
1660 );
1661 zendlval->type = IS_STRING;
1662 return T_METHOD_C;
1663 }
1664
1665 <ST_IN_SCRIPTING>"__LINE__" {
1666 zendlval->value.lval = CG(zend_lineno);
1667 zendlval->type = IS_LONG;
1668 return T_LINE;
1669 }
1670
1671 <ST_IN_SCRIPTING>"__FILE__" {
1672 char *filename = zend_get_compiled_filename(TSRMLS_C);
1673
1674 if (!filename) {
1675 filename = "";
1676 }
1677 zendlval->value.str.len = strlen(filename);
1678 zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
1679 zendlval->type = IS_STRING;
1680 return T_FILE;
1681 }
1682
1683 <ST_IN_SCRIPTING>"__DIR__" {
1684 char *filename = zend_get_compiled_filename(TSRMLS_C);
1685 const size_t filename_len = strlen(filename);
1686 char *dirname;
1687
1688 if (!filename) {
1689 filename = "";
1690 }
1691
1692 dirname = estrndup(filename, filename_len);
1693 zend_dirname(dirname, filename_len);
1694
1695 if (strcmp(dirname, ".") == 0) {
1696 dirname = erealloc(dirname, MAXPATHLEN);
1697 #if HAVE_GETCWD
1698 VCWD_GETCWD(dirname, MAXPATHLEN);
1699 #elif HAVE_GETWD
1700 VCWD_GETWD(dirname);
1701 #endif
1702 }
1703
1704 zendlval->value.str.len = strlen(dirname);
1705 zendlval->value.str.val = dirname;
1706 zendlval->type = IS_STRING;
1707 return T_DIR;
1708 }
1709
1710 <ST_IN_SCRIPTING>"__NAMESPACE__" {
1711 if (CG(current_namespace)) {
1712 *zendlval = *CG(current_namespace);
1713 zval_copy_ctor(zendlval);
1714 } else {
1715 ZVAL_EMPTY_STRING(zendlval);
1716 }
1717 return T_NS_C;
1718 }
1719
1720 <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1721 YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1722
1723 if (bracket != SCNG(yy_text)) {
1724 /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1725 YYCURSOR = bracket;
1726 goto inline_html;
1727 }
1728
1729 HANDLE_NEWLINES(yytext, yyleng);
1730 zendlval->value.str.val = yytext; /* no copying - intentional */
1731 zendlval->value.str.len = yyleng;
1732 zendlval->type = IS_STRING;
1733 BEGIN(ST_IN_SCRIPTING);
1734 return T_OPEN_TAG;
1735 }
1736
1737
1738 <INITIAL>"<%=" {
1739 if (CG(asp_tags)) {
1740 zendlval->value.str.val = yytext; /* no copying - intentional */
1741 zendlval->value.str.len = yyleng;
1742 zendlval->type = IS_STRING;
1743 BEGIN(ST_IN_SCRIPTING);
1744 return T_OPEN_TAG_WITH_ECHO;
1745 } else {
1746 goto inline_char_handler;
1747 }
1748 }
1749
1750
1751 <INITIAL>"<?=" {
1752 zendlval->value.str.val = yytext; /* no copying - intentional */
1753 zendlval->value.str.len = yyleng;
1754 zendlval->type = IS_STRING;
1755 BEGIN(ST_IN_SCRIPTING);
1756 return T_OPEN_TAG_WITH_ECHO;
1757 }
1758
1759
1760 <INITIAL>"<%" {
1761 if (CG(asp_tags)) {
1762 zendlval->value.str.val = yytext; /* no copying - intentional */
1763 zendlval->value.str.len = yyleng;
1764 zendlval->type = IS_STRING;
1765 BEGIN(ST_IN_SCRIPTING);
1766 return T_OPEN_TAG;
1767 } else {
1768 goto inline_char_handler;
1769 }
1770 }
1771
1772
1773 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1774 zendlval->value.str.val = yytext; /* no copying - intentional */
1775 zendlval->value.str.len = yyleng;
1776 zendlval->type = IS_STRING;
1777 HANDLE_NEWLINE(yytext[yyleng-1]);
1778 BEGIN(ST_IN_SCRIPTING);
1779 return T_OPEN_TAG;
1780 }
1781
1782
1783 <INITIAL>"<?" {
1784 if (CG(short_tags)) {
1785 zendlval->value.str.val = yytext; /* no copying - intentional */
1786 zendlval->value.str.len = yyleng;
1787 zendlval->type = IS_STRING;
1788 BEGIN(ST_IN_SCRIPTING);
1789 return T_OPEN_TAG;
1790 } else {
1791 goto inline_char_handler;
1792 }
1793 }
1794
1795 <INITIAL>{ANY_CHAR} {
1796 if (YYCURSOR > YYLIMIT) {
1797 return 0;
1798 }
1799
1800 inline_char_handler:
1801
1802 while (1) {
1803 YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1804
1805 YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1806
1807 if (YYCURSOR < YYLIMIT) {
1808 switch (*YYCURSOR) {
1809 case '?':
1810 if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1811 break;
1812 }
1813 continue;
1814 case '%':
1815 if (CG(asp_tags)) {
1816 break;
1817 }
1818 continue;
1819 case 's':
1820 case 'S':
1821 /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1822 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1823 YYCURSOR--;
1824 yymore();
1825 default:
1826 continue;
1827 }
1828
1829 YYCURSOR--;
1830 }
1831
1832 break;
1833 }
1834
1835 inline_html:
1836 yyleng = YYCURSOR - SCNG(yy_text);
1837
1838 if (SCNG(output_filter)) {
1839 int readsize;
1840 size_t sz = 0;
1841 readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1842 zendlval->value.str.len = sz;
1843 if (readsize < yyleng) {
1844 yyless(readsize);
1845 }
1846 } else {
1847 zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
1848 zendlval->value.str.len = yyleng;
1849 }
1850 zendlval->type = IS_STRING;
1851 HANDLE_NEWLINES(yytext, yyleng);
1852 return T_INLINE_HTML;
1853 }
1854
1855
1856 /* Make sure a label character follows "->", otherwise there is no property
1857 * and "->" will be taken literally
1858 */
1859 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1860 yyless(yyleng - 3);
1861 yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1862 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1863 zendlval->type = IS_STRING;
1864 return T_VARIABLE;
1865 }
1866
1867 /* A [ always designates a variable offset, regardless of what follows
1868 */
1869 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1870 yyless(yyleng - 1);
1871 yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1872 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1873 zendlval->type = IS_STRING;
1874 return T_VARIABLE;
1875 }
1876
1877 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1878 zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1879 zendlval->type = IS_STRING;
1880 return T_VARIABLE;
1881 }
1882
1883 <ST_VAR_OFFSET>"]" {
1884 yy_pop_state(TSRMLS_C);
1885 return ']';
1886 }
1887
1888 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1889 /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1890 return yytext[0];
1891 }
1892
1893 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1894 /* Invalid rule to return a more explicit parse error with proper line number */
1895 yyless(0);
1896 yy_pop_state(TSRMLS_C);
1897 return T_ENCAPSED_AND_WHITESPACE;
1898 }
1899
1900 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1901 zend_copy_value(zendlval, yytext, yyleng);
1902 zendlval->type = IS_STRING;
1903 return T_STRING;
1904 }
1905
1906
1907 <ST_IN_SCRIPTING>"#"|"//" {
1908 while (YYCURSOR < YYLIMIT) {
1909 switch (*YYCURSOR++) {
1910 case '\r':
1911 if (*YYCURSOR == '\n') {
1912 YYCURSOR++;
1913 }
1914 /* fall through */
1915 case '\n':
1916 CG(zend_lineno)++;
1917 break;
1918 case '%':
1919 if (!CG(asp_tags)) {
1920 continue;
1921 }
1922 /* fall through */
1923 case '?':
1924 if (*YYCURSOR == '>') {
1925 YYCURSOR--;
1926 break;
1927 }
1928 /* fall through */
1929 default:
1930 continue;
1931 }
1932
1933 break;
1934 }
1935
1936 yyleng = YYCURSOR - SCNG(yy_text);
1937
1938 return T_COMMENT;
1939 }
1940
1941 <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1942 int doc_com;
1943
1944 if (yyleng > 2) {
1945 doc_com = 1;
1946 RESET_DOC_COMMENT();
1947 } else {
1948 doc_com = 0;
1949 }
1950
1951 while (YYCURSOR < YYLIMIT) {
1952 if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1953 break;
1954 }
1955 }
1956
1957 if (YYCURSOR < YYLIMIT) {
1958 YYCURSOR++;
1959 } else {
1960 zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1961 }
1962
1963 yyleng = YYCURSOR - SCNG(yy_text);
1964 HANDLE_NEWLINES(yytext, yyleng);
1965
1966 if (doc_com) {
1967 CG(doc_comment) = estrndup(yytext, yyleng);
1968 CG(doc_comment_len) = yyleng;
1969 return T_DOC_COMMENT;
1970 }
1971
1972 return T_COMMENT;
1973 }
1974
1975 <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1976 zendlval->value.str.val = yytext; /* no copying - intentional */
1977 zendlval->value.str.len = yyleng;
1978 zendlval->type = IS_STRING;
1979 BEGIN(INITIAL);
1980 return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1981 }
1982
1983
1984 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1985 if (CG(asp_tags)) {
1986 BEGIN(INITIAL);
1987 zendlval->value.str.len = yyleng;
1988 zendlval->type = IS_STRING;
1989 zendlval->value.str.val = yytext; /* no copying - intentional */
1990 return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1991 } else {
1992 yyless(1);
1993 return yytext[0];
1994 }
1995 }
1996
1997
1998 <ST_IN_SCRIPTING>b?['] {
1999 register char *s, *t;
2000 char *end;
2001 int bprefix = (yytext[0] != '\'') ? 1 : 0;
2002
2003 while (1) {
2004 if (YYCURSOR < YYLIMIT) {
2005 if (*YYCURSOR == '\'') {
2006 YYCURSOR++;
2007 yyleng = YYCURSOR - SCNG(yy_text);
2008
2009 break;
2010 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2011 YYCURSOR++;
2012 }
2013 } else {
2014 yyleng = YYLIMIT - SCNG(yy_text);
2015
2016 /* Unclosed single quotes; treat similar to double quotes, but without a separate token
2017 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2018 * rule, which continued in ST_IN_SCRIPTING state after the quote */
2019 return T_ENCAPSED_AND_WHITESPACE;
2020 }
2021 }
2022
2023 zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
2024 zendlval->value.str.len = yyleng-bprefix-2;
2025 zendlval->type = IS_STRING;
2026
2027 /* convert escape sequences */
2028 s = t = zendlval->value.str.val;
2029 end = s+zendlval->value.str.len;
2030 while (s<end) {
2031 if (*s=='\\') {
2032 s++;
2033
2034 switch(*s) {
2035 case '\\':
2036 case '\'':
2037 *t++ = *s;
2038 zendlval->value.str.len--;
2039 break;
2040 default:
2041 *t++ = '\\';
2042 *t++ = *s;
2043 break;
2044 }
2045 } else {
2046 *t++ = *s;
2047 }
2048
2049 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2050 CG(zend_lineno)++;
2051 }
2052 s++;
2053 }
2054 *t = 0;
2055
2056 if (SCNG(output_filter)) {
2057 size_t sz = 0;
2058 s = zendlval->value.str.val;
2059 SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
2060 zendlval->value.str.len = sz;
2061 efree(s);
2062 }
2063 return T_CONSTANT_ENCAPSED_STRING;
2064 }
2065
2066
2067 <ST_IN_SCRIPTING>b?["] {
2068 int bprefix = (yytext[0] != '"') ? 1 : 0;
2069
2070 while (YYCURSOR < YYLIMIT) {
2071 switch (*YYCURSOR++) {
2072 case '"':
2073 yyleng = YYCURSOR - SCNG(yy_text);
2074 zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2075 return T_CONSTANT_ENCAPSED_STRING;
2076 case '$':
2077 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2078 break;
2079 }
2080 continue;
2081 case '{':
2082 if (*YYCURSOR == '$') {
2083 break;
2084 }
2085 continue;
2086 case '\\':
2087 if (YYCURSOR < YYLIMIT) {
2088 YYCURSOR++;
2089 }
2090 /* fall through */
2091 default:
2092 continue;
2093 }
2094
2095 YYCURSOR--;
2096 break;
2097 }
2098
2099 /* Remember how much was scanned to save rescanning */
2100 SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2101
2102 YYCURSOR = SCNG(yy_text) + yyleng;
2103
2104 BEGIN(ST_DOUBLE_QUOTES);
2105 return '"';
2106 }
2107
2108
2109 <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2110 char *s;
2111 int bprefix = (yytext[0] != '<') ? 1 : 0;
2112
2113 /* save old heredoc label */
2114 Z_STRVAL_P(zendlval) = CG(heredoc);
2115 Z_STRLEN_P(zendlval) = CG(heredoc_len);
2116
2117 CG(zend_lineno)++;
2118 CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2119 s = yytext+bprefix+3;
2120 while ((*s == ' ') || (*s == '\t')) {
2121 s++;
2122 CG(heredoc_len)--;
2123 }
2124
2125 if (*s == '\'') {
2126 s++;
2127 CG(heredoc_len) -= 2;
2128
2129 BEGIN(ST_NOWDOC);
2130 } else {
2131 if (*s == '"') {
2132 s++;
2133 CG(heredoc_len) -= 2;
2134 }
2135
2136 BEGIN(ST_HEREDOC);
2137 }
2138
2139 CG(heredoc) = estrndup(s, CG(heredoc_len));
2140
2141 /* Check for ending label on the next line */
2142 if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
2143 YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2144
2145 if (*end == ';') {
2146 end++;
2147 }
2148
2149 if (*end == '\n' || *end == '\r') {
2150 BEGIN(ST_END_HEREDOC);
2151 }
2152 }
2153
2154 return T_START_HEREDOC;
2155 }
2156
2157
2158 <ST_IN_SCRIPTING>[`] {
2159 BEGIN(ST_BACKQUOTE);
2160 return '`';
2161 }
2162
2163
2164 <ST_END_HEREDOC>{ANY_CHAR} {
2165 YYCURSOR += CG(heredoc_len) - 1;
2166 yyleng = CG(heredoc_len);
2167
2168 Z_STRVAL_P(zendlval) = CG(heredoc);
2169 Z_STRLEN_P(zendlval) = CG(heredoc_len);
2170 CG(heredoc) = NULL;
2171 CG(heredoc_len) = 0;
2172 BEGIN(ST_IN_SCRIPTING);
2173 return T_END_HEREDOC;
2174 }
2175
2176
2177 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2178 zendlval->value.lval = (long) '{';
2179 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2180 yyless(1);
2181 return T_CURLY_OPEN;
2182 }
2183
2184
2185 <ST_DOUBLE_QUOTES>["] {
2186 BEGIN(ST_IN_SCRIPTING);
2187 return '"';
2188 }
2189
2190 <ST_BACKQUOTE>[`] {
2191 BEGIN(ST_IN_SCRIPTING);
2192 return '`';
2193 }
2194
2195
2196 <ST_DOUBLE_QUOTES>{ANY_CHAR} {
2197 if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2198 YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2199 SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2200
2201 goto double_quotes_scan_done;
2202 }
2203
2204 if (YYCURSOR > YYLIMIT) {
2205 return 0;
2206 }
2207 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2208 YYCURSOR++;
2209 }
2210
2211 while (YYCURSOR < YYLIMIT) {
2212 switch (*YYCURSOR++) {
2213 case '"':
2214 break;
2215 case '$':
2216 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2217 break;
2218 }
2219 continue;
2220 case '{':
2221 if (*YYCURSOR == '$') {
2222 break;
2223 }
2224 continue;
2225 case '\\':
2226 if (YYCURSOR < YYLIMIT) {
2227 YYCURSOR++;
2228 }
2229 /* fall through */
2230 default:
2231 continue;
2232 }
2233
2234 YYCURSOR--;
2235 break;
2236 }
2237
2238 double_quotes_scan_done:
2239 yyleng = YYCURSOR - SCNG(yy_text);
2240
2241 zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2242 return T_ENCAPSED_AND_WHITESPACE;
2243 }
2244
2245
2246 <ST_BACKQUOTE>{ANY_CHAR} {
2247 if (YYCURSOR > YYLIMIT) {
2248 return 0;
2249 }
2250 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2251 YYCURSOR++;
2252 }
2253
2254 while (YYCURSOR < YYLIMIT) {
2255 switch (*YYCURSOR++) {
2256 case '`':
2257 break;
2258 case '$':
2259 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2260 break;
2261 }
2262 continue;
2263 case '{':
2264 if (*YYCURSOR == '$') {
2265 break;
2266 }
2267 continue;
2268 case '\\':
2269 if (YYCURSOR < YYLIMIT) {
2270 YYCURSOR++;
2271 }
2272 /* fall through */
2273 default:
2274 continue;
2275 }
2276
2277 YYCURSOR--;
2278 break;
2279 }
2280
2281 yyleng = YYCURSOR - SCNG(yy_text);
2282
2283 zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2284 return T_ENCAPSED_AND_WHITESPACE;
2285 }
2286
2287
2288 <ST_HEREDOC>{ANY_CHAR} {
2289 int newline = 0;
2290
2291 if (YYCURSOR > YYLIMIT) {
2292 return 0;
2293 }
2294
2295 YYCURSOR--;
2296
2297 while (YYCURSOR < YYLIMIT) {
2298 switch (*YYCURSOR++) {
2299 case '\r':
2300 if (*YYCURSOR == '\n') {
2301 YYCURSOR++;
2302 }
2303 /* fall through */
2304 case '\n':
2305 /* Check for ending label on the next line */
2306 if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
2307 YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2308
2309 if (*end == ';') {
2310 end++;
2311 }
2312
2313 if (*end == '\n' || *end == '\r') {
2314 /* newline before label will be subtracted from returned text, but
2315 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2316 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2317 newline = 2; /* Windows newline */
2318 } else {
2319 newline = 1;
2320 }
2321
2322 CG(increment_lineno) = 1; /* For newline before label */
2323 BEGIN(ST_END_HEREDOC);
2324
2325 goto heredoc_scan_done;
2326 }
2327 }
2328 continue;
2329 case '$':
2330 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2331 break;
2332 }
2333 continue;
2334 case '{':
2335 if (*YYCURSOR == '$') {
2336 break;
2337 }
2338 continue;
2339 case '\\':
2340 if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2341 YYCURSOR++;
2342 }
2343 /* fall through */
2344 default:
2345 continue;
2346 }
2347
2348 YYCURSOR--;
2349 break;
2350 }
2351
2352 heredoc_scan_done:
2353 yyleng = YYCURSOR - SCNG(yy_text);
2354
2355 zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2356 return T_ENCAPSED_AND_WHITESPACE;
2357 }
2358
2359
2360 <ST_NOWDOC>{ANY_CHAR} {
2361 int newline = 0;
2362
2363 if (YYCURSOR > YYLIMIT) {
2364 return 0;
2365 }
2366
2367 YYCURSOR--;
2368
2369 while (YYCURSOR < YYLIMIT) {
2370 switch (*YYCURSOR++) {
2371 case '\r':
2372 if (*YYCURSOR == '\n') {
2373 YYCURSOR++;
2374 }
2375 /* fall through */
2376 case '\n':
2377 /* Check for ending label on the next line */
2378 if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
2379 YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2380
2381 if (*end == ';') {
2382 end++;
2383 }
2384
2385 if (*end == '\n' || *end == '\r') {
2386 /* newline before label will be subtracted from returned text, but
2387 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2388 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2389 newline = 2; /* Windows newline */
2390 } else {
2391 newline = 1;
2392 }
2393
2394 CG(increment_lineno) = 1; /* For newline before label */
2395 BEGIN(ST_END_HEREDOC);
2396
2397 goto nowdoc_scan_done;
2398 }
2399 }
2400 /* fall through */
2401 default:
2402 continue;
2403 }
2404 }
2405
2406 nowdoc_scan_done:
2407 yyleng = YYCURSOR - SCNG(yy_text);
2408
2409 zend_copy_value(zendlval, yytext, yyleng - newline);
2410 zendlval->type = IS_STRING;
2411 HANDLE_NEWLINES(yytext, yyleng - newline);
2412 return T_ENCAPSED_AND_WHITESPACE;
2413 }
2414
2415
2416 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2417 if (YYCURSOR > YYLIMIT) {
2418 return 0;
2419 }
2420
2421 zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2422 goto restart;
2423 }
2424
2425 */
2426 }
2427