1<?php declare(strict_types=1);
2
3namespace PhpParser;
4
5/*
6 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in
7 * turn is based on work by Masato Bito.
8 */
9
10use PhpParser\Node\Expr;
11use PhpParser\Node\Expr\Array_;
12use PhpParser\Node\Expr\Cast\Double;
13use PhpParser\Node\Identifier;
14use PhpParser\Node\InterpolatedStringPart;
15use PhpParser\Node\Name;
16use PhpParser\Node\Param;
17use PhpParser\Node\Scalar\InterpolatedString;
18use PhpParser\Node\Scalar\Int_;
19use PhpParser\Node\Scalar\String_;
20use PhpParser\Node\Stmt;
21use PhpParser\Node\Stmt\Class_;
22use PhpParser\Node\Stmt\ClassConst;
23use PhpParser\Node\Stmt\ClassMethod;
24use PhpParser\Node\Stmt\Else_;
25use PhpParser\Node\Stmt\ElseIf_;
26use PhpParser\Node\Stmt\Enum_;
27use PhpParser\Node\Stmt\Interface_;
28use PhpParser\Node\Stmt\Namespace_;
29use PhpParser\Node\Stmt\Nop;
30use PhpParser\Node\Stmt\Property;
31use PhpParser\Node\Stmt\TryCatch;
32use PhpParser\Node\UseItem;
33use PhpParser\NodeVisitor\CommentAnnotatingVisitor;
34
35abstract class ParserAbstract implements Parser {
    /** @var int Lookahead marker meaning that no token is currently held; doParse() reads the next token from
     *           $tokens whenever the lookahead has this value. */
    private const SYMBOL_NONE = -1;

    /** @var Lexer Lexer that is used when parsing */
    protected Lexer $lexer;
    /** @var PhpVersion PHP version to target on a best-effort basis */
    protected PhpVersion $phpVersion;

    /*
     * The following members will be filled with generated parsing data:
     */

    /** @var int Size of $tokenToSymbol map */
    protected int $tokenToSymbolMapSize;
    /** @var int Size of $action table */
    protected int $actionTableSize;
    /** @var int Size of $goto table */
    protected int $gotoTableSize;

    /** @var int Symbol number signifying an invalid token */
    protected int $invalidSymbol;
    /** @var int Symbol number of error recovery token */
    protected int $errorSymbol;
    /** @var int Action number signifying default action */
    protected int $defaultAction;
    /** @var int Rule number signifying that an unexpected token was encountered */
    protected int $unexpectedTokenRule;

    /** @var int States below this number have a second displacement, stored at
     *           $actionBase[$state + $numNonLeafStates], which is tried when the lookup through
     *           $actionBase[$state] does not match the symbol. */
    protected int $YY2TBLSTATE;
    /** @var int Number of non-leaf states */
    protected int $numNonLeafStates;

    /** @var int[] Map of PHP token IDs to internal symbols */
    protected array $phpTokenToSymbol;
    /** @var array<int, bool> Map of PHP token IDs to drop */
    protected array $dropTokens;
    /** @var int[] Map of external symbols (static::T_*) to internal symbols */
    protected array $tokenToSymbol;
    /** @var string[] Map of symbols to their names */
    protected array $symbolToName;
    /** @var array<int, string> Names of the production rules (only necessary for debugging) */
    protected array $productions;

    /** @var int[] Map of states to a displacement into the $action table. The corresponding action for this
     *             state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the
     *             action is defaulted, i.e. $actionDefault[$state] should be used instead. */
    protected array $actionBase;
    /** @var int[] Table of actions. Indexed according to $actionBase comment. */
    protected array $action;
    /** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol
     *             then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */
    protected array $actionCheck;
    /** @var int[] Map of states to their default action */
    protected array $actionDefault;
    /** @var callable[] Semantic action callbacks */
    protected array $reduceCallbacks;

    /** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this
     *             non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */
    protected array $gotoBase;
    /** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. */
    protected array $goto;
    /** @var int[] Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal
     *             then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */
    protected array $gotoCheck;
    /** @var int[] Map of non-terminals to the default state to goto after their reduction */
    protected array $gotoDefault;

    /** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for
     *             determining the state to goto after reduction. */
    protected array $ruleToNonTerminal;
    /** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to
     *             be popped from the stack(s) on reduction. */
    protected array $ruleToLength;

    /*
     * The following members are part of the parser state:
     */

    /** @var mixed Temporary value containing the result of last semantic action (reduction) */
    protected $semValue;
    /** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */
    protected array $semStack;
    /** @var int[] Token start position stack */
    protected array $tokenStartStack;
    /** @var int[] Token end position stack */
    protected array $tokenEndStack;

    /** @var ErrorHandler Error handler */
    protected ErrorHandler $errorHandler;
    /** @var int Error state, used to avoid error floods */
    protected int $errorState;

    /** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */
    protected ?\SplObjectStorage $createdArrays;

    /** @var Token[] Tokens for the current parse */
    protected array $tokens;
    /** @var int Current position in token array */
    protected int $tokenPos;
135
    /**
     * Initialize $reduceCallbacks map.
     *
     * Called once from the constructor. doParse() looks entries up by rule number and invokes a
     * non-null entry as $callback($this, $stackPos). A null entry means the rule has no semantic
     * action of its own; in that case the value of the first right-hand-side element (if any) is
     * carried over as the result of the reduction.
     */
    abstract protected function initReduceCallbacks(): void;
140
141    /**
142     * Creates a parser instance.
143     *
144     * Options:
145     *  * phpVersion: ?PhpVersion,
146     *
147     * @param Lexer $lexer A lexer
148     * @param PhpVersion $phpVersion PHP version to target, defaults to latest supported. This
149     *                               option is best-effort: Even if specified, parsing will generally assume the latest
150     *                               supported version and only adjust behavior in minor ways, for example by omitting
151     *                               errors in older versions and interpreting type hints as a name or identifier depending
152     *                               on version.
153     */
154    public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
155        $this->lexer = $lexer;
156        $this->phpVersion = $phpVersion ?? PhpVersion::getNewestSupported();
157
158        $this->initReduceCallbacks();
159        $this->phpTokenToSymbol = $this->createTokenMap();
160        $this->dropTokens = array_fill_keys(
161            [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], true
162        );
163    }
164
165    /**
166     * Parses PHP code into a node tree.
167     *
168     * If a non-throwing error handler is used, the parser will continue parsing after an error
169     * occurred and attempt to build a partial AST.
170     *
171     * @param string $code The source code to parse
172     * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
173     *                                        to ErrorHandler\Throwing.
174     *
175     * @return Node\Stmt[]|null Array of statements (or null non-throwing error handler is used and
176     *                          the parser was unable to recover from an error).
177     */
178    public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
179        $this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing();
180        $this->createdArrays = new \SplObjectStorage();
181
182        $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
183        $result = $this->doParse();
184
185        // Report errors for any empty elements used inside arrays. This is delayed until after the main parse,
186        // because we don't know a priori whether a given array expression will be used in a destructuring context
187        // or not.
188        foreach ($this->createdArrays as $node) {
189            foreach ($node->items as $item) {
190                if ($item->value instanceof Expr\Error) {
191                    $this->errorHandler->handleError(
192                        new Error('Cannot use empty array elements in arrays', $item->getAttributes()));
193                }
194            }
195        }
196
197        // Clear out some of the interior state, so we don't hold onto unnecessary
198        // memory between uses of the parser
199        $this->tokenStartStack = [];
200        $this->tokenEndStack = [];
201        $this->semStack = [];
202        $this->semValue = null;
203        $this->createdArrays = null;
204
205        if ($result !== null) {
206            $traverser = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
207            $traverser->traverse($result);
208        }
209
210        return $result;
211    }
212
    /**
     * Return the token array of the most recent parse() call.
     *
     * This is the array returned by the lexer in parse(); this class does not filter it. The
     * property is only assigned inside parse(), so this is meant to be called after a parse.
     *
     * @return Token[]
     */
    public function getTokens(): array {
        return $this->tokens;
    }
216
    /**
     * Run the table-driven parse loop over $this->tokens.
     *
     * The outer loop determines the action for the current state and lookahead symbol (reading a new
     * token when none is held); the inner loop performs reductions, the goto step after them, and
     * error recovery.
     *
     * @return Stmt[]|null The semantic value at the time the accept rule (rule 0) is reached, or null if
     *                     a reduce callback threw an Error (which is emitted first) or error recovery failed
     *
     * @throws \RangeException If a token id has no entry in $phpTokenToSymbol
     */
    protected function doParse(): ?array {
        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;
        $tokenValue = null;
        $this->tokenPos = -1;

        // Keep stack of start and end attributes
        $this->tokenStartStack = [];
        $this->tokenEndStack = [0];

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = [$state];

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = [];

        // Current position in the stack(s)
        $stackPos = 0;

        $this->errorState = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] === 0) {
                // No displacement for this state: the action is defaulted and no lookahead is needed
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Fetch the next token, skipping over everything listed in $dropTokens
                    do {
                        $token = $this->tokens[++$this->tokenPos];
                        $tokenId = $token->id;
                    } while (isset($this->dropTokens[$tokenId]));

                    // Map the lexer token id to the internally used symbols.
                    $tokenValue = $token->text;
                    if (!isset($this->phpTokenToSymbol[$tokenId])) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }
                    $symbol = $this->phpTokenToSymbol[$tokenId];

                    //$this->traceRead($symbol);
                }

                // Look the action up through the primary displacement and, for states below YY2TBLSTATE,
                // through the secondary one. $idx and $action are assigned inside the condition.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                    && ($action = $this->action[$idx]) !== $this->defaultAction) {
                    /*
                     * >= numNonLeafStates: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;
                        $this->semStack[$stackPos] = $tokenValue;
                        // A shifted token starts and ends at its own position
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenPos;
                        // The lookahead has been consumed
                        $symbol = self::SYMBOL_NONE;

                        // Each successful shift counts down the error state set during recovery
                        if ($this->errorState) {
                            --$this->errorState;
                        }

                        if ($action < $this->numNonLeafStates) {
                            continue;
                        }

                        /* $yyn >= numNonLeafStates means shift-and-reduce */
                        $rule = $action - $this->numNonLeafStates;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                }
                if ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    $ruleLength = $this->ruleToLength[$rule];
                    try {
                        $callback = $this->reduceCallbacks[$rule];
                        if ($callback !== null) {
                            $callback($this, $stackPos);
                        } elseif ($ruleLength > 0) {
                            // No semantic action: carry over the value of the first right-hand-side element
                            $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                        }
                    } catch (Error $e) {
                        // Errors without a line get the line of the current token
                        if (-1 === $e->getStartLine()) {
                            $e->setStartLine($this->tokens[$this->tokenPos]->line);
                        }

                        $this->emitError($e);
                        // Can't recover from this type of error
                        return null;
                    }

                    /* Goto - shift nonterminal */
                    // Remember the end position of the last popped element before unwinding the stack
                    $lastTokenEnd = $this->tokenEndStack[$stackPos];
                    $stackPos -= $ruleLength;
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    ++$stackPos;
                    $stateStack[$stackPos]     = $state;
                    $this->semStack[$stackPos] = $this->semValue;
                    $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                    if ($ruleLength === 0) {
                        // Empty productions use the start attributes of the lookahead token.
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    }
                } else {
                    /* error */
                    switch ($this->errorState) {
                        case 0:
                            // Only report when not already recovering from an earlier error
                            $msg = $this->getErrorMessage($symbol, $state);
                            $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                            // Break missing intentionally
                            // no break
                        case 1:
                        case 2:
                            $this->errorState = 3;

                            // Pop until error-expecting state uncovered
                            while (!(
                                (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                                || ($state < $this->YY2TBLSTATE
                                    && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                                if ($stackPos <= 0) {
                                    // Could not recover from error
                                    return null;
                                }
                                $state = $stateStack[--$stackPos];
                                //$this->tracePop($state);
                            }

                            // Shift the error symbol, using the action found by the loop condition above
                            //$this->traceShift($this->errorSymbol);
                            ++$stackPos;
                            $stateStack[$stackPos] = $state = $action;

                            // We treat the error symbol as being empty, so we reset the end attributes
                            // to the end attributes of the last non-error symbol
                            $this->tokenStartStack[$stackPos] = $this->tokenPos;
                            $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                            break;

                        case 3:
                            if ($symbol === 0) {
                                // Reached EOF without recovering from error
                                return null;
                            }

                            // Discard the offending lookahead; "break 2" leaves both the switch and the
                            // inner loop, so the outer loop reads the next token.
                            //$this->traceDiscard($symbol);
                            $symbol = self::SYMBOL_NONE;
                            break 2;
                    }
                }

                if ($state < $this->numNonLeafStates) {
                    break;
                }

                /* >= numNonLeafStates means shift-and-reduce */
                $rule = $state - $this->numNonLeafStates;
            }
        }

        throw new \RuntimeException('Reached end of parser loop');
    }
415
    /**
     * Hand an error to the error handler of the current parse() call.
     *
     * Whether this returns or throws is decided by that handler.
     *
     * @param Error $error Error to report
     */
    protected function emitError(Error $error): void {
        $this->errorHandler->handleError($error);
    }
419
420    /**
421     * Format error message including expected tokens.
422     *
423     * @param int $symbol Unexpected symbol
424     * @param int $state State at time of error
425     *
426     * @return string Formatted error message
427     */
428    protected function getErrorMessage(int $symbol, int $state): string {
429        $expectedString = '';
430        if ($expected = $this->getExpectedTokens($state)) {
431            $expectedString = ', expecting ' . implode(' or ', $expected);
432        }
433
434        return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $expectedString;
435    }
436
437    /**
438     * Get limited number of expected tokens in given state.
439     *
440     * @param int $state State
441     *
442     * @return string[] Expected tokens. If too many, an empty array is returned.
443     */
444    protected function getExpectedTokens(int $state): array {
445        $expected = [];
446
447        $base = $this->actionBase[$state];
448        foreach ($this->symbolToName as $symbol => $name) {
449            $idx = $base + $symbol;
450            if ($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
451                || $state < $this->YY2TBLSTATE
452                && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
453                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
454            ) {
455                if ($this->action[$idx] !== $this->unexpectedTokenRule
456                    && $this->action[$idx] !== $this->defaultAction
457                    && $symbol !== $this->errorSymbol
458                ) {
459                    if (count($expected) === 4) {
460                        /* Too many expected tokens */
461                        return [];
462                    }
463
464                    $expected[] = $name;
465                }
466            }
467        }
468
469        return $expected;
470    }
471
472    /**
473     * Get attributes for a node with the given start and end token positions.
474     *
475     * @param int $tokenStartPos Token position the node starts at
476     * @param int $tokenEndPos Token position the node ends at
477     * @return array<string, mixed> Attributes
478     */
479    protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
480        $startToken = $this->tokens[$tokenStartPos];
481        $afterEndToken = $this->tokens[$tokenEndPos + 1];
482        return [
483            'startLine' => $startToken->line,
484            'startTokenPos' => $tokenStartPos,
485            'startFilePos' => $startToken->pos,
486            'endLine' => $afterEndToken->line,
487            'endTokenPos' => $tokenEndPos,
488            'endFilePos' => $afterEndToken->pos - 1,
489        ];
490    }
491
492    /**
493     * Get attributes for a single token at the given token position.
494     *
495     * @return array<string, mixed> Attributes
496     */
497    protected function getAttributesForToken(int $tokenPos): array {
498        if ($tokenPos < \count($this->tokens) - 1) {
499            return $this->getAttributes($tokenPos, $tokenPos);
500        }
501
502        // Get attributes for the sentinel token.
503        $token = $this->tokens[$tokenPos];
504        return [
505            'startLine' => $token->line,
506            'startTokenPos' => $tokenPos,
507            'startFilePos' => $token->pos,
508            'endLine' => $token->line,
509            'endTokenPos' => $tokenPos,
510            'endFilePos' => $token->pos,
511        ];
512    }
513
514    /*
515     * Tracing functions used for debugging the parser.
516     */
517
518    /*
519    protected function traceNewState($state, $symbol): void {
520        echo '% State ' . $state
521            . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
522    }
523
524    protected function traceRead($symbol): void {
525        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
526    }
527
528    protected function traceShift($symbol): void {
529        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
530    }
531
532    protected function traceAccept(): void {
533        echo "% Accepted.\n";
534    }
535
536    protected function traceReduce($n): void {
537        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
538    }
539
540    protected function tracePop($state): void {
541        echo '% Recovering, uncovered state ' . $state . "\n";
542    }
543
544    protected function traceDiscard($symbol): void {
545        echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
546    }
547    */
548
549    /*
550     * Helper functions invoked by semantic actions
551     */
552
553    /**
554     * Moves statements of semicolon-style namespaces into $ns->stmts and checks various error conditions.
555     *
556     * @param Node\Stmt[] $stmts
557     * @return Node\Stmt[]
558     */
559    protected function handleNamespaces(array $stmts): array {
560        $hasErrored = false;
561        $style = $this->getNamespacingStyle($stmts);
562        if (null === $style) {
563            // not namespaced, nothing to do
564            return $stmts;
565        }
566        if ('brace' === $style) {
567            // For braced namespaces we only have to check that there are no invalid statements between the namespaces
568            $afterFirstNamespace = false;
569            foreach ($stmts as $stmt) {
570                if ($stmt instanceof Node\Stmt\Namespace_) {
571                    $afterFirstNamespace = true;
572                } elseif (!$stmt instanceof Node\Stmt\HaltCompiler
573                        && !$stmt instanceof Node\Stmt\Nop
574                        && $afterFirstNamespace && !$hasErrored) {
575                    $this->emitError(new Error(
576                        'No code may exist outside of namespace {}', $stmt->getAttributes()));
577                    $hasErrored = true; // Avoid one error for every statement
578                }
579            }
580            return $stmts;
581        } else {
582            // For semicolon namespaces we have to move the statements after a namespace declaration into ->stmts
583            $resultStmts = [];
584            $targetStmts = &$resultStmts;
585            $lastNs = null;
586            foreach ($stmts as $stmt) {
587                if ($stmt instanceof Node\Stmt\Namespace_) {
588                    if ($lastNs !== null) {
589                        $this->fixupNamespaceAttributes($lastNs);
590                    }
591                    if ($stmt->stmts === null) {
592                        $stmt->stmts = [];
593                        $targetStmts = &$stmt->stmts;
594                        $resultStmts[] = $stmt;
595                    } else {
596                        // This handles the invalid case of mixed style namespaces
597                        $resultStmts[] = $stmt;
598                        $targetStmts = &$resultStmts;
599                    }
600                    $lastNs = $stmt;
601                } elseif ($stmt instanceof Node\Stmt\HaltCompiler) {
602                    // __halt_compiler() is not moved into the namespace
603                    $resultStmts[] = $stmt;
604                } else {
605                    $targetStmts[] = $stmt;
606                }
607            }
608            if ($lastNs !== null) {
609                $this->fixupNamespaceAttributes($lastNs);
610            }
611            return $resultStmts;
612        }
613    }
614
615    private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
616        // We moved the statements into the namespace node, as such the end of the namespace node
617        // needs to be extended to the end of the statements.
618        if (empty($stmt->stmts)) {
619            return;
620        }
621
622        // We only move the builtin end attributes here. This is the best we can do with the
623        // knowledge we have.
624        $endAttributes = ['endLine', 'endFilePos', 'endTokenPos'];
625        $lastStmt = $stmt->stmts[count($stmt->stmts) - 1];
626        foreach ($endAttributes as $endAttribute) {
627            if ($lastStmt->hasAttribute($endAttribute)) {
628                $stmt->setAttribute($endAttribute, $lastStmt->getAttribute($endAttribute));
629            }
630        }
631    }
632
633    /** @return array<string, mixed> */
634    private function getNamespaceErrorAttributes(Namespace_ $node): array {
635        $attrs = $node->getAttributes();
636        // Adjust end attributes to only cover the "namespace" keyword, not the whole namespace.
637        if (isset($attrs['startLine'])) {
638            $attrs['endLine'] = $attrs['startLine'];
639        }
640        if (isset($attrs['startTokenPos'])) {
641            $attrs['endTokenPos'] = $attrs['startTokenPos'];
642        }
643        if (isset($attrs['startFilePos'])) {
644            $attrs['endFilePos'] = $attrs['startFilePos'] + \strlen('namespace') - 1;
645        }
646        return $attrs;
647    }
648
649    /**
650     * Determine namespacing style (semicolon or brace)
651     *
652     * @param Node[] $stmts Top-level statements.
653     *
654     * @return null|string One of "semicolon", "brace" or null (no namespaces)
655     */
656    private function getNamespacingStyle(array $stmts): ?string {
657        $style = null;
658        $hasNotAllowedStmts = false;
659        foreach ($stmts as $i => $stmt) {
660            if ($stmt instanceof Node\Stmt\Namespace_) {
661                $currentStyle = null === $stmt->stmts ? 'semicolon' : 'brace';
662                if (null === $style) {
663                    $style = $currentStyle;
664                    if ($hasNotAllowedStmts) {
665                        $this->emitError(new Error(
666                            'Namespace declaration statement has to be the very first statement in the script',
667                            $this->getNamespaceErrorAttributes($stmt)
668                        ));
669                    }
670                } elseif ($style !== $currentStyle) {
671                    $this->emitError(new Error(
672                        'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
673                        $this->getNamespaceErrorAttributes($stmt)
674                    ));
675                    // Treat like semicolon style for namespace normalization
676                    return 'semicolon';
677                }
678                continue;
679            }
680
681            /* declare(), __halt_compiler() and nops can be used before a namespace declaration */
682            if ($stmt instanceof Node\Stmt\Declare_
683                || $stmt instanceof Node\Stmt\HaltCompiler
684                || $stmt instanceof Node\Stmt\Nop) {
685                continue;
686            }
687
688            /* There may be a hashbang line at the very start of the file */
689            if ($i === 0 && $stmt instanceof Node\Stmt\InlineHTML && preg_match('/\A#!.*\r?\n\z/', $stmt->value)) {
690                continue;
691            }
692
693            /* Everything else if forbidden before namespace declarations */
694            $hasNotAllowedStmts = true;
695        }
696        return $style;
697    }
698
699    /** @return Name|Identifier */
700    protected function handleBuiltinTypes(Name $name) {
701        if (!$name->isUnqualified()) {
702            return $name;
703        }
704
705        $lowerName = $name->toLowerString();
706        if (!$this->phpVersion->supportsBuiltinType($lowerName)) {
707            return $name;
708        }
709
710        return new Node\Identifier($lowerName, $name->getAttributes());
711    }
712
713    /**
714     * Get combined start and end attributes at a stack location
715     *
716     * @param int $stackPos Stack location
717     *
718     * @return array<string, mixed> Combined start and end attributes
719     */
720    protected function getAttributesAt(int $stackPos): array {
721        return $this->getAttributes($this->tokenStartStack[$stackPos], $this->tokenEndStack[$stackPos]);
722    }
723
724    protected function getFloatCastKind(string $cast): int {
725        $cast = strtolower($cast);
726        if (strpos($cast, 'float') !== false) {
727            return Double::KIND_FLOAT;
728        }
729
730        if (strpos($cast, 'real') !== false) {
731            return Double::KIND_REAL;
732        }
733
734        return Double::KIND_DOUBLE;
735    }
736
737    /** @param array<string, mixed> $attributes */
738    protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
739        try {
740            return Int_::fromString($str, $attributes, $allowInvalidOctal);
741        } catch (Error $error) {
742            $this->emitError($error);
743            // Use dummy value
744            return new Int_(0, $attributes);
745        }
746    }
747
748    /**
749     * Parse a T_NUM_STRING token into either an integer or string node.
750     *
751     * @param string $str Number string
752     * @param array<string, mixed> $attributes Attributes
753     *
754     * @return Int_|String_ Integer or string node.
755     */
756    protected function parseNumString(string $str, array $attributes) {
757        if (!preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
758            return new String_($str, $attributes);
759        }
760
761        $num = +$str;
762        if (!is_int($num)) {
763            return new String_($str, $attributes);
764        }
765
766        return new Int_($num, $attributes);
767    }
768
769    /** @param array<string, mixed> $attributes */
770    protected function stripIndentation(
771        string $string, int $indentLen, string $indentChar,
772        bool $newlineAtStart, bool $newlineAtEnd, array $attributes
773    ): string {
774        if ($indentLen === 0) {
775            return $string;
776        }
777
778        $start = $newlineAtStart ? '(?:(?<=\n)|\A)' : '(?<=\n)';
779        $end = $newlineAtEnd ? '(?:(?=[\r\n])|\z)' : '(?=[\r\n])';
780        $regex = '/' . $start . '([ \t]*)(' . $end . ')?/';
781        return preg_replace_callback(
782            $regex,
783            function ($matches) use ($indentLen, $indentChar, $attributes) {
784                $prefix = substr($matches[1], 0, $indentLen);
785                if (false !== strpos($prefix, $indentChar === " " ? "\t" : " ")) {
786                    $this->emitError(new Error(
787                        'Invalid indentation - tabs and spaces cannot be mixed', $attributes
788                    ));
789                } elseif (strlen($prefix) < $indentLen && !isset($matches[2])) {
790                    $this->emitError(new Error(
791                        'Invalid body indentation level ' .
792                        '(expecting an indentation level of at least ' . $indentLen . ')',
793                        $attributes
794                    ));
795                }
796                return substr($matches[0], strlen($prefix));
797            },
798            $string
799        );
800    }
801
802    /**
803     * @param string|(Expr|InterpolatedStringPart)[] $contents
804     * @param array<string, mixed> $attributes
805     * @param array<string, mixed> $endTokenAttributes
806     */
807    protected function parseDocString(
808        string $startToken, $contents, string $endToken,
809        array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
810    ): Expr {
811        $kind = strpos($startToken, "'") === false
812            ? String_::KIND_HEREDOC : String_::KIND_NOWDOC;
813
814        $regex = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
815        $result = preg_match($regex, $startToken, $matches);
816        assert($result === 1);
817        $label = $matches[1];
818
819        $result = preg_match('/\A[ \t]*/', $endToken, $matches);
820        assert($result === 1);
821        $indentation = $matches[0];
822
823        $attributes['kind'] = $kind;
824        $attributes['docLabel'] = $label;
825        $attributes['docIndentation'] = $indentation;
826
827        $indentHasSpaces = false !== strpos($indentation, " ");
828        $indentHasTabs = false !== strpos($indentation, "\t");
829        if ($indentHasSpaces && $indentHasTabs) {
830            $this->emitError(new Error(
831                'Invalid indentation - tabs and spaces cannot be mixed',
832                $endTokenAttributes
833            ));
834
835            // Proceed processing as if this doc string is not indented
836            $indentation = '';
837        }
838
839        $indentLen = \strlen($indentation);
840        $indentChar = $indentHasSpaces ? " " : "\t";
841
842        if (\is_string($contents)) {
843            if ($contents === '') {
844                $attributes['rawValue'] = $contents;
845                return new String_('', $attributes);
846            }
847
848            $contents = $this->stripIndentation(
849                $contents, $indentLen, $indentChar, true, true, $attributes
850            );
851            $contents = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);
852            $attributes['rawValue'] = $contents;
853
854            if ($kind === String_::KIND_HEREDOC) {
855                $contents = String_::parseEscapeSequences($contents, null, $parseUnicodeEscape);
856            }
857
858            return new String_($contents, $attributes);
859        } else {
860            assert(count($contents) > 0);
861            if (!$contents[0] instanceof Node\InterpolatedStringPart) {
862                // If there is no leading encapsed string part, pretend there is an empty one
863                $this->stripIndentation(
864                    '', $indentLen, $indentChar, true, false, $contents[0]->getAttributes()
865                );
866            }
867
868            $newContents = [];
869            foreach ($contents as $i => $part) {
870                if ($part instanceof Node\InterpolatedStringPart) {
871                    $isLast = $i === \count($contents) - 1;
872                    $part->value = $this->stripIndentation(
873                        $part->value, $indentLen, $indentChar,
874                        $i === 0, $isLast, $part->getAttributes()
875                    );
876                    if ($isLast) {
877                        $part->value = preg_replace('~(\r\n|\n|\r)\z~', '', $part->value);
878                    }
879                    $part->setAttribute('rawValue', $part->value);
880                    $part->value = String_::parseEscapeSequences($part->value, null, $parseUnicodeEscape);
881                    if ('' === $part->value) {
882                        continue;
883                    }
884                }
885                $newContents[] = $part;
886            }
887            return new InterpolatedString($newContents, $attributes);
888        }
889    }
890
891    protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
892        assert($token->id === \T_COMMENT || $token->id == \T_DOC_COMMENT);
893        return \T_DOC_COMMENT === $token->id
894            ? new Comment\Doc($token->text, $token->line, $token->pos, $tokenPos,
895                $token->getEndLine(), $token->getEndPos() - 1, $tokenPos)
896            : new Comment($token->text, $token->line, $token->pos, $tokenPos,
897                $token->getEndLine(), $token->getEndPos() - 1, $tokenPos);
898    }
899
900    /**
901     * Get last comment before the given token position, if any
902     */
903    protected function getCommentBeforeToken(int $tokenPos): ?Comment {
904        while (--$tokenPos >= 0) {
905            $token = $this->tokens[$tokenPos];
906            if (!isset($this->dropTokens[$token->id])) {
907                break;
908            }
909
910            if ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT) {
911                return $this->createCommentFromToken($token, $tokenPos);
912            }
913        }
914        return null;
915    }
916
917    /**
918     * Create a zero-length nop to capture preceding comments, if any.
919     */
920    protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
921        $comment = $this->getCommentBeforeToken($tokenPos);
922        if ($comment === null) {
923            return null;
924        }
925
926        $commentEndLine = $comment->getEndLine();
927        $commentEndFilePos = $comment->getEndFilePos();
928        $commentEndTokenPos = $comment->getEndTokenPos();
929        $attributes = [
930            'startLine' => $commentEndLine,
931            'endLine' => $commentEndLine,
932            'startFilePos' => $commentEndFilePos + 1,
933            'endFilePos' => $commentEndFilePos,
934            'startTokenPos' => $commentEndTokenPos + 1,
935            'endTokenPos' => $commentEndTokenPos,
936        ];
937        return new Nop($attributes);
938    }
939
940    protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
941        if ($this->getCommentBeforeToken($tokenStartPos) === null) {
942            return null;
943        }
944        return new Nop($this->getAttributes($tokenStartPos, $tokenEndPos));
945    }
946
947    protected function handleHaltCompiler(): string {
948        // Prevent the lexer from returning any further tokens.
949        $nextToken = $this->tokens[$this->tokenPos + 1];
950        $this->tokenPos = \count($this->tokens) - 2;
951
952        // Return text after __halt_compiler.
953        return $nextToken->id === \T_INLINE_HTML ? $nextToken->text : '';
954    }
955
956    protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
957        $tokenPos = $this->tokenStartStack[$stackPos];
958        $token = $this->tokens[$tokenPos];
959        assert($token->id == \T_INLINE_HTML);
960        if ($tokenPos > 0) {
961            $prevToken = $this->tokens[$tokenPos - 1];
962            assert($prevToken->id == \T_CLOSE_TAG);
963            return false !== strpos($prevToken->text, "\n")
964                || false !== strpos($prevToken->text, "\r");
965        }
966        return true;
967    }
968
969    /**
970     * @return array<string, mixed>
971     */
972    protected function createEmptyElemAttributes(int $tokenPos): array {
973        return $this->getAttributesForToken($tokenPos);
974    }
975
976    protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
977        $this->createdArrays->detach($node);
978        return new Expr\List_(array_map(function (Node\ArrayItem $item) {
979            if ($item->value instanceof Expr\Error) {
980                // We used Error as a placeholder for empty elements, which are legal for destructuring.
981                return null;
982            }
983            if ($item->value instanceof Array_) {
984                return new Node\ArrayItem(
985                    $this->fixupArrayDestructuring($item->value),
986                    $item->key, $item->byRef, $item->getAttributes());
987            }
988            return $item;
989        }, $node->items), ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes());
990    }
991
992    protected function postprocessList(Expr\List_ $node): void {
993        foreach ($node->items as $i => $item) {
994            if ($item->value instanceof Expr\Error) {
995                // We used Error as a placeholder for empty elements, which are legal for destructuring.
996                $node->items[$i] = null;
997            }
998        }
999    }
1000
1001    /** @param ElseIf_|Else_ $node */
1002    protected function fixupAlternativeElse($node): void {
1003        // Make sure a trailing nop statement carrying comments is part of the node.
1004        $numStmts = \count($node->stmts);
1005        if ($numStmts !== 0 && $node->stmts[$numStmts - 1] instanceof Nop) {
1006            $nopAttrs = $node->stmts[$numStmts - 1]->getAttributes();
1007            if (isset($nopAttrs['endLine'])) {
1008                $node->setAttribute('endLine', $nopAttrs['endLine']);
1009            }
1010            if (isset($nopAttrs['endFilePos'])) {
1011                $node->setAttribute('endFilePos', $nopAttrs['endFilePos']);
1012            }
1013            if (isset($nopAttrs['endTokenPos'])) {
1014                $node->setAttribute('endTokenPos', $nopAttrs['endTokenPos']);
1015            }
1016        }
1017    }
1018
1019    protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
1020        try {
1021            Modifiers::verifyClassModifier($a, $b);
1022        } catch (Error $error) {
1023            $error->setAttributes($this->getAttributesAt($modifierPos));
1024            $this->emitError($error);
1025        }
1026    }
1027
1028    protected function checkModifier(int $a, int $b, int $modifierPos): void {
1029        // Jumping through some hoops here because verifyModifier() is also used elsewhere
1030        try {
1031            Modifiers::verifyModifier($a, $b);
1032        } catch (Error $error) {
1033            $error->setAttributes($this->getAttributesAt($modifierPos));
1034            $this->emitError($error);
1035        }
1036    }
1037
1038    protected function checkParam(Param $node): void {
1039        if ($node->variadic && null !== $node->default) {
1040            $this->emitError(new Error(
1041                'Variadic parameter cannot have a default value',
1042                $node->default->getAttributes()
1043            ));
1044        }
1045    }
1046
1047    protected function checkTryCatch(TryCatch $node): void {
1048        if (empty($node->catches) && null === $node->finally) {
1049            $this->emitError(new Error(
1050                'Cannot use try without catch or finally', $node->getAttributes()
1051            ));
1052        }
1053    }
1054
1055    protected function checkNamespace(Namespace_ $node): void {
1056        if (null !== $node->stmts) {
1057            foreach ($node->stmts as $stmt) {
1058                if ($stmt instanceof Namespace_) {
1059                    $this->emitError(new Error(
1060                        'Namespace declarations cannot be nested', $stmt->getAttributes()
1061                    ));
1062                }
1063            }
1064        }
1065    }
1066
1067    private function checkClassName(?Identifier $name, int $namePos): void {
1068        if (null !== $name && $name->isSpecialClassName()) {
1069            $this->emitError(new Error(
1070                sprintf('Cannot use \'%s\' as class name as it is reserved', $name),
1071                $this->getAttributesAt($namePos)
1072            ));
1073        }
1074    }
1075
1076    /** @param Name[] $interfaces */
1077    private function checkImplementedInterfaces(array $interfaces): void {
1078        foreach ($interfaces as $interface) {
1079            if ($interface->isSpecialClassName()) {
1080                $this->emitError(new Error(
1081                    sprintf('Cannot use \'%s\' as interface name as it is reserved', $interface),
1082                    $interface->getAttributes()
1083                ));
1084            }
1085        }
1086    }
1087
1088    protected function checkClass(Class_ $node, int $namePos): void {
1089        $this->checkClassName($node->name, $namePos);
1090
1091        if ($node->extends && $node->extends->isSpecialClassName()) {
1092            $this->emitError(new Error(
1093                sprintf('Cannot use \'%s\' as class name as it is reserved', $node->extends),
1094                $node->extends->getAttributes()
1095            ));
1096        }
1097
1098        $this->checkImplementedInterfaces($node->implements);
1099    }
1100
1101    protected function checkInterface(Interface_ $node, int $namePos): void {
1102        $this->checkClassName($node->name, $namePos);
1103        $this->checkImplementedInterfaces($node->extends);
1104    }
1105
1106    protected function checkEnum(Enum_ $node, int $namePos): void {
1107        $this->checkClassName($node->name, $namePos);
1108        $this->checkImplementedInterfaces($node->implements);
1109    }
1110
1111    protected function checkClassMethod(ClassMethod $node, int $modifierPos): void {
1112        if ($node->flags & Modifiers::STATIC) {
1113            switch ($node->name->toLowerString()) {
1114                case '__construct':
1115                    $this->emitError(new Error(
1116                        sprintf('Constructor %s() cannot be static', $node->name),
1117                        $this->getAttributesAt($modifierPos)));
1118                    break;
1119                case '__destruct':
1120                    $this->emitError(new Error(
1121                        sprintf('Destructor %s() cannot be static', $node->name),
1122                        $this->getAttributesAt($modifierPos)));
1123                    break;
1124                case '__clone':
1125                    $this->emitError(new Error(
1126                        sprintf('Clone method %s() cannot be static', $node->name),
1127                        $this->getAttributesAt($modifierPos)));
1128                    break;
1129            }
1130        }
1131
1132        if ($node->flags & Modifiers::READONLY) {
1133            $this->emitError(new Error(
1134                sprintf('Method %s() cannot be readonly', $node->name),
1135                $this->getAttributesAt($modifierPos)));
1136        }
1137    }
1138
1139    protected function checkClassConst(ClassConst $node, int $modifierPos): void {
1140        if ($node->flags & Modifiers::STATIC) {
1141            $this->emitError(new Error(
1142                "Cannot use 'static' as constant modifier",
1143                $this->getAttributesAt($modifierPos)));
1144        }
1145        if ($node->flags & Modifiers::ABSTRACT) {
1146            $this->emitError(new Error(
1147                "Cannot use 'abstract' as constant modifier",
1148                $this->getAttributesAt($modifierPos)));
1149        }
1150        if ($node->flags & Modifiers::READONLY) {
1151            $this->emitError(new Error(
1152                "Cannot use 'readonly' as constant modifier",
1153                $this->getAttributesAt($modifierPos)));
1154        }
1155    }
1156
1157    protected function checkProperty(Property $node, int $modifierPos): void {
1158        if ($node->flags & Modifiers::ABSTRACT) {
1159            $this->emitError(new Error('Properties cannot be declared abstract',
1160                $this->getAttributesAt($modifierPos)));
1161        }
1162
1163        if ($node->flags & Modifiers::FINAL) {
1164            $this->emitError(new Error('Properties cannot be declared final',
1165                $this->getAttributesAt($modifierPos)));
1166        }
1167    }
1168
1169    protected function checkUseUse(UseItem $node, int $namePos): void {
1170        if ($node->alias && $node->alias->isSpecialClassName()) {
1171            $this->emitError(new Error(
1172                sprintf(
1173                    'Cannot use %s as %s because \'%2$s\' is a special class name',
1174                    $node->name, $node->alias
1175                ),
1176                $this->getAttributesAt($namePos)
1177            ));
1178        }
1179    }
1180
1181    /**
1182     * Creates the token map.
1183     *
1184     * The token map maps the PHP internal token identifiers
1185     * to the identifiers used by the Parser. Additionally it
1186     * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
1187     *
1188     * @return array<int, int> The token map
1189     */
1190    protected function createTokenMap(): array {
1191        $tokenMap = [];
1192
1193        for ($i = 0; $i < 1000; ++$i) {
1194            if ($i < 256) {
1195                // Single-char tokens use an identity mapping.
1196                $tokenMap[$i] = $i;
1197            } elseif (\T_DOUBLE_COLON === $i) {
1198                // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
1199                $tokenMap[$i] = static::T_PAAMAYIM_NEKUDOTAYIM;
1200            } elseif (\T_OPEN_TAG_WITH_ECHO === $i) {
1201                // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
1202                $tokenMap[$i] = static::T_ECHO;
1203            } elseif (\T_CLOSE_TAG === $i) {
1204                // T_CLOSE_TAG is equivalent to ';'
1205                $tokenMap[$i] = ord(';');
1206            } elseif ('UNKNOWN' !== $name = token_name($i)) {
1207                if (defined($name = static::class . '::' . $name)) {
1208                    // Other tokens can be mapped directly
1209                    $tokenMap[$i] = constant($name);
1210                }
1211            }
1212        }
1213
1214        // Assign tokens for which we define compatibility constants, as token_name() does not know them.
1215        $tokenMap[\T_FN] = static::T_FN;
1216        $tokenMap[\T_COALESCE_EQUAL] = static::T_COALESCE_EQUAL;
1217        $tokenMap[\T_NAME_QUALIFIED] = static::T_NAME_QUALIFIED;
1218        $tokenMap[\T_NAME_FULLY_QUALIFIED] = static::T_NAME_FULLY_QUALIFIED;
1219        $tokenMap[\T_NAME_RELATIVE] = static::T_NAME_RELATIVE;
1220        $tokenMap[\T_MATCH] = static::T_MATCH;
1221        $tokenMap[\T_NULLSAFE_OBJECT_OPERATOR] = static::T_NULLSAFE_OBJECT_OPERATOR;
1222        $tokenMap[\T_ATTRIBUTE] = static::T_ATTRIBUTE;
1223        $tokenMap[\T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG] = static::T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
1224        $tokenMap[\T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG] = static::T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG;
1225        $tokenMap[\T_ENUM] = static::T_ENUM;
1226        $tokenMap[\T_READONLY] = static::T_READONLY;
1227
1228        // We have create a map from PHP token IDs to external symbol IDs.
1229        // Now map them to the internal symbol ID.
1230        $fullTokenMap = [];
1231        foreach ($tokenMap as $phpToken => $extSymbol) {
1232            $intSymbol = $this->tokenToSymbol[$extSymbol];
1233            if ($intSymbol === $this->invalidSymbol) {
1234                continue;
1235            }
1236            $fullTokenMap[$phpToken] = $intSymbol;
1237        }
1238
1239        return $fullTokenMap;
1240    }
1241}
1242