1<?php declare(strict_types=1);
2
3namespace PhpParser;
4
5/*
6 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in
7 * turn is based on work by Masato Bito.
8 */
9
10use PhpParser\Node\Arg;
11use PhpParser\Node\Expr;
12use PhpParser\Node\Expr\Array_;
13use PhpParser\Node\Expr\Cast\Double;
14use PhpParser\Node\Identifier;
15use PhpParser\Node\InterpolatedStringPart;
16use PhpParser\Node\Name;
17use PhpParser\Node\Param;
18use PhpParser\Node\PropertyHook;
19use PhpParser\Node\Scalar\InterpolatedString;
20use PhpParser\Node\Scalar\Int_;
21use PhpParser\Node\Scalar\String_;
22use PhpParser\Node\Stmt;
23use PhpParser\Node\Stmt\Class_;
24use PhpParser\Node\Stmt\ClassConst;
25use PhpParser\Node\Stmt\ClassMethod;
26use PhpParser\Node\Stmt\Else_;
27use PhpParser\Node\Stmt\ElseIf_;
28use PhpParser\Node\Stmt\Enum_;
29use PhpParser\Node\Stmt\Interface_;
30use PhpParser\Node\Stmt\Namespace_;
31use PhpParser\Node\Stmt\Nop;
32use PhpParser\Node\Stmt\Property;
33use PhpParser\Node\Stmt\TryCatch;
34use PhpParser\Node\UseItem;
35use PhpParser\Node\VarLikeIdentifier;
36use PhpParser\NodeVisitor\CommentAnnotatingVisitor;
37
38abstract class ParserAbstract implements Parser {
    /** @var int Placeholder lookahead value meaning that no token is currently held, so the next one has to be read */
    private const SYMBOL_NONE = -1;

    /** @var Lexer Lexer that is used when parsing */
    protected Lexer $lexer;
    /** @var PhpVersion PHP version to target on a best-effort basis */
    protected PhpVersion $phpVersion;
45
46    /*
47     * The following members will be filled with generated parsing data:
48     */
49
    /** @var int Size of $tokenToSymbol map */
    protected int $tokenToSymbolMapSize;
    /** @var int Size of $action table */
    protected int $actionTableSize;
    /** @var int Size of $goto table */
    protected int $gotoTableSize;

    /** @var int Symbol number signifying an invalid token */
    protected int $invalidSymbol;
    /** @var int Symbol number of error recovery token */
    protected int $errorSymbol;
    /** @var int Action number signifying default action */
    protected int $defaultAction;
    /** @var int Rule number signifying that an unexpected token was encountered */
    protected int $unexpectedTokenRule;

    /** @var int States below this number have a second displacement into the $action table, stored at
     *           $actionBase[$state + $numNonLeafStates]. It is consulted when the lookup through
     *           $actionBase[$state] does not pass the $actionCheck test. */
    protected int $YY2TBLSTATE;
    /** @var int Number of non-leaf states. Action and goto values greater than or equal to this number encode a
     *           shift followed directly by a reduction with rule (value - $numNonLeafStates). */
    protected int $numNonLeafStates;

    /** @var int[] Map of PHP token IDs to internal symbols */
    protected array $phpTokenToSymbol;
    /** @var array<int, bool> Map of PHP token IDs to drop */
    protected array $dropTokens;
    /** @var int[] Map of external symbols (static::T_*) to internal symbols */
    protected array $tokenToSymbol;
    /** @var string[] Map of symbols to their names */
    protected array $symbolToName;
    /** @var array<int, string> Names of the production rules (only necessary for debugging) */
    protected array $productions;

    /** @var int[] Map of states to a displacement into the $action table. The corresponding action for this
     *             state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the
     *             action is defaulted, i.e. $actionDefault[$state] should be used instead. */
    protected array $actionBase;
    /** @var int[] Table of actions. Indexed according to $actionBase comment. */
    protected array $action;
    /** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol
     *             then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */
    protected array $actionCheck;
    /** @var int[] Map of states to their default action */
    protected array $actionDefault;
    /** @var (callable|null)[] Semantic action callbacks, indexed by rule number. Each callback is invoked with the
     *                         parser instance and the current stack position. For a null entry the semantic value
     *                         of the first right-hand side symbol is used instead (if the rule is not empty). */
    protected array $reduceCallbacks;

    /** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this
     *             non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */
    protected array $gotoBase;
    /** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. */
    protected array $goto;
    /** @var int[] Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal
     *             then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */
    protected array $gotoCheck;
    /** @var int[] Map of non-terminals to the default state to goto after their reduction */
    protected array $gotoDefault;

    /** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for
     *             determining the state to goto after reduction. */
    protected array $ruleToNonTerminal;
    /** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to
     *             be popped from the stack(s) on reduction. */
    protected array $ruleToLength;
112
113    /*
114     * The following members are part of the parser state:
115     */
116
    /** @var mixed Temporary value containing the result of last semantic action (reduction) */
    protected $semValue;
    /** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */
    protected array $semStack;
    /** @var int[] Token start position stack (positions are indexes into $tokens) */
    protected array $tokenStartStack;
    /** @var int[] Token end position stack (positions are indexes into $tokens) */
    protected array $tokenEndStack;

    /** @var ErrorHandler Error handler */
    protected ErrorHandler $errorHandler;
    /** @var int Error state, used to avoid error floods. Set to 3 when error recovery starts and decremented for
     *           every token shifted afterwards; a syntax error is only reported while it is 0. */
    protected int $errorState;

    /** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */
    protected ?\SplObjectStorage $createdArrays;

    /** @var Token[] Tokens for the current parse */
    protected array $tokens;
    /** @var int Current position in token array */
    protected int $tokenPos;
138
    /**
     * Initialize $reduceCallbacks map.
     *
     * Invoked once from the constructor. The parse loop looks callbacks up by rule number and calls
     * each non-null entry with the parser instance and the current stack position.
     */
    abstract protected function initReduceCallbacks(): void;
143
144    /**
145     * Creates a parser instance.
146     *
147     * Options:
148     *  * phpVersion: ?PhpVersion,
149     *
150     * @param Lexer $lexer A lexer
151     * @param PhpVersion $phpVersion PHP version to target, defaults to latest supported. This
152     *                               option is best-effort: Even if specified, parsing will generally assume the latest
153     *                               supported version and only adjust behavior in minor ways, for example by omitting
154     *                               errors in older versions and interpreting type hints as a name or identifier depending
155     *                               on version.
156     */
157    public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
158        $this->lexer = $lexer;
159        $this->phpVersion = $phpVersion ?? PhpVersion::getNewestSupported();
160
161        $this->initReduceCallbacks();
162        $this->phpTokenToSymbol = $this->createTokenMap();
163        $this->dropTokens = array_fill_keys(
164            [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], true
165        );
166    }
167
168    /**
169     * Parses PHP code into a node tree.
170     *
171     * If a non-throwing error handler is used, the parser will continue parsing after an error
172     * occurred and attempt to build a partial AST.
173     *
174     * @param string $code The source code to parse
175     * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
176     *                                        to ErrorHandler\Throwing.
177     *
178     * @return Node\Stmt[]|null Array of statements (or null non-throwing error handler is used and
179     *                          the parser was unable to recover from an error).
180     */
181    public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
182        $this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing();
183        $this->createdArrays = new \SplObjectStorage();
184
185        $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
186        $result = $this->doParse();
187
188        // Report errors for any empty elements used inside arrays. This is delayed until after the main parse,
189        // because we don't know a priori whether a given array expression will be used in a destructuring context
190        // or not.
191        foreach ($this->createdArrays as $node) {
192            foreach ($node->items as $item) {
193                if ($item->value instanceof Expr\Error) {
194                    $this->errorHandler->handleError(
195                        new Error('Cannot use empty array elements in arrays', $item->getAttributes()));
196                }
197            }
198        }
199
200        // Clear out some of the interior state, so we don't hold onto unnecessary
201        // memory between uses of the parser
202        $this->tokenStartStack = [];
203        $this->tokenEndStack = [];
204        $this->semStack = [];
205        $this->semValue = null;
206        $this->createdArrays = null;
207
208        if ($result !== null) {
209            $traverser = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
210            $traverser->traverse($result);
211        }
212
213        return $result;
214    }
215
    /**
     * Returns the token array stored by parse().
     *
     * NOTE(review): $tokens is declared without a default value and, in the code visible here, is only
     * assigned in parse(), so this presumably must not be called before the first parse - confirm.
     *
     * @return Token[]
     */
    public function getTokens(): array {
        return $this->tokens;
    }
219
    /**
     * Runs the table-driven shift/reduce loop over $this->tokens.
     *
     * The outer loop determines the next action for the current state (reading a lookahead token if
     * necessary), the inner loop performs reductions and error recovery. Errors are reported through
     * emitError().
     *
     * @return Stmt[]|null The semantic value at the time the accept rule is reached, or null if an error
     *                     could not be recovered from
     *
     * @throws \RangeException If a token id has no entry in $phpTokenToSymbol
     */
    protected function doParse(): ?array {
        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;
        $tokenValue = null;
        $this->tokenPos = -1;

        // Keep stack of start and end attributes
        $this->tokenStartStack = [];
        $this->tokenEndStack = [0];

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = [$state];

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = [];

        // Current position in the stack(s)
        $stackPos = 0;

        $this->errorState = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] === 0) {
                // This state only has a default action, so no lookahead token is needed.
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Fetch the next token, skipping over tokens listed in $dropTokens.
                    do {
                        $token = $this->tokens[++$this->tokenPos];
                        $tokenId = $token->id;
                    } while (isset($this->dropTokens[$tokenId]));

                    // Map the lexer token id to the internally used symbols.
                    $tokenValue = $token->text;
                    if (!isset($this->phpTokenToSymbol[$tokenId])) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }
                    $symbol = $this->phpTokenToSymbol[$tokenId];

                    //$this->traceRead($symbol);
                }

                // Look up the action for this state/symbol pair. States below YY2TBLSTATE have a second
                // displacement that is tried if the first one does not pass the $actionCheck test.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                    && ($action = $this->action[$idx]) !== $this->defaultAction) {
                    /*
                     * >= numNonLeafStates: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;
                        $this->semStack[$stackPos] = $tokenValue;
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenPos;
                        // The lookahead has been consumed.
                        $symbol = self::SYMBOL_NONE;

                        // Every shifted token counts down the error recovery window.
                        if ($this->errorState) {
                            --$this->errorState;
                        }

                        if ($action < $this->numNonLeafStates) {
                            continue;
                        }

                        /* $yyn >= numNonLeafStates means shift-and-reduce */
                        $rule = $action - $this->numNonLeafStates;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                }
                if ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    $ruleLength = $this->ruleToLength[$rule];
                    try {
                        $callback = $this->reduceCallbacks[$rule];
                        if ($callback !== null) {
                            $callback($this, $stackPos);
                        } elseif ($ruleLength > 0) {
                            // No callback: use the value of the first right-hand side symbol.
                            $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                        }
                    } catch (Error $e) {
                        // Errors without a start line are attributed to the line of the current token.
                        if (-1 === $e->getStartLine()) {
                            $e->setStartLine($this->tokens[$this->tokenPos]->line);
                        }

                        $this->emitError($e);
                        // Can't recover from this type of error
                        return null;
                    }

                    /* Goto - shift nonterminal */
                    // The reduced non-terminal ends where the last popped symbol ended.
                    $lastTokenEnd = $this->tokenEndStack[$stackPos];
                    $stackPos -= $ruleLength;
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    ++$stackPos;
                    $stateStack[$stackPos]     = $state;
                    $this->semStack[$stackPos] = $this->semValue;
                    $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                    if ($ruleLength === 0) {
                        // Empty productions use the start attributes of the lookahead token.
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    }
                } else {
                    /* error */
                    // errorState 0: report the error, then recover. errorState 1 or 2: recover without
                    // reporting again. errorState 3: no token was shifted since the last recovery, so the
                    // lookahead token is discarded instead.
                    switch ($this->errorState) {
                        case 0:
                            $msg = $this->getErrorMessage($symbol, $state);
                            $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                            // Break missing intentionally
                            // no break
                        case 1:
                        case 2:
                            $this->errorState = 3;

                            // Pop until error-expecting state uncovered
                            while (!(
                                (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                                || ($state < $this->YY2TBLSTATE
                                    && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                                if ($stackPos <= 0) {
                                    // Could not recover from error
                                    return null;
                                }
                                $state = $stateStack[--$stackPos];
                                //$this->tracePop($state);
                            }

                            //$this->traceShift($this->errorSymbol);
                            ++$stackPos;
                            $stateStack[$stackPos] = $state = $action;

                            // We treat the error symbol as being empty, so we reset the end attributes
                            // to the end attributes of the last non-error symbol
                            $this->tokenStartStack[$stackPos] = $this->tokenPos;
                            $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                            break;

                        case 3:
                            if ($symbol === 0) {
                                // Reached EOF without recovering from error
                                return null;
                            }

                            //$this->traceDiscard($symbol);
                            // Drop the lookahead and leave both the switch and the inner loop, so that the
                            // outer loop reads the next token.
                            $symbol = self::SYMBOL_NONE;
                            break 2;
                    }
                }

                if ($state < $this->numNonLeafStates) {
                    break;
                }

                /* >= numNonLeafStates means shift-and-reduce */
                $rule = $state - $this->numNonLeafStates;
            }
        }
    }
416
    /**
     * Passes an error on to the error handler of the current parse.
     *
     * @param Error $error The error to report
     */
    protected function emitError(Error $error): void {
        $this->errorHandler->handleError($error);
    }
420
421    /**
422     * Format error message including expected tokens.
423     *
424     * @param int $symbol Unexpected symbol
425     * @param int $state State at time of error
426     *
427     * @return string Formatted error message
428     */
429    protected function getErrorMessage(int $symbol, int $state): string {
430        $expectedString = '';
431        if ($expected = $this->getExpectedTokens($state)) {
432            $expectedString = ', expecting ' . implode(' or ', $expected);
433        }
434
435        return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $expectedString;
436    }
437
438    /**
439     * Get limited number of expected tokens in given state.
440     *
441     * @param int $state State
442     *
443     * @return string[] Expected tokens. If too many, an empty array is returned.
444     */
445    protected function getExpectedTokens(int $state): array {
446        $expected = [];
447
448        $base = $this->actionBase[$state];
449        foreach ($this->symbolToName as $symbol => $name) {
450            $idx = $base + $symbol;
451            if ($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
452                || $state < $this->YY2TBLSTATE
453                && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
454                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
455            ) {
456                if ($this->action[$idx] !== $this->unexpectedTokenRule
457                    && $this->action[$idx] !== $this->defaultAction
458                    && $symbol !== $this->errorSymbol
459                ) {
460                    if (count($expected) === 4) {
461                        /* Too many expected tokens */
462                        return [];
463                    }
464
465                    $expected[] = $name;
466                }
467            }
468        }
469
470        return $expected;
471    }
472
473    /**
474     * Get attributes for a node with the given start and end token positions.
475     *
476     * @param int $tokenStartPos Token position the node starts at
477     * @param int $tokenEndPos Token position the node ends at
478     * @return array<string, mixed> Attributes
479     */
480    protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
481        $startToken = $this->tokens[$tokenStartPos];
482        $afterEndToken = $this->tokens[$tokenEndPos + 1];
483        return [
484            'startLine' => $startToken->line,
485            'startTokenPos' => $tokenStartPos,
486            'startFilePos' => $startToken->pos,
487            'endLine' => $afterEndToken->line,
488            'endTokenPos' => $tokenEndPos,
489            'endFilePos' => $afterEndToken->pos - 1,
490        ];
491    }
492
493    /**
494     * Get attributes for a single token at the given token position.
495     *
496     * @return array<string, mixed> Attributes
497     */
498    protected function getAttributesForToken(int $tokenPos): array {
499        if ($tokenPos < \count($this->tokens) - 1) {
500            return $this->getAttributes($tokenPos, $tokenPos);
501        }
502
503        // Get attributes for the sentinel token.
504        $token = $this->tokens[$tokenPos];
505        return [
506            'startLine' => $token->line,
507            'startTokenPos' => $tokenPos,
508            'startFilePos' => $token->pos,
509            'endLine' => $token->line,
510            'endTokenPos' => $tokenPos,
511            'endFilePos' => $token->pos,
512        ];
513    }
514
515    /*
516     * Tracing functions used for debugging the parser.
517     */
518
519    /*
520    protected function traceNewState($state, $symbol): void {
521        echo '% State ' . $state
522            . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
523    }
524
525    protected function traceRead($symbol): void {
526        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
527    }
528
529    protected function traceShift($symbol): void {
530        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
531    }
532
533    protected function traceAccept(): void {
534        echo "% Accepted.\n";
535    }
536
537    protected function traceReduce($n): void {
538        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
539    }
540
541    protected function tracePop($state): void {
542        echo '% Recovering, uncovered state ' . $state . "\n";
543    }
544
545    protected function traceDiscard($symbol): void {
546        echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
547    }
548    */
549
550    /*
551     * Helper functions invoked by semantic actions
552     */
553
554    /**
555     * Moves statements of semicolon-style namespaces into $ns->stmts and checks various error conditions.
556     *
557     * @param Node\Stmt[] $stmts
558     * @return Node\Stmt[]
559     */
560    protected function handleNamespaces(array $stmts): array {
561        $hasErrored = false;
562        $style = $this->getNamespacingStyle($stmts);
563        if (null === $style) {
564            // not namespaced, nothing to do
565            return $stmts;
566        }
567        if ('brace' === $style) {
568            // For braced namespaces we only have to check that there are no invalid statements between the namespaces
569            $afterFirstNamespace = false;
570            foreach ($stmts as $stmt) {
571                if ($stmt instanceof Node\Stmt\Namespace_) {
572                    $afterFirstNamespace = true;
573                } elseif (!$stmt instanceof Node\Stmt\HaltCompiler
574                        && !$stmt instanceof Node\Stmt\Nop
575                        && $afterFirstNamespace && !$hasErrored) {
576                    $this->emitError(new Error(
577                        'No code may exist outside of namespace {}', $stmt->getAttributes()));
578                    $hasErrored = true; // Avoid one error for every statement
579                }
580            }
581            return $stmts;
582        } else {
583            // For semicolon namespaces we have to move the statements after a namespace declaration into ->stmts
584            $resultStmts = [];
585            $targetStmts = &$resultStmts;
586            $lastNs = null;
587            foreach ($stmts as $stmt) {
588                if ($stmt instanceof Node\Stmt\Namespace_) {
589                    if ($lastNs !== null) {
590                        $this->fixupNamespaceAttributes($lastNs);
591                    }
592                    if ($stmt->stmts === null) {
593                        $stmt->stmts = [];
594                        $targetStmts = &$stmt->stmts;
595                        $resultStmts[] = $stmt;
596                    } else {
597                        // This handles the invalid case of mixed style namespaces
598                        $resultStmts[] = $stmt;
599                        $targetStmts = &$resultStmts;
600                    }
601                    $lastNs = $stmt;
602                } elseif ($stmt instanceof Node\Stmt\HaltCompiler) {
603                    // __halt_compiler() is not moved into the namespace
604                    $resultStmts[] = $stmt;
605                } else {
606                    $targetStmts[] = $stmt;
607                }
608            }
609            if ($lastNs !== null) {
610                $this->fixupNamespaceAttributes($lastNs);
611            }
612            return $resultStmts;
613        }
614    }
615
616    private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
617        // We moved the statements into the namespace node, as such the end of the namespace node
618        // needs to be extended to the end of the statements.
619        if (empty($stmt->stmts)) {
620            return;
621        }
622
623        // We only move the builtin end attributes here. This is the best we can do with the
624        // knowledge we have.
625        $endAttributes = ['endLine', 'endFilePos', 'endTokenPos'];
626        $lastStmt = $stmt->stmts[count($stmt->stmts) - 1];
627        foreach ($endAttributes as $endAttribute) {
628            if ($lastStmt->hasAttribute($endAttribute)) {
629                $stmt->setAttribute($endAttribute, $lastStmt->getAttribute($endAttribute));
630            }
631        }
632    }
633
634    /** @return array<string, mixed> */
635    private function getNamespaceErrorAttributes(Namespace_ $node): array {
636        $attrs = $node->getAttributes();
637        // Adjust end attributes to only cover the "namespace" keyword, not the whole namespace.
638        if (isset($attrs['startLine'])) {
639            $attrs['endLine'] = $attrs['startLine'];
640        }
641        if (isset($attrs['startTokenPos'])) {
642            $attrs['endTokenPos'] = $attrs['startTokenPos'];
643        }
644        if (isset($attrs['startFilePos'])) {
645            $attrs['endFilePos'] = $attrs['startFilePos'] + \strlen('namespace') - 1;
646        }
647        return $attrs;
648    }
649
650    /**
651     * Determine namespacing style (semicolon or brace)
652     *
653     * @param Node[] $stmts Top-level statements.
654     *
655     * @return null|string One of "semicolon", "brace" or null (no namespaces)
656     */
657    private function getNamespacingStyle(array $stmts): ?string {
658        $style = null;
659        $hasNotAllowedStmts = false;
660        foreach ($stmts as $i => $stmt) {
661            if ($stmt instanceof Node\Stmt\Namespace_) {
662                $currentStyle = null === $stmt->stmts ? 'semicolon' : 'brace';
663                if (null === $style) {
664                    $style = $currentStyle;
665                    if ($hasNotAllowedStmts) {
666                        $this->emitError(new Error(
667                            'Namespace declaration statement has to be the very first statement in the script',
668                            $this->getNamespaceErrorAttributes($stmt)
669                        ));
670                    }
671                } elseif ($style !== $currentStyle) {
672                    $this->emitError(new Error(
673                        'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
674                        $this->getNamespaceErrorAttributes($stmt)
675                    ));
676                    // Treat like semicolon style for namespace normalization
677                    return 'semicolon';
678                }
679                continue;
680            }
681
682            /* declare(), __halt_compiler() and nops can be used before a namespace declaration */
683            if ($stmt instanceof Node\Stmt\Declare_
684                || $stmt instanceof Node\Stmt\HaltCompiler
685                || $stmt instanceof Node\Stmt\Nop) {
686                continue;
687            }
688
689            /* There may be a hashbang line at the very start of the file */
690            if ($i === 0 && $stmt instanceof Node\Stmt\InlineHTML && preg_match('/\A#!.*\r?\n\z/', $stmt->value)) {
691                continue;
692            }
693
694            /* Everything else if forbidden before namespace declarations */
695            $hasNotAllowedStmts = true;
696        }
697        return $style;
698    }
699
700    /** @return Name|Identifier */
701    protected function handleBuiltinTypes(Name $name) {
702        if (!$name->isUnqualified()) {
703            return $name;
704        }
705
706        $lowerName = $name->toLowerString();
707        if (!$this->phpVersion->supportsBuiltinType($lowerName)) {
708            return $name;
709        }
710
711        return new Node\Identifier($lowerName, $name->getAttributes());
712    }
713
714    /**
715     * Get combined start and end attributes at a stack location
716     *
717     * @param int $stackPos Stack location
718     *
719     * @return array<string, mixed> Combined start and end attributes
720     */
721    protected function getAttributesAt(int $stackPos): array {
722        return $this->getAttributes($this->tokenStartStack[$stackPos], $this->tokenEndStack[$stackPos]);
723    }
724
725    protected function getFloatCastKind(string $cast): int {
726        $cast = strtolower($cast);
727        if (strpos($cast, 'float') !== false) {
728            return Double::KIND_FLOAT;
729        }
730
731        if (strpos($cast, 'real') !== false) {
732            return Double::KIND_REAL;
733        }
734
735        return Double::KIND_DOUBLE;
736    }
737
738    /** @param array<string, mixed> $attributes */
739    protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
740        try {
741            return Int_::fromString($str, $attributes, $allowInvalidOctal);
742        } catch (Error $error) {
743            $this->emitError($error);
744            // Use dummy value
745            return new Int_(0, $attributes);
746        }
747    }
748
749    /**
750     * Parse a T_NUM_STRING token into either an integer or string node.
751     *
752     * @param string $str Number string
753     * @param array<string, mixed> $attributes Attributes
754     *
755     * @return Int_|String_ Integer or string node.
756     */
757    protected function parseNumString(string $str, array $attributes) {
758        if (!preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
759            return new String_($str, $attributes);
760        }
761
762        $num = +$str;
763        if (!is_int($num)) {
764            return new String_($str, $attributes);
765        }
766
767        return new Int_($num, $attributes);
768    }
769
770    /** @param array<string, mixed> $attributes */
771    protected function stripIndentation(
772        string $string, int $indentLen, string $indentChar,
773        bool $newlineAtStart, bool $newlineAtEnd, array $attributes
774    ): string {
775        if ($indentLen === 0) {
776            return $string;
777        }
778
779        $start = $newlineAtStart ? '(?:(?<=\n)|\A)' : '(?<=\n)';
780        $end = $newlineAtEnd ? '(?:(?=[\r\n])|\z)' : '(?=[\r\n])';
781        $regex = '/' . $start . '([ \t]*)(' . $end . ')?/';
782        return preg_replace_callback(
783            $regex,
784            function ($matches) use ($indentLen, $indentChar, $attributes) {
785                $prefix = substr($matches[1], 0, $indentLen);
786                if (false !== strpos($prefix, $indentChar === " " ? "\t" : " ")) {
787                    $this->emitError(new Error(
788                        'Invalid indentation - tabs and spaces cannot be mixed', $attributes
789                    ));
790                } elseif (strlen($prefix) < $indentLen && !isset($matches[2])) {
791                    $this->emitError(new Error(
792                        'Invalid body indentation level ' .
793                        '(expecting an indentation level of at least ' . $indentLen . ')',
794                        $attributes
795                    ));
796                }
797                return substr($matches[0], strlen($prefix));
798            },
799            $string
800        );
801    }
802
803    /**
804     * @param string|(Expr|InterpolatedStringPart)[] $contents
805     * @param array<string, mixed> $attributes
806     * @param array<string, mixed> $endTokenAttributes
807     */
808    protected function parseDocString(
809        string $startToken, $contents, string $endToken,
810        array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
811    ): Expr {
812        $kind = strpos($startToken, "'") === false
813            ? String_::KIND_HEREDOC : String_::KIND_NOWDOC;
814
815        $regex = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
816        $result = preg_match($regex, $startToken, $matches);
817        assert($result === 1);
818        $label = $matches[1];
819
820        $result = preg_match('/\A[ \t]*/', $endToken, $matches);
821        assert($result === 1);
822        $indentation = $matches[0];
823
824        $attributes['kind'] = $kind;
825        $attributes['docLabel'] = $label;
826        $attributes['docIndentation'] = $indentation;
827
828        $indentHasSpaces = false !== strpos($indentation, " ");
829        $indentHasTabs = false !== strpos($indentation, "\t");
830        if ($indentHasSpaces && $indentHasTabs) {
831            $this->emitError(new Error(
832                'Invalid indentation - tabs and spaces cannot be mixed',
833                $endTokenAttributes
834            ));
835
836            // Proceed processing as if this doc string is not indented
837            $indentation = '';
838        }
839
840        $indentLen = \strlen($indentation);
841        $indentChar = $indentHasSpaces ? " " : "\t";
842
843        if (\is_string($contents)) {
844            if ($contents === '') {
845                $attributes['rawValue'] = $contents;
846                return new String_('', $attributes);
847            }
848
849            $contents = $this->stripIndentation(
850                $contents, $indentLen, $indentChar, true, true, $attributes
851            );
852            $contents = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);
853            $attributes['rawValue'] = $contents;
854
855            if ($kind === String_::KIND_HEREDOC) {
856                $contents = String_::parseEscapeSequences($contents, null, $parseUnicodeEscape);
857            }
858
859            return new String_($contents, $attributes);
860        } else {
861            assert(count($contents) > 0);
862            if (!$contents[0] instanceof Node\InterpolatedStringPart) {
863                // If there is no leading encapsed string part, pretend there is an empty one
864                $this->stripIndentation(
865                    '', $indentLen, $indentChar, true, false, $contents[0]->getAttributes()
866                );
867            }
868
869            $newContents = [];
870            foreach ($contents as $i => $part) {
871                if ($part instanceof Node\InterpolatedStringPart) {
872                    $isLast = $i === \count($contents) - 1;
873                    $part->value = $this->stripIndentation(
874                        $part->value, $indentLen, $indentChar,
875                        $i === 0, $isLast, $part->getAttributes()
876                    );
877                    if ($isLast) {
878                        $part->value = preg_replace('~(\r\n|\n|\r)\z~', '', $part->value);
879                    }
880                    $part->setAttribute('rawValue', $part->value);
881                    $part->value = String_::parseEscapeSequences($part->value, null, $parseUnicodeEscape);
882                    if ('' === $part->value) {
883                        continue;
884                    }
885                }
886                $newContents[] = $part;
887            }
888            return new InterpolatedString($newContents, $attributes);
889        }
890    }
891
892    protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
893        assert($token->id === \T_COMMENT || $token->id == \T_DOC_COMMENT);
894        return \T_DOC_COMMENT === $token->id
895            ? new Comment\Doc($token->text, $token->line, $token->pos, $tokenPos,
896                $token->getEndLine(), $token->getEndPos() - 1, $tokenPos)
897            : new Comment($token->text, $token->line, $token->pos, $tokenPos,
898                $token->getEndLine(), $token->getEndPos() - 1, $tokenPos);
899    }
900
901    /**
902     * Get last comment before the given token position, if any
903     */
904    protected function getCommentBeforeToken(int $tokenPos): ?Comment {
905        while (--$tokenPos >= 0) {
906            $token = $this->tokens[$tokenPos];
907            if (!isset($this->dropTokens[$token->id])) {
908                break;
909            }
910
911            if ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT) {
912                return $this->createCommentFromToken($token, $tokenPos);
913            }
914        }
915        return null;
916    }
917
918    /**
919     * Create a zero-length nop to capture preceding comments, if any.
920     */
921    protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
922        $comment = $this->getCommentBeforeToken($tokenPos);
923        if ($comment === null) {
924            return null;
925        }
926
927        $commentEndLine = $comment->getEndLine();
928        $commentEndFilePos = $comment->getEndFilePos();
929        $commentEndTokenPos = $comment->getEndTokenPos();
930        $attributes = [
931            'startLine' => $commentEndLine,
932            'endLine' => $commentEndLine,
933            'startFilePos' => $commentEndFilePos + 1,
934            'endFilePos' => $commentEndFilePos,
935            'startTokenPos' => $commentEndTokenPos + 1,
936            'endTokenPos' => $commentEndTokenPos,
937        ];
938        return new Nop($attributes);
939    }
940
941    protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
942        if ($this->getCommentBeforeToken($tokenStartPos) === null) {
943            return null;
944        }
945        return new Nop($this->getAttributes($tokenStartPos, $tokenEndPos));
946    }
947
948    protected function handleHaltCompiler(): string {
949        // Prevent the lexer from returning any further tokens.
950        $nextToken = $this->tokens[$this->tokenPos + 1];
951        $this->tokenPos = \count($this->tokens) - 2;
952
953        // Return text after __halt_compiler.
954        return $nextToken->id === \T_INLINE_HTML ? $nextToken->text : '';
955    }
956
957    protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
958        $tokenPos = $this->tokenStartStack[$stackPos];
959        $token = $this->tokens[$tokenPos];
960        assert($token->id == \T_INLINE_HTML);
961        if ($tokenPos > 0) {
962            $prevToken = $this->tokens[$tokenPos - 1];
963            assert($prevToken->id == \T_CLOSE_TAG);
964            return false !== strpos($prevToken->text, "\n")
965                || false !== strpos($prevToken->text, "\r");
966        }
967        return true;
968    }
969
970    /**
971     * @return array<string, mixed>
972     */
973    protected function createEmptyElemAttributes(int $tokenPos): array {
974        return $this->getAttributesForToken($tokenPos);
975    }
976
977    protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
978        $this->createdArrays->detach($node);
979        return new Expr\List_(array_map(function (Node\ArrayItem $item) {
980            if ($item->value instanceof Expr\Error) {
981                // We used Error as a placeholder for empty elements, which are legal for destructuring.
982                return null;
983            }
984            if ($item->value instanceof Array_) {
985                return new Node\ArrayItem(
986                    $this->fixupArrayDestructuring($item->value),
987                    $item->key, $item->byRef, $item->getAttributes());
988            }
989            return $item;
990        }, $node->items), ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes());
991    }
992
993    protected function postprocessList(Expr\List_ $node): void {
994        foreach ($node->items as $i => $item) {
995            if ($item->value instanceof Expr\Error) {
996                // We used Error as a placeholder for empty elements, which are legal for destructuring.
997                $node->items[$i] = null;
998            }
999        }
1000    }
1001
1002    /** @param ElseIf_|Else_ $node */
1003    protected function fixupAlternativeElse($node): void {
1004        // Make sure a trailing nop statement carrying comments is part of the node.
1005        $numStmts = \count($node->stmts);
1006        if ($numStmts !== 0 && $node->stmts[$numStmts - 1] instanceof Nop) {
1007            $nopAttrs = $node->stmts[$numStmts - 1]->getAttributes();
1008            if (isset($nopAttrs['endLine'])) {
1009                $node->setAttribute('endLine', $nopAttrs['endLine']);
1010            }
1011            if (isset($nopAttrs['endFilePos'])) {
1012                $node->setAttribute('endFilePos', $nopAttrs['endFilePos']);
1013            }
1014            if (isset($nopAttrs['endTokenPos'])) {
1015                $node->setAttribute('endTokenPos', $nopAttrs['endTokenPos']);
1016            }
1017        }
1018    }
1019
1020    protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
1021        try {
1022            Modifiers::verifyClassModifier($a, $b);
1023        } catch (Error $error) {
1024            $error->setAttributes($this->getAttributesAt($modifierPos));
1025            $this->emitError($error);
1026        }
1027    }
1028
1029    protected function checkModifier(int $a, int $b, int $modifierPos): void {
1030        // Jumping through some hoops here because verifyModifier() is also used elsewhere
1031        try {
1032            Modifiers::verifyModifier($a, $b);
1033        } catch (Error $error) {
1034            $error->setAttributes($this->getAttributesAt($modifierPos));
1035            $this->emitError($error);
1036        }
1037    }
1038
1039    protected function checkParam(Param $node): void {
1040        if ($node->variadic && null !== $node->default) {
1041            $this->emitError(new Error(
1042                'Variadic parameter cannot have a default value',
1043                $node->default->getAttributes()
1044            ));
1045        }
1046    }
1047
1048    protected function checkTryCatch(TryCatch $node): void {
1049        if (empty($node->catches) && null === $node->finally) {
1050            $this->emitError(new Error(
1051                'Cannot use try without catch or finally', $node->getAttributes()
1052            ));
1053        }
1054    }
1055
1056    protected function checkNamespace(Namespace_ $node): void {
1057        if (null !== $node->stmts) {
1058            foreach ($node->stmts as $stmt) {
1059                if ($stmt instanceof Namespace_) {
1060                    $this->emitError(new Error(
1061                        'Namespace declarations cannot be nested', $stmt->getAttributes()
1062                    ));
1063                }
1064            }
1065        }
1066    }
1067
1068    private function checkClassName(?Identifier $name, int $namePos): void {
1069        if (null !== $name && $name->isSpecialClassName()) {
1070            $this->emitError(new Error(
1071                sprintf('Cannot use \'%s\' as class name as it is reserved', $name),
1072                $this->getAttributesAt($namePos)
1073            ));
1074        }
1075    }
1076
1077    /** @param Name[] $interfaces */
1078    private function checkImplementedInterfaces(array $interfaces): void {
1079        foreach ($interfaces as $interface) {
1080            if ($interface->isSpecialClassName()) {
1081                $this->emitError(new Error(
1082                    sprintf('Cannot use \'%s\' as interface name as it is reserved', $interface),
1083                    $interface->getAttributes()
1084                ));
1085            }
1086        }
1087    }
1088
1089    protected function checkClass(Class_ $node, int $namePos): void {
1090        $this->checkClassName($node->name, $namePos);
1091
1092        if ($node->extends && $node->extends->isSpecialClassName()) {
1093            $this->emitError(new Error(
1094                sprintf('Cannot use \'%s\' as class name as it is reserved', $node->extends),
1095                $node->extends->getAttributes()
1096            ));
1097        }
1098
1099        $this->checkImplementedInterfaces($node->implements);
1100    }
1101
1102    protected function checkInterface(Interface_ $node, int $namePos): void {
1103        $this->checkClassName($node->name, $namePos);
1104        $this->checkImplementedInterfaces($node->extends);
1105    }
1106
1107    protected function checkEnum(Enum_ $node, int $namePos): void {
1108        $this->checkClassName($node->name, $namePos);
1109        $this->checkImplementedInterfaces($node->implements);
1110    }
1111
1112    protected function checkClassMethod(ClassMethod $node, int $modifierPos): void {
1113        if ($node->flags & Modifiers::STATIC) {
1114            switch ($node->name->toLowerString()) {
1115                case '__construct':
1116                    $this->emitError(new Error(
1117                        sprintf('Constructor %s() cannot be static', $node->name),
1118                        $this->getAttributesAt($modifierPos)));
1119                    break;
1120                case '__destruct':
1121                    $this->emitError(new Error(
1122                        sprintf('Destructor %s() cannot be static', $node->name),
1123                        $this->getAttributesAt($modifierPos)));
1124                    break;
1125                case '__clone':
1126                    $this->emitError(new Error(
1127                        sprintf('Clone method %s() cannot be static', $node->name),
1128                        $this->getAttributesAt($modifierPos)));
1129                    break;
1130            }
1131        }
1132
1133        if ($node->flags & Modifiers::READONLY) {
1134            $this->emitError(new Error(
1135                sprintf('Method %s() cannot be readonly', $node->name),
1136                $this->getAttributesAt($modifierPos)));
1137        }
1138    }
1139
1140    protected function checkClassConst(ClassConst $node, int $modifierPos): void {
1141        foreach ([Modifiers::STATIC, Modifiers::ABSTRACT, Modifiers::READONLY] as $modifier) {
1142            if ($node->flags & $modifier) {
1143                $this->emitError(new Error(
1144                    "Cannot use '" . Modifiers::toString($modifier) . "' as constant modifier",
1145                    $this->getAttributesAt($modifierPos)));
1146            }
1147        }
1148    }
1149
1150    protected function checkUseUse(UseItem $node, int $namePos): void {
1151        if ($node->alias && $node->alias->isSpecialClassName()) {
1152            $this->emitError(new Error(
1153                sprintf(
1154                    'Cannot use %s as %s because \'%2$s\' is a special class name',
1155                    $node->name, $node->alias
1156                ),
1157                $this->getAttributesAt($namePos)
1158            ));
1159        }
1160    }
1161
1162    protected function checkPropertyHooksForMultiProperty(Property $property, int $hookPos): void {
1163        if (count($property->props) > 1) {
1164            $this->emitError(new Error(
1165                'Cannot use hooks when declaring multiple properties', $this->getAttributesAt($hookPos)));
1166        }
1167    }
1168
1169    /** @param PropertyHook[] $hooks */
1170    protected function checkEmptyPropertyHookList(array $hooks, int $hookPos): void {
1171        if (empty($hooks)) {
1172            $this->emitError(new Error(
1173                'Property hook list cannot be empty', $this->getAttributesAt($hookPos)));
1174        }
1175    }
1176
1177    protected function checkPropertyHook(PropertyHook $hook, ?int $paramListPos): void {
1178        $name = $hook->name->toLowerString();
1179        if ($name !== 'get' && $name !== 'set') {
1180            $this->emitError(new Error(
1181                'Unknown hook "' . $hook->name . '", expected "get" or "set"',
1182                $hook->name->getAttributes()));
1183        }
1184        if ($name === 'get' && $paramListPos !== null) {
1185            $this->emitError(new Error(
1186                'get hook must not have a parameter list', $this->getAttributesAt($paramListPos)));
1187        }
1188    }
1189
1190    protected function checkPropertyHookModifiers(int $a, int $b, int $modifierPos): void {
1191        try {
1192            Modifiers::verifyModifier($a, $b);
1193        } catch (Error $error) {
1194            $error->setAttributes($this->getAttributesAt($modifierPos));
1195            $this->emitError($error);
1196        }
1197
1198        if ($b != Modifiers::FINAL) {
1199            $this->emitError(new Error(
1200                'Cannot use the ' . Modifiers::toString($b) . ' modifier on a property hook',
1201                $this->getAttributesAt($modifierPos)));
1202        }
1203    }
1204
1205    /**
1206     * @param Property|Param $node
1207     */
1208    protected function addPropertyNameToHooks(Node $node): void {
1209        if ($node instanceof Property) {
1210            $name = $node->props[0]->name->toString();
1211        } else {
1212            $name = $node->var->name;
1213        }
1214        foreach ($node->hooks as $hook) {
1215            $hook->setAttribute('propertyName', $name);
1216        }
1217    }
1218
1219    /** @param array<Node\Arg|Node\VariadicPlaceholder> $args */
1220    private function isSimpleExit(array $args): bool {
1221        if (\count($args) === 0) {
1222            return true;
1223        }
1224        if (\count($args) === 1) {
1225            $arg = $args[0];
1226            return $arg instanceof Arg && $arg->name === null &&
1227                   $arg->byRef === false && $arg->unpack === false;
1228        }
1229        return false;
1230    }
1231
1232    /**
1233     * @param array<Node\Arg|Node\VariadicPlaceholder> $args
1234     * @param array<string, mixed> $attrs
1235     */
1236    protected function createExitExpr(string $name, int $namePos, array $args, array $attrs): Expr {
1237        if ($this->isSimpleExit($args)) {
1238            // Create Exit node for backwards compatibility.
1239            $attrs['kind'] = strtolower($name) === 'exit' ? Expr\Exit_::KIND_EXIT : Expr\Exit_::KIND_DIE;
1240            return new Expr\Exit_(\count($args) === 1 ? $args[0]->value : null, $attrs);
1241        }
1242        return new Expr\FuncCall(new Name($name, $this->getAttributesAt($namePos)), $args, $attrs);
1243    }
1244
1245    /**
1246     * Creates the token map.
1247     *
1248     * The token map maps the PHP internal token identifiers
1249     * to the identifiers used by the Parser. Additionally it
1250     * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
1251     *
1252     * @return array<int, int> The token map
1253     */
1254    protected function createTokenMap(): array {
1255        $tokenMap = [];
1256
1257        // Single-char tokens use an identity mapping.
1258        for ($i = 0; $i < 256; ++$i) {
1259            $tokenMap[$i] = $i;
1260        }
1261
1262        foreach ($this->symbolToName as $name) {
1263            if ($name[0] === 'T') {
1264                $tokenMap[\constant($name)] = constant(static::class . '::' . $name);
1265            }
1266        }
1267
1268        // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
1269        $tokenMap[\T_OPEN_TAG_WITH_ECHO] = static::T_ECHO;
1270        // T_CLOSE_TAG is equivalent to ';'
1271        $tokenMap[\T_CLOSE_TAG] = ord(';');
1272
1273        // We have created a map from PHP token IDs to external symbol IDs.
1274        // Now map them to the internal symbol ID.
1275        $fullTokenMap = [];
1276        foreach ($tokenMap as $phpToken => $extSymbol) {
1277            $intSymbol = $this->tokenToSymbol[$extSymbol];
1278            if ($intSymbol === $this->invalidSymbol) {
1279                continue;
1280            }
1281            $fullTokenMap[$phpToken] = $intSymbol;
1282        }
1283
1284        return $fullTokenMap;
1285    }
1286}
1287