1<?php declare(strict_types=1); 2 3namespace PhpParser; 4 5/* 6 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in 7 * turn is based on work by Masato Bito. 8 */ 9 10use PhpParser\Node\Expr; 11use PhpParser\Node\Expr\Array_; 12use PhpParser\Node\Expr\Cast\Double; 13use PhpParser\Node\Identifier; 14use PhpParser\Node\InterpolatedStringPart; 15use PhpParser\Node\Name; 16use PhpParser\Node\Param; 17use PhpParser\Node\Scalar\InterpolatedString; 18use PhpParser\Node\Scalar\Int_; 19use PhpParser\Node\Scalar\String_; 20use PhpParser\Node\Stmt; 21use PhpParser\Node\Stmt\Class_; 22use PhpParser\Node\Stmt\ClassConst; 23use PhpParser\Node\Stmt\ClassMethod; 24use PhpParser\Node\Stmt\Else_; 25use PhpParser\Node\Stmt\ElseIf_; 26use PhpParser\Node\Stmt\Enum_; 27use PhpParser\Node\Stmt\Interface_; 28use PhpParser\Node\Stmt\Namespace_; 29use PhpParser\Node\Stmt\Nop; 30use PhpParser\Node\Stmt\Property; 31use PhpParser\Node\Stmt\TryCatch; 32use PhpParser\Node\UseItem; 33use PhpParser\NodeVisitor\CommentAnnotatingVisitor; 34 35abstract class ParserAbstract implements Parser { 36 private const SYMBOL_NONE = -1; 37 38 /** @var Lexer Lexer that is used when parsing */ 39 protected Lexer $lexer; 40 /** @var PhpVersion PHP version to target on a best-effort basis */ 41 protected PhpVersion $phpVersion; 42 43 /* 44 * The following members will be filled with generated parsing data: 45 */ 46 47 /** @var int Size of $tokenToSymbol map */ 48 protected int $tokenToSymbolMapSize; 49 /** @var int Size of $action table */ 50 protected int $actionTableSize; 51 /** @var int Size of $goto table */ 52 protected int $gotoTableSize; 53 54 /** @var int Symbol number signifying an invalid token */ 55 protected int $invalidSymbol; 56 /** @var int Symbol number of error recovery token */ 57 protected int $errorSymbol; 58 /** @var int Action number signifying default action */ 59 protected int $defaultAction; 60 /** @var int Rule number signifying that an unexpected token was 
encountered */ 61 protected int $unexpectedTokenRule; 62 63 protected int $YY2TBLSTATE; 64 /** @var int Number of non-leaf states */ 65 protected int $numNonLeafStates; 66 67 /** @var int[] Map of PHP token IDs to internal symbols */ 68 protected array $phpTokenToSymbol; 69 /** @var array<int, bool> Map of PHP token IDs to drop */ 70 protected array $dropTokens; 71 /** @var int[] Map of external symbols (static::T_*) to internal symbols */ 72 protected array $tokenToSymbol; 73 /** @var string[] Map of symbols to their names */ 74 protected array $symbolToName; 75 /** @var array<int, string> Names of the production rules (only necessary for debugging) */ 76 protected array $productions; 77 78 /** @var int[] Map of states to a displacement into the $action table. The corresponding action for this 79 * state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the 80 * action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 81 protected array $actionBase; 82 /** @var int[] Table of actions. Indexed according to $actionBase comment. */ 83 protected array $action; 84 /** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol 85 * then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 86 protected array $actionCheck; 87 /** @var int[] Map of states to their default action */ 88 protected array $actionDefault; 89 /** @var callable[] Semantic action callbacks */ 90 protected array $reduceCallbacks; 91 92 /** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this 93 * non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */ 94 protected array $gotoBase; 95 /** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. */ 96 protected array $goto; 97 /** @var int[] Table indexed analogously to $goto. 
If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal 98 * then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */ 99 protected array $gotoCheck; 100 /** @var int[] Map of non-terminals to the default state to goto after their reduction */ 101 protected array $gotoDefault; 102 103 /** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for 104 * determining the state to goto after reduction. */ 105 protected array $ruleToNonTerminal; 106 /** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to 107 * be popped from the stack(s) on reduction. */ 108 protected array $ruleToLength; 109 110 /* 111 * The following members are part of the parser state: 112 */ 113 114 /** @var mixed Temporary value containing the result of last semantic action (reduction) */ 115 protected $semValue; 116 /** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */ 117 protected array $semStack; 118 /** @var int[] Token start position stack */ 119 protected array $tokenStartStack; 120 /** @var int[] Token end position stack */ 121 protected array $tokenEndStack; 122 123 /** @var ErrorHandler Error handler */ 124 protected ErrorHandler $errorHandler; 125 /** @var int Error state, used to avoid error floods */ 126 protected int $errorState; 127 128 /** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */ 129 protected ?\SplObjectStorage $createdArrays; 130 131 /** @var Token[] Tokens for the current parse */ 132 protected array $tokens; 133 /** @var int Current position in token array */ 134 protected int $tokenPos; 135 136 /** 137 * Initialize $reduceCallbacks map. 138 */ 139 abstract protected function initReduceCallbacks(): void; 140 141 /** 142 * Creates a parser instance. 
*
     * Options:
     *  * phpVersion: ?PhpVersion,
     *
     * @param Lexer $lexer A lexer
     * @param PhpVersion|null $phpVersion PHP version to target, defaults to latest supported. This
     *                    option is best-effort: Even if specified, parsing will generally assume the latest
     *                    supported version and only adjust behavior in minor ways, for example by omitting
     *                    errors in older versions and interpreting type hints as a name or identifier depending
     *                    on version.
     */
    public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
        $this->lexer = $lexer;
        $this->phpVersion = $phpVersion ?? PhpVersion::getNewestSupported();

        $this->initReduceCallbacks();
        // createTokenMap() is not defined in this part of the file; its result is used by doParse()
        // to translate lexer token ids into parser symbols.
        $this->phpTokenToSymbol = $this->createTokenMap();
        // Token ids that doParse() skips over when fetching the next lookahead token.
        $this->dropTokens = array_fill_keys(
            [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], true
        );
    }

    /**
     * Parses PHP code into a node tree.
     *
     * If a non-throwing error handler is used, the parser will continue parsing after an error
     * occurred and attempt to build a partial AST.
     *
     * @param string $code The source code to parse
     * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
     *                                        to ErrorHandler\Throwing.
     *
     * @return Node\Stmt[]|null Array of statements (or null if a non-throwing error handler is used
     *                          and the parser was unable to recover from an error).
     */
    public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
        $this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing();
        // Fresh registry of array nodes for this parse; it is inspected below once parsing has finished.
        $this->createdArrays = new \SplObjectStorage();

        $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
        $result = $this->doParse();

        // Report errors for any empty elements used inside arrays. This is delayed until after the main parse,
        // because we don't know a priori whether a given array expression will be used in a destructuring context
        // or not.
        foreach ($this->createdArrays as $node) {
            foreach ($node->items as $item) {
                if ($item->value instanceof Expr\Error) {
                    $this->errorHandler->handleError(
                        new Error('Cannot use empty array elements in arrays', $item->getAttributes()));
                }
            }
        }

        // Clear out some of the interior state, so we don't hold onto unnecessary
        // memory between uses of the parser
        $this->tokenStartStack = [];
        $this->tokenEndStack = [];
        $this->semStack = [];
        $this->semValue = null;
        $this->createdArrays = null;

        // $this->tokens is deliberately not cleared above: it is needed here and by getTokens().
        if ($result !== null) {
            $traverser = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
            $traverser->traverse($result);
        }

        return $result;
    }

    /**
     * Returns the token array stored by the most recent parse() call.
     *
     * @return Token[]
     */
    public function getTokens(): array {
        return $this->tokens;
    }

    /**
     * Runs the table-driven shift/reduce loop over $this->tokens.
     *
     * The outer loop determines the action for the current state and lookahead symbol (shifting
     * where required); the inner loop performs reductions and error recovery until a regular
     * (non shift-and-reduce) state is reached again.
     *
     * @return Stmt[]|null Semantic value left by the accepting rule, or null when a semantic action
     *                     raised an Error or error recovery failed
     *
     * @throws \RangeException If the lexer produced a token id missing from $phpTokenToSymbol
     */
    protected function doParse(): ?array {
        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;
        $tokenValue = null;
        $this->tokenPos = -1;

        // Keep stack of start and end attributes
        $this->tokenStartStack = [];
        // Index 0 is pre-filled because a reduction at stack position 0 reads tokenEndStack[$stackPos].
        $this->tokenEndStack = [0];

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = [$state];

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = [];

        // Current position in the stack(s)
        $stackPos = 0;

        $this->errorState = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] === 0) {
                // No per-symbol actions for this state: use the default without reading a token.
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Fetch the next token, skipping over everything listed in $dropTokens.
                    do {
                        $token = $this->tokens[++$this->tokenPos];
                        $tokenId = $token->id;
                    } while (isset($this->dropTokens[$tokenId]));

                    // Map the lexer token id to the internally used symbols.
                    $tokenValue = $token->text;
                    if (!isset($this->phpTokenToSymbol[$tokenId])) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }
                    $symbol = $this->phpTokenToSymbol[$tokenId];

                    //$this->traceRead($symbol);
                }

                // Look the action up at the primary displacement and, for states below YY2TBLSTATE,
                // at the secondary displacement stored at $state + $numNonLeafStates. Note that $idx
                // and $action are assigned inside the condition.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                    && ($action = $this->action[$idx]) !== $this->defaultAction) {
                    /*
                     * >= numNonLeafStates: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        // Push the token: its text is the semantic value and its position serves as
                        // both start and end position.
                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;
                        $this->semStack[$stackPos] = $tokenValue;
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenPos;
                        $symbol = self::SYMBOL_NONE;

                        // Every shifted token counts the error state back down towards 0.
                        if ($this->errorState) {
                            --$this->errorState;
                        }

                        if ($action < $this->numNonLeafStates) {
                            continue;
                        }

                        /* $yyn >= numNonLeafStates means shift-and-reduce */
                        $rule = $action - $this->numNonLeafStates;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                }
                if ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    $ruleLength = $this->ruleToLength[$rule];
                    try {
                        $callback = $this->reduceCallbacks[$rule];
                        if ($callback !== null) {
                            $callback($this, $stackPos);
                        } elseif ($ruleLength > 0) {
                            // Without a callback the value of the first right-hand side symbol is used.
                            $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                        }
                    } catch (Error $e) {
                        // Errors without a line (-1) are attributed to the line of the current token.
                        if (-1 === $e->getStartLine()) {
                            $e->setStartLine($this->tokens[$this->tokenPos]->line);
                        }

                        $this->emitError($e);
                        // Can't recover from this type of error
                        return null;
                    }

                    /* Goto - shift nonterminal */
                    $lastTokenEnd = $this->tokenEndStack[$stackPos];
                    $stackPos -= $ruleLength;
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    // Push the reduction result. The start position of the first popped symbol stays
                    // in tokenStartStack at this index unless the production was empty.
                    ++$stackPos;
                    $stateStack[$stackPos] = $state;
                    $this->semStack[$stackPos] = $this->semValue;
                    $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                    if ($ruleLength === 0) {
                        // Empty productions use the start attributes of the lookahead token.
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    }
                } else {
                    /* error */
                    // errorState 0 reports the error; states 0-2 then attempt recovery and set the
                    // state to 3; in state 3 the offending token is discarded instead.
                    switch ($this->errorState) {
                        case 0:
                            $msg = $this->getErrorMessage($symbol, $state);
                            $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                            // Break missing intentionally
                            // no break
                        case 1:
                        case 2:
                            $this->errorState = 3;

                            // Pop until error-expecting state uncovered
                            while (!(
                                (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                                || ($state < $this->YY2TBLSTATE
                                    && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                                if ($stackPos <= 0) {
                                    // Could not recover from error
                                    return null;
                                }
                                $state = $stateStack[--$stackPos];
                                //$this->tracePop($state);
                            }

                            // Shift the error symbol using the $action assigned in the loop condition above.
                            //$this->traceShift($this->errorSymbol);
                            ++$stackPos;
                            $stateStack[$stackPos] = $state = $action;

                            // We treat the error symbol as being empty, so we reset the end attributes
                            // to the end attributes of the last non-error symbol
                            $this->tokenStartStack[$stackPos] = $this->tokenPos;
                            $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                            break;

                        case 3:
                            if ($symbol === 0) {
                                // Reached EOF without recovering from error
                                return null;
                            }

                            // Drop the lookahead and leave the reduce loop so the outer loop reads a new token.
                            //$this->traceDiscard($symbol);
                            $symbol = self::SYMBOL_NONE;
                            break 2;
                    }
                }

                if ($state < $this->numNonLeafStates) {
                    break;
                }

                /* >= numNonLeafStates means shift-and-reduce */
                $rule = $state - $this->numNonLeafStates;
            }
        }

        throw new \RuntimeException('Reached end of parser loop');
    }

    /**
     * Passes an error to the error handler of the current parse.
     */
    protected function emitError(Error $error): void {
        $this->errorHandler->handleError($error);
    }

    /**
     * Format error message including expected tokens.
     *
     * @param int $symbol Unexpected symbol
     * @param int $state State at time of error
     *
     * @return string Formatted error message
     */
    protected function getErrorMessage(int $symbol, int $state): string {
        $expectedString = '';
        // An empty list (nothing expected, or too many candidates) leaves the suffix off.
        if ($expected = $this->getExpectedTokens($state)) {
            $expectedString = ', expecting ' . implode(' or ', $expected);
        }

        return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $expectedString;
    }

    /**
     * Get limited number of expected tokens in given state.
     *
     * @param int $state State
     *
     * @return string[] Expected tokens. If too many, an empty array is returned.
     */
    protected function getExpectedTokens(int $state): array {
        $expected = [];

        $base = $this->actionBase[$state];
        foreach ($this->symbolToName as $symbol => $name) {
            // Same two-step action table lookup as in doParse(); $idx may be reassigned in the condition.
            $idx = $base + $symbol;
            if ($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
                || $state < $this->YY2TBLSTATE
                && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
            ) {
                if ($this->action[$idx] !== $this->unexpectedTokenRule
                    && $this->action[$idx] !== $this->defaultAction
                    && $symbol !== $this->errorSymbol
                ) {
                    // A fifth candidate makes the list too long to be useful.
                    if (count($expected) === 4) {
                        /* Too many expected tokens */
                        return [];
                    }

                    $expected[] = $name;
                }
            }
        }

        return $expected;
    }

    /**
     * Get attributes for a node with the given start and end token positions.
*
     * @param int $tokenStartPos Token position the node starts at
     * @param int $tokenEndPos Token position the node ends at
     * @return array<string, mixed> Attributes
     */
    protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
        $firstToken = $this->tokens[$tokenStartPos];
        // End line and end offset are read off the token that follows the node's last token.
        $followingToken = $this->tokens[$tokenEndPos + 1];

        $attributes = [];
        $attributes['startLine'] = $firstToken->line;
        $attributes['startTokenPos'] = $tokenStartPos;
        $attributes['startFilePos'] = $firstToken->pos;
        $attributes['endLine'] = $followingToken->line;
        $attributes['endTokenPos'] = $tokenEndPos;
        $attributes['endFilePos'] = $followingToken->pos - 1;

        return $attributes;
    }

    /**
     * Builds the attribute array for a node that consists of exactly one token.
     *
     * The last entry of the token array is treated as a sentinel: it has no following token, so
     * its own line and position are used for both the start and the end attributes.
     *
     * @return array<string, mixed> Attributes
     */
    protected function getAttributesForToken(int $tokenPos): array {
        $sentinelPos = \count($this->tokens) - 1;
        if ($tokenPos >= $sentinelPos) {
            $sentinel = $this->tokens[$tokenPos];

            return [
                'startLine' => $sentinel->line,
                'startTokenPos' => $tokenPos,
                'startFilePos' => $sentinel->pos,
                'endLine' => $sentinel->line,
                'endTokenPos' => $tokenPos,
                'endFilePos' => $sentinel->pos,
            ];
        }

        return $this->getAttributes($tokenPos, $tokenPos);
    }

    /*
     * Tracing functions used for debugging the parser.
     */

    /*
    protected function traceNewState($state, $symbol): void {
        echo '% State ' . $state
            . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
    }

    protected function traceRead($symbol): void {
        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceShift($symbol): void {
        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceAccept(): void {
        echo "% Accepted.\n";
    }

    protected function traceReduce($n): void {
        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
    }

    protected function tracePop($state): void {
        echo '% Recovering, uncovered state ' . $state . "\n";
    }

    protected function traceDiscard($symbol): void {
        echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
    }
    */

    /*
     * Helper functions invoked by semantic actions
     */

    /**
     * Normalizes top-level statements with respect to namespace declarations.
     *
     * Brace-style namespaces are only validated (an error is emitted for the first statement found
     * after a namespace that is neither a namespace, __halt_compiler() nor a nop). For
     * semicolon-style namespaces the statements following a declaration are moved into its ->stmts.
     *
     * @param Node\Stmt[] $stmts
     * @return Node\Stmt[]
     */
    protected function handleNamespaces(array $stmts): array {
        $style = $this->getNamespacingStyle($stmts);
        if ($style === null) {
            // No namespace declarations at all: the statement list is returned untouched.
            return $stmts;
        }

        if ($style === 'brace') {
            $seenNamespace = false;
            $alreadyReported = false;
            foreach ($stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $seenNamespace = true;
                    continue;
                }
                if ($stmt instanceof Stmt\HaltCompiler || $stmt instanceof Nop) {
                    continue;
                }
                if (!$seenNamespace || $alreadyReported) {
                    continue;
                }

                $this->emitError(new Error(
                    'No code may exist outside of namespace {}', $stmt->getAttributes()));
                // Only the first offending statement is reported.
                $alreadyReported = true;
            }

            return $stmts;
        }

        // Semicolon style: $sink always references the array that ordinary statements go into,
        // which is either the top-level result or the ->stmts of the current namespace.
        $topLevel = [];
        $sink = &$topLevel;
        $previousNs = null;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Namespace_) {
                if ($previousNs !== null) {
                    $this->fixupNamespaceAttributes($previousNs);
                }

                $topLevel[] = $stmt;
                if ($stmt->stmts === null) {
                    $stmt->stmts = [];
                    $sink = &$stmt->stmts;
                } else {
                    // Braced namespace among semicolon ones (invalid mix): collect at top level again.
                    $sink = &$topLevel;
                }
                $previousNs = $stmt;
                continue;
            }

            if ($stmt instanceof Stmt\HaltCompiler) {
                // __halt_compiler() always stays at the top level.
                $topLevel[] = $stmt;
                continue;
            }

            $sink[] = $stmt;
        }

        if ($previousNs !== null) {
            $this->fixupNamespaceAttributes($previousNs);
        }

        return $topLevel;
    }

    /**
     * Extends the end attributes of a namespace node to those of its last statement.
     *
     * Does nothing for a namespace without statements. Only endLine, endFilePos and endTokenPos
     * are copied, and only when the last statement actually has them.
     */
    private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
        if (empty($stmt->stmts)) {
            return;
        }

        $finalStmt = $stmt->stmts[count($stmt->stmts) - 1];
        foreach (['endLine', 'endFilePos', 'endTokenPos'] as $attributeName) {
            if (!$finalStmt->hasAttribute($attributeName)) {
                continue;
            }
            $stmt->setAttribute($attributeName, $finalStmt->getAttribute($attributeName));
        }
    }

    /**
     * Returns the attributes of a namespace node with the end attributes narrowed down to the
     * "namespace" keyword, for use in error messages.
     *
     * @return array<string, mixed>
     */
    private function getNamespaceErrorAttributes(Namespace_ $node): array {
        $errorAttrs = $node->getAttributes();

        if (isset($errorAttrs['startLine'])) {
            $errorAttrs['endLine'] = $errorAttrs['startLine'];
        }

        if (isset($errorAttrs['startTokenPos'])) {
            $errorAttrs['endTokenPos'] = $errorAttrs['startTokenPos'];
        }

        if (isset($errorAttrs['startFilePos'])) {
            // The end offset is inclusive, hence the -1 after adding the keyword length.
            $keywordLength = \strlen('namespace');
            $errorAttrs['endFilePos'] = $errorAttrs['startFilePos'] + $keywordLength - 1;
        }

        return $errorAttrs;
    }

    /**
     * Determine namespacing style (semicolon or brace)
     *
     * @param Node[] $stmts Top-level statements.
*
     * @return null|string One of "semicolon", "brace" or null (no namespaces)
     */
    private function getNamespacingStyle(array $stmts): ?string {
        $detectedStyle = null;
        $sawForbiddenStmt = false;

        foreach ($stmts as $index => $stmt) {
            if (!$stmt instanceof Namespace_) {
                // declare(), __halt_compiler() and nops may precede a namespace declaration, and so
                // may a hashbang line if it is the very first statement. Anything else may not.
                $isAllowed = $stmt instanceof Stmt\Declare_
                    || $stmt instanceof Stmt\HaltCompiler
                    || $stmt instanceof Nop
                    || ($index === 0
                        && $stmt instanceof Stmt\InlineHTML
                        && preg_match('/\A#!.*\r?\n\z/', $stmt->value));
                if (!$isAllowed) {
                    $sawForbiddenStmt = true;
                }
                continue;
            }

            $styleOfStmt = $stmt->stmts === null ? 'semicolon' : 'brace';

            if ($detectedStyle === null) {
                // The first namespace declaration fixes the style for the whole file.
                $detectedStyle = $styleOfStmt;
                if ($sawForbiddenStmt) {
                    $this->emitError(new Error(
                        'Namespace declaration statement has to be the very first statement in the script',
                        $this->getNamespaceErrorAttributes($stmt)
                    ));
                }
                continue;
            }

            if ($detectedStyle !== $styleOfStmt) {
                $this->emitError(new Error(
                    'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
                    $this->getNamespaceErrorAttributes($stmt)
                ));

                // Mixed files are normalized as if they used the semicolon style.
                return 'semicolon';
            }
        }

        return $detectedStyle;
    }

    /**
     * Converts an unqualified name into an Identifier if its lowercase form is a builtin type
     * supported by the targeted PHP version. Any other name is returned unchanged.
     *
     * @return Name|Identifier
     */
    protected function handleBuiltinTypes(Name $name) {
        if ($name->isUnqualified()) {
            $lowercased = $name->toLowerString();
            if ($this->phpVersion->supportsBuiltinType($lowercased)) {
                return new Identifier($lowercased, $name->getAttributes());
            }
        }

        return $name;
    }

    /**
     * Get combined start and end attributes at a stack location
     *
     *
@param int $stackPos Stack location
     *
     * @return array<string, mixed> Combined start and end attributes
     */
    protected function getAttributesAt(int $stackPos): array {
        $startPos = $this->tokenStartStack[$stackPos];
        $endPos = $this->tokenEndStack[$stackPos];

        return $this->getAttributes($startPos, $endPos);
    }

    /**
     * Maps the text of a floating point cast to the matching Double::KIND_* constant.
     *
     * The check is case-insensitive and substring based: "float" is tested first, then "real",
     * and everything else is reported as KIND_DOUBLE.
     */
    protected function getFloatCastKind(string $cast): int {
        $lowered = strtolower($cast);

        $kindsByKeyword = [
            'float' => Double::KIND_FLOAT,
            'real' => Double::KIND_REAL,
        ];
        foreach ($kindsByKeyword as $keyword => $kind) {
            if (strpos($lowered, $keyword) !== false) {
                return $kind;
            }
        }

        return Double::KIND_DOUBLE;
    }

    /**
     * Creates an integer node from an integer literal.
     *
     * If Int_::fromString() raises an Error, the error is emitted and a node with the dummy
     * value 0 is returned instead.
     *
     * @param array<string, mixed> $attributes
     */
    protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
        try {
            $node = Int_::fromString($str, $attributes, $allowInvalidOctal);
        } catch (Error $error) {
            $this->emitError($error);
            $node = new Int_(0, $attributes);
        }

        return $node;
    }

    /**
     * Parse a T_NUM_STRING token into either an integer or string node.
     *
     * An integer node is only produced for "0" or a decimal number without leading zeros
     * (optionally negative) whose numeric value is an int; everything else stays a string.
     *
     * @param string $str Number string
     * @param array<string, mixed> $attributes Attributes
     *
     * @return Int_|String_ Integer or string node.
     */
    protected function parseNumString(string $str, array $attributes) {
        if (preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
            $numeric = +$str;
            if (is_int($numeric)) {
                return new Int_($numeric, $attributes);
            }
        }

        return new String_($str, $attributes);
    }

    /**
     * Removes up to $indentLen characters of leading whitespace from the lines of a doc string.
     *
     * An error is emitted for a line whose removed prefix contains the whitespace character other
     * than $indentChar, or whose indentation is shorter than $indentLen while the line is not
     * empty. Processing continues after an emitted error.
     *
     * @param string $string Text to strip
     * @param int $indentLen Number of indentation characters to remove; 0 returns the text as is
     * @param string $indentChar Indentation character in use (space or tab)
     * @param bool $newlineAtStart Whether the start of $string counts as the start of a line
     * @param bool $newlineAtEnd Whether the end of $string counts as the end of a line
     * @param array<string, mixed> $attributes Attributes used for emitted errors
     */
    protected function stripIndentation(
        string $string, int $indentLen, string $indentChar,
        bool $newlineAtStart, bool $newlineAtEnd, array $attributes
    ): string {
        if ($indentLen === 0) {
            return $string;
        }

        $lineStart = '(?<=\n)';
        if ($newlineAtStart) {
            $lineStart = '(?:(?<=\n)|\A)';
        }

        $lineEnd = '(?=[\r\n])';
        if ($newlineAtEnd) {
            $lineEnd = '(?:(?=[\r\n])|\z)';
        }

        // Group 1 is the leading whitespace of a line; group 2 only participates in the match
        // when that whitespace is directly followed by the end of the line.
        $pattern = '/' . $lineStart . '([ \t]*)(' . $lineEnd . ')?/';
        $foreignChar = $indentChar === " " ? "\t" : " ";

        $stripLine = function ($matches) use ($indentLen, $foreignChar, $attributes) {
            $prefix = substr($matches[1], 0, $indentLen);
            $prefixLen = strlen($prefix);

            if (strpos($prefix, $foreignChar) !== false) {
                $this->emitError(new Error(
                    'Invalid indentation - tabs and spaces cannot be mixed', $attributes
                ));
            } elseif ($prefixLen < $indentLen && !isset($matches[2])) {
                $this->emitError(new Error(
                    'Invalid body indentation level ' .
                    '(expecting an indentation level of at least ' . $indentLen . ')',
                    $attributes
                ));
            }

            return substr($matches[0], $prefixLen);
        };

        return preg_replace_callback($pattern, $stripLine, $string);
    }

    /**
     * Builds the node for a heredoc or nowdoc string.
     *
     * The label is taken from the start token and the indentation from the end token; both are
     * stored as attributes together with the kind. The indentation is stripped from the contents,
     * the newline in front of the end label is removed and, except for plain nowdoc contents,
     * escape sequences are parsed.
     *
     * @param string|(Expr|InterpolatedStringPart)[] $contents
     * @param array<string, mixed> $attributes
     * @param array<string, mixed> $endTokenAttributes
     */
    protected function parseDocString(
        string $startToken, $contents, string $endToken,
        array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
    ): Expr {
        // Only nowdoc start tokens contain a single quote.
        $kind = String_::KIND_NOWDOC;
        if (strpos($startToken, "'") === false) {
            $kind = String_::KIND_HEREDOC;
        }

        $labelPattern = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
        $matched = preg_match($labelPattern, $startToken, $labelMatch);
        assert($matched === 1);
        $label = $labelMatch[1];

        $matched = preg_match('/\A[ \t]*/', $endToken, $indentMatch);
        assert($matched === 1);
        $indentation = $indentMatch[0];

        $attributes['kind'] = $kind;
        $attributes['docLabel'] = $label;
        $attributes['docIndentation'] = $indentation;

        $usesSpaces = strpos($indentation, " ") !== false;
        $usesTabs = strpos($indentation, "\t") !== false;
        if ($usesSpaces && $usesTabs) {
            $this->emitError(new Error(
                'Invalid indentation - tabs and spaces cannot be mixed',
                $endTokenAttributes
            ));

            // Continue as if the doc string were not indented at all.
            $indentation = '';
        }

        $indentLen = \strlen($indentation);
        $indentChar = $usesSpaces ? " " : "\t";

        if (\is_string($contents)) {
            if ($contents === '') {
                $attributes['rawValue'] = $contents;
                return new String_('', $attributes);
            }

            $stripped = $this->stripIndentation(
                $contents, $indentLen, $indentChar, true, true, $attributes
            );
            $stripped = preg_replace('~(\r\n|\n|\r)\z~', '', $stripped);
            $attributes['rawValue'] = $stripped;

            if ($kind === String_::KIND_HEREDOC) {
                $stripped = String_::parseEscapeSequences($stripped, null, $parseUnicodeEscape);
            }

            return new String_($stripped, $attributes);
        }

        assert(count($contents) > 0);
        $firstPart = $contents[0];
        if (!$firstPart instanceof InterpolatedStringPart) {
            // No leading string part: run the indentation check on an empty one. The stripped
            // result is not needed, only the errors the call may emit.
            $this->stripIndentation(
                '', $indentLen, $indentChar, true, false, $firstPart->getAttributes()
            );
        }

        $lastIndex = \count($contents) - 1;
        $keptParts = [];
        foreach ($contents as $index => $part) {
            if (!$part instanceof InterpolatedStringPart) {
                $keptParts[] = $part;
                continue;
            }

            $isLastPart = $index === $lastIndex;
            $part->value = $this->stripIndentation(
                $part->value, $indentLen, $indentChar,
                $index === 0, $isLastPart, $part->getAttributes()
            );
            if ($isLastPart) {
                $part->value = preg_replace('~(\r\n|\n|\r)\z~', '', $part->value);
            }
            $part->setAttribute('rawValue', $part->value);
            $part->value = String_::parseEscapeSequences($part->value, null, $parseUnicodeEscape);

            // String parts that end up empty are dropped from the result.
            if ($part->value !== '') {
                $keptParts[] = $part;
            }
        }

        return new InterpolatedString($keptParts, $attributes);
    }

    /**
     * Creates a Comment (or Comment\Doc for doc comments) from a comment token.
     *
     * @param Token $token Comment token
     * @param int $tokenPos Position of the token, used as both start and end token position
     */
    protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
        assert($token->id === \T_COMMENT || $token->id == \T_DOC_COMMENT);

        $text = $token->text;
        $startLine = $token->line;
        $startFilePos = $token->pos;
        $endLine = $token->getEndLine();
        $endFilePos = $token->getEndPos() - 1;

        if ($token->id === \T_DOC_COMMENT) {
            return new Comment\Doc($text, $startLine, $startFilePos, $tokenPos,
                $endLine, $endFilePos, $tokenPos);
        }

        return new Comment($text, $startLine, $startFilePos, $tokenPos,
            $endLine, $endFilePos, $tokenPos);
    }

    /**
     * Get last comment before the given token position, if any
     *
     * Walks backwards over dropped tokens only: the nearest comment among them is returned, and
     * the search ends with null at the first token that is not dropped.
     */
    protected function getCommentBeforeToken(int $tokenPos): ?Comment {
        for ($pos = $tokenPos - 1; $pos >= 0; --$pos) {
            $candidate = $this->tokens[$pos];
            if (!isset($this->dropTokens[$candidate->id])) {
                return null;
            }

            if (\in_array($candidate->id, [\T_COMMENT, \T_DOC_COMMENT], true)) {
                return $this->createCommentFromToken($candidate, $pos);
            }
        }

        return null;
    }

    /**
     * Create a zero-length nop to capture preceding comments, if any.
*/
    protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
        $comment = $this->getCommentBeforeToken($tokenPos);
        if ($comment === null) {
            return null;
        }

        // The nop is positioned directly behind the comment. Its start attributes are one past its
        // end attributes, which is what makes it zero-length.
        $commentEndLine = $comment->getEndLine();
        $commentEndFilePos = $comment->getEndFilePos();
        $commentEndTokenPos = $comment->getEndTokenPos();
        $attributes = [
            'startLine' => $commentEndLine,
            'endLine' => $commentEndLine,
            'startFilePos' => $commentEndFilePos + 1,
            'endFilePos' => $commentEndFilePos,
            'startTokenPos' => $commentEndTokenPos + 1,
            'endTokenPos' => $commentEndTokenPos,
        ];
        return new Nop($attributes);
    }

    /**
     * Creates a nop spanning the given token range, but only if a comment precedes the start token.
     *
     * @param int $tokenStartPos Token position the nop starts at
     * @param int $tokenEndPos Token position the nop ends at
     *
     * @return Nop|null The nop, or null if there is no comment before $tokenStartPos
     */
    protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
        if ($this->getCommentBeforeToken($tokenStartPos) === null) {
            return null;
        }
        return new Nop($this->getAttributes($tokenStartPos, $tokenEndPos));
    }

    /**
     * Stops token consumption after __halt_compiler() and returns the remaining text.
     *
     * @return string Text of the token following the current one if it is inline HTML, '' otherwise
     */
    protected function handleHaltCompiler(): string {
        // Prevent the lexer from returning any further tokens.
        // The position is moved to the second-to-last token, so the next token the parser loop
        // fetches is the last entry of the token array.
        $nextToken = $this->tokens[$this->tokenPos + 1];
        $this->tokenPos = \count($this->tokens) - 2;

        // Return text after __halt_compiler.
        return $nextToken->id === \T_INLINE_HTML ? $nextToken->text : '';
    }

    /**
     * Checks whether the inline HTML at the given stack position starts on a new line.
     *
     * @param int $stackPos Stack location of the inline HTML token
     *
     * @return bool True if the inline HTML is the very first token, or if the token in front of
     *              it (expected to be a closing tag) contains a line break
     */
    protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
        $tokenPos = $this->tokenStartStack[$stackPos];
        $token = $this->tokens[$tokenPos];
        assert($token->id === \T_INLINE_HTML);
        if ($tokenPos > 0) {
            $prevToken = $this->tokens[$tokenPos - 1];
            assert($prevToken->id === \T_CLOSE_TAG);
            return false !== strpos($prevToken->text, "\n")
                || false !== strpos($prevToken->text, "\r");
        }
        return true;
    }

    /**
     * Returns the attributes for an empty array element located at the given token position.
     *
     * @return array<string, mixed>
     */
    protected function createEmptyElemAttributes(int $tokenPos): array {
        return $this->getAttributesForToken($tokenPos);
    }

    /**
     * Converts an array literal that is used as a destructuring target into a List_ node.
     *
     * The array is removed from $createdArrays, because empty elements are legal in
     * destructuring and must not be reported by the check that parse() runs on that registry.
     * Placeholder items become null and nested array literals are converted recursively.
     */
    protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
        // offsetUnset() is the non-deprecated equivalent of SplObjectStorage::detach(), which is
        // deprecated as of PHP 8.5.
        $this->createdArrays->offsetUnset($node);
        return new Expr\List_(array_map(function (Node\ArrayItem $item) {
            if ($item->value instanceof Expr\Error) {
                // We used Error as a placeholder for empty elements, which are legal for destructuring.
                return null;
            }
            if ($item->value instanceof Array_) {
                return new Node\ArrayItem(
                    $this->fixupArrayDestructuring($item->value),
                    $item->key, $item->byRef, $item->getAttributes());
            }
            return $item;
        }, $node->items), ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes());
    }

    /**
     * Replaces placeholder items of a list() node by null, modifying the node in place.
     */
    protected function postprocessList(Expr\List_ $node): void {
        foreach ($node->items as $i => $item) {
            if ($item->value instanceof Expr\Error) {
                // We used Error as a placeholder for empty elements, which are legal for destructuring.
                $node->items[$i] = null;
            }
        }
    }

    /**
     * Extends the end attributes of an else/elseif node to those of a trailing nop statement.
     *
     * @param ElseIf_|Else_ $node
     */
    protected function fixupAlternativeElse($node): void {
        // Make sure a trailing nop statement carrying comments is part of the node.
        $numStmts = \count($node->stmts);
        if ($numStmts === 0 || !$node->stmts[$numStmts - 1] instanceof Nop) {
            return;
        }

        $nopAttrs = $node->stmts[$numStmts - 1]->getAttributes();
        foreach (['endLine', 'endFilePos', 'endTokenPos'] as $endAttribute) {
            if (isset($nopAttrs[$endAttribute])) {
                $node->setAttribute($endAttribute, $nopAttrs[$endAttribute]);
            }
        }
    }

    /**
     * Verifies a class modifier combination; an invalid one is emitted as an error located at
     * the modifier.
     *
     * @param int $a Modifiers seen so far
     * @param int $b Modifier being added
     * @param int $modifierPos Stack location of the modifier being added
     */
    protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
        try {
            Modifiers::verifyClassModifier($a, $b);
        } catch (Error $error) {
            $error->setAttributes($this->getAttributesAt($modifierPos));
            $this->emitError($error);
        }
    }

    /**
     * Verifies a member modifier combination; an invalid one is emitted as an error located at
     * the modifier.
     *
     * @param int $a Modifiers seen so far
     * @param int $b Modifier being added
     * @param int $modifierPos Stack location of the modifier being added
     */
    protected function checkModifier(int $a, int $b, int $modifierPos): void {
        // Jumping through some hoops here because verifyModifier() is also used elsewhere
        try {
            Modifiers::verifyModifier($a, $b);
        } catch (Error $error) {
            $error->setAttributes($this->getAttributesAt($modifierPos));
            $this->emitError($error);
        }
    }

    /**
     * Emits an error if a variadic parameter declares a default value.
     */
    protected function checkParam(Param $node): void {
        if ($node->variadic && null !== $node->default) {
            $this->emitError(new Error(
                'Variadic parameter cannot have a default value',
                $node->default->getAttributes()
            ));
        }
    }

    /**
     * Emits an error if a try statement has neither a catch nor a finally clause.
     */
    protected function checkTryCatch(TryCatch $node): void {
        if (empty($node->catches) && null === $node->finally) {
            $this->emitError(new Error(
                'Cannot use try without catch or finally', $node->getAttributes()
            ));
        }
    }

    protected function checkNamespace(Namespace_ $node): void {
        if (null !== $node->stmts) {
            foreach ($node->stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $this->emitError(new Error(
                        'Namespace declarations cannot be nested', $stmt->getAttributes()
                    ));
1062 } 1063 } 1064 } 1065 } 1066 1067 private function checkClassName(?Identifier $name, int $namePos): void { 1068 if (null !== $name && $name->isSpecialClassName()) { 1069 $this->emitError(new Error( 1070 sprintf('Cannot use \'%s\' as class name as it is reserved', $name), 1071 $this->getAttributesAt($namePos) 1072 )); 1073 } 1074 } 1075 1076 /** @param Name[] $interfaces */ 1077 private function checkImplementedInterfaces(array $interfaces): void { 1078 foreach ($interfaces as $interface) { 1079 if ($interface->isSpecialClassName()) { 1080 $this->emitError(new Error( 1081 sprintf('Cannot use \'%s\' as interface name as it is reserved', $interface), 1082 $interface->getAttributes() 1083 )); 1084 } 1085 } 1086 } 1087 1088 protected function checkClass(Class_ $node, int $namePos): void { 1089 $this->checkClassName($node->name, $namePos); 1090 1091 if ($node->extends && $node->extends->isSpecialClassName()) { 1092 $this->emitError(new Error( 1093 sprintf('Cannot use \'%s\' as class name as it is reserved', $node->extends), 1094 $node->extends->getAttributes() 1095 )); 1096 } 1097 1098 $this->checkImplementedInterfaces($node->implements); 1099 } 1100 1101 protected function checkInterface(Interface_ $node, int $namePos): void { 1102 $this->checkClassName($node->name, $namePos); 1103 $this->checkImplementedInterfaces($node->extends); 1104 } 1105 1106 protected function checkEnum(Enum_ $node, int $namePos): void { 1107 $this->checkClassName($node->name, $namePos); 1108 $this->checkImplementedInterfaces($node->implements); 1109 } 1110 1111 protected function checkClassMethod(ClassMethod $node, int $modifierPos): void { 1112 if ($node->flags & Modifiers::STATIC) { 1113 switch ($node->name->toLowerString()) { 1114 case '__construct': 1115 $this->emitError(new Error( 1116 sprintf('Constructor %s() cannot be static', $node->name), 1117 $this->getAttributesAt($modifierPos))); 1118 break; 1119 case '__destruct': 1120 $this->emitError(new Error( 1121 sprintf('Destructor %s() 
cannot be static', $node->name), 1122 $this->getAttributesAt($modifierPos))); 1123 break; 1124 case '__clone': 1125 $this->emitError(new Error( 1126 sprintf('Clone method %s() cannot be static', $node->name), 1127 $this->getAttributesAt($modifierPos))); 1128 break; 1129 } 1130 } 1131 1132 if ($node->flags & Modifiers::READONLY) { 1133 $this->emitError(new Error( 1134 sprintf('Method %s() cannot be readonly', $node->name), 1135 $this->getAttributesAt($modifierPos))); 1136 } 1137 } 1138 1139 protected function checkClassConst(ClassConst $node, int $modifierPos): void { 1140 if ($node->flags & Modifiers::STATIC) { 1141 $this->emitError(new Error( 1142 "Cannot use 'static' as constant modifier", 1143 $this->getAttributesAt($modifierPos))); 1144 } 1145 if ($node->flags & Modifiers::ABSTRACT) { 1146 $this->emitError(new Error( 1147 "Cannot use 'abstract' as constant modifier", 1148 $this->getAttributesAt($modifierPos))); 1149 } 1150 if ($node->flags & Modifiers::READONLY) { 1151 $this->emitError(new Error( 1152 "Cannot use 'readonly' as constant modifier", 1153 $this->getAttributesAt($modifierPos))); 1154 } 1155 } 1156 1157 protected function checkProperty(Property $node, int $modifierPos): void { 1158 if ($node->flags & Modifiers::ABSTRACT) { 1159 $this->emitError(new Error('Properties cannot be declared abstract', 1160 $this->getAttributesAt($modifierPos))); 1161 } 1162 1163 if ($node->flags & Modifiers::FINAL) { 1164 $this->emitError(new Error('Properties cannot be declared final', 1165 $this->getAttributesAt($modifierPos))); 1166 } 1167 } 1168 1169 protected function checkUseUse(UseItem $node, int $namePos): void { 1170 if ($node->alias && $node->alias->isSpecialClassName()) { 1171 $this->emitError(new Error( 1172 sprintf( 1173 'Cannot use %s as %s because \'%2$s\' is a special class name', 1174 $node->name, $node->alias 1175 ), 1176 $this->getAttributesAt($namePos) 1177 )); 1178 } 1179 } 1180 1181 /** 1182 * Creates the token map. 
1183 * 1184 * The token map maps the PHP internal token identifiers 1185 * to the identifiers used by the Parser. Additionally it 1186 * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. 1187 * 1188 * @return array<int, int> The token map 1189 */ 1190 protected function createTokenMap(): array { 1191 $tokenMap = []; 1192 1193 for ($i = 0; $i < 1000; ++$i) { 1194 if ($i < 256) { 1195 // Single-char tokens use an identity mapping. 1196 $tokenMap[$i] = $i; 1197 } elseif (\T_DOUBLE_COLON === $i) { 1198 // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM 1199 $tokenMap[$i] = static::T_PAAMAYIM_NEKUDOTAYIM; 1200 } elseif (\T_OPEN_TAG_WITH_ECHO === $i) { 1201 // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO 1202 $tokenMap[$i] = static::T_ECHO; 1203 } elseif (\T_CLOSE_TAG === $i) { 1204 // T_CLOSE_TAG is equivalent to ';' 1205 $tokenMap[$i] = ord(';'); 1206 } elseif ('UNKNOWN' !== $name = token_name($i)) { 1207 if (defined($name = static::class . '::' . $name)) { 1208 // Other tokens can be mapped directly 1209 $tokenMap[$i] = constant($name); 1210 } 1211 } 1212 } 1213 1214 // Assign tokens for which we define compatibility constants, as token_name() does not know them. 
1215 $tokenMap[\T_FN] = static::T_FN; 1216 $tokenMap[\T_COALESCE_EQUAL] = static::T_COALESCE_EQUAL; 1217 $tokenMap[\T_NAME_QUALIFIED] = static::T_NAME_QUALIFIED; 1218 $tokenMap[\T_NAME_FULLY_QUALIFIED] = static::T_NAME_FULLY_QUALIFIED; 1219 $tokenMap[\T_NAME_RELATIVE] = static::T_NAME_RELATIVE; 1220 $tokenMap[\T_MATCH] = static::T_MATCH; 1221 $tokenMap[\T_NULLSAFE_OBJECT_OPERATOR] = static::T_NULLSAFE_OBJECT_OPERATOR; 1222 $tokenMap[\T_ATTRIBUTE] = static::T_ATTRIBUTE; 1223 $tokenMap[\T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG] = static::T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG; 1224 $tokenMap[\T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG] = static::T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG; 1225 $tokenMap[\T_ENUM] = static::T_ENUM; 1226 $tokenMap[\T_READONLY] = static::T_READONLY; 1227 1228 // We have create a map from PHP token IDs to external symbol IDs. 1229 // Now map them to the internal symbol ID. 1230 $fullTokenMap = []; 1231 foreach ($tokenMap as $phpToken => $extSymbol) { 1232 $intSymbol = $this->tokenToSymbol[$extSymbol]; 1233 if ($intSymbol === $this->invalidSymbol) { 1234 continue; 1235 } 1236 $fullTokenMap[$phpToken] = $intSymbol; 1237 } 1238 1239 return $fullTokenMap; 1240 } 1241} 1242