1<?php declare(strict_types=1); 2 3namespace PhpParser; 4 5/* 6 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in 7 * turn is based on work by Masato Bito. 8 */ 9 10use PhpParser\Node\Arg; 11use PhpParser\Node\Expr; 12use PhpParser\Node\Expr\Array_; 13use PhpParser\Node\Expr\Cast\Double; 14use PhpParser\Node\Identifier; 15use PhpParser\Node\InterpolatedStringPart; 16use PhpParser\Node\Name; 17use PhpParser\Node\Param; 18use PhpParser\Node\PropertyHook; 19use PhpParser\Node\Scalar\InterpolatedString; 20use PhpParser\Node\Scalar\Int_; 21use PhpParser\Node\Scalar\String_; 22use PhpParser\Node\Stmt; 23use PhpParser\Node\Stmt\Class_; 24use PhpParser\Node\Stmt\ClassConst; 25use PhpParser\Node\Stmt\ClassMethod; 26use PhpParser\Node\Stmt\Else_; 27use PhpParser\Node\Stmt\ElseIf_; 28use PhpParser\Node\Stmt\Enum_; 29use PhpParser\Node\Stmt\Interface_; 30use PhpParser\Node\Stmt\Namespace_; 31use PhpParser\Node\Stmt\Nop; 32use PhpParser\Node\Stmt\Property; 33use PhpParser\Node\Stmt\TryCatch; 34use PhpParser\Node\UseItem; 35use PhpParser\NodeVisitor\CommentAnnotatingVisitor; 36 37abstract class ParserAbstract implements Parser { 38 private const SYMBOL_NONE = -1; 39 40 /** @var Lexer Lexer that is used when parsing */ 41 protected Lexer $lexer; 42 /** @var PhpVersion PHP version to target on a best-effort basis */ 43 protected PhpVersion $phpVersion; 44 45 /* 46 * The following members will be filled with generated parsing data: 47 */ 48 49 /** @var int Size of $tokenToSymbol map */ 50 protected int $tokenToSymbolMapSize; 51 /** @var int Size of $action table */ 52 protected int $actionTableSize; 53 /** @var int Size of $goto table */ 54 protected int $gotoTableSize; 55 56 /** @var int Symbol number signifying an invalid token */ 57 protected int $invalidSymbol; 58 /** @var int Symbol number of error recovery token */ 59 protected int $errorSymbol; 60 /** @var int Action number signifying default action */ 61 protected int $defaultAction; 62 /** @var 
int Rule number signifying that an unexpected token was encountered */ 63 protected int $unexpectedTokenRule; 64 65 protected int $YY2TBLSTATE; 66 /** @var int Number of non-leaf states */ 67 protected int $numNonLeafStates; 68 69 /** @var int[] Map of PHP token IDs to internal symbols */ 70 protected array $phpTokenToSymbol; 71 /** @var array<int, bool> Map of PHP token IDs to drop */ 72 protected array $dropTokens; 73 /** @var int[] Map of external symbols (static::T_*) to internal symbols */ 74 protected array $tokenToSymbol; 75 /** @var string[] Map of symbols to their names */ 76 protected array $symbolToName; 77 /** @var array<int, string> Names of the production rules (only necessary for debugging) */ 78 protected array $productions; 79 80 /** @var int[] Map of states to a displacement into the $action table. The corresponding action for this 81 * state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the 82 * action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 83 protected array $actionBase; 84 /** @var int[] Table of actions. Indexed according to $actionBase comment. */ 85 protected array $action; 86 /** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol 87 * then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 88 protected array $actionCheck; 89 /** @var int[] Map of states to their default action */ 90 protected array $actionDefault; 91 /** @var callable[] Semantic action callbacks */ 92 protected array $reduceCallbacks; 93 94 /** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this 95 * non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */ 96 protected array $gotoBase; 97 /** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. 
*/ 98 protected array $goto; 99 /** @var int[] Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal 100 * then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */ 101 protected array $gotoCheck; 102 /** @var int[] Map of non-terminals to the default state to goto after their reduction */ 103 protected array $gotoDefault; 104 105 /** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for 106 * determining the state to goto after reduction. */ 107 protected array $ruleToNonTerminal; 108 /** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to 109 * be popped from the stack(s) on reduction. */ 110 protected array $ruleToLength; 111 112 /* 113 * The following members are part of the parser state: 114 */ 115 116 /** @var mixed Temporary value containing the result of last semantic action (reduction) */ 117 protected $semValue; 118 /** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */ 119 protected array $semStack; 120 /** @var int[] Token start position stack */ 121 protected array $tokenStartStack; 122 /** @var int[] Token end position stack */ 123 protected array $tokenEndStack; 124 125 /** @var ErrorHandler Error handler */ 126 protected ErrorHandler $errorHandler; 127 /** @var int Error state, used to avoid error floods */ 128 protected int $errorState; 129 130 /** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */ 131 protected ?\SplObjectStorage $createdArrays; 132 133 /** @var Token[] Tokens for the current parse */ 134 protected array $tokens; 135 /** @var int Current position in token array */ 136 protected int $tokenPos; 137 138 /** 139 * Initialize $reduceCallbacks map. 
*/
    abstract protected function initReduceCallbacks(): void;

    /**
     * Creates a parser instance.
     *
     * Besides storing the lexer and target version, this initializes the reduce callbacks, builds the
     * PHP-token-to-symbol map and registers the token types that are skipped while parsing
     * (whitespace, open tags, comments, doc comments and bad characters).
     *
     * @param Lexer $lexer A lexer
     * @param PhpVersion|null $phpVersion PHP version to target, defaults to latest supported. This
     *                   option is best-effort: Even if specified, parsing will generally assume the latest
     *                   supported version and only adjust behavior in minor ways, for example by omitting
     *                   errors in older versions and interpreting type hints as a name or identifier depending
     *                   on version.
     */
    public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
        $this->lexer = $lexer;
        $this->phpVersion = $phpVersion ?? PhpVersion::getNewestSupported();

        $this->initReduceCallbacks();
        $this->phpTokenToSymbol = $this->createTokenMap();
        $this->dropTokens = array_fill_keys(
            [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], true
        );
    }

    /**
     * Parses PHP code into a node tree.
     *
     * If a non-throwing error handler is used, the parser will continue parsing after an error
     * occurred and attempt to build a partial AST.
     *
     * @param string $code The source code to parse
     * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
     *                                        to ErrorHandler\Throwing.
     *
     * @return Node\Stmt[]|null Array of statements (or null if a non-throwing error handler is used and
     *                          the parser was unable to recover from an error).
     */
    public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
        $this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing();
        $this->createdArrays = new \SplObjectStorage();

        $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
        $result = $this->doParse();

        // Report errors for any empty elements used inside arrays. This is delayed until after the main parse,
        // because we don't know a priori whether a given array expression will be used in a destructuring context
        // or not.
        foreach ($this->createdArrays as $node) {
            foreach ($node->items as $item) {
                if ($item->value instanceof Expr\Error) {
                    $this->errorHandler->handleError(
                        new Error('Cannot use empty array elements in arrays', $item->getAttributes()));
                }
            }
        }

        // Clear out some of the interior state, so we don't hold onto unnecessary
        // memory between uses of the parser
        $this->tokenStartStack = [];
        $this->tokenEndStack = [];
        $this->semStack = [];
        $this->semValue = null;
        $this->createdArrays = null;

        // Comments are attached to nodes in a separate pass over the finished tree.
        if ($result !== null) {
            $traverser = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
            $traverser->traverse($result);
        }

        return $result;
    }

    /**
     * Returns the token array produced by the lexer for the most recent parse() call.
     *
     * @return Token[]
     */
    public function getTokens(): array {
        return $this->tokens;
    }

    /**
     * Runs the table-driven parse loop over $this->tokens.
     *
     * Tokens whose id is registered in $dropTokens are skipped when fetching the lookahead. On a syntax
     * error the error is reported through emitError(); recovery then pops states until one that accepts
     * the error symbol is uncovered and discards lookahead tokens while in error state 3.
     *
     * @return Stmt[]|null Semantic value of the accepting reduction, or null if a reduce callback threw
     *                     an Error or error recovery was not possible
     *
     * @throws \RangeException If the lexer produced a token id without an entry in $phpTokenToSymbol
     */
    protected function doParse(): ?array {
        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;
        $tokenValue = null;
        $this->tokenPos = -1;

        // Keep stack of start and end attributes
        $this->tokenStartStack = [];
        $this->tokenEndStack = [0];

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = [$state];

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = [];

        // Current position in the stack(s)
        $stackPos = 0;

        $this->errorState = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] === 0) {
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Fetch the next token that is not dropped (whitespace, comments, ...).
                    do {
                        $token = $this->tokens[++$this->tokenPos];
                        $tokenId = $token->id;
                    } while (isset($this->dropTokens[$tokenId]));

                    // Map the lexer token id to the internally used symbols.
                    $tokenValue = $token->text;
                    if (!isset($this->phpTokenToSymbol[$tokenId])) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }
                    $symbol = $this->phpTokenToSymbol[$tokenId];

                    //$this->traceRead($symbol);
                }

                // Note: $idx and $action are assigned inside the condition and used afterwards.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                    && ($action = $this->action[$idx]) !== $this->defaultAction) {
                    /*
                     * >= numNonLeafStates: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;
                        $this->semStack[$stackPos] = $tokenValue;
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenPos;
                        $symbol = self::SYMBOL_NONE;

                        // Each successful shift counts down the error-flood guard.
                        if ($this->errorState) {
                            --$this->errorState;
                        }

                        if ($action < $this->numNonLeafStates) {
                            continue;
                        }

                        /* $yyn >= numNonLeafStates means shift-and-reduce */
                        $rule = $action - $this->numNonLeafStates;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                }
                if ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    $ruleLength = $this->ruleToLength[$rule];
                    try {
                        $callback = $this->reduceCallbacks[$rule];
                        if ($callback !== null) {
                            $callback($this, $stackPos);
                        } elseif ($ruleLength > 0) {
                            // No callback: the value of the first right-hand side symbol is passed through.
                            $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                        }
                    } catch (Error $e) {
                        if (-1 === $e->getStartLine()) {
                            $e->setStartLine($this->tokens[$this->tokenPos]->line);
                        }

                        $this->emitError($e);
                        // Can't recover from this type of error
                        return null;
                    }

                    /* Goto - shift nonterminal */
                    $lastTokenEnd = $this->tokenEndStack[$stackPos];
                    $stackPos -= $ruleLength;
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    ++$stackPos;
                    $stateStack[$stackPos] = $state;
                    $this->semStack[$stackPos] = $this->semValue;
                    $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                    if ($ruleLength === 0) {
                        // Empty productions use the start attributes of the lookahead token.
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    }
                } else {
                    /* error */
                    switch ($this->errorState) {
                        case 0:
                            $msg = $this->getErrorMessage($symbol, $state);
                            $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                            // Break missing intentionally
                            // no break
                        case 1:
                        case 2:
                            $this->errorState = 3;

                            // Pop until error-expecting state uncovered
                            while (!(
                                (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                                || ($state < $this->YY2TBLSTATE
                                    && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                                if ($stackPos <= 0) {
                                    // Could not recover from error
                                    return null;
                                }
                                $state = $stateStack[--$stackPos];
                                //$this->tracePop($state);
                            }

                            //$this->traceShift($this->errorSymbol);
                            ++$stackPos;
                            $stateStack[$stackPos] = $state = $action;

                            // We treat the error symbol as being empty, so we reset the end attributes
                            // to the end attributes of the last non-error symbol
                            $this->tokenStartStack[$stackPos] = $this->tokenPos;
                            $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                            break;

                        case 3:
                            if ($symbol === 0) {
                                // Reached EOF without recovering from error
                                return null;
                            }

                            //$this->traceDiscard($symbol);
                            $symbol = self::SYMBOL_NONE;
                            // Leaves the inner loop so that the outer loop reads a fresh lookahead token.
                            break 2;
                    }
                }

                if ($state < $this->numNonLeafStates) {
                    break;
                }

                /* >= numNonLeafStates means shift-and-reduce */
                $rule = $state - $this->numNonLeafStates;
            }
        }

        throw new \RuntimeException('Reached end of parser loop');
    }

    /**
     * Forwards an error to the error handler of the current parse.
     */
    protected function emitError(Error $error): void {
        $this->errorHandler->handleError($error);
    }

    /**
     * Format error message including expected tokens.
     *
     * The ", expecting ..." suffix is only added when getExpectedTokens() returns a non-empty list.
     *
     * @param int $symbol Unexpected symbol
     * @param int $state State at time of error
     *
     * @return string Formatted error message
     */
    protected function getErrorMessage(int $symbol, int $state): string {
        $expectedString = '';
        if ($expected = $this->getExpectedTokens($state)) {
            $expectedString = ', expecting ' . implode(' or ', $expected);
        }

        return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $expectedString;
    }

    /**
     * Get limited number of expected tokens in given state.
     *
     * A symbol counts as expected if the action table has an entry for it in this state that is neither
     * the unexpected-token rule nor the default action, and the symbol is not the error symbol.
     *
     * @param int $state State
     *
     * @return string[] Expected tokens. If too many (more than four), an empty array is returned.
     */
    protected function getExpectedTokens(int $state): array {
        $expected = [];

        $base = $this->actionBase[$state];
        foreach ($this->symbolToName as $symbol => $name) {
            $idx = $base + $symbol;
            if ($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
                || $state < $this->YY2TBLSTATE
                && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
            ) {
                if ($this->action[$idx] !== $this->unexpectedTokenRule
                    && $this->action[$idx] !== $this->defaultAction
                    && $symbol !== $this->errorSymbol
                ) {
                    if (count($expected) === 4) {
                        /* Too many expected tokens */
                        return [];
                    }

                    $expected[] = $name;
                }
            }
        }

        return $expected;
    }

    /**
     * Get attributes for a node with the given start and end token positions.
*
     * @param int $tokenStartPos Token position the node starts at
     * @param int $tokenEndPos Token position the node ends at
     * @return array<string, mixed> Attributes
     */
    protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
        $firstToken = $this->tokens[$tokenStartPos];
        // End line and end file position are derived from the token that follows the last token.
        $followingToken = $this->tokens[$tokenEndPos + 1];

        $attributes = [];
        $attributes['startLine'] = $firstToken->line;
        $attributes['startTokenPos'] = $tokenStartPos;
        $attributes['startFilePos'] = $firstToken->pos;
        $attributes['endLine'] = $followingToken->line;
        $attributes['endTokenPos'] = $tokenEndPos;
        $attributes['endFilePos'] = $followingToken->pos - 1;

        return $attributes;
    }

    /**
     * Get attributes for a single token at the given token position.
     *
     * The last entry of the token array has no following token, so its attributes are built from the
     * token itself instead of going through getAttributes().
     *
     * @return array<string, mixed> Attributes
     */
    protected function getAttributesForToken(int $tokenPos): array {
        $lastTokenPos = \count($this->tokens) - 1;
        if ($tokenPos >= $lastTokenPos) {
            // Sentinel token: start and end coincide.
            $sentinel = $this->tokens[$tokenPos];
            return [
                'startLine' => $sentinel->line,
                'startTokenPos' => $tokenPos,
                'startFilePos' => $sentinel->pos,
                'endLine' => $sentinel->line,
                'endTokenPos' => $tokenPos,
                'endFilePos' => $sentinel->pos,
            ];
        }

        return $this->getAttributes($tokenPos, $tokenPos);
    }

    /*
     * Tracing functions used for debugging the parser.
     */

    /*
    protected function traceNewState($state, $symbol): void {
        echo '% State ' . $state
            . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
    }

    protected function traceRead($symbol): void {
        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceShift($symbol): void {
        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceAccept(): void {
        echo "% Accepted.\n";
    }

    protected function traceReduce($n): void {
        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
    }

    protected function tracePop($state): void {
        echo '% Recovering, uncovered state ' . $state . "\n";
    }

    protected function traceDiscard($symbol): void {
        echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
    }
    */

    /*
     * Helper functions invoked by semantic actions
     */

    /**
     * Normalizes top-level statements with respect to namespaces.
     *
     * For brace-style namespaces the statements are returned unchanged; at most one error is emitted if
     * code other than __halt_compiler() or a nop follows the first namespace at top level. For
     * semicolon-style namespaces the statements following each declaration are moved into that
     * namespace's ->stmts, except __halt_compiler(), which stays at top level.
     *
     * @param Node\Stmt[] $stmts
     * @return Node\Stmt[]
     */
    protected function handleNamespaces(array $stmts): array {
        $style = $this->getNamespacingStyle($stmts);
        if (null === $style) {
            // No namespace declarations at all.
            return $stmts;
        }

        if ('brace' === $style) {
            $seenNamespace = false;
            foreach ($stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $seenNamespace = true;
                    continue;
                }
                if (!$seenNamespace
                    || $stmt instanceof Node\Stmt\HaltCompiler
                    || $stmt instanceof Node\Stmt\Nop
                ) {
                    continue;
                }

                $this->emitError(new Error(
                    'No code may exist outside of namespace {}', $stmt->getAttributes()));
                // One error is enough; nothing else is checked in this branch.
                break;
            }
            return $stmts;
        }

        // Semicolon style: $target always refers to the array that currently collects statements.
        $topLevel = [];
        $target = &$topLevel;
        $previousNs = null;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Node\Stmt\HaltCompiler) {
                // __halt_compiler() is never moved into a namespace.
                $topLevel[] = $stmt;
                continue;
            }
            if (!$stmt instanceof Namespace_) {
                $target[] = $stmt;
                continue;
            }

            if (null !== $previousNs) {
                $this->fixupNamespaceAttributes($previousNs);
            }
            $topLevel[] = $stmt;
            if (null === $stmt->stmts) {
                $stmt->stmts = [];
                $target = &$stmt->stmts;
            } else {
                // Invalid mix of styles: a braced namespace keeps its own statements, and whatever
                // follows is collected at top level again.
                $target = &$topLevel;
            }
            $previousNs = $stmt;
        }
        if (null !== $previousNs) {
            $this->fixupNamespaceAttributes($previousNs);
        }

        return $topLevel;
    }

    /**
     * Extends the end attributes of a namespace node to those of its last statement.
     *
     * Only the builtin end attributes (endLine, endFilePos, endTokenPos) are copied, and only those
     * the last statement actually has. Namespaces without statements are left untouched.
     */
    private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
        if (empty($stmt->stmts)) {
            return;
        }

        $finalStmt = $stmt->stmts[count($stmt->stmts) - 1];
        foreach (['endLine', 'endFilePos', 'endTokenPos'] as $attributeName) {
            if (!$finalStmt->hasAttribute($attributeName)) {
                continue;
            }
            $stmt->setAttribute($attributeName, $finalStmt->getAttribute($attributeName));
        }
    }

    /**
     * Builds error attributes that cover only the "namespace" keyword of a namespace node.
     *
     * @return array<string, mixed>
     */
    private function getNamespaceErrorAttributes(Namespace_ $node): array {
        $attrs = $node->getAttributes();

        // Line and token position of the end collapse onto the start.
        foreach (['startLine' => 'endLine', 'startTokenPos' => 'endTokenPos'] as $startKey => $endKey) {
            if (isset($attrs[$startKey])) {
                $attrs[$endKey] = $attrs[$startKey];
            }
        }
        // The file position ends at the last character of the keyword.
        if (isset($attrs['startFilePos'])) {
            $attrs['endFilePos'] = $attrs['startFilePos'] + \strlen('namespace') - 1;
        }

        return $attrs;
    }

    /**
     * Determine namespacing style (semicolon or brace)
     *
     * @param Node[] $stmts Top-level statements.
*
     * @return null|string One of "semicolon", "brace" or null (no namespaces)
     */
    private function getNamespacingStyle(array $stmts): ?string {
        $detectedStyle = null;
        $sawDisallowedStmt = false;

        foreach ($stmts as $index => $stmt) {
            if (!$stmt instanceof Node\Stmt\Namespace_) {
                // declare(), __halt_compiler() and nops can be used before a namespace declaration,
                // and there may be a hashbang line at the very start of the file. Everything else is
                // forbidden before namespace declarations.
                $isAllowed = $stmt instanceof Node\Stmt\Declare_
                    || $stmt instanceof Node\Stmt\HaltCompiler
                    || $stmt instanceof Node\Stmt\Nop
                    || ($index === 0
                        && $stmt instanceof Node\Stmt\InlineHTML
                        && preg_match('/\A#!.*\r?\n\z/', $stmt->value));
                if (!$isAllowed) {
                    $sawDisallowedStmt = true;
                }
                continue;
            }

            $stmtStyle = null === $stmt->stmts ? 'semicolon' : 'brace';

            if (null !== $detectedStyle) {
                if ($detectedStyle !== $stmtStyle) {
                    $this->emitError(new Error(
                        'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
                        $this->getNamespaceErrorAttributes($stmt)
                    ));
                    // Treat like semicolon style for namespace normalization
                    return 'semicolon';
                }
                continue;
            }

            // First namespace declaration: it fixes the style and must not be preceded by other code.
            $detectedStyle = $stmtStyle;
            if ($sawDisallowedStmt) {
                $this->emitError(new Error(
                    'Namespace declaration statement has to be the very first statement in the script',
                    $this->getNamespaceErrorAttributes($stmt)
                ));
            }
        }

        return $detectedStyle;
    }

    /**
     * Converts an unqualified name into an Identifier if it names a builtin type supported by the
     * targeted PHP version; any other name is returned unchanged.
     *
     * @return Name|Identifier
     */
    protected function handleBuiltinTypes(Name $name) {
        if ($name->isUnqualified()) {
            $lowercased = $name->toLowerString();
            if ($this->phpVersion->supportsBuiltinType($lowercased)) {
                return new Node\Identifier($lowercased, $name->getAttributes());
            }
        }

        return $name;
    }

    /**
     * Get combined start and end attributes at a stack location
     *
* @param int $stackPos Stack location
     *
     * @return array<string, mixed> Combined start and end attributes
     */
    protected function getAttributesAt(int $stackPos): array {
        $startTokenPos = $this->tokenStartStack[$stackPos];
        $endTokenPos = $this->tokenEndStack[$stackPos];

        return $this->getAttributes($startTokenPos, $endTokenPos);
    }

    /**
     * Maps the spelling of a float cast to a Double::KIND_* constant.
     *
     * The lookup is a case-insensitive substring search: "float" is tried first, then "real";
     * everything else is reported as KIND_DOUBLE.
     */
    protected function getFloatCastKind(string $cast): int {
        $lowered = strtolower($cast);
        $kindsByKeyword = [
            'float' => Double::KIND_FLOAT,
            'real' => Double::KIND_REAL,
        ];
        foreach ($kindsByKeyword as $keyword => $kind) {
            if (false !== strpos($lowered, $keyword)) {
                return $kind;
            }
        }

        return Double::KIND_DOUBLE;
    }

    /**
     * Creates an integer node from its source representation.
     *
     * If Int_::fromString() throws an Error, the error is emitted and a node with the dummy value 0 is
     * returned instead.
     *
     * @param array<string, mixed> $attributes
     */
    protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
        try {
            $node = Int_::fromString($str, $attributes, $allowInvalidOctal);
        } catch (Error $error) {
            $this->emitError($error);
            $node = new Int_(0, $attributes);
        }

        return $node;
    }

    /**
     * Parse a T_NUM_STRING token into either an integer or string node.
     *
     * An integer node is only produced for a plain decimal ("0" or an optionally negative number
     * without leading zero) whose numeric value is still an int; otherwise the string is kept.
     *
     * @param string $str Number string
     * @param array<string, mixed> $attributes Attributes
     *
     * @return Int_|String_ Integer or string node.
     */
    protected function parseNumString(string $str, array $attributes) {
        if (preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
            $number = +$str;
            if (is_int($number)) {
                return new Int_($number, $attributes);
            }
        }

        return new String_($str, $attributes);
    }

    /**
     * Removes up to $indentLen characters of leading indentation from every line of $string.
     *
     * Errors are emitted (with $attributes) for indentation that contains the other whitespace
     * character, and for non-blank lines that are indented less than $indentLen.
     *
     * @param array<string, mixed> $attributes
     */
    protected function stripIndentation(
        string $string, int $indentLen, string $indentChar,
        bool $newlineAtStart, bool $newlineAtEnd, array $attributes
    ): string {
        if (0 === $indentLen) {
            return $string;
        }

        // A line starts after "\n", and optionally at the very start of the string.
        $lineStart = '(?<=\n)';
        if ($newlineAtStart) {
            $lineStart = '(?:(?<=\n)|\A)';
        }
        // The second group only matches for blank lines, which are exempt from the minimum indentation.
        $lineEnd = '(?=[\r\n])';
        if ($newlineAtEnd) {
            $lineEnd = '(?:(?=[\r\n])|\z)';
        }
        $pattern = '/' . $lineStart . '([ \t]*)(' . $lineEnd . ')?/';

        $disallowedChar = $indentChar === " " ? "\t" : " ";
        $stripLine = function ($matches) use ($indentLen, $disallowedChar, $attributes) {
            $removed = substr($matches[1], 0, $indentLen);
            $removedLen = strlen($removed);

            if (false !== strpos($removed, $disallowedChar)) {
                $this->emitError(new Error(
                    'Invalid indentation - tabs and spaces cannot be mixed', $attributes
                ));
            } elseif ($removedLen < $indentLen && !isset($matches[2])) {
                $this->emitError(new Error(
                    'Invalid body indentation level ' .
                    '(expecting an indentation level of at least ' . $indentLen . ')',
                    $attributes
                ));
            }

            return substr($matches[0], $removedLen);
        };

        return preg_replace_callback($pattern, $stripLine, $string);
    }

    /**
     * Builds the node for a heredoc or nowdoc string.
     *
     * The label and closing indentation are recorded as the "docLabel" and "docIndentation" attributes,
     * the indentation is stripped from the contents and the final newline is removed. String contents
     * yield a String_ node, a list of parts yields an InterpolatedString node.
     *
     * @param string|(Expr|InterpolatedStringPart)[] $contents
     * @param array<string, mixed> $attributes
     * @param array<string, mixed> $endTokenAttributes
     */
    protected function parseDocString(
        string $startToken, $contents, string $endToken,
        array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
    ): Expr {
        // A single quote in the opening token marks a nowdoc.
        if (strpos($startToken, "'") === false) {
            $kind = String_::KIND_HEREDOC;
        } else {
            $kind = String_::KIND_NOWDOC;
        }

        $labelPattern = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
        $matched = preg_match($labelPattern, $startToken, $labelMatch);
        assert($matched === 1);
        $label = $labelMatch[1];

        $matched = preg_match('/\A[ \t]*/', $endToken, $indentMatch);
        assert($matched === 1);
        $indentation = $indentMatch[0];

        $attributes['kind'] = $kind;
        $attributes['docLabel'] = $label;
        $attributes['docIndentation'] = $indentation;

        $usesSpaces = false !== strpos($indentation, " ");
        $usesTabs = false !== strpos($indentation, "\t");
        if ($usesSpaces && $usesTabs) {
            $this->emitError(new Error(
                'Invalid indentation - tabs and spaces cannot be mixed',
                $endTokenAttributes
            ));

            // Proceed processing as if this doc string is not indented
            $indentation = '';
        }

        $indentLen = \strlen($indentation);
        $indentChar = $usesSpaces ? " " : "\t";

        if (\is_string($contents)) {
            if ('' === $contents) {
                $attributes['rawValue'] = $contents;
                return new String_('', $attributes);
            }

            $contents = $this->stripIndentation(
                $contents, $indentLen, $indentChar, true, true, $attributes
            );
            $contents = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);
            $attributes['rawValue'] = $contents;

            // Escape sequences are only interpreted in heredocs.
            if (String_::KIND_HEREDOC === $kind) {
                $contents = String_::parseEscapeSequences($contents, null, $parseUnicodeEscape);
            }

            return new String_($contents, $attributes);
        }

        assert(count($contents) > 0);
        if (!$contents[0] instanceof Node\InterpolatedStringPart) {
            // If there is no leading encapsed string part, pretend there is an empty one
            $this->stripIndentation(
                '', $indentLen, $indentChar, true, false, $contents[0]->getAttributes()
            );
        }

        $lastIndex = \count($contents) - 1;
        $keptParts = [];
        foreach ($contents as $index => $part) {
            if (!$part instanceof Node\InterpolatedStringPart) {
                $keptParts[] = $part;
                continue;
            }

            $isLast = $index === $lastIndex;
            $part->value = $this->stripIndentation(
                $part->value, $indentLen, $indentChar,
                $index === 0, $isLast, $part->getAttributes()
            );
            if ($isLast) {
                $part->value = preg_replace('~(\r\n|\n|\r)\z~', '', $part->value);
            }
            $part->setAttribute('rawValue', $part->value);
            $part->value = String_::parseEscapeSequences($part->value, null, $parseUnicodeEscape);

            // String parts that end up empty are dropped from the result.
            if ('' !== $part->value) {
                $keptParts[] = $part;
            }
        }

        return new InterpolatedString($keptParts, $attributes);
    }

    /**
     * Creates a Comment (or Comment\Doc for doc comments) from a comment token.
     *
     * The comment starts and ends at the same token position, $tokenPos.
     */
    protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
        assert($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT);

        $endLine = $token->getEndLine();
        $endFilePos = $token->getEndPos() - 1;
        if (\T_DOC_COMMENT === $token->id) {
            return new Comment\Doc(
                $token->text, $token->line, $token->pos, $tokenPos,
                $endLine, $endFilePos, $tokenPos
            );
        }

        return new Comment(
            $token->text, $token->line, $token->pos, $tokenPos,
            $endLine, $endFilePos, $tokenPos
        );
    }

    /**
     * Get last comment before the given token position, if any
     *
     * The search walks backwards and stops at the first token that is not one of the dropped tokens.
     */
    protected function getCommentBeforeToken(int $tokenPos): ?Comment {
        for ($pos = $tokenPos - 1; $pos >= 0; --$pos) {
            $candidate = $this->tokens[$pos];
            if (!isset($this->dropTokens[$candidate->id])) {
                return null;
            }

            if ($candidate->id === \T_COMMENT || $candidate->id === \T_DOC_COMMENT) {
                return $this->createCommentFromToken($candidate, $pos);
            }
        }

        return null;
    }

    /**
     * Create a zero-length nop to capture preceding comments, if any.
*/
    protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
        $comment = $this->getCommentBeforeToken($tokenPos);
        if ($comment === null) {
            return null;
        }

        // The nop is positioned directly after the comment. Its start is one past its end, which is
        // what makes it zero-length.
        $commentEndLine = $comment->getEndLine();
        $commentEndFilePos = $comment->getEndFilePos();
        $commentEndTokenPos = $comment->getEndTokenPos();
        $attributes = [
            'startLine' => $commentEndLine,
            'endLine' => $commentEndLine,
            'startFilePos' => $commentEndFilePos + 1,
            'endFilePos' => $commentEndFilePos,
            'startTokenPos' => $commentEndTokenPos + 1,
            'endTokenPos' => $commentEndTokenPos,
        ];
        return new Nop($attributes);
    }

    /**
     * Create a nop spanning the given token range, but only if a comment precedes its start token.
     */
    protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
        if ($this->getCommentBeforeToken($tokenStartPos) === null) {
            return null;
        }
        return new Nop($this->getAttributes($tokenStartPos, $tokenEndPos));
    }

    /**
     * Stops parsing after __halt_compiler() and returns the text that follows it.
     *
     * @return string Text of the next token if it is inline HTML, otherwise an empty string
     */
    protected function handleHaltCompiler(): string {
        // Prevent the lexer from returning any further tokens.
        $nextToken = $this->tokens[$this->tokenPos + 1];
        $this->tokenPos = \count($this->tokens) - 2;

        // Return text after __halt_compiler.
        return $nextToken->id === \T_INLINE_HTML ? $nextToken->text : '';
    }

    /**
     * Whether the inline HTML at the given stack position is preceded by a newline.
     *
     * Inline HTML at the very start of the token stream counts as having one. Otherwise the text of
     * the preceding close tag is checked for "\n" or "\r".
     */
    protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
        $tokenPos = $this->tokenStartStack[$stackPos];
        $token = $this->tokens[$tokenPos];
        assert($token->id === \T_INLINE_HTML);
        if ($tokenPos > 0) {
            $prevToken = $this->tokens[$tokenPos - 1];
            assert($prevToken->id === \T_CLOSE_TAG);
            return false !== strpos($prevToken->text, "\n")
                || false !== strpos($prevToken->text, "\r");
        }
        return true;
    }

    /**
     * Attributes for the placeholder of an empty array element at the given token position.
     *
     * @return array<string, mixed>
     */
    protected function createEmptyElemAttributes(int $tokenPos): array {
        return $this->getAttributesForToken($tokenPos);
    }

    /**
     * Converts an array literal that turned out to be a destructuring target into a List_ node.
     *
     * The array is removed from $createdArrays so that parse() does not report its empty elements.
     * Empty elements become null items and nested array literals are converted recursively.
     */
    protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
        // offsetUnset() does the same as detach(), which is deprecated as of PHP 8.5.
        $this->createdArrays->offsetUnset($node);
        return new Expr\List_(array_map(function (Node\ArrayItem $item) {
            if ($item->value instanceof Expr\Error) {
                // We used Error as a placeholder for empty elements, which are legal for destructuring.
                return null;
            }
            if ($item->value instanceof Array_) {
                return new Node\ArrayItem(
                    $this->fixupArrayDestructuring($item->value),
                    $item->key, $item->byRef, $item->getAttributes());
            }
            return $item;
        }, $node->items), ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes());
    }

    /**
     * Replaces the Error placeholders of empty elements in a list() node with null items.
     */
    protected function postprocessList(Expr\List_ $node): void {
        foreach ($node->items as $i => $item) {
            if ($item->value instanceof Expr\Error) {
                // We used Error as a placeholder for empty elements, which are legal for destructuring.
                $node->items[$i] = null;
            }
        }
    }

    /**
     * Extends the end attributes of an else/elseif node to a trailing nop statement, if there is one.
     *
     * @param ElseIf_|Else_ $node
     */
    protected function fixupAlternativeElse($node): void {
        // Make sure a trailing nop statement carrying comments is part of the node.
        $numStmts = \count($node->stmts);
        if ($numStmts === 0) {
            return;
        }

        $lastStmt = $node->stmts[$numStmts - 1];
        if (!$lastStmt instanceof Nop) {
            return;
        }

        // Only the end attributes that are set on the nop are copied.
        $nopAttrs = $lastStmt->getAttributes();
        foreach (['endLine', 'endFilePos', 'endTokenPos'] as $endAttribute) {
            if (isset($nopAttrs[$endAttribute])) {
                $node->setAttribute($endAttribute, $nopAttrs[$endAttribute]);
            }
        }
    }

    /**
     * Verifies a class modifier combination, emitting any resulting error at the modifier's position.
     *
     * @param int $a Passed as first argument to Modifiers::verifyClassModifier()
     * @param int $b Passed as second argument to Modifiers::verifyClassModifier()
     * @param int $modifierPos Stack position used for the error attributes
     */
    protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
        try {
            Modifiers::verifyClassModifier($a, $b);
        } catch (Error $error) {
            $error->setAttributes($this->getAttributesAt($modifierPos));
            $this->emitError($error);
        }
    }

    /**
     * Verifies a modifier combination, emitting any resulting error at the modifier's position.
     *
     * @param int $a Passed as first argument to Modifiers::verifyModifier()
     * @param int $b Passed as second argument to Modifiers::verifyModifier()
     * @param int $modifierPos Stack position used for the error attributes
     */
    protected function checkModifier(int $a, int $b, int $modifierPos): void {
        // Jumping through some hoops here because verifyModifier() is also used elsewhere
        try {
            Modifiers::verifyModifier($a, $b);
        } catch (Error $error) {
            $error->setAttributes($this->getAttributesAt($modifierPos));
            $this->emitError($error);
        }
    }

    /**
     * Emits an error if a variadic parameter declares a default value.
     */
    protected function checkParam(Param $node): void {
        if ($node->variadic && null !== $node->default) {
            $this->emitError(new Error(
                'Variadic parameter cannot have a default value',
                $node->default->getAttributes()
            ));
        }
    }

    /**
     * Emits an error if a try statement has neither a catch nor a finally clause.
     */
    protected function checkTryCatch(TryCatch $node): void {
        if (empty($node->catches) && null === $node->finally) {
            $this->emitError(new Error(
                'Cannot use try without catch or finally', $node->getAttributes()
            ));
        }
    }

    protected function checkNamespace(Namespace_ $node): void {
        if (null !== $node->stmts) {
            foreach ($node->stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $this->emitError(new Error(
                        'Namespace declarations cannot be nested', $stmt->getAttributes()
                    ));
1064 } 1065 } 1066 } 1067 } 1068 1069 private function checkClassName(?Identifier $name, int $namePos): void { 1070 if (null !== $name && $name->isSpecialClassName()) { 1071 $this->emitError(new Error( 1072 sprintf('Cannot use \'%s\' as class name as it is reserved', $name), 1073 $this->getAttributesAt($namePos) 1074 )); 1075 } 1076 } 1077 1078 /** @param Name[] $interfaces */ 1079 private function checkImplementedInterfaces(array $interfaces): void { 1080 foreach ($interfaces as $interface) { 1081 if ($interface->isSpecialClassName()) { 1082 $this->emitError(new Error( 1083 sprintf('Cannot use \'%s\' as interface name as it is reserved', $interface), 1084 $interface->getAttributes() 1085 )); 1086 } 1087 } 1088 } 1089 1090 protected function checkClass(Class_ $node, int $namePos): void { 1091 $this->checkClassName($node->name, $namePos); 1092 1093 if ($node->extends && $node->extends->isSpecialClassName()) { 1094 $this->emitError(new Error( 1095 sprintf('Cannot use \'%s\' as class name as it is reserved', $node->extends), 1096 $node->extends->getAttributes() 1097 )); 1098 } 1099 1100 $this->checkImplementedInterfaces($node->implements); 1101 } 1102 1103 protected function checkInterface(Interface_ $node, int $namePos): void { 1104 $this->checkClassName($node->name, $namePos); 1105 $this->checkImplementedInterfaces($node->extends); 1106 } 1107 1108 protected function checkEnum(Enum_ $node, int $namePos): void { 1109 $this->checkClassName($node->name, $namePos); 1110 $this->checkImplementedInterfaces($node->implements); 1111 } 1112 1113 protected function checkClassMethod(ClassMethod $node, int $modifierPos): void { 1114 if ($node->flags & Modifiers::STATIC) { 1115 switch ($node->name->toLowerString()) { 1116 case '__construct': 1117 $this->emitError(new Error( 1118 sprintf('Constructor %s() cannot be static', $node->name), 1119 $this->getAttributesAt($modifierPos))); 1120 break; 1121 case '__destruct': 1122 $this->emitError(new Error( 1123 sprintf('Destructor %s() 
cannot be static', $node->name), 1124 $this->getAttributesAt($modifierPos))); 1125 break; 1126 case '__clone': 1127 $this->emitError(new Error( 1128 sprintf('Clone method %s() cannot be static', $node->name), 1129 $this->getAttributesAt($modifierPos))); 1130 break; 1131 } 1132 } 1133 1134 if ($node->flags & Modifiers::READONLY) { 1135 $this->emitError(new Error( 1136 sprintf('Method %s() cannot be readonly', $node->name), 1137 $this->getAttributesAt($modifierPos))); 1138 } 1139 } 1140 1141 protected function checkClassConst(ClassConst $node, int $modifierPos): void { 1142 foreach ([Modifiers::STATIC, Modifiers::ABSTRACT, Modifiers::READONLY] as $modifier) { 1143 if ($node->flags & $modifier) { 1144 $this->emitError(new Error( 1145 "Cannot use '" . Modifiers::toString($modifier) . "' as constant modifier", 1146 $this->getAttributesAt($modifierPos))); 1147 } 1148 } 1149 } 1150 1151 protected function checkUseUse(UseItem $node, int $namePos): void { 1152 if ($node->alias && $node->alias->isSpecialClassName()) { 1153 $this->emitError(new Error( 1154 sprintf( 1155 'Cannot use %s as %s because \'%2$s\' is a special class name', 1156 $node->name, $node->alias 1157 ), 1158 $this->getAttributesAt($namePos) 1159 )); 1160 } 1161 } 1162 1163 /** @param PropertyHook[] $hooks */ 1164 protected function checkPropertyHookList(array $hooks, int $hookPos): void { 1165 if (empty($hooks)) { 1166 $this->emitError(new Error( 1167 'Property hook list cannot be empty', $this->getAttributesAt($hookPos))); 1168 } 1169 } 1170 1171 protected function checkPropertyHook(PropertyHook $hook, ?int $paramListPos): void { 1172 $name = $hook->name->toLowerString(); 1173 if ($name !== 'get' && $name !== 'set') { 1174 $this->emitError(new Error( 1175 'Unknown hook "' . $hook->name . 
'", expected "get" or "set"', 1176 $hook->name->getAttributes())); 1177 } 1178 if ($name === 'get' && $paramListPos !== null) { 1179 $this->emitError(new Error( 1180 'get hook must not have a parameter list', $this->getAttributesAt($paramListPos))); 1181 } 1182 } 1183 1184 protected function checkPropertyHookModifiers(int $a, int $b, int $modifierPos): void { 1185 try { 1186 Modifiers::verifyModifier($a, $b); 1187 } catch (Error $error) { 1188 $error->setAttributes($this->getAttributesAt($modifierPos)); 1189 $this->emitError($error); 1190 } 1191 1192 if ($b != Modifiers::FINAL) { 1193 $this->emitError(new Error( 1194 'Cannot use the ' . Modifiers::toString($b) . ' modifier on a property hook', 1195 $this->getAttributesAt($modifierPos))); 1196 } 1197 } 1198 1199 /** @param array<Node\Arg|Node\VariadicPlaceholder> $args */ 1200 private function isSimpleExit(array $args): bool { 1201 if (\count($args) === 0) { 1202 return true; 1203 } 1204 if (\count($args) === 1) { 1205 $arg = $args[0]; 1206 return $arg instanceof Arg && $arg->name === null && 1207 $arg->byRef === false && $arg->unpack === false; 1208 } 1209 return false; 1210 } 1211 1212 /** 1213 * @param array<Node\Arg|Node\VariadicPlaceholder> $args 1214 * @param array<string, mixed> $attrs 1215 */ 1216 protected function createExitExpr(string $name, int $namePos, array $args, array $attrs): Expr { 1217 if ($this->isSimpleExit($args)) { 1218 // Create Exit node for backwards compatibility. 1219 $attrs['kind'] = strtolower($name) === 'exit' ? Expr\Exit_::KIND_EXIT : Expr\Exit_::KIND_DIE; 1220 return new Expr\Exit_(\count($args) === 1 ? $args[0]->value : null, $attrs); 1221 } 1222 return new Expr\FuncCall(new Name($name, $this->getAttributesAt($namePos)), $args, $attrs); 1223 } 1224 1225 /** 1226 * Creates the token map. 1227 * 1228 * The token map maps the PHP internal token identifiers 1229 * to the identifiers used by the Parser. Additionally it 1230 * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. 
1231 * 1232 * @return array<int, int> The token map 1233 */ 1234 protected function createTokenMap(): array { 1235 $tokenMap = []; 1236 1237 // Single-char tokens use an identity mapping. 1238 for ($i = 0; $i < 256; ++$i) { 1239 $tokenMap[$i] = $i; 1240 } 1241 1242 foreach ($this->symbolToName as $name) { 1243 if ($name[0] === 'T') { 1244 $tokenMap[\constant($name)] = constant(static::class . '::' . $name); 1245 } 1246 } 1247 1248 // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO 1249 $tokenMap[\T_OPEN_TAG_WITH_ECHO] = static::T_ECHO; 1250 // T_CLOSE_TAG is equivalent to ';' 1251 $tokenMap[\T_CLOSE_TAG] = ord(';'); 1252 1253 // We have created a map from PHP token IDs to external symbol IDs. 1254 // Now map them to the internal symbol ID. 1255 $fullTokenMap = []; 1256 foreach ($tokenMap as $phpToken => $extSymbol) { 1257 $intSymbol = $this->tokenToSymbol[$extSymbol]; 1258 if ($intSymbol === $this->invalidSymbol) { 1259 continue; 1260 } 1261 $fullTokenMap[$phpToken] = $intSymbol; 1262 } 1263 1264 return $fullTokenMap; 1265 } 1266} 1267