1<?php declare(strict_types=1); 2 3namespace PhpParser; 4 5/* 6 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in 7 * turn is based on work by Masato Bito. 8 */ 9 10use PhpParser\Node\Arg; 11use PhpParser\Node\Expr; 12use PhpParser\Node\Expr\Array_; 13use PhpParser\Node\Expr\Cast\Double; 14use PhpParser\Node\Identifier; 15use PhpParser\Node\InterpolatedStringPart; 16use PhpParser\Node\Name; 17use PhpParser\Node\Param; 18use PhpParser\Node\PropertyHook; 19use PhpParser\Node\Scalar\InterpolatedString; 20use PhpParser\Node\Scalar\Int_; 21use PhpParser\Node\Scalar\String_; 22use PhpParser\Node\Stmt; 23use PhpParser\Node\Stmt\Class_; 24use PhpParser\Node\Stmt\ClassConst; 25use PhpParser\Node\Stmt\ClassMethod; 26use PhpParser\Node\Stmt\Else_; 27use PhpParser\Node\Stmt\ElseIf_; 28use PhpParser\Node\Stmt\Enum_; 29use PhpParser\Node\Stmt\Interface_; 30use PhpParser\Node\Stmt\Namespace_; 31use PhpParser\Node\Stmt\Nop; 32use PhpParser\Node\Stmt\Property; 33use PhpParser\Node\Stmt\TryCatch; 34use PhpParser\Node\UseItem; 35use PhpParser\Node\VarLikeIdentifier; 36use PhpParser\NodeVisitor\CommentAnnotatingVisitor; 37 38abstract class ParserAbstract implements Parser { 39 private const SYMBOL_NONE = -1; 40 41 /** @var Lexer Lexer that is used when parsing */ 42 protected Lexer $lexer; 43 /** @var PhpVersion PHP version to target on a best-effort basis */ 44 protected PhpVersion $phpVersion; 45 46 /* 47 * The following members will be filled with generated parsing data: 48 */ 49 50 /** @var int Size of $tokenToSymbol map */ 51 protected int $tokenToSymbolMapSize; 52 /** @var int Size of $action table */ 53 protected int $actionTableSize; 54 /** @var int Size of $goto table */ 55 protected int $gotoTableSize; 56 57 /** @var int Symbol number signifying an invalid token */ 58 protected int $invalidSymbol; 59 /** @var int Symbol number of error recovery token */ 60 protected int $errorSymbol; 61 /** @var int Action number signifying default action */ 62 
protected int $defaultAction; 63 /** @var int Rule number signifying that an unexpected token was encountered */ 64 protected int $unexpectedTokenRule; 65 66 protected int $YY2TBLSTATE; 67 /** @var int Number of non-leaf states */ 68 protected int $numNonLeafStates; 69 70 /** @var int[] Map of PHP token IDs to internal symbols */ 71 protected array $phpTokenToSymbol; 72 /** @var array<int, bool> Map of PHP token IDs to drop */ 73 protected array $dropTokens; 74 /** @var int[] Map of external symbols (static::T_*) to internal symbols */ 75 protected array $tokenToSymbol; 76 /** @var string[] Map of symbols to their names */ 77 protected array $symbolToName; 78 /** @var array<int, string> Names of the production rules (only necessary for debugging) */ 79 protected array $productions; 80 81 /** @var int[] Map of states to a displacement into the $action table. The corresponding action for this 82 * state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the 83 * action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 84 protected array $actionBase; 85 /** @var int[] Table of actions. Indexed according to $actionBase comment. */ 86 protected array $action; 87 /** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol 88 * then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 89 protected array $actionCheck; 90 /** @var int[] Map of states to their default action */ 91 protected array $actionDefault; 92 /** @var callable[] Semantic action callbacks */ 93 protected array $reduceCallbacks; 94 95 /** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this 96 * non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */ 97 protected array $gotoBase; 98 /** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. 
*/ 99 protected array $goto; 100 /** @var int[] Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal 101 * then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */ 102 protected array $gotoCheck; 103 /** @var int[] Map of non-terminals to the default state to goto after their reduction */ 104 protected array $gotoDefault; 105 106 /** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for 107 * determining the state to goto after reduction. */ 108 protected array $ruleToNonTerminal; 109 /** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to 110 * be popped from the stack(s) on reduction. */ 111 protected array $ruleToLength; 112 113 /* 114 * The following members are part of the parser state: 115 */ 116 117 /** @var mixed Temporary value containing the result of last semantic action (reduction) */ 118 protected $semValue; 119 /** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */ 120 protected array $semStack; 121 /** @var int[] Token start position stack */ 122 protected array $tokenStartStack; 123 /** @var int[] Token end position stack */ 124 protected array $tokenEndStack; 125 126 /** @var ErrorHandler Error handler */ 127 protected ErrorHandler $errorHandler; 128 /** @var int Error state, used to avoid error floods */ 129 protected int $errorState; 130 131 /** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */ 132 protected ?\SplObjectStorage $createdArrays; 133 134 /** @var Token[] Tokens for the current parse */ 135 protected array $tokens; 136 /** @var int Current position in token array */ 137 protected int $tokenPos; 138 139 /** 140 * Initialize $reduceCallbacks map. 
*/
    abstract protected function initReduceCallbacks(): void;

    /**
     * Creates a parser instance.
     *
     * Besides storing its arguments, the constructor initializes the reduce callbacks, builds the
     * map from PHP token IDs to internal symbols, and registers the token IDs that the parse loop
     * skips (whitespace, open tags, comments and bad characters).
     *
     * @param Lexer $lexer A lexer
     * @param PhpVersion|null $phpVersion PHP version to target, defaults to latest supported. This
     *                   option is best-effort: Even if specified, parsing will generally assume the latest
     *                   supported version and only adjust behavior in minor ways, for example by omitting
     *                   errors in older versions and interpreting type hints as a name or identifier depending
     *                   on version.
     */
    public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
        $this->lexer = $lexer;
        $this->phpVersion = $phpVersion ?? PhpVersion::getNewestSupported();

        $this->initReduceCallbacks();
        $this->phpTokenToSymbol = $this->createTokenMap();
        $this->dropTokens = array_fill_keys(
            [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], true
        );
    }

    /**
     * Parses PHP code into a node tree.
     *
     * If a non-throwing error handler is used, the parser will continue parsing after an error
     * occurred and attempt to build a partial AST.
     *
     * @param string $code The source code to parse
     * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
     *                                        to ErrorHandler\Throwing.
     *
     * @return Node\Stmt[]|null Array of statements (or null if a non-throwing error handler is used
     *                          and the parser was unable to recover from an error).
     */
    public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
        $this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing();
        $this->createdArrays = new \SplObjectStorage();

        $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
        $result = $this->doParse();

        // Report errors for any empty elements used inside arrays. This is delayed until after the main parse,
        // because we don't know a priori whether a given array expression will be used in a destructuring context
        // or not.
        foreach ($this->createdArrays as $node) {
            foreach ($node->items as $item) {
                if ($item->value instanceof Expr\Error) {
                    $this->errorHandler->handleError(
                        new Error('Cannot use empty array elements in arrays', $item->getAttributes()));
                }
            }
        }

        // Clear out some of the interior state, so we don't hold onto unnecessary
        // memory between uses of the parser. $this->tokens is deliberately kept: it is still
        // needed below and is what getTokens() returns.
        $this->tokenStartStack = [];
        $this->tokenEndStack = [];
        $this->semStack = [];
        $this->semValue = null;
        $this->createdArrays = null;

        // Only a successfully built statement list gets the comment-annotating traversal.
        if ($result !== null) {
            $traverser = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
            $traverser->traverse($result);
        }

        return $result;
    }

    /**
     * Returns the tokens of the most recent parse() call.
     *
     * The underlying property is only assigned inside parse(), so this is meaningful only after
     * parse() has been called at least once.
     *
     * @return Token[]
     */
    public function getTokens(): array {
        return $this->tokens;
    }

    /**
     * Runs the table-driven shift/reduce loop over $this->tokens.
     *
     * Tokens whose id is listed in $dropTokens are skipped; every other token id is mapped to an
     * internal symbol through $phpTokenToSymbol. On reduction the matching entry of $reduceCallbacks
     * is invoked (if it is not null) and the value found in $semValue afterwards is pushed onto the
     * semantic value stack.
     *
     * Syntax errors are reported through emitError(); with a non-throwing handler the loop then
     * tries to recover by popping states until one accepts the error symbol.
     *
     * @return Stmt[]|null The value of $semValue when the accept rule is reached, or null if an
     *                     error could not be recovered from
     *
     * @throws \RangeException If the lexer produced a token id without a symbol mapping
     */
    protected function doParse(): ?array {
        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;
        $tokenValue = null;
        $this->tokenPos = -1;

        // Keep stack of start and end attributes
        $this->tokenStartStack = [];
        $this->tokenEndStack = [0];

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = [$state];

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = [];

        // Current position in the stack(s)
        $stackPos = 0;

        // Non-zero while recovering from an error: set to 3 on error, decremented on every shift.
        $this->errorState = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] === 0) {
                // No action table entry for this state: use its default action without reading a token.
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Advance to the next token that is not in $dropTokens.
                    do {
                        $token = $this->tokens[++$this->tokenPos];
                        $tokenId = $token->id;
                    } while (isset($this->dropTokens[$tokenId]));

                    // Map the lexer token id to the internally used symbols.
                    $tokenValue = $token->text;
                    if (!isset($this->phpTokenToSymbol[$tokenId])) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }
                    $symbol = $this->phpTokenToSymbol[$tokenId];

                    //$this->traceRead($symbol);
                }

                // Look the action up in the primary table slot and, for states below YY2TBLSTATE,
                // in the secondary slot. Note that $idx and $action are assigned inside the condition.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                    && ($action = $this->action[$idx]) !== $this->defaultAction) {
                    /*
                     * >= numNonLeafStates: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;
                        $this->semStack[$stackPos] = $tokenValue;
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenPos;
                        // The lookahead has been consumed; the next iteration reads a new token.
                        $symbol = self::SYMBOL_NONE;

                        if ($this->errorState) {
                            --$this->errorState;
                        }

                        if ($action < $this->numNonLeafStates) {
                            continue;
                        }

                        /* $yyn >= numNonLeafStates means shift-and-reduce */
                        $rule = $action - $this->numNonLeafStates;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            // Apply $rule, and keep applying follow-up rules for as long as the goto state is a
            // shift-and-reduce state.
            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                }
                if ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    $ruleLength = $this->ruleToLength[$rule];
                    try {
                        $callback = $this->reduceCallbacks[$rule];
                        if ($callback !== null) {
                            $callback($this, $stackPos);
                        } elseif ($ruleLength > 0) {
                            // No callback: the value of the rule is the value of its first element.
                            $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                        }
                    } catch (Error $e) {
                        // An Error without a start line (-1) gets the line of the current token.
                        if (-1 === $e->getStartLine()) {
                            $e->setStartLine($this->tokens[$this->tokenPos]->line);
                        }

                        $this->emitError($e);
                        // Can't recover from this type of error
                        return null;
                    }

                    /* Goto - shift nonterminal */
                    $lastTokenEnd = $this->tokenEndStack[$stackPos];
                    $stackPos -= $ruleLength;
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    ++$stackPos;
                    $stateStack[$stackPos] = $state;
                    $this->semStack[$stackPos] = $this->semValue;
                    $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                    if ($ruleLength === 0) {
                        // Empty productions use the start attributes of the lookahead token.
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    }
                } else {
                    /* error */
                    switch ($this->errorState) {
                        case 0:
                            // Only report when not already recovering, to avoid an error flood.
                            $msg = $this->getErrorMessage($symbol, $state);
                            $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                            // Break missing intentionally
                            // no break
                        case 1:
                        case 2:
                            $this->errorState = 3;

                            // Pop until error-expecting state uncovered
                            while (!(
                                (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                                || ($state < $this->YY2TBLSTATE
                                    && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                                if ($stackPos <= 0) {
                                    // Could not recover from error
                                    return null;
                                }
                                $state = $stateStack[--$stackPos];
                                //$this->tracePop($state);
                            }

                            //$this->traceShift($this->errorSymbol);
                            ++$stackPos;
                            $stateStack[$stackPos] = $state = $action;

                            // We treat the error symbol as being empty, so we reset the end attributes
                            // to the end attributes of the last non-error symbol
                            $this->tokenStartStack[$stackPos] = $this->tokenPos;
                            $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                            break;

                        case 3:
                            if ($symbol === 0) {
                                // Reached EOF without recovering from error
                                return null;
                            }

                            // Discard the offending token and leave the reduce loop (break 2 exits
                            // both the switch and the inner for), so the outer loop reads a new token.
                            //$this->traceDiscard($symbol);
                            $symbol = self::SYMBOL_NONE;
                            break 2;
                    }
                }

                if ($state < $this->numNonLeafStates) {
                    break;
                }

                /* >= numNonLeafStates means shift-and-reduce */
                $rule = $state - $this->numNonLeafStates;
            }
        }
    }

    /**
     * Forwards an error to the error handler of the current parse.
     */
    protected function emitError(Error $error): void {
        $this->errorHandler->handleError($error);
    }

    /**
* Build the message for a syntax error, appending the expected tokens when they can be listed.
     *
     * @param int $symbol Unexpected symbol
     * @param int $state State at time of error
     *
     * @return string Formatted error message
     */
    protected function getErrorMessage(int $symbol, int $state): string {
        $expected = $this->getExpectedTokens($state);
        $suffix = $expected ? ', expecting ' . implode(' or ', $expected) : '';

        return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $suffix;
    }

    /**
     * Collect the names of the tokens that have a real action in the given state.
     *
     * The list is capped: as soon as a fifth candidate shows up, nothing is reported at all.
     *
     * @param int $state State
     *
     * @return string[] Expected tokens. If too many, an empty array is returned.
     */
    protected function getExpectedTokens(int $state): array {
        $tokenNames = [];
        $primaryBase = $this->actionBase[$state];

        foreach ($this->symbolToName as $symbol => $name) {
            // Primary action table slot for this state/symbol pair.
            $idx = $primaryBase + $symbol;
            $hasEntry = $idx >= 0 && $idx < $this->actionTableSize
                && $this->actionCheck[$idx] === $symbol;

            // Secondary slot, only consulted for states below YY2TBLSTATE.
            if (!$hasEntry && $state < $this->YY2TBLSTATE) {
                $idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol;
                $hasEntry = $idx >= 0 && $idx < $this->actionTableSize
                    && $this->actionCheck[$idx] === $symbol;
            }

            if (!$hasEntry) {
                continue;
            }

            // Entries that mean "unexpected token" or "use default", and the error symbol itself,
            // are not tokens a user could have written here.
            $action = $this->action[$idx];
            if ($action === $this->unexpectedTokenRule
                || $action === $this->defaultAction
                || $symbol === $this->errorSymbol
            ) {
                continue;
            }

            if (count($tokenNames) === 4) {
                /* Too many expected tokens */
                return [];
            }

            $tokenNames[] = $name;
        }

        return $tokenNames;
    }

    /**
     * Get attributes for a node with the given start and end token positions.
*
     * @param int $tokenStartPos Token position the node starts at
     * @param int $tokenEndPos Token position the node ends at
     * @return array<string, mixed> Attributes
     */
    protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
        $first = $this->tokens[$tokenStartPos];
        // End line and file position are derived from the token that follows the end token.
        $following = $this->tokens[$tokenEndPos + 1];

        $start = [
            'startLine' => $first->line,
            'startTokenPos' => $tokenStartPos,
            'startFilePos' => $first->pos,
        ];
        $end = [
            'endLine' => $following->line,
            'endTokenPos' => $tokenEndPos,
            'endFilePos' => $following->pos - 1,
        ];

        // Keys are disjoint, so the union keeps all six entries in start-then-end order.
        return $start + $end;
    }

    /**
     * Get attributes for a single token at the given token position.
     *
     * The last token of the token array has no successor, so its attributes are built directly
     * from the token itself instead of going through getAttributes().
     *
     * @return array<string, mixed> Attributes
     */
    protected function getAttributesForToken(int $tokenPos): array {
        $lastIndex = \count($this->tokens) - 1;
        if ($tokenPos >= $lastIndex) {
            // Get attributes for the sentinel token.
            $sentinel = $this->tokens[$tokenPos];
            return [
                'startLine' => $sentinel->line,
                'startTokenPos' => $tokenPos,
                'startFilePos' => $sentinel->pos,
                'endLine' => $sentinel->line,
                'endTokenPos' => $tokenPos,
                'endFilePos' => $sentinel->pos,
            ];
        }

        return $this->getAttributes($tokenPos, $tokenPos);
    }

    /*
     * Tracing functions used for debugging the parser.
     */

    /*
    protected function traceNewState($state, $symbol): void {
        echo '% State ' . $state
            . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
    }

    protected function traceRead($symbol): void {
        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceShift($symbol): void {
        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceAccept(): void {
        echo "% Accepted.\n";
    }

    protected function traceReduce($n): void {
        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
    }

    protected function tracePop($state): void {
        echo '% Recovering, uncovered state ' . $state . "\n";
    }

    protected function traceDiscard($symbol): void {
        echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
    }
    */

    /*
     * Helper functions invoked by semantic actions
     */

    /**
     * Normalizes top-level statements with respect to namespaces.
     *
     * For brace-style namespaces the statements are returned unchanged, after reporting (once) any
     * code placed between or after the namespace blocks. For semicolon-style namespaces the
     * statements following a declaration are moved into that namespace's ->stmts.
     *
     * @param Node\Stmt[] $stmts
     * @return Node\Stmt[]
     */
    protected function handleNamespaces(array $stmts): array {
        $style = $this->getNamespacingStyle($stmts);
        if (null === $style) {
            // not namespaced, nothing to do
            return $stmts;
        }

        if ('brace' === $style) {
            // Only check that no invalid statement follows the first namespace block.
            $seenNamespace = false;
            foreach ($stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $seenNamespace = true;
                    continue;
                }
                if ($stmt instanceof Node\Stmt\HaltCompiler || $stmt instanceof Nop || !$seenNamespace) {
                    continue;
                }

                $this->emitError(new Error(
                    'No code may exist outside of namespace {}', $stmt->getAttributes()));
                // Avoid one error for every statement: nothing else is checked after the first report.
                break;
            }
            return $stmts;
        }

        // Semicolon style: statements after a declaration are appended through $target, which
        // aliases either the top-level result list or the ->stmts of the current namespace.
        $result = [];
        $target = &$result;
        $previousNs = null;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Namespace_) {
                if ($previousNs !== null) {
                    $this->fixupNamespaceAttributes($previousNs);
                }

                $result[] = $stmt;
                if ($stmt->stmts === null) {
                    $stmt->stmts = [];
                    $target = &$stmt->stmts;
                } else {
                    // This handles the invalid case of mixed style namespaces
                    $target = &$result;
                }
                $previousNs = $stmt;
                continue;
            }

            if ($stmt instanceof Node\Stmt\HaltCompiler) {
                // __halt_compiler() is not moved into the namespace
                $result[] = $stmt;
                continue;
            }

            $target[] = $stmt;
        }

        if ($previousNs !== null) {
            $this->fixupNamespaceAttributes($previousNs);
        }
        return $result;
    }

    /**
     * Extends the end attributes of a namespace node to those of its last statement.
     *
     * Needed because statements are moved into the node after it was created.
     */
    private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
        if (empty($stmt->stmts)) {
            return;
        }

        // Only the builtin end attributes are moved. This is the best we can do with the
        // knowledge we have.
        $final = $stmt->stmts[count($stmt->stmts) - 1];
        foreach (['endLine', 'endFilePos', 'endTokenPos'] as $key) {
            if (!$final->hasAttribute($key)) {
                continue;
            }
            $stmt->setAttribute($key, $final->getAttribute($key));
        }
    }

    /**
     * Attributes for namespace-related errors: the node's attributes with the end narrowed down
     * to the "namespace" keyword instead of the whole namespace.
     *
     * @return array<string, mixed>
     */
    private function getNamespaceErrorAttributes(Namespace_ $node): array {
        $attributes = $node->getAttributes();

        // Line and token position: the keyword ends where it starts.
        foreach (['Line', 'TokenPos'] as $suffix) {
            if (isset($attributes['start' . $suffix])) {
                $attributes['end' . $suffix] = $attributes['start' . $suffix];
            }
        }

        // File position: last byte of the keyword.
        if (isset($attributes['startFilePos'])) {
            $attributes['endFilePos'] = $attributes['startFilePos'] + \strlen('namespace') - 1;
        }

        return $attributes;
    }

    /**
     * Determine namespacing style (semicolon or brace)
     *
     * @param Node[] $stmts Top-level statements.
*
     * @return null|string One of "semicolon", "brace" or null (no namespaces)
     */
    private function getNamespacingStyle(array $stmts): ?string {
        $style = null;
        $sawDisallowedStmt = false;

        foreach ($stmts as $i => $stmt) {
            if (!$stmt instanceof Namespace_) {
                /* declare(), __halt_compiler() and nops can be used before a namespace declaration,
                 * and there may be a hashbang line at the very start of the file.
                 * Everything else is forbidden before namespace declarations. */
                $isAllowed = $stmt instanceof Node\Stmt\Declare_
                    || $stmt instanceof Node\Stmt\HaltCompiler
                    || $stmt instanceof Nop
                    || ($i === 0 && $stmt instanceof Node\Stmt\InlineHTML
                        && preg_match('/\A#!.*\r?\n\z/', $stmt->value));
                if (!$isAllowed) {
                    $sawDisallowedStmt = true;
                }
                continue;
            }

            $currentStyle = null === $stmt->stmts ? 'semicolon' : 'brace';

            if (null === $style) {
                // The first namespace declaration decides the style.
                $style = $currentStyle;
                if ($sawDisallowedStmt) {
                    $this->emitError(new Error(
                        'Namespace declaration statement has to be the very first statement in the script',
                        $this->getNamespaceErrorAttributes($stmt)
                    ));
                }
                continue;
            }

            if ($style !== $currentStyle) {
                $this->emitError(new Error(
                    'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
                    $this->getNamespaceErrorAttributes($stmt)
                ));
                // Treat like semicolon style for namespace normalization
                return 'semicolon';
            }
        }

        return $style;
    }

    /**
     * Converts an unqualified name into an Identifier when the targeted PHP version treats its
     * lowercased form as a builtin type; any other name is returned as is.
     *
     * @return Name|Identifier
     */
    protected function handleBuiltinTypes(Name $name) {
        if ($name->isUnqualified()) {
            $lowerName = $name->toLowerString();
            if ($this->phpVersion->supportsBuiltinType($lowerName)) {
                return new Node\Identifier($lowerName, $name->getAttributes());
            }
        }

        return $name;
    }

    /**
     * Get combined start and end attributes at a stack location
     *
     *
* @param int $stackPos Stack location
     *
     * @return array<string, mixed> Combined start and end attributes
     */
    protected function getAttributesAt(int $stackPos): array {
        $startPos = $this->tokenStartStack[$stackPos];
        $endPos = $this->tokenEndStack[$stackPos];

        return $this->getAttributes($startPos, $endPos);
    }

    /**
     * Maps the text of a float cast to the matching Double::KIND_* constant.
     *
     * The check is case-insensitive and substring based: "float" is tested first, then "real";
     * anything else counts as a double cast.
     */
    protected function getFloatCastKind(string $cast): int {
        $lowered = strtolower($cast);

        $kindsByKeyword = [
            'float' => Double::KIND_FLOAT,
            'real' => Double::KIND_REAL,
        ];
        foreach ($kindsByKeyword as $keyword => $kind) {
            if (false !== strpos($lowered, $keyword)) {
                return $kind;
            }
        }

        return Double::KIND_DOUBLE;
    }

    /**
     * Creates an integer node from its source representation.
     *
     * If the conversion raises an Error, it is reported and a node with the value 0 is returned.
     *
     * @param array<string, mixed> $attributes
     */
    protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
        try {
            $node = Int_::fromString($str, $attributes, $allowInvalidOctal);
        } catch (Error $error) {
            $this->emitError($error);
            // Use dummy value
            $node = new Int_(0, $attributes);
        }

        return $node;
    }

    /**
     * Parse a T_NUM_STRING token into either an integer or string node.
     *
     * Only canonical decimal integers that still fit into an int become Int_ nodes.
     *
     * @param string $str Number string
     * @param array<string, mixed> $attributes Attributes
     *
     * @return Int_|String_ Integer or string node.
     */
    protected function parseNumString(string $str, array $attributes) {
        if (preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
            // Unary plus converts the numeric string; the result is not an int if it overflowed.
            $num = +$str;
            if (is_int($num)) {
                return new Int_($num, $attributes);
            }
        }

        return new String_($str, $attributes);
    }

    /**
     * Removes up to $indentLen characters of leading indentation from every line of a doc string
     * fragment, reporting mixed or insufficient indentation through emitError().
     *
     * @param array<string, mixed> $attributes
     */
    protected function stripIndentation(
        string $string, int $indentLen, string $indentChar,
        bool $newlineAtStart, bool $newlineAtEnd, array $attributes
    ): string {
        if ($indentLen === 0) {
            return $string;
        }

        // Where a line may start / end inside this fragment depends on whether the fragment
        // itself begins / ends at a line boundary.
        $lineStart = $newlineAtStart ? '(?:(?<=\n)|\A)' : '(?<=\n)';
        $lineEnd = $newlineAtEnd ? '(?:(?=[\r\n])|\z)' : '(?=[\r\n])';
        $pattern = '/' . $lineStart . '([ \t]*)(' . $lineEnd . ')?/';

        // The whitespace character that must not appear within the stripped prefix.
        $foreignChar = $indentChar === " " ? "\t" : " ";

        $stripLine = function ($matches) use ($indentLen, $foreignChar, $attributes) {
            $prefix = substr($matches[1], 0, $indentLen);
            $prefixLen = strlen($prefix);

            if (false !== strpos($prefix, $foreignChar)) {
                $this->emitError(new Error(
                    'Invalid indentation - tabs and spaces cannot be mixed', $attributes
                ));
            } elseif ($prefixLen < $indentLen && !isset($matches[2])) {
                // Too little indentation is only tolerated on lines consisting of whitespace alone.
                $this->emitError(new Error(
                    'Invalid body indentation level ' .
                    '(expecting an indentation level of at least ' . $indentLen . ')',
                    $attributes
                ));
            }

            return substr($matches[0], $prefixLen);
        };

        return preg_replace_callback($pattern, $stripLine, $string);
    }

    /**
     * Builds the node for a heredoc/nowdoc string.
     *
     * The label is taken from the start token and the indentation from the end token; both are
     * recorded as attributes together with the string kind. The body then has its indentation
     * and trailing newline removed before escape sequences are handled.
     *
     * @param string|(Expr|InterpolatedStringPart)[] $contents
     * @param array<string, mixed> $attributes
     * @param array<string, mixed> $endTokenAttributes
     */
    protected function parseDocString(
        string $startToken, $contents, string $endToken,
        array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
    ): Expr {
        // A single quote in the start token marks a nowdoc.
        $isNowdoc = false !== strpos($startToken, "'");
        $kind = $isNowdoc ? String_::KIND_NOWDOC : String_::KIND_HEREDOC;

        $labelPattern = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
        $matched = preg_match($labelPattern, $startToken, $labelMatch);
        assert($matched === 1);

        $matched = preg_match('/\A[ \t]*/', $endToken, $indentMatch);
        assert($matched === 1);
        $indentation = $indentMatch[0];

        $attributes['kind'] = $kind;
        $attributes['docLabel'] = $labelMatch[1];
        $attributes['docIndentation'] = $indentation;

        $usesSpaces = false !== strpos($indentation, " ");
        $usesTabs = false !== strpos($indentation, "\t");
        if ($usesSpaces && $usesTabs) {
            $this->emitError(new Error(
                'Invalid indentation - tabs and spaces cannot be mixed',
                $endTokenAttributes
            ));

            // Proceed processing as if this doc string is not indented
            $indentation = '';
        }

        $indentLen = \strlen($indentation);
        $indentChar = $usesSpaces ? " " : "\t";

        if (\is_string($contents)) {
            // Plain (non-interpolated) body.
            if ($contents === '') {
                $attributes['rawValue'] = $contents;
                return new String_('', $attributes);
            }

            $contents = $this->stripIndentation(
                $contents, $indentLen, $indentChar, true, true, $attributes
            );
            $contents = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);
            $attributes['rawValue'] = $contents;

            if ($kind === String_::KIND_HEREDOC) {
                $contents = String_::parseEscapeSequences($contents, null, $parseUnicodeEscape);
            }

            return new String_($contents, $attributes);
        }

        // Interpolated body: a list of string parts and expressions.
        assert(count($contents) > 0);
        if (!$contents[0] instanceof InterpolatedStringPart) {
            // If there is no leading encapsed string part, pretend there is an empty one
            $this->stripIndentation(
                '', $indentLen, $indentChar, true, false, $contents[0]->getAttributes()
            );
        }

        $lastIndex = \count($contents) - 1;
        $keptParts = [];
        foreach ($contents as $i => $part) {
            if (!$part instanceof InterpolatedStringPart) {
                $keptParts[] = $part;
                continue;
            }

            $isLast = $i === $lastIndex;
            $part->value = $this->stripIndentation(
                $part->value, $indentLen, $indentChar,
                $i === 0, $isLast, $part->getAttributes()
            );
            if ($isLast) {
                $part->value = preg_replace('~(\r\n|\n|\r)\z~', '', $part->value);
            }
            $part->setAttribute('rawValue', $part->value);
            $part->value = String_::parseEscapeSequences($part->value, null, $parseUnicodeEscape);

            // String parts that end up empty are dropped.
            if ('' !== $part->value) {
                $keptParts[] = $part;
            }
        }

        return new InterpolatedString($keptParts, $attributes);
    }

    /**
     * Wraps a comment token into a Comment, or a Comment\Doc for doc comments.
     */
    protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
        assert($token->id === \T_COMMENT || $token->id == \T_DOC_COMMENT);

        $endLine = $token->getEndLine();
        $endFilePos = $token->getEndPos() - 1;

        // A comment is a single token, so start and end token position coincide.
        if (\T_DOC_COMMENT === $token->id) {
            return new Comment\Doc(
                $token->text, $token->line, $token->pos, $tokenPos,
                $endLine, $endFilePos, $tokenPos
            );
        }

        return new Comment(
            $token->text, $token->line, $token->pos, $tokenPos,
            $endLine, $endFilePos, $tokenPos
        );
    }

    /**
     * Get last comment before the given token position, if any
     *
     * Walks backwards over dropped tokens only; the search ends at the first token that is not
     * in $dropTokens.
     */
    protected function getCommentBeforeToken(int $tokenPos): ?Comment {
        for ($pos = $tokenPos - 1; $pos >= 0; --$pos) {
            $token = $this->tokens[$pos];
            if (!isset($this->dropTokens[$token->id])) {
                return null;
            }

            if ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT) {
                return $this->createCommentFromToken($token, $pos);
            }
        }

        return null;
    }

    /**
     * Create a zero-length nop to capture preceding comments, if any.
*/
    protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
        $comment = $this->getCommentBeforeToken($tokenPos);
        if (null === $comment) {
            return null;
        }

        $endLine = $comment->getEndLine();
        $endFilePos = $comment->getEndFilePos();
        $endTokenPos = $comment->getEndTokenPos();

        // The nop sits directly after the comment: its start is one past the comment's end,
        // and its end is the comment's end, which makes it zero-length.
        return new Nop([
            'startLine' => $endLine,
            'endLine' => $endLine,
            'startFilePos' => $endFilePos + 1,
            'endFilePos' => $endFilePos,
            'startTokenPos' => $endTokenPos + 1,
            'endTokenPos' => $endTokenPos,
        ]);
    }

    /**
     * Create a nop spanning the given token range, but only if a comment precedes the range.
     */
    protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
        $hasComment = null !== $this->getCommentBeforeToken($tokenStartPos);
        if (!$hasComment) {
            return null;
        }

        return new Nop($this->getAttributes($tokenStartPos, $tokenEndPos));
    }

    /**
     * Stops token consumption after __halt_compiler and returns the data that follows it.
     */
    protected function handleHaltCompiler(): string {
        $following = $this->tokens[$this->tokenPos + 1];

        // Prevent the lexer from returning any further tokens.
        $this->tokenPos = \count($this->tokens) - 2;

        // Return text after __halt_compiler.
        if ($following->id === \T_INLINE_HTML) {
            return $following->text;
        }
        return '';
    }

    /**
     * Whether the inline HTML at the given stack position is preceded by a newline.
     *
     * Inline HTML at the very start of the token array counts as having one; otherwise the
     * text of the preceding close tag is inspected.
     */
    protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
        $tokenPos = $this->tokenStartStack[$stackPos];
        $token = $this->tokens[$tokenPos];
        assert($token->id == \T_INLINE_HTML);

        if ($tokenPos <= 0) {
            return true;
        }

        $closeTag = $this->tokens[$tokenPos - 1];
        assert($closeTag->id == \T_CLOSE_TAG);

        $closeTagText = $closeTag->text;
        return false !== strpos($closeTagText, "\n")
            || false !== strpos($closeTagText, "\r");
    }

    /**
     * Attributes for the placeholder of an empty array/list element at the given token.
     *
     * @return array<string, mixed>
     */
    protected function createEmptyElemAttributes(int $tokenPos): array {
        $attributes = $this->getAttributesForToken($tokenPos);
        return $attributes;
    }

    /**
     * Converts an array literal that turned out to be a destructuring target into a List_ node,
     * recursing into nested array literals.
     */
    protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
        // The node is no longer an array expression, so it must not be checked for empty elements.
        $this->createdArrays->detach($node);

        $convertItem = function (Node\ArrayItem $item) {
            $value = $item->value;
            if ($value instanceof Expr\Error) {
                // We used Error as a placeholder for empty elements, which are legal for destructuring.
                return null;
            }
            if (!$value instanceof Array_) {
                return $item;
            }

            return new Node\ArrayItem(
                $this->fixupArrayDestructuring($value),
                $item->key, $item->byRef, $item->getAttributes());
        };
        $listItems = array_map($convertItem, $node->items);

        return new Expr\List_($listItems, ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes());
    }

    /**
     * Replaces the Error placeholders of a list() node with null entries.
     */
    protected function postprocessList(Expr\List_ $node): void {
        foreach ($node->items as $index => $item) {
            if (!$item->value instanceof Expr\Error) {
                continue;
            }
            // We used Error as a placeholder for empty elements, which are legal for destructuring.
            $node->items[$index] = null;
        }
    }

    /**
     * Extends the end attributes of an alternative-syntax else/elseif node to those of a
     * trailing nop statement, if there is one.
     *
     * @param ElseIf_|Else_ $node
     */
    protected function fixupAlternativeElse($node): void {
        $numStmts = \count($node->stmts);
        if ($numStmts === 0) {
            return;
        }

        // Make sure a trailing nop statement carrying comments is part of the node.
        $lastStmt = $node->stmts[$numStmts - 1];
        if (!$lastStmt instanceof Nop) {
            return;
        }

        $nopAttrs = $lastStmt->getAttributes();
        foreach (['endLine', 'endFilePos', 'endTokenPos'] as $key) {
            if (isset($nopAttrs[$key])) {
                $node->setAttribute($key, $nopAttrs[$key]);
            }
        }
    }

    /**
     * Verifies that class modifier $b may be combined with the modifiers in $a; a violation is
     * reported at the location of the modifier on the stack.
     */
    protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
        try {
            Modifiers::verifyClassModifier($a, $b);
        } catch (Error $invalidModifier) {
            $location = $this->getAttributesAt($modifierPos);
            $invalidModifier->setAttributes($location);
            $this->emitError($invalidModifier);
        }
    }

    /**
     * Verifies that member modifier $b may be combined with the modifiers in $a; a violation is
     * reported at the location of the modifier on the stack.
     */
    protected function checkModifier(int $a, int $b, int $modifierPos): void {
        // Jumping through some hoops here because verifyModifier() is also used elsewhere
        try {
            Modifiers::verifyModifier($a, $b);
        } catch (Error $invalidModifier) {
            $location = $this->getAttributesAt($modifierPos);
            $invalidModifier->setAttributes($location);
            $this->emitError($invalidModifier);
        }
    }

    /**
     * Reports a variadic parameter that declares a default value.
     */
    protected function checkParam(Param $node): void {
        if (!$node->variadic || null === $node->default) {
            return;
        }

        $this->emitError(new Error(
            'Variadic parameter cannot have a default value',
            $node->default->getAttributes()
        ));
    }

    /**
     * Reports a try statement that has neither a catch nor a finally clause.
     */
    protected function checkTryCatch(TryCatch $node): void {
        if (null !== $node->finally || !empty($node->catches)) {
            return;
        }

        $this->emitError(new Error(
            'Cannot use try without catch or finally', $node->getAttributes()
        ));
    }

    protected function checkNamespace(Namespace_ $node): void {
        if (null !== $node->stmts) {
            foreach ($node->stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $this->emitError(new Error(
                        'Namespace declarations cannot be nested', $stmt->getAttributes()
                    ));
1063 } 1064 } 1065 } 1066 } 1067 1068 private function checkClassName(?Identifier $name, int $namePos): void { 1069 if (null !== $name && $name->isSpecialClassName()) { 1070 $this->emitError(new Error( 1071 sprintf('Cannot use \'%s\' as class name as it is reserved', $name), 1072 $this->getAttributesAt($namePos) 1073 )); 1074 } 1075 } 1076 1077 /** @param Name[] $interfaces */ 1078 private function checkImplementedInterfaces(array $interfaces): void { 1079 foreach ($interfaces as $interface) { 1080 if ($interface->isSpecialClassName()) { 1081 $this->emitError(new Error( 1082 sprintf('Cannot use \'%s\' as interface name as it is reserved', $interface), 1083 $interface->getAttributes() 1084 )); 1085 } 1086 } 1087 } 1088 1089 protected function checkClass(Class_ $node, int $namePos): void { 1090 $this->checkClassName($node->name, $namePos); 1091 1092 if ($node->extends && $node->extends->isSpecialClassName()) { 1093 $this->emitError(new Error( 1094 sprintf('Cannot use \'%s\' as class name as it is reserved', $node->extends), 1095 $node->extends->getAttributes() 1096 )); 1097 } 1098 1099 $this->checkImplementedInterfaces($node->implements); 1100 } 1101 1102 protected function checkInterface(Interface_ $node, int $namePos): void { 1103 $this->checkClassName($node->name, $namePos); 1104 $this->checkImplementedInterfaces($node->extends); 1105 } 1106 1107 protected function checkEnum(Enum_ $node, int $namePos): void { 1108 $this->checkClassName($node->name, $namePos); 1109 $this->checkImplementedInterfaces($node->implements); 1110 } 1111 1112 protected function checkClassMethod(ClassMethod $node, int $modifierPos): void { 1113 if ($node->flags & Modifiers::STATIC) { 1114 switch ($node->name->toLowerString()) { 1115 case '__construct': 1116 $this->emitError(new Error( 1117 sprintf('Constructor %s() cannot be static', $node->name), 1118 $this->getAttributesAt($modifierPos))); 1119 break; 1120 case '__destruct': 1121 $this->emitError(new Error( 1122 sprintf('Destructor %s() 
cannot be static', $node->name), 1123 $this->getAttributesAt($modifierPos))); 1124 break; 1125 case '__clone': 1126 $this->emitError(new Error( 1127 sprintf('Clone method %s() cannot be static', $node->name), 1128 $this->getAttributesAt($modifierPos))); 1129 break; 1130 } 1131 } 1132 1133 if ($node->flags & Modifiers::READONLY) { 1134 $this->emitError(new Error( 1135 sprintf('Method %s() cannot be readonly', $node->name), 1136 $this->getAttributesAt($modifierPos))); 1137 } 1138 } 1139 1140 protected function checkClassConst(ClassConst $node, int $modifierPos): void { 1141 foreach ([Modifiers::STATIC, Modifiers::ABSTRACT, Modifiers::READONLY] as $modifier) { 1142 if ($node->flags & $modifier) { 1143 $this->emitError(new Error( 1144 "Cannot use '" . Modifiers::toString($modifier) . "' as constant modifier", 1145 $this->getAttributesAt($modifierPos))); 1146 } 1147 } 1148 } 1149 1150 protected function checkUseUse(UseItem $node, int $namePos): void { 1151 if ($node->alias && $node->alias->isSpecialClassName()) { 1152 $this->emitError(new Error( 1153 sprintf( 1154 'Cannot use %s as %s because \'%2$s\' is a special class name', 1155 $node->name, $node->alias 1156 ), 1157 $this->getAttributesAt($namePos) 1158 )); 1159 } 1160 } 1161 1162 protected function checkPropertyHooksForMultiProperty(Property $property, int $hookPos): void { 1163 if (count($property->props) > 1) { 1164 $this->emitError(new Error( 1165 'Cannot use hooks when declaring multiple properties', $this->getAttributesAt($hookPos))); 1166 } 1167 } 1168 1169 /** @param PropertyHook[] $hooks */ 1170 protected function checkEmptyPropertyHookList(array $hooks, int $hookPos): void { 1171 if (empty($hooks)) { 1172 $this->emitError(new Error( 1173 'Property hook list cannot be empty', $this->getAttributesAt($hookPos))); 1174 } 1175 } 1176 1177 protected function checkPropertyHook(PropertyHook $hook, ?int $paramListPos): void { 1178 $name = $hook->name->toLowerString(); 1179 if ($name !== 'get' && $name !== 'set') { 
1180 $this->emitError(new Error( 1181 'Unknown hook "' . $hook->name . '", expected "get" or "set"', 1182 $hook->name->getAttributes())); 1183 } 1184 if ($name === 'get' && $paramListPos !== null) { 1185 $this->emitError(new Error( 1186 'get hook must not have a parameter list', $this->getAttributesAt($paramListPos))); 1187 } 1188 } 1189 1190 protected function checkPropertyHookModifiers(int $a, int $b, int $modifierPos): void { 1191 try { 1192 Modifiers::verifyModifier($a, $b); 1193 } catch (Error $error) { 1194 $error->setAttributes($this->getAttributesAt($modifierPos)); 1195 $this->emitError($error); 1196 } 1197 1198 if ($b != Modifiers::FINAL) { 1199 $this->emitError(new Error( 1200 'Cannot use the ' . Modifiers::toString($b) . ' modifier on a property hook', 1201 $this->getAttributesAt($modifierPos))); 1202 } 1203 } 1204 1205 /** 1206 * @param Property|Param $node 1207 */ 1208 protected function addPropertyNameToHooks(Node $node): void { 1209 if ($node instanceof Property) { 1210 $name = $node->props[0]->name->toString(); 1211 } else { 1212 $name = $node->var->name; 1213 } 1214 foreach ($node->hooks as $hook) { 1215 $hook->setAttribute('propertyName', $name); 1216 } 1217 } 1218 1219 /** @param array<Node\Arg|Node\VariadicPlaceholder> $args */ 1220 private function isSimpleExit(array $args): bool { 1221 if (\count($args) === 0) { 1222 return true; 1223 } 1224 if (\count($args) === 1) { 1225 $arg = $args[0]; 1226 return $arg instanceof Arg && $arg->name === null && 1227 $arg->byRef === false && $arg->unpack === false; 1228 } 1229 return false; 1230 } 1231 1232 /** 1233 * @param array<Node\Arg|Node\VariadicPlaceholder> $args 1234 * @param array<string, mixed> $attrs 1235 */ 1236 protected function createExitExpr(string $name, int $namePos, array $args, array $attrs): Expr { 1237 if ($this->isSimpleExit($args)) { 1238 // Create Exit node for backwards compatibility. 1239 $attrs['kind'] = strtolower($name) === 'exit' ? 
Expr\Exit_::KIND_EXIT : Expr\Exit_::KIND_DIE; 1240 return new Expr\Exit_(\count($args) === 1 ? $args[0]->value : null, $attrs); 1241 } 1242 return new Expr\FuncCall(new Name($name, $this->getAttributesAt($namePos)), $args, $attrs); 1243 } 1244 1245 /** 1246 * Creates the token map. 1247 * 1248 * The token map maps the PHP internal token identifiers 1249 * to the identifiers used by the Parser. Additionally it 1250 * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. 1251 * 1252 * @return array<int, int> The token map 1253 */ 1254 protected function createTokenMap(): array { 1255 $tokenMap = []; 1256 1257 // Single-char tokens use an identity mapping. 1258 for ($i = 0; $i < 256; ++$i) { 1259 $tokenMap[$i] = $i; 1260 } 1261 1262 foreach ($this->symbolToName as $name) { 1263 if ($name[0] === 'T') { 1264 $tokenMap[\constant($name)] = constant(static::class . '::' . $name); 1265 } 1266 } 1267 1268 // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO 1269 $tokenMap[\T_OPEN_TAG_WITH_ECHO] = static::T_ECHO; 1270 // T_CLOSE_TAG is equivalent to ';' 1271 $tokenMap[\T_CLOSE_TAG] = ord(';'); 1272 1273 // We have created a map from PHP token IDs to external symbol IDs. 1274 // Now map them to the internal symbol ID. 1275 $fullTokenMap = []; 1276 foreach ($tokenMap as $phpToken => $extSymbol) { 1277 $intSymbol = $this->tokenToSymbol[$extSymbol]; 1278 if ($intSymbol === $this->invalidSymbol) { 1279 continue; 1280 } 1281 $fullTokenMap[$phpToken] = $intSymbol; 1282 } 1283 1284 return $fullTokenMap; 1285 } 1286} 1287