1<?php declare(strict_types=1); 2 3namespace PhpParser; 4 5/* 6 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in 7 * turn is based on work by Masato Bito. 8 */ 9 10use PhpParser\Node\Arg; 11use PhpParser\Node\Expr; 12use PhpParser\Node\Expr\Array_; 13use PhpParser\Node\Expr\Cast\Double; 14use PhpParser\Node\Identifier; 15use PhpParser\Node\InterpolatedStringPart; 16use PhpParser\Node\Name; 17use PhpParser\Node\Param; 18use PhpParser\Node\PropertyHook; 19use PhpParser\Node\Scalar\InterpolatedString; 20use PhpParser\Node\Scalar\Int_; 21use PhpParser\Node\Scalar\String_; 22use PhpParser\Node\Stmt; 23use PhpParser\Node\Stmt\Class_; 24use PhpParser\Node\Stmt\ClassConst; 25use PhpParser\Node\Stmt\ClassMethod; 26use PhpParser\Node\Stmt\Else_; 27use PhpParser\Node\Stmt\ElseIf_; 28use PhpParser\Node\Stmt\Enum_; 29use PhpParser\Node\Stmt\Interface_; 30use PhpParser\Node\Stmt\Namespace_; 31use PhpParser\Node\Stmt\Nop; 32use PhpParser\Node\Stmt\Property; 33use PhpParser\Node\Stmt\TryCatch; 34use PhpParser\Node\UseItem; 35use PhpParser\NodeVisitor\CommentAnnotatingVisitor; 36 37abstract class ParserAbstract implements Parser { 38 private const SYMBOL_NONE = -1; 39 40 /** @var Lexer Lexer that is used when parsing */ 41 protected Lexer $lexer; 42 /** @var PhpVersion PHP version to target on a best-effort basis */ 43 protected PhpVersion $phpVersion; 44 45 /* 46 * The following members will be filled with generated parsing data: 47 */ 48 49 /** @var int Size of $tokenToSymbol map */ 50 protected int $tokenToSymbolMapSize; 51 /** @var int Size of $action table */ 52 protected int $actionTableSize; 53 /** @var int Size of $goto table */ 54 protected int $gotoTableSize; 55 56 /** @var int Symbol number signifying an invalid token */ 57 protected int $invalidSymbol; 58 /** @var int Symbol number of error recovery token */ 59 protected int $errorSymbol; 60 /** @var int Action number signifying default action */ 61 protected int $defaultAction; 62 /** @var 
int Rule number signifying that an unexpected token was encountered */ 63 protected int $unexpectedTokenRule; 64 65 protected int $YY2TBLSTATE; 66 /** @var int Number of non-leaf states */ 67 protected int $numNonLeafStates; 68 69 /** @var int[] Map of PHP token IDs to internal symbols */ 70 protected array $phpTokenToSymbol; 71 /** @var array<int, bool> Map of PHP token IDs to drop */ 72 protected array $dropTokens; 73 /** @var int[] Map of external symbols (static::T_*) to internal symbols */ 74 protected array $tokenToSymbol; 75 /** @var string[] Map of symbols to their names */ 76 protected array $symbolToName; 77 /** @var array<int, string> Names of the production rules (only necessary for debugging) */ 78 protected array $productions; 79 80 /** @var int[] Map of states to a displacement into the $action table. The corresponding action for this 81 * state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the 82 * action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 83 protected array $actionBase; 84 /** @var int[] Table of actions. Indexed according to $actionBase comment. */ 85 protected array $action; 86 /** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol 87 * then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */ 88 protected array $actionCheck; 89 /** @var int[] Map of states to their default action */ 90 protected array $actionDefault; 91 /** @var callable[] Semantic action callbacks */ 92 protected array $reduceCallbacks; 93 94 /** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this 95 * non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */ 96 protected array $gotoBase; 97 /** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. 
*/ 98 protected array $goto; 99 /** @var int[] Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal 100 * then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */ 101 protected array $gotoCheck; 102 /** @var int[] Map of non-terminals to the default state to goto after their reduction */ 103 protected array $gotoDefault; 104 105 /** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for 106 * determining the state to goto after reduction. */ 107 protected array $ruleToNonTerminal; 108 /** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to 109 * be popped from the stack(s) on reduction. */ 110 protected array $ruleToLength; 111 112 /* 113 * The following members are part of the parser state: 114 */ 115 116 /** @var mixed Temporary value containing the result of last semantic action (reduction) */ 117 protected $semValue; 118 /** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */ 119 protected array $semStack; 120 /** @var int[] Token start position stack */ 121 protected array $tokenStartStack; 122 /** @var int[] Token end position stack */ 123 protected array $tokenEndStack; 124 125 /** @var ErrorHandler Error handler */ 126 protected ErrorHandler $errorHandler; 127 /** @var int Error state, used to avoid error floods */ 128 protected int $errorState; 129 130 /** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */ 131 protected ?\SplObjectStorage $createdArrays; 132 133 /** @var Token[] Tokens for the current parse */ 134 protected array $tokens; 135 /** @var int Current position in token array */ 136 protected int $tokenPos; 137 138 /** 139 * Initialize $reduceCallbacks map. 
*/
    abstract protected function initReduceCallbacks(): void;

    /**
     * Creates a parser instance.
     *
     * @param Lexer $lexer A lexer
     * @param PhpVersion|null $phpVersion PHP version to target, defaults to latest supported. This
     *                   option is best-effort: Even if specified, parsing will generally assume the latest
     *                   supported version and only adjust behavior in minor ways, for example by omitting
     *                   errors in older versions and interpreting type hints as a name or identifier depending
     *                   on version.
     */
    public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
        $this->lexer = $lexer;
        $this->phpVersion = $phpVersion ?? PhpVersion::getNewestSupported();

        $this->initReduceCallbacks();
        $this->phpTokenToSymbol = $this->createTokenMap();
        // Tokens with these IDs are skipped by doParse() when it fetches the next lookahead token.
        $this->dropTokens = array_fill_keys(
            [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], true
        );
    }

    /**
     * Parses PHP code into a node tree.
     *
     * If a non-throwing error handler is used, the parser will continue parsing after an error
     * occurred and attempt to build a partial AST.
     *
     * @param string $code The source code to parse
     * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
     *                                        to ErrorHandler\Throwing.
     *
     * @return Node\Stmt[]|null Array of statements (or null if a non-throwing error handler is used
     *                          and the parser was unable to recover from an error).
     */
    public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
        $this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing();
        $this->createdArrays = new \SplObjectStorage();

        $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
        $result = $this->doParse();

        // Report errors for any empty elements used inside arrays. This is delayed until after the main parse,
        // because we don't know a priori whether a given array expression will be used in a destructuring context
        // or not.
        foreach ($this->createdArrays as $node) {
            foreach ($node->items as $item) {
                if ($item->value instanceof Expr\Error) {
                    $this->errorHandler->handleError(
                        new Error('Cannot use empty array elements in arrays', $item->getAttributes()));
                }
            }
        }

        // Clear out some of the interior state, so we don't hold onto unnecessary
        // memory between uses of the parser
        $this->tokenStartStack = [];
        $this->tokenEndStack = [];
        $this->semStack = [];
        $this->semValue = null;
        $this->createdArrays = null;

        // $this->tokens is deliberately kept: it is handed to the comment visitor below and
        // stays available through getTokens().
        if ($result !== null) {
            $traverser = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
            $traverser->traverse($result);
        }

        return $result;
    }

    /**
     * Returns the token array stored by the most recent parse() call.
     *
     * @return Token[]
     */
    public function getTokens(): array {
        return $this->tokens;
    }

    /**
     * Runs the table-driven parse loop over $this->tokens.
     *
     * The outer loop determines the next rule/action for the current state and lookahead symbol
     * (shifting tokens as needed); the inner loop performs reductions, gotos and error recovery.
     *
     * @return Stmt[]|null The semantic value present when the accept rule is reached, or null if
     *                     a semantic action raised an Error or error recovery failed.
     *
     * @throws \RangeException If a token ID has no entry in $phpTokenToSymbol
     */
    protected function doParse(): ?array {
        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;
        $tokenValue = null;
        $this->tokenPos = -1;

        // Keep stack of start and end attributes
        $this->tokenStartStack = [];
        $this->tokenEndStack = [0];

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = [$state];

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = [];

        // Current position in the stack(s)
        $stackPos = 0;

        $this->errorState = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] === 0) {
                // No displacement for this state: the action is defaulted without reading a token.
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Fetch the next token that is not in the drop list.
                    do {
                        $token = $this->tokens[++$this->tokenPos];
                        $tokenId = $token->id;
                    } while (isset($this->dropTokens[$tokenId]));

                    // Map the lexer token id to the internally used symbols.
                    $tokenValue = $token->text;
                    if (!isset($this->phpTokenToSymbol[$tokenId])) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }
                    $symbol = $this->phpTokenToSymbol[$tokenId];

                    //$this->traceRead($symbol);
                }

                // Look the action up under the state's own displacement first; for states below
                // YY2TBLSTATE a second displacement (state + numNonLeafStates) is tried as well.
                // Note that $idx and $action are assigned inside the condition.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                    && ($action = $this->action[$idx]) !== $this->defaultAction) {
                    /*
                     * >= numNonLeafStates: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;
                        $this->semStack[$stackPos] = $tokenValue;
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenPos;
                        // The lookahead has been consumed.
                        $symbol = self::SYMBOL_NONE;

                        // Each shifted token counts down the error state set during recovery.
                        if ($this->errorState) {
                            --$this->errorState;
                        }

                        if ($action < $this->numNonLeafStates) {
                            continue;
                        }

                        /* $yyn >= numNonLeafStates means shift-and-reduce */
                        $rule = $action - $this->numNonLeafStates;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                }
                if ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    $ruleLength = $this->ruleToLength[$rule];
                    try {
                        $callback = $this->reduceCallbacks[$rule];
                        if ($callback !== null) {
                            $callback($this, $stackPos);
                        } elseif ($ruleLength > 0) {
                            // No callback: the value of the first right-hand side symbol is used.
                            $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                        }
                    } catch (Error $e) {
                        // Errors without a start line (-1) get the line of the current token.
                        if (-1 === $e->getStartLine()) {
                            $e->setStartLine($this->tokens[$this->tokenPos]->line);
                        }

                        $this->emitError($e);
                        // Can't recover from this type of error
                        return null;
                    }

                    /* Goto - shift nonterminal */
                    $lastTokenEnd = $this->tokenEndStack[$stackPos];
                    $stackPos -= $ruleLength;
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    ++$stackPos;
                    $stateStack[$stackPos] = $state;
                    $this->semStack[$stackPos] = $this->semValue;
                    $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                    if ($ruleLength === 0) {
                        // Empty productions use the start attributes of the lookahead token.
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    }
                } else {
                    /* error */
                    switch ($this->errorState) {
                        case 0:
                            // Only error state 0 reports the error; states 1 and 2 recover silently.
                            $msg = $this->getErrorMessage($symbol, $state);
                            $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                            // Break missing intentionally
                            // no break
                        case 1:
                        case 2:
                            $this->errorState = 3;

                            // Pop until error-expecting state uncovered
                            while (!(
                                (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                                || ($state < $this->YY2TBLSTATE
                                    && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                                if ($stackPos <= 0) {
                                    // Could not recover from error
                                    return null;
                                }
                                $state = $stateStack[--$stackPos];
                                //$this->tracePop($state);
                            }

                            //$this->traceShift($this->errorSymbol);
                            ++$stackPos;
                            $stateStack[$stackPos] = $state = $action;

                            // We treat the error symbol as being empty, so we reset the end attributes
                            // to the end attributes of the last non-error symbol
                            $this->tokenStartStack[$stackPos] = $this->tokenPos;
                            $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                            break;

                        case 3:
                            if ($symbol === 0) {
                                // Reached EOF without recovering from error
                                return null;
                            }

                            // Discard the offending lookahead and return to the outer loop to read
                            // the next token.
                            //$this->traceDiscard($symbol);
                            $symbol = self::SYMBOL_NONE;
                            break 2;
                    }
                }

                if ($state < $this->numNonLeafStates) {
                    break;
                }

                /* >= numNonLeafStates means shift-and-reduce */
                $rule = $state - $this->numNonLeafStates;
            }
        }
    }

    /**
     * Forwards an error to the error handler of the current parse.
     */
    protected function emitError(Error $error): void {
        $this->errorHandler->handleError($error);
    }

    /**
* Build the syntax error message for an unexpected symbol, listing the expected tokens
     * when there are few enough of them.
     *
     * @param int $symbol Unexpected symbol
     * @param int $state State at time of error
     *
     * @return string Formatted error message
     */
    protected function getErrorMessage(int $symbol, int $state): string {
        $expectedNames = $this->getExpectedTokens($state);
        $suffix = $expectedNames === []
            ? ''
            : ', expecting ' . implode(' or ', $expectedNames);

        return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $suffix;
    }

    /**
     * Collect the names of the tokens that are acceptable in the given state.
     *
     * @param int $state State
     *
     * @return string[] Expected tokens. An empty array is returned as soon as a fifth
     *                  candidate is found.
     */
    protected function getExpectedTokens(int $state): array {
        $names = [];
        $primaryBase = $this->actionBase[$state];

        foreach ($this->symbolToName as $symbol => $name) {
            $idx = $primaryBase + $symbol;
            $found = $idx >= 0
                && $idx < $this->actionTableSize
                && $this->actionCheck[$idx] === $symbol;

            if (!$found) {
                // Only states below YY2TBLSTATE have a second displacement to try.
                if ($state >= $this->YY2TBLSTATE) {
                    continue;
                }

                $idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol;
                if ($idx < 0
                    || $idx >= $this->actionTableSize
                    || $this->actionCheck[$idx] !== $symbol
                ) {
                    continue;
                }
            }

            $tableAction = $this->action[$idx];
            if ($tableAction === $this->unexpectedTokenRule
                || $tableAction === $this->defaultAction
                || $symbol === $this->errorSymbol
            ) {
                continue;
            }

            if (count($names) === 4) {
                /* Too many expected tokens */
                return [];
            }

            $names[] = $name;
        }

        return $names;
    }

    /**
     * Get attributes for a node with the given start and end token positions.
*
     * @param int $tokenStartPos Token position the node starts at
     * @param int $tokenEndPos Token position the node ends at
     * @return array<string, mixed> Attributes
     */
    protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
        $firstToken = $this->tokens[$tokenStartPos];
        // The end line/offset are derived from the token that follows the end token.
        $followingToken = $this->tokens[$tokenEndPos + 1];

        $startAttributes = [
            'startLine' => $firstToken->line,
            'startTokenPos' => $tokenStartPos,
            'startFilePos' => $firstToken->pos,
        ];
        $endAttributes = [
            'endLine' => $followingToken->line,
            'endTokenPos' => $tokenEndPos,
            'endFilePos' => $followingToken->pos - 1,
        ];

        return $startAttributes + $endAttributes;
    }

    /**
     * Get attributes for the single token at the given position.
     *
     * @return array<string, mixed> Attributes
     */
    protected function getAttributesForToken(int $tokenPos): array {
        $lastIndex = \count($this->tokens) - 1;
        if ($tokenPos >= $lastIndex) {
            // The sentinel token has no successor, so its own line/position serve as both
            // start and end.
            $sentinel = $this->tokens[$tokenPos];
            return [
                'startLine' => $sentinel->line,
                'startTokenPos' => $tokenPos,
                'startFilePos' => $sentinel->pos,
                'endLine' => $sentinel->line,
                'endTokenPos' => $tokenPos,
                'endFilePos' => $sentinel->pos,
            ];
        }

        return $this->getAttributes($tokenPos, $tokenPos);
    }

    /*
     * Tracing functions used for debugging the parser.
     */

    /*
    protected function traceNewState($state, $symbol): void {
        echo '% State ' . $state
            . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
    }

    protected function traceRead($symbol): void {
        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceShift($symbol): void {
        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceAccept(): void {
        echo "% Accepted.\n";
    }

    protected function traceReduce($n): void {
        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
    }

    protected function tracePop($state): void {
        echo '% Recovering, uncovered state ' . $state . "\n";
    }

    protected function traceDiscard($symbol): void {
        echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
    }
    */

    /*
     * Helper functions invoked by semantic actions
     */

    /**
     * Normalizes namespace statements: for semicolon-style namespaces the following statements
     * are moved into $ns->stmts; for brace-style namespaces code between namespaces is reported.
     *
     * @param Node\Stmt[] $stmts
     * @return Node\Stmt[]
     */
    protected function handleNamespaces(array $stmts): array {
        $style = $this->getNamespacingStyle($stmts);
        if ($style === null) {
            // not namespaced, nothing to do
            return $stmts;
        }

        if ($style === 'brace') {
            // Braced namespaces stay where they are; only the first offending statement after a
            // namespace is reported, to avoid one error per statement.
            $seenNamespace = false;
            foreach ($stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $seenNamespace = true;
                    continue;
                }
                if (!$seenNamespace) {
                    continue;
                }
                if ($stmt instanceof Node\Stmt\HaltCompiler || $stmt instanceof Nop) {
                    continue;
                }

                $this->emitError(new Error(
                    'No code may exist outside of namespace {}', $stmt->getAttributes()));
                break;
            }
            return $stmts;
        }

        // Semicolon style: statements following a namespace declaration are appended to the
        // ->stmts of that namespace through the $sink reference.
        $topLevel = [];
        $sink = &$topLevel;
        $previousNs = null;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Node\Stmt\HaltCompiler) {
                // __halt_compiler() is not moved into the namespace
                $topLevel[] = $stmt;
                continue;
            }

            if (!$stmt instanceof Namespace_) {
                $sink[] = $stmt;
                continue;
            }

            if ($previousNs !== null) {
                $this->fixupNamespaceAttributes($previousNs);
            }

            $topLevel[] = $stmt;
            if ($stmt->stmts === null) {
                $stmt->stmts = [];
                $sink = &$stmt->stmts;
            } else {
                // This handles the invalid case of mixed style namespaces
                $sink = &$topLevel;
            }
            $previousNs = $stmt;
        }

        if ($previousNs !== null) {
            $this->fixupNamespaceAttributes($previousNs);
        }
        return $topLevel;
    }

    /**
     * Extends the end attributes of a namespace node to those of its last statement, since the
     * statements were moved into the node after it was created.
     */
    private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
        if (empty($stmt->stmts)) {
            return;
        }

        $finalStmt = $stmt->stmts[count($stmt->stmts) - 1];

        // Only the builtin end attributes are carried over; that is the best that can be done
        // with the information available here.
        foreach (['endLine', 'endFilePos', 'endTokenPos'] as $attributeName) {
            if (!$finalStmt->hasAttribute($attributeName)) {
                continue;
            }
            $stmt->setAttribute($attributeName, $finalStmt->getAttribute($attributeName));
        }
    }

    /**
     * Attributes for namespace-related errors: the node's attributes with the end shrunk so that
     * only the "namespace" keyword is covered.
     *
     * @return array<string, mixed>
     */
    private function getNamespaceErrorAttributes(Namespace_ $node): array {
        $attributes = $node->getAttributes();

        $copiedFromStart = ['startLine' => 'endLine', 'startTokenPos' => 'endTokenPos'];
        foreach ($copiedFromStart as $startKey => $endKey) {
            if (isset($attributes[$startKey])) {
                $attributes[$endKey] = $attributes[$startKey];
            }
        }

        if (isset($attributes['startFilePos'])) {
            $keywordLength = \strlen('namespace');
            $attributes['endFilePos'] = $attributes['startFilePos'] + $keywordLength - 1;
        }

        return $attributes;
    }

    /**
     * Determine namespacing style (semicolon or brace)
     *
     * @param Node[] $stmts Top-level statements.
*
     * @return null|string One of "semicolon", "brace" or null (no namespaces)
     */
    private function getNamespacingStyle(array $stmts): ?string {
        $detectedStyle = null;
        $sawDisallowedStmt = false;

        foreach ($stmts as $index => $stmt) {
            if (!$stmt instanceof Namespace_) {
                /* declare(), __halt_compiler() and nops can be used before a namespace declaration */
                $allowedBeforeNamespace = $stmt instanceof Node\Stmt\Declare_
                    || $stmt instanceof Node\Stmt\HaltCompiler
                    || $stmt instanceof Nop
                    /* There may be a hashbang line at the very start of the file */
                    || ($index === 0
                        && $stmt instanceof Node\Stmt\InlineHTML
                        && preg_match('/\A#!.*\r?\n\z/', $stmt->value));

                if (!$allowedBeforeNamespace) {
                    /* Everything else is forbidden before namespace declarations */
                    $sawDisallowedStmt = true;
                }
                continue;
            }

            $stmtStyle = $stmt->stmts === null ? 'semicolon' : 'brace';

            if ($detectedStyle === null) {
                // The first namespace declaration fixes the style for the whole file.
                $detectedStyle = $stmtStyle;
                if ($sawDisallowedStmt) {
                    $this->emitError(new Error(
                        'Namespace declaration statement has to be the very first statement in the script',
                        $this->getNamespaceErrorAttributes($stmt)
                    ));
                }
                continue;
            }

            if ($detectedStyle !== $stmtStyle) {
                $this->emitError(new Error(
                    'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
                    $this->getNamespaceErrorAttributes($stmt)
                ));
                // Treat like semicolon style for namespace normalization
                return 'semicolon';
            }
        }

        return $detectedStyle;
    }

    /**
     * Converts an unqualified name into an Identifier when the targeted PHP version treats its
     * lowercased form as a builtin type; any other name is returned unchanged.
     *
     * @return Name|Identifier
     */
    protected function handleBuiltinTypes(Name $name) {
        if ($name->isUnqualified()) {
            $lowercased = $name->toLowerString();
            if ($this->phpVersion->supportsBuiltinType($lowercased)) {
                return new Identifier($lowercased, $name->getAttributes());
            }
        }

        return $name;
    }

    /**
     * Get combined start and end attributes at a stack location
     *
* @param int $stackPos Stack location
     *
     * @return array<string, mixed> Combined start and end attributes
     */
    protected function getAttributesAt(int $stackPos): array {
        $startTokenPos = $this->tokenStartStack[$stackPos];
        $endTokenPos = $this->tokenEndStack[$stackPos];

        return $this->getAttributes($startTokenPos, $endTokenPos);
    }

    /**
     * Maps the text of a float cast to the matching Double::KIND_* constant.
     * "float" is checked first, then "real"; anything else is KIND_DOUBLE.
     */
    protected function getFloatCastKind(string $cast): int {
        $lowered = strtolower($cast);

        $kindsByKeyword = [
            'float' => Double::KIND_FLOAT,
            'real' => Double::KIND_REAL,
        ];
        foreach ($kindsByKeyword as $keyword => $kind) {
            if (strpos($lowered, $keyword) !== false) {
                return $kind;
            }
        }

        return Double::KIND_DOUBLE;
    }

    /**
     * Creates an integer node from its source representation. If Int_::fromString() raises an
     * Error, it is emitted and a node with the dummy value 0 is returned.
     *
     * @param array<string, mixed> $attributes
     */
    protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
        try {
            $node = Int_::fromString($str, $attributes, $allowInvalidOctal);
        } catch (Error $error) {
            $this->emitError($error);
            // Use dummy value
            $node = new Int_(0, $attributes);
        }

        return $node;
    }

    /**
     * Parse a T_NUM_STRING token into either an integer or string node.
     *
     * @param string $str Number string
     * @param array<string, mixed> $attributes Attributes
     *
     * @return Int_|String_ Integer or string node.
     */
    protected function parseNumString(string $str, array $attributes) {
        if (preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
            $number = +$str;
            // Decimal strings that do not convert to an int stay strings.
            if (is_int($number)) {
                return new Int_($number, $attributes);
            }
        }

        return new String_($str, $attributes);
    }

    /**
     * Removes up to $indentLen leading indentation characters from the lines of a doc string
     * fragment, emitting errors for mixed or insufficient indentation.
     *
     * @param array<string, mixed> $attributes
     */
    protected function stripIndentation(
        string $string, int $indentLen, string $indentChar,
        bool $newlineAtStart, bool $newlineAtEnd, array $attributes
    ): string {
        if ($indentLen === 0) {
            return $string;
        }

        // Lines start after a "\n"; the start of the fragment only counts when requested.
        $lineStart = '(?<=\n)';
        if ($newlineAtStart) {
            $lineStart = '(?:(?<=\n)|\A)';
        }

        // Group 2 records whether the whitespace run is directly followed by a line end.
        $lineEnd = '(?=[\r\n])';
        if ($newlineAtEnd) {
            $lineEnd = '(?:(?=[\r\n])|\z)';
        }

        $pattern = '/' . $lineStart . '([ \t]*)(' . $lineEnd . ')?/';
        $otherIndentChar = $indentChar === " " ? "\t" : " ";

        $stripLine = function ($matches) use ($indentLen, $otherIndentChar, $attributes) {
            $prefix = substr($matches[1], 0, $indentLen);
            $prefixLen = strlen($prefix);

            if (strpos($prefix, $otherIndentChar) !== false) {
                $this->emitError(new Error(
                    'Invalid indentation - tabs and spaces cannot be mixed', $attributes
                ));
            } elseif ($prefixLen < $indentLen && !isset($matches[2])) {
                $this->emitError(new Error(
                    'Invalid body indentation level (expecting an indentation level of at least '
                        . $indentLen . ')',
                    $attributes
                ));
            }

            return substr($matches[0], $prefixLen);
        };

        return preg_replace_callback($pattern, $stripLine, $string);
    }

    /**
     * Builds the node for a heredoc/nowdoc string: records kind, label and indentation as
     * attributes, strips the closing-marker indentation from the contents and parses escape
     * sequences where applicable.
     *
     * @param string|(Expr|InterpolatedStringPart)[] $contents
     * @param array<string, mixed> $attributes
     * @param array<string, mixed> $endTokenAttributes
     */
    protected function parseDocString(
        string $startToken, $contents, string $endToken,
        array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
    ): Expr {
        // A single quote in the opening token marks the string as nowdoc.
        $isNowdoc = strpos($startToken, "'") !== false;
        $kind = $isNowdoc ? String_::KIND_NOWDOC : String_::KIND_HEREDOC;

        $labelPattern = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
        $matched = preg_match($labelPattern, $startToken, $labelMatch);
        assert($matched === 1);
        $label = $labelMatch[1];

        // The indentation is the leading whitespace of the end token.
        $matched = preg_match('/\A[ \t]*/', $endToken, $indentMatch);
        assert($matched === 1);
        $indentation = $indentMatch[0];

        $attributes['kind'] = $kind;
        $attributes['docLabel'] = $label;
        $attributes['docIndentation'] = $indentation;

        $usesSpaces = strpos($indentation, " ") !== false;
        $usesTabs = strpos($indentation, "\t") !== false;
        if ($usesSpaces && $usesTabs) {
            $this->emitError(new Error(
                'Invalid indentation - tabs and spaces cannot be mixed',
                $endTokenAttributes
            ));

            // Proceed processing as if this doc string is not indented
            $indentation = '';
        }

        $indentLen = \strlen($indentation);
        $indentChar = $usesSpaces ? " " : "\t";

        if (!\is_string($contents)) {
            assert(count($contents) > 0);

            $firstPart = $contents[0];
            if (!$firstPart instanceof InterpolatedStringPart) {
                // If there is no leading encapsed string part, pretend there is an empty one
                $this->stripIndentation(
                    '', $indentLen, $indentChar, true, false, $firstPart->getAttributes()
                );
            }

            $lastIndex = \count($contents) - 1;
            $keptParts = [];
            foreach ($contents as $i => $part) {
                if (!$part instanceof InterpolatedStringPart) {
                    $keptParts[] = $part;
                    continue;
                }

                $isLast = $i === $lastIndex;
                $text = $this->stripIndentation(
                    $part->value, $indentLen, $indentChar,
                    $i === 0, $isLast, $part->getAttributes()
                );
                if ($isLast) {
                    // Drop the newline that precedes the closing marker.
                    $text = preg_replace('~(\r\n|\n|\r)\z~', '', $text);
                }

                $part->value = $text;
                $part->setAttribute('rawValue', $text);
                $part->value = String_::parseEscapeSequences($text, null, $parseUnicodeEscape);

                // Parts that end up empty are left out of the result.
                if ($part->value !== '') {
                    $keptParts[] = $part;
                }
            }

            return new InterpolatedString($keptParts, $attributes);
        }

        if ($contents === '') {
            $attributes['rawValue'] = $contents;
            return new String_('', $attributes);
        }

        $stripped = $this->stripIndentation(
            $contents, $indentLen, $indentChar, true, true, $attributes
        );
        $raw = preg_replace('~(\r\n|\n|\r)\z~', '', $stripped);
        $attributes['rawValue'] = $raw;

        // Only heredoc contents have their escape sequences parsed.
        $value = $kind === String_::KIND_HEREDOC
            ? String_::parseEscapeSequences($raw, null, $parseUnicodeEscape)
            : $raw;

        return new String_($value, $attributes);
    }

    /**
     * Creates a Comment (or Comment\Doc for T_DOC_COMMENT) from a comment token. The token
     * position is used as both the start and the end token position.
     */
    protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
        assert($token->id === \T_COMMENT || $token->id == \T_DOC_COMMENT);

        $commentClass = \T_DOC_COMMENT === $token->id ? Comment\Doc::class : Comment::class;

        return new $commentClass(
            $token->text, $token->line, $token->pos, $tokenPos,
            $token->getEndLine(), $token->getEndPos() - 1, $tokenPos
        );
    }

    /**
     * Get last comment before the given token position, if any
     */
    protected function getCommentBeforeToken(int $tokenPos): ?Comment {
        for ($pos = $tokenPos - 1; $pos >= 0; --$pos) {
            $token = $this->tokens[$pos];

            // Stop at the first token that is not in the drop list.
            if (!isset($this->dropTokens[$token->id])) {
                return null;
            }

            if ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT) {
                return $this->createCommentFromToken($token, $pos);
            }
        }

        return null;
    }

    /**
     * Create a zero-length nop to capture preceding comments, if any.
*/
    protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
        $comment = $this->getCommentBeforeToken($tokenPos);
        if ($comment === null) {
            return null;
        }

        // The nop is placed directly after the comment: every start attribute is one past the
        // corresponding end attribute (same line), which makes the node zero-length.
        $commentEndLine = $comment->getEndLine();
        $commentEndFilePos = $comment->getEndFilePos();
        $commentEndTokenPos = $comment->getEndTokenPos();
        $attributes = [
            'startLine' => $commentEndLine,
            'endLine' => $commentEndLine,
            'startFilePos' => $commentEndFilePos + 1,
            'endFilePos' => $commentEndFilePos,
            'startTokenPos' => $commentEndTokenPos + 1,
            'endTokenPos' => $commentEndTokenPos,
        ];
        return new Nop($attributes);
    }

    /**
     * Create a nop spanning the given token range, but only if a comment precedes its start.
     *
     * @param int $tokenStartPos Token position the nop starts at
     * @param int $tokenEndPos Token position the nop ends at
     *
     * @return Nop|null The nop, or null if there is no comment before $tokenStartPos
     */
    protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
        if ($this->getCommentBeforeToken($tokenStartPos) === null) {
            return null;
        }
        return new Nop($this->getAttributes($tokenStartPos, $tokenEndPos));
    }

    /**
     * Handles __halt_compiler(): skips all remaining tokens and returns the trailing data.
     *
     * @return string Text of the token following the current one if it is T_INLINE_HTML,
     *                otherwise an empty string
     */
    protected function handleHaltCompiler(): string {
        // Prevent the lexer from returning any further tokens.
        // The position is moved to just before the final entry of the token array.
        $nextToken = $this->tokens[$this->tokenPos + 1];
        $this->tokenPos = \count($this->tokens) - 2;

        // Return text after __halt_compiler.
        return $nextToken->id === \T_INLINE_HTML ? $nextToken->text : '';
    }

    /**
     * Whether the inline HTML token at the given stack position is preceded by a newline.
     *
     * @param int $stackPos Stack location of the T_INLINE_HTML token
     *
     * @return bool True if the token is the first token, or if the preceding close tag
     *              contains "\n" or "\r"
     */
    protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
        $tokenPos = $this->tokenStartStack[$stackPos];
        $token = $this->tokens[$tokenPos];
        assert($token->id == \T_INLINE_HTML);
        if ($tokenPos > 0) {
            $prevToken = $this->tokens[$tokenPos - 1];
            assert($prevToken->id == \T_CLOSE_TAG);
            return false !== strpos($prevToken->text, "\n")
                || false !== strpos($prevToken->text, "\r");
        }
        return true;
    }

    /**
     * Attributes for an empty array element located at the given token position.
     *
     * @return array<string, mixed>
     */
    protected function createEmptyElemAttributes(int $tokenPos): array {
        return $this->getAttributesForToken($tokenPos);
    }

    /**
     * Converts an array literal that is used as a destructuring target into a List_ node.
     *
     * The array is removed from $createdArrays so that parse() does not report its empty
     * elements as errors. Nested array items are converted recursively.
     */
    protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
        // offsetUnset() is equivalent to detach(), which is deprecated as of PHP 8.5.
        $this->createdArrays->offsetUnset($node);
        return new Expr\List_(array_map(function (Node\ArrayItem $item) {
            if ($item->value instanceof Expr\Error) {
                // We used Error as a placeholder for empty elements, which are legal for destructuring.
                return null;
            }
            if ($item->value instanceof Array_) {
                return new Node\ArrayItem(
                    $this->fixupArrayDestructuring($item->value),
                    $item->key, $item->byRef, $item->getAttributes());
            }
            return $item;
        }, $node->items), ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes());
    }

    /**
     * Replaces placeholder items (items whose value is an Expr\Error) of a list() node with
     * null, modifying the node in place.
     */
    protected function postprocessList(Expr\List_ $node): void {
        foreach ($node->items as $i => $item) {
            if ($item->value instanceof Expr\Error) {
                // We used Error as a placeholder for empty elements, which are legal for destructuring.
                $node->items[$i] = null;
            }
        }
    }

    /**
     * Extends the end attributes of an alternative-syntax else/elseif node to those of its
     * last statement when that statement is a Nop.
     *
     * @param ElseIf_|Else_ $node
     */
    protected function fixupAlternativeElse($node): void {
        // Make sure a trailing nop statement carrying comments is part of the node.
        $numStmts = \count($node->stmts);
        if ($numStmts !== 0 && $node->stmts[$numStmts - 1] instanceof Nop) {
            $nopAttrs = $node->stmts[$numStmts - 1]->getAttributes();
            if (isset($nopAttrs['endLine'])) {
                $node->setAttribute('endLine', $nopAttrs['endLine']);
            }
            if (isset($nopAttrs['endFilePos'])) {
                $node->setAttribute('endFilePos', $nopAttrs['endFilePos']);
            }
            if (isset($nopAttrs['endTokenPos'])) {
                $node->setAttribute('endTokenPos', $nopAttrs['endTokenPos']);
            }
        }
    }

    /**
     * Verifies a class modifier combination; a resulting Error is given the attributes at
     * $modifierPos and emitted instead of being thrown.
     *
     * @param int $a Modifier flags passed first to Modifiers::verifyClassModifier()
     * @param int $b Modifier flags passed second to Modifiers::verifyClassModifier()
     * @param int $modifierPos Stack location used for the error attributes
     */
    protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
        try {
            Modifiers::verifyClassModifier($a, $b);
        } catch (Error $error) {
            $error->setAttributes($this->getAttributesAt($modifierPos));
            $this->emitError($error);
        }
    }

    /**
     * Verifies a member modifier combination; a resulting Error is given the attributes at
     * $modifierPos and emitted instead of being thrown.
     *
     * @param int $a Modifier flags passed first to Modifiers::verifyModifier()
     * @param int $b Modifier flags passed second to Modifiers::verifyModifier()
     * @param int $modifierPos Stack location used for the error attributes
     */
    protected function checkModifier(int $a, int $b, int $modifierPos): void {
        // Jumping through some hoops here because verifyModifier() is also used elsewhere
        try {
            Modifiers::verifyModifier($a, $b);
        } catch (Error $error) {
            $error->setAttributes($this->getAttributesAt($modifierPos));
            $this->emitError($error);
        }
    }

    /**
     * Emits an error if a variadic parameter declares a default value.
     */
    protected function checkParam(Param $node): void {
        if ($node->variadic && null !== $node->default) {
            $this->emitError(new Error(
                'Variadic parameter cannot have a default value',
                $node->default->getAttributes()
            ));
        }
    }

    /**
     * Emits an error if a try statement has neither catch clauses nor a finally clause.
     */
    protected function checkTryCatch(TryCatch $node): void {
        if (empty($node->catches) && null === $node->finally) {
            $this->emitError(new Error(
                'Cannot use try without catch or finally', $node->getAttributes()
            ));
        }
    }

    protected function checkNamespace(Namespace_ $node): void {
        if (null !== $node->stmts) {
            foreach ($node->stmts as $stmt) {
                if ($stmt instanceof Namespace_) {
                    $this->emitError(new Error(
                        'Namespace declarations cannot be nested', $stmt->getAttributes()
                    ));
1062 } 1063 } 1064 } 1065 } 1066 1067 private function checkClassName(?Identifier $name, int $namePos): void { 1068 if (null !== $name && $name->isSpecialClassName()) { 1069 $this->emitError(new Error( 1070 sprintf('Cannot use \'%s\' as class name as it is reserved', $name), 1071 $this->getAttributesAt($namePos) 1072 )); 1073 } 1074 } 1075 1076 /** @param Name[] $interfaces */ 1077 private function checkImplementedInterfaces(array $interfaces): void { 1078 foreach ($interfaces as $interface) { 1079 if ($interface->isSpecialClassName()) { 1080 $this->emitError(new Error( 1081 sprintf('Cannot use \'%s\' as interface name as it is reserved', $interface), 1082 $interface->getAttributes() 1083 )); 1084 } 1085 } 1086 } 1087 1088 protected function checkClass(Class_ $node, int $namePos): void { 1089 $this->checkClassName($node->name, $namePos); 1090 1091 if ($node->extends && $node->extends->isSpecialClassName()) { 1092 $this->emitError(new Error( 1093 sprintf('Cannot use \'%s\' as class name as it is reserved', $node->extends), 1094 $node->extends->getAttributes() 1095 )); 1096 } 1097 1098 $this->checkImplementedInterfaces($node->implements); 1099 } 1100 1101 protected function checkInterface(Interface_ $node, int $namePos): void { 1102 $this->checkClassName($node->name, $namePos); 1103 $this->checkImplementedInterfaces($node->extends); 1104 } 1105 1106 protected function checkEnum(Enum_ $node, int $namePos): void { 1107 $this->checkClassName($node->name, $namePos); 1108 $this->checkImplementedInterfaces($node->implements); 1109 } 1110 1111 protected function checkClassMethod(ClassMethod $node, int $modifierPos): void { 1112 if ($node->flags & Modifiers::STATIC) { 1113 switch ($node->name->toLowerString()) { 1114 case '__construct': 1115 $this->emitError(new Error( 1116 sprintf('Constructor %s() cannot be static', $node->name), 1117 $this->getAttributesAt($modifierPos))); 1118 break; 1119 case '__destruct': 1120 $this->emitError(new Error( 1121 sprintf('Destructor %s() 
cannot be static', $node->name), 1122 $this->getAttributesAt($modifierPos))); 1123 break; 1124 case '__clone': 1125 $this->emitError(new Error( 1126 sprintf('Clone method %s() cannot be static', $node->name), 1127 $this->getAttributesAt($modifierPos))); 1128 break; 1129 } 1130 } 1131 1132 if ($node->flags & Modifiers::READONLY) { 1133 $this->emitError(new Error( 1134 sprintf('Method %s() cannot be readonly', $node->name), 1135 $this->getAttributesAt($modifierPos))); 1136 } 1137 } 1138 1139 protected function checkClassConst(ClassConst $node, int $modifierPos): void { 1140 foreach ([Modifiers::STATIC, Modifiers::ABSTRACT, Modifiers::READONLY] as $modifier) { 1141 if ($node->flags & $modifier) { 1142 $this->emitError(new Error( 1143 "Cannot use '" . Modifiers::toString($modifier) . "' as constant modifier", 1144 $this->getAttributesAt($modifierPos))); 1145 } 1146 } 1147 } 1148 1149 protected function checkUseUse(UseItem $node, int $namePos): void { 1150 if ($node->alias && $node->alias->isSpecialClassName()) { 1151 $this->emitError(new Error( 1152 sprintf( 1153 'Cannot use %s as %s because \'%2$s\' is a special class name', 1154 $node->name, $node->alias 1155 ), 1156 $this->getAttributesAt($namePos) 1157 )); 1158 } 1159 } 1160 1161 /** @param PropertyHook[] $hooks */ 1162 protected function checkPropertyHookList(array $hooks, int $hookPos): void { 1163 if (empty($hooks)) { 1164 $this->emitError(new Error( 1165 'Property hook list cannot be empty', $this->getAttributesAt($hookPos))); 1166 } 1167 } 1168 1169 protected function checkPropertyHook(PropertyHook $hook, ?int $paramListPos): void { 1170 $name = $hook->name->toLowerString(); 1171 if ($name !== 'get' && $name !== 'set') { 1172 $this->emitError(new Error( 1173 'Unknown hook "' . $hook->name . 
'", expected "get" or "set"', 1174 $hook->name->getAttributes())); 1175 } 1176 if ($name === 'get' && $paramListPos !== null) { 1177 $this->emitError(new Error( 1178 'get hook must not have a parameter list', $this->getAttributesAt($paramListPos))); 1179 } 1180 } 1181 1182 protected function checkPropertyHookModifiers(int $a, int $b, int $modifierPos): void { 1183 try { 1184 Modifiers::verifyModifier($a, $b); 1185 } catch (Error $error) { 1186 $error->setAttributes($this->getAttributesAt($modifierPos)); 1187 $this->emitError($error); 1188 } 1189 1190 if ($b != Modifiers::FINAL) { 1191 $this->emitError(new Error( 1192 'Cannot use the ' . Modifiers::toString($b) . ' modifier on a property hook', 1193 $this->getAttributesAt($modifierPos))); 1194 } 1195 } 1196 1197 /** @param array<Node\Arg|Node\VariadicPlaceholder> $args */ 1198 private function isSimpleExit(array $args): bool { 1199 if (\count($args) === 0) { 1200 return true; 1201 } 1202 if (\count($args) === 1) { 1203 $arg = $args[0]; 1204 return $arg instanceof Arg && $arg->name === null && 1205 $arg->byRef === false && $arg->unpack === false; 1206 } 1207 return false; 1208 } 1209 1210 /** 1211 * @param array<Node\Arg|Node\VariadicPlaceholder> $args 1212 * @param array<string, mixed> $attrs 1213 */ 1214 protected function createExitExpr(string $name, int $namePos, array $args, array $attrs): Expr { 1215 if ($this->isSimpleExit($args)) { 1216 // Create Exit node for backwards compatibility. 1217 $attrs['kind'] = strtolower($name) === 'exit' ? Expr\Exit_::KIND_EXIT : Expr\Exit_::KIND_DIE; 1218 return new Expr\Exit_(\count($args) === 1 ? $args[0]->value : null, $attrs); 1219 } 1220 return new Expr\FuncCall(new Name($name, $this->getAttributesAt($namePos)), $args, $attrs); 1221 } 1222 1223 /** 1224 * Creates the token map. 1225 * 1226 * The token map maps the PHP internal token identifiers 1227 * to the identifiers used by the Parser. Additionally it 1228 * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. 
1229 * 1230 * @return array<int, int> The token map 1231 */ 1232 protected function createTokenMap(): array { 1233 $tokenMap = []; 1234 1235 // Single-char tokens use an identity mapping. 1236 for ($i = 0; $i < 256; ++$i) { 1237 $tokenMap[$i] = $i; 1238 } 1239 1240 foreach ($this->symbolToName as $name) { 1241 if ($name[0] === 'T') { 1242 $tokenMap[\constant($name)] = constant(static::class . '::' . $name); 1243 } 1244 } 1245 1246 // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO 1247 $tokenMap[\T_OPEN_TAG_WITH_ECHO] = static::T_ECHO; 1248 // T_CLOSE_TAG is equivalent to ';' 1249 $tokenMap[\T_CLOSE_TAG] = ord(';'); 1250 1251 // We have created a map from PHP token IDs to external symbol IDs. 1252 // Now map them to the internal symbol ID. 1253 $fullTokenMap = []; 1254 foreach ($tokenMap as $phpToken => $extSymbol) { 1255 $intSymbol = $this->tokenToSymbol[$extSymbol]; 1256 if ($intSymbol === $this->invalidSymbol) { 1257 continue; 1258 } 1259 $fullTokenMap[$phpToken] = $intSymbol; 1260 } 1261 1262 return $fullTokenMap; 1263 } 1264} 1265