<?php declare(strict_types=1);

namespace PhpParser\Internal;

if (\PHP_VERSION_ID >= 80000) {
    // PHP 8.0+ provides a native PhpToken: expose it under the polyfill's name and skip the
    // userland implementation below.
    class TokenPolyfill extends \PhpToken {
    }
    return;
}

/**
 * This is a polyfill for the PhpToken class introduced in PHP 8.0. We do not actually polyfill
 * PhpToken, because composer might end up picking a different polyfill implementation, which does
 * not meet our requirements.
 *
 * @internal
 */
class TokenPolyfill {
    /** @var int The ID of the token. Either a T_* constant or a character code < 256. */
    public int $id;
    /** @var string The textual content of the token. */
    public string $text;
    /** @var int The 1-based starting line of the token (or -1 if unknown). */
    public int $line;
    /** @var int The 0-based starting position of the token (or -1 if unknown). */
    public int $pos;

    /** @var array<int, bool> Tokens ignored by the PHP parser. */
    private const IGNORABLE_TOKENS = [
        \T_WHITESPACE => true,
        \T_COMMENT => true,
        \T_DOC_COMMENT => true,
        \T_OPEN_TAG => true,
    ];

    /** @var array<int, bool> Tokens that may be part of a T_NAME_* identifier. */
    private static array $identifierTokens;

    /**
     * Create a Token with the given ID and text, as well as optional line and position
     * information.
     *
     * @param int $id Token ID: a T_* constant or a character code < 256
     * @param string $text Textual content of the token
     * @param int $line 1-based starting line, or -1 if unknown
     * @param int $pos 0-based starting byte offset, or -1 if unknown
     */
    final public function __construct(int $id, string $text, int $line = -1, int $pos = -1) {
        $this->id = $id;
        $this->text = $text;
        $this->line = $line;
        $this->pos = $pos;
    }

    /**
     * Get the name of the token. For single-char tokens this will be the token character.
     * Otherwise it will be a T_* style name, or null if the token ID is unknown.
     */
    public function getTokenName(): ?string {
        if ($this->id < 256) {
            return \chr($this->id);
        }

        $name = \token_name($this->id);
        return $name === 'UNKNOWN' ? null : $name;
    }

    /**
     * Check whether the token is of the given kind. The kind may be either an integer that matches
     * the token ID, a string that matches the token text, or an array of integers/strings. In the
     * latter case, the function returns true if any of the kinds in the array match.
     *
     * @param int|string|(int|string)[] $kind
     *
     * @throws \TypeError If $kind, or an element of an array $kind that is reached before a
     *                    match is found, is neither an int nor a string
     */
    public function is($kind): bool {
        if (\is_int($kind)) {
            return $this->id === $kind;
        }
        if (\is_string($kind)) {
            return $this->text === $kind;
        }
        if (\is_array($kind)) {
            foreach ($kind as $entry) {
                if (\is_int($entry)) {
                    if ($this->id === $entry) {
                        return true;
                    }
                } elseif (\is_string($entry)) {
                    if ($this->text === $entry) {
                        return true;
                    }
                } else {
                    throw new \TypeError(
                        'Argument #1 ($kind) must only have elements of type string|int, ' .
                        \gettype($entry) . ' given');
                }
            }
            return false;
        }
        throw new \TypeError(
            'Argument #1 ($kind) must be of type string|int|array, ' . \gettype($kind) . ' given');
    }

    /**
     * Check whether this token would be ignored by the PHP parser. Returns true for T_WHITESPACE,
     * T_COMMENT, T_DOC_COMMENT and T_OPEN_TAG, and false for everything else.
     */
    public function isIgnorable(): bool {
        return isset(self::IGNORABLE_TOKENS[$this->id]);
    }

    /**
     * Return the textual content of the token.
     */
    public function __toString(): string {
        return $this->text;
    }

    /**
     * Tokenize the given source code and return an array of tokens.
     *
     * This performs certain canonicalizations to match the PHP 8.0 token format:
     *  * Bad characters are represented using T_BAD_CHARACTER rather than omitted.
     *  * T_COMMENT does not include trailing newlines, instead the newline is part of a following
     *    T_WHITESPACE token.
     *  * Namespaced names are represented using T_NAME_* tokens.
     *
     * Lines and positions are tracked here rather than taken from token_get_all(). Each of
     * "\r\n", "\n" and a lone "\r" advances the line by one, which is intended to match the
     * line numbers reported by the native PhpToken.
     *
     * @param string $code Source code to tokenize
     * @param int $flags Flags passed through unchanged to token_get_all()
     *
     * @return static[]
     */
    public static function tokenize(string $code, int $flags = 0): array {
        self::init();

        $tokens = [];
        $line = 1;
        $pos = 0;
        $origTokens = \token_get_all($code, $flags);

        $numTokens = \count($origTokens);
        for ($i = 0; $i < $numTokens; $i++) {
            $token = $origTokens[$i];
            if (\is_string($token)) {
                if (\strlen($token) === 2) {
                    // b" and B" are tokenized as single-char tokens, even though they aren't.
                    $tokens[] = new static(\ord('"'), $token, $line, $pos);
                    $pos += 2;
                } else {
                    $tokens[] = new static(\ord($token), $token, $line, $pos);
                    $pos++;
                }
                continue;
            }

            $id = $token[0];
            $text = $token[1];

            // Emulate PHP 8.0 comment format, which does not include trailing whitespace anymore.
            if ($id === \T_COMMENT && \substr($text, 0, 2) !== '/*' &&
                \preg_match('/(\r\n|\n|\r)$/D', $text, $matches)
            ) {
                $trailingNewline = $matches[0];
                $text = \substr($text, 0, -\strlen($trailingNewline));
                $tokens[] = new static($id, $text, $line, $pos);
                $pos += \strlen($text);

                if ($i + 1 < $numTokens && $origTokens[$i + 1][0] === \T_WHITESPACE) {
                    // Move trailing newline into following T_WHITESPACE token, if it already
                    // exists. That token is emitted by the next iteration, which also accounts
                    // for the line break and the position of the moved newline.
                    $origTokens[$i + 1][1] = $trailingNewline . $origTokens[$i + 1][1];
                } else {
                    // Otherwise, we need to create a new T_WHITESPACE token.
                    $tokens[] = new static(\T_WHITESPACE, $trailingNewline, $line, $pos);
                    $line++;
                    $pos += \strlen($trailingNewline);
                }
                continue;
            }

            // Emulate PHP 8.0 T_NAME_* tokens, by combining sequences of T_NS_SEPARATOR and
            // T_STRING into a single token.
            if ($id === \T_NS_SEPARATOR || isset(self::$identifierTokens[$id])) {
                $newText = $text;
                $lastWasSeparator = $id === \T_NS_SEPARATOR;
                // Consume tokens for as long as identifiers and separators strictly alternate.
                for ($j = $i + 1; $j < $numTokens; $j++) {
                    if ($lastWasSeparator) {
                        if (!isset(self::$identifierTokens[$origTokens[$j][0]])) {
                            break;
                        }
                        $lastWasSeparator = false;
                    } else {
                        if ($origTokens[$j][0] !== \T_NS_SEPARATOR) {
                            break;
                        }
                        $lastWasSeparator = true;
                    }
                    $newText .= $origTokens[$j][1];
                }
                if ($lastWasSeparator) {
                    // Trailing separator is not part of the name.
                    $j--;
                    $newText = \substr($newText, 0, -1);
                }
                // Only emit a T_NAME_* token if at least one further token was merged.
                if ($j > $i + 1) {
                    if ($id === \T_NS_SEPARATOR) {
                        $id = \T_NAME_FULLY_QUALIFIED;
                    } elseif ($id === \T_NAMESPACE) {
                        $id = \T_NAME_RELATIVE;
                    } else {
                        $id = \T_NAME_QUALIFIED;
                    }
                    $tokens[] = new static($id, $newText, $line, $pos);
                    $pos += \strlen($newText);
                    // Resume after the last merged token (the loop header increments $i).
                    $i = $j - 1;
                    continue;
                }
            }

            $tokens[] = new static($id, $text, $line, $pos);
            $line += self::countNewlines($text);
            $pos += \strlen($text);
        }
        return $tokens;
    }

    /**
     * Count the line breaks in the given text, treating each of "\r\n", "\n" and a lone "\r" as
     * a single line break.
     */
    private static function countNewlines(string $text): int {
        // Every "\n" and every "\r" is a candidate; each "\r\n" pair was counted twice.
        return \substr_count($text, "\n")
            + \substr_count($text, "\r")
            - \substr_count($text, "\r\n");
    }

    /** Initialize private static state needed by tokenize(). */
    private static function init(): void {
        if (isset(self::$identifierTokens)) {
            return;
        }

        // Based on semi_reserved production.
        self::$identifierTokens = \array_fill_keys([
            \T_STRING,
            \T_STATIC, \T_ABSTRACT, \T_FINAL, \T_PRIVATE, \T_PROTECTED, \T_PUBLIC, \T_READONLY,
            \T_INCLUDE, \T_INCLUDE_ONCE, \T_EVAL, \T_REQUIRE, \T_REQUIRE_ONCE, \T_LOGICAL_OR, \T_LOGICAL_XOR, \T_LOGICAL_AND,
            \T_INSTANCEOF, \T_NEW, \T_CLONE, \T_EXIT, \T_IF, \T_ELSEIF, \T_ELSE, \T_ENDIF, \T_ECHO, \T_DO, \T_WHILE,
            \T_ENDWHILE, \T_FOR, \T_ENDFOR, \T_FOREACH, \T_ENDFOREACH, \T_DECLARE, \T_ENDDECLARE, \T_AS, \T_TRY, \T_CATCH,
            \T_FINALLY, \T_THROW, \T_USE, \T_INSTEADOF, \T_GLOBAL, \T_VAR, \T_UNSET, \T_ISSET, \T_EMPTY, \T_CONTINUE, \T_GOTO,
            \T_FUNCTION, \T_CONST, \T_RETURN, \T_PRINT, \T_YIELD, \T_LIST, \T_SWITCH, \T_ENDSWITCH, \T_CASE, \T_DEFAULT,
            \T_BREAK, \T_ARRAY, \T_CALLABLE, \T_EXTENDS, \T_IMPLEMENTS, \T_NAMESPACE, \T_TRAIT, \T_INTERFACE, \T_CLASS,
            \T_CLASS_C, \T_TRAIT_C, \T_FUNC_C, \T_METHOD_C, \T_LINE, \T_FILE, \T_DIR, \T_NS_C, \T_HALT_COMPILER, \T_FN,
            \T_MATCH,
        ], true);
    }
}