1<?php declare(strict_types=1);
2
3namespace PhpParser\Node\Scalar;
4
5use PhpParser\Error;
6use PhpParser\Node\Scalar;
7
/**
 * Scalar node holding a string value.
 *
 * Besides storing the value, this class provides static helpers that turn the
 * source text of a quoted string token (including its quotes and an optional
 * b/B prefix) into the string value it denotes.
 */
class String_ extends Scalar {
    /* For use in "kind" attribute */
    public const KIND_SINGLE_QUOTED = 1;
    public const KIND_DOUBLE_QUOTED = 2;
    public const KIND_HEREDOC = 3;
    public const KIND_NOWDOC = 4;

    /** @var string String value */
    public string $value;

    /**
     * @var array<string, string> Maps the character following a backslash
     *                            to the string the escape sequence stands for
     */
    protected static array $replacements = [
        '\\' => '\\',
        '$'  =>  '$',
        'n'  => "\n",
        'r'  => "\r",
        't'  => "\t",
        'f'  => "\f",
        'v'  => "\v",
        'e'  => "\x1B",
    ];

    /**
     * Constructs a string scalar node.
     *
     * @param string $value Value of the string
     * @param array<string, mixed> $attributes Additional attributes
     */
    public function __construct(string $value, array $attributes = []) {
        $this->attributes = $attributes;
        $this->value = $value;
    }

    /**
     * @return string[] Names of the properties that are sub nodes of this node
     */
    public function getSubNodeNames(): array {
        return ['value'];
    }

    /**
     * Creates a node from the source text of a quoted string token.
     *
     * The "kind" attribute is set to KIND_SINGLE_QUOTED when the token starts
     * with a single quote (optionally preceded by a b/B prefix) and to
     * KIND_DOUBLE_QUOTED otherwise. The unmodified token text is stored in the
     * "rawValue" attribute. Both entries overwrite same-named keys passed in
     * $attributes.
     *
     * @param string $str String token content, including the quotes
     * @param array<string, mixed> $attributes Additional attributes
     * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
     *
     * @return self Node whose value is the parsed string
     */
    public static function fromString(string $str, array $attributes = [], bool $parseUnicodeEscape = true): self {
        $attributes['kind'] = ($str[0] === "'" || ($str[1] === "'" && ($str[0] === 'b' || $str[0] === 'B')))
            ? self::KIND_SINGLE_QUOTED
            : self::KIND_DOUBLE_QUOTED;

        $attributes['rawValue'] = $str;

        $string = self::parse($str, $parseUnicodeEscape);

        return new self($string, $attributes);
    }

    /**
     * @internal
     *
     * Parses a string token.
     *
     * An optional leading b/B prefix is skipped. For a single-quoted token only
     * the sequences \\ and \' are unescaped; any other token is treated as
     * double-quoted and passed through parseEscapeSequences().
     *
     * @param string $str String token content, including the quotes
     * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
     *
     * @return string The parsed string
     */
    public static function parse(string $str, bool $parseUnicodeEscape = true): string {
        // Number of prefix characters (0 or 1) in front of the opening quote.
        $bLength = 0;
        if ('b' === $str[0] || 'B' === $str[0]) {
            $bLength = 1;
        }

        // Strip the prefix, the opening quote and the closing quote.
        $content = substr($str, $bLength + 1, -1);

        if ('\'' === $str[$bLength]) {
            return str_replace(
                ['\\\\', '\\\''],
                ['\\', '\''],
                $content
            );
        }

        return self::parseEscapeSequences($content, '"', $parseUnicodeEscape);
    }

    /**
     * @internal
     *
     * Parses escape sequences in strings (all string types apart from single quoted).
     *
     * Recognized sequences are the single-character escapes listed in
     * self::$replacements, hexadecimal escapes (\x or \X followed by one or two
     * hex digits), octal escapes (one to three octal digits) and, when enabled,
     * \u{...} code point escapes. Backslashes that do not start one of these
     * sequences are left untouched.
     *
     * @param string $str String without quotes
     * @param null|string $quote Quote type; when given, a backslash-escaped
     *                           occurrence of it is replaced by the bare quote
     * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
     *
     * @return string String with escape sequences parsed
     *
     * @throws Error If a \u{...} escape denotes a code point above U+10FFFF
     */
    public static function parseEscapeSequences(string $str, ?string $quote, bool $parseUnicodeEscape = true): string {
        if (null !== $quote) {
            $str = str_replace('\\' . $quote, $quote, $str);
        }

        $extra = '';
        if ($parseUnicodeEscape) {
            $extra = '|u\{([0-9a-fA-F]+)\}';
        }

        return preg_replace_callback(
            '~\\\\([\\\\$nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}' . $extra . ')~',
            static function (array $matches): string {
                // Everything after the backslash.
                $str = $matches[1];

                if (isset(self::$replacements[$str])) {
                    return self::$replacements[$str];
                }
                if ('x' === $str[0] || 'X' === $str[0]) {
                    return chr(hexdec(substr($str, 1)));
                }
                if ('u' === $str[0]) {
                    $dec = hexdec($matches[2]);
                    // If it overflowed to float, treat as INT_MAX, it will throw an error anyway.
                    return self::codePointToUtf8(\is_int($dec) ? $dec : \PHP_INT_MAX);
                }

                // Octal escape. chr() reduces values above 255 modulo 256.
                return chr(octdec($str));
            },
            $str
        );
    }

    /**
     * Converts a Unicode code point to its UTF-8 encoded representation.
     *
     * @param int $num Code point
     *
     * @return string UTF-8 representation of code point
     *
     * @throws Error If the code point is larger than U+10FFFF
     */
    private static function codePointToUtf8(int $num): string {
        if ($num <= 0x7F) {
            return chr($num);
        }
        if ($num <= 0x7FF) {
            return chr(($num >> 6) + 0xC0) . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= 0xFFFF) {
            return chr(($num >> 12) + 0xE0) . chr((($num >> 6) & 0x3F) + 0x80) . chr(($num & 0x3F) + 0x80);
        }
        // U+10FFFF is the highest Unicode code point. PHP itself rejects larger
        // values in \u{...} escapes, so do not emit a 4-byte sequence for them.
        if ($num <= 0x10FFFF) {
            return chr(($num >> 18) + 0xF0) . chr((($num >> 12) & 0x3F) + 0x80)
                 . chr((($num >> 6) & 0x3F) + 0x80) . chr(($num & 0x3F) + 0x80);
        }
        throw new Error('Invalid UTF-8 codepoint escape sequence: Codepoint too large');
    }

    /**
     * @return string Type identifier of this node
     */
    public function getType(): string {
        return 'Scalar_String';
    }
}
162