xref: /PHP-Parser/lib/PhpParser/Comment.php (revision 09691fc8)
1<?php declare(strict_types=1);
2
3namespace PhpParser;
4
/**
 * Represents a single comment together with its start/end position information.
 *
 * Positions default to -1 when they are not available.
 */
class Comment implements \JsonSerializable {
    /** @var string Comment text, including the comment delimiters */
    protected string $text;
    /** @var int Line the comment starts on (-1 if not available) */
    protected int $startLine;
    /** @var int File offset the comment starts on (-1 if not available) */
    protected int $startFilePos;
    /** @var int Token offset the comment starts on (-1 if not available) */
    protected int $startTokenPos;
    /** @var int Line the comment ends on (-1 if not available) */
    protected int $endLine;
    /** @var int File offset the comment ends on (-1 if not available) */
    protected int $endFilePos;
    /** @var int Token offset the comment ends on (-1 if not available) */
    protected int $endTokenPos;

    /**
     * Constructs a comment node.
     *
     * @param string $text Comment text (including comment delimiters like /*)
     * @param int $startLine Line number the comment started on
     * @param int $startFilePos File offset the comment started on
     * @param int $startTokenPos Token offset the comment started on
     * @param int $endLine Line number the comment ends on
     * @param int $endFilePos File offset the comment ends on
     * @param int $endTokenPos Token offset the comment ends on
     */
    public function __construct(
        string $text,
        int $startLine = -1, int $startFilePos = -1, int $startTokenPos = -1,
        int $endLine = -1, int $endFilePos = -1, int $endTokenPos = -1
    ) {
        $this->text = $text;
        $this->startLine = $startLine;
        $this->startFilePos = $startFilePos;
        $this->startTokenPos = $startTokenPos;
        $this->endLine = $endLine;
        $this->endFilePos = $endFilePos;
        $this->endTokenPos = $endTokenPos;
    }

    /**
     * Gets the comment text.
     *
     * @return string The comment text (including comment delimiters like /*)
     */
    public function getText(): string {
        return $this->text;
    }

    /**
     * Gets the line number the comment started on.
     *
     * @return int Line number (or -1 if not available)
     * @phpstan-return -1|positive-int
     */
    public function getStartLine(): int {
        return $this->startLine;
    }

    /**
     * Gets the file offset the comment started on.
     *
     * @return int File offset (or -1 if not available)
     */
    public function getStartFilePos(): int {
        return $this->startFilePos;
    }

    /**
     * Gets the token offset the comment started on.
     *
     * @return int Token offset (or -1 if not available)
     */
    public function getStartTokenPos(): int {
        return $this->startTokenPos;
    }

    /**
     * Gets the line number the comment ends on.
     *
     * @return int Line number (or -1 if not available)
     * @phpstan-return -1|positive-int
     */
    public function getEndLine(): int {
        return $this->endLine;
    }

    /**
     * Gets the file offset the comment ends on.
     *
     * @return int File offset (or -1 if not available)
     */
    public function getEndFilePos(): int {
        return $this->endFilePos;
    }

    /**
     * Gets the token offset the comment ends on.
     *
     * @return int Token offset (or -1 if not available)
     */
    public function getEndTokenPos(): int {
        return $this->endTokenPos;
    }

    /**
     * Gets the comment text.
     *
     * @return string The comment text (including comment delimiters like /*)
     */
    public function __toString(): string {
        return $this->text;
    }

    /**
     * Gets the reformatted comment text.
     *
     * "Reformatted" here means that we try to clean up the whitespace at the
     * starts of the lines. This is necessary because we receive the comments
     * without leading whitespace on the first line, but with leading whitespace
     * on all subsequent lines.
     *
     * Additionally, this normalizes CRLF newlines to LF newlines.
     *
     * If a replacement fails with a PCRE error, the newline-normalized text is
     * returned without further reformatting.
     *
     * @return string The reformatted comment text
     */
    public function getReformattedText(): string {
        $text = str_replace("\r\n", "\n", $this->text);
        $newlinePos = strpos($text, "\n");
        if (false === $newlinePos) {
            // Single line comments don't need further processing
            return $text;
        }
        if (preg_match('(^.*(?:\n\s+\*.*)+$)', $text)) {
            // Multi line comment of the type
            //
            //     /*
            //      * Some text.
            //      * Some more text.
            //      */
            //
            // is handled by replacing the whitespace sequences before the * by a single space
            return preg_replace('(^\s+\*)m', ' *', $text) ?? $text;
        }
        if (preg_match('(^/\*\*?\s*\n)', $text) && preg_match('(\n(\s*)\*/$)', $text, $matches)) {
            // Multi line comment of the type
            //
            //    /*
            //        Some text.
            //        Some more text.
            //    */
            //
            // is handled by removing the whitespace sequence on the line before the closing
            // */ on all lines. So if the last line is "    */", then "    " is removed at the
            // start of all lines.
            return preg_replace('(^' . preg_quote($matches[1]) . ')m', '', $text) ?? $text;
        }
        if (preg_match('(^/\*\*?\s*(?!\s))', $text, $matches)) {
            // Multi line comment of the type
            //
            //     /* Some text.
            //        Some more text.
            //          Indented text.
            //        Even more text. */
            //
            // is handled by removing the difference between the shortest whitespace prefix on all
            // lines and the length of the "/* " opening sequence.
            $prefixLen = $this->getShortestWhitespacePrefixLen(substr($text, $newlinePos + 1));
            $removeLen = $prefixLen - strlen($matches[0]);
            if ($removeLen <= 0) {
                // Nothing to strip. A negative count must not reach the pattern: "\s{-3}" is
                // not a quantifier, so PCRE would match the literal characters "{-3}" instead.
                return $text;
            }
            return preg_replace('(^\s{' . $removeLen . '})m', '', $text) ?? $text;
        }

        // No idea how to format this comment, so simply return as is
        return $text;
    }

    /**
     * Get length of shortest whitespace prefix (at the start of a line).
     *
     * If there is a line with no prefix whitespace, 0 is a valid return value.
     * Empty lines count as having a prefix of length 0.
     *
     * @param string $str String to check
     * @return int Length in characters. Tabs count as single characters.
     */
    private function getShortestWhitespacePrefixLen(string $str): int {
        $lines = explode("\n", $str);
        $shortestPrefixLen = \PHP_INT_MAX;
        foreach ($lines as $line) {
            // '\s*' always matches (possibly the empty string), so $matches[0] is always set
            preg_match('(^\s*)', $line, $matches);
            $prefixLen = strlen($matches[0]);
            if ($prefixLen < $shortestPrefixLen) {
                $shortestPrefixLen = $prefixLen;
            }
        }
        return $shortestPrefixLen;
    }

    /**
     * Serializes the comment to an array shaped like a node.
     *
     * The 'line', 'filePos' and 'tokenPos' keys hold the start positions.
     *
     * @return array{nodeType:string, text:string, line:int, filePos:int, tokenPos:int, endLine:int, endFilePos:int, endTokenPos:int}
     */
    public function jsonSerialize(): array {
        // Technically not a node, but we make it look like one anyway
        $type = $this instanceof Comment\Doc ? 'Comment_Doc' : 'Comment';
        return [
            'nodeType' => $type,
            'text' => $this->text,
            // TODO: Rename these to include "start".
            'line' => $this->startLine,
            'filePos' => $this->startFilePos,
            'tokenPos' => $this->startTokenPos,
            'endLine' => $this->endLine,
            'endFilePos' => $this->endFilePos,
            'endTokenPos' => $this->endTokenPos,
        ];
    }
}
210