1 ///////////////////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3 // Copyright (C) 2001-2025 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ///////////////////////////////////////////////////////////////////////////////////////////////
19
20 package com.puppycrawl.tools.checkstyle.checks.javadoc;
21
22 import java.util.LinkedList;
23 import java.util.List;
24
25 /**
26 * <div>
27 * Helper class used to parse HTML tags or generic type identifiers
28 * from a single-line of text. Just the beginning of the HTML tag
29 * is located. No attempt is made to parse out the complete tag,
30 * particularly since some of the tag parameters could be located
31 * on the following line of text. The {@code hasNextTag} and
32 * {@code nextTag} methods are used to iterate through the HTML
33 * tags or generic type identifiers that were found on the line of text.
34 * </div>
35 *
36 * <p>
37 * This class isn't really specific to HTML tags. Currently, the only HTML
38 * tag that this class looks specifically for is the HTML comment tag.
39 * This class helps figure out if a tag exists and if it is well-formed.
40 * It does not know whether it is valid HTML. This class is also used for
41 * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>,
42 * <MY_FOO_TYPE>}, etc. According to this class they are valid tags.
43 * </p>
44 *
45 */
46 class TagParser {
47
48 /** HtmlTags found on the input line of text. */
49 private final List<HtmlTag> tags = new LinkedList<>();
50
51 /**
52 * Constructs a TagParser and finds the first tag if any.
53 *
54 * @param text the line of text to parse.
55 * @param lineNo the source line number.
56 */
57 /* package */ TagParser(String[] text, int lineNo) {
58 parseTags(text, lineNo);
59 }
60
61 /**
62 * Returns the next available HtmlTag.
63 *
64 * @return a HtmlTag or {@code null} if none available.
65 * @throws IndexOutOfBoundsException if there are no HtmlTags
66 * left to return.
67 */
68 public HtmlTag nextTag() {
69 return tags.remove(0);
70 }
71
72 /**
73 * Indicates if there are any more HtmlTag to retrieve.
74 *
75 * @return {@code true} if there are more tags.
76 */
77 public boolean hasNextTag() {
78 return !tags.isEmpty();
79 }
80
81 /**
82 * Performs lazy initialization on the internal tags List
83 * and adds the tag.
84 *
85 * @param tag the HtmlTag to add.
86 */
87 private void add(HtmlTag tag) {
88 tags.add(tag);
89 }
90
91 /**
92 * Parses the text line for any HTML tags and adds them to the internal
93 * List of tags.
94 *
95 * @param text the source line to parse.
96 * @param lineNo the source line number.
97 */
98 private void parseTags(String[] text, int lineNo) {
99 final int nLines = text.length;
100 Point position = new Point(0, 0);
101 while (position.lineNo() < nLines) {
102 // if this is html comment then skip it
103 if (isCommentTag(text, position)) {
104 position = skipHtmlComment(text, position);
105 }
106 else if (isTag(text, position)) {
107 position = parseTag(text, lineNo, nLines, position);
108 }
109 else {
110 position = getNextPoint(text, position);
111 }
112 position = findChar(text, '<', position);
113 }
114 }
115
116 /**
117 * Parses the tag and return position after it.
118 *
119 * @param text the source line to parse.
120 * @param lineNo the source line number.
121 * @param nLines line length
122 * @param position start position for parsing
123 * @return position after tag
124 */
125 private Point parseTag(String[] text, int lineNo, final int nLines, Point position) {
126 // find end of tag
127 final Point endTag = findChar(text, '>', position);
128 final boolean incompleteTag = endTag.lineNo() >= nLines;
129 // get tag id (one word)
130 final String tagId = getTagId(text, position);
131 // is this closed tag
132 final boolean closedTag =
133 endTag.lineNo() < nLines
134 && text[endTag.lineNo()]
135 .charAt(endTag.columnNo() - 1) == '/';
136 // add new tag
137 add(new HtmlTag(tagId,
138 position.lineNo() + lineNo,
139 position.columnNo(),
140 closedTag,
141 incompleteTag,
142 text[position.lineNo()]));
143 return endTag;
144 }
145
146 /**
147 * Checks if the given position is start one for HTML tag.
148 *
149 * @param javadocText text of javadoc comments.
150 * @param pos position to check.
151 * @return {@code true} some HTML tag starts from given position.
152 */
153 private static boolean isTag(String[] javadocText, Point pos) {
154 final int column = pos.columnNo() + 1;
155 final String text = javadocText[pos.lineNo()];
156
157 // Character.isJavaIdentifier... may not be a valid HTML
158 // identifier but is valid for generics
159 return column >= text.length()
160 || Character.isJavaIdentifierStart(text.charAt(column))
161 || text.charAt(column) == '/';
162 }
163
164 /**
165 * Parse tag id.
166 *
167 * @param javadocText text of javadoc comments.
168 * @param tagStart start position of the tag
169 * @return id for given tag
170 */
171 private static String getTagId(String[] javadocText, Point tagStart) {
172 String tagId = "";
173 int column = tagStart.columnNo() + 1;
174 String text = javadocText[tagStart.lineNo()];
175 if (column < text.length()) {
176 if (text.charAt(column) == '/') {
177 column++;
178 }
179 text = text.substring(column);
180 int position = 0;
181
182 // Character.isJavaIdentifier... may not be a valid HTML
183 // identifier but is valid for generics
184 while (position < text.length()
185 && Character.isJavaIdentifierPart(text.charAt(position))) {
186 position++;
187 }
188
189 tagId = text.substring(0, position);
190 }
191 return tagId;
192 }
193
194 /**
195 * If this is a HTML-comments.
196 *
197 * @param text text of javadoc comments
198 * @param pos position to check
199 * @return {@code true} if HTML-comments
200 * starts form given position.
201 */
202 private static boolean isCommentTag(String[] text, Point pos) {
203 return text[pos.lineNo()].startsWith("<!--", pos.columnNo());
204 }
205
206 /**
207 * Skips HTML comments.
208 *
209 * @param text text of javadoc comments.
210 * @param fromPoint start position of HTML-comments
211 * @return position after HTML-comments
212 */
213 private static Point skipHtmlComment(String[] text, Point fromPoint) {
214 Point toPoint = fromPoint;
215 while (toPoint.lineNo() < text.length && !text[toPoint.lineNo()]
216 .substring(0, toPoint.columnNo() + 1).endsWith("-->")) {
217 toPoint = getNextPoint(text, toPoint);
218 }
219 return toPoint;
220 }
221
222 /**
223 * Finds next occurrence of given character.
224 *
225 * @param text text to search
226 * @param character character to search
227 * @param from position to start search
228 * @return position of next occurrence of given character
229 */
230 private static Point findChar(String[] text, char character, Point from) {
231 Point curr = new Point(from.lineNo(), from.columnNo());
232 while (curr.lineNo() < text.length
233 && text[curr.lineNo()].charAt(curr.columnNo()) != character) {
234 curr = getNextPoint(text, curr);
235 }
236
237 return curr;
238 }
239
240 /**
241 * Increments column number to be examined, moves onto the next line when no
242 * more characters are available.
243 *
244 * @param text to search.
245 * @param from location to search from
246 * @return next point to be examined
247 */
248 private static Point getNextPoint(String[] text, Point from) {
249 int line = from.lineNo();
250 int column = from.columnNo() + 1;
251 while (line < text.length && column >= text[line].length()) {
252 // go to the next line
253 line++;
254 column = 0;
255 }
256 return new Point(line, column);
257 }
258
259 /**
260 * Represents current position in the text.
261 *
262 * @param lineNo Line number.
263 * @param columnNo Column number.
264 */
265 private record Point(int lineNo, int columnNo) {
266
267 /**
268 * Creates new {@code Point} instance.
269 *
270 * @param lineNo line number
271 * @param columnNo column number
272 */
273 private Point {
274 }
275
276 }
277 }