1 ///////////////////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3 // Copyright (C) 2001-2026 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ///////////////////////////////////////////////////////////////////////////////////////////////
19
20 package com.puppycrawl.tools.checkstyle;
21
22 import java.util.Set;
23
24 import org.antlr.v4.runtime.BaseErrorListener;
25 import org.antlr.v4.runtime.CharStreams;
26 import org.antlr.v4.runtime.CommonTokenStream;
27 import org.antlr.v4.runtime.RecognitionException;
28 import org.antlr.v4.runtime.Recognizer;
29 import org.antlr.v4.runtime.atn.PredictionMode;
30 import org.antlr.v4.runtime.misc.ParseCancellationException;
31
32 import com.puppycrawl.tools.checkstyle.api.DetailAST;
33 import com.puppycrawl.tools.checkstyle.api.DetailNode;
34 import com.puppycrawl.tools.checkstyle.grammar.SimpleToken;
35 import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsLexer;
36 import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsParser;
37 import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
38
/**
 * Parses the content of a Javadoc comment into a tree of {@link DetailNode}s.
 */
43 public class JavadocDetailNodeParser {
44
/**
 * Message property key for a parse error raised during rule recognition.
 */
48 public static final String MSG_JAVADOC_PARSE_RULE_ERROR = "javadoc.parse.rule.error";
49
50 /**
51 * Message property key for the Unclosed HTML message.
52 */
53 public static final String MSG_UNCLOSED_HTML_TAG = "javadoc.unclosedHtml";
54
55 /** Symbols with which javadoc starts. */
56 private static final String JAVADOC_START = "/**";
57
58 /**
59 * Parses the given Javadoc comment AST into a {@link ParseStatus} object.
60 *
61 * <p>
62 * This method extracts the raw Javadoc comment text from the supplied
63 * {@link DetailAST}, creates a new lexer and parser for the Javadoc grammar,
64 * and attempts to parse it into an AST of {@link DetailNode}s.
65 * The parser uses {@link PredictionMode#SLL} for
66 * faster performance and stops parsing on the first error encountered by
67 * using {@link CheckstyleParserErrorStrategy}.
68 * </p>
69 *
70 * @param javadocCommentAst
71 * the {@link DetailAST} node representing the Javadoc comment in the
72 * source file
73 * @return a {@link ParseStatus} containing the root of the parsed Javadoc
74 * tree (if successful), the first non-tight HTML tag (if any), and
75 * the error message (if parsing failed)
76 */
77 public ParseStatus parseJavadocComment(DetailAST javadocCommentAst) {
78 final int blockCommentLineNumber = javadocCommentAst.getLineNo();
79
80 final String javadocComment = JavadocUtil.getJavadocCommentContent(javadocCommentAst);
81 final ParseStatus result = new ParseStatus();
82
83 // Use a new error listener each time to be able to use
84 // one check instance for multiple files to be checked
85 // without getting side effects.
86 final DescriptiveErrorListener errorListener = new DescriptiveErrorListener();
87
88 // Log messages should have line number in scope of file,
89 // not in scope of Javadoc comment.
90 // Offset is line number of beginning of Javadoc comment.
91 errorListener.setOffset(javadocCommentAst.getLineNo() - 1);
92
93 final JavadocCommentsLexer lexer =
94 new JavadocCommentsLexer(CharStreams.fromString(javadocComment), true);
95
96 lexer.removeErrorListeners();
97 lexer.addErrorListener(errorListener);
98
99 final CommonTokenStream tokens = new CommonTokenStream(lexer);
100 tokens.fill();
101
102 final Set<SimpleToken> unclosedTags = lexer.getUnclosedTagNameTokens();
103 final JavadocCommentsParser parser = new JavadocCommentsParser(tokens, unclosedTags);
104
105 // set prediction mode to SLL to speed up parsing
106 parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
107
108 // remove default error listeners
109 parser.removeErrorListeners();
110
111 parser.addErrorListener(errorListener);
112
113 // JavadocParserErrorStrategy stops parsing on first parse error encountered unlike the
114 // DefaultErrorStrategy used by ANTLR which rather attempts error recovery.
115 parser.setErrorHandler(new CheckstyleParserErrorStrategy());
116
117 try {
118 final JavadocCommentsParser.JavadocContext javadoc = parser.javadoc();
119 final int javadocColumnNumber = javadocCommentAst.getColumnNo()
120 + JAVADOC_START.length();
121
122 final JavadocCommentsAstVisitor visitor = new JavadocCommentsAstVisitor(
123 tokens, blockCommentLineNumber, javadocColumnNumber);
124 final DetailNode tree = visitor.visit(javadoc);
125
126 result.setTree(tree);
127
128 result.firstNonTightHtmlTag = visitor.getFirstNonTightHtmlTag();
129
130 result.setParseErrorMessage(errorListener.getErrorMessage());
131 }
132 catch (ParseCancellationException | IllegalArgumentException exc) {
133 result.setParseErrorMessage(errorListener.getErrorMessage());
134 }
135
136 return result;
137 }
138
139 /**
140 * Custom error listener for JavadocParser that prints user readable errors.
141 */
142 private static final class DescriptiveErrorListener extends BaseErrorListener {
143
144 /**
145 * Offset is line number of beginning of the Javadoc comment. Log
146 * messages should have line number in scope of file, not in scope of
147 * Javadoc comment.
148 */
149 private int offset;
150
151 /**
152 * Error message that appeared while parsing.
153 */
154 private ParseErrorMessage errorMessage;
155
156 /**
157 * Getter for error message during parsing.
158 *
159 * @return Error message during parsing.
160 */
161 private ParseErrorMessage getErrorMessage() {
162 return errorMessage;
163 }
164
165 /**
166 * Sets offset. Offset is line number of beginning of the Javadoc
167 * comment. Log messages should have line number in scope of file, not
168 * in scope of Javadoc comment.
169 *
170 * @param offset
171 * offset line number
172 */
173 /* package */ void setOffset(int offset) {
174 this.offset = offset;
175 }
176
177 /**
178 * Logs parser errors in Checkstyle manner. Parser can generate error
179 * messages. There is special error that parser can generate. It is
180 * missed close HTML tag. This case is special because parser prints
181 * error like {@code "no viable alternative at input 'b \n *\n'"} and it
182 * is not clear that error is about missed close HTML tag. Other error
183 * messages are not special and logged simply as "Parse Error...".
184 *
185 * <p>{@inheritDoc}
186 */
187 @Override
188 public void syntaxError(
189 Recognizer<?, ?> recognizer, Object offendingSymbol,
190 int line, int charPositionInLine,
191 String msg, RecognitionException ex) {
192 final int lineNumber = offset + line;
193
194 final String target;
195 if (recognizer instanceof JavadocCommentsLexer lexer) {
196 target = lexer.getPreviousToken().getText();
197 }
198 else {
199 final int ruleIndex = ex.getCtx().getRuleIndex();
200 final String ruleName = recognizer.getRuleNames()[ruleIndex];
201 target = convertUpperCamelToUpperUnderscore(ruleName);
202 }
203
204 errorMessage = new ParseErrorMessage(lineNumber,
205 MSG_JAVADOC_PARSE_RULE_ERROR, charPositionInLine, msg, target);
206
207 }
208
209 /**
210 * Converts the given {@code text} from camel case to all upper case with
211 * underscores separating each word.
212 *
213 * @param text The string to convert.
214 * @return The result of the conversion.
215 */
216 private static String convertUpperCamelToUpperUnderscore(String text) {
217 final StringBuilder result = new StringBuilder(20);
218 for (int index = 0; index < text.length(); index++) {
219 final char letter = text.charAt(index);
220 if (Character.isUpperCase(letter)) {
221 result.append('_');
222 }
223 result.append(Character.toUpperCase(letter));
224 }
225 return result.toString();
226 }
227 }
228
229 /**
230 * Contains result of parsing javadoc comment: DetailNode tree and parse
231 * error message.
232 */
233 public static class ParseStatus {
234
235 /**
236 * DetailNode tree (is null if parsing fails).
237 */
238 private DetailNode tree;
239
240 /**
241 * Parse error message (is null if parsing is successful).
242 */
243 private ParseErrorMessage parseErrorMessage;
244
245 /**
246 * Stores the first non-tight HTML tag encountered while parsing javadoc.
247 *
248 * @see <a
249 * href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
250 * Tight HTML rules</a>
251 */
252 private DetailNode firstNonTightHtmlTag;
253
254 /**
255 * Getter for DetailNode tree.
256 *
257 * @return DetailNode tree if parsing was successful, null otherwise.
258 */
259 public DetailNode getTree() {
260 return tree;
261 }
262
263 /**
264 * Sets DetailNode tree.
265 *
266 * @param tree DetailNode tree.
267 */
268 public void setTree(DetailNode tree) {
269 this.tree = tree;
270 }
271
272 /**
273 * Getter for error message during parsing.
274 *
275 * @return Error message if parsing was unsuccessful, null otherwise.
276 */
277 public ParseErrorMessage getParseErrorMessage() {
278 return parseErrorMessage;
279 }
280
281 /**
282 * Sets parse error message.
283 *
284 * @param parseErrorMessage Parse error message.
285 */
286 public void setParseErrorMessage(ParseErrorMessage parseErrorMessage) {
287 this.parseErrorMessage = parseErrorMessage;
288 }
289
290 /**
291 * This method is used to check if the javadoc parsed has non-tight HTML tags.
292 *
293 * @return returns true if the javadoc has at least one non-tight HTML tag; false otherwise
294 * @see <a
295 * href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
296 * Tight HTML rules</a>
297 */
298 public boolean isNonTight() {
299 return firstNonTightHtmlTag != null;
300 }
301
302 /**
303 * Getter for the first non-tight HTML tag encountered while parsing javadoc.
304 *
305 * @return the first non-tight HTML tag that is encountered while parsing Javadoc,
306 * if one exists
307 * @see <a href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
308 * Tight HTML rules</a>
309 */
310 public DetailNode getFirstNonTightHtmlTag() {
311 return firstNonTightHtmlTag;
312 }
313
314 }
315
316 /**
317 * Contains information about parse error message.
318 */
319 public static class ParseErrorMessage {
320
321 /**
322 * Line number where parse error occurred.
323 */
324 private final int lineNumber;
325
326 /**
327 * Key for error message.
328 */
329 private final String messageKey;
330
331 /**
332 * Error message arguments.
333 */
334 private final Object[] messageArguments;
335
336 /**
337 * Initializes parse error message.
338 *
339 * @param lineNumber line number
340 * @param messageKey message key
341 * @param messageArguments message arguments
342 */
343 /* package */ ParseErrorMessage(int lineNumber, String messageKey,
344 Object... messageArguments) {
345 this.lineNumber = lineNumber;
346 this.messageKey = messageKey;
347 this.messageArguments = messageArguments.clone();
348 }
349
350 /**
351 * Getter for line number where parse error occurred.
352 *
353 * @return Line number where parse error occurred.
354 */
355 public int getLineNumber() {
356 return lineNumber;
357 }
358
359 /**
360 * Getter for key for error message.
361 *
362 * @return Key for error message.
363 */
364 public String getMessageKey() {
365 return messageKey;
366 }
367
368 /**
369 * Getter for error message arguments.
370 *
371 * @return Array of error message arguments.
372 */
373 public Object[] getMessageArguments() {
374 return messageArguments.clone();
375 }
376
377 }
378 }