View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2025 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.javadoc;
21  
22  import java.util.ArrayDeque;
23  import java.util.Arrays;
24  import java.util.Deque;
25  import java.util.List;
26  import java.util.Locale;
27  import java.util.Set;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import com.puppycrawl.tools.checkstyle.JavadocDetailNodeParser;
32  import com.puppycrawl.tools.checkstyle.StatelessCheck;
33  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
34  import com.puppycrawl.tools.checkstyle.api.DetailAST;
35  import com.puppycrawl.tools.checkstyle.api.FileContents;
36  import com.puppycrawl.tools.checkstyle.api.Scope;
37  import com.puppycrawl.tools.checkstyle.api.TextBlock;
38  import com.puppycrawl.tools.checkstyle.api.TokenTypes;
39  import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
40  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
41  import com.puppycrawl.tools.checkstyle.utils.ScopeUtil;
42  
43  /**
44   * <div>
45   * Validates Javadoc comments to help ensure they are well formed.
46   * </div>
47   *
48   * <p>
49   * The following checks are performed:
50   * </p>
51   * <ul>
52   * <li>
53   * Ensures the first sentence ends with proper punctuation
54   * (That is a period, question mark, or exclamation mark, by default).
55   * Note that this check is not applied to inline {@code @return} tags,
56   * because the Javadoc tool automatically appends a period to the end of the tag
57   * content.
58   * Javadoc automatically places the first sentence in the method summary
59   * table and index. Without proper punctuation the Javadoc may be malformed.
60   * All items eligible for the {@code {@inheritDoc}} tag are exempt from this
61   * requirement.
62   * </li>
63   * <li>
64   * Check text for Javadoc statements that do not have any description.
65   * This includes both completely empty Javadoc, and Javadoc with only tags
66   * such as {@code @param} and {@code @return}.
67   * </li>
68   * <li>
69   * Check text for incomplete HTML tags. Verifies that HTML tags have
70   * corresponding end tags and issues an "Unclosed HTML tag found:" error if not.
71   * An "Extra HTML tag found:" error is issued if an end tag is found without
72   * a previous open tag.
73   * </li>
74   * <li>
75   * Check that a package Javadoc comment is well-formed (as described above).
76   * </li>
77   * <li>
78   * Check for allowed HTML tags. The list of allowed HTML tags is
79   * "a", "abbr", "acronym", "address", "area", "b", "bdo", "big", "blockquote",
80   * "br", "caption", "cite", "code", "colgroup", "dd", "del", "dfn", "div", "dl",
81   * "dt", "em", "fieldset", "font", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
82   * "i", "img", "ins", "kbd", "li", "ol", "p", "pre", "q", "samp", "small",
83   * "span", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
84   * "thead", "tr", "tt", "u", "ul", "var".
85   * </li>
86   * </ul>
87   *
88   * <p>
89   * These checks were patterned after the checks made by the
90   * <a href="https://maven-doccheck.sourceforge.net">DocCheck</a> doclet
91   * available from Sun. Note: Sun's original DocCheck tool no longer exists.
92   * </p>
93   *
94   * @since 3.2
95   */
96  @StatelessCheck
97  public class JavadocStyleCheck
98      extends AbstractCheck {
99  
100     /** Message property key for the Empty Javadoc message. */
101     public static final String MSG_EMPTY = "javadoc.empty";
102 
103     /** Message property key for the missing end-of-sentence punctuation message. */
104     public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
105 
106     /** Message property key for the Incomplete Tag message. */
107     public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
108 
109     /** Message property key for the Unclosed HTML message (key reused from the parser). */
110     public static final String MSG_UNCLOSED_HTML = JavadocDetailNodeParser.MSG_UNCLOSED_HTML_TAG;
111 
112     /** Message property key for the Extra HTML message. */
113     public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
114 
115     /** Lower-case names of HTML tags that do not require a close tag. */
116     private static final Set<String> SINGLE_TAGS = Set.of(
117         "br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th"
118     );
119 
120     /**
121      * Lower-case names of HTML tags that are allowed in Javadoc comments.
122      * From <a href="https://www.w3schools.com/tags/default.asp">w3schools</a>:
123      * <br>
124      * The forms and structure tags are not allowed.
125      */
126     private static final Set<String> ALLOWED_TAGS = Set.of(
127         "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
128         "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
129         "del", "dfn", "div", "dl", "dt", "em", "fieldset", "font", "h1",
130         "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
131         "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
132         "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead",
133         "tr", "tt", "u", "ul", "var"
134     );
135 
136     /** Pattern matching an inline {@code {@return ...}} tag and any whitespace after it. */
137     private static final Pattern INLINE_RETURN_TAG_PATTERN =
138             Pattern.compile("\\{@return.*?}\\s*");
139 
140     /** Pattern matching a period that is followed by whitespace or the end of the input. */
141     private static final Pattern SENTENCE_SEPARATOR = Pattern.compile("\\.(?=\\s|$)");
142 
143     /** Specify the visibility scope where Javadoc comments are checked. */
144     private Scope scope = Scope.PRIVATE;
145 
146     /** Specify the visibility scope where Javadoc comments are not checked. */
147     private Scope excludeScope;
148 
149     /** Specify the format for matching the end of a sentence. */
150     private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");
151 
152     /**
153      * Control whether to check the first sentence for proper end of sentence.
154      */
155     private boolean checkFirstSentence = true;
156 
157     /**
158      * Control whether to check for incomplete HTML tags.
159      */
160     private boolean checkHtml = true;
161 
162     /**
163      * Control whether to check if the Javadoc is missing a describing text.
164      */
165     private boolean checkEmptyJavadoc;
166 
167     @Override
168     public int[] getDefaultTokens() {
            // By default the check runs on every token it accepts.
169         return getAcceptableTokens();
170     }
171 
172     @Override
173     public int[] getAcceptableTokens() {
            // Declaration tokens for which visitToken looks up the preceding Javadoc comment.
174         return new int[] {
175             TokenTypes.ANNOTATION_DEF,
176             TokenTypes.ANNOTATION_FIELD_DEF,
177             TokenTypes.CLASS_DEF,
178             TokenTypes.CTOR_DEF,
179             TokenTypes.ENUM_CONSTANT_DEF,
180             TokenTypes.ENUM_DEF,
181             TokenTypes.INTERFACE_DEF,
182             TokenTypes.METHOD_DEF,
183             TokenTypes.PACKAGE_DEF,
184             TokenTypes.VARIABLE_DEF,
185             TokenTypes.RECORD_DEF,
186             TokenTypes.COMPACT_CTOR_DEF,
187         };
188     }
189 
190     @Override
191     public int[] getRequiredTokens() {
            // This check declares no required tokens: the shared empty array is returned.
192         return CommonUtil.EMPTY_INT_ARRAY;
193     }
194 
195     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
196     @Override
197     @SuppressWarnings("deprecation")
198     public void visitToken(DetailAST ast) {
199         if (shouldCheck(ast)) {
200             final FileContents contents = getFileContents();
201             // Need to start searching for the comment before the annotations
202             // that may exist. Even if annotations are not defined on the
203             // package, the ANNOTATIONS AST is defined.
204             final TextBlock textBlock =
205                 contents.getJavadocBefore(ast.getFirstChild().getLineNo());
206 
207             checkComment(ast, textBlock);
208         }
209     }
210 
211     /**
212      * Whether we should check this node.
213      *
214      * @param ast a given node.
215      * @return whether we should check a given node.
216      */
217     private boolean shouldCheck(final DetailAST ast) {
218         boolean check = false;
219 
220         if (ast.getType() == TokenTypes.PACKAGE_DEF) {
221             check = CheckUtil.isPackageInfo(getFilePath());
222         }
223         else if (!ScopeUtil.isInCodeBlock(ast)) {
224             final Scope customScope = ScopeUtil.getScope(ast);
225             final Scope surroundingScope = ScopeUtil.getSurroundingScope(ast);
226 
227             check = customScope.isIn(scope)
228                     && surroundingScope.isIn(scope)
229                     && (excludeScope == null || !customScope.isIn(excludeScope)
230                             || !surroundingScope.isIn(excludeScope));
231         }
232         return check;
233     }
234 
235     /**
236      * Performs the various checks against the Javadoc comment.
237      *
238      * @param ast the AST of the element being documented
239      * @param comment the source lines that make up the Javadoc comment.
240      *
241      * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
242      * @see #checkHtmlTags(DetailAST, TextBlock)
243      */
244     private void checkComment(final DetailAST ast, final TextBlock comment) {
245         if (comment != null) {
246             if (checkFirstSentence) {
247                 checkFirstSentenceEnding(ast, comment);
248             }
249 
250             if (checkHtml) {
251                 checkHtmlTags(ast, comment);
252             }
253 
254             if (checkEmptyJavadoc) {
255                 checkJavadocIsNotEmpty(comment);
256             }
257         }
258     }
259 
260     /**
261      * Checks that the first sentence ends with proper punctuation.  This method
262      * uses a regular expression that checks for the presence of a period,
263      * question mark, or exclamation mark followed either by whitespace, an
264      * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
265      * comments for TokenTypes that are valid for {_AT_inheritDoc}.
266      *
267      * @param ast the current node
268      * @param comment the source lines that make up the Javadoc comment.
269      */
270     private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
271         final String commentText = getCommentText(comment.getText());
272         final boolean hasInLineReturnTag = Arrays.stream(SENTENCE_SEPARATOR.split(commentText))
273                 .findFirst()
274                 .map(INLINE_RETURN_TAG_PATTERN::matcher)
275                 .filter(Matcher::find)
276                 .isPresent();
277 
278         if (!hasInLineReturnTag
279             && !commentText.isEmpty()
280             && !endOfSentenceFormat.matcher(commentText).find()
281             && !(commentText.startsWith("{@inheritDoc}")
282             && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
283             log(comment.getStartLineNo(), MSG_NO_PERIOD);
284         }
285     }
286 
287     /**
288      * Checks that the Javadoc is not empty.
289      *
290      * @param comment the source lines that make up the Javadoc comment.
291      */
292     private void checkJavadocIsNotEmpty(TextBlock comment) {
293         final String commentText = getCommentText(comment.getText());
294 
295         if (commentText.isEmpty()) {
296             log(comment.getStartLineNo(), MSG_EMPTY);
297         }
298     }
299 
300     /**
301      * Returns the comment text from the Javadoc.
302      *
303      * @param comments the lines of Javadoc.
304      * @return a comment text String.
305      */
306     private static String getCommentText(String... comments) {
307         final StringBuilder builder = new StringBuilder(1024);
308         for (final String line : comments) {
309             final int textStart = findTextStart(line);
310 
311             if (textStart != -1) {
312                 if (line.charAt(textStart) == '@') {
313                     // we have found the tag section
314                     break;
315                 }
316                 builder.append(line.substring(textStart));
317                 trimTail(builder);
318                 builder.append('\n');
319             }
320         }
321 
322         return builder.toString().trim();
323     }
324 
325     /**
326      * Finds the index of the first non-whitespace character ignoring the
327      * Javadoc comment start and end strings (&#47;** and *&#47;) as well as any
328      * leading asterisk.
329      *
330      * @param line the Javadoc comment line of text to scan.
331      * @return the int index relative to 0 for the start of text
332      *         or -1 if not found.
333      */
334     private static int findTextStart(String line) {
335         int textStart = -1;
336         int index = 0;
337         while (index < line.length()) {
338             if (!Character.isWhitespace(line.charAt(index))) {
339                 if (line.regionMatches(index, "/**", 0, "/**".length())
340                     || line.regionMatches(index, "*/", 0, 2)) {
341                     index++;
342                 }
343                 else if (line.charAt(index) != '*') {
344                     textStart = index;
345                     break;
346                 }
347             }
348             index++;
349         }
350         return textStart;
351     }
352 
353     /**
354      * Trims any trailing whitespace or the end of Javadoc comment string.
355      *
356      * @param builder the StringBuilder to trim.
357      */
358     private static void trimTail(StringBuilder builder) {
359         int index = builder.length() - 1;
360         while (true) {
361             if (Character.isWhitespace(builder.charAt(index))) {
362                 builder.deleteCharAt(index);
363             }
364             else if (index > 0 && builder.charAt(index) == '/'
365                     && builder.charAt(index - 1) == '*') {
366                 builder.deleteCharAt(index);
367                 builder.deleteCharAt(index - 1);
368                 index--;
369                 while (builder.charAt(index - 1) == '*') {
370                     builder.deleteCharAt(index - 1);
371                     index--;
372                 }
373             }
374             else {
375                 break;
376             }
377             index--;
378         }
379     }
380 
381     /**
382      * Checks the comment for HTML tags that do not have a corresponding close
383      * tag or a close tag that has no previous open tag.  This code was
384      * primarily copied from the DocCheck checkHtml method.
         *
         * <p>
         * Tags are read one by one from a {@code TagParser}. Open tags that are in the
         * allowed set are pushed on a stack; a close tag is either reported as extra
         * (no matching open tag on the stack) or used to unwind the stack. Scanning
         * stops at the first incomplete tag. Tags still on the stack at the end are
         * reported as unclosed, except single tags, repeats of the tag just reported,
         * and tags whose id equals a type parameter name of the documented element.
         * </p>
385      *
386      * @param ast the node with the Javadoc
387      * @param comment the {@code TextBlock} which represents
388      *                 the Javadoc comment.
389      * @noinspection MethodWithMultipleReturnPoints
390      * @noinspectionreason MethodWithMultipleReturnPoints - check and method are
391      *      too complex to break apart
392      */
393     // -@cs[ReturnCount] Too complex to break apart.
394     private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
395         final int lineNo = comment.getStartLineNo();
396         final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
397         final String[] text = comment.getText();
398 
399         final TagParser parser = new TagParser(text, lineNo);
400 
401         while (parser.hasNextTag()) {
402             final HtmlTag tag = parser.nextTag();
403 
404             if (tag.isIncompleteTag()) {
                    // Report the comment line that holds the tag (file line numbers are
                    // converted to an index into text) and give up on the rest of the
                    // comment, including anything still on the stack.
405                 log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
406                     text[tag.getLineNo() - lineNo]);
407                 return;
408             }
409             if (tag.isClosedTag()) {
                    // NOTE(review): presumably a tag that closes itself, e.g. <br/>;
                    // confirm against HtmlTag.
410                 // do nothing
411                 continue;
412             }
413             if (tag.isCloseTag()) {
414                 // We have found a close tag.
415                 if (isExtraHtml(tag.getId(), htmlStack)) {
416                     // No corresponding open tag was found on the stack.
417                     log(tag.getLineNo(),
418                         tag.getPosition(),
419                         MSG_EXTRA_HTML,
420                         tag.getText());
421                 }
422                 else {
423                     // See if there are any unclosed tags that were opened
424                     // after this one.
425                     checkUnclosedTags(htmlStack, tag.getId());
426                 }
427             }
428             else {
429                 // We only push html tags that are allowed
430                 if (isAllowedTag(tag)) {
431                     htmlStack.push(tag);
432                 }
433             }
434         }
435 
436         // Identify any tags left on the stack.
437         // Skip multiples, like <b>...<b>
            // The stack is iterated from the most recently pushed tag to the oldest one.
438         String lastFound = "";
439         final List<String> typeParameters = CheckUtil.getTypeParameterNames(ast);
440         for (final HtmlTag htmlTag : htmlStack) {
441             if (!isSingleTag(htmlTag)
442                 && !htmlTag.getId().equals(lastFound)
443                 && !typeParameters.contains(htmlTag.getId())) {
444                 log(htmlTag.getLineNo(), htmlTag.getPosition(),
445                         MSG_UNCLOSED_HTML, htmlTag.getText());
446                 lastFound = htmlTag.getId();
447             }
448         }
449     }
450 
451     /**
452      * Checks to see if there are any unclosed tags on the stack.  The token
453      * represents a html tag that has been closed and has a corresponding open
454      * tag on the stack.  Any tags, except single tags, that were opened
455      * (pushed on the stack) after the token are missing a close.
456      *
457      * @param htmlStack the stack of opened HTML tags.
458      * @param token the current HTML tag name that has been closed.
459      */
460     private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
461         final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
462         HtmlTag lastOpenTag = htmlStack.pop();
463         while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
464             // Find unclosed elements. Put them on a stack so the
465             // output order won't be back-to-front.
466             if (isSingleTag(lastOpenTag)) {
467                 lastOpenTag = htmlStack.pop();
468             }
469             else {
470                 unclosedTags.push(lastOpenTag);
471                 lastOpenTag = htmlStack.pop();
472             }
473         }
474 
475         // Output the unterminated tags, if any
476         // Skip multiples, like <b>..<b>
477         String lastFound = "";
478         for (final HtmlTag htag : unclosedTags) {
479             lastOpenTag = htag;
480             if (lastOpenTag.getId().equals(lastFound)) {
481                 continue;
482             }
483             lastFound = lastOpenTag.getId();
484             log(lastOpenTag.getLineNo(),
485                 lastOpenTag.getPosition(),
486                 MSG_UNCLOSED_HTML,
487                 lastOpenTag.getText());
488         }
489     }
490 
491     /**
492      * Determines if the HtmlTag is one which does not require a close tag.
493      *
494      * @param tag the HtmlTag to check.
495      * @return {@code true} if the HtmlTag is a single tag.
496      */
497     private static boolean isSingleTag(HtmlTag tag) {
498         // If it's a singleton tag (<p>, <br>, etc.), ignore it
499         // Can't simply not put them on the stack, since singletons
500         // like <dt> and <dd> (unhappily) may either be terminated
501         // or not terminated. Both options are legal.
502         return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
503     }
504 
505     /**
506      * Determines if the HtmlTag is one which is allowed in a javadoc.
507      *
508      * @param tag the HtmlTag to check.
509      * @return {@code true} if the HtmlTag is an allowed html tag.
510      */
511     private static boolean isAllowedTag(HtmlTag tag) {
512         return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
513     }
514 
515     /**
516      * Determines if the given token is an extra HTML tag. This indicates that
517      * a close tag was found that does not have a corresponding open tag.
518      *
519      * @param token an HTML tag id for which a close was found.
520      * @param htmlStack a Stack of previous open HTML tags.
521      * @return {@code false} if a previous open tag was found
522      *         for the token.
523      */
524     private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
525         boolean isExtra = true;
526         for (final HtmlTag tag : htmlStack) {
527             // Loop, looking for tags that are closed.
528             // The loop is needed in case there are unclosed
529             // tags on the stack. In that case, the stack would
530             // not be empty, but this tag would still be extra.
531             if (token.equalsIgnoreCase(tag.getId())) {
532                 isExtra = false;
533                 break;
534             }
535         }
536 
537         return isExtra;
538     }
539 
540     /**
541      * Setter to specify the visibility scope where Javadoc comments are checked.
542      *
543      * @param scope scope that an element and its surrounding scope must both be in.
         *              The field is initialized to {@code Scope.PRIVATE}.
544      * @since 3.2
545      */
546     public void setScope(Scope scope) {
547         this.scope = scope;
548     }
549 
550     /**
551      * Setter to specify the visibility scope where Javadoc comments are not checked.
552      *
553      * @param excludeScope scope to leave out; {@code null} (the initial value) excludes nothing.
554      * @since 3.4
555      */
556     public void setExcludeScope(Scope excludeScope) {
557         this.excludeScope = excludeScope;
558     }
559 
560     /**
561      * Setter to specify the format for matching the end of a sentence.
562      *
563      * @param pattern pattern that must be found somewhere in the description text.
564      * @since 5.0
565      */
566     public void setEndOfSentenceFormat(Pattern pattern) {
567         endOfSentenceFormat = pattern;
568     }
569 
570     /**
571      * Setter to control whether to check the first sentence for proper end of sentence.
572      *
573      * @param flag {@code true} if the first sentence is to be checked (the initial value).
574      * @since 3.2
575      */
576     public void setCheckFirstSentence(boolean flag) {
577         checkFirstSentence = flag;
578     }
579 
580     /**
581      * Setter to control whether to check for incomplete HTML tags.
582      *
583      * @param flag {@code true} if HTML checking is to be performed (the initial value).
584      * @since 3.2
585      */
586     public void setCheckHtml(boolean flag) {
587         checkHtml = flag;
588     }
589 
590     /**
591      * Setter to control whether to check if the Javadoc is missing a describing text.
592      *
593      * @param flag {@code true} if empty Javadoc checking should be done; initially {@code false}.
594      * @since 3.4
595      */
596     public void setCheckEmptyJavadoc(boolean flag) {
597         checkEmptyJavadoc = flag;
598     }
599 
600 }